def update_elements(self):
    """Rebuild the element dropdown for the currently selected group type.

    The first entry of ``self.group_type_box.getcurselection()`` selects what
    is listed.  ``self.pdblines`` is parsed with Protool and the entries are:

    * "Residues": ``"<residue> <residue name>"`` for every residue, sorted.
    * "Atoms": every atom in the structure plus any atom named in
      ``self.AAdefs[resname]["atoms"]`` that the structure lacks, sorted.
    * "Titratable groups": ``"<residue> <group name>"`` for every group.

    Any other selection is reported on stdout.  ``self.group_elements_box``
    is only updated when the resulting list is non-empty.
    """
    elements = None
    group_type = self.group_type_box.getcurselection()[0]
    import Protool

    if group_type == "Residues":
        P = Protool.structureIO()
        P.parsepdb(self.pdblines)
        elements = ["%s %s" % (res, P.resname(res))
                    for res in sorted(P.residues.keys())]
    elif group_type == "Atoms":
        P = Protool.structureIO()
        P.parsepdb(self.pdblines)
        atoms = list(P.atoms.keys())
        for res in P.residues.keys():
            resname = P.resname(res)
            if resname not in self.AAdefs:
                continue
            # Offer atoms from the residue definition that the PDB lacks
            for defatom, coord, dummy in self.AAdefs[resname]["atoms"]:
                atom_name = "%s:%s" % (res, defatom)
                if atom_name not in P.atoms:
                    atoms.append(atom_name)
        atoms.sort()
        elements = list(atoms)
    elif group_type == "Titratable groups":
        P = Protool.structureIO()
        P.parsepdb(self.pdblines)
        P.get_titratable_groups()
        elements = []
        for res in sorted(P.titratable_groups.keys()):
            for titgrp in P.titratable_groups[res]:
                elements.append("%s %s" % (res, titgrp["name"]))
    else:
        print("Unkown group type %s" % (group_type,))

    # An empty (or missing) list leaves the current dropdown untouched
    if elements:
        self.group_elements_box.setlist(elements)
    return
def update_elements(self):
    """Rebuild the element dropdown for the currently selected group type.

    The first entry of ``self.group_type_box.getcurselection()`` selects what
    is listed.  ``self.pdblines`` is parsed with Protool and the entries are:

    * 'Residues': ``'<residue> <residue name>'`` for every residue, sorted.
    * 'Atoms': every atom in the structure plus any atom named in
      ``self.AAdefs[resname]['atoms']`` that the structure lacks, sorted.
    * 'Titratable groups': ``'<residue> <group name>'`` for every group.

    Any other selection is reported on stdout.  ``self.group_elements_box``
    is only updated when the resulting list is non-empty.
    """
    elements = None
    group_type = self.group_type_box.getcurselection()[0]
    import Protool

    if group_type == 'Residues':
        P = Protool.structureIO()
        P.parsepdb(self.pdblines)
        elements = ['%s %s' % (res, P.resname(res))
                    for res in sorted(P.residues.keys())]
    elif group_type == 'Atoms':
        P = Protool.structureIO()
        P.parsepdb(self.pdblines)
        atoms = list(P.atoms.keys())
        for res in P.residues.keys():
            resname = P.resname(res)
            if resname not in self.AAdefs:
                continue
            # Offer atoms from the residue definition that the PDB lacks
            for defatom, coord, dummy in self.AAdefs[resname]['atoms']:
                atom_name = '%s:%s' % (res, defatom)
                if atom_name not in P.atoms:
                    atoms.append(atom_name)
        atoms.sort()
        elements = list(atoms)
    elif group_type == 'Titratable groups':
        P = Protool.structureIO()
        P.parsepdb(self.pdblines)
        P.get_titratable_groups()
        elements = []
        for res in sorted(P.titratable_groups.keys()):
            for titgrp in P.titratable_groups[res]:
                elements.append('%s %s' % (res, titgrp['name']))
    else:
        print('Unkown group type %s' % (group_type,))

    # An empty (or missing) list leaves the current dropdown untouched
    if elements:
        self.group_elements_box.setlist(elements)
    return
def checkMutation(self, DB, name, ref=None, X=None):
    """Recompute the mutation code of record ``name`` against the reference.

    Meant to be triggered whenever the reference protein is altered so that
    the stored mutation codes are updated.

    Args:
        DB: database object providing ``get(name)`` and ``meta.refprotein``.
        name: key of the record to check.
        ref: key of the reference record; defaults to ``DB.meta.refprotein``.
        X: Protool structure of the reference.  When omitted it is parsed
            from ``DB.get(ref).Structure``.

    Returns:
        None.  ``Mutations`` on the record is only rewritten when the record
        has a sequence and that sequence differs from the reference.
    """
    prot = DB.get(name)
    if prot.aaseq is None:
        return
    if ref is None:
        # Use the database that was passed in; the record and the reference
        # are both looked up in DB below, so self.DB would be inconsistent.
        ref = DB.meta.refprotein
    refseq = self.AAList2String(DB.get(ref).aaseq)
    seq = self.AAList2String(prot.aaseq)
    if seq == refseq:
        return

    # Derive the mutation set from the two sequences and the ref structure
    import PEATSA.Core as Core
    if X is None:
        import Protool
        X = Protool.structureIO()
        X.parsepdb(DB.get(ref).Structure)
    # NOTE(review): original indentation was lost; this debug print is
    # assumed to sit outside the ``X is None`` branch - confirm.
    print(X)
    mset = Core.Data.mutationSetFromSequencesAndStructure(refseq, seq, X)
    prot.Mutations = mset.codeString(X)
    return
def setSequencesfromMutationCodes(self, DB=None, callback=None, selected=None):
    """Set missing amino-acid sequences from the reference and mutation codes.

    Assumes each mutation code is consistent with the reference sequence.

    Args:
        DB: database object; nothing is done when it is None.
        callback: accepted for interface compatibility; not used here.
        selected: record names to process.  When None, every record
            returned by ``DB.getRecs()`` is processed.

    Records whose ``hasStructure()`` is ``'available'`` are skipped.  For a
    record with no ``aaseq`` but with ``Mutations``, the mutation code is
    applied to chain 'A' of the reference structure and the result is
    stored in ``rec.aaseq``.
    """
    if DB is None:
        return
    if selected is None:
        # The default used to crash in the loop below; fall back to all records
        selected = DB.getRecs()
    refprot = DB.meta.refprotein
    refpdb = DB[refprot].Structure

    # Protool instance for the reference pdb
    import Protool
    Xref = Protool.structureIO()
    Xref.parsepdb(refpdb)

    refaa = None  # chain 'A' sequence of the reference, built on first use
    for protein in selected:
        rec = DB.get(protein)
        if rec.hasStructure() == 'available':
            continue
        print('Protein: %s' % (protein,))
        # If there is no sequence, try to create one from the mutation code
        if rec.aaseq is None and rec.Mutations is not None:
            print('no sequence, using mutation code and ref protein seq')
            import PEATSA.Core as Core
            print('Record has mutation code %s' % rec.Mutations)
            mutationSet = Core.Data.MutationSet(rec.Mutations)
            if refaa is None:
                Xref.Remove_All_NonAminoAcids()
                refaa = Core.Data.GetChainSequences(Xref)['A']
            mutseq = mutationSet.applyToSequence(refaa, id='A', offset=None, pdb=Xref)
            rec.aaseq = self.string2AAseq(mutseq)
    return
def __init__(self):
    """Load ``test.pdb`` into Protool and FFF, build the model, run the test.

    ``self.PI`` holds the Protool structure and ``self.aas`` the keys of its
    ``trueaminoacids`` table.  The FFF parameter directory is taken to be
    ``parameters`` under the parent of this script's directory, with the
    rotamer library in ``parameters/small_lib``.  Finishes by calling
    ``self.mutate_test()``.
    """
    import Protool
    self.PI = Protool.structureIO()
    self.aas = self.PI.trueaminoacids.keys()
    self.PI.readpdb('test.pdb')

    import FFF.FFFcontrol as FFFC
    import os
    # Parent directory of the directory holding this script
    FFFdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    rotamer_library = FFFC.Rotamer_class(os.path.join(FFFdir, 'parameters/small_lib'))
    self.FFF = FFFC.FFF()
    self.FFF.read_pdb('test.pdb')
    self.Model = FFFC.model_class(self.FFF, rotamer_library,
                                  os.path.join(FFFdir, 'parameters'))

    # Test mutations
    self.mutate_test()
    return
def convert_classic_to_PEAT(operations):
    """Convert a set of classic mutations to a set of PEAT operations.

    The classic operations are in the format ``A12G+R45V`` etc.  ``wt``
    entries are skipped.

    Args:
        operations: '+'-separated classic mutation string.

    Returns:
        The '+'-joined PEAT operation string (empty when nothing but ``wt``
        was given), or False when the input contains '?', 'unknown' or
        'empty' (case-insensitive), when an operation is malformed, or when
        a residue letter is not in Protool's ``one_to_three`` table.
    """
    # Quick sanity check
    lowered = operations.lower()
    for junk in ('?', 'unknown', 'empty'):
        if junk in lowered:
            return False

    # Split every operation into (old letter, residue number, new letter)
    parsed = []
    for op in operations.split('+'):
        if op == 'wt':
            continue
        try:
            parsed.append((op[0], int(op[1:-1]), op[-1]))
        except (IndexError, ValueError):
            # Empty operation or non-numeric residue number
            return False
    if not parsed:
        return ''

    import Protool
    P = Protool.structureIO()
    POP = []
    for old, number, new in parsed:
        try:
            POP.append('%s:%s:%s:%s' % ('',
                                        str(number).zfill(P.length_of_residue_numbers),
                                        P.one_to_three[old],
                                        P.one_to_three[new]))
        except KeyError:
            return False
    return '+'.join(POP)
def get_net_charge(pdbfile, HIS, cutoff=50.0, skip_distance=355.0):
    """Get the net charge of the residues close to a histidine.

    A residue counts as close when one of its atoms, scanned in stored
    order, is nearer than ``cutoff`` to the closer of the histidine's ND1
    and NE2 atoms.  Scanning of a residue is abandoned as soon as an atom
    further away than ``skip_distance`` is met.  The histidine itself is not
    excluded from the count.

    Args:
        pdbfile: PDB file read with Protool.
        HIS: residue identifier; ``'<HIS>:ND1'`` and ``'<HIS>:NE2'`` must
            be atoms of the structure.
        cutoff: inclusion distance, in the units of ``X.dist`` (presumably
            Angstrom).  Defaults to the previously hard-coded 50.0; the old
            docstring spoke of 20 A.
        skip_distance: give-up distance; defaults to the previously
            hard-coded 355.0.

    Returns:
        The summed charge as a float: ASP and GLU count -1; LYS, ARG and
        HIS count +1.  Progress is printed on stdout.
    """
    import Protool
    X = Protool.structureIO()
    X.readpdb(pdbfile)
    close = []
    HIS_ND1 = '%s:ND1' % HIS
    HIS_NE2 = '%s:NE2' % HIS
    for residue in X.residues.keys():
        for atom in X.residues[residue]:
            mdist = min(X.dist(HIS_ND1, atom), X.dist(HIS_NE2, atom))
            if mdist < cutoff:
                close.append(residue)
                break
            elif mdist > skip_distance:
                break

    # Got all close residues, now count charge
    charge = 0.0
    nc = {'ASP': -1, 'GLU': -1, 'LYS': +1, 'ARG': +1, 'HIS': +1}
    close.sort()
    print(close)
    for res in close:
        restype = X.resname(res)
        if restype in nc:
            charge = charge + nc[restype]
            print('%s %s %s %s' % (res, restype, nc[restype], charge))
    print('Net charge %s' % (charge,))
    return charge
def checkMutation(self, DB, name, ref=None, X=None):
    """Recompute the mutation code of record ``name`` against the reference.

    Meant to be triggered whenever the reference protein is altered so that
    the stored mutation codes are updated.

    Args:
        DB: database object providing ``get(name)`` and ``meta.refprotein``.
        name: key of the record to check.
        ref: key of the reference record; defaults to ``DB.meta.refprotein``.
        X: Protool structure of the reference.  When omitted it is parsed
            from ``DB.get(ref).Structure``.

    Returns:
        None.  ``Mutations`` on the record is only rewritten when the record
        has a sequence and that sequence differs from the reference.
    """
    prot = DB.get(name)
    if prot.aaseq is None:
        return
    if ref is None:
        # Use the database that was passed in; the record and the reference
        # are both looked up in DB below, so self.DB would be inconsistent.
        ref = DB.meta.refprotein
    refseq = self.AAList2String(DB.get(ref).aaseq)
    seq = self.AAList2String(prot.aaseq)
    if seq == refseq:
        return

    # Derive the mutation set from the two sequences and the ref structure
    import PEATSA.Core as Core
    if X is None:
        import Protool
        X = Protool.structureIO()
        X.parsepdb(DB.get(ref).Structure)
    # NOTE(review): original indentation was lost; this debug print is
    # assumed to sit outside the ``X is None`` branch - confirm.
    print(X)
    mset = Core.Data.mutationSetFromSequencesAndStructure(refseq, seq, X)
    prot.Mutations = mset.codeString(X)
    return
def convert_classic_to_PEAT(operations):
    """Convert a set of classic mutations to a set of PEAT operations.

    The classic operations are in the format ``A12G+R45V`` etc.  ``wt``
    entries are skipped.

    Args:
        operations: '+'-separated classic mutation string.

    Returns:
        The '+'-joined PEAT operation string (empty when nothing but ``wt``
        was given), or False when the input contains '?', 'unknown' or
        'empty' (case-insensitive), when an operation is malformed, or when
        a residue letter is not in Protool's ``one_to_three`` table.
    """
    # Quick sanity check
    lowered = operations.lower()
    for junk in ('?', 'unknown', 'empty'):
        if junk in lowered:
            return False

    # Split every operation into (old letter, residue number, new letter)
    parsed = []
    for op in operations.split('+'):
        if op == 'wt':
            continue
        try:
            parsed.append((op[0], int(op[1:-1]), op[-1]))
        except (IndexError, ValueError):
            # Empty operation or non-numeric residue number
            return False
    if not parsed:
        return ''

    import Protool
    P = Protool.structureIO()
    POP = []
    for old, number, new in parsed:
        try:
            POP.append('%s:%s:%s:%s' % ('',
                                        str(number).zfill(P.length_of_residue_numbers),
                                        P.one_to_three[old],
                                        P.one_to_three[new]))
        except KeyError:
            return False
    return '+'.join(POP)
def __init__(self):
    """Load ``test.pdb`` into Protool and FFF, build the model, run the test.

    ``self.PI`` holds the Protool structure and ``self.aas`` the keys of its
    ``trueaminoacids`` table.  The FFF parameter directory is taken to be
    ``parameters`` under the parent of this script's directory, with the
    rotamer library in ``parameters/small_lib``.  Finishes by calling
    ``self.mutate_test()``.
    """
    import Protool
    self.PI = Protool.structureIO()
    self.aas = self.PI.trueaminoacids.keys()
    self.PI.readpdb('test.pdb')

    import FFF.FFFcontrol as FFFC
    import os
    # Parent directory of the directory holding this script
    FFFdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    rotamer_library = FFFC.Rotamer_class(os.path.join(FFFdir, 'parameters/small_lib'))
    self.FFF = FFFC.FFF()
    self.FFF.read_pdb('test.pdb')
    self.Model = FFFC.model_class(self.FFF, rotamer_library,
                                  os.path.join(FFFdir, 'parameters'))

    # Test mutations
    self.mutate_test()
    return
def remALT(self, pdbfile, environment):
    """Run Protool's ``RemoveALT()`` on ``<pdbfile>.pdb`` and overwrite it.

    The file is written back with ``dont_write_HETATMS=1`` and a
    confirmation is sent to ``environment.output``.
    """
    import Protool

    path = '%s.pdb' % (pdbfile)
    structure = Protool.structureIO()
    structure.readpdb(path)
    structure.RemoveALT()
    structure.writepdb(path, dont_write_HETATMS=1)
    environment.output('Removed alternate residues')
def remALT(self, pdbfile):
    """Run Protool's ``RemoveALT()`` on ``<pdbfile>.pdb`` and overwrite it.

    The file is written back with ``dont_write_HETATMS=1`` and a
    confirmation is printed on stdout.
    """
    import Protool

    path = '%s.pdb' % (pdbfile)
    structure = Protool.structureIO()
    structure.readpdb(path)
    structure.RemoveALT()
    structure.writepdb(path, dont_write_HETATMS=1)
    print('Removed alternate residues')
def load_groups(self):
    """Store the titratable groups of the PDB file in ``self.groups``.

    The file named by ``self.params['pdb']`` is read with Protool and the
    value returned by ``get_titratable_groups()`` is kept.
    """
    import Protool

    structure = Protool.structureIO()
    structure.readpdb(self.params['pdb'])
    self.groups = structure.get_titratable_groups()
    return
def load_groups(self):
    """Store the titratable groups of the PDB file in ``self.groups``.

    The file named by ``self.params['pdb']`` is read with Protool and the
    value returned by ``get_titratable_groups()`` is kept.
    """
    import Protool

    structure = Protool.structureIO()
    structure.readpdb(self.params['pdb'])
    self.groups = structure.get_titratable_groups()
    return
def findSequenceDifferences(child_sequence, parent_sequence, full_parent_sequence, ignoreCterm=False):
    """Find all amino acid differences between child and parent sequence.

    ``child_sequence`` and ``parent_sequence`` must be aligned and in
    1-letter format (e.g. ``AAADEFFG``) with '-' marking a gap.
    ``full_parent_sequence`` is a Protool sequence object; the first item of
    each of its entries is used as the residue identifier.

    Changes are recorded from parent -> child as strings:

    * ``insert<n>:<identifier>:<child>`` when the parent has a gap, with
      ``n`` counting consecutive insertions from 0,
    * ``delete:<identifier>:<parent>`` when the child has a gap,
    * ``<identifier>:<parent>:<child>`` for a substitution,

    with residue names in 3-letter format.  Differences past the end of
    ``full_parent_sequence`` use its last identifier, or are skipped when
    ``ignoreCterm`` is true.

    Returns:
        The list of operation strings in alignment order.
    """
    import Protool
    one_to_three = Protool.structureIO().one_to_three

    operations = []
    insert_num = 0
    full_parent_count = 0  # position in full_parent_sequence
    for parent_aa, child_aa in zip(parent_sequence, child_sequence):
        if parent_aa == child_aa:
            full_parent_count += 1
            continue

        # Find the PDB file residue number
        if full_parent_count < len(full_parent_sequence):
            aa_identifier = full_parent_sequence[full_parent_count][0]
        else:
            # We have an insertion at the Cterm
            aa_identifier = full_parent_sequence[-1][0]
            if ignoreCterm:
                continue

        # Convert to 3-letter format; gaps stay as '-'
        if parent_aa != '-':
            full_parent_count += 1
            parent_aa = one_to_three[parent_aa]
        if child_aa != '-':
            child_aa = one_to_three[child_aa]

        if parent_aa == '-':
            operations.append('insert%d:%s:%s' % (insert_num, aa_identifier, child_aa))
            insert_num += 1
            continue
        insert_num = 0
        if child_aa == '-':
            operations.append('delete:%s:%s' % (aa_identifier, parent_aa))
        else:
            operations.append('%s:%s:%s' % (aa_identifier, parent_aa, child_aa))
    return operations
def remALT(self, pdb):
    '''Removes alternative residues from the working pdb and replaces it.

    ``<pdb>.pdb`` is read with Protool, ``RemoveALT()`` is applied and the
    file is overwritten with ``dont_write_HETATMS=1``.  A confirmation is
    printed on stdout.
    '''
    import Protool

    path = '%s.pdb' % (pdb)
    structure = Protool.structureIO()
    structure.readpdb(path)
    structure.RemoveALT()
    structure.writepdb(path, dont_write_HETATMS=1)
    print("[ProteinComplexTool] Alternative Residues removed.")
def matrix():
    """Compare PBE interaction matrices with Protool's for every calculation.

    Every ``.pdb`` file in each sub-directory of ``data`` is checked with
    pKaIO.  For completed calculations the stored matrix is read and the
    same matrix is computed with ``Protool.Calculate_matrix(8)``.  Absolute
    values of pairs where both energies are non-zero are collected (Protool
    values in x, PBE values in y) and everything is handed to ``plotit``.
    """
    import os
    dirs = os.listdir('data')
    xs = []
    ys = []
    for dirname in dirs:
        print('Processing %s' % dirname)
        # Find the PDB file
        realdir = os.path.join(os.getcwd(), 'data', dirname)
        for filename in os.listdir(realdir):
            if filename[-4:] != '.pdb':
                continue
            realfile = os.path.join(realdir, filename)
            import pKa.pKaTool.pKaIO
            X = pKa.pKaTool.pKaIO.pKaIO(realfile)
            X.assess_status()
            if X.calculation_completed != 1:
                continue

            # The calc is complete. Load the matrix
            PBEmatrix = X.read_matrix()

            # Now calculate the same matrix with Protool
            # NOTE(review): Protool is not imported in this function;
            # presumably it is imported at module level - confirm.
            P = Protool.structureIO()
            P.readpdb(realfile)
            P.get_titratable_groups()
            dist_matrix = P.Calculate_matrix(8)

            x = []
            y = []
            for group1 in PBEmatrix.keys():
                for group2 in PBEmatrix.keys():
                    PBE_ene = PBEmatrix[group1][group2][0]
                    try:
                        new_ene = dist_matrix[group1][group2]
                    except Exception:
                        # Pair missing from the Protool matrix: skip it, but
                        # let KeyboardInterrupt/SystemExit through
                        continue
                    if new_ene and PBE_ene:
                        x.append(abs(new_ene))
                        y.append(abs(PBE_ene))

            # Append these results to the big arrays
            # NOTE(review): original indentation was lost; the append is
            # assumed to happen once per completed calculation - confirm.
            ys.append(y)
            xs.append(x)
    plotit(xs, ys, 'Matrix', dirs)
    return
def CleanPDB2PQR(inputFile, outputFile, forceField="amber", removeWater=True, removeLigand=True, removeAltLoc=True, addHydrogens=True, correct=True):
    '''Cleans a PDB by using PDB2PQR

    See CleanPDB for argument details.

    Note: With pdb2pqr you cannot remove water or ligands; a warning is
    written to stderr when either is requested.  Turning off hydrogen
    addition passes ``--clean``; otherwise ``correct == False`` passes
    ``--nodebump``.  When ``removeAltLoc`` is True the file is first read
    and rewritten with Protool and pdb2pqr then works on ``outputFile``.

    Errors:
        Raises ProteinDesignToolException when pdb2pqr exits with a non-zero
        status.  Any exception is reported on stdout, together with the
        pdb2pqr output when there is any, and then re-raised.'''
    import Protool

    # NOTE(review): sys, subprocess and ProteinDesignToolException are not
    # imported here; presumably they come from module level - confirm.

    # Bound before the try block so the handler below can always test it;
    # previously a failure before communicate() raised NameError there.
    stdout = None
    try:
        command = 'pdb2pqr --chain --ff=%s' % (forceField)
        if removeWater == True:
            sys.stderr.write('Warn: Currently PDB2PQR does not remove waters from pdb files\n')
        if removeLigand == True:
            sys.stderr.write('Warn: Currently PDB2PQR can not be used to remove heterogens from PDB files\n')
        if addHydrogens == False:
            sys.stderr.write('Warn: Turning of Hydrogen addition with PDB2PQR automatically turns of rotamer correction\n')
            command = command + " --clean "
        elif correct == False:
            command = command + " --nodebump "

        # Protool ignores altlocs so we can use it to remove them.
        # Do this first: a pdb2pqr-cleaned file that Protool has read and
        # rewritten raises an error when read again.
        if removeAltLoc is True:
            pdb = Protool.structureIO()
            pdb.readpdb(inputFile)
            pdb.writepdb(outputFile)
            inputFile = outputFile

        command = command + ' %s %s' % (inputFile, outputFile)
        print('Using:  %s' % (command,))
        process = subprocess.Popen(command, shell=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        stdout, stderr = process.communicate()
        if process.returncode != 0:
            raise ProteinDesignToolException('Error using pdb2pqr to clean pdb file %s' % inputFile)
    except BaseException:
        print('Encountered an exception cleaning pdb %s' % inputFile)
        if stdout is not None:
            print('PDB2PQR output follows:')
            print(stdout)
        raise
def remALT(self, pdb):
    '''Removes alternative residues from the working pdb and replaces it.

    ``<pdb>.pdb`` is read with Protool, ``RemoveALT()`` is applied and the
    file is overwritten with ``dont_write_HETATMS=1``.  A confirmation is
    printed on stdout.
    '''
    import Protool

    path = '%s.pdb' % (pdb)
    structure = Protool.structureIO()
    structure.readpdb(path)
    structure.RemoveALT()
    structure.writepdb(path, dont_write_HETATMS=1)
    print("[ProteinComplexTool] Alternative Residues removed.")
def addPDBFile(self, DB=None, name=None, pdbfile=None, pdbdata=None, pdbname=None, gui=True):
    """Add a PDB file to the record given as argument.

    Args:
        DB: database providing ``storePDB``, ``meta``, ``getRecs`` and
            ``data``.
        name: key of the record receiving the structure.
        pdbfile: path of the PDB file.  When neither this nor ``pdbdata``
            is given a file dialog is shown; cancelling it returns.
        pdbdata: PDB content passed to Protool as ``data``; takes
            precedence over ``pdbfile``.
        pdbname: name stored under ``'pdbname'``; replaced by the base name
            of ``pdbfile`` whenever a file path is known.
        gui: when True the user is asked whether to reset the amino acid
            sequence.

    The parsed structure is kept in ``self.X`` and stored in the database.
    When ``name`` is the reference protein, the mutation codes of all
    records are rechecked.  The module-level ``PDB_code`` is set from the
    file name when a file is read.
    """
    global PDB_code
    import os, tkMessageBox

    if pdbdata is None and pdbfile is None:
        savedir = os.getcwd()
        # NOTE(review): tkFileDialog is not imported in this function;
        # presumably it is imported at module level - confirm.
        pdbfile = tkFileDialog.askopenfilename(defaultextension='.pdb',
                                               initialdir=savedir,
                                               filetypes=[("PDB file", "*.pdb"),
                                                          ("PDB file", "*.brk"),
                                                          ("All files", "*.*")])
        if not pdbfile:
            return
    if pdbfile:
        pdbname = os.path.basename(pdbfile)

    import Protool
    self.X = Protool.structureIO()
    if pdbdata is not None:
        self.X.readpdb(data=pdbdata)
    elif os.path.isfile(pdbfile):
        # Extract PDB_code from pdbfile
        PDB_code = pdbfile.split('/').pop().split('.')[0]
        # Try to read it using Protool
        try:
            self.X.readpdb(filename=pdbfile)
        except Exception:
            # Any parser failure is reported to the user; interrupts and
            # interpreter exit are no longer swallowed here
            tkMessageBox.showwarning('Error reading PDB file',
                                     'I could not read the PDB file. This probably means that the PDB file is corrupt in some way.')
            return

    AlignmentMap = None
    if gui == True:
        if tkMessageBox.askyesno('Reset AA Seq?',
                                 'Do you want to reset the amino acid Sequence?'):
            AlignmentMap = self.checkPDBSequence(name)

    # Store it
    DB.storePDB(name, self.X, AlignmentMap)
    if hasattr(DB.meta, 'refprotein'):
        ref = DB.meta.refprotein
        # If this is the reference protein, rewrite the mutation codes
        if name == ref:
            print('%s %s' % (name, ref))
            print('rechecking mutation codes, ref prot structure has changed')
            for p in DB.getRecs():
                self.checkMutation(DB, p, ref, self.X)

    # Add the original pdb name
    DB.data[name]['pdbname'] = pdbname
    return
def get_sequences(self):
    """Extract all sequences from the PDB files.

    For every entry of ``self.multcalcs`` the ``'pdblines'`` are parsed with
    Protool.  A copy of the parsed sequence is stored under ``'sequence'``
    and the string made by mapping each residue type through ``self.aas``
    is stored under ``'aa1seq'``.
    """
    import Protool

    for calc in self.multcalcs.keys():
        X = Protool.structureIO()
        X.parsepdb(self.multcalcs[calc]['pdblines'])
        entry = self.multcalcs[calc]
        entry['sequence'] = X.sequence[:]
        # NOTE(review): original indentation was lost; 'aa1seq' is assumed
        # to be stored once per calculation, after the residue loop.
        entry['aa1seq'] = ''.join([self.aas[restype]
                                   for number, restype in entry['sequence']])
    return
def pir2Protool(sequence):
    """
    Reformat a regular one-letter sequence (e.g. 'ASDDE') to the Protool
    type sequence ( [[':0001','ALA'], [':0002','SER'] ...).

    Residues are numbered from 1 and the number is zero-padded to four
    characters.  Letters are translated with Protool's ``one_to_three``.
    """
    import Protool
    one_to_three = Protool.structureIO().one_to_three

    return [[':' + str(number).zfill(4), one_to_three[letter]]
            for number, letter in enumerate(sequence, 1)]
def get_sequences(self):
    """Extract all sequences from the PDB files.

    For every entry of ``self.multcalcs`` the ``'pdblines'`` are parsed with
    Protool.  A copy of the parsed sequence is stored under ``'sequence'``
    and the string made by mapping each residue type through ``self.aas``
    is stored under ``'aa1seq'``.
    """
    import Protool

    for calc in self.multcalcs.keys():
        X = Protool.structureIO()
        X.parsepdb(self.multcalcs[calc]['pdblines'])
        entry = self.multcalcs[calc]
        entry['sequence'] = X.sequence[:]
        # NOTE(review): original indentation was lost; 'aa1seq' is assumed
        # to be stored once per calculation, after the residue loop.
        entry['aa1seq'] = ''.join([self.aas[restype]
                                   for number, restype in entry['sequence']])
    return
def pir2Protool(sequence):
    """
    Reformat a regular one-letter sequence (e.g. 'ASDDE') to the Protool
    type sequence ( [[':0001','ALA'], [':0002','SER'] ...).

    Residues are numbered from 1 and the number is zero-padded to four
    characters.  Letters are translated with Protool's ``one_to_three``.
    """
    import Protool
    one_to_three = Protool.structureIO().one_to_three

    return [[':' + str(number).zfill(4), one_to_three[letter]]
            for number, letter in enumerate(sequence, 1)]
def load_PDB(self):
    """Ask for a PDB file and load it into ``self.P``.

    A file dialog parented to ``self.Dcontrol`` is opened in the current
    working directory.  Nothing is loaded when no file is chosen.
    """
    import tkFileDialog, os

    filename = tkFileDialog.askopenfilename(
        defaultextension='.pdb',
        initialdir=os.getcwd(),
        parent=self.Dcontrol,
        filetypes=[("PDB file", "*.pdb"), ("All files", "*.*")])
    if not filename:
        return
    import Protool
    self.P = Protool.structureIO()
    self.P.readpdb(filename)
    return
def cubescanplot(options):
    """Read a cubescan output file and show the results.

    The pickled file ``options.cubescanfile`` holds the cube grid (item 0)
    and the scan results per cube (item 1).  The ghosts of every cube are
    scored with ``score_class`` and ranked by RMSD.  The centres of the 25
    best cubes are added as 'CS' atoms to ``2LZT_H.pdb`` and written to
    ``cubescan.pdb``; a histogram of their RMSDs is saved as
    ``Cubescanhist.png``.
    """
    import pickle
    # NOTE: pickle.load can execute arbitrary code - only read cubescan
    # files from a trusted source.
    fd = open(options.cubescanfile)
    try:
        data = pickle.load(fd)
    finally:
        # Close the handle even when unpickling fails
        fd.close()
    cubegrid = data[0]
    scanresults = data[1]

    # Instantiate scoring class
    SC = score_class(options)

    # Score the ghosts from each cubescan
    scores = []
    for cube in sorted(scanresults.keys()):
        xs, ys, experrors, satisfied = SC.score_ghosts(scanresults[cube])
        scores.append([RMSD(xs, ys, experrors), cube])
    scores.sort()

    import Protool
    P = Protool.structureIO()
    P.readpdb('2LZT_H.pdb')
    rmsds = []
    for count, (rmsd, cube) in enumerate(scores[:25]):
        print('%4d, rmsd: %5.2f' % (cube, rmsd))
        center = cubegrid[cube]['coord']
        P.add_atom('X:%4d:CS' % (count + 1000),
                   atomnumber=0, atomname='CS',
                   chainid='X', residuename='CSS', residuenumber='999',
                   xcoord=center[0], ycoord=center[1], zcoord=center[2],
                   update=1, BFACTOR=None, OCCUPANCY=None, CHARGE=None,
                   RADIUS=None, tag=None, accept_duplicate=False)
        rmsds.append(rmsd)
    P.writepdb('cubescan.pdb')

    import pylab
    pylab.hist(rmsds)
    pylab.savefig('Cubescanhist.png')
    return
def background():
    """Find correlation between background interaction energy and number of hbonds.

    Every ``.pdb`` file in each sub-directory of ``data`` is checked with
    pKaIO.  For completed calculations the stored background energies are
    compared with those from ``Protool.calculate_background()``: residues
    known to both, with a stored value below 19.0, contribute the Protool
    value to x and the stored value to y and are printed.  Everything is
    handed to ``plotit``.
    """
    import os
    dirs = os.listdir('data')
    xs = []
    ys = []
    for dirname in dirs:
        print('Processing %s' % dirname)
        # Find the PDB file
        realdir = os.path.join(os.getcwd(), 'data', dirname)
        for filename in os.listdir(realdir):
            if filename[-4:] != '.pdb':
                continue
            realfile = os.path.join(realdir, filename)
            import pKaTool.pKaIO
            X = pKaTool.pKaIO.pKaIO(realfile)
            X.assess_status()
            if X.calculation_completed != 1:
                continue

            # The calc is complete. Load the stored background energies
            PBEbackground = X.read_backgr()

            # Now calculate the same energies with Protool
            # NOTE(review): Protool is not imported in this function;
            # presumably it is imported at module level - confirm.
            P = Protool.structureIO()
            P.readpdb(realfile)
            P.get_titratable_groups()
            P.calculate_background()

            x = []
            y = []
            for residue in sorted(PBEbackground.keys()):
                if residue in P.background and PBEbackground[residue] < 19.0:
                    x.append(P.background[residue])
                    y.append(PBEbackground[residue])
                    print('%12s %5.2f %5.2f' % (residue,
                                                 P.background[residue],
                                                 PBEbackground[residue]))
            # NOTE(review): original indentation was lost; the append is
            # assumed to happen once per completed calculation - confirm.
            xs.append(x)
            ys.append(y)
    plotit(xs, ys, 'Background', dirs)
    return
def Model_Mutations_old(pdbfile, mol2files, mutations, max_overlap=0.5, return_score=False):
    """Model a number of mutations in a pdbfile when one or more ligands are present.

    Args:
        pdbfile: PDB file read with Protool; all hydrogens are removed.
        mol2files: mol2 ligand files, each read with tag 'LIGAND'.
        mutations: mutation strings understood by ``pKa.pKD_tools``.
        max_overlap: passed to ``Mutate`` as ``max_bump`` and used as the
            largest bump score accepted for a single mutation.
        return_score: when true, the total bump score is returned as well.

    Returns:
        ``(False, 20.0)`` as soon as one mutation cannot be modelled (this
        tuple is returned whatever ``return_score`` is).  Otherwise the
        ``Mutate`` instance, or ``(instance, total bump score)`` when
        ``return_score`` is true.
    """
    # Initialise mutate routines
    MUT = Mutate(max_bump=max_overlap)

    # Read PDB file
    import Protool
    P = Protool.structureIO()
    P.readpdb(pdbfile)
    P.remove_all_hydrogens()

    # Read mol2 files
    L = Protool.ligand(P)
    for mol2file in mol2files:
        print("Added %s with tag 'LIGAND'" % mol2file)
        L.readmol2(mol2file, tag='LIGAND')

    # Pass combined pdb file to mutate routines and mutate
    MUT.new_PDB(P)
    import pKa.pKD_tools as pKD_tools
    total_bump = 0.0
    for mutation in mutations:
        resid = pKD_tools.get_resid_from_mut(mutation)
        newres = pKD_tools.get_newrestyp_from_mut(mutation)
        oldres = pKD_tools.get_oldrestyp_from_mut(mutation)
        bump_score = MUT.mutate(resid, newres, orgtype=oldres)
        if bump_score is None or bump_score is False or bump_score > max_overlap:
            print('Cannot model this set of mutations - too many bumps')
            return False, 20.0
        print('Bump score for %s: %5.3f' % (mutation, bump_score))
        total_bump = total_bump + bump_score

    # Report the accumulated score; the last single-mutation score used to
    # be printed and returned here (and was undefined for no mutations)
    print('Total bump score for all mutations: %5.3f' % (total_bump))
    if return_score:
        return MUT, total_bump
    return MUT
def check_mutation_syntax(operations, sequence=None):
    """Check the syntax of an operation string and check that the residue is present in the sequence.

    Args:
        operations: operation string, split with ``get_single_operations``.
        sequence: optional sequence of ``[resid, residue name]`` entries;
            when given, every mutated residue must be present in it.

    Returns:
        The single operations recombined with ``combine_operations``.

    Raises:
        Exception: when the old or new residue is not a key of Protool's
            ``aminoacids`` table, or the original residue is not in
            ``sequence``.
    """
    single = get_single_operations(operations)
    if single:
        # Build the amino-acid table once instead of once per operation
        import Protool
        aminoacids = Protool.structureIO().aminoacids
    for op in single:
        resid = get_resid_from_mut(op)
        new = get_newrestyp_from_mut(op)
        old = get_oldrestyp_from_mut(op)
        if old not in aminoacids or new not in aminoacids:
            raise Exception('New or old residue is not an amino acid: %s' % op)
        if sequence:
            if [resid, old] not in sequence:
                raise Exception('Original sequence does not contain this residue: %s:%s' % (resid, old))
    return combine_operations(single)
def load_PDB(self):
    """Ask for a PDB file and load it into ``self.P``.

    A file dialog parented to ``self.Dcontrol`` is opened in the current
    working directory.  Nothing is loaded when no file is chosen.
    """
    import tkFileDialog, os

    file_types = [("PDB file", "*.pdb"), ("All files", "*.*")]
    filename = tkFileDialog.askopenfilename(defaultextension=".pdb",
                                            initialdir=os.getcwd(),
                                            parent=self.Dcontrol,
                                            filetypes=file_types)
    if not filename:
        return
    import Protool
    self.P = Protool.structureIO()
    self.P.readpdb(filename)
    return
def protoolStructure(self):
    '''Returns a protool structure instance initialised using the structure data stored in the database

    The data returned by ``self.structure()`` is written to a temporary
    file which Protool then reads; the temporary file is closed afterwards.

    Errors:
        Raises Exceptions.FileFormatError when Protool cannot read the
        stored data.'''
    import tempfile, Protool

    structureData = self.structure()
    temp = tempfile.NamedTemporaryFile()
    try:
        temp.write(structureData)
        temp.flush()
        try:
            structure = Protool.structureIO()
            structure.readpdb(temp.name)
        except Exception as data:
            raise Exceptions.FileFormatError('Format of stored PDB file for job %s not valid.\nUnderlying error - %s' % (self.identification, data))
    finally:
        # Always release the temporary file, also on failure
        temp.close()
    # The structure used to be built and then dropped without a return
    return structure
def CleanWHATIF(inputFile, outputFile, removeWater=True, removeLigand=True, removeAltLoc=True, addHydrogens=True, correct=True):
    '''Cleans a PDB using WHAT-IF

    NOTE(review): the code visible here only checks that Protool can read
    ``inputFile``; ``outputFile`` and the option flags are not used in it.

    Errors:
        Raises Exceptions.FileFormatError if the inputFile is not a valid
        PDB file.'''
    import pKarun.WI_tools, Protool

    # First try to load the pdb using Protool to find out whether it is valid
    try:
        pdb = Protool.structureIO()
        pdb.readpdb(inputFile)
    except Exception as data:
        message = 'Format of specified PDB file %s not valid.\nUnderlying error - %s' % (inputFile, data)
        raise Exceptions.FileFormatError(message)
def check_mutation_syntax(operations, sequence=None):
    """Check the syntax of an operation string and check that the residue is present in the sequence.

    Args:
        operations: operation string, split with ``get_single_operations``.
        sequence: optional sequence of ``[resid, residue name]`` entries;
            when given, every mutated residue must be present in it.

    Returns:
        The single operations recombined with ``combine_operations``.

    Raises:
        Exception: when the old or new residue is not a key of Protool's
            ``aminoacids`` table, or the original residue is not in
            ``sequence``.
    """
    single = get_single_operations(operations)
    if single:
        # Build the amino-acid table once instead of once per operation
        import Protool
        aminoacids = Protool.structureIO().aminoacids
    for op in single:
        resid = get_resid_from_mut(op)
        new = get_newrestyp_from_mut(op)
        old = get_oldrestyp_from_mut(op)
        if old not in aminoacids or new not in aminoacids:
            raise Exception('New or old residue is not an amino acid: %s' % op)
        if sequence:
            if [resid, old] not in sequence:
                raise Exception(
                    'Original sequence does not contain this residue: %s:%s' % (resid, old))
    return combine_operations(single)
def Model_Mutations_old(pdbfile,mol2files,mutations,max_overlap=0.5,return_score=False):
    """Model a number of mutations in a pdbfile when one or more ligands are present.

    Args:
        pdbfile: PDB file read with Protool; all hydrogens are removed.
        mol2files: mol2 ligand files, each read with tag 'LIGAND'.
        mutations: mutation strings understood by ``pKa.pKD_tools``.
        max_overlap: passed to ``Mutate`` as ``max_bump`` and used as the
            largest bump score accepted for a single mutation.
        return_score: when true, the total bump score is returned as well.

    Returns:
        ``(False, 20.0)`` as soon as one mutation cannot be modelled (this
        tuple is returned whatever ``return_score`` is).  Otherwise the
        ``Mutate`` instance, or ``(instance, total bump score)`` when
        ``return_score`` is true.
    """
    # Initialise mutate routines
    MUT = Mutate(max_bump=max_overlap)

    # Read PDB file
    import Protool
    P = Protool.structureIO()
    P.readpdb(pdbfile)
    P.remove_all_hydrogens()

    # Read mol2 files
    L = Protool.ligand(P)
    for mol2file in mol2files:
        print("Added %s with tag 'LIGAND'" % mol2file)
        L.readmol2(mol2file, tag='LIGAND')

    # Pass combined pdb file to mutate routines and mutate
    MUT.new_PDB(P)
    import pKa.pKD_tools as pKD_tools
    total_bump = 0.0
    for mutation in mutations:
        resid = pKD_tools.get_resid_from_mut(mutation)
        newres = pKD_tools.get_newrestyp_from_mut(mutation)
        oldres = pKD_tools.get_oldrestyp_from_mut(mutation)
        bump_score = MUT.mutate(resid, newres, orgtype=oldres)
        if bump_score is None or bump_score is False or bump_score > max_overlap:
            print('Cannot model this set of mutations - too many bumps')
            return False, 20.0
        print('Bump score for %s: %5.3f' % (mutation, bump_score))
        total_bump = total_bump + bump_score

    # Report the accumulated score; the last single-mutation score used to
    # be printed and returned here (and was undefined for no mutations)
    print('Total bump score for all mutations: %5.3f' % (total_bump))
    if return_score:
        return MUT, total_bump
    return MUT
def get_PDB(self, name):
    """Get a PDB chain given as <PDBID><chainname>.

    If ``name`` is an existing local file it is read and returned as is,
    without any filtering.  Otherwise the last character of ``name`` is the
    chain ID ("_" selects residues with an empty chain ID) and the rest is
    the PDB ID.  The entry is read from ``<PDBdir>/<PDBID>.pdb`` when that
    file exists; if not it is fetched through ``Protool.PDBServices`` and
    saved there.  Alternative atoms and non-amino-acid residues are then
    removed and only the requested chain is kept.

    Returns:
        The Protool structure.

    Raises:
        Exception: when the download yields fewer than 100 lines.
    """
    import os
    import Protool
    X = Protool.structureIO()

    # If this is a local file that exists, then we use that
    if os.path.isfile(name):
        X.readpdb(name)
        return X

    CID = name[-1]
    name = name[:-1]
    pdbfile = os.path.join(PDBdir, name + ".pdb")
    if os.path.isfile(pdbfile):
        X.readpdb(pdbfile)
    else:
        print("Getting %s from the PDB" % (os.path.split(pdbfile)[-1]))
        import Protool.PDBServices as PS
        lines = PS.PDBServices().getPDB(name)
        if len(lines) < 100:
            raise Exception("PDB file not found: %s" % (" ".join(lines)))
        X.parsepdb(lines)
        X.writepdb(pdbfile)
    X.RemoveALT()  # Removes alternative atoms
    X.Remove_All_NonAminoAcids()  # Deletes all ligands and waters

    # Keep only the chain we are interested in.  Iterate over a snapshot:
    # residues are removed from the structure inside the loop.
    for residue in list(X.residues):
        thisCID = X.chainid(residue)
        wanted = (thisCID == "") if CID == "_" else (thisCID == CID)
        if not wanted:
            X.remove_residue(residue)
    X.Update()
    return X
def get_PDB(self, name):
    """Get a PDB chain given as <PDBID><chainname>.

    If ``name`` is an existing local file it is read and returned as is,
    without any filtering.  Otherwise the last character of ``name`` is the
    chain ID ('_' selects residues with an empty chain ID) and the rest is
    the PDB ID.  The entry is read from ``<PDBdir>/<PDBID>.pdb`` when that
    file exists; if not it is fetched through ``Protool.PDBServices`` and
    saved there.  Alternative atoms and non-amino-acid residues are then
    removed and only the requested chain is kept.

    Returns:
        The Protool structure.

    Raises:
        Exception: when the download yields fewer than 100 lines.
    """
    import os
    import Protool
    X = Protool.structureIO()

    # If this is a local file that exists, then we use that
    if os.path.isfile(name):
        X.readpdb(name)
        return X

    CID = name[-1]
    name = name[:-1]
    pdbfile = os.path.join(PDBdir, name + '.pdb')
    if os.path.isfile(pdbfile):
        X.readpdb(pdbfile)
    else:
        print('Getting %s from the PDB' % (os.path.split(pdbfile)[-1]))
        import Protool.PDBServices as PS
        lines = PS.PDBServices().getPDB(name)
        if len(lines) < 100:
            raise Exception('PDB file not found: %s' % (' '.join(lines)))
        X.parsepdb(lines)
        X.writepdb(pdbfile)
    X.RemoveALT()  # Removes alternative atoms
    X.Remove_All_NonAminoAcids()  # Deletes all ligands and waters

    # Keep only the chain we are interested in.  Iterate over a snapshot:
    # residues are removed from the structure inside the loop.
    for residue in list(X.residues):
        thisCID = X.chainid(residue)
        wanted = (thisCID == '') if CID == '_' else (thisCID == CID)
        if not wanted:
            X.remove_residue(residue)
    X.Update()
    return X
def desolvation():
    """Find correlation between desolvation and acc.

    Every ``.pdb`` file in each sub-directory of ``data`` is checked with
    pKaIO.  For completed calculations the stored desolvation energies are
    paired with those from ``Protool.calculate_desolvation()`` for every
    residue known to both (Protool values in x, stored values in y), and
    everything is handed to ``plotit``.
    """
    import os
    dirs = os.listdir('data')
    xs = []
    ys = []
    for dirname in dirs:
        print('Processing %s' % dirname)
        # Find the PDB file
        realdir = os.path.join(os.getcwd(), 'data', dirname)
        for filename in os.listdir(realdir):
            if filename[-4:] != '.pdb':
                continue
            realfile = os.path.join(realdir, filename)
            import pKa.pKaTool.pKaIO
            X = pKa.pKaTool.pKaIO.pKaIO(realfile)
            X.assess_status()
            if X.calculation_completed != 1:
                continue

            # The calc is complete. Load the desolvation energies
            PBEdesolv = X.read_desolv()

            # Now calculate the desolvation energies with Protool
            # NOTE(review): Protool is not imported in this function;
            # presumably it is imported at module level - confirm.
            P = Protool.structureIO()
            P.readpdb(realfile)
            P.get_titratable_groups()
            P.calculate_desolvation()

            x = []
            y = []
            for residue in PBEdesolv.keys():
                if residue in P.desolv:
                    x.append(P.desolv[residue])
                    y.append(PBEdesolv[residue])
            # NOTE(review): original indentation was lost; the append is
            # assumed to happen once per completed calculation - confirm.
            xs.append(x)
            ys.append(y)
    plotit(xs, ys, 'Desolvation', dirs)
    return
def readpdb(structure,CID):
    """Get a PDBID from the PDB website and read it into Protool.

    The lines returned by ``PDB.getPDB(structure)`` are parsed and every
    residue of every chain other than ``CID`` is deleted.

    Returns:
        The Protool structure.
    """
    print('Getting PDBID:chain %s:%s from the PDB website' % (structure, CID))
    pdblines = PDB.getPDB(structure)
    print('Reading the PDB into Protool')
    import Protool
    X = Protool.structureIO()
    X.parsepdb(pdblines)

    # Delete all residues except the ones in the chain we need
    unwanted = [chain for chain in X.chains.keys() if chain != CID]
    for chain in unwanted:
        print('Deleting chain %s' % (chain,))
        for res in X.chains[chain]:
            X.Delete_residue(res, update=False)
    # NOTE(review): original indentation was lost; a single Update() after
    # all deletions is assumed - confirm.
    X.Update()
    return X
def readpdb(structure, CID):
    """Get a PDBID from the PDB website and read it into Protool.

    The lines returned by ``PDB.getPDB(structure)`` are parsed and every
    residue of every chain other than ``CID`` is deleted.

    Returns:
        The Protool structure.
    """
    print('Getting PDBID:chain %s:%s from the PDB website' % (structure, CID))
    pdblines = PDB.getPDB(structure)
    print('Reading the PDB into Protool')
    import Protool
    X = Protool.structureIO()
    X.parsepdb(pdblines)

    # Delete all residues except the ones in the chain we need
    unwanted = [chain for chain in X.chains.keys() if chain != CID]
    for chain in unwanted:
        print('Deleting chain %s' % (chain,))
        for res in X.chains[chain]:
            X.Delete_residue(res, update=False)
    # NOTE(review): original indentation was lost; a single Update() after
    # all deletions is assumed - confirm.
    X.Update()
    return X
def __init__(self, PDB, csvfile, exp_pKa_file, options):
    """Load the PDB file and the experimental data, then run the chosen mode.

    The structure is kept in ``self.PI`` and its titratable groups in
    ``self.TGs``.  Stability data are read from ``csvfile`` and pKa values
    from ``exp_pKa_file``.  ``options.test`` runs the pH-stability test and
    ``options.plotdata`` plots all curves and stops.  Fitting then follows:
    single fits (``options.singlefits``), double mutants
    (``options.doublemuts``) or, by default, the full pH-stability profile.

    NOTE(review): original indentation was lost; the early ``return`` is
    assumed to belong to the ``plotdata`` branch only - confirm.
    """
    self.options = options
    import Protool
    self.PI = Protool.structureIO()
    self.PI.readpdb(PDB)
    self.TGs = self.PI.get_titratable_groups()

    # Experimental stability data and pKa values
    stab_data = self.read_stability(csvfile)
    pKa_values = self.read_pKa_values(exp_pKa_file)

    # Should we test the predictions?
    if options.test:
        self.test_pHstab(pKa_values, stab_data)
    elif options.plotdata:
        self.plot_all_curves(stab_data)
        return

    # Do the fitting
    if options.singlefits:
        pKa_values = self.singlefits(stab_data, pKa_values)
        return
    if options.doublemuts:
        self.doublemuts(stab_data, pKa_values)
        return
    # Just fit the full pH-stability profile
    self.fit_pKa_values(stab_data, pKa_values)
    return
def find_excludes_from_structure(self):
    """This is an old function for selecting residues to exclude. Does not work that great.

    For every titratable group in ``exp_ghosts`` (keys shaped
    ``<chain>:<number>:...``) the exclusion list holds the residues
    numbered one before and one after it, followed by every other residue
    with an atom closer than 6.0 to one of the group's non-backbone,
    non-hydrogen atoms.  The structure is read from ``options.pdbfile``.

    NOTE(review): ``options`` and ``exp_ghosts`` are not defined in this
    function; presumably they are module-level names - confirm.

    Returns:
        Dictionary mapping each titratable group to its exclusion list.
        The lists are also printed on stdout.
    """
    excludes = {}

    # Get the PDB file
    import Protool
    X = Protool.structureIO()
    X.readpdb(options.pdbfile)

    for tg in sorted(exp_ghosts.keys()):
        excludes[tg] = []
        fields = tg.split(':')
        tg_res = ':'.join(fields[:2])
        tg_num = int(fields[1])
        CID = fields[0]
        # Before - After
        excludes[tg].append('%s:%s' % (CID, str(tg_num - 1).zfill(4)))
        excludes[tg].append('%s:%s' % (CID, str(tg_num + 1).zfill(4)))

        # Find Hbonding and VdW partners for side chain atoms
        sidechain = None
        for res2 in X.residues.keys():
            if res2 == tg_res:
                continue
            if sidechain is None:
                # Side-chain heavy atoms of the group, collected once
                sidechain = [atom for atom in X.residues[tg_res]
                             if not (X.is_backbone(atom) or X.is_hydrogen(atom))]
            if res2 in excludes[tg]:
                continue
            # Stop measuring as soon as the first contact is found
            if any(X.dist(atom, atom2) < 6.0
                   for atom in sidechain
                   for atom2 in X.residues[res2]):
                excludes[tg].append(res2)

    for tg in sorted(excludes.keys()):
        print(tg)
        print(excludes[tg])
        print('-------')
    return excludes
def __init__(self, PDB, csvfile, exp_pKa_file, options):
    """Load the PDB file and the experimental data, then run the chosen mode.

    The structure is kept in ``self.PI`` and its titratable groups in
    ``self.TGs``.  Stability data are read from ``csvfile`` and pKa values
    from ``exp_pKa_file``.  ``options.test`` runs the pH-stability test and
    ``options.plotdata`` plots all curves and stops.  Fitting then follows:
    single fits (``options.singlefits``), double mutants
    (``options.doublemuts``) or, by default, the full pH-stability profile.

    NOTE(review): original indentation was lost; the early ``return`` is
    assumed to belong to the ``plotdata`` branch only - confirm.
    """
    self.options = options
    import Protool
    self.PI = Protool.structureIO()
    self.PI.readpdb(PDB)
    self.TGs = self.PI.get_titratable_groups()

    # Experimental stability data and pKa values
    stab_data = self.read_stability(csvfile)
    pKa_values = self.read_pKa_values(exp_pKa_file)

    # Should we test the predictions?
    if options.test:
        self.test_pHstab(pKa_values, stab_data)
    elif options.plotdata:
        self.plot_all_curves(stab_data)
        return

    # Do the fitting
    if options.singlefits:
        pKa_values = self.singlefits(stab_data, pKa_values)
        return
    if options.doublemuts:
        self.doublemuts(stab_data, pKa_values)
        return
    # Just fit the full pH-stability profile
    self.fit_pKa_values(stab_data, pKa_values)
    return
def setSequencesfromMutationCodes(self, DB=None, callback=None, selected=None): """Set the aa sequence using wt ref aa and mutation code Assumes mutation code is consistent with ref aa seq""" if DB == None: return proteins = DB.getRecs() refprot = DB.meta.refprotein refseq = DB[refprot].aaseq refaa = self.AAList2String(refseq) refpdb = DB[refprot].Structure #Create protool oinstance for ref pdb import Protool Xref = Protool.structureIO() Xref.parsepdb(refpdb) for protein in selected: rec = DB.get(protein) if rec.hasStructure() == 'available': continue print 'Protein:', protein #if no sequence try create one from mutation code if rec.aaseq == None and rec.Mutations != None: print 'no sequence, using mutation code and ref protein seq' import PEATSA.Core as Core print 'Record has mutation code %s' % rec.Mutations mutationSet = Core.Data.MutationSet(rec.Mutations) Xref.Remove_All_NonAminoAcids() refaa = Core.Data.GetChainSequences(Xref)['A'] #print refaa mutseq = mutationSet.applyToSequence(refaa, id='A', offset=None, pdb=Xref) rec.aaseq = self.string2AAseq(mutseq) return
def CleanWHATIF(inputFile, outputFile, removeWater=True, removeLigand=True, removeAltLoc=True, addHydrogens=True, correct=True): '''Cleans a PDB using WHAT-IF Errors: Raises an exception if the inputFile is not a valid PDB file.''' import pKarun.WI_tools, Protool #First try to load the pdb using protool. #If its not valid try: pdb = Protool.structureIO() pdb.readpdb(inputFile) except Exception, data: raise Exceptions.FileFormatError, 'Format of specified PDB file %s not valid.\nUnderlying error - %s' % ( inputFile, data)
def Protool2pir(sequence):
    """Reformat a Protool style sequence to a regular one-letter sequence

    sequence -- list of entries whose element [1] is a three-letter
                residue name (or a false value for a gap); a final entry
                named '***' (stop codon) is dropped. May be empty.

    Returns (seq, ignored): seq is the one-letter string with '-' for
    gaps, ignored is a dictionary whose keys are the residue names that
    have no one-letter code (e.g. water)."""
    import Protool
    X = Protool.structureIO()
    #
    # Get rid of final stop codon (an empty sequence has none)
    #
    if sequence and sequence[-1][1] == '***':
        sequence = sequence[:-1]
    ignored = {}
    letters = []
    for aa in sequence:
        resname = aa[1]
        if not resname:
            letters.append('-')
        elif resname in X.three_to_one:
            letters.append(X.three_to_one[resname])
        else:
            #
            # Ignore water and anything else without a one-letter code
            #
            ignored[resname] = 1
    # Join once instead of growing the string residue by residue
    return ''.join(letters), ignored
def Protool2pir(sequence):
    """Reformat a Protool style sequence to a regular one-letter sequence

    sequence -- list of entries whose element [1] is a three-letter
                residue name (or a false value for a gap); a final entry
                named '***' (stop codon) is dropped. May be empty.

    Returns (seq, ignored): seq is the one-letter string with '-' for
    gaps, ignored is a dictionary whose keys are the residue names that
    have no one-letter code (e.g. water)."""
    import Protool
    X = Protool.structureIO()
    #
    # Get rid of final stop codon (an empty sequence has none)
    #
    if sequence and sequence[-1][1] == '***':
        sequence = sequence[:-1]
    ignored = {}
    letters = []
    for aa in sequence:
        resname = aa[1]
        if not resname:
            letters.append('-')
        elif resname in X.three_to_one:
            letters.append(X.three_to_one[resname])
        else:
            #
            # Ignore water and anything else without a one-letter code
            #
            ignored[resname] = 1
    # Join once instead of growing the string residue by residue
    return ''.join(letters), ignored
def main():
    """Analyse Design_pKa result files and print/plot statistics.

    The last command line argument selects the result type to read from
    each solution; the preceding arguments are passed to get_files().
    For every result file the dpKa values are collected, combined with
    structure, accessibility and wild type pKa data, written to a Yasara
    colouring script ('yasara.mcr'), summarised on stdout, plotted via
    dislin_driver and finally pickled to a fixed path.

    NOTE(review): several builtins (type, file, all, sum) are shadowed
    by local variables in this function."""
    #
    # Do all the analyses we want
    #
    print
    print 'Design_plots.py: Do all analyses of the Design_pKa runs'
    print
    print 'Usage Design_plots.py [files] <type>'
    print
    #
    # dpKa vs. distance from active site & number of mutations
    #
    import sys
    #
    # Get the type
    #
    type=sys.argv[-1]
    if type=='two':
        #
        # Analysing a single group
        #
        files=get_files(sys.argv[1:-2],type,sys.argv[-2])
    else:
        #
        # Get the files
        #
        files=get_files(sys.argv[1:-1],type)
    #
    # If not files then exit
    #
    if files==[]:
        print 'Error: Did not find any files to match criteria'
        return
    #
    # Prepare the data matrix
    #
    # raw_data[number of mutations][distance] -> list of dpKa values,
    # every bin starts with a single 0.0
    raw_data={}
    max_dist=25
    max_muts=20
    distance_range=range(max_dist+1)
    nummuts_range=range(1,max_muts+1)
    for num in nummuts_range:
        raw_data[num]={}
        for dist in distance_range:
            raw_data[num][dist]=[0.0]
    #
    # Loop over all the files
    #
    added=0
    big_dict={}
    tot_target={}
    for file in files:
        if file[-5:]=='.lock':
            continue
        print 'Processing %s' %file
        # NOTE(review): the bare except silently skips any file that
        # cannot be opened or unpickled
        try:
            import pickle
            fd=open(file)
            d=pickle.load(fd)
            fd.close()
        except:
            continue
        #
        # Set the prefix
        #
        prefix=get_prefix(file)
        #
        # -----------------------------------
        #
        # Loop over all the design-data
        #
        targets=d.keys()
        targets.sort()
        for target in targets:
            #if target!=':0231:ASP':
            #    continue
            #
            # pdbfile and wt_full are not interesting
            #
            if target=='pdbfile' or target=='wt_full':
                continue
            target_pka=d[target]
            designs=target_pka.keys()
            designs.sort()
            if designs==['pKa out of range']:
                continue
            #
            # Loop over each design (normally +20 and -20 for Design_dist_nummuts)
            #
            for design in designs:
                #if design!='m20':
                #    continue
                try:
                    nummuts=target_pka[design].keys()
                except:
                    #print 'Skipping:',target_pka[design]
                    continue
                nummuts.sort()
                for num in nummuts:
                    dist_cutoffs=target_pka[design][num].keys()
                    for cutoff in dist_cutoffs:
                        #text='%15s %4s #muts: %2d, dist_co: %5.2f, sols:' %(target,design,num,float(cutoff))
                        #print text
                        #
                        # Make sure we have a bin for the this distance cutoff
                        #
                        #if not raw_data[num].has_key(cutoff):
                        #    raw_data[num][cutoff]=[]
                        #
                        # Loop over all solutions and store the dpKa values
                        #
                        sol_dict=target_pka[design][num][cutoff]
                        solutions=sol_dict.keys()
                        #
                        # Loop over all the solutions
                        #
                        for sol in solutions:
                            if sol_dict[sol].has_key(type):
                                dpka=sol_dict[sol][type][target]
                                mutations=sol_dict[sol]['mutations']
                                #
                                # Count the number of mutations
                                #
                                nums=0
                                for mut in mutations:
                                    if mut:
                                        nums=nums+1
                                #
                                # Add the data to the array
                                #
                                # We skip all data points outside the range specified
                                # by max_muts and max_dist
                                #
                                skip=None
                                if not raw_data.has_key(nums):
                                    skip=1
                                if not skip:
                                    if not raw_data[nums].has_key(cutoff):
                                        skip=1
                                if not skip:
                                    raw_data[nums][cutoff].append(dpka)
                                #
                                # Add to the big dictionary
                                #
                                import os
                                tname=prefix+target
                                if not big_dict.has_key(tname):
                                    big_dict[tname]=[]
                                clean_muts=[]
                                for mut in mutations:
                                    if mut:
                                        clean_muts.append(mut)
                                big_dict[tname].append([clean_muts,dpka])
                                #
                                # Keep track of how many we add
                                #
                                added=added+1
                                #print 'Adding: nummuts: %2d, cutoff: %4.1f, dpka: %4.2f' %(nums,cutoff,dpka)
                        #except:
                        #    pass
                        #print '--------------------'
    #
    # Read the definition of the active site
    #
    act_site=read_actsit_def()
    #
    # Get properties from the PDB files/wt pKa calculation
    #
    import string, os
    for file in files:
        if file[-5:]=='.lock':
            continue
        prefix=get_prefix(file)
        #
        # Analysis
        #
        print 'Analysing for %s' %prefix
        #
        # Read the PDB file
        #
        pdbfile=os.path.join(basedir,prefix[:4],prefix)
        import Protool
        Z=Protool.structureIO()
        Z.readpdb(pdbfile)
        #
        # Get the relative accs
        #
        import WI_tools
        accs=WI_tools.relative_accessibility(pdbfile)
        #
        # Open the wt pKa calc
        #
        import pKaTool.pKaIO as pKaIO
        X=pKaIO.pKaIO(pdbfile)
        pkavals=X.readpka()
        matrix=X.read_matrix()
        for residue in pkavals.keys():
            target=prefix+residue
            if not tot_target.has_key(target):
                tot_target[target]={}
            tot_target[target]['pKa']=pkavals[residue]['pKa']
            elecs=[]
            for other_res in matrix[residue].keys():
                elecs.append(matrix[residue][other_res][0])
            tot_target[target]['elecs']=elecs[:]
            #
            # Insert number of aas
            #
            tot_target[target]['prot_aas']=len(Z.residues.keys())
            #
            # Is this target in the vicinity of the active site?
            #
            tot_target[target]['act_site']=None
            target_res=target.split('pdb')[1]
            target_res=':'+target_res.split(':')[1]
            try:
                target_atoms=Z.residues[target_res]
            except:
                print target_res
                print Z.residues.keys()
                # NOTE(review): 'stop' is not defined in this function;
                # presumably a deliberate NameError to abort -- confirm
                stop
            if act_site.has_key(prefix):
                for act_res in act_site[prefix]:
                    r_act_res=':'+act_res.split(':')[1]
                    for atom2 in Z.residues[r_act_res]:
                        for target_atom in target_atoms:
                            #print 'Comparing',target_atom,atom2
                            if Z.distance(target_atom,atom2)<5.0:
                                tot_target[target]['act_site']='Yes'
            #
            # Insert rel. acc
            #
            # The accessibility data is looked up without the terminal suffix
            if residue[-6:]==':CTERM':
                residue=residue[:-6]
            if residue[-6:]==':NTERM':
                residue=residue[:-6]
            #print accs[residue]['sum']
            tot_target[target]['relacc']=accs[residue]['sum']['rel']
            #print residue,accs[residue]
    print
    print ' All done'
    #
    # How many solutions in total?
    #
    print 'I added %5d solutions to the matrix' %added
    #
    # For each target, what's the maximum dpKa?
    #
    targets=big_dict.keys()
    targets.sort()
    max_dpkas=[]
    all=[]
    actsite_dpkas=[]
    all_actsite_dpkas=[]
    file_dpkas={}
    for target in targets:
        tmp_dpkas=[]
        for solution,dpka in big_dict[target]:
            tmp_dpkas.append(abs(dpka))
            # Per-protein statistics are keyed on the first four
            # characters of the target name
            if not file_dpkas.has_key(target[:4]):
                file_dpkas[target[:4]]={}
                file_dpkas[target[:4]]['dpkas']=[]
                file_dpkas[target[:4]]['num_target']=0
                file_dpkas[target[:4]]['max_dpka']=0.0
            #
            # Add the new dpKa
            #
            file_dpkas[target[:4]]['dpkas'].append(abs(dpka))
        avg,var,sdev=average(tmp_dpkas)
        print 'Average pKa shift for %25s is %5.2f (%5.2f)' %(target,avg,sdev)
        tmp_dpkas.sort()
        max_dpka=tmp_dpkas[-1]
        max_dpkas.append(max_dpka)
        all=all+tmp_dpkas
        #
        # Store the average and max dpka for each target
        #
        tot_target[target]['avg_dpka']=avg
        tot_target[target]['max_dpka']=max_dpka
        #
        # Set the aa size
        #
        file_dpkas[target[:4]]['prot_aas']=tot_target[target]['prot_aas']
        #
        # Increment the number of targets designed for this protein
        #
        file_dpkas[target[:4]]['num_target']=file_dpkas[target[:4]]['num_target']+1
        #
        # Is is an active site target?
        #
        if tot_target[target]['act_site']:
            actsite_dpkas.append(max_dpka)
            all_actsite_dpkas=all_actsite_dpkas+tmp_dpkas
    #
    # Write the PDB files
    #
    for file in files:
        tf_max=[]
        if file[-5:]=='.lock':
            continue
        prefix=get_prefix(file)
        #
        # Writing Yasara script
        #
        print 'Writing Yasara script for %s' %prefix
        #
        # Read the PDB file
        #
        # NOTE(review): 'yasara.mcr' is reopened with mode 'w' for every
        # file, so only the script of the last file remains on disk
        fd=open('yasara.mcr','w')
        pdbfile=os.path.join(basedir,prefix[:4],prefix)
        fd.write('LoadPDB %s\n' %pdbfile)
        fd.write('ColorAll 606060\n')
        fd.write('Style Stick\n')
        fd.write('HUD Off\n')
        fd.write('HideRes Hoh\n')
        import Protool
        Z=Protool.structureIO()
        Z.readpdb(pdbfile)
        #
        # Zero all B-factors
        #
        #for residue in Z.residues.keys():
        #    for atom in Z.residues[residue]:
        #        Z.atoms[atom]['B-factor']=0.0
        #
        # Loop over all targets and set the colour
        #
        # Upper dpKa limit -> colour name
        colors={1.0:'Blue', 2.0:'Cyan', 3.0:'Green', 4.0:'Yellow', 5.0:'Red'}
        for target in tot_target.keys():
            #
            # Collect stats on the max abs(dpka)
            #
            pos=target.find(prefix)
            if pos!=-1:
                if tot_target[target].has_key('max_dpka'):
                    tf_max.append(abs(tot_target[target]['max_dpka']))
            #
            # Write the PDB file
            #
            if pos!=-1:
                resnum=target[pos+len(prefix):]
                resnum=':'+resnum.split(':')[1]
                if Z.residues.has_key(resnum):
                    if tot_target[target].has_key('max_dpka'):
                        col_cutoff=colors.keys()
                        col_cutoff.sort()
                        co=0.5
                        # Pick the first limit above max_dpka; values above
                        # every limit keep the last (largest) limit
                        for col in col_cutoff:
                            co=col
                            if tot_target[target]['max_dpka']<col:
                                break
                        colour=colors[co]
                        fd.write('ColorRes %d,%s\n' %(int(resnum[1:]),colour))
                    else:
                        fd.write('ColorRes %d,%s\n' %(int(resnum[1:]),'aaaaaa'))
                else:
                    # NOTE(review): string exceptions are rejected by
                    # Python 2.6 and later (TypeError instead of this message)
                    raise 'Residue not found',target
        #Z.writepdb('BF_dpKa')
        print 'Number of max_pkas in %s is %d' %(prefix,len(tf_max))
        avg,var,sdev=average(tf_max)
        print '%s, average max dpKa %5.1f, sdev: %5.1f' %(prefix,avg,sdev)
        #fd.write('exit\n')
        fd.close()
    #
    # Print all the stats
    #
    print
    print 'Number of targets designed is : %4d ' %(len(targets))
    all_targets=len(tot_target.keys())
    print 'Number of targets in total: : %4d ' %all_targets
    print '%% designed : %5.2f' %(float(len(targets))/float(all_targets)*100.0)
    print
    print 'Number of active site targets : %5.2f' %(len(actsite_dpkas))
    #
    # Get average Delta pKas
    #
    avg,var,sdev=average(all)
    print 'Average dpKa for all targets is : %5.2f (%5.2f)' %(avg,sdev)
    avg,var,sdev=average(max_dpkas)
    print 'Average MAX dpKa for all targets is :%5.2f (%5.2f)' %(avg,sdev)
    # Max dpka for active sites
    avg,var,sdev=average(actsite_dpkas)
    print 'Average MAX dpKa for active site targets: %5.2f (%5.2f)' %(avg,sdev)
    #
    avg,var,sdev=average(all_actsite_dpkas)
    print 'Average dpKa for actsit target :%5.2f (%5.2f)' %(avg,sdev)
    print
    print 'Average dpKa per protein'
    prots=file_dpkas.keys()
    prots.sort()
    for prot in prots:
        avg,var,sdev=average(file_dpkas[prot]['dpkas'])
        num_target=file_dpkas[prot]['num_target']
        aa_size=file_dpkas[prot]['prot_aas']
        num_sol=len(file_dpkas[prot]['dpkas'])
        print 'Average dpKa for %s is : %5.2f (%5.2f) [#targets %4d, #aas %4d, #sols/target %5.2f]' %(prot,avg,sdev,num_target,aa_size,float(num_sol)/float(num_target))
    #
    # Stats on the types of targets designed
    #
    designed={}
    import pKarun
    Y=pKarun.pKanalyse()
    for target in big_dict.keys():
        rtype=Y.get_residue_type(target)
        if not designed.has_key(rtype):
            designed[rtype]=0
        designed[rtype]=designed[rtype]+1
    des=designed.keys()
    #
    # Look at the targets not designed
    #
    not_designed={}
    all_targets=tot_target.keys()
    all_targets.sort()
    import pKarun
    Y=pKarun.pKanalyse()
    for target in all_targets:
        if not big_dict.has_key(target):
            rtype=Y.get_residue_type(target)
            if not not_designed.has_key(rtype):
                not_designed[rtype]=0
            not_designed[rtype]=not_designed[rtype]+1
    #
    # Stats
    #
    print
    print 'Stats on types of groups designed'
    types=['ASP','GLU','TYR','CYS','CTERM','NTERM','LYS','ARG','HIS']
    types.sort()
    for rtyp in types:
        if designed.has_key(rtyp):
            des=designed[rtyp]
        else:
            des=0
        if not_designed.has_key(rtyp):
            ndes=not_designed[rtyp]
        else:
            ndes=0
        tot=ndes+des
        if tot>0:
            avg='%5.2f' %(float(des)/float(tot)*100.0)
        else:
            avg='NA'
        print '%8s des: %3d notD: %3d, tot: %3d %% designed: %s' %(rtyp,des,ndes,tot,avg)
    #
    # Relation between average dpKa obtained and accessibility, type and electrostatic interactions.
    #
    print
    #
    # Plot of avg dpKa vs. sum of abs electrostatic interactions
    #
    avg_dpka=[]
    max_dpka=[]
    sum_elec=[]
    acc=[]
    for target in all_targets:
        dpkas=[]
        if big_dict.has_key(target):
            for mutants,dpka in big_dict[target]:
                dpkas.append(abs(dpka))
            e_sum=[]
            for elec in tot_target[target]['elecs']:
                e_sum.append(elec)
            #
            max_dpka.append(max(dpkas))
            #
            avg,var,sdev=average(dpkas)
            avg_dpka.append(avg)
            #
            avg,var,sdev=average(e_sum)
            sum_elec.append(get_sum(e_sum))
            #
            # Accessibility
            #
            acc.append(tot_target[target]['relacc'])
        else:
            #print 'No design for',target
            pass
    import dislin_driver
    file=dislin_driver.graf_mult2(acc,[avg_dpka,max_dpka],
                                  title='Effect of solvent exposure',
                                  x_legend='Relative accessibility of target',
                                  y_legend='abs(dpKa)',
                                  legends=['Avg. dpKa','Max. dpKa'])
    #os.system('eog %s' %file)
    #
    # Any difference for active site targets?
    #
    #
    # Plot it
    #
    # Every raw_data bin (a list of dpKa values) is replaced below by the
    # mean of abs(dpka) over the values above 0.01, or 0 when there are none
    nummuts={}
    nums=raw_data.keys()
    nums.sort()
    for num in nums:
        for co in raw_data[num].keys():
            max_val=-1.0
            sum=0.0
            count=0
            for dpka in raw_data[num][co]:
                if abs(dpka)>max_val:
                    max_val=abs(dpka)
                if dpka>0.01:
                    sum=sum+abs(dpka)
                    count=count+1
                #
                # Sort as function of number of mutations for other stat
                #
                if not nummuts.has_key(num):
                    nummuts[num]=[]
                nummuts[num].append(abs(dpka))
            if count==0:
                raw_data[num][co]=0
            else:
                raw_data[num][co]=float(sum)/float(count)
            #raw_data[num][co]=max_val
    import dislin_driver
    #dislin_driver.colour_2D(raw_data,'','','# of mutations','distance from target (A)','abs(dpKa)','dpka.tif')
    import os
    #os.system('gimp dpka.tif')
    #
    # Get dpKa as a function of # of mutants
    #
    #nums=nummuts.keys()
    #nums.sort()
    #x=[]
    #y=[]
    #for num in nums:
    #    for dpka in nummuts[num]:
    #        x.append(num)
    #        y.append(dpka)
    #file=dislin_driver.graf_mult2(x,[y],
    #                              title='dpKa, number of mutations',
    #                              x_legend='Number of mutations',
    #                              y_legend='abs(dpKa)')
    #os.system('gimp %s' %file)
    #
    # Save bigdict
    #
    # NOTE(review): hard-coded absolute output path
    fd=open('/home/nielsen/pKa-design/done_distnummuts/bigdict','w')
    import pickle
    pickle.dump(big_dict,fd)
    fd.close()
def Model_Mutations(pdbfile, mol2files, mutations, max_overlap=0.5, max_totalbump=1.0, return_score=False, store_mutation_operations=False):
    """Model a number of mutations in a pdbfile when one or more ligands are present

    pdbfile    -- PDB file to read (hydrogens are removed after reading)
    mol2files  -- mol2 files that are added with the tag 'LIGAND'
    mutations  -- mutation identifiers understood by pKa.pKD_tools
    max_overlap -- bump score limit for a single mutation
    max_totalbump -- limit for the summed bump score; raised to
                     max_overlap when it is smaller
    return_score -- when true the total bump score is returned as well
    store_mutation_operations -- when true, the atom-level differences
                     between the wild type and the mutant are stored on the
                     returned object as mutate_operations

    Returns an object whose PI attribute is the FFF instance holding the
    mutated structure, or False when a bump limit is exceeded; with
    return_score a (result, total_bump) tuple is returned instead.
    Rotamerlib and FFFaadef_dir are taken from the enclosing module."""
    #
    # Check for stupidity
    #
    if max_overlap > max_totalbump:
        max_totalbump = max_overlap
        print 'Adjusted total bump cutoff to %5.2f' % max_totalbump
    #
    # Read PDB file
    #
    import Protool
    P = Protool.structureIO()
    P.readpdb(pdbfile)
    P.remove_all_hydrogens()
    #
    # Read mol2 file
    #
    L = Protool.ligand(P)
    for mol2file in mol2files:
        print "Added %s with tag 'LIGAND'" % mol2file
        L.readmol2(mol2file, tag='LIGAND')
    #
    # Get the pdb lines
    #
    pdblines = P.writepdb('junk.pdb', nowrite=True)
    #
    # Pass the lines to FFF
    #
    import FFF.FFFcontrol
    myFFF = FFF.FFFcontrol.FFF()
    myFFF.parse_lines(pdblines)
    #myFFF.soup_stat()
    Model = FFF.FFFcontrol.model_class(myFFF, Rotamerlib, FFFaadef_dir)
    #
    # Store the wild type PDB file
    #
    if store_mutation_operations:
        wt_lines = myFFF.make_pdblines('PDB')
    #
    import pKa.pKD_tools as pKD_tools
    total_bump = 0.0
    for mutation in mutations:
        resid = pKD_tools.get_resid_from_mut(mutation)
        chainid = resid.split(':')[0]
        resid = resid.split(':')[1]
        #
        # Get rid of the leading zeros
        #
        # (at least one character is always kept)
        done = False
        while not done:
            if resid[0] == '0' and len(resid) > 1:
                resid = resid[1:]
            else:
                done = True
        #
        newres = pKD_tools.get_newrestyp_from_mut(mutation)
        oldres = pKD_tools.get_oldrestyp_from_mut(mutation)
        opttype = 3  # Rotamer library
        energies = Model.Mutate(chainid, resid, newres, opttype, max_overlap)
        bump_score = energies[0]
        Sum = energies[1]
        Coulomb = energies[2]
        #
        total_bump = total_bump + bump_score
        print 'Bump score: %5.2f, total bump: %5.2f' % (bump_score, total_bump)
        # Give up as soon as either limit is exceeded
        if bump_score > max_overlap or total_bump > max_totalbump:
            print 'Cannot model this set of mutations - too many bumps'
            if return_score:
                return False, total_bump
            else:
                return False
        print 'Bump score for %s: %5.3f' % (mutation, bump_score)
    print 'Total bump score for all mutations: %5.3f' % (total_bump)

    # Thin wrapper exposing the FFF instance under the attribute name PI
    class FFF_fix:
        def __init__(self, FFF):
            self.PI = FFF
            return
    #
    # Create the instance
    #
    FFF_instance = FFF_fix(myFFF)
    #
    # Keep track of the changes that were made to the PDB file
    #
    if store_mutation_operations:
        mut_lines = FFF_instance.PI.make_pdblines('PDB')
        import Protool
        WT = Protool.structureIO()
        WT.parsepdb(wt_lines)
        wt_atoms = sorted(WT.atoms.keys())
        #
        MUT = Protool.structureIO()
        MUT.parsepdb(mut_lines)
        #
        mut_atoms = sorted(MUT.atoms.keys())
        wt_count = 0
        mutcount = 0

        def coord_diff(atom1, atom2):
            # Sum of the absolute coordinate differences along X, Y and Z
            diff = 0.0
            for coord in ['X', 'Y', 'Z']:
                diff = diff + abs(atom1[coord] - atom2[coord])
            return diff
        operations = []
        # Atoms that vanished are deleted; atoms that moved by more than
        # 0.1 are deleted and added again at the new position
        for atom in wt_atoms:
            if not atom in mut_atoms:
                operations.append(['delete', atom, WT.atoms[atom]])
            elif coord_diff(WT.atoms[atom], MUT.atoms[atom]) > 0.1:
                operations.append(['delete', atom, WT.atoms[atom]])
                operations.append(['add', atom, MUT.atoms[atom]])
            else:
                pass
        # Atoms that only exist in the mutant are added
        for atom in mut_atoms:
            if not atom in wt_atoms:
                operations.append(['add', atom, MUT.atoms[atom]])
        #
        # Store these in FFF_fix
        #
        FFF_instance.mutate_operations = operations[:]
    #
    # Return the info
    #
    if (return_score):
        return FFF_instance, total_bump
    return FFF_instance
def test_function(options):
    """Model a mutation, or test the function

    Without options.test_modelling the mutations in options.mutations are
    modelled into options.pdbfile and the result is written to
    options.outfile. With options.test_modelling random mutations are
    scored with Model_Mutations and Model_Mutations_old until 2000 pairs
    of scores are collected, and the two scores are shown as a scatter
    plot. Scores are cached as pickle files below 'scores/' in the
    current directory.

    Raises Exception when the PDB file does not exist (modelling mode)."""
    if not options.test_modelling:
        import os
        mutations = options.mutations
        pdbfile = options.pdbfile
        if not os.path.isfile(pdbfile):
            raise Exception('PDB file not found: %s' % pdbfile)
        MUT, bs = Model_Mutations(
            pdbfile, [],
            mutations,
            return_score=True,
            max_overlap=options.bump,
            max_totalbump=options.totalbump,
            store_mutation_operations=options.store_operations)
        if MUT:
            MUT.PI.writepdb(options.outfile)
        else:
            print 'Cannot model mutations'
        return
    else:
        #
        # Test the modelling of PDB files by FFF and compare it to Protool
        #
        import Protool
        X = Protool.structureIO()
        X.readpdb(options.pdbfile)
        res = X.residues.keys()
        res.sort()
        aas = X.trueaminoacids.keys()
        x = []
        y = []
        import random
        bettermodel = []
        while len(x) < 2000:
            resi = random.choice(res)
            aa = random.choice(aas)
            mutation = '%s:%s:%s' % (resi, X.resname(resi), aa)
            print 'Calculating for %s' % mutation
            oldscore = None
            newscore = None
            # The function name doubles as the suffix of the cache file
            for function in ['Model_Mutations', 'Model_Mutations_old']:
                import os
                resultfile = os.path.join(os.getcwd(),
                                          'scores/' + mutation + str(function))
                if not os.path.isfile(resultfile):
                    # eval() only ever sees the two literal names above
                    M, score = eval(function)(options.pdbfile, [], [mutation],
                                              return_score=True)
                    fd = open(resultfile, 'w')
                    import pickle
                    A = score
                    pickle.dump(A, fd)
                    fd.close()
                else:
                    import pickle
                    fd = open(resultfile)
                    score = pickle.load(fd)
                    fd.close()
                #
                # Assign scores to the right variables
                #
                if resultfile[-4:] == '_old':
                    oldscore = score
                else:
                    newscore = score
            # Old scores above 10 are counted separately and not plotted
            if oldscore > 10:
                bettermodel.append(newscore)
                continue
            x.append(oldscore)
            y.append(newscore)
        print 'In %d cases FFF was able to construct a model where Protool was not' % len(
            bettermodel)
        print bettermodel
        import pylab
        pylab.scatter(x, y)
        pylab.show()
        return
def main(options, args):
    """Extract mutations from a multiple sequence alignment

    args[0] is the alignment file (clustal format unless options.fasta is
    set) and options.wt names the reference sequence in it. For every
    non-gap position of the reference the observed residues are counted;
    the percentages are written to 'frequencies.csv' and every
    substitution relative to the reference is written to 'mutations.txt'.
    Optionally the number of substitutions per residue is plotted against
    the distance from options.atom."""
    import PEATDB.sequence_alignment as SA
    if not options.fasta:
        alignment = SA.read_clustal_aln_file(args[0])
    else:
        alignment = SA.read_fasta_aln_file(args[0])
    print sorted(alignment.keys())
    HEWL_seq = alignment[options.wt]
    fd = open('mutations.txt', 'w')
    fd2 = open('frequencies.csv', 'w')
    # convert is defined outside this function; its keys are used as the
    # column headers and its values as the residue names in the output
    aas = convert.keys()
    aas.sort()
    import string
    fd2.write('WT Residue number, %s\n' % (string.join(aas, ',')))
    #
    # real_pos counts the non-gap positions of the reference sequence
    real_pos = 0
    lines = []
    PDB_mutations = {}
    #
    for position in range(0, len(HEWL_seq)):
        res_observed = {}
        if HEWL_seq[position] == '-':
            continue
        real_pos = real_pos + 1
        #print 'Now looking at position',real_pos
        for seq in alignment.keys():
            res_pos = alignment[seq][position]
            if not res_observed.has_key(res_pos):
                res_observed[res_pos] = 0
            res_observed[res_pos] = res_observed[res_pos] + 1
        #
        # Calculate frequencies of observation
        #
        total = sum(res_observed.values())
        text = '%3d' % real_pos
        #print res_observed.keys()
        for aa in aas:
            if res_observed.has_key(aa):
                text = text + ',%5.1f' % (float(res_observed[aa]) / total *
                                          100.0)
            else:
                text = text + ',%5.1f' % (0)
        fd2.write(text + '\n')
        #
        # -----
        #
        # NOTE(review): 'lines' is filled here but not used again in this function
        lines += ['%3d %d\n' % (real_pos, len(res_observed.keys()))]
        for mut in res_observed.keys():
            if mut == '-':
                continue
            if mut == HEWL_seq[position]:
                continue
            #
            org_res = HEWL_seq[position]
            new_res = mut
            import string
            if org_res == 'X' or new_res == 'X':
                continue
            #
            # Within the PDB file?
            #
            if real_pos < options.start_aa or real_pos > options.end_aa:
                pass
            else:
                PDB_residue = '%s:%s' % (options.CID, string.zfill(
                    real_pos + options.offset - options.noffset, 4))
                if not PDB_mutations.has_key(PDB_residue):
                    PDB_mutations[PDB_residue] = []
                PDB_mutations[PDB_residue].append(convert[new_res])
            muttext = '%s:%s:%s:%s' % (
                options.CID,
                string.zfill(real_pos + options.offset - options.noffset, 4),
                convert[org_res], convert[new_res])
            fd.write('%s,%s\n' % (muttext, muttext))
            #print muttext
    fd.close()
    fd2.close()
    #
    # Read PDB file?
    #
    if options.pdbfile:
        import Protool
        PI = Protool.structureIO()
        PI.readpdb(options.pdbfile)
    #
    # Plot the figure?
    #
    # NOTE(review): PI is only bound when options.pdbfile is set; plotting
    # without a PDB file looks like it would fail on PI below -- confirm
    if options.plotfigure:
        xs = []
        ys = []
        zs = []
        for residue in sorted(PDB_mutations.keys()):
            resnum = int(residue.split(':')[1])
            xs.append(resnum)
            ys.append(len(PDB_mutations[residue]))
            zs.append(PI.dist(options.atom, residue + ':CA'))
        import pylab
        pylab.plot(zs, ys, 'ro')
        pylab.xlabel('Distance from %s' % (options.atom))
        pylab.ylabel('Number of mutations')
        pylab.show()
    return
def __init__(self, pdbfile):
    """Create a Protool structure instance in self.PI and read pdbfile into it"""
    import Protool
    reader = Protool.structureIO()
    # Bind the attribute before reading, so it exists even if reading fails
    self.PI = reader
    reader.readpdb(pdbfile)
def main():
    """Parse the command line and design mutagenesis primers.

    The arguments are joined and split on '-': every piece is a parameter
    name optionally followed by one value. Names must be keys of
    get_defaults(), whose values are [value, type] lists. A 'T/F'
    parameter given without a value is toggled; a 'number' parameter is
    converted to float; any other value is stored as a string.

    Raises Exception for an unknown parameter or for a parameter followed
    by more than one value.

    NOTE(review): splitting on '-' also splits values that contain a
    hyphen (negative numbers, hyphenated file names)."""
    #
    # Parse the arguments
    #
    import sys
    defaults = get_defaults()
    args = ' '.join(sys.argv[1:]).split('-')
    for arg in args:
        split = arg.strip().split()
        if split == []:
            continue
        parm_name = split[0]
        if parm_name not in defaults:
            # Was a string exception, which Python 2.6+ turns into an
            # unrelated TypeError
            raise Exception('Unknown parameter: %s' % parm_name)
        if len(split) == 1:
            #
            # Deal with T/F: a bare flag toggles the default
            #
            if defaults[parm_name][1] == 'T/F':
                if defaults[parm_name][0]:
                    defaults[parm_name][0] = None
                else:
                    defaults[parm_name][0] = 1
        elif len(split) == 2:
            #
            # Deal with all the other cases
            #
            if defaults[parm_name][1] == 'number':
                defaults[parm_name][0] = float(split[1])
            else:
                defaults[parm_name][0] = split[1]
        else:
            raise Exception('Incorrect usage')
    #
    # Reformat: keep only the values
    #
    params = {}
    for key in defaults.keys():
        params[key] = defaults[key][0]
    #
    # Load the file and design the primer
    #
    if not params['seq']:
        usage()
    seq_file = params['seq']
    # The mutation is read as <one character><residue number><new residue>
    new_AA = params['mutation'][-1]
    AA_number = int(params['mutation'][1:-1])
    S = DNA_sequence.sequence()
    DNA_seq = S.readpir(seq_file)
    Tm_desired = params['Tm']
    find_restriction_site = 1
    if params['no_restriction_site']:
        find_restriction_site = None
    #
    # Do it!
    #
    import Protool
    X = Protool.structureIO()
    # NOTE(review): other code reads the attribute 'three_to_one'; confirm
    # that 'threetoone' exists and that new_AA is a valid key for it
    new_AA = X.threetoone[new_AA]
    #
    # Call the function for designing primers
    #
    new_enzymes, primers_results_dict, enzymes_that_already_cut, primer_starting_position, comb_on_Tm = exhaustive_research(
        DNA_seq,
        AA_number,
        new_AA,
        Tm_desired,
        find_restriction_site,
        enzyme_list=None)
    megaprimer_dict = megaprimer(DNA_seq, Tm_desired=65)
    return
def main(options, args): """Start the calculations""" import os, shutil top = os.getcwd() # # Read the filelist # if args[0] != 'all': fd = open(args[0]) files = fd.readlines() fd.close() else: files = os.listdir(top) # # ------------------------------------- # # Loop over all files and do the task # for filename in sorted(files): # # clean filename # if filename[0] == '#': continue if filename[-4:] != '.pdb': continue import string filename = string.strip(filename) filename = filename.split()[0] if filename[0] == '#': continue # # If we have a file then create a dir # if options.filestructure == 'files': if filename[-4:] == '.pdb': dirname = filename[:-4] else: dirname = filename + '_dir' if not os.path.isdir(dirname): os.mkdir(dirname) # # copy the pdb file to the dir # shutil.copy(filename, dirname) else: dirname = filename # # Make the dirname absolute # dirname = os.path.join(top, dirname) # # Change dir to the calc dir # os.chdir(dirname) # # Delete TOPOLOGY.H and DELRAD.DAT + DELCRG.DAT if they exist # import shutil copyfiles = ['DELRAD.DAT', 'DELCRG.DAT', 'TOPOLOGY.H'] for copyfile in copyfiles: if os.path.lexists(os.path.join(dirname, copyfile)): os.unlink(os.path.join(dirname, copyfile)) # # Find the pdb file # pdbfile = False searchnames = [filename, filename + '.pdb'] for sname in searchnames: rname = os.path.join(dirname, sname) if os.path.isfile(rname): pdbfile = rname break if not pdbfile: raise Exception('Could not find PDB file in %s' % os.getcwd()) # # EM + MD? # if options.EM: #corall(pdbfile) class options: def __init__(otherself): otherself.type = 'pKa' otherself.pdbfile = pdbfile otherself.clean = False return Goptions = options() # import GromacsEM pdblines = GromacsEM.EMone(Goptions) import Protool PI = Protool.structureIO() PI.parsepdb(pdblines) PI.Remove_All_NonAminoAcids() # Make sure all waters are removed pdbfile = pdbfile + 'test' PI.writepdb(pdbfile) corall(pdbfile) # Do final corall stop # # Should we do a corall? 
# if options.corall: corall(pdbfile) # # Copy the DEL* files and TOPOLOGY.H # for copyfile in copyfiles: shutil.copy(os.path.join(top, copyfile), os.path.join(dirname, copyfile)) # # Instantiate pKarun # PM = pKarun_base.pKamisc() params = PM.parse_parameterlist(['-dbcrit 1000'], skip2first=False) print params X = pKarun_base.pKarun(os.getcwd(), pdbfile, params) # # Carry out the tasks # if options.tasks == 'titration': print 'Runing solvepka in ', os.getcwd() X.solvepka() elif options.tasks == 'desolv': X.desolv() elif options.tasks == 'backgr': X.backgr() elif options.tasks == 'matrix': X.matrix() elif options.tasks == 'all': print 'Running all' print options.tasks X.all() # # Change dir back # os.chdir(top) print 'Back in ', os.getcwd() import sys sys.stdout.flush()
def pdb2pka_sugelm(self): """Explore all possible mutations and calculate a phimap for each using pdb2pka (APBS)""" import Protool P = Protool.structureIO() P.readpdb(self.pdbfile) P.RemoveALT() #import Protool.mutate #MUT=Protool.mutate.Mutate(P) # # Construct arrays # import pKD_dict self.data = pKD_dict.pKD_dict() self.atom_data = pKD_dict.pKD_dict() # # Create dir for mutant PDB files # import os mutdir = os.path.join(self.topdir, self.pdbfile + '.pdbs') if not os.path.isdir(mutdir): os.mkdir(mutdir) # # Loop over all residues # residues = P.residues.keys() residues.sort() for residue in residues: orgres = P.resname(residue) print 'Calculating for %s %s' % (residue, P.resname(residue)) # # If neutral mutate to Asp, Glu, Lys, Arg, His # targets = [] for res in ['ARG', 'LYS', 'HIS', 'ASP', 'GLU']: if P.resname(residue) != res: targets.append(res) #if orgres=='GLU': # targets.append('GLN') #elif orgres=='ASP': # targets.append('ASN') #elif orgres=='HIS': # targets.append('PHE') #elif orgres=='ARG' or P.resname(residue)=='LYS': # targets.append('MET') # # Target identified. 
Now model each # for target in targets: import pKD_tools resid = pKD_tools.get_resid_from_res(residue) orgres = P.resname(residue) filename = os.path.join( mutdir, '%s:%s:%s.pdb' % (residue, orgres, target)) mutation = '%s:%s:%s' % (residue, orgres, target) if not os.path.isfile(filename): import Design_pKa_help Design_pKa_help.make_mutation(self.pdbfile, mutation) NP = Protool.structureIO() NP.readpdb(filename) NP.writepdb(filename, TER=None) # # Calculate the interaction energies # protein, routines, forcefield, apbs_setup, lig_titgrps = pdb2pka.pre_init( pdbfilename=filename, ff='parse', ligand=None, verbose=1) mypkaRoutines = pdb2pka.pKaRoutines(protein, routines, forcefield, apbs_setup) # # Find our group # sp = residue.split(':') chainid = sp[0] resnum = int(sp[1]) mypkaRoutines.findTitratableGroups() this_pKa = None for pKa in mypkaRoutines.pKas: print pKa.residue.resSeq, resnum print pKa.residue.chainID, chainid print pKa.residue.name, target print pKa.pKaGroup.name, target print '--------------' print 'ChainID', pKa.residue.chainID if pKa.residue.resSeq == resnum and pKa.residue.chainID == chainid and pKa.residue.name == target and pKa.pKaGroup.name == target: #print 'Found group',pKa.residue.resSeq,pKa.pKaGroup.name this_pKa = pKa break if not this_pKa: raise Exception, 'Could not find inserted titratable group' mypkaRoutines.get_interaction_energies_setup(this_pKa, mode='pKD') matrix = mypkaRoutines.matrix # # Dig the interaction energies out of the pdb2pka array # for titration1 in matrix[this_pKa].keys(): for state1 in matrix[this_pKa][titration1].keys(): grp_sub = matrix[this_pKa][titration1][state1] if mypkaRoutines.is_charged(this_pKa, titration1, state1): for pKa2 in grp_sub.keys(): import string chainID2 = pKa.residue.chainID resid2 = '%s:%s' % ( chainID2, string.zfill(pKa2.residue.resSeq, 4)) for titration2 in grp_sub[pKa2].keys(): for state2 in grp_sub[pKa2][ titration2].keys(): if mypkaRoutines.is_charged( pKa2, titration2, state2): # # Both 
states are charged, so now we can pull the # interaction energies out # if not self.data.has_key(mutation): self.data[mutation] = {} self.data[mutation][ resid2] = grp_sub[pKa2][ titration2][state2] # # Get the potentials at all atoms too # all_pots = mypkaRoutines.all_potentials[ this_pKa][titration1][state1] sub_all_pots = all_pots[pKa2][ titration2][state2] for atom in sub_all_pots.keys(): resid = mutation import pKD_tools resid2 = pKD_tools.get_resid_from_res( atom) atomname = atom.split(':')[ -1] #atom.name if atomname[ 0] == 'H' or atomname in [ 'N', 'C', 'O' ]: continue # Skip all H atoms and all non-CA backbone atoms to save memory if not self.atom_data.has_key( resid): self.atom_data[resid] = {} if not self.atom_data[ resid].has_key(resid2): self.atom_data[resid][ resid2] = {} self.atom_data[resid][resid2][ atomname] = abs( sub_all_pots[atom]) return self.data, self.atom_data
if options.partner1 is None: print 'Stability results for the first partner must be provided' run = False if options.partner2 is None: print 'Stability results for the second partner must be provided' run = False if not run: sys.exit(1) complex = Core.Matrix.matrixFromCSVFile(options.complex) partner1 = Core.Matrix.matrixFromCSVFile(options.partner1) partner2 = Core.Matrix.matrixFromCSVFile(options.partner2) pdb = Protool.structureIO() pdb.readpdb(options.protein) combined = zip(complex.mutations, complex.total) print 'Data found for %d mutants of the complex' % len(combined) interactionEnergies = [] for element in combined: set = Core.Data.MutationSet(code=element[0]) code = "+".join(set.reducedMutationCodes(pdb)) values = [] if MutationsInPartner(code, partner1): data = partner1.dataForMutation(code) values.append(data[-1])
def cubescanplot(options): """Read a cubescan output file and show the results""" fd = open(options.cubescanfile) import pickle data = pickle.load(fd) fd.close() # cubegrid = data[0] scanresults = data[1] # # Instantiate scoring class # SC = score_class(options) # # Score the ghosts from each cubescan # scores = [] for cube in sorted(scanresults.keys()): calc_ghosts = scanresults[cube] xs, ys, experrors, satisfied = SC.score_ghosts(calc_ghosts) rmsd = RMSD(xs, ys, experrors) scores.append([rmsd, cube]) #import pylab #pylab.errorbar(xs,ys,xerr=experrors,fmt='ro') #pylab.plot(xs,xs,'g-') #pylab.xlabel('Experimental dCS') #pylab.ylabel('Calculated dCS') #pylab.title('Cubescan of cube %4d, atom: %s, RMSD: %5.3f' %(cube,options.atom,rmsd)) #pylab.savefig('Cubescan_%d.png' %(cube)) #pylab.clf() rmsds = [] scores.sort() import Protool P = Protool.structureIO() P.readpdb('2LZT_H.pdb') count = 0 for rmsd, cube in scores[:25]: print '%4d, rmsd: %5.2f' % (cube, rmsd) center = cubegrid[cube]['coord'] P.add_atom('X:%4d:CS' % (count + 1000), atomnumber=0, atomname='CS', chainid='X', residuename='CSS', residuenumber='999', xcoord=center[0], ycoord=center[1], zcoord=center[2], update=1, BFACTOR=None, OCCUPANCY=None, CHARGE=None, RADIUS=None, tag=None, accept_duplicate=False) count = count + 1 rmsds.append(rmsd) P.writepdb('cubescan.pdb') import pylab pylab.hist(rmsds) pylab.savefig('Cubescanhist.png') return
def __init__(self, pdbfile):
    """Create a Protool structure object and read pdbfile into it.

    The structure object is stored on the instance as self.PI.
    """
    import Protool
    structure = Protool.structureIO()
    # Attach the object before reading, so self.PI exists even if readpdb fails
    self.PI = structure
    structure.readpdb(pdbfile)
def main():
    """Run a full alanine scan and calculate dpKa values for the target residues.

    Command line (read from sys.argv): <pdbfile> <target residues>.  The
    target residues are a comma-separated list such as
    ':0035:GLU,:0052:ASP'.  If the literal 'ALL' is given, the groups
    returned by Protool's get_titratable_groups() are used instead.

    Each residue of the structure is mutated to ALA in turn and
    Design_pKa.run_opt is called for that mutation.  Residues that are
    already ALA or GLY, or for which P.isaa() is false, are skipped.  The
    result for each residue is cached in 'alascan_<residue>.result' in the
    current working directory; an existing cache file is loaded instead of
    recalculating.  Finally all results are pickled to
    '<pdbfile>.alascan.pickle'.

    Raises:
        Exception: with message 'Incorrect usage' when fewer than two
            command line arguments are supplied (a usage text is printed
            first).
    """
    import os
    import pickle
    import sys
    #
    # Parse the command line. Only a missing argument counts as wrong usage,
    # so nothing broader than IndexError is caught here.
    #
    try:
        pdbfile = sys.argv[1]
        target_residues = sys.argv[2]
    except IndexError:
        print('')
        print('Usage: pKa_alascan.py <pdbfile> <target residues>')
        print('Example: pKa_alascan.py 2lzt.pdb :0035:GLU,:0052:ASP')
        print('This command will perform a full Alanine scan and report the effect of each mutation on the pKa values of Glu 35 and Asp 52')
        print('If ALL is supplied instead of a list of target residues, then dpKas will be calculated for all residues')
        print('')
        raise Exception('Incorrect usage')
    #
    # Start the work
    #
    import Protool
    P = Protool.structureIO()
    P.readpdb(pdbfile)
    residues = sorted(P.residues.keys())
    #
    # All titgroups?
    #
    if target_residues == 'ALL':
        target_residues = ','.join(P.get_titratable_groups())
    #
    # Start looping
    #
    # The pickle files below are opened in text mode on purpose: the default
    # (ASCII) pickle protocol is used and reading/writing stay consistent, so
    # result files written by earlier runs remain loadable.
    #
    results = {}
    for residue in residues:
        #
        # Define result filename
        #
        resultfile = os.path.join(os.getcwd(), 'alascan_%s.result' % residue)
        if os.path.isfile(resultfile):
            # Re-use the cached result from a previous run
            with open(resultfile) as fd:
                results[residue] = pickle.load(fd)
            continue
        resname = P.resname(residue)
        if resname in ('ALA', 'GLY') or not P.isaa(residue):
            print('Skipping %s %s' % (residue, resname))
            continue
        print('Calculating for residue %s %s' % (residue, resname))
        recalc_intpka = 1
        defaults = local_defaults(pdbfile, target_residues, recalc_intpka)
        #
        # Set the mutations
        #
        defaults['mutations'][0] = '%s:%s:%s' % (residue, resname, 'ALA')
        # Imported only when a calculation is actually needed
        import pKa.Design_pKa as Design_pKa
        #
        # Calculate the dpKas
        #
        solutions, _pKd_dict = Design_pKa.run_opt(defaults)
        results[residue] = solutions.copy()
        #
        # Save this result
        #
        with open(resultfile, 'w') as fd:
            pickle.dump(results[residue], fd)
    #
    # Save all
    #
    name = '%s.alascan.pickle' % pdbfile
    with open(name, 'w') as fd:
        pickle.dump(results, fd)