def Model_Mutations_old(pdbfile, mol2files, mutations, max_overlap=0.5, return_score=False):
    """Model a number of mutations in a pdbfile when one or more ligands are present.

    The structure is read with Protool, stripped of hydrogens, combined with
    the ligands read from the mol2 files and handed to a ``Mutate`` instance,
    which then applies the mutations one at a time.

    Args:
        pdbfile: PDB file passed to ``Protool.structureIO.readpdb``.
        mol2files: iterable of mol2 files; each one is read with tag 'LIGAND'.
        mutations: iterable of mutation specifiers that are parsed with
            ``pKa.pKD_tools`` (residue id, old and new residue type).
        max_overlap: used both as ``max_bump`` for ``Mutate`` and as the
            largest bump score accepted for a single mutation.
        return_score: if true, a successful call returns
            ``(MUT, total_bump)`` instead of just ``MUT``.

    Returns:
        ``MUT`` (or ``(MUT, total_bump)`` when ``return_score`` is true) if
        every mutation could be modelled.  If a mutation cannot be modelled
        the tuple ``(False, 20.0)`` is returned regardless of
        ``return_score``.
    """
    #
    # Initialise mutate routines
    #
    MUT = Mutate(max_bump=max_overlap)
    #
    # Read PDB file
    #
    import Protool
    P = Protool.structureIO()
    P.readpdb(pdbfile)
    P.remove_all_hydrogens()
    #
    # Read mol2 files
    #
    L = Protool.ligand(P)
    for mol2file in mol2files:
        print("Added %s with tag 'LIGAND'" % mol2file)
        L.readmol2(mol2file, tag='LIGAND')
    #
    # Pass combined pdb file to mutate routines and mutate
    #
    MUT.new_PDB(P)
    import pKa.pKD_tools as pKD_tools
    total_bump = 0.0
    #
    # Model
    #
    for mutation in mutations:
        resid = pKD_tools.get_resid_from_mut(mutation)
        newres = pKD_tools.get_newrestyp_from_mut(mutation)
        oldres = pKD_tools.get_oldrestyp_from_mut(mutation)
        bump_score = MUT.mutate(resid, newres, orgtype=oldres)
        if bump_score is None or bump_score is False or bump_score > max_overlap:
            print('Cannot model this set of mutations - too many bumps')
            # NOTE(review): this failure value is a non-empty tuple (truthy)
            # even when return_score is False; kept for existing callers.
            return False, 20.0
        print('Bump score for %s: %5.3f' % (mutation, bump_score))
        total_bump = total_bump + bump_score
    #
    # Report and return the accumulated score.  The score of the last
    # mutation was used here before, which also failed with a NameError
    # when no mutations were given.
    #
    print('Total bump score for all mutations: %5.3f' % total_bump)
    if return_score:
        return MUT, total_bump
    return MUT
def Model_Mutations_old(pdbfile,mol2files,mutations,max_overlap=0.5,return_score=False):
    """Model a number of mutations in a pdbfile when one or more ligands are present.

    The structure is read with Protool, stripped of hydrogens, combined with
    the ligands read from the mol2 files and handed to a ``Mutate`` instance,
    which then applies the mutations one at a time.

    Args:
        pdbfile: PDB file passed to ``Protool.structureIO.readpdb``.
        mol2files: iterable of mol2 files; each one is read with tag 'LIGAND'.
        mutations: iterable of mutation specifiers that are parsed with
            ``pKa.pKD_tools`` (residue id, old and new residue type).
        max_overlap: used both as ``max_bump`` for ``Mutate`` and as the
            largest bump score accepted for a single mutation.
        return_score: if true, a successful call returns
            ``(MUT, total_bump)`` instead of just ``MUT``.

    Returns:
        ``MUT`` (or ``(MUT, total_bump)`` when ``return_score`` is true) if
        every mutation could be modelled.  If a mutation cannot be modelled
        the tuple ``(False, 20.0)`` is returned regardless of
        ``return_score``.
    """
    #
    # Initialise mutate routines
    #
    MUT = Mutate(max_bump=max_overlap)
    #
    # Read PDB file
    #
    import Protool
    P = Protool.structureIO()
    P.readpdb(pdbfile)
    P.remove_all_hydrogens()
    #
    # Read mol2 files
    #
    L = Protool.ligand(P)
    for mol2file in mol2files:
        print("Added %s with tag 'LIGAND'" % mol2file)
        L.readmol2(mol2file, tag='LIGAND')
    #
    # Pass combined pdb file to mutate routines and mutate
    #
    MUT.new_PDB(P)
    import pKa.pKD_tools as pKD_tools
    total_bump = 0.0
    #
    # Model
    #
    for mutation in mutations:
        resid = pKD_tools.get_resid_from_mut(mutation)
        newres = pKD_tools.get_newrestyp_from_mut(mutation)
        oldres = pKD_tools.get_oldrestyp_from_mut(mutation)
        bump_score = MUT.mutate(resid, newres, orgtype=oldres)
        if bump_score is None or bump_score is False or bump_score > max_overlap:
            print('Cannot model this set of mutations - too many bumps')
            # NOTE(review): this failure value is a non-empty tuple (truthy)
            # even when return_score is False; kept for existing callers.
            return False, 20.0
        print('Bump score for %s: %5.3f' % (mutation, bump_score))
        total_bump = total_bump + bump_score
    #
    # Report and return the accumulated score.  The score of the last
    # mutation was used here before, which also failed with a NameError
    # when no mutations were given.
    #
    print('Total bump score for all mutations: %5.3f' % total_bump)
    if return_score:
        return MUT, total_bump
    return MUT
def Model_Mutations(pdbfile, mol2files, mutations, max_overlap=0.5, max_totalbump=1.0,
                    return_score=False, store_mutation_operations=False):
    """Model a number of mutations in a pdbfile when one or more ligands are present.

    The PDB file and the ligands are combined with Protool, the resulting
    PDB lines are parsed by FFF, and every mutation is modelled with
    ``model_class.Mutate`` using the rotamer library option (opttype 3).

    Args:
        pdbfile: PDB file passed to ``Protool.structureIO.readpdb``.
        mol2files: iterable of mol2 files; each one is read with tag 'LIGAND'.
        mutations: iterable of mutation specifiers parsed with
            ``pKa.pKD_tools``; the residue id must look like 'chain:number'.
        max_overlap: largest bump score accepted for a single mutation.
        max_totalbump: largest accepted sum of bump scores.  Raised to
            ``max_overlap`` if it is smaller than that.
        return_score: if true, the total bump score is returned as a second
            value (on success and on failure).
        store_mutation_operations: if true, the atom-level differences between
            the wild type and the mutant are stored on the returned object as
            ``mutate_operations``.

    Returns:
        On success an ``FFF_fix`` instance whose ``PI`` attribute is the FFF
        object, or ``(instance, total_bump)`` when ``return_score`` is true.
        On failure ``False``, or ``(False, total_bump)`` when ``return_score``
        is true.
    """
    #
    # Check for stupidity
    #
    if max_overlap > max_totalbump:
        max_totalbump = max_overlap
        print('Adjusted total bump cutoff to %5.2f' % max_totalbump)
    #
    # Read PDB file
    #
    import Protool
    P = Protool.structureIO()
    P.readpdb(pdbfile)
    P.remove_all_hydrogens()
    #
    # Read mol2 files
    #
    L = Protool.ligand(P)
    for mol2file in mol2files:
        print("Added %s with tag 'LIGAND'" % mol2file)
        L.readmol2(mol2file, tag='LIGAND')
    #
    # Get the pdb lines (nowrite=True: 'junk.pdb' is only a placeholder name)
    #
    pdblines = P.writepdb('junk.pdb', nowrite=True)
    #
    # Pass the lines to FFF
    #
    import FFF.FFFcontrol
    myFFF = FFF.FFFcontrol.FFF()
    myFFF.parse_lines(pdblines)
    Model = FFF.FFFcontrol.model_class(myFFF, Rotamerlib, FFFaadef_dir)
    #
    # Store the wild type PDB file
    #
    if store_mutation_operations:
        wt_lines = myFFF.make_pdblines('PDB')
    #
    import pKa.pKD_tools as pKD_tools
    total_bump = 0.0
    opttype = 3  # Rotamer library
    for mutation in mutations:
        fields = pKD_tools.get_resid_from_mut(mutation).split(':')
        chainid = fields[0]
        resid = fields[1]
        #
        # Get rid of the leading zeros, but keep a single '0' for an
        # all-zero residue number
        #
        resid = resid.lstrip('0') or resid[-1:]
        #
        newres = pKD_tools.get_newrestyp_from_mut(mutation)
        energies = Model.Mutate(chainid, resid, newres, opttype, max_overlap)
        bump_score = energies[0]  # only the first value is used here
        #
        total_bump = total_bump + bump_score
        print('Bump score: %5.2f, total bump: %5.2f' % (bump_score, total_bump))
        if bump_score > max_overlap or total_bump > max_totalbump:
            print('Cannot model this set of mutations - too many bumps')
            if return_score:
                return False, total_bump
            return False
        print('Bump score for %s: %5.3f' % (mutation, bump_score))
    print('Total bump score for all mutations: %5.3f' % total_bump)

    class FFF_fix:
        """Thin holder that exposes the FFF object as ``PI``."""

        def __init__(self, FFF):
            self.PI = FFF
            return
    #
    # Create the instance
    #
    FFF_instance = FFF_fix(myFFF)
    #
    # Keep track of the changes that were made to the PDB file
    #
    if store_mutation_operations:
        mut_lines = FFF_instance.PI.make_pdblines('PDB')
        WT = Protool.structureIO()
        WT.parsepdb(wt_lines)
        MUT = Protool.structureIO()
        MUT.parsepdb(mut_lines)
        wt_atoms = sorted(WT.atoms.keys())
        mut_atoms = sorted(MUT.atoms.keys())
        # Sets give constant-time membership tests; the sorted lists are
        # only used to keep the order of the operations deterministic.
        wt_names = set(wt_atoms)
        mut_names = set(mut_atoms)

        def coord_diff(atom1, atom2):
            """Sum of the absolute coordinate differences of two atoms."""
            return sum(abs(atom1[axis] - atom2[axis]) for axis in ('X', 'Y', 'Z'))

        operations = []
        for atom in wt_atoms:
            if atom not in mut_names:
                operations.append(['delete', atom, WT.atoms[atom]])
            elif coord_diff(WT.atoms[atom], MUT.atoms[atom]) > 0.1:
                # A moved atom is recorded as delete followed by add
                operations.append(['delete', atom, WT.atoms[atom]])
                operations.append(['add', atom, MUT.atoms[atom]])
        for atom in mut_atoms:
            if atom not in wt_names:
                operations.append(['add', atom, MUT.atoms[atom]])
        #
        # Store these in FFF_fix
        #
        FFF_instance.mutate_operations = operations
    #
    # Return the info
    #
    if return_score:
        return FFF_instance, total_bump
    return FFF_instance
def checkModels(self, DB=None, callback=None, selected=None, usemutationcodes=False):
    """Check that we have modelled a structure for everything we can.

    For every selected record without an available structure, the record
    with a structure that is an ancestor and needs the fewest operations is
    used as the template, and the point mutations are modelled onto it with
    ``self.MUT``.  Deletions and insertions are reported and skipped.

    Args:
        DB: database object; nothing is done when it is None.
        callback: optional callable that receives the completion percentage
            after each record for which modelling was attempted.  When None
            the progress is printed instead.
        selected: record names to process.  Defaults to all records that do
            not have an available structure.
        usemutationcodes: not used by this method.

    Side effects:
        Passes the reference structure and the path 'ref.pdb' in the current
        working directory to ``self.writePDB``; may set ``rec.aaseq`` from
        the record's mutation code; sets ``rec.Structure`` and
        ``rec.structuretype`` on every record for which modelling was
        attempted.
    """
    if DB is None:
        return
    proteins = DB.getRecs()
    refprot = DB.meta.refprotein
    refseq = DB[refprot].aaseq
    refaa = self.AAList2String(refseq)
    refpdb = DB[refprot].Structure
    refpdbfile = os.path.join(os.getcwd(), 'ref.pdb')
    self.writePDB(refpdb, refpdbfile)
    failed = []

    # Check that Protool is loaded (must happen before Protool is imported)
    if not self.MUT:
        self.initProtool()

    # Create protool instance for ref pdb
    import Protool
    Xref = Protool.structureIO()
    Xref.parsepdb(refpdb)

    # Find all potential parents
    records_with_structure = [
        name for name in proteins
        if DB.get(name).hasStructure() == 'available'
    ]

    # Loop over selected or all
    if selected is None:
        selected = list(set(proteins) - set(records_with_structure))
    numrecords = len(selected)
    count = 1
    for protein in selected:
        rec = DB.get(protein)
        if rec.hasStructure() == 'available':
            continue
        print('Protein: %s' % (protein,))

        # If no sequence, try to create one from the mutation code
        if rec.aaseq is None and rec.Mutations is not None:
            print('no sequence, using mutation code and ref protein seq')
            import PEATSA.Core as Core
            print('Record has mutation code %s' % rec.Mutations)
            mutationSet = Core.Data.MutationSet(rec.Mutations)
            mutseq = mutationSet.applyToSequence(refaa, id='A', pdb=Xref)
            rec.aaseq = self.string2AAseq(mutseq)

        # Find the best parent: the first one with the fewest operations.
        # The record and operations are kept together with the name so that
        # the template used below really is the chosen parent and not
        # whichever record this loop visited last.
        best = None
        for candidate in records_with_structure:
            candidate_rec = DB.get(candidate)
            is_parent, candidate_ops = rec.getAncestry(candidate_rec)
            # We can only model on X-ray structures
            if candidate_rec.hasStructure() == 'available' and is_parent:
                if best is None or len(candidate_ops) < len(best[2]):
                    best = (candidate, candidate_rec, candidate_ops)
        if best is None:
            # No usable parent; the record is skipped without being
            # added to the failed list
            continue
        parent, parentrec, operations = best
        print('Using %s as template with %d operations.' % (parent, len(operations)))

        # Start the modelling: load the template structure
        pdblines = parentrec.Structure
        X = Protool.structureIO()
        X.parsepdb(pdblines)
        self.MUT.new_PDB(X)
        self.MUT.max_tolerated_bump = 0.5
        atom_changes = []
        skip_protein = None
        self.MUT.new_mutation()
        import pKa.pKD_tools as pKD_tools
        for operation in operations:
            if 'delete' in operation:
                print('This is a deletion - Jens should write code for modelling this')
                print('Deletion ignored for now')
                continue
            if 'insert' in operation:
                print('This is an insertion - Jens should write code for modelling insertions')
                print('Insertion ignored for now')
                continue

            # This is a normal mutation
            new_res = pKD_tools.get_newrestyp_from_mut(operation)
            old_res = pKD_tools.get_oldrestyp_from_mut(operation)
            resid = pKD_tools.get_resid_from_mut(operation)
            if resid not in X.residues:
                print('No structural info for mutation %8s. Not modelling this mutation\n' % operation)
                # The remaining operations are still tried, but the record
                # is marked as failed afterwards
                skip_protein = True
                continue

            # Actually make the mutation
            bump_score = self.MUT.mutate(resid, new_res, orgtype=old_res)
            print('Mutation: %s, bump_score: %s' % (resid + new_res, str(bump_score)))
            if bump_score is None:
                skip_protein = True
                break
            atom_changes = atom_changes + self.MUT.mutate_operations
            self.MUT.mutate_operations = []

        # Update progress
        completion = float(count) / float(numrecords) * 100.0
        if callback is not None:
            callback(completion)
        else:
            print('%4d of %4d, completion; %5.2f%%' % (count, float(numrecords), completion))
        count = count + 1

        # Did it work?
        if skip_protein:
            print('')
            print('Modelling failed for %s' % protein)
            failed.append(protein)
            rec.Structure = 'Bumps'
            rec.structuretype = 'failed model'
            continue

        # We have all sets of changes in atom_changes
        rec.Structure = {'Rotamer_operations': atom_changes}
        rec.Structure['parent'] = parent
        rec.structuretype = 'peat model'
    print('Done')
    if len(failed) > 0:
        print('Failed to model the following proteins:')
        for name in failed:
            print(name)
    return
def makemutantSequence(self, sequence, operations):
    """Apply the specified mutations to a sequence and return the mutant seq.

    Sequence must be in the [[A:0001:ALA],[A:0002:GLU]] format, i.e. an
    iterable of (residue id, residue type) pairs.

    Operations is a list of the following types:
        Mutations:  A:0001:ALA:ASP
        Deletions:  delete:A:0002:GLU
        Insertions: insert:1:A:0003:THR:ALA, insert:2:A:0003:THR:TRP
                    (insert THR,TRP after A:0003:THR)
    Operations are always performed in sequence numbering order.

    NOTE(review): every operation is currently registered as a 'mutate'
    operation, so the deletion and insertion branches below are not reached
    from this method as written.

    Returns:
        ``sequence`` itself when there are no operations, otherwise a new
        list of [residue id, residue type] entries.

    Raises:
        Exception: if two operations target the same residue, if a deletion
            names the wrong original residue, or if an operation type is
            unknown.
    """
    if not operations:
        return sequence

    import pKa.pKD_tools as pKD_tools

    def make_resid(chain, number):
        """Format a residue id as 'chain:NNNN' with a zero-padded number."""
        return '%s:%s' % (chain, str(number).zfill(4))

    # Index the operations by the residue they act on
    ops_sorted = {}
    for operation in operations:
        resid = pKD_tools.get_resid_from_mut(operation)
        if resid in ops_sorted:
            raise Exception('More than one operation on the same residue: %s' % resid)
        ops_sorted[resid] = ['mutate', operation]

    # Perform the operations
    new_seq = []
    new_count = None
    new_chain = None
    for resid, restyp in sequence:
        # Restart the numbering at the beginning and whenever the chain changes
        sp_resid = resid.split(':')
        if sp_resid[0] != new_chain:
            new_chain = sp_resid[0]
            new_count = int(sp_resid[1])
            newresid = make_resid(new_chain, new_count)

        # Residues without an operation are copied with their original id
        if resid not in ops_sorted:
            new_seq.append([resid, restyp])
            new_count = new_count + 1
            newresid = make_resid(new_chain, new_count)
            continue

        op = ops_sorted[resid]
        if op[0] == 'delete':
            # Deletion: not appending the residue is what deletes it
            if op[1] != restyp:
                raise Exception('Incorrect org residue in deletion: %s' % op)
        elif op[0] == 'insert':
            # Insertion
            for i in sorted(op[1].keys()):
                if i[0] == restyp:
                    new_seq.append([newresid, i[1]])
                    new_count = new_count + 1
                    newresid = make_resid(new_chain, new_count)
        elif op[0] == 'mutate':
            # Mutation
            orgres = pKD_tools.get_oldrestyp_from_mut(op[1])
            # NOTE(review): when the original residue type does not match,
            # the residue is left out of the result without an error -
            # confirm that this is intended.
            if orgres == restyp:
                new_seq.append([newresid, pKD_tools.get_newrestyp_from_mut(op[1])])
                new_count = new_count + 1
                newresid = make_resid(new_chain, new_count)
        else:
            raise Exception('Unknown mutations spec: %s' % op)
    return new_seq
def checkModels(self, DB=None, callback=None, selected=None, usemutationcodes=False):
    """Check that we have modelled a structure for everything we can.

    For every selected record without an available structure, the record
    with a structure that is an ancestor and needs the fewest operations is
    used as the template, and the point mutations are modelled onto it with
    ``self.MUT``.  Deletions and insertions are reported and skipped.

    Args:
        DB: database object; nothing is done when it is None.
        callback: optional callable that receives the completion percentage
            after each record for which modelling was attempted.  When None
            the progress is printed instead.
        selected: record names to process.  Defaults to all records that do
            not have an available structure.
        usemutationcodes: not used by this method.

    Side effects:
        Passes the reference structure and the path 'ref.pdb' in the current
        working directory to ``self.writePDB``; may set ``rec.aaseq`` from
        the record's mutation code; sets ``rec.Structure`` and
        ``rec.structuretype`` on every record for which modelling was
        attempted.
    """
    if DB is None:
        return
    proteins = DB.getRecs()
    refprot = DB.meta.refprotein
    refseq = DB[refprot].aaseq
    refaa = self.AAList2String(refseq)
    refpdb = DB[refprot].Structure
    refpdbfile = os.path.join(os.getcwd(), 'ref.pdb')
    self.writePDB(refpdb, refpdbfile)
    failed = []

    # Check that Protool is loaded (must happen before Protool is imported)
    if not self.MUT:
        self.initProtool()

    # Create protool instance for ref pdb
    import Protool
    Xref = Protool.structureIO()
    Xref.parsepdb(refpdb)

    # Find all potential parents
    records_with_structure = [
        name for name in proteins
        if DB.get(name).hasStructure() == 'available'
    ]

    # Loop over selected or all
    if selected is None:
        selected = list(set(proteins) - set(records_with_structure))
    numrecords = len(selected)
    count = 1
    for protein in selected:
        rec = DB.get(protein)
        if rec.hasStructure() == 'available':
            continue
        print('Protein: %s' % (protein,))

        # If no sequence, try to create one from the mutation code
        if rec.aaseq is None and rec.Mutations is not None:
            print('no sequence, using mutation code and ref protein seq')
            import PEATSA.Core as Core
            print('Record has mutation code %s' % rec.Mutations)
            mutationSet = Core.Data.MutationSet(rec.Mutations)
            mutseq = mutationSet.applyToSequence(refaa, id='A', pdb=Xref)
            rec.aaseq = self.string2AAseq(mutseq)

        # Find the best parent: the first one with the fewest operations.
        # The record and operations are kept together with the name so that
        # the template used below really is the chosen parent and not
        # whichever record this loop visited last.
        best = None
        for candidate in records_with_structure:
            candidate_rec = DB.get(candidate)
            is_parent, candidate_ops = rec.getAncestry(candidate_rec)
            # We can only model on X-ray structures
            if candidate_rec.hasStructure() == 'available' and is_parent:
                if best is None or len(candidate_ops) < len(best[2]):
                    best = (candidate, candidate_rec, candidate_ops)
        if best is None:
            # No usable parent; the record is skipped without being
            # added to the failed list
            continue
        parent, parentrec, operations = best
        print('Using %s as template with %d operations.' % (parent, len(operations)))

        # Start the modelling: load the template structure
        pdblines = parentrec.Structure
        X = Protool.structureIO()
        X.parsepdb(pdblines)
        self.MUT.new_PDB(X)
        self.MUT.max_tolerated_bump = 0.5
        atom_changes = []
        skip_protein = None
        self.MUT.new_mutation()
        import pKa.pKD_tools as pKD_tools
        for operation in operations:
            if 'delete' in operation:
                print('This is a deletion - Jens should write code for modelling this')
                print('Deletion ignored for now')
                continue
            if 'insert' in operation:
                print('This is an insertion - Jens should write code for modelling insertions')
                print('Insertion ignored for now')
                continue

            # This is a normal mutation
            new_res = pKD_tools.get_newrestyp_from_mut(operation)
            old_res = pKD_tools.get_oldrestyp_from_mut(operation)
            resid = pKD_tools.get_resid_from_mut(operation)
            if resid not in X.residues:
                print('No structural info for mutation %8s. Not modelling this mutation\n' % operation)
                # The remaining operations are still tried, but the record
                # is marked as failed afterwards
                skip_protein = True
                continue

            # Actually make the mutation
            bump_score = self.MUT.mutate(resid, new_res, orgtype=old_res)
            print('Mutation: %s, bump_score: %s' % (resid + new_res, str(bump_score)))
            if bump_score is None:
                skip_protein = True
                break
            atom_changes = atom_changes + self.MUT.mutate_operations
            self.MUT.mutate_operations = []

        # Update progress
        completion = float(count) / float(numrecords) * 100.0
        if callback is not None:
            callback(completion)
        else:
            print('%4d of %4d, completion; %5.2f%%' % (count, float(numrecords), completion))
        count = count + 1

        # Did it work?
        if skip_protein:
            print('')
            print('Modelling failed for %s' % protein)
            failed.append(protein)
            rec.Structure = 'Bumps'
            rec.structuretype = 'failed model'
            continue

        # We have all sets of changes in atom_changes
        rec.Structure = {'Rotamer_operations': atom_changes}
        rec.Structure['parent'] = parent
        rec.structuretype = 'peat model'
    print('Done')
    if len(failed) > 0:
        print('Failed to model the following proteins:')
        for name in failed:
            print(name)
    return
def makemutantSequence(self, sequence, operations):
    """Apply the specified mutations to a sequence and return the mutant seq.

    Sequence must be in the [[A:0001:ALA],[A:0002:GLU]] format, i.e. an
    iterable of (residue id, residue type) pairs.

    Operations is a list of the following types:
        Mutations:  A:0001:ALA:ASP
        Deletions:  delete:A:0002:GLU
        Insertions: insert:1:A:0003:THR:ALA, insert:2:A:0003:THR:TRP
                    (insert THR,TRP after A:0003:THR)
    Operations are always performed in sequence numbering order.

    NOTE(review): every operation is currently registered as a 'mutate'
    operation, so the deletion and insertion branches below are not reached
    from this method as written.

    Returns:
        ``sequence`` itself when there are no operations, otherwise a new
        list of [residue id, residue type] entries.

    Raises:
        Exception: if two operations target the same residue, if a deletion
            names the wrong original residue, or if an operation type is
            unknown.
    """
    if not operations:
        return sequence

    import pKa.pKD_tools as pKD_tools

    def make_resid(chain, number):
        """Format a residue id as 'chain:NNNN' with a zero-padded number."""
        return '%s:%s' % (chain, str(number).zfill(4))

    # Index the operations by the residue they act on
    ops_sorted = {}
    for operation in operations:
        resid = pKD_tools.get_resid_from_mut(operation)
        if resid in ops_sorted:
            raise Exception('More than one operation on the same residue: %s' % resid)
        ops_sorted[resid] = ['mutate', operation]

    # Perform the operations
    new_seq = []
    new_count = None
    new_chain = None
    for resid, restyp in sequence:
        # Restart the numbering at the beginning and whenever the chain changes
        sp_resid = resid.split(':')
        if sp_resid[0] != new_chain:
            new_chain = sp_resid[0]
            new_count = int(sp_resid[1])
            newresid = make_resid(new_chain, new_count)

        # Residues without an operation are copied with their original id
        if resid not in ops_sorted:
            new_seq.append([resid, restyp])
            new_count = new_count + 1
            newresid = make_resid(new_chain, new_count)
            continue

        op = ops_sorted[resid]
        if op[0] == 'delete':
            # Deletion: not appending the residue is what deletes it
            if op[1] != restyp:
                raise Exception('Incorrect org residue in deletion: %s' % op)
        elif op[0] == 'insert':
            # Insertion
            for i in sorted(op[1].keys()):
                if i[0] == restyp:
                    new_seq.append([newresid, i[1]])
                    new_count = new_count + 1
                    newresid = make_resid(new_chain, new_count)
        elif op[0] == 'mutate':
            # Mutation
            orgres = pKD_tools.get_oldrestyp_from_mut(op[1])
            # NOTE(review): when the original residue type does not match,
            # the residue is left out of the result without an error -
            # confirm that this is intended.
            if orgres == restyp:
                new_seq.append([newresid, pKD_tools.get_newrestyp_from_mut(op[1])])
                new_count = new_count + 1
                newresid = make_resid(new_chain, new_count)
        else:
            raise Exception('Unknown mutations spec: %s' % op)
    return new_seq
def Model_Mutations(pdbfile,mol2files,mutations,max_overlap=0.5,max_totalbump=1.0,return_score=False,store_mutation_operations=False):
    """Model a number of mutations in a pdbfile when one or more ligands are present.

    The PDB file and the ligands are combined with Protool, the resulting
    PDB lines are parsed by FFF, and every mutation is modelled with
    ``model_class.Mutate`` using the rotamer library option (opttype 3).

    Args:
        pdbfile: PDB file passed to ``Protool.structureIO.readpdb``.
        mol2files: iterable of mol2 files; each one is read with tag 'LIGAND'.
        mutations: iterable of mutation specifiers parsed with
            ``pKa.pKD_tools``; the residue id must look like 'chain:number'.
        max_overlap: largest bump score accepted for a single mutation.
        max_totalbump: largest accepted sum of bump scores.  Raised to
            ``max_overlap`` if it is smaller than that.
        return_score: if true, the total bump score is returned as a second
            value (on success and on failure).
        store_mutation_operations: if true, the atom-level differences between
            the wild type and the mutant are stored on the returned object as
            ``mutate_operations``.

    Returns:
        On success an ``FFF_fix`` instance whose ``PI`` attribute is the FFF
        object, or ``(instance, total_bump)`` when ``return_score`` is true.
        On failure ``False``, or ``(False, total_bump)`` when ``return_score``
        is true.
    """
    #
    # Check for stupidity
    #
    if max_overlap > max_totalbump:
        max_totalbump = max_overlap
        print('Adjusted total bump cutoff to %5.2f' % max_totalbump)
    #
    # Read PDB file
    #
    import Protool
    P = Protool.structureIO()
    P.readpdb(pdbfile)
    P.remove_all_hydrogens()
    #
    # Read mol2 files
    #
    L = Protool.ligand(P)
    for mol2file in mol2files:
        print("Added %s with tag 'LIGAND'" % mol2file)
        L.readmol2(mol2file, tag='LIGAND')
    #
    # Get the pdb lines (nowrite=True: 'junk.pdb' is only a placeholder name)
    #
    pdblines = P.writepdb('junk.pdb', nowrite=True)
    #
    # Pass the lines to FFF
    #
    import FFF.FFFcontrol
    myFFF = FFF.FFFcontrol.FFF()
    myFFF.parse_lines(pdblines)
    Model = FFF.FFFcontrol.model_class(myFFF, Rotamerlib, FFFaadef_dir)
    #
    # Store the wild type PDB file
    #
    if store_mutation_operations:
        wt_lines = myFFF.make_pdblines('PDB')
    #
    import pKa.pKD_tools as pKD_tools
    total_bump = 0.0
    opttype = 3  # Rotamer library
    for mutation in mutations:
        fields = pKD_tools.get_resid_from_mut(mutation).split(':')
        chainid = fields[0]
        resid = fields[1]
        #
        # Get rid of the leading zeros, but keep a single '0' for an
        # all-zero residue number
        #
        resid = resid.lstrip('0') or resid[-1:]
        #
        newres = pKD_tools.get_newrestyp_from_mut(mutation)
        energies = Model.Mutate(chainid, resid, newres, opttype, max_overlap)
        bump_score = energies[0]  # only the first value is used here
        #
        total_bump = total_bump + bump_score
        print('Bump score: %5.2f, total bump: %5.2f' % (bump_score, total_bump))
        if bump_score > max_overlap or total_bump > max_totalbump:
            print('Cannot model this set of mutations - too many bumps')
            if return_score:
                return False, total_bump
            return False
        print('Bump score for %s: %5.3f' % (mutation, bump_score))
    print('Total bump score for all mutations: %5.3f' % total_bump)

    class FFF_fix:
        """Thin holder that exposes the FFF object as ``PI``."""

        def __init__(self, FFF):
            self.PI = FFF
            return
    #
    # Create the instance
    #
    FFF_instance = FFF_fix(myFFF)
    #
    # Keep track of the changes that were made to the PDB file
    #
    if store_mutation_operations:
        mut_lines = FFF_instance.PI.make_pdblines('PDB')
        WT = Protool.structureIO()
        WT.parsepdb(wt_lines)
        MUT = Protool.structureIO()
        MUT.parsepdb(mut_lines)
        wt_atoms = sorted(WT.atoms.keys())
        mut_atoms = sorted(MUT.atoms.keys())
        # Sets give constant-time membership tests; the sorted lists are
        # only used to keep the order of the operations deterministic.
        wt_names = set(wt_atoms)
        mut_names = set(mut_atoms)

        def coord_diff(atom1, atom2):
            """Sum of the absolute coordinate differences of two atoms."""
            return sum(abs(atom1[axis] - atom2[axis]) for axis in ('X', 'Y', 'Z'))

        operations = []
        for atom in wt_atoms:
            if atom not in mut_names:
                operations.append(['delete', atom, WT.atoms[atom]])
            elif coord_diff(WT.atoms[atom], MUT.atoms[atom]) > 0.1:
                # A moved atom is recorded as delete followed by add
                operations.append(['delete', atom, WT.atoms[atom]])
                operations.append(['add', atom, MUT.atoms[atom]])
        for atom in mut_atoms:
            if atom not in wt_names:
                operations.append(['add', atom, MUT.atoms[atom]])
        #
        # Store these in FFF_fix
        #
        FFF_instance.mutate_operations = operations
    #
    # Return the info
    #
    if return_score:
        return FFF_instance, total_bump
    return FFF_instance