def read_sequences(pdbfiles, newfiles):
    """Read the given PDB files and extract a sequence from each of them.

    Parameters:
        pdbfiles: sequence of names identifying the structures to load.
        newfiles: mapping from every name in ``pdbfiles`` to the path of the
            PDB file that is actually read for it.

    Returns:
        A tuple ``(align, allseqs)`` of two dictionaries keyed by the names
        in ``pdbfiles``:

        * ``align[name]`` is a list of ``[residue_id, one_letter_code]``
          pairs in sorted residue-id order. Residues whose name is not in
          the structure's ``three_to_one`` table are left out.
        * ``allseqs[name]`` is the value returned by the structure's
          ``Seq2Pir(None, name)``.

    Raises:
        KeyError: if a name in ``pdbfiles`` has no entry in ``newfiles``.
    """
    align = {}
    allseqs = {}
    print('Reading pdb files and extracting sequences')
    if not pdbfiles:
        # Nothing to read, so the structure library is not needed at all.
        return align, allseqs
    import Protool
    for pdb in pdbfiles:
        pdbfile = newfiles[pdb]
        X = Protool.structureIO_fast()
        print('Reading: %s' % pdbfile)
        X.readpdb(pdbfile)
        X.RemoveALT()
        #
        # Construct a special list for the sequence
        #
        newlist = []
        for s in sorted(X.residues.keys()):
            # Look the residue name up once; it is needed for both the
            # membership test and the translation.
            resname = X.resname(s)
            if resname in X.three_to_one:
                newlist.append([s, X.three_to_one[resname]])
        align[pdb] = newlist
        allseqs[pdb] = X.Seq2Pir(None, pdb)
    return align, allseqs
def read_sequences(pdbfiles, newfiles):
    """Read the given PDB files and extract a sequence from each of them.

    Parameters:
        pdbfiles: sequence of names identifying the structures to load.
        newfiles: mapping from every name in ``pdbfiles`` to the path of the
            PDB file that is actually read for it.

    Returns:
        A tuple ``(align, allseqs)`` of two dictionaries keyed by the names
        in ``pdbfiles``:

        * ``align[name]`` is a list of ``[residue_id, one_letter_code]``
          pairs in sorted residue-id order. Residues whose name is not in
          the structure's ``three_to_one`` table are left out.
        * ``allseqs[name]`` is the value returned by the structure's
          ``Seq2Pir(None, name)``.

    Raises:
        KeyError: if a name in ``pdbfiles`` has no entry in ``newfiles``.
    """
    align = {}
    allseqs = {}
    print('Reading pdb files and extracting sequences')
    if not pdbfiles:
        # Nothing to read, so the structure library is not needed at all.
        return align, allseqs
    import Protool
    for pdb in pdbfiles:
        pdbfile = newfiles[pdb]
        X = Protool.structureIO_fast()
        print('Reading: %s' % pdbfile)
        X.readpdb(pdbfile)
        X.RemoveALT()
        #
        # Construct a special list for the sequence
        #
        newlist = []
        for s in sorted(X.residues.keys()):
            # Look the residue name up once; it is needed for both the
            # membership test and the translation.
            resname = X.resname(s)
            if resname in X.three_to_one:
                newlist.append([s, X.three_to_one[resname]])
        align[pdb] = newlist
        allseqs[pdb] = X.Seq2Pir(None, pdb)
    return align, allseqs
def main():
    """Extract sequences from PDB files, or set up pKa calculation directories.

    The first command line argument selects the mode:

    * If it starts with ``-``, the remaining arguments are PDB files. Their
      sequences are written to ``all_seqs.pir`` in the current directory and
      the function returns; the user is expected to align that file.
    * Otherwise it is an alignment file read with ``read_aln``. The PDB
      sequences are checked against the alignment, the user picks a
      reference sequence and a comma separated list of its residues, and a
      ``<name>_pka`` directory with a hard-linked PDB file, an ``Invocation``
      file and the parameter files is prepared for every aligned sequence.

    Raises:
        Exception: if no argument was given (after printing the usage text),
            or when a titratable group list is non-empty (the matching code
            has not been updated for the current output format).
        ValueError: if a PDB sequence disagrees with the alignment, or an
            entered residue is not part of the selected sequence.
        RuntimeError: if a residue has no titratable group and the user
            declines to ignore it.
    """
    import os
    import sys

    intro_text()
    #
    # Get the filename
    #
    try:
        msf_file = sys.argv[1]
    except IndexError:
        usage_text()
        raise Exception()
    #
    # If the filename starts with - then we only want to extract all the
    # sequences. The rest of argv contains the pdb files.
    #
    if msf_file.startswith('-'):
        pdbfiles = sys.argv[2:]
        newfiles = clean_files(pdbfiles)
        align, allseqs = read_sequences(pdbfiles, newfiles)
        #
        # Write pir file
        #
        pirfile = os.path.join(os.getcwd(), 'all_seqs.pir')
        with open(pirfile, 'w') as fd:
            for seq in sorted(allseqs.keys()):
                for line in allseqs[seq]:
                    fd.write(line)
                # NOTE(review): the original indentation was lost; the
                # newline is assumed to terminate each record, not each line.
                fd.write('\n')
        #
        # Done. Now the user has to align that file
        #
        return
    #
    # ----------------------------------------------
    #
    # This is the normal case. Set up the pKa calculations.
    #
    # Read the alignment
    #
    seq_keys, seqs = read_aln(msf_file)
    newfiles = clean_files(seq_keys)
    align, allseqs = read_sequences(seq_keys, newfiles)
    #
    # Check that the sequences correspond to the alignment sequences
    # AND build an alignment of the pdb sequences
    #
    pdb_align = {}
    for seq in align.keys():
        pdbcount = 0
        pdb_align[seq] = []
        for letter in seqs[seq]:
            if letter == '.':
                # Gap in the alignment: no PDB residue is consumed.
                pdb_align[seq].append('.')
                continue
            pdb_residue = align[seq][pdbcount]
            if letter != pdb_residue[1]:
                print('')
                print('Different letter at position %d ' % pdbcount)
                print('in %s ' % seq)
                print('alignment says: %1s while pdbfile says: %1s'
                      % (letter, pdb_residue[1]))
                print('')
                print(seqs[seq])
                print(align[seq])
                raise ValueError("sequences do not match")
            print(pdb_residue)
            pdb_align[seq].append(pdb_residue[0])
            pdbcount = pdbcount + 1
    print('All sequences ok')
    print('')
    #
    # Prompt the user to get the file to work with
    #
    print('Select the one that you want to identify active site groups for')
    for seq in seq_keys:
        print(seq)
    print('')
    define_seq = raw_input('Enter name: ')
    while define_seq not in seqs:
        define_seq = raw_input('Enter name: ')
    #
    # ok, the user selected a sequence: print the sequence found in the
    # corresponding pdbfile
    #
    for residue in align[define_seq]:
        print('%s %s' % (residue[0], residue[1]))
    print('')
    answer = raw_input("Enter residues (separate by ,): ")
    #
    # Check that the residues exist and remember their alignment columns
    #
    align_numbers = []
    for residue in answer.split(','):
        # Tolerate blanks around the separators ("A:0035, A:0052").
        residue = residue.strip()
        try:
            align_numbers.append(pdb_align[define_seq].index(residue))
        except ValueError:
            raise ValueError('Residue not found: %s' % residue)
    print('All residues found in %s' % define_seq)
    print(align_numbers)
    for number in align_numbers:
        print('%s %s' % (number, pdb_align[define_seq][number]))
    #
    # ok, prepare all the directories
    #
    for seq in align.keys():
        workdir = seq + '_pka'
        clean_name = seq + '.clean.pdb'
        if not os.path.isdir(workdir):
            os.mkdir(workdir)
        #
        # Copy the pdbfile (as a hard link, replacing any previous one)
        #
        pdbfile = os.path.join(os.getcwd(), workdir, clean_name)
        if os.path.isfile(pdbfile):
            os.unlink(pdbfile)
        os.link(os.path.join(os.getcwd(), clean_name), pdbfile)
        #
        # Find the number of the titratable group
        #
        import pKa
        groups = pKa.pKatools().get_titratable_residues2(clean_name)
        groupnums = []
        #
        # Get the residues from the alignment
        #
        residues = [pdb_align[seq][number] for number in align_numbers]
        print('I found these residues %s for %s' % (str(residues), seq))
        for residue in residues:
            found = None
            for group in groups:
                print('%s %s' % (group[0], residue))
                print('')
                print('You have to fix this for the new output from get_titratable_residues')
                print('')
                # Deliberate stop kept from the original: the matching code
                # below is unreachable until it has been adapted.
                raise Exception()  # !!!!!!
                if group[0] == residue:
                    groupnums.append(group[2])
                    found = 1
                    break
            if not found:
                import Protool
                T = Protool.structureIO_fast()
                T.readpdb(clean_name)
                print('I could not find a titratable group to match this residue: %s %s in %s'
                      % (residue, T.resname(residue), seq))
                a = raw_input('Should I just ignore this group? (Y/N) ')
                if a not in ('y', 'Y'):
                    raise RuntimeError('Could not identify group')
        #
        # Write the Invocation file
        #
        grps = ','.join([str(grp) for grp in groupnums])
        invocation = os.path.join(os.getcwd(), workdir, 'Invocation')
        with open(invocation, 'w') as fd:
            fd.write('/net/home/jnielsen/lib/python/pKarun.py %s -dbcrit 1000 -subset -group_cutoff 2.0 -groups %s\n'
                     % (clean_name, grps))
        #
        # Copy the parameter files
        #
        for parmfile in ('TOPOLOGY.H', 'DELRAD.DAT', 'DELCRG.DAT'):
            os.system('cp /net/home/jnielsen/pkaparms/' + parmfile + ' ' + workdir)
    return
def get_min_dist(self, target_res, mutation):
    """Return the minimum distance between the target and the mutated residue.

    Only atoms for which neither ``is_backbone`` nor ``is_hydrogen`` is true
    are compared. The distance is measured both in the mutant structure and
    in the wild type structure (``self.pdbfile``), because atoms may have
    been removed by the mutation, and the smaller value wins.

    Results are cached in ``self.data[target_res][mutation]``; a cached
    value is returned without reading any structure.

    Parameters:
        target_res: identifier of the target residue.
        mutation: identifier of the mutation.

    Returns:
        The minimum distance found, ``9999.9`` if no atom pair was compared,
        or ``None`` if no mutant PDB file could be obtained.
    """
    #
    # Do we know this result already?
    #
    if target_res in self.data and mutation in self.data[target_res]:
        return self.data[target_res][mutation]
    #
    # No, so we have to calculate it
    # The mutated PDB file might have been created already
    #
    mutfile = None
    mutfile_names = getattr(self.parent, 'mutfile_names', None)
    if mutfile_names is not None and mutation in mutfile_names:
        mutfile = mutfile_names[mutation]
    if not mutfile:
        mutfile, _score = make_mutation(self.pdbfile, mutation, self.topdir)
        if not mutfile:
            return None

    def _closest(structure, best):
        """Return the smaller of ``best`` and the closest atom pair distance."""

        def _counts(atom):
            return not (structure.is_backbone(atom) or structure.is_hydrogen(atom))

        target_atoms = [atom for atom
                        in structure.residues[structure.resnum(target_res)]
                        if _counts(atom)]
        if not target_atoms:
            # As in the original loop, the mutated residue is only looked up
            # when the target residue contributes at least one atom.
            return best
        import pKD_tools
        mut_resid = pKD_tools.get_resid_from_mut(mutation)
        # Filter the mutated residue once instead of once per target atom.
        other_atoms = [atom for atom in structure.residues[mut_resid]
                       if _counts(atom)]
        for target_atom in target_atoms:
            for other_atom in other_atoms:
                distance = structure.dist(other_atom, target_atom)
                if distance < best:
                    best = distance
        return best

    import Protool
    X = Protool.structureIO_fast()
    X.readpdb(mutfile)
    #
    # We save a minimum distance for the target residue
    #
    min_dist = _closest(X, 9999.9)
    #
    # Check the distance in the wt pdb file - we might have removed atoms
    #
    X2 = Protool.structureIO_fast()
    X2.readpdb(self.pdbfile)
    min_dist = _closest(X2, min_dist)
    #
    # Save the result
    #
    self.data.setdefault(target_res, {})[mutation] = min_dist
    self.data_added = self.data_added + 1
    self.check_status()
    #
    # Return the distance
    #
    return min_dist
def analyse_one_pdbfile(pdbfile, bigdict=None):
    """Load the MC.tabdata file and the sugelm file to determine the
    effective dielectric constant for single mutations.

    The data files are located by appending ``.MC.tabdata``,
    ``.sugelm_data``, ``.intpka_data`` and ``.pdbs`` to ``pdbfile``.

    Parameters:
        pdbfile: path of the wild type PDB file.
        bigdict: optional path of a pickle file. When given, the mutations
            are taken from it instead of from the tabdata file: every key
            that contains the base name of ``pdbfile`` contributes the rest
            of the key as target, together with its ``(mutations, dpKa)``
            pairs. Entries with fewer than two mutations are skipped.

    Returns:
        A tuple ``(phi, real, tabdata_muts, dist, epses_dpKa, epses)``:

        * ``phi``: phi-based delta pKa per accepted data point (summed over
          the individual mutations for multiple mutations).
        * ``real``: the matching reference delta pKa (absolute value for
          single mutations).
        * ``tabdata_muts``: number of mutation entries that were examined.
        * ``dist``: minimum distances for the accepted single mutations.
        * ``epses_dpKa``: the distance values returned by
          ``get_effective_eps`` (the name is historical).
        * ``epses``: the effective dielectric constants.

        If the tabdata or the sugelm file is missing, empty lists and a
        count of 0 are returned.
    """
    import os
    import pickle

    mcfile = pdbfile + '.MC.tabdata'
    sugelm = pdbfile + '.sugelm_data'
    print('Loading:')
    print(mcfile)
    print(sugelm)
    print('')
    if not (os.path.isfile(mcfile) and os.path.isfile(sugelm)):
        # Same shape as the normal return value so callers can unpack it.
        return [], [], 0, [], [], []
    big_dict = None
    if bigdict:
        # NOTE: unpickling can execute arbitrary code; only use trusted files.
        with open(bigdict, 'rb') as fd:
            big_dict = pickle.load(fd)
    #
    # Get the interaction matrix for the PDB file
    #
    import pKaTool.pKaIO
    matrix = pKaTool.pKaIO.pKaIO(pdbfile).read_matrix()
    #
    # Intrinsic pKa values for mutated residues
    # NOTE(review): the loaded values are not used below; the load is kept
    # so that a missing or unreadable file still fails here.
    #
    with open(pdbfile + '.intpka_data', 'rb') as fd:
        mutant_intpkas = pickle.load(fd)
    import Protool
    #
    # Start calculating
    #
    mc = getfile(mcfile)
    su = getfile(sugelm)['data']
    print('Number of mutations in sugelm %d' % len(su.keys()))
    print('Number of mutations in tabdata %d' % len(mc.keys()))
    # A site is identified by the first two ':'-separated mutation fields.
    sites = set(':'.join(mutation.split(':')[:2]) for mutation in su.keys())
    print('Number of unique sites %d' % len(sites))
    #
    # Should we do the full solutions or just single mutations?
    #
    if bigdict:
        print('Getting mutations from bigdict')
        pdbfile_name = os.path.split(pdbfile)[1]
        mutations = []
        for key in big_dict.keys():
            pos = key.find(pdbfile_name)
            if pos != -1:
                target = key[pos + len(pdbfile_name):]
                for muts, dpka in big_dict[key]:
                    mutations.append([target, muts, dpka])
    else:
        mutations = list(mc.keys())
    #
    # Load the wild type PDB file
    #
    X_wt = Protool.structureIO()
    X_wt.readpdb(pdbfile)
    #
    # go on, calculate the difference between the MC result and the phi result
    #
    mutations.sort()
    phi = []
    real = []
    dist = []
    epses_dpKa = []
    epses = []
    for mutant in mutations:
        if isinstance(mutant, list):
            #
            # Deal with multiple mutations. A separate name is used so the
            # list being iterated (and counted at the end) is not rebound.
            #
            target, sub_mutations, rdpka = mutant
            if len(sub_mutations) < 2:
                continue
            sum_phidpka = 0.0
            for mutation in sub_mutations:
                phidpka, _mc_dpka = get_phidpka(mutation=mutation, target=target,
                                                su=su, mc=mc, matrix=matrix)
                sum_phidpka = sum_phidpka + phidpka
            phi.append(sum_phidpka)
            real.append(rdpka)
        else:
            #
            # This is for looking at single mutations
            # NOTE(review): a new pKa_dist is built for every mutant, as in
            # the original; it may be safe to share one - confirm.
            #
            import Design_pKa_help
            X = Design_pKa_help.pKa_dist(pdbfile)
            targets = sorted(mc[mutant].keys())
            #
            # Load the mutant PDB file if we can
            #
            mutfile = os.path.join(pdbfile + '.pdbs', mutant + '.pdb')
            if os.path.isfile(mutfile):
                X_mut = Protool.structureIO_fast()
                X_mut.readpdb(mutfile)
            else:
                X_mut = None
            #
            # Loop over all targets
            #
            for target in targets:
                #
                # Get the distance between the target and the mutant
                #
                distance = X.get_min_dist(target, mutant)
                #
                # Get the delta pKa values
                #
                phidpka, rdpka = get_phidpka(mutation=mutant, target=target,
                                             su=su, mc=mc, matrix=matrix)
                if not rdpka:
                    continue
                if abs(phidpka) >= 0.0001 and abs(rdpka) < 20.0:
                    phi.append(phidpka)
                    real.append(abs(rdpka))
                    dist.append(distance)
                    #
                    # Effective eps
                    # NOTE(review): the original indentation was lost; this
                    # is assumed to apply to accepted data points only.
                    #
                    eps_eff, distance_eps = get_effective_eps(
                        target, mutant, abs(rdpka),
                        X_mut=X_mut, X_wt=X_wt, phidpka=phidpka)
                    if eps_eff:
                        epses.append(eps_eff)
                        epses_dpKa.append(distance_eps)
    tabdata_muts = len(mutations)
    return phi, real, tabdata_muts, dist, epses_dpKa, epses
def main():
    """Extract sequences from PDB files, or set up pKa calculation directories.

    The first command line argument selects the mode:

    * If it starts with ``-``, the remaining arguments are PDB files. Their
      sequences are written to ``all_seqs.pir`` in the current directory and
      the function returns; the user is expected to align that file.
    * Otherwise it is an alignment file read with ``read_aln``. The PDB
      sequences are checked against the alignment, the user picks a
      reference sequence and a comma separated list of its residues, and a
      ``<name>_pka`` directory with a hard-linked PDB file, an ``Invocation``
      file and the parameter files is prepared for every aligned sequence.

    Raises:
        Exception: if no argument was given (after printing the usage text),
            or when a titratable group list is non-empty (the matching code
            has not been updated for the current output format).
        ValueError: if a PDB sequence disagrees with the alignment, or an
            entered residue is not part of the selected sequence.
        RuntimeError: if a residue has no titratable group and the user
            declines to ignore it.
    """
    import os
    import sys

    intro_text()
    #
    # Get the filename
    #
    try:
        msf_file = sys.argv[1]
    except IndexError:
        usage_text()
        raise Exception()
    #
    # If the filename starts with - then we only want to extract all the
    # sequences. The rest of argv contains the pdb files.
    #
    if msf_file.startswith('-'):
        pdbfiles = sys.argv[2:]
        newfiles = clean_files(pdbfiles)
        align, allseqs = read_sequences(pdbfiles, newfiles)
        #
        # Write pir file
        #
        pirfile = os.path.join(os.getcwd(), 'all_seqs.pir')
        with open(pirfile, 'w') as fd:
            for seq in sorted(allseqs.keys()):
                for line in allseqs[seq]:
                    fd.write(line)
                # NOTE(review): the original indentation was lost; the
                # newline is assumed to terminate each record, not each line.
                fd.write('\n')
        #
        # Done. Now the user has to align that file
        #
        return
    #
    # ----------------------------------------------
    #
    # This is the normal case. Set up the pKa calculations.
    #
    # Read the alignment
    #
    seq_keys, seqs = read_aln(msf_file)
    newfiles = clean_files(seq_keys)
    align, allseqs = read_sequences(seq_keys, newfiles)
    #
    # Check that the sequences correspond to the alignment sequences
    # AND build an alignment of the pdb sequences
    #
    pdb_align = {}
    for seq in align.keys():
        pdbcount = 0
        pdb_align[seq] = []
        for letter in seqs[seq]:
            if letter == '.':
                # Gap in the alignment: no PDB residue is consumed.
                pdb_align[seq].append('.')
                continue
            pdb_residue = align[seq][pdbcount]
            if letter != pdb_residue[1]:
                print('')
                print('Different letter at position %d ' % pdbcount)
                print('in %s ' % seq)
                print('alignment says: %1s while pdbfile says: %1s'
                      % (letter, pdb_residue[1]))
                print('')
                print(seqs[seq])
                print(align[seq])
                raise ValueError("sequences do not match")
            print(pdb_residue)
            pdb_align[seq].append(pdb_residue[0])
            pdbcount = pdbcount + 1
    print('All sequences ok')
    print('')
    #
    # Prompt the user to get the file to work with
    #
    print('Select the one that you want to identify active site groups for')
    for seq in seq_keys:
        print(seq)
    print('')
    define_seq = raw_input('Enter name: ')
    while define_seq not in seqs:
        define_seq = raw_input('Enter name: ')
    #
    # ok, the user selected a sequence: print the sequence found in the
    # corresponding pdbfile
    #
    for residue in align[define_seq]:
        print('%s %s' % (residue[0], residue[1]))
    print('')
    answer = raw_input("Enter residues (separate by ,): ")
    #
    # Check that the residues exist and remember their alignment columns
    #
    align_numbers = []
    for residue in answer.split(','):
        # Tolerate blanks around the separators ("A:0035, A:0052").
        residue = residue.strip()
        try:
            align_numbers.append(pdb_align[define_seq].index(residue))
        except ValueError:
            raise ValueError('Residue not found: %s' % residue)
    print('All residues found in %s' % define_seq)
    print(align_numbers)
    for number in align_numbers:
        print('%s %s' % (number, pdb_align[define_seq][number]))
    #
    # ok, prepare all the directories
    #
    for seq in align.keys():
        workdir = seq + '_pka'
        clean_name = seq + '.clean.pdb'
        if not os.path.isdir(workdir):
            os.mkdir(workdir)
        #
        # Copy the pdbfile (as a hard link, replacing any previous one)
        #
        pdbfile = os.path.join(os.getcwd(), workdir, clean_name)
        if os.path.isfile(pdbfile):
            os.unlink(pdbfile)
        os.link(os.path.join(os.getcwd(), clean_name), pdbfile)
        #
        # Find the number of the titratable group
        #
        import pKa
        groups = pKa.pKatools().get_titratable_residues2(clean_name)
        groupnums = []
        #
        # Get the residues from the alignment
        #
        residues = [pdb_align[seq][number] for number in align_numbers]
        print('I found these residues %s for %s' % (str(residues), seq))
        for residue in residues:
            found = None
            for group in groups:
                print('%s %s' % (group[0], residue))
                print('')
                print('You have to fix this for the new output from get_titratable_residues')
                print('')
                # Deliberate stop kept from the original: the matching code
                # below is unreachable until it has been adapted.
                raise Exception()  # !!!!!!
                if group[0] == residue:
                    groupnums.append(group[2])
                    found = 1
                    break
            if not found:
                import Protool
                T = Protool.structureIO_fast()
                T.readpdb(clean_name)
                print('I could not find a titratable group to match this residue: %s %s in %s'
                      % (residue, T.resname(residue), seq))
                a = raw_input('Should I just ignore this group? (Y/N) ')
                if a not in ('y', 'Y'):
                    raise RuntimeError('Could not identify group')
        #
        # Write the Invocation file
        #
        grps = ','.join([str(grp) for grp in groupnums])
        invocation = os.path.join(os.getcwd(), workdir, 'Invocation')
        with open(invocation, 'w') as fd:
            fd.write('/net/home/jnielsen/lib/python/pKarun.py %s -dbcrit 1000 -subset -group_cutoff 2.0 -groups %s\n'
                     % (clean_name, grps))
        #
        # Copy the parameter files
        #
        for parmfile in ('TOPOLOGY.H', 'DELRAD.DAT', 'DELCRG.DAT'):
            os.system('cp /net/home/jnielsen/pkaparms/' + parmfile + ' ' + workdir)
    return
def analyse_one_pdbfile(pdbfile, bigdict=None):
    """Load the MC.tabdata file and the sugelm file to determine the
    effective dielectric constant for single mutations.

    The data files are located by appending ``.MC.tabdata``,
    ``.sugelm_data``, ``.intpka_data`` and ``.pdbs`` to ``pdbfile``.

    Parameters:
        pdbfile: path of the wild type PDB file.
        bigdict: optional path of a pickle file. When given, the mutations
            are taken from it instead of from the tabdata file: every key
            that contains the base name of ``pdbfile`` contributes the rest
            of the key as target, together with its ``(mutations, dpKa)``
            pairs. Entries with fewer than two mutations are skipped.

    Returns:
        A tuple ``(phi, real, tabdata_muts, dist, epses_dpKa, epses)``:

        * ``phi``: phi-based delta pKa per accepted data point (summed over
          the individual mutations for multiple mutations).
        * ``real``: the matching reference delta pKa (absolute value for
          single mutations).
        * ``tabdata_muts``: number of mutation entries that were examined.
        * ``dist``: minimum distances for the accepted single mutations.
        * ``epses_dpKa``: the distance values returned by
          ``get_effective_eps`` (the name is historical).
        * ``epses``: the effective dielectric constants.

        If the tabdata or the sugelm file is missing, empty lists and a
        count of 0 are returned.
    """
    import os
    import pickle

    mcfile = pdbfile + '.MC.tabdata'
    sugelm = pdbfile + '.sugelm_data'
    print('Loading:')
    print(mcfile)
    print(sugelm)
    print('')
    if not (os.path.isfile(mcfile) and os.path.isfile(sugelm)):
        # Same shape as the normal return value so callers can unpack it.
        return [], [], 0, [], [], []
    big_dict = None
    if bigdict:
        # NOTE: unpickling can execute arbitrary code; only use trusted files.
        with open(bigdict, 'rb') as fd:
            big_dict = pickle.load(fd)
    #
    # Get the interaction matrix for the PDB file
    #
    import pKaTool.pKaIO
    matrix = pKaTool.pKaIO.pKaIO(pdbfile).read_matrix()
    #
    # Intrinsic pKa values for mutated residues
    # NOTE(review): the loaded values are not used below; the load is kept
    # so that a missing or unreadable file still fails here.
    #
    with open(pdbfile + '.intpka_data', 'rb') as fd:
        mutant_intpkas = pickle.load(fd)
    import Protool
    #
    # Start calculating
    #
    mc = getfile(mcfile)
    su = getfile(sugelm)['data']
    print('Number of mutations in sugelm %d' % len(su.keys()))
    print('Number of mutations in tabdata %d' % len(mc.keys()))
    # A site is identified by the first two ':'-separated mutation fields.
    sites = set(':'.join(mutation.split(':')[:2]) for mutation in su.keys())
    print('Number of unique sites %d' % len(sites))
    #
    # Should we do the full solutions or just single mutations?
    #
    if bigdict:
        print('Getting mutations from bigdict')
        pdbfile_name = os.path.split(pdbfile)[1]
        mutations = []
        for key in big_dict.keys():
            pos = key.find(pdbfile_name)
            if pos != -1:
                target = key[pos + len(pdbfile_name):]
                for muts, dpka in big_dict[key]:
                    mutations.append([target, muts, dpka])
    else:
        mutations = list(mc.keys())
    #
    # Load the wild type PDB file
    #
    X_wt = Protool.structureIO()
    X_wt.readpdb(pdbfile)
    #
    # go on, calculate the difference between the MC result and the phi result
    #
    mutations.sort()
    phi = []
    real = []
    dist = []
    epses_dpKa = []
    epses = []
    for mutant in mutations:
        if isinstance(mutant, list):
            #
            # Deal with multiple mutations. A separate name is used so the
            # list being iterated (and counted at the end) is not rebound.
            #
            target, sub_mutations, rdpka = mutant
            if len(sub_mutations) < 2:
                continue
            sum_phidpka = 0.0
            for mutation in sub_mutations:
                phidpka, _mc_dpka = get_phidpka(mutation=mutation, target=target,
                                                su=su, mc=mc, matrix=matrix)
                sum_phidpka = sum_phidpka + phidpka
            phi.append(sum_phidpka)
            real.append(rdpka)
        else:
            #
            # This is for looking at single mutations
            # NOTE(review): a new pKa_dist is built for every mutant, as in
            # the original; it may be safe to share one - confirm.
            #
            import Design_pKa_help
            X = Design_pKa_help.pKa_dist(pdbfile)
            targets = sorted(mc[mutant].keys())
            #
            # Load the mutant PDB file if we can
            #
            mutfile = os.path.join(pdbfile + '.pdbs', mutant + '.pdb')
            if os.path.isfile(mutfile):
                X_mut = Protool.structureIO_fast()
                X_mut.readpdb(mutfile)
            else:
                X_mut = None
            #
            # Loop over all targets
            #
            for target in targets:
                #
                # Get the distance between the target and the mutant
                #
                distance = X.get_min_dist(target, mutant)
                #
                # Get the delta pKa values
                #
                phidpka, rdpka = get_phidpka(mutation=mutant, target=target,
                                             su=su, mc=mc, matrix=matrix)
                if not rdpka:
                    continue
                if abs(phidpka) >= 0.0001 and abs(rdpka) < 20.0:
                    phi.append(phidpka)
                    real.append(abs(rdpka))
                    dist.append(distance)
                    #
                    # Effective eps
                    # NOTE(review): the original indentation was lost; this
                    # is assumed to apply to accepted data points only.
                    #
                    eps_eff, distance_eps = get_effective_eps(
                        target, mutant, abs(rdpka),
                        X_mut=X_mut, X_wt=X_wt, phidpka=phidpka)
                    if eps_eff:
                        epses.append(eps_eff)
                        epses_dpKa.append(distance_eps)
    tabdata_muts = len(mutations)
    return phi, real, tabdata_muts, dist, epses_dpKa, epses
def get_min_dist(self, target_res, mutation):
    """Return the minimum distance between the target and the mutated residue.

    Only atoms for which neither ``is_backbone`` nor ``is_hydrogen`` is true
    are compared. The distance is measured both in the mutant structure and
    in the wild type structure (``self.pdbfile``), because atoms may have
    been removed by the mutation, and the smaller value wins.

    Results are cached in ``self.data[target_res][mutation]``; a cached
    value is returned without reading any structure.

    Parameters:
        target_res: identifier of the target residue.
        mutation: identifier of the mutation.

    Returns:
        The minimum distance found, ``9999.9`` if no atom pair was compared,
        or ``None`` if no mutant PDB file could be obtained.
    """
    #
    # Do we know this result already?
    #
    if target_res in self.data and mutation in self.data[target_res]:
        return self.data[target_res][mutation]
    #
    # No, so we have to calculate it
    # The mutated PDB file might have been created already
    #
    mutfile = None
    mutfile_names = getattr(self.parent, 'mutfile_names', None)
    if mutfile_names is not None and mutation in mutfile_names:
        mutfile = mutfile_names[mutation]
    if not mutfile:
        mutfile, _score = make_mutation(self.pdbfile, mutation, self.topdir)
        if not mutfile:
            return None

    def _closest(structure, best):
        """Return the smaller of ``best`` and the closest atom pair distance."""

        def _counts(atom):
            return not (structure.is_backbone(atom) or structure.is_hydrogen(atom))

        target_atoms = [atom for atom
                        in structure.residues[structure.resnum(target_res)]
                        if _counts(atom)]
        if not target_atoms:
            # As in the original loop, the mutated residue is only looked up
            # when the target residue contributes at least one atom.
            return best
        import pKD_tools
        mut_resid = pKD_tools.get_resid_from_mut(mutation)
        # Filter the mutated residue once instead of once per target atom.
        other_atoms = [atom for atom in structure.residues[mut_resid]
                       if _counts(atom)]
        for target_atom in target_atoms:
            for other_atom in other_atoms:
                distance = structure.dist(other_atom, target_atom)
                if distance < best:
                    best = distance
        return best

    import Protool
    X = Protool.structureIO_fast()
    X.readpdb(mutfile)
    #
    # We save a minimum distance for the target residue
    #
    min_dist = _closest(X, 9999.9)
    #
    # Check the distance in the wt pdb file - we might have removed atoms
    #
    X2 = Protool.structureIO_fast()
    X2.readpdb(self.pdbfile)
    min_dist = _closest(X2, min_dist)
    #
    # Save the result
    #
    self.data.setdefault(target_res, {})[mutation] = min_dist
    self.data_added = self.data_added + 1
    self.check_status()
    #
    # Return the distance
    #
    return min_dist