def getRMSD(self, nSeqUnbound, seqUnboundAli, nSeqBound, seqBoundAli):
    '''
    Computes the rmsd between the atoms that build2SeqsDictMap pairs up for
    chain nSeqUnbound (unbound) and chain nSeqBound (bound). The coordinates
    are compared as they are: no superposition is performed here.

    @param nSeqUnbound: int. The index of the unbound sequence that will be aligned
    @param seqUnboundAli: str. The alignment result for unbound sequence number nSeqUnbound
    @param nSeqBound: int. The index of the bound sequence that will be aligned
    @param seqBoundAli: str. The alignment result for bound sequence number nSeqBound
    @return rmsd. float. Root mean square deviation over the paired atoms
                  (presumably CA atoms -- depends on build2SeqsDictMap)
    @return boundToUnboundResDict. {Bio.PDB.Residue_bound --> Bio.PDB.Residue_unbound},
                  passed through unchanged from build2SeqsDictMap
    @raise ValueError: if build2SeqsDictMap returns no atom pairs
    '''
    boundToUnboundResDict, atomBoundToUnboundMap = self.build2SeqsDictMap(
        nSeqUnbound, seqUnboundAli, nSeqBound, seqBoundAli)

    # Materialise once so that emptiness can be checked even for a lazy iterable.
    atom_pairs = list(atomBoundToUnboundMap)
    if not atom_pairs:
        # zip(*[]) would fail below with an opaque tuple-unpacking ValueError.
        raise ValueError(
            "No equivalent atoms between unbound chain %s and bound chain %s"
            % (nSeqUnbound, nSeqBound))

    atoms_x, atoms_y = zip(*atom_pairs)
    coords_x = np.array([atom.get_coord() for atom in atoms_x])
    coords_y = np.array([atom.get_coord() for atom in atoms_y])

    # Plain RMSD without fitting. Written out explicitly instead of calling
    # the private SVDSuperimposer._rms helper, which is not public API.
    diff = coords_x - coords_y
    rmsd = np.sqrt(np.sum(diff * diff) / coords_x.shape[0])
    return rmsd, boundToUnboundResDict
def calc_DockQ(model, native, use_CA_only=False):
    """Score a two-chain docking model against its native structure.

    The external ``fnat`` program, looked up in the directory of the running
    script, is run twice on (model, native) with a trailing argument of ``5``
    and ``10`` (presumably distance cutoffs -- confirm against fnat) and its
    output is parsed with ``parse_fnat``. The first run provides the fnat
    statistics, the second the interface residues used for the interface
    RMSD. Both PDB files are then parsed with Bio.PDB and their first models
    are compared:

    * ``irms``: RMSD after superimposing the selected atoms of the interface
      residues that exist in both structures.
    * ``Lrms``: RMSD of the ligand chain after superimposing the model onto
      the native using the receptor chain only. The receptor is the chain
      with more atoms in common; on a tie ``chain2`` is the receptor.
    * ``DockQ``: mean of fnat, 1/(1+(irms/1.5)^2) and 1/(1+(Lrms/8.5)^2).

    Residues are matched by residue number plus chain id (insertion codes
    are not part of the key), so model and native must share numbering and
    chain ids. Hetero residues are skipped. Native chains that do not exist
    in the model are ignored. Atom coordinates of the parsed model are
    transformed in place by the superpositions.

    model: path of the model PDB file.
    native: path of the native (reference) PDB file.
    use_CA_only: superimpose on CA atoms only instead of CA, C, N and O.

    Returns a dict with the keys DockQ, irms, Lrms, fnat, nat_correct,
    nat_total, fnonnat, nonnat_count, model_total, chain1, chain2, len1,
    len2, class1 and class2. len1/len2 count the non-hetero residues of each
    model chain; class1/class2 are 'ligand' or 'receptor'.

    Raises AssertionError when fnat output cannot be parsed or the atom
    selections of model and native are empty or differ in size, and
    ValueError when the model does not contain exactly two chains.
    """
    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA'] if use_CA_only else ['CA', 'C', 'N', 'O']

    # NOTE(review): model/native are interpolated unquoted into a shell
    # command line; paths containing spaces or shell metacharacters break it.
    cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5'
    cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10'

    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total,
     _) = parse_fnat(commands.getoutput(cmd_fnat))
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)

    # Only the interface residue collection of the second run is used.
    (fnat_bb, _, _, _, _, _,
     interface) = parse_fnat(commands.getoutput(cmd_interface))
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    # Use the first model of each PDB file.
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
    ref_model = pdb_parser.get_structure("reference", native)[0]
    sample_model = pdb_parser.get_structure("model", model)[0]

    def standard_residues(chain):
        """Yield (key, residue) for every non-hetero residue of chain."""
        for residue in chain:
            res_id = residue.get_id()
            if res_id[0] != ' ':  # Skip hetatm.
                continue
            yield str(res_id[1]) + chain.id, residue

    # Atom keys ("<resnum><chain>.<atom>") defined in the model ...
    atoms_def_sample = set()
    for sample_chain in sample_model:
        for key, sample_res in standard_residues(sample_chain):
            for a in atom_for_sup:
                if a in sample_res:
                    atom_key = key + '.' + a
                    if atom_key in atoms_def_sample:
                        print(atom_key + ' already added (MODEL)!!!')
                    atoms_def_sample.add(atom_key)

    # ... and the subset that is also defined in the native.
    atoms_def_in_both = set()
    for ref_chain in ref_model:
        for key, ref_res in standard_residues(ref_chain):
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print(atom_key + ' already added (Native)!!!')
                    atoms_def_in_both.add(atom_key)

    # Model pass: residue keys per chain and the interface atoms for irms.
    chain_res = {}      # chain id -> list of residue keys (kept for len1/len2)
    sample_keys = {}    # chain id -> same keys as a set, for fast lookup
    sample_atoms = []
    common_interface = set()
    for sample_chain in sample_model:
        residue_keys = chain_res.setdefault(sample_chain.id, [])
        key_set = sample_keys.setdefault(sample_chain.id, set())
        for key, sample_res in standard_residues(sample_chain):
            residue_keys.append(key)
            key_set.add(key)
            if key in interface:
                common_interface.add(key)
                for a in atom_for_sup:
                    if a in sample_res and key + '.' + a in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])

    # Native pass: per-chain common atoms and the interface atoms for irms.
    chain_ref = {}
    ref_atoms = []
    common_residues = set()
    for ref_chain in ref_model:
        ref_chain_atoms = chain_ref.setdefault(ref_chain.id, [])
        # A native chain missing from the model simply has no common residues.
        keys_in_model = sample_keys.get(ref_chain.id, ())
        for key, ref_res in standard_residues(ref_chain):
            shared = [ref_res[a] for a in atom_for_sup
                      if a in ref_res and key + '.' + a in atoms_def_in_both]
            if key in keys_in_model:
                ref_chain_atoms.extend(shared)
                if shared:
                    common_residues.add(key)
            if key in common_interface:
                ref_atoms.extend(shared)

    # Second model pass: per-chain atoms of residues also found in the native.
    chain_sample = {}
    for sample_chain in sample_model:
        sample_chain_atoms = chain_sample.setdefault(sample_chain.id, [])
        for key, sample_res in standard_residues(sample_chain):
            if key in common_residues:
                for a in atom_for_sup:
                    if a in sample_res and key + '.' + a in atoms_def_in_both:
                        sample_chain_atoms.append(sample_res[a])

    assert len(ref_atoms) != 0, "length of native is zero"
    assert len(sample_atoms) != 0, "length of model is zero"
    assert len(ref_atoms) == len(
        sample_atoms
    ), "Different number of atoms in native and model %d %d\n" % (
        len(ref_atoms), len(sample_atoms))

    # Interface RMSD: fit the model onto the native on the interface atoms.
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())
    irms = super_imposer.rms

    if len(chain_sample) != 2:
        # Same exception type as the bare tuple unpacking, but readable.
        raise ValueError(
            "Expected exactly two chains in the model, found %d (%s)"
            % (len(chain_sample), ', '.join(sorted(chain_sample))))
    (chain1, chain2) = chain_sample.keys()

    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])
    assert len1 != 0, "%s chain has zero length!\n" % chain1
    assert len2 != 0, "%s chain has zero length!\n" % chain2

    # The chain with more common atoms is treated as the receptor.
    if len(chain_sample[chain1]) > len(chain_sample[chain2]):
        receptor_chain, ligand_chain = chain1, chain2
        class1, class2 = 'receptor', 'ligand'
    else:
        receptor_chain, ligand_chain = chain2, chain1
        class1, class2 = 'ligand', 'receptor'

    # Align on the receptor; %s (not %c) so longer chain ids format too.
    assert len(chain_ref[receptor_chain]) == len(
        chain_sample[receptor_chain]
    ), "Different number of atoms in native and model receptor (chain %s) %d %d\n" % (
        receptor_chain, len(chain_ref[receptor_chain]),
        len(chain_sample[receptor_chain]))
    super_imposer.set_atoms(chain_ref[receptor_chain],
                            chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())

    assert len(chain_ref[ligand_chain]) != 0 or len(
        chain_sample[ligand_chain]
    ) != 0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (
        ligand_chain, len(chain_ref[ligand_chain]),
        len(chain_sample[ligand_chain]))
    assert len(chain_ref[ligand_chain]) == len(
        chain_sample[ligand_chain]
    ), "Different number of atoms in native and model ligand (chain %s) %d %d\n" % (
        ligand_chain, len(chain_ref[ligand_chain]),
        len(chain_sample[ligand_chain]))

    # Ligand RMSD of the already transformed coordinates, without refitting.
    # Written out instead of calling the private SVDSuperimposer._rms helper.
    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])
    diff = coord1 - coord2
    Lrms = np.sqrt(np.sum(diff * diff) / coord1.shape[0])

    DockQ = (float(fnat) + 1.0 / (1 + (irms / 1.5) * (irms / 1.5)) + 1.0 /
             (1 + (Lrms / 8.5) * (Lrms / 8.5))) / 3

    return {
        'DockQ': DockQ,
        'irms': irms,
        'Lrms': Lrms,
        'fnat': fnat,
        'nat_correct': nat_correct,
        'nat_total': nat_total,
        'fnonnat': fnonnat,
        'nonnat_count': nonnat_count,
        'model_total': model_total,
        'chain1': chain1,
        'chain2': chain2,
        'len1': len1,
        'len2': len2,
        'class1': class1,
        'class2': class2,
    }
def calc_DockQ(model, native, use_CA_only=False):
    """Score a two-chain docking model against its native structure.

    The external ``fnat`` program, looked up in the directory of the running
    script, is run twice on (model, native) with a trailing argument of ``5``
    and ``10`` (presumably distance cutoffs -- confirm against fnat) and its
    output is parsed with ``parse_fnat``. The first run provides the fnat
    statistics, the second the interface residues used for the interface
    RMSD. Both PDB files are then parsed with Bio.PDB and their first models
    are compared:

    * ``irms``: RMSD after superimposing the selected atoms of the interface
      residues that exist in both structures.
    * ``Lrms``: RMSD of the ligand chain after superimposing the model onto
      the native using the receptor chain only. The receptor is the chain
      with more atoms in common; on a tie ``chain2`` is the receptor.
    * ``DockQ``: mean of fnat, 1/(1+(irms/1.5)^2) and 1/(1+(Lrms/8.5)^2).

    Residues are matched by residue number plus chain id (insertion codes
    are not part of the key), so model and native must share numbering and
    chain ids. Hetero residues are skipped. Native chains that do not exist
    in the model are ignored. Atom coordinates of the parsed model are
    transformed in place by the superpositions.

    model: path of the model PDB file.
    native: path of the native (reference) PDB file.
    use_CA_only: superimpose on CA atoms only instead of CA, C, N and O.

    Returns a dict with the keys DockQ, irms, Lrms, fnat, nat_correct,
    nat_total, fnonnat, nonnat_count, model_total, chain1, chain2, len1,
    len2, class1 and class2. len1/len2 count the non-hetero residues of each
    model chain; class1/class2 are 'ligand' or 'receptor'.

    Raises AssertionError when fnat output cannot be parsed or the atom
    selections of model and native are empty or differ in size, and
    ValueError when the model does not contain exactly two chains.
    """
    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA'] if use_CA_only else ['CA', 'C', 'N', 'O']

    # NOTE(review): model/native are interpolated unquoted into a shell
    # command line; paths containing spaces or shell metacharacters break it.
    cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5'
    cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10'

    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total,
     _) = parse_fnat(commands.getoutput(cmd_fnat))
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)

    # Only the interface residue collection of the second run is used.
    (fnat_bb, _, _, _, _, _,
     interface) = parse_fnat(commands.getoutput(cmd_interface))
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    # Use the first model of each PDB file.
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
    ref_model = pdb_parser.get_structure("reference", native)[0]
    sample_model = pdb_parser.get_structure("model", model)[0]

    def standard_residues(chain):
        """Yield (key, residue) for every non-hetero residue of chain."""
        for residue in chain:
            res_id = residue.get_id()
            if res_id[0] != ' ':  # Skip hetatm.
                continue
            yield str(res_id[1]) + chain.id, residue

    # Atom keys ("<resnum><chain>.<atom>") defined in the model ...
    atoms_def_sample = set()
    for sample_chain in sample_model:
        for key, sample_res in standard_residues(sample_chain):
            for a in atom_for_sup:
                if a in sample_res:
                    atom_key = key + '.' + a
                    if atom_key in atoms_def_sample:
                        print(atom_key + ' already added (MODEL)!!!')
                    atoms_def_sample.add(atom_key)

    # ... and the subset that is also defined in the native.
    atoms_def_in_both = set()
    for ref_chain in ref_model:
        for key, ref_res in standard_residues(ref_chain):
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print(atom_key + ' already added (Native)!!!')
                    atoms_def_in_both.add(atom_key)

    # Model pass: residue keys per chain and the interface atoms for irms.
    chain_res = {}      # chain id -> list of residue keys (kept for len1/len2)
    sample_keys = {}    # chain id -> same keys as a set, for fast lookup
    sample_atoms = []
    common_interface = set()
    for sample_chain in sample_model:
        residue_keys = chain_res.setdefault(sample_chain.id, [])
        key_set = sample_keys.setdefault(sample_chain.id, set())
        for key, sample_res in standard_residues(sample_chain):
            residue_keys.append(key)
            key_set.add(key)
            if key in interface:
                common_interface.add(key)
                for a in atom_for_sup:
                    if a in sample_res and key + '.' + a in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])

    # Native pass: per-chain common atoms and the interface atoms for irms.
    chain_ref = {}
    ref_atoms = []
    common_residues = set()
    for ref_chain in ref_model:
        ref_chain_atoms = chain_ref.setdefault(ref_chain.id, [])
        # A native chain missing from the model simply has no common residues.
        keys_in_model = sample_keys.get(ref_chain.id, ())
        for key, ref_res in standard_residues(ref_chain):
            shared = [ref_res[a] for a in atom_for_sup
                      if a in ref_res and key + '.' + a in atoms_def_in_both]
            if key in keys_in_model:
                ref_chain_atoms.extend(shared)
                if shared:
                    common_residues.add(key)
            if key in common_interface:
                ref_atoms.extend(shared)

    # Second model pass: per-chain atoms of residues also found in the native.
    chain_sample = {}
    for sample_chain in sample_model:
        sample_chain_atoms = chain_sample.setdefault(sample_chain.id, [])
        for key, sample_res in standard_residues(sample_chain):
            if key in common_residues:
                for a in atom_for_sup:
                    if a in sample_res and key + '.' + a in atoms_def_in_both:
                        sample_chain_atoms.append(sample_res[a])

    assert len(ref_atoms) != 0, "length of native is zero"
    assert len(sample_atoms) != 0, "length of model is zero"
    assert len(ref_atoms) == len(
        sample_atoms
    ), "Different number of atoms in native and model %d %d\n" % (
        len(ref_atoms), len(sample_atoms))

    # Interface RMSD: fit the model onto the native on the interface atoms.
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())
    irms = super_imposer.rms

    if len(chain_sample) != 2:
        # Same exception type as the bare tuple unpacking, but readable.
        raise ValueError(
            "Expected exactly two chains in the model, found %d (%s)"
            % (len(chain_sample), ', '.join(sorted(chain_sample))))
    (chain1, chain2) = chain_sample.keys()

    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])
    assert len1 != 0, "%s chain has zero length!\n" % chain1
    assert len2 != 0, "%s chain has zero length!\n" % chain2

    # The chain with more common atoms is treated as the receptor.
    if len(chain_sample[chain1]) > len(chain_sample[chain2]):
        receptor_chain, ligand_chain = chain1, chain2
        class1, class2 = 'receptor', 'ligand'
    else:
        receptor_chain, ligand_chain = chain2, chain1
        class1, class2 = 'ligand', 'receptor'

    # Align on the receptor; %s (not %c) so longer chain ids format too.
    assert len(chain_ref[receptor_chain]) == len(
        chain_sample[receptor_chain]
    ), "Different number of atoms in native and model receptor (chain %s) %d %d\n" % (
        receptor_chain, len(chain_ref[receptor_chain]),
        len(chain_sample[receptor_chain]))
    super_imposer.set_atoms(chain_ref[receptor_chain],
                            chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())

    assert len(chain_ref[ligand_chain]) != 0 or len(
        chain_sample[ligand_chain]
    ) != 0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (
        ligand_chain, len(chain_ref[ligand_chain]),
        len(chain_sample[ligand_chain]))
    assert len(chain_ref[ligand_chain]) == len(
        chain_sample[ligand_chain]
    ), "Different number of atoms in native and model ligand (chain %s) %d %d\n" % (
        ligand_chain, len(chain_ref[ligand_chain]),
        len(chain_sample[ligand_chain]))

    # Ligand RMSD of the already transformed coordinates, without refitting.
    # Written out instead of calling the private SVDSuperimposer._rms helper.
    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])
    diff = coord1 - coord2
    Lrms = np.sqrt(np.sum(diff * diff) / coord1.shape[0])

    DockQ = (float(fnat) + 1.0 / (1 + (irms / 1.5) * (irms / 1.5)) + 1.0 /
             (1 + (Lrms / 8.5) * (Lrms / 8.5))) / 3

    return {
        'DockQ': DockQ,
        'irms': irms,
        'Lrms': Lrms,
        'fnat': fnat,
        'nat_correct': nat_correct,
        'nat_total': nat_total,
        'fnonnat': fnonnat,
        'nonnat_count': nonnat_count,
        'model_total': model_total,
        'chain1': chain1,
        'chain2': chain2,
        'len1': len1,
        'len2': len2,
        'class1': class1,
        'class2': class2,
    }