def gen_RMSD_matrix(mol_list):
    """Build a pairwise RMSD table for a list of MOPAC output files.

    Input: list of molecule filenames (each is read with the "mopout"
    format; only the first molecule of a file is used).
    Output: list of rows. The first row is a header made of a blank cell
    followed by the filenames. Every following row starts with the title
    of the reference molecule and continues with its RMSD to each
    molecule, in the order of mol_list. A pair that cannot be aligned is
    stored as NaN so that every row keeps exactly one cell per molecule.
    """
    # Set up OBAlign object
    align = openbabel.OBAlign()
    # next() instead of the Python 2-only generator method .next()
    PyMols = [next(pybel.readfile("mopout", filename))
              for filename in mol_list]
    rows = [[" "] + mol_list]  # header row
    for mol in PyMols:
        row = [mol.OBMol.GetTitle()]
        # setup reference
        align.SetRefMol(mol.OBMol)
        for mol_in in PyMols:
            # setup target
            align.SetTargetMol(mol_in.OBMol)
            if align.Align():
                row.append(align.GetRMSD())
            else:
                # Keep the column position; skipping the cell would shift
                # every later value of this row under the wrong header.
                row.append(float("nan"))
        rows.append(row)
    return rows
def uniform_labels(self, mol1, mol2):
    """
    Pair the geometrically equivalent atoms of the molecules.

    Every isomorphism mapping of mol1 onto mol2 is turned into a candidate
    atom order for mol2; the candidates whose element sequence matches
    mol1 are aligned against mol1 and the one with the least RMSD wins.

    Args:
        mol1: First molecule. OpenBabel OBMol or pymatgen Molecule object.
        mol2: Second molecule. OpenBabel OBMol or pymatgen Molecule object.

    Returns:
        (list1, list2): atom orders for mol1 and mol2. Each element is the
        original (1-based) atom index of the atom at that position of the
        uniform order. list1 is always 1..NumAtoms of mol1.
        (None, None) if the molecule hashes differ.
        list2 is None when no candidate mapping has a matching element
        sequence.
    """
    obmol1 = BabelMolAdaptor(mol1).openbabel_mol
    obmol2 = BabelMolAdaptor(mol2).openbabel_mol

    # Different hashes: no uniform order is attempted.
    if self.get_molecule_hash(obmol1) != self.get_molecule_hash(obmol2):
        return None, None

    query = ob.CompileMoleculeQuery(obmol1)
    mapper = ob.OBIsomorphismMapper.GetInstance(query)
    mappings = ob.vvpairUIntUInt()
    mapper.MapAll(obmol2, mappings)

    # One candidate per mapping: pairs ordered by their first member,
    # second member shifted to 1-based atom indices.
    candidate_labels = tuple(
        tuple(pair[1] + 1
              for pair in sorted(mapping, key=lambda pair: pair[0]))
        for mapping in mappings
    )

    aligner = ob.OBAlign(True, False)
    aligner.SetRefMol(obmol1)
    label1 = list(range(1, obmol1.NumAtoms() + 1))
    # noinspection PyProtectedMember
    elements1 = InchiMolAtomMapper._get_elements(obmol1, label1)

    least_rmsd = float("Inf")
    best_label2 = None
    for label2 in candidate_labels:
        # noinspection PyProtectedMember
        elements2 = InchiMolAtomMapper._get_elements(obmol2, label2)
        if elements1 != elements2:
            continue
        # Rebuild mol2 with its atoms in the candidate order.
        reordered = ob.OBMol()
        for atom_idx in label2:
            reordered.AddAtom(obmol2.GetAtom(atom_idx))
        aligner.SetTargetMol(reordered)
        aligner.Align()
        rmsd = aligner.GetRMSD()
        if rmsd < least_rmsd:
            least_rmsd, best_label2 = rmsd, copy.copy(label2)
    return label1, best_label2
def prune(pybel_list, min_RMSD):
    """Remove near-duplicate conformers from a list of pybel molecules.

    Iterates over all pairs and deletes the second molecule of a pair if
    the RMSD between the paired molecules is less than min_RMSD.

    pybel_list is modified in place and is also returned.
    Raises Exception if a pair cannot be aligned.
    """
    # Set up OBAlign object
    align = openbabel.OBAlign()
    i = 0
    total_removed = 0
    while i < len(pybel_list):
        align.SetRefMol(pybel_list[i].OBMol)  # reference
        j = i + 1
        while j < len(pybel_list):
            align.SetTargetMol(pybel_list[j].OBMol)  # target
            if not align.Align():
                print("Couldn't align")
                raise Exception("Couldn't align")
            if align.GetRMSD() < min_RMSD:
                # The next candidate slides into index j, so j stays put.
                pybel_list.pop(j)
                total_removed += 1
            else:
                j += 1
        i += 1
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("finished deleting, total number of removed conformers is %s"
          % total_removed)
    return pybel_list
def superimpose(mol1, mol2):
    """Align mol2 onto mol1, move mol2's coordinates and return the RMSD.

    Raises Exception when the alignment is reported as failed.
    """
    aligner = openbabel.OBAlign(mol1.OBMol, mol2.OBMol)
    if not aligner.Align():
        raise Exception("Couldn't align, probably different molecules")
    # Write the aligned coordinates back into the second molecule.
    aligner.UpdateCoords(mol2.OBMol)
    return aligner.GetRMSD()
def check(mol, rmsd):
    """Print the ".out" files of the current directory that are close to mol.

    Each file whose name ends with ".out" is read with the "mopout"
    format (first molecule only) and aligned against mol. For every file
    that aligns with an RMSD below the rmsd threshold, the filename and
    the RMSD are printed on one line. Files that cannot be aligned are
    skipped silently.
    """
    align = ob.OBAlign()
    align.SetRefMol(mol.OBMol)
    for f in os.listdir("."):
        if not f.endswith(".out"):
            continue
        # next() instead of the Python 2-only generator method .next()
        align.SetTargetMol(next(pybel.readfile("mopout", f)).OBMol)
        if align.Align():
            value = align.GetRMSD()
            if value < rmsd:
                # Single-argument print() works on Python 2 and Python 3.
                print("%s %s" % (f, value))
def compare_rmsd_dic(compar_pymols, reference):
    """Map each molecule of compar_pymols to its RMSD against reference.

    Molecules for which the alignment fails get no entry in the result.
    """
    aligner = ob.OBAlign()
    aligner.SetRefMol(reference.OBMol)
    rmsd_by_mol = {}
    for candidate in compar_pymols:
        aligner.SetTargetMol(candidate.OBMol)
        if not aligner.Align():
            continue
        rmsd_by_mol[candidate] = aligner.GetRMSD()
    return rmsd_by_mol
def rmsd(mol1, mol2):
    """Return the RMSD between the first molecules of two XYZ files.

    mol1 is the reference file and mol2 the target file. The result of
    Align() is not checked; whatever GetRMSD() reports is returned.
    """
    ref_mol = next(pybel.readfile("xyz", mol1))
    tgt_mol = next(pybel.readfile("xyz", mol2))
    aligner = openbabel.OBAlign(False, True)
    aligner.SetRefMol(ref_mol.OBMol)
    aligner.SetTargetMol(tgt_mol.OBMol)
    aligner.Align()
    return aligner.GetRMSD()
def rmsd(target, ref, invalid):
    """Return the RMSD of an XYZ file against a reference XYZ file.

    Args:
        target: path of the XYZ file to compare.
        ref: path of the reference XYZ file.
        invalid: container of target paths that must not be evaluated.

    Returns:
        The RMSD after alignment, or NaN when target is listed in invalid
        or when the two molecules cannot be aligned.
    """
    if target in invalid:
        return np.nan  # 0.0
    a = next(pybel.readfile("xyz", ref))
    b = next(pybel.readfile("xyz", target))
    align = openbabel.OBAlign(False, True)
    align.SetRefMol(a.OBMol)
    align.SetTargetMol(b.OBMol)
    if not align.Align():
        # No alignment means no meaningful RMSD; use the same "no value"
        # sentinel as for invalid targets instead of a stray number.
        return np.nan
    return align.GetRMSD()
def get_rmsd(ref, tar, H=False, sym=True):
    """Return the RMSD between two molecule files after alignment.

    Args:
        ref: path of the reference molecule file.
        tar: path of the target molecule file.
        H: passed to OBAlign as includeH.
        sym: passed to OBAlign as symmetry.

    The input format of each file is taken from its own extension.

    Raises:
        IOError: if no molecule can be read from one of the files.
    """
    def read_molecule(path):
        # splitext() keeps the leading dot; the format ID is passed
        # without it (presumably what SetInFormat expects - confirm).
        fmt = os.path.splitext(path)[1].lstrip(".")
        conversion = ob.OBConversion()
        conversion.SetInFormat(fmt)
        mol = ob.OBMol()
        if not conversion.ReadFile(mol, path):
            raise IOError("Could not read a molecule from %s" % path)
        return mol

    # Each file gets its own converter so the target is never parsed with
    # the reference's format.
    obmol = read_molecule(ref)
    obmol2 = read_molecule(tar)

    aligner = ob.OBAlign(H, sym)  # includeH, symmetry
    aligner.SetMethod(1)
    aligner.SetRefMol(obmol)
    aligner.SetTargetMol(obmol2)
    aligner.Align()
    return aligner.GetRMSD()
def removeClose(mol_list, minRMSD):
    """Delete conformer files that duplicate an earlier one.

    Input: list of molecule filenames, minRMSD.
    Checks the RMSD between every pair in the list. If the RMSD is lower
    than minRMSD, the second file of the pair is deleted from disk and its
    name is removed from the list. Prints how many similar conformers each
    molecule had and the overall total.

    mol_list is modified in place and is also returned.
    Raises Exception if a pair cannot be aligned.
    """
    # Set up OBAlign object
    align = openbabel.OBAlign()
    # Convert to OBMol-s
    OBMols = to_OBMol(mol_list)
    print("Finished converting")
    i = 0
    total_removed = 0
    while i < len(mol_list):
        align.SetRefMol(OBMols[i])  # reference
        j = i + 1
        removed_confs = 0
        while j < len(mol_list):
            align.SetTargetMol(OBMols[j])  # target
            if not align.Align():
                print("Couldn't align")
                raise Exception("Couldn't align")
            if align.GetRMSD() < minRMSD:
                os.unlink(mol_list[j])  # delete file
                # Remove from both lists so their indices stay in step;
                # the next candidate slides into index j.
                mol_list.pop(j)
                OBMols.pop(j)
                removed_confs += 1
            else:
                j += 1
        # Single-argument print() works on Python 2 and Python 3.
        print("Molecule %s had %s similair conformers."
              % (mol_list[i], removed_confs))
        i += 1
        total_removed += removed_confs
    print("finished deleting, total number of removed conformers is %s"
          % total_removed)
    return mol_list
def _calc_rms(self, mol1, mol2, clabel1, clabel2):
    """
    Calculate the RMSD.

    Both molecules are rebuilt atom by atom in the order given by their
    label lists (only atomic number and position are copied) and the
    rebuilt molecules are aligned.

    Args:
        mol1: The first molecule. OpenBabel OBMol or pymatgen Molecule
            object
        mol2: The second molecule. OpenBabel OBMol or pymatgen Molecule
            object
        clabel1: The atom indices that can reorder the first molecule to
            uniform atom order
        clabel2: The atom indices that can reorder the second molecule to
            uniform atom order

    Returns:
        The RMSD.
    """
    def reordered_copy(source, labels):
        # New molecule with one atom per label, in label order.
        rebuilt = ob.OBMol()
        for atom_idx in labels:
            src_atom = source.GetAtom(atom_idx)
            new_atom = rebuilt.NewAtom()
            new_atom.SetAtomicNum(src_atom.GetAtomicNum())
            new_atom.SetVector(src_atom.GetVector())
        return rebuilt

    obmol1 = BabelMolAdaptor(mol1).openbabel_mol
    obmol2 = BabelMolAdaptor(mol2).openbabel_mol

    cmol1 = reordered_copy(obmol1, clabel1)
    cmol2 = reordered_copy(obmol2, clabel2)

    aligner = ob.OBAlign(True, False)
    aligner.SetRefMol(cmol1)
    aligner.SetTargetMol(cmol2)
    aligner.Align()
    return aligner.GetRMSD()
def find_duplicates_and_unique(pybel_list, min_rmsd, stict=False,
                               energ_dif=0.1):
    """Return two lists with unique and duplicate molecules in pybel format.

    Every molecule is compared with the ones that follow it in pybel_list.
    A later molecule counts as a duplicate when its RMSD to the reference
    is below min_rmsd; with stict=True the absolute energy difference
    must additionally be below energ_dif. Duplicates are moved out of
    pybel_list (which is modified in place) into the second list.

    Returns:
        (pybel_list, duplicate_list)

    Raises:
        Exception: if a pair cannot be aligned.
    """
    align = openbabel.OBAlign()
    i = 0
    duplicate_list = []
    # iterate over all
    while i < len(pybel_list):
        reference = pybel_list[i]
        align.SetRefMol(reference.OBMol)
        j = i + 1
        print("Molecule " + get_filename(reference.title) + " has been set.")
        # iterate over reference pairs
        while j < len(pybel_list):
            candidate = pybel_list[j]
            align.SetTargetMol(candidate.OBMol)
            if not align.Align():
                print("Couldn't align")
                raise Exception("Couldn't align")
            rmsd = align.GetRMSD()
            is_duplicate = rmsd < min_rmsd
            if is_duplicate:
                energy_gap = reference.energy - candidate.energy
                if stict and not abs(energy_gap) < energ_dif:
                    is_duplicate = False
            # Exactly one of "pop" or "advance" happens per iteration, so
            # the loop always makes progress.
            if is_duplicate:
                print(reference.title + " and " + candidate.title
                      + " are simmilair")
                print("Enegy difference: " + str(energy_gap))
                print("RMSD score: " + str(rmsd))
                print("Not copying " + candidate.title)
                duplicate_list.append(pybel_list.pop(j))
            else:
                j += 1
        i += 1
    print("Total number of duplicates: " + str(len(duplicate_list)))
    print("Molecules remaining: " + str(len(pybel_list)))
    return pybel_list, duplicate_list
def removeClose(sdf_filename, minRMSD):
    """Write a copy of an SDF file without near-duplicate conformers.

    The molecules obtained from to_pyMol(sdf_filename) are compared
    pairwise; whenever the RMSD of a pair is below minRMSD the later
    molecule is dropped. The survivors are written to
    "<name>_pruned<minRMSD>.sdf" next to the input file.

    Returns:
        The name of the written SDF file.

    Raises:
        Exception: if a pair cannot be aligned.
    """
    align = openbabel.OBAlign()
    pyMols = to_pyMol(sdf_filename)
    print("Finished converting")
    i = 0
    total_removed = 0
    while i < len(pyMols):
        align.SetRefMol(pyMols[i].OBMol)  # reference
        j = i + 1
        while j < len(pyMols):
            align.SetTargetMol(pyMols[j].OBMol)  # target
            if not align.Align():
                print("Couldn't align")
                raise Exception("Couldn't align")
            if align.GetRMSD() < minRMSD:
                # The next candidate slides into index j, so j stays put.
                pyMols.pop(j)
                total_removed += 1
            else:
                j += 1
        i += 1
    # Single-argument print() works on Python 2 and Python 3.
    print("finished deleting, total number of removed conformers is %s"
          % total_removed)
    # Create SDF file
    out_name = (os.path.splitext(sdf_filename)[0] + '_pruned'
                + str(minRMSD) + '.sdf')
    out_mol = pybel.Outputfile('sdf', out_name)
    try:
        for pymol in pyMols:
            out_mol.write(pymol)
    finally:
        # Close the file even if writing a molecule fails.
        out_mol.close()
    return out_name
def cluster(ob_rep_mols):
    """Assign every ".out" file of the current directory to a cluster.

    Creates a dictionary whose keys are the representative molecules and
    whose values are dictionaries that map a file name to the RMSD
    between that file's molecule and the representative.

    Each file whose name ends with ".out" is read with the "mopout"
    format (first molecule only) and is assigned to the representative it
    aligns to with the lowest RMSD. That lowest RMSD is also printed.

    Raises:
        KeyError: if a file cannot be aligned with any representative.
    """
    # One empty cluster per representative molecule
    dic = dict((rep, {}) for rep in ob_rep_mols)
    align = ob.OBAlign(False, False)
    for f in os.listdir("."):
        if not f.endswith(".out"):
            continue
        # The molecule from the directory is the alignment reference;
        # next() replaces the Python 2-only generator method .next().
        align.SetRefMol(next(pybel.readfile("mopout", f)).OBMol)
        ref_m = None  # closest representative found so far
        rmsd_m = float("inf")  # and its RMSD
        for ref in ob_rep_mols:
            align.SetTargetMol(ref)
            # Representatives that cannot be aligned are not candidates
            if align.Align():
                rmsd = align.GetRMSD()
                if rmsd < rmsd_m:
                    rmsd_m = rmsd
                    ref_m = ref
        if ref_m is None:
            # Same exception type as the former dic[None] lookup, but
            # with a message that says what went wrong.
            raise KeyError(
                "no representative molecule could be aligned with %s" % f)
        print("Min rmsd: %s" % rmsd_m)
        dic[ref_m][f] = rmsd_m
    return dic
def removeClose(mol_list, minRMSD):
    """Drop filenames whose molecule duplicates an earlier one.

    Input: list of molecule filenames, minRMSD.
    Checks the RMSD between every pair in the list. If the RMSD is lower
    than minRMSD, the second filename of the pair is removed from the
    list. Prints how many similar conformers each molecule had.

    mol_list is modified in place and is also returned.
    Raises Exception if a pair cannot be aligned.
    """
    # Set up OBAlign object
    align = openbabel.OBAlign()
    # Convert to OBMol-s
    OBMols = to_OBMol(mol_list)
    print("Finished converting")
    i = 0
    while i < len(mol_list):
        align.SetRefMol(OBMols[i])  # reference
        j = i + 1
        removed_confs = 0
        while j < len(mol_list):
            align.SetTargetMol(OBMols[j])  # target
            if not align.Align():
                # Without a successful alignment the RMSD is not
                # meaningful; do not treat the pair as duplicates.
                print("Couldn't align")
                raise Exception("Couldn't align")
            if align.GetRMSD() < minRMSD:
                # Remove from both lists so their indices stay in step;
                # the next candidate slides into index j.
                mol_list.pop(j)
                OBMols.pop(j)
                removed_confs += 1
            else:
                j += 1
        # Single-argument print() works on Python 2 and Python 3.
        print("Molecule %s had %s similair conformers."
              % (mol_list[i], removed_confs))
        i += 1
    return mol_list
def _align_hydrogen_atoms(mol1, mol2, heavy_indices1, heavy_indices2):
    """
    Align the label of topologically identical atoms of second molecule
    towards first molecule

    The atoms not listed in the heavy-atom label maps are appended to
    them, both molecules are rebuilt in that order and aligned, and each
    remaining atom of the second molecule is then paired with the nearest
    still-unpaired remaining atom of the first molecule. The atom count
    of mol2 is used for both molecules.

    Args:
        mol1: First molecule. OpenBabel OBMol object
        mol2: Second molecule. OpenBabel OBMol object
        heavy_indices1: inchi label map of the first molecule
        heavy_indices2: label map of the second molecule

    Return:
        (label map of all atoms of the first molecule,
         corrected label map of all atoms of the second molecule)
    """
    def copy_in_order(source, labels):
        # New molecule with element and position of each labelled atom.
        rebuilt = ob.OBMol()
        for atom_idx in labels:
            src_atom = source.GetAtom(atom_idx)
            new_atom = rebuilt.NewAtom()
            new_atom.SetAtomicNum(src_atom.GetAtomicNum())
            new_atom.SetVector(src_atom.GetVector())
        return rebuilt

    num_atoms = mol2.NumAtoms()
    every_index = set(range(1, num_atoms + 1))
    n_heavy1 = len(heavy_indices1)
    n_heavy2 = len(heavy_indices2)

    # Heavy atoms first, everything else afterwards.
    label1 = heavy_indices1 + tuple(every_index - set(heavy_indices1))
    label2 = heavy_indices2 + tuple(every_index - set(heavy_indices2))

    cmol1 = copy_in_order(mol1, label1)
    cmol2 = copy_in_order(mol2, label2)

    aligner = ob.OBAlign(False, False)
    aligner.SetRefMol(cmol1)
    aligner.SetTargetMol(cmol2)
    aligner.Align()
    aligner.UpdateCoords(cmol2)

    # Greedy nearest-neighbour pairing of the trailing atoms.
    unmatched1 = list(range(n_heavy1 + 1, num_atoms + 1))
    matched_for_2 = []
    for h2 in range(n_heavy2 + 1, num_atoms + 1):
        shortest = 99999.0
        nearest = unmatched1[0]
        atom2 = cmol2.GetAtom(h2)
        for h1 in unmatched1:
            gap = cmol1.GetAtom(h1).GetDistance(atom2)
            if gap < shortest:
                shortest, nearest = gap, h1
        matched_for_2.append(nearest)
        unmatched1.remove(nearest)

    # Order mol2's original indices by the mol1 position they matched.
    original_h2 = label2[n_heavy2:]
    pairs = sorted(zip(matched_for_2, original_h2),
                   key=lambda pair: pair[0])
    reordered_h2 = tuple(orig for _, orig in pairs)

    return label1, heavy_indices2 + reordered_h2
def align(self, inc_hyd = False, symmetry = False, filt = False):
    """
    Align two structures via OpenBabel.

    Reads self.ref_file and self.mol_file, aligns the latter onto the
    former with ob.OBAlign and writes the moved structure back to
    self.mol_file. On success self.mol_atomtype is set to 'sybyl'.

    Returns None in every case. The method returns early, without writing
    anything, when both file names are identical, when the alignment
    fails, or (with filt off) when the coordinate update fails; these
    cases are only reported through logger.write.

    With filt on, atoms whose residue name is in const.IGNORE_RESIDUES
    are left out of the alignment, and the resulting rotation plus a
    shift derived from atom 1 is then applied to all atoms of the target.

    :param inc_hyd: include hydrogens
    :type inc_hyd: bool
    :param symmetry: consider symmetry of the molecule
    :type symmetry: bool
    :param filt: invoke primitive filter
    :type filt: bool
    :raises: SetupError if no reference file is set or if writing the
             result raises IOError
    """

    if not self.ref_file:
        raise errors.SetupError('reference file not set yet')

    if self.ref_file == self.mol_file:
        logger.write('Identical files: will not perform alignment')
        return

    # NOTE(review): the same converter reads both files, so both are
    # parsed with self.ref_fmt as input format - confirm this is intended
    # when self.mol_fmt differs.
    conv = ob.OBConversion()
    conv.SetInAndOutFormats(self.ref_fmt, self.mol_fmt)

    ref = ob.OBMol()
    tgt = ob.OBMol()

    # ignore warning messages about non-standard PDB
    errlev = ob.obErrorLog.GetOutputLevel()
    ob.obErrorLog.SetOutputLevel(0)

    conv.ReadFile(ref, self.ref_file)
    conv.ReadFile(tgt, self.mol_file)

    # restore the previous error log level
    ob.obErrorLog.SetOutputLevel(errlev)

    if filt:
        # delete unwanted atoms in reference
        delat = []

        for atom in ob.OBMolAtomIter(ref):
            resname = atom.GetResidue().GetName()

            # FIXME: replace with proper function
            if resname in const.IGNORE_RESIDUES:
                delat.append(atom)

        # atoms are collected first and deleted afterwards, outside of
        # the iteration over ref
        ref.BeginModify()

        for atom in delat:
            ref.DeleteAtom(atom, True)

        ref.EndModify()

        # copy wanted atoms from target to new OBMol
        cpy = ob.OBMol()

        for atom in ob.OBMolAtomIter(tgt):
            resname = atom.GetResidue().GetName()

            # FIXME: replace with proper function
            if resname not in const.IGNORE_RESIDUES:
                # NOTE: this copies only some info but incl. coordinates
                cpy.AddAtom(atom)

        # align the filtered copy, not the full target
        molecs = ob.OBAlign(ref, cpy, inc_hyd, symmetry)
    else:
        molecs = ob.OBAlign(ref, tgt, inc_hyd, symmetry)

    logger.write('Aligning %s with %s as reference' %
                 (self.mol_file, self.ref_file) )

    if not molecs.Align():
        logger.write('Alignment failed')
        return

    logger.write('RMSD is %.2f' % molecs.GetRMSD() )

    if filt:
        rotate = molecs.GetRotMatrix()
        molecs.UpdateCoords(ref)

        first = True

        # apply the rotation and one common shift to every atom of the
        # unfiltered target
        for atom in ob.OBMolAtomIter(tgt):
            tmpvec = ob.vector3(atom.GetVector())
            tmpvec *= rotate

            if first:
                # we obviously can't do this directly: OB's vector3 does not
                # support the '-' but only the '-=' operator. But the
                # latter leads to a memory corruption bug...
                # shift = ref.GetAtom(1).GetVector()
                # shift -= cpy.GetAtom(1).GetVector()

                # we assume atoms 1 are equivalent in both molecules...
                v1 = ref.GetAtom(1).GetVector()
                v2 = cpy.GetAtom(1).GetVector()

                x = v1.GetX() - v2.GetX()
                y = v1.GetY() - v2.GetY()
                z = v1.GetZ() - v2.GetZ()

                # computed once, on the first atom, and reused afterwards
                shift = ob.vector3(x, y, z)
                first = False

            tmpvec += shift
            atom.SetVector(tmpvec)
    else:
        if not molecs.UpdateCoords(tgt):
            logger.write('Coordinate update failed')
            return

    # the target file is overwritten with the moved coordinates
    try:
        conv.WriteFile(tgt, self.mol_file)
    except IOError as why:
        raise errors.SetupError(why)

    self.mol_atomtype = 'sybyl'
def _align_heavy_atoms(mol1, mol2, vmol1, vmol2, ilabel1, ilabel2, eq_atoms):
    """
    Align the label of topologically identical atoms of second molecule
    towards first molecule

    Note that vmol1 and vmol2 are modified: one atom per entry of ilabel2
    is appended to each of them, and the coordinates of vmol2 are
    replaced by the aligned ones.

    Args:
        mol1: First molecule. OpenBabel OBMol object
        mol2: Second molecule. OpenBabel OBMol object
        vmol1: First virtual molecule constructed by centroids. OpenBabel
            OBMol object
        vmol2: Second virtual molecule constructed by centroids. OpenBabel
            OBMol object
        ilabel1: inchi label map of the first molecule
        ilabel2: inchi label map of the second molecule
        eq_atoms: equivalent atom labels; an iterable of groups of
            1-based positions within the heavy-atom label maps

    Return:
        corrected inchi labels of heavy atoms of the second molecule
        (a tuple of original atom indices of mol2)
    """

    # atom counts before the heavy atoms are appended below
    nvirtual = vmol1.NumAtoms()
    nheavy = len(ilabel1)

    for i in ilabel2:  # add all heavy atoms
        a1 = vmol1.NewAtom()
        a1.SetAtomicNum(1)
        a1.SetVector(0.0, 0.0, 0.0)  # useless, just to pair with vmol2
        oa2 = mol2.GetAtom(i)
        a2 = vmol2.NewAtom()
        a2.SetAtomicNum(1)
        # align using the virtual atoms, these atoms are not
        # used to align, but match by positions
        a2.SetVector(oa2.GetVector())

    aligner = ob.OBAlign(False, False)
    aligner.SetRefMol(vmol1)
    aligner.SetTargetMol(vmol2)
    aligner.Align()
    aligner.UpdateCoords(vmol2)

    # heavy atoms of the first molecule, in inchi label order
    canon_mol1 = ob.OBMol()
    for i in ilabel1:
        oa1 = mol1.GetAtom(i)
        a1 = canon_mol1.NewAtom()
        a1.SetAtomicNum(oa1.GetAtomicNum())
        a1.SetVector(oa1.GetVector())

    # the atoms appended to vmol2 above, now carrying aligned coordinates;
    # they sit right after the nvirtual atoms that were already there
    aligned_mol2 = ob.OBMol()
    for i in range(nvirtual + 1, nvirtual + nheavy + 1):
        oa2 = vmol2.GetAtom(i)
        a2 = aligned_mol2.NewAtom()
        a2.SetAtomicNum(oa2.GetAtomicNum())
        a2.SetVector(oa2.GetVector())

    # start from the identity labelling and blank out (-1) every position
    # that belongs to a group of equivalent atoms
    canon_label2 = list(range(1, nheavy + 1))
    for symm in eq_atoms:
        for i in symm:
            canon_label2[i - 1] = -1
    # within each group, give every atom of the second molecule the label
    # of the nearest atom of the first molecule that is still unassigned
    for symm in eq_atoms:
        candidates1 = list(symm)
        candidates2 = list(symm)
        for c2 in candidates2:
            distance = 99999.0
            canon_idx = candidates1[0]
            a2 = aligned_mol2.GetAtom(c2)
            for c1 in candidates1:
                a1 = canon_mol1.GetAtom(c1)
                d = a1.GetDistance(a2)
                if d < distance:
                    distance = d
                    canon_idx = c1
            canon_label2[c2 - 1] = canon_idx
            candidates1.remove(canon_idx)

    # order the original indices of mol2 by their corrected label
    canon_inchi_orig_map2 = [
        (canon, inchi, orig)
        for canon, inchi, orig in zip(
            canon_label2, list(range(1, nheavy + 1)), ilabel2)
    ]
    canon_inchi_orig_map2.sort(key=lambda m: m[0])
    heavy_atom_indices2 = tuple([x[2] for x in canon_inchi_orig_map2])
    return heavy_atom_indices2
def align_ligand(dummies, ligand):
    """Move a copy of ligand so that its dummy atoms match dummies.

    The dummy atoms of the ligand (from get_dummies) are aligned onto
    dummies; the resulting transformation is then applied to a copy of
    the whole ligand. The ligand passed in is not the object returned.

    Whether atoms with atomic number 0 are deleted from the result is
    controlled by remove_dummies, which is not a parameter but a name
    looked up outside this function.

    Returns:
        (aligned ligand copy, RMSD of the dummy-atom alignment), or
        (None, None) if the alignment fails.
    """
    # fit dummy atoms of ligand to defined positions
    log.info('Aligning ligand dummy atoms to desired dummy atoms...')

    # 0.9 work on a local copy so the given ligand is left alone
    aligned_ligand = openbabel.OBMol(ligand)

    # 1.0 get dummy atoms from ligand
    log.debug('... get dummy atoms of ligand')
    ligand_dummies = get_dummies(ligand)

    # 1.1 get translation vector from read-in position to origin
    log.info('... determing translation vector from read-in to origin')
    translation = ligand_dummies.Center(1)

    # 1.2 initialize OBAlign for alignment to final destination
    log.info('... doing the alignment for dummy atoms')
    aligner = openbabel.OBAlign(dummies, ligand_dummies)
    if not aligner.Align():
        return None, None

    rmsd = aligner.GetRMSD()
    log.debug('RMSD of alignment: %s' % (rmsd,))

    ## 1.2.1 get rotation matrix of the alignment
    log.info('... determining the rotation matrix')
    rotation_matrix = aligner.GetRotMatrix()
    rot = openbabel.double_array([1, 2, 3, 4, 5, 6, 7, 8, 9])
    rotation_matrix.GetArray(rot)

    # dump the matrix only when debug output is enabled
    if log.getLogger().isEnabledFor(logging.DEBUG):
        log.debug('--- rotation matrix ---')
        for i in range(9):
            log.debug("%d : %s" % (i, rot[i]))

    # 1.3 generate positioning vector
    ## NB: centering would not work because of rotation!!
    ## update coordinates to new value
    aligner.UpdateCoords(ligand_dummies)
    log.info('... generating positioning vector')

    ## mean position of the aligned dummy atoms
    positioning = openbabel.vector3()
    n = 0
    for dummy in openbabel.OBMolAtomIter(ligand_dummies):
        n += 1
        positioning += dummy.GetVector()
    positioning /= n

    # 1.4 move all ligand atoms to fit the pose of the aligned dummies
    ## 1.4.1 invert the translation vector to move the ligand to origin
    translation *= -1

    ## 1.4.2 transposed rotation matrix to reconstruct alignment results
    rotation_inversion = rotation_matrix.transpose()
    rot_inv = openbabel.double_array([1, 2, 3, 4, 5, 6, 7, 8, 9])
    rotation_inversion.GetArray(rot_inv)

    ## 1.4.3 translate to origin, rotate, translate to final destination
    aligned_ligand.Translate(translation)
    aligned_ligand.Rotate(rot_inv)
    aligned_ligand.Translate(positioning)

    ## 1.5 clean output ligand of dummy atoms, if desired
    if remove_dummies:
        log.info('Cleaning the ligand of unwanted dummies...')
        # collect first, delete afterwards, outside of the iteration
        unwanted = [atom
                    for atom in openbabel.OBMolAtomIter(aligned_ligand)
                    if atom.GetAtomicNum() == 0]
        for atom in unwanted:
            aligned_ligand.DeleteAtom(atom)

    log.info('... returning the aligned ligand.')
    return aligned_ligand, rmsd