def calculate_mol_similarity(molA, molB):
    """
    Compute the Tanimoto similarity of two oemol objects from their
    MACCS166 fingerprints.

    Note: this helper is a candidate for relocation to utils/openeye.py
    or openmoltools.

    :param molA: oemol object of molecule A
    :param molB: oemol object of molecule B
    :return: float, tanimoto score of the two molecules, between 0 and 1
    """
    fingerprints = []
    for molecule in (molA, molB):
        # Both molecules are fingerprinted with the same MACCS166 type so
        # that the two bit vectors are directly comparable.
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, molecule, oegraphsim.OEFPType_MACCS166)
        fingerprints.append(fingerprint)

    first, second = fingerprints
    return oegraphsim.OETanimoto(first, second)
def calculate_fp(self):
    """Generate one fingerprint per molecule in ``self.act_list``.

    Every fingerprint is built with the type stored in ``self.args.fptype``
    and appended to ``self.fp_list`` in the same order as the molecules.
    Nothing is returned; ``self.fp_list`` is extended in place.
    """
    for molecule in self.act_list:
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, molecule, self.args.fptype)
        self.fp_list.append(fingerprint)
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# Example molecule parsed from a SMILES string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# @ <SNIPPET-MAKE-FP>
fpA = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fpA, mol)
fpB = oegraphsim.OEFingerPrint()
# fpB is generated with the fingerprint type object taken from fpA.
oegraphsim.OEMakeFP(fpB, mol, fpA.GetFPTypeBase())
# @ </SNIPPET-MAKE-FP>

# Report whether both fingerprints carry the same fingerprint type.
types_match = oegraphsim.OEIsSameFPType(fpA, fpB)
print("same fingerprint types" if types_match else "different fingerprint types")
from openeye import oechem
from openeye import oegraphsim


def _show_fp_type(fingerprint):
    """Print the type string of the given fingerprint."""
    print(fingerprint.GetFPTypeBase().GetFPTypeString())


# Example molecule parsed from a SMILES string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# Variant 1: dedicated path-fingerprint function without extra arguments.
# @ <SNIPPET-MAKE-PATH-1>
fp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fp, mol)
# @ </SNIPPET-MAKE-PATH-1>
_show_fp_type(fp)

# Variant 2: generic function with the path fingerprint type constant.
# @ <SNIPPET-MAKE-PATH-2>
oegraphsim.OEMakeFP(fp, mol, oegraphsim.OEFPType_Path)
# @ </SNIPPET-MAKE-PATH-2>
_show_fp_type(fp)

# Variant 3: dedicated function with every parameter spelled out.
# @ <SNIPPET-MAKE-PATH-3>
numbits, minbonds, maxbonds = 1024, 0, 5
oegraphsim.OEMakePathFP(
    fp,
    mol,
    numbits,
    minbonds,
    maxbonds,
    oegraphsim.OEFPAtomType_DefaultPathAtom,
    oegraphsim.OEFPBondType_DefaultPathBond,
)
# @ </SNIPPET-MAKE-PATH-3>
_show_fp_type(fp)
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim
import sys

# Usage: <script> <queryfile> <targetfile>
if len(sys.argv) != 3:
    oechem.OEThrow.Usage("%s <queryfile> <targetfile>" % sys.argv[0])

# Read the query molecule (the first molecule of the query file).
ifs = oechem.oemolistream()
if not ifs.open(sys.argv[1]):
    oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[1])
qmol = oechem.OEGraphMol()
# Stop early when no molecule could be read: otherwise the query
# fingerprint would silently be generated from an empty molecule and every
# score printed below would be meaningless.
if not oechem.OEReadMolecule(ifs, qmol):
    oechem.OEThrow.Fatal("Unable to read query molecule from %s" % sys.argv[1])

qfp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(qfp, qmol, oegraphsim.OEFPType_Path)

# The same stream object is re-opened on the target file.
if not ifs.open(sys.argv[2]):
    oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[2])

# @ <SNIPPET-DB-SIM-CALC-FROM-FILE>
# The database is created with the query's fingerprint type and each target
# molecule is added to it directly.
fpdb = oegraphsim.OEFPDatabase(qfp.GetFPTypeBase())
for tmol in ifs.GetOEGraphMols():
    fpdb.AddFP(tmol)

# Print the score of every database entry against the query fingerprint.
for score in fpdb.GetScores(qfp):
    print("%.3f" % score.GetScore())
# @ </SNIPPET-DB-SIM-CALC-FROM-FILE>
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# Example molecule parsed from a SMILES string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# Variant 1: dedicated lingo-fingerprint function.
# @ <SNIPPET-MAKE-LINGO-1>
fp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeLingoFP(fp, mol)
# @ </SNIPPET-MAKE-LINGO-1>
type_string = fp.GetFPTypeBase().GetFPTypeString()
print(type_string)

# Variant 2: generic function with the lingo fingerprint type constant.
# @ <SNIPPET-MAKE-LINGO-2>
oegraphsim.OEMakeFP(fp, mol, oegraphsim.OEFPType_Lingo)
# @ </SNIPPET-MAKE-LINGO-2>
type_string = fp.GetFPTypeBase().GetFPTypeString()
print(type_string)
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim


# @ <SNIPPET-BIT-STRING>
def GetBitString(fp):
    """Return the bits of ``fp`` as a string of '1' and '0' characters.

    The character at position ``b`` is '1' when ``fp.IsBitOn(b)`` is true
    and '0' otherwise; the string has ``fp.GetSize()`` characters.
    """
    return ''.join('1' if fp.IsBitOn(b) else '0' for b in range(fp.GetSize()))
# @ </SNIPPET-BIT-STRING>


# Print the MACCS166 fingerprint of an example molecule as a bit string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccncc1")
fp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(fp, mol, oegraphsim.OEFPType_MACCS166)
bit_string = GetBitString(fp)
print(bit_string)
def cluster_similar_molecules(
    smiles, fingerprint_type=oegraphsim.OEFPType_Tree, eps=0.5, min_samples=2
):
    """Cluster a set of molecules based on their similarity, using a
    Tanimoto distance metric and the `sklearn` DBSCAN clustering code.

    Notes
    -----
    This is based on the code by David Mobley:

    https://github.com/openforcefield/release-1-benchmarking/blob/master/QM_molecule_selection/divide_sets.ipynb

    Parameters
    ----------
    smiles: list of str
        The SMILES representations of the molecules to cluster.
    fingerprint_type
        The type of molecular fingerprint to use.
    eps: float
        The `eps` parameter to pass as an argument to DBSCAN while clustering.
    min_samples: int
        The `min_samples` parameter to pass as an argument to DBSCAN while
        clustering.

    Returns
    -------
    dict of int and list of str
        The SMILES patterns grouped by the integer cluster label assigned by
        DBSCAN (DBSCAN labels noisy samples ``-1``). An empty input list
        yields an empty dictionary.
    """
    assert isinstance(smiles, list)

    # DBSCAN cannot be fitted on an empty distance matrix, so there is
    # nothing to cluster and nothing to return.
    if not smiles:
        return {}

    # Build one fingerprint per input entry, in input order.
    fingerprints = []

    for smiles_pattern in smiles:

        oe_molecule = oechem.OEMol()
        oechem.OEParseSmiles(oe_molecule, smiles_pattern)

        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, oe_molecule, fingerprint_type)

        fingerprints.append(fingerprint)

    # Build the distance matrix (1 - Tanimoto similarity). The matrix is
    # symmetric with a zero diagonal, so each pair is evaluated only once
    # and mirrored across the diagonal.
    n_molecules = len(smiles)
    distance_matrix = np.zeros((n_molecules, n_molecules))

    for i in range(n_molecules):
        for j in range(i + 1, n_molecules):

            distance = 1.0 - oegraphsim.OETanimoto(fingerprints[i], fingerprints[j])

            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance

    # Cluster the data
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed")
    clustered_smiles = clustering.fit(distance_matrix)

    labels = clustered_smiles.labels_

    # Group the SMILES by label in a single pass, preserving input order
    # inside each cluster.
    smiles_by_cluster = {}

    for smiles_pattern, label in zip(smiles, labels):
        smiles_by_cluster.setdefault(label, []).append(smiles_pattern)

    return smiles_by_cluster
# NOTE(review): molA is used further down but is not created in this
# fragment; presumably it is defined earlier in the file -- confirm.
fpA = oegraphsim.OEFingerPrint()

molB = oechem.OEGraphMol()
oechem.OESmilesToMol(molB, "COc1cc2ccc(cc2c(=O)o1)NC(=N)N")
fpB = oegraphsim.OEFingerPrint()

molC = oechem.OEGraphMol()
oechem.OESmilesToMol(molC, "COc1c(c2ccc(cc2c(=O)o1)NC(=N)N)Cl")
fpC = oegraphsim.OEFingerPrint()


# @ <SNIPPET-FP-YULE-1>
def CalculateYule(fpA, fpB):
    """Return the Yule similarity of two fingerprints.

    The value is computed from the bit counts reported by
    ``oechem.OEGetBitCounts`` as::

        (bothAB * neitherAB - onlyA * onlyB)
        / (bothAB * neitherAB + onlyA * onlyB)

    When the denominator is zero (for example when both fingerprints have
    no bits set) the measure is undefined; 0.0 is returned in that case
    instead of raising ``ZeroDivisionError``.
    """
    onlyA, onlyB, bothAB, neitherAB = oechem.OEGetBitCounts(fpA, fpB)
    agreement = bothAB * neitherAB
    disagreement = onlyA * onlyB
    denominator = agreement + disagreement
    if denominator == 0:
        return 0.0
    return float(agreement - disagreement) / float(denominator)
# @ </SNIPPET-FP-YULE-1>


# @ <SNIPPET-FP-YULE-2>
oegraphsim.OEMakeFP(fpA, molA, oegraphsim.OEFPType_Path)
oegraphsim.OEMakeFP(fpB, molB, oegraphsim.OEFPType_Path)
oegraphsim.OEMakeFP(fpC, molC, oegraphsim.OEFPType_Path)

print("Yule(A,B) = %.3f" % CalculateYule(fpA, fpB))
print("Yule(A,C) = %.3f" % CalculateYule(fpA, fpC))
print("Yule(B,C) = %.3f" % CalculateYule(fpB, fpC))
# @ </SNIPPET-FP-YULE-2>
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# Example molecule parsed from a SMILES string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# Two fingerprints of the same type are generated from the same molecule.
fpA = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(fpA, mol, oegraphsim.OEFPType_Path)
fpB = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(fpB, mol, oegraphsim.OEFPType_Path)

# @ <SNIPPET-FP-COMPARE>
message = "same fingerprints" if fpA == fpB else "different fingerprints"
print(message)
# @ </SNIPPET-FP-COMPARE>
from openeye import oechem
from openeye import oegraphsim


def _report_fp_type(fingerprint):
    """Print the type string of the given fingerprint."""
    print(fingerprint.GetFPTypeBase().GetFPTypeString())


# Example molecule parsed from a SMILES string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# Variant 1: dedicated circular-fingerprint function without extra arguments.
# @ <SNIPPET-MAKE-CIRCULAR-1>
fp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeCircularFP(fp, mol)
# @ </SNIPPET-MAKE-CIRCULAR-1>
_report_fp_type(fp)

# Variant 2: generic function with the circular fingerprint type constant.
# @ <SNIPPET-MAKE-CIRCULAR-2>
oegraphsim.OEMakeFP(fp, mol, oegraphsim.OEFPType_Circular)
# @ </SNIPPET-MAKE-CIRCULAR-2>
_report_fp_type(fp)

# Variant 3: dedicated function with every parameter spelled out.
# @ <SNIPPET-MAKE-CIRCULAR-3>
numbits, minradius, maxradius = 1024, 0, 3
oegraphsim.OEMakeCircularFP(
    fp,
    mol,
    numbits,
    minradius,
    maxradius,
    oegraphsim.OEFPAtomType_DefaultCircularAtom,
    oegraphsim.OEFPBondType_DefaultCircularBond,
)
# @ </SNIPPET-MAKE-CIRCULAR-3>
_report_fp_type(fp)
from openeye import oechem
from openeye import oegraphsim

# Example molecule parsed from a SMILES string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# Variant 1: dedicated tree-fingerprint function without extra arguments.
# @ <SNIPPET-MAKE-TREE-1>
fp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeTreeFP(fp, mol)
# @ </SNIPPET-MAKE-TREE-1>
fp_type = fp.GetFPTypeBase()
print(fp_type.GetFPTypeString())

# Variant 2: generic function with the tree fingerprint type constant.
# @ <SNIPPET-MAKE-TREE-2>
oegraphsim.OEMakeFP(fp, mol, oegraphsim.OEFPType_Tree)
# @ </SNIPPET-MAKE-TREE-2>
fp_type = fp.GetFPTypeBase()
print(fp_type.GetFPTypeString())

# Variant 3: dedicated function with every parameter spelled out.
# @ <SNIPPET-MAKE-TREE-3>
numbits, minbonds, maxbonds = 1024, 0, 5
oegraphsim.OEMakeTreeFP(
    fp,
    mol,
    numbits,
    minbonds,
    maxbonds,
    oegraphsim.OEFPAtomType_DefaultTreeAtom,
    oegraphsim.OEFPBondType_DefaultTreeBond,
)
# @ </SNIPPET-MAKE-TREE-3>
fp_type = fp.GetFPTypeBase()
print(fp_type.GetFPTypeString())
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# @ <SNIPPET-FP-TANIMOTO>
# Parse the three example molecules from their SMILES strings.
molA = oechem.OEGraphMol()
oechem.OESmilesToMol(molA, "c1ccc2c(c1)c(c(oc2=O)OCCSC(=N)N)Cl")
molB = oechem.OEGraphMol()
oechem.OESmilesToMol(molB, "COc1cc2ccc(cc2c(=O)o1)NC(=N)N")
molC = oechem.OEGraphMol()
oechem.OESmilesToMol(molC, "COc1c(c2ccc(cc2c(=O)o1)NC(=N)N)Cl")

# Generate one MACCS166 fingerprint per molecule.
fpA = oegraphsim.OEFingerPrint()
fpB = oegraphsim.OEFingerPrint()
fpC = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(fpA, molA, oegraphsim.OEFPType_MACCS166)
oegraphsim.OEMakeFP(fpB, molB, oegraphsim.OEFPType_MACCS166)
oegraphsim.OEMakeFP(fpC, molC, oegraphsim.OEFPType_MACCS166)

# Report the Tanimoto score of every pair of fingerprints.
tanimotoAB = oegraphsim.OETanimoto(fpA, fpB)
print("Tanimoto(A,B) = %.3f" % tanimotoAB)
tanimotoAC = oegraphsim.OETanimoto(fpA, fpC)
print("Tanimoto(A,C) = %.3f" % tanimotoAC)
tanimotoBC = oegraphsim.OETanimoto(fpB, fpC)
print("Tanimoto(B,C) = %.3f" % tanimotoBC)
# @ </SNIPPET-FP-TANIMOTO>