def PrintTanimoto(molA, molB, minb, maxb):
    """Print the Tanimoto similarity of two molecules' path fingerprints.

    Both fingerprints are built with ``OEMakePathFP`` using 2048 bits and
    the default path atom/bond typing; only the path-length bounds vary.

    :param molA: first molecule handed to ``OEMakePathFP``
    :param molB: second molecule handed to ``OEMakePathFP``
    :param minb: lower path-length bound forwarded to ``OEMakePathFP``
    :param maxb: upper path-length bound forwarded to ``OEMakePathFP``
    :return: None, the score is written to stdout
    """
    bit_count = 2048
    atom_typing = oegraphsim.OEFPAtomType_DefaultPathAtom
    bond_typing = oegraphsim.OEFPBondType_DefaultPathBond

    # Build one fingerprint per molecule with identical settings so the
    # two are directly comparable.
    fingerprints = []
    for molecule in (molA, molB):
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakePathFP(
            fingerprint, molecule, bit_count, minb, maxb, atom_typing, bond_typing
        )
        fingerprints.append(fingerprint)

    score = oegraphsim.OETanimoto(fingerprints[0], fingerprints[1])
    print("Tanimoto(A,B) = %.3f" % score)
def PrintTanimoto(molA, molB, atype, btype):
    """Print the Tanimoto similarity of two molecules' path fingerprints.

    Both fingerprints are built with ``OEMakePathFP`` using 2048 bits and
    path-length bounds of 0 and 5; only the atom/bond typing varies.

    :param molA: first molecule handed to ``OEMakePathFP``
    :param molB: second molecule handed to ``OEMakePathFP``
    :param atype: atom typing option forwarded to ``OEMakePathFP``
    :param btype: bond typing option forwarded to ``OEMakePathFP``
    :return: None, the score is written to stdout
    """
    bit_count = 2048
    shortest_path = 0
    longest_path = 5

    def build_fingerprint(molecule):
        # Same size and path bounds for every molecule; typing comes from
        # the caller.
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakePathFP(
            fingerprint, molecule, bit_count, shortest_path, longest_path, atype, btype
        )
        return fingerprint

    first = build_fingerprint(molA)
    second = build_fingerprint(molB)
    print("Tanimoto(A,B) = %.3f" % oegraphsim.OETanimoto(first, second))
def calculate_mol_similarity(molA, molB):
    """
    Compute the Tanimoto similarity of two molecules from their MACCS166
    fingerprints.

    NOTE: candidate for relocation to utils/openeye.py or openmoltools.

    :param molA: oemol object of molecule A
    :param molB: oemol object of molecule B
    :return: float, tanimoto score of the two molecules, between 0 and 1
    """
    fingerprint_kind = oegraphsim.OEFPType_MACCS166

    # Allocate both fingerprints first, then fill each one from its molecule.
    first, second = (oegraphsim.OEFingerPrint() for _ in range(2))
    oegraphsim.OEMakeFP(first, molA, fingerprint_kind)
    oegraphsim.OEMakeFP(second, molB, fingerprint_kind)

    similarity = oegraphsim.OETanimoto(first, second)
    return similarity
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import sys
from openeye import oechem
from openeye import oegraphsim

# @ <SNIPPET-CALC-FROM-FILE>
# Expect exactly two arguments: the query file and the target file.
if len(sys.argv) != 3:
    oechem.OEThrow.Usage("%s <queryfile> <targetfile>" % sys.argv[0])

# Read the first molecule of the query file and fingerprint it.
ifs = oechem.oemolistream()
query_opened = ifs.open(sys.argv[1])
if not query_opened:
    oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[1])

qmol = oechem.OEGraphMol()
query_read = oechem.OEReadMolecule(ifs, qmol)
if not query_read:
    oechem.OEThrow.Fatal("Unable to read query molecule")

qfp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(qfp, qmol, oegraphsim.OEFPType_Path)

# The same stream object is re-opened on the target file.
target_opened = ifs.open(sys.argv[2])
if not target_opened:
    oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[2])

# One fingerprint object is reused for every target molecule; each
# similarity to the query is printed on its own line.
tfp = oegraphsim.OEFingerPrint()
for tmol in ifs.GetOEGraphMols():
    oegraphsim.OEMakeFP(tfp, tmol, oegraphsim.OEFPType_Path)
    similarity = oegraphsim.OETanimoto(qfp, tfp)
    print("%.3f" % similarity)
# @ </SNIPPET-CALC-FROM-FILE>
def cluster_similar_molecules(
    smiles, fingerprint_type=oegraphsim.OEFPType_Tree, eps=0.5, min_samples=2
):
    """Cluster a set of molecules by fingerprint similarity.

    A pairwise distance matrix of ``1 - Tanimoto`` is built from the
    molecules' fingerprints and passed to the `sklearn` DBSCAN clustering
    code as a precomputed metric.

    Notes
    -----
    This is based on the code by David Mobley:

    https://github.com/openforcefield/release-1-benchmarking/blob/master/QM_molecule_selection/divide_sets.ipynb

    Parameters
    ----------
    smiles: list of str
        The SMILES representations of the molecules to cluster.
    fingerprint_type
        The type of molecular fingerprint to use.
    eps: float
        The `eps` parameter to pass as an argument to DBSCAN while clustering.
    min_samples: int
        The `min_samples` parameter to pass as an argument to DBSCAN while
        clustering.

    Returns
    -------
    dict of int and list of str
        The SMILES patterns grouped by the integer cluster label assigned by
        DBSCAN (scikit-learn documents the label ``-1`` as noise). An empty
        dict is returned when `smiles` is empty.

    Raises
    ------
    ValueError
        If one of the SMILES patterns cannot be parsed.
    """
    assert isinstance(smiles, list)

    # DBSCAN cannot be fitted on an empty matrix; nothing to cluster.
    if not smiles:
        return {}

    # Build fingerprints (once per distinct pattern).
    fingerprints = {}

    for smiles_pattern in smiles:

        if smiles_pattern in fingerprints:
            continue

        oe_molecule = oechem.OEMol()

        # A failed parse would otherwise be fingerprinted and clustered as
        # if it were a real molecule.
        if not oechem.OEParseSmiles(oe_molecule, smiles_pattern):
            raise ValueError(
                "Unable to parse SMILES pattern: %r" % (smiles_pattern,)
            )

        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, oe_molecule, fingerprint_type)

        fingerprints[smiles_pattern] = fingerprint

    # Build the distance matrix. The Tanimoto coefficient is symmetric, so
    # only the upper triangle is computed and then mirrored; the diagonal
    # stays at zero.
    n_molecules = len(smiles)
    distance_matrix = np.zeros((n_molecules, n_molecules))

    for i, smiles_i in enumerate(smiles):

        fingerprint_i = fingerprints[smiles_i]

        for j in range(i + 1, n_molecules):

            distance = 1.0 - oegraphsim.OETanimoto(
                fingerprint_i, fingerprints[smiles[j]]
            )

            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance

    # Cluster the data
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed")
    clustered_smiles = clustering.fit(distance_matrix)

    labels = clustered_smiles.labels_

    # Group the patterns by label in a single pass, preserving input order
    # within each cluster.
    smiles_by_cluster = {}

    for smiles_pattern, label in zip(smiles, labels):
        smiles_by_cluster.setdefault(label, []).append(smiles_pattern)

    return smiles_by_cluster
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# @ <SNIPPET-FP-TANIMOTO>


def _maccs_fingerprint(smiles):
    """Return ``(molecule, fingerprint)`` for a SMILES string.

    The fingerprint is created with ``OEMakeFP`` using the MACCS166 type.
    """
    molecule = oechem.OEGraphMol()
    oechem.OESmilesToMol(molecule, smiles)
    fingerprint = oegraphsim.OEFingerPrint()
    oegraphsim.OEMakeFP(fingerprint, molecule, oegraphsim.OEFPType_MACCS166)
    return molecule, fingerprint


molA, fpA = _maccs_fingerprint("c1ccc2c(c1)c(c(oc2=O)OCCSC(=N)N)Cl")
molB, fpB = _maccs_fingerprint("COc1cc2ccc(cc2c(=O)o1)NC(=N)N")
molC, fpC = _maccs_fingerprint("COc1c(c2ccc(cc2c(=O)o1)NC(=N)N)Cl")

# Report every pairwise similarity among the three fingerprints.
score_ab = oegraphsim.OETanimoto(fpA, fpB)
print("Tanimoto(A,B) = %.3f" % score_ab)
score_ac = oegraphsim.OETanimoto(fpA, fpC)
print("Tanimoto(A,C) = %.3f" % score_ac)
score_bc = oegraphsim.OETanimoto(fpB, fpC)
print("Tanimoto(B,C) = %.3f" % score_bc)
# @ </SNIPPET-FP-TANIMOTO>
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from __future__ import print_function
from openeye import oechem
from openeye import oegraphsim

mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# @ <SNIPPET-PATH-FP-TYPE>
# Settings shared by both fingerprints; only the bit count differs.
minbonds = 0
maxbonds = 5
atype = oegraphsim.OEFPAtomType_DefaultPathAtom
btype = oegraphsim.OEFPBondType_DefaultPathBond

numbits = 1024
fpA = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fpA, mol, numbits, minbonds, maxbonds, atype, btype)

numbits = 2048
fpB = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fpB, mol, numbits, minbonds, maxbonds, atype, btype)

# Show whether the toolkit treats the two fingerprints as the same type,
# then the similarity value it reports for them.
same_type = oegraphsim.OEIsSameFPType(fpA, fpB)
print("same fingerprint types = %r" % same_type)
print(oegraphsim.OETanimoto(fpA, fpB))
# @ </SNIPPET-PATH-FP-TYPE>