def PrintTanimoto(molA, molB, minb, maxb):
    """Print the Tanimoto score of two molecules using path fingerprints.

    Both molecules are encoded as 2048-bit path fingerprints with the
    default path atom and bond typing, so only the path-length range
    varies between calls.

    :param molA: first molecule handed to ``OEMakePathFP``
    :param molB: second molecule handed to ``OEMakePathFP``
    :param minb: forwarded to ``OEMakePathFP`` as the lower path-length bound
    :param maxb: forwarded to ``OEMakePathFP`` as the upper path-length bound
    """
    fingerprint_size = 2048
    atom_typing = oegraphsim.OEFPAtomType_DefaultPathAtom
    bond_typing = oegraphsim.OEFPBondType_DefaultPathBond

    # Build the two fingerprints with identical settings so they are comparable.
    fingerprints = []
    for molecule in (molA, molB):
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakePathFP(
            fingerprint,
            molecule,
            fingerprint_size,
            minb,
            maxb,
            atom_typing,
            bond_typing,
        )
        fingerprints.append(fingerprint)

    similarity = oegraphsim.OETanimoto(fingerprints[0], fingerprints[1])
    print("Tanimoto(A,B) = %.3f" % similarity)
def PrintTanimoto(molA, molB, atype, btype):
    """Print the Tanimoto score of two molecules for a given atom/bond typing.

    Both molecules are encoded as 2048-bit path fingerprints over the fixed
    range 0 to 5 (passed to ``OEMakePathFP`` as its min/max arguments), so
    only the atom and bond typing varies between calls.

    :param molA: first molecule handed to ``OEMakePathFP``
    :param molB: second molecule handed to ``OEMakePathFP``
    :param atype: atom typing forwarded to ``OEMakePathFP``
    :param btype: bond typing forwarded to ``OEMakePathFP``
    """
    fingerprint_size = 2048
    shortest_path, longest_path = 0, 5

    def build_fingerprint(molecule):
        # Same size and path range for both molecules keeps them comparable.
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakePathFP(
            fingerprint,
            molecule,
            fingerprint_size,
            shortest_path,
            longest_path,
            atype,
            btype,
        )
        return fingerprint

    first = build_fingerprint(molA)
    second = build_fingerprint(molB)
    print("Tanimoto(A,B) = %.3f" % oegraphsim.OETanimoto(first, second))
def calculate_mol_similarity(molA, molB):
    """
    Compute the Tanimoto similarity of two oemol objects from their
    MACCS166 fingerprints.

    TODO: this helper should be moved to utils/openeye.py or openmoltools.

    :param molA: oemol object of molecule A
    :param molB: oemol object of molecule B
    :return: float, tanimoto score of the two molecules, between 0 and 1
    """
    fingerprint_type = oegraphsim.OEFPType_MACCS166

    # One fingerprint per input molecule, built with the same type.
    fingerprints = []
    for molecule in (molA, molB):
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, molecule, fingerprint_type)
        fingerprints.append(fingerprint)

    fingerprint_a, fingerprint_b = fingerprints
    return oegraphsim.OETanimoto(fingerprint_a, fingerprint_b)
def calculate_fp(self):
    """Append one fingerprint per molecule in ``self.act_list`` to ``self.fp_list``.

    Each fingerprint is generated with the type stored in
    ``self.args.fptype``; fingerprints are appended in the same order as the
    molecules are iterated.
    """
    for molecule in self.act_list:
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, molecule, self.args.fptype)
        self.fp_list.append(fingerprint)
def circular_wrapper(smi, num_bits=1024, min_radius=2, max_radius=2):
    """Return the bit string of a circular fingerprint built from a SMILES.

    :param smi: SMILES string parsed with ``OESmilesToMol``
    :param num_bits: fingerprint size forwarded to ``OEMakeCircularFP``
    :param min_radius: minimum radius forwarded to ``OEMakeCircularFP``
    :param max_radius: maximum radius forwarded to ``OEMakeCircularFP``
    :return: whatever ``get_bit_string`` produces for the fingerprint
    """
    molecule = oechem.OEGraphMol()
    # NOTE(review): the result of OESmilesToMol is not checked, so a SMILES
    # that fails to parse is still fingerprinted - confirm this is intended.
    oechem.OESmilesToMol(molecule, smi)

    # NOTE(review): the *path* default atom/bond typing is used here for a
    # circular fingerprint - confirm this is deliberate.
    atom_typing = oegraphsim.OEFPAtomType_DefaultPathAtom
    bond_typing = oegraphsim.OEFPBondType_DefaultPathBond

    fingerprint = oegraphsim.OEFingerPrint()
    oegraphsim.OEMakeCircularFP(
        fingerprint,
        molecule,
        num_bits,
        min_radius,
        max_radius,
        atom_typing,
        bond_typing,
    )

    bit_string = get_bit_string(fingerprint)
    return bit_string
def maccs_wrapper(smi):
    """Return the bit string of the MACCS166 fingerprint built from a SMILES.

    :param smi: SMILES string parsed with ``OESmilesToMol``
    :return: whatever ``get_bit_string`` produces for the fingerprint
    """
    molecule = oechem.OEGraphMol()
    # NOTE(review): the result of OESmilesToMol is not checked, so a SMILES
    # that fails to parse is still fingerprinted - confirm this is intended.
    oechem.OESmilesToMol(molecule, smi)

    fingerprint = oegraphsim.OEFingerPrint()
    oegraphsim.OEMakeMACCS166FP(fingerprint, molecule)

    bit_string = get_bit_string(fingerprint)
    return bit_string
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# Example molecule: benzene, parsed from its SMILES string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# The code between the snippet markers is kept verbatim because the markers
# delimit a region that is extracted elsewhere.
# @ <SNIPPET-MAKE-FP>
fpA = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fpA, mol)
fpB = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(fpB, mol, fpA.GetFPTypeBase())
# @ </SNIPPET-MAKE-FP>

# fpB was built from fpA's own type object; report whether the two
# fingerprints compare as the same type.
same_type = oegraphsim.OEIsSameFPType(fpA, fpB)
print("same fingerprint types" if same_type else "different fingerprint types")
# current license or subscription to the applicable OpenEye offering. # THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT # NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be # liable for any damages or liability in connection with the Sample Code # or its use. from openeye import oechem from openeye import oegraphsim mol = oechem.OEGraphMol() oechem.OESmilesToMol(mol, "c1ccccc1") # @ <SNIPPET-MAKE-PATH-1> fp = oegraphsim.OEFingerPrint() oegraphsim.OEMakePathFP(fp, mol) # @ </SNIPPET-MAKE-PATH-1> print(fp.GetFPTypeBase().GetFPTypeString()) # @ <SNIPPET-MAKE-PATH-2> oegraphsim.OEMakeFP(fp, mol, oegraphsim.OEFPType_Path) # @ </SNIPPET-MAKE-PATH-2> print(fp.GetFPTypeBase().GetFPTypeString()) # @ <SNIPPET-MAKE-PATH-3> numbits = 1024 minbonds = 0 maxbonds = 5
def cluster_similar_molecules(
    smiles, fingerprint_type=oegraphsim.OEFPType_Tree, eps=0.5, min_samples=2
):
    """Cluster a set of molecules by fingerprint similarity.

    A pairwise distance matrix of ``1 - Tanimoto`` is built from OpenEye
    fingerprints and passed to the `sklearn` DBSCAN clustering code as a
    precomputed metric.

    Notes
    -----
    This is based on the code by David Mobley:

    https://github.com/openforcefield/release-1-benchmarking/blob/master/QM_molecule_selection/divide_sets.ipynb

    Parameters
    ----------
    smiles: list of str
        The SMILES representations of the molecules to cluster.
    fingerprint_type
        The type of molecular fingerprint to use.
    eps: float
        The `eps` parameter to pass as an argument to DBSCAN while clustering.
    min_samples: int
        The `min_samples` parameter to pass as an argument to DBSCAN while
        clustering.

    Returns
    -------
    dict of int and list of str
        The SMILES patterns grouped by the integer cluster label assigned by
        DBSCAN (DBSCAN uses the label ``-1`` for noise samples). An empty
        input yields an empty dictionary.
    """
    assert isinstance(smiles, list)

    # DBSCAN cannot be fitted on zero samples, so an empty input is answered
    # directly instead of failing inside scikit-learn.
    if not smiles:
        return {}

    # Build one fingerprint per distinct SMILES pattern; duplicates reuse it.
    fingerprints = {}
    for smiles_pattern in smiles:
        if smiles_pattern in fingerprints:
            continue

        oe_molecule = oechem.OEMol()
        # NOTE(review): the parse result is not checked, so an unparsable
        # SMILES is still fingerprinted - confirm this is intended.
        oechem.OEParseSmiles(oe_molecule, smiles_pattern)

        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, oe_molecule, fingerprint_type)
        fingerprints[smiles_pattern] = fingerprint

    ordered_fingerprints = [fingerprints[pattern] for pattern in smiles]

    # Build the distance matrix. The Tanimoto score is symmetric, so each
    # pair is evaluated once and mirrored; the diagonal stays zero.
    n_molecules = len(smiles)
    distance_matrix = np.zeros((n_molecules, n_molecules))

    for i, fingerprint_i in enumerate(ordered_fingerprints):
        for j in range(i + 1, n_molecules):
            distance = 1.0 - oegraphsim.OETanimoto(
                fingerprint_i, ordered_fingerprints[j]
            )
            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance

    # Cluster the data
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed")
    labels = clustering.fit(distance_matrix).labels_

    # Group the SMILES by label in a single pass, preserving input order
    # within each cluster.
    smiles_by_cluster = {}
    for smiles_pattern, label in zip(smiles, labels):
        smiles_by_cluster.setdefault(label, []).append(smiles_pattern)

    return smiles_by_cluster
def main(argv=None):
    """Search a molecule database for the molecules most similar to a query.

    Command-line options are parsed through an ``OEInterface`` configured
    from ``InterfaceData`` plus the fingerprint and fingerprint-database
    option groups. The query is the first molecule read from ``-query``, the
    database is opened from ``-molfname``, and the hits are written to
    ``-out`` in sorted-score order.

    :param argv: command-line arguments; defaults to ``[__name__]`` when
        omitted or ``None``
    :return: 0 on every return path visible here; failures to open or read
        the inputs are reported through ``oechem.OEThrow.Fatal``
    """
    if argv is None:
        argv = [__name__]

    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    # Register the database-search and fingerprint options with their defaults.
    defopts = oegraphsim.OEFPDatabaseOptions(10, oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFingerPrint(
        itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ofname = itf.GetString("-out")

    # initialize databases

    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())
    fpdb = oegraphsim.OEFPDatabase(fptype)

    # A typed but empty fingerprint is stored for every molecule that cannot
    # be read, so fingerprint indices stay aligned with molecule indices
    # (the score index is used to fetch the hit from moldb below).
    emptyfp = oegraphsim.OEFingerPrint()
    emptyfp.SetFPTypeBase(fptype)

    nrmols = moldb.GetMaxMolIdx()

    mol = oechem.OEGraphMol()
    for idx in range(nrmols):
        if moldb.GetMolecule(mol, idx):
            fpdb.AddFP(mol)
        else:
            fpdb.AddFP(emptyfp)

    nrfps = fpdb.NumFingerPrints()
    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database

    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints" % (timer.Elapsed(), nrfps))

    # Write the hits and count the molecules actually written: the configured
    # limit is only a cap and does not equal the number of hits when fewer
    # scores are returned or a molecule cannot be fetched.
    timer.Start()
    nrhits = 0
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            oechem.OEWriteMolecule(ofs, hit)
            nrhits += 1
    oechem.OEThrow.Info("%5.2f sec to write %d hits" % (timer.Elapsed(), nrhits))

    return 0