def searchSubStructureWithFingerPrint(self, oeQueryMol, fpType, minFpScore, maxFpResults, matchOpts="graph-relaxed"):
    """Run a graph match search restricted to fingerprint pre-screened candidates.

    The fingerprint database registered under fpType is scored against the query
    using the Tanimoto measure, and the database indices of the scored hits are
    passed to searchSubStructure() as its candidate index list.

    Args:
        oeQueryMol (OEmol): OE graph molecule used as the query
        fpType (str): key selecting the fingerprint database used for pre-filtering
        minFpScore (float): minimum fingerprint score cutoff (not applied when falsy)
        maxFpResults (int): maximum number of finger print results returned
        matchOpts (str, optional): graph match criteria type (graph-strict|graph-relaxed).
            Defaults to "graph-relaxed".

    Returns:
        (bool, list): status, and the match list returned by searchSubStructure().
            On any failure the status is False and the list is empty.
    """
    matchL = []
    ok = True
    try:
        screenDb = self.__fpDbD[fpType]
        screenOpts = oegraphsim.OEFPDatabaseOptions(maxFpResults, oegraphsim.OESimMeasure_Tanimoto)
        if minFpScore:
            screenOpts.SetCutoff(minFpScore)
        # Collect the database indices of the fingerprint hits; these become the
        # only candidates examined by the graph match search.
        candidateIdxL = []
        for scoreItem in screenDb.GetSortedScores(oeQueryMol, screenOpts):
            candidateIdxL.append(scoreItem.GetIdx())
        ok, matchL = self.searchSubStructure(oeQueryMol, idxList=candidateIdxL, reverseFlag=False, matchOpts=matchOpts)
    except Exception as err:
        ok = False
        logger.exception("Failing with %s", str(err))
    return ok, matchL
def getFingerPrintScores(self, oeQueryMol, fpType, minFpScore, maxFpResults):
    """Score the input OE molecule against a fingerprint database and return the hits.

    Args:
        oeQueryMol (OEmol): OE graph molecule used as the query
        fpType (str): fingerprint type [TREE,PATH,MACCS,CIRCULAR,LINGO]
        minFpScore (float): min fingerprint match score (not applied when falsy)
        maxFpResults (int): maximum number of finger print results returned

    Returns:
        (bool, list): status, finger match list of type (MatchResults). Each result
            carries the molecule title as ccId, the score and the database index.
            On any failure the status is False and the list is empty.
    """
    resultL = []
    ok = True
    try:
        scoreDb = self.__fpDbD[fpType]
        scoreOpts = oegraphsim.OEFPDatabaseOptions(maxFpResults, oegraphsim.OESimMeasure_Tanimoto)
        if minFpScore:
            scoreOpts.SetCutoff(minFpScore)
        # Accumulate into a scratch list so that a failure part way through the
        # scan still leaves the returned list empty.
        collectedL = []
        for scoreItem in scoreDb.GetSortedScores(oeQueryMol, scoreOpts):
            dbIdx = scoreItem.GetIdx()
            title = self.__oeMolDb.GetTitle(dbIdx)
            collectedL.append(MatchResults(ccId=title, searchType="fp", fpType=fpType, fpScore=scoreItem.GetScore(), oeIdx=dbIdx))
        resultL = collectedL
    except Exception as err:
        ok = False
        logger.exception("Failing with %s", str(err))
    return ok, resultL
def searchFingerPrints(self, oeQueryMol, fpType, minFpScore=None, maxFpResults=50, annotateMols=False, verbose=False):
    """Return fingerprint search results, including the hit molecules, for the input OE molecule.

    Args:
        oeQueryMol (OEmol): OE graph molecule used as the query
        fpType (str): key selecting the fingerprint database to search
        minFpScore (float, optional): minimum fingerprint score cutoff (not applied when falsy).
            Defaults to None.
        maxFpResults (int, optional): maximum number of finger print results returned. Defaults to 50.
        annotateMols (bool, optional): store a similarity summary as SD data (tag = fpType)
            on each hit molecule. Defaults to False.
        verbose (bool, optional): log fingerprint database details and timing. Defaults to False.

    Returns:
        (bool, list): status, finger match list of type (MatchResults). Status is False when
            no fingerprint database is available for fpType or when the search raises.
    """
    hL = []
    retStatus = True
    try:
        fpDb = self.__fpDbD.get(fpType)
        if not fpDb:
            # Nothing to search for this fingerprint type.
            return False, hL
        #
        opts = oegraphsim.OEFPDatabaseOptions(maxFpResults, oegraphsim.OESimMeasure_Tanimoto)
        if minFpScore:
            opts.SetCutoff(minFpScore)
        #
        if verbose:
            logger.info("Using %d fingerprint %s type %s", fpDb.NumFingerPrints(), fpType, fpDb.GetFPTypeBase().GetFPTypeString())
            startTime = time.time()
        #
        scores = fpDb.GetSortedScores(oeQueryMol, opts)
        for si in scores:
            # Allocate a fresh molecule for every hit: each MatchResults keeps a
            # reference to its oeMol, so reusing one instance across iterations
            # would leave every result pointing at the last molecule read.
            oeMol = oechem.OEGraphMol()
            if not self.__oeMolDb.GetMolecule(oeMol, si.GetIdx()):
                continue
            ccId = self.__oeMolDb.GetTitle(si.GetIdx())
            if annotateMols:
                tS = "For %s index %r %r similarity score %.4f " % (ccId, si.GetIdx(), self.__idxTitleD[si.GetIdx()], si.GetScore())
                oechem.OESetSDData(oeMol, fpType, tS)
            hL.append(MatchResults(ccId=ccId, oeMol=oeMol, searchType="fp", fpType=fpType, fpScore=si.GetScore()))
        if verbose:
            endTime = time.time()
            logger.info("Fingerprint %s returning %d hits (%.4f sec)", fpType, len(hL), endTime - startTime)
    except Exception as e:
        retStatus = False
        logger.exception("Failing fpType %r with %s", fpType, str(e))
    return retStatus, hL
def main(argv=[__name__]):
    """Search a fingerprint database file for molecules similar to a query molecule.

    The query is read from the "-query" file and scored against the fingerprint
    database opened from "-fpdbfname". Each hit whose molecule can be fetched from
    the "-molfname" molecule database is tagged with its similarity score and
    written to the "-out" file. Progress and timings are reported through
    oechem.OEThrow.Info, and setup failures through oechem.OEThrow.Fatal.

    Args:
        argv (list, optional): command line arguments. Defaults to [__name__].

    Returns:
        int: 0, both when command line parsing fails and after the search completes
    """
    interface = oechem.OEInterface()
    oechem.OEConfigure(interface, InterfaceData)

    # Register the fingerprint search options (default limit 10, Tanimoto) and the
    # memory type option on the command line interface.
    defaultOpts = oegraphsim.OEFPDatabaseOptions(10, oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(interface, defaultOpts)
    oegraphsim.OEConfigureFPDatabaseMemoryType(interface)

    parsedOk = oechem.OEParseCommandLine(interface, argv)
    if not parsedOk:
        return 0

    queryPath = interface.GetString("-query")
    molDbPath = interface.GetString("-molfname")
    fpDbPath = interface.GetString("-fpdbfname")
    outPath = interface.GetString("-out")

    # initialize databases
    stopwatch = oechem.OEWallTimer()
    stopwatch.Start()

    queryStream = oechem.oemolistream()
    if not queryStream.open(queryPath):
        oechem.OEThrow.Fatal("Cannot open input file!")

    queryMol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(queryStream, queryMol):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    molDb = oechem.OEMolDatabase()
    if not molDb.Open(molDbPath):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    memoryType = oegraphsim.OEGetFPDatabaseMemoryType(interface)
    fastFpDb = oegraphsim.OEFastFPDatabase(fpDbPath, memoryType)
    if not fastFpDb.IsValid():
        oechem.OEThrow.Fatal("Cannot open fingerprint database!")
    fpCount = fastFpDb.NumFingerPrints()
    memoryTypeLabel = fastFpDb.GetMemoryTypeString()

    outStream = oechem.oemolostream()
    if not outStream.open(outPath):
        oechem.OEThrow.Fatal("Cannot open output file!")

    if not oegraphsim.OEAreCompatibleDatabases(molDb, fastFpDb):
        oechem.OEThrow.Fatal("Databases are not compatible!")

    oechem.OEThrow.Info("%5.2f sec to initialize databases" % stopwatch.Elapsed())

    fpTypeBase = fastFpDb.GetFPTypeBase()
    oechem.OEThrow.Info("Using fingerprint type %s" % fpTypeBase.GetFPTypeString())

    searchOpts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(searchOpts, interface)

    # search fingerprint database
    stopwatch.Start()
    searchScores = fastFpDb.GetSortedScores(queryMol, searchOpts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints %s" % (stopwatch.Elapsed(), fpCount, memoryTypeLabel))

    # write the hits, skipping any whose molecule cannot be fetched
    stopwatch.Start()
    writtenCount = 0
    hitMol = oechem.OEGraphMol()
    for scoreItem in searchScores:
        if not molDb.GetMolecule(hitMol, scoreItem.GetIdx()):
            continue
        writtenCount += 1
        scoreText = "%.2f" % scoreItem.GetScore()
        oechem.OESetSDData(hitMol, "Similarity score", scoreText)
        oechem.OEWriteMolecule(outStream, hitMol)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" % (stopwatch.Elapsed(), writtenCount))

    return 0
def main(argv=[__name__]):
    """Build an in-memory fingerprint database and search it for molecules similar to a query.

    The query is read from the "-query" file. A fingerprint of the configured type is
    generated for every entry of the "-molfname" molecule database, the query is scored
    against those fingerprints, and each hit whose molecule can be fetched is written
    to the "-out" file. Progress and timings are reported through oechem.OEThrow.Info,
    and setup failures through oechem.OEThrow.Fatal.

    Args:
        argv (list, optional): command line arguments. Defaults to [__name__].

    Returns:
        int: 0, both when command line parsing fails and after the search completes
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    # Register the search options (default limit 10, Tanimoto) and the fingerprint
    # type option (default: tree fingerprint) on the command line interface.
    defopts = oegraphsim.OEFPDatabaseOptions(10, oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFingerPrint(itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ofname = itf.GetString("-out")

    # initialize databases
    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())

    fpdb = oegraphsim.OEFPDatabase(fptype)

    emptyfp = oegraphsim.OEFingerPrint()
    emptyfp.SetFPTypeBase(fptype)

    # Add one fingerprint per molecule database index. An empty fingerprint stands in
    # for entries that cannot be read, so the indices of the scores returned below
    # can be used directly to fetch molecules from moldb.
    nrmols = moldb.GetMaxMolIdx()
    mol = oechem.OEGraphMol()
    for idx in range(nrmols):
        if moldb.GetMolecule(mol, idx):
            fpdb.AddFP(mol)
        else:
            fpdb.AddFP(emptyfp)

    nrfps = fpdb.NumFingerPrints()
    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database
    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints" % (timer.Elapsed(), nrfps))

    # Count the molecules actually written. Hits whose molecule cannot be fetched are
    # skipped, so the configured result limit is not a reliable hit count.
    timer.Start()
    nrhits = 0
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            nrhits += 1
            oechem.OEWriteMolecule(ofs, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" % (timer.Elapsed(), nrhits))

    return 0