def main():
    """Annotate a CSV of SMILES with the best FastROCS score for each row.

    Opens the shape database named by ``args.d``, then for every entry of
    the ``smiles`` column in the CSV ``args.i`` records the TanimotoCombo of
    the first score returned by ``GetSortedScores(query, 1)``.  Rows for
    which no score could be obtained (query construction failed, the search
    raised, or no hit came back) get NaN.  The table, with the added
    ``fastroc`` column, is written to ``args.o``.

    ``getargs`` and ``FromString`` are helpers defined elsewhere in the
    file; presumably they parse the command line and build query molecules
    from a SMILES string -- confirm against their definitions.

    Returns:
        0, including when no supported GPU is available.
    """
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    args = getargs()
    dbname = args.d

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    df = pd.read_csv(args.i)
    res = []
    for smile in tqdm(df.loc[:, 'smiles'].tolist()):
        # NaN stands for "no score"; it is replaced only by a real top hit.
        best = np.nan
        try:
            q = FromString(smile)[0]
            for score in dbase.GetSortedScores(q, 1):
                best = score.GetTanimotoCombo()
                break
        except KeyboardInterrupt:
            print("caught")
            # SystemExit is always available, unlike the site-provided exit().
            raise SystemExit
        except Exception:
            # Best effort: one bad row must not abort the whole run.
            best = np.nan
        res.append(best)

    df['fastroc'] = res
    print(df.head())
    df.to_csv(args.o, sep=',', index=False)
    return 0
def run(self):
    """
    Load the molecule file named by self.dbname into self.moldb, build
    self.shapedb on top of it, and set self.loadedEvent once finished.
    Progress and timing are reported on stderr.
    """
    stopwatch = oechem.OEWallTimer()
    path = self.dbname
    sys.stderr.write("Opening database file %s ...\n" % path)

    if not self.moldb.Open(path):
        oechem.OEThrow.Fatal("Unable to open '%s'" % path)

    progress = oechem.OEThreadedDots(10000, 200, "conformers")
    initialized = self.shapedb.Open(self.moldb, progress)
    if not initialized:
        oechem.OEThrow.Fatal(
            "Unable to initialize OEShapeDatabase on '%s'" % path)

    progress.Total()
    sys.stderr.write("%s seconds to load database\n" % stopwatch.Elapsed())

    # Signal that the database is loaded.
    self.loadedEvent.set()
def main(argv=[__name__]):
    """Generate a fast fingerprint database file for a molecule file.

    Reads the molecule file name from ``-in`` and the fingerprint database
    file name from ``-fpdb`` (which must carry the ``fpbin`` extension),
    creates the molecule index file when it does not exist yet, and writes
    the fingerprint database.

    Returns 1 when the command line cannot be parsed, 0 otherwise.
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)
    default_fptype = oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree)
    oegraphsim.OEConfigureFingerPrint(itf, default_fptype)

    if not oechem.OEParseCommandLine(itf, argv):
        return 1

    mol_path = itf.GetString("-in")
    fp_path = itf.GetString("-fpdb")

    extension = oechem.OEGetFileExtension(fp_path)
    if extension != "fpbin":
        oechem.OEThrow.Fatal(
            "Fingerprint database file should have '.fpbin' file extension!")

    # Create the index only when it is missing; failure is just a warning.
    index_path = oechem.OEGetMolDatabaseIdxFileName(mol_path)
    if not os.path.exists(index_path) and not oechem.OECreateMolDatabaseIdx(mol_path):
        oechem.OEThrow.Warning("Unable to create %s molecule index file" % index_path)

    oechem.OEThrow.Info("Using %s index molecule file" % index_path)

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mol_path):
        oechem.OEThrow.Fatal("Cannot open molecule database file!")

    mol_count = moldb.GetMaxMolIdx()

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())

    opts = oegraphsim.OECreateFastFPDatabaseOptions(fptype)
    tracer = oechem.OEDots(100000, 1000, "fingerprints")
    opts.SetTracer(tracer)
    oechem.OEThrow.Info("Generating fingerprints with %d threads" % opts.GetNumProcessors())

    stopwatch = oechem.OEWallTimer()
    created = oegraphsim.OECreateFastFPDatabaseFile(fp_path, mol_path, opts)
    if not created:
        oechem.OEThrow.Fatal("Cannot create fingerprint database file!")

    elapsed_and_count = (stopwatch.Elapsed(), mol_count)
    oechem.OEThrow.Info("%5.2f secs to generate %d fingerprints" % elapsed_and_count)

    return 0
def main(argv=[__name__]):
    """Search a FastROCS shape database with every conformer of each query.

    Command line: ``<database> <query> [<query> ...] [--nHits N]
    [--cutoff X] [--tversky]``.  For each molecule in each query file a
    result file named ``<title>_results.<ext>`` is written (the running
    molecule index replaces an empty title).  Every hit is tagged with the
    query conformer index and its Shape/Color/Combo Tanimoto scores and is
    transformed into the overlay pose before being written.

    Returns:
        0, including when no supported GPU is available.
    """
    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument(
        'database',
        help='File containing the database molecules to be searched '
             '(format not restricted to *.oeb).')
    parser.add_argument(
        'query', default=[], nargs='+',
        help='File containing the query molecule(s) to be searched '
             '(format not restricted to *.oeb).')
    parser.add_argument(
        '--nHits', dest='nHits', type=int, default=100,
        help='Number of hits to return (default = 100).')
    # SUPPRESS keeps the attribute off the namespace unless the flag is
    # given, which is what the hasattr() checks below rely on.
    parser.add_argument('--cutoff', dest='cutoff', type=float,
                        default=argparse.SUPPRESS,
                        help='Specify a cutoff criteria for scores.')
    parser.add_argument(
        '--tversky', dest='tversky', action='store_true',
        default=argparse.SUPPRESS,
        help='Switch to Tversky similarity scoring (default = Tanimoto).')

    # NOTE(review): parse_args() reads sys.argv rather than the ``argv``
    # parameter; left unchanged because callers relying on the default
    # ``argv`` depend on it.
    args = parser.parse_args()

    dbname = args.database

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # set options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(args.nHits)
    print("Number of hits set to %u" % opts.GetLimit())
    if hasattr(args, 'cutoff'):
        opts.SetCutoff(args.cutoff)
        print("Cutoff set to %f" % args.cutoff)
    if hasattr(args, 'tversky'):
        # NOTE(review): the boolean flag is passed straight through;
        # presumably True coincides with the Tversky sim-func constant --
        # confirm against OEShapeSimFuncType.
        opts.SetSimFunc(args.tversky)
        print("Tversky similarity scoring set.")

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("\nOpening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database\n" % timer.Elapsed())

    for qfname in args.query:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        # Probe for at least one readable molecule, then start over so the
        # loop below sees the first molecule as well.
        mcmol = oechem.OEMol()
        if not oechem.OEReadMolecule(qfs, mcmol):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)
        qfs.rewind()

        ext = oechem.OEGetFileExtension(qfname)

        qmolidx = 0
        while oechem.OEReadMolecule(qfs, mcmol):

            # write out to file name based on molecule title
            ofs = oechem.oemolostream()
            moltitle = mcmol.GetTitle()
            if not moltitle:
                moltitle = str(qmolidx)
            ofname = moltitle + "_results." + ext
            if not ofs.open(ofname):
                # Report the file that actually failed to open.
                oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

            print("Searching for %s of %s (%s conformers)"
                  % (moltitle, qfname, mcmol.NumConfs()))

            qconfidx = 0
            for conf in mcmol.GetConfs():

                for score in dbase.GetSortedScores(conf, opts):
                    dbmol = oechem.OEMol()
                    dbmolidx = score.GetMolIdx()
                    if not moldb.GetMolecule(dbmol, dbmolidx):
                        print("Unable to retrieve molecule '%u' from the database"
                              % dbmolidx)
                        continue

                    # Keep only the database conformer that produced the hit.
                    mol = oechem.OEGraphMol(
                        dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

                    oechem.OESetSDData(mol, "QueryConfidx", "%s" % qconfidx)
                    oechem.OESetSDData(mol, "ShapeTanimoto",
                                       "%.4f" % score.GetShapeTanimoto())
                    oechem.OESetSDData(mol, "ColorTanimoto",
                                       "%.4f" % score.GetColorTanimoto())
                    oechem.OESetSDData(mol, "TanimotoCombo",
                                       "%.4f" % score.GetTanimotoCombo())
                    score.Transform(mol)

                    oechem.OEWriteMolecule(ofs, mol)
                qconfidx += 1

            # Close explicitly so the results are on disk before reporting.
            ofs.close()
            print("%s conformers processed" % qconfidx)
            print("Wrote results to %s\n" % ofname)
            qmolidx += 1
    return 0
def main(argv=[__name__]):
    """Search a fast fingerprint database with a single query molecule.

    Reads the query from ``-query``, opens the molecule database
    ``-molfname`` and the fingerprint database ``-fpdbfname``, and writes
    each retrievable hit, tagged with its similarity score, to ``-out``.
    Timing for initialization, search and output is reported through
    OEThrow.Info.

    Returns 0, also when the command line cannot be parsed.
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)
    oegraphsim.OEConfigureFPDatabaseOptions(
        itf,
        oegraphsim.OEFPDatabaseOptions(10, oegraphsim.OESimMeasure_Tanimoto))
    oegraphsim.OEConfigureFPDatabaseMemoryType(itf)

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    query_path = itf.GetString("-query")
    mol_path = itf.GetString("-molfname")
    fp_path = itf.GetString("-fpdbfname")
    out_path = itf.GetString("-out")

    # initialize databases
    stopwatch = oechem.OEWallTimer()
    stopwatch.Start()

    query_stream = oechem.oemolistream()
    if not query_stream.open(query_path):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(query_stream, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mol_path):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    fpdb = oegraphsim.OEFastFPDatabase(
        fp_path, oegraphsim.OEGetFPDatabaseMemoryType(itf))
    if not fpdb.IsValid():
        oechem.OEThrow.Fatal("Cannot open fingerprint database!")
    fp_count = fpdb.NumFingerPrints()
    memory_label = fpdb.GetMemoryTypeString()

    out_stream = oechem.oemolostream()
    if not out_stream.open(out_path):
        oechem.OEThrow.Fatal("Cannot open output file!")

    if not oegraphsim.OEAreCompatibleDatabases(moldb, fpdb):
        oechem.OEThrow.Fatal("Databases are not compatible!")

    oechem.OEThrow.Info("%5.2f sec to initialize databases" % stopwatch.Elapsed())

    fptype = fpdb.GetFPTypeBase()
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database
    stopwatch.Start()
    scores = fpdb.GetSortedScores(query, opts)
    search_report = (stopwatch.Elapsed(), fp_count, memory_label)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints %s" % search_report)

    # write the hits that can be fetched from the molecule database
    stopwatch.Start()
    written = 0
    hit = oechem.OEGraphMol()
    for entry in scores:
        if not moldb.GetMolecule(hit, entry.GetIdx()):
            continue
        written += 1
        oechem.OESetSDData(hit, "Similarity score", "%.2f" % entry.GetScore())
        oechem.OEWriteMolecule(out_stream, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" % (stopwatch.Elapsed(), written))

    return 0
def main(argv=[__name__]):
    """Search a FastROCS shape database with one query per input file.

    Usage: ``<prog> <database> [<queries> ... ]``.  For each query file the
    first molecule is read and scored against every molecule of the
    database; the hits are written to ``<base>_results.<ext>`` next to the
    query, tagged with Shape/Color/Combo Tanimoto scores and transformed
    into the overlay pose.

    Returns:
        0, including when no supported GPU is available.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    # Ask for as many hits as there are database molecules; the database
    # does not change between queries, so compute this once.
    numHits = moldb.NumMols()

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        # Strip ".<ext>" from the query name to derive the output name.
        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that actually failed to open.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, numHits):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # Keep only the database conformer that produced the hit.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)

        # Close explicitly so the results are on disk before reporting.
        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0
def GetBestOverlays(self, querymolstr, options, iformat, oformat):
    """
    Search the shape database with the query held in 'querymolstr' and
    return the overlaid hit conformers serialized in 'oformat'.

    querymolstr - string holding the query molecule (or shape query)
    options - an instance of OEShapeDatabaseOptions
    iformat - file extension used to parse querymolstr. Formats starting
              with '.sq' are read as shape queries; '.oeb' input that
              fails to parse as a molecule is retried as a shape query
              because old clients may send .sq data under that name.
    oformat - file format to write the results as

    Raises ValueError when the query string or the output stream cannot
    be opened, or when no molecule can be read from the query string.
    """
    timer = oechem.OEWallTimer()

    # block until the database has finished loading
    ready = self.IsLoaded(True)
    assert ready

    if iformat.startswith(".sq"):
        query = ReadShapeQuery(querymolstr)
    else:
        # read in query
        instream = SetupStream(oechem.oemolistream(), iformat)
        if not instream.openstring(querymolstr):
            raise ValueError("Unable to open input molecule string")

        query = oechem.OEGraphMol()
        parsed = oechem.OEReadMolecule(instream, query)
        if not parsed and iformat != ".oeb":
            raise ValueError(
                "Unable to read a molecule from the string of format '%s'"
                % iformat)
        if not parsed:
            # could be an old client trying to send a .sq file.
            query = ReadShapeQuery(querymolstr)

    outstream = SetupStream(oechem.oemolostream(), oformat)
    if not outstream.openstring():
        raise ValueError("Unable to openstring for output")

    # a "Write" lock is taken to be paranoid and not overload the GPU
    self.rwlock.AcquireWriteLock()
    try:
        # do search
        hits = self.shapedb.GetSortedScores(query, options)
        sys.stderr.write("%f seconds to do search\n" % timer.Elapsed())
    finally:
        self.rwlock.ReleaseWriteLock()

    timer.Start()
    # write results
    for hit in hits:
        molidx = hit.GetMolIdx()
        multiconf = oechem.OEMol()
        if not self.moldb.GetMolecule(multiconf, molidx):
            oechem.OEThrow.Warning(
                "Can't retrieve molecule %i from the OEMolDatabase, "
                "skipping..." % molidx)
            continue

        # remove hydrogens to make output smaller, this also
        # ensures OEPrepareFastROCSMol will have the same output
        oechem.OESuppressHydrogens(multiconf)

        wanted_conf = oechem.OEHasConfIdx(hit.GetConfIdx())
        mol = oechem.OEGraphMol(multiconf.GetConf(wanted_conf))
        oechem.OECopySDData(mol, multiconf)

        # pick the score tags matching the similarity function in use
        if options.GetSimFunc() == oefastrocs.OEShapeSimFuncType_Tanimoto:
            tagged_scores = (
                ("ShapeTanimoto", hit.GetShapeTanimoto),
                ("ColorTanimoto", hit.GetColorTanimoto),
                ("TanimotoCombo", hit.GetTanimotoCombo),
            )
        else:
            tagged_scores = (
                ("ShapeTversky", hit.GetShapeTversky),
                ("ColorTversky", hit.GetColorTversky),
                ("TverskyCombo", hit.GetTverskyCombo),
            )
        for tag, getter in tagged_scores:
            oechem.OESetSDData(mol, tag, "%.4f" % getter())

        orientation = options.GetInitialOrientation()
        if orientation != oefastrocs.OEFastROCSOrientation_Inertial:
            oechem.OEAddSDData(mol, "Opt. Starting Pos.",
                               GetAltStartsString(orientation))

        hit.Transform(mol)

        oechem.OEWriteMolecule(outstream, mol)

    output = outstream.GetString()
    sys.stderr.write("%f seconds to write hitlist\n" % timer.Elapsed())
    sys.stderr.flush()
    outstream.close()

    return output
def QueryResults(self):
    """
    Collect the results of all downstream shape servers, merge them, and
    return the best self.nhits hits (all hits when self.nhits is falsy).

    Each server in self.shapeservers is queried on its own thread. The
    returned byte strings are concatenated, parsed as self.oformat,
    ordered by descending score and written back out in self.oformat.
    Molecules carrying a "ShapeTanimoto" tag are ranked by "TanimotoCombo"
    when present, otherwise by "ShapeTanimoto"; all other molecules are
    ranked the same way using the Tversky tags.

    Returns a Binary wrapping the serialized hit list, or "" when the
    servers returned no data or the data could not be opened.
    """
    timer = oechem.OEWallTimer()
    thrdpool = LaunchFunctionThreadPool(ShapeServer.QueryResults)

    for server in self.shapeservers:
        thrdpool.AddThread(server)

    chunks = [oebdata.data for oebdata in thrdpool.GetResults()]

    # Log lines are newline-terminated so successive messages do not run
    # together on stderr.
    sys.stderr.write("%f seconds to get results back\n" % timer.Elapsed())

    data = b"".join(chunks)
    if not data:
        sys.stderr.write("Possible query error, no data returned "
                         "by any of the downstream servers\n")
        return ""

    timer.Start()
    # read in from OEB strings
    ifs = oechem.oemolistream()
    ifs = SetupStream(ifs, self.oformat)
    if not ifs.openstring(data):
        sys.stderr.write(
            "Unable to open OEB string from downstream server\n")
        return ""

    # Copy each molecule: the stream's iterator hands out the molecules
    # one at a time, and the copies outlive the read loop.
    mols = [oechem.OEGraphMol(mol) for mol in ifs.GetOEGraphMols()]

    def GetScoreToCmp(mol):
        """Return the score used to rank 'mol', preferring the combo tag."""
        if oechem.OEHasSDData(mol, "ShapeTanimoto"):
            # sort by tanimoto scores
            if oechem.OEHasSDData(mol, "TanimotoCombo"):
                return float(oechem.OEGetSDData(mol, "TanimotoCombo"))
            return float(oechem.OEGetSDData(mol, "ShapeTanimoto"))
        # sort by tversky scores
        if oechem.OEHasSDData(mol, "TverskyCombo"):
            return float(oechem.OEGetSDData(mol, "TverskyCombo"))
        return float(oechem.OEGetSDData(mol, "ShapeTversky"))

    # Ascending sort followed by reverse() (rather than reverse=True)
    # keeps the established ordering among hits with equal scores.
    mols.sort(key=GetScoreToCmp)
    mols.reverse()

    # write back out to an OEB string
    ofs = oechem.oemolostream()
    ofs = SetupStream(ofs, self.oformat)
    ofs.openstring()

    nhits = self.nhits or len(mols)
    for mol in mols[:nhits]:
        oechem.OEWriteMolecule(ofs, mol)

    sys.stderr.write("%f seconds to collate hitlist\n" % timer.Elapsed())

    return Binary(ofs.GetString())
def main(argv=[__name__]):
    """Search an in-memory fingerprint database with a single query molecule.

    Reads the query from ``-query``, opens the molecule database
    ``-molfname``, generates one fingerprint per molecule index of that
    database, searches it with the query and writes every retrievable hit
    to ``-out``.  Timing for initialization, search and output is reported
    through OEThrow.Info.

    Returns:
        0, also when the command line cannot be parsed.
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    defopts = oegraphsim.OEFPDatabaseOptions(10, oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFingerPrint(
        itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ofname = itf.GetString("-out")

    # initialize databases
    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())
    fpdb = oegraphsim.OEFPDatabase(fptype)

    # An empty fingerprint is stored for every molecule that cannot be
    # read, so fingerprint indices stay aligned with molecule indices.
    emptyfp = oegraphsim.OEFingerPrint()
    emptyfp.SetFPTypeBase(fptype)

    nrmols = moldb.GetMaxMolIdx()

    mol = oechem.OEGraphMol()
    for idx in range(nrmols):
        if moldb.GetMolecule(mol, idx):
            fpdb.AddFP(mol)
        else:
            fpdb.AddFP(emptyfp)

    nrfps = fpdb.NumFingerPrints()
    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database
    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints" % (timer.Elapsed(), nrfps))

    # Count what is actually written: the configured limit can differ from
    # the number of hits returned and retrievable.
    timer.Start()
    nrhits = 0
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            nrhits += 1
            oechem.OEWriteMolecule(ofs, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" % (timer.Elapsed(), nrhits))

    return 0
fpdb = oegraphsim.OEFPDatabase(oegraphsim.OEFPType_Path) emptyfp = oegraphsim.OEFingerPrint() emptyfp.SetFPTypeBase(fpdb.GetFPTypeBase()) mol = oechem.OEGraphMol() for idx in range(0, nrmols): if moldb.GetMolecule(mol, idx): fpdb.AddFP(mol) else: fpdb.AddFP(emptyfp) nrfps = fpdb.NumFingerPrints() timer = oechem.OEWallTimer() while True: # read query SMILES from stdin sys.stdout.write("Enter SMILES> ") line = sys.stdin.readline() line = line.rstrip() if len(line) == 0: sys.exit(0) # parse query query = oechem.OEGraphMol() if not oechem.OESmilesToMol(query, line): oechem.OEThrow.Warning("Invalid SMILES string")