Esempio n. 1
0
def main():
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    args = getargs()

    dbname = args.d
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(1)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    df = pd.read_csv(args.i)
    res = []
    for smile in tqdm(df.loc[:, 'smiles'].tolist()):
        resn = len(res)
        try:
            q = FromString(smile)[0]

            for score in dbase.GetSortedScores(q, 1):
                res.append(score.GetTanimotoCombo())
                break
        except KeyboardInterrupt:
            print("caught")
            exit()
        except:
            res.append(np.nan)
        if len(res) == resn:
            res.append(np.nan)

    df['fastroc'] = res
    print(df.head)
    df.to_csv(args.o, sep=',', index=False)

    return 0
    def run(self):
        """ Open the database file and load it into the OEShapeDatabase """
        timer = oechem.OEWallTimer()
        sys.stderr.write("Opening database file %s ...\n" % self.dbname)
        if not self.moldb.Open(self.dbname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % self.dbname)

        dots = oechem.OEThreadedDots(10000, 200, "conformers")
        if not self.shapedb.Open(self.moldb, dots):
            oechem.OEThrow.Fatal(
                "Unable to initialize OEShapeDatabase on '%s'" % self.dbname)

        dots.Total()
        sys.stderr.write("%s seconds to load database\n" % timer.Elapsed())
        self.loadedEvent.set()
Esempio n. 3
0
def main(argv=[__name__]):

    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)
    oegraphsim.OEConfigureFingerPrint(
        itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 1

    ifname = itf.GetString("-in")
    ffname = itf.GetString("-fpdb")

    if oechem.OEGetFileExtension(ffname) != "fpbin":
        oechem.OEThrow.Fatal(
            "Fingerprint database file should have '.fpbin' file extension!")

    idxfname = oechem.OEGetMolDatabaseIdxFileName(ifname)

    if not os.path.exists(idxfname):
        if not oechem.OECreateMolDatabaseIdx(ifname):
            oechem.OEThrow.Warning("Unable to create %s molecule index file" %
                                   idxfname)

    oechem.OEThrow.Info("Using %s index molecule file" % idxfname)

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifname):
        oechem.OEThrow.Fatal("Cannot open molecule database file!")

    nrmols = moldb.GetMaxMolIdx()

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())

    opts = oegraphsim.OECreateFastFPDatabaseOptions(fptype)
    opts.SetTracer(oechem.OEDots(100000, 1000, "fingerprints"))
    oechem.OEThrow.Info("Generating fingerprints with %d threads" %
                        opts.GetNumProcessors())

    timer = oechem.OEWallTimer()
    if not oegraphsim.OECreateFastFPDatabaseFile(ffname, ifname, opts):
        oechem.OEThrow.Fatal("Cannot create fingerprint database file!")

    oechem.OEThrow.Info("%5.2f secs to generate %d fingerprints" %
                        (timer.Elapsed(), nrmols))

    return 0
Esempio n. 4
0
def main(argv=[__name__]):

    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument(
        'database',
        help='File containing the database molecules to be search \
                              (format not restricted to *.oeb).')
    parser.add_argument(
        'query',
        default=[],
        nargs='+',
        help='File containing the query molecule(s) to be search \
                              (format not restricted to *.oeb).')
    parser.add_argument(
        '--nHits',
        dest='nHits',
        type=int,
        default=100,
        help='Number of hits to return (default = number of database mols).')
    parser.add_argument('--cutoff',
                        dest='cutoff',
                        type=float,
                        default=argparse.SUPPRESS,
                        help='Specify a cutoff criteria for scores.')
    parser.add_argument(
        '--tversky',
        dest='tversky',
        action='store_true',
        default=argparse.SUPPRESS,
        help='Switch to Tversky similarity scoring (default = Tanimoto).')

    args = parser.parse_args()

    dbname = args.database

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # set options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(args.nHits)
    print("Number of hits set to %u" % opts.GetLimit())
    if hasattr(args, 'cutoff') is not False:
        opts.SetCutoff(args.cutoff)
        print("Cutoff set to %f" % args.cutoff)
    if hasattr(args, 'tversky') is not False:
        opts.SetSimFunc(args.tversky)
        print("Tversky similarity scoring set.")

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("\nOpening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    dots.Total()
    print("%f seconds to load database\n" % timer.Elapsed())

    for qfname in args.query:

        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        mcmol = oechem.OEMol()
        if not oechem.OEReadMolecule(qfs, mcmol):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)
        qfs.rewind()

        ext = oechem.OEGetFileExtension(qfname)

        qmolidx = 0
        while oechem.OEReadMolecule(qfs, mcmol):

            # write out to file name based on molecule title
            ofs = oechem.oemolostream()
            moltitle = mcmol.GetTitle()
            if len(moltitle) == 0:
                moltitle = str(qmolidx)
            ofname = moltitle + "_results." + ext
            if not ofs.open(ofname):
                oechem.OEThrow.Fatal("Unable to open '%s'" % argv[4])

            print("Searching for %s of %s (%s conformers)" %
                  (moltitle, qfname, mcmol.NumConfs()))

            qconfidx = 0
            for conf in mcmol.GetConfs():

                for score in dbase.GetSortedScores(conf, opts):

                    dbmol = oechem.OEMol()
                    dbmolidx = score.GetMolIdx()
                    if not moldb.GetMolecule(dbmol, dbmolidx):
                        print(
                            "Unable to retrieve molecule '%u' from the database"
                            % dbmolidx)
                        continue

                    mol = oechem.OEGraphMol(
                        dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

                    oechem.OESetSDData(mol, "QueryConfidx", "%s" % qconfidx)
                    oechem.OESetSDData(mol, "ShapeTanimoto",
                                       "%.4f" % score.GetShapeTanimoto())
                    oechem.OESetSDData(mol, "ColorTanimoto",
                                       "%.4f" % score.GetColorTanimoto())
                    oechem.OESetSDData(mol, "TanimotoCombo",
                                       "%.4f" % score.GetTanimotoCombo())
                    score.Transform(mol)

                    oechem.OEWriteMolecule(ofs, mol)

                qconfidx += 1

            print("%s conformers processed" % qconfidx)
            print("Wrote results to %s\n" % ofname)

        qmolidx += 1
    return 0
Esempio n. 5
0
def main(argv=[__name__]):

    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    defopts = oegraphsim.OEFPDatabaseOptions(10,
                                             oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFPDatabaseMemoryType(itf)

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ffname = itf.GetString("-fpdbfname")
    ofname = itf.GetString("-out")

    # initialize databases

    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    memtype = oegraphsim.OEGetFPDatabaseMemoryType(itf)

    fpdb = oegraphsim.OEFastFPDatabase(ffname, memtype)
    if not fpdb.IsValid():
        oechem.OEThrow.Fatal("Cannot open fingerprint database!")
    nrfps = fpdb.NumFingerPrints()
    memtypestr = fpdb.GetMemoryTypeString()

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    if not oegraphsim.OEAreCompatibleDatabases(moldb, fpdb):
        oechem.OEThrow.Fatal("Databases are not compatible!")

    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    fptype = fpdb.GetFPTypeBase()
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database

    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints %s" %
                        (timer.Elapsed(), nrfps, memtypestr))

    timer.Start()
    nrhits = 0
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            nrhits += 1
            oechem.OESetSDData(hit, "Similarity score", "%.2f" % si.GetScore())
            oechem.OEWriteMolecule(ofs, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" %
                        (timer.Elapsed(), nrhits))

    return 0
Esempio n. 6
0
def main(argv=[__name__]):
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everthing to a similary named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % argv[4])

        print("Searching for %s" % qfname)
        numHits = moldb.NumMols()
        for score in dbase.GetSortedScores(query, numHits):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)

    return 0
    def GetBestOverlays(self, querymolstr, options, iformat, oformat):
        """ Return a string of the format specified by 'oformat'
        containing nhits overlaid confomers using querymolstr as the
        query interpretted as iformat.

        querymolstr - a string containing a molecule to use as the query
        options - an instance of OEShapeDatabaseOptions
        iformat - a string representing the file extension to parse the querymolstr as.
                  Note: old clients could be passing .sq files, so
                  iformat == '.oeb' will try to interpret the file as
                  a .sq file.
        oformat - file format to write the results as
        """
        timer = oechem.OEWallTimer()

        # make sure to wait for the load to finish
        blocking = True
        loaded = self.IsLoaded(blocking)
        assert loaded

        if iformat.startswith(".sq"):
            query = ReadShapeQuery(querymolstr)
        else:
            # read in query
            qfs = oechem.oemolistream()
            qfs = SetupStream(qfs, iformat)
            if not qfs.openstring(querymolstr):
                raise ValueError("Unable to open input molecule string")

            query = oechem.OEGraphMol()
            if not oechem.OEReadMolecule(qfs, query):
                if iformat == ".oeb":  # could be an old client trying to send a .sq file.
                    query = ReadShapeQuery(querymolstr)
                else:
                    raise ValueError(
                        "Unable to read a molecule from the string of format '%s'"
                        % iformat)

        ofs = oechem.oemolostream()
        ofs = SetupStream(ofs, oformat)
        if not ofs.openstring():
            raise ValueError("Unable to openstring for output")

        # do we only want shape based results?

        # this is a "Write" lock to be paranoid and not overload the GPU
        self.rwlock.AcquireWriteLock()
        try:
            # do search
            scores = self.shapedb.GetSortedScores(query, options)
            sys.stderr.write("%f seconds to do search\n" % timer.Elapsed())
        finally:
            self.rwlock.ReleaseWriteLock()

        timer.Start()
        # write results
        for score in scores:
            mcmol = oechem.OEMol()
            if not self.moldb.GetMolecule(mcmol, score.GetMolIdx()):
                oechem.OEThrow.Warning(
                    "Can't retrieve molecule %i from the OEMolDatabase, "
                    "skipping..." % score.GetMolIdx())
                continue
            # remove hydrogens to make output smaller, this also
            # ensures OEPrepareFastROCSMol will have the same output
            oechem.OESuppressHydrogens(mcmol)

            mol = oechem.OEGraphMol(
                mcmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
            oechem.OECopySDData(mol, mcmol)

            if options.GetSimFunc() == oefastrocs.OEShapeSimFuncType_Tanimoto:
                oechem.OESetSDData(mol, "ShapeTanimoto",
                                   "%.4f" % score.GetShapeTanimoto())
                oechem.OESetSDData(mol, "ColorTanimoto",
                                   "%.4f" % score.GetColorTanimoto())
                oechem.OESetSDData(mol, "TanimotoCombo",
                                   "%.4f" % score.GetTanimotoCombo())
            else:
                oechem.OESetSDData(mol, "ShapeTversky",
                                   "%.4f" % score.GetShapeTversky())
                oechem.OESetSDData(mol, "ColorTversky",
                                   "%.4f" % score.GetColorTversky())
                oechem.OESetSDData(mol, "TverskyCombo",
                                   "%.4f" % score.GetTverskyCombo())

            if options.GetInitialOrientation(
            ) != oefastrocs.OEFastROCSOrientation_Inertial:
                oechem.OEAddSDData(
                    mol, "Opt. Starting Pos.",
                    GetAltStartsString(options.GetInitialOrientation()))

            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)

        output = ofs.GetString()
        sys.stderr.write("%f seconds to write hitlist\n" % timer.Elapsed())
        sys.stderr.flush()
        ofs.close()

        return output
Esempio n. 8
0
    def QueryResults(self):
        """ Return the best nhits results of these servers. """
        timer = oechem.OEWallTimer()
        thrdpool = LaunchFunctionThreadPool(ShapeServer.QueryResults)

        for server in self.shapeservers:
            thrdpool.AddThread(server)

        data = []
        for oebdata in thrdpool.GetResults():
            data.append(oebdata.data)

        sys.stderr.write("%f seconds to get results back" % timer.Elapsed())

        data = b"".join(data)
        if not data:
            sys.stderr.write("Possible query error, no data returned \
                             by any of the downstream servers")
            return ""

        timer.Start()
        # read in from OEB strings
        ifs = oechem.oemolistream()
        ifs = SetupStream(ifs, self.oformat)
        if not ifs.openstring(data):
            sys.stderr.write(
                "Unable to open OEB string from downstream server")
            return ""

        mols = [oechem.OEGraphMol(mol) for mol in ifs.GetOEGraphMols()]

        def GetScoreToCmp(mol):
            if oechem.OEHasSDData(mol, "ShapeTanimoto"):
                # sort by shape tanimoto
                if oechem.OEHasSDData(mol, "TanimotoCombo"):
                    return float(oechem.OEGetSDData(mol, "TanimotoCombo"))
                return float(oechem.OEGetSDData(mol, "ShapeTanimoto"))
            else:
                # sort by shape tversky
                if oechem.OEHasSDData(mol, "TverskyCombo"):
                    return float(oechem.OEGetSDData(mol, "TverskyCombo"))
                return float(oechem.OEGetSDData(mol, "ShapeTversky"))

        mols.sort(key=GetScoreToCmp)
        mols.reverse()

        # write back out to an OEB string
        ofs = oechem.oemolostream()
        ofs = SetupStream(ofs, self.oformat)
        ofs.openstring()

        nhits = self.nhits
        if not nhits:
            nhits = len(mols)

        for mol in mols[:nhits]:
            oechem.OEWriteMolecule(ofs, mol)

        sys.stderr.write("%f seconds to collate hitlist" % timer.Elapsed())

        return Binary(ofs.GetString())
Esempio n. 9
0
def main(argv=[__name__]):

    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    defopts = oegraphsim.OEFPDatabaseOptions(10,
                                             oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFingerPrint(
        itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ofname = itf.GetString("-out")

    # initialize databases

    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())
    fpdb = oegraphsim.OEFPDatabase(fptype)

    emptyfp = oegraphsim.OEFingerPrint()
    emptyfp.SetFPTypeBase(fptype)

    nrmols = moldb.GetMaxMolIdx()

    mol = oechem.OEGraphMol()
    for idx in range(0, nrmols):
        if moldb.GetMolecule(mol, idx):
            fpdb.AddFP(mol)
        else:
            fpdb.AddFP(emptyfp)

    nrfps = fpdb.NumFingerPrints()
    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database

    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints" %
                        (timer.Elapsed(), nrfps))

    timer.Start()
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            oechem.OEWriteMolecule(ofs, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" %
                        (timer.Elapsed(), opts.GetLimit()))

    return 0
Esempio n. 10
0
fpdb = oegraphsim.OEFPDatabase(oegraphsim.OEFPType_Path)

emptyfp = oegraphsim.OEFingerPrint()
emptyfp.SetFPTypeBase(fpdb.GetFPTypeBase())

mol = oechem.OEGraphMol()
for idx in range(0, nrmols):
    if moldb.GetMolecule(mol, idx):
        fpdb.AddFP(mol)
    else:
        fpdb.AddFP(emptyfp)

nrfps = fpdb.NumFingerPrints()

timer = oechem.OEWallTimer()
while True:

    # read query SMILES from stdin

    sys.stdout.write("Enter SMILES> ")
    line = sys.stdin.readline()
    line = line.rstrip()
    if len(line) == 0:
        sys.exit(0)

    # parse query

    query = oechem.OEGraphMol()
    if not oechem.OESmilesToMol(query, line):
        oechem.OEThrow.Warning("Invalid SMILES string")