Beispiel #1
0
    def createOeBinaryDatabaseAndIndex(self, oebMolFilePath, oeMolDbFilePath):
        """Create OE binary database file and associated index from the input serial
        binary data file.

        Args:
            oebMolFilePath (str): input OeMol stream binary file path
            oeMolDbFilePath (str): output OeMolDatabase file path

        Returns:
           int:  number of molecules processed in the database (0 when the input
                 cannot be opened, the output cannot be saved, or an exception
                 is raised and logged).
        """
        molCount = 0
        try:
            startTime = time.time()
            moldb = oechem.OEMolDatabase()
            if not moldb.Open(oebMolFilePath):
                logger.error("Read fails for %r", oebMolFilePath)
                return molCount
            #
            logger.info(
                "Opened database in format %r num mols %d max index %d",
                moldb.GetFormat(), moldb.NumMols(), moldb.GetMaxMolIdx())
            # Do not report a molecule count for a database that was not written.
            if not moldb.Save(oeMolDbFilePath):
                logger.error("Write fails for %r", oeMolDbFilePath)
                return molCount
            tL = list(moldb.GetTitles())
            # An empty database has no first/last title; indexing would raise
            # IndexError and be reported as a failure by the handler below.
            if tL:
                logger.info("First and last titles: %r %r", tL[0], tL[-1])
            else:
                logger.info("No molecule titles found in %r", oebMolFilePath)
            molCount = moldb.NumMols()
            endTime = time.time()
            logger.info("Completed operation at %s (%.4f seconds)",
                        time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                        endTime - startTime)
        except Exception as e:
            # Best-effort API: log the traceback and fall through to return
            # whatever count was established (0 unless NumMols() succeeded).
            logger.exception("Failing with %s", str(e))
        return molCount
Beispiel #2
0
def _sort_by_rotbond(ifs, outdir):
    """
    Group the molecules of a database by their number of rotor bonds and
    write each group to its own ``nrotor_<n>.smi`` file in ``outdir``.

    Indices for which the database cannot return a molecule are skipped.

    Parameters
    ----------
    ifs: str
        absolute path to molecule database
    outdir: str
        absolute path to where output files should be written.
    """

    indices_by_count = {}
    moldb = oechem.OEMolDatabase(ifs)
    scratch = oechem.OEGraphMol()
    for mol_idx in range(moldb.GetMaxMolIdx()):
        if not moldb.GetMolecule(scratch, mol_idx):
            continue
        # IsRotor() is summed per bond to get the rotor count of this molecule
        rotor_count = sum(bond.IsRotor() for bond in scratch.GetBonds())
        indices_by_count.setdefault(rotor_count, []).append(mol_idx)

    # One output database per distinct rotor count
    for rotor_count, members in indices_by_count.items():
        n_members = len(members)
        out_path = os.path.join(outdir, 'nrotor_{}.smi'.format(rotor_count))
        stream = new_output_stream(out_path)
        write_oedatabase(moldb, stream, members, n_members)
Beispiel #3
0
 def initialize(self, dbdict):
     """Empty the ``db`` table and refill it from ``dbdict``.

     All existing rows are deleted, then one row is inserted per entry
     holding the entry's name, its path and the molecule count reported by
     opening that path as an ``oechem.OEMolDatabase``. Every statement is
     issued with ``commit=True``.

     Args:
         dbdict: mapping of database name to molecule file path.
     """
     insert_sql = 'INSERT INTO db (name, path, molcount) VALUES (?,?,?)'
     self('DELETE FROM db', commit=True)
     for label, location in dbdict.items():
         row = (label, location, oechem.OEMolDatabase(location).NumMols())
         self(insert_sql, row, commit=True)
Beispiel #4
0
def SplitChunk(ifs, chunksize, outbase, ext):
    """Split a molecule database into files of ``chunksize`` entries each.

    Database indices ``0 .. GetMaxMolIdx() - 1`` are written in order; a new
    output stream is requested from ``NewOutputStream(outbase, ext, chunk)``
    (chunk numbers start at 1) each time the current one has received
    ``chunksize`` indices. Each stream is closed as soon as its chunk is
    complete, and the last one is closed before returning.

    Args:
        ifs: source accepted by ``oechem.OEMolDatabase``.
        chunksize (int): number of database indices written per output file.
        outbase: base name forwarded to ``NewOutputStream``.
        ext: extension forwarded to ``NewOutputStream``.
    """
    moldb = oechem.OEMolDatabase(ifs)
    # Starting with count == chunksize forces the first stream to be opened
    # on the first iteration.
    chunk, count = 1, chunksize
    ofs = None

    for idx in range(moldb.GetMaxMolIdx()):
        if count == chunksize:
            # Finish the previous chunk before starting the next one.
            if ofs is not None:
                ofs.close()
            ofs = NewOutputStream(outbase, ext, chunk)
            chunk, count = chunk + 1, 0
        count += 1
        moldb.WriteMolecule(ofs, idx)

    # ofs is still None when the database yielded no indices at all.
    if ofs is not None:
        ofs.close()
Beispiel #5
0
def MolSort(ifs, ofs):
    """Reorder the molecules of ``ifs`` by title and save the result to ``ofs``.

    Molecules with equal titles keep their original relative (index) order.

    Args:
        ifs: source accepted by ``oechem.OEMolDatabase``.
        ofs: destination passed to ``OEMolDatabase.Save``.
    """
    moldb = oechem.OEMolDatabase(ifs)

    all_titles = list(moldb.GetTitles())
    # sorted() is stable, so ties on title fall back to ascending index
    order = sorted(range(len(all_titles)), key=lambda pos: all_titles[pos])

    moldb.Order(order)
    moldb.Save(ofs)
Beispiel #6
0
def MolCount(fname):
    """Print and return the number of molecules in the file ``fname``.

    Args:
        fname: path of the molecule file to count.

    Returns:
        int: the count reported by ``OEMolDatabase.NumMols()``, or 0 (after
        an OEThrow warning) when the file cannot be opened.
    """
    stream = oechem.oemolistream()
    if stream.open(fname):
        total = oechem.OEMolDatabase(stream).NumMols()
        print("%s contains %d molecule(s)." % (fname, total))
        return total

    oechem.OEThrow.Warning("Unable to open %s for reading" % fname)
    return 0
Beispiel #7
0
def main():
    """Score every SMILES of a CSV file against a FastROCS shape database.

    Reads the options from ``getargs()``: ``args.d`` is the database file,
    ``args.i`` the input CSV (must contain a ``smiles`` column) and
    ``args.o`` the output CSV. For each SMILES the TanimotoCombo of the first
    score returned by the database is stored in a new ``fastroc`` column;
    rows whose query fails or yields no score get NaN.

    Returns:
        int: always 0 (also when no supported GPU is available).
    """
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    args = getargs()

    dbname = args.d
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    # NOTE(review): opts is configured but never passed to GetSortedScores
    # below (which receives the literal 1) - confirm which was intended.
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(1)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    df = pd.read_csv(args.i)
    res = []
    for smile in tqdm(df.loc[:, 'smiles'].tolist()):
        # Exactly one value is recorded per input row; NaN unless a score
        # is obtained.
        best = np.nan
        try:
            q = FromString(smile)[0]

            # Only the first (best-ranked) score is of interest.
            for score in dbase.GetSortedScores(q, 1):
                best = score.GetTanimotoCombo()
                break
        except KeyboardInterrupt:
            print("caught")
            raise SystemExit
        except Exception:
            # A failing query must not abort the whole batch.
            best = np.nan
        res.append(best)

    df['fastroc'] = res
    # head() must be called; printing the bound method shows no data.
    print(df.head())
    df.to_csv(args.o, sep=',', index=False)

    return 0
Beispiel #8
0
 def loadOeBinaryDatabaseAndIndex(self, oeMolDbFilePath):
     """Open an OE molecule database file.

     Args:
         oeMolDbFilePath (str): path of the OEMolDatabase file to open

     Returns:
         oechem.OEMolDatabase: the opened database, or None when the file
         cannot be opened. Exceptions are logged rather than propagated.
     """
     molDb = None
     try:
         molDb = oechem.OEMolDatabase()
         if not molDb.Open(oeMolDbFilePath):
             logger.error("Unable to open %r", oeMolDbFilePath)
             # Do not report a successful load or hand back an unopened
             # database object.
             return None
         molCount = molDb.NumMols()
         logger.info("Loaded OE database file containing %d molecules",
                     molCount)
     except Exception as e:
         logger.exception("Loading %r failing with %s", oeMolDbFilePath,
                          str(e))
     return molDb
Beispiel #9
0
    def test_sort_nrotors(self):
        """Tests sorting fragments by nrotors.

        Runs ``fragment._sort_by_rotbond`` on ``frags.smi`` and checks that
        every readable molecule in ``nrotor_<n>.smi`` (n = 1..5) has exactly
        ``n`` rotor bonds.
        """
        ifs = get_fn('frags.smi')
        out_dir = self.get_writes_dir()
        fragment._sort_by_rotbond(ifs, outdir=out_dir)

        mol = oechem.OEGraphMol()
        for n in range(1, 6):
            db = get_fn('nrotor_{}.smi'.format(n), written=True)
            moldb = oechem.OEMolDatabase(db)
            for idx in range(moldb.GetMaxMolIdx()):
                if moldb.GetMolecule(mol, idx):
                    nrotor = sum(bond.IsRotor() for bond in mol.GetBonds())
                    # assertEquals is a deprecated alias that was removed in
                    # Python 3.12.
                    self.assertEqual(n, nrotor)
Beispiel #10
0
def main(argv=[__name__]):
    """Generate a fast fingerprint database file for a molecule file.

    Command-line options are parsed through an ``OEInterface`` configured
    from ``InterfaceData``: ``-in`` names the molecule file and ``-fpdb`` the
    fingerprint database to create (must carry the ``fpbin`` extension). A
    molecule index file is created first when it does not exist yet.

    Returns:
        int: 1 when the command line cannot be parsed, otherwise 0.
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)
    default_fptype = oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree)
    oegraphsim.OEConfigureFingerPrint(itf, default_fptype)

    if not oechem.OEParseCommandLine(itf, argv):
        return 1

    mol_path = itf.GetString("-in")
    fp_path = itf.GetString("-fpdb")

    if oechem.OEGetFileExtension(fp_path) != "fpbin":
        oechem.OEThrow.Fatal(
            "Fingerprint database file should have '.fpbin' file extension!")

    index_path = oechem.OEGetMolDatabaseIdxFileName(mol_path)

    # Only attempt to build the index when none is present on disk
    index_missing = not os.path.exists(index_path)
    if index_missing and not oechem.OECreateMolDatabaseIdx(mol_path):
        oechem.OEThrow.Warning(
            "Unable to create %s molecule index file" % index_path)

    oechem.OEThrow.Info("Using %s index molecule file" % index_path)

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mol_path):
        oechem.OEThrow.Fatal("Cannot open molecule database file!")

    # Figure reported in the final timing message
    mol_total = moldb.GetMaxMolIdx()

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())

    create_opts = oegraphsim.OECreateFastFPDatabaseOptions(fptype)
    create_opts.SetTracer(oechem.OEDots(100000, 1000, "fingerprints"))
    oechem.OEThrow.Info(
        "Generating fingerprints with %d threads" % create_opts.GetNumProcessors())

    timer = oechem.OEWallTimer()
    created = oegraphsim.OECreateFastFPDatabaseFile(fp_path, mol_path, create_opts)
    if not created:
        oechem.OEThrow.Fatal("Cannot create fingerprint database file!")

    oechem.OEThrow.Info(
        "%5.2f secs to generate %d fingerprints" % (timer.Elapsed(), mol_total))

    return 0
    def __init__(self, itf):
        """ Create a MCMolShapeDatabase from the parameters specified by the OEInterface.

        Reads ``-dbase`` and ``-shapeOnly`` from ``itf``, creates the molecule
        and shape database objects, and starts a ``DatabaseLoaderThread`` that
        is handed both databases, the database name and ``self.loadedEvent``.
        """
        self.rwlock = ReadWriteLock()
        self.loadedEvent = Event()

        self.dbname = itf.GetString("-dbase")
        self.moldb = oechem.OEMolDatabase()

        self.dbtype = GetDatabaseType(itf.GetBool("-shapeOnly"))
        self.shapedb = oefastrocs.OEShapeDatabase(*GetShapeDatabaseArgs(itf))

        # this thread is daemonic so a KeyboardInterrupt
        # during the load will cancel the process
        self.loaderThread = DatabaseLoaderThread(self.shapedb, self.moldb,
                                                 self.dbname, self.loadedEvent)
        # Thread.setDaemon() is deprecated; set the attribute instead.
        self.loaderThread.daemon = True
        self.loaderThread.start()
Beispiel #12
0
def SplitNParts(ifs, nparts, outbase, ext):
    """Split a molecule database into ``nparts`` output files.

    The per-file size is derived from ``NumMols() / nparts``; when the
    division leaves a remainder ``lft``, the first ``lft`` files receive one
    extra entry. Database indices ``0 .. GetMaxMolIdx() - 1`` are written in
    order to streams obtained from ``NewOutputStream(outbase, ext, chunk)``
    (chunk numbers start at 1). Every stream, including the last, is closed.

    Args:
        ifs: source accepted by ``oechem.OEMolDatabase``.
        nparts (int): number of parts to split into (must be non-zero).
        outbase: base name forwarded to ``NewOutputStream``.
        ext: extension forwarded to ``NewOutputStream``.
    """
    moldb = oechem.OEMolDatabase(ifs)
    molcount = moldb.NumMols()

    chunksize, lft = divmod(molcount, nparts)
    if lft != 0:
        # Leading chunks absorb the remainder, one extra entry each.
        chunksize += 1
    chunk, count = 1, 0

    ofs = NewOutputStream(outbase, ext, chunk)
    for idx in range(moldb.GetMaxMolIdx()):
        count += 1
        if count > chunksize:
            # Once the remainder has been distributed, fall back to the
            # base chunk size for all following files.
            if chunk == lft:
                chunksize -= 1

            ofs.close()
            chunk, count = chunk + 1, 1
            ofs = NewOutputStream(outbase, ext, chunk)

        moldb.WriteMolecule(ofs, idx)

    # The last chunk's stream was previously left open.
    ofs.close()
Beispiel #13
0
def main(argv=[__name__]):
    """Search a FastROCS shape database with multi-conformer queries.

    Options are parsed with ``argparse`` from the process command line
    (``parse_args()`` is called without arguments, so ``argv`` is accepted
    only for interface compatibility). For every molecule of every query
    file, each conformer is searched against the database and the hits are
    written to ``<title>_results.<ext>``, where ``<title>`` is the molecule
    title or, when it has none, its position within the query file.

    Returns:
        int: always 0 (also when no supported GPU is available).
    """
    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument(
        'database',
        help='File containing the database molecules to be search \
                              (format not restricted to *.oeb).')
    parser.add_argument(
        'query',
        default=[],
        nargs='+',
        help='File containing the query molecule(s) to be search \
                              (format not restricted to *.oeb).')
    parser.add_argument(
        '--nHits',
        dest='nHits',
        type=int,
        default=100,
        help='Number of hits to return (default = 100).')
    # SUPPRESS keeps the attribute off the namespace unless the option is
    # given, which is what the hasattr() checks below rely on.
    parser.add_argument('--cutoff',
                        dest='cutoff',
                        type=float,
                        default=argparse.SUPPRESS,
                        help='Specify a cutoff criteria for scores.')
    parser.add_argument(
        '--tversky',
        dest='tversky',
        action='store_true',
        default=argparse.SUPPRESS,
        help='Switch to Tversky similarity scoring (default = Tanimoto).')

    args = parser.parse_args()

    dbname = args.database

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # set options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(args.nHits)
    print("Number of hits set to %u" % opts.GetLimit())
    if hasattr(args, 'cutoff'):
        opts.SetCutoff(args.cutoff)
        print("Cutoff set to %f" % args.cutoff)
    if hasattr(args, 'tversky'):
        # Pass the similarity-function constant, not the boolean flag value.
        opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
        print("Tversky similarity scoring set.")

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("\nOpening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    dots.Total()
    print("%f seconds to load database\n" % timer.Elapsed())

    for qfname in args.query:

        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        # Probe read to fail early on an unreadable file, then rewind so the
        # loop below starts again from the first molecule.
        mcmol = oechem.OEMol()
        if not oechem.OEReadMolecule(qfs, mcmol):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)
        qfs.rewind()

        ext = oechem.OEGetFileExtension(qfname)

        qmolidx = 0
        while oechem.OEReadMolecule(qfs, mcmol):

            # write out to file name based on molecule title
            ofs = oechem.oemolostream()
            moltitle = mcmol.GetTitle()
            if len(moltitle) == 0:
                moltitle = str(qmolidx)
            ofname = moltitle + "_results." + ext
            if not ofs.open(ofname):
                # Report the file that failed (argv[4] was unrelated and
                # could raise IndexError).
                oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

            print("Searching for %s of %s (%s conformers)" %
                  (moltitle, qfname, mcmol.NumConfs()))

            qconfidx = 0
            for conf in mcmol.GetConfs():

                for score in dbase.GetSortedScores(conf, opts):

                    dbmol = oechem.OEMol()
                    dbmolidx = score.GetMolIdx()
                    if not moldb.GetMolecule(dbmol, dbmolidx):
                        print(
                            "Unable to retrieve molecule '%u' from the database"
                            % dbmolidx)
                        continue

                    # Keep only the conformer that produced this score.
                    mol = oechem.OEGraphMol(
                        dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

                    oechem.OESetSDData(mol, "QueryConfidx", "%s" % qconfidx)
                    oechem.OESetSDData(mol, "ShapeTanimoto",
                                       "%.4f" % score.GetShapeTanimoto())
                    oechem.OESetSDData(mol, "ColorTanimoto",
                                       "%.4f" % score.GetColorTanimoto())
                    oechem.OESetSDData(mol, "TanimotoCombo",
                                       "%.4f" % score.GetTanimotoCombo())
                    score.Transform(mol)

                    oechem.OEWriteMolecule(ofs, mol)

                qconfidx += 1

            ofs.close()
            print("%s conformers processed" % qconfidx)
            print("Wrote results to %s\n" % ofname)

            # Advance per molecule so untitled queries in the same file get
            # distinct output names instead of overwriting each other.
            qmolidx += 1
    return 0
Beispiel #14
0
def main(argv=[__name__]):
    """Search a FastROCS shape database with one query per file.

    ``argv[1]`` is the database file and ``argv[2:]`` are query files. For
    each query file the first molecule is read, the top 5 hits are searched,
    and the query followed by the hits (annotated with their scores as SD
    data) is written to ``<base>_results.<ext>``.

    Returns:
        int: always 0 (also on a usage error or when no supported GPU is
        available).
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()

    opts.SetLimit(5)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed (argv[4] was unrelated and could
            # raise IndexError).
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)
        oechem.OEWriteMolecule(ofs, query)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            print("Score for mol %u(conf %u) %f shape %f color" %
                  (score.GetMolIdx(), score.GetConfIdx(),
                   score.GetShapeTanimoto(), score.GetColorTanimoto()))
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" %
                      molidx)
                continue

            # Keep only the conformer that produced this score.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
            oechem.OESetSDData(mol, "ShapeTanimoto",
                               "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto",
                               "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo",
                               "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from openeye import oechem
import sys

# Script: copy the molecule stored at a given index of a molecule database
# into an output file. Usage: <input> <output> <index>.
if len(sys.argv) != 4:
    oechem.OEThrow.Usage("%s <input> <output> <index>" % sys.argv[0])

# Open the input file as a molecule database.
moldb = oechem.OEMolDatabase()
if not moldb.Open(sys.argv[1]):
    oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[1])

# Open the output stream before reading, so an unwritable destination is
# reported before any molecule is fetched.
ofs = oechem.oemolostream()
if not ofs.open(sys.argv[2]):
    oechem.OEThrow.Fatal("Unable to open %s for writing" % sys.argv[2])

# Index of the molecule to extract; int() raises ValueError on a
# non-numeric argument.
idx = int(sys.argv[3])

# Fetch the molecule at that index and write it out.
mol = oechem.OEMol()
if not moldb.GetMolecule(mol, idx):
    oechem.OEThrow.Fatal("Unable to read a molecule from index %u" % idx)

oechem.OEWriteMolecule(ofs, mol)
# @ </SNIPPET>
Beispiel #16
0
def main(argv=[__name__]):
    """Search a fast fingerprint database and write the hit molecules.

    Options come from an ``OEInterface`` configured from ``InterfaceData``:
    ``-query`` (query molecule file), ``-molfname`` (molecule database),
    ``-fpdbfname`` (fingerprint database) and ``-out`` (output file). Each
    hit that can be retrieved from the molecule database is tagged with its
    "Similarity score" SD data and written to the output.

    Returns:
        int: always 0 (also when the command line cannot be parsed).
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    default_opts = oegraphsim.OEFPDatabaseOptions(
        10, oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, default_opts)
    oegraphsim.OEConfigureFPDatabaseMemoryType(itf)

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    query_path = itf.GetString("-query")
    mol_path = itf.GetString("-molfname")
    fp_path = itf.GetString("-fpdbfname")
    out_path = itf.GetString("-out")

    # ---- initialize databases ----

    timer = oechem.OEWallTimer()
    timer.Start()

    query_stream = oechem.oemolistream()
    if not query_stream.open(query_path):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(query_stream, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mol_path):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    memtype = oegraphsim.OEGetFPDatabaseMemoryType(itf)

    fpdb = oegraphsim.OEFastFPDatabase(fp_path, memtype)
    if not fpdb.IsValid():
        oechem.OEThrow.Fatal("Cannot open fingerprint database!")
    fp_total = fpdb.NumFingerPrints()
    memtype_label = fpdb.GetMemoryTypeString()

    out_stream = oechem.oemolostream()
    if not out_stream.open(out_path):
        oechem.OEThrow.Fatal("Cannot open output file!")

    if not oegraphsim.OEAreCompatibleDatabases(moldb, fpdb):
        oechem.OEThrow.Fatal("Databases are not compatible!")

    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    fptype = fpdb.GetFPTypeBase()
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())

    search_opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(search_opts, itf)

    # ---- search fingerprint database ----

    timer.Start()
    scores = fpdb.GetSortedScores(query, search_opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints %s" %
                        (timer.Elapsed(), fp_total, memtype_label))

    # ---- write hits ----

    timer.Start()
    written = 0
    hit = oechem.OEGraphMol()
    for entry in scores:
        # Skip scores whose molecule cannot be fetched from the database
        if not moldb.GetMolecule(hit, entry.GetIdx()):
            continue
        written += 1
        oechem.OESetSDData(hit, "Similarity score", "%.2f" % entry.GetScore())
        oechem.OEWriteMolecule(out_stream, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" %
                        (timer.Elapsed(), written))

    return 0
def main(argv=[__name__]):
    """Search a FastROCS shape database using Tversky similarity.

    ``argv[1]`` is the database file and ``argv[2:]`` are query files. The
    hit limit is set to the number of molecules in the database. For each
    query file the first molecule is read and every hit, annotated with its
    Tversky scores as SD data, is written to ``<base>_results.<ext>``.

    Returns:
        int: always 0 (also on a usage error).
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        # Do not index argv below if Usage() returns.
        return 0

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    dots.Total()
    print("%s seconds to load database" % timer.Elapsed())

    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
    # Ask for as many hits as there are molecules in the database.
    numHits = moldb.NumMols()
    opts.SetLimit(numHits)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            # Name the query file, not the database (argv[1]).
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed (argv[4] was unrelated and could
            # raise IndexError).
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" %
                      molidx)
                continue
            # Keep only the conformer that produced this score.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTversky",
                               "%.4f" % score.GetShapeTversky())
            oechem.OESetSDData(mol, "ColorTversky",
                               "%.4f" % score.GetColorTversky())
            oechem.OESetSDData(mol, "TverskyCombo",
                               "%.4f" % score.GetTverskyCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        ofs.close()
        print("Wrote results to %s" % ofname)
    return 0
Beispiel #18
0
def main(argv=[__name__]):
    """Search an in-memory fingerprint database built from a molecule file.

    Options come from an ``OEInterface`` configured from ``InterfaceData``:
    ``-query`` (query molecule file), ``-molfname`` (molecule database) and
    ``-out`` (output file). A fingerprint is generated for every index of
    the molecule database (an empty fingerprint stands in for indices that
    cannot be read, keeping fingerprint and molecule indices aligned), the
    database is searched with the query, and the retrievable hits are
    written to the output.

    Returns:
        int: always 0 (also when the command line cannot be parsed).
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    defopts = oegraphsim.OEFPDatabaseOptions(10,
                                             oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFingerPrint(
        itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ofname = itf.GetString("-out")

    # initialize databases

    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())
    fpdb = oegraphsim.OEFPDatabase(fptype)

    emptyfp = oegraphsim.OEFingerPrint()
    emptyfp.SetFPTypeBase(fptype)

    nrmols = moldb.GetMaxMolIdx()

    mol = oechem.OEGraphMol()
    for idx in range(nrmols):
        if moldb.GetMolecule(mol, idx):
            fpdb.AddFP(mol)
        else:
            # Placeholder so fingerprint index == molecule index.
            fpdb.AddFP(emptyfp)

    nrfps = fpdb.NumFingerPrints()
    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database

    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints" %
                        (timer.Elapsed(), nrfps))

    timer.Start()
    # Count what is actually written; the configured limit is only an upper
    # bound and says nothing about how many hits were retrievable.
    nrhits = 0
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            nrhits += 1
            oechem.OEWriteMolecule(ofs, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" %
                        (timer.Elapsed(), nrhits))

    return 0
def main(argv=[__name__]):
    """Search a FastROCS shape database using user-defined inertial starts.

    ``argv[1]`` is the database file and ``argv[2:]`` are query files. For
    each query file the first molecule is read, the coordinates of its atom
    with index 1 are registered as the single user start, the top 5 hits are
    searched, and the query followed by the hits (annotated with their
    scores as SD data) is written to ``<base>_user_results.<ext>``.

    Returns:
        int: always 0 (also on a usage error or when no supported GPU is
        available).
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetInitialOrientation(
        oefastrocs.OEFastROCSOrientation_UserInertialStarts)

    opts.SetLimit(5)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_user_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed (argv[4] was unrelated and could
            # raise IndexError).
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)
        oechem.OEWriteMolecule(ofs, query)

        # Use the coordinates of one query atom as the user start.
        startsCoords = oechem.OEFloatVector()
        atomIdx = 1
        xyz = query.GetCoords()[atomIdx]
        for x in xyz:
            startsCoords.append(x)
        if len(startsCoords) % 3 != 0:
            oechem.OEThrow.Fatal(
                "Something went wrong whilst reading in user-starts coordinates"
            )

        # Integer division: the number of starts is a count of xyz triples.
        opts.SetUserStarts(oechem.OEFloatVector(startsCoords),
                           len(startsCoords) // 3)

        opts.SetMaxOverlays(opts.GetNumInertialStarts() *
                            opts.GetNumUserStarts())

        if opts.GetInitialOrientation(
        ) == oefastrocs.OEFastROCSOrientation_UserInertialStarts:
            numStarts = opts.GetNumUserStarts()
            print("This example will use %u starts" % numStarts)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            print("Score for mol %u(conf %u) %f shape %f color" %
                  (score.GetMolIdx(), score.GetConfIdx(),
                   score.GetShapeTanimoto(), score.GetColorTanimoto()))
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" %
                      molidx)
                continue

            # Keep only the conformer that produced this score.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
            oechem.OESetSDData(mol, "ShapeTanimoto",
                               "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto",
                               "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo",
                               "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0
Beispiel #20
0
def get_molcount(path):
    """Return the molecule count of the database opened from ``path``."""
    return oechem.OEMolDatabase(path).NumMols()