Exemple #1
0
def main():
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    args = getargs()

    dbname = args.d
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(1)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    df = pd.read_csv(args.i)
    res = []
    for smile in tqdm(df.loc[:, 'smiles'].tolist()):
        resn = len(res)
        try:
            q = FromString(smile)[0]

            for score in dbase.GetSortedScores(q, 1):
                res.append(score.GetTanimotoCombo())
                break
        except KeyboardInterrupt:
            print("caught")
            exit()
        except:
            res.append(np.nan)
        if len(res) == resn:
            res.append(np.nan)

    df['fastroc'] = res
    print(df.head)
    df.to_csv(args.o, sep=',', index=False)

    return 0
    def __init__(self, dbname, cutoff, shapeOnly):
        self.cutoff = cutoff

        # set up and options and database based upon shapeOnly
        self.defaultOptions = oefastrocs.OEShapeDatabaseOptions()
        dbtype = oefastrocs.OEShapeDatabaseType_Default
        if shapeOnly:
            dbtype = oefastrocs.OEShapeDatabaseType_Shape

        self.defaultOptions.SetScoreType(dbtype)
        self.shapedb = oefastrocs.OEShapeDatabase(dbtype)
        self.dbmols = []
        volumes = []

        # read in database
        ifs = oechem.oemolistream()
        if not ifs.open(dbname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

        count = 0
        for mol in ifs.GetOEGraphMols():
            title = mol.GetTitle()
            if not title:
                title = "Untitled" + str(count)
                mol.SetTitle(title)
                count += 1

            idx = self.shapedb.AddMol(oechem.OEMol(mol))

            volume = oeshape.OEGetCachedSelfShape(mol)
            if volume == 0.0:
                volume = oeshape.OESelfShape(mol)
            volumes.append((volume, idx))

            dbmol = oechem.OEGraphMol(mol, oechem.OEMolBaseType_OEDBMol)
            dbmol.Compress()
            self.dbmols.append(dbmol)

        numMols = len(volumes)

        # find the molecule with the median volume as our first query
        volumes.sort()
        medianVolume, medianIdx = volumes[numMols // 2]

        self.nextClusterHeadIdx = medianIdx
        self.remainingMolecules = numMols

        self.tanimotos = [0.0] * numMols

        self.scoreGetter = GetScoreGetter(shapeOnly)
    def __init__(self, shapedb, querymolstr, nhits, iformat, oformat,
                 errorLevel, **kwargs):
        """ Create a new thread to perform a query. The query doesn't
        execute until start is called.
        shapedb - database to run the query against

        See MCMolShapeDatabase.GetBestOverlays for a description of
        the querymolstr and nhits arguments.
        """
        Thread.__init__(self)

        self.shapeOnly = kwargs.pop('shapeOnly', False)
        self.tversky = kwargs.pop('tversky', False)
        self.altStarts = kwargs.pop('altStarts', False)
        self.randStarts = kwargs.pop('randStarts', False)

        self.shapedb = shapedb
        self.querymolstr = querymolstr
        self.iformat = iformat
        self.oformat = oformat
        self.scoretype = GetDatabaseType(self.shapeOnly)
        self.simFuncType = GetSimFuncType(self.tversky)

        numHistBins = 200
        if self.shapeOnly:
            numHistBins = 100
        self.tracer = oefastrocs.OEDBTracer(numHistBins)
        self.options = oefastrocs.OEShapeDatabaseOptions()
        self.options.SetTracer(self.tracer)
        self.options.SetLimit(nhits)
        self.options.SetScoreType(self.scoretype)
        self.options.SetSimFunc(self.simFuncType)

        if self.altStarts:
            self.options.SetInitialOrientation(GetStartType(self.altStarts))
            if self.randStarts:
                self.options.SetNumRandomStarts(self.randStarts)

        self.lock = Lock()
        self.errorLevel = errorLevel
    def GetCluster(self, query):
        options = oefastrocs.OEShapeDatabaseOptions(self.defaultOptions)

        dots = oechem.OEDots(10000, 200, "molecules searched")

        minTani = sys.float_info.max
        minIdx = None
        for score in self.shapedb.GetScores(query, options):
            idx = score.GetMolIdx()
            # check if already in a cluster
            if self.dbmols[idx] is None:
                continue

            if self.cutoff < self.scoreGetter(score):
                yield self._removeMolecule(idx), score
            else:
                self.tanimotos[idx] = max(self.tanimotos[idx], self.scoreGetter(score))

                minTani, minIdx = min((minTani, minIdx), (self.tanimotos[idx], idx))
            dots.Update()
        dots.Total()

        self.nextClusterHeadIdx = minIdx
def main(argv=[__name__]):
    itf = oechem.OEInterface(InterfaceData, argv)

    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-dbase")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-dbase"))

    colname = "TanimotoCombo"
    getter = oefastrocs.OEShapeDatabaseScore.GetTanimotoCombo
    dbtype = oefastrocs.OEShapeDatabaseType_Default
    if itf.GetBool("-shapeOnly"):
        colname = "ShapeTanimoto"
        getter = oefastrocs.OEShapeDatabaseScore.GetShapeTanimoto
        dbtype = oefastrocs.OEShapeDatabaseType_Shape

    csvwriter = csv.writer(open(itf.GetString("-matrix"), 'w'))
    csvwriter.writerow(["Title1", "Title2", colname])

    shapedb = oefastrocs.OEShapeDatabase(dbtype)
    options = oefastrocs.OEShapeDatabaseOptions()
    options.SetScoreType(dbtype)

    lmat = [[]]
    titles = []
    for mol in ifs.GetOEMols():
        if titles:
            bestscores = [0.0] * len(titles)
            for conf in mol.GetConfs():
                for score in shapedb.GetScores(conf, options):
                    midx = score.GetMolIdx()
                    bestscores[midx] = max(bestscores[midx], getter(score))

            lmat.append(bestscores)

        shapedb.AddMol(mol)

        title = mol.GetTitle()
        if not title:
            title = str(len(titles) + 1)
        titles.append(title)

    # write csv file
    csvwriter = csv.writer(open(itf.GetString("-matrix"), 'w'))
    csvwriter.writerow(titles)
    nrows = len(titles)
    for i in range(nrows):
        row = [i + 1]
        for j in range(nrows):
            val = 2.0
            if itf.GetBool("-shapeOnly"):
                val = 1.0

            if j > i:
                val -= lmat[j][i]
            elif j < i:
                val -= lmat[i][j]
            elif j == i:
                val = 0.0

            row.append("%.3f" % val)

        csvwriter.writerow(row)

    return 0
Exemple #6
0
def main(argv=[__name__]):

    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument(
        'database',
        help='File containing the database molecules to be search \
                              (format not restricted to *.oeb).')
    parser.add_argument(
        'query',
        default=[],
        nargs='+',
        help='File containing the query molecule(s) to be search \
                              (format not restricted to *.oeb).')
    parser.add_argument(
        '--nHits',
        dest='nHits',
        type=int,
        default=100,
        help='Number of hits to return (default = number of database mols).')
    parser.add_argument('--cutoff',
                        dest='cutoff',
                        type=float,
                        default=argparse.SUPPRESS,
                        help='Specify a cutoff criteria for scores.')
    parser.add_argument(
        '--tversky',
        dest='tversky',
        action='store_true',
        default=argparse.SUPPRESS,
        help='Switch to Tversky similarity scoring (default = Tanimoto).')

    args = parser.parse_args()

    dbname = args.database

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # set options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(args.nHits)
    print("Number of hits set to %u" % opts.GetLimit())
    if hasattr(args, 'cutoff') is not False:
        opts.SetCutoff(args.cutoff)
        print("Cutoff set to %f" % args.cutoff)
    if hasattr(args, 'tversky') is not False:
        opts.SetSimFunc(args.tversky)
        print("Tversky similarity scoring set.")

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("\nOpening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    dots.Total()
    print("%f seconds to load database\n" % timer.Elapsed())

    for qfname in args.query:

        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        mcmol = oechem.OEMol()
        if not oechem.OEReadMolecule(qfs, mcmol):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)
        qfs.rewind()

        ext = oechem.OEGetFileExtension(qfname)

        qmolidx = 0
        while oechem.OEReadMolecule(qfs, mcmol):

            # write out to file name based on molecule title
            ofs = oechem.oemolostream()
            moltitle = mcmol.GetTitle()
            if len(moltitle) == 0:
                moltitle = str(qmolidx)
            ofname = moltitle + "_results." + ext
            if not ofs.open(ofname):
                oechem.OEThrow.Fatal("Unable to open '%s'" % argv[4])

            print("Searching for %s of %s (%s conformers)" %
                  (moltitle, qfname, mcmol.NumConfs()))

            qconfidx = 0
            for conf in mcmol.GetConfs():

                for score in dbase.GetSortedScores(conf, opts):

                    dbmol = oechem.OEMol()
                    dbmolidx = score.GetMolIdx()
                    if not moldb.GetMolecule(dbmol, dbmolidx):
                        print(
                            "Unable to retrieve molecule '%u' from the database"
                            % dbmolidx)
                        continue

                    mol = oechem.OEGraphMol(
                        dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

                    oechem.OESetSDData(mol, "QueryConfidx", "%s" % qconfidx)
                    oechem.OESetSDData(mol, "ShapeTanimoto",
                                       "%.4f" % score.GetShapeTanimoto())
                    oechem.OESetSDData(mol, "ColorTanimoto",
                                       "%.4f" % score.GetColorTanimoto())
                    oechem.OESetSDData(mol, "TanimotoCombo",
                                       "%.4f" % score.GetTanimotoCombo())
                    score.Transform(mol)

                    oechem.OEWriteMolecule(ofs, mol)

                qconfidx += 1

            print("%s conformers processed" % qconfidx)
            print("Wrote results to %s\n" % ofname)

        qmolidx += 1
    return 0
Exemple #7
0
def main(argv=[__name__]):
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()

    opts.SetLimit(5)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everthing to a similary named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % argv[4])
        oechem.OEWriteMolecule(ofs, query)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            print("Score for mol %u(conf %u) %f shape %f color" %
                  (score.GetMolIdx(), score.GetConfIdx(),
                   score.GetShapeTanimoto(), score.GetColorTanimoto()))
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" %
                      molidx)
                continue

            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
            oechem.OESetSDData(mol, "ShapeTanimoto",
                               "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto",
                               "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo",
                               "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)

    return 0
def main(argv=[__name__]):
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetInitialOrientation(
        oefastrocs.OEFastROCSOrientation_UserInertialStarts)

    opts.SetLimit(5)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everthing to a similary named file
        ofs = oechem.oemolostream()
        ofname = base + "_user_results." + ext
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % argv[4])
        oechem.OEWriteMolecule(ofs, query)

        startsCoords = oechem.OEFloatVector()
        atomIdx = 1
        xyz = query.GetCoords()[atomIdx]
        for x in xyz:
            startsCoords.append(x)
        if not len(startsCoords) % 3 == 0:
            oechem.OEThrow.Fatal(
                "Something went wrong whilst reading in user-starts coordinates"
            )

        opts.SetUserStarts(oechem.OEFloatVector(startsCoords),
                           int(len(startsCoords) / 3))

        opts.SetMaxOverlays(opts.GetNumInertialStarts() *
                            opts.GetNumUserStarts())

        if opts.GetInitialOrientation(
        ) == oefastrocs.OEFastROCSOrientation_UserInertialStarts:
            numStarts = opts.GetNumUserStarts()
            print("This example will use %u starts" % numStarts)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            print("Score for mol %u(conf %u) %f shape %f color" %
                  (score.GetMolIdx(), score.GetConfIdx(),
                   score.GetShapeTanimoto(), score.GetColorTanimoto()))
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" %
                      molidx)
                continue

            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
            oechem.OESetSDData(mol, "ShapeTanimoto",
                               "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto",
                               "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo",
                               "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)

    return 0
def main(argv=[__name__]):
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" %
                             dbname)

    dots.Total()
    print("%s seconds to load database" % timer.Elapsed())

    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
    numHits = moldb.NumMols()
    opts.SetLimit(numHits)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % argv[1])

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % argv[1])

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everthing to a similary named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % argv[4])

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" %
                      molidx)
                continue
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTversky",
                               "%.4f" % score.GetShapeTversky())
            oechem.OESetSDData(mol, "ColorTversky",
                               "%.4f" % score.GetColorTversky())
            oechem.OESetSDData(mol, "TverskyCombo",
                               "%.4f" % score.GetTverskyCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)
    return 0