Ejemplo n.º 1
0
    def searchSubStructureWithFingerPrint(self, oeQueryMol, fpType, minFpScore, maxFpResults, matchOpts="graph-relaxed"):
        """Run a substructure graph search over candidates pre-selected by fingerprint score.

        The fingerprint database stored under fpType is scored against the query
        (Tanimoto measure), and the indices of the scored entries are passed to
        searchSubStructure() as the candidate index list.

        Args:
            oeQueryMol (OEmol): OE graph molecule used as the query
            fpType (str): key selecting the fingerprint database used for pre-filtering
            minFpScore (float): fingerprint score cutoff; no cutoff is set when this is falsy (None or 0)
            maxFpResults (int): maximum number of finger print results returned
            matchOpts (str, optional): graph match criteria type (graph-strict|graph-relaxed). Defaults to "graph-relaxed".

        Returns:
            (bool, list): status and the hit list returned by searchSubStructure();
                status is False and the list is empty when an exception is caught
        """
        status, matchL = True, []
        try:
            fingerPrintDb = self.__fpDbD[fpType]
            searchOpts = oegraphsim.OEFPDatabaseOptions(maxFpResults, oegraphsim.OESimMeasure_Tanimoto)
            if minFpScore:
                searchOpts.SetCutoff(minFpScore)
            # Collect the database indices of the fingerprint hits to use as graph-search candidates.
            candidateIdxL = []
            for scoreItem in fingerPrintDb.GetSortedScores(oeQueryMol, searchOpts):
                candidateIdxL.append(scoreItem.GetIdx())
            status, matchL = self.searchSubStructure(oeQueryMol, idxList=candidateIdxL, reverseFlag=False, matchOpts=matchOpts)
        except Exception as err:
            status = False
            logger.exception("Failing with %s", str(err))
        return status, matchL
Ejemplo n.º 2
0
    def getFingerPrintScores(self, oeQueryMol, fpType, minFpScore, maxFpResults):
        """Score the query molecule against one fingerprint database and report the hits.

        Args:
            oeQueryMol (OEmol): OE graph molecule used as the query
            fpType (str): fingerprint type  [TREE,PATH,MACCS,CIRCULAR,LINGO]
            minFpScore (float): min fingerprint match score (0.0-1.0); no cutoff is set when falsy
            maxFpResults (int): maximum number of finger print results returned

        Returns:
            (bool, list): status, finger match list of type (MatchResults); each entry carries
                the title, score and index of a hit but no molecule object. Status is False
                and the list is empty when an exception is caught.
        """
        status = True
        matchL = []
        try:
            fingerPrintDb = self.__fpDbD[fpType]
            searchOpts = oegraphsim.OEFPDatabaseOptions(maxFpResults, oegraphsim.OESimMeasure_Tanimoto)
            if minFpScore:
                searchOpts.SetCutoff(minFpScore)
            # Build into a scratch list so a failure part-way through still yields an empty result.
            collected = []
            for scoreItem in fingerPrintDb.GetSortedScores(oeQueryMol, searchOpts):
                hitIdx = scoreItem.GetIdx()
                collected.append(
                    MatchResults(
                        ccId=self.__oeMolDb.GetTitle(hitIdx),
                        searchType="fp",
                        fpType=fpType,
                        fpScore=scoreItem.GetScore(),
                        oeIdx=hitIdx,
                    )
                )
            matchL = collected
        except Exception as err:
            status = False
            logger.exception("Failing with %s", str(err))
        return status, matchL
Ejemplo n.º 3
0
    def searchFingerPrints(self, oeQueryMol, fpType, minFpScore=None, maxFpResults=50, annotateMols=False, verbose=False):
        """Return fingerprint similarity hits, including the hit molecules, for the input OE molecule.

        Args:
            oeQueryMol (OEmol): OE graph molecule used as the query
            fpType (str): key selecting the fingerprint database to search
            minFpScore (float, optional): fingerprint score cutoff; no cutoff is set when falsy. Defaults to None.
            maxFpResults (int, optional): maximum number of finger print results returned. Defaults to 50.
            annotateMols (bool, optional): attach a descriptive SD data item (tag = fpType) to each hit molecule. Defaults to False.
            verbose (bool, optional): log the database size/type and the search timing. Defaults to False.

        Returns:
            (bool, list): status, finger match list of type (MatchResults). Status is False when
                no (non-empty) fingerprint database is registered for fpType or when an exception
                is caught; in the latter case hits collected before the failure are still returned.
        """
        hL = []
        retStatus = True
        try:
            fpDb = self.__fpDbD[fpType] if fpType in self.__fpDbD else {}
            if not fpDb:
                return False, hL
            #
            opts = oegraphsim.OEFPDatabaseOptions(maxFpResults, oegraphsim.OESimMeasure_Tanimoto)
            if minFpScore:
                opts.SetCutoff(minFpScore)
            #
            if verbose:
                logger.info("Using %d fingerprint %s type %s", fpDb.NumFingerPrints(), fpType, fpDb.GetFPTypeBase().GetFPTypeString())
                startTime = time.time()
            #
            for si in fpDb.GetSortedScores(oeQueryMol, opts):
                idx = si.GetIdx()
                # Allocate a fresh molecule for every hit: GetMolecule() fills its argument in
                # place, so sharing one object would make every MatchResults entry refer to
                # the last molecule read.
                oeMol = oechem.OEGraphMol()
                if not self.__oeMolDb.GetMolecule(oeMol, idx):
                    continue
                ccId = self.__oeMolDb.GetTitle(idx)
                if annotateMols:
                    tS = "For %s index %r %r similarity score %.4f " % (ccId, idx, self.__idxTitleD[idx], si.GetScore())
                    oechem.OESetSDData(oeMol, fpType, tS)
                hL.append(MatchResults(ccId=ccId, oeMol=oeMol, searchType="fp", fpType=fpType, fpScore=si.GetScore()))
            if verbose:
                endTime = time.time()
                logger.info("Fingerprint %s returning %d hits (%.4f sec)", fpType, len(hL), endTime - startTime)
        except Exception as e:
            retStatus = False
            logger.exception("Failing fpType %r with %s", fpType, str(e))

        return retStatus, hL
Ejemplo n.º 4
0
def main(argv=[__name__]):
    """Command-line driver: similarity search of a pre-built fast fingerprint database.

    Reads the query molecule (-query), opens the molecule database (-molfname) and the
    fingerprint database file (-fpdbfname), scores the query against the fingerprints and
    writes every hit that can be loaded to the output file (-out), tagging it with a
    "Similarity score" SD data item. Timing for each phase is reported through OEThrow.Info.

    Args:
        argv (list): command-line arguments handed to the OpenEye interface parser.

    Returns:
        int: always 0, including when command-line parsing fails. Setup failures are
            reported through OEThrow.Fatal (presumably terminating the run - confirm
            against the toolkit documentation).
    """
    interface = oechem.OEInterface()
    oechem.OEConfigure(interface, InterfaceData)

    # Command-line defaults for the search options: limit of 10 hits, Tanimoto measure.
    defaultSearchOpts = oegraphsim.OEFPDatabaseOptions(10, oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(interface, defaultSearchOpts)
    oegraphsim.OEConfigureFPDatabaseMemoryType(interface)

    if not oechem.OEParseCommandLine(interface, argv):
        return 0

    queryPath = interface.GetString("-query")
    molDbPath = interface.GetString("-molfname")
    fpDbPath = interface.GetString("-fpdbfname")
    outPath = interface.GetString("-out")

    # initialize databases

    stopwatch = oechem.OEWallTimer()
    stopwatch.Start()

    queryStream = oechem.oemolistream()
    if not queryStream.open(queryPath):
        oechem.OEThrow.Fatal("Cannot open input file!")

    queryMol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(queryStream, queryMol):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    molDb = oechem.OEMolDatabase()
    if not molDb.Open(molDbPath):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    memoryType = oegraphsim.OEGetFPDatabaseMemoryType(interface)

    fastFpDb = oegraphsim.OEFastFPDatabase(fpDbPath, memoryType)
    if not fastFpDb.IsValid():
        oechem.OEThrow.Fatal("Cannot open fingerprint database!")
    fingerPrintCount = fastFpDb.NumFingerPrints()
    memoryTypeLabel = fastFpDb.GetMemoryTypeString()

    outStream = oechem.oemolostream()
    if not outStream.open(outPath):
        oechem.OEThrow.Fatal("Cannot open output file!")

    if not oegraphsim.OEAreCompatibleDatabases(molDb, fastFpDb):
        oechem.OEThrow.Fatal("Databases are not compatible!")

    oechem.OEThrow.Info("%5.2f sec to initialize databases" % stopwatch.Elapsed())

    fpTypeBase = fastFpDb.GetFPTypeBase()
    oechem.OEThrow.Info("Using fingerprint type %s" % fpTypeBase.GetFPTypeString())

    searchOpts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(searchOpts, interface)

    # search fingerprint database

    stopwatch.Start()
    scoreIter = fastFpDb.GetSortedScores(queryMol, searchOpts)
    searchMsg = "%5.2f sec to search %d fingerprints %s" % (stopwatch.Elapsed(), fingerPrintCount, memoryTypeLabel)
    oechem.OEThrow.Info(searchMsg)

    # write the hits that can be loaded from the molecule database

    stopwatch.Start()
    writtenCount = 0
    hitMol = oechem.OEGraphMol()
    for scoreItem in scoreIter:
        if not molDb.GetMolecule(hitMol, scoreItem.GetIdx()):
            continue
        writtenCount += 1
        oechem.OESetSDData(hitMol, "Similarity score", "%.2f" % scoreItem.GetScore())
        oechem.OEWriteMolecule(outStream, hitMol)
    writeMsg = "%5.2f sec to write %d hits" % (stopwatch.Elapsed(), writtenCount)
    oechem.OEThrow.Info(writeMsg)

    return 0
Ejemplo n.º 5
0
def main(argv=[__name__]):
    """Command-line driver: similarity search of a fingerprint database built in memory.

    Reads the query molecule (-query), opens the molecule database (-molfname), generates a
    fingerprint of the configured type (tree by default) for every database slot, scores the
    query against them and writes each hit that can be loaded to the output file (-out).
    Timing for each phase is reported through OEThrow.Info.

    Args:
        argv (list): command-line arguments handed to the OpenEye interface parser.

    Returns:
        int: always 0, including when command-line parsing fails. Setup failures are
            reported through OEThrow.Fatal (presumably terminating the run - confirm
            against the toolkit documentation).
    """

    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    # Command-line defaults for the search options: limit of 10 hits, Tanimoto measure.
    defopts = oegraphsim.OEFPDatabaseOptions(10,
                                             oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFingerPrint(
        itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ofname = itf.GetString("-out")

    # initialize databases

    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())
    fpdb = oegraphsim.OEFPDatabase(fptype)

    emptyfp = oegraphsim.OEFingerPrint()
    emptyfp.SetFPTypeBase(fptype)

    nrmols = moldb.GetMaxMolIdx()

    # One fingerprint per molecule slot. An empty fingerprint is added for slots that
    # cannot be read; the hit loop below looks molecules up by fingerprint index, so the
    # two databases need to stay index-aligned.
    mol = oechem.OEGraphMol()
    for idx in range(nrmols):
        if moldb.GetMolecule(mol, idx):
            fpdb.AddFP(mol)
        else:
            fpdb.AddFP(emptyfp)

    nrfps = fpdb.NumFingerPrints()
    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database

    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints" %
                        (timer.Elapsed(), nrfps))

    timer.Start()
    # Count the molecules actually written: the configured limit is only an upper bound
    # and over-reports when fewer hits come back or a hit cannot be loaded.
    nrhits = 0
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            nrhits += 1
            oechem.OEWriteMolecule(ofs, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" %
                        (timer.Elapsed(), nrhits))

    return 0