예제 #1
0
 def __displayAlignedDescriptorPair(self,
                                    ccId,
                                    descrRef,
                                    buildTypeRef,
                                    descrFit,
                                    buildTypeFit,
                                    title=None,
                                    limitPerceptions=True):
     """Depict an aligned pair of molecules built from two descriptors of one component.

     A reference and a fit molecule are built with OeMoleculeFactory from the
     respective descriptor/build type, handed to OeDepictMCSAlignPage and the
     result of alignPair() is written to an SVG file in the working path.
     Each tuple returned by alignPair() is logged at debug level.

     Args:
         ccId (str): component identifier assigned to both molecules and used in the image file name
         descrRef: descriptor used to build the reference molecule
         buildTypeRef (str): build type of the reference descriptor
         descrFit: descriptor used to build the fit molecule
         buildTypeFit (str): build type of the fit descriptor
         title (str, optional): image file name prefix. Defaults to "<buildTypeRef>-<buildTypeFit>".
         limitPerceptions (bool, optional): passed through to OeMoleculeFactory.build(). Defaults to True.
     """
     # Build the reference molecule first and the fit molecule second.
     builtMols = []
     for descr, buildType in ((descrRef, buildTypeRef), (descrFit, buildTypeFit)):
         factory = OeMoleculeFactory()
         factory.setDescriptor(descr, buildType, ccId)
         factory.build(molBuildType=buildType,
                       limitPerceptions=limitPerceptions)
         builtMols.append(factory.getMol())
     refMol, fitMol = builtMols
     #
     displayOpts = {
         "labelAtomName": True,
         "labelAtomCIPStereo": True,
         "labelAtomIndex": False,
         "labelBondIndex": False,
         "highlightStyleFit": "ballAndStickInverse",
         "bondDisplayWidth": 0.5,
     }
     aligner = OeDepictMCSAlignPage()
     aligner.setSearchType(sType="graph-relaxed", minAtomMatchFraction=0.50)
     aligner.setDisplayOptions(**displayOpts)
     aligner.setRefMol(refMol, ccId)
     aligner.setFitMol(fitMol, ccId)
     #
     if title:
         filePrefix = title
     else:
         filePrefix = buildTypeRef + "-" + buildTypeFit
     imgPath = os.path.join(self.__workPath, filePrefix + "-" + ccId + ".svg")
     logger.info("Using image path %r", imgPath)
     atomMapL = aligner.alignPair(imagePath=imgPath)
     if not atomMapL:
         return
     logger.info("%s aligned image path %r", ccId, imgPath)
     for refCc, refAtom, fitCc, fitAtom in atomMapL:
         logger.debug("%5s %-5s %5s %-5s", refCc, refAtom, fitCc, fitAtom)
예제 #2
0
    def buildOeBinaryMolCacheFromIndex(self,
                                       filePath,
                                       ccIdxD,
                                       quietFlag=False,
                                       fpTypeList=None,
                                       limitPerceptions=False,
                                       suppressHydrogens=False):
        """Build cache of OEGraphMol() objects from the input chemical component search index.

        Each index entry is built from its "smiles" value as an "oe-iso-smiles"
        descriptor and written to an OEB format output stream. The output stream
        is closed before returning so that the cache file is complete on disk.

        Args:
            filePath (str): output cache file path
            ccIdxD (dict): search index dictionary (identifier -> dict with a "smiles" key)
            quietFlag (bool, optional): suppress OE output. Defaults to False.
            fpTypeList (list, optional): list of fingerprint types. Defaults to None.
            limitPerceptions (bool, optional): suppress automatic chemical perceptions. Defaults to False.
            suppressHydrogens (bool, optional): when False, explicit hydrogens and simple atom
                names are added before the molecule is written; the flag is also passed to getMol().
                Defaults to False.

        Returns:
            (int, int, list): chem comp success count, error count, chem comp identifier failure list

        Note:
            Any exception is logged and swallowed; the counts accumulated up to that
            point are returned and the error count is not incremented for it.
        """
        failIdList = []
        ccCount = 0
        errCount = 0
        startTime = time.time()
        try:
            ofs = oechem.oemolostream()
            ofs.SetFormat(oechem.OEFormat_OEB)
            if ofs.open(filePath):
                try:
                    oemf = OeMoleculeFactory()
                    if quietFlag:
                        oemf.setQuiet()
                    for searchCcId, ccIdx in ccIdxD.items():
                        oemf.setDescriptor(ccIdx["smiles"], "oe-iso-smiles",
                                           searchCcId)
                        ok = oemf.build(molBuildType="oe-iso-smiles",
                                        limitPerceptions=limitPerceptions)
                        if not ok:
                            # build failed incomplete component (e.g. missing atoms or bonds)
                            errCount += 1
                            failIdList.append(searchCcId)
                            continue
                        if fpTypeList:
                            fpOk = oemf.addFingerPrints(fpTypeList)
                            if not fpOk:
                                # A fingerprint failure is reported but the molecule is still cached.
                                logger.info("Fingerprint generation fails for %r",
                                            searchCcId)
                        if not suppressHydrogens:
                            oemf.addExplicitHydrogens()
                            oemf.setSimpleAtomNames()
                        oeMol = oemf.getMol(
                            suppressHydrogens=suppressHydrogens)
                        oechem.OEWriteMolecule(ofs, oeMol)
                        ccCount += 1
                finally:
                    # Always release the stream once it has been opened, even if a
                    # build or write raises, so buffered output reaches the file.
                    ofs.close()
            else:
                logger.error("Unable to open cache database %s", filePath)
                errCount += 1
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        #
        endTime = time.time()
        logger.info("Completed operation at %s (%.4f seconds)",
                    time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                    endTime - startTime)
        return ccCount, errCount, failIdList
    def testCompareByBuildType(self):
        """Compare SMILES from molecules built by build type with indexed and regenerated SMILES.

        For each build type and component, the molecule is built from the chemical
        component definition and its isomeric/canonical SMILES are compared with the
        "oe-iso-smiles"/"oe-smiles" values in the index. The generated isomeric SMILES
        is then used to rebuild the molecule and the regenerated SMILES are compared
        with the generated ones. Components whose regenerated SMILES differ between
        build types are counted at the end.

        Counts noted by the original author:
            all build types 8769 (all)
            connect - smiles 6743
            model vs iso smiles 5937
            ideal vs iso smiles 7047
        """
        depictFlag = False
        ccMismatchD = {}
        genMismatchD = {}
        genSmilesD = {}
        try:
            ccMolD, ccIdxD = self.__getChemCompDefs()
            #
            usePerceptionLimit = True
            # Other available build types: "model-xyz", "connection-table"
            buildTypes = ["ideal-xyz", "oe-iso-smiles"]
            #
            tStart = time.time()
            factory = OeMoleculeFactory()
            factory.setQuiet()
            for buildType in buildTypes:
                for ccId, idxD in ccIdxD.items():
                    ccObj = ccMolD[ccId]
                    refIsoSmiles = idxD["oe-iso-smiles"]
                    refSmiles = idxD["oe-smiles"]
                    # -- build from the component definition
                    defId = factory.setChemCompDef(ccObj)
                    if not defId:
                        logger.info("Skipping bad component %r", ccId)
                        continue
                    self.assertEqual(defId, ccId)
                    if not factory.build(molBuildType=buildType,
                                         limitPerceptions=usePerceptionLimit):
                        logger.info("Build using %r failed for %s",
                                    buildType, ccId)
                        continue
                    builtMol = factory.getGraphMol()
                    isoSmiles = factory.getIsoSMILES()
                    canSmiles = factory.getCanSMILES()
                    matchesIndex = isoSmiles == refIsoSmiles and canSmiles == refSmiles
                    # -- rebuild from the generated isomeric SMILES
                    regenFactory = OeMoleculeFactory()
                    regenFactory.setQuiet()
                    genId = ccId + "_gen"
                    regenFactory.setDescriptor(isoSmiles, "oe-iso-smiles", genId)
                    if not regenFactory.build(molBuildType="oe-iso-smiles",
                                              limitPerceptions=usePerceptionLimit):
                        logger.info("Build using %r failed for %s",
                                    buildType, genId)
                        continue
                    regenIsoSmiles = regenFactory.getIsoSMILES()
                    regenCanSmiles = regenFactory.getCanSMILES()
                    roundTrips = isoSmiles == regenIsoSmiles and canSmiles == regenCanSmiles
                    perBuildD = genSmilesD.setdefault(ccId, {})
                    perBuildD.setdefault(buildType, []).append(regenIsoSmiles)
                    #
                    logger.debug("%s buildType %s ccEq %r genEq %r", ccId,
                                 buildType, matchesIndex, roundTrips)
                    for isSame, mismatchD in ((matchesIndex, ccMismatchD),
                                              (roundTrips, genMismatchD)):
                        if not isSame:
                            mismatchD.setdefault(buildType, []).append(ccId)

                    if not depictFlag:
                        continue
                    # -- optional depiction of the built molecule
                    suffix = "-limited" if usePerceptionLimit else ""
                    svgPath = os.path.join(
                        self.__workPath,
                        ccId + "-%s%s.svg" % (buildType, suffix))
                    depictor = OeDepict()
                    depictor.setMolTitleList([(ccId, builtMol, "")])
                    depictor.setDisplayOptions(labelAtomName=False,
                                               labelAtomCIPStereo=True,
                                               labelAtomIndex=False,
                                               labelBondIndex=False,
                                               cellBorders=False,
                                               bondDisplayWidth=0.5)
                    depictor.setGridOptions(rows=1, cols=1)
                    depictor.prepare()
                    depictor.write(svgPath)
            logger.info(
                "Completed comparing %d molecules in %d builds (%.4f seconds)",
                len(ccIdxD), len(buildTypes),
                time.time() - tStart)
            #
            # Report round-trip mismatches per build type
            for buildType in buildTypes:
                failedIds = genMismatchD.get(buildType)
                if failedIds is not None:
                    logger.info("GEN %s (%d) %r", buildType,
                                len(failedIds),
                                failedIds)

            # Count components whose regenerated SMILES differ across build types
            inconsistentCount = 0
            for ccId, perBuildD in genSmilesD.items():
                firstSmiles = {smilesL[0] for smilesL in perBuildD.values()}
                if len(firstSmiles) <= 1:
                    continue
                inconsistentCount += 1
                logger.debug("%s diff smiles (%d) %r", ccId, len(firstSmiles), firstSmiles)
            logger.info("Components with inconsistent SMILES %d", inconsistentCount)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
    def testRoundTrip(self):
        """Round trip smiles comparisons -

        Every chemical component definition is built as "oe-iso-smiles" with
        OeMoleculeFactory. Components that cannot be loaded or built are logged
        and skipped. The regeneration/depiction branch is guarded by the local
        ``isDiff`` switch, which is currently fixed to False.
        """
        try:
            ccMolD = self.__getChemCompDefs()
            # useCache = True
            # quietFlag = False
            # molBuildTypeL = ["model-xyz", "ideal-xyz", None]
            # molBuildTypeL = [None]
            buildTypeRef = "oe-iso-smiles"
            oemf1 = OeMoleculeFactory()
            # Manual switch enabling the regeneration and aligned depiction branch below.
            isDiff = False
            #
            for ccId, ccObj in ccMolD.items():
                # ----
                cc = next(iter(PdbxChemCompIt(ccObj)))
                formula = cc.getFormulaWithCharge()
                ccName = cc.getName()
                ifCharge = cc.getFormalChargeAsInt()
                isAmbiguous = cc.getAmbiguousFlag() in ["Y", "y"]
                isCurrent = cc.getReleaseStatus() in ["REL"]
                logger.debug("%s name %r formula %r charge %d", ccId, ccName,
                             formula, ifCharge)
                # ----
                # Keep the dictionary key in ccId rather than overwriting it with the
                # returned identifier; a falsy return is treated as a bad component.
                tId = oemf1.setChemCompDef(ccObj)
                if not tId:
                    logger.info("Skipping bad component %r", ccId)
                    continue
                ok = oemf1.build(molBuildType=buildTypeRef,
                                 limitPerceptions=False)
                if not ok:
                    logger.info(
                        "Build using %r failed for %s (ambiguous flag %r current %r)",
                        buildTypeRef, ccId, isAmbiguous, isCurrent)
                    # A failed build must not be reported as a match below.
                    continue
                #
                if isDiff:
                    # NOTE(review): the value is named/used as an isomeric SMILES but is read
                    # with getCanSMILES() - confirm which form is intended.
                    genIsoSmi = oemf1.getCanSMILES()
                    oemf2 = OeMoleculeFactory()
                    oemf2.setDescriptor(genIsoSmi, "oe-iso-smiles", ccId)
                    oemf2.build(molBuildType="oe-iso-smiles",
                                limitPerceptions=False)
                    regenIsoSmi = oemf2.getIsoSMILES()
                    if genIsoSmi != regenIsoSmi:
                        logger.info(
                            "%s  regenerated ISOSMILES differ \n -- INP: %s\n -- OUT: %s",
                            ccId, genIsoSmi, regenIsoSmi)

                    oed = OeDepictMCSAlignPage()
                    oed.setDisplayOptions(
                        labelAtomName=True,
                        labelAtomCIPStereo=True,
                        labelAtomIndex=False,
                        labelBondIndex=False,
                        labelBondCIPStereo=True,
                        highlightStyleFit="ballAndStickInverse",
                        highLightNotMatchColorRef="pink",
                        bondDisplayWidth=0.5,
                    )
                    oed.setRefMol(oemf1.getGraphMol(), ccId)
                    oed.setFitMol(oemf2.getGraphMol(), ccId)
                    imgPath = os.path.join(
                        self.__workPath,
                        "compare-assigned-" + ccId + "-calc-" + ccId + ".svg")
                    logger.info("Using image path %r", imgPath)
                    aML = oed.alignPair(imagePath=imgPath)
                    if aML:
                        for (rCC, rAt, tCC, tAt) in aML:
                            logger.info("%5s %-5s %5s %-5s", rCC, rAt, tCC,
                                        tAt)
                else:
                    logger.debug("%s matched all cases", ccId)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
예제 #5
0
    def testSssWithFingerPrintFromDescriptor(self):
        """Fingerprint-screened substructure search using molecules built from index descriptors.

        For the first ``numMols`` components of the index, a query molecule is built
        from each available descriptor type (SMILES variants via OeIoUtils.smilesToMol(),
        InChI via OeMoleculeFactory) and searched with every (fpType, minFpScore) pair in
        the fingerprint cutoff list. Each search status is asserted. Descriptors for which
        the component is not found in its own results (per __resultContains()) are logged.
        """
        oemp = OeMoleculeProvider(**self.__myKwargs)
        # NOTE(review): this status is overwritten below without being asserted - confirm intended.
        ok = oemp.testCache()
        ccmP = ChemCompIndexProvider(**self.__myKwargs)
        ccIdxD = ccmP.getIndex()
        ok = ccmP.testCache(minCount=self.__minCount)
        self.assertTrue(ok)
        limitPerceptions = False
        # minFpScore = 0.5
        maxFpResults = 50
        matchOpts = "graph-relaxed"
        numMols = 20
        buildTypeL = [
            "oe-iso-smiles", "oe-smiles", "acdlabs-smiles",
            "cactvs-iso-smiles", "cactvs-smiles", "inchi"
        ]
        oeioU = OeIoUtils()
        oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
        missTupL = []
        missedD = {}
        missedFpD = {}
        # ----
        startTime = time.time()
        for ccId, ccD in list(ccIdxD.items())[:numMols]:
            for buildType in buildTypeL:
                if buildType not in ccD:
                    logger.info("%s missing descriptor %r", ccId, buildType)
                    continue
                logger.debug("Search %s %r", ccId, ccD[buildType])
                # Reset per descriptor so a failed InChI build cannot fall through and
                # search with the molecule left over from the previous descriptor.
                oeMol = None
                if buildType in ["inchi"]:
                    oemf = OeMoleculeFactory()
                    oemf.setDescriptor(ccD["inchi"], "inchi", ccId)
                    ok = oemf.build(molBuildType="inchi",
                                    limitPerceptions=limitPerceptions)
                    if not ok:
                        logger.info("%s build failed with InChI %r", ccId,
                                    ccD["inchi"])
                    else:
                        oeMol = oemf.getMol()
                        if oemf.getInChI() != ccD["inchi"]:
                            logger.info(
                                "%s regenerated InChI differs\n%r\n%s",
                                ccId, ccD["inchi"], oemf.getInChI())
                else:
                    oeMol = oeioU.smilesToMol(
                        ccD[buildType], limitPerceptions=limitPerceptions)
                if not oeMol:
                    continue
                maxHits = 0
                minHits = maxFpResults
                selfHit = False
                for fpType, minFpScore in self.__fpTypeCuttoffList:
                    retStatus, mL = oesU.searchSubStructureWithFingerPrint(
                        oeMol,
                        fpType,
                        minFpScore,
                        maxFpResults,
                        matchOpts=matchOpts)
                    self.assertTrue(retStatus)
                    logger.debug("%s fpType %r hits %d", ccId, fpType,
                                 len(mL))
                    maxHits = max(maxHits, len(mL))
                    minHits = min(minHits, len(mL))
                    matchedSelf = self.__resultContains(ccId, mL)
                    selfHit = selfHit or matchedSelf
                    if not matchedSelf:
                        missedFpD.setdefault(ccId, []).append(
                            (buildType, fpType, len(mL)))
                if not selfHit:
                    # No fingerprint type found the component itself for this descriptor.
                    missedD.setdefault(ccId, []).append(buildType)

                logger.info("%s (%r) buildType %r min hits %d max hits %d",
                            ccId, selfHit, buildType, minHits, maxHits)
        #
        for ccId, missL in missedD.items():
            logger.info("%s missed list %r", ccId, missL)
            if ccId in missedFpD:
                logger.info("%s unmatched for fpTypes %r", ccId,
                            missedFpD[ccId])
        # ----
        # Optional depiction of missed pairs; disabled, and missTupL is never filled above.
        doDepict = False
        if doDepict:
            mD = {}
            for missTup in missTupL:
                mD.setdefault(missTup[0], []).append(missTup[1])

            for ccId, buildTypeL in mD.items():
                idxD = ccIdxD[ccId]
                if "oe-iso-smiles" in idxD:
                    for buildType in buildTypeL:
                        self.__displayAlignedDescriptorPair(
                            ccId,
                            idxD["oe-iso-smiles"],
                            "oe-iso-smiles",
                            idxD[buildType],
                            buildType,
                            title=None,
                            limitPerceptions=True)

        logger.info("%s fingerprints search on %d in (%.4f seconds)",
                    len(self.__fpTypeList), numMols,
                    time.time() - startTime)