Beispiel #1
0
    def getCCDefObj(self, dataContainer, molBuildType="model-xyz", suppressHydrogens=False):
        """Build OE molecule from the input chemical component definition object."""
        #
        oemf = OeMoleculeFactory()
        if not self.__verbose:
            oemf.setQuiet()
        ccId = oemf.setChemCompDef(dataContainer)
        oemf.build(molBuildType=molBuildType)

        if self.__verbose:
            logger.info("  CCId               = %s", ccId)
            logger.info("  Title              = %s", oemf.getTitle())
            logger.info("  SMILES             = %s", oemf.getCanSMILES())
            logger.info("  SMILES (stereo)    = %s", oemf.getIsoSMILES())
            logger.info("  Formula (Hill)     = %s", oemf.getFormula())
            logger.info("  InChI key          = %s", oemf.getInChIKey())
            logger.info("  InChI              = %s", oemf.getInChI())

        fD = {}
        fD = {"Formula": oemf.getFormula(), "SMILES": oemf.getCanSMILES(), "SMILES_STEREO": oemf.getIsoSMILES(), "InChI": oemf.getInChI(), "InChIKey": oemf.getInChIKey()}

        if suppressHydrogens:
            tMol = oemf.getGraphMolSuppressH()
        else:
            tMol = oemf.getMol()

        fD["OEMOL"] = tMol
        fD["xyz"] = oemf.getAtomDetails(xyzType="model")

        return (ccId, tMol, fD)
Beispiel #2
0
    def getCCDefFile(self, ccFilePath, molBuildType="model-xyz", suppressHydrogens=False):
        """Fetch the molecule definition (ccPath) and build OE molecules
        for comparison.

        """
        #
        mU = MarshalUtil(workPath=self.__workPath)
        rdCcObjL = mU.doImport(ccFilePath, fmt="mmcif")
        oemf = OeMoleculeFactory()
        if not self.__verbose:
            oemf.setQuiet()
        ccId = oemf.setChemCompDef(rdCcObjL[0])
        oemf.build(molBuildType=molBuildType)

        if self.__verbose:
            logger.info("  CCId               = %s", ccId)
            logger.info("  Title              = %s", oemf.getTitle())
            logger.info("  SMILES             = %s", oemf.getCanSMILES())
            logger.info("  SMILES (stereo)    = %s", oemf.getIsoSMILES())
            logger.info("  Formula (Hill)     = %s", oemf.getFormula())
            logger.info("  InChI key          = %s", oemf.getInChIKey())
            logger.info("  InChI              = %s", oemf.getInChI())

        fD = {}
        fD = {"Formula": oemf.getFormula(), "SMILES": oemf.getCanSMILES(), "SMILES_STEREO": oemf.getIsoSMILES(), "InChI": oemf.getInChI(), "InChIKey": oemf.getInChIKey()}

        if suppressHydrogens:
            tMol = oemf.getGraphMolSuppressH()
        else:
            tMol = oemf.getMol()

        fD["OEMOL"] = tMol
        fD["xyz"] = oemf.getAtomDetails(xyzType="model")

        return (ccId, tMol, fD)
    def testCompareByBuildType(self):
        """Compare depictions constructed molecules with various builds from chemical defintions -
        all build types 8769 (all)
        connect - smiles 6743
        model vs iso smiles 5937
        ideal va iso smiles  7047
        """
        doDepict = False
        ccResultD = {}
        genResultD = {}
        smilesByBuildTypeD = {}
        try:
            ccMolD, ccIdxD = self.__getChemCompDefs()
            #
            limitPerceptions = True
            # molBuildTypeL = ["model-xyz", "ideal-xyz", "connection-table", "oe-iso-smiles"]
            molBuildTypeL = ["ideal-xyz", "oe-iso-smiles"]
            #
            startTime = time.time()
            oefm = OeMoleculeFactory()
            oefm.setQuiet()
            for molBuildType in molBuildTypeL:
                for ccId, idxD in ccIdxD.items():
                    ccObj = ccMolD[ccId]
                    # ----
                    ccIsoSmiles = idxD["oe-iso-smiles"]
                    ccSmiles = idxD["oe-smiles"]
                    # ----
                    tId = oefm.setChemCompDef(ccObj)
                    if not tId:
                        logger.info("Skipping bad component %r", ccId)
                        continue
                    self.assertEqual(tId, ccId)
                    ok = oefm.build(molBuildType=molBuildType,
                                    limitPerceptions=limitPerceptions)
                    if not ok:
                        logger.info("Build using %r failed for %s",
                                    molBuildType, ccId)
                        continue
                    # ------
                    oeMol = oefm.getGraphMol()
                    oeIsoSmiles = oefm.getIsoSMILES()
                    oeSmiles = oefm.getCanSMILES()
                    ccEq = oeIsoSmiles == ccIsoSmiles and oeSmiles == ccSmiles
                    #
                    oefmR = OeMoleculeFactory()
                    oefmR.setQuiet()
                    ccIdGen = ccId + "_gen"
                    oefmR.setDescriptor(oeIsoSmiles, "oe-iso-smiles", ccIdGen)
                    ok = oefmR.build(molBuildType="oe-iso-smiles",
                                     limitPerceptions=limitPerceptions)
                    if not ok:
                        logger.info("Build using %r failed for %s",
                                    molBuildType, ccIdGen)
                        continue
                    # ------
                    #
                    # oeMolGen = oefmR.getGraphMol()
                    oeIsoSmilesGen = oefmR.getIsoSMILES()
                    oeSmilesGen = oefmR.getCanSMILES()
                    genEq = oeIsoSmiles == oeIsoSmilesGen and oeSmiles == oeSmilesGen
                    smilesByBuildTypeD.setdefault(ccId, {}).setdefault(
                        molBuildType, []).append(oeIsoSmilesGen)
                    #
                    logger.debug("%s buildType %s ccEq %r genEq %r", ccId,
                                 molBuildType, ccEq, genEq)
                    if not ccEq:
                        ccResultD.setdefault(molBuildType, []).append(ccId)
                    if not genEq:
                        genResultD.setdefault(molBuildType, []).append(ccId)

                    if doDepict:
                        pS = "-limited" if limitPerceptions else ""
                        imagePath = os.path.join(
                            self.__workPath,
                            ccId + "-%s%s.svg" % (molBuildType, pS))
                        oed = OeDepict()
                        title = ""
                        oed.setMolTitleList([(ccId, oeMol, title)])
                        oed.setDisplayOptions(labelAtomName=False,
                                              labelAtomCIPStereo=True,
                                              labelAtomIndex=False,
                                              labelBondIndex=False,
                                              cellBorders=False,
                                              bondDisplayWidth=0.5)
                        oed.setGridOptions(rows=1, cols=1)
                        oed.prepare()
                        oed.write(imagePath)
            logger.info(
                "Completed comparing %d molecules in %d builds (%.4f seconds)",
                len(ccIdxD), len(molBuildTypeL),
                time.time() - startTime)
            #
            #
            for molBuildType in molBuildTypeL:
                if molBuildType in genResultD:
                    logger.info("GEN %s (%d) %r", molBuildType,
                                len(genResultD[molBuildType]),
                                genResultD[molBuildType])

            numDiff = 0
            for ccId, btD in smilesByBuildTypeD.items():
                tS = set()
                for molBuildType, sL in btD.items():
                    tS.add(sL[0])
                if len(tS) > 1:
                    numDiff += 1
                    logger.debug("%s diff smiles (%d) %r", ccId, len(tS), tS)
            logger.info("Components with inconsistent SMILES %d", numDiff)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
    def __testReproduceDescriptors(self, molBuildType, limitPerceptions=True):
        #
        ccMolD, ccIdxD = self.__getChemCompDefs()
        oemf = OeMoleculeFactory()
        countD = defaultdict(int)
        for ccId, ccDef in ccMolD.items():
            tId = oemf.setChemCompDef(ccDef)
            if ccId != tId:
                continue
            oemf.build(molBuildType=molBuildType,
                       limitPerceptions=limitPerceptions)
            oeMol = oemf.getMol()
            #
            countD["total components"] += 1
            if ccId not in ccIdxD:
                logger.info("Missing ccIndex entry for %s", ccId)
                continue
            ccdD = ccIdxD[ccId]
            if ccdD["ambiguous"]:
                countD["ambiguous component"] += 1
                continue
            #
            countD["total molecules"] += 1

            nativeCanIsoSmiles = oechem.OECreateIsoSmiString(oeMol)
            canIsoSmiles = oechem.OEMolToSmiles(oeMol)
            isoSmiles = oemf.getIsoSMILES()
            canSmiles = oemf.getCanSMILES()
            # check interal consistency
            if nativeCanIsoSmiles != isoSmiles:
                logger.error("%s stored and calculated OE smiles differ %s %s",
                             ccId, nativeCanIsoSmiles, isoSmiles)
            if canIsoSmiles != isoSmiles:
                logger.error(
                    "%s calculated OE ISO and canonical smiles differ %s %s",
                    ccId, isoSmiles, canIsoSmiles)

            # compare with archived values
            if isoSmiles != ccdD["oe-iso-smiles"]:
                logger.info("%s ISO SMILES differ \nccd: %r  \nOE:  %r", ccId,
                            ccdD["oe-iso-smiles"], isoSmiles)
                countD["iso_smiles_diff"] += 1
            # ----------
            if canSmiles != ccdD["oe-smiles"]:
                logger.info("%s CAN SMILES differ \nccd: %r  \nOE:  %r", ccId,
                            ccdD["oe-smiles"], canSmiles)
                countD["smiles_diff"] += 1

            formula = oemf.getFormula()
            if formula.upper() != ccdD["formula"].upper():
                logger.debug("%s formulas differ \nccd: %r  \nOE:  %r", ccId,
                             ccdD["formula"], formula)
                countD["formula_diff"] += 1
            # ---------
            inchiKey = oemf.getInChIKey()
            if inchiKey != ccdD["inchikey"]:
                logger.debug("%s InChI keys differ \nccd: %r  \nOE:  %r", ccId,
                             ccdD["inchikey"], inchiKey)
                countD["inchikey_diff"] += 1
            #
            inchi = oemf.getInChI()
            if inchi != ccdD["inchi"]:
                logger.debug("%s InChIs differ \nccd: %r  \nOE:  %r", ccId,
                             ccdD["inchi"], inchi)
                countD["inchi_diff"] += 1
        #
        #
        for ky, vl in countD.items():
            logger.info("%-12s %6d", ky, vl)
    def testRoundTrip(self):
        """Round trip smiles comparisons -"""
        try:
            ccMolD = self.__getChemCompDefs()
            # useCache = True
            # quietFlag = False
            # molBuildTypeL = ["model-xyz", "ideal-xyz", None]
            # molBuildTypeL = [None]
            buildTypeRef = "oe-iso-smiles"
            oemf1 = OeMoleculeFactory()
            oemf2 = OeMoleculeFactory()
            #
            for ccId, ccObj in ccMolD.items():
                # ----
                ccIt = iter(PdbxChemCompIt(ccObj))
                cc = next(ccIt)
                formula = cc.getFormulaWithCharge()
                # ccId = cc.getId()
                ccName = cc.getName()
                ifCharge = cc.getFormalChargeAsInt()
                isAmbiguous = cc.getAmbiguousFlag() in ["Y", "y"]
                isCurrent = cc.getReleaseStatus() in ["REL"]
                logger.debug("%s name %r formula %r charge %d", ccId, ccName,
                             formula, ifCharge)
                # ----
                ccId = oemf1.setChemCompDef(ccObj)
                ok = oemf1.build(molBuildType=buildTypeRef,
                                 limitPerceptions=False)
                if not ok:
                    logger.info(
                        "Build using %r failed for %s (ambiguous flag %r current %r)",
                        buildTypeRef, ccId, isAmbiguous, isCurrent)
                #
                isDiff = False
                #
                if isDiff:
                    genIsoSmi = oemf1.getCanSMILES()
                    oemf2 = OeMoleculeFactory()
                    oemf2.setDescriptor(genIsoSmi, "oe-iso-smiles", ccId)
                    oemf2.build(molBuildType="oe-iso-smiles",
                                limitPerceptions=False)
                    regenIsoSmi = oemf2.getIsoSMILES()
                    if genIsoSmi != regenIsoSmi:
                        logger.info(
                            "%s  regenerated ISOSMILES differ \n -- INP: %s\n -- OUT: %s",
                            ccId, genIsoSmi, regenIsoSmi)

                    oed = OeDepictMCSAlignPage()
                    oed.setDisplayOptions(
                        labelAtomName=True,
                        labelAtomCIPStereo=True,
                        labelAtomIndex=False,
                        labelBondIndex=False,
                        labelBondCIPStereo=True,
                        highlightStyleFit="ballAndStickInverse",
                        highLightNotMatchColorRef="pink",
                        bondDisplayWidth=0.5,
                    )
                    oed.setRefMol(oemf1.getGraphMol(), ccId)
                    oed.setFitMol(oemf2.getGraphMol(), ccId)
                    imgPath = os.path.join(
                        self.__workPath,
                        "compare-assigned-" + ccId + "-calc-" + ccId + ".svg")
                    logger.info("Using image path %r", imgPath)
                    aML = oed.alignPair(imagePath=imgPath)
                    if aML:
                        for (rCC, rAt, tCC, tAt) in aML:
                            logger.info("%5s %-5s %5s %-5s", rCC, rAt, tCC,
                                        tAt)
                else:
                    logger.debug("%s matched all cases", ccId)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Beispiel #6
0
    def __getMiscFile(self, filePath, suppressHydrogens=False, importType="2D", title=None, largestPart=False):
        """Fetch a miscellaneous chemical file (ccPath) and build OE molecules
        for comparison.

        """
        try:
            oeioU = OeIoUtils()
            oeMolL = oeioU.fileToMols(filePath, use3D=importType == "3D", largestPart=largestPart)
            logger.info("Read (%d) from %s ", len(oeMolL), filePath)
            oeMol = oeMolL[0]

            ccId = title if title else oeMol.GetTitle()
            if title:
                oeMol.SetTitle(ccId)
            #
            oemf = OeMoleculeFactory()
            if not self.__verbose:
                oemf.setQuiet()
            oemf.setOeMol(oeMol, ccId)
            #
            fD = oemf.getOeMoleculeFeatures()
            if self.__verbose:
                logger.info("  Title              = %s", title)
                logger.info("  Title OEMF         = %s", oemf.getTitle())
                logger.info("  SMILES             = %s", oemf.getCanSMILES())
                logger.info("  SMILES (stereo)    = %s", oemf.getIsoSMILES())
                logger.info("  Formula (Hill)     = %s", oemf.getFormula())
                logger.info("  InChI key          = %s", oemf.getInChIKey())
                logger.info("  InChI              = %s", oemf.getInChI())
            #
            ccId = oemf.getTitle()
            if suppressHydrogens:
                tMol = oemf.getGraphMolSuppressH()
            else:
                tMol = oemf.getMol()

            molXyzL = []
            if importType == "3D":
                for atm in tMol.GetAtoms():
                    xyzL = oechem.OEFloatArray(3)
                    tMol.GetCoords(atm, xyzL)
                    molXyzL.append(
                        ComponentAtomDetails(
                            atIdx=atm.GetIdx(),
                            atNo=atm.GetAtomicNum(),
                            atName=atm.GetName(),
                            atType=atm.GetType(),
                            x=xyzL[0],
                            y=xyzL[1],
                            z=xyzL[2],
                            atFormalCharge=atm.GetFormalCharge(),
                        )
                    )
            fD = {}
            fD = {
                "Formula": oemf.getFormula(),
                "SMILES": oemf.getCanSMILES(),
                "SMILES_STEREO": oemf.getIsoSMILES(),
                "InChI": oemf.getInChI(),
                "InChIKey": oemf.getInChIKey(),
                "xyz": molXyzL,
            }
            for atm in tMol.GetAtoms():
                xyzL = oechem.OEFloatArray(3)
                tMol.GetCoords(atm, xyzL)
                if self.__verbose:
                    logger.debug("atom  %s %s %s %s %r", atm.GetIdx(), atm.GetAtomicNum(), atm.GetName(), atm.GetType(), xyzL)

            fD["OEMOL"] = tMol
            return (ccId, tMol, fD)
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return None, None, None