def __displayAlignedDescriptorPair(self, ccId, descrRef, buildTypeRef, descrFit, buildTypeFit, title=None, limitPerceptions=True):
    """Build two molecules from descriptors and write their MCS alignment as an SVG image.

    Args:
        ccId (str): chemical component identifier used to label both molecules and the image file
        descrRef (str): descriptor used to build the reference molecule
        buildTypeRef (str): build type of the reference descriptor
        descrFit (str): descriptor used to build the fit molecule
        buildTypeFit (str): build type of the fit descriptor
        title (str, optional): leading part of the image file name.
            Defaults to "<buildTypeRef>-<buildTypeFit>".
        limitPerceptions (bool, optional): passed through to the molecule build. Defaults to True.
    """
    # Build the reference molecule first and the fit molecule second.
    builtMols = []
    for descr, buildType in ((descrRef, buildTypeRef), (descrFit, buildTypeFit)):
        factory = OeMoleculeFactory()
        factory.setDescriptor(descr, buildType, ccId)
        factory.build(molBuildType=buildType, limitPerceptions=limitPerceptions)
        builtMols.append(factory.getMol())
    oeMolRef, oeMolFit = builtMols
    #
    aligner = OeDepictMCSAlignPage()
    aligner.setSearchType(sType="graph-relaxed", minAtomMatchFraction=0.50)
    aligner.setDisplayOptions(
        labelAtomName=True,
        labelAtomCIPStereo=True,
        labelAtomIndex=False,
        labelBondIndex=False,
        highlightStyleFit="ballAndStickInverse",
        bondDisplayWidth=0.5,
    )
    aligner.setRefMol(oeMolRef, ccId)
    aligner.setFitMol(oeMolFit, ccId)
    #
    fileStem = title or (buildTypeRef + "-" + buildTypeFit)
    imgPath = os.path.join(self.__workPath, fileStem + "-" + ccId + ".svg")
    logger.info("Using image path %r", imgPath)
    atomMapL = aligner.alignPair(imagePath=imgPath)
    if not atomMapL:
        # Nothing aligned, so there are no atom correspondences to report.
        return
    logger.info("%s aligned image path %r", ccId, imgPath)
    for refCcId, refAtom, fitCcId, fitAtom in atomMapL:
        logger.debug("%5s %-5s %5s %-5s", refCcId, refAtom, fitCcId, fitAtom)
def buildOeBinaryMolCacheFromIndex(self, filePath, ccIdxD, quietFlag=False, fpTypeList=None, limitPerceptions=False, suppressHydrogens=False):
    """Build cache of OEGraphMol() objects from the input chemical component search index.

    Each index entry is built from its "smiles" value using the "oe-iso-smiles"
    build type and, on success, written to an OEB format output stream.

    Args:
        filePath (str): output cache file path
        ccIdxD (dict): search index dictionary (each value must provide a "smiles" key)
        quietFlag (bool, optional): suppress OE output. Defaults to False.
        fpTypeList (list, optional): list of fingerprint types. Defaults to None.
        limitPerceptions (bool, optional): suppress automatic chemical perceptions. Defaults to False.
        suppressHydrogens (bool, optional): suppress explicit hydrogen count. Defaults to False.

    Returns:
        (int, int, list): chem comp success count, error count, chem comp identifier failure list
    """
    failIdList = []
    ccCount = 0
    errCount = 0
    startTime = time.time()
    try:
        ofs = oechem.oemolostream()
        ofs.SetFormat(oechem.OEFormat_OEB)
        if ofs.open(filePath):
            try:
                oemf = OeMoleculeFactory()
                if quietFlag:
                    oemf.setQuiet()
                for searchCcId, ccIdx in ccIdxD.items():
                    oemf.setDescriptor(ccIdx["smiles"], "oe-iso-smiles", searchCcId)
                    ok = oemf.build(molBuildType="oe-iso-smiles", limitPerceptions=limitPerceptions)
                    if ok and fpTypeList:
                        # A fingerprint failure is reported but does not prevent caching the molecule.
                        fpOk = oemf.addFingerPrints(fpTypeList)
                        if not fpOk:
                            logger.info("Fingerprint generation fails for %r", searchCcId)
                    if ok:
                        if not suppressHydrogens:
                            oemf.addExplicitHydrogens()
                            oemf.setSimpleAtomNames()
                        oeMol = oemf.getMol(suppressHydrogens=suppressHydrogens)
                        oechem.OEWriteMolecule(ofs, oeMol)
                        ccCount += 1
                    else:
                        # build failed incomplete component (e.g. missing atoms or bonds)
                        errCount += 1
                        failIdList.append(searchCcId)
            finally:
                # Always release the stream so the cache file is flushed and the handle
                # is not leaked, including when a build or write raises part-way through.
                ofs.close()
        else:
            logger.error("Unable to open cache database %s", filePath)
            errCount += 1
    except Exception as e:
        # Best-effort contract: failures are logged and the counts gathered so far are returned.
        logger.exception("Failing with %s", str(e))
    #
    endTime = time.time()
    logger.info("Completed operation at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime)
    return ccCount, errCount, failIdList
def testCompareByBuildType(self):
    """Compare SMILES of molecules built from chemical component definitions by build type.

    Every component is built once per configured build type. The isomeric and
    canonical SMILES of the built molecule are compared with the indexed
    "oe-iso-smiles"/"oe-smiles" values, and with the SMILES regenerated after
    rebuilding the molecule from its own isomeric SMILES.

    Counts carried over from the original notes (their exact meaning is not
    evident from the code): all build types 8769 (all), connect - smiles 6743,
    model vs iso smiles 5937, ideal vs iso smiles 7047.
    """
    doDepict = False
    ccResultD = {}
    genResultD = {}
    smilesByBuildTypeD = {}
    try:
        ccMolD, ccIdxD = self.__getChemCompDefs()
        limitPerceptions = True
        molBuildTypeL = ["ideal-xyz", "oe-iso-smiles"]
        #
        startTime = time.time()
        srcFactory = OeMoleculeFactory()
        srcFactory.setQuiet()
        for molBuildType in molBuildTypeL:
            for ccId, idxD in ccIdxD.items():
                ccObj = ccMolD[ccId]
                ccIsoSmiles = idxD["oe-iso-smiles"]
                ccSmiles = idxD["oe-smiles"]
                #
                tId = srcFactory.setChemCompDef(ccObj)
                if not tId:
                    logger.info("Skipping bad component %r", ccId)
                    continue
                self.assertEqual(tId, ccId)
                built = srcFactory.build(molBuildType=molBuildType, limitPerceptions=limitPerceptions)
                if not built:
                    logger.info("Build using %r failed for %s", molBuildType, ccId)
                    continue
                #
                oeMol = srcFactory.getGraphMol()
                oeIsoSmiles = srcFactory.getIsoSMILES()
                oeSmiles = srcFactory.getCanSMILES()
                ccEq = (oeIsoSmiles == ccIsoSmiles) and (oeSmiles == ccSmiles)
                #
                # Rebuild from the generated isomeric SMILES to check it round-trips.
                regenFactory = OeMoleculeFactory()
                regenFactory.setQuiet()
                ccIdGen = ccId + "_gen"
                regenFactory.setDescriptor(oeIsoSmiles, "oe-iso-smiles", ccIdGen)
                built = regenFactory.build(molBuildType="oe-iso-smiles", limitPerceptions=limitPerceptions)
                if not built:
                    logger.info("Build using %r failed for %s", molBuildType, ccIdGen)
                    continue
                #
                oeIsoSmilesGen = regenFactory.getIsoSMILES()
                oeSmilesGen = regenFactory.getCanSMILES()
                genEq = (oeIsoSmiles == oeIsoSmilesGen) and (oeSmiles == oeSmilesGen)
                perBuildD = smilesByBuildTypeD.setdefault(ccId, {})
                perBuildD.setdefault(molBuildType, []).append(oeIsoSmilesGen)
                #
                logger.debug("%s buildType %s ccEq %r genEq %r", ccId, molBuildType, ccEq, genEq)
                if not ccEq:
                    ccResultD.setdefault(molBuildType, []).append(ccId)
                if not genEq:
                    genResultD.setdefault(molBuildType, []).append(ccId)
                #
                if doDepict:
                    pS = "-limited" if limitPerceptions else ""
                    imageName = ccId + "-%s%s.svg" % (molBuildType, pS)
                    imagePath = os.path.join(self.__workPath, imageName)
                    oed = OeDepict()
                    title = ""
                    oed.setMolTitleList([(ccId, oeMol, title)])
                    oed.setDisplayOptions(
                        labelAtomName=False,
                        labelAtomCIPStereo=True,
                        labelAtomIndex=False,
                        labelBondIndex=False,
                        cellBorders=False,
                        bondDisplayWidth=0.5,
                    )
                    oed.setGridOptions(rows=1, cols=1)
                    oed.prepare()
                    oed.write(imagePath)
        #
        elapsed = time.time() - startTime
        logger.info("Completed comparing %d molecules in %d builds (%.4f seconds)", len(ccIdxD), len(molBuildTypeL), elapsed)
        #
        for molBuildType in molBuildTypeL:
            if molBuildType not in genResultD:
                continue
            diffIdL = genResultD[molBuildType]
            logger.info("GEN %s (%d) %r", molBuildType, len(diffIdL), diffIdL)
        #
        # A component is inconsistent when its build types yield different regenerated SMILES.
        numDiff = 0
        for ccId, btD in smilesByBuildTypeD.items():
            tS = {sL[0] for sL in btD.values()}
            if len(tS) > 1:
                numDiff += 1
                logger.debug("%s diff smiles (%d) %r", ccId, len(tS), tS)
        logger.info("Components with inconsistent SMILES %d", numDiff)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testRoundTrip(self):
    """Round trip smiles comparisons -

    For every chemical component definition: build a molecule with the
    "oe-iso-smiles" build type, generate a SMILES string from it, rebuild a
    second molecule from that string and compare the regenerated isomeric
    SMILES with the input string. When the strings differ, an MCS alignment
    image of the two molecules is written to the work path and the atom
    correspondences are logged.
    """
    try:
        ccMolD = self.__getChemCompDefs()
        buildTypeRef = "oe-iso-smiles"
        oemf1 = OeMoleculeFactory()
        #
        for ccId, ccObj in ccMolD.items():
            # ---- descriptive details used only for logging
            ccIt = iter(PdbxChemCompIt(ccObj))
            cc = next(ccIt)
            formula = cc.getFormulaWithCharge()
            ccName = cc.getName()
            ifCharge = cc.getFormalChargeAsInt()
            isAmbiguous = cc.getAmbiguousFlag() in ["Y", "y"]
            isCurrent = cc.getReleaseStatus() in ["REL"]
            logger.debug("%s name %r formula %r charge %d", ccId, ccName, formula, ifCharge)
            # ----
            ccId = oemf1.setChemCompDef(ccObj)
            ok = oemf1.build(molBuildType=buildTypeRef, limitPerceptions=False)
            if not ok:
                logger.info("Build using %r failed for %s (ambiguous flag %r current %r)", buildTypeRef, ccId, isAmbiguous, isCurrent)
                # Without a built reference molecule there is nothing to round trip;
                # skip instead of comparing descriptors of an unbuilt molecule.
                continue
            #
            # NOTE(review): the input string comes from getCanSMILES() while the regenerated
            # string comes from getIsoSMILES() - confirm that mixing the two flavors is intended.
            genIsoSmi = oemf1.getCanSMILES()
            oemf2 = OeMoleculeFactory()
            oemf2.setDescriptor(genIsoSmi, "oe-iso-smiles", ccId)
            oemf2.build(molBuildType="oe-iso-smiles", limitPerceptions=False)
            regenIsoSmi = oemf2.getIsoSMILES()
            if genIsoSmi != regenIsoSmi:
                logger.info("%s regenerated ISOSMILES differ \n -- INP: %s\n -- OUT: %s", ccId, genIsoSmi, regenIsoSmi)
                oed = OeDepictMCSAlignPage()
                oed.setDisplayOptions(
                    labelAtomName=True,
                    labelAtomCIPStereo=True,
                    labelAtomIndex=False,
                    labelBondIndex=False,
                    labelBondCIPStereo=True,
                    highlightStyleFit="ballAndStickInverse",
                    highLightNotMatchColorRef="pink",
                    bondDisplayWidth=0.5,
                )
                oed.setRefMol(oemf1.getGraphMol(), ccId)
                oed.setFitMol(oemf2.getGraphMol(), ccId)
                imgPath = os.path.join(self.__workPath, "compare-assigned-" + ccId + "-calc-" + ccId + ".svg")
                logger.info("Using image path %r", imgPath)
                aML = oed.alignPair(imagePath=imgPath)
                if aML:
                    for (rCC, rAt, tCC, tAt) in aML:
                        logger.info("%5s %-5s %5s %-5s", rCC, rAt, tCC, tAt)
            else:
                logger.debug("%s matched all cases", ccId)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testSssWithFingerPrintFromDescriptor(self):
    """Substructure search with fingerprint prefilter using molecules built from index descriptors.

    For the first `numMols` components of the search index, a query molecule is
    built from each available descriptor type and searched with every configured
    (fingerprint type, minimum score) pair. Descriptors for which no fingerprint
    type retrieves the source component are collected and logged.
    """
    oemp = OeMoleculeProvider(**self.__myKwargs)
    ok = oemp.testCache()
    ccmP = ChemCompIndexProvider(**self.__myKwargs)
    ccIdxD = ccmP.getIndex()
    ok = ccmP.testCache(minCount=self.__minCount)
    self.assertTrue(ok)
    limitPerceptions = False
    maxFpResults = 50
    matchOpts = "graph-relaxed"
    numMols = 20
    buildTypeL = ["oe-iso-smiles", "oe-smiles", "acdlabs-smiles", "cactvs-iso-smiles", "cactvs-smiles", "inchi"]
    oeioU = OeIoUtils()
    oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
    # NOTE(review): missTupL is never populated, so the depiction branch below
    # has nothing to draw even when doDepict is switched on.
    missTupL = []
    missedD = {}
    missedFpD = {}
    # ----
    startTime = time.time()
    for ccId, ccD in list(ccIdxD.items())[:numMols]:
        for buildType in buildTypeL:
            if buildType not in ccD:
                logger.info("%s missing descriptor %r", ccId, buildType)
                continue
            logger.debug("Search %s %r", ccId, ccD[buildType])
            # Reset the query for every descriptor so that a failed build cannot
            # silently reuse the molecule left over from the previous descriptor.
            oeMol = None
            if buildType == "inchi":
                oemf = OeMoleculeFactory()
                oemf.setDescriptor(ccD["inchi"], "inchi", ccId)
                ok = oemf.build(molBuildType="inchi", limitPerceptions=limitPerceptions)
                if not ok:
                    logger.info("%s build failed with InChI %r", ccId, ccD["inchi"])
                else:
                    oeMol = oemf.getMol()
                    if oemf.getInChI() != ccD["inchi"]:
                        logger.info("%s regenerated InChI differs\n%r\n%s", ccId, ccD["inchi"], oemf.getInChI())
            else:
                oeMol = oeioU.smilesToMol(ccD[buildType], limitPerceptions=limitPerceptions)
            if not oeMol:
                continue
            #
            maxHits = 0
            minHits = maxFpResults
            selfHit = False
            for fpType, minFpScore in self.__fpTypeCuttoffList:
                retStatus, mL = oesU.searchSubStructureWithFingerPrint(oeMol, fpType, minFpScore, maxFpResults, matchOpts=matchOpts)
                self.assertTrue(retStatus)
                logger.debug("%s fpType %r hits %d", ccId, fpType, len(mL))
                maxHits = max(maxHits, len(mL))
                minHits = min(minHits, len(mL))
                matchedSelf = self.__resultContains(ccId, mL)
                selfHit = selfHit or matchedSelf
                if not matchedSelf:
                    missedFpD.setdefault(ccId, []).append((buildType, fpType, len(mL)))
            #
            # The descriptor counts as missed only if no fingerprint type found the source component.
            if not selfHit:
                missedD.setdefault(ccId, []).append(buildType)
            logger.info("%s (%r) buildType %r min hits %d max hits %d", ccId, selfHit, buildType, minHits, maxHits)
    #
    for ccId, missL in missedD.items():
        logger.info("%s missed list %r", ccId, missL)
        if ccId in missedFpD:
            logger.info("%s unmatched for fpTypes %r", ccId, missedFpD[ccId])
    # ----
    doDepict = False
    if doDepict:
        mD = {}
        for missTup in missTupL:
            mD.setdefault(missTup[0], []).append(missTup[1])
        for ccId, missedBuildTypeL in mD.items():
            idxD = ccIdxD[ccId]
            if "oe-iso-smiles" in idxD:
                for buildType in missedBuildTypeL:
                    self.__displayAlignedDescriptorPair(ccId, idxD["oe-iso-smiles"], "oe-iso-smiles", idxD[buildType], buildType, title=None, limitPerceptions=True)
    #
    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(self.__fpTypeList), numMols, time.time() - startTime)