def getCCDefObj(self, dataContainer, molBuildType="model-xyz", suppressHydrogens=False):
    """Construct an OE molecule and a descriptor summary from a component definition.

    Args:
        dataContainer: chemical component definition object handed to
            OeMoleculeFactory.setChemCompDef().
        molBuildType (str, optional): build type forwarded to
            OeMoleculeFactory.build(). Defaults to "model-xyz".
        suppressHydrogens (bool, optional): return the molecule from
            getGraphMolSuppressH() instead of getMol(). Defaults to False.

    Returns:
        tuple: (ccId, molecule, featureD) where featureD carries the keys
            "Formula", "SMILES", "SMILES_STEREO", "InChI", "InChIKey",
            "OEMOL" and "xyz".
    """
    factory = OeMoleculeFactory()
    if not self.__verbose:
        factory.setQuiet()
    ccId = factory.setChemCompDef(dataContainer)
    factory.build(molBuildType=molBuildType)
    if self.__verbose:
        logger.info(" CCId = %s", ccId)
        logger.info(" Title = %s", factory.getTitle())
        logger.info(" SMILES = %s", factory.getCanSMILES())
        logger.info(" SMILES (stereo) = %s", factory.getIsoSMILES())
        logger.info(" Formula (Hill) = %s", factory.getFormula())
        logger.info(" InChI key = %s", factory.getInChIKey())
        logger.info(" InChI = %s", factory.getInChI())
    featureD = {
        "Formula": factory.getFormula(),
        "SMILES": factory.getCanSMILES(),
        "SMILES_STEREO": factory.getIsoSMILES(),
        "InChI": factory.getInChI(),
        "InChIKey": factory.getInChIKey(),
    }
    # Pick the molecule flavour requested by the caller.
    molecule = factory.getGraphMolSuppressH() if suppressHydrogens else factory.getMol()
    featureD["OEMOL"] = molecule
    featureD["xyz"] = factory.getAtomDetails(xyzType="model")
    return (ccId, molecule, featureD)
def getCCDefFile(self, ccFilePath, molBuildType="model-xyz", suppressHydrogens=False):
    """Read a component definition file and build an OE molecule from its first container.

    Args:
        ccFilePath (str): path of the mmCIF definition file to import.
        molBuildType (str, optional): build type forwarded to
            OeMoleculeFactory.build(). Defaults to "model-xyz".
        suppressHydrogens (bool, optional): return the molecule from
            getGraphMolSuppressH() instead of getMol(). Defaults to False.

    Returns:
        tuple: (ccId, molecule, featureD) where featureD carries the keys
            "Formula", "SMILES", "SMILES_STEREO", "InChI", "InChIKey",
            "OEMOL" and "xyz".
    """
    marshal = MarshalUtil(workPath=self.__workPath)
    containerL = marshal.doImport(ccFilePath, fmt="mmcif")
    factory = OeMoleculeFactory()
    if not self.__verbose:
        factory.setQuiet()
    # Only the first container in the file is used.
    ccId = factory.setChemCompDef(containerL[0])
    factory.build(molBuildType=molBuildType)
    if self.__verbose:
        logger.info(" CCId = %s", ccId)
        logger.info(" Title = %s", factory.getTitle())
        logger.info(" SMILES = %s", factory.getCanSMILES())
        logger.info(" SMILES (stereo) = %s", factory.getIsoSMILES())
        logger.info(" Formula (Hill) = %s", factory.getFormula())
        logger.info(" InChI key = %s", factory.getInChIKey())
        logger.info(" InChI = %s", factory.getInChI())
    featureD = {
        "Formula": factory.getFormula(),
        "SMILES": factory.getCanSMILES(),
        "SMILES_STEREO": factory.getIsoSMILES(),
        "InChI": factory.getInChI(),
        "InChIKey": factory.getInChIKey(),
    }
    molecule = factory.getGraphMolSuppressH() if suppressHydrogens else factory.getMol()
    featureD["OEMOL"] = molecule
    featureD["xyz"] = factory.getAtomDetails(xyzType="model")
    return (ccId, molecule, featureD)
def testBuildCifFromOE(self):
    """Build chemical component definition files from OE molecule objects.

    For the first 10 definitions returned by __getChemCompDefs(): load the
    definition, build it with the "model-xyz" build type, add the resulting
    molecule to a fresh OeChemCompUtils and write it to
    "<ccCifPath>/<ccId>-gen.cif". The status of every step, including the
    final write, is asserted.
    """
    try:
        ccMolD = self.__getChemCompDefs()
        oemf = OeMoleculeFactory()
        #
        for ccId, ccObj in list(ccMolD.items())[:10]:
            # ----
            tId = oemf.setChemCompDef(ccObj)
            self.assertEqual(tId, ccId)
            ok = oemf.build(molBuildType="model-xyz")
            self.assertTrue(ok)
            fp = os.path.join(self.__ccCifPath, ccId + "-gen.cif")
            oeMol = oemf.getMol()
            oeccU = OeChemCompUtils()
            ok = oeccU.addOeMol(ccId, oeMol, missingModelXyz=False, writeIdealXyz=False)
            self.assertTrue(ok)
            ok = oeccU.write(fp)
            # The write status used to be assigned and then dropped, so a
            # failed export could not fail the test.
            self.assertTrue(ok)
            # ----
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __buildChemCompSearchIndex(self, ccObjD, descrD, limitPerceptions=False, molLimit=None):
    """Internal method returning a dictionary of related molecular forms per component.

    Args:
        ccObjD (dict): chemical component definition objects keyed by component id.
        descrD (dict): extra SMILES descriptors keyed by component id; these are
            registered as external "chemaxon-smiles" descriptors.
        limitPerceptions (bool, optional): forwarded to
            OeMoleculeFactory.buildRelated(). Defaults to False.
        molLimit (int, optional): stop after this many components of ccObjD
            have been visited. Defaults to None (no limit).

    Returns:
        dict: merged results of buildRelated() for every processed component.
            Any exception is logged and the entries gathered so far are returned.
    """
    rD = {}
    try:
        for ii, ccId in enumerate(ccObjD, 1):
            if molLimit and ii > molLimit:
                break
            # ----
            oemf = OeMoleculeFactory()
            oemf.setQuiet()
            tId = oemf.setChemCompDef(ccObjD[ccId])
            if tId != ccId:
                logger.error("%s chemical component definition import error", ccId)
                # The definition did not load under the expected identifier, so
                # skip it rather than build related forms from a failed import.
                continue
            # ----
            oemf.clearExternalDescriptors()
            for smi in descrD.get(ccId, []):
                oemf.addExternalDescriptor("smiles", smi, "chemaxon-smiles")
            # ----
            smiD = oemf.buildRelated(limitPerceptions=limitPerceptions)
            logger.debug("%s related molecular forms %d", ccId, len(smiD))
            rD.update(smiD)
    except Exception as e:
        logger.exception("Failing with %s", str(e))

    return rD
def __buildChemCompSearchIndex(self, procName, ccIdList, descrD, limitPerceptions=False, quietFlag=False):
    """Internal worker collecting related molecular forms for a list of component ids.

    Args:
        procName (str): label included in every log message from this worker.
        ccIdList (list): component identifiers to process; definitions are
            looked up in self.__ccObjD.
        descrD (dict): extra SMILES descriptors keyed by component id, added as
            external "chemaxon-smiles" descriptors.
        limitPerceptions (bool, optional): forwarded to
            OeMoleculeFactory.buildRelated(). Defaults to False.
        quietFlag (bool, optional): call setQuiet() on each factory. Defaults to False.

    Returns:
        (list, list): values of every non-empty buildRelated() result, and the
            identifiers that were missing, failed to import or produced nothing.
    """
    relatedL = []
    failedL = []
    try:
        for ccId in ccIdList:
            if ccId not in self.__ccObjD:
                logger.error("%s missing chemical definition for %s", procName, ccId)
                failedL.append(ccId)
                continue
            definition = self.__ccObjD[ccId]
            # A new factory is created for each component.
            factory = OeMoleculeFactory()
            if quietFlag:
                factory.setQuiet()
            loadedId = factory.setChemCompDef(definition)
            if loadedId != ccId:
                logger.error("%s %s chemical component definition import error", procName, ccId)
                failedL.append(ccId)
                continue
            factory.clearExternalDescriptors()
            extraSmilesL = descrD[ccId] if ccId in descrD else []
            for smiles in extraSmilesL:
                factory.addExternalDescriptor("smiles", smiles, "chemaxon-smiles")
            relatedD = factory.buildRelated(limitPerceptions=limitPerceptions)
            logger.debug("%s %s related molecular forms %d", procName, ccId, len(relatedD))
            if not relatedD:
                failedL.append(ccId)
                continue
            relatedL.extend(relatedD.values())
    except Exception as e:
        logger.exception("%s failing with %s", procName, str(e))
    return relatedL, failedL
def testRoundTripOps(self):
    """Exercise write/read/write round trips for generated related molecules.

    For the first 10 component definitions, every related molecular form is
    rebuilt from its "smiles" entry, written to disk, read back, and the first
    molecule read is written again to a "-next" file. This is done once with a
    ".mol2" file and once with a ".mol" file (re-written as "-next.sdf").
    """
    try:
        ioUtil = OeIoUtils()
        marshal = MarshalUtil()
        marshal.mkdir(self.__molfileDirPath)
        ccMolD = self.__getChemCompDefs()
        factory = OeMoleculeFactory()
        # (first-pass suffix, second-pass suffix) for each round trip
        suffixPairs = ((".mol2", "-next.mol2"), (".mol", "-next.sdf"))
        for ccId, ccObj in list(ccMolD.items())[:10]:
            loadedId = factory.setChemCompDef(ccObj)
            self.assertEqual(loadedId, ccId)
            relatedIdxD = factory.buildRelated(limitPerceptions=False)
            logger.info("%s generated %d molecular forms", ccId, len(relatedIdxD))
            for sId, idxD in relatedIdxD.items():
                logger.info("sId %r smiles %r", sId, idxD["smiles"])
                for firstSuffix, nextSuffix in suffixPairs:
                    firstPath = os.path.join(self.__molfileDirPath, sId + firstSuffix)
                    builtMol = ioUtil.descriptorToMol(idxD["smiles"], "oe-iso-smiles", limitPerceptions=False, messageTag=None)
                    ioUtil.write(firstPath, builtMol, constantMol=True, addSdTags=True)
                    readBackL = ioUtil.fileToMols(firstPath)
                    nextPath = os.path.join(self.__molfileDirPath, sId + nextSuffix)
                    ioUtil.write(nextPath, readBackL[0], constantMol=True, addSdTags=True)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testBuildRelated(self):
    """Build related molecular forms for the first 100 chemical component definitions.

    Each definition must load under its own identifier; the buildRelated()
    results are merged into one dictionary whose final size is logged.
    """
    try:
        logger.info("Starting")
        ccMolD = self.__getChemCompDefs()
        factory = OeMoleculeFactory(quietMode=True)
        allRelatedD = {}
        firstHundred = list(ccMolD.items())[:100]
        for ccId, ccObj in firstHundred:
            loadedId = factory.setChemCompDef(ccObj)
            self.assertEqual(loadedId, ccId)
            relatedD = factory.buildRelated(limitPerceptions=False)
            logger.info("%s related molecular forms %d", ccId, len(relatedD))
            allRelatedD.update(relatedD)
        logger.info("Total molecular forms (%d)", len(allRelatedD))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def chemCompToMol(self, ccdFilePath, molBuildType="model-xyz", quietFlag=False):
    """Build OE molecules for every chemical component definition in a file.

    Args:
        ccdFilePath (str): path of the mmCIF definition file to import.
        molBuildType (str, optional): build type forwarded to
            OeMoleculeFactory.build(). Defaults to "model-xyz".
        quietFlag (bool, optional): call setQuiet() on the factory. Defaults to False.

    Returns:
        list: molecules for the definitions that loaded and built successfully.
            Any exception is logged and the molecules gathered so far are returned.
    """
    molecules = []
    try:
        containerL = self.__mU.doImport(ccdFilePath, fmt="mmcif")
        logger.info("Read %s with %d definitions", ccdFilePath, len(containerL))
        factory = OeMoleculeFactory()
        if quietFlag:
            factory.setQuiet()
        for container in containerL:
            if not factory.setChemCompDef(container):
                # Definition could not be loaded - nothing to build.
                continue
            if factory.build(molBuildType=molBuildType):
                molecules.append(factory.getMol())
    except Exception as e:
        logger.exception("Loading %s failing with %s", ccdFilePath, str(e))
    return molecules
def testBuilders(self):
    """Build every chemical component definition with each coordinate build type.

    For the "model-xyz" and "ideal-xyz" build types, each definition is loaded
    (its identifier must match the dictionary key) and built. Failed builds are
    logged and counted, and the per-build-type error count is reported.

    The build status used to be overwritten with True right after the build,
    so the failure branch was unreachable and the reported error count was
    always zero. The status from build() is now used directly.
    """
    try:
        ccMolD = self.__getChemCompDefs()
        quietFlag = False
        molBuildTypeL = ["model-xyz", "ideal-xyz"]
        for molBuildType in molBuildTypeL:
            oemf = OeMoleculeFactory()
            if quietFlag:
                oemf.setQuiet()
            #
            eCount = 0
            for tId, ccObj in ccMolD.items():
                ccId = oemf.setChemCompDef(ccObj)
                self.assertEqual(tId, ccId)
                logger.debug("Building %s using molBuildType %r", ccId, molBuildType)
                if ccId:
                    ok = oemf.build(molBuildType=molBuildType)
                    logger.debug("Comparing built component %s using molBuildType %r", ccId, molBuildType)
                    # NOTE: a dedicated comparison step (oemf.compare()) is
                    # disabled here; only the build status is evaluated.
                    if not ok:
                        logger.info("Failing on %s molBuildType %r", ccId, molBuildType)
                        eCount += 1
                else:
                    logger.error("Cannot process %r", ccObj.getName())
            logger.info("Processed %d components molBuildType %r errors %d", len(ccMolD), molBuildType, eCount)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testDepictByBuildType(self):
    """Depict molecules built from chemical definitions with several build types.

    Every identifier in self.__ccIdList is built with each of "model-xyz",
    "ideal-xyz", "connection-table" and "oe-iso-smiles" (perceptions limited),
    and each successful build is rendered to
    "<workPath>/<ccId>-<buildType>-limited.svg".
    """
    try:
        targetIdL = self.__ccIdList
        ccMolD = self.__getChemCompDefs()
        limitPerceptions = True
        molBuildTypeL = ["model-xyz", "ideal-xyz", "connection-table", "oe-iso-smiles"]
        startTime = time.time()
        factory = OeMoleculeFactory()
        for molBuildType in molBuildTypeL:
            for ccId in targetIdL:
                definition = ccMolD[ccId]
                loadedId = factory.setChemCompDef(definition)
                self.assertEqual(loadedId, ccId)
                built = factory.build(molBuildType=molBuildType, limitPerceptions=limitPerceptions)
                if not built:
                    logger.info("Build using %r failed for %s", molBuildType, ccId)
                    continue
                graphMol = factory.getGraphMol()
                perceptionTag = "-limited" if limitPerceptions else ""
                fileSuffix = "-%s%s.svg" % (molBuildType, perceptionTag)
                imagePath = os.path.join(self.__workPath, ccId + fileSuffix)
                depictor = OeDepict()
                # The depiction is written without a title.
                depictor.setMolTitleList([(ccId, graphMol, "")])
                depictor.setDisplayOptions(
                    labelAtomName=False,
                    labelAtomCIPStereo=True,
                    labelAtomIndex=False,
                    labelBondIndex=False,
                    cellBorders=False,
                    bondDisplayWidth=0.5,
                )
                depictor.setGridOptions(rows=1, cols=1)
                depictor.prepare()
                depictor.write(imagePath)
        elapsed = time.time() - startTime
        logger.info("Completed depictions on %d molecules (%.4f seconds)", len(targetIdL) * len(molBuildTypeL), elapsed)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testBuildRelatedExtra(self):
    """Build related molecular forms including extra ChemAxon SMILES descriptors.

    The descriptor index from ChemAxonDescriptorProvider supplies additional
    SMILES per component; these are registered as external descriptors before
    buildRelated() is run on each of the first 100 definitions.
    """
    try:
        provider = ChemAxonDescriptorProvider(
            ccUrlTarget=self.__ccUrlTarget,
            birdUrlTarget=self.__birdUrlTarget,
            cachePath=self.__cachePath,
            useCache=True,
            ccFileNamePrefix="cc-abbrev",
        )
        self.assertTrue(provider.testCache())
        descrD = provider.getDescriptorIndex()
        ccMolD = self.__getChemCompDefs()
        factory = OeMoleculeFactory(quietMode=True)
        allRelatedD = {}
        for ccId, ccObj in list(ccMolD.items())[:100]:
            loadedId = factory.setChemCompDef(ccObj)
            self.assertEqual(loadedId, ccId)
            # The factory is reused, so clear descriptors from the previous component.
            factory.clearExternalDescriptors()
            extraSmilesL = descrD[ccId] if ccId in descrD else []
            for smiles in extraSmilesL:
                factory.addExternalDescriptor("smiles", smiles, "chemaxon-smiles")
            relatedD = factory.buildRelated(limitPerceptions=False)
            logger.info("%s related molecular forms %d", ccId, len(relatedD))
            allRelatedD.update(relatedD)
        logger.info("Total molecular forms (%d)", len(allRelatedD))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def buildOeBinaryMolCache(self, filePath, ccObjD, molBuildType="model-xyz", quietFlag=False, fpTypeList=None, limitPerceptions=False, suppressHydrogens=False):
    """Build cache of OEMol() objects from the input chemical component definition list.

    Args:
        filePath (str): output cache file path (written in OEB format)
        ccObjD (dict): chemical component object dictionary keyed by component identifier
        molBuildType (str, optional): build type forwarded to OeMoleculeFactory.build(). Defaults to "model-xyz".
        quietFlag (bool, optional): call setQuiet() on the molecule factory. Defaults to False.
        fpTypeList (list, optional): fingerprint type list. Defaults to None.
        limitPerceptions (bool, optional): suppress automatic chemical perceptions. Defaults to False.
        suppressHydrogens (bool, optional): forwarded to OeMoleculeFactory.getMol(). Defaults to False.

    Returns:
        (int, int, list): chem comp success count, error count, chem comp identifier failure list

    Notes:
        The output stream is closed once processing ends (it was previously left
        open), and the build status is reset for each component so that a
        definition whose identifier does not match its dictionary key is always
        reported as a failure instead of inheriting the previous status.
    """
    startTime = time.time()
    failIdList = []
    ccCount = 0
    errCount = 0
    ofs = None
    isOpen = False
    try:
        ofs = oechem.oemolostream()
        ofs.SetFormat(oechem.OEFormat_OEB)
        isOpen = ofs.open(filePath)
        if isOpen:
            oemf = OeMoleculeFactory()
            if quietFlag:
                oemf.setQuiet()
            for ccId, ccObj in ccObjD.items():
                # Reset per component so a stale status cannot mask a failure.
                ok = False
                tId = oemf.setChemCompDef(ccObj)
                if tId and tId == ccId:
                    ok = oemf.build(molBuildType=molBuildType, limitPerceptions=limitPerceptions)
                    if ok and fpTypeList:
                        fpOk = oemf.addFingerPrints(fpTypeList)
                        if not fpOk:
                            # A fingerprint failure is reported but does not reject the molecule.
                            logger.info("Fingerprint generation fails for %r", ccId)
                    if ok:
                        oeMol = oemf.getMol(suppressHydrogens=suppressHydrogens)
                        oechem.OEWriteMolecule(ofs, oeMol)
                        ccCount += 1
                if not ok:
                    # import or build failed, e.g. incomplete component (missing atoms or bonds)
                    errCount += 1
                    failIdList.append(ccId)
        else:
            logger.error("Unable to open cache database %s", filePath)
            errCount += 1
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    finally:
        # Close the stream so the cache file is flushed and the handle released.
        if isOpen:
            ofs.close()
    #
    endTime = time.time()
    logger.info("Completed operation at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime)
    return ccCount, errCount, failIdList
def testCompareByBuildType(self):
    """Compare SMILES for molecules built from chemical definitions by build type.

    Each indexed component is built with "ideal-xyz" and "oe-iso-smiles"
    (perceptions limited). The resulting SMILES are compared with the indexed
    values, then the isomeric SMILES is used to build a second molecule whose
    SMILES are compared with the first. Components whose regenerated isomeric
    SMILES differ between build types are counted and logged.

    Figures carried over from the earlier docstring (their meaning is not
    evident from the code):
        all build types 8769 (all)
        connect - smiles 6743
        model vs iso smiles 5937
        ideal va iso smiles 7047
    """
    doDepict = False
    ccMismatchD = {}
    genMismatchD = {}
    smilesByBuildTypeD = {}
    try:
        ccMolD, ccIdxD = self.__getChemCompDefs()
        limitPerceptions = True
        # Full set would be: ["model-xyz", "ideal-xyz", "connection-table", "oe-iso-smiles"]
        molBuildTypeL = ["ideal-xyz", "oe-iso-smiles"]
        startTime = time.time()
        factory = OeMoleculeFactory()
        factory.setQuiet()
        for molBuildType in molBuildTypeL:
            for ccId, idxD in ccIdxD.items():
                definition = ccMolD[ccId]
                refIsoSmiles = idxD["oe-iso-smiles"]
                refSmiles = idxD["oe-smiles"]
                loadedId = factory.setChemCompDef(definition)
                if not loadedId:
                    logger.info("Skipping bad component %r", ccId)
                    continue
                self.assertEqual(loadedId, ccId)
                built = factory.build(molBuildType=molBuildType, limitPerceptions=limitPerceptions)
                if not built:
                    logger.info("Build using %r failed for %s", molBuildType, ccId)
                    continue
                graphMol = factory.getGraphMol()
                builtIsoSmiles = factory.getIsoSMILES()
                builtSmiles = factory.getCanSMILES()
                ccDiffers = builtIsoSmiles != refIsoSmiles or builtSmiles != refSmiles
                # Second pass: rebuild from the isomeric SMILES just generated.
                regenFactory = OeMoleculeFactory()
                regenFactory.setQuiet()
                genId = ccId + "_gen"
                regenFactory.setDescriptor(builtIsoSmiles, "oe-iso-smiles", genId)
                rebuilt = regenFactory.build(molBuildType="oe-iso-smiles", limitPerceptions=limitPerceptions)
                if not rebuilt:
                    logger.info("Build using %r failed for %s", molBuildType, genId)
                    continue
                regenIsoSmiles = regenFactory.getIsoSMILES()
                regenSmiles = regenFactory.getCanSMILES()
                genDiffers = builtIsoSmiles != regenIsoSmiles or builtSmiles != regenSmiles
                perTypeD = smilesByBuildTypeD.setdefault(ccId, {})
                perTypeD.setdefault(molBuildType, []).append(regenIsoSmiles)
                logger.debug("%s buildType %s ccEq %r genEq %r", ccId, molBuildType, not ccDiffers, not genDiffers)
                if ccDiffers:
                    ccMismatchD.setdefault(molBuildType, []).append(ccId)
                if genDiffers:
                    genMismatchD.setdefault(molBuildType, []).append(ccId)
                if not doDepict:
                    continue
                perceptionTag = "-limited" if limitPerceptions else ""
                fileSuffix = "-%s%s.svg" % (molBuildType, perceptionTag)
                imagePath = os.path.join(self.__workPath, ccId + fileSuffix)
                depictor = OeDepict()
                depictor.setMolTitleList([(ccId, graphMol, "")])
                depictor.setDisplayOptions(
                    labelAtomName=False,
                    labelAtomCIPStereo=True,
                    labelAtomIndex=False,
                    labelBondIndex=False,
                    cellBorders=False,
                    bondDisplayWidth=0.5,
                )
                depictor.setGridOptions(rows=1, cols=1)
                depictor.prepare()
                depictor.write(imagePath)
        logger.info("Completed comparing %d molecules in %d builds (%.4f seconds)", len(ccIdxD), len(molBuildTypeL), time.time() - startTime)
        # Report the regeneration mismatches per build type.
        for molBuildType in molBuildTypeL:
            if molBuildType not in genMismatchD:
                continue
            mismatchL = genMismatchD[molBuildType]
            logger.info("GEN %s (%d) %r", molBuildType, len(mismatchL), mismatchL)
        # Count components whose first regenerated SMILES differs across build types.
        numDiff = 0
        for ccId, perTypeD in smilesByBuildTypeD.items():
            distinctS = {smilesL[0] for smilesL in perTypeD.values()}
            if len(distinctS) > 1:
                numDiff += 1
                logger.debug("%s diff smiles (%d) %r", ccId, len(distinctS), distinctS)
        logger.info("Components with inconsistent SMILES %d", numDiff)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __testReproduceDescriptors(self, molBuildType, limitPerceptions=True):
    """Rebuild each component and compare its computed descriptors with the indexed ones.

    Args:
        molBuildType (str): build type forwarded to OeMoleculeFactory.build().
        limitPerceptions (bool, optional): forwarded to build(). Defaults to True.

    Components whose loaded identifier differs from the dictionary key, that are
    missing from the index, or that are flagged "ambiguous" are skipped.
    Differences in isomeric SMILES, canonical SMILES, formula (compared
    case-insensitively), InChIKey and InChI are logged and tallied; the tallies
    are logged at the end. Nothing is returned.
    """
    ccMolD, ccIdxD = self.__getChemCompDefs()
    factory = OeMoleculeFactory()
    tallyD = defaultdict(int)
    for ccId, definition in ccMolD.items():
        loadedId = factory.setChemCompDef(definition)
        if loadedId != ccId:
            continue
        factory.build(molBuildType=molBuildType, limitPerceptions=limitPerceptions)
        builtMol = factory.getMol()
        tallyD["total components"] += 1
        if ccId not in ccIdxD:
            logger.info("Missing ccIndex entry for %s", ccId)
            continue
        refD = ccIdxD[ccId]
        if refD["ambiguous"]:
            tallyD["ambiguous component"] += 1
            continue
        tallyD["total molecules"] += 1
        nativeIso = oechem.OECreateIsoSmiString(builtMol)
        molToSmiles = oechem.OEMolToSmiles(builtMol)
        factoryIso = factory.getIsoSMILES()
        factoryCan = factory.getCanSMILES()
        # Internal consistency of the toolkit and factory SMILES.
        if nativeIso != factoryIso:
            logger.error("%s stored and calculated OE smiles differ %s %s", ccId, nativeIso, factoryIso)
        if molToSmiles != factoryIso:
            logger.error("%s calculated OE ISO and canonical smiles differ %s %s", ccId, factoryIso, molToSmiles)
        # Comparison against the archived index values.
        if factoryIso != refD["oe-iso-smiles"]:
            logger.info("%s ISO SMILES differ \nccd: %r \nOE: %r", ccId, refD["oe-iso-smiles"], factoryIso)
            tallyD["iso_smiles_diff"] += 1
        if factoryCan != refD["oe-smiles"]:
            logger.info("%s CAN SMILES differ \nccd: %r \nOE: %r", ccId, refD["oe-smiles"], factoryCan)
            tallyD["smiles_diff"] += 1
        builtFormula = factory.getFormula()
        if builtFormula.upper() != refD["formula"].upper():
            logger.debug("%s formulas differ \nccd: %r \nOE: %r", ccId, refD["formula"], builtFormula)
            tallyD["formula_diff"] += 1
        builtInchiKey = factory.getInChIKey()
        if builtInchiKey != refD["inchikey"]:
            logger.debug("%s InChI keys differ \nccd: %r \nOE: %r", ccId, refD["inchikey"], builtInchiKey)
            tallyD["inchikey_diff"] += 1
        builtInchi = factory.getInChI()
        if builtInchi != refD["inchi"]:
            logger.debug("%s InChIs differ \nccd: %r \nOE: %r", ccId, refD["inchi"], builtInchi)
            tallyD["inchi_diff"] += 1
    for label, count in tallyD.items():
        logger.info("%-12s %6d", label, count)
def __buildChemCompIndex(self, cD, molBuildType="model-xyz", doFeatures=True):
    """Internal method returning a dictionary of extracted chemical component descriptors and formula.

    Args:
        cD (dict): chemical component data containers (only the values are used).
        molBuildType (str, optional): build type forwarded to
            OeMoleculeFactory.build() when computing feature counts.
            Defaults to "model-xyz".
        doFeatures (bool, optional): build each molecule and store its feature
            counts. Defaults to True.

    Returns:
        dict: per component id a dictionary with "formula" (spaces removed and a
            charge suffix appended when the formal charge is non-zero),
            "type-counts", "ambiguous", "feature-counts" and one entry per
            recognised descriptor type. Any exception is logged and the entries
            gathered so far are returned.
    """
    knownDescriptorTypes = ("oe-iso-smiles", "oe-smiles", "acdlabs-smiles", "cactvs-iso-smiles", "cactvs-smiles", "inchi", "inchikey")
    rD = {}
    try:
        quietFlag = True
        for dataContainer in cD.values():
            cc = next(iter(PdbxChemCompIt(dataContainer)), None)
            if cc is None:
                # Without this guard the attribute access below raised and the
                # outer handler abandoned every remaining container.
                logger.error("Skipping data container without a chemical component record")
                continue
            ccId = cc.getId()
            formula = str(cc.getFormula()).replace(" ", "")
            ambiguousFlag = cc.getAmbiguousFlag().upper() in ["Y", "YES"]
            tch = cc.getFormalCharge()
            # Placeholder values ("." and "?") and empty values count as zero charge.
            fcharge = int(tch) if tch and tch not in [".", "?"] else 0
            #
            logger.debug("ccId %r formula %r ambiguous %r fcharge %r", ccId, formula, ambiguousFlag, fcharge)
            if fcharge:
                # Append "+"/"-" and the magnitude only when it exceeds one.
                sign = "+" if fcharge > 0 else "-"
                mag = str(abs(fcharge)) if abs(fcharge) > 1 else ""
                formula = formula + sign + mag
            #
            typeCounts = defaultdict(int)
            for at in PdbxChemCompAtomIt(dataContainer):
                typeCounts[at.getType().upper()] += 1
            #
            rD[ccId] = {"formula": formula, "type-counts": typeCounts, "ambiguous": ambiguousFlag, "feature-counts": {}}
            for des in PdbxChemCompDescriptorIt(dataContainer):
                desBuildType = des.getMolBuildType()
                tS = des.getDescriptor()
                descr = tS.strip() if tS else None
                if not descr:
                    continue
                if desBuildType in knownDescriptorTypes:
                    rD[ccId][desBuildType] = descr
                else:
                    logger.error("%s unexpected descriptor build type %r", ccId, desBuildType)
            if doFeatures:
                oemf = OeMoleculeFactory()
                if quietFlag:
                    oemf.setQuiet()
                tId = oemf.setChemCompDef(dataContainer)
                if tId != ccId:
                    # The entry is kept, with empty feature counts.
                    logger.error("%s chemical component definition import error", ccId)
                    continue
                ok = oemf.build(molBuildType=molBuildType)
                if ok:
                    rD[ccId]["feature-counts"] = oemf.getFeatureCounts()
    except Exception as e:
        logger.exception("Failing with %s", str(e))

    return rD
def testRoundTrip(self):
    """Round trip smiles comparisons -

    Each chemical component definition is loaded and built with the
    "oe-iso-smiles" build type; build failures are logged together with the
    component's ambiguous flag and release status. The regeneration and
    aligned-depiction branch below is guarded by isDiff, which is always
    False here, so that branch does not execute.
    """
    try:
        ccMolD = self.__getChemCompDefs()
        # useCache = True
        # quietFlag = False
        # molBuildTypeL = ["model-xyz", "ideal-xyz", None]
        # molBuildTypeL = [None]
        buildTypeRef = "oe-iso-smiles"
        oemf1 = OeMoleculeFactory()
        oemf2 = OeMoleculeFactory()
        #
        for ccId, ccObj in ccMolD.items():
            # ---- component details used only for logging
            ccIt = iter(PdbxChemCompIt(ccObj))
            cc = next(ccIt)
            formula = cc.getFormulaWithCharge()
            # ccId = cc.getId()
            ccName = cc.getName()
            ifCharge = cc.getFormalChargeAsInt()
            isAmbiguous = cc.getAmbiguousFlag() in ["Y", "y"]
            isCurrent = cc.getReleaseStatus() in ["REL"]
            logger.debug("%s name %r formula %r charge %d", ccId, ccName, formula, ifCharge)
            # ---- from here on ccId is the identifier returned by the factory
            ccId = oemf1.setChemCompDef(ccObj)
            ok = oemf1.build(molBuildType=buildTypeRef, limitPerceptions=False)
            if not ok:
                # A failed build is only logged; processing continues.
                logger.info("Build using %r failed for %s (ambiguous flag %r current %r)", buildTypeRef, ccId, isAmbiguous, isCurrent)
            #
            isDiff = False
            # NOTE(review): the nesting of the branches below was reconstructed;
            # confirm against version control before enabling isDiff.
            if isDiff:
                # NOTE(review): named "iso" but taken from getCanSMILES() - confirm intent.
                genIsoSmi = oemf1.getCanSMILES()
                oemf2 = OeMoleculeFactory()
                oemf2.setDescriptor(genIsoSmi, "oe-iso-smiles", ccId)
                oemf2.build(molBuildType="oe-iso-smiles", limitPerceptions=False)
                regenIsoSmi = oemf2.getIsoSMILES()
                if genIsoSmi != regenIsoSmi:
                    logger.info("%s regenerated ISOSMILES differ \n -- INP: %s\n -- OUT: %s", ccId, genIsoSmi, regenIsoSmi)
                    # Depict the two molecules aligned to show where they differ.
                    oed = OeDepictMCSAlignPage()
                    oed.setDisplayOptions(
                        labelAtomName=True,
                        labelAtomCIPStereo=True,
                        labelAtomIndex=False,
                        labelBondIndex=False,
                        labelBondCIPStereo=True,
                        highlightStyleFit="ballAndStickInverse",
                        highLightNotMatchColorRef="pink",
                        bondDisplayWidth=0.5,
                    )
                    oed.setRefMol(oemf1.getGraphMol(), ccId)
                    oed.setFitMol(oemf2.getGraphMol(), ccId)
                    imgPath = os.path.join(self.__workPath, "compare-assigned-" + ccId + "-calc-" + ccId + ".svg")
                    logger.info("Using image path %r", imgPath)
                    aML = oed.alignPair(imagePath=imgPath)
                    if aML:
                        for (rCC, rAt, tCC, tAt) in aML:
                            logger.info("%5s %-5s %5s %-5s", rCC, rAt, tCC, tAt)
            else:
                logger.debug("%s matched all cases", ccId)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testSelfConsistency(self):
    """Compare each constructed molecule with its underlying chemical definition.

    Every definition is built with the "model-xyz" build type (no perception
    limits, normalize=False). Features taken from the definition are compared
    with features of the built molecule through MoleculeAnnotationsCompare, and
    the identifiers of components whose comparison fails are collected and logged.
    """
    try:
        failedIdL = []
        ccMolD = self.__getChemCompDefs()
        factory = OeMoleculeFactory()
        comparer = MoleculeAnnotationsCompare()
        limitPerceptions = False
        # Alternative reference build type: "oe-iso-smiles"
        buildTypeRef = "model-xyz"
        # Hydrogens are filtered only for descriptor-based build types.
        descriptorBuildTypes = ["oe-iso-smiles", "oe-smiles", "cactvs-smiles", "cactvs-iso-smiles", "acdlabs-smiles", "inchi"]
        filterHydrogens = buildTypeRef in descriptorBuildTypes
        for ccId, ccObj in ccMolD.items():
            loadedId = factory.setChemCompDef(ccObj)
            self.assertEqual(loadedId, ccId)
            built = factory.build(molBuildType=buildTypeRef, limitPerceptions=limitPerceptions, normalize=False)
            if not built:
                logger.info("Build using %r failed for %s", buildTypeRef, ccId)
                continue
            doTautomers = False
            if doTautomers:
                tautomerMolL = factory.getTautomerMolList()
                logger.info("%s number reasonable tautomers %d", ccId, len(tautomerMolL))
            refFD = comparer.getChemCompFeatures(ccObj, descriptorProgram="OPENEYE", filterHydrogens=filterHydrogens)
            tstFD = factory.getOeMoleculeFeatures(filterHydrogens=filterHydrogens)
            # logger.info("tstFD %r", tstFD)
            matched, retCmp = comparer.compare(refFD, tstFD, tstInfo="Openeye ISO SMILES")
            if matched:
                continue
            logger.info("Comparison failed build type %r and %r", buildTypeRef, ccId)
            logger.debug(
                "diff -> atomatic atoms %r stereo atoms %r bond types %r aromatic bonds %r",
                retCmp.difAromaticAtoms,
                retCmp.difStereoAtoms,
                retCmp.difTypeBonds,
                retCmp.difAromaticBonds,
            )
            failedIdL.append(ccId)
        logger.info("Failures (%d) %r: ", len(failedIdL), failedIdL)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()