def toMolFile(self, identifier, identifierType, molfilePath=None, fmt="mol", **kwargs):
    """Create molfile (fmt) from InChI, SMILES descriptors or PDB identifier.

    Args:
        identifier (str): SMILES or InChI descriptor, or a component identifier
        identifierType (str): "smiles", "inchi" or "identifierpdb" (case insensitive)
        molfilePath (str, optional): output path. When not provided, a path is
            obtained from the internal path builder using fmt.
        fmt (str, optional): format handed to the internal path builder. Defaults to "mol".
        **kwargs: passed through to the internal molfile writer

    Returns:
        str: path of the molfile on success or None otherwise
    """
    try:
        molfilePath = molfilePath if molfilePath else self.__makeMolfilePath(fmt=fmt)
        oeio = OeIoUtils()
        idType = identifierType.lower()
        if idType == "smiles":
            oeMol = oeio.smilesToMol(identifier)
            oeMol.SetTitle("From SMILES")
        elif idType == "inchi":
            oeMol = oeio.inchiToMol(identifier)
            oeMol.SetTitle("From InChI")
        elif idType == "identifierpdb":
            ccsw = ChemCompSearchWrapper()
            oesmP = ccsw.getSearchMoleculeProvider()
            oeMol = oesmP.getMol(identifier)
        else:
            # Without this branch oeMol stays unbound and the failure is
            # reported as an unrelated UnboundLocalError traceback.
            logger.error("Unsupported identifier type %r", identifierType)
            return None
        #
        ok = self.__toMolFile(oeMol, molfilePath, **kwargs)
        return molfilePath if ok else None
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return None
def testSubStructureSearchScreened(self):
    """Exercise screened substructure search on the leading entries of the component index.

    The "oe-smiles" descriptor of each sampled component is converted to a
    query molecule and searched. Components that are not found among their
    own matches are collected and logged.
    """
    ioUtils = OeIoUtils()
    molProvider = OeMoleculeProvider(**self.__myKwargs)
    ok = molProvider.testCache()
    indexProvider = ChemCompIndexProvider(**self.__myKwargs)
    indexD = indexProvider.getIndex()
    ok = indexProvider.testCache(minCount=self.__minCount)
    self.assertTrue(ok)
    searchUtils = OeSearchUtils(molProvider, screenType=self.__screenType, numProc=self.__numProc)
    sampleSize = 20
    missedIdL = []
    for ccId, ccD in list(indexD.items())[:sampleSize]:
        # ----
        tStart = time.time()
        if "oe-smiles" in ccD:
            smiles = ccD["oe-smiles"]
            logger.info("Search %s %r", ccId, smiles)
            queryMol = ioUtils.smartsToQmol(smiles)
            status, matchL = searchUtils.searchSubStructureScreened(queryMol, maxMatches=100)
            if status:
                logger.info("%s (status=%r) match length %d in (%.4f seconds)", ccId, status, len(matchL), time.time() - tStart)
            if not self.__resultContains(ccId, matchL):
                missedIdL.append(ccId)
        # ----
    logger.info("Missed searches (%d) %r", len(missedIdL), missedIdL)
def getOeMolDatabase(self):
    """Return the OE molecule database and its title index.

    Both objects are loaded on the first call (or whenever the stored
    database is falsy) and reused afterwards.

    Returns:
        (object, object): the molecule database and the title index
    """
    if self.__oeMolDb:
        return self.__oeMolDb, self.__oeMolDbTitleD
    ioUtils = OeIoUtils()
    dbFilePath = os.path.join(self.__dirPath, self.__getOeMolDbFileName())
    self.__oeMolDb = ioUtils.loadOeBinaryDatabaseAndIndex(dbFilePath)
    self.__oeMolDbTitleD = self.__getOeMolDbTitleIndex()
    return self.__oeMolDb, self.__oeMolDbTitleD
def __fingerPrintSearch(self, numMols, **kwargs):
    """Run fingerprint searches for indexed descriptors and log components that miss themselves.

    Args:
        numMols (int): maximum number of index entries to search (falsy for all)
        maxFpResults (int, optional): maximum fingerprint results per search. Defaults to 50.
        limitPerceptions (bool, optional): passed to the molecule builder. Defaults to False.
        fpTypeCuttoffList (list, optional): (fpType, minimum score) tuples. Defaults to [("TREE", 0.6)].
        buildTypeList (list, optional): descriptor keys to build queries from. Defaults to ["oe-iso-smiles"].

    Returns:
        bool: True when the search loop completes
    """
    maxFpResults = kwargs.get("maxFpResults", 50)
    limitPerceptions = kwargs.get("limitPerceptions", False)
    fpTypeCuttoffList = kwargs.get("fpTypeCuttoffList", [("TREE", 0.6)])
    buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
    #
    oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
    searchUtils = OeSearchUtils(oesmP, fpTypeList=[item[0] for item in fpTypeCuttoffList])
    ioUtils = OeIoUtils()
    # This will reload the oe binary cache.
    oeMol = oesmP.getMol("004")
    self.assertGreaterEqual(len(list(oeMol.GetAtoms())), 12)

    fpMissD = {}
    buildMissD = {}
    numMols = min(len(ccIdxD), numMols) if numMols else len(ccIdxD)
    logger.info("Begin finger print search on %d molecules", numMols)
    # ----
    startTime = time.time()
    for ccId, ccD in list(ccIdxD.items())[:numMols]:
        for buildType in buildTypeList:
            if buildType not in ccD:
                continue
            oeMol = ioUtils.descriptorToMol(
                ccD[buildType],
                buildType,
                limitPerceptions=limitPerceptions,
                messageTag=ccId + ":" + buildType,
            )
            if not oeMol:
                continue
            foundSelf = False
            for fpType, minFpScore in fpTypeCuttoffList:
                retStatus, matchL = searchUtils.searchFingerPrints(
                    oeMol, fpType=fpType, minFpScore=minFpScore, maxFpResults=maxFpResults
                )
                self.assertTrue(retStatus)
                #
                hitSelf = self.__resultContains(ccId, matchL)
                foundSelf = foundSelf or hitSelf
                if not hitSelf:
                    fpMissD.setdefault(ccId, []).append((buildType, fpType, len(matchL)))
            #
            if not foundSelf:
                buildMissD.setdefault(ccId, []).append(buildType)
    # ------
    for ccId, buildTypeL in buildMissD.items():
        logger.info("%s missed all fptypes: buildtype list %r", ccId, buildTypeL)
        if ccId in fpMissD:
            logger.info("%s unmatched by fpTypes %r", ccId, fpMissD[ccId])
    # ----
    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(fpTypeCuttoffList), numMols, time.time() - startTime)
    # ----
    return True
def matchByDescriptor(self, descriptor, descriptorType, matchOpts="graph-relaxed", searchId=None):
    """Return graph match (w/ finger print pre-filtering) and finger print search results for the
    input descriptor.

    Args:
        descriptor (str): molecular descriptor (SMILES, InChI)
        descriptorType (str): descriptor type (SMILES, InChI)
        matchOpts (str, optional): graph match criteria (graph-relaxed, graph-relaxed-stereo, graph-strict,
                                   fingerprint-similarity). Defaults to "graph-relaxed".
        searchId (str, optional): search identifier for logging. Defaults to None.

    Returns:
        (statusCode, list, list): status, graph match and finger match lists of type (MatchResults)
                                  -100 descriptor processing error
                                  -200 search execution error
                                     0 search execution success
    """
    # Two independent lists: a chained assignment (ssL = fpL = []) would bind
    # both names to one object, so mutating one result would alter the other.
    ssL = []
    fpL = []
    retStatus = False
    statusCode = -200
    try:
        oesmpKwargs = self.__configD["oesmpKwargs"]
        fpTypeCuttoffD = oesmpKwargs["fpTypeCuttoffD"] if "fpTypeCuttoffD" in oesmpKwargs else {}
        maxFpResults = oesmpKwargs["maxFpResults"] if "maxFpResults" in oesmpKwargs else 50
        limitPerceptions = oesmpKwargs["limitPerceptions"] if "limitPerceptions" in oesmpKwargs else False
        #
        searchId = searchId if searchId else "query"
        messageTag = searchId + ":" + descriptorType
        oeioU = OeIoUtils()
        oeMol = oeioU.descriptorToMol(descriptor, descriptorType, limitPerceptions=limitPerceptions, messageTag=messageTag)
        oeMol = oeioU.suppressHydrogens(oeMol)
        if not oeMol:
            logger.warning("descriptor type %r molecule build fails: %r", descriptorType, descriptor)
            return self.__statusDescriptorError, ssL, fpL
        #
        # Only the first two configured fingerprint types are used for the search.
        retStatus, ssL, fpL = self.__oesU.searchSubStructureAndFingerPrint(
            oeMol, list(fpTypeCuttoffD.items())[:2], maxFpResults, matchOpts=matchOpts
        )
        statusCode = 0 if retStatus else self.__searchError
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    #
    return statusCode, ssL, fpL
def __subStructureSearchScreened(self, numMols, **kwargs):
    """Run screened substructure searches for each screen type and log components that miss themselves.

    Args:
        numMols (int): maximum number of index entries to search (falsy for all)
        buildTypeList (list, optional): descriptor keys to build queries from. Defaults to ["oe-iso-smiles"].
        screenTypeList (list, optional): screen types to exercise. Defaults to ["SMARTS"].

    Returns:
        bool: True when all screen types have been processed
    """
    #
    buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
    screenTypeList = kwargs.get("screenTypeList", ["SMARTS"])
    oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
    numMols = min(len(ccIdxD), numMols) if numMols else len(ccIdxD)
    for screenType in screenTypeList:
        searchUtils = OeSearchUtils(oesmP, screenType=screenType, numProc=self.__numProc)
        ioUtils = OeIoUtils()
        #
        missedL = []
        for ii, ccId in enumerate(list(ccIdxD.keys())[:numMols]):
            ccD = ccIdxD[ccId]
            for buildType in buildTypeList:
                if buildType not in ccD:
                    continue
                tag = ccId + ":" + buildType
                if screenType == "SMARTS":
                    smiles = ioUtils.descriptorToSmiles(ccD[buildType], buildType, messageTag=tag)
                    oeQMol = ioUtils.descriptorToMol(smiles, "SMARTS", messageTag=tag)
                else:
                    oeQMol = ioUtils.descriptorToQMol(ccD[buildType], "SMARTS", messageTag=tag)
                if not oeQMol:
                    logger.debug("%s build failed for %s - skipping", ccId, buildType)
                    continue
                # ----
                tStart = time.time()
                retStatus, matchL = searchUtils.searchSubStructureScreened(oeQMol, maxMatches=100)
                if retStatus:
                    logger.debug(
                        "%s - %s - %s (status=%r) match length %d in (%.4f seconds)",
                        ccId,
                        buildType,
                        screenType,
                        retStatus,
                        len(matchL),
                        time.time() - tStart,
                    )
                if not self.__resultContains(ccId, matchL):
                    missedL.append((ccId, buildType, screenType))
                # ----
            if ii % 100 == 0:
                logger.info("Completed %d of %d missed count %d", ii, numMols, len(missedL))
        logger.info("Screen %r missed searches (%d) %r", screenType, len(missedL), missedL)
    return True
def subStructSearchByDescriptor(self, descriptor, descriptorType, matchOpts="sub-struct-graph-relaxed", searchId=None):
    """Return substructure search results for the input descriptor.

    The query molecule is built from the descriptor with hydrogens suppressed,
    the index is pre-filtered for candidate identifiers, and the substructure
    search is run on those candidates.

    Args:
        descriptor (str): molecular descriptor (SMILES, InChI)
        descriptorType (str): descriptor type (SMILES, InChI)
        matchOpts (str, optional): graph match criteria (sub-struct-graph-relaxed, sub-struct-graph-relaxed-stereo,
                                   sub-struct-graph-strict). Defaults to "sub-struct-graph-relaxed".
        searchId (str, optional): search identifier for logging. Defaults to None.

    Returns:
        (statusCode, list, list): status, substructure search results of type (MatchResults), empty list placeholder
                                  -100 descriptor processing error
                                  -200 search execution error
                                     0 search execution success
    """
    ssL = []
    retStatus = False
    statusCode = -200
    try:
        oesmpKwargs = self.__configD["oesmpKwargs"]
        limitPerceptions = False
        if "limitPerceptions" in oesmpKwargs:
            limitPerceptions = oesmpKwargs["limitPerceptions"]
        numProc = 4
        if "numProc" in oesmpKwargs:
            numProc = oesmpKwargs["numProc"]
        #
        if not searchId:
            searchId = "query"
        messageTag = searchId + ":" + descriptorType
        ioUtils = OeIoUtils()
        queryMol = ioUtils.descriptorToMol(descriptor, descriptorType, limitPerceptions=limitPerceptions, messageTag=messageTag)
        queryMol = ioUtils.suppressHydrogens(queryMol)
        if not queryMol:
            logger.warning("descriptor type %r molecule build fails: %r", descriptorType, descriptor)
            return self.__statusDescriptorError, ssL, []
        #
        candidateIdL = self.__oesubsU.prefilterIndex(queryMol, self.__siIdxP, matchOpts=matchOpts)
        retStatus, ssL = self.__oesubsU.searchSubStructure(queryMol, ccIdList=candidateIdL, matchOpts=matchOpts, numProc=numProc)
        if retStatus:
            statusCode = 0
        else:
            statusCode = self.__searchError
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    #
    return statusCode, ssL, []
def getMol(self, ccId):
    """Return the cached OE molecule stored under ccId.

    The binary molecule cache is read on first use (or whenever the stored
    cache is falsy).

    Args:
        ccId (str): key of the molecule in the cache

    Returns:
        object: the cached molecule, or None if the lookup or load fails
    """
    try:
        if not self.__oeMolD:
            ioUtils = OeIoUtils()
            cacheFilePath = os.path.join(self.__dirPath, self.__getOeMolFileName())
            self.__oeMolD = ioUtils.readOeBinaryMolCache(cacheFilePath)
            logger.info("Loading OE binary molecule cache length %d", len(self.__oeMolD))
        return self.__oeMolD[ccId]
    except Exception as e:
        logger.exception("Get molecule %r failing with %s", ccId, str(e))
        return None
def getSubSearchDb(self, screenType="SMARTS", numProc=1, forceRefresh=False):
    """Return the screened substructure search database, opening it when needed.

    Args:
        screenType (str, optional): screen type used to select and load the database file. Defaults to "SMARTS".
        numProc (int, optional): passed to the database loader. Defaults to 1.
        forceRefresh (bool, optional): reload even when a database is already held. Defaults to False.

    Returns:
        object: the substructure search database held by this instance
    """
    if self.__ssDb and not forceRefresh:
        return self.__ssDb
    ioUtils = OeIoUtils()
    dbFilePath = os.path.join(self.__dirPath, self.__getSubSearchFileName(screenType))
    logger.info("Opening screened substructure search database %r", dbFilePath)
    self.__ssDb = ioUtils.loadOeSubSearchDatabase(dbFilePath, screenType, numProc=numProc)
    return self.__ssDb
def __exhaustiveSubStructureSearch(self, numMols, **kwargs):
    """Exhaustive substructure search.

    For each indexed component and build type, a query molecule is built from
    the stored descriptor and searched with "graph-strict" matching. The
    search must succeed and the component must be found among its own matches.

    Args:
        numMols (int): number of leading index entries to search
        limitPerceptions (bool, optional): passed to the molecule builder. Defaults to False.
        buildTypeList (list, optional): descriptor keys to build queries from. Defaults to ["oe-iso-smiles"].

    Returns:
        bool: True when all searches complete
    """
    try:
        limitPerceptions = kwargs.get("limitPerceptions", False)
        buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
        oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
        oesU = OeSearchUtils(oesmP, fpTypeList=[])
        oeioU = OeIoUtils()
        #
        for ccId, ccD in list(ccIdxD.items())[:numMols]:
            matchCount = 0
            mtS = set()
            for buildType in buildTypeList:
                if buildType not in ccD:
                    continue
                oeMol = oeioU.descriptorToMol(ccD[buildType], buildType, limitPerceptions=limitPerceptions, messageTag=ccId + ":" + buildType)
                if not oeMol:
                    logger.error("%s %s build query molecule build fails (skipping)", ccId, buildType)
                    continue
                # ----
                startTime = time.time()
                retStatus, mL = oesU.searchSubStructure(oeMol, matchOpts="graph-strict")
                # Evaluate the self-match once, and only for a successful search,
                # instead of re-scanning the match list in every branch and assertion.
                hasSelf = bool(retStatus) and self.__resultContains(ccId, mL)
                if not retStatus:
                    logger.info("%s match fails for build type %s", ccId, buildType)
                elif not hasSelf:
                    logger.info("%s failed match length %d build type %s in (%.4f seconds)", ccId, len(mL), buildType, time.time() - startTime)
                else:
                    mtS.update(m.ccId for m in mL)
                    matchCount += 1
                self.assertTrue(retStatus)
                self.assertTrue(hasSelf)
            if matchCount:
                logger.info("%s MATCHES %d: %r", ccId, matchCount, mtS)
            else:
                logger.info("%s NO MATCHES", ccId)
            # ----
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
    return False
def getOeMolD(self):
    """Return the dictionary of cached OE molecules, reading the binary cache when needed.

    Returns:
        dict: the cached molecule dictionary, or None if loading fails
    """
    try:
        if self.__oeMolD:
            return self.__oeMolD
        tStart = time.time()
        ioUtils = OeIoUtils()
        cacheFilePath = os.path.join(self.__dirPath, self.__getOeSearchMolFileName())
        self.__oeMolD = ioUtils.readOeBinaryMolCache(cacheFilePath)
        logger.info("Loading OE binary molecule cache length %d (%.4f seconds)", len(self.__oeMolD), time.time() - tStart)
        return self.__oeMolD
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return None
def getFingerPrintDb(self, fpType, fpDbType="STANDARD", rebuild=False):
    """Return the fingerprint database for the input fingerprint type.

    The database is loaded when no entry is held for fpType or when rebuild
    is set, and stored on the instance only if the load returns a truthy object.

    Args:
        fpType (str): fingerprint type used as the key for the stored database
        fpDbType (str, optional): passed to the database loader. Defaults to "STANDARD".
        rebuild (bool, optional): reload even when an entry is already held. Defaults to False.

    Returns:
        object: the fingerprint database, or None when none could be loaded
    """
    if fpType not in self.__fpDbD or rebuild:
        oeIo = OeIoUtils()
        fastFpDbPath = os.path.join(self.__dirPath, self.__getFastFpDbFileName(fpType))
        oeMolDbFilePath = os.path.join(self.__dirPath, self.__getOeMolDbFileName())
        fpDb = oeIo.loadOeFingerPrintDatabase(oeMolDbFilePath, fastFpDbPath, inMemory=True, fpType=fpType, fpDbType=fpDbType)
        if fpDb:
            self.__fpDbD[fpType] = fpDb
    #
    # A failed first load leaves no entry for fpType; report that as None
    # rather than letting a bare KeyError escape.
    return self.__fpDbD.get(fpType)
def __toMolFile(self, oeMol, molfilePath, **kwargs):
    """Write the input OE molecule to a molfile.

    Args:
        oeMol (object): instance of an OE graph molecule
        molfilePath (string): file path for molfile (type determined by extension)
        **kwargs: accepted for interface compatibility and not used

    Returns:
        bool: True for success or False otherwise
    """
    try:
        _ = kwargs
        oeio = OeIoUtils()
        # Propagate the writer's status so a failed write is not reported as success.
        return bool(oeio.write(molfilePath, oeMol, constantMol=True))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return False
def __getMol(self, query, queryType, queryId, limitPerceptions=False, suppressHydrogens=True):
    """Build the query molecule and title it with queryId.

    Args:
        query (str): identifier (for queryType "CC") or descriptor string
        queryType (str): "CC" to fetch from the molecule provider, otherwise the descriptor type
        queryId (str): used as the builder message tag and as the molecule title
        limitPerceptions (bool, optional): passed to the descriptor builder. Defaults to False.
        suppressHydrogens (bool, optional): apply hydrogen suppression to the molecule. Defaults to True.

    Returns:
        object: the titled OE molecule
    """
    ioUtils = OeIoUtils()
    fromProvider = queryType == "CC"
    mol = (
        self.__oesmP.getMol(query)
        if fromProvider
        else ioUtils.descriptorToMol(query, queryType, limitPerceptions=limitPerceptions, messageTag=queryId)
    )
    #
    if suppressHydrogens:
        mol = ioUtils.suppressHydrogens(mol)
    mol.SetTitle(queryId)
    return mol
def testDepictSMILES(self):
    """Test case - create depiction from SMILES descriptor."""
    try:
        svgPath = os.path.join(self.__workPath, "benzene-from-smi.svg")
        benzeneMol = OeIoUtils().smilesToMol("c1ccccc1")
        depictor = OeDepict()
        depictor.setMolTitleList([("benzene", benzeneMol, "Title for benzene")])
        displayOpts = {
            "labelAtomName": False,
            "labelAtomCIPStereo": True,
            "labelBondCIPStereo": True,
            "labelAtomIndex": False,
            "labelBondIndex": False,
            "bondDisplayWidth": 1.0,
        }
        depictor.setDisplayOptions(**displayOpts)
        depictor.setGridOptions(rows=1, cols=1)
        depictor.prepare()
        depictor.write(svgPath)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testSubStructureSearchScreenedFiltered(self):
    """Exercise screened substructure search using the "cc-filtered" / "oe-filtered" cache files.

    The "oe-smiles" descriptor of each sampled component is converted to a
    query molecule and searched. Components that are not found among their
    own matches are collected and logged.
    """
    providerKwargs = dict(
        cachePath=self.__cachePath,
        useCache=True,
        fpTypeList=self.__fpTypeList,
        ccFileNamePrefix="cc-filtered",
        oeFileNamePrefix="oe-filtered",
        molBuildType="oe-iso-smiles",
        limitPerceptions=False,
    )
    ioUtils = OeIoUtils()
    molProvider = OeMoleculeProvider(**providerKwargs)
    ok = molProvider.testCache()
    indexProvider = ChemCompIndexProvider(**providerKwargs)
    indexD = indexProvider.getIndex()
    ok = indexProvider.testCache(minCount=self.__minCount)
    self.assertTrue(ok)
    searchUtils = OeSearchUtils(molProvider, screenType=self.__screenType, numProc=self.__numProc)
    sampleSize = 5000
    missedIdL = []
    for ccId, ccD in list(indexD.items())[:sampleSize]:
        # ----
        tStart = time.time()
        if "oe-smiles" in ccD:
            smiles = ccD["oe-smiles"]
            logger.info("Search %s %r", ccId, smiles)
            queryMol = ioUtils.smartsToQmol(smiles)
            status, matchL = searchUtils.searchSubStructureScreened(queryMol, maxMatches=100)
            logger.info("%s (status=%r)match length %d in (%.4f seconds)", ccId, status, len(matchL), time.time() - tStart)
            if not self.__resultContains(ccId, matchL):
                missedIdL.append(ccId)
        # ----
    logger.info("Missed searches (%d) %r", len(missedIdL), missedIdL)
def testDepictOneSDF(self):
    """Test case - get, read, build OE molecule from SDF file, and depict the molecule."""
    try:
        # Dedicated output name: "benzene-from-smi.svg" is the file written by the
        # SMILES depiction test, so reusing it here would overwrite that image.
        imagePath = os.path.join(self.__workPath, "ATP-from-sdf.svg")
        sdfPath = os.path.join(self.__dataPath, "ATP.sdf")
        oeio = OeIoUtils()
        oeMolL = oeio.fileToMols(sdfPath)
        #
        oed = OeDepict()
        # Only the first molecule read from the SDF file is depicted.
        oed.setMolTitleList([("ATP", oeMolL[0], "Title for ATP")])
        oed.setDisplayOptions(
            labelAtomName=True,
            labelAtomCIPStereo=True,
            labelBondCIPStereo=True,
            labelAtomIndex=False,
            labelBondIndex=False,
            bondDisplayWidth=0.5,
        )
        oed.setGridOptions(rows=1, cols=1)
        oed.prepare()
        oed.write(imagePath)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def depictMolecule(self, identifier, identifierType, imagePath=None, **kwargs):
    """Create depiction from InChI, SMILES descriptors or PDB identifier.

    Args:
        identifier (str): SMILES or InChI descriptor, or a component identifier
        identifierType (str): "smiles", "inchi" or "identifierpdb" (case insensitive)
        imagePath (str, optional): output image path. When not provided, a path is
            obtained from the internal image path builder.
        **kwargs: passed through to the internal depiction method

    Returns:
        str: path of the image on success or None otherwise
    """
    try:
        imagePath = imagePath if imagePath else self.__makeImagePath()
        oeio = OeIoUtils()
        idType = identifierType.lower()
        if idType == "smiles":
            oeMol = oeio.smilesToMol(identifier)
        elif idType == "inchi":
            oeMol = oeio.inchiToMol(identifier)
        elif idType == "identifierpdb":
            ccsw = ChemCompSearchWrapper()
            oesmP = ccsw.getSearchMoleculeProvider()
            oeMol = oesmP.getMol(identifier)
        else:
            # Without this branch oeMol stays unbound and the failure is
            # reported as an unrelated UnboundLocalError traceback.
            logger.error("Unsupported identifier type %r", identifierType)
            return None
        #
        ok = self.__depictOne(oeMol, imagePath, **kwargs)
        return imagePath if ok else None
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return None
def __displayAlignedDescriptorPair(self, ccId, descrRef, buildTypeRef, descrFit, buildTypeFit, title=None, limitPerceptions=True):
    """Depict the alignment of two descriptor-derived molecules for one component.

    Args:
        ccId (str): component identifier used for message tags, display ids and the image name
        descrRef (str): descriptor for the reference molecule
        buildTypeRef (str): descriptor type of the reference molecule
        descrFit (str): descriptor for the fit molecule
        buildTypeFit (str): descriptor type of the fit molecule
        title (str, optional): image name prefix. Defaults to "<buildTypeRef>-<buildTypeFit>".
        limitPerceptions (bool, optional): passed to the molecule builder. Defaults to True.
    """
    ioUtils = OeIoUtils()
    refMol = ioUtils.descriptorToMol(descrRef, buildTypeRef, limitPerceptions=limitPerceptions, messageTag=ccId + ":" + buildTypeRef)
    fitMol = ioUtils.descriptorToMol(descrFit, buildTypeFit, limitPerceptions=limitPerceptions, messageTag=ccId + ":" + buildTypeFit)
    #
    aligner = OeDepictMCSAlignPage()
    aligner.setSearchType(sType="graph-relaxed", minAtomMatchFraction=0.50)
    aligner.setDisplayOptions(
        labelAtomName=True,
        labelAtomCIPStereo=True,
        labelAtomIndex=False,
        labelBondIndex=False,
        highlightStyleFit="ballAndStickInverse",
        bondDisplayWidth=0.5,
    )
    aligner.setRefMol(refMol, ccId)
    aligner.setFitMol(fitMol, ccId)
    if title:
        namePrefix = title
    else:
        namePrefix = buildTypeRef + "-" + buildTypeFit
    imgPath = os.path.join(self.__workPath, namePrefix + "-" + ccId + ".svg")
    logger.info("Using image path %r", imgPath)
    alignedL = aligner.alignPair(imagePath=imgPath)
    if not alignedL:
        return
    logger.info("%s aligned image path %r", ccId, imgPath)
    for refCc, refAtom, fitCc, fitAtom in alignedL:
        logger.debug("%5s %-5s %5s %-5s", refCc, refAtom, fitCc, fitAtom)
def __exhaustiveSubStructureSearch(self, numMols, **kwargs):
    """Exhaustive substructure search.

    Each available descriptor of the leading numMols index entries is built
    into a query and searched with "graph-strict" matching. The search must
    succeed and return the component itself.

    Returns:
        bool: True when all searches complete
    """
    try:
        limitPerceptions = kwargs.get("limitPerceptions", False)
        buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
        oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
        searchUtils = OeSearchUtils(oesmP, fpTypeList=[])
        ioUtils = OeIoUtils()
        #
        for ccId, ccD in list(ccIdxD.items())[:numMols]:
            for buildType in buildTypeList:
                if buildType not in ccD:
                    continue
                queryMol = ioUtils.descriptorToMol(
                    ccD[buildType],
                    buildType,
                    limitPerceptions=limitPerceptions,
                    messageTag=ccId + ":" + buildType,
                )
                if not queryMol:
                    continue
                # ----
                tStart = time.time()
                retStatus, matchL = searchUtils.searchSubStructure(queryMol, matchOpts="graph-strict")
                if not self.__resultContains(ccId, matchL):
                    logger.info("%s match length %d build type %s in (%.4f seconds)", ccId, len(matchL), buildType, time.time() - tStart)
                self.assertTrue(retStatus)
                self.assertTrue(self.__resultContains(ccId, matchL))
            # ----
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
    return False
def testIoOps(self):
    """Test IO operation on generated related molecules"""
    try:
        ioUtils = OeIoUtils()
        marshal = MarshalUtil()
        marshal.mkdir(self.__molfileDirPath)
        ccMolD = self.__getChemCompDefs()
        molFactory = OeMoleculeFactory()
        for ccId, ccObj in list(ccMolD.items())[:10]:
            # ----
            tId = molFactory.setChemCompDef(ccObj)
            self.assertEqual(tId, ccId)
            relatedIdxD = molFactory.buildRelated(limitPerceptions=False)
            logger.info("%s generated %d molecular forms", ccId, len(relatedIdxD))
            for sId, idxD in relatedIdxD.items():
                logger.info("sId %r smiles %r", sId, idxD["smiles"])
                # One freshly built molecule is written per output format.
                for extension in (".mol2", ".mol"):
                    outPath = os.path.join(self.__molfileDirPath, sId + extension)
                    oeMol = ioUtils.descriptorToMol(idxD["smiles"], "oe-iso-smiles", limitPerceptions=False, messageTag=None)
                    ioUtils.write(outPath, oeMol, constantMol=True, addSdTags=True)
            # ----
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def makeFiles(self, fmt="sdf"):
    """Create one file per stored OE molecule in the requested format.

    Files are written to <fileDirPath>/<fmt>/<first character of id>/ and are
    named "<id>_ideal.<fmt>" when the molecule build type is "ideal-xyz" and
    "<id>_model.<fmt>" otherwise.

    Args:
        fmt (str, optional): one of "mol", "mol2", "mol2h" or "sdf". Defaults to "sdf".

    Returns:
        bool: True when all molecules have been processed, False for an
              unsupported format, a failed license check or an exception
    """
    try:
        if fmt not in ["mol", "mol2", "mol2h", "sdf"]:
            return False
        if not self.__setLicense(self.__licensePath):
            logger.error("Invalid license details - exiting")
            return False
        # The file name suffix and the writer do not depend on the molecule,
        # so set them up once instead of once per component.
        suffix = "_ideal." if self.__molBuildType == "ideal-xyz" else "_model."
        oeioU = OeIoUtils()
        for ccId, oeMol in self.__oeMolD.items():
            filePath = os.path.join(self.__fileDirPath, fmt, ccId[0], ccId + suffix + fmt)
            oeioU.write(filePath, oeMol, constantMol=True)
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return False
def alignMoleculePair(self, refIdentifier, refIdentifierType, fitIdentifier, fitIdentifierType, imagePath=None, **kwargs):
    """Create aligned depiction for a target molecule InChI, SMILES descriptors or PDB identifier.

    Args:
        refIdentifier (str): descriptor or component identifier of the reference molecule
        refIdentifierType (str): "smiles", "inchi" or "identifierpdb" (case insensitive)
        fitIdentifier (str): descriptor or component identifier of the fit molecule
        fitIdentifierType (str): "smiles", "inchi" or "identifierpdb" (case insensitive)
        imagePath (str, optional): output image path. When not provided, a path is
            obtained from the internal image path builder.
        **kwargs: passed through to the internal aligned-pair depiction method

    Returns:
        str: path of the image on success or None otherwise
    """
    try:
        imagePath = imagePath if imagePath else self.__makeImagePath()
        oeio = OeIoUtils()
        ccsw = ChemCompSearchWrapper()
        oesmP = ccsw.getSearchMoleculeProvider()

        def _buildMol(identifier, identifierType):
            # Build one molecule by identifier type; None for an unsupported type.
            idType = identifierType.lower()
            if idType == "smiles":
                return oeio.smilesToMol(identifier)
            if idType == "inchi":
                return oeio.inchiToMol(identifier)
            if idType == "identifierpdb":
                return oesmP.getMol(identifier)
            logger.error("Unsupported identifier type %r", identifierType)
            return None

        # ---
        oeMolRef = _buildMol(refIdentifier, refIdentifierType)
        oeMolFit = _buildMol(fitIdentifier, fitIdentifierType)
        if oeMolRef is None or oeMolFit is None:
            # Report the real cause instead of failing below on an unbound or None molecule.
            logger.error("Molecule build failed for reference (%r) or fit (%r) input", refIdentifierType, fitIdentifierType)
            return None
        # ---
        logger.info("oeMolRef atoms %r", oeMolRef.NumAtoms())
        logger.info("oeMolFit atoms %r", oeMolFit.NumAtoms())
        displayIdRef = "Ref"
        displayIdFit = "Fit"
        ok = self.__depictAlignedPair(oeMolRef, displayIdRef, oeMolFit, displayIdFit, imagePath, **kwargs)
        return imagePath if ok else None
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return None
def __sssWithFingerPrintFromDescriptor(self, numMols, **kwargs):
    """Run substructure searches with fingerprint pre-filtering for indexed descriptors.

    Every available build type of each sampled component is built into a query
    molecule and searched with "graph-strict" matching for each configured
    fingerprint type. Components that are not found among their own results
    are logged and, optionally, depicted against their "oe-iso-smiles" form.

    Args:
        numMols (int): maximum number of index entries to search (falsy for all)
        maxResults (int, optional): maximum fingerprint results per search. Defaults to 50.
        limitPerceptions (bool, optional): passed to the molecule builder. Defaults to False.
        fpTypeCuttoffList (list, optional): (fpType, minimum score) tuples. Defaults to [("TREE", 0.6)].
        buildTypeList (list, optional): descriptor keys to build queries from. Defaults to ["oe-iso-smiles"].
        doDisplay (bool, optional): depict aligned pairs for missed build types. Defaults to False.

    Returns:
        bool: True when the search loop completes
    """
    maxFpResults = kwargs.get("maxResults", 50)
    limitPerceptions = kwargs.get("limitPerceptions", False)
    fpTypeCuttoffList = kwargs.get("fpTypeCuttoffList", [("TREE", 0.6)])
    buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
    doDisplay = kwargs.get("doDisplay", False)
    #
    oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
    oesU = OeSearchUtils(oesmP, fpTypeList=[tup[0] for tup in fpTypeCuttoffList])
    oeioU = OeIoUtils()
    # This will reload the oe binary cache.
    oeMol = oesmP.getMol("004")
    self.assertGreaterEqual(len(list(oeMol.GetAtoms())), 12)
    #
    matchOpts = "graph-strict"
    missedD = {}
    missedFpD = {}
    numMols = min(len(ccIdxD), numMols) if numMols else len(ccIdxD)
    logger.info("Begin substructure search w/ finger print filter on %d molecules", numMols)
    # ----
    startTime = time.time()
    for ii, ccId in enumerate(list(ccIdxD.keys())[:numMols]):
        ccD = ccIdxD[ccId]
        for buildType in buildTypeList:
            if buildType in ccD:
                startTime1 = time.time()
                oeMol = oeioU.descriptorToMol(ccD[buildType], buildType, limitPerceptions=limitPerceptions, messageTag=ccId + ":" + buildType)
                if not oeMol:
                    logger.debug("%s build failed for %s - skipping", ccId, buildType)
                    continue
                maxHits = 0
                minHits = maxFpResults
                selfHit = False
                for fpType, minFpScore in fpTypeCuttoffList:
                    retStatus, mL = oesU.searchSubStructureWithFingerPrint(oeMol, fpType, minFpScore, maxFpResults, matchOpts=matchOpts)
                    self.assertTrue(retStatus)
                    logger.debug("%s fpType %r hits %d", ccId, fpType, len(mL))
                    maxHits = max(maxHits, len(mL))
                    minHits = min(minHits, len(mL))
                    matchedSelf = self.__resultContains(ccId, mL)
                    selfHit = selfHit or matchedSelf
                    if not matchedSelf:
                        missedFpD.setdefault(ccId, []).append((buildType, fpType, len(mL)))
                if not selfHit:
                    missedD.setdefault(ccId, []).append(buildType)
                if maxHits < 1 or not selfHit:
                    logger.info("%s (%r) buildType %r min hits %d max hits %d (%.4f seconds)", ccId, selfHit, buildType, minHits, maxHits, time.time() - startTime1)
            else:
                logger.debug("%s missing descriptor %r", ccId, buildType)
        if ii % 100 == 0:
            logger.info("Completed %d of %d missed count %d", ii, numMols, len(missedD))
    #
    for ccId, missL in missedD.items():
        logger.info("%s missed list %r", ccId, missL)
        if ccId in missedFpD:
            logger.info("%s unmatched for fpTypes %r", ccId, missedFpD[ccId])
    # ----
    if doDisplay:
        # Drive the display from missedD (component id -> missed build types).
        # The separate miss-tuple list used here before was never filled, so
        # nothing was ever depicted.
        for ccId, buildTypeL in missedD.items():
            idxD = ccIdxD[ccId]
            if "oe-iso-smiles" in idxD:
                for buildType in buildTypeL:
                    self.__displayAlignedDescriptorPair(ccId, idxD["oe-iso-smiles"], "oe-iso-smiles", idxD[buildType], buildType, title=None, limitPerceptions=True)

    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(fpTypeCuttoffList), numMols, time.time() - startTime)
    return True
def buildSearchFiles(self, **kwargs):
    """Build cif, sdf (optional), and mol2 files for components in the chemical component search index.
    Exclude ions or other extraneous molecules lacking bonds.

    Args:
        ccUrlTarget (str): locator for source chemical component dictionary (default: full public dictionary)
        birdUrlTarget (str): locator for source BIRD dictionary (default: full public dictionary)
        limitPerceptions (bool): restrict automatic perceptions in OE molecular build operations (default: False)
        numProc (int): number of processors (default: 2)
        useCache (bool): use existing resource file where possible (default: True)
        molLimit (str): limit the number of ingested chemical components (default: None)
        quietFlag (bool): suppress output in OE library operations (default: True)
        fpTypeList (list): fingerprint types passed to the OE search molecule provider (default: [])
        screenTypeList (list): screen types passed to the OE search molecule provider (default: [])
        minCount (int): minimum count passed to the component provider cache test (default: 0)
        useSdf (bool): write an sdf file for each search index entry (default: True)
        useMol2 (bool): write a mol2 file for each search index entry (default: False)

    Returns:
        (int): number of search index entries that passed the molecule checks and were processed
    """
    cachePath = self.__cachePath
    ccUrlTarget = kwargs.get("ccUrlTarget", None)
    birdUrlTarget = kwargs.get("birdUrlTarget", None)
    molLimit = kwargs.get("molLimit", None)
    quietFlag = kwargs.get("quietFlag", True)
    fpTypeList = kwargs.get("fpTypeList", [])
    screenTypeList = kwargs.get("screenTypeList", [])
    ccFileNamePrefix = "cc-%s" % self.__prefix if self.__prefix else "cc-full"
    oeFileNamePrefix = "oe-%s" % self.__prefix if self.__prefix else "oe-cc-full"
    numProc = kwargs.get("numProc", 2)
    minCount = kwargs.get("minCount", 0)
    useCache = kwargs.get("useCache", True)
    useSdf = kwargs.get("useSdf", True)
    useMol2 = kwargs.get("useMol2", False)
    limitPerceptions = kwargs.get("limitPerceptions", False)
    logSizes = False
    #
    startTime = time.time()
    ccmP = ChemCompMoleculeProvider(
        cachePath=cachePath, useCache=useCache, ccFileNamePrefix=ccFileNamePrefix, ccUrlTarget=ccUrlTarget, birdUrlTarget=birdUrlTarget, molLimit=molLimit
    )
    ok = ccmP.testCache(minCount=minCount, logSizes=logSizes)
    logger.info("Completed chemical component provider load %r (%.4f seconds)", ok, time.time() - startTime)
    #
    startTime = time.time()
    oesmp = OeSearchMoleculeProvider(
        ccUrlTarget=ccUrlTarget,
        birdUrlTarget=birdUrlTarget,
        cachePath=cachePath,
        ccFileNamePrefix=ccFileNamePrefix,
        oeFileNamePrefix=oeFileNamePrefix,
        useCache=useCache,
        quietFlag=quietFlag,
        fpTypeList=fpTypeList,
        screenTypeList=screenTypeList,
        numProc=numProc,
        molLimit=molLimit,
        limitPerceptions=limitPerceptions,
    )
    ok = oesmp.testCache()
    logger.info("Completed OE molecule provider load %r (%.4f seconds)", ok, time.time() - startTime)
    #
    startTime = time.time()
    ccSIdxP = ChemCompSearchIndexProvider(cachePath=cachePath, useCache=useCache, ccFileNamePrefix=ccFileNamePrefix, limitPerceptions=limitPerceptions, numProc=numProc)
    ok = ccSIdxP.testCache()
    logger.info("Completed chemical component search index load %r (%.4f seconds)", ok, time.time() - startTime)
    #
    ccSIdx = ccSIdxP.getIndex() if ccSIdxP and ok else {}
    logger.info("Search index status %r index length %d", ok, len(ccSIdx))
    #
    # NOTE(review): ccIdD is never populated, so the membership guard below is
    # always true - confirm whether processed ids were meant to be recorded.
    ccIdD = {}
    mU = MarshalUtil()
    oeU = OeIoUtils(dirPath=cachePath)
    numMols = 0
    searchFileDirPath = self.getSearchDirFilePath()
    pathTupList = []
    for sId in ccSIdx:
        # The component id is the leading "|"-delimited field of the search index id.
        ccId = sId.split("|")[0]
        # standard CIF definition
        if ccId not in ccIdD:
            cifPath = os.path.join(searchFileDirPath, ccId[0], ccId, ccId + ".cif")
            if not (useCache and mU.exists(cifPath)):
                ccMol = ccmP.getMol(ccId)
                if not self.__checkCif(ccMol):
                    continue
                mU.doExport(cifPath, [ccMol], fmt="mmcif")
        #
        oeMol = oesmp.getMol(sId)
        if not self.__checkOeMol(oeMol):
            continue
        #
        # Sanity checks on the generated OE molecule
        #
        cifPath = os.path.join(searchFileDirPath, ccId[0], ccId, sId + ".cif")
        if sId != ccId and not (useCache and mU.exists(cifPath)):
            oeccU = OeChemCompUtils()
            ok = oeccU.addOeMol(sId, oeMol, missingModelXyz=True, writeIdealXyz=False)
            if ok:
                oeccU.write(cifPath)

        if useSdf:
            molFilePath = os.path.join(searchFileDirPath, ccId[0], ccId, sId + ".sdf")
            if not (useCache and mU.exists(molFilePath)):
                ok = oeU.write(molFilePath, oeMol, constantMol=False, addSdTags=True)
                if ok:
                    pathTupList.append((sId, molFilePath, "sdf"))
        #
        if useMol2:
            mol2FilePath = os.path.join(searchFileDirPath, ccId[0], ccId, sId + ".mol2")
            if not (useCache and mU.exists(mol2FilePath)):
                # Capture the status of this write; testing a stale "ok" left over
                # from an earlier step could record a mol2 path that was never written.
                ok = oeU.write(mol2FilePath, oeMol, constantMol=False, addSdTags=True)
                if ok:
                    pathTupList.append((sId, mol2FilePath, "mol2"))
        numMols += 1
    #
    self.__storePathList(pathTupList)
    return numMols
def __fingerPrintScores(self, numMols, **kwargs):
    """Compute fingerprint scores for a list of component ids and log the ones that miss themselves.

    Args:
        numMols (int): maximum number of ids taken from failedIdList (falsy for the index length)
        maxResults (int, optional): maximum fingerprint results per search. Defaults to 50.
        limitPerceptions (bool, optional): passed to the molecule builder. Defaults to True.
        fpTypeCuttoffList (list, optional): (fpType, minimum score) tuples. Defaults to [("TREE", 0.6)].
        buildTypeList (list, optional): descriptor keys to build queries from. Defaults to ["oe-iso-smiles"].
        doDisplay (bool, optional): depict aligned pairs for missed build types. Defaults to False.
        failedIdList (list, optional): component ids to score. Defaults to [].
    """
    maxFpResults = kwargs.get("maxResults", 50)
    limitPerceptions = kwargs.get("limitPerceptions", True)
    fpTypeCuttoffList = kwargs.get("fpTypeCuttoffList", [("TREE", 0.6)])
    buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
    doDisplay = kwargs.get("doDisplay", False)
    failedIdList = kwargs.get("failedIdList", [])
    #
    oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
    searchUtils = OeSearchUtils(oesmP, fpTypeList=[item[0] for item in fpTypeCuttoffList])
    ioUtils = OeIoUtils()
    # This will reload the oe binary cache.
    oeMol = oesmP.getMol("004")
    self.assertGreaterEqual(len(list(oeMol.GetAtoms())), 12)
    #
    fpMissD = {}
    buildMissD = {}
    numMols = min(len(ccIdxD), numMols) if numMols else len(ccIdxD)
    logger.info("Begin finger print score search on %d molecules", numMols)
    # ----
    startTime = time.time()
    for ii, ccId in enumerate(failedIdList[:numMols]):
        ccD = ccIdxD[ccId]
        for buildType in buildTypeList:
            if buildType not in ccD:
                logger.debug("%s missing descriptor %r", ccId, buildType)
                continue
            oeMol = ioUtils.descriptorToMol(
                ccD[buildType],
                buildType,
                limitPerceptions=limitPerceptions,
                messageTag=ccId + ":" + buildType,
            )
            if not oeMol:
                logger.debug("%s build failed for %s - skipping", ccId, buildType)
                continue
            maxHits = 0
            minHits = maxFpResults
            foundSelf = False
            #
            tBuildStart = time.time()
            for fpType, minFpScore in fpTypeCuttoffList:
                retStatus, matchL = searchUtils.getFingerPrintScores(oeMol, fpType, minFpScore, maxFpResults)
                self.assertTrue(retStatus)
                logger.debug("%s fpType %r hits %d", ccId, fpType, len(matchL))
                maxHits = max(maxHits, len(matchL))
                minHits = min(minHits, len(matchL))
                hitSelf = self.__resultContains(ccId, matchL)
                foundSelf = foundSelf or hitSelf
                if not hitSelf:
                    fpMissD.setdefault(ccId, []).append((buildType, fpType, len(matchL)))
            #
            if not foundSelf:
                buildMissD.setdefault(ccId, []).append(buildType)
            #
            if maxHits < 1 or not foundSelf:
                logger.info("%s MISSED for buildType %r min hits %d max hits %d (%.4f seconds)", ccId, buildType, minHits, maxHits, time.time() - tBuildStart)
            else:
                logger.debug("%s MATCHED for buildType %r min hits %d max hits %d (%.4f seconds)", ccId, buildType, minHits, maxHits, time.time() - tBuildStart)
        if ii % 100 == 0:
            logger.info("Completed %d of %d missed count %d in (%.4f seconds)", ii, len(failedIdList), len(buildMissD), time.time() - startTime)
    # ------
    for ccId, buildTypeL in buildMissD.items():
        logger.info("%s missed all fptypes: buildtype list %r", ccId, buildTypeL)
        if ccId in fpMissD:
            logger.info("%s unmatched by fpTypes %r", ccId, fpMissD[ccId])
    #
    if doDisplay:
        for ccId, buildTypeL in buildMissD.items():
            idxD = ccIdxD[ccId]
            if "oe-iso-smiles" not in idxD:
                continue
            for missedBuildType in buildTypeL:
                self.__displayAlignedDescriptorPair(
                    ccId,
                    idxD["oe-iso-smiles"],
                    "oe-iso-smiles",
                    idxD[missedBuildType],
                    missedBuildType,
                    title=None,
                    limitPerceptions=True,
                )

    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(fpTypeCuttoffList), numMols, time.time() - startTime)
def __reload(self, **kwargs):
    """Reload the OE molecule cache and related data artifacts for chemical component definitions.

    Each artifact (binary molecule cache, indexed molecule database, optional
    fingerprint and screened substructure databases) is rebuilt when
    ``useCache`` is false or when its file does not exist yet.

    Args:
        molBuildType (str): build type passed to buildOeBinaryMolCache(). Defaults to "model-xyz".
        limitPerceptions (bool): passed to buildOeBinaryMolCache(). Defaults to False.
        fpTypeList (list): fingerprint types. Defaults to
            ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"].
        screenTypeList (list): fast sub search screen types. Defaults to [].
        useCache (bool, optional): flag to use cached files. Defaults to True.
        cachePath (str): path to the top cache directory. Defaults to '.'.
        numProc (int): number of processors for screened substructure database
            generation. Defaults to 2.
        molLimit (int, optional): limiting number of molecules in the data store. Defaults to 0.
        suppressHydrogens (bool, optional): flag to suppress explicit hydrogens in
            the OE data store. Defaults to False.
        quietFlag (bool, optional): passed to OeIoUtils and the cache builder. Defaults to True.
        logSizes (bool, optional): passed to the molecule provider testCache(). Defaults to False.

    Returns:
        (int): number of OE molecules stored in the binary cache by this call
            (0 when an existing cache file is reused)
    """
    useCache = kwargs.get("useCache", True)
    cachePath = kwargs.get("cachePath", ".")
    numProc = kwargs.get("numProc", 2)
    molLimit = kwargs.get("molLimit", 0)
    fpTypeList = kwargs.get("fpTypeList", ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
    screenTypeList = kwargs.get("screenTypeList", [])
    molBuildType = kwargs.get("molBuildType", "model-xyz")
    limitPerceptions = kwargs.get("limitPerceptions", False)
    quietFlag = kwargs.get("quietFlag", True)
    suppressHydrogens = kwargs.get("suppressHydrogens", False)
    logSizes = kwargs.get("logSizes", False)
    # Hard-wired here, so the "FAST" fingerprint database branch below is currently inactive.
    fpDbType = "STANDARD"
    #
    ccCount = 0
    oeCount = 0
    errCount = 0
    failIdList = []
    # oeCount is only meaningful when the molecule cache is rebuilt in this call.
    molCacheRebuilt = False
    oeIo = OeIoUtils(quietFlag=quietFlag)
    # --------
    oeMolFilePath = os.path.join(self.__dirPath, self.__getOeMolFileName())
    if not (useCache and self.__mU.exists(oeMolFilePath)):
        # Forward everything except the options that are passed explicitly.
        cmpKwargs = {k: v for k, v in kwargs.items() if k not in ["cachePath", "useCache", "molLimit"]}
        ccmP = ChemCompMoleculeProvider(cachePath=cachePath, useCache=True, molLimit=molLimit, **cmpKwargs)
        ok = ccmP.testCache(minCount=molLimit, logSizes=logSizes)
        ccObjD = ccmP.getMolD() if ok else {}
        ccCount = len(ccObjD)
        # -------
        startTime = time.time()
        oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCache(
            oeMolFilePath,
            ccObjD,
            molBuildType=molBuildType,
            quietFlag=quietFlag,
            fpTypeList=fpTypeList,
            limitPerceptions=limitPerceptions,
            suppressHydrogens=suppressHydrogens,
        )
        molCacheRebuilt = True
        logger.info(
            "Stored %d/%d OeMols (suppressH = %r) created with molBuildType %r (unconverted %d)",
            oeCount,
            ccCount,
            suppressHydrogens,
            molBuildType,
            errCount,
        )
        if failIdList:
            logger.info("%r failures %r", molBuildType, failIdList)
        endTime = time.time()
        logger.info("Constructed %d/%d cached oeMols (%.4f seconds)", oeCount, ccCount, endTime - startTime)
    # --------
    oeMolDbFilePath = os.path.join(self.__dirPath, self.__getOeMolDbFileName())
    if not (useCache and self.__mU.exists(oeMolDbFilePath)):
        startTime = time.time()
        molCount = oeIo.createOeBinaryDatabaseAndIndex(oeMolFilePath, oeMolDbFilePath)
        endTime = time.time()
        logger.info("Created and stored %d indexed OeMols in OE database format (%.4f seconds)", molCount, endTime - startTime)
    # --------
    if fpDbType == "FAST":
        for fpType in fpTypeList:
            startTime = time.time()
            # Fast FP search database file names
            fpPath = os.path.join(self.__dirPath, self.__getFastFpDbFileName(fpType))
            if not (useCache and self.__mU.exists(fpPath)):
                ok = oeIo.createOeFingerPrintDatabase(oeMolDbFilePath, fpPath, fpType=fpType)
                endTime = time.time()
                logger.info("Created and stored %s fingerprint database (%.4f seconds)", fpType, endTime - startTime)
    # --------
    if molBuildType in ["oe-iso-smiles"]:
        for screenType in screenTypeList:
            startTime = time.time()
            fp = os.path.join(self.__dirPath, self.__getSubSearchFileName(screenType))
            if not (useCache and self.__mU.exists(fp)):
                ok = oeIo.createOeSubSearchDatabase(oeMolFilePath, fp, screenType=screenType, numProc=numProc)
                endTime = time.time()
                logger.info(
                    "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                    ok,
                    screenType,
                    endTime - startTime,
                )
                # ---------
                # Read the new database back and compare its size with the
                # molecule cache; report a disagreement instead of dropping it.
                ssDb = oeIo.loadOeSubSearchDatabase(fp, screenType=screenType, numProc=numProc)
                ssCount = ssDb.NumMolecules()
                ok = ssCount == oeCount
                if molCacheRebuilt and not ok:
                    logger.warning(
                        "Screened substructure database (%s) holds %d molecules but %d were stored in the molecule cache",
                        screenType,
                        ssCount,
                        oeCount,
                    )
                # ----------
    return oeCount
def __reload(self, **kwargs):
    """Reload the OE search molecule cache and related data artifacts for chemical component definitions.

    Each artifact (binary search molecule cache, indexed molecule database,
    optional fingerprint and screened substructure databases) is rebuilt when
    ``useCache`` is false or when its file does not exist yet.

    Args:
        limitPerceptions (bool): passed to buildOeBinaryMolCacheFromIndex(). Defaults to False.
        fpTypeList (list): fingerprint types. Defaults to
            ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"].
        screenTypeList (list): fast sub search screen types. Defaults to None
            (no screened databases are built).
        useCache (bool, optional): flag to use cached files. Defaults to True.
        cachePath (str): path to the top cache directory. Defaults to '.'.
        numProc (int): number of processors for screened substructure database
            generation. Defaults to 2.
        suppressHydrogens (bool, optional): flag to suppress explicit hydrogens in
            the OE data store. Defaults to False.
        molLimit (int, optional): passed to the search index provider as molLimit
            and as the testCache() minimum count. Defaults to None.
        quietFlag (bool, optional): passed to OeIoUtils and the cache builder. Defaults to True.
        logSizes (bool, optional): passed to the index provider testCache(). Defaults to False.

    Returns:
        (bool): True for success or False otherwise (any exception is logged)
    """
    try:
        useCache = kwargs.get("useCache", True)
        cachePath = kwargs.get("cachePath", ".")
        numProc = kwargs.get("numProc", 2)
        molLimit = kwargs.get("molLimit", None)
        fpTypeList = kwargs.get("fpTypeList", ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
        screenTypeList = kwargs.get("screenTypeList", None)
        limitPerceptions = kwargs.get("limitPerceptions", False)
        suppressHydrogens = kwargs.get("suppressHydrogens", False)
        quietFlag = kwargs.get("quietFlag", True)
        logSizes = kwargs.get("logSizes", False)
        # Hard-wired here, so the "FAST" fingerprint database branch below is currently inactive.
        fpDbType = "STANDARD"
        buildScreenedDb = True
        #
        oeCount = 0
        errCount = 0
        failIdList = []
        # oeCount is only meaningful when the molecule cache is rebuilt in this call.
        molCacheRebuilt = False
        oeIo = OeIoUtils(quietFlag=quietFlag)
        # --------
        oeSearchMolFilePath = os.path.join(self.__dirPath, self.__getOeSearchMolFileName())
        if not (useCache and self.__mU.exists(oeSearchMolFilePath)):
            # Forward everything except the options that are passed explicitly.
            cmpKwargs = {k: v for k, v in kwargs.items() if k not in ["cachePath", "useCache", "molLimit"]}
            ccsiP = ChemCompSearchIndexProvider(cachePath=cachePath, useCache=True, molLimit=molLimit, **cmpKwargs)
            ok = ccsiP.testCache(minCount=molLimit, logSizes=logSizes)
            # ----
            ccIdxD = ccsiP.getIndex() if ok else {}
            idxCount = len(ccIdxD)
            # ------- JDW OE mol construction here -----
            startTime = time.time()
            oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCacheFromIndex(
                oeSearchMolFilePath,
                ccIdxD,
                quietFlag=quietFlag,
                fpTypeList=fpTypeList,
                limitPerceptions=limitPerceptions,
                suppressHydrogens=suppressHydrogens,
            )
            molCacheRebuilt = True
            if failIdList:
                logger.info("failures %r", failIdList)
            endTime = time.time()
            logger.info(
                "Constructed %d/%d cached oeMols (unconverted %d) (%.4f seconds)",
                oeCount,
                idxCount,
                errCount,
                endTime - startTime,
            )
        # --------
        oeMolDbFilePath = os.path.join(self.__dirPath, self.__getOeMolDbFileName())
        if not (useCache and self.__mU.exists(oeMolDbFilePath)):
            startTime = time.time()
            molCount = oeIo.createOeBinaryDatabaseAndIndex(oeSearchMolFilePath, oeMolDbFilePath)
            endTime = time.time()
            logger.info("Created and stored %d indexed oeMols in OE database format (%.4f seconds)", molCount, endTime - startTime)
        # --------
        if fpDbType == "FAST":
            for fpType in fpTypeList:
                startTime = time.time()
                # Fast FP search database file names
                fpPath = os.path.join(self.__dirPath, self.__getFastFpDbFileName(fpType))
                if not (useCache and self.__mU.exists(fpPath)):
                    ok = oeIo.createOeFingerPrintDatabase(oeMolDbFilePath, fpPath, fpType=fpType)
                    endTime = time.time()
                    logger.info("Created and stored %s fingerprint database (%.4f seconds)", fpType, endTime - startTime)
        # --------
        if buildScreenedDb and screenTypeList:
            for screenType in screenTypeList:
                startTime = time.time()
                fp = os.path.join(self.__dirPath, self.__getSubSearchFileName(screenType))
                if not (useCache and self.__mU.exists(fp)):
                    ok = oeIo.createOeSubSearchDatabase(oeSearchMolFilePath, fp, screenType=screenType, numProc=numProc)
                    endTime = time.time()
                    logger.info(
                        "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                        ok,
                        screenType,
                        endTime - startTime,
                    )
                    # ---------
                    # Read the new database back and compare its size with the
                    # molecule cache; report a disagreement instead of dropping it.
                    ssDb = oeIo.loadOeSubSearchDatabase(fp, screenType=screenType, numProc=numProc)
                    ssCount = ssDb.NumMolecules()
                    ok = ssCount == oeCount
                    if molCacheRebuilt and not ok:
                        logger.warning(
                            "Screened substructure database (%s) holds %d molecules but %d were stored in the molecule cache",
                            screenType,
                            ssCount,
                            oeCount,
                        )
                    # ----------
        #
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return False
def testSssWithFingerPrintFromDescriptor(self):
    """Substructure search with fingerprint pre-screening, using molecules built from descriptors.

    For the first ``numMols`` index entries a query molecule is built from each
    available descriptor (SMILES variants and InChI) and searched with every
    (fingerprint type, cutoff) pair in the configured cutoff list.  Searches
    that do not return the query component itself are collected and logged.
    """
    oemp = OeMoleculeProvider(**self.__myKwargs)
    ok = oemp.testCache()
    ccmP = ChemCompIndexProvider(**self.__myKwargs)
    ccIdxD = ccmP.getIndex()
    ok = ccmP.testCache(minCount=self.__minCount)
    self.assertTrue(ok)
    limitPerceptions = False
    maxFpResults = 50
    matchOpts = "graph-relaxed"
    numMols = 20
    buildTypeList = ["oe-iso-smiles", "oe-smiles", "acdlabs-smiles", "cactvs-iso-smiles", "cactvs-smiles", "inchi"]
    oeioU = OeIoUtils()
    oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
    # missedD:   ccId -> descriptor types for which no fingerprint type found the query
    # missedFpD: ccId -> (descriptor type, fingerprint type, hit count) for each miss
    missedD = {}
    missedFpD = {}
    # ----
    startTime = time.time()
    for ccId, ccD in list(ccIdxD.items())[:numMols]:
        for buildType in buildTypeList:
            if buildType not in ccD:
                logger.info("%s missing descriptor %r", ccId, buildType)
                continue
            logger.debug("Search %s %r", ccId, ccD[buildType])
            # Reset for every descriptor so a failed InChI build cannot fall
            # through with the molecule left over from the previous descriptor.
            oeMol = None
            if buildType in ["inchi"]:
                oemf = OeMoleculeFactory()
                oemf.setDescriptor(ccD["inchi"], "inchi", ccId)
                ok = oemf.build(molBuildType="inchi", limitPerceptions=limitPerceptions)
                if not ok:
                    logger.info("%s build failed with InChI %r", ccId, ccD["inchi"])
                else:
                    oeMol = oemf.getMol()
                    if oemf.getInChI() != ccD["inchi"]:
                        logger.info("%s regenerated InChI differs\n%r\n%s", ccId, ccD["inchi"], oemf.getInChI())
            else:
                oeMol = oeioU.smilesToMol(ccD[buildType], limitPerceptions=limitPerceptions)
            if not oeMol:
                continue
            maxHits = 0
            minHits = maxFpResults
            selfHit = False
            for fpType, minFpScore in self.__fpTypeCuttoffList:
                retStatus, mL = oesU.searchSubStructureWithFingerPrint(oeMol, fpType, minFpScore, maxFpResults, matchOpts=matchOpts)
                self.assertTrue(retStatus)
                logger.debug("%s fpType %r hits %d", ccId, fpType, len(mL))
                maxHits = max(maxHits, len(mL))
                minHits = min(minHits, len(mL))
                matchedSelf = self.__resultContains(ccId, mL)
                selfHit = selfHit or matchedSelf
                if not matchedSelf:
                    missedFpD.setdefault(ccId, []).append((buildType, fpType, len(mL)))
            if not selfHit:
                missedD.setdefault(ccId, []).append(buildType)
            logger.info("%s (%r) buildType %r min hits %d max hits %d", ccId, selfHit, buildType, minHits, maxHits)
    #
    for ccId, missL in missedD.items():
        logger.info("%s missed list %r", ccId, missL)
        if ccId in missedFpD:
            logger.info("%s unmatched for fpTypes %r", ccId, missedFpD[ccId])
    # ----
    # Manual switch: set to True to depict the missed descriptor pairs.
    doDepict = False
    if doDepict:
        # missedD already maps each component to its missed descriptor types.
        for ccId, buildTypeL in missedD.items():
            idxD = ccIdxD[ccId]
            if "oe-iso-smiles" not in idxD:
                continue
            for buildType in buildTypeL:
                self.__displayAlignedDescriptorPair(
                    ccId,
                    idxD["oe-iso-smiles"],
                    "oe-iso-smiles",
                    idxD[buildType],
                    buildType,
                    title=None,
                    limitPerceptions=True,
                )
    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(self.__fpTypeList), numMols, time.time() - startTime)
def __getMiscFile(self, filePath, suppressHydrogens=False, importType="2D", title=None, largestPart=False):
    """Read a miscellaneous chemical file and build an OE molecule with summary features.

    Only the first molecule read from the file is used.

    Args:
        filePath (str): path of the chemical file to read
        suppressHydrogens (bool, optional): return the hydrogen-suppressed graph
            molecule instead of the full molecule. Defaults to False.
        importType (str, optional): "3D" reads the file with use3D enabled and
            collects per-atom coordinates; any other value does neither. Defaults to "2D".
        title (str, optional): title to assign to the molecule; when not given
            the title read from the file is used. Defaults to None.
        largestPart (bool, optional): passed to fileToMols(). Defaults to False.

    Returns:
        (tuple): (identifier, OE molecule, feature dictionary), or
            (None, None, None) if nothing could be read or on any failure.
            The feature dictionary has keys Formula, SMILES, SMILES_STEREO,
            InChI, InChIKey, xyz and OEMOL.
    """
    try:
        oeioU = OeIoUtils()
        oeMolL = oeioU.fileToMols(filePath, use3D=importType == "3D", largestPart=largestPart)
        logger.info("Read (%d) from %s ", len(oeMolL) if oeMolL else 0, filePath)
        if not oeMolL:
            # Report the empty read directly rather than via an IndexError traceback.
            logger.error("No molecules read from %s", filePath)
            return None, None, None
        oeMol = oeMolL[0]
        ccId = title if title else oeMol.GetTitle()
        if title:
            oeMol.SetTitle(ccId)
        #
        oemf = OeMoleculeFactory()
        if not self.__verbose:
            oemf.setQuiet()
        oemf.setOeMol(oeMol, ccId)
        #
        # NOTE(review): the returned feature dictionary was never used; the call
        # is retained in case it has side effects on the factory - confirm and remove.
        oemf.getOeMoleculeFeatures()
        if self.__verbose:
            logger.info(" Title = %s", title)
            logger.info(" Title OEMF = %s", oemf.getTitle())
            logger.info(" SMILES = %s", oemf.getCanSMILES())
            logger.info(" SMILES (stereo) = %s", oemf.getIsoSMILES())
            logger.info(" Formula (Hill) = %s", oemf.getFormula())
            logger.info(" InChI key = %s", oemf.getInChIKey())
            logger.info(" InChI = %s", oemf.getInChI())
        #
        ccId = oemf.getTitle()
        tMol = oemf.getGraphMolSuppressH() if suppressHydrogens else oemf.getMol()
        #
        molXyzL = []
        if importType == "3D":
            for atm in tMol.GetAtoms():
                xyzL = oechem.OEFloatArray(3)
                tMol.GetCoords(atm, xyzL)
                molXyzL.append(
                    ComponentAtomDetails(
                        atIdx=atm.GetIdx(),
                        atNo=atm.GetAtomicNum(),
                        atName=atm.GetName(),
                        atType=atm.GetType(),
                        x=xyzL[0],
                        y=xyzL[1],
                        z=xyzL[2],
                        atFormalCharge=atm.GetFormalCharge(),
                    )
                )
        #
        if self.__verbose:
            # Atom-level diagnostics only; skipped entirely when not verbose.
            for atm in tMol.GetAtoms():
                xyzL = oechem.OEFloatArray(3)
                tMol.GetCoords(atm, xyzL)
                logger.debug("atom %s %s %s %s %r", atm.GetIdx(), atm.GetAtomicNum(), atm.GetName(), atm.GetType(), xyzL)
        #
        fD = {
            "Formula": oemf.getFormula(),
            "SMILES": oemf.getCanSMILES(),
            "SMILES_STEREO": oemf.getIsoSMILES(),
            "InChI": oemf.getInChI(),
            "InChIKey": oemf.getInChIKey(),
            "xyz": molXyzL,
            "OEMOL": tMol,
        }
        return (ccId, tMol, fD)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return None, None, None