def testRoundTripOps(self):
    """Round-trip generated related molecular forms through file I/O.

    For the first ten chemical component definitions, each related form is
    built from its "smiles" descriptor, written to a file, read back, and the
    first molecule read is written again under a "-next" file name. This is
    done once with a ".mol2" file and once with a ".mol" file (whose re-written
    copy uses the ".sdf" extension). Any exception fails the test.
    """
    # (extension of the first file written, suffix of the re-written file)
    roundTripNames = (
        (".mol2", "-next.mol2"),
        (".mol", "-next.sdf"),
    )
    try:
        ioUtil = OeIoUtils()
        marshal = MarshalUtil()
        outDirPath = self.__molfileDirPath
        marshal.mkdir(outDirPath)
        ccDefD = self.__getChemCompDefs()
        molFactory = OeMoleculeFactory()
        firstTen = list(ccDefD.items())[:10]
        for ccId, ccObj in firstTen:
            returnedId = molFactory.setChemCompDef(ccObj)
            self.assertEqual(returnedId, ccId)
            relatedD = molFactory.buildRelated(limitPerceptions=False)
            logger.info("%s generated %d molecular forms", ccId, len(relatedD))
            for sId, idxD in relatedD.items():
                smiles = idxD["smiles"]
                logger.info("sId %r smiles %r", sId, smiles)
                for firstExt, nextSuffix in roundTripNames:
                    firstPath = os.path.join(outDirPath, sId + firstExt)
                    # The molecule is rebuilt from the descriptor for each format.
                    builtMol = ioUtil.descriptorToMol(smiles, "oe-iso-smiles", limitPerceptions=False, messageTag=None)
                    ioUtil.write(firstPath, builtMol, constantMol=True, addSdTags=True)
                    readBackL = ioUtil.fileToMols(firstPath)
                    nextPath = os.path.join(outDirPath, sId + nextSuffix)
                    ioUtil.write(nextPath, readBackL[0], constantMol=True, addSdTags=True)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __toMolFile(self, oeMol, molfilePath, **kwargs):
    """Write the input OE molecule to a molfile and report whether it succeeded.

    Args:
        oeMol (object): instance of an OE graph molecule
        molfilePath (string): file path for molfile (type determined by extension)
        **kwargs: accepted for call compatibility and ignored

    Returns:
        bool: True for success or False otherwise
    """
    _ = kwargs
    try:
        oeio = OeIoUtils()
        # Return the writer's own status rather than an unconditional True, so a
        # write that reports failure without raising is not counted as a success.
        return bool(oeio.write(molfilePath, oeMol, constantMol=True))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return False
def makeFiles(self, fmt="sdf"):
    """Write one molecule file per chemical component in the requested format.

    Each file is written to
    ``<fileDirPath>/<fmt>/<first character of ccId>/<ccId>_ideal.<fmt>`` when the
    molecule build type is "ideal-xyz", and to ``..._model.<fmt>`` otherwise.

    Args:
        fmt (str): output format, also used as the subdirectory name and file
            extension; one of "mol", "mol2", "mol2h" or "sdf" (default: "sdf")

    Returns:
        bool: False if the format is not supported, the license check fails, or
            an exception is raised; True otherwise. Individual writes that
            report failure are logged but do not change the return value.
    """
    try:
        if fmt not in ("mol", "mol2", "mol2h", "sdf"):
            return False
        if not self.__setLicense(self.__licensePath):
            logger.error("Invalid license details - exiting")
            return False
        # The build type does not change during the loop, so pick the file name
        # suffix and create the writer once instead of once per component.
        suffix = "_ideal." if self.__molBuildType == "ideal-xyz" else "_model."
        oeioU = OeIoUtils()
        for ccId, oeMol in self.__oeMolD.items():
            filePath = os.path.join(self.__fileDirPath, fmt, ccId[0], ccId + suffix + fmt)
            if not oeioU.write(filePath, oeMol, constantMol=True):
                logger.warning("Write failed for %s to %s", ccId, filePath)
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return False
def buildSearchFiles(self, **kwargs):
    """Build cif, sdf (optional), and mol2 (optional) files for components in the chemical
    component search index. Exclude ions or other extraneous molecules lacking bonds.

    Args:
        ccUrlTarget (str): locator for source chemical component dictionary (default: full public dictionary)
        birdUrlTarget (str): locator for source BIRD dictionary (default: full public dictionary)
        limitPerceptions (bool): restrict automatic perceptions in OE molecular build operations (default: False)
        numProc (int): number of processors (default: 2)
        useCache (bool): use existing resource file where possible (default: True)
        molLimit: limit on the number of ingested chemical components (default: None)
        quietFlag (bool): suppress output in OE library operations (default: True)
        fpTypeList (list): passed through to the OE search molecule provider (default: [])
        screenTypeList (list): passed through to the OE search molecule provider (default: [])
        minCount (int): passed to the chemical component provider cache test (default: 0)
        useSdf (bool): write an sdf file for each search index entry (default: True)
        useMol2 (bool): write a mol2 file for each search index entry (default: False)

    Returns:
        (int): number of search index entries that passed the CIF and OE molecule checks
    """
    cachePath = self.__cachePath
    ccUrlTarget = kwargs.get("ccUrlTarget", None)
    birdUrlTarget = kwargs.get("birdUrlTarget", None)
    molLimit = kwargs.get("molLimit", None)
    quietFlag = kwargs.get("quietFlag", True)
    fpTypeList = kwargs.get("fpTypeList", [])
    screenTypeList = kwargs.get("screenTypeList", [])
    ccFileNamePrefix = "cc-%s" % self.__prefix if self.__prefix else "cc-full"
    oeFileNamePrefix = "oe-%s" % self.__prefix if self.__prefix else "oe-cc-full"
    numProc = kwargs.get("numProc", 2)
    minCount = kwargs.get("minCount", 0)
    useCache = kwargs.get("useCache", True)
    useSdf = kwargs.get("useSdf", True)
    useMol2 = kwargs.get("useMol2", False)
    limitPerceptions = kwargs.get("limitPerceptions", False)
    logSizes = False
    #
    startTime = time.time()
    ccmP = ChemCompMoleculeProvider(
        cachePath=cachePath,
        useCache=useCache,
        ccFileNamePrefix=ccFileNamePrefix,
        ccUrlTarget=ccUrlTarget,
        birdUrlTarget=birdUrlTarget,
        molLimit=molLimit,
    )
    ok = ccmP.testCache(minCount=minCount, logSizes=logSizes)
    logger.info("Completed chemical component provider load %r (%.4f seconds)", ok, time.time() - startTime)
    #
    startTime = time.time()
    oesmp = OeSearchMoleculeProvider(
        ccUrlTarget=ccUrlTarget,
        birdUrlTarget=birdUrlTarget,
        cachePath=cachePath,
        ccFileNamePrefix=ccFileNamePrefix,
        oeFileNamePrefix=oeFileNamePrefix,
        useCache=useCache,
        quietFlag=quietFlag,
        fpTypeList=fpTypeList,
        screenTypeList=screenTypeList,
        numProc=numProc,
        molLimit=molLimit,
        limitPerceptions=limitPerceptions,
    )
    ok = oesmp.testCache()
    logger.info("Completed OE molecule provider load %r (%.4f seconds)", ok, time.time() - startTime)
    #
    startTime = time.time()
    ccSIdxP = ChemCompSearchIndexProvider(
        cachePath=cachePath,
        useCache=useCache,
        ccFileNamePrefix=ccFileNamePrefix,
        limitPerceptions=limitPerceptions,
        numProc=numProc,
    )
    ok = ccSIdxP.testCache()
    logger.info("Completed chemical component search index load %r (%.4f seconds)", ok, time.time() - startTime)
    #
    # Fall back to an empty index when the search index cache test fails.
    ccSIdx = ccSIdxP.getIndex() if ccSIdxP and ok else {}
    logger.info("Search index status %r index length %d", ok, len(ccSIdx))
    #
    # Component identifiers whose standard CIF definition has already been handled.
    ccIdD = {}
    mU = MarshalUtil()
    oeU = OeIoUtils(dirPath=cachePath)
    numMols = 0
    searchFileDirPath = self.getSearchDirFilePath()
    pathTupList = []
    for sId in ccSIdx:
        # The component identifier is the part of the search identifier before the first "|".
        ccId = sId.split("|")[0]
        # standard CIF definition
        if ccId not in ccIdD:
            cifPath = os.path.join(searchFileDirPath, ccId[0], ccId, ccId + ".cif")
            if not (useCache and mU.exists(cifPath)):
                ccMol = ccmP.getMol(ccId)
                if not self.__checkCif(ccMol):
                    continue
                mU.doExport(cifPath, [ccMol], fmt="mmcif")
            # Record the component so that further search entries for the same
            # component do not repeat the check/export of its standard CIF file.
            ccIdD[ccId] = True
        #
        oeMol = oesmp.getMol(sId)
        if not self.__checkOeMol(oeMol):
            continue
        #
        # CIF file for a search entry that differs from the parent component
        #
        cifPath = os.path.join(searchFileDirPath, ccId[0], ccId, sId + ".cif")
        if sId != ccId and not (useCache and mU.exists(cifPath)):
            oeccU = OeChemCompUtils()
            ok = oeccU.addOeMol(sId, oeMol, missingModelXyz=True, writeIdealXyz=False)
            if ok:
                oeccU.write(cifPath)
        #
        if useSdf:
            molFilePath = os.path.join(searchFileDirPath, ccId[0], ccId, sId + ".sdf")
            if not (useCache and mU.exists(molFilePath)):
                ok = oeU.write(molFilePath, oeMol, constantMol=False, addSdTags=True)
                if ok:
                    pathTupList.append((sId, molFilePath, "sdf"))
        #
        if useMol2:
            mol2FilePath = os.path.join(searchFileDirPath, ccId[0], ccId, sId + ".mol2")
            if not (useCache and mU.exists(mol2FilePath)):
                # Capture the status of this write; testing a stale status from an
                # earlier step could record a path for a mol2 file that was not written.
                ok = oeU.write(mol2FilePath, oeMol, constantMol=False, addSdTags=True)
                if ok:
                    pathTupList.append((sId, mol2FilePath, "mol2"))
        numMols += 1
    #
    self.__storePathList(pathTupList)
    return numMols