def fetchMatchedDataMp(self, numProc=6, chunkSize=5, useCache=True):
    """Fetch data for the current search results in multiprocess mode and store the result index.

    The identifiers of the current search results are distributed to the
    "fetchDataWorker" method of this object, and the collected results
    are passed to storeResultIndex().

    Args:
        numProc (int, optional): number of processes to invoke. Defaults to 6.
        chunkSize (int, optional): work chunksize. Defaults to 5.
        useCache (bool, optional): forwarded to the worker as the "useCache" option. Defaults to True.

    Returns:
        (bool): always True. Neither the multiprocess run status nor the status returned
                by storeResultIndex() is reflected in the return value.
    """
    searchResultD = self.__getSearchResults()
    searchIdL = list(searchResultD.keys())
    #
    mpu = MultiProcUtil(verbose=True)
    mpu.setWorkingDir(self.__cachePath)
    workerOptionsD = {"resultPath": self.__cachePath, "cD": searchResultD, "useCache": useCache}
    mpu.setOptions(optionsD=workerOptionsD)
    mpu.set(workerObj=self, workerMethod="fetchDataWorker")
    ok, failList, resultList, _ = mpu.runMulti(dataList=searchIdL, numProc=numProc, numResults=1, chunkSize=chunkSize)
    fetchedL = resultList[0]
    logger.info("Run ended with status %r success count %d failures %r", ok, len(fetchedL), len(failList))
    #
    # Index each worker result by its first element; a repeated key keeps the last value seen.
    matchD = {rTup[0]: rTup[1] for rTup in fetchedL}
    #
    self.storeResultIndex(matchD)
    logger.info("Final match result (w/sdf and metadata) (%d/%d)", len(matchD), len(searchResultD))
    return True
def build(self, alignType="relaxed-stereo", numProc=4, chunkSize=10, verbose=False):
    """Run the model build step in the chemical component model workflow.

    The values of the search result index are distributed to the "build" method
    of a ChemCompModelBuildWorker, and the returned model records are grouped by
    their "parentId" and passed to storeModelIndex().

    Args:
        alignType (str): "relaxed"|"strict"|"relaxed-stereo". Default: relaxed-stereo
        numProc (int, optional): number of processes to invoke. Defaults to 4.
        chunkSize (int, optional): work chunksize. Defaults to 10.
        verbose (bool, optional): verbose logging. Defaults to False.

    Returns:
        (dict): {parentId: [<model record dict>, ...]}  Only the "parentId" key of each
                record is read here. Any exception is logged and whatever was
                collected up to that point (possibly an empty dict) is returned.
    """
    modelIndexD = {}
    try:
        ccms = ChemCompModelSearch(self.__cachePath, None, None, prefix=self.__prefix)
        modelDirPath = self.getModelDirFilePath()
        imageDirPath = self.getModelImageDirFilePath()
        #
        searchIndexD = ccms.getResultIndex()
        searchPathL = list(searchIndexD.values())
        #
        # Group the search identifiers by parent (the text before the first "|").
        # This grouping is only used to report the parent coverage below.
        parentD = {}
        for searchId in searchIndexD:
            parentKey = searchId.split("|", 1)[0]
            if parentKey not in parentD:
                parentD[parentKey] = []
            parentD[parentKey].append(searchId)
        logger.info("Using search result index length ridxD (%d) parent coverage (%d)", len(searchIndexD), len(parentD))
        #
        worker = ChemCompModelBuildWorker(self.__cachePath, verbose=verbose)
        mpu = MultiProcUtil(verbose=True)
        mpu.setWorkingDir(modelDirPath)
        workerOptionsD = {
            "modelDirPath": modelDirPath,
            "imageDirPath": imageDirPath,
            "alignType": alignType,
            "ccSIdxP": self.__ccSIdxP,
        }
        mpu.setOptions(optionsD=workerOptionsD)
        #
        mpu.set(workerObj=worker, workerMethod="build")
        ok, failList, resultList, _ = mpu.runMulti(dataList=searchPathL, numProc=numProc, numResults=1, chunkSize=chunkSize)
        logger.info("Run ended with status %r success count %d failures %r", ok, len(resultList[0]), len(failList))
        #
        for modelD in copy.copy(resultList[0]):
            parentId = modelD["parentId"]
            if parentId in modelIndexD:
                modelIndexD[parentId].append(modelD)
            else:
                modelIndexD[parentId] = [modelD]
        #
        if not modelIndexD:
            logger.info("No models built")
        else:
            logger.info("Completed build with models for %d parent chemical definitions", len(modelIndexD))
        #
        # The index is stored even when it is empty.
        self.storeModelIndex(modelIndexD)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return modelIndexD
def runSearch(self, molFilePathList, resultPath, searchType="similarity", numProc=4, chunkSize=10, timeOut=120):
    """Run CCDC search in multiprocess mode.

    The input paths are distributed to the "search" method of a CcdcSearchExecWorker.

    Args:
        molFilePathList (list): input mol2/sdf path list to search
        resultPath (str): directory path to store results
        searchType (str, optional): search type (substructure|similarity). Defaults to "similarity".
        numProc (int, optional): number of processes to invoke. Defaults to 4.
        chunkSize (int, optional): work chunksize. Defaults to 10.
        timeOut (int, optional): search timeout Defaults: 120 seconds.

    Returns:
        (list): shallow copy of the first result list returned by the multiprocess run,
                or an empty list if any exception is raised (the exception is logged).
    """
    logger.info("Starting with molfile path list length %d", len(molFilePathList))
    try:
        worker = CcdcSearchExecWorker(verbose=self.__verbose)
        mpu = MultiProcUtil(verbose=True)
        mpu.setWorkingDir(resultPath)
        workerOptionsD = {
            "resultPath": resultPath,
            "searchType": searchType,
            "pythonRootPath": self.__pythonRootPath,
            "csdHome": self.__csdHome,
            "timeOut": timeOut,
        }
        mpu.setOptions(optionsD=workerOptionsD)
        #
        mpu.set(workerObj=worker, workerMethod="search")
        ok, failList, resultList, _ = mpu.runMulti(dataList=molFilePathList, numProc=numProc, numResults=1, chunkSize=chunkSize)
        logger.info("Run ended with status %r success count %d failures %r", ok, len(resultList[0]), len(failList))
        return copy.copy(resultList[0])
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    # Reached only after a failure above.
    return []
def build(self, alignType="relaxed-stereo", numProc=4, chunkSize=10, verbose=False, doFigures=True):
    """Run the model build step in the chemical component model workflow.

    Parent identifiers that do not yet have a non-trivial "model-index.json" file in
    the model directory are distributed to the "build" method of a CODModelBuildWorker.
    Afterwards the per-parent index files are read back for every parent identifier,
    merged into a single index sorted by parent identifier, and passed to storeModelIndex().

    Args:
        alignType (str): "relaxed"|"strict"|"relaxed-stereo". Default: relaxed-stereo
        numProc (int, optional): number of processes to invoke. Defaults to 4.
        chunkSize (int, optional): work chunksize. Defaults to 10.
        verbose (bool, optional): verbose logging. Defaults to False.
        doFigures (bool, optional): forwarded to the worker as the "doFigures" option. Defaults to True.

    Returns:
        (dict): {parentId: [<model record dict>, ...]}  Records are taken as read from the
                per-parent index files; only their "parentId" key is used here. Any exception
                is logged and whatever was collected up to that point is returned.
    """
    modelIndexD = {}
    try:
        mU = MarshalUtil(workPath=self.__cachePath)
        ccms = CODModelSearch(self.__cachePath, prefix=self.__prefix)
        modelDirPath = self.getModelDirFilePath()
        imageDirPath = self.getModelImageDirFilePath()
        #
        searchIndexD = ccms.getResultIndex()
        #
        # Make parent index: pool the entries of every search id under its
        # parent id (the text before the first "|").
        parentIndexD = {}
        for searchId, entryL in searchIndexD.items():
            parentKey = searchId.split("|", 1)[0]
            if parentKey not in parentIndexD:
                parentIndexD[parentKey] = []
            parentIndexD[parentKey].extend(entryL)
        #
        parentIdL = list(parentIndexD.keys())
        #
        # Select the parents that still need a build. A parent is skipped when its
        # index file exists and is larger than 10 bytes; missing or (near) empty
        # index files are rebuilt.
        pendingIdL = []
        for parentKey in parentIdL:
            indexPath = os.path.join(modelDirPath, parentKey, "model-index.json")
            alreadyBuilt = mU.exists(indexPath) and os.stat(indexPath).st_size > 10
            if not alreadyBuilt:
                pendingIdL.append(parentKey)
        #
        logger.info("Starting COD model build using (%d) from a total of results length (%d)", len(pendingIdL), len(parentIndexD))
        #
        worker = CODModelBuildWorker(self.__cachePath, verbose=verbose, timeOut=self.__timeOut)
        mpu = MultiProcUtil(verbose=True)
        mpu.setWorkingDir(modelDirPath)
        workerOptionsD = {
            "modelDirPath": modelDirPath,
            "imageDirPath": imageDirPath,
            "alignType": alignType,
            "ccSIdxP": self.__ccSIdxP,
            "idxIdD": parentIndexD,
            "oesmP": self.__oesmP,
            "ccmP": self.__ccmP,
            "doFigures": doFigures,
        }
        mpu.setOptions(optionsD=workerOptionsD)
        #
        mpu.set(workerObj=worker, workerMethod="build")
        ok, failList, resultList, _ = mpu.runMulti(dataList=pendingIdL, numProc=numProc, numResults=1, chunkSize=chunkSize)
        logger.info("Run ended with status %r success count %d failures %r", ok, len(resultList[0]), len(failList))
        builtL = copy.copy(resultList[0])
        #
        if not builtL:
            logger.info("No models built")
        else:
            logger.info("Completed build with %d models ", len(builtL))
        #
        # Build full index -
        # Every parent is revisited (not only those built in this run) so that
        # index files left by earlier runs are included as well.
        logger.info("Building full model index")
        for parentKey in parentIdL:
            indexPath = os.path.join(modelDirPath, parentKey, "model-index.json")
            if not mU.exists(indexPath):
                continue
            for modelD in mU.doImport(indexPath, fmt="json"):
                recordParentId = modelD["parentId"]
                if recordParentId in modelIndexD:
                    modelIndexD[recordParentId].append(modelD)
                else:
                    modelIndexD[recordParentId] = [modelD]
        #
        modelIndexD = dict(sorted(modelIndexD.items()))
        logger.info("Storing models for %d parent components", len(modelIndexD))
        self.storeModelIndex(modelIndexD)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return modelIndexD