Ejemplo n.º 1
0
    def fetchMatchedDataMp(self, numProc=6, chunkSize=5, useCache=True):
        """Fetch data for the current search results in multiprocess mode and store the result index.

        The keys of the current search results are distributed over worker processes
        that run the ``fetchDataWorker`` method of this object.  Each successful worker
        result is read as a tuple: element 0 becomes the index key and element 1 the
        stored value.  The assembled index is then passed to ``storeResultIndex()``.

        Args:
            numProc (int, optional): number of processes to invoke. Defaults to 6.
            chunkSize (int, optional): work chunksize. Defaults to 5.
            useCache (bool, optional): forwarded to the worker as the "useCache" option. Defaults to True.

        Returns:
            (bool): status returned by ``storeResultIndex()`` for the assembled index.
                Worker failures are logged but do not by themselves change this status.
        """
        cD = self.__getSearchResults()
        idList = list(cD.keys())
        # ---
        mpu = MultiProcUtil(verbose=True)
        mpu.setWorkingDir(self.__cachePath)
        mpu.setOptions(optionsD={
            "resultPath": self.__cachePath,
            "cD": cD,
            "useCache": useCache
        })
        mpu.set(workerObj=self, workerMethod="fetchDataWorker")

        runOk, failList, resultList, _ = mpu.runMulti(dataList=idList,
                                                      numProc=numProc,
                                                      numResults=1,
                                                      chunkSize=chunkSize)
        logger.info("Run ended with status %r success count %d failures %r",
                    runOk, len(resultList[0]), len(failList))
        # Only the first (and, with numResults=1, only requested) result list is used.
        rcD = {rTup[0]: rTup[1] for rTup in resultList[0]}
        # ---
        storeOk = self.storeResultIndex(rcD)
        if not storeOk:
            # Previously this status was discarded and True was returned regardless.
            logger.error("Failed to store result index with %d entries", len(rcD))
        logger.info("Final match result (w/sdf and metadata) (%d/%d)",
                    len(rcD), len(cD))
        return storeOk
Ejemplo n.º 2
0
    def build(self, alignType="relaxed-stereo", numProc=4, chunkSize=10, verbose=False):
        """Run the model build step in the chemical component model workflow.

        The values of the search result index are distributed over worker processes
        running ``ChemCompModelBuildWorker.build``.  Successful results are grouped by
        their "parentId" value and the grouped index is passed to ``storeModelIndex()``.

        Args:
          alignType (str):  "relaxed"|"strict"|"relaxed-stereo".  Default: relaxed-stereo
          numProc (int, optional): number of processes to invoke. Defaults to 4.
          chunkSize (int, optional): work chunksize. Defaults to 10.
          verbose (bool, optional): verbose logging.  Defaults to False.

        Returns:
            (dict): {parentId: [resultD, ...]} where each resultD is a worker result containing
                    at least the key "parentId" (the prior documentation lists "targetId", "modelId",
                    "modelPath", "matchId", "parentId" and "rFactor" -- not verifiable from this method).
                    On an exception the error is logged and whatever was accumulated so far
                    (possibly an empty dict) is returned.

        """
        retD = {}
        try:
            ccms = ChemCompModelSearch(self.__cachePath, None, None, prefix=self.__prefix)
            modelDirPath = self.getModelDirFilePath()
            imageDirPath = self.getModelImageDirFilePath()
            #
            idxPathD = ccms.getResultIndex()
            idxPathL = list(idxPathD.values())
            # Parent coverage is computed for logging only: the parent id is the
            # first "|"-delimited field of each search result identifier.
            pD = {}
            for sId in idxPathD:
                parentId = sId.split("|")[0]
                pD.setdefault(parentId, []).append(sId)
            logger.info("Using search result index length ridxD (%d) parent coverage (%d)", len(idxPathD), len(pD))
            #
            pU = ChemCompModelBuildWorker(self.__cachePath, verbose=verbose)
            mpu = MultiProcUtil(verbose=True)
            mpu.setWorkingDir(modelDirPath)
            mpu.setOptions(optionsD={"modelDirPath": modelDirPath, "imageDirPath": imageDirPath, "alignType": alignType, "ccSIdxP": self.__ccSIdxP})
            #
            mpu.set(workerObj=pU, workerMethod="build")

            ok, failList, resultList, _ = mpu.runMulti(dataList=idxPathL, numProc=numProc, numResults=1, chunkSize=chunkSize)
            logger.info("Run ended with status %r success count %d failures %r", ok, len(resultList[0]), len(failList))
            # The result list is only read here, so no defensive copy is required.
            for tD in resultList[0]:
                retD.setdefault(tD["parentId"], []).append(tD)
            #
            if retD:
                logger.info("Completed build with models for %d parent chemical definitions", len(retD))
            else:
                logger.info("No models built")
            storeOk = self.storeModelIndex(retD)
            if not storeOk:
                # Previously this status was silently discarded.
                logger.error("Failed to store model index for %d parent chemical definitions", len(retD))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return retD
Ejemplo n.º 3
0
    def runSearch(self,
                  molFilePathList,
                  resultPath,
                  searchType="similarity",
                  numProc=4,
                  chunkSize=10,
                  timeOut=120):
        """Execute the CCDC search over the input molfile paths using multiple processes.

        Args:
            molFilePathList (list): input mol2/sdf path list to search
            resultPath (str): directory path to store results (also used as the working directory)
            searchType (str, optional): search type (substructure|similarity). Defaults to "similarity".
            numProc (int, optional): number of processes to invoke. Defaults to 4.
            chunkSize (int, optional): work chunksize. Defaults to 10.
            timeOut (int, optional): search timeout Defaults: 120 seconds.

        Returns:
            (list): shallow copy of the first result list from the multiprocess run,
                    or an empty list if an exception was raised (the exception is logged).
        """
        logger.info("Starting with molfile path list length %d",
                    len(molFilePathList))
        try:
            searchWorker = CcdcSearchExecWorker(verbose=self.__verbose)
            mpRunner = MultiProcUtil(verbose=True)
            mpRunner.setWorkingDir(resultPath)
            # Options handed to every worker invocation.
            workerOptionsD = {
                "resultPath": resultPath,
                "searchType": searchType,
                "pythonRootPath": self.__pythonRootPath,
                "csdHome": self.__csdHome,
                "timeOut": timeOut,
            }
            mpRunner.setOptions(optionsD=workerOptionsD)
            mpRunner.set(workerObj=searchWorker, workerMethod="search")
            #
            status, failedL, resultsL, _ = mpRunner.runMulti(
                dataList=molFilePathList,
                numProc=numProc,
                numResults=1,
                chunkSize=chunkSize)
            firstResultL = resultsL[0]
            logger.info(
                "Run ended with status %r success count %d failures %r",
                status, len(firstResultL), len(failedL))
            return copy.copy(firstResultL)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        # Reached only when the run raised before a result copy could be returned.
        return []
Ejemplo n.º 4
0
    def build(self,
              alignType="relaxed-stereo",
              numProc=4,
              chunkSize=10,
              verbose=False,
              doFigures=True):
        """Run the model build step in the chemical component model workflow.

        Search result index entries are grouped by parent id (the first "|"-delimited
        field of each index key).  Parents that do not yet have a non-empty
        "model-index.json" under the model directory are distributed over worker
        processes running ``CODModelBuildWorker.build``.  Afterwards the per-parent
        index files of all parents are merged into one index, which is sorted by key
        and passed to ``storeModelIndex()``.

        Args:
          alignType (str):  "relaxed"|"strict"|"relaxed-stereo".  Default: relaxed-stereo
          numProc (int, optional): number of processes to invoke. Defaults to 4.
          chunkSize (int, optional): work chunksize. Defaults to 10.
          verbose (bool, optional): verbose logging.  Defaults to False.
          doFigures (bool, optional): forwarded to the worker as the "doFigures" option
                                      (presumably toggles figure generation -- confirm in the worker).
                                      Defaults to True.

        Returns:
            (dict): {parentId: [recordD, ...]} built from the imported per-parent index records,
                    each of which contains at least the key "parentId" (the prior documentation lists
                    "targetId", "modelId", "modelPath", "matchId", "parentId" and "rFactor" -- not
                    verifiable from this method).  On an exception the error is logged and whatever
                    was accumulated so far (possibly an empty dict) is returned.

        """
        retD = {}
        try:
            mU = MarshalUtil(workPath=self.__cachePath)
            ccms = CODModelSearch(self.__cachePath, prefix=self.__prefix)
            modelDirPath = self.getModelDirFilePath()
            imageDirPath = self.getModelImageDirFilePath()
            #
            resultIndexD = ccms.getResultIndex()
            # Make parent index ---
            idxIdD = {}
            for idxId, iDL in resultIndexD.items():
                pId = idxId.split("|")[0]
                idxIdD.setdefault(pId, []).extend(iDL)
            #
            idxIdL = list(idxIdD.keys())
            # Select the parents that still need a build: those with a missing index
            # file or one of 10 bytes or fewer (treated as empty).  Parents with a
            # larger existing index are skipped.
            midxIdL = []
            for pId in idxIdL:
                fp = os.path.join(modelDirPath, pId, "model-index.json")
                if mU.exists(fp) and os.stat(fp).st_size > 10:
                    continue
                midxIdL.append(pId)
            #
            logger.info(
                "Starting COD model build using (%d) from a total of results length (%d)",
                len(midxIdL), len(idxIdD))
            #
            cmbw = CODModelBuildWorker(self.__cachePath,
                                       verbose=verbose,
                                       timeOut=self.__timeOut)
            mpu = MultiProcUtil(verbose=True)
            mpu.setWorkingDir(modelDirPath)
            mpu.setOptions(
                optionsD={
                    "modelDirPath": modelDirPath,
                    "imageDirPath": imageDirPath,
                    "alignType": alignType,
                    "ccSIdxP": self.__ccSIdxP,
                    "idxIdD": idxIdD,
                    "oesmP": self.__oesmP,
                    "ccmP": self.__ccmP,
                    "doFigures": doFigures,
                })
            #
            mpu.set(workerObj=cmbw, workerMethod="build")
            ok, failList, resultList, _ = mpu.runMulti(dataList=midxIdL,
                                                       numProc=numProc,
                                                       numResults=1,
                                                       chunkSize=chunkSize)
            logger.info(
                "Run ended with status %r success count %d failures %r", ok,
                len(resultList[0]), len(failList))
            # The result list is only inspected for its length, so no copy is needed.
            successList = resultList[0]
            #
            if successList:
                logger.info("Completed build with %d models ",
                            len(successList))
            else:
                logger.info("No models built")
            #
            # Build full index -
            #
            # All parents are scanned (not only those built in this run) so that
            # previously built per-parent indices are included as well.
            logger.info("Building full model index")
            for pId in idxIdL:
                fp = os.path.join(modelDirPath, pId, "model-index.json")
                if mU.exists(fp):
                    for recordD in mU.doImport(fp, fmt="json"):
                        retD.setdefault(recordD["parentId"], []).append(recordD)
            #
            retD = dict(sorted(retD.items()))
            logger.info("Storing models for %d parent components", len(retD))
            storeOk = self.storeModelIndex(retD)
            if not storeOk:
                # Previously this status was silently discarded.
                logger.error("Failed to store model index for %d parent components", len(retD))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return retD