Ejemplos de DataTransformFactory en Python, ejemplos de rcsb.db.processors.DataTransformFactory.DataTransformFactory en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: CockroachDbLoader.py Proyecto: rcsb/py-rcsb_db

    def __init__(self, schemaDefObj, ioObj=None, dbCon=None, workPath=".", cleanUp=False, warnings="default", verbose=True):
        """Map PDBx/mmCIF instance data to SQL loadable data using external schema definition.

        Args:
            schemaDefObj (object): Description
            ioObj (None, optional): Description
            dbCon (None, optional): Description
            workPath (str, optional): Description
            cleanUp (bool, optional): Description
            warnings (str, optional): Description
            verbose (bool, optional): Description
        """
        self.__verbose = verbose
        self.__debug = False
        self.__sD = schemaDefObj
        self.__ioObj = ioObj
        #
        self.__dbCon = dbCon
        self.__workingPath = workPath
        self.__pathList = []
        self.__cleanUp = cleanUp
        #
        # self.__sdp = SchemaDefDataPrep(schemaDefObj=schemaDefObj, ioObj=IoAdapter(), verbose=True)
        #
        self.__warningAction = warnings
        self.__fTypeRow = "skip-max-width"
        dtf = DataTransformFactory(schemaDefAccessObj=self.__sD, filterType=self.__fTypeRow)
        self.__sdp = SchemaDefDataPrep(schemaDefAccessObj=self.__sD, dtObj=dtf, workPath=self.__workingPath, verbose=self.__verbose)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: CrateDbLoader.py Proyecto: rcsb/py-rcsb_db

 def __init__(self,
              schemaDefObj,
              ioObj=None,
              dbCon=None,
              workPath=".",
              cleanUp=False,
              warnings="default",
              verbose=True):
     self.__verbose = verbose
     self.__debug = False
     self.__sD = schemaDefObj
     self.__ioObj = ioObj
     #
     self.__dbCon = dbCon
     self.__workingPath = workPath
     self.__pathList = []
     self.__cleanUp = cleanUp
     #
     # self.__sdp = SchemaDefDataPrep(schemaDefAccessObj=schemaDefObj, ioObj=IoAdapter(), verbose=True)
     #
     self.__warningAction = warnings
     #
     self.__fTypeRow = "skip-max-width"
     dtf = DataTransformFactory(schemaDefAccessObj=self.__sD,
                                filterType=self.__fTypeRow)
     self.__sdp = SchemaDefDataPrep(schemaDefAccessObj=self.__sD,
                                    dtObj=dtf,
                                    workPath=self.__workingPath,
                                    verbose=self.__verbose)

Ejemplo n.º 3

0

Mostrar archivo

    def __init__(self, cfgOb, schemaDefObj, cfgSectionName="site_info_configuration", dbCon=None, cachePath=".", workPath=".", cleanUp=False, warnings="default", verbose=True):
        self.__verbose = verbose
        self.__debug = False
        self.__cfgOb = cfgOb
        sectionName = cfgSectionName
        self.__sD = schemaDefObj

        #
        self.__dbCon = dbCon
        self.__cachePath = cachePath
        self.__workPath = workPath
        self.__pathList = []
        self.__cleanUp = cleanUp
        #
        self.__colSep = "&##&\t"
        self.__rowSep = "$##$\n"
        #
        #
        self.__fTypeRow = "skip-max-width"
        self.__fTypeCol = "skip-max-width"
        #
        self.__warningAction = warnings
        dtf = DataTransformFactory(schemaDefAccessObj=self.__sD, filterType=self.__fTypeRow)
        self.__sdp = SchemaDefDataPrep(schemaDefAccessObj=self.__sD, dtObj=dtf, workPath=self.__cachePath, verbose=self.__verbose)
        self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, cachePath=self.__cachePath)
        #
        schemaName = self.__sD.getName()
        modulePathMap = self.__cfgOb.get("DICT_METHOD_HELPER_MODULE_PATH_MAP", sectionName=sectionName)
        dP = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=True)
        dictApi = dP.getApiByName(schemaName)
        rP = DictMethodResourceProvider(self.__cfgOb, cachePath=self.__cachePath)
        self.__dmh = DictMethodRunner(dictApi, modulePathMap=modulePathMap, resourceProvider=rP)

Ejemplo n.º 4

0

Mostrar archivo

    def __simpleSchemaDataPrep(self, contentType, filterType, styleType, mockLength, rejectLength=0, dataSelectors=None, mergeContentTypes=None):
        """Internal method for preparing file-based data NOT requiring dynamic methods, slicing, or key injection.

        Args:
            contentType (str): Content type name
            filterType (str): List of data processing options (separated by '|') (e.g. "drop-empty-attributes|drop-empty-tables|skip-max-width|...)
            styleType (str): organization of output document (e.g. rowise-by-name)
            mockLength (int): Expected length of the test data for the input content type
            rejectLength (int, optional): number of input data sets rejected by the dataselection criteria. Defaults to 0.
            dataSelectors (list of str, optional): data selection criteria. Defaults to None.
            mergeContentTypes (list of str, optional): list content types to merge with the input data set. Defaults to None. (e.g. ['vrpt'])
        """
        try:
            dataSelectors = dataSelectors if dataSelectors else ["PUBLIC_RELEASE"]
            dD = self.__schP.makeSchemaDef(contentType, dataTyping="ANY", saveSchema=True)
            _ = SchemaDefAccess(dD)
            inputPathList = self.__rpP.getLocatorObjList(contentType=contentType, mergeContentTypes=mergeContentTypes)
            sd, _, _, _ = self.__schP.getSchemaInfo(databaseName=contentType, dataTyping="ANY")
            dtf = DataTransformFactory(schemaDefAccessObj=sd, filterType=filterType)
            sdp = SchemaDefDataPrep(schemaDefAccessObj=sd, dtObj=dtf, workPath=self.__cachePath, verbose=self.__verbose)
            #

            logger.debug("For %s mock length %d length of path list %d\n", contentType, mockLength, len(inputPathList))
            self.assertEqual(len(inputPathList), mockLength)
            tableDataDictList, containerNameList, rejectList = sdp.fetchDocuments(inputPathList, styleType=styleType, filterType=filterType, dataSelectors=dataSelectors)
            logger.debug("For %s mock length %d reject length %d length of tddl list %d\n", contentType, mockLength, rejectLength, len(tableDataDictList))
            self.assertEqual(len(tableDataDictList), mockLength - rejectLength)
            self.assertEqual(len(containerNameList), mockLength - rejectLength)

            if rejectList:
                logger.debug("For %s rejecting components %r", contentType, rejectList)
            #
            self.assertEqual(len(rejectList), rejectLength)
            fName = "simple-prep-%s-%s.json" % (contentType, styleType)
            if self.__exportFlag:
                fPath = os.path.join(self.__outputPath, fName)
                self.__mU.doExport(fPath, tableDataDictList, fmt="json", indent=3)
            if self.__diffFlag:
                fPath = os.path.join(self.__savedOutputPath, fName)
                refDocList = self.__mU.doImport(fPath, fmt="json")
                self.assertEqual(len(refDocList), len(tableDataDictList))
                #
                jD = diff(refDocList, tableDataDictList, syntax="explicit", marshal=True)
                if jD:
                    _, fn = os.path.split(fPath)
                    bn, _ = os.path.splitext(fn)
                    fPath = os.path.join(self.__outputPath, bn + "-diff.json")
                    logger.debug("jsondiff for %s %s = \n%s", contentType, styleType, pprint.pformat(jD, indent=3, width=100))
                    self.__mU.doExport(fPath, jD, fmt="json", indent=3)
                self.assertEqual(len(jD), 0)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

Ejemplo n.º 5

0

Mostrar archivo

Archivo: testSchemaDataPrepValidate-ihm.py Proyecto: informatics-isi-edu/protein-database

    def __testPrepDocumentsFromContainers(self, inputPathList, databaseName, collectionName, styleType="rowwise_by_name_with_cardinality", mergeContentTypes=None):
        """Test case -  create loadable PDBx data from repository files
        """
        try:

            sd, _, _, _ = self.__schP.getSchemaInfo(databaseName)
            #
            dP = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=False)
            dictApi = dP.getApiByName(databaseName)
            rP = DictMethodResourceProvider(self.__cfgOb, configName=self.__configName, cachePath=self.__cachePath, siftsAbbreviated="TEST")
            dmh = DictMethodRunner(dictApi, modulePathMap=self.__modulePathMap, resourceProvider=rP)
            #
            dtf = DataTransformFactory(schemaDefAccessObj=sd, filterType=self.__fTypeRow)
            sdp = SchemaDefDataPrep(schemaDefAccessObj=sd, dtObj=dtf, workPath=self.__cachePath, verbose=self.__verbose)
            containerList = self.__rpP.getContainerList(inputPathList)
            for container in containerList:
                cName = container.getName()
                logger.debug("Processing container %s", cName)
                dmh.apply(container)
                if self.__export:
                    savePath = os.path.join(HERE, "test-output", cName + "-with-method.cif")
                    #self.__mU.doExport(savePath, [container], fmt="mmcif")
            #
            tableIdExcludeList = sd.getCollectionExcluded(collectionName)
            tableIdIncludeList = sd.getCollectionSelected(collectionName)
            sliceFilter = sd.getCollectionSliceFilter(collectionName)
            sdp.setSchemaIdExcludeList(tableIdExcludeList)
            sdp.setSchemaIdIncludeList(tableIdIncludeList)
            #
            docList, containerNameList, _ = sdp.processDocuments(
                containerList, styleType=styleType, filterType=self.__fTypeRow, dataSelectors=["PUBLIC_RELEASE"], sliceFilter=sliceFilter, collectionName=collectionName
            )

            docList = sdp.addDocumentPrivateAttributes(docList, collectionName)
            docList = sdp.addDocumentSubCategoryAggregates(docList, collectionName)
            #
            mergeS = "-".join(mergeContentTypes) if mergeContentTypes else ""
            if self.__export and docList:
                # for ii, doc in enumerate(docList[:1]):
                for ii, doc in enumerate(docList):
                    cn = containerNameList[ii]
                    fp = os.path.join(HERE, "test-output", "prep-%s-%s-%s-%s.json" % (cn, databaseName, collectionName, mergeS))
                    self.__mU.doExport(fp, [doc], fmt="json", indent=3)
                    logger.debug("Exported %r", fp)
            #
            return docList, containerNameList

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

Ejemplo n.º 6

0

Mostrar archivo

Archivo: testSchemaDefDataPrep.py Proyecto: rcsb/py-rcsb_db

    def __fullSchemaDataPrep(self,
                             contentType,
                             filterType,
                             styleType,
                             mockLength,
                             rejectLength=0,
                             dataSelectors=None,
                             mergeContentTypes=None,
                             excludeExtras=None):
        """Internal method for preparing file-based data requiring dynamic methods, slicing, or key injection.

        Args:
            contentType (str): Content type name
            filterType (str): List of data processing options (separated by '|') (e.g. "drop-empty-attributes|drop-empty-tables|skip-max-width|...)
            styleType (str): organization of output document (e.g. rowise-by-name)
            mockLength (int): Expected length of the test data for the input content type
            rejectLength (int, optional): number of input data sets rejected by the dataselection criteria. Defaults to 0.
            dataSelectors (list of str, optional): data selection criteria. Defaults to None.
            mergeContentTypes (list of str, optional): list content types to merge with the input data set. Defaults to None. (e.g. ['vrpt'])
        """
        try:
            excludeExtras = excludeExtras if excludeExtras else []
            _ = mockLength
            _ = rejectLength
            dD = self.__schP.makeSchemaDef(contentType,
                                           dataTyping="ANY",
                                           saveSchema=True)
            _ = SchemaDefAccess(dD)
            inputPathList = self.__rpP.getLocatorObjList(
                contentType=contentType, mergeContentTypes=mergeContentTypes)
            sd, _, collectionNameList, _ = self.__schP.getSchemaInfo(
                databaseName=contentType, dataTyping="ANY")
            #
            dP = DictionaryApiProviderWrapper(self.__cachePath,
                                              cfgOb=self.__cfgOb,
                                              configName=self.__configName,
                                              useCache=True)
            dictApi = dP.getApiByName(contentType)
            #
            rP = DictMethodResourceProvider(
                self.__cfgOb,
                configName=self.__configName,
                cachePath=self.__cachePath,
                restoreUseStash=False,
                restoreUseGit=True,
                providerTypeExclude=self.__excludeType,
            )
            dmh = DictMethodRunner(dictApi,
                                   modulePathMap=self.__modulePathMap,
                                   resourceProvider=rP)
            #
            dtf = DataTransformFactory(schemaDefAccessObj=sd,
                                       filterType=filterType)
            sdp = SchemaDefDataPrep(schemaDefAccessObj=sd,
                                    dtObj=dtf,
                                    workPath=self.__cachePath,
                                    verbose=self.__verbose)
            containerList = self.__rpP.getContainerList(inputPathList)
            for container in containerList:
                cName = container.getName()
                logger.debug("Processing container %s", cName)
                dmh.apply(container)
            #
            for collectionName in collectionNameList:
                tableIdExcludeList = sd.getCollectionExcluded(collectionName)
                tableIdIncludeList = sd.getCollectionSelected(collectionName)
                sliceFilter = sd.getCollectionSliceFilter(collectionName)
                sdp.setSchemaIdExcludeList(tableIdExcludeList)
                sdp.setSchemaIdIncludeList(tableIdIncludeList)
                #
                docList, _, _ = sdp.processDocuments(
                    containerList,
                    styleType=styleType,
                    sliceFilter=sliceFilter,
                    filterType=filterType,
                    dataSelectors=dataSelectors,
                    collectionName=collectionName)

                docList = sdp.addDocumentPrivateAttributes(
                    docList, collectionName)
                docList = sdp.addDocumentSubCategoryAggregates(
                    docList, collectionName)

                # Special exclusions for the test harness. (removes timestamped data items to allow diffs.)
                self.__filterDocuments(docList, excludeExtras)
                mergeS = "-".join(
                    mergeContentTypes) if mergeContentTypes else ""
                fName = "full-prep-%s-%s-%s-%s.json" % (
                    contentType, collectionName, mergeS, styleType)
                if self.__exportFlag:
                    self.__logDocumentOrder(docList)
                    fPath = os.path.join(self.__outputPath, fName)
                    self.__mU.doExport(fPath, docList, fmt="json", indent=3)
                    logger.debug("Exported %r", fPath)
                #
                if self.__diffFlag:
                    fPath = os.path.join(self.__savedOutputPath, fName)
                    refDocList = self.__mU.doImport(fPath, fmt="json")
                    self.assertEqual(len(refDocList), len(docList))
                    logger.debug("For %s %s len refDocList %d", contentType,
                                 collectionName, len(refDocList))
                    logger.debug("For %s %s len docList %d", contentType,
                                 collectionName, len(docList))
                    jD = diff(refDocList,
                              docList,
                              syntax="explicit",
                              marshal=True)
                    if jD:
                        _, fn = os.path.split(fPath)
                        bn, _ = os.path.splitext(fn)
                        fPath = os.path.join(self.__outputPath,
                                             bn + "-diff.json")
                        logger.debug("jsondiff for %s %s = \n%s", contentType,
                                     collectionName,
                                     pprint.pformat(jD, indent=3, width=100))
                        self.__mU.doExport(fPath, jD, fmt="json", indent=3)
                    self.assertEqual(len(jD), 0)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()