def testReadYamlConfigWithAutoAppend(self):
    try:
        cfgOb = ConfigUtil(
            configPath=self.__inpPathConfigAutoYaml,
            configFormat="yaml",
            mockTopPath=self.__mockTopPath,
            defaultSectionName="site_info_1",
            cachePath=None,
            useCache=False,
        )
        ok = cfgOb.appendConfig(self.__inpPathConfigAppendYaml, configFormat="yaml")
        self.assertTrue(ok)
        #
        for sName in ["section_appended_1", "section_appended_2", "Section1", "Section2"]:
            pathBird = cfgOb.getPath("BIRD_REPO_PATH", sectionName=sName)
            pathPdbx = cfgOb.getPath("PDBX_REPO_PATH", sectionName=sName)
            #
            self.assertEqual(pathBird, os.path.join(self.__mockTopPath, "MOCK_BIRD_REPO"))
            self.assertEqual(pathPdbx, os.path.join(self.__mockTopPath, "MOCK_PDBX_SANDBOX"))
        #
        #
        cfgOb = ConfigUtil(
            configPath=self.__inpPathConfigAutoYaml,
            configFormat="yaml",
            mockTopPath=self.__mockTopPath,
            defaultSectionName="site_info_1",
            cachePath=None,
            useCache=True,
        )
        ok = cfgOb.appendConfig(self.__inpPathConfigAppendYaml, configFormat="yaml")
        self.assertTrue(ok)
        #
        for sName in ["section_appended_1", "section_appended_2", "Section1", "Section2"]:
            pathBird = cfgOb.getPath("BIRD_REPO_PATH", sectionName=sName)
            pathPdbx = cfgOb.getPath("PDBX_REPO_PATH", sectionName=sName)
            #
            self.assertEqual(pathBird, os.path.join(self.__mockTopPath, "MOCK_BIRD_REPO"))
            self.assertEqual(pathPdbx, os.path.join(self.__mockTopPath, "MOCK_PDBX_SANDBOX"))
        #
    except Exception as e:
        logger.error("Failing with %s", str(e))
        self.fail()
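# Illustrative sketch (not part of the test suite): the append pattern exercised
# above reduced to its essentials. File names are placeholders and the import
# path assumes the usual rcsb.utils.config layout; the calls themselves
# (ConfigUtil, appendConfig, getPath) are the same ones used in the test.
from rcsb.utils.config import ConfigUtil

cfgOb = ConfigUtil(configPath="base-config.yml", configFormat="yaml", defaultSectionName="site_info_1")
ok = cfgOb.appendConfig("extra-config.yml", configFormat="yaml")
# After the append, options from both files resolve through the same accessors:
pathBird = cfgOb.getPath("BIRD_REPO_PATH", sectionName="section_appended_1")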
def setUp(self):
    self.__verbose = True
    #
    mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
    pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
    #
    configName = "site_info_configuration"
    cfgOb = ConfigUtil(configPath=pathConfig, defaultSectionName=configName, mockTopPath=mockTopPath)
    self.__pathClusterData = cfgOb.getPath("RCSB_SEQUENCE_CLUSTER_DATA_PATH", sectionName=configName)
    # sample data set
    self.__dataSetId = "2018_23"
    # self.__levels = ["100", "95", "90", "70", "50", "30"]
    self.__levels = ["95"]
    #
    self.__workPath = os.path.join(HERE, "test-output")
    self.__pathSaveStyleCif = os.path.join(HERE, "test-output", "cluster-data-cif.json")
    self.__pathSaveStyleDocSequence = os.path.join(HERE, "test-output", "cluster-data-doc-sequence.json")
    self.__pathSaveStyleDocCluster = os.path.join(HERE, "test-output", "cluster-data-doc-cluster.json")
    #
    self.__startTime = time.time()
    logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
class RepoHoldingsDataPrepTests(unittest.TestCase):
    def setUp(self):
        self.__verbose = True
        #
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__updateId = "2019_25"
        #
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=self.__pathConfig, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        self.__sandboxPath = self.__cfgOb.getPath("RCSB_EXCHANGE_SANDBOX_PATH", sectionName=configName)
        #
        self.__startTime = time.time()
        logger.info("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testProcessLegacyFiles(self):
        """Test loading and processing operations for repository holdings and status exchange data."""
        try:
            rhdp = RepoHoldingsDataPrep(cfgOb=self.__cfgOb, sandboxPath=self.__sandboxPath, cachePath=self.__cachePath)
            rL = rhdp.getHoldingsUpdateEntry(updateId=self.__updateId)
            logger.info("update data length %r", len(rL))
            self.assertGreaterEqual(len(rL), 10)
            #
            rL = rhdp.getHoldingsCurrentEntry(updateId=self.__updateId)
            self.assertGreaterEqual(len(rL), 10)
            logger.info("holdings data length %r", len(rL))
            #
            rL = rhdp.getHoldingsUnreleasedEntry(updateId=self.__updateId)
            self.assertGreaterEqual(len(rL), 10)
            logger.info("unreleased data length %r", len(rL))
            #
            rL = rhdp.getHoldingsRemovedEntry(updateId=self.__updateId)
            self.assertGreaterEqual(len(rL), 10)
            logger.info("removed data length %r", len(rL))
            rL = rhdp.getHoldingsCombinedEntry(updateId=self.__updateId)
            self.assertGreaterEqual(len(rL), 10)
            logger.info("combined data length %r", len(rL))
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
class DictionaryProviderTests(unittest.TestCase):
    def setUp(self):
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__dirPath = os.path.join(self.__cachePath, "dictionaries")
        configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        configName = "site_info_configuration"
        self.__configName = configName
        self.__contentInfoConfigName = "content_info_helper_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath)
        dictLocatorMap = self.__cfgOb.get("DICT_LOCATOR_CONFIG_MAP", sectionName=self.__contentInfoConfigName)
        schemaName = "pdbx_core"
        self.__dictLocators = [self.__cfgOb.getPath(configLocator, sectionName=self.__configName) for configLocator in dictLocatorMap[schemaName]]
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testResourceCache(self):
        """Test case - generate and check dictionary artifact and API caches."""
        try:
            logger.debug("Dictionary locators %r", self.__dictLocators)
            dp = DictionaryApiProvider(dirPath=self.__dirPath, useCache=False)
            dApi = dp.getApi(self.__dictLocators)
            ok = dApi.testCache()
            self.assertTrue(ok)
            title = dApi.getDictionaryTitle()
            logger.debug("Title %r", title)
            self.assertEqual(title, "mmcif_pdbx.dic,rcsb_mmcif_ext.dic,vrpt_mmcif_ext.dic")
            # revL = dApi.getDictionaryHistory()
            numRev = dApi.getDictionaryRevisionCount()
            logger.debug("Number of dictionary revisions (numRev) %r", numRev)
            self.assertGreater(numRev, 220)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
def testReadIniConfig(self):
    try:
        cfgOb = ConfigUtil(configPath=self.__inpPathConfigIni, mockTopPath=self.__dataPath)
        sName = "DEFAULT"
        pathBird = cfgOb.getPath("BIRD_REPO_PATH", sectionName=sName)
        pathPdbx = cfgOb.getPath("PDBX_REPO_PATH", sectionName=sName)
        #
        self.assertEqual(pathBird, os.path.join(self.__mockTopPath, "MOCK_BIRD_REPO"))
        self.assertEqual(pathPdbx, os.path.join(self.__mockTopPath, "MOCK_PDBX_SANDBOX"))
        pathBird = cfgOb.get("BIRD_REPO_PATH", sectionName=sName)
        pathPdbx = cfgOb.get("PDBX_REPO_PATH", sectionName=sName)
        self.assertEqual(pathBird, "MOCK_BIRD_REPO")
        self.assertEqual(pathPdbx, "MOCK_PDBX_SANDBOX")
        sName = "Section1"
        #
        helperMethod = cfgOb.getHelper("DICT_METHOD_HELPER_MODULE", sectionName=sName)
        tv = helperMethod.echo("test_value")
        self.assertEqual(tv, "test_value")
        #
        tEnv = "TEST_ENV_VAR"
        tVal = "TEST_ENV_VAR_VALUE"
        os.environ[tEnv] = tVal
        eVal = cfgOb.getEnvValue("ENV_OPTION_A", sectionName=sName)
        self.assertEqual(tVal, eVal)
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
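# Illustrative sketch (hypothetical fixture, not part of the test suite): the
# INI layout implied by the assertions above. DEFAULT holds repository path
# fragments that getPath() resolves against mockTopPath, and Section1 names an
# environment variable that getEnvValue() dereferences.
import os
import tempfile

from rcsb.utils.config import ConfigUtil

iniText = (
    "[DEFAULT]\n"
    "BIRD_REPO_PATH = MOCK_BIRD_REPO\n"
    "PDBX_REPO_PATH = MOCK_PDBX_SANDBOX\n"
    "[Section1]\n"
    "ENV_OPTION_A = TEST_ENV_VAR\n"
)
with tempfile.NamedTemporaryFile("w", suffix=".cfg", delete=False) as fh:
    fh.write(iniText)
os.environ["TEST_ENV_VAR"] = "TEST_ENV_VAR_VALUE"
cfgOb = ConfigUtil(configPath=fh.name, mockTopPath="./mock-data")
print(cfgOb.get("BIRD_REPO_PATH", sectionName="DEFAULT"))         # MOCK_BIRD_REPO
print(cfgOb.getEnvValue("ENV_OPTION_A", sectionName="Section1"))  # TEST_ENV_VAR_VALUE
os.unlink(fh.name)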
class SchemaDataPrepValidateTests(unittest.TestCase):
    def setUp(self):
        self.__numProc = 2
        # self.__fileLimit = 200
        self.__fileLimit = None
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example-ihm.yml")
        configName = "site_info_configuration"
        self.__configName = configName
        self.__cfgOb = ConfigUtil(configPath=self.__configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        self.__mU = MarshalUtil(workPath=self.__cachePath)
        # self.__schP = SchemaProvider(self.__cfgOb, self.__cachePath, useCache=False, rebuildFlag=True)
        self.__schP = SchemaProvider(self.__cfgOb, self.__cachePath, useCache=True)
        self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, numProc=self.__numProc, fileLimit=self.__fileLimit, cachePath=self.__cachePath)
        #
        self.__birdRepoPath = self.__cfgOb.getPath("BIRD_REPO_PATH", sectionName=configName)
        #
        self.__fTypeRow = "drop-empty-attributes|drop-empty-tables|skip-max-width|convert-iterables|normalize-enums|translateXMLCharRefs"
        self.__fTypeCol = "drop-empty-tables|skip-max-width|convert-iterables|normalize-enums|translateXMLCharRefs"
        self.__verbose = True
        #
        self.__modulePathMap = self.__cfgOb.get("DICT_METHOD_HELPER_MODULE_PATH_MAP", sectionName=configName)
        self.__testDirPath = os.path.join(HERE, "test-output", "pdbx-files")
        self.__testIhmDirPath = os.path.join(HERE, "test-output", "ihm-files")
        self.__export = True
        #
        # self.__extraOpts = None
        # The following for extended parent/child info -
        self.__extraOpts = "addParentRefs|addPrimaryKey"
        #
        self.__alldatabaseNameD = {
            "ihm_dev": ["ihm_dev"],
            "pdbx": ["pdbx", "pdbx_ext"],
            "pdbx_core": ["pdbx_core_entity", "pdbx_core_entry", "pdbx_core_assembly", "pdbx_core_entity_instance", "pdbx_core_entity_instance_validation"],
            "bird": ["bird"],
            "bird_family": ["family"],
            "chem_comp": ["chem_comp"],
            "bird_chem_comp": ["bird_chem_comp"],
            "bird_chem_comp_core": ["bird_chem_comp_core"],
        }
        self.__databaseNameD = {
            "pdbx_core": ["pdbx_core_entity", "pdbx_core_entry", "pdbx_core_assembly", "pdbx_core_entity_instance", "pdbx_core_entity_instance_validation"],
            "bird_chem_comp_core": ["bird_chem_comp_core"],
        }
        self.__mergeContentTypeD = {"pdbx_core": ["vrpt"]}
        # self.__databaseNameD = {"chem_comp_core": ["chem_comp_core"], "bird_chem_comp_core": ["bird_chem_comp_core"]}
        # self.__databaseNameD = {"ihm_dev_full": ["ihm_dev_full"]}
        # self.__databaseNameD = {"pdbx_core": ["pdbx_core_entity_instance_validation"]}
        # self.__databaseNameD = {"pdbx_core": ["pdbx_core_entity_monomer"]}
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testValidateOptsRepo(self):
        # schemaLevel = "min"
        schemaLevel = "full"
        inputPathList = None
        eCount = self.__testValidateOpts(databaseNameD=self.__databaseNameD, inputPathList=inputPathList, schemaLevel=schemaLevel, mergeContentTypeD=self.__mergeContentTypeD)
        logger.info("Total validation errors schema level %s : %d", schemaLevel, eCount)
        self.assertLessEqual(eCount, 1)

    @unittest.skip("Disable troubleshooting test")
    def testValidateOptsList(self):
        schemaLevel = "min"
        inputPathList = self.__mU.doImport(os.path.join(HERE, "test-output", "failed-path.list"), "list")
        # inputPathList = glob.glob(self.__testDirPath + "/*.cif")
        if not inputPathList:
            return True
        databaseNameD = {"pdbx_core": ["pdbx_core_entity", "pdbx_core_entry", "pdbx_core_entity_instance", "pdbx_core_entity_instance_validation"]}
        for ii, subList in enumerate(chunkList(inputPathList[::-1], 40)):
            if ii < 5:
                continue
            eCount = self.__testValidateOpts(databaseNameD=databaseNameD, inputPathList=subList, schemaLevel=schemaLevel, mergeContentTypeD=self.__mergeContentTypeD)
            logger.info("Chunk %d total validation errors schema level %s : %d", ii, schemaLevel, eCount)
        # self.assertGreaterEqual(eCount, 20)

    # @unittest.skip("Disable IHM troubleshooting test")
    def testValidateOptsIhmRepo(self):
        schemaLevel = "min"
        inputPathList = None
        self.__export = True
        databaseNameD = {"ihm_dev_full": ["ihm_dev_full"]}
        databaseNameD = {"ihm_dev": ["ihm_dev"]}
        eCount = self.__testValidateOpts(databaseNameD=databaseNameD, inputPathList=inputPathList, schemaLevel=schemaLevel, mergeContentTypeD=self.__mergeContentTypeD)
        logger.info("Total validation errors schema level %s : %d", schemaLevel, eCount)
        # self.assertGreaterEqual(eCount, 20)
        #

    # @unittest.skip("Disable IHM troubleshooting test")
    def testValidateOptsIhmList(self):
        # schemaLevel = "full"
        schemaLevel = "min"
        inputPathList = glob.glob(self.__testIhmDirPath + "/*.cif")
        if not inputPathList:
            return True
        # databaseNameD = {"ihm_dev_full": ["ihm_dev_full"]}
        databaseNameD = {"ihm_dev": ["ihm_dev"]}
        eCount = self.__testValidateOpts(databaseNameD=databaseNameD, inputPathList=inputPathList, schemaLevel=schemaLevel, mergeContentTypeD=self.__mergeContentTypeD)
        logger.info("Total validation errors schema level %s : %d", schemaLevel, eCount)
        # self.assertGreaterEqual(eCount, 20)
        #

    def __testValidateOpts(self, databaseNameD, inputPathList=None, schemaLevel="full", mergeContentTypeD=None):
        #
        eCount = 0
        for databaseName in databaseNameD:
            mergeContentTypes = mergeContentTypeD[databaseName] if databaseName in mergeContentTypeD else None
            _ = self.__schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=True)
            pthList = inputPathList if inputPathList else self.__rpP.getLocatorObjList(databaseName, mergeContentTypes=mergeContentTypes)
            for collectionName in databaseNameD[databaseName]:
                cD = self.__schP.makeSchema(databaseName, collectionName, encodingType="JSON", level=schemaLevel, saveSchema=True, extraOpts=self.__extraOpts)
                #
                dL, cnL = self.__testPrepDocumentsFromContainers(
                    pthList, databaseName, collectionName, styleType="rowwise_by_name_with_cardinality", mergeContentTypes=mergeContentTypes
                )
                # Raises exceptions for schema compliance.
                try:
                    Draft4Validator.check_schema(cD)
                except Exception as e:
                    logger.error("%s %s schema validation fails with %s", databaseName, collectionName, str(e))
                #
                valInfo = Draft4Validator(cD, format_checker=FormatChecker())
                logger.info("Validating %d documents from %s %s", len(dL), databaseName, collectionName)
                for ii, dD in enumerate(dL):
                    logger.debug("Schema %s collection %s document %d", databaseName, collectionName, ii)
                    try:
                        cCount = 0
                        # for error in sorted(valInfo.iter_errors(dD), key=str):
                        #     logger.info("schema %s collection %s (%s) path %s error: %s", databaseName, collectionName, cnL[ii], error.path, error.message)
                        #     logger.debug("Failing document %d : %r", ii, list(dD.items()))
                        #     eCount += 1
                        #     cCount += 1
                        # if cCount > 0:
                        #     logger.info("schema %s collection %s container %s error count %d", databaseName, collectionName, cnL[ii], cCount)
                    except Exception as e:
                        logger.exception("Validation processing error %s", str(e))
        return eCount

    def __testPrepDocumentsFromContainers(self, inputPathList, databaseName, collectionName, styleType="rowwise_by_name_with_cardinality", mergeContentTypes=None):
        """Test case - create loadable PDBx data from repository files"""
        try:
            sd, _, _, _ = self.__schP.getSchemaInfo(databaseName)
            #
            dP = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=False)
            dictApi = dP.getApiByName(databaseName)
            rP = DictMethodResourceProvider(self.__cfgOb, configName=self.__configName, cachePath=self.__cachePath, siftsAbbreviated="TEST")
            dmh = DictMethodRunner(dictApi, modulePathMap=self.__modulePathMap, resourceProvider=rP)
            #
            dtf = DataTransformFactory(schemaDefAccessObj=sd, filterType=self.__fTypeRow)
            sdp = SchemaDefDataPrep(schemaDefAccessObj=sd, dtObj=dtf, workPath=self.__cachePath, verbose=self.__verbose)
            containerList = self.__rpP.getContainerList(inputPathList)
            for container in containerList:
                cName = container.getName()
                logger.debug("Processing container %s", cName)
                dmh.apply(container)
                if self.__export:
                    savePath = os.path.join(HERE, "test-output", cName + "-with-method.cif")
                    # self.__mU.doExport(savePath, [container], fmt="mmcif")
            #
            tableIdExcludeList = sd.getCollectionExcluded(collectionName)
            tableIdIncludeList = sd.getCollectionSelected(collectionName)
            sliceFilter = sd.getCollectionSliceFilter(collectionName)
            sdp.setSchemaIdExcludeList(tableIdExcludeList)
            sdp.setSchemaIdIncludeList(tableIdIncludeList)
            #
            docList, containerNameList, _ = sdp.processDocuments(
                containerList, styleType=styleType, filterType=self.__fTypeRow, dataSelectors=["PUBLIC_RELEASE"], sliceFilter=sliceFilter, collectionName=collectionName
            )
            docList = sdp.addDocumentPrivateAttributes(docList, collectionName)
            docList = sdp.addDocumentSubCategoryAggregates(docList, collectionName)
            #
            mergeS = "-".join(mergeContentTypes) if mergeContentTypes else ""
            if self.__export and docList:
                # for ii, doc in enumerate(docList[:1]):
                for ii, doc in enumerate(docList):
                    cn = containerNameList[ii]
                    fp = os.path.join(HERE, "test-output", "prep-%s-%s-%s-%s.json" % (cn, databaseName, collectionName, mergeS))
                    self.__mU.doExport(fp, [doc], fmt="json", indent=3)
                    logger.debug("Exported %r", fp)
            #
            return docList, containerNameList
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
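# Illustrative sketch of the jsonschema pattern used by __testValidateOpts()
# above, with a toy schema and document: check_schema() raises if the schema
# itself is malformed, while iter_errors() yields each document-level violation
# without raising, so errors can be counted and logged as in the test.
from jsonschema import Draft4Validator, FormatChecker

schema = {"type": "object", "properties": {"id": {"type": "string"}}, "required": ["id"]}
Draft4Validator.check_schema(schema)
valInfo = Draft4Validator(schema, format_checker=FormatChecker())
eCount = 0
for error in sorted(valInfo.iter_errors({"id": 1}), key=str):
    print(error.path, error.message)  # deque(['id']) 1 is not of type 'string'
    eCount += 1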
class ContentDefinitionTests(unittest.TestCase):
    def setUp(self):
        self.__verbose = True
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        self.__configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=self.__pathConfig, defaultSectionName=self.__configName, mockTopPath=self.__mockTopPath)
        #
        self.__pathPdbxDictionaryFile = self.__cfgOb.getPath("PDBX_DICT_LOCATOR", sectionName=self.__configName)
        self.__pathRcsbDictionaryFile = self.__cfgOb.getPath("RCSB_DICT_LOCATOR", sectionName=self.__configName)
        self.__pathVrptDictionaryFile = self.__cfgOb.getPath("VRPT_DICT_LOCATOR", sectionName=self.__configName)
        self.__mU = MarshalUtil()
        #
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__dP = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=True)
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testDefaults(self):
        """Test the default case of using only dictionary content."""
        try:
            dictApi = self.__dP.getApiByLocators(dictLocators=[self.__pathPdbxDictionaryFile])
            ok = dictApi.testCache()
            self.assertTrue(ok)
            sdi = ContentDefinition(dictApi)
            nS = sdi.getSchemaNames()
            logger.debug("schema name length %d", len(nS))
            self.assertGreaterEqual(len(nS), 600)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testHelper(self):
        """Test the dictionary content supplemented by helper function"""
        try:
            cH = ContentDefinitionHelper(cfgOb=self.__cfgOb)
            dictApi = self.__dP.getApiByLocators(dictLocators=[self.__pathPdbxDictionaryFile])
            sdi = ContentDefinition(dictApi, databaseName="chem_comp", contentDefHelper=cH)
            catNameL = sdi.getCategories()
            cfD = {}
            afD = {}
            for catName in catNameL:
                cfD[catName] = sdi.getCategoryFeatures(catName)
                afD[catName] = sdi.getAttributeFeatures(catName)
            #
            logger.debug("Dictionary category name length %d", len(catNameL))
            self.assertGreaterEqual(len(catNameL), 600)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testExtensionWithHelper(self):
        """Test the dictionary content supplemented by helper function"""
        try:
            cH = ContentDefinitionHelper(cfgOb=self.__cfgOb)
            dictApi = self.__dP.getApiByLocators(dictLocators=[self.__pathPdbxDictionaryFile, self.__pathRcsbDictionaryFile])
            sdi = ContentDefinition(dictApi, databaseName="pdbx_core", contentDefHelper=cH)
            catNameL = sdi.getCategories()
            cfD = {}
            afD = {}
            for catName in catNameL:
                cfD[catName] = sdi.getCategoryFeatures(catName)
                afD[catName] = sdi.getAttributeFeatures(catName)
            #
            logger.debug("Dictionary category name length %d", len(catNameL))
            self.assertGreaterEqual(len(catNameL), 650)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testRepoWithHelper(self):
        """Test the dictionary content supplemented by helper function for auxiliary schema"""
        try:
            cH = ContentDefinitionHelper(cfgOb=self.__cfgOb)
            dictApi = self.__dP.getApiByLocators(dictLocators=[self.__pathPdbxDictionaryFile, self.__pathRcsbDictionaryFile, self.__pathVrptDictionaryFile])
            sdi = ContentDefinition(dictApi, databaseName="repository_holdings", contentDefHelper=cH)
            catNameL = sdi.getCategories()
            cfD = {}
            afD = {}
            for catName in catNameL:
                cfD[catName] = sdi.getCategoryFeatures(catName)
                afD[catName] = sdi.getAttributeFeatures(catName)
            #
            logger.debug("Dictionary category name length %d", len(catNameL))
            self.assertGreaterEqual(len(catNameL), 680)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
class SchemaSearchContextsTests(unittest.TestCase):
    skipFlag = True

    def setUp(self):
        self.__verbose = True
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=pathConfig, defaultSectionName=configName, mockTopPath=mockTopPath)
        self.__docHelper = DocumentDefinitionHelper(cfgOb=self.__cfgOb)
        #
        self.__pathPdbxDictionaryFile = self.__cfgOb.getPath("PDBX_DICT_LOCATOR", sectionName=configName)
        self.__pathRcsbDictionaryFile = self.__cfgOb.getPath("RCSB_DICT_LOCATOR", sectionName=configName)
        self.__pathVrptDictionaryFile = self.__cfgOb.getPath("VRPT_DICT_LOCATOR", sectionName=configName)
        #
        self.__mU = MarshalUtil()
        #
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__dP = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=True)
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testSearchGroups(self):
        ok = self.__docHelper.checkSearchGroups()
        self.assertTrue(ok)

    @unittest.skipIf(skipFlag, "Troubleshooting test")
    def testUnUsedIndexedItems(self):
        """Enumerate items that are indexed but have no search group assignments.

        collection_attribute_search_contexts
        """
        groupNameList = self.__docHelper.getSearchGroups()
        logger.info("Search groups (%d)", len(groupNameList))
        #
        nestedSearchableD = self.__assembleNestedCategorySearchables()
        nestedSearchableD.update(self.__assembleNestedSubCategorySearchables())
        #
        attribContextD = {}
        tD = self.__docHelper.getAllAttributeSearchContexts()
        for (catName, atName), contextL in tD.items():
            attribContextD.setdefault((catName, atName), []).extend([t[0] for t in contextL])
        logger.info("search context attribContextD %d", len(attribContextD))
        lookupD = {}
        # if (catName, atName) in nestedSearchableD:
        for groupName in groupNameList:
            # get attributes in group
            attributeTupList = self.__docHelper.getSearchGroupAttributes(groupName)
            # logger.info("")
            # logger.info("%s (%2d):", groupName, len(attributeTupList))
            for catName, atName in attributeTupList:
                lookupD.setdefault((catName, atName), []).append(groupName)
        #
        logger.info("Search group lookup len %d", len(lookupD))
        for (catName, atName), contextL in attribContextD.items():
            # logger.info("%s.%s contextL %r", catName, atName, contextL)
            if "full-text" in contextL:
                if (catName, atName) in lookupD or (catName, atName) in nestedSearchableD:
                    continue
                logger.info("%s.%s contextL %r", catName, atName, contextL)
        #
        return True

    @unittest.skipIf(skipFlag, "Troubleshooting test")
    def testExpandSearchGroups(self):
        """Expand search groups and metadata content as these would be displayed in the RCSB search menu."""
        _, afD = self.__getContentFeatures()
        groupNameList = self.__docHelper.getSearchGroups()
        logger.info("Search groups (%d)", len(groupNameList))
        #
        nestedSearchableD = self.__assembleNestedCategorySearchables()
        nestedSearchableD.update(self.__assembleNestedSubCategorySearchables())
        #
        for groupName in groupNameList:
            # get attributes in group
            attributeTupList = self.__docHelper.getSearchGroupAttributes(groupName)
            logger.info("")
            logger.info("%s (%2d):", groupName, len(attributeTupList))
            # Get search context and brief descriptions -
            for catName, atName in attributeTupList:
                searchContextTupL = self.__docHelper.getSearchContexts(catName, atName)
                if not searchContextTupL:
                    logger.warning("Missing search context for %s.%s", catName, atName)
                descriptionText = self.__docHelper.getAttributeDescription(catName, atName, contextType="brief")
                if not descriptionText:
                    logger.warning("Missing brief description %s.%s", catName, atName)
                #
                fD = afD[catName][atName] if catName in afD and atName in afD[catName] else {}
                logger.debug("%s %s fD %r", catName, atName, fD)
                units = fD["UNITS"] if "UNITS" in fD else None
                #
                uS = ""
                if units:
                    uS = "(units=%s)" % units
                #
                nS = "(%s.%s)" % (catName, atName)
                if (catName, atName) in nestedSearchableD:
                    for dS in nestedSearchableD[(catName, atName)]:
                        logger.info("- %-55s: %s %s (%s)", dS, nS, uS, ",".join([tup[0] for tup in searchContextTupL]))
                else:
                    logger.info("- %-55s: %s %s (%s)", descriptionText, nS, uS, ",".join([tup[0] for tup in searchContextTupL]))
        return True

    def __assembleNestedCategorySearchables(self):
        """Assemble dictionary of searchable items in nested categories.

        Returns:
            (dict): {(category, atName): ["Materialized brief description", ...]}
        """
        # cfD, afD = self.__getContentFeatures()
        _, afD = self.__getContentFeatures()
        logger.info("")
        searchableCategoryD = {}
        groupNameList = self.__docHelper.getSearchGroups()
        logger.debug("Search group count (%d)", len(groupNameList))
        for groupName in groupNameList:
            # get attributes in group
            attributeTupList = self.__docHelper.getSearchGroupAttributes(groupName)
            for catName, atName in attributeTupList:
                searchableCategoryD.setdefault(catName, []).append(atName)
        logger.debug("Searchable category count (%d)", len(searchableCategoryD))
        #
        retD = {}
        for catName in searchableCategoryD:
            nestedContextDL = self.__docHelper.getNestedContexts(catName)
            if not nestedContextDL:
                # not nested skip
                continue
            elif len(nestedContextDL) > 1:
                logger.warning("Multiple nested contexts for category %s", catName)
            #
            for nestedContextD in nestedContextDL:
                contextPath = nestedContextD["FIRST_CONTEXT_PATH"] if "FIRST_CONTEXT_PATH" in nestedContextD else None
                if not contextPath:
                    logger.warning("Missing context path for nested category %s", catName)
                    continue
                #
                contextName = nestedContextD["CONTEXT_NAME"]
                #
                cpCatName = contextPath.split(".")[0]
                cpAtName = contextPath.split(".")[1]
                nestedPathSearchContext = self.__docHelper.getSearchContexts(cpCatName, cpAtName)
                logger.debug("Nested (%r) context path for %r %r", contextName, cpCatName, cpAtName)
                if not nestedPathSearchContext:
                    logger.warning("Missing nested (%r) search context for %r %r", contextName, cpCatName, cpAtName)
                #
                nfD = afD[cpCatName][cpAtName] if cpCatName in afD and cpAtName in afD[cpCatName] else {}
                logger.debug("FeatureD %r", nfD)
                # --
                enumMapD = {}
                enumDL = nfD["ENUMS_ANNOTATED"]
                if not enumDL:
                    logger.warning("Missing nested enums %s.%s", cpCatName, cpAtName)
                else:
                    logger.debug("All context enums count %d", len(enumDL))
                    for enumD in enumDL:
                        logger.info("%s.%s enumD %r", cpCatName, cpAtName, enumD)
                        if "name" not in enumD:
                            logger.warning("Missing nested enum (name) for %s.%s", cpCatName, cpAtName)
                    #
                    enumMapD = {enumD["value"]: enumD["name"] if "name" in enumD else enumD["detail"] for enumD in enumDL}
                # --
                nestedDescriptionText = self.__docHelper.getAttributeDescription(cpCatName, cpAtName, contextType="brief")
                if not nestedDescriptionText:
                    logger.warning("Missing brief nested description %s.%s", cpCatName, cpAtName)
                else:
                    logger.debug("Nested context description: %r", nestedDescriptionText)
                # --
                cvDL = nestedContextD["CONTEXT_ATTRIBUTE_VALUES"] if "CONTEXT_ATTRIBUTE_VALUES" in nestedContextD else []
                if not cvDL:
                    logger.warning("Missing context attribute values for %s", catName)
                    # if no context values defined then use: all enums x searchable attributes in this category
                    #
                    # Template: enum detail + search attribute brief description text
                    for enumD in enumDL:
                        for atName in searchableCategoryD[catName]:
                            briefDescr = self.__docHelper.getAttributeDescription(catName, atName, contextType="brief")
                            # subCategories = nfD["SUB_CATEGORIES"] if "SUB_CATEGORIES" in nfD else None
                            tS = enumD["detail"] + " " + briefDescr
                            retD.setdefault((catName, atName), []).append(tS)
                else:
                    # Only use context values from the full enum list with specified search paths.
                    #
                    # Template: context value (enum detail) + search path attribute (brief description text)
                    # cVDL.append({"CONTEXT_VALUE": tD["CONTEXT_VALUE"], "SEARCH_PATHS": tD["SEARCH_PATHS"]})
                    #
                    for cvD in cvDL:
                        enumV = cvD["CONTEXT_VALUE"]
                        enumDetail = enumMapD[enumV] if enumV in enumMapD else None
                        if not enumDetail:
                            logger.warning("%s %s missing detail for enum value %s", catName, cpAtName, enumV)
                        for sp in cvD["SEARCH_PATHS"]:
                            if sp.count(".") > 1:
                                k = sp.rfind(".")
                                sp = sp[:k] + "_" + sp[k + 1:]
                            cnS = sp.split(".")[0]
                            anS = sp.split(".")[1]
                            briefDescr = self.__docHelper.getAttributeDescription(cnS, anS, contextType="brief")
                            tS = enumDetail + " " + briefDescr
                            logger.debug("%s,%s tS %r", cnS, anS, tS)
                            retD.setdefault((cnS, anS), []).append(tS)
                        for aD in cvD["ATTRIBUTES"]:
                            sp = aD["PATH"]
                            if sp.count(".") > 1:
                                k = sp.rfind(".")
                                sp = sp[:k] + "_" + sp[k + 1:]
                            cnS = sp.split(".")[0]
                            anS = sp.split(".")[1]
                            briefDescr = self.__docHelper.getAttributeDescription(cnS, anS, contextType="brief")
                            tS = enumDetail + " " + briefDescr
                            logger.debug("%s,%s tS %r", cnS, anS, tS)
                            retD.setdefault((cnS, anS), []).append(tS)
                            exL = aD["EXAMPLES"]
                            logger.info("%s,%s sp %r examplesL %r", cnS, anS, sp, exL)
        #
        for k, vL in retD.items():
            for v in vL:
                logger.debug("%s : %r", k, v)
        #
        return retD

    def __assembleNestedSubCategorySearchables(self):
        """Assemble dictionary of searchable items in nested subcategories.

        Returns:
            (dict): {(category, atName): ["Materialized brief description", ...]}
        """
        _, afD = self.__getContentFeatures()
        #
        logger.info("")
        searchableCategoryD = {}
        groupNameList = self.__docHelper.getSearchGroups()
        logger.debug("Search group count (%d)", len(groupNameList))
        for groupName in groupNameList:
            # get attributes in group
            attributeTupList = self.__docHelper.getSearchGroupAttributes(groupName)
            for catName, atName in attributeTupList:
                searchableCategoryD.setdefault(catName, []).append(atName)
        logger.debug("Searchable category count (%d)", len(searchableCategoryD))
        #
        subcatNestedD = {}
        tD = self.__docHelper.getAllSubCategoryNestedContexts()
        for k, v in tD.items():
            for kk, vv in v.items():
                if kk in subcatNestedD:
                    logger.warning("Duplicate nested subcategory specifications in %r %r", k, kk)
                # only take cases with a context path ...
                if "FIRST_CONTEXT_PATH" in vv:
                    subcatNestedD[kk[0]] = (kk[1], vv)  # cat = (subcat, {nested context dict})
        #
        retD = {}
        for catName in searchableCategoryD:
            if catName not in subcatNestedD:
                continue
            subCatName, nestedContextD = subcatNestedD[catName]
            #
            contextPath = nestedContextD["FIRST_CONTEXT_PATH"] if "FIRST_CONTEXT_PATH" in nestedContextD else None
            if not contextPath:
                logger.warning("Missing context path for nested category %s", catName)
                continue
            #
            if contextPath.count(".") > 1:
                k = contextPath.rfind(".")
                contextPath = contextPath[:k] + "_" + contextPath[k + 1:]
            logger.debug("%s subcategory %s context path %r", catName, subCatName, contextPath)
            contextName = nestedContextD["CONTEXT_NAME"]
            cpCatName = contextPath.split(".")[0]
            cpAtName = contextPath.split(".")[1]
            nestedPathSearchContext = self.__docHelper.getSearchContexts(cpCatName, cpAtName)
            logger.debug("Nested (%r) context path for %r %r", contextName, cpCatName, cpAtName)
            if not nestedPathSearchContext:
                logger.warning("Missing nested (%r) search context for %r %r", contextName, cpCatName, cpAtName)
            #
            nfD = afD[cpCatName][cpAtName] if cpCatName in afD and cpAtName in afD[cpCatName] else {}
            logger.debug("FeatureD %r", nfD)
            # --
            enumMapD = {}
            enumDL = nfD["ENUMS_ANNOTATED"]
            if not enumDL:
                logger.warning("Missing nested enums %s.%s", cpCatName, cpAtName)
            else:
                logger.debug("All context enums count %d", len(enumDL))
                for enumD in enumDL:
                    if "name" not in enumD:
                        logger.warning("Missing nested enum (name) for %s.%s", cpCatName, cpAtName)
                #
                enumMapD = {enumD["value"]: enumD["name"] if "name" in enumD else enumD["detail"] for enumD in enumDL}
            # --
            nestedDescriptionText = self.__docHelper.getAttributeDescription(cpCatName, cpAtName, contextType="brief")
            if not nestedDescriptionText:
                logger.warning("Missing brief nested description %s.%s", cpCatName, cpAtName)
            else:
                logger.debug("Nested context description: %r", nestedDescriptionText)
            # --
            cvDL = nestedContextD["CONTEXT_ATTRIBUTE_VALUES"] if "CONTEXT_ATTRIBUTE_VALUES" in nestedContextD else []
            #
            if not cvDL:
                logger.warning("Missing context attribute values for %s", catName)
                # if no context values defined then use: all enums x searchable attributes in this category
                #
                # Template: enum detail + search attribute brief description text
                for enumD in enumDL:
                    for atName in searchableCategoryD[catName]:
                        nnfD = afD[catName][atName]
                        subCatL = [d["id"] for d in nnfD["SUB_CATEGORIES"]] if "SUB_CATEGORIES" in nnfD else None
                        logger.debug("%s.%s %s subCatL %r", catName, atName, subCatName, subCatL)
                        if subCatL and subCatName in subCatL:
                            briefDescr = self.__docHelper.getAttributeDescription(catName, atName, contextType="brief")
                            tS = enumD["detail"] + " " + briefDescr
                            retD.setdefault((catName, atName), []).append(tS)
            else:
                # Only use context values from the full enum list with specified search paths.
                #
                # Template: context value (enum detail) + search path attribute (brief description text)
                # cVDL.append({"CONTEXT_VALUE": tD["CONTEXT_VALUE"], "SEARCH_PATHS": tD["SEARCH_PATHS"]})
                #
                for cvD in cvDL:
                    enumV = cvD["CONTEXT_VALUE"]
                    enumDetail = enumMapD[enumV] if enumV in enumMapD else None
                    if not enumDetail:
                        logger.warning("%s %s missing detail for enum value %s", catName, cpAtName, enumV)
                    for sp in cvD["SEARCH_PATHS"]:
                        if sp.count(".") > 1:
                            k = sp.rfind(".")
                            sp = sp[:k] + "_" + sp[k + 1:]
                        cnS = sp.split(".")[0]
                        anS = sp.split(".")[1]
                        briefDescr = self.__docHelper.getAttributeDescription(cnS, anS, contextType="brief")
                        tS = enumDetail + " " + briefDescr
                        retD.setdefault((cnS, anS), []).append(tS)
                    for aD in cvD["ATTRIBUTES"]:
                        sp = aD["PATH"]
                        if sp.count(".") > 1:
                            k = sp.rfind(".")
                            sp = sp[:k] + "_" + sp[k + 1:]
                        cnS = sp.split(".")[0]
                        anS = sp.split(".")[1]
                        briefDescr = self.__docHelper.getAttributeDescription(cnS, anS, contextType="brief")
                        tS = enumDetail + " " + briefDescr
                        retD.setdefault((cnS, anS), []).append(tS)
                        exL = aD["EXAMPLES"]
                        logger.debug("%s,%s sp %r exL %r", cnS, anS, sp, exL)
        #
        for k, vL in retD.items():
            for v in vL:
                logger.debug("%s : %r", k, v)
        #
        return retD

    def __getContentFeatures(self):
        """Get category and attribute features"""
        try:
            cH = ContentDefinitionHelper(cfgOb=self.__cfgOb)
            dictApi = self.__dP.getApiByLocators(dictLocators=[self.__pathPdbxDictionaryFile, self.__pathRcsbDictionaryFile])
            # logger.info("units = %r", dictApi.getUnits("pdbx_nmr_spectrometer", "manufacturer"))
            sdi = ContentDefinition(dictApi, databaseName="pdbx_core", contentDefHelper=cH)
            catNameL = sdi.getCategories()
            cfD = {}
            afD = {}
            for catName in catNameL:
                cfD[catName] = sdi.getCategoryFeatures(catName)
                afD[catName] = sdi.getAttributeFeatures(catName)
            #
            return cfD, afD
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return None, None
class RepoHoldingsDataPrepValidateTests(unittest.TestCase):
    def setUp(self):
        self.__verbose = True
        #
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__updateId = "2018_25"
        self.__export = False
        #
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=self.__pathConfig, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        self.__schP = SchemaProvider(self.__cfgOb, self.__cachePath, useCache=True)
        self.__sandboxPath = self.__cfgOb.getPath("RCSB_EXCHANGE_SANDBOX_PATH", sectionName=configName)
        #
        self.__mU = MarshalUtil(workPath=self.__cachePath)
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testValidateOptsStrict(self):
        updateId = self.__updateId
        schemaLevel = "full"
        eCount = self.__testValidateOpts(updateId, schemaLevel=schemaLevel)
        logger.info("Total validation errors schema level %s : %d", schemaLevel, eCount)
        self.assertLessEqual(eCount, 1)

    @unittest.skip("Troubleshooting test")
    def testValidateOptsMin(self):
        updateId = self.__updateId
        schemaLevel = "min"
        eCount = self.__testValidateOpts(updateId, schemaLevel=schemaLevel)
        logger.info("Total validation errors schema level %s : %d", schemaLevel, eCount)
        self.assertLessEqual(eCount, 1)

    def __testValidateOpts(self, updateId, schemaLevel="full"):
        schemaNames = ["repository_holdings"]
        collectionNames = {
            "repository_holdings": [
                "repository_holdings_update_entry",
                "repository_holdings_current_entry",
                "repository_holdings_unreleased_entry",
                "repository_holdings_removed_entry",
                "repository_holdings_combined_entry",
            ],
            "entity_sequence_clusters": ["cluster_members", "cluster_provenance", "entity_members"],
        }
        #
        eCount = 0
        for schemaName in schemaNames:
            for collectionName in collectionNames[schemaName]:
                _ = self.__schP.makeSchemaDef(schemaName, dataTyping="ANY", saveSchema=True)
                cD = self.__schP.makeSchema(schemaName, collectionName, encodingType="JSON", level=schemaLevel, saveSchema=True)
                dL = self.__getRepositoryHoldingsDocuments(schemaName, collectionName, updateId)
                if self.__export:
                    savePath = os.path.join(HERE, "test-output", collectionName + ".json")
                    self.__mU.doExport(savePath, dL, fmt="json", indent=3)
                # Raises exceptions for schema compliance.
                Draft4Validator.check_schema(cD)
                #
                valInfo = Draft4Validator(cD, format_checker=FormatChecker())
                for ii, dD in enumerate(dL):
                    logger.debug("Schema %s collection %s document %d", schemaName, collectionName, ii)
                    try:
                        cCount = 0
                        for error in sorted(valInfo.iter_errors(dD), key=str):
                            logger.info("schema %s collection %s path %s error: %s", schemaName, collectionName, error.path, error.message)
                            logger.info(">>>")
                            logger.info(">>> failing object is %r", dD)
                            logger.info(">>>")
                            eCount += 1
                            cCount += 1
                        #
                        logger.debug("schema %s collection %s count %d", schemaName, collectionName, cCount)
                    except Exception as e:
                        logger.exception("Validation error %s", str(e))
        return eCount

    def __getRepositoryHoldingsDocuments(self, schemaName, collectionName, updateId):
        """Test loading and processing operations for legacy holdings and status data."""
        rL = []
        try:
            rhdp = RepoHoldingsDataPrep(cfgOb=self.__cfgOb, sandboxPath=self.__sandboxPath, workPath=self.__cachePath)
            if collectionName == "repository_holdings_update_entry":
                rL = rhdp.getHoldingsUpdateEntry(updateId=updateId)
                self.assertGreaterEqual(len(rL), 10)
                logger.debug("update data length %r", len(rL))
            #
            elif collectionName == "repository_holdings_current_entry":
                rL = rhdp.getHoldingsCurrentEntry(updateId=updateId)
                self.assertGreaterEqual(len(rL), 10)
                logger.debug("holdings data length %r", len(rL))
            #
            elif collectionName == "repository_holdings_unreleased_entry":
                rL = rhdp.getHoldingsUnreleasedEntry(updateId=updateId)
                self.assertGreaterEqual(len(rL), 10)
                logger.debug("unreleased data length %r", len(rL))
            #
            elif collectionName == "repository_holdings_removed_entry":
                rL = rhdp.getHoldingsRemovedEntry(updateId=updateId)
                self.assertGreaterEqual(len(rL), 10)
                logger.debug("removed data length %r", len(rL))
            elif collectionName == "repository_holdings_combined_entry":
                rL = rhdp.getHoldingsCombinedEntry(updateId=updateId)
                self.assertGreaterEqual(len(rL), 10)
                logger.debug("holdings data length %r", len(rL))
            #
        except Exception as e:
            logger.exception("%s %s failing with %s", schemaName, collectionName, str(e))
            self.fail()
        return rL
class ClusterDataPrepValidateTests(unittest.TestCase):
    def setUp(self):
        self.__verbose = True
        #
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__updateId = "2018_25"
        #
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=self.__pathConfig, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        self.__schP = SchemaProvider(self.__cfgOb, self.__cachePath, useCache=True)
        #
        self.__sandboxPath = self.__cfgOb.getPath("RCSB_EXCHANGE_SANDBOX_PATH", sectionName=configName)
        #
        self.__dataSetId = "2018_23"
        self.__pathClusterData = self.__cfgOb.getPath("RCSB_SEQUENCE_CLUSTER_DATA_PATH", sectionName=configName)
        # self.__levels = ["100", "95", "90", "70", "50", "30"]
        self.__levels = ["100"]
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testValidateOptsStrict(self):
        updateId = self.__updateId
        validationLevel = "full"
        eCount = self.__testValidateOpts(updateId, validationLevel=validationLevel)
        logger.info("Total validation errors validation level %s : %d", validationLevel, eCount)
        self.assertLessEqual(eCount, 1)

    def __testValidateOpts(self, updateId, validationLevel="full"):
        _ = updateId
        databaseNames = ["sequence_clusters"]
        collectionNames = {"sequence_clusters": ["cluster_provenance", "cluster_members", "entity_members"]}
        #
        eCount = 0
        for databaseName in databaseNames:
            for collectionName in collectionNames[databaseName]:
                _ = self.__schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=True)
                cD = self.__schP.makeSchema(databaseName, collectionName, encodingType="JSON", level=validationLevel, saveSchema=True)
                #
                dL = self.__getSequenceClusterData(collectionName, levels=self.__levels, dataSetId=self.__dataSetId, dataLocator=self.__pathClusterData)
                # Raises exceptions for schema compliance.
                Draft4Validator.check_schema(cD)
                #
                valInfo = Draft4Validator(cD, format_checker=FormatChecker())
                for _, dD in enumerate(dL):
                    # logger.debug("Schema %s collection %s document %d" % (schemaName, collectionName, ii))
                    try:
                        cCount = 0
                        for error in sorted(valInfo.iter_errors(dD), key=str):
                            logger.info("schema %s collection %s path %s error: %s", databaseName, collectionName, error.path, error.message)
                            logger.info(">>> failing object is %r", dD)
                            eCount += 1
                            cCount += 1
                        #
                        logger.debug("schema %s collection %s count %d", databaseName, collectionName, cCount)
                    except Exception as e:
                        logger.exception("Validation error %s", str(e))
        return eCount

    def __fetchProvenance(self):
        """Test case for fetching provenance dictionary content."""
        try:
            provKeyName = "rcsb_entity_sequence_cluster_prov"
            provU = ProvenanceProvider(self.__cfgOb, self.__cachePath, useCache=True)
            pD = provU.fetch()
            return pD[provKeyName] if provKeyName in pD else {}
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def __getSequenceClusterData(self, collectionName, dataSetId=None, dataLocator=None, levels=None):
        """Test extraction on an example sequence cluster data set."""
        try:
            #
            if collectionName == "cluster_provenance":
                return [self.__fetchProvenance()]
            #
            entitySchemaName = "rcsb_entity_sequence_cluster_list"
            clusterSchemaName = "rcsb_entity_sequence_cluster_identifer_list"
            cdp = ClusterDataPrep(workPath=self.__cachePath, entitySchemaName=entitySchemaName, clusterSchemaName=clusterSchemaName)
            cifD, docBySequenceD, docByClusterD = cdp.extract(dataSetId, clusterSetLocator=dataLocator, levels=levels, clusterType="entity")
            self.assertEqual(len(cifD), 1)
            self.assertEqual(len(docBySequenceD), 1)
            self.assertEqual(len(docByClusterD), 1)
            if collectionName == "entity_members":
                return docBySequenceD[entitySchemaName]
            elif collectionName == "cluster_members":
                return docByClusterD[clusterSchemaName]
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
        return None
def main():
    parser = argparse.ArgumentParser()
    #
    defaultConfigName = "site_info_configuration"
    #
    parser.add_argument("--full", default=True, action="store_true", help="Fresh full load in a new tables/collections (Default)")
    #
    parser.add_argument("--etl_entity_sequence_clusters", default=False, action="store_true", help="ETL entity sequence clusters")
    parser.add_argument("--etl_repository_holdings", default=False, action="store_true", help="ETL repository holdings")
    # parser.add_argument("--etl_chemref", default=False, action="store_true", help="ETL integrated chemical reference data")
    # parser.add_argument("--etl_tree_node_lists", default=False, action="store_true", help="ETL tree node lists")
    parser.add_argument("--data_set_id", default=None, help="Data set identifier (default=2018_14 for current week)")
    #
    parser.add_argument("--sequence_cluster_data_path", default=None, help="Sequence cluster data path (default set by configuration)")
    parser.add_argument("--sandbox_data_path", default=None, help="Data exchange sandbox data path (default set by configuration)")
    #
    parser.add_argument("--config_path", default=None, help="Path to configuration options file")
    parser.add_argument("--config_name", default=defaultConfigName, help="Configuration section name")
    parser.add_argument("--db_type", default="mongo", help="Database server type (default=mongo)")
    # parser.add_argument("--document_style", default="rowwise_by_name_with_cardinality",
    #                     help="Document organization (rowwise_by_name_with_cardinality|rowwise_by_name|columnwise_by_name|rowwise_by_id|rowwise_no_name)")
    parser.add_argument("--read_back_check", default=False, action="store_true", help="Perform read back check on all documents")
    #
    parser.add_argument("--num_proc", default=2, help="Number of processes to execute (default=2)")
    parser.add_argument("--chunk_size", default=10, help="Number of files loaded per process")
    parser.add_argument("--document_limit", default=None, help="Load document limit for testing")
    parser.add_argument("--prune_document_size", default=None, help="Prune large documents to this size limit (MB)")
    parser.add_argument("--debug", default=False, action="store_true", help="Turn on verbose logging")
    parser.add_argument("--mock", default=False, action="store_true", help="Use MOCK repository configuration for testing")
    parser.add_argument("--cache_path", default=None, help="Path containing cache directories")
    # parser.add_argument("--use_cache", default=False, action="store_true", help="Use cache files from remote resources")
    parser.add_argument("--rebuild_cache", default=False, action="store_true", help="Rebuild cached resource files")
    # parser.add_argument("--rebuild_schema", default=False, action="store_true", help="Rebuild schema on-the-fly if not cached")
    #
    #
    args = parser.parse_args()
    #
    debugFlag = args.debug
    if debugFlag:
        logger.setLevel(logging.DEBUG)
    # ----------------------- - ----------------------- - ----------------------- - ----------------------- - ----------------------- -
    #                                       Configuration Details
    configPath = args.config_path
    configName = args.config_name
    # useCache = args.use_cache
    if not configPath:
        configPath = os.getenv("DBLOAD_CONFIG_PATH", None)
    try:
        if os.access(configPath, os.R_OK):
            os.environ["DBLOAD_CONFIG_PATH"] = configPath
            logger.info("Using configuration path %s (%s)", configPath, configName)
        else:
            logger.error("Missing or access issue with config file %r", configPath)
            exit(1)
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") if args.mock else None
        cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=defaultConfigName, mockTopPath=mockTopPath)
        if configName != defaultConfigName:
            cfgOb.replaceSectionName(defaultConfigName, configName)
        #
    except Exception as e:
        logger.error("Missing or access issue with config file %r with %s", configPath, str(e))
        exit(1)
    #
    try:
        readBackCheck = args.read_back_check
        tU = TimeUtil()
        dataSetId = args.data_set_id if args.data_set_id else tU.getCurrentWeekSignature()
        seqDataLocator = args.sequence_cluster_data_path if args.sequence_cluster_data_path else cfgOb.getPath("RCSB_SEQUENCE_CLUSTER_DATA_PATH", sectionName=configName)
        sandboxPath = args.sandbox_data_path if args.sandbox_data_path else cfgOb.getPath("RCSB_EXCHANGE_SANDBOX_PATH", sectionName=configName)
        numProc = int(args.num_proc)
        chunkSize = int(args.chunk_size)
        documentLimit = int(args.document_limit) if args.document_limit else None
        loadType = "full" if args.full else "replace"
        # loadType = "replace" if args.replace else "full"
        cachePath = args.cache_path if args.cache_path else "."
        rebuildCache = args.rebuild_cache if args.rebuild_cache else False
        # rebuildSchemaFlag = args.rebuild_schema if args.rebuild_schema else False
        #
        # if args.document_style not in ["rowwise_by_name", "rowwise_by_name_with_cardinality", "columnwise_by_name", "rowwise_by_id", "rowwise_no_name"]:
        #     logger.error("Unsupported document style %s" % args.document_style)
        if args.db_type != "mongo":
            logger.error("Unsupported database server type %s", args.db_type)
    except Exception as e:
        logger.exception("Argument processing problem %s", str(e))
        parser.print_help(sys.stderr)
        exit(1)
    # ----------------------- - ----------------------- - ----------------------- - ----------------------- - ----------------------- -
    #  Rebuild or check resource cache
    ok = buildResourceCache(cfgOb, configName, cachePath, rebuildCache=rebuildCache)
    if not ok:
        logger.error("Cache rebuild or check failure (rebuild %r) %r", rebuildCache, cachePath)
        exit(1)
    ##
    if args.db_type == "mongo":
        if args.etl_entity_sequence_clusters:
            cw = SequenceClustersEtlWorker(cfgOb, numProc=numProc, chunkSize=chunkSize, documentLimit=documentLimit, verbose=debugFlag, readBackCheck=readBackCheck, workPath=cachePath)
            ok = cw.etl(dataSetId, seqDataLocator, loadType=loadType)
            okS = loadStatus(cw.getLoadStatus(), cfgOb, cachePath, readBackCheck=readBackCheck)
        if args.etl_repository_holdings:
            rhw = RepoHoldingsEtlWorker(cfgOb, sandboxPath, cachePath, numProc=numProc, chunkSize=chunkSize, documentLimit=documentLimit, verbose=debugFlag, readBackCheck=readBackCheck)
            ok = rhw.load(dataSetId, loadType=loadType)
            okS = loadStatus(rhw.getLoadStatus(), cfgOb, cachePath, readBackCheck=readBackCheck)
    logger.info("Operation completed with status %r", ok and okS)
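# Illustrative sketch (the entry-point name is an assumption): main() is driven
# by the argparse flags defined above, so a repository-holdings ETL against the
# mock configuration could be exercised programmatically like this.
import sys

sys.argv = [
    "etl_exec.py",  # hypothetical script name
    "--etl_repository_holdings",
    "--mock",
    "--config_path", "./exdb-config-example.yml",
    "--cache_path", "./CACHE",
]
main()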
class RepoLoadWorkflow(object):
    def __init__(self, **kwargs):
        #  Configuration Details
        configPath = kwargs.get("configPath", "exdb-config-example.yml")
        self.__configName = kwargs.get("configName", "site_info_configuration")
        mockTopPath = kwargs.get("mockTopPath", None)
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=self.__configName, mockTopPath=mockTopPath)
        #
        self.__cachePath = kwargs.get("cachePath", ".")
        self.__cachePath = os.path.abspath(self.__cachePath)
        self.__debugFlag = kwargs.get("debugFlag", False)
        if self.__debugFlag:
            logger.setLevel(logging.DEBUG)
        #
        #  Rebuild or check resource cache
        # rebuildCache = kwargs.get("rebuildCache", False)
        # self.__cacheStatus = self.buildResourceCache(rebuildCache=rebuildCache)
        # logger.debug("Cache status if %r", self.__cacheStatus)
        #

    def load(self, op, **kwargs):
        # if not self.__cacheStatus:
        #     logger.error("Resource cache test or rebuild has failed - exiting")
        #     return False
        # argument processing
        if op not in ["pdbx-loader", "etl-repository-holdings", "etl-entity-sequence-clusters"]:
            logger.error("Unsupported operation %r - exiting", op)
            return False
        try:
            readBackCheck = kwargs.get("readBackCheck", False)
            numProc = int(kwargs.get("numProc", 1))
            chunkSize = int(kwargs.get("chunkSize", 10))
            fileLimit = int(kwargs.get("fileLimit")) if "fileLimit" in kwargs else None
            documentLimit = int(kwargs.get("documentLimit")) if "documentLimit" in kwargs else None
            failedFilePath = kwargs.get("failFileListPath", None)
            loadFileListPath = kwargs.get("loadFileListPath", None)
            saveInputFileListPath = kwargs.get("saveFileListPath", None)
            schemaLevel = kwargs.get("schemaLevel", "min") if kwargs.get("schemaLevel") in ["min", "full"] else "min"
            loadType = kwargs.get("loadType", "full")  # or replace
            updateSchemaOnReplace = kwargs.get("updateSchemaOnReplace", True)
            pruneDocumentSize = float(kwargs.get("pruneDocumentSize")) if "pruneDocumentSize" in kwargs else None
            # "Document organization (rowwise_by_name_with_cardinality|rowwise_by_name|columnwise_by_name|rowwise_by_id|rowwise_no_name",
            documentStyle = kwargs.get("documentStyle", "rowwise_by_name_with_cardinality")
            dbType = kwargs.get("dbType", "mongo")
            #
            databaseName = kwargs.get("databaseName", None)
            databaseNameList = self.__cfgOb.get("DATABASE_NAMES_ALL", sectionName="database_catalog_configuration").split(",")
            collectionNameList = kwargs.get("collectionNameList", None)
            mergeValidationReports = kwargs.get("mergeValidationReports", True)
            #
            tU = TimeUtil()
            dataSetId = kwargs.get("dataSetId") if "dataSetId" in kwargs else tU.getCurrentWeekSignature()
            seqDataLocator = self.__cfgOb.getPath("RCSB_SEQUENCE_CLUSTER_DATA_PATH", sectionName=self.__configName)
            sandboxPath = self.__cfgOb.getPath("RCSB_EXCHANGE_SANDBOX_PATH", sectionName=self.__configName)
        except Exception as e:
            logger.exception("Argument and configuration processing failing with %s", str(e))
            return False
        #
        if op == "pdbx-loader" and dbType == "mongo" and databaseName in databaseNameList:
            okS = True
            try:
                inputPathList = None
                if loadFileListPath:
                    mu = MarshalUtil(workPath=self.__cachePath)
                    inputPathList = mu.doImport(loadFileListPath, fmt="list")
                    if not inputPathList:
                        logger.error("Operation %r missing or empty input file path list %s - exiting", op, loadFileListPath)
                        return False
            except Exception as e:
                logger.exception("Operation %r processing input path list failing with %s", op, str(e))
                return False
            #
            try:
                mw = PdbxLoader(
                    self.__cfgOb,
                    self.__cachePath,
                    resourceName="MONGO_DB",
                    numProc=numProc,
                    chunkSize=chunkSize,
                    fileLimit=fileLimit,
                    verbose=self.__debugFlag,
                    readBackCheck=readBackCheck,
                )
                ok = mw.load(
                    databaseName,
                    collectionLoadList=collectionNameList,
                    loadType=loadType,
                    inputPathList=inputPathList,
                    styleType=documentStyle,
                    dataSelectors=["PUBLIC_RELEASE"],
                    failedFilePath=failedFilePath,
                    saveInputFileListPath=saveInputFileListPath,
                    pruneDocumentSize=pruneDocumentSize,
                    validationLevel=schemaLevel,
                    mergeContentTypes=["vrpt"] if mergeValidationReports else None,
                    updateSchemaOnReplace=updateSchemaOnReplace,
                )
                okS = self.loadStatus(mw.getLoadStatus(), readBackCheck=readBackCheck)
            except Exception as e:
                logger.exception("Operation %r database %r failing with %s", op, databaseName, str(e))
        elif op == "etl-entity-sequence-clusters" and dbType == "mongo":
            cw = SequenceClustersEtlWorker(self.__cfgOb, numProc=numProc, chunkSize=chunkSize, documentLimit=documentLimit, verbose=self.__debugFlag, readBackCheck=readBackCheck, workPath=self.__cachePath)
            ok = cw.etl(dataSetId, seqDataLocator, loadType=loadType)
            okS = self.loadStatus(cw.getLoadStatus(), readBackCheck=readBackCheck)
        elif op == "etl-repository-holdings" and dbType == "mongo":
            rhw = RepoHoldingsEtlWorker(
                self.__cfgOb,
                sandboxPath,
                self.__cachePath,
                numProc=numProc,
                chunkSize=chunkSize,
                documentLimit=documentLimit,
                verbose=self.__debugFlag,
                readBackCheck=readBackCheck,
            )
            ok = rhw.load(dataSetId, loadType=loadType)
            okS = self.loadStatus(rhw.getLoadStatus(), readBackCheck=readBackCheck)
        logger.info("Completed operation %r with status %r", op, ok and okS)
        return ok and okS

    def loadStatus(self, statusList, readBackCheck=True):
        ret = False
        try:
            dl = DocumentLoader(self.__cfgOb, self.__cachePath, "MONGO_DB", numProc=1, chunkSize=2, documentLimit=None, verbose=False, readBackCheck=readBackCheck)
            #
            sectionName = "data_exchange_configuration"
            databaseName = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
            collectionName = self.__cfgOb.get("COLLECTION_UPDATE_STATUS", sectionName=sectionName)
            ret = dl.load(databaseName, collectionName, loadType="append", documentList=statusList, indexAttributeList=["update_id", "database_name", "object_name"], keyNames=None)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return ret

    def buildResourceCache(self, rebuildCache=False):
        """Generate and cache resource dependencies."""
        ret = False
        try:
            useCache = not rebuildCache
            logger.info("Cache setting useCache is %r", useCache)
            rp = DictMethodResourceProvider(self.__cfgOb, configName=self.__configName, cachePath=self.__cachePath)
            ret = rp.cacheResources(useCache=useCache)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return ret
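# Illustrative usage sketch (argument values are assumptions): construct the
# workflow with a configuration file and cache directory, then run one of the
# supported operations ("pdbx-loader", "etl-repository-holdings", or
# "etl-entity-sequence-clusters") with keyword options consumed by load().
rlWf = RepoLoadWorkflow(configPath="exdb-config-example.yml", configName="site_info_configuration", cachePath="./CACHE")
ok = rlWf.load("pdbx-loader", databaseName="pdbx_core", loadType="full", numProc=4, chunkSize=10, schemaLevel="min", readBackCheck=True)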
class RepoHoldingsRemoteLoaderTests(unittest.TestCase):
    def __init__(self, methodName="runTest"):
        super(RepoHoldingsRemoteLoaderTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        #
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath)
        self.__resourceName = "MONGO_DB"
        self.__readBackCheck = True
        self.__numProc = 2
        self.__chunkSize = 10
        self.__documentLimit = None
        self.__filterType = "assign-dates"
        #
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__sandboxPath = self.__cfgOb.getPath("RCSB_EXCHANGE_SANDBOX_PATH", sectionName=configName)
        # sample data set
        self.__updateId = "2021_36"
        #
        eiP = EntryInfoProvider(cachePath=self.__cachePath, useCache=True)
        ok = eiP.testCache(minCount=0)
        self.assertTrue(ok)
        ok = eiP.restore(self.__cfgOb, configName, useStash=False, useGit=True)
        self.assertTrue(ok)
        ok = eiP.reload()
        self.assertTrue(ok)
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testLoadHoldingsRemote(self):
        """Test case - load legacy repository holdings and status data -

        [repository_holdings]
        DATABASE_NAME=repository_holdings
        DATABASE_VERSION_STRING=v5
        COLLECTION_HOLDINGS_UPDATE=rcsb_repository_holdings_update_entry
        COLLECTION_HOLDINGS_CURRENT=rcsb_repository_holdings_current_entry
        COLLECTION_HOLDINGS_UNRELEASED=rcsb_repository_holdings_unreleased_entry
        COLLECTION_HOLDINGS_REMOVED=rcsb_repository_holdings_removed_entry
        COLLECTION_HOLDINGS_COMBINED=rcsb_repository_holdings_combined_entry
        """
        try:
            sectionName = "repository_holdings_configuration"
            rhdp = RepoHoldingsRemoteDataPrep(cachePath=self.__cachePath, filterType=self.__filterType)
            #
            dl = DocumentLoader(
                self.__cfgOb,
                self.__cachePath,
                self.__resourceName,
                numProc=self.__numProc,
                chunkSize=self.__chunkSize,
                documentLimit=self.__documentLimit,
                verbose=self.__verbose,
                readBackCheck=self.__readBackCheck,
            )
            #
            databaseName = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
            logger.info("databaseName %r", databaseName)
            addValues = None
            #
            maxDoc = 5
            dList = rhdp.getHoldingsRemovedEntry(updateId=self.__updateId)
            dList = dList[:maxDoc] if maxDoc else dList
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_REMOVED", sectionName=sectionName)
            ok = dl.load(databaseName, collectionName, loadType="full", documentList=dList, indexAttributeList=["update_id", "entry_id"], keyNames=None, addValues=addValues)
            logger.info("Collection %r length %d load status %r", collectionName, len(dList), ok)
            self.assertTrue(ok)
            #
            dList = rhdp.getHoldingsUnreleasedEntry(updateId=self.__updateId)
            dList = dList[:maxDoc] if maxDoc else dList
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_UNRELEASED", sectionName=sectionName)
            ok = dl.load(databaseName, collectionName, loadType="full", documentList=dList, indexAttributeList=["update_id", "entry_id"], keyNames=None, addValues=addValues)
            logger.info("Collection %r length %d load status %r", collectionName, len(dList), ok)
            self.assertTrue(ok)
            #
            dList = rhdp.getHoldingsUpdateEntry(updateId=self.__updateId)
            dList = dList[:maxDoc] if maxDoc else dList
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_UPDATE", sectionName=sectionName)
            logger.info("collectionName %r", collectionName)
            ok = dl.load(databaseName, collectionName, loadType="full", documentList=dList, indexAttributeList=["update_id", "entry_id"], keyNames=None, addValues=addValues)
            logger.info("Collection %r length %d load status %r", collectionName, len(dList), ok)
            self.assertTrue(ok)
            #
            dList = rhdp.getHoldingsCurrentEntry(updateId=self.__updateId)
            dList = dList[:maxDoc] if maxDoc else dList
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_CURRENT", sectionName=sectionName)
            ok = dl.load(databaseName, collectionName, loadType="full", documentList=dList, indexAttributeList=["update_id", "entry_id"], keyNames=None, addValues=addValues)
            logger.info("Collection %r length %d load status %r", collectionName, len(dList), ok)
            self.assertTrue(ok)
            #
            dList = rhdp.getHoldingsCombinedEntry(updateId=self.__updateId)
            dList = dList[:maxDoc] if maxDoc else dList
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_COMBINED", sectionName=sectionName)
            ok = dl.load(databaseName, collectionName, loadType="full", documentList=dList, indexAttributeList=["update_id", "entry_id"], keyNames=None, addValues=addValues)
            logger.info("Collection %r length %d load status %r", collectionName, len(dList), ok)
            self.assertTrue(ok)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
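
# Illustrative refactoring sketch only (not part of the source module): the five
# holdings loads in testLoadHoldingsRemote() above share one fetch/trim/load
# pattern, which could be factored as below.  The helper name and the maxDoc
# default are hypothetical; the dl.load() call mirrors the one used in the test,
# and logger is assumed from the surrounding module context.
def _exampleHoldingsCollectionLoad(dl, cfgOb, databaseName, collectionKey, dList, maxDoc=5):
    """Trim a holdings document list and load it into the configured collection."""
    sectionName = "repository_holdings_configuration"
    dList = dList[:maxDoc] if maxDoc else dList
    collectionName = cfgOb.get(collectionKey, sectionName=sectionName)
    ok = dl.load(
        databaseName,
        collectionName,
        loadType="full",
        documentList=dList,
        indexAttributeList=["update_id", "entry_id"],
        keyNames=None,
        addValues=None,
    )
    logger.info("Collection %r length %d load status %r", collectionName, len(dList), ok)
    return ok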
    def testReadYamlConfig(self):
        try:
            cfgOb = ConfigUtil(configPath=self.__inpPathConfigYaml, configFormat="yaml", mockTopPath=self.__mockTopPath)
            ok = cfgOb.appendConfig(self.__inpPathConfigAppendYaml, configFormat="yaml")
            self.assertTrue(ok)
            #
            sName = "DEFAULT"
            pathBird = cfgOb.getPath("BIRD_REPO_PATH", sectionName=sName)
            pathPdbx = cfgOb.getPath("PDBX_REPO_PATH", sectionName=sName)
            #
            self.assertEqual(pathBird, os.path.join(self.__mockTopPath, "MOCK_BIRD_REPO"))
            self.assertEqual(pathPdbx, os.path.join(self.__mockTopPath, "MOCK_PDBX_SANDBOX"))
            pathBird = cfgOb.get("BIRD_REPO_PATH", sectionName=sName)
            pathPdbx = cfgOb.get("PDBX_REPO_PATH", sectionName=sName)
            self.assertEqual(pathBird, "MOCK_BIRD_REPO")
            self.assertEqual(pathPdbx, "MOCK_PDBX_SANDBOX")
            sName = "Section1"
            #
            helperMethod = cfgOb.getHelper("DICT_METHOD_HELPER_MODULE", sectionName=sName)
            tv = helperMethod.echo("test_value")
            self.assertEqual(tv, "test_value")
            #
            tEnv = "TEST_ENV_VAR"
            tVal = "TEST_ENV_VAR_VALUE"
            os.environ[tEnv] = tVal
            eVal = cfgOb.getEnvValue("ENV_OPTION_A", sectionName=sName)
            self.assertEqual(tVal, eVal)

            ky = "42d13dfc9eb689e48c774aa5af8a7e15dbabcd5041939bef213eb37aed882fd6"
            os.environ["CONFIG_SUPPORT_TOKEN_ENV"] = ky
            #
            un = cfgOb.getSecret("SECRET_TEST_USERNAME", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            pw = cfgOb.getSecret("SECRET_TEST_PASSWORD", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            self.assertEqual(un, "testuser")
            self.assertEqual(pw, "testuserpassword")
            #
            un = cfgOb.get("_TEST_USERNAME", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            pw = cfgOb.get("_TEST_PASSWORD", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            self.assertEqual(un, "testuser")
            self.assertEqual(pw, "testuserpassword")
            #
            un = cfgOb.getSecret("_TEST_USERNAME", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            pw = cfgOb.getSecret("_TEST_PASSWORD", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            self.assertEqual(un, "testuser")
            self.assertEqual(pw, "testuserpassword")
            #
            sName = "Section2"
            un = cfgOb.getSecret("_TEST_USERNAME", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            pw = cfgOb.getSecret("_TEST_PASSWORD", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            self.assertEqual(un, "testuser")
            self.assertEqual(pw, "testuserpassword")
            # test clear-text fallback --
            #   CLEAR_TEXT_USERNAME: testuser2
            #   CLEAR_TEXT_PASSWORD: changeme2
            un = cfgOb.get("_CLEAR_TEXT_USERNAME", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            pw = cfgOb.get("_CLEAR_TEXT_PASSWORD", default=None, sectionName=sName, tokenName="CONFIG_SUPPORT_TOKEN")
            self.assertEqual(un, "testuser2")
            self.assertEqual(pw, "changeme2")
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
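
# Illustrative sketch only (not part of the source module): testReadYamlConfig()
# above exercises the "_"-prefixed option convention -- get()/getSecret() resolve
# an encrypted option using the token named by tokenName, and the test's fallback
# block shows the corresponding CLEAR_TEXT_* option being returned when no
# encrypted value is stored.  The token value below is hypothetical; the option,
# section, and token names mirror the test.
def _exampleSecretFallback(cfgOb):
    """Resolve a secret with clear-text fallback (hypothetical token value)."""
    os.environ["CONFIG_SUPPORT_TOKEN_ENV"] = "0" * 64  # hypothetical hex token
    # With no decryptable value, this resolves to Section2 CLEAR_TEXT_USERNAME.
    return cfgOb.get("_CLEAR_TEXT_USERNAME", default=None, sectionName="Section2", tokenName="CONFIG_SUPPORT_TOKEN")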
class SequenceClusterLoaderTests(unittest.TestCase):
    def __init__(self, methodName="runTest"):
        super(SequenceClusterLoaderTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        #
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath)
        # self.__cfgOb.dump()
        self.__resourceName = "MONGO_DB"
        self.__failedFilePath = os.path.join(HERE, "test-output", "failed-list.txt")
        self.__readBackCheck = True
        self.__numProc = 2
        self.__chunkSize = 10
        self.__documentLimit = 1000
        #
        # sample data set
        self.__dataSetId = "2018_23"
        self.__pathClusterData = self.__cfgOb.getPath("RCSB_SEQUENCE_CLUSTER_DATA_PATH", sectionName=configName)
        self.__levels = ["100", "95", "90", "70", "50", "30"]
        #
        self.__workPath = os.path.join(HERE, "test-output")
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__pathSaveStyleCif = os.path.join(HERE, "test-output", "cluster-data-cif.json")
        self.__pathSaveStyleDocSequence = os.path.join(HERE, "test-output", "cluster-data-doc-sequence.json")
        self.__pathSaveStyleDocCluster = os.path.join(HERE, "test-output", "cluster-data-doc-cluster.json")
        #
        self.__entitySchemaName = "rcsb_entity_sequence_cluster_list"
        self.__clusterSchemaName = "rcsb_entity_sequence_cluster_identifer_list"
        self.__provKeyName = "rcsb_entity_sequence_cluster_prov"
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def __fetchProvenance(self):
        """Fetch provenance dictionary content for the configured provenance key."""
        try:
            provU = ProvenanceProvider(self.__cfgOb, self.__cachePath)
            pD = provU.fetch()
            return pD[self.__provKeyName] if self.__provKeyName in pD else {}
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def __testExtract(self, dataSetId, dataLocator, levels):
        """Test extraction on an example sequence cluster data set."""
        try:
            cdp = ClusterDataPrep(workPath=self.__workPath, entitySchemaName=self.__entitySchemaName, clusterSchemaName=self.__clusterSchemaName)
            cifD, docBySequenceD, docByClusterD = cdp.extract(dataSetId, clusterSetLocator=dataLocator, levels=levels, clusterType="entity")
            self.assertEqual(len(cifD), 1)
            self.assertEqual(len(docBySequenceD), 1)
            self.assertEqual(len(docByClusterD), 1)
            return docBySequenceD, docByClusterD
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testLoadCluster(self):
        """Test case - load example sequence cluster document data."""
        try:
            dl = DocumentLoader(
                self.__cfgOb,
                self.__cachePath,
                self.__resourceName,
                numProc=self.__numProc,
                chunkSize=self.__chunkSize,
                documentLimit=self.__documentLimit,
                verbose=self.__verbose,
                readBackCheck=self.__readBackCheck,
            )
            #
            docBySequenceD, docByClusterD = self.__testExtract(dataSetId=self.__dataSetId, dataLocator=self.__pathClusterData, levels=self.__levels)
            #
            dList = docBySequenceD[self.__entitySchemaName]
            ok = dl.load("sequence_clusters", "entity_members", loadType="full", documentList=dList, indexAttributeList=["data_set_id", "entry_id", "entity_id"], keyNames=None)
            self.assertTrue(ok)
            dList = docByClusterD[self.__clusterSchemaName]
            ok = dl.load("sequence_clusters", "cluster_members", loadType="full", documentList=dList, indexAttributeList=["data_set_id", "identity", "cluster_id"], keyNames=None)
            self.assertTrue(ok)
            pD = self.__fetchProvenance()
            ok = dl.load("sequence_clusters", "cluster_provenance", loadType="full", documentList=[pD], indexAttributeList=None, keyNames=None)
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()