class SchemaDefCompareTests(unittest.TestCase):
    """Compare freshly generated schema definitions and JSON schemas with cached copies."""

    # Set to False to activate these troubleshooting comparisons.
    skipFlag = True

    def setUp(self):
        self.__verbose = True
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        configName = "site_info_configuration"
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__cfgOb = ConfigUtil(configPath=pathConfig, defaultSectionName=configName, mockTopPath=mockTopPath)
        # Cached schema artifacts are reused so comparisons run against prior output.
        self.__schP = SchemaProvider(self.__cfgOb, self.__cachePath, useCache=True)
        #
        catalogSection = "database_catalog_configuration"
        self.__validationLevels = self.__cfgOb.getList("VALIDATION_LEVELS_TEST", sectionName=catalogSection)
        self.__encodingTypes = self.__cfgOb.getList("ENCODING_TYPES_TEST", sectionName=catalogSection)
        #
        buildAll = True
        if buildAll:
            # Exercise the complete deployed catalog of databases and data typings.
            self.__databaseNameList = self.__cfgOb.getList("DATABASE_NAMES_DEPLOYED", sectionName=catalogSection)
            self.__dataTypingList = self.__cfgOb.getList("DATATYPING_DEPLOYED", sectionName=catalogSection)
        else:
            self.__databaseNameList = self.__cfgOb.getList("DATABASE_NAMES_TEST", sectionName=catalogSection)
            # self.__databaseNameList = ["repository_holdings"]
            self.__dataTypingList = self.__cfgOb.getList("DATATYPING_TEST", sectionName=catalogSection)
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    @unittest.skipIf(skipFlag, "Troubleshooting test")
    def testCompareSchemaDefs(self):
        """Rebuild each schema definition and report any differences from the cached definition."""
        try:
            difPathList = []
            for databaseName in self.__databaseNameList:
                for dataTyping in self.__dataTypingList:
                    logger.debug("Building schema %s with types %s", databaseName, dataTyping)
                    difPath = self.__schP.schemaDefCompare(databaseName, dataTyping)
                    if difPath:
                        difPathList.append(difPath)
            if difPathList:
                # Log only the file names, not the full cache paths.
                logger.info("Schema definition difference path list %r", [os.path.split(difPath)[1] for difPath in difPathList])
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    @unittest.skipIf(skipFlag, "Troubleshooting test")
    def testCompareCollectionSchema(self):
        """Regenerate each JSON collection schema and report any differences from the cached schema."""
        try:
            difPathList = []
            for databaseName in self.__databaseNameList:
                schemaDef = SchemaDefAccess(self.__schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=False))
                for collectionInfo in schemaDef.getCollectionInfo():
                    collectionName = collectionInfo["NAME"]
                    for encodingType in self.__encodingTypes:
                        # Only JSON encodings participate in this comparison.
                        if encodingType.lower() != "json":
                            continue
                        for level in self.__validationLevels:
                            difPath = self.__schP.jsonSchemaCompare(databaseName, collectionName, encodingType, level)
                            if difPath:
                                difPathList.append(difPath)
            if difPathList:
                logger.info("JSON schema difference path list %r", [os.path.split(difPath)[1] for difPath in difPathList])
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
class SchemaDefBuildTests(unittest.TestCase):
    """Build schema definitions and collection schemas from configuration settings."""

    def setUp(self):
        self.__verbose = True
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml")
        configName = "site_info_configuration"
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__cfgOb = ConfigUtil(configPath=pathConfig, defaultSectionName=configName, mockTopPath=mockTopPath)
        # Fresh builds: schema artifacts are regenerated rather than read from cache.
        self.__schP = SchemaProvider(self.__cfgOb, self.__cachePath, useCache=False)
        #
        catalogSection = "database_catalog_configuration"
        self.__validationLevels = self.__cfgOb.getList("VALIDATION_LEVELS_TEST", sectionName=catalogSection)
        self.__encodingTypes = self.__cfgOb.getList("ENCODING_TYPES_TEST", sectionName=catalogSection)
        #
        buildAll = True
        if buildAll:
            self.__databaseNameList = self.__cfgOb.getList("DATABASE_NAMES_DEPLOYED", sectionName=catalogSection)
            self.__dataTypingList = self.__cfgOb.getList("DATATYPING_DEPLOYED", sectionName=catalogSection)
        else:
            self.__databaseNameList = self.__cfgOb.getList("DATABASE_NAMES_TEST", sectionName=catalogSection)
            # self.__databaseNameList = ["repository_holdings"]
            self.__dataTypingList = self.__cfgOb.getList("DATATYPING_TEST", sectionName=catalogSection)
        #
        # self.__databaseNameList = ["sequence_clusters"]
        self.__saveSchema = True
        self.__compareDefSchema = False
        self.__compareSchema = False
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testBuildSchemaDefs(self):
        """Build (and optionally save/compare) a schema definition for each configured database."""
        try:
            for databaseName in self.__databaseNameList:
                for dataTyping in self.__dataTypingList:
                    logger.debug("Building schema %s with types %s", databaseName, dataTyping)
                    self.__schP.makeSchemaDef(databaseName, dataTyping=dataTyping, saveSchema=self.__saveSchema)
                    if self.__compareDefSchema:
                        self.__schP.schemaDefCompare(databaseName, dataTyping)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testBuildCollectionSchema(self):
        """Build collection schemas for every configured encoding type and validation level."""
        schemaDifPathList = []
        for databaseName in self.__databaseNameList:
            schemaDef = SchemaDefAccess(self.__schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=False))
            for collectionInfo in schemaDef.getCollectionInfo():
                collectionName = collectionInfo["NAME"]
                for encodingType in self.__encodingTypes:
                    # No collection-level schema product exists for the "rcsb" encoding.
                    if encodingType.lower() == "rcsb":
                        continue
                    for level in self.__validationLevels:
                        self.__schP.makeSchema(databaseName, collectionName, encodingType=encodingType, level=level, saveSchema=self.__saveSchema)
                        if self.__compareSchema and encodingType.lower() == "json":
                            difPath = self.__schP.jsonSchemaCompare(databaseName, collectionName, encodingType, level)
                            if difPath:
                                schemaDifPathList.append(difPath)
        if schemaDifPathList:
            logger.info("Path dif list %r", schemaDifPathList)

    def testCompareSchema(self):
        """Compare a newly generated pdbx_core entry JSON schema with a saved reference copy."""
        databaseName = "pdbx_core"
        collectionName = "pdbx_core_entry"
        encodingType = "json"
        level = "full"
        #
        oldPath = os.path.join(HERE, "test-saved-output", "json-full-db-pdbx_core-col-pdbx_core_entry.json")
        mU = MarshalUtil(workPath=os.path.join(HERE, "test-output"))
        sOld = mU.doImport(oldPath, fmt="json")
        sNew = self.__schP.makeSchema(databaseName, collectionName, encodingType=encodingType, level=level)
        numDif, difD = self.__schP.schemaCompare(sOld, sNew)
        logger.debug("numDiffs %d", numDif)
        # Lower bounds on the expected drift between the saved reference and current build.
        self.assertGreaterEqual(numDif, 141)
        self.assertGreaterEqual(len(difD["changed"]), 160)
        logger.debug("difD %r", difD)

    @unittest.skip("Deprecated test")
    def testCompareSchemaCategories(self):
        """Compare common categories across schema definitions."""
        try:
            sdCc = SchemaDefAccess(self.__schP.makeSchemaDef("chem_comp_core", dataTyping="ANY", saveSchema=False))
            sdBcc = SchemaDefAccess(self.__schP.makeSchemaDef("bird_chem_comp_core", dataTyping="ANY", saveSchema=False))
            #
            logger.info("")
            for schemaId in ["CHEM_COMP", "PDBX_CHEM_COMP_AUDIT"]:
                atCcL = sdCc.getAttributeIdList(schemaId)
                atBCcL = sdBcc.getAttributeIdList(schemaId)
                logger.debug("%s attributes (%d) %r", schemaId, len(atCcL), atCcL)
                logger.debug("%s attributes (%d) %r", schemaId, len(atBCcL), atBCcL)
                # Attributes present in chem_comp_core but missing from bird_chem_comp_core.
                sDif = set(atCcL) - set(atBCcL)
                if sDif:
                    logger.info("For %s attribute differences %r", schemaId, sDif)
                self.assertEqual(len(sDif), 0)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testBuildColSchemaWithRefs(self):
        """Build collection schemas with parent references for the I/HM development database."""
        for databaseName in ["ihm_dev_full"]:
            schemaDef = SchemaDefAccess(self.__schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=False))
            for collectionInfo in schemaDef.getCollectionInfo():
                collectionName = collectionInfo["NAME"]
                for schemaType in self.__encodingTypes:
                    if schemaType.lower() == "rcsb":
                        continue
                    for level in self.__validationLevels:
                        self.__schP.makeSchema(
                            databaseName,
                            collectionName,
                            encodingType=schemaType,
                            level=level,
                            saveSchema=True,
                            extraOpts="addParentRefs|addPrimaryKey",
                        )
# Configuration section holding the database catalog selections.
_CATALOG_SECTION = "database_catalog_configuration"


def _buildArgParser(defaultConfigName):
    """Return the argument parser for the schema update command-line tool."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--update_chem_comp_ref", default=False, action="store_true", help="Update schema for Chemical Component reference definitions")
    parser.add_argument("--update_chem_comp_core_ref", default=False, action="store_true", help="Update core schema for Chemical Component reference definitions")
    parser.add_argument("--update_bird_chem_comp_ref", default=False, action="store_true", help="Update schema for Bird Chemical Component reference definitions")
    parser.add_argument("--update_bird_chem_comp_core_ref", default=False, action="store_true", help="Update core schema for Bird Chemical Component reference definitions")
    parser.add_argument("--update_bird_ref", default=False, action="store_true", help="Update schema for Bird reference definitions")
    parser.add_argument("--update_bird_family_ref", default=False, action="store_true", help="Update schema for Bird Family reference definitions")
    parser.add_argument("--update_pdbx", default=False, action="store_true", help="Update schema for PDBx entry data")
    parser.add_argument("--update_pdbx_core", default=False, action="store_true", help="Update schema for PDBx core entry/entity data")
    parser.add_argument("--update_pdbx_comp_model_core", default=False, action="store_true", help="Update schema for PDBx computational model core entry/entity data")
    parser.add_argument("--update_repository_holdings", default=False, action="store_true", help="Update schema for repository holdings")
    parser.add_argument("--update_entity_sequence_clusters", default=False, action="store_true", help="Update schema for entity sequence clusters")
    parser.add_argument("--update_data_exchange", default=False, action="store_true", help="Update schema for data exchange status")
    parser.add_argument("--update_ihm_dev", default=False, action="store_true", help="Update schema for I/HM dev entry data")
    parser.add_argument("--update_drugbank_core", default=False, action="store_true", help="Update DrugBank schema")
    parser.add_argument("--update_config_all", default=False, action="store_true", help="Update using configuration settings (e.g. DATABASE_NAMES_ALL)")
    parser.add_argument("--update_config_deployed", default=False, action="store_true", help="Update using configuration settings (e.g. DATABASE_NAMES_DEPLOYED)")
    parser.add_argument("--update_config_test", default=False, action="store_true", help="Update using configuration settings (e.g. DATABASE_NAMES_TEST)")
    parser.add_argument("--config_path", default=None, help="Path to configuration options file")
    parser.add_argument("--config_name", default=defaultConfigName, help="Configuration section name")
    parser.add_argument("--cache_path", default=None, help="Schema cache directory path")
    parser.add_argument("--encoding_types", default=None, help="Schema encoding (rcsb|json|bson) (comma separated)")
    parser.add_argument("--validation_levels", default=None, help="Schema validation level (full|min) (comma separated)")
    parser.add_argument("--compare_only", default=False, action="store_true", help="Perform comparison with cached schema")
    parser.add_argument("--debug", default=False, action="store_true", help="Turn on verbose logging")
    parser.add_argument("--mock", default=False, action="store_true", help="Use MOCK repository configuration for dependencies and testing")
    parser.add_argument("--working_path", default=None, help="Working/alternative path for temporary and schema files")
    return parser


def _selectedDatabases(args):
    """Return the database names selected by the individual --update_* flags."""
    flagToDb = [
        (args.update_chem_comp_ref, "chem_comp"),
        (args.update_bird_chem_comp_ref, "bird_chem_comp"),
        (args.update_chem_comp_core_ref, "chem_comp_core"),
        (args.update_bird_chem_comp_core_ref, "bird_chem_comp_core"),
        (args.update_bird_ref, "bird"),
        (args.update_bird_family_ref, "bird_family"),
        (args.update_pdbx, "pdbx"),
        (args.update_pdbx_core, "pdbx_core"),
        (args.update_pdbx_comp_model_core, "pdbx_comp_model_core"),
        (args.update_repository_holdings, "repository_holdings"),
        (args.update_entity_sequence_clusters, "sequence_clusters"),
        (args.update_data_exchange, "data_exchange"),
        (args.update_ihm_dev, "ihm_dev"),
        (args.update_drugbank_core, "drugbank_core"),
    ]
    return [dbName for flag, dbName in flagToDb if flag]


def _configSelections(cfgOb, suffix):
    """Return (databaseNames, dataTypings, validationLevels, encodingTypes) lists for a
    configuration suffix ("DEPLOYED" | "ALL" | "TEST")."""
    return (
        cfgOb.getList("DATABASE_NAMES_" + suffix, sectionName=_CATALOG_SECTION),
        cfgOb.getList("DATATYPING_" + suffix, sectionName=_CATALOG_SECTION),
        cfgOb.getList("VALIDATION_LEVELS_" + suffix, sectionName=_CATALOG_SECTION),
        cfgOb.getList("ENCODING_TYPES_" + suffix, sectionName=_CATALOG_SECTION),
    )


def main():
    """Command-line entry point: build or compare schema definitions and collection schemas.

    Exits with status 1 when the configuration file is missing or unreadable.
    """
    defaultConfigName = "site_info_configuration"
    parser = _buildArgParser(defaultConfigName)
    args = parser.parse_args()
    #
    if args.debug:
        logger.setLevel(logging.DEBUG)
    # ----------------------- - ----------------------- - ----------------------- -
    # Configuration details
    configPath = args.config_path
    configName = args.config_name
    cachePath = args.cache_path
    compareOnly = args.compare_only
    #
    encodingTypes = args.encoding_types.split(",") if args.encoding_types else []
    validationLevels = args.validation_levels.split(",") if args.validation_levels else []
    dataTypingList = ["ANY", "SQL"]

    if not configPath:
        configPath = os.getenv("DBLOAD_CONFIG_PATH", None)
    try:
        # Guard against configPath being None (neither --config_path nor the environment
        # variable set) so the explicit error message is logged instead of a TypeError.
        if configPath and os.access(configPath, os.R_OK):
            os.environ["DBLOAD_CONFIG_PATH"] = configPath
            logger.info("Using configuration path %s (%s)", configPath, configName)
        else:
            logger.error("Missing or access issue with config file %r", configPath)
            raise SystemExit(1)
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") if args.mock else None
        cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=defaultConfigName, mockTopPath=mockTopPath)
        if configName != defaultConfigName:
            cfgOb.replaceSectionName(defaultConfigName, configName)
    except Exception as e:
        # SystemExit is a BaseException and is not caught here.
        logger.error("Missing or access issue with config file %r with %s", configPath, str(e))
        raise SystemExit(1)
    #
    # Resolve the target databases from the individual flags; a bulk configuration-driven
    # selection (deployed/all/test, in increasing precedence) replaces all four lists.
    databaseNameList = _selectedDatabases(args)
    if args.update_config_deployed:
        databaseNameList, dataTypingList, validationLevels, encodingTypes = _configSelections(cfgOb, "DEPLOYED")
    if args.update_config_all:
        databaseNameList, dataTypingList, validationLevels, encodingTypes = _configSelections(cfgOb, "ALL")
    if args.update_config_test:
        databaseNameList, dataTypingList, validationLevels, encodingTypes = _configSelections(cfgOb, "TEST")
    #
    # Mapping of database name -> collection descriptors used for non-"rcsb" encodings.
    scnD = cfgOb.get("document_collection_names", sectionName="document_helper_configuration")
    #
    databaseNameList = list(set(databaseNameList))
    logger.debug("Collections %s", list(scnD.items()))
    logger.debug("databaseNameList %s", databaseNameList)

    if compareOnly:
        # Comparison mode: rebuild against the cached artifacts and report differences.
        schP = SchemaProvider(cfgOb, cachePath, useCache=True)
        difPathList = []
        for databaseName in databaseNameList:
            for dataTyping in dataTypingList:
                logger.debug("Building schema %s with types %s", databaseName, dataTyping)
                pth = schP.schemaDefCompare(databaseName, dataTyping)
                if pth:
                    difPathList.append(pth)
        if difPathList:
            logger.info("Schema definition difference path list %r", difPathList)
        difPathList = []
        for databaseName in databaseNameList:
            sD = SchemaDefAccess(schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=False))
            for cd in sD.getCollectionInfo():
                collectionName = cd["NAME"]
                for encodingType in encodingTypes:
                    # Only JSON encodings have comparable collection schemas.
                    if encodingType.lower() != "json":
                        continue
                    for level in validationLevels:
                        pth = schP.jsonSchemaCompare(databaseName, collectionName, encodingType, level)
                        if pth:
                            difPathList.append(pth)
        if difPathList:
            logger.info("JSON schema difference path list %r", difPathList)
    else:
        # Build mode: regenerate and persist the selected schema artifacts.
        schP = SchemaProvider(cfgOb, cachePath, useCache=False)
        for databaseName in databaseNameList:
            for encodingType in encodingTypes:
                # Case-insensitive for consistency with the other encoding checks.
                if encodingType.lower() == "rcsb":
                    for dataTyping in dataTypingList:
                        logger.info("Creating schema definition for content type %s data typing %s", databaseName, dataTyping)
                        schP.makeSchemaDef(databaseName, dataTyping=dataTyping, saveSchema=True)
                else:
                    if databaseName in scnD:
                        for dD in scnD[databaseName]:
                            collectionName = dD["NAME"]
                            for validationLevel in validationLevels:
                                logger.info("Creating %r schema for content type %s collection %s", encodingType, databaseName, collectionName)
                                schP.makeSchema(databaseName, collectionName, encodingType=encodingType, level=validationLevel, saveSchema=True)