def testProviderReadValidationReport(self):
    mU = MarshalUtil()
    vpr = ValidationReportAdapter(dirPath=os.path.join(self.__workPath, "vprt"), useCache=False, clearCache=True)
    vrd = vpr.getReader()
    cL = mU.doImport(self.__exampleFileXray, fmt="xml", marshalHelper=vrd.toCif)
    ok = mU.doExport(self.__cifFileXray, cL, fmt="mmcif")
    self.assertTrue(ok)
    #
    vpr = ValidationReportAdapter(dirPath=os.path.join(self.__workPath, "vprt"), useCache=True, clearCache=False)
    vrd = vpr.getReader()
    xrt = mU.doImport(self.__exampleFileNmr, fmt="xml")
    cL = vrd.toCif(xrt)
    ok = mU.doExport(self.__cifFileNmr, cL, fmt="mmcif")
    self.assertTrue(ok)
    #
    vpr = ValidationReportAdapter(dirPath=os.path.join(self.__workPath, "vprt"), useCache=True, clearCache=False)
    vrd = vpr.getReader()
    xrt = mU.doImport(self.__exampleFileEm, fmt="xml")
    cL = vrd.toCif(xrt)
    ok = mU.doExport(self.__cifFileEm, cL, fmt="mmcif")
    self.assertTrue(ok)
def __reload(self, dirPath, baseVersion, useCache, **kwargs):
    startTime = time.time()
    mU = MarshalUtil(workPath=dirPath)
    chemblDbUrl = kwargs.get("ChEMBLDbUrl", "ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/latest/")
    ok = False
    fU = FileUtil()
    fU.mkdir(dirPath)
    #
    # ChEMBL current version <baseVersion>,...
    # template: chembl_<baseVersion>.fa.gz
    #
    targetFileName = "chembl_" + str(baseVersion) + ".fa.gz"
    mappingFileName = "chembl_uniprot_mapping.txt"
    #
    chemblTargetPath = os.path.join(dirPath, targetFileName)
    chemblMappingPath = os.path.join(dirPath, mappingFileName)
    mappingFilePath = os.path.join(dirPath, "chembl_uniprot_mapping.json")
    #
    mapD = {}
    if useCache and fU.exists(mappingFilePath):
        logger.info("useCache %r using %r and %r and %r", useCache, chemblTargetPath, chemblMappingPath, mappingFilePath)
        mapD = mU.doImport(mappingFilePath, fmt="json")
    else:
        # Get the ChEMBL UniProt mapping file
        url = os.path.join(chemblDbUrl, mappingFileName)
        ok = fU.get(url, chemblMappingPath)
        logger.info("Fetched %r url %s path %s", ok, url, chemblMappingPath)
        logger.info("Reading ChEMBL mapping file path %s", chemblMappingPath)
        rowL = mU.doImport(chemblMappingPath, fmt="tdd", rowFormat="list")
        for row in rowL:
            mapD[row[0]] = (row[1], row[2], row[3])
        ok = mU.doExport(mappingFilePath, mapD, fmt="json")
        logger.info("Processed mapping path %s (%d) %r", mappingFilePath, len(mapD), ok)
        #
        # Get the target FASTA files --
        for vers in range(baseVersion, baseVersion + 10):
            logger.info("Now fetching version %r", vers)
            self.__version = vers
            targetFileName = "chembl_" + str(vers) + ".fa.gz"
            chemblTargetPath = os.path.join(dirPath, "chembl_targets_raw.fa.gz")
            url = os.path.join(chemblDbUrl, targetFileName)
            ok = fU.get(url, chemblTargetPath)
            logger.info("Fetched %r url %s path %s", ok, url, chemblTargetPath)
            if ok:
                break
    #
    logger.info("Completed reload at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), time.time() - startTime)
    #
    return mapD
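# Note on the version probe in __reload() above: ChEMBL does not expose a fixed
# "latest" FASTA file name, so the loop simply tries chembl_<baseVersion>.fa.gz
# through chembl_<baseVersion + 9>.fa.gz and stops at the first successful
# fetch, recording the discovered release number in self.__version.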
def testReadUrlTarfile(self):
    """Test the case to read URL target and extract a member"""
    try:
        mU = MarshalUtil(workPath=self.__workPath)
        _, fn = os.path.split(self.__urlTarget)
        #
        nmL = mU.doImport(self.__urlTarget, fmt="tdd", rowFormat="list", tarMember="names.dmp")
        self.assertGreater(len(nmL), 2000000)
        logger.info("Names %d", len(nmL))
        ndL = mU.doImport(os.path.join(self.__workPath, fn), fmt="tdd", rowFormat="list", tarMember="nodes.dmp")
        self.assertGreater(len(ndL), 2000000)
        logger.info("Nodes %d", len(ndL))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __rebuildCache(self, **kwargs):
    mU = MarshalUtil()
    # source directory path
    srcDirPath = kwargs.get("srcDirPath", None)
    # cache details
    cacheKwargs = kwargs.get("cacheKwargs", {"fmt": "pickle"})
    useCache = kwargs.get("useCache", True)
    entrySaveLimit = kwargs.get("entrySaveLimit", None)
    abbreviated = str(kwargs.get("abbreviated", "TEST")).upper()
    #
    # cacheDirPath = kwargs.get("cacheDirPath", None)
    cacheDirPath = self.__cacheDirPath
    pyVersion = sys.version_info[0]
    ext = "pic" if cacheKwargs["fmt"] == "pickle" else "json"
    saveFilePath = os.path.join(cacheDirPath, "sifts-summary-py%s.%s" % (str(pyVersion), ext))
    #
    ssD = {}
    try:
        if useCache and os.access(saveFilePath, os.R_OK):
            ssD = mU.doImport(saveFilePath, **cacheKwargs)
        else:
            if not srcDirPath:
                logger.error("Missing SIFTS source path details")
                return ssD
            ssD = self.__getSummaryMapping(srcDirPath, abbreviated=abbreviated)
            if entrySaveLimit:
                ssD = {k: ssD[k] for k in list(ssD.keys())[:entrySaveLimit]}
            mU.mkdir(cacheDirPath)
            ok = mU.doExport(saveFilePath, ssD, **cacheKwargs)
            logger.debug("Saving SIFTS summary serialized data file %s (%d) status %r", saveFilePath, len(ssD), ok)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return ssD
def reloadDump(self, fmt="json"):
    """Reload PubChem reference data store from saved dump.

    Args:
        fmt (str, optional): format of the backup file (pickle or json). Defaults to "json".

    Returns:
        (int): number of objects restored.
    """
    numUpd = 0
    try:
        # Read from disk backup and update object store -
        if fmt in ["json", "pickle"]:
            fp = self.__getdumpFilePath(fmt=fmt)
            logger.info("Restoring object store from %s", fp)
            mU = MarshalUtil(workPath=self.__dirPath)
            matchD = mU.doImport(fp, fmt=fmt)
            numUpd = self.__reloadDump(matchD, self.__databaseName, self.__matchIndexCollectionName, indexAttributeNames=["rcsb_id", "rcsb_last_update"])
    except Exception as e:
        logger.exception("Failing for %r with %s", self.__dirPath, str(e))
    # --
    return numUpd
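# A minimal usage sketch for reloadDump(); the provider construction below is
# hypothetical (any object exposing this method, configured with a cache
# directory and object store connection details, would serve):
#
#   provider = PubChemDataProvider(cachePath)   # hypothetical constructor
#   numRestored = provider.reloadDump(fmt="json")
#   logger.info("Restored %d objects from the saved dump", numRestored)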
def getJsonSchema(self, databaseName, collectionName, encodingType="BSON", level="full", extraOpts=None):
    """Return JSON schema (w/ BSON types) object for the input collection and level.

    Args:
        databaseName (str): database name
        collectionName (str): collection name in document store
        encodingType (str, optional): data type convention (BSON|JSON)
        level (str, optional): completeness of the schema (e.g. min or full)

    Returns:
        dict: schema object
    """
    sObj = None
    schemaLocator = self.__getJsonSchemaLocator(databaseName, collectionName, encodingType=encodingType, level=level)
    #
    if self.__rebuildFlag:
        filePath = os.path.join(self.__schemaCachePath, self.__fileU.getFileName(schemaLocator))
        self.makeSchema(databaseName, collectionName, encodingType=encodingType, level=level, extraOpts=extraOpts)
    else:
        filePath = self.__reload(schemaLocator, self.__jsonSchemaCachePath, useCache=self.__useCache)
    mU = MarshalUtil(workPath=self.__workPath)
    if filePath and mU.exists(filePath):
        sObj = mU.doImport(filePath, fmt="json")
    else:
        logger.debug("Failed to read schema for %s %r", collectionName, level)
    return sObj
def updateDefaultDataTypeMap(self, filePath, mapD, dataTyping="ANY"):
    """Update data file containing application default data type mapping with any
    updates from the input type mapping dictionary

    mapD['cif_type_code'] -> ['application_name', 'app_type_code', 'app_precision_default', 'app_width_default', 'type_code']

    data_rcsb_data_type_map
      loop_
      _pdbx_data_type_application_map.application_name
      _pdbx_data_type_application_map.type_code
      _pdbx_data_type_application_map.app_type_code
      _pdbx_data_type_application_map.app_precision_default
      _pdbx_data_type_application_map.app_width_default
      # .... type mapping data ...
    """
    try:
        #
        mD = copy.deepcopy(mapD)
        mU = MarshalUtil(workPath=self.__workPath)
        containerList = mU.doImport(filePath, fmt="mmcif", enforceAscii=True, useCharRefs=True, raiseExceptions=True)
        for container in containerList:
            if container.getName() == "rcsb_data_type_map":
                catObj = container.getObj("pdbx_data_type_application_map")
                rIL = []
                for ii in range(catObj.getRowCount()):
                    dD = catObj.getRowAttributeDict(ii)
                    if dD["application_name"] == dataTyping:
                        rIL.append(ii)
                        # Carry forward existing rows not superseded by the input mapping
                        if dD["type_code"] not in mD:
                            mD[dD["type_code"]] = {k: dD[k] for k in ["application_name", "app_type_code", "app_precision_default", "app_width_default", "type_code"]}
                # Replace the rows for this application with the merged mapping
                ok = catObj.removeRows(rIL)
                atNameL = catObj.getAttributeList()
                for ky in mD:
                    row = [mD[ky][atN] for atN in atNameL]
                    catObj.append(row)
        #
        # Write the updated data file
        ok = mU.doExport(filePath, containerList, fmt="mmcif", enforceAscii=True, useCharRefs=True, raiseExceptions=True)
        return ok
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return False
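# Illustrative sketch of the mapD input expected by updateDefaultDataTypeMap();
# the instance name dtInfo and the concrete values below are hypothetical,
# shown only to make the cif_type_code -> application mapping shape concrete:
#
#   mapD = {
#       "float": {
#           "application_name": "SQL",
#           "type_code": "float",
#           "app_type_code": "FLOAT",
#           "app_precision_default": 6,
#           "app_width_default": 10,
#       },
#   }
#   ok = dtInfo.updateDefaultDataTypeMap(filePath, mapD, dataTyping="SQL")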
def __getApi(self, dictLocators, **kwargs):
    """Return a dictionary API instance for the input dictionary locator list."""
    consolidate = kwargs.get("consolidate", True)
    replaceDefinition = kwargs.get("replaceDefinitions", True)
    verbose = kwargs.get("verbose", True)
    #
    ok = self.__reload(dictLocators, self.__dirPath, useCache=self.__useCache)
    #
    dApi = None
    if ok:
        mU = MarshalUtil()
        containerList = []
        for dictLocator in dictLocators:
            cacheFilePath = os.path.join(self.__dirPath, self.__fileU.getFileName(dictLocator))
            containerList.extend(mU.doImport(cacheFilePath, fmt="mmcif-dict"))
        #
        dApi = DictionaryApi(containerList=containerList, consolidate=consolidate, replaceDefinition=replaceDefinition, verbose=verbose)
    return dApi
def testSubsetBuildMoleculeCacheFiltered(self):
    """Test construction of a filtered selection of chemical component definitions."""
    mU = MarshalUtil()
    fD = mU.doImport(self.__missedIdsPath, fmt="json")
    filterIdD = {ccId: True for ccId in fD["filteredIdList"]}
    self.__testBuildMoleculeCacheFiles(filterIdD=filterIdD, ccFileNamePrefix="cc-filtered")
def readDefaultDataTypeMap(self, locator, dataTyping="ANY"):
    """Read data file containing application default data type mapping

    data_rcsb_data_type_map
      loop_
      _pdbx_data_type_application_map.application_name
      _pdbx_data_type_application_map.type_code
      _pdbx_data_type_application_map.app_type_code
      _pdbx_data_type_application_map.app_precision_default
      _pdbx_data_type_application_map.app_width_default
      # .... type mapping data ...

    Return (dict): map[cifType] -> appType, width, precision

    mapD['cif_type_code'] -> ['application_name', 'app_type_code', 'app_precision_default', 'app_width_default', 'type_code']
    """
    try:
        #
        mapD = {}
        mU = MarshalUtil(workPath=self.__workPath)
        containerList = mU.doImport(locator, fmt="mmcif", enforceAscii=True, useCharRefs=True, raiseExceptions=True)
        for container in containerList:
            if container.getName() == "rcsb_data_type_map":
                catObj = container.getObj("pdbx_data_type_application_map")
                for ii in range(catObj.getRowCount()):
                    dD = catObj.getRowAttributeDict(ii)
                    if dD["application_name"] == dataTyping:
                        mapD[dD["type_code"]] = {k: dD[k] for k in ["app_type_code", "application_name", "type_code"]}
                        mapD[dD["type_code"]].update({k: int(dD[k]) for k in ["app_precision_default", "app_width_default"]})
        return mapD
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return {}
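# Companion sketch for readDefaultDataTypeMap(); dtInfo again stands in for a
# hypothetical instance of the enclosing class, and the example entry mirrors
# the shape assembled in the loop above:
#
#   typeMapD = dtInfo.readDefaultDataTypeMap(locator, dataTyping="SQL")
#   # typeMapD["float"] -> {"application_name": "SQL", "type_code": "float",
#   #                       "app_type_code": "FLOAT",
#   #                       "app_precision_default": 6, "app_width_default": 10}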
def __parseFasta(self, fastaPath, taxonPath, cachePath, dirPath, addTaxonomy=False):
    # input paths
    chemblTargetRawPath = os.path.join(dirPath, "chembl_targets_raw.fa.gz")
    mU = MarshalUtil(workPath=cachePath)
    oD = {}
    uD = {}
    missTax = 0
    taxonL = []
    try:
        if addTaxonomy:
            umP = UniProtIdMappingProvider(cachePath)
            umP.reload(useCache=True)
        #
        fD = mU.doImport(chemblTargetRawPath, fmt="fasta", commentStyle="default")
        #
        for seqId, sD in fD.items():
            chemblId = seqId.strip().split(" ")[0].strip()
            unpId = seqId[seqId.find("[") + 1: seqId.find("]")]
            seq = sD["sequence"]
            cD = {"sequence": seq, "uniprotId": unpId, "chemblId": chemblId}
            if addTaxonomy:
                taxId = umP.getMappedId(unpId, mapName="NCBI-taxon")
                cD["taxId"] = taxId if taxId else -1
                if not taxId:
                    missTax += 1
            #
            seqId = ""
            cL = []
            for k, v in cD.items():
                if k in ["sequence"]:
                    continue
                cL.append(str(v))
                cL.append(str(k))
            seqId = "|".join(cL)
            oD[seqId] = cD
            if addTaxonomy:
                taxonL.append("%s\t%s" % (seqId, taxId))
            #
            uD.setdefault(unpId, []).append(chemblId)
        #
        ok1 = mU.doExport(fastaPath, oD, fmt="fasta", makeComment=True)
        ok3 = True
        if addTaxonomy:
            ok3 = mU.doExport(taxonPath, taxonL, fmt="list")
        return ok1 & ok3
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    #
    return False
def getCCDefFile(self, ccFilePath, molBuildType="model-xyz", suppressHydrogens=False):
    """Fetch the molecule definition (ccPath) and build OE molecules for comparison."""
    #
    mU = MarshalUtil(workPath=self.__workPath)
    rdCcObjL = mU.doImport(ccFilePath, fmt="mmcif")
    oemf = OeMoleculeFactory()
    if not self.__verbose:
        oemf.setQuiet()
    ccId = oemf.setChemCompDef(rdCcObjL[0])
    oemf.build(molBuildType=molBuildType)

    if self.__verbose:
        logger.info(" CCId = %s", ccId)
        logger.info(" Title = %s", oemf.getTitle())
        logger.info(" SMILES = %s", oemf.getCanSMILES())
        logger.info(" SMILES (stereo) = %s", oemf.getIsoSMILES())
        logger.info(" Formula (Hill) = %s", oemf.getFormula())
        logger.info(" InChI key = %s", oemf.getInChIKey())
        logger.info(" InChI = %s", oemf.getInChI())

    fD = {"Formula": oemf.getFormula(), "SMILES": oemf.getCanSMILES(), "SMILES_STEREO": oemf.getIsoSMILES(), "InChI": oemf.getInChI(), "InChIKey": oemf.getInChIKey()}

    if suppressHydrogens:
        tMol = oemf.getGraphMolSuppressH()
    else:
        tMol = oemf.getMol()

    fD["OEMOL"] = tMol
    fD["xyz"] = oemf.getAtomDetails(xyzType="model")

    return (ccId, tMol, fD)
class ValidationReportReaderTests(unittest.TestCase):
    def setUp(self):
        self.__mU = MarshalUtil()
        self.__dirPath = os.path.join(HERE, "test-data")
        self.__workPath = os.path.join(HERE, "test-output")
        self.__exampleFileXray = os.path.join(self.__dirPath, "3rer_validation.xml")
        self.__cifFileXray = os.path.join(self.__workPath, "3rer_validation.cif")
        #
        self.__exampleFileNmr = os.path.join(self.__dirPath, "6drg_validation.xml")
        self.__cifFileNmr = os.path.join(self.__workPath, "6drg_validation.cif")
        #
        self.__exampleFileEm = os.path.join(self.__dirPath, "5a32_validation.xml")
        self.__cifFileEm = os.path.join(self.__workPath, "5a32_validation.cif")
        #
        self.__dictionaryMapPath = os.path.join(HERE, "test-data", "vrpt_dictmap_v4.json")
        self.__dictionaryMap = self.__mU.doImport(self.__dictionaryMapPath, fmt="json")

    def tearDown(self):
        pass

    def testReadXrayValidationReport(self):
        vrr = ValidationReportReader(self.__dictionaryMap)
        xrt = self.__mU.doImport(self.__exampleFileXray, fmt="xml")
        cL = vrr.toCif(xrt)
        ok = self.__mU.doExport(self.__cifFileXray, cL, fmt="mmcif")
        self.assertTrue(ok)

    def testReadNmrValidationReport(self):
        vrr = ValidationReportReader(self.__dictionaryMap)
        xrt = self.__mU.doImport(self.__exampleFileNmr, fmt="xml")
        cL = vrr.toCif(xrt)
        ok = self.__mU.doExport(self.__cifFileNmr, cL, fmt="mmcif")
        self.assertTrue(ok)

    def testReadEmValidationReport(self):
        vrr = ValidationReportReader(self.__dictionaryMap)
        xrt = self.__mU.doImport(self.__exampleFileEm, fmt="xml")
        cL = vrr.toCif(xrt)
        ok = self.__mU.doExport(self.__cifFileEm, cL, fmt="mmcif")
        self.assertTrue(ok)
def jsonSchemaCompare(self, databaseName, collectionName, encodingType, level, extraOpts=None):
    """Compare the computed JSON schema definition with the current source/cached version.

    Args:
        databaseName (str): schema name
        collectionName (str): collection name
        encodingType (str): schema data type conventions (JSON|BSON)
        level (str): metadata level (min|full)
        extraOpts (str): extra schema construction options

    Returns:
        (str): path to the difference file or None
    """
    mU = MarshalUtil(workPath=self.__workPath)
    schemaDiffPath = os.path.join(self.__cachePath, "schema_diff")
    mU.mkdir(schemaDiffPath)
    schemaLocator = self.__getJsonSchemaLocator(databaseName, collectionName, encodingType, level)
    fn = self.__fileU.getFileName(schemaLocator)
    schemaPath = os.path.join(self.__jsonSchemaCachePath, fn)
    #
    sD = self.makeSchema(databaseName, collectionName, encodingType=encodingType, level=level, saveSchema=False, extraOpts=extraOpts)
    v2 = self.__getSchemaVersion(sD)
    # ----
    # tPath = os.path.join(self.__jsonSchemaCachePath, self.__fileU.getFileName(schemaPath) + "-test")
    # logger.info("Exporting json schema to %s", tPath)
    # mU.doExport(tPath, sD, fmt="json", indent=3)
    # ----
    #
    sDCache = mU.doImport(schemaPath, fmt="json")
    v1 = self.__getSchemaVersion(sDCache)
    if not v1:
        logger.error("no version for %s - %s %s", schemaLocator, databaseName, collectionName)
    #
    numDiff, difD = self.schemaCompare(sDCache, sD)
    # jD = diff(sDCache, sD, marshal=True, syntax="explicit")
    diffPath = None
    if numDiff:
        logger.debug("diff for %s %s %s %s = \n%s", databaseName, collectionName, encodingType, level, pprint.pformat(difD, indent=3, width=100))
        bn, _ = os.path.splitext(fn)
        diffPath = os.path.join(schemaDiffPath, bn + "-" + v1 + "-" + v2 + "-diff.json")
        mU.doExport(diffPath, difD, fmt="json", indent=3)
    return diffPath
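# Sketch of a typical comparison run; the schemaProvider instance is assumed,
# and the database/collection names are borrowed from the test data referenced
# elsewhere in this section:
#
#   diffPath = schemaProvider.jsonSchemaCompare("pdbx_core", "pdbx_core_nonpolymer_entity", "BSON", "full")
#   if diffPath:
#       logger.info("Schema changed; difference file written to %s", diffPath)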
def getSchemaInfo(self, databaseName, dataTyping="ANY"):
    """Convenience method to return essential schema details for the input repository content type.

    Args:
        databaseName (str): schema name (e.g. pdbx, bird, chem_comp, ...)
        dataTyping (str, optional): application name for the target schema (e.g. ANY, SQL, ...)

    Returns:
        tuple: SchemaDefAccess(object), target database name, target collection name list, primary index attribute list
    """
    sd = None
    dbName = None
    collectionNameList = []
    docIndexD = {}
    try:
        mU = MarshalUtil(workPath=self.__workPath)
        schemaLocator = self.__getSchemaDefLocator(databaseName, dataTyping=dataTyping)
        if self.__rebuildFlag:
            filePath = os.path.join(self.__schemaCachePath, self.__fileU.getFileName(schemaLocator))
            self.makeSchemaDef(databaseName, dataTyping=dataTyping, saveSchema=True)
        else:
            filePath = self.__reload(schemaLocator, self.__schemaCachePath, useCache=self.__useCache)

        if not filePath:
            logger.error("Unable to recover schema %s (%s)", databaseName, dataTyping)
        logger.debug("ContentType %r dataTyping %r schemaLocator %r", databaseName, dataTyping, schemaLocator)
        schemaDef = mU.doImport(filePath, fmt="json")
        if schemaDef:
            logger.debug("Using cached schema definition for %s application %s", databaseName, dataTyping)
            sd = SchemaDefAccess(schemaDef)
            if sd:
                dbName = sd.getDatabaseName()
                collectionInfoList = sd.getCollectionInfo()
                logger.debug("Schema %s database name %s collections %r", databaseName, dbName, collectionInfoList)
                for cd in collectionInfoList:
                    collectionName = cd["NAME"]
                    collectionNameList.append(collectionName)
                    docIndexD[collectionName] = sd.getDocumentIndices(collectionName)
    except Exception as e:
        logger.exception("Retrieving schema %s for %s failing with %s", databaseName, dataTyping, str(e))
    return sd, dbName, collectionNameList, docIndexD
def fetch(self):
    try:
        provenanceFileCachePath = self.__reload(self.__provenanceLocator, self.__provenanceCachePath, useCache=self.__useCache)
        mU = MarshalUtil(workPath=self.__workPath)
        return mU.doImport(provenanceFileCachePath, fmt="json")
    except Exception as e:
        logger.exception("Failed retrieving provenance with %s", str(e))
    return {}
def __getRegistry(self, registryPath):
    """Read the dictionary registry file and return its registry content."""
    try:
        mU = MarshalUtil()
        obj = mU.doImport(registryPath, fmt="json")
        return obj["mmcif_dictionary_registry"]
    except Exception as e:
        logger.exception("Failing for %r with %s", registryPath, str(e))
    return None
def testGetCollectionObjects(self):
    sP = os.path.join(self.__cachePath, "json_schema_definitions", "json-full-db-pdbx_core-col-pdbx_core_nonpolymer_entity.json")
    mU = MarshalUtil()
    sD = mU.doImport(sP, fmt="json")
    logger.info("keys %r", list(sD.keys()))
    for ky in sD["properties"]:
        logger.info(" - %s", ky)
def fetchModelIndex(self):
    mD = {}
    try:
        mU = MarshalUtil(workPath=self.__cachePath)
        fp = self.__getModelIndexPath()
        mD = mU.doImport(fp, fmt="json")
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return mD
def __readSiftsSummaryFile(self, filePath, rowFormat="dict"):
    """Read input SIFTS summary file and return a list of dictionaries."""
    cL = []
    try:
        mU = MarshalUtil()
        cL = mU.doImport(filePath, fmt="csv", rowFormat=rowFormat)
        logger.debug("Container list %d", len(cL))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return cL
def fetchPathList(self):
    pathList = []
    fp = self.getIndexFilePath()
    try:
        mU = MarshalUtil()
        pathList = mU.doImport(fp, fmt="json")
    except Exception as e:
        logger.info("Failing for %r with %s", fp, str(e))
    return pathList
def __getItemCounts(self, itemCoverageFilePath):
    #
    mU = MarshalUtil()
    rowList = mU.doImport(itemCoverageFilePath, fmt="tdd", rowFormat="list")
    itemCountD = {}
    for row in rowList:
        itemCountD[row[0]] = int(row[1])
    #
    return itemCountD
def __getSearchResults(self):
    """Read search results and convert to a chemical component dictionary."""
    fp = self.getRawResultFilePath()
    mU = MarshalUtil(workPath=self.__cachePath)
    rawL = mU.doImport(fp, fmt="json")
    rD = {}
    for cD in rawL:
        rD.setdefault(cD["ccId"], []).append(cD)
    return rD
def __checkAssembledModels(self, assembleModelPath):
    catNameL = [
        "pdbx_chem_comp_model",
        "pdbx_chem_comp_model_atom",
        "pdbx_chem_comp_model_bond",
        "pdbx_chem_comp_model_descriptor",
        "pdbx_chem_comp_model_reference",
        "pdbx_chem_comp_model_feature",
        "pdbx_chem_comp_model_audit",
    ]
    mU = MarshalUtil(workPath=self.__cachePath)
    dataContainerL = mU.doImport(assembleModelPath, fmt="mmcif")
    logger.info("Read %d data containers", len(dataContainerL))
    rD = {}
    cnD = {}
    for dataContainer in dataContainerL:
        nm = dataContainer.getName()
        logger.debug("datacontainer %r", nm)
        if nm in cnD:
            logger.info("Duplicate container id %r", nm)
        cnD[nm] = True
        #
        pId = self.__parseId(nm)[0]
        cObj = dataContainer.getObj("pdbx_chem_comp_model")
        modelId = cObj.getValue("id", 0)
        if modelId != nm:
            logger.error("modelId %r datablock %r", modelId, nm)
        #
        tD = {}
        for catName in catNameL:
            cObj = dataContainer.getObj(catName)
            nRows = cObj.getRowCount()
            tD[catName] = nRows
        cObj = dataContainer.getObj("pdbx_chem_comp_model_feature")
        skip = False
        for ii in range(cObj.getRowCount()):
            fN = cObj.getValue("feature_name", ii)
            fV = cObj.getValue("feature_value", ii)
            if fN == "heavy_atoms_only" and fV == "Y":
                skip = True
                break
        if not skip:
            rD.setdefault(pId, []).append(tD)
    #
    for pId, tDL in rD.items():
        for catName in catNameL:
            minV = 100000
            maxV = -1
            for tD in tDL:
                minV = min(minV, tD[catName])
                maxV = max(maxV, tD[catName])
            if maxV - minV > 2 and catName not in ["pdbx_chem_comp_model_feature"]:
                logger.error("%s %s row count inconsistency %d %d", pId, catName, minV, maxV)
def testReadUrlTarfileFail(self):
    """Test the case to read URL target and extract a member (failing case)"""
    try:
        mU = MarshalUtil(workPath=self.__workPath)
        rL = mU.doImport(self.__urlTargetBad, fmt="tdd", rowFormat="list", tarMember="names.dmp")
        logger.info("Return is %r", rL)
        self.assertEqual(len(rL), 0)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def readTestFile(self, filePath):
    """Read input and return a list of dictionaries."""
    cL = []
    try:
        mU = MarshalUtil()
        cL = mU.doImport(filePath, fmt="tdd")
        logger.debug("Container list %d", len(cL))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return cL
def update(self, provD):
    ok = False
    try:
        provenanceFileCachePath = self.__reload(self.__provenanceLocator, self.__provenanceCachePath, useCache=self.__useCache)
        mU = MarshalUtil(workPath=self.__workPath)
        tD = mU.doImport(provenanceFileCachePath, fmt="json")
        tD.update(provD)
        ok = mU.doExport(provenanceFileCachePath, tD, fmt="json")
    except Exception as e:
        logger.exception("Failed updating provenance with %s", str(e))
    return ok
def __rebuildCache(self, urlTargetIsoLtwa, dirPath, useCache):
    """Rebuild the cache of ISO abbreviation term data

    Args:
        urlTargetIsoLtwa (str): URL for ISO4 LTWA title word abbreviations
        dirPath (str): cache path
        useCache (bool): flag to use cached files

    Returns:
        tuple: (dict) title word abbreviations
               (dict) language conflict dictionary
               (list) multi-word abbreviation targets

    Notes:
        ISO source file (tab delimited UTF-16LE) is maintained at the ISSN site -
        https://www.issn.org/wp-content/uploads/2013/09/LTWA_20160915.txt
    """
    aD = {}
    mU = MarshalUtil(workPath=dirPath)
    fmt = "json"
    ext = fmt if fmt == "json" else "pic"
    isoLtwaNamePath = os.path.join(dirPath, "iso-ltwa.%s" % ext)
    logger.debug("Using cache data path %s", dirPath)
    mU.mkdir(dirPath)
    if not useCache:
        for fp in [isoLtwaNamePath]:
            try:
                os.remove(fp)
            except Exception:
                pass
    #
    if useCache and mU.exists(isoLtwaNamePath):
        aD = mU.doImport(isoLtwaNamePath, fmt=fmt)
        logger.debug("Abbreviation name length %d", len(aD["abbrev"]))
    elif not useCache:
        # ------
        fU = FileUtil()
        logger.info("Fetch data from source %s in %s", urlTargetIsoLtwa, dirPath)
        fp = os.path.join(dirPath, fU.getFileName(urlTargetIsoLtwa))
        ok = fU.get(urlTargetIsoLtwa, fp)
        aD = self.__getLtwaTerms(dirPath, fp)
        ok = mU.doExport(isoLtwaNamePath, aD, fmt=fmt)
        logger.debug("abbrevD keys %r", list(aD.keys()))
        logger.debug("Caching %d ISO LTWA in %s status %r", len(aD["abbrev"]), isoLtwaNamePath, ok)
    #
    abbrevD = aD["abbrev"] if "abbrev" in aD else {}
    conflictD = aD["conflicts"] if "conflicts" in aD else {}
    multiWordTermL = aD["multi_word_abbrev"] if "multi_word_abbrev" in aD else []
    #
    return abbrevD, conflictD, multiWordTermL
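# Hedged sketch consuming the rebuild products above (caller context assumed):
#
#   abbrevD, conflictD, multiWordTermL = self.__rebuildCache(urlTargetIsoLtwa, dirPath, useCache=True)
#   # abbrevD maps a title word to its ISO-4 abbreviation, conflictD records
#   # language conflicts, and multiWordTermL lists the multi-word abbreviation
#   # targets extracted by __getLtwaTerms().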
def __reload(self, urlTarget, dirPath, useCache=True):
    """Reload local cache of mapping resources to support validation report reader and translator.

    Args:
        urlTarget (list, str): URL for schema mapping file
        dirPath (str): path to the directory containing cache files
        useCache (bool, optional): flag to use cached files. Defaults to True.

    Returns:
        (dict): schema mapping dictionary
    """
    mapD = {}
    #
    mU = MarshalUtil()
    fU = FileUtil()
    fn = fU.getFileName(urlTarget)
    mappingFilePath = os.path.join(dirPath, fn)
    mU.mkdir(dirPath)
    #
    # if not useCache:
    #     for fp in [mappingFilePath]:
    #         try:
    #             os.remove(fp)
    #         except Exception:
    #             pass
    # #
    logger.debug("Loading validation mapping data in %s (useCache %r)", fn, useCache)
    if useCache and fU.exists(mappingFilePath):
        mapD = mU.doImport(mappingFilePath, fmt="json")
    else:
        logger.info("Fetching url %s to resource file %s", urlTarget, mappingFilePath)
        tS = uuid.uuid4().hex
        tP = os.path.join(dirPath, "._" + tS)
        ok = fU.get(urlTarget, tP)
        if ok:
            mapD = mU.doImport(tP, fmt="json")
            os.replace(tP, mappingFilePath)
    return mapD
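# Design note on the fetch path in __reload() above: the download lands in a
# uuid-named temporary file that is then moved into place with os.replace().
# Because os.replace() is atomic when source and destination live on the same
# file system, a concurrent reader of the cache never observes a partially
# written mapping file.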
def readIndex(self):
    try:
        mU = MarshalUtil()
        if not mU.exists(self._indexFilePath):
            return False
        indexObj = mU.doImport(self._indexFilePath, fmt=self.__fmt)
        if indexObj is not None and len(indexObj) > 0:
            self._rL.extend(indexObj)
        return True
    except Exception as e:
        logger.error("Failing with %s", str(e))
    return False