def strip(self, inpPath, outPath, stripList=None):
    """Strip categories from inpPath and write to outPath"""
    if stripList is None:
        stripList = []
    try:
        myDataList = []
        with open(inpPath, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
        #
        myBlock = myDataList[0]
        myName = myBlock.getName()
        newContainer = DataContainer(myName)
        for objName in myBlock.getObjNameList():
            myObj = myBlock.getObj(objName)
            if myObj.getName() not in stripList:
                newContainer.append(myObj)
        #
        with open(outPath, "w") as ofh:
            pWr = PdbxWriter(ofh)
            pWr.setPreferSingleQuotes()
            pWr.write([newContainer])
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return False
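# A minimal usage sketch for strip() above (hedged): the owning class name
# "PdbxCategoryStripper" and the file/category names are illustrative
# assumptions, not taken from this codebase.
#
#   stripper = PdbxCategoryStripper()
#   ok = stripper.strip("input.cif", "stripped.cif", stripList=["atom_site", "atom_site_anisotrop"])
#   assert ok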
def test_read_small_data_file(self, reader_paths):
    myDataList = []
    ifh = open(str(reader_paths['pathPdbxDataFile']), "r")
    pRd = PdbxReader(ifh)
    pRd.read(myDataList)
    ifh.close()
    #
    assert len(myDataList) == 1
def test_read_write_data_file_stop(self, rw_data):
    myDataList = []
    with open(str(rw_data['pathTestFileStop']), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
    with open(str(rw_data['pathOutputFile3']), "w") as ofh:
        pWr = PdbxWriter(ofh)
        pWr.write(myDataList)
    assert len(myDataList) == 1
def test_read_write_data_file(self, writer_paths):
    myDataList = []
    with open(str(writer_paths['pathPdbxDataFile']), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
    with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
        pWr = PdbxWriter(ofh)
        pWr.write(myDataList)
    assert len(myDataList) == 1
def testUpdateDataFile(self):
    """Test case - update data file"""
    try:
        # Create an initial data file --
        #
        myDataList = []
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("entity_id")
        aCat.appendAttribute("auth_mon_id")
        aCat.appendAttribute("auth_mon_num")
        aCat.appendAttribute("pdb_chain_id")
        aCat.appendAttribute("ref_mon_id")
        aCat.appendAttribute("ref_mon_num")
        aCat.append([9, 2, 3, 4, 5, 6, 7])
        aCat.append([10, 2, 3, 4, 5, 6, 7])
        aCat.append([11, 2, 3, 4, 5, 6, 7])
        aCat.append([12, 2, 3, 4, 5, 6, 7])
        curContainer.append(aCat)
        myDataList.append(curContainer)
        ofh = open(self.__pathOutputFile1, "w")
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
        ofh.close()
        #
        #
        # Read and update the data -
        #
        myDataList = []
        ifh = open(self.__pathOutputFile1, "r")
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
        ifh.close()
        #
        myBlock = myDataList[0]
        # myBlock.printIt()
        myCat = myBlock.getObj("pdbx_seqtool_mapping_ref")
        # myCat.printIt()
        for iRow in range(0, myCat.getRowCount()):
            myCat.setValue("some value", "ref_mon_id", iRow)
            myCat.setValue(100, "ref_mon_num", iRow)
        with open(self.__pathOutputFile2, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
        #
        self.assertEqual(len(myDataList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __read(self, inputPath):
    """Read status file"""
    try:
        self.__myContainerList = []
        with open(inputPath, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(self.__myContainerList)
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return False
def __loaddaintschema(self):
    """Load da_internal schema from configuration"""
    schemapath = self.__ci.get('SITE_DA_INTERNAL_SCHEMA_PATH')
    if not schemapath:
        print("ERROR: SITE_DA_INTERNAL_SCHEMA_PATH not in site-config")
        return False
    if not len(self.__daintschema):
        with open(schemapath, 'r') as fin:
            prd = PdbxReader(fin)
            self.__daintschema = []
            prd.read(containerList=self.__daintschema, selectList=['rcsb_attribute_def', 'rcsb_table_abbrev'])
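# A minimal sketch (assumptions flagged) of the selective-read pattern used
# above: PdbxReader.read() accepts a selectList of category names to extract.
# The import path follows the py-mmcif package layout and the arguments are
# illustrative; adjust both to this repository.
from mmcif.io.PdbxReader import PdbxReader

def read_selected_categories(pathIn, categoryNames):
    """Return containers holding only the named categories from pathIn."""
    containerList = []
    with open(pathIn, "r") as ifh:
        PdbxReader(ifh).read(containerList, selectList=list(categoryNames))
    return containerList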
def testRowListInitialization(self):
    """Test case - Row list initialization of a data category and data block"""
    try:
        #
        fn = self.__pathOutputFile4
        attributeNameList = ["aOne", "aTwo", "aThree", "aFour", "aFive", "aSix", "aSeven", "aEight", "aNine", "aTen"]
        rowList = [
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        ]
        nameCat = "myCategory"
        #
        #
        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        # aCat.printIt()
        curContainer.append(aCat)
        # curContainer.printIt()
        #
        myContainerList = []
        myContainerList.append(curContainer)
        ofh = open(fn, "w")
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myContainerList)
        ofh.close()
        myContainerList = []
        ifh = open(fn, "r")
        pRd = PdbxReader(ifh)
        pRd.read(myContainerList)
        ifh.close()
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                logger.debug("Recovered data category %s", name)
                logger.debug("Attribute list %r", repr(aList))
                logger.debug("Row list %r", repr(rList))
        self.assertEqual(len(myContainerList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testReadBigDataFile(self):
    """Test case - read large data file"""
    try:
        #
        myDataList = []
        with open(self.__pathBigPdbxDataFile, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
        self.assertEqual(len(myDataList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testReadDataFile(self):
    """Test case - read data file"""
    try:
        #
        myDataList = []
        ifh = open(self.__pathPdbxDataFile, "r")
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
        ifh.close()
        self.assertEqual(len(myDataList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testReadSFDataFile(self):
    """Test case - read PDB structure factor data file and compute statistics on f/sig(f)."""
    try:
        #
        myContainerList = []
        with open(self.__pathSFDataFile, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        c0 = myContainerList[0]
        #
        catObj = c0.getObj("refln")
        if catObj is None:
            return False
        #
        nRows = catObj.getRowCount()
        #
        # Get column name index.
        #
        itDict = {}
        itNameList = catObj.getItemNameList()
        for idxIt, itName in enumerate(itNameList):
            itDict[str(itName).lower()] = idxIt
        #
        idf = itDict["_refln.f_meas_au"]
        idsigf = itDict["_refln.f_meas_sigma_au"]
        minR = 100
        maxR = -1
        sumR = 0
        icount = 0
        for row in catObj.getRowList():
            try:
                fV = float(row[idf])
                sigf = float(row[idsigf])
                ratio = sigf / fV
                # self.lfh.write(" %f %f %f\n" % (f, sigf, ratio))
                maxR = max(maxR, ratio)
                minR = min(minR, ratio)
                sumR += ratio
                icount += 1
            except Exception:
                continue
        logger.debug("f/sig(f) min %f max %f avg %f count %d", minR, maxR, sumR / icount, icount)
        self.assertEqual(icount, 99242)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testReadWriteDataFileStop(self):
    """Test case - data file read write test with stop tokens"""
    try:
        myDataList = []
        with open(self.__pathTestFileStop, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
        with open(self.__pathOutputFile3, "w") as ofh:
            pWr = PdbxWriter(ofh)
            pWr.write(myDataList)
        self.assertEqual(len(myDataList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testRowDictInitialization(self):
    """Test case - Row dictionary initialization of a data category and data block"""
    try:
        #
        rLen = 10
        fn = self.__pathOutputFile5
        attributeNameList = ["a", "b", "c", "d"]
        rowList = [{"a": 1, "b": 2, "c": 3, "d": 4} for _ in range(rLen)]
        nameCat = "myCategory"
        #
        #
        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
        aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
        aCat.extend(rowList)
        curContainer.append(aCat)
        aCat.renameAttributes({"a": "aa", "b": "bb", "c": "cc", "d": "dd"})
        aCat.setName("renamedCategory")
        #
        #
        myContainerList = []
        myContainerList.append(curContainer)
        ofh = open(fn, "w")
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myContainerList)
        ofh.close()
        myContainerList = []
        ifh = open(fn, "r")
        pRd = PdbxReader(ifh)
        pRd.read(myContainerList)
        ifh.close()
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                logger.debug("Recovered data category %s", name)
                logger.debug("Attribute list %r", repr(aList))
                logger.debug("Row list %r", repr(rList))
        self.assertEqual(len(myContainerList), 1)
        self.assertEqual(len(rList), 2 * rLen + 2)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testReadSFDataFile(self, reader_paths):
    myContainerList = []
    ifh = open(str(reader_paths['pathSFDataFile']), "r")
    pRd = PdbxReader(ifh)
    pRd.read(myContainerList)
    c0 = myContainerList[0]
    #
    catObj = c0.getObj("refln")
    if catObj is None:
        return False
    #
    nRows = catObj.getRowCount()
    #
    # Get column name index.
    #
    itDict = {}
    itNameList = catObj.getItemNameList()
    for idxIt, itName in enumerate(itNameList):
        itDict[str(itName).lower()] = idxIt
    #
    idf = itDict['_refln.f_meas_au']
    idsigf = itDict['_refln.f_meas_sigma_au']
    minR = 100
    maxR = -1
    sumR = 0
    icount = 0
    for row in catObj.getRowList():
        try:
            f = float(row[idf])
            sigf = float(row[idsigf])
            ratio = sigf / f
            # self.lfh.write(" %f %f %f\n" % (f, sigf, ratio))
            maxR = max(maxR, ratio)
            minR = min(minR, ratio)
            sumR += ratio
            icount += 1
        except Exception:
            continue
    ifh.close()
    print("f/sig(f) min %f max %f avg %f count %d\n" % (minR, maxR, sumR / icount, icount))
    assert icount == 99242
def test_single_row_file(self, rw_data):
    myDataList = []
    with open(str(rw_data['pathTestFile']), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
    myBlock = myDataList[0]
    myCat = myBlock.getObj('symmetry')
    print("----attribute list %r\n" % myCat.getAttributeList())
    row = myCat.getRow(0)
    print("----ROW %r\n" % row)
    #
    # myCat.dumpIt()
    with open(str(rw_data['pathOutputFile2']), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
    assert len(myDataList) == 1
def _testmerge(self, pathin):
    with open(pathin, "r") as ifh:
        pdbxr = PdbxReader(ifh)
        dlist = []
        pdbxr.read(dlist)
    # Two blocks
    self.assertEqual(len(dlist), 2, "Two blocks in merge")
    block = dlist[0]
    # Merge
    cat = block.getObj('struct')
    self.assertIsNotNone(cat, 'Missing struct category')
    self.assertEqual(cat.getRowCount(), 1, "Should only have a single row")
    rd = cat.getRowItemDict(0)
    # print(rd)
    self.assertEqual(rd, {'_struct.title': 'Start title', '_struct.pdbx_descriptor': 'Override descriptor', '_struct.new': 'Something to add'}, 'struct category mismatch')
    # Merge
    cat = block.getObj('exptl')
    self.assertIsNotNone(cat, 'Missing exptl category')
    self.assertEqual(cat.getRowCount(), 1, "Should only have a single row")
    rd = cat.getRowItemDict(0)
    # print(rd)
    self.assertEqual(rd, {'_exptl.method': 'NEW', '_exptl.entry_id': 'something', '_exptl.absorpt_coefficient_mu': '?', '_exptl.details': 'some details'}, 'exptl category mismatch')
    # Replace non-existent category
    cat = block.getObj('third')
    self.assertIsNotNone(cat, 'Missing third category')
    self.assertEqual(cat.getRowCount(), 3, "Should have three rows")
    rd = cat.getRowItemDict(0)
    self.assertEqual(rd, {'_third.id': '1', '_third.val': 'a'}, 'third category mismatch')
    rd = cat.getRowItemDict(1)
    self.assertEqual(rd, {'_third.id': '2', '_third.val': 'b'}, 'third category mismatch')
def __read(self):
    if not self.__filePath:
        return
    #
    try:
        ifh = open(self.__filePath, "r")
        pRd = PdbxReader(ifh)
        pRd.read(self.__dataList)
        ifh.close()
        if self.__dataList:
            self.__container = self.__dataList[0]
            self.__blockID = self.__container.getName()
            for idx, container in enumerate(self.__dataList):
                self.__dataMap[container.getName()] = idx
        #
    except Exception as e:
        self.__lfh.write("Read %s failed %s.\n" % (self.__filePath, str(e)))
def test_update_data_file(self, writer_paths):
    myDataList = []
    curContainer = DataContainer("myblock")
    aCat = DataCategory("pdbx_seqtool_mapping_ref")
    aCat.appendAttribute("ordinal")
    aCat.appendAttribute("entity_id")
    aCat.appendAttribute("auth_mon_id")
    aCat.appendAttribute("auth_mon_num")
    aCat.appendAttribute("pdb_chain_id")
    aCat.appendAttribute("ref_mon_id")
    aCat.appendAttribute("ref_mon_num")
    aCat.append((1, 2, 3, 4, 5, 6, 7))
    aCat.append((1, 2, 3, 4, 5, 6, 7))
    aCat.append((1, 2, 3, 4, 5, 6, 7))
    aCat.append((1, 2, 3, 4, 5, 6, 7))
    curContainer.append(aCat)
    myDataList.append(curContainer)
    with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
    #
    # Read and update the data -
    #
    myDataList = []
    with open(str(writer_paths['pathOutputFile1']), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
    #
    myBlock = myDataList[0]
    # myBlock.printIt()
    myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
    # myCat.printIt()
    for iRow in range(0, myCat.getRowCount()):
        myCat.setValue('some value', 'ref_mon_id', iRow)
        myCat.setValue(100, 'ref_mon_num', iRow)
    with open(str(writer_paths['pathOutputFile2']), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
    assert len(myDataList) == 1
def test_update_data_file(self, rw_data):
    myDataList = []
    curContainer = DataContainer("myblock")
    aCat = DataCategory("pdbx_seqtool_mapping_ref")
    aCat.appendAttribute("ordinal")
    aCat.appendAttribute("entity_id")
    aCat.appendAttribute("auth_mon_id")
    aCat.appendAttribute("auth_mon_num")
    aCat.appendAttribute("pdb_chain_id")
    aCat.appendAttribute("ref_mon_id")
    aCat.appendAttribute("ref_mon_num")
    aCat.append([9, 2, 3, 4, 5, 6, 7])
    aCat.append([10, 2, 3, 4, 5, 6, 7])
    aCat.append([11, 2, 3, 4, 5, 6, 7])
    aCat.append([12, 2, 3, 4, 5, 6, 7])
    curContainer.append(aCat)
    myDataList.append(curContainer)
    ofh = open(str(rw_data['pathOutputFile1']), "w")
    pdbxW = PdbxWriter(ofh)
    pdbxW.write(myDataList)
    ofh.close()
    myDataList = []
    ifh = open(str(rw_data['pathOutputFile1']), "r")
    pRd = PdbxReader(ifh)
    pRd.read(myDataList)
    ifh.close()
    myBlock = myDataList[0]
    myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
    for iRow in range(0, myCat.getRowCount()):
        myCat.setValue('some value', 'ref_mon_id', iRow)
        myCat.setValue(100, 'ref_mon_num', iRow)
    with open(str(rw_data['pathOutputFile2']), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
    assert len(myDataList) == 1
def __testReaders(self, fPath):
    """Tests Python and IoAdapter readers and checks values"""
    # Python reader
    myContainerList = []
    with open(fPath, "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myContainerList)
    self.__testValues(myContainerList)
    # C++ IoAdapter reader
    try:
        io = IoAdapter(raiseExceptions=True)
        containerList = io.readFile(fPath, outDirPath=self.__pathOutputDir)
        logger.debug("Read %d data blocks", len(containerList))
        self.assertEqual(len(containerList), 1)
    except Exception as e:
        logger.error("Failing with %s", str(e))
        self.fail()
    self.__testValues(containerList)
def test_row_list_initialization(self, rw_data):
    fn = rw_data['pathOutputFile4']
    attributeNameList = ['aOne', 'aTwo', 'aThree', 'aFour', 'aFive', 'aSix', 'aSeven', 'aEight', 'aNine', 'aTen']
    rowList = [
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    ]
    nameCat = 'myCategory'
    curContainer = DataContainer("myblock")
    aCat = DataCategory(nameCat, attributeNameList, rowList)
    curContainer.append(aCat)
    myContainerList = []
    myContainerList.append(curContainer)
    ofh = open(str(fn), "w")
    pdbxW = PdbxWriter(ofh)
    pdbxW.write(myContainerList)
    ofh.close()
    myContainerList = []
    ifh = open(str(fn), "r")
    pRd = PdbxReader(ifh)
    pRd.read(myContainerList)
    ifh.close()
    for container in myContainerList:
        for objName in container.getObjNameList():
            name, aList, rList = container.getObj(objName).get()
            print("Recovered data category %s\n" % name)
            print("Attribute list %r\n" % repr(aList))
            print("Row list %r\n" % repr(rList))
    assert len(myContainerList) == 1
def test_row_dict_initialization(self, rw_data):
    rLen = 10
    fn = rw_data['pathOutputFile5']
    attributeNameList = ['a', 'b', 'c', 'd']
    rowList = [{'a': 1, 'b': 2, 'c': 3, 'd': 4} for _ in range(rLen)]
    nameCat = 'myCategory'
    #
    #
    curContainer = DataContainer("myblock")
    aCat = DataCategory(nameCat, attributeNameList, rowList)
    aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
    aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
    aCat.extend(rowList)
    curContainer.append(aCat)
    aCat.renameAttributes({'a': 'aa', 'b': 'bb', 'c': 'cc', 'd': 'dd'})
    aCat.setName('renamedCategory')
    #
    #
    myContainerList = []
    myContainerList.append(curContainer)
    ofh = open(str(fn), "w")
    pdbxW = PdbxWriter(ofh)
    pdbxW.write(myContainerList)
    ofh.close()
    myContainerList = []
    ifh = open(str(fn), "r")
    pRd = PdbxReader(ifh)
    pRd.read(myContainerList)
    ifh.close()
    for container in myContainerList:
        for objName in container.getObjNameList():
            name, aList, rList = container.getObj(objName).get()
            print("Recovered data category %s\n" % name)
            print("Attribute list %r\n" % repr(aList))
            print("Row list %r\n" % repr(rList))
    assert len(myContainerList) == 1
    assert len(rList) == 2 * rLen + 2
def _testmerge(self, pathin):
    with open(pathin, "r") as ifh:
        pdbxr = PdbxReader(ifh)
        dlist = []
        pdbxr.read(dlist)
    # Two blocks
    self.assertEqual(len(dlist), 2, "Two blocks in merge")
    block = dlist[0]
    # Merge
    cat = block.getObj("struct")
    self.assertIsNotNone(cat, "Missing struct category")
    self.assertEqual(cat.getRowCount(), 1, "Should only have a single row")
    rd = cat.getRowItemDict(0)
    # print(rd)
    self.assertEqual(rd, {"_struct.title": "Start title", "_struct.pdbx_descriptor": "Override descriptor", "_struct.new": "Something to add"}, "struct category mismatch")
    # Merge
    cat = block.getObj("exptl")
    self.assertIsNotNone(cat, "Missing exptl category")
    self.assertEqual(cat.getRowCount(), 1, "Should only have a single row")
    rd = cat.getRowItemDict(0)
    # print(rd)
    self.assertEqual(
        rd,
        {"_exptl.method": "NEW", "_exptl.entry_id": "something", "_exptl.absorpt_coefficient_mu": "?", "_exptl.details": "some details"},
        "exptl category mismatch"
    )
    # Replace non-existent category
    cat = block.getObj("third")
    self.assertIsNotNone(cat, "Missing third category")
    self.assertEqual(cat.getRowCount(), 3, "Should have three rows")
    rd = cat.getRowItemDict(0)
    self.assertEqual(rd, {"_third.id": "1", "_third.val": "a"}, "third category mismatch")
    rd = cat.getRowItemDict(1)
    self.assertEqual(rd, {"_third.id": "2", "_third.val": "b"}, "third category mismatch")
def readFile(self, inputFilePath, enforceAscii=False, selectList=None, excludeFlag=False, logFilePath=None, outDirPath=None, cleanUp=False, **kwargs):
    """Parse the data blocks in the input mmCIF format data file into a list of data or definition containers.
    The data category content within each data block is stored as a collection of DataCategory objects within each container.

    Args:
        inputFilePath (string): Input file path
        enforceAscii (bool, optional): Flag to require ASCII encoding. See encoding error options.
        selectList (List, optional): List of data category names to be extracted or excluded from the input file (default: select/extract)
        excludeFlag (bool, optional): Flag to indicate selectList should be treated as an exclusion list
        logFilePath (string, optional): Log file path (if not provided this will be derived from the input file.)
        outDirPath (string, optional): Path for translated/re-encoded files and default logfiles.
        cleanUp (bool, optional): Flag to automatically remove logs and temporary files on exit.
        **kwargs: Placeholder for unsupported keyword arguments.

    Returns:
        List of DataContainers: Contents of input file parsed into a list of DataContainer objects.
    """
    if kwargs:
        logger.warning("Unsupported keyword arguments %s", kwargs.keys())
    filePath = str(inputFilePath)
    # oPath = outDirPath if outDirPath else '.'
    oPath = self._chooseTemporaryPath(inputFilePath, outDirPath=outDirPath)
    containerList = []
    if enforceAscii:
        encoding = "ascii"
    else:
        encoding = "utf-8"
    try:
        #
        lPath = logFilePath
        if not lPath:
            lPath = self._getDefaultFileName(filePath, fileType="cif-parser-log", outDirPath=oPath)
        #
        self._setLogFilePath(lPath)
        # ---
        if self.__isLocal(filePath) and not self._fileExists(filePath):
            return []
        #
        if sys.version_info[0] > 2:
            if self.__isLocal(filePath):
                filePath = self._uncompress(filePath, oPath)
                with open(filePath, "r", encoding=encoding, errors=self._readEncodingErrors) as ifh:
                    pRd = PdbxReader(ifh)
                    pRd.read(containerList, selectList, excludeFlag=excludeFlag)
            else:
                with closing(requests.get(filePath)) as ifh:
                    it = (line.decode(encoding) for line in ifh.iter_lines())
                    pRd = PdbxReader(it)
                    pRd.read(containerList, selectList, excludeFlag=excludeFlag)
        else:
            if self.__isLocal(filePath):
                filePath = self._uncompress(filePath, oPath)
                if enforceAscii:
                    with io.open(filePath, "r", encoding=encoding, errors=self._readEncodingErrors) as ifh:
                        pRd = PdbxReader(ifh)
                        pRd.read(containerList, selectList, excludeFlag=excludeFlag)
                else:
                    with open(filePath, "r") as ifh:
                        pRd = PdbxReader(ifh)
                        pRd.read(containerList, selectList, excludeFlag=excludeFlag)
            else:
                with closing(requests.get(filePath)) as ifh:
                    it = (line.decode(encoding) for line in ifh.iter_lines())
                    pRd = PdbxReader(it)
                    pRd.read(containerList, selectList, excludeFlag=excludeFlag)
        if cleanUp:
            self._cleanupFile(lPath, lPath)
            self._cleanupFile(filePath != str(inputFilePath), filePath)
        self._setContainerProperties(containerList, locator=str(inputFilePath), load_date=self._getTimeStamp(), uid=uuid.uuid4().hex)
    except (PdbxError, PdbxSyntaxError) as ex:
        msg = "File %r with %s" % (filePath, str(ex))
        self._appendToLog([msg])
        self._cleanupFile(lPath and cleanUp, lPath)
        if self._raiseExceptions:
            raise_from(ex, None)
            # raise ex from None
    except Exception as e:
        msg = "File %r with %s" % (filePath, str(e))
        self._appendToLog([msg])
        self._cleanupFile(lPath and cleanUp, lPath)
        if self._raiseExceptions:
            raise e
        else:
            logger.error("Failing read for %s with %s", filePath, str(e))
    return containerList
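# A usage sketch for readFile() (hedged): it mirrors the IoAdapter call
# pattern in __testReaders above. The import path assumes the py-mmcif
# package layout, and "example.cif" is an illustrative input; adjust both
# to the local environment.
from mmcif.io.IoAdapterPy import IoAdapterPy

def read_all_blocks(pathIn):
    """Read every data block from pathIn, raising on parser errors."""
    io = IoAdapterPy(raiseExceptions=True)
    containerList = io.readFile(pathIn)
    for container in containerList:
        print(container.getName(), container.getObjNameList())
    return containerList

# read_all_blocks("example.cif")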
def __readSchemaMap(self, schemaMapFile):
    """Read RCSB schema map file and return the list of table names, attribute definitions,
    attribute mapping, and table and attribute abbreviations.
    """
    tableNameList = []
    atDefList = []
    atMapList = []
    tableAbbrevD = {}
    attribAbbrevD = {}
    try:
        #
        myContainerList = []
        ifh = open(schemaMapFile, "r", encoding="utf-8")
        pRd = PdbxReader(ifh)
        pRd.read(myContainerList)
        ifh.close()
        #
        for myContainer in myContainerList:
            cN = str(myContainer.getName()).lower()
            #
            # Read schema details --
            #
            if cN == "rcsb_schema":
                #
                catObj = myContainer.getObj("rcsb_table")
                if catObj is not None:
                    i1 = catObj.getAttributeIndex("table_name")
                    for row in catObj.getRowList():
                        tableNameList.append(row[i1])
                #
                catObj = myContainer.getObj("rcsb_attribute_def")
                atList = ["table_name", "attribute_name", "data_type", "index_flag", "null_flag", "width", "precision", "populated"]
                indList = []
                if catObj is not None:
                    for at in atList:
                        indList.append(catObj.getAttributeIndex(at))
                    for row in catObj.getRowList():
                        dD = {}
                        for ii, at in enumerate(atList):
                            dD[at] = row[indList[ii]]
                        atDefList.append(dD)
                #
                # _rcsb_table_abbrev.table_name
                # _rcsb_table_abbrev.table_abbrev
                #
                catObj = myContainer.getObj("rcsb_table_abbrev")
                if catObj is not None:
                    i1 = catObj.getAttributeIndex("table_name")
                    i2 = catObj.getAttributeIndex("table_abbrev")
                    for row in catObj.getRowList():
                        tableAbbrevD[row[i1]] = row[i2]
                #
                # _rcsb_attribute_abbrev.table_name
                # _rcsb_attribute_abbrev.attribute_name
                # _rcsb_attribute_abbrev.attribute_abbrev
                #
                catObj = myContainer.getObj("rcsb_attribute_abbrev")
                if catObj is not None:
                    i1 = catObj.getAttributeIndex("table_name")
                    i2 = catObj.getAttributeIndex("attribute_name")
                    i3 = catObj.getAttributeIndex("attribute_abbrev")
                    for row in catObj.getRowList():
                        if row[i1] not in attribAbbrevD:
                            attribAbbrevD[row[i1]] = {}
                        attribAbbrevD[row[i1]][row[i2]] = row[i3]
            #
            # Read attribute mapping details --
            #
            elif cN == "rcsb_schema_map":
                catObj = myContainer.getObj("rcsb_attribute_map")
                atList = ["target_table_name", "target_attribute_name", "source_item_name", "condition_id", "function_id"]
                indList = []
                if catObj is not None:
                    for at in atList:
                        indList.append(catObj.getAttributeIndex(at))
                    for row in catObj.getRowList():
                        dD = {}
                        for ii, at in enumerate(atList):
                            dD[at] = row[indList[ii]]
                        atMapList.append(dD)
            else:
                logger.error("+ERROR - unanticipated data container %s", cN)
    except Exception as e:
        logger.error("+ERROR - error processing schema map file %s", schemaMapFile)
        logger.exception("Failing with %s", str(e))
    return tableNameList, atDefList, atMapList, tableAbbrevD, attribAbbrevD
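# A sketch of consuming __readSchemaMap() (hedged): because the method is
# private/name-mangled, the call is shown as it would appear inside the
# owning class; the schema map path is illustrative.
#
#   (tableNameList, atDefList, atMapList,
#    tableAbbrevD, attribAbbrevD) = self.__readSchemaMap("schema_map_pdbx.cif")
#   logger.info("Loaded %d tables, %d attribute definitions, %d mappings",
#               len(tableNameList), len(atDefList), len(atMapList))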