def strip(self, inpPath, outPath, stripList=None):
    """Strip categories from inpPath and write to outPath.

    Args:
        inpPath (str): input mmCIF file path.
        outPath (str): output mmCIF file path.
        stripList (list, optional): category names to remove from the first
            data block; defaults to removing nothing.

    Returns:
        bool: True on success, False otherwise.
    """
    # Avoid the shared mutable default-argument pitfall (original used []).
    stripList = stripList if stripList is not None else []
    try:
        myDataList = []
        with open(inpPath, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
        #
        # Only the first data block is processed.
        myBlock = myDataList[0]
        myName = myBlock.getName()
        newContainer = DataContainer(myName)
        for objName in myBlock.getObjNameList():
            myObj = myBlock.getObj(objName)
            if myObj.getName() not in stripList:
                newContainer.append(myObj)
        #
        with open(outPath, "w") as ofh:
            pWr = PdbxWriter(ofh)
            pWr.setPreferSingleQuotes()
            pWr.write([newContainer])
        return True
    except Exception as e:
        # Lazy %-style logging args (original eagerly formatted the message).
        logger.exception("Failing with %s", str(e))
        return False
def testSerialize(self):
    """Round-trip test: read a text CIF, rebuild it with dictionary-typed
    categories, serialize to binary CIF, deserialize, and translate back to
    a text CIF.  Exercises both string-storage modes of the binary writer.
    """
    try:
        for storeStringsAsBytes in [True, False]:
            tcL = []
            ioPy = IoAdapter()
            containerList = ioPy.readFile(self.__pathTextCif)
            # Rebuild each container, replacing each category with a
            # dictionary-typed copy.
            for container in containerList:
                cName = container.getName()
                tc = DataContainer(cName)
                for catName in container.getObjNameList():
                    dObj = container.getObj(catName)
                    tObj = DataCategoryTyped(dObj, dictionaryApi=self.__dApi, copyInputData=True)
                    tc.append(tObj)
                tcL.append(tc)
            #
            bcw = BinaryCifWriter(self.__dApi, storeStringsAsBytes=storeStringsAsBytes, applyTypes=False, useFloat64=True)
            bcw.serialize(self.__testBcifOutput, tcL)
            # NOTE(review): both assertions below compare an object with
            # itself and are therefore always true -- possibly intended to
            # compare containerList[0] against tcL[0]; confirm intent.
            self.assertEqual(containerList[0], containerList[0])
            self.assertEqual(tcL[0], tcL[0])
            bcr = BinaryCifReader(storeStringsAsBytes=storeStringsAsBytes)
            cL = bcr.deserialize(self.__testBcifOutput)
            #
            ioPy = IoAdapter()
            ok = ioPy.writeFile(self.__testBcifTranslated, cL)
            self.assertTrue(ok)
            # __same() presumably performs a content comparison between the
            # typed input and the deserialized result -- see its definition.
            self.assertTrue(self.__same(tcL[0], cL[0]))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_single_row(self, rw_data):
    """Write a category whose 'details' values begin with reserved CIF
    tokens (data_, loop_, save_, leading underscore) and verify one
    container is written."""
    containerList = []
    block = DataContainer("myblock")
    cat = DataCategory("pdbx_seqtool_mapping_ref")
    for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                     "pdb_chain_id", "ref_mon_id", "ref_mon_num", "details"):
        cat.appendAttribute(attrName)
    for detailValue in ('data_my_big_data_file', 'loop_my_big_data_loop',
                        'save_my_big_data_saveframe', '_category.item'):
        cat.append([1, 2, 3, 4, 5, 6, 7, detailValue])
    block.append(cat)
    readBack = block.getObj("pdbx_seqtool_mapping_ref")
    print("----attribute list %r\n" % readBack.getAttributeList())
    firstRow = readBack.getRow(0)
    print("----ROW %r\n" % firstRow)
    with open(str(rw_data['pathOutputFile2']), "w") as ofh:
        containerList.append(block)
        writer = PdbxWriter(ofh)
        writer.write(containerList)
    assert len(containerList) == 1
def testWriteDataFile(self):
    """Test case - write data file"""
    try:
        container = DataContainer("myblock")
        category = DataCategory("pdbx_seqtool_mapping_ref")
        for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                         "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            category.appendAttribute(attrName)
        # Four identical rows followed by one reversed row.
        for _ in range(4):
            category.append([1, 2, 3, 4, 5, 6, 7])
        category.append([7, 6, 5, 4, 3, 2, 1])
        container.append(category)
        containerList = [container]
        with open(self.__pathOutputFile1, "w") as ofh:
            writer = PdbxWriter(ofh)
            writer.write(containerList)
        self.assertEqual(len(containerList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testUpdateDataFile(self):
    """Test case - update data file.

    Writes an initial file, reads it back, updates two items in every row,
    and rewrites the result to a second file.
    """
    try:
        # Create an initial data file --
        myDataList = []
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("entity_id")
        aCat.appendAttribute("auth_mon_id")
        aCat.appendAttribute("auth_mon_num")
        aCat.appendAttribute("pdb_chain_id")
        aCat.appendAttribute("ref_mon_id")
        aCat.appendAttribute("ref_mon_num")
        aCat.append([9, 2, 3, 4, 5, 6, 7])
        aCat.append([10, 2, 3, 4, 5, 6, 7])
        aCat.append([11, 2, 3, 4, 5, 6, 7])
        aCat.append([12, 2, 3, 4, 5, 6, 7])
        curContainer.append(aCat)
        myDataList.append(curContainer)
        # with-blocks replace the original bare open()/close() pairs so the
        # handles are closed even if the writer/reader raises.
        with open(self.__pathOutputFile1, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
        #
        # Read and update the data -
        #
        myDataList = []
        with open(self.__pathOutputFile1, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
        #
        myBlock = myDataList[0]
        myCat = myBlock.getObj("pdbx_seqtool_mapping_ref")
        for iRow in range(myCat.getRowCount()):
            myCat.setValue("some value", "ref_mon_id", iRow)
            myCat.setValue(100, "ref_mon_num", iRow)
        with open(self.__pathOutputFile2, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
        #
        self.assertEqual(len(myDataList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testRowListInitialization(self):
    """Test case - Row list initialization of a data category and data block.

    Builds a category from an attribute list plus a row list, round-trips it
    through a file, and verifies a single container is recovered.
    """
    try:
        fn = self.__pathOutputFile4
        attributeNameList = ["aOne", "aTwo", "aThree", "aFour", "aFive",
                             "aSix", "aSeven", "aEight", "aNine", "aTen"]
        # Ten distinct (non-aliased) identical rows, as in the original.
        rowList = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] for _ in range(10)]
        nameCat = "myCategory"
        #
        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        curContainer.append(aCat)
        #
        myContainerList = [curContainer]
        # with-blocks replace the original bare open()/close() pairs, which
        # leaked the handle if the writer/reader raised.
        with open(fn, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)
        myContainerList = []
        with open(fn, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                logger.debug("Recovered data category %s", name)
                logger.debug("Attribute list %r", repr(aList))
                logger.debug("Row list %r", repr(rList))
        self.assertEqual(len(myContainerList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __write_mmcif(self, pathout, coef, entry_id): """Writes out the specific map coefficients """ # Categories that will not be copied _striplist = [ 'audit', 'diffrn_radiation_wavelength', 'exptl_crystal', 'reflns_scale' ] # refln attributes to keep _keepattr = ['index_h', 'index_k', 'index_l', 'fom'] if coef == 'fo': _keepattr.extend(['pdbx_DELFWT', 'pdbx_DELPHWT']) else: _keepattr.extend(['pdbx_FWT', 'pdbx_PHWT']) # Datablockname blkname = "{}{}".format(entry_id, coef) new_cont = DataContainer(blkname) # Only care about first block blockin = self.__sf[0] for objname in blockin.getObjNameList(): if objname in _striplist: continue myobj = blockin.getObj(objname) # Make a copy of the original - as likely will need to modify modobj = copy.deepcopy(myobj) if objname == 'entry': modobj.setValue(entry_id, 'id', 0) if objname in ['cell', 'symmetry']: modobj.setValue(entry_id, 'entry_id', 0) if objname == 'refln': # Remove all but what we want # Make a copy to ensure not messed with during operation for attr in list(modobj.getAttributeList()): if attr not in _keepattr: modobj.removeAttribute(attr) new_cont.append(modobj) # new_cont.printIt() io = IoAdapterCore() # Write out a single block ret = io.writeFile(pathout, [new_cont]) return ret
def __processContent(self, cifFileObj):
    """Internal method to transfer parsed data from the wrapped input C++
    CifFile object into the list of Python DataContainer objects.

    Args:
        cifFileObj (wrapped CifFile object): Wrapped input C++ CifFile object

    Returns:
        list of DataContainer objects: List of Python DataContainer objects
    """
    containerList = []
    containerNameList = []
    try:
        # ----- Repackage the data content ----
        #
        containerList = []
        containerNameList = []
        # GetBlockNames takes the list to fill; list() copies the result
        # out of the wrapped object (presumably a C++ proxy -- verify).
        containerNameList = list(
            cifFileObj.GetBlockNames(containerNameList))
        for containerName in containerNameList:
            #
            aContainer = DataContainer(containerName)
            #
            block = cifFileObj.GetBlock(containerName)
            tableNameList = []
            tableNameList = list(block.GetTableNames(tableNameList))
            for tableName in tableNameList:
                table = block.GetTable(tableName)
                attributeNameList = list(table.GetColumnNames())
                numRows = table.GetNumRows()
                rowList = []
                for iRow in range(0, numRows):
                    row = table.GetRow(iRow)
                    # row = table.GetRow(iRow).decode('unicode_escape').encode('utf-8')
                    # row = [p.encode('ascii', 'xmlcharrefreplace') for p in table.GetRow(iRow)]
                    rowList.append(list(row))
                # copyInputData=False: rowList was freshly built above, so
                # the category can take ownership without another copy.
                aCategory = DataCategory(
                    tableName,
                    attributeNameList,
                    rowList,
                    copyInputData=False,
                    raiseExceptions=self._raiseExceptions)
                aContainer.append(aCategory)
            containerList.append(aContainer)
    except Exception as e:
        msg = "Failing packaging with %s" % str(e)
        self._logError(msg)
    # Returns whatever was packaged before any failure (possibly empty).
    return containerList
def _createfile2(pathout):
    """Write a small multi-category mmCIF test file to *pathout*."""
    def _build(name, attrs, rows):
        # Build one DataCategory from attribute names and row tuples.
        cat = DataCategory(name)
        for attr in attrs:
            cat.appendAttribute(attr)
        for row in rows:
            cat.append(row)
        return cat

    container = DataContainer("test")
    container.append(_build("new", ["item"], [('1',)]))
    container.append(_build("second_category", ['row', 'rowb'], [('1', '2')]))
    container.append(_build("third", ['id', 'val'],
                            [('1', 'a'), ('2', 'b'), ('3', 'c')]))
    container.append(_build("exptl", ['method', 'entry_id'],
                            [('NEW', 'something')]))
    container.append(_build("struct", ['new', 'pdbx_descriptor'],
                            [('Something to add', 'Override descriptor')]))
    with open(pathout, "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write([container])
def _createfile2(pathout):
    """Create a small mmCIF test file with several categories at *pathout*."""
    # (category name, attribute names, rows) in output order.
    spec = [
        ("new", ("item",), [("1",)]),
        ("second_category", ("row", "rowb"), [("1", "2")]),
        ("third", ("id", "val"), [("1", "a"), ("2", "b"), ("3", "c")]),
        ("exptl", ("method", "entry_id"), [("NEW", "something")]),
        ("struct", ("new", "pdbx_descriptor"),
         [("Something to add", "Override descriptor")]),
    ]
    container = DataContainer("test")
    for catName, attrNames, rows in spec:
        cat = DataCategory(catName)
        for attrName in attrNames:
            cat.appendAttribute(attrName)
        for row in rows:
            cat.append(row)
        container.append(cat)
    with open(pathout, "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write([container])
def writeDefaultDataTypeMap(self, outPath, dataTyping="ANY"):
    """Write data file containing application default dictionary to
    application data type mapping.

        data_rcsb_data_type_map
          loop_
          _pdbx_data_type_application_map.application_name
          _pdbx_data_type_application_map.type_code
          _pdbx_data_type_application_map.app_type_code
          _pdbx_data_type_application_map.app_precision_default
          _pdbx_data_type_application_map.app_width_default
          # .... type mapping data ...

    Returns:
        bool: True on success, False otherwise.
    """
    try:
        mapCat = DataCategory("pdbx_data_type_application_map")
        for attrName in ("application_name", "type_code", "app_type_code",
                         "app_width_default", "app_precision_default"):
            mapCat.appendAttribute(attrName)
        typeRows = zip(DataTypeApplicationInfo.cifTypes,
                       DataTypeApplicationInfo.appTypes,
                       DataTypeApplicationInfo.defaultWidths,
                       DataTypeApplicationInfo.defaultPrecisions)
        for cifType, simpleType, defWidth, defPrecision in typeRows:
            if self.__isNull(cifType):
                continue
            mapCat.append([dataTyping, cifType, simpleType, defWidth, defPrecision])
        container = DataContainer("rcsb_data_type_map")
        container.append(mapCat)
        #
        mU = MarshalUtil(workPath=self.__workPath)
        ok = mU.doExport(outPath, [container], fmt="mmcif",
                         enforceAscii=True, useCharRefs=True,
                         raiseExceptions=True)
        return ok
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return False
def __generateData(self):
    """Generates data for test.  __testValues must be in sync"""
    # 'details' values deliberately start with reserved CIF tokens in
    # assorted letter cases.
    detailValues = (
        "data_my_big_data_file",
        "loop_my_big_data_loop",
        "save_my_big_data_saveframe",
        "_category.item",
        "Data_my_big_data_file",
        "Loop_my_big_data_loop",
        "Save_my_big_data_saveframe",
        "DatA_my_big_data_file",
    )
    cat = DataCategory("pdbx_test")
    cat.appendAttribute("ordinal")
    cat.appendAttribute("details")
    for ordinal, text in enumerate(detailValues, start=1):
        cat.append([ordinal, text])
    container = DataContainer("myblock")
    container.append(cat)
    return container
def testRowDictInitialization(self):
    """Test case - Row dictionary initialization of a data category and data block.

    Initializes a category from row dictionaries, appends/extends/renames it,
    round-trips through a file, and checks row and container counts.
    """
    try:
        rLen = 10
        fn = self.__pathOutputFile5
        attributeNameList = ["a", "b", "c", "d"]
        rowList = [{"a": 1, "b": 2, "c": 3, "d": 4} for _ in range(rLen)]
        nameCat = "myCategory"
        #
        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
        aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
        aCat.extend(rowList)
        curContainer.append(aCat)
        aCat.renameAttributes({"a": "aa", "b": "bb", "c": "cc", "d": "dd"})
        aCat.setName("renamedCategory")
        #
        myContainerList = [curContainer]
        # with-blocks replace the original bare open()/close() pairs, which
        # leaked the handle if the writer/reader raised.
        with open(fn, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)
        myContainerList = []
        with open(fn, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                logger.debug("Recovered data category %s", name)
                logger.debug("Attribute list %r", repr(aList))
                logger.debug("Row list %r", repr(rList))
        self.assertEqual(len(myContainerList), 1)
        # rLen rows from init + 2 append() calls + rLen rows from extend().
        self.assertEqual(len(rList), 2 * rLen + 2)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_update_data_file(self, writer_paths):
    """Write a data file, read it back, update two items in every row,
    and rewrite the result to a second file."""
    containerList = []
    block = DataContainer("myblock")
    cat = DataCategory("pdbx_seqtool_mapping_ref")
    for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                     "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
        cat.appendAttribute(attrName)
    for _ in range(4):
        cat.append((1, 2, 3, 4, 5, 6, 7))
    block.append(cat)
    containerList.append(block)
    with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
        PdbxWriter(ofh).write(containerList)
    #
    # Read and update the data -
    #
    containerList = []
    with open(str(writer_paths['pathOutputFile1']), "r") as ifh:
        PdbxReader(ifh).read(containerList)
    readBlock = containerList[0]
    readCat = readBlock.getObj('pdbx_seqtool_mapping_ref')
    for rowIdx in range(readCat.getRowCount()):
        readCat.setValue('some value', 'ref_mon_id', rowIdx)
        readCat.setValue(100, 'ref_mon_num', rowIdx)
    with open(str(writer_paths['pathOutputFile2']), "w") as ofh:
        PdbxWriter(ofh).write(containerList)
    assert len(containerList) == 1
def test_update_data_file(self, rw_data):
    """Write a data file, read it back, update two items in every row,
    and rewrite the result (pytest variant using the rw_data fixture)."""
    myDataList = []
    curContainer = DataContainer("myblock")
    aCat = DataCategory("pdbx_seqtool_mapping_ref")
    aCat.appendAttribute("ordinal")
    aCat.appendAttribute("entity_id")
    aCat.appendAttribute("auth_mon_id")
    aCat.appendAttribute("auth_mon_num")
    aCat.appendAttribute("pdb_chain_id")
    aCat.appendAttribute("ref_mon_id")
    aCat.appendAttribute("ref_mon_num")
    aCat.append([9, 2, 3, 4, 5, 6, 7])
    aCat.append([10, 2, 3, 4, 5, 6, 7])
    aCat.append([11, 2, 3, 4, 5, 6, 7])
    aCat.append([12, 2, 3, 4, 5, 6, 7])
    curContainer.append(aCat)
    myDataList.append(curContainer)
    # with-blocks replace the original bare open()/close() pairs so the
    # handles are closed even if the writer/reader raises.
    with open(str(rw_data['pathOutputFile1']), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
    myDataList = []
    with open(str(rw_data['pathOutputFile1']), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
    myBlock = myDataList[0]
    myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
    for iRow in range(myCat.getRowCount()):
        myCat.setValue('some value', 'ref_mon_id', iRow)
        myCat.setValue(100, 'ref_mon_num', iRow)
    with open(str(rw_data['pathOutputFile2']), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
    assert len(myDataList) == 1
def test_row_list_initialization(self, rw_data):
    """Initialize a category from an attribute list and row list, write it,
    read it back, and verify a single container is recovered."""
    fn = rw_data['pathOutputFile4']
    attributeNameList = ['aOne', 'aTwo', 'aThree', 'aFour', 'aFive',
                         'aSix', 'aSeven', 'aEight', 'aNine', 'aTen']
    # Ten distinct (non-aliased) identical rows, as in the original.
    rowList = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] for _ in range(10)]
    nameCat = 'myCategory'
    curContainer = DataContainer("myblock")
    aCat = DataCategory(nameCat, attributeNameList, rowList)
    curContainer.append(aCat)
    myContainerList = [curContainer]
    # with-blocks replace the original bare open()/close() pairs, which
    # leaked the handle if the writer/reader raised.
    with open(str(fn), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myContainerList)
    myContainerList = []
    with open(str(fn), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myContainerList)
    for container in myContainerList:
        for objName in container.getObjNameList():
            name, aList, rList = container.getObj(objName).get()
            print("Recovered data category %s\n" % name)
            print("Attribute list %r\n" % repr(aList))
            print("Row list %r\n" % repr(rList))
    assert len(myContainerList) == 1
def _createfile1(pathout):
    """Write a two-datablock mmCIF test file to *pathout*."""
    def _newCategory(name, attrs, row):
        # Build one single-row DataCategory.
        cat = DataCategory(name)
        for attr in attrs:
            cat.appendAttribute(attr)
        cat.append(row)
        return cat

    blocks = []
    first = DataContainer("myblock")
    first.append(_newCategory("pdbx_item_enumeration",
                              ("name", "value", "detail"), ('1', '2', '3')))
    first.append(_newCategory("exptl",
                              ('absorpt_coefficient_mu', 'entry_id',
                               'method', 'details'),
                              ('?', 'D_12345', 'X-RAY DIFFRACTION',
                               'some details')))
    first.append(_newCategory("struct", ('title', 'pdbx_descriptor'),
                              ('Start title', 'Start Descriptor')))
    blocks.append(first)
    # Second block
    second = DataContainer("secondblock")
    second.append(_newCategory("pdbx_item_enumeration",
                               ("name", "value", "detail"), ('3', '2', '1')))
    blocks.append(second)
    with open(pathout, "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write(blocks)
def _createfile1(pathout):
    """Create a two-datablock mmCIF test file at *pathout*."""
    # (block name, [(category, attribute names, row), ...]) in output order.
    spec = [
        ("myblock", [
            ("pdbx_item_enumeration", ("name", "value", "detail"),
             ("1", "2", "3")),
            ("exptl",
             ("absorpt_coefficient_mu", "entry_id", "method", "details"),
             ("?", "D_12345", "X-RAY DIFFRACTION", "some details")),
            ("struct", ("title", "pdbx_descriptor"),
             ("Start title", "Start Descriptor")),
        ]),
        # Second block
        ("secondblock", [
            ("pdbx_item_enumeration", ("name", "value", "detail"),
             ("3", "2", "1")),
        ]),
    ]
    containerList = []
    for blockName, categories in spec:
        container = DataContainer(blockName)
        for catName, attrNames, row in categories:
            cat = DataCategory(catName)
            for attrName in attrNames:
                cat.appendAttribute(attrName)
            cat.append(row)
            container.append(cat)
        containerList.append(container)
    with open(pathout, "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write(containerList)
def testSingleRow(self):
    """Test case - read /write single row and null row in data file"""
    try:
        containerList = []
        block = DataContainer("myblock")
        cat = DataCategory("pdbx_seqtool_mapping_ref")
        for attrName in ("ordinal", "entity_id", "auth_mon_id",
                         "auth_mon_num", "pdb_chain_id", "ref_mon_id",
                         "ref_mon_num", "details"):
            cat.appendAttribute(attrName)
        # 'details' values deliberately begin with reserved CIF tokens.
        for detailValue in ("data_my_big_data_file", "loop_my_big_data_loop",
                            "save_my_big_data_saveframe", "_category.item"):
            cat.append([1, 2, 3, 4, 5, 6, 7, detailValue])
        block.append(cat)
        #
        readBack = block.getObj("pdbx_seqtool_mapping_ref")
        logger.debug("----attribute list %r", readBack.getAttributeList())
        firstRow = readBack.getRow(0)
        logger.debug("----ROW %r", firstRow)
        #
        with open(self.__pathOutputFile2, "w") as ofh:
            containerList.append(block)
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(containerList)
        self.assertEqual(len(containerList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_row_dict_initialization(self, rw_data):
    """Initialize a category from row dictionaries, extend/append/rename it,
    round-trip through a file, and verify row and container counts."""
    rLen = 10
    fn = rw_data['pathOutputFile5']
    attributeNameList = ['a', 'b', 'c', 'd']
    rowList = [{'a': 1, 'b': 2, 'c': 3, 'd': 4} for _ in range(rLen)]
    nameCat = 'myCategory'
    #
    curContainer = DataContainer("myblock")
    aCat = DataCategory(nameCat, attributeNameList, rowList)
    aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
    aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
    aCat.extend(rowList)
    curContainer.append(aCat)
    aCat.renameAttributes({'a': 'aa', 'b': 'bb', 'c': 'cc', 'd': 'dd'})
    aCat.setName('renamedCategory')
    #
    myContainerList = [curContainer]
    # with-blocks replace the original bare open()/close() pairs, which
    # leaked the handle if the writer/reader raised.
    with open(str(fn), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myContainerList)
    myContainerList = []
    with open(str(fn), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myContainerList)
    for container in myContainerList:
        for objName in container.getObjNameList():
            name, aList, rList = container.getObj(objName).get()
            print("Recovered data category %s\n" % name)
            print("Attribute list %r\n" % repr(aList))
            print("Row list %r\n" % repr(rList))
    assert len(myContainerList) == 1
    # rLen rows from init + 2 append() calls + rLen rows from extend().
    assert len(rList) == 2 * rLen + 2
def test_write_data_file(self, rw_data):
    """Write a five-row category to a data file and check the container count."""
    block = DataContainer("myblock")
    cat = DataCategory("pdbx_seqtool_mapping_ref")
    for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                     "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
        cat.appendAttribute(attrName)
    ascending = [1, 2, 3, 4, 5, 6, 7]
    # Four ascending rows followed by one descending row.
    for _ in range(4):
        cat.append(list(ascending))
    cat.append(list(reversed(ascending)))
    block.append(cat)
    containerList = [block]
    with open(str(rw_data['pathOutputFile1']), "w") as ofh:
        PdbxWriter(ofh).write(containerList)
    assert len(containerList) == 1
def test_write_data_file(self, writer_paths):
    """Write rows whose fifth column varies widely in width, with the
    writer's alignment flag enabled."""
    block = DataContainer("myblock")
    cat = DataCategory("pdbx_seqtool_mapping_ref")
    for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                     "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
        cat.appendAttribute(attrName)
    for chainValue in ('55555555555555555555555555555555555555555555',
                       '5555',
                       '5555555555',
                       '5'):
        cat.append((1, 2, 3, 4, chainValue, 6, 7))
    block.append(cat)
    containerList = [block]
    with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write(containerList)
    assert len(containerList) == 1
def __dictionaryPragma(self, dictName, dictDescription, version, updateDate, comment):
    """Add CIF dictionary header details including name, version and history.

    Returns:
        Data container (object): data container with dictionary history
        and version details.
    """
    header = DataContainer("pdbx_vrpt_ext.dic")
    # One single-row category per header section, in output order.
    for catName, attrNames, row in (
        ("datablock",
         ["id", "description"],
         [dictName, dictDescription]),
        ("dictionary",
         ["title", "datablock_id", "version"],
         [dictName, dictName, version]),
        ("dictionary_history",
         ["version", "update", "revision"],
         [version, updateDate, comment]),
    ):
        cat = DataCategory(catName, attributeNameList=attrNames)
        cat.append(row)
        header.append(cat)
    return header
class mmCIFUtil:
    """Using pdbx mmCIF utility to parse mmCIF file"""

    def __init__(self, verbose=False, log=sys.stderr, filePath=None):  # pylint: disable=unused-argument
        #
        self.__verbose = verbose
        self.__lfh = log                 # log stream for error reporting
        self.__filePath = filePath
        self.__dataList = []             # parsed DataContainer objects, in file order
        self.__dataMap = {}              # block name -> index into __dataList
        self.__container = None          # currently-selected container
        self.__blockID = None            # name of the currently-selected block
        self.__read()

    #
    def __read(self):
        """Parse __filePath and select the first data block; errors are
        written to the log stream, not raised."""
        if not self.__filePath:
            return
        #
        try:
            ifh = open(self.__filePath, "r")
            pRd = PdbxReader(ifh)
            pRd.read(self.__dataList)
            ifh.close()
            if self.__dataList:
                self.__container = self.__dataList[0]
                self.__blockID = self.__container.getName()
                idx = 0
                for container in self.__dataList:
                    self.__dataMap[container.getName()] = idx
                    idx += 1
                #
            #
        except Exception as e:
            self.__lfh.write("Read %s failed %s.\n" % (self.__filePath, str(e)))
        #

    def GetBlockID(self):
        """Return first block ID"""
        return self.__blockID

    def GetValueAndItemByBlock(self, blockName, catName):
        """Get category values and item names"""
        dList = []
        iList = []
        if blockName not in self.__dataMap:
            return dList, iList
        #
        catObj = self.__dataList[self.__dataMap[blockName]].getObj(catName)
        if not catObj:
            return dList, iList
        #
        iList = catObj.getAttributeList()
        rowList = catObj.getRowList()
        for row in rowList:
            tD = {}
            for idxIt, itName in enumerate(iList):
                # Skip mmCIF null/unknown placeholders.
                if row[idxIt] != "?" and row[idxIt] != ".":
                    tD[itName] = row[idxIt]
            #
            if tD:
                dList.append(tD)
            #
        #
        return dList, iList

    def GetValueAndItem(self, catName):
        """Get category values and item names from the first data block."""
        dList, iList = self.GetValueAndItemByBlock(self.__blockID, catName)
        return dList, iList

    def GetValue(self, catName):
        """Get category values based on category name 'catName'.
        The results are stored in a list of dictionaries with item name as key
        """
        dList, _iList = self.GetValueAndItemByBlock(self.__blockID, catName)
        return dList

    def GetSingleValue(self, catName, itemName):
        """Get the first value of item name 'itemName' from 'itemName' item in 'catName' category."""
        text = ""
        dlist = self.GetValue(catName)
        if dlist:
            if itemName in dlist[0]:
                text = dlist[0][itemName]
        return text

    #
    def UpdateSingleRowValue(self, catName, itemName, row, value):
        """Update value in single row"""
        catObj = self.__container.getObj(catName)
        if catObj is None:
            return
        #
        catObj.setValue(value, itemName, row)

    def UpdateMultipleRowsValue(self, catName, itemName, value):
        """Update value in multiple rows"""
        catObj = self.__container.getObj(catName)
        if catObj is None:
            return
        #
        rowNo = catObj.getRowCount()
        for row in range(0, rowNo):
            catObj.setValue(value, itemName, row)
        #

    def AddBlock(self, blockID):
        """Add Data Block"""
        # The new block also becomes the currently-selected container.
        self.__container = DataContainer(blockID)
        self.__blockID = blockID
        self.__dataMap[blockID] = len(self.__dataList)
        self.__dataList.append(self.__container)

    def AddCategory(self, categoryID, items):
        """Add Category"""
        category = DataCategory(categoryID)
        for item in items:
            category.appendAttribute(item)
        #
        self.__container.append(category)

    def RemoveCategory(self, categoryID):
        """Remove a category from the currently-selected container."""
        return self.__container.remove(categoryID)

    def InsertData(self, categoryID, dataList):
        """Append rows from dataList to the named category."""
        catObj = self.__container.getObj(categoryID)
        if catObj is None:
            return
        #
        for data in dataList:
            catObj.append(data)
        #

    def WriteCif(self, outputFilePath=None):
        """Write out cif file"""
        if not outputFilePath:
            return
        #
        ofh = open(outputFilePath, "w")
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(self.__dataList)
        ofh.close()

    def GetCategories(self):
        """Return category names of the currently-selected container."""
        return self.__container.getObjNameList()

    def GetAttributes(self, category):
        """Return attribute names of the named category."""
        return self.__container.getObj(category).getAttributeList()

    def category_as_dict(self, category, block=None):
        """Return {category: {"Items": [...], "Values": [[...]]}} for one
        category; missing item values become None."""
        if block is None:
            block = self.__blockID
        values, attributes = self.GetValueAndItemByBlock(block, category)
        data = [[x[y] if y in x else None for y in attributes] for x in values]
        return {category: {"Items": attributes, "Values": data}}

    def block_as_dict(self, block=None):
        """Return the category_as_dict() mapping for every category of a block."""
        if block is None:
            block = self.__blockID
        data = {}
        for category in self.GetCategories():
            data.update(self.category_as_dict(category, block=block))
        return data
def __buildCif(self, rD, containerName="vrpt"):
    """Construct a mmCIF data category objects for the input extracted data.

    Args:
        rD (dict): extracted data organized by category.
        containerName (str) : data block name

    Returns:
        containers (list): data container list
    """
    #
    curContainer = DataContainer(containerName)
    for elName in rD:
        catName = elName
        # Skip empty categories, categories without attribute-map entries,
        # and "programs" (its content is handled via "program" below).
        if (not rD[elName]) or (not self.__attribD[catName]) or (
                catName in ["programs"]):
            continue
        hasOrdinal = "ordinal" in self.__attribD[catName]
        rowList = rD[elName]
        # Find the unique attribute content across the rowlist and the ordinal value
        atS = set()
        for ii, rowD in enumerate(rowList, 1):
            if hasOrdinal:
                # 1-based row number becomes the ordinal value.
                rowD["ordinal"] = ii
            if "icode" in rowD:
                rowD["icode"] = str(rowD["icode"]).strip()
            if "altcode" in rowD:
                rowD["altcode"] = str(rowD["altcode"]).strip()
            atS.update(rowD.keys())
        attributeNameList = list(atS)
        #
        # Set a reasonable order for these attributes
        #
        sD = {ky: self.__atOrdD[ky] for ky in attributeNameList}
        srtAtL = [
            tup[0] for tup in sorted(sD.items(), key=operator.itemgetter(1))
        ]
        logger.debug("Category %s sorted attributes %r", catName, srtAtL)
        aCat = DataCategory(catName, srtAtL, rowList)
        curContainer.append(aCat)
    #
    # Adjust schema names -
    #
    atD = self.__dictionaryMap["attributes"]
    for catName in curContainer.getObjNameList():
        catObj = curContainer.getObj(catName)
        atNameList = catObj.getAttributeList()
        mapD = {}
        # Fall back to the original names when no mapping entry exists.
        mapCatName = self.__dictionaryMap["categories"][
            catName] if catName in self.__dictionaryMap[
                "categories"] else catName
        for atName in atNameList:
            mapD[atName] = atD[(catName, atName)]["at"] if (
                catName, atName) in atD else atName
        catObj.renameAttributes(mapD)
        catObj.setName(mapCatName)
    #
    # Map provenance items from programs.properties -
    #
    catObj = curContainer.getObj("program")
    if catObj and catObj.hasAttribute("properties"):
        for iRow in range(catObj.getRowCount()):
            # Comma-separated property names, each mapped through __atMap
            # where an entry exists.
            pV = catObj.getValue("properties", iRow)
            pVL = [v.strip() for v in pV.split(",")]
            nL = [
                self.__atMap[ky] if ky in self.__atMap else ky for ky in pVL
            ]
            catObj.setValue(",".join(nL), "properties", iRow)
            # logger.info("Row %r properties %r" % (iRow, pV))
    #
    return [curContainer]
def __writeModel(self, targetId, targetObj, fitFD, fitXyzMapD, fitAtomUnMappedL, matchD, modelId, modelPath):
    """Write the chemical component model for the input chemical component Id and associated atom mapping and feature details --

    ComponentAtomDetails = namedtuple("ComponentAtomDetails", "index atNo name aType x y z fCharge")
    AlignAtomMap = namedtuple("AlignAtomMap", "refId refAtIdx refAtNo refAtName fitId fitAtIdx fitAtNo fitAtName")
    AlignAtomUnMapped = namedtuple("AlignAtomUnMapped", "fitId fitAtIdx fitAtNo fitAtType fitAtName fitAtFormalCharge x y z fitNeighbors")

    Returns:
        (bool, str): (export success, variant type string; "" on failure in the exception path)
    """
    try:
        unMappedTypeD = defaultdict(int)
        # Prefix used to synthesize atom names for unmapped (extra) hydrogens.
        hAtomPrefix = "HEX"
        variantType = self.__getBuildVariant(targetId)
        #
        # Reject the match when any unmapped atom is a non-hydrogen.
        if not self.__testUnMappedProtonation(fitAtomUnMappedL):
            logger.info("Unmapped non-hydrogen atoms target %r model %r unMapped count (%d)", targetId, modelId, len(fitAtomUnMappedL))
            return False, variantType
        # Get atom partners for the unmapped atoms (fit atom name -> reference atom name)
        fitAtMapD = {}
        for refAtName, fAtTup in fitXyzMapD.items():
            fitAtMapD[fAtTup.atName] = refAtName
        if fitAtomUnMappedL:
            # Check if neighbors are all mapped
            ok = True
            for fitUnTup in fitAtomUnMappedL:
                for nAtName in fitUnTup.fitNeighbors:
                    if nAtName not in fitAtMapD:
                        ok = False
                        logger.info("Missing mapped neighbor for %r target %r model %r", nAtName, targetId, modelId)
                        break
            if not ok:
                return False, variantType
            else:
                # Surviving unmapped atoms are protons -> mark as protonation variant
                logger.debug("%s match has unmapped protonation", modelId)
                variantType = "tautomer_protomer"
        #
        # Required fit feature keys must all be present before writing anything.
        kList = ["xyz", "SMILES", "SMILES_STEREO", "InChI", "InChIKey"]
        for k in kList:
            if k not in fitFD:
                logger.error("Fit feature dictionary for %s missing key %s", targetId, k)
                return False, variantType
        # ------------
        dataContainer = DataContainer(modelId)
        #
        myContainer = targetObj
        dbName = myContainer.getName()
        if dbName.upper() != targetId.upper():
            logger.info("mismatch datablock (%r) and targetId (%r)", dbName, targetId)
        cObj = None
        if myContainer.exists("chem_comp"):
            cObj = myContainer.getObj("chem_comp")
        #
        # --------  pdbx_chem_comp_model (identity) ---------
        catName = "pdbx_chem_comp_model"
        if not dataContainer.exists(catName):
            dataContainer.append(DataCategory(catName, attributeNameList=["id", "comp_id"]))
        #
        # Parent component id: leading token of a "CC|variant" style target id.
        parentId = targetId.split("|")[0]
        wObj = dataContainer.getObj(catName)
        wObj.setValue(modelId, "id", 0)
        wObj.setValue(parentId, "comp_id", 0)
        #
        # --------  pdbx_chem_comp_model_atom ---------
        catName = "pdbx_chem_comp_model_atom"
        if not dataContainer.exists(catName):
            dataContainer.append(DataCategory(catName, attributeNameList=["model_id", "atom_id", "type_symbol", "charge", "model_Cartn_x", "model_Cartn_y", "model_Cartn_z", "ordinal_id"]))
        wObj = dataContainer.getObj(catName)
        #
        if myContainer.exists("chem_comp_atom"):
            cObj = myContainer.getObj("chem_comp_atom")
        #
        # Only write the mapped atoms in case we are missing hydrogens in the mapping
        #
        jj = 0
        for ii in range(cObj.getRowCount()):
            atName = cObj.getValue("atom_id", ii)
            atType = cObj.getValue("type_symbol", ii)
            if atName not in fitXyzMapD:
                # Tally unmapped atoms by element type (used for heavy_atoms_only below)
                unMappedTypeD[atType] += 1
                continue
            fitXyz = fitXyzMapD[atName]
            #
            # fCharge = cObj.getValue("charge", ii)
            #
            wObj.setValue(modelId, "model_id", jj)
            wObj.setValue(atName, "atom_id", jj)
            wObj.setValue(atType, "type_symbol", jj)
            #
            wObj.setValue(fitXyz.atFormalCharge, "charge", jj)
            wObj.setValue("%.4f" % fitXyz.x, "model_Cartn_x", jj)
            wObj.setValue("%.4f" % fitXyz.y, "model_Cartn_y", jj)
            wObj.setValue("%.4f" % fitXyz.z, "model_Cartn_z", jj)
            wObj.setValue(jj + 1, "ordinal_id", jj)
            jj += 1
        #
        # Add the unmapped atoms ...
        # AlignAtomUnMapped = namedtuple("AlignAtomUnMapped", "fitId fitAtIdx fitAtNo fitAtType fitAtName fitNeighbors")
        # NOTE(review): `ii` is not incremented inside this loop, so when
        # fitAtomUnMappedL has more than one entry each iteration appears to
        # overwrite the same row index — confirm against DataCategory.setValue
        # semantics; an `ii += 1` at the end of the loop body looks intended.
        ii = wObj.getRowCount()
        for jj, uTup in enumerate(fitAtomUnMappedL):
            refAtomName = hAtomPrefix + str(jj)
            wObj.setValue(modelId, "model_id", ii)
            wObj.setValue(refAtomName, "atom_id", ii)
            wObj.setValue(uTup.fitAtType, "type_symbol", ii)
            wObj.setValue(uTup.fitAtFormalCharge, "charge", ii)
            wObj.setValue("%.4f" % uTup.x, "model_Cartn_x", ii)
            wObj.setValue("%.4f" % uTup.y, "model_Cartn_y", ii)
            wObj.setValue("%.4f" % uTup.z, "model_Cartn_z", ii)
            wObj.setValue(ii + 1, "ordinal_id", ii)
        # --------  pdbx_chem_comp_model_bond ---------
        catName = "pdbx_chem_comp_model_bond"
        if not dataContainer.exists(catName):
            dataContainer.append(DataCategory(catName, attributeNameList=["model_id", "atom_id_1", "atom_id_2", "value_order", "ordinal_id"]))
        wObj = dataContainer.getObj(catName)
        #
        if myContainer.exists("chem_comp_bond"):
            cObj = myContainer.getObj("chem_comp_bond")
        #
        jj = 0
        for ii in range(cObj.getRowCount()):
            # Keep only bonds whose both endpoints are mapped atoms
            at1 = cObj.getValue("atom_id_1", ii)
            if at1 not in fitXyzMapD:
                continue
            at2 = cObj.getValue("atom_id_2", ii)
            if at2 not in fitXyzMapD:
                continue
            bType = cObj.getValue("value_order", ii)
            #
            wObj.setValue(modelId, "model_id", jj)
            wObj.setValue(at1, "atom_id_1", jj)
            wObj.setValue(at2, "atom_id_2", jj)
            wObj.setValue(bType, "value_order", jj)
            wObj.setValue(jj + 1, "ordinal_id", jj)
            jj += 1
        #
        # Single bonds from each synthesized hydrogen to its mapped neighbor(s).
        # NOTE(review): as in the atom loop above, `ii` is never incremented here,
        # so multiple unmapped bonds would target the same row index — verify.
        ii = wObj.getRowCount()
        for jj, uTup in enumerate(fitAtomUnMappedL):
            at1 = hAtomPrefix + str(jj)
            for nAt in uTup.fitNeighbors:
                at2 = fitAtMapD[nAt]
                wObj.setValue(modelId, "model_id", ii)
                wObj.setValue(at1, "atom_id_1", ii)
                wObj.setValue(at2, "atom_id_2", ii)
                wObj.setValue("SING", "value_order", ii)
                wObj.setValue(ii + 1, "ordinal_id", ii)
        # --------  pdbx_chem_comp_model_descriptor ---------
        catName = "pdbx_chem_comp_model_descriptor"
        if not dataContainer.exists(catName):
            dataContainer.append(DataCategory(catName, attributeNameList=["model_id", "type", "descriptor"]))
        wObj = dataContainer.getObj(catName)
        #
        ii = 0
        wObj.setValue(modelId, "model_id", ii)
        wObj.setValue("SMILES", "type", ii)
        wObj.setValue(fitFD["SMILES"], "descriptor", ii)
        ii += 1
        wObj.setValue(modelId, "model_id", ii)
        wObj.setValue("SMILES_CANONICAL", "type", ii)
        wObj.setValue(fitFD["SMILES_STEREO"], "descriptor", ii)
        ii += 1
        wObj.setValue(modelId, "model_id", ii)
        wObj.setValue("InChI", "type", ii)
        wObj.setValue(fitFD["InChI"], "descriptor", ii)
        ii += 1
        wObj.setValue(modelId, "model_id", ii)
        wObj.setValue("InChIKey", "type", ii)
        wObj.setValue(fitFD["InChIKey"], "descriptor", ii)
        #
        # --------  pdbx_chem_comp_model_reference ---------
        if matchD["queryId"] is not None:
            catName = "pdbx_chem_comp_model_reference"
            if not dataContainer.exists(catName):
                dataContainer.append(DataCategory(catName, attributeNameList=["model_id", "db_name", "db_code"]))
            wObj = dataContainer.getObj(catName)
            ii = 0
            wObj.setValue(modelId, "model_id", ii)
            wObj.setValue("COD", "db_name", ii)
            wObj.setValue(matchD["queryId"], "db_code", ii)
        #
        # Collect optional experimental features from the match record.
        featureD = {}
        v = matchD["rValue"]
        vS = str(v)
        if v is not None and len(vS) > 0:
            featureD["r_factor"] = "%.3f" % float(v)
        #
        v = matchD["diffrnTemp"]
        vS = str(v)
        # remove string artifacts from temperature string ...
        if v is not None and len(vS) > 0:
            tV = vS.upper()
            try:
                if tV.endswith("DEG.C"):
                    # Celsius value -> Kelvin
                    tV = tV.replace("AT", "")
                    tV = tV.replace("DEG.C", "")
                    tV = float(tV.strip())
                    tV = tV + 273.15
                else:
                    # Assume Kelvin with optional "AT"/"K" decorations
                    tV = tV.replace("AT", "")
                    tV = tV.replace("K", "")
                    tV = float(tV.strip())
                featureD["experiment_temperature"] = tV
            except Exception as e:
                logger.exception("Temperature conversion fails for %s (%r) with %s", modelId, vS, tV)
        #
        v = matchD["publicationDOI"]
        vS = str(v)
        if v is not None and len(vS) > 0:
            featureD["publication_doi"] = v
        #
        v = matchD["version"]
        vS = str(v)
        if v is not None and len(vS) > 0:
            # NOTE(review): stored as "cod_version" but the fKeyList below
            # looks up "csd_version", so this feature is never written — one
            # of the two spellings looks like a typo; confirm intended key.
            featureD["cod_version"] = v
        #
        if matchD["radiationSource"] and "neutron" in matchD["radiationSource"]:
            featureD["neutron_radiation_experiment"] = True
        if matchD["hasDisorder"] in ["Y"]:
            featureD["has_disorder"] = True
        #
        # All unmapped atoms were hydrogens -> heavy-atom-only model
        if len(unMappedTypeD) == 1 and "H" in unMappedTypeD:
            logger.info("model %r heavy_atoms_only", modelId)
            featureD["heavy_atoms_only"] = True
        else:
            featureD["all_atoms_have_sites"] = True
        # --------  pdbx_chem_comp_model_feature ---------
        catName = "pdbx_chem_comp_model_feature"
        if not dataContainer.exists(catName):
            dataContainer.append(DataCategory(catName, attributeNameList=["model_id", "feature_name", "feature_value"]))
        wObj = dataContainer.getObj(catName)
        #
        fKeyList = ["experiment_temperature", "publication_doi", "r_factor", "csd_version"]
        ii = 0
        for fKey in fKeyList:
            if fKey in featureD:
                wObj.setValue(modelId, "model_id", ii)
                wObj.setValue(fKey, "feature_name", ii)
                wObj.setValue(str(featureD[fKey]), "feature_value", ii)
                ii += 1
        #
        boolKeyList = ["has_disorder", "neutron_radiation_experiment", "heavy_atoms_only", "all_atoms_have_sites"]
        for fKey in boolKeyList:
            if fKey in featureD:
                if featureD[fKey]:
                    wObj.setValue(modelId, "model_id", ii)
                    wObj.setValue(fKey, "feature_name", ii)
                    wObj.setValue("Y", "feature_value", ii)
                    ii += 1
        #
        if variantType:
            wObj.setValue(modelId, "model_id", ii)
            wObj.setValue(variantType + "_match", "feature_name", ii)
            wObj.setValue("Y", "feature_value", ii)
            ii += 1
        # --------  pdbx_chem_comp_model_audit ---------
        catName = "pdbx_chem_comp_model_audit"
        if not dataContainer.exists(catName):
            dataContainer.append(DataCategory(catName, attributeNameList=["model_id", "action_type", "date"]))
        wObj = dataContainer.getObj(catName)
        #
        ii = 0
        wObj.setValue(modelId, "model_id", ii)
        wObj.setValue("Initial release", "action_type", ii)
        wObj.setValue(self.__getToday(), "date", ii)
        # wObj.setValue('RCSB', 'processing_site', ii)
        # wObj.setValue('JDW', 'annotator', ii)
        # wObj.setValue('?', 'details', ii)
        #
        mU = MarshalUtil(workPath=self.__cachePath)
        ok = mU.doExport(modelPath, [dataContainer], fmt="mmcif")
        return ok, variantType
    except Exception as e:
        logger.exception("Failing for %r with %s", targetId, str(e))
        return False, ""
def testGenDDLm(self):
    """Generating alternative DDLm metadata format. (starting point)

    Reads the PDBx dictionary, walks every category/attribute definition and
    emits a best-effort DDLm-style dictionary to test-output/mmcif_pdbx_ddlm_auto.dic.
    """
    try:
        myIo = IoAdapterPy(self.__verbose, self.__lfh)
        self.__containerList = myIo.readFile(inputFilePath=self.__pathPdbxDictionary)
        dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, verbose=self.__verbose)
        # item name -> list of parent item names
        parentD = dApi.getParentDictionary()
        #
        oCList = []
        # Leading "dictionary" header block
        dDef = DataContainer("mmcif_pdbx_ddlm_auto")
        dc = DataCategory("dictionary")
        dc.appendAttribute("title")
        dc.appendAttribute("class")
        dc.appendAttribute("version")
        dc.appendAttribute("date")
        dc.appendAttribute("ddl_conformance")
        dc.appendAttribute("text")
        dc.append(["mmcif_pdbx_ddlm_auto", "Instance", "latest", "2018-03-09", "ddlm best effort", "Software converted PDBx dictionary using DDLm semantics"])
        dDef.append(dc)
        oCList.append(dDef)
        catIdx = dApi.getCategoryIndex()
        for catName in sorted(catIdx.keys()):
            attNameList = catIdx[catName]
            # created definition container -
            cDef = DefinitionContainer(catName)
            oCList.append(cDef)
            #
            dc = DataCategory("definition")
            dc.appendAttribute("id")
            dc.appendAttribute("scope")
            dc.appendAttribute("class")
            dc.appendAttribute("update")
            dc.append([catName, "Category", "Loop", "2018-03-09"])
            cDef.append(dc)
            val = dApi.getCategoryDescription(category=catName)
            dc = DataCategory("description")
            dc.appendAttribute("text")
            dc.append([val])
            cDef.append(dc)
            #
            # Parent category group: first group that is not "inclusive_group"
            dc = DataCategory("name")
            dc.appendAttribute("category_id")
            dc.appendAttribute("object_id")
            valList = dApi.getCategoryGroupList(category=catName)
            pcg = catName
            for val in valList:
                if val != "inclusive_group":
                    pcg = val
                    break
            dc.append([catName, pcg])
            cDef.append(dc)
            valList = dApi.getCategoryKeyList(category=catName)
            if not valList:
                # NOTE(review): "caegory" typo in the diagnostic message below
                self.__lfh.write("Missing caegory key for category %s\n" % catName)
            else:
                # Synthesize a single key item definition for the category key(s)
                dc = DataCategory("category")
                dc.appendAttribute("key_id")
                kItemName = CifName.itemName(catName, "synthetic_key")
                dc.append([kItemName])
                cDef.append(dc)
                iDef = DefinitionContainer(kItemName)
                self.__makeKeyItem(catName, "synthetic_key", valList, iDef)
                oCList.append(iDef)
            # Per-attribute item definitions
            for attName in attNameList:
                itemName = CifName.itemName(catName, attName)
                iDef = DefinitionContainer(itemName)
                oCList.append(iDef)
                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([itemName, "Item", "Single", "2013-08-22"])
                iDef.append(dc)
                #
                val = dApi.getDescription(category=catName, attribute=attName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                iDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")
                #
                # Record the first parent item link when one exists
                if itemName in parentD:
                    dc.appendAttribute("linked_item_id")
                    dc.append([catName, attName, parentD[itemName][0]])
                else:
                    dc.append([catName, attName])
                iDef.append(dc)
                #
                aliasList = dApi.getItemAliasList(category=catName, attribute=attName)
                if aliasList:
                    dc = DataCategory("alias")
                    dc.appendAttribute("definition_id")
                    for alias in aliasList:
                        dc.append([alias[0]])
                    iDef.append(dc)
                enList = dApi.getEnumListAltWithDetail(category=catName, attribute=attName)
                tC = dApi.getTypeCode(category=catName, attribute=attName)
                # Map DDL2 type codes to DDLm purpose/source/contents triples
                tcontainer = "Single"
                purpose = "Describe"
                source = "Recorded"
                contents = "Text"
                #
                if tC is None:
                    self.__lfh.write("Missing data type attribute %s\n" % attName)
                elif tC in ["code", "atcode", "name", "idname", "symop", "fax", "phone", "email", "code30", "ec-type"]:
                    purpose = "Encode"
                    contents = "Text"
                    source = "Assigned"
                elif tC in ["ucode"]:
                    purpose = "Encode"
                    contents = "Code"
                    source = "Assigned"
                elif tC in ["line", "uline", "text"]:
                    purpose = "Describe"
                    source = "Recorded"
                    contents = "Text"
                elif tC in ["int"]:
                    purpose = "Number"
                    source = "Recorded"
                    contents = "Integer"
                elif tC in ["int-range"]:
                    purpose = "Number"
                    source = "Recorded"
                    contents = "Range"
                elif tC in ["float"]:
                    purpose = "Measurand"
                    source = "Recorded"
                    contents = "Real"
                elif tC in ["float-range"]:
                    purpose = "Measurand"
                    source = "Recorded"
                    contents = "Range"
                elif tC.startswith("yyyy"):
                    source = "Assigned"
                    contents = "Date"
                    purpose = "Describe"
                # Enumerated items override purpose regardless of base type
                if enList:
                    purpose = "State"
                dc = DataCategory("type")
                dc.appendAttribute("purpose")
                dc.appendAttribute("source")
                dc.appendAttribute("contents")
                dc.appendAttribute("container")
                dc.append([purpose, source, contents, tcontainer])
                iDef.append(dc)
                #
                if enList:
                    dc = DataCategory("enumeration_set")
                    dc.appendAttribute("state")
                    dc.appendAttribute("detail")
                    for en in enList:
                        dc.append([en[0], en[1]])
                    iDef.append(dc)
                dfv = dApi.getDefaultValue(category=catName, attribute=attName)
                bvList = dApi.getBoundaryList(category=catName, attribute=attName)
                if ((dfv is not None) and (dfv not in ["?", "."])) or bvList:
                    row = []
                    dc = DataCategory("enumeration")
                    if dfv is not None:
                        dc.appendAttribute("default")
                        row.append(dfv)
                    if bvList:
                        # Collapse all boundary pairs into one "min:max" range;
                        # "." sentinels map to open ends (rendered as empty strings).
                        dc.appendAttribute("range")
                        mminVp = -1000000
                        mmaxVp = 10000000
                        mminV = mmaxVp
                        mmaxV = mminVp
                        for bv in bvList:
                            minV = float(bv[0]) if bv[0] != "." else mminVp
                            maxV = float(bv[1]) if bv[1] != "." else mmaxVp
                            mminV = min(mminV, minV)
                            mmaxV = max(mmaxV, maxV)
                        if mminV == mminVp:
                            mminV = ""
                        if mmaxV == mmaxVp:
                            mmaxV = ""
                        row.append(str(mminV) + ":" + str(mmaxV))
                    dc.append(row)
                    iDef.append(dc)
        myIo.writeFile(outputFilePath=os.path.join(HERE, "test-output", "mmcif_pdbx_ddlm_auto.dic"), containerList=oCList)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_gen_ddlm(self, in_tmpdir, test_files):
    """Generate a best-effort DDLm rendering of the PDBx dictionary.

    Reads mmcif_pdbx_v5_next.dic, converts every category/attribute definition
    into DDLm-style definition containers and writes mmcif_pdbx_ddlm_auto.dic
    into the tmp working directory (in_tmpdir fixture).

    Fix: the `tC is None` branch previously called `self.__lfh.write(...)`;
    `__lfh` is never defined on this pytest class (name-mangled leftover from
    the unittest variant of this test), so any item without a type code raised
    AttributeError. Diagnostics now use print() like the rest of this test.
    """
    myIo = IoAdapterPy()
    containerList = myIo.readFile(inputFilePath=str(test_files / "mmcif_pdbx_v5_next.dic"))
    dApi = DictionaryApi(containerList=containerList, consolidate=True)
    # item name -> list of parent item names
    parentD = dApi.getParentDictionary()
    #
    oCList = []
    # Leading "dictionary" header block
    dDef = DataContainer("mmcif_pdbx_ddlm_auto")
    dc = DataCategory("dictionary")
    dc.appendAttribute("title")
    dc.appendAttribute("class")
    dc.appendAttribute("version")
    dc.appendAttribute("date")
    dc.appendAttribute("ddl_conformance")
    dc.appendAttribute("text")
    dc.append(["mmcif_pdbx_ddlm_auto", "Instance", "latest", "2018-03-09", "ddlm best effort", "Software converted PDBx dictionary using DDLm semantics"])
    dDef.append(dc)
    oCList.append(dDef)
    catIdx = dApi.getCategoryIndex()
    for catName in sorted(catIdx.keys()):
        attNameList = catIdx[catName]
        # created definition container -
        cDef = DefinitionContainer(catName)
        oCList.append(cDef)
        #
        dc = DataCategory("definition")
        dc.appendAttribute("id")
        dc.appendAttribute("scope")
        dc.appendAttribute("class")
        dc.appendAttribute("update")
        dc.append([catName, "Category", "Loop", "2018-03-09"])
        cDef.append(dc)
        val = dApi.getCategoryDescription(category=catName)
        dc = DataCategory("description")
        dc.appendAttribute("text")
        dc.append([val])
        cDef.append(dc)
        #
        # Parent category group: first group that is not "inclusive_group"
        dc = DataCategory("name")
        dc.appendAttribute("category_id")
        dc.appendAttribute("object_id")
        valList = dApi.getCategoryGroupList(category=catName)
        pcg = catName
        for val in valList:
            if val != "inclusive_group":
                pcg = val
                break
        dc.append([catName, pcg])
        cDef.append(dc)
        valList = dApi.getCategoryKeyList(category=catName)
        if not valList:
            print("Missing category key for category %s\n" % catName)
        else:
            # Synthesize a single key item definition for the category key(s)
            dc = DataCategory("category")
            dc.appendAttribute("key_id")
            kItemName = CifName.itemName(catName, "synthetic_key")
            dc.append([kItemName])
            cDef.append(dc)
            iDef = DefinitionContainer(kItemName)
            self._makeKeyItem(catName, "synthetic_key", valList, iDef)
            oCList.append(iDef)
        # Per-attribute item definitions
        for attName in attNameList:
            itemName = CifName.itemName(catName, attName)
            iDef = DefinitionContainer(itemName)
            oCList.append(iDef)
            #
            dc = DataCategory("definition")
            dc.appendAttribute("id")
            dc.appendAttribute("scope")
            dc.appendAttribute("class")
            dc.appendAttribute("update")
            dc.append([itemName, "Item", "Single", "2013-08-22"])
            iDef.append(dc)
            #
            val = dApi.getDescription(category=catName, attribute=attName)
            dc = DataCategory("description")
            dc.appendAttribute("text")
            dc.append([val])
            iDef.append(dc)
            #
            dc = DataCategory("name")
            dc.appendAttribute("category_id")
            dc.appendAttribute("object_id")
            #
            # Record the first parent item link when one exists
            if itemName in parentD:
                dc.appendAttribute("linked_item_id")
                dc.append([catName, attName, parentD[itemName][0]])
            else:
                dc.append([catName, attName])
            iDef.append(dc)
            #
            aliasList = dApi.getItemAliasList(category=catName, attribute=attName)
            if aliasList:
                dc = DataCategory("alias")
                dc.appendAttribute("definition_id")
                for alias in aliasList:
                    dc.append([alias[0]])
                iDef.append(dc)
            enList = dApi.getEnumListAltWithDetail(category=catName, attribute=attName)
            tC = dApi.getTypeCode(category=catName, attribute=attName)
            # Map DDL2 type codes to DDLm purpose/source/contents triples
            tcontainer = "Single"
            purpose = "Describe"
            source = "Recorded"
            contents = "Text"
            #
            if tC is None:
                # Was: self.__lfh.write(...) -> AttributeError in pytest class
                print("Missing data type attribute %s\n" % attName)
            elif tC in ["code", "atcode", "name", "idname", "symop", "fax", "phone", "email", "code30", "ec-type"]:
                purpose = "Encode"
                contents = "Text"
                source = "Assigned"
            elif tC in ["ucode"]:
                purpose = "Encode"
                contents = "Code"
                source = "Assigned"
            elif tC in ["line", "uline", "text"]:
                purpose = "Describe"
                source = "Recorded"
                contents = "Text"
            elif tC in ["int"]:
                purpose = "Number"
                source = "Recorded"
                contents = "Integer"
            elif tC in ["int-range"]:
                purpose = "Number"
                source = "Recorded"
                contents = "Range"
            elif tC in ["float"]:
                purpose = "Measurand"
                source = "Recorded"
                contents = "Real"
            elif tC in ["float-range"]:
                purpose = "Measurand"
                source = "Recorded"
                contents = "Range"
            elif tC.startswith("yyyy"):
                source = "Assigned"
                contents = "Date"
                purpose = "Describe"
            # Enumerated items override purpose regardless of base type
            if enList:
                purpose = "State"
            dc = DataCategory("type")
            dc.appendAttribute("purpose")
            dc.appendAttribute("source")
            dc.appendAttribute("contents")
            dc.appendAttribute("container")
            dc.append([purpose, source, contents, tcontainer])
            iDef.append(dc)
            #
            if enList:
                dc = DataCategory("enumeration_set")
                dc.appendAttribute("state")
                dc.appendAttribute("detail")
                for en in enList:
                    dc.append([en[0], en[1]])
                iDef.append(dc)
            dfv = dApi.getDefaultValue(category=catName, attribute=attName)
            bvList = dApi.getBoundaryList(category=catName, attribute=attName)
            if ((dfv is not None) and (dfv not in ["?", "."])) or bvList:
                row = []
                dc = DataCategory("enumeration")
                if dfv is not None:
                    dc.appendAttribute("default")
                    row.append(dfv)
                if bvList:
                    # Collapse all boundary pairs into one "min:max" range;
                    # "." sentinels map to open ends (rendered as empty strings).
                    dc.appendAttribute("range")
                    mminVp = -1000000
                    mmaxVp = 10000000
                    mminV = mmaxVp
                    mmaxV = mminVp
                    for bv in bvList:
                        minV = float(bv[0]) if bv[0] != "." else mminVp
                        maxV = float(bv[1]) if bv[1] != "." else mmaxVp
                        mminV = min(mminV, minV)
                        mmaxV = max(mmaxV, maxV)
                    if mminV == mminVp:
                        mminV = ""
                    if mmaxV == mmaxVp:
                        mmaxV = ""
                    row.append(str(mminV) + ":" + str(mmaxV))
                dc.append(row)
                iDef.append(dc)
    myIo.writeFile(outputFilePath="mmcif_pdbx_ddlm_auto.dic", containerList=oCList)
def __deserialize(self, fh, storeStringsAsBytes=False):
    """Deserialize a BinaryCIF (msgpack) stream into a list of DataContainer objects.

    Args:
        fh: open binary file handle positioned at the msgpack payload.
        storeStringsAsBytes (bool): when True, map keys/strings in the payload
            are bytes (self.__toBytes/__fromBytes bridge the two modes).

    Returns:
        list: DataContainer objects; empty list when deserialization fails.
    """
    cL = []
    try:
        dec = BinaryCifDecoders(storeStringsAsBytes=storeStringsAsBytes)
        bD = msgpack.unpack(fh)
        # Hoist the per-column msgpack keys out of the hot loop instead of
        # recomputing self.__toBytes(...) for every column access.
        nameKey = self.__toBytes("name")
        dataKey = self.__toBytes("data")
        encodingKey = self.__toBytes("encoding")
        maskKey = self.__toBytes("mask")
        #
        logger.debug("bD.keys() %r", bD.keys())
        logger.debug("bD['dataBlocks'] %s", bD[self.__toBytes("dataBlocks")])
        #
        for dataBlock in bD[self.__toBytes("dataBlocks")]:
            header = self.__fromBytes(dataBlock[self.__toBytes("header")]) if self.__toBytes("header") in dataBlock else None
            logger.debug("header %r", header)
            logger.debug("dataBlock %r", dataBlock)
            #
            dc = DataContainer(header)
            categoryList = dataBlock[self.__toBytes("categories")] if self.__toBytes("categories") in dataBlock else []
            for category in categoryList:
                # Category names carry a leading underscore in bCIF - strip it.
                catName = self.__fromBytes(category[nameKey])[1:]
                colList = category[self.__toBytes("columns")]
                logger.debug("catName %r columns %r", catName, colList)
                colD = OrderedDict()
                atNameList = []
                for col in colList:
                    logger.debug("col.keys() %r", col.keys())
                    atName = self.__fromBytes(col[nameKey])
                    # Fetch the data/mask sub-dicts once and reuse them below
                    # (previously each was re-indexed for decoding).
                    atData = col[dataKey]
                    atMask = col[maskKey]
                    logger.debug("atData encoding (%d) data (%d)", len(atData[encodingKey]), len(atData[dataKey]))
                    logger.debug("catName %r atName %r", catName, atName)
                    logger.debug(" >atData.data %r", atData[dataKey])
                    logger.debug(" >atData.encoding (%d) %r", len(atData[encodingKey]), atData[encodingKey])
                    logger.debug(" >mask %r", atMask)
                    tVal = dec.decode(atData[dataKey], atData[encodingKey])
                    if atMask:
                        # Mask semantics: 2 -> missing ("?"), 1 -> not applicable (".")
                        mVal = dec.decode(atMask[dataKey], atMask[encodingKey])
                        tVal = ["?" if m == 2 else "." if m == 1 else d for d, m in zip(tVal, mVal)]
                    colD[atName] = tVal
                    atNameList.append(atName)
                #
                cObj = DataCategory(catName, attributeNameList=atNameList)
                # Transpose columns to rows directly; no intermediate list needed.
                for row in zip(*colD.values()):
                    logger.debug("row %r", row)
                    cObj.append(row)
                #
                dc.append(cObj)
            cL.append(dc)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return cL
class CIF(object):
    """Thin wrapper over the mmcif library to build an mmCIF-like object.

    Each instance holds one current DataContainer (with a container id) and
    the DataCategory objects appended to it; the accumulated container list
    is handed to PdbxWriter on output.
    """

    # Fallback container id used by prepare_container() when none is supplied.
    DUMMY_CONTAINER_ID = "emd_0000"

    def __init__(self, cif_name_name):
        """
        :param cif_name_name: output file name; falsy (None/"") makes write() a no-op
        """
        self.filename = cif_name_name
        # self.__dataList needed for PDBxWriter
        self.__dataList = []
        self.__container_id = None
        self.__container = None
        # container id -> index of the container within self.__dataList
        self.__dataMap = {}

    def write(self):
        """Write all accumulated data to ``self.filename`` with PdbxWriter.

        :return written: a boolean; True when the pdbx writer is finished,
            False when no file name was configured
        """
        written = False
        if self.filename:
            # Context manager guarantees the handle is closed even when the
            # writer raises mid-stream (the previous open()/close() pair
            # leaked the handle on error).
            with open(self.filename, "w") as ofh:
                pdbx_writer = PdbxWriter(ofh)
                pdbx_writer.write(self.__dataList)
            written = True
        return written

    def add_container(self, container_id):
        """Create a new DataContainer, make it current and queue it for output.

        :param container_id: a string; an mmcif category e.g. 'emd_admin'
        :return: True when the container was created
        """
        added = False
        self.__container_id = container_id
        self.__container = DataContainer(container_id)
        self.__dataMap[container_id] = len(self.__dataList)
        self.__dataList.append(self.__container)
        if self.__container is not None:
            added = True
        return added

    def prepare_container(self, container_id):
        """Create a container if one doesn't exist, using either the provided
        id or the dummy id.

        :param container_id: a string; an mmcif category e.g. 'emd_admin'
        :return: True when a container was created; None when one already existed
        """
        if not self.__container:
            if container_id is None:
                container_id = self.DUMMY_CONTAINER_ID
            return self.add_container(container_id)

    def add_category(self, category_id, items):
        """Create a data category object, add all items to it and append it to
        the current container.

        :param category_id: a string; an mmcif category e.g. 'emd_admin'
        :param items: a list of strings; each element is an item of the mmcif
            category identified by category_id
        """
        category = DataCategory(category_id)
        for item in items:
            category.appendAttribute(item)
        self.__container.append(category)

    # def update_single_row_value(self, category_id, item_name, row, value):
    #     """Update value in single row
    #     """
    #     catObj = self.__container.getObj(category_id)
    #     if catObj is None:
    #         return
    #
    #     catObj.setValue(value, item_name, row)
    #
    # def update_multiple_rows_value(self, category_id, item_name, value):
    #     """Update value in multiple rows
    #     """
    #     cat_obj = self.__container.getObj(category_id)
    #     if cat_obj is None:
    #         return
    #
    #     row_no = cat_obj.getRowCount()
    #     for row in range(0, row_no):
    #         cat_obj.setValue(value, item_name, row)

    def insert_data(self, category_id, data_list):
        """Append the data in data_list to the category labeled category_id.

        :param category_id: a string; an mmcif category e.g. 'emd_admin'
        :param data_list: either a flat row of values, or a pair of parallel
            lists [ordinals, values] appended row by row
        """
        cat_obj = self.__container.getObj(category_id)
        if cat_obj is None:
            return
        if any(isinstance(el, list) for el in data_list):
            # Paired-list form. enumerate() replaces the original
            # data_list[0].index(value) lookup, which was O(n^2) overall and
            # mapped duplicate values to the first occurrence's index.
            for ord_index, _entry in enumerate(data_list[0]):
                cat_obj.append([ord_index, data_list[1][ord_index]])
        else:
            cat_obj.append(data_list)

    def insert_data_into_category(self, category_id, data_items, data_list):
        """Helper method: add a category and its items to the container, then
        insert the corresponding data values.

        :param category_id: a string; an mmcif category e.g. 'emd_admin'
        :param data_items: a list of strings; each element is an item of the
            mmcif category identified by category_id
        :param data_list: a list of strings; each element is a value for the
            corresponding element in data_items
        """
        self.add_category(category_id, data_items)
        self.insert_data(category_id, data_list)