def _makeKeyItem(self, catName, attName, keyItemList, iDef):
    """Append DDLm definition categories describing a synthetic composite key item.

    Args:
        catName (str): category name owning the synthetic key
        attName (str): attribute name of the synthetic key (e.g. "synthetic_key")
        keyItemList (list): item names that compose the key
        iDef (object): definition container to which the categories are appended

    Returns:
        None: categories are appended to iDef in place.
    """
    itemName = CifName.itemName(catName, attName)
    #
    dc = DataCategory("definition")
    dc.appendAttribute("id")
    dc.appendAttribute("scope")
    dc.appendAttribute("class")
    dc.appendAttribute("update")
    dc.append([itemName, "Item", "Single", "2013-08-22"])
    iDef.append(dc)
    #
    dc = DataCategory("description")
    dc.appendAttribute("text")
    # Fixed misspelled description text (was 'synthentic componsite key')
    dc.append(['synthetic composite key'])
    iDef.append(dc)
    #
    dc = DataCategory("name")
    dc.appendAttribute("category_id")
    dc.appendAttribute("object_id")
    dc.append([catName, attName])
    iDef.append(dc)
    tcontainer = 'Set'
    purpose = 'Composite'
    source = 'Derived'
    contents = 'Name'
    # Dimension string records how many component items make up the key
    dimension = '[%d]' % len(keyItemList)
    #
    dc = DataCategory("type")
    dc.appendAttribute("purpose")
    dc.appendAttribute("source")
    dc.appendAttribute("contents")
    dc.appendAttribute("container")
    dc.appendAttribute("dimension")
    dc.append([purpose, source, contents, tcontainer, dimension])
    iDef.append(dc)
    dc = DataCategory("method")
    dc.appendAttribute("purpose")
    dc.appendAttribute("expression")
    # dREL-style evaluation expression assembling the key from its components
    tmpl = '''
    With row as %s

        %s = [%s]
    '''
    mText = tmpl % (catName, itemName, ','.join(keyItemList))
    dc.append(['Evaluation', mText])
    iDef.append(dc)
def __makeKeyItem(self, catName, attName, keyItemList, iDef):
    """Append DDLm definition categories describing a synthetic composite key item.

    Args:
        catName (str): category name owning the synthetic key
        attName (str): attribute name of the synthetic key (e.g. "synthetic_key")
        keyItemList (list): item names that compose the key
        iDef (object): definition container to which the categories are appended

    Returns:
        None: categories are appended to iDef in place.
    """
    itemName = CifName.itemName(catName, attName)
    #
    dc = DataCategory("definition")
    dc.appendAttribute("id")
    dc.appendAttribute("scope")
    dc.appendAttribute("class")
    dc.appendAttribute("update")
    dc.append([itemName, "Item", "Single", "2013-08-22"])
    iDef.append(dc)
    #
    dc = DataCategory("description")
    dc.appendAttribute("text")
    # Fixed misspelled description text (was 'synthentic componsite key')
    dc.append(["synthetic composite key"])
    iDef.append(dc)
    #
    dc = DataCategory("name")
    dc.appendAttribute("category_id")
    dc.appendAttribute("object_id")
    dc.append([catName, attName])
    iDef.append(dc)
    tcontainer = "Set"
    purpose = "Composite"
    source = "Derived"
    contents = "Name"
    # Dimension string records how many component items make up the key
    dimension = "[%d]" % len(keyItemList)
    #
    dc = DataCategory("type")
    dc.appendAttribute("purpose")
    dc.appendAttribute("source")
    dc.appendAttribute("contents")
    dc.appendAttribute("container")
    dc.appendAttribute("dimension")
    dc.append([purpose, source, contents, tcontainer, dimension])
    iDef.append(dc)
    dc = DataCategory("method")
    dc.appendAttribute("purpose")
    dc.appendAttribute("expression")
    # dREL-style evaluation expression assembling the key from its components
    tmpl = """
    With row as %s

        %s = [%s]
    """
    mText = tmpl % (catName, itemName, ",".join(keyItemList))
    dc.append(["Evaluation", mText])
    iDef.append(dc)
def testWriteDataFile(self):
    """Test case - write a single-block data file with column alignment enabled."""
    try:
        containerList = []
        container = DataContainer("myblock")
        category = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num", "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            category.appendAttribute(attributeName)
        # Rows carry pdb_chain_id values of widely varying width to exercise alignment
        for chainValue in ("55555555555555555555555555555555555555555555", "5555", "5555555555", "5"):
            category.append((1, 2, 3, 4, chainValue, 6, 7))
        container.append(category)
        containerList.append(container)
        with open(self.__pathOutputFile1, "w") as ofh:
            writer = PdbxWriter(ofh)
            writer.setAlignmentFlag(flag=True)
            writer.write(containerList)
        self.assertEqual(len(containerList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testUpdateDataFile(self):
    """Test case - write a data file, read it back, update two columns, and rewrite it."""
    try:
        # Create an initial data file --
        containerList = []
        container = DataContainer("myblock")
        category = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num", "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            category.appendAttribute(attributeName)
        for _ in range(4):
            category.append((1, 2, 3, 4, 5, 6, 7))
        container.append(category)
        containerList.append(container)
        with open(self.__pathOutputFile1, "w") as ofh:
            PdbxWriter(ofh).write(containerList)
        #
        # Read and update the data -
        #
        containerList = []
        with open(self.__pathOutputFile1, "r") as ifh:
            PdbxReader(ifh).read(containerList)
        #
        mappingObj = containerList[0].getObj("pdbx_seqtool_mapping_ref")
        # Overwrite two columns in every row
        for rowIndex in range(mappingObj.getRowCount()):
            mappingObj.setValue("some value", "ref_mon_id", rowIndex)
            mappingObj.setValue(100, "ref_mon_num", rowIndex)
        with open(self.__pathOutputFile2, "w") as ofh:
            PdbxWriter(ofh).write(containerList)
        self.assertEqual(len(containerList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def writeDefaultDataTypeMap(self, outPath, dataTyping="ANY"):
    """Write a data file containing the application default dictionary-to-application
    data type mapping.

        data_rcsb_data_type_map
          loop_
          _pdbx_data_type_application_map.application_name
          _pdbx_data_type_application_map.type_code
          _pdbx_data_type_application_map.app_type_code
          _pdbx_data_type_application_map.app_precision_default
          _pdbx_data_type_application_map.app_width_default
          # .... type mapping data ...

    Args:
        outPath (str): output file path
        dataTyping (str, optional): application name recorded in each row. Defaults to "ANY".

    Returns:
        bool: True on successful export, otherwise False.
    """
    try:
        catObj = DataCategory("pdbx_data_type_application_map")
        for attrName in ("application_name", "type_code", "app_type_code", "app_width_default", "app_precision_default"):
            catObj.appendAttribute(attrName)
        typeRows = zip(
            DataTypeApplicationInfo.cifTypes,
            DataTypeApplicationInfo.appTypes,
            DataTypeApplicationInfo.defaultWidths,
            DataTypeApplicationInfo.defaultPrecisions,
        )
        for cifType, simpleType, defWidth, defPrecision in typeRows:
            # Skip placeholder/null type codes
            if self.__isNull(cifType):
                continue
            catObj.append([dataTyping, cifType, simpleType, defWidth, defPrecision])
        container = DataContainer("rcsb_data_type_map")
        container.append(catObj)
        #
        mU = MarshalUtil(workPath=self.__workPath)
        return mU.doExport(outPath, [container], fmt="mmcif", enforceAscii=True, useCharRefs=True, raiseExceptions=True)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return False
def testRowDictInitialization(self):
    """Test case - Row dictionary initialization of a data category and data block.

    Builds a category from row dictionaries, appends/extends additional rows,
    renames attributes and the category, round-trips through a file, and checks
    the recovered row count (rLen initial + 2 appends + rLen extended).
    """
    try:
        #
        rLen = 10
        fn = self.__pathOutputFile5
        attributeNameList = ["a", "b", "c", "d"]
        rowList = [{"a": 1, "b": 2, "c": 3, "d": 4} for i in range(rLen)]
        nameCat = "myCategory"
        #
        #
        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
        aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
        aCat.extend(rowList)
        curContainer.append(aCat)
        aCat.renameAttributes({"a": "aa", "b": "bb", "c": "cc", "d": "dd"})
        aCat.setName("renamedCategory")
        #
        #
        myContainerList = []
        myContainerList.append(curContainer)
        # Use context managers so the file handles are closed even if write/read raises
        with open(fn, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)
        myContainerList = []
        with open(fn, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                logger.debug("Recovered data category %s", name)
                logger.debug("Attribute list %r", repr(aList))
                logger.debug("Row list %r", repr(rList))
        self.assertEqual(len(myContainerList), 1)
        # rLen initial rows + 2 appended rows + rLen extended rows
        self.assertEqual(len(rList), 2 * rLen + 2)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_single_row(self, rw_data):
    """Round-trip rows whose values start with reserved CIF tokens (data_, loop_, save_, _)."""
    containerList = []
    container = DataContainer("myblock")
    category = DataCategory("pdbx_seqtool_mapping_ref")
    for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num", "pdb_chain_id", "ref_mon_id", "ref_mon_num", "details"):
        category.appendAttribute(attributeName)
    # Detail values deliberately begin with CIF reserved keywords to exercise quoting
    for detailValue in ('data_my_big_data_file', 'loop_my_big_data_loop', 'save_my_big_data_saveframe', '_category.item'):
        category.append([1, 2, 3, 4, 5, 6, 7, detailValue])
    container.append(category)
    fetched = container.getObj("pdbx_seqtool_mapping_ref")
    print("----attribute list %r\n" % fetched.getAttributeList())
    row = fetched.getRow(0)
    print("----ROW %r\n" % row)
    with open(str(rw_data['pathOutputFile2']), "w") as ofh:
        containerList.append(container)
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(containerList)
    assert len(containerList) == 1
def test_update_data_file(self, writer_paths):
    """Write a data file, read it back, update two columns of every row, and rewrite it."""
    containerList = []
    container = DataContainer("myblock")
    category = DataCategory("pdbx_seqtool_mapping_ref")
    for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num", "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
        category.appendAttribute(attributeName)
    for _ in range(4):
        category.append((1, 2, 3, 4, 5, 6, 7))
    container.append(category)
    containerList.append(container)
    with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
        PdbxWriter(ofh).write(containerList)
    #
    # Read and update the data -
    #
    containerList = []
    with open(str(writer_paths['pathOutputFile1']), "r") as ifh:
        PdbxReader(ifh).read(containerList)
    #
    mappingObj = containerList[0].getObj('pdbx_seqtool_mapping_ref')
    for rowIndex in range(mappingObj.getRowCount()):
        mappingObj.setValue('some value', 'ref_mon_id', rowIndex)
        mappingObj.setValue(100, 'ref_mon_num', rowIndex)
    with open(str(writer_paths['pathOutputFile2']), "w") as ofh:
        PdbxWriter(ofh).write(containerList)
    assert len(containerList) == 1
def _createfile1(pathout):
    """Write a two-block test file (myblock, secondblock) with aligned columns to *pathout*."""
    # (block name, [(category name, [attribute names], [rows]), ...])
    blockSpecs = [
        ("myblock", [
            ("pdbx_item_enumeration", ["name", "value", "detail"], [("1", "2", "3")]),
            ("exptl", ["absorpt_coefficient_mu", "entry_id", "method", "details"],
             [("?", "D_12345", "X-RAY DIFFRACTION", "some details")]),
            ("struct", ["title", "pdbx_descriptor"], [("Start title", "Start Descriptor")]),
        ]),
        ("secondblock", [
            ("pdbx_item_enumeration", ["name", "value", "detail"], [("3", "2", "1")]),
        ]),
    ]
    containerList = []
    for blockName, categorySpecs in blockSpecs:
        container = DataContainer(blockName)
        for categoryName, attributeNames, rows in categorySpecs:
            category = DataCategory(categoryName)
            for attributeName in attributeNames:
                category.appendAttribute(attributeName)
            for row in rows:
                category.append(row)
            container.append(category)
        containerList.append(container)
    with open(pathout, "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write(containerList)
def _createfile1(pathout):
    """Write a two-block test file (myblock, secondblock) with aligned columns to *pathout*."""
    # (block name, [(category name, [attribute names], [rows]), ...])
    blockSpecs = [
        ("myblock", [
            ("pdbx_item_enumeration", ['name', 'value', 'detail'], [('1', '2', '3')]),
            ("exptl", ['absorpt_coefficient_mu', 'entry_id', 'method', 'details'],
             [('?', 'D_12345', 'X-RAY DIFFRACTION', 'some details')]),
            ("struct", ['title', 'pdbx_descriptor'], [('Start title', 'Start Descriptor')]),
        ]),
        ("secondblock", [
            ("pdbx_item_enumeration", ['name', 'value', 'detail'], [('3', '2', '1')]),
        ]),
    ]
    containerList = []
    for blockName, categorySpecs in blockSpecs:
        container = DataContainer(blockName)
        for categoryName, attributeNames, rows in categorySpecs:
            category = DataCategory(categoryName)
            for attributeName in attributeNames:
                category.appendAttribute(attributeName)
            for row in rows:
                category.append(row)
            container.append(category)
        containerList.append(container)
    with open(pathout, "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write(containerList)
def test_row_dict_initialization(self, rw_data):
    """Row-dictionary initialization of a data category and data block.

    Builds a category from row dictionaries, appends/extends additional rows,
    renames attributes and the category, round-trips through a file, and checks
    the recovered row count (rLen initial + 2 appends + rLen extended).
    """
    rLen = 10
    fn = rw_data['pathOutputFile5']
    attributeNameList = ['a', 'b', 'c', 'd']
    rowList = [{'a': 1, 'b': 2, 'c': 3, 'd': 4} for i in range(rLen)]
    nameCat = 'myCategory'
    #
    #
    curContainer = DataContainer("myblock")
    aCat = DataCategory(nameCat, attributeNameList, rowList)
    aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
    aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
    aCat.extend(rowList)
    curContainer.append(aCat)
    aCat.renameAttributes({'a': 'aa', 'b': 'bb', 'c': 'cc', 'd': 'dd'})
    aCat.setName('renamedCategory')
    #
    #
    myContainerList = []
    myContainerList.append(curContainer)
    # Use context managers so the file handles are closed even if write/read raises
    with open(str(fn), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myContainerList)
    myContainerList = []
    with open(str(fn), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myContainerList)
    for container in myContainerList:
        for objName in container.getObjNameList():
            name, aList, rList = container.getObj(objName).get()
            print("Recovered data category %s\n" % name)
            print("Attribute list %r\n" % repr(aList))
            print("Row list %r\n" % repr(rList))
    assert len(myContainerList) == 1
    # rLen initial rows + 2 appended rows + rLen extended rows
    assert len(rList) == 2 * rLen + 2
def test_write_data_file(self, writer_paths):
    """Write a single-block data file with column alignment enabled."""
    containerList = []
    container = DataContainer("myblock")
    category = DataCategory("pdbx_seqtool_mapping_ref")
    for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num", "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
        category.appendAttribute(attributeName)
    # Rows carry pdb_chain_id values of widely varying width to exercise alignment
    for chainValue in ('55555555555555555555555555555555555555555555', '5555', '5555555555', '5'):
        category.append((1, 2, 3, 4, chainValue, 6, 7))
    container.append(category)
    containerList.append(container)
    with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write(containerList)
    assert len(containerList) == 1
def test_update_data_file(self, rw_data):
    """Write a data file, read it back, update two columns of every row, and rewrite it.

    Uses context managers throughout so file handles are closed even when an
    assertion or I/O error is raised (the original left bare open()/close() pairs).
    """
    myDataList = []
    curContainer = DataContainer("myblock")
    aCat = DataCategory("pdbx_seqtool_mapping_ref")
    aCat.appendAttribute("ordinal")
    aCat.appendAttribute("entity_id")
    aCat.appendAttribute("auth_mon_id")
    aCat.appendAttribute("auth_mon_num")
    aCat.appendAttribute("pdb_chain_id")
    aCat.appendAttribute("ref_mon_id")
    aCat.appendAttribute("ref_mon_num")
    aCat.append([9, 2, 3, 4, 5, 6, 7])
    aCat.append([10, 2, 3, 4, 5, 6, 7])
    aCat.append([11, 2, 3, 4, 5, 6, 7])
    aCat.append([12, 2, 3, 4, 5, 6, 7])
    curContainer.append(aCat)
    myDataList.append(curContainer)
    with open(str(rw_data['pathOutputFile1']), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
    myDataList = []
    with open(str(rw_data['pathOutputFile1']), "r") as ifh:
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
    myBlock = myDataList[0]
    myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
    # Overwrite two columns in every row
    for iRow in range(0, myCat.getRowCount()):
        myCat.setValue('some value', 'ref_mon_id', iRow)
        myCat.setValue(100, 'ref_mon_num', iRow)
    with open(str(rw_data['pathOutputFile2']), "w") as ofh:
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
    assert len(myDataList) == 1
def __dictionaryPragma(self, dictName, dictDescription, version, updateDate, comment):
    """Add CIF dictionary header details including name, version and history.

    Args:
        dictName (str): dictionary name
        dictDescription (str): dictionary description text
        version (str): dictionary version
        updateDate (str): last update date
        comment (str): revision comment

    Returns:
        Data container (object): data container with dictionary history and version details
    """
    headerContainer = DataContainer("pdbx_vrpt_ext.dic")
    # (category name, attribute names, single row) for each header category
    headerSpecs = (
        ("datablock", ["id", "description"], [dictName, dictDescription]),
        ("dictionary", ["title", "datablock_id", "version"], [dictName, dictName, version]),
        ("dictionary_history", ["version", "update", "revision"], [version, updateDate, comment]),
    )
    for categoryName, attributeNames, row in headerSpecs:
        dc = DataCategory(categoryName, attributeNameList=list(attributeNames))
        dc.append(row)
        headerContainer.append(dc)
    return headerContainer
def testSingleRow(self):
    """Test case - read/write single row and null row in data file."""
    try:
        containerList = []
        container = DataContainer("myblock")
        category = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num", "pdb_chain_id", "ref_mon_id", "ref_mon_num", "details"):
            category.appendAttribute(attributeName)
        # Detail values deliberately begin with CIF reserved keywords to exercise quoting
        for detailValue in ("data_my_big_data_file", "loop_my_big_data_loop", "save_my_big_data_saveframe", "_category.item"):
            category.append([1, 2, 3, 4, 5, 6, 7, detailValue])
        container.append(category)
        #
        fetched = container.getObj("pdbx_seqtool_mapping_ref")
        logger.debug("----attribute list %r", fetched.getAttributeList())
        row = fetched.getRow(0)
        logger.debug("----ROW %r", row)
        #
        with open(self.__pathOutputFile2, "w") as ofh:
            containerList.append(container)
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(containerList)
        self.assertEqual(len(containerList), 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_write_data_file(self, rw_data):
    """Write a single-block data file containing five rows."""
    containerList = []
    container = DataContainer("myblock")
    category = DataCategory("pdbx_seqtool_mapping_ref")
    for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num", "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
        category.appendAttribute(attributeName)
    # Four identical rows followed by one reversed row
    for _ in range(4):
        category.append([1, 2, 3, 4, 5, 6, 7])
    category.append([7, 6, 5, 4, 3, 2, 1])
    container.append(category)
    containerList.append(container)
    with open(str(rw_data['pathOutputFile1']), "w") as ofh:
        PdbxWriter(ofh).write(containerList)
    assert len(containerList) == 1
def __buildCategoryDefinition(self, name, description, keyAttributeNames, examples, contexts):
    """Construct a category definition from input metadata extracted from the XML schema,
    and from the input schema name mapping dictionary.

    Args:
        name (str): category name
        description (str): category description
        keyAttributeNames (list): key attribute names
        examples (list): category examples
        contexts (list): category contexts

    Returns:
        Definition container (object):
    """
    definitionContainer = DefinitionContainer(name)
    #
    categoryObj = DataCategory("category", attributeNameList=["id", "description", "mandatory_code"])
    categoryObj.append([name, description, "no"])
    definitionContainer.append(categoryObj)
    #
    keyObj = DataCategory("category_key", attributeNameList=["name"])
    for keyAttributeName in keyAttributeNames:
        keyObj.append([CifName.itemName(name, keyAttributeName)])
    definitionContainer.append(keyObj)
    #
    groupObj = DataCategory("category_group", attributeNameList=["id"])
    for groupName in ("inclusive_group", "validation_report_group"):
        groupObj.append([groupName])
    definitionContainer.append(groupObj)
    # pdbx_category_context
    contextObj = DataCategory("pdbx_category_context", attributeNameList=["category_id", "type"])
    for cType in contexts:
        contextObj.append([name, cType])
    definitionContainer.append(contextObj)
    #
    exampleObj = DataCategory("category_examples", attributeNameList=["detail", "case"])
    for example in examples:
        exampleObj.append([".", example])
    definitionContainer.append(exampleObj)
    return definitionContainer
def _createfile2(pathout):
    """Write a single-block ("test") data file with several small categories to *pathout*."""
    # (category name, [attribute names], [rows])
    categorySpecs = [
        ("new", ["item"], [("1",)]),
        ("second_category", ["row", "rowb"], [("1", "2")]),
        ("third", ["id", "val"], [("1", "a"), ("2", "b"), ("3", "c")]),
        ("exptl", ["method", "entry_id"], [("NEW", "something")]),
        ("struct", ["new", "pdbx_descriptor"], [("Something to add", "Override descriptor")]),
    ]
    container = DataContainer("test")
    for categoryName, attributeNames, rows in categorySpecs:
        category = DataCategory(categoryName)
        for attributeName in attributeNames:
            category.appendAttribute(attributeName)
        for row in rows:
            category.append(row)
        container.append(category)
    containerList = [container]
    with open(pathout, "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write(containerList)
def __deserialize(self, fh, storeStringsAsBytes=False):
    """Deserialize a BinaryCIF (msgpack-encoded) file into a list of DataContainer objects.

    Args:
        fh: open binary file handle positioned at the start of the msgpack payload
        storeStringsAsBytes (bool): passed through to BinaryCifDecoders; controls
            whether decoded strings are kept as bytes

    Returns:
        list: DataContainer objects, one per input data block (empty on failure;
        errors are logged, not raised).
    """
    cL = []
    try:
        dec = BinaryCifDecoders(storeStringsAsBytes=storeStringsAsBytes)
        bD = msgpack.unpack(fh)
        #
        logger.debug("bD.keys() %r", bD.keys())
        logger.debug("bD['dataBlocks'] %s", bD[self.__toBytes("dataBlocks")])
        #
        for dataBlock in bD[self.__toBytes("dataBlocks")]:
            # Block header becomes the container name; may be absent
            header = self.__fromBytes(dataBlock[self.__toBytes("header")]) if self.__toBytes("header") in dataBlock else None
            logger.debug("header %r", header)
            logger.debug("dataBlock %r", dataBlock)
            #
            dc = DataContainer(header)
            categoryList = dataBlock[self.__toBytes("categories")] if self.__toBytes("categories") in dataBlock else []
            for category in categoryList:
                # [1:] drops the leading character of the stored name
                # (presumably the "_" category prefix — TODO confirm against the BinaryCIF spec)
                catName = self.__fromBytes(category[self.__toBytes("name")])[1:]
                colList = category[self.__toBytes("columns")]
                logger.debug("catName %r columns %r", catName, colList)
                colD = OrderedDict()
                atNameList = []
                for col in colList:
                    logger.debug("col.keys() %r", col.keys())
                    atName = self.__fromBytes(col[self.__toBytes("name")])
                    atData = col[self.__toBytes("data")]
                    logger.debug("atData encoding (%d) data (%d)", len(atData[self.__toBytes("encoding")]), len(atData[self.__toBytes("data")]))
                    atMask = col[self.__toBytes("mask")]
                    logger.debug("catName %r atName %r", catName, atName)
                    logger.debug(" >atData.data %r", atData[self.__toBytes("data")])
                    logger.debug(" >atData.encoding (%d) %r", len(atData[self.__toBytes("encoding")]), atData[self.__toBytes("encoding")])
                    logger.debug(" >mask %r", atMask)
                    # Decode the column payload using its declared encoding chain
                    tVal = dec.decode(col[self.__toBytes("data")][self.__toBytes("data")], col[self.__toBytes("data")][self.__toBytes("encoding")])
                    if col[self.__toBytes("mask")]:
                        # Apply the value mask: mask 2 -> "?" , mask 1 -> "." , else keep decoded value
                        mVal = dec.decode(col[self.__toBytes("mask")][self.__toBytes("data")], col[self.__toBytes("mask")][self.__toBytes("encoding")])
                        tVal = ["?" if m == 2 else "." if m == 1 else d for d, m in zip(tVal, mVal)]
                    colD[atName] = tVal
                    atNameList.append(atName)
                #
                cObj = DataCategory(catName, attributeNameList=atNameList)
                # Transpose column-oriented data into rows
                genL = [colGen for colGen in colD.values()]
                for row in zip(*genL):
                    logger.debug("row %r", row)
                    cObj.append(row)
                #
                dc.append(cObj)
            cL.append(dc)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return cL
def testGenDDLm(self):
    """Generating alternative DDLm metadata format. (starting point)

    Reads the PDBx dictionary, then for every category emits a DDLm-style
    definition container plus one item definition per attribute, and writes
    the result as mmcif_pdbx_ddlm_auto.dic under the test-output directory.
    """
    try:
        myIo = IoAdapterPy(self.__verbose, self.__lfh)
        self.__containerList = myIo.readFile(inputFilePath=self.__pathPdbxDictionary)
        dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, verbose=self.__verbose)
        parentD = dApi.getParentDictionary()
        #
        oCList = []
        # Top-level dictionary header block
        dDef = DataContainer("mmcif_pdbx_ddlm_auto")
        dc = DataCategory("dictionary")
        dc.appendAttribute("title")
        dc.appendAttribute("class")
        dc.appendAttribute("version")
        dc.appendAttribute("date")
        dc.appendAttribute("ddl_conformance")
        dc.appendAttribute("text")
        dc.append(["mmcif_pdbx_ddlm_auto", "Instance", "latest", "2018-03-09", "ddlm best effort", "Software converted PDBx dictionary using DDLm semantics"])
        dDef.append(dc)
        oCList.append(dDef)
        catIdx = dApi.getCategoryIndex()
        for catName in sorted(catIdx.keys()):
            attNameList = catIdx[catName]
            # created definition container -
            cDef = DefinitionContainer(catName)
            oCList.append(cDef)
            #
            dc = DataCategory("definition")
            dc.appendAttribute("id")
            dc.appendAttribute("scope")
            dc.appendAttribute("class")
            dc.appendAttribute("update")
            dc.append([catName, "Category", "Loop", "2018-03-09"])
            cDef.append(dc)
            val = dApi.getCategoryDescription(category=catName)
            dc = DataCategory("description")
            dc.appendAttribute("text")
            dc.append([val])
            cDef.append(dc)
            #
            dc = DataCategory("name")
            dc.appendAttribute("category_id")
            dc.appendAttribute("object_id")
            # Use the first non-"inclusive_group" category group as the parent group
            valList = dApi.getCategoryGroupList(category=catName)
            pcg = catName
            for val in valList:
                if val != "inclusive_group":
                    pcg = val
                    break
            dc.append([catName, pcg])
            cDef.append(dc)
            valList = dApi.getCategoryKeyList(category=catName)
            if not valList:
                self.__lfh.write("Missing caegory key for category %s\n" % catName)
            else:
                # Emit a synthetic composite key item definition for the category key
                dc = DataCategory("category")
                dc.appendAttribute("key_id")
                kItemName = CifName.itemName(catName, "synthetic_key")
                dc.append([kItemName])
                cDef.append(dc)
                iDef = DefinitionContainer(kItemName)
                self.__makeKeyItem(catName, "synthetic_key", valList, iDef)
                oCList.append(iDef)
            # One item definition per attribute in the category
            for attName in attNameList:
                itemName = CifName.itemName(catName, attName)
                iDef = DefinitionContainer(itemName)
                oCList.append(iDef)
                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([itemName, "Item", "Single", "2013-08-22"])
                iDef.append(dc)
                #
                val = dApi.getDescription(category=catName, attribute=attName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                iDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")
                #
                # Record a parent link when one is defined for this item
                if itemName in parentD:
                    dc.appendAttribute("linked_item_id")
                    dc.append([catName, attName, parentD[itemName][0]])
                else:
                    dc.append([catName, attName])
                iDef.append(dc)
                #
                #
                aliasList = dApi.getItemAliasList(category=catName, attribute=attName)
                if aliasList:
                    dc = DataCategory("alias")
                    dc.appendAttribute("definition_id")
                    for alias in aliasList:
                        dc.append([alias[0]])
                    iDef.append(dc)
                enList = dApi.getEnumListAltWithDetail(category=catName, attribute=attName)
                # Map the PDBx type code to DDLm type attributes (purpose/source/contents)
                tC = dApi.getTypeCode(category=catName, attribute=attName)
                tcontainer = "Single"
                purpose = "Describe"
                source = "Recorded"
                contents = "Text"
                #
                if tC is None:
                    self.__lfh.write("Missing data type attribute %s\n" % attName)
                elif tC in ["code", "atcode", "name", "idname", "symop", "fax", "phone", "email", "code30", "ec-type"]:
                    purpose = "Encode"
                    contents = "Text"
                    source = "Assigned"
                elif tC in ["ucode"]:
                    purpose = "Encode"
                    contents = "Code"
                    source = "Assigned"
                elif tC in ["line", "uline", "text"]:
                    purpose = "Describe"
                    source = "Recorded"
                    contents = "Text"
                elif tC in ["int"]:
                    purpose = "Number"
                    source = "Recorded"
                    contents = "Integer"
                elif tC in ["int-range"]:
                    purpose = "Number"
                    source = "Recorded"
                    contents = "Range"
                elif tC in ["float"]:
                    purpose = "Measurand"
                    source = "Recorded"
                    contents = "Real"
                elif tC in ["float-range"]:
                    purpose = "Measurand"
                    source = "Recorded"
                    contents = "Range"
                elif tC.startswith("yyyy"):
                    source = "Assigned"
                    contents = "Date"
                    purpose = "Describe"
                # Enumerated items always have purpose "State"
                if enList:
                    purpose = "State"
                dc = DataCategory("type")
                dc.appendAttribute("purpose")
                dc.appendAttribute("source")
                dc.appendAttribute("contents")
                dc.appendAttribute("container")
                dc.append([purpose, source, contents, tcontainer])
                iDef.append(dc)
                #
                if enList:
                    dc = DataCategory("enumeration_set")
                    dc.appendAttribute("state")
                    dc.appendAttribute("detail")
                    for en in enList:
                        dc.append([en[0], en[1]])
                    iDef.append(dc)
                dfv = dApi.getDefaultValue(category=catName, attribute=attName)
                bvList = dApi.getBoundaryList(category=catName, attribute=attName)
                # Emit an enumeration category when a real default value or boundaries exist
                if ((dfv is not None) and (dfv not in ["?", "."])) or bvList:
                    row = []
                    dc = DataCategory("enumeration")
                    if dfv is not None:
                        dc.appendAttribute("default")
                        row.append(dfv)
                    if bvList:
                        # Collapse the boundary pairs to a single overall min:max range;
                        # "." endpoints are treated as open (sentinel values below)
                        dc.appendAttribute("range")
                        mminVp = -1000000
                        mmaxVp = 10000000
                        mminV = mmaxVp
                        mmaxV = mminVp
                        for bv in bvList:
                            minV = float(bv[0]) if bv[0] != "." else mminVp
                            maxV = float(bv[1]) if bv[1] != "." else mmaxVp
                            mminV = min(mminV, minV)
                            mmaxV = max(mmaxV, maxV)
                        if mminV == mminVp:
                            mminV = ""
                        if mmaxV == mmaxVp:
                            mmaxV = ""
                        row.append(str(mminV) + ":" + str(mmaxV))
                    dc.append(row)
                    iDef.append(dc)
        myIo.writeFile(outputFilePath=os.path.join(HERE, "test-output", "mmcif_pdbx_ddlm_auto.dic"), containerList=oCList)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __buildAttributeDefinition(self, atD, mD): """ Construct an attribute definition from input attribute dictionary containing metadata extracted from the XML schema, and from the input schema name mapping dictionary. Args: atD (dict): attribute metadata dictionary dictionaryMap (dict): mapping details for categories and attributes Returns: Attribute definition (object) """ # atName = atD["name"] catName = atD["category"] # mapAtName = mD["at"] if "at" in mD else atName mapCatName = mD["cat"] if "cat" in mD else catName pCat = mD["pCat"] if "pCat" in mD else None pAt = mD["pAt"] if "pAt" in mD else None pType = mD["pType"] if "pType" in mD else None # itemName = CifName.itemName(mapCatName, mapAtName) # aliasAtName = atD["aliasName"] if "aliasName" in atD else None aliasCatName = atD[ "aliasCategoryName"] if "aliasCategoryName" in atD else None # atDescription = atD["description"] if "description" in atD else None atDescription = self.__filterDescription(atDescription, self.__catMappingD) atDescription = self.__filterDescription(atDescription, self.__atMappingD) # mCode = "yes" if atD["mandatory"] == "mandatory" else "no" mCode = "yes" if mapAtName == "entry_id" else mCode # if atD["type"] not in self.__typeMap: logger.info("Unmapped type %r", atD["type"]) # atType = self.__typeMap[ atD["type"]] if atD["type"] in self.__typeMap else "UNKNOWN" # if atType == "text" and atDescription.find("comma separate") >= 0: atType = "alphanum-csv" if atType == "text" and "_date" in mapAtName and mapAtName != "report_creation_date": atType = "yyyy-mm-dd" # atType = pType if pType else atType if atType == "UNKNOWN": logger.info("Missing type mapping for %s %s %s", catName, atName, atD["type"]) # defA = DefinitionContainer(itemName) # dc = DataCategory("item_description", attributeNameList=["description"]) dc.append([atDescription]) defA.append(dc) dc = DataCategory( "item", attributeNameList=["name", "category_id", "mandatory_code"]) dc.append([itemName, mapCatName, mCode]) 
defA.append(dc) # dc = DataCategory("item_type", attributeNameList=["code"]) dc.append([atType]) defA.append(dc) dc = DataCategory( "item_aliases", attributeNameList=["alias_name", "dictionary", "version"]) dc.append([ aliasCatName + "." + aliasAtName, self.__schemaPath, self.__schemaVersion ]) defA.append(dc) # # Note - expect boundaries in pairs and 'inclusive' of endpoints # if "minIncl" in atD and "maxIncl" in atD and atD["minIncl"] and atD[ "maxIncl"]: minB = atD["minIncl"] maxB = atD["maxIncl"] dc = DataCategory("item_range", attributeNameList=["minimum", "maximum"]) dc.append([minB, minB]) dc.append([minB, maxB]) dc.append([maxB, maxB]) defA.append(dc) else: if atType == "float" and ((mapAtName.find("percent_") >= 0) or (mapAtName.find("percentile_") >= 0)): dc = DataCategory("item_range", attributeNameList=["minimum", "maximum"]) minB = "0.0" maxB = "100.0" dc.append([minB, minB]) dc.append([minB, maxB]) dc.append([maxB, maxB]) defA.append(dc) # if "enum" in atD and isinstance(atD["enum"], list) and atD["enum"]: dc = DataCategory("item_enumeration", attributeNameList=["value", "detail"]) for enumVal in atD["enum"]: dc.append([enumVal, "."]) defA.append(dc) # - add parent link relationships - if pCat and pAt: dc = DataCategory("item_linked", attributeNameList=["child_name", "parent_name"]) parentItemName = CifName.itemName(pCat, pAt) dc.append([itemName, parentItemName]) defA.append(dc) # # return defA
def test_gen_ddlm(self, in_tmpdir, test_files):
    """Convert a DDL2 mmCIF dictionary to a best-effort DDLm rendering.

    Reads mmcif_pdbx_v5_next.dic, walks every category and item definition
    via DictionaryApi, and writes mmcif_pdbx_ddlm_auto.dic into the current
    (tmp) working directory.
    """
    myIo = IoAdapterPy()
    containerList = myIo.readFile(inputFilePath=str(test_files / 'mmcif_pdbx_v5_next.dic'))
    dApi = DictionaryApi(containerList=containerList, consolidate=True)
    parentD = dApi.getParentDictionary()
    #
    oCList = []
    # Leading data block describing the generated dictionary itself.
    dDef = DataContainer('mmcif_pdbx_ddlm_auto')
    dc = DataCategory("dictionary")
    dc.appendAttribute("title")
    dc.appendAttribute("class")
    dc.appendAttribute("version")
    dc.appendAttribute("date")
    dc.appendAttribute("ddl_conformance")
    dc.appendAttribute("text")
    dc.append([
        'mmcif_pdbx_ddlm_auto', 'Instance', 'latest', '2018-03-09',
        'ddlm best effort',
        'Software converted PDBx dictionary using DDLm semantics'
    ])
    dDef.append(dc)
    oCList.append(dDef)

    catIdx = dApi.getCategoryIndex()
    for catName in sorted(catIdx.keys()):
        attNameList = catIdx[catName]
        # created definition container -
        cDef = DefinitionContainer(catName)
        oCList.append(cDef)
        #
        dc = DataCategory("definition")
        dc.appendAttribute("id")
        dc.appendAttribute("scope")
        dc.appendAttribute("class")
        dc.appendAttribute("update")
        dc.append([catName, "Category", "Loop", "2018-03-09"])
        cDef.append(dc)

        val = dApi.getCategoryDescription(category=catName)
        dc = DataCategory("description")
        dc.appendAttribute("text")
        dc.append([val])
        cDef.append(dc)
        #
        dc = DataCategory("name")
        dc.appendAttribute("category_id")
        dc.appendAttribute("object_id")
        valList = dApi.getCategoryGroupList(category=catName)
        # Use the first group other than 'inclusive_group' as the parent;
        # fall back to the category name itself.
        pcg = catName
        for val in valList:
            if val != 'inclusive_group':
                pcg = val
                break
        dc.append([catName, pcg])
        cDef.append(dc)

        valList = dApi.getCategoryKeyList(category=catName)
        if len(valList) < 1:
            # Fixed typo in the emitted message ("caegory" -> "category").
            print("Missing category key for category %s\n" % catName)
        else:
            # DDLm wants a single key item - synthesize a composite one.
            dc = DataCategory("category")
            dc.appendAttribute("key_id")
            kItemName = CifName.itemName(catName, "synthetic_key")
            dc.append([kItemName])
            cDef.append(dc)

            iDef = DefinitionContainer(kItemName)
            self._makeKeyItem(catName, "synthetic_key", valList, iDef)
            oCList.append(iDef)

        for attName in attNameList:
            itemName = CifName.itemName(catName, attName)
            iDef = DefinitionContainer(itemName)
            oCList.append(iDef)
            #
            dc = DataCategory("definition")
            dc.appendAttribute("id")
            dc.appendAttribute("scope")
            dc.appendAttribute("class")
            dc.appendAttribute("update")
            dc.append([itemName, "Item", "Single", "2013-08-22"])
            iDef.append(dc)
            #
            val = dApi.getDescription(category=catName, attribute=attName)
            dc = DataCategory("description")
            dc.appendAttribute("text")
            dc.append([val])
            iDef.append(dc)
            #
            dc = DataCategory("name")
            dc.appendAttribute("category_id")
            dc.appendAttribute("object_id")
            # Record the parent item link inline when one exists.
            if itemName in parentD:
                dc.appendAttribute("linked_item_id")
                dc.append([catName, attName, parentD[itemName][0]])
            else:
                dc.append([catName, attName])
            iDef.append(dc)
            #
            aliasList = dApi.getItemAliasList(category=catName, attribute=attName)
            if len(aliasList) > 0:
                dc = DataCategory("alias")
                dc.appendAttribute("definition_id")
                for alias in aliasList:
                    dc.append([alias[0]])
                iDef.append(dc)

            enList = dApi.getEnumListAltWithDetail(category=catName, attribute=attName)
            tC = dApi.getTypeCode(category=catName, attribute=attName)
            # Map DDL2 type codes onto DDLm (purpose, source, contents).
            tcontainer = 'Single'
            purpose = 'Describe'
            source = 'Recorded'
            contents = 'Text'
            #
            if tC is None:
                # NOTE(review): was self.__lfh.write(...) - the name-mangled
                # __lfh attribute is never assigned in this pytest-style
                # class and raised AttributeError here; report via print()
                # like the missing-key message above.
                print("Missing data type attribute %s\n" % attName)
            elif tC in [
                    'code', 'atcode', 'name', 'idname', 'symop', 'fax',
                    'phone', 'email', 'code30', 'ec-type'
            ]:
                purpose = 'Encode'
                contents = 'Text'
                source = 'Assigned'
            elif tC in ['ucode']:
                purpose = 'Encode'
                contents = 'Code'
                source = 'Assigned'
            elif tC in ['line', 'uline', 'text']:
                purpose = 'Describe'
                source = 'Recorded'
                contents = 'Text'
            elif tC in ['int']:
                purpose = 'Number'
                source = 'Recorded'
                contents = 'Integer'
            elif tC in ['int-range']:
                purpose = 'Number'
                source = 'Recorded'
                contents = 'Range'
            elif tC in ['float']:
                purpose = 'Measurand'
                source = 'Recorded'
                contents = 'Real'
            elif tC in ['float-range']:
                purpose = 'Measurand'
                source = 'Recorded'
                contents = 'Range'
            elif tC.startswith('yyyy'):
                source = 'Assigned'
                contents = 'Date'
                purpose = 'Describe'

            # Enumerated items are states regardless of the base type.
            if len(enList) > 0:
                purpose = 'State'

            dc = DataCategory("type")
            dc.appendAttribute("purpose")
            dc.appendAttribute("source")
            dc.appendAttribute("contents")
            dc.appendAttribute("container")
            dc.append([purpose, source, contents, tcontainer])
            iDef.append(dc)
            #
            if (len(enList) > 0):
                dc = DataCategory("enumeration_set")
                dc.appendAttribute("state")
                dc.appendAttribute("detail")
                for en in enList:
                    dc.append([en[0], en[1]])
                iDef.append(dc)

            dfv = dApi.getDefaultValue(category=catName, attribute=attName)
            bvList = dApi.getBoundaryList(category=catName, attribute=attName)
            if (((dfv is not None) and (dfv not in ['?', '.'])) or len(bvList) > 0):
                row = []
                dc = DataCategory("enumeration")
                if dfv is not None:
                    dc.appendAttribute("default")
                    row.append(dfv)
                if len(bvList) > 0:
                    # Collapse all DDL2 boundary pairs into one min:max range;
                    # '.' marks an open endpoint and is rendered as ''.
                    dc.appendAttribute("range")
                    mminVp = -1000000
                    mmaxVp = 10000000
                    mminV = mmaxVp
                    mmaxV = mminVp
                    for bv in bvList:
                        minV = float(bv[0]) if bv[0] != '.' else mminVp
                        maxV = float(bv[1]) if bv[1] != '.' else mmaxVp
                        mminV = min(mminV, minV)
                        mmaxV = max(mmaxV, maxV)
                    if mminV == mminVp:
                        mminV = ''
                    if mmaxV == mmaxVp:
                        mmaxV = ''
                    row.append(str(mminV) + ":" + str(mmaxV))
                dc.append(row)
                iDef.append(dc)

    myIo.writeFile(outputFilePath="mmcif_pdbx_ddlm_auto.dic", containerList=oCList)
def __parser(self, tokenizer, containerList, categorySelectionD=None, excludeFlag=False):
    """ Parser for PDBx data files and dictionaries.

        Input - tokenizer() reentrant method recognizing data item names (_category.attribute)
                quoted strings (single, double and multi-line semi-colon delimited), and unquoted strings.

                containerList - list-type container for data and definition objects parsed from
                from the input file.

                categorySelectionD - optional mapping of category names used to select
                (excludeFlag=False) or exclude (excludeFlag=True) categories while parsing.

        On return:
            The input containerList is appended with data and definition objects -
    """
    catSelectD = categorySelectionD if categorySelectionD is not None else {}
    logger.debug("Exclude Flag %r Category selection %r", excludeFlag, catSelectD)
    # Working container - data or definition
    curContainer = None
    # the last container of type data -
    previousDataContainer = None
    #
    # Working category container
    categoryIndex = {}
    curCategory = None
    #
    curRow = None
    state = None

    # Find the first reserved word and begin capturing data.
    # (Everything before the first data_/save_/global_ token is discarded.)
    while True:
        curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
        if curWord is None:
            continue
        reservedWord, state = self.__getState(curWord)
        if reservedWord is not None:
            break

    while True:
        #
        # Set the current state -
        #
        # At this point in the processing cycle we are expecting a token containing
        # either a '_category.attribute' or a reserved word.
        #
        if curCatName is not None:
            state = "ST_KEY_VALUE_PAIR"
        elif curWord is not None:
            reservedWord, state = self.__getState(curWord)
        else:
            self.__syntaxError("Miscellaneous syntax error")
            return

        #
        # Process _category.attribute value assignments
        #
        if state == "ST_KEY_VALUE_PAIR":
            try:
                curCategory = categoryIndex[curCatName]
            except KeyError:
                # A new category is encountered - create a container and add a row
                curCategory = categoryIndex[curCatName] = DataCategory(curCatName)
                #
                # check if we have all of the selection
                if not excludeFlag and self.__allSelected(curContainer, catSelectD):
                    return
                try:
                    # Append the category only when it passes the
                    # selection/exclusion filter (or no filter is set).
                    if catSelectD:
                        if not excludeFlag and curCatName in catSelectD:
                            curContainer.append(curCategory)
                        elif excludeFlag and curCatName not in catSelectD:
                            curContainer.append(curCategory)
                        else:
                            logger.debug("Skipped unselected/excluded category %s", curCatName)
                    else:
                        curContainer.append(curCategory)
                except AttributeError:
                    # curContainer is still None - item appeared before any data_ block.
                    self.__syntaxError("Category cannot be added to data_ block")
                    return
                curRow = []
                curCategory.append(curRow)
            else:
                # Recover the existing row from the category
                try:
                    # curRow = curCategory[0]
                    curRow = curCategory.getRow(0)
                except IndexError:
                    self.__syntaxError("Internal index error accessing category data")
                    return

            # Check for duplicate attributes and add attribute to table.
            if curAttName in curCategory.getAttributeList():
                self.__syntaxError("Duplicate attribute encountered in category")
                return
            else:
                curCategory.appendAttribute(curAttName)

            # Get the data for this attribute from the next token
            tCat, _, curQuotedString, curWord = next(tokenizer)
            if tCat is not None or (curQuotedString is None and curWord is None):
                self.__syntaxError("Missing data for item _%s.%s" % (curCatName, curAttName))

            if curWord is not None:
                #
                # Validation check token for misplaced reserved words -
                #
                reservedWord, state = self.__getState(curWord)
                if reservedWord is not None:
                    self.__syntaxError("Unexpected reserved word: %s" % (reservedWord))

                curRow.append(curWord)
            elif curQuotedString is not None:
                curRow.append(curQuotedString)
            else:
                self.__syntaxError("Missing value in item-value pair")

            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            continue

        #
        # Process a loop_ declaration and associated data -
        #
        elif state == "ST_TABLE":
            # The category name in the next curCatName,curAttName pair
            # defines the name of the category container.
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            if curCatName is None or curAttName is None:
                self.__syntaxError("Unexpected token in loop_ declaration")
                return

            # Check for a previous category declaration.
            if curCatName in categoryIndex:
                self.__syntaxError("Duplicate category declaration in loop_")
                return

            curCategory = DataCategory(curCatName)
            #
            # check if we have all of the selection
            if not excludeFlag and self.__allSelected(curContainer, catSelectD):
                return
            try:
                # Same selection/exclusion filtering as the key-value branch.
                if catSelectD:
                    if not excludeFlag and curCatName in catSelectD:
                        curContainer.append(curCategory)
                    elif excludeFlag and curCatName not in catSelectD:
                        curContainer.append(curCategory)
                    else:
                        logger.debug("Skipped unselected/excluded category %s", curCatName)
                else:
                    curContainer.append(curCategory)
            except AttributeError:
                self.__syntaxError("loop_ declaration outside of data_ block or save_ frame")
                return

            curCategory.appendAttribute(curAttName)

            # Read the rest of the loop_ declaration
            while True:
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
                if curCatName is None:
                    break
                if curCatName != curCategory.getName():
                    self.__syntaxError("Changed category name in loop_ declaration")
                    return
                curCategory.appendAttribute(curAttName)

            # If the next token is a 'word', check it for any reserved words -
            if curWord is not None:
                reservedWord, state = self.__getState(curWord)
                if reservedWord is not None:
                    if reservedWord == "stop":
                        return
                    else:
                        self.__syntaxError(
                            "Unexpected reserved word after loop declaration: %s" % (reservedWord))

            # Read the table of data for this loop_ -
            # One pass of the inner for-loop fills one row; the token
            # fetched at the end of each cell is examined below for the
            # loop-termination conditions.
            while True:
                curRow = []
                curCategory.append(curRow)
                for _ in curCategory.getAttributeList():
                    if curWord is not None:
                        curRow.append(curWord)
                    elif curQuotedString is not None:
                        curRow.append(curQuotedString)
                    curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                # loop_ data processing ends if -

                # A new _category.attribute is encountered
                if curCatName is not None:
                    break

                # A reserved word is encountered
                if curWord is not None:
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        break
            continue

        elif state == "ST_DEFINITION":
            # Ignore trailing unnamed saveframe delimiters e.g. 'save'
            sName = self.__getContainerName(curWord)
            if sName:
                curContainer = DefinitionContainer(sName)
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
            else:
                # reset current container to the last data container
                curContainer = previousDataContainer

            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

        elif state == "ST_DATA_CONTAINER":
            #
            dName = self.__getContainerName(curWord)
            if not dName:
                dName = "unidentified"
            curContainer = DataContainer(dName)
            containerList.append(curContainer)
            categoryIndex = {}
            curCategory = None
            previousDataContainer = curContainer
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

        elif state == "ST_STOP":
            ###
            # curCatName, curAttName, curQuotedString, curWord = tokenizer.next()
            # NOTE(review): this branch does not advance the tokenizer before
            # continuing, so a bare stop_ reaching here re-enters with the
            # same token - presumably stop_ is normally consumed inside the
            # loop_ branch above; confirm before relying on this path.
            continue

        elif state == "ST_GLOBAL":
            # global_ sections are captured in a reserved 'blank-global' container.
            curContainer = DataContainer("blank-global")
            curContainer.setGlobal()
            containerList.append(curContainer)
            categoryIndex = {}
            curCategory = None
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

        elif state == "ST_UNKNOWN":
            self.__syntaxError("Unrecogized syntax element: " + str(curWord))
            return
def _createfile2(pathout):
    """Write a small fixture CIF file with five fixed categories to *pathout*.

    The output contains one 'test' data block holding the categories
    new, second_category, third, exptl and struct with hard-coded rows,
    written with column alignment enabled.
    """
    # (category name, attribute names, rows) - fixed fixture content.
    category_specs = [
        ("new", ("item",), [('1',)]),
        ("second_category", ('row', 'rowb'), [('1', '2')]),
        ("third", ('id', 'val'), [('1', 'a'), ('2', 'b'), ('3', 'c')]),
        ("exptl", ('method', 'entry_id'), [('NEW', 'something')]),
        ("struct", ('new', 'pdbx_descriptor'), [('Something to add', 'Override descriptor')]),
    ]

    container = DataContainer("test")
    for cat_name, attributes, rows in category_specs:
        category = DataCategory(cat_name)
        for attribute in attributes:
            category.appendAttribute(attribute)
        for row in rows:
            category.append(row)
        container.append(category)

    with open(pathout, "w") as ofh:
        writer = PdbxWriter(ofh)
        writer.setAlignmentFlag(flag=True)
        writer.write([container])
def __generateData(self):
    """Generates data for test.  __testValues must be in sync.

    Builds a 'myblock' container holding a single 'pdbx_test' category with
    (ordinal, details) rows whose detail strings deliberately resemble CIF
    reserved words (data_, loop_, save_) in assorted capitalizations.
    """
    detail_values = [
        "data_my_big_data_file",
        "loop_my_big_data_loop",
        "save_my_big_data_saveframe",
        "_category.item",
        "Data_my_big_data_file",
        "Loop_my_big_data_loop",
        "Save_my_big_data_saveframe",
        "DatA_my_big_data_file",
    ]
    container = DataContainer("myblock")
    category = DataCategory("pdbx_test")
    category.appendAttribute("ordinal")
    category.appendAttribute("details")
    for ordinal, details in enumerate(detail_values, start=1):
        category.append([ordinal, details])
    container.append(category)
    return container