Example #1
    def _makeKeyItem(self, catName, attName, keyItemList, iDef):
        itemName = CifName.itemName(catName, attName)

        #
        dc = DataCategory("definition")
        dc.appendAttribute("id")
        dc.appendAttribute("scope")
        dc.appendAttribute("class")
        dc.appendAttribute("update")
        dc.append([itemName, "Item", "Single", "2013-08-22"])
        iDef.append(dc)
        #
        dc = DataCategory("description")
        dc.appendAttribute("text")
        dc.append(['synthetic composite key'])
        iDef.append(dc)
        #
        dc = DataCategory("name")
        dc.appendAttribute("category_id")
        dc.appendAttribute("object_id")
        dc.append([catName, attName])
        iDef.append(dc)
        tcontainer = 'Set'
        purpose = 'Composite'
        source = 'Derived'
        contents = 'Name'
        dimension = '[%d]' % len(keyItemList)
        #

        dc = DataCategory("type")
        dc.appendAttribute("purpose")
        dc.appendAttribute("source")
        dc.appendAttribute("contents")
        dc.appendAttribute("container")
        dc.appendAttribute("dimension")
        dc.append([purpose, source, contents, tcontainer, dimension])
        iDef.append(dc)

        dc = DataCategory("method")
        dc.appendAttribute("purpose")
        dc.appendAttribute("expression")

        tmpl = '''

                      With row as %s

                           %s = [%s]

        '''
        mText = tmpl % (catName, itemName, ','.join(keyItemList))
        dc.append(['Evaluation', mText])
        iDef.append(dc)
    def __makeKeyItem(self, catName, attName, keyItemList, iDef):
        itemName = CifName.itemName(catName, attName)

        #
        dc = DataCategory("definition")
        dc.appendAttribute("id")
        dc.appendAttribute("scope")
        dc.appendAttribute("class")
        dc.appendAttribute("update")
        dc.append([itemName, "Item", "Single", "2013-08-22"])
        iDef.append(dc)
        #
        dc = DataCategory("description")
        dc.appendAttribute("text")
        dc.append(["synthetic composite key"])
        iDef.append(dc)
        #
        dc = DataCategory("name")
        dc.appendAttribute("category_id")
        dc.appendAttribute("object_id")
        dc.append([catName, attName])
        iDef.append(dc)
        tcontainer = "Set"
        purpose = "Composite"
        source = "Derived"
        contents = "Name"
        dimension = "[%d]" % len(keyItemList)
        #

        dc = DataCategory("type")
        dc.appendAttribute("purpose")
        dc.appendAttribute("source")
        dc.appendAttribute("contents")
        dc.appendAttribute("container")
        dc.appendAttribute("dimension")
        dc.append([purpose, source, contents, tcontainer, dimension])
        iDef.append(dc)

        dc = DataCategory("method")
        dc.appendAttribute("purpose")
        dc.appendAttribute("expression")

        tmpl = """

                      With row as %s

                           %s = [%s]

        """
        mText = tmpl % (catName, itemName, ",".join(keyItemList))
        dc.append(["Evaluation", mText])
        iDef.append(dc)
    def testWriteDataFile(self):
        """Test case -  write data file
        """
        try:
            #
            myDataList = []

            curContainer = DataContainer("myblock")
            aCat = DataCategory("pdbx_seqtool_mapping_ref")
            aCat.appendAttribute("ordinal")
            aCat.appendAttribute("entity_id")
            aCat.appendAttribute("auth_mon_id")
            aCat.appendAttribute("auth_mon_num")
            aCat.appendAttribute("pdb_chain_id")
            aCat.appendAttribute("ref_mon_id")
            aCat.appendAttribute("ref_mon_num")
            aCat.append((1, 2, 3, 4, "55555555555555555555555555555555555555555555", 6, 7))
            aCat.append((1, 2, 3, 4, "5555", 6, 7))
            aCat.append((1, 2, 3, 4, "5555555555", 6, 7))
            aCat.append((1, 2, 3, 4, "5", 6, 7))
            curContainer.append(aCat)
            myDataList.append(curContainer)
            with open(self.__pathOutputFile1, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.setAlignmentFlag(flag=True)
                pdbxW.write(myDataList)
            self.assertEqual(len(myDataList), 1)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
    def testUpdateDataFile(self):
        """Test case -  update data file
        """
        try:
            # Create an initial data file --
            #
            myDataList = []

            curContainer = DataContainer("myblock")
            aCat = DataCategory("pdbx_seqtool_mapping_ref")
            aCat.appendAttribute("ordinal")
            aCat.appendAttribute("entity_id")
            aCat.appendAttribute("auth_mon_id")
            aCat.appendAttribute("auth_mon_num")
            aCat.appendAttribute("pdb_chain_id")
            aCat.appendAttribute("ref_mon_id")
            aCat.appendAttribute("ref_mon_num")
            aCat.append((1, 2, 3, 4, 5, 6, 7))
            aCat.append((1, 2, 3, 4, 5, 6, 7))
            aCat.append((1, 2, 3, 4, 5, 6, 7))
            aCat.append((1, 2, 3, 4, 5, 6, 7))
            curContainer.append(aCat)
            myDataList.append(curContainer)
            with open(self.__pathOutputFile1, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myDataList)
            #
            # Read and update the data -
            #
            myDataList = []
            with open(self.__pathOutputFile1, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myDataList)
            #
            myBlock = myDataList[0]
            # myBlock.printIt()
            myCat = myBlock.getObj("pdbx_seqtool_mapping_ref")
            # myCat.printIt()
            for iRow in range(0, myCat.getRowCount()):
                myCat.setValue("some value", "ref_mon_id", iRow)
                myCat.setValue(100, "ref_mon_num", iRow)
            with open(self.__pathOutputFile2, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myDataList)
            self.assertEqual(len(myDataList), 1)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #5
    def writeDefaultDataTypeMap(self, outPath, dataTyping="ANY"):
        """Write data file containing application default dictionary to application data type mapping

        data_rcsb_data_type_map
          loop_
          _pdbx_data_type_application_map.application_name
          _pdbx_data_type_application_map.type_code
          _pdbx_data_type_application_map.app_type_code
          _pdbx_data_type_application_map.app_precision_default
          _pdbx_data_type_application_map.app_width_default
          # .... type mapping data ...
        """
        try:
            #
            containerList = []
            curContainer = DataContainer("rcsb_data_type_map")
            aCat = DataCategory("pdbx_data_type_application_map")
            aCat.appendAttribute("application_name")
            aCat.appendAttribute("type_code")
            aCat.appendAttribute("app_type_code")
            aCat.appendAttribute("app_width_default")
            aCat.appendAttribute("app_precision_default")
            for (cifType, simpleType, defWidth, defPrecision) in zip(
                    DataTypeApplicationInfo.cifTypes,
                    DataTypeApplicationInfo.appTypes,
                    DataTypeApplicationInfo.defaultWidths,
                    DataTypeApplicationInfo.defaultPrecisions):
                if self.__isNull(cifType):
                    continue
                aCat.append(
                    [dataTyping, cifType, simpleType, defWidth, defPrecision])
            curContainer.append(aCat)
            containerList.append(curContainer)
            #
            mU = MarshalUtil(workPath=self.__workPath)
            ok = mU.doExport(outPath,
                             containerList,
                             fmt="mmcif",
                             enforceAscii=True,
                             useCharRefs=True,
                             raiseExceptions=True)

            return ok
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
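
The docstring above describes the exported pdbx_data_type_application_map layout. Below is a minimal round-trip sketch (not part of the original example), assuming MarshalUtil.doImport is the read counterpart of the doExport call used above; the import path and file name are illustrative.

# Illustration only: re-read an exported type map and inspect a few rows.
from rcsb.utils.io.MarshalUtil import MarshalUtil

mU = MarshalUtil()
containerList = mU.doImport("data_type_map.cif", fmt="mmcif")  # assumed output path
catObj = containerList[0].getObj("pdbx_data_type_application_map")
for iRow in range(catObj.getRowCount()):
    print(catObj.getValue("type_code", iRow), catObj.getValue("app_type_code", iRow))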
Example #6
    def testRowDictInitialization(self):
        """Test case -  Row dictionary initialization of a data category and data block
        """
        try:
            #
            rLen = 10
            fn = self.__pathOutputFile5
            attributeNameList = ["a", "b", "c", "d"]
            rowList = [{"a": 1, "b": 2, "c": 3, "d": 4} for i in range(rLen)]
            nameCat = "myCategory"
            #
            #
            curContainer = DataContainer("myblock")
            aCat = DataCategory(nameCat, attributeNameList, rowList)
            aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
            aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
            aCat.extend(rowList)
            curContainer.append(aCat)
            aCat.renameAttributes({"a": "aa", "b": "bb", "c": "cc", "d": "dd"})
            aCat.setName("renamedCategory")
            #
            #
            myContainerList = []
            myContainerList.append(curContainer)
            ofh = open(fn, "w")
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)
            ofh.close()

            myContainerList = []
            ifh = open(fn, "r")
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
            ifh.close()
            for container in myContainerList:
                for objName in container.getObjNameList():
                    name, aList, rList = container.getObj(objName).get()
                    logger.debug("Recovered data category  %s", name)
                    logger.debug("Attribute list           %r", repr(aList))
                    logger.debug("Row list                 %r", repr(rList))
            self.assertEqual(len(myContainerList), 1)
            self.assertEqual(len(rList), 2 * rLen + 2)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #7
    def test_single_row(self, rw_data):
        myDataList = []
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("entity_id")
        aCat.appendAttribute("auth_mon_id")
        aCat.appendAttribute("auth_mon_num")
        aCat.appendAttribute("pdb_chain_id")
        aCat.appendAttribute("ref_mon_id")
        aCat.appendAttribute("ref_mon_num")
        aCat.appendAttribute("details")
        aCat.append([1, 2, 3, 4, 5, 6, 7, 'data_my_big_data_file'])
        aCat.append([1, 2, 3, 4, 5, 6, 7, 'loop_my_big_data_loop'])
        aCat.append([1, 2, 3, 4, 5, 6, 7, 'save_my_big_data_saveframe'])
        aCat.append([1, 2, 3, 4, 5, 6, 7, '_category.item'])

        curContainer.append(aCat)

        bCat = curContainer.getObj("pdbx_seqtool_mapping_ref")
        print("----attribute list %r\n" % bCat.getAttributeList())
        row = bCat.getRow(0)
        print("----ROW %r\n" % row)

        with open(str(rw_data['pathOutputFile2']), "w") as ofh:
            myDataList.append(curContainer)
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)

        assert len(myDataList) == 1
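
A possible companion check (not in the original example), assuming the same rw_data fixture, that the previous test has already written the file, and the PdbxReader API used elsewhere on this page: re-read the output and confirm that values resembling CIF reserved words survive the round trip.

    def test_single_row_roundtrip(self, rw_data):
        # Hypothetical follow-up test: strings beginning with data_, loop_, save_
        # or an underscore must be quoted by the writer and recovered verbatim.
        myDataList = []
        with open(str(rw_data['pathOutputFile2']), "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
        cat = myDataList[0].getObj("pdbx_seqtool_mapping_ref")
        assert cat.getValue("details", 0) == "data_my_big_data_file"
        assert cat.getValue("details", 3) == "_category.item"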
Example #8
    def test_update_data_file(self, writer_paths):
        myDataList = []

        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("entity_id")
        aCat.appendAttribute("auth_mon_id")
        aCat.appendAttribute("auth_mon_num")
        aCat.appendAttribute("pdb_chain_id")
        aCat.appendAttribute("ref_mon_id")
        aCat.appendAttribute("ref_mon_num")
        aCat.append((1, 2, 3, 4, 5, 6, 7))
        aCat.append((1, 2, 3, 4, 5, 6, 7))
        aCat.append((1, 2, 3, 4, 5, 6, 7))
        aCat.append((1, 2, 3, 4, 5, 6, 7))
        curContainer.append(aCat)
        myDataList.append(curContainer)
        with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
        #
        # Read and update the data -
        #
        myDataList = []
        with open(str(writer_paths['pathOutputFile1']), "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
        #
        myBlock = myDataList[0]
        # myBlock.printIt()
        myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
        # myCat.printIt()
        for iRow in range(0, myCat.getRowCount()):
            myCat.setValue('some value', 'ref_mon_id', iRow)
            myCat.setValue(100, 'ref_mon_num', iRow)
        with open(str(writer_paths['pathOutputFile2']), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
        assert len(myDataList) == 1
Example #9
    def _createfile1(pathout):
        my_data_list = []

        cur_container = DataContainer("myblock")

        acat = DataCategory("pdbx_item_enumeration")
        acat.appendAttribute("name")
        acat.appendAttribute("value")
        acat.appendAttribute("detail")
        acat.append(("1", "2", "3"))

        cur_container.append(acat)

        acat = DataCategory("exptl")
        acat.appendAttribute("absorpt_coefficient_mu")
        acat.appendAttribute("entry_id")
        acat.appendAttribute("method")
        acat.appendAttribute("details")
        acat.append(("?", "D_12345", "X-RAY DIFFRACTION", "some details"))

        cur_container.append(acat)

        acat = DataCategory("struct")
        acat.appendAttribute("title")
        acat.appendAttribute("pdbx_descriptor")
        acat.append(("Start title", "Start Descriptor"))

        cur_container.append(acat)

        my_data_list.append(cur_container)

        # Second block
        cur_container = DataContainer("secondblock")

        acat = DataCategory("pdbx_item_enumeration")
        acat.appendAttribute("name")
        acat.appendAttribute("value")
        acat.appendAttribute("detail")
        acat.append(("3", "2", "1"))

        cur_container.append(acat)

        my_data_list.append(cur_container)

        with open(pathout, "w") as ofh:
            pdbxw = PdbxWriter(ofh)
            pdbxw.setAlignmentFlag(flag=True)
            pdbxw.write(my_data_list)
    def _createfile1(pathout):
        my_data_list = []

        cur_container = DataContainer("myblock")

        acat = DataCategory("pdbx_item_enumeration")
        acat.appendAttribute("name")
        acat.appendAttribute("value")
        acat.appendAttribute("detail")
        acat.append(('1', '2', '3'))

        cur_container.append(acat)

        acat = DataCategory("exptl")
        acat.appendAttribute('absorpt_coefficient_mu')
        acat.appendAttribute('entry_id')
        acat.appendAttribute('method')
        acat.appendAttribute('details')
        acat.append(('?', 'D_12345', 'X-RAY DIFFRACTION', 'some details'))

        cur_container.append(acat)

        acat = DataCategory("struct")
        acat.appendAttribute('title')
        acat.appendAttribute('pdbx_descriptor')
        acat.append(('Start title', 'Start Descriptor'))

        cur_container.append(acat)

        my_data_list.append(cur_container)

        # Second block
        cur_container = DataContainer("secondblock")

        acat = DataCategory("pdbx_item_enumeration")
        acat.appendAttribute("name")
        acat.appendAttribute("value")
        acat.appendAttribute("detail")
        acat.append(('3', '2', '1'))

        cur_container.append(acat)

        my_data_list.append(cur_container)

        with open(pathout, "w") as ofh:
            pdbxw = PdbxWriter(ofh)
            pdbxw.setAlignmentFlag(flag=True)
            pdbxw.write(my_data_list)
Example #11
    def test_row_dict_initialization(self, rw_data):
        rLen = 10
        fn = rw_data['pathOutputFile5']
        attributeNameList = ['a', 'b', 'c', 'd']
        rowList = [{'a': 1, 'b': 2, 'c': 3, 'd': 4} for i in range(rLen)]
        nameCat = 'myCategory'
        #
        #
        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
        aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
        aCat.extend(rowList)
        curContainer.append(aCat)
        aCat.renameAttributes({'a': 'aa', 'b': 'bb', 'c': 'cc', 'd': 'dd'})
        aCat.setName('renamedCategory')
        #
        #
        myContainerList = []
        myContainerList.append(curContainer)
        ofh = open(str(fn), "w")
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myContainerList)
        ofh.close()

        myContainerList = []
        ifh = open(str(fn), "r")
        pRd = PdbxReader(ifh)
        pRd.read(myContainerList)
        ifh.close()
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                print("Recovered data category  %s\n" % name)
                print("Attribute list           %r\n" % repr(aList))
                print("Row list                 %r\n" % repr(rList))
        assert len(myContainerList) == 1
        assert len(rList) == 2 * rLen + 2
Example #12
    def test_write_data_file(self, writer_paths):
        myDataList = []

        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("entity_id")
        aCat.appendAttribute("auth_mon_id")
        aCat.appendAttribute("auth_mon_num")
        aCat.appendAttribute("pdb_chain_id")
        aCat.appendAttribute("ref_mon_id")
        aCat.appendAttribute("ref_mon_num")
        aCat.append(
            (1, 2, 3, 4, '55555555555555555555555555555555555555555555', 6, 7))
        aCat.append((1, 2, 3, 4, '5555', 6, 7))
        aCat.append((1, 2, 3, 4, '5555555555', 6, 7))
        aCat.append((1, 2, 3, 4, '5', 6, 7))
        curContainer.append(aCat)
        myDataList.append(curContainer)
        with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.setAlignmentFlag(flag=True)
            pdbxW.write(myDataList)
        assert len(myDataList) == 1
Example #13
    def test_update_data_file(self, rw_data):
        myDataList = []

        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("entity_id")
        aCat.appendAttribute("auth_mon_id")
        aCat.appendAttribute("auth_mon_num")
        aCat.appendAttribute("pdb_chain_id")
        aCat.appendAttribute("ref_mon_id")
        aCat.appendAttribute("ref_mon_num")
        aCat.append([9, 2, 3, 4, 5, 6, 7])
        aCat.append([10, 2, 3, 4, 5, 6, 7])
        aCat.append([11, 2, 3, 4, 5, 6, 7])
        aCat.append([12, 2, 3, 4, 5, 6, 7])

        curContainer.append(aCat)
        myDataList.append(curContainer)
        ofh = open(str(rw_data['pathOutputFile1']), "w")
        pdbxW = PdbxWriter(ofh)
        pdbxW.write(myDataList)
        ofh.close()

        myDataList = []
        ifh = open(str(rw_data['pathOutputFile1']), "r")
        pRd = PdbxReader(ifh)
        pRd.read(myDataList)
        ifh.close()
        myBlock = myDataList[0]
        myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
        for iRow in range(0, myCat.getRowCount()):
            myCat.setValue('some value', 'ref_mon_id', iRow)
            myCat.setValue(100, 'ref_mon_num', iRow)

        with open(str(rw_data['pathOutputFile2']), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)

        assert len(myDataList) == 1
Example #14
    def __dictionaryPragma(self, dictName, dictDescription, version,
                           updateDate, comment):
        """ Add CIF dictionary header details including name, version and history.

        Returns:
            Data container (object)  data container with dictionary history and version details
        """
        #
        dataH = DataContainer("pdbx_vrpt_ext.dic")
        dc = DataCategory("datablock", attributeNameList=["id", "description"])
        dc.append([dictName, dictDescription])
        dataH.append(dc)
        dc = DataCategory(
            "dictionary",
            attributeNameList=["title", "datablock_id", "version"])
        dc.append([dictName, dictName, version])
        dataH.append(dc)
        dc = DataCategory("dictionary_history",
                          attributeNameList=["version", "update", "revision"])
        dc.append([version, updateDate, comment])
        dataH.append(dc)
        return dataH
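
A minimal sketch (not in the original) of how the returned header container might be serialized ahead of the definition containers, reusing the PdbxWriter calls shown in the other examples; the method name, the defContainerList argument, and the output path are illustrative.

    def __writeDictionary(self, dataH, defContainerList, outPath):
        # Illustration only: emit the dictionary pragma block followed by definitions.
        containerList = [dataH]
        containerList.extend(defContainerList)
        with open(outPath, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.setAlignmentFlag(flag=True)
            pdbxW.write(containerList)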
Example #15
    def testSingleRow(self):
        """Test case -  read/write single row and null row in data file
        """
        try:
            #
            myDataList = []
            # ofh = open(self.__pathOutputFile1, "w")
            curContainer = DataContainer("myblock")
            aCat = DataCategory("pdbx_seqtool_mapping_ref")
            aCat.appendAttribute("ordinal")
            aCat.appendAttribute("entity_id")
            aCat.appendAttribute("auth_mon_id")
            aCat.appendAttribute("auth_mon_num")
            aCat.appendAttribute("pdb_chain_id")
            aCat.appendAttribute("ref_mon_id")
            aCat.appendAttribute("ref_mon_num")
            aCat.appendAttribute("details")
            aCat.append([1, 2, 3, 4, 5, 6, 7, "data_my_big_data_file"])
            aCat.append([1, 2, 3, 4, 5, 6, 7, "loop_my_big_data_loop"])
            aCat.append([1, 2, 3, 4, 5, 6, 7, "save_my_big_data_saveframe"])
            aCat.append([1, 2, 3, 4, 5, 6, 7, "_category.item"])
            # aCat.dumpIt()
            curContainer.append(aCat)
            #
            bCat = curContainer.getObj("pdbx_seqtool_mapping_ref")
            logger.debug("----attribute list %r", bCat.getAttributeList())
            row = bCat.getRow(0)
            logger.debug("----ROW %r", row)
            #
            with open(self.__pathOutputFile2, "w") as ofh:
                myDataList.append(curContainer)
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myDataList)

            self.assertEqual(len(myDataList), 1)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #16
    def test_write_data_file(self, rw_data):
        myDataList = []
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("entity_id")
        aCat.appendAttribute("auth_mon_id")
        aCat.appendAttribute("auth_mon_num")
        aCat.appendAttribute("pdb_chain_id")
        aCat.appendAttribute("ref_mon_id")
        aCat.appendAttribute("ref_mon_num")
        aCat.append([1, 2, 3, 4, 5, 6, 7])
        aCat.append([1, 2, 3, 4, 5, 6, 7])
        aCat.append([1, 2, 3, 4, 5, 6, 7])
        aCat.append([1, 2, 3, 4, 5, 6, 7])
        aCat.append([7, 6, 5, 4, 3, 2, 1])
        curContainer.append(aCat)

        myDataList.append(curContainer)
        with open(str(rw_data['pathOutputFile1']), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
        assert len(myDataList) == 1
Example #17
    def __buildCategoryDefinition(self, name, description, keyAttributeNames,
                                  examples, contexts):
        """Construct a category definition from the input category name, description,
            key attribute names, examples, and contexts extracted from the XML schema.

        Args:
            name (str): category name
            description (str): category description
            keyAttributeNames (list): key attribute names
            examples (list): category examples
            contexts (list): category contexts

        Returns:
            Definition container (object):

        """
        defC = DefinitionContainer(name)
        #
        dc = DataCategory(
            "category",
            attributeNameList=["id", "description", "mandatory_code"])
        dc.append([name, description, "no"])
        defC.append(dc)
        #
        dc = DataCategory("category_key", attributeNameList=["name"])
        for keyAttributeName in keyAttributeNames:
            keyItemName = CifName.itemName(name, keyAttributeName)
            dc.append([keyItemName])
        defC.append(dc)

        dc = DataCategory("category_group", attributeNameList=["id"])
        dc.append(["inclusive_group"])
        dc.append(["validation_report_group"])
        defC.append(dc)
        # pdbx_category_context
        dc = DataCategory("pdbx_category_context",
                          attributeNameList=["category_id", "type"])
        for cType in contexts:
            dc.append([name, cType])
        defC.append(dc)
        #
        dc = DataCategory("category_examples",
                          attributeNameList=["detail", "case"])
        for example in examples:
            dc.append([".", example])
        defC.append(dc)

        return defC
Example #18
    def _createfile2(pathout):
        my_data_list = []

        cur_container = DataContainer("test")

        acat = DataCategory("new")
        acat.appendAttribute("item")
        acat.append(("1",))

        cur_container.append(acat)

        acat = DataCategory("second_category")
        acat.appendAttribute("row")
        acat.appendAttribute("rowb")
        acat.append(("1", "2"))

        cur_container.append(acat)

        acat = DataCategory("third")
        acat.appendAttribute("id")
        acat.appendAttribute("val")
        acat.append(("1", "a"))
        acat.append(("2", "b"))
        acat.append(("3", "c"))

        cur_container.append(acat)

        acat = DataCategory("exptl")
        acat.appendAttribute("method")
        acat.appendAttribute("entry_id")
        acat.append(("NEW", "something"))

        cur_container.append(acat)

        acat = DataCategory("struct")
        acat.appendAttribute("new")
        acat.appendAttribute("pdbx_descriptor")
        acat.append(("Something to add", "Override descriptor"))

        cur_container.append(acat)

        my_data_list.append(cur_container)

        with open(pathout, "w") as ofh:
            pdbxw = PdbxWriter(ofh)
            pdbxw.setAlignmentFlag(flag=True)
            pdbxw.write(my_data_list)
Example #19
    def __deserialize(self, fh, storeStringsAsBytes=False):
        cL = []
        try:
            dec = BinaryCifDecoders(storeStringsAsBytes=storeStringsAsBytes)
            bD = msgpack.unpack(fh)
            #
            logger.debug("bD.keys() %r", bD.keys())
            logger.debug("bD['dataBlocks'] %s",
                         bD[self.__toBytes("dataBlocks")])
            #
            for dataBlock in bD[self.__toBytes("dataBlocks")]:
                header = self.__fromBytes(
                    dataBlock[self.__toBytes("header")]) if self.__toBytes(
                        "header") in dataBlock else None
                logger.debug("header %r", header)
                logger.debug("dataBlock %r", dataBlock)
                #
                dc = DataContainer(header)
                categoryList = dataBlock[self.__toBytes(
                    "categories")] if self.__toBytes(
                        "categories") in dataBlock else []
                for category in categoryList:
                    catName = self.__fromBytes(
                        category[self.__toBytes("name")])[1:]
                    colList = category[self.__toBytes("columns")]
                    logger.debug("catName %r columns %r", catName, colList)
                    colD = OrderedDict()
                    atNameList = []
                    for col in colList:
                        logger.debug("col.keys() %r", col.keys())
                        atName = self.__fromBytes(col[self.__toBytes("name")])
                        atData = col[self.__toBytes("data")]
                        logger.debug("atData encoding (%d) data (%d)",
                                     len(atData[self.__toBytes("encoding")]),
                                     len(atData[self.__toBytes("data")]))
                        atMask = col[self.__toBytes("mask")]
                        logger.debug("catName %r atName %r", catName, atName)
                        logger.debug(" >atData.data    %r",
                                     atData[self.__toBytes("data")])
                        logger.debug(" >atData.encoding (%d) %r",
                                     len(atData[self.__toBytes("encoding")]),
                                     atData[self.__toBytes("encoding")])
                        logger.debug(" >mask %r", atMask)
                        tVal = dec.decode(
                            col[self.__toBytes("data")][self.__toBytes(
                                "data")], col[self.__toBytes("data")][
                                    self.__toBytes("encoding")])
                        if col[self.__toBytes("mask")]:
                            mVal = dec.decode(
                                col[self.__toBytes("mask")][self.__toBytes(
                                    "data")], col[self.__toBytes("mask")][
                                        self.__toBytes("encoding")])
                            tVal = [
                                "?" if m == 2 else "." if m == 1 else d
                                for d, m in zip(tVal, mVal)
                            ]
                        colD[atName] = tVal
                        atNameList.append(atName)
                    #
                    cObj = DataCategory(catName, attributeNameList=atNameList)
                    genL = [colGen for colGen in colD.values()]
                    for row in zip(*genL):
                        logger.debug("row %r", row)
                        cObj.append(row)
                    #
                    dc.append(cObj)
                cL.append(dc)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return cL
    def testGenDDLm(self):
        """Generating alternative DDLm metadata format. (starting point)"""
        try:
            myIo = IoAdapterPy(self.__verbose, self.__lfh)
            self.__containerList = myIo.readFile(
                inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(containerList=self.__containerList,
                                 consolidate=True,
                                 verbose=self.__verbose)
            parentD = dApi.getParentDictionary()
            #
            oCList = []
            dDef = DataContainer("mmcif_pdbx_ddlm_auto")
            dc = DataCategory("dictionary")
            dc.appendAttribute("title")
            dc.appendAttribute("class")
            dc.appendAttribute("version")
            dc.appendAttribute("date")
            dc.appendAttribute("ddl_conformance")
            dc.appendAttribute("text")
            dc.append([
                "mmcif_pdbx_ddlm_auto", "Instance", "latest", "2018-03-09",
                "ddlm best effort",
                "Software converted PDBx dictionary using DDLm semantics"
            ])
            dDef.append(dc)
            oCList.append(dDef)

            catIdx = dApi.getCategoryIndex()
            for catName in sorted(catIdx.keys()):
                attNameList = catIdx[catName]
                # created definition container -
                cDef = DefinitionContainer(catName)
                oCList.append(cDef)
                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([catName, "Category", "Loop", "2018-03-09"])
                cDef.append(dc)
                val = dApi.getCategoryDescription(category=catName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                cDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")

                valList = dApi.getCategoryGroupList(category=catName)
                pcg = catName
                for val in valList:
                    if val != "inclusive_group":
                        pcg = val
                        break
                dc.append([catName, pcg])
                cDef.append(dc)

                valList = dApi.getCategoryKeyList(category=catName)
                if not valList:
                    self.__lfh.write("Missing category key for category %s\n" %
                                     catName)
                else:
                    dc = DataCategory("category")
                    dc.appendAttribute("key_id")
                    kItemName = CifName.itemName(catName, "synthetic_key")
                    dc.append([kItemName])
                    cDef.append(dc)

                    iDef = DefinitionContainer(kItemName)
                    self.__makeKeyItem(catName, "synthetic_key", valList, iDef)
                    oCList.append(iDef)

                for attName in attNameList:
                    itemName = CifName.itemName(catName, attName)
                    iDef = DefinitionContainer(itemName)

                    oCList.append(iDef)

                    #
                    dc = DataCategory("definition")
                    dc.appendAttribute("id")
                    dc.appendAttribute("scope")
                    dc.appendAttribute("class")
                    dc.appendAttribute("update")
                    dc.append([itemName, "Item", "Single", "2013-08-22"])
                    iDef.append(dc)
                    #
                    val = dApi.getDescription(category=catName,
                                              attribute=attName)
                    dc = DataCategory("description")
                    dc.appendAttribute("text")
                    dc.append([val])
                    iDef.append(dc)
                    #
                    dc = DataCategory("name")
                    dc.appendAttribute("category_id")
                    dc.appendAttribute("object_id")
                    #
                    if itemName in parentD:
                        dc.appendAttribute("linked_item_id")
                        dc.append([catName, attName, parentD[itemName][0]])
                    else:
                        dc.append([catName, attName])
                    iDef.append(dc)
                    #
                    #
                    aliasList = dApi.getItemAliasList(category=catName,
                                                      attribute=attName)
                    if aliasList:
                        dc = DataCategory("alias")
                        dc.appendAttribute("definition_id")
                        for alias in aliasList:
                            dc.append([alias[0]])
                        iDef.append(dc)

                    enList = dApi.getEnumListAltWithDetail(category=catName,
                                                           attribute=attName)

                    tC = dApi.getTypeCode(category=catName, attribute=attName)
                    tcontainer = "Single"
                    purpose = "Describe"
                    source = "Recorded"
                    contents = "Text"
                    #
                    if tC is None:
                        self.__lfh.write("Missing data type attribute %s\n" %
                                         attName)
                    elif tC in [
                            "code", "atcode", "name", "idname", "symop", "fax",
                            "phone", "email", "code30", "ec-type"
                    ]:
                        purpose = "Encode"
                        contents = "Text"
                        source = "Assigned"
                    elif tC in ["ucode"]:
                        purpose = "Encode"
                        contents = "Code"
                        source = "Assigned"
                    elif tC in ["line", "uline", "text"]:
                        purpose = "Describe"
                        source = "Recorded"
                        contents = "Text"
                    elif tC in ["int"]:
                        purpose = "Number"
                        source = "Recorded"
                        contents = "Integer"
                    elif tC in ["int-range"]:
                        purpose = "Number"
                        source = "Recorded"
                        contents = "Range"
                    elif tC in ["float"]:
                        purpose = "Measurand"
                        source = "Recorded"
                        contents = "Real"
                    elif tC in ["float-range"]:
                        purpose = "Measurand"
                        source = "Recorded"
                        contents = "Range"
                    elif tC.startswith("yyyy"):
                        source = "Assigned"
                        contents = "Date"
                        purpose = "Describe"

                    if enList:
                        purpose = "State"

                    dc = DataCategory("type")
                    dc.appendAttribute("purpose")
                    dc.appendAttribute("source")
                    dc.appendAttribute("contents")
                    dc.appendAttribute("container")
                    dc.append([purpose, source, contents, tcontainer])
                    iDef.append(dc)
                    #
                    if enList:
                        dc = DataCategory("enumeration_set")
                        dc.appendAttribute("state")
                        dc.appendAttribute("detail")
                        for en in enList:
                            dc.append([en[0], en[1]])
                        iDef.append(dc)

                    dfv = dApi.getDefaultValue(category=catName,
                                               attribute=attName)
                    bvList = dApi.getBoundaryList(category=catName,
                                                  attribute=attName)
                    if ((dfv is not None) and
                        (dfv not in ["?", "."])) or bvList:
                        row = []
                        dc = DataCategory("enumeration")
                        if dfv is not None:
                            dc.appendAttribute("default")
                            row.append(dfv)
                        if bvList:
                            dc.appendAttribute("range")
                            mminVp = -1000000
                            mmaxVp = 10000000
                            mminV = mmaxVp
                            mmaxV = mminVp
                            for bv in bvList:
                                minV = float(bv[0]) if bv[0] != "." else mminVp
                                maxV = float(bv[1]) if bv[1] != "." else mmaxVp
                                mminV = min(mminV, minV)
                                mmaxV = max(mmaxV, maxV)
                            if mminV == mminVp:
                                mminV = ""
                            if mmaxV == mmaxVp:
                                mmaxV = ""
                            row.append(str(mminV) + ":" + str(mmaxV))

                        dc.append(row)
                        iDef.append(dc)

            myIo.writeFile(outputFilePath=os.path.join(
                HERE, "test-output", "mmcif_pdbx_ddlm_auto.dic"),
                           containerList=oCList)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #21
    def __buildAttributeDefinition(self, atD, mD):
        """ Construct an attribute definition from input attribute dictionary
            containing metadata extracted from the XML schema, and from the
            input schema name mapping dictionary.
        Args:
            atD (dict): attribute metadata dictionary
            mD (dict): mapping details for categories and attributes

        Returns:
                Attribute definition (object)

        """
        #
        atName = atD["name"]
        catName = atD["category"]

        #
        mapAtName = mD["at"] if "at" in mD else atName
        mapCatName = mD["cat"] if "cat" in mD else catName
        pCat = mD["pCat"] if "pCat" in mD else None
        pAt = mD["pAt"] if "pAt" in mD else None
        pType = mD["pType"] if "pType" in mD else None
        #
        itemName = CifName.itemName(mapCatName, mapAtName)
        #
        aliasAtName = atD["aliasName"] if "aliasName" in atD else None
        aliasCatName = atD[
            "aliasCategoryName"] if "aliasCategoryName" in atD else None
        #
        atDescription = atD["description"] if "description" in atD else None
        atDescription = self.__filterDescription(atDescription,
                                                 self.__catMappingD)
        atDescription = self.__filterDescription(atDescription,
                                                 self.__atMappingD)
        #
        mCode = "yes" if atD["mandatory"] == "mandatory" else "no"
        mCode = "yes" if mapAtName == "entry_id" else mCode
        #
        if atD["type"] not in self.__typeMap:
            logger.info("Unmapped type %r", atD["type"])
        #
        atType = self.__typeMap[
            atD["type"]] if atD["type"] in self.__typeMap else "UNKNOWN"
        #
        if atType == "text" and atDescription.find("comma separate") >= 0:
            atType = "alphanum-csv"
        if atType == "text" and "_date" in mapAtName and mapAtName != "report_creation_date":
            atType = "yyyy-mm-dd"
        #
        atType = pType if pType else atType

        if atType == "UNKNOWN":
            logger.info("Missing type mapping for %s %s %s", catName, atName,
                        atD["type"])
        #
        defA = DefinitionContainer(itemName)
        #
        dc = DataCategory("item_description",
                          attributeNameList=["description"])
        dc.append([atDescription])
        defA.append(dc)

        dc = DataCategory(
            "item",
            attributeNameList=["name", "category_id", "mandatory_code"])
        dc.append([itemName, mapCatName, mCode])
        defA.append(dc)
        #
        dc = DataCategory("item_type", attributeNameList=["code"])
        dc.append([atType])
        defA.append(dc)

        dc = DataCategory(
            "item_aliases",
            attributeNameList=["alias_name", "dictionary", "version"])
        dc.append([
            aliasCatName + "." + aliasAtName, self.__schemaPath,
            self.__schemaVersion
        ])
        defA.append(dc)
        #
        # Note - expect boundaries in pairs and 'inclusive' of endpoints
        #
        if "minIncl" in atD and "maxIncl" in atD and atD["minIncl"] and atD[
                "maxIncl"]:
            minB = atD["minIncl"]
            maxB = atD["maxIncl"]
            dc = DataCategory("item_range",
                              attributeNameList=["minimum", "maximum"])
            dc.append([minB, minB])
            dc.append([minB, maxB])
            dc.append([maxB, maxB])
            defA.append(dc)
        else:
            if atType == "float" and ((mapAtName.find("percent_") >= 0) or
                                      (mapAtName.find("percentile_") >= 0)):
                dc = DataCategory("item_range",
                                  attributeNameList=["minimum", "maximum"])
                minB = "0.0"
                maxB = "100.0"
                dc.append([minB, minB])
                dc.append([minB, maxB])
                dc.append([maxB, maxB])
                defA.append(dc)
        #
        if "enum" in atD and isinstance(atD["enum"], list) and atD["enum"]:
            dc = DataCategory("item_enumeration",
                              attributeNameList=["value", "detail"])
            for enumVal in atD["enum"]:
                dc.append([enumVal, "."])
            defA.append(dc)

        # -  add parent link relationships -
        if pCat and pAt:
            dc = DataCategory("item_linked",
                              attributeNameList=["child_name", "parent_name"])
            parentItemName = CifName.itemName(pCat, pAt)
            dc.append([itemName, parentItemName])
            defA.append(dc)
        #
        #
        return defA
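
The item_range blocks above encode an inclusive [minimum, maximum] interval as three rows: (min, min) and (max, max) mark attainable endpoints, and (min, max) covers the interior. A standalone sketch of that convention (not in the original), using only the DataCategory calls shown in these examples; the import path is assumed.

from mmcif.api.DataCategory import DataCategory

# Encode the inclusive interval [0.0, 100.0] as item_range rows.
dc = DataCategory("item_range", attributeNameList=["minimum", "maximum"])
minB, maxB = "0.0", "100.0"
dc.append([minB, minB])  # lower endpoint is attainable
dc.append([minB, maxB])  # interior of the interval
dc.append([maxB, maxB])  # upper endpoint is attainable
for iRow in range(dc.getRowCount()):
    print(dc.getRow(iRow))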
Example #22
    def test_gen_ddlm(self, in_tmpdir, test_files):
        myIo = IoAdapterPy()
        containerList = myIo.readFile(
            inputFilePath=str(test_files / 'mmcif_pdbx_v5_next.dic'))
        dApi = DictionaryApi(containerList=containerList, consolidate=True)
        parentD = dApi.getParentDictionary()
        #
        oCList = []
        dDef = DataContainer('mmcif_pdbx_ddlm_auto')
        dc = DataCategory("dictionary")
        dc.appendAttribute("title")
        dc.appendAttribute("class")
        dc.appendAttribute("version")
        dc.appendAttribute("date")
        dc.appendAttribute("ddl_conformance")
        dc.appendAttribute("text")
        dc.append([
            'mmcif_pdbx_ddlm_auto', 'Instance', 'latest', '2018-03-09',
            'ddlm best effort',
            'Software converted PDBx dictionary using DDLm semantics'
        ])
        dDef.append(dc)
        oCList.append(dDef)

        catIdx = dApi.getCategoryIndex()
        for catName in sorted(catIdx.keys()):
            attNameList = catIdx[catName]
            # created definition container -
            cDef = DefinitionContainer(catName)
            oCList.append(cDef)
            #
            dc = DataCategory("definition")
            dc.appendAttribute("id")
            dc.appendAttribute("scope")
            dc.appendAttribute("class")
            dc.appendAttribute("update")
            dc.append([catName, "Category", "Loop", "2018-03-09"])
            cDef.append(dc)
            val = dApi.getCategoryDescription(category=catName)
            dc = DataCategory("description")
            dc.appendAttribute("text")
            dc.append([val])
            cDef.append(dc)
            #
            dc = DataCategory("name")
            dc.appendAttribute("category_id")
            dc.appendAttribute("object_id")

            valList = dApi.getCategoryGroupList(category=catName)
            pcg = catName
            for val in valList:
                if val != 'inclusive_group':
                    pcg = val
                    break
            dc.append([catName, pcg])
            cDef.append(dc)

            valList = dApi.getCategoryKeyList(category=catName)
            if len(valList) < 1:
                print("Missing category key for category %s\n" % catName)
            else:
                dc = DataCategory("category")
                dc.appendAttribute("key_id")
                kItemName = CifName.itemName(catName, "synthetic_key")
                dc.append([kItemName])
                cDef.append(dc)

                iDef = DefinitionContainer(kItemName)
                self._makeKeyItem(catName, "synthetic_key", valList, iDef)
                oCList.append(iDef)

            for attName in attNameList:
                itemName = CifName.itemName(catName, attName)
                iDef = DefinitionContainer(itemName)

                oCList.append(iDef)

                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([itemName, "Item", "Single", "2013-08-22"])
                iDef.append(dc)
                #
                val = dApi.getDescription(category=catName, attribute=attName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                iDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")
                #
                if itemName in parentD:
                    dc.appendAttribute("linked_item_id")
                    dc.append([catName, attName, parentD[itemName][0]])
                else:
                    dc.append([catName, attName])
                iDef.append(dc)
                #
                #
                aliasList = dApi.getItemAliasList(category=catName,
                                                  attribute=attName)
                if len(aliasList) > 0:
                    dc = DataCategory("alias")
                    dc.appendAttribute("definition_id")
                    for alias in aliasList:
                        dc.append([alias[0]])
                    iDef.append(dc)

                enList = dApi.getEnumListAltWithDetail(category=catName,
                                                       attribute=attName)

                tC = dApi.getTypeCode(category=catName, attribute=attName)
                tcontainer = 'Single'
                purpose = 'Describe'
                source = 'Recorded'
                contents = 'Text'
                #
                if tC is None:
                    print("Missing data type attribute %s\n" % attName)
                elif tC in [
                        'code', 'atcode', 'name', 'idname', 'symop', 'fax',
                        'phone', 'email', 'code30', 'ec-type'
                ]:
                    purpose = 'Encode'
                    contents = 'Text'
                    source = 'Assigned'
                elif tC in ['ucode']:
                    purpose = 'Encode'
                    contents = 'Code'
                    source = 'Assigned'
                elif tC in ['line', 'uline', 'text']:
                    purpose = 'Describe'
                    source = 'Recorded'
                    contents = 'Text'
                elif tC in ['int']:
                    purpose = 'Number'
                    source = 'Recorded'
                    contents = 'Integer'
                elif tC in ['int-range']:
                    purpose = 'Number'
                    source = 'Recorded'
                    contents = 'Range'
                elif tC in ['float']:
                    purpose = 'Measurand'
                    source = 'Recorded'
                    contents = 'Real'
                elif tC in ['float-range']:
                    purpose = 'Measurand'
                    source = 'Recorded'
                    contents = 'Range'
                elif tC.startswith('yyyy'):
                    source = 'Assigned'
                    contents = 'Date'
                    purpose = 'Describe'

                if len(enList) > 0:
                    purpose = 'State'

                dc = DataCategory("type")
                dc.appendAttribute("purpose")
                dc.appendAttribute("source")
                dc.appendAttribute("contents")
                dc.appendAttribute("container")
                dc.append([purpose, source, contents, tcontainer])
                iDef.append(dc)
                #
                if (len(enList) > 0):
                    dc = DataCategory("enumeration_set")
                    dc.appendAttribute("state")
                    dc.appendAttribute("detail")
                    for en in enList:
                        dc.append([en[0], en[1]])
                    iDef.append(dc)

                dfv = dApi.getDefaultValue(category=catName, attribute=attName)
                bvList = dApi.getBoundaryList(category=catName,
                                              attribute=attName)
                if (((dfv is not None) and (dfv not in ['?', '.']))
                        or len(bvList) > 0):
                    row = []
                    dc = DataCategory("enumeration")
                    if dfv is not None:
                        dc.appendAttribute("default")
                        row.append(dfv)
                    if len(bvList) > 0:
                        dc.appendAttribute("range")
                        mminVp = -1000000
                        mmaxVp = 10000000
                        mminV = mmaxVp
                        mmaxV = mminVp
                        for bv in bvList:
                            minV = float(bv[0]) if bv[0] != '.' else mminVp
                            maxV = float(bv[1]) if bv[1] != '.' else mmaxVp
                            mminV = min(mminV, minV)
                            mmaxV = max(mmaxV, maxV)
                        if mminV == mminVp:
                            mminV = ''
                        if mmaxV == mmaxVp:
                            mmaxV = ''
                        row.append(str(mminV) + ":" + str(mmaxV))

                    dc.append(row)
                    iDef.append(dc)

        myIo.writeFile(outputFilePath="mmcif_pdbx_ddlm_auto.dic",
                       containerList=oCList)
Example #23
    def __parser(self,
                 tokenizer,
                 containerList,
                 categorySelectionD=None,
                 excludeFlag=False):
        """ Parser for PDBx data files and dictionaries.

            Input - tokenizer() reentrant method recognizing data item names (_category.attribute)
                    quoted strings (single, double and multi-line semi-colon delimited), and unquoted
                    strings.

                    containerList -  list-type container for data and definition objects parsed
                                     from the input file.

            On return:
                    The input containerList is appended with data and definition objects -
        """
        catSelectD = categorySelectionD if categorySelectionD is not None else {}
        logger.debug("Exclude Flag %r Category selection %r", excludeFlag,
                     catSelectD)
        # Working container - data or definition
        curContainer = None
        # the last container of type data -
        previousDataContainer = None
        #
        # Working category container
        categoryIndex = {}
        curCategory = None
        #
        curRow = None
        state = None

        # Find the first reserved word and begin capturing data.
        #
        while True:
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            if curWord is None:
                continue
            reservedWord, state = self.__getState(curWord)
            if reservedWord is not None:
                break

        while True:
            #
            #  Set the current state  -
            #
            #  At this point in the processing cycle we are expecting a token containing
            #  either a '_category.attribute'  or a reserved word.
            #
            if curCatName is not None:
                state = "ST_KEY_VALUE_PAIR"
            elif curWord is not None:
                reservedWord, state = self.__getState(curWord)
            else:
                self.__syntaxError("Miscellaneous syntax error")
                return

            #
            # Process  _category.attribute  value assignments
            #
            if state == "ST_KEY_VALUE_PAIR":
                try:
                    curCategory = categoryIndex[curCatName]
                except KeyError:
                    # A new category is encountered - create a container and add a row
                    curCategory = categoryIndex[curCatName] = DataCategory(
                        curCatName)
                    #
                    #  check if we have all of the selection
                    if not excludeFlag and self.__allSelected(
                            curContainer, catSelectD):
                        return
                    try:
                        if catSelectD:
                            if not excludeFlag and curCatName in catSelectD:
                                curContainer.append(curCategory)
                            elif excludeFlag and curCatName not in catSelectD:
                                curContainer.append(curCategory)
                            else:
                                logger.debug(
                                    "Skipped unselected/excluded category %s",
                                    curCatName)
                        else:
                            curContainer.append(curCategory)
                    except AttributeError:
                        self.__syntaxError(
                            "Category cannot be added to data_ block")
                        return

                    curRow = []
                    curCategory.append(curRow)
                else:
                    # Recover the existing row from the category
                    try:
                        # curRow = curCategory[0]
                        curRow = curCategory.getRow(0)
                    except IndexError:
                        self.__syntaxError(
                            "Internal index error accessing category data")
                        return

                # Check for duplicate attributes and add attribute to table.
                if curAttName in curCategory.getAttributeList():
                    self.__syntaxError(
                        "Duplicate attribute encountered in category")
                    return
                else:
                    curCategory.appendAttribute(curAttName)

                # Get the data for this attribute from the next token
                tCat, _, curQuotedString, curWord = next(tokenizer)

                if tCat is not None or (curQuotedString is None
                                        and curWord is None):
                    self.__syntaxError("Missing data for item _%s.%s" %
                                       (curCatName, curAttName))

                if curWord is not None:
                    #
                    # Validation check - flag reserved words misplaced in the value position -
                    #
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        self.__syntaxError("Unexpected reserved word: %s" %
                                           (reservedWord))

                    curRow.append(curWord)

                elif curQuotedString is not None:
                    curRow.append(curQuotedString)

                else:
                    self.__syntaxError("Missing value in item-value pair")

                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)
                continue

            #
            # Process a loop_ declaration and associated data -
            #
            elif state == "ST_TABLE":

                # The category name in the next curCatName,curAttName pair
                #    defines the name of the category container.
                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)

                if curCatName is None or curAttName is None:
                    self.__syntaxError("Unexpected token in loop_ declaration")
                    return

                # Check for a previous category declaration.
                if curCatName in categoryIndex:
                    self.__syntaxError(
                        "Duplicate category declaration in loop_")
                    return

                curCategory = DataCategory(curCatName)

                #
                #  check if we have all of the selection
                if not excludeFlag and self.__allSelected(
                        curContainer, catSelectD):
                    return
                try:
                    if catSelectD:
                        if not excludeFlag and curCatName in catSelectD:
                            curContainer.append(curCategory)
                        elif excludeFlag and curCatName not in catSelectD:
                            curContainer.append(curCategory)
                        else:
                            logger.debug(
                                "Skipped unselected/excluded category %s",
                                curCatName)
                    else:
                        curContainer.append(curCategory)
                except AttributeError:
                    self.__syntaxError(
                        "loop_ declaration outside of data_ block or save_ frame"
                    )
                    return

                curCategory.appendAttribute(curAttName)

                # Read the rest of the loop_ declaration
                while True:
                    curCatName, curAttName, curQuotedString, curWord = next(
                        tokenizer)

                    if curCatName is None:
                        break

                    if curCatName != curCategory.getName():
                        self.__syntaxError(
                            "Changed category name in loop_ declaration")
                        return

                    curCategory.appendAttribute(curAttName)

                # If the next token is a 'word', check it for any reserved words -
                if curWord is not None:
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        if reservedWord == "stop":
                            return
                        else:
                            self.__syntaxError(
                                "Unexpected reserved word after loop declaration: %s"
                                % (reservedWord))

                # Read the table of data for this loop_ -
                while True:
                    curRow = []
                    curCategory.append(curRow)

                    for _ in curCategory.getAttributeList():
                        if curWord is not None:
                            curRow.append(curWord)
                        elif curQuotedString is not None:
                            curRow.append(curQuotedString)

                        curCatName, curAttName, curQuotedString, curWord = next(
                            tokenizer)

                    # loop_ data processing ends if -

                    # A new _category.attribute is encountered
                    if curCatName is not None:
                        break

                    # A reserved word is encountered
                    if curWord is not None:
                        reservedWord, state = self.__getState(curWord)
                        if reservedWord is not None:
                            break

                continue

            elif state == "ST_DEFINITION":
                # Ignore trailing unnamed saveframe delimiters e.g. 'save'
                sName = self.__getContainerName(curWord)
                if sName:
                    curContainer = DefinitionContainer(sName)
                    containerList.append(curContainer)
                    categoryIndex = {}
                    curCategory = None
                else:
                    # Reset the current container to the last data container
                    curContainer = previousDataContainer

                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)

            elif state == "ST_DATA_CONTAINER":
                #
                dName = self.__getContainerName(curWord)
                if not dName:
                    dName = "unidentified"
                curContainer = DataContainer(dName)
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                previousDataContainer = curContainer
                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)

            elif state == "ST_STOP":
                ###
                # curCatName, curAttName, curQuotedString, curWord = tokenizer.next()
                continue

            elif state == "ST_GLOBAL":
                curContainer = DataContainer("blank-global")
                curContainer.setGlobal()
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)

            elif state == "ST_UNKNOWN":
                self.__syntaxError("Unrecogized syntax element: " +
                                   str(curWord))
                return
    def _createfile2(pathout):
        my_data_list = []

        cur_container = DataContainer("test")

        acat = DataCategory("new")
        acat.appendAttribute("item")
        acat.append(('1',))

        cur_container.append(acat)

        acat = DataCategory("second_category")
        acat.appendAttribute('row')
        acat.appendAttribute('rowb')
        acat.append(('1', '2'))

        cur_container.append(acat)

        acat = DataCategory("third")
        acat.appendAttribute('id')
        acat.appendAttribute('val')
        acat.append(('1', 'a'))
        acat.append(('2', 'b'))
        acat.append(('3', 'c'))

        cur_container.append(acat)

        acat = DataCategory("exptl")
        acat.appendAttribute('method')
        acat.appendAttribute('entry_id')
        acat.append(('NEW', 'something'))

        cur_container.append(acat)

        acat = DataCategory("struct")
        acat.appendAttribute('new')
        acat.appendAttribute('pdbx_descriptor')
        acat.append(('Something to add', 'Override descriptor'))

        cur_container.append(acat)

        my_data_list.append(cur_container)

        with open(pathout, "w") as ofh:
            pdbxw = PdbxWriter(ofh)
            pdbxw.setAlignmentFlag(flag=True)
            pdbxw.write(my_data_list)
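
A minimal round-trip sketch for the example above, assuming _createfile2() is reachable as a plain function and that the mmcif package is installed; PdbxReader is the public reader that drives a parser like __parser(), and the helper name below is illustrative only:

import tempfile

from mmcif.io.PdbxReader import PdbxReader


def roundTripSketch():
    # Write the small test file with the helper above, then parse it back.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".cif", delete=False) as tmp:
        path = tmp.name
    _createfile2(path)

    containerList = []
    with open(path, "r") as ifh:
        PdbxReader(ifh).read(containerList)

    # Expect a single data_ block named "test" containing the five categories
    # written above (new, second_category, third, exptl, struct).
    block = containerList[0]
    assert block.getName() == "test"
    assert "exptl" in block.getObjNameList()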
Example #25
    def __generateData(self):
        """Generates data for test. __testValues must be in sync"""
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_test")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("details")
        aCat.append([1, "data_my_big_data_file"])
        aCat.append([2, "loop_my_big_data_loop"])
        aCat.append([3, "save_my_big_data_saveframe"])
        aCat.append([4, "_category.item"])
        aCat.append([5, "Data_my_big_data_file"])
        aCat.append([6, "Loop_my_big_data_loop"])
        aCat.append([7, "Save_my_big_data_saveframe"])
        aCat.append([8, "DatA_my_big_data_file"])
        curContainer.append(aCat)

        return curContainer
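
A short usage sketch, assuming the container returned by __generateData() is passed in and that PdbxWriter comes from the mmcif package (as in the writer examples above); the function name is illustrative. The row values resemble reserved words (data_..., loop_..., save_...), presumably to exercise how such values are quoted on output.

import sys

from mmcif.io.PdbxWriter import PdbxWriter


def writeGeneratedData(container, ofh=sys.stdout):
    # Write the generated container to ofh with column alignment enabled;
    # with multiple rows the category is emitted as a loop_ table.
    pdbxw = PdbxWriter(ofh)
    pdbxw.setAlignmentFlag(flag=True)
    pdbxw.write([container])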