def strip(self, inpPath, outPath, stripList=None):
        """Strip categories from inpPath and write the result to outPath.

        Args:
            inpPath (str): path of the input CIF file
            outPath (str): path of the output CIF file
            stripList (list, optional): category names to omit from the output

        Returns:
            bool: True on success, False otherwise
        """
        # Avoid the shared mutable-default-argument pitfall (stripList=[]).
        stripList = stripList if stripList is not None else []
        try:
            myDataList = []
            with open(inpPath, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myDataList)
            #
            # Only the first data block is processed.
            myBlock = myDataList[0]
            myName = myBlock.getName()
            newContainer = DataContainer(myName)

            for objName in myBlock.getObjNameList():
                myObj = myBlock.getObj(objName)
                if myObj.getName() not in stripList:
                    newContainer.append(myObj)
            #
            with open(outPath, "w") as ofh:
                pWr = PdbxWriter(ofh)
                pWr.setPreferSingleQuotes()
                pWr.write([newContainer])

            return True
        except Exception as e:
            # Lazy %-style args so the message is only formatted when emitted.
            logger.exception("Failing with %s", str(e))
            return False
Exemple #2
0
    def testSerialize(self):
        """Round-trip test: text CIF -> typed categories -> binary CIF -> text CIF.

        Exercises BinaryCifWriter/BinaryCifReader with both string-storage modes.
        """
        try:
            # Run the full cycle once with strings stored as bytes, once as text.
            for storeStringsAsBytes in [True, False]:
                tcL = []
                ioPy = IoAdapter()
                containerList = ioPy.readFile(self.__pathTextCif)
                for container in containerList:
                    cName = container.getName()
                    tc = DataContainer(cName)
                    # Re-type each category using the dictionary API.
                    for catName in container.getObjNameList():
                        dObj = container.getObj(catName)
                        tObj = DataCategoryTyped(dObj, dictionaryApi=self.__dApi, copyInputData=True)
                        tc.append(tObj)
                    tcL.append(tc)
                #
                bcw = BinaryCifWriter(self.__dApi, storeStringsAsBytes=storeStringsAsBytes, applyTypes=False, useFloat64=True)
                bcw.serialize(self.__testBcifOutput, tcL)
                # NOTE(review): both assertions compare an object with itself and
                # are trivially true — presumably the intent was to compare the
                # input containers with the typed copies; confirm and fix.
                self.assertEqual(containerList[0], containerList[0])
                self.assertEqual(tcL[0], tcL[0])

                bcr = BinaryCifReader(storeStringsAsBytes=storeStringsAsBytes)
                cL = bcr.deserialize(self.__testBcifOutput)
                #
                # Translate the deserialized content back to a text CIF file.
                ioPy = IoAdapter()
                ok = ioPy.writeFile(self.__testBcifTranslated, cL)
                self.assertTrue(ok)
                # __same presumably compares category content item-by-item.
                self.assertTrue(self.__same(tcL[0], cL[0]))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Exemple #3
0
    def test_single_row(self, rw_data):
        """Round-trip a category whose rows contain CIF reserved words."""
        containerList = []
        block = DataContainer("myblock")
        cat = DataCategory("pdbx_seqtool_mapping_ref")
        for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                         "pdb_chain_id", "ref_mon_id", "ref_mon_num", "details"):
            cat.appendAttribute(attrName)
        # Each row's details field is a CIF reserved token the writer must quote.
        for details in ('data_my_big_data_file', 'loop_my_big_data_loop',
                        'save_my_big_data_saveframe', '_category.item'):
            cat.append([1, 2, 3, 4, 5, 6, 7, details])

        block.append(cat)

        readBack = block.getObj("pdbx_seqtool_mapping_ref")
        print("----attribute list %r\n" % readBack.getAttributeList())
        print("----ROW %r\n" % readBack.getRow(0))

        with open(str(rw_data['pathOutputFile2']), "w") as ofh:
            containerList.append(block)
            writer = PdbxWriter(ofh)
            writer.write(containerList)

        assert len(containerList) == 1
Exemple #4
0
 def testWriteDataFile(self):
     """Test case -  write data file"""
     try:
         containerList = []
         block = DataContainer("myblock")
         cat = DataCategory("pdbx_seqtool_mapping_ref")
         for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                          "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
             cat.appendAttribute(attrName)
         # Four identical rows followed by one reversed row.
         for _ in range(4):
             cat.append([1, 2, 3, 4, 5, 6, 7])
         cat.append([7, 6, 5, 4, 3, 2, 1])
         block.append(cat)
         #
         containerList.append(block)
         with open(self.__pathOutputFile1, "w") as ofh:
             writer = PdbxWriter(ofh)
             writer.write(containerList)
         self.assertEqual(len(containerList), 1)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Exemple #5
0
    def testUpdateDataFile(self):
        """Test case -  update data file

        Writes an initial data file, reads it back, updates two columns in
        every row, and writes the updated content to a second file.
        """
        try:
            # Create a initial data file --
            #
            myDataList = []

            curContainer = DataContainer("myblock")
            aCat = DataCategory("pdbx_seqtool_mapping_ref")
            aCat.appendAttribute("ordinal")
            aCat.appendAttribute("entity_id")
            aCat.appendAttribute("auth_mon_id")
            aCat.appendAttribute("auth_mon_num")
            aCat.appendAttribute("pdb_chain_id")
            aCat.appendAttribute("ref_mon_id")
            aCat.appendAttribute("ref_mon_num")
            aCat.append([9, 2, 3, 4, 5, 6, 7])
            aCat.append([10, 2, 3, 4, 5, 6, 7])
            aCat.append([11, 2, 3, 4, 5, 6, 7])
            aCat.append([12, 2, 3, 4, 5, 6, 7])

            curContainer.append(aCat)
            myDataList.append(curContainer)
            # Context managers replace the bare open/close pairs so the file
            # handles are released even if an assertion or write raises.
            with open(self.__pathOutputFile1, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myDataList)
            #
            # Read and update the data -
            #
            myDataList = []
            with open(self.__pathOutputFile1, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myDataList)
            #
            myBlock = myDataList[0]
            myCat = myBlock.getObj("pdbx_seqtool_mapping_ref")
            for iRow in range(0, myCat.getRowCount()):
                myCat.setValue("some value", "ref_mon_id", iRow)
                myCat.setValue(100, "ref_mon_num", iRow)
            with open(self.__pathOutputFile2, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myDataList)

            #
            self.assertEqual(len(myDataList), 1)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Exemple #6
0
    def testRowListInitialization(self):
        """Test case -  Row list initialization of a data category and data block"""
        try:
            #
            fn = self.__pathOutputFile4
            attributeNameList = [
                "aOne", "aTwo", "aThree", "aFour", "aFive", "aSix", "aSeven",
                "aEight", "aNine", "aTen"
            ]
            # Ten identical rows of 1..10, one value per attribute.
            rowList = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] for _ in range(10)]
            nameCat = "myCategory"
            #
            curContainer = DataContainer("myblock")
            aCat = DataCategory(nameCat, attributeNameList, rowList)
            curContainer.append(aCat)
            #
            myContainerList = [curContainer]
            # Context managers replace the bare open/close pairs so the file
            # handles are released even if the write raises.
            with open(fn, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myContainerList)

            myContainerList = []
            with open(fn, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myContainerList)
            for container in myContainerList:
                for objName in container.getObjNameList():
                    name, aList, rList = container.getObj(objName).get()
                    logger.debug("Recovered data category  %s", name)
                    logger.debug("Attribute list           %r", repr(aList))
                    logger.debug("Row list                 %r", repr(rList))
            self.assertEqual(len(myContainerList), 1)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
    def __write_mmcif(self, pathout, coef, entry_id):
        """Writes out the specific map coefficients

        """
        # Categories that will not be copied
        skipCategories = [
            'audit', 'diffrn_radiation_wavelength', 'exptl_crystal',
            'reflns_scale'
        ]

        # refln attributes to keep
        keepAttributes = ['index_h', 'index_k', 'index_l', 'fom']
        if coef == 'fo':
            keepAttributes += ['pdbx_DELFWT', 'pdbx_DELPHWT']
        else:
            keepAttributes += ['pdbx_FWT', 'pdbx_PHWT']

        # Output data block is named <entry_id><coef>
        outContainer = DataContainer("{}{}".format(entry_id, coef))

        # Only care about first block
        firstBlock = self.__sf[0]

        for categoryName in firstBlock.getObjNameList():
            if categoryName in skipCategories:
                continue

            # Deep copy so the source object is never modified.
            workObj = copy.deepcopy(firstBlock.getObj(categoryName))
            if categoryName == 'entry':
                workObj.setValue(entry_id, 'id', 0)
            if categoryName in ['cell', 'symmetry']:
                workObj.setValue(entry_id, 'entry_id', 0)
            if categoryName == 'refln':
                # Drop every attribute except the ones we keep; iterate over a
                # copy of the list since we mutate the object during the loop.
                for attr in list(workObj.getAttributeList()):
                    if attr not in keepAttributes:
                        workObj.removeAttribute(attr)

            outContainer.append(workObj)

        # Write out a single block
        io = IoAdapterCore()
        return io.writeFile(pathout, [outContainer])
Exemple #8
0
    def __processContent(self, cifFileObj):
        """Internal method to transfer parsed data from the wrapped input C++ CifFile object into
        the list of Python DataContainer objects.

        Args:
            cifFileObj (wrapped CifFile object): Wrapped input C++ CifFile object

        Returns:
            list of DataContainer objects:   List of Python DataContainer objects
            (empty on failure; errors are reported via self._logError)

        """
        # Initialized before the try block so the except path returns [].
        containerList = []
        try:
            # ----- Repackage the data content  ----
            #
            # (Removed the redundant duplicate re-initialization of the
            # container lists that shadowed the ones above.)
            containerNameList = list(cifFileObj.GetBlockNames([]))
            for containerName in containerNameList:
                #
                aContainer = DataContainer(containerName)
                #
                block = cifFileObj.GetBlock(containerName)
                tableNameList = list(block.GetTableNames([]))

                for tableName in tableNameList:
                    table = block.GetTable(tableName)
                    attributeNameList = list(table.GetColumnNames())
                    # Copy each row out of the wrapped C++ table.
                    rowList = [list(table.GetRow(iRow)) for iRow in range(table.GetNumRows())]
                    aCategory = DataCategory(
                        tableName,
                        attributeNameList,
                        rowList,
                        copyInputData=False,
                        raiseExceptions=self._raiseExceptions)
                    aContainer.append(aCategory)
                containerList.append(aContainer)
        except Exception as e:
            msg = "Failing packaging with %s" % str(e)
            self._logError(msg)

        return containerList
Exemple #9
0
 def add_container(self, container_id):
     """
     This method provides the basic functionality to set up a container
     :param container_id: a string; an mmcif category e.g. 'emd_admin'
     :return: True when the container was created and registered
     """
     self.__container_id = container_id
     self.__container = DataContainer(container_id)
     # Record the index the new container will occupy before appending it.
     self.__dataMap[container_id] = len(self.__dataList)
     self.__dataList.append(self.__container)
     # DataContainer() always yields an object, so this is always True;
     # kept as an expression to preserve the original return contract.
     return self.__container is not None
Exemple #10
0
    def __generateData(self):
        """Generates data for test. __testValues must be in sync"""
        container = DataContainer("myblock")
        category = DataCategory("pdbx_test")
        category.appendAttribute("ordinal")
        category.appendAttribute("details")
        # Detail values cover CIF reserved words in several capitalizations.
        detailValues = [
            "data_my_big_data_file",
            "loop_my_big_data_loop",
            "save_my_big_data_saveframe",
            "_category.item",
            "Data_my_big_data_file",
            "Loop_my_big_data_loop",
            "Save_my_big_data_saveframe",
            "DatA_my_big_data_file",
        ]
        for ordinal, details in enumerate(detailValues, start=1):
            category.append([ordinal, details])
        container.append(category)

        return container
Exemple #11
0
    def writeDefaultDataTypeMap(self, outPath, dataTyping="ANY"):
        """Write data file containing application default dictionary to application data type mapping

        data_rcsb_data_type_map
          loop_
          _pdbx_data_type_application_map.application_name
          _pdbx_data_type_application_map.type_code
          _pdbx_data_type_application_map.app_type_code
          _pdbx_data_type_application_map.app_precision_default
          _pdbx_data_type_application_map.app_width_default
          # .... type mapping data ...
        """
        try:
            typeCat = DataCategory("pdbx_data_type_application_map")
            for attrName in ("application_name", "type_code", "app_type_code",
                             "app_width_default", "app_precision_default"):
                typeCat.appendAttribute(attrName)
            # One row per non-null CIF type, pairing it with its application
            # type and the default width/precision.
            rowSource = zip(DataTypeApplicationInfo.cifTypes,
                            DataTypeApplicationInfo.appTypes,
                            DataTypeApplicationInfo.defaultWidths,
                            DataTypeApplicationInfo.defaultPrecisions)
            for cifType, simpleType, defWidth, defPrecision in rowSource:
                if self.__isNull(cifType):
                    continue
                typeCat.append(
                    [dataTyping, cifType, simpleType, defWidth, defPrecision])
            mapContainer = DataContainer("rcsb_data_type_map")
            mapContainer.append(typeCat)
            #
            mU = MarshalUtil(workPath=self.__workPath)
            return mU.doExport(outPath,
                               [mapContainer],
                               fmt="mmcif",
                               enforceAscii=True,
                               useCharRefs=True,
                               raiseExceptions=True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
Exemple #12
0
    def testRowDictInitialization(self):
        """Test case -  Row dictionary initialization of a data category and data block
        """
        try:
            #
            rLen = 10
            fn = self.__pathOutputFile5
            attributeNameList = ["a", "b", "c", "d"]
            rowList = [{"a": 1, "b": 2, "c": 3, "d": 4} for i in range(rLen)]
            nameCat = "myCategory"
            #
            curContainer = DataContainer("myblock")
            aCat = DataCategory(nameCat, attributeNameList, rowList)
            # Two extra rows plus a second copy of rowList -> 2 * rLen + 2 rows.
            aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
            aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
            aCat.extend(rowList)
            curContainer.append(aCat)
            aCat.renameAttributes({"a": "aa", "b": "bb", "c": "cc", "d": "dd"})
            aCat.setName("renamedCategory")
            #
            myContainerList = [curContainer]
            # Context managers replace the bare open/close pairs so the file
            # handles are released even if an assertion fails.
            with open(fn, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myContainerList)

            myContainerList = []
            with open(fn, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myContainerList)
            for container in myContainerList:
                for objName in container.getObjNameList():
                    name, aList, rList = container.getObj(objName).get()
                    logger.debug("Recovered data category  %s", name)
                    logger.debug("Attribute list           %r", repr(aList))
                    logger.debug("Row list                 %r", repr(rList))
            self.assertEqual(len(myContainerList), 1)
            self.assertEqual(len(rList), 2 * rLen + 2)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Exemple #13
0
    def test_update_data_file(self, writer_paths):
        """Write a reference file, then read it back and update two columns."""
        containerList = []

        block = DataContainer("myblock")
        cat = DataCategory("pdbx_seqtool_mapping_ref")
        for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                         "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            cat.appendAttribute(attrName)
        for _ in range(4):
            cat.append((1, 2, 3, 4, 5, 6, 7))
        block.append(cat)
        containerList.append(block)
        with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
            PdbxWriter(ofh).write(containerList)
        #
        # Read and update the data -
        #
        containerList = []
        with open(str(writer_paths['pathOutputFile1']), "r") as ifh:
            PdbxReader(ifh).read(containerList)
        #
        mappingCat = containerList[0].getObj('pdbx_seqtool_mapping_ref')
        for rowIdx in range(mappingCat.getRowCount()):
            mappingCat.setValue('some value', 'ref_mon_id', rowIdx)
            mappingCat.setValue(100, 'ref_mon_num', rowIdx)
        with open(str(writer_paths['pathOutputFile2']), "w") as ofh:
            PdbxWriter(ofh).write(containerList)
        assert len(containerList) == 1
Exemple #14
0
    def test_update_data_file(self, rw_data):
        """Write a reference file, read it back, update two columns in every
        row, and write the result to a second file."""
        myDataList = []

        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        aCat.appendAttribute("ordinal")
        aCat.appendAttribute("entity_id")
        aCat.appendAttribute("auth_mon_id")
        aCat.appendAttribute("auth_mon_num")
        aCat.appendAttribute("pdb_chain_id")
        aCat.appendAttribute("ref_mon_id")
        aCat.appendAttribute("ref_mon_num")
        aCat.append([9, 2, 3, 4, 5, 6, 7])
        aCat.append([10, 2, 3, 4, 5, 6, 7])
        aCat.append([11, 2, 3, 4, 5, 6, 7])
        aCat.append([12, 2, 3, 4, 5, 6, 7])

        curContainer.append(aCat)
        myDataList.append(curContainer)
        # Context managers replace the bare open/close pairs so the file
        # handles are released even if a write or assertion raises.
        with open(str(rw_data['pathOutputFile1']), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)

        myDataList = []
        with open(str(rw_data['pathOutputFile1']), "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
        myBlock = myDataList[0]
        myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
        for iRow in range(0, myCat.getRowCount()):
            myCat.setValue('some value', 'ref_mon_id', iRow)
            myCat.setValue(100, 'ref_mon_num', iRow)

        with open(str(rw_data['pathOutputFile2']), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)

        assert len(myDataList) == 1
Exemple #15
0
    def test_row_list_initialization(self, rw_data):
        """Initialize a category from an explicit row list and round-trip it."""
        fn = rw_data['pathOutputFile4']
        attributeNameList = ['aOne', 'aTwo', 'aThree', 'aFour', 'aFive', 'aSix', 'aSeven', 'aEight', 'aNine', 'aTen']
        # Ten identical rows of 1..10, one value per attribute.
        rowList = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] for _ in range(10)]
        nameCat = 'myCategory'

        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        curContainer.append(aCat)

        myContainerList = [curContainer]
        # Context managers replace the bare open/close pairs so the file
        # handles are released even if an assertion fails.
        with open(str(fn), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)

        myContainerList = []
        with open(str(fn), "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                print("Recovered data category  %s\n" % name)
                print("Attribute list           %r\n" % repr(aList))
                print("Row list                 %r\n" % repr(rList))
        assert len(myContainerList) == 1
Exemple #16
0
    def testSingleRow(self):
        """Test case -  read /write single row and null row in data file
        """
        try:
            containerList = []
            block = DataContainer("myblock")
            cat = DataCategory("pdbx_seqtool_mapping_ref")
            for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                             "pdb_chain_id", "ref_mon_id", "ref_mon_num", "details"):
                cat.appendAttribute(attrName)
            # Each row's details field is a CIF reserved token the writer must quote.
            for details in ("data_my_big_data_file", "loop_my_big_data_loop",
                            "save_my_big_data_saveframe", "_category.item"):
                cat.append([1, 2, 3, 4, 5, 6, 7, details])
            block.append(cat)
            #
            readBack = block.getObj("pdbx_seqtool_mapping_ref")
            logger.debug("----attribute list %r", readBack.getAttributeList())
            logger.debug("----ROW %r", readBack.getRow(0))
            #
            with open(self.__pathOutputFile2, "w") as ofh:
                containerList.append(block)
                writer = PdbxWriter(ofh)
                writer.write(containerList)

            self.assertEqual(len(containerList), 1)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Exemple #17
0
    def test_row_dict_initialization(self, rw_data):
        """Initialize a category from row dictionaries and round-trip it."""
        rLen = 10
        fn = rw_data['pathOutputFile5']
        attributeNameList = ['a', 'b', 'c', 'd']
        rowList = [{'a': 1, 'b': 2, 'c': 3, 'd': 4} for i in range(rLen)]
        nameCat = 'myCategory'
        #
        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        # Two extra rows plus a second copy of rowList -> 2 * rLen + 2 rows.
        aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
        aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
        aCat.extend(rowList)
        curContainer.append(aCat)
        aCat.renameAttributes({'a': 'aa', 'b': 'bb', 'c': 'cc', 'd': 'dd'})
        aCat.setName('renamedCategory')
        #
        myContainerList = [curContainer]
        # Context managers replace the bare open/close pairs so the file
        # handles are released even if an assertion fails.
        with open(str(fn), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)

        myContainerList = []
        with open(str(fn), "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                print("Recovered data category  %s\n" % name)
                print("Attribute list           %r\n" % repr(aList))
                print("Row list                 %r\n" % repr(rList))
        assert len(myContainerList) == 1
        assert len(rList) == 2 * rLen + 2
Exemple #18
0
    def test_write_data_file(self, rw_data):
        """Write a small reference category to a data file."""
        containerList = []
        block = DataContainer("myblock")
        cat = DataCategory("pdbx_seqtool_mapping_ref")
        for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                         "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            cat.appendAttribute(attrName)
        # Four identical rows followed by one reversed row.
        for _ in range(4):
            cat.append([1, 2, 3, 4, 5, 6, 7])
        cat.append([7, 6, 5, 4, 3, 2, 1])
        block.append(cat)

        containerList.append(block)
        with open(str(rw_data['pathOutputFile1']), "w") as ofh:
            writer = PdbxWriter(ofh)
            writer.write(containerList)
        assert len(containerList) == 1
Exemple #19
0
    def test_write_data_file(self, writer_paths):
        """Write rows with mixed-width values using the aligned writer."""
        containerList = []

        block = DataContainer("myblock")
        cat = DataCategory("pdbx_seqtool_mapping_ref")
        for attrName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                         "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            cat.appendAttribute(attrName)
        # The pdb_chain_id values vary in width to exercise column alignment.
        for chainValue in ('55555555555555555555555555555555555555555555',
                           '5555', '5555555555', '5'):
            cat.append((1, 2, 3, 4, chainValue, 6, 7))
        block.append(cat)
        containerList.append(block)
        with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
            writer = PdbxWriter(ofh)
            writer.setAlignmentFlag(flag=True)
            writer.write(containerList)
        assert len(containerList) == 1
Exemple #20
0
    def __dictionaryPragma(self, dictName, dictDescription, version,
                           updateDate, comment):
        """ Add CIF dictionary header details including name, version and history.

        Returns:
            Data container (object)  data container with dictionary history and version details
        """
        #
        header = DataContainer("pdbx_vrpt_ext.dic")
        # datablock: identifier and description
        blockCat = DataCategory("datablock", attributeNameList=["id", "description"])
        blockCat.append([dictName, dictDescription])
        header.append(blockCat)
        # dictionary: title, owning datablock and version
        dictCat = DataCategory(
            "dictionary",
            attributeNameList=["title", "datablock_id", "version"])
        dictCat.append([dictName, dictName, version])
        header.append(dictCat)
        # dictionary_history: a single revision record
        histCat = DataCategory("dictionary_history",
                               attributeNameList=["version", "update", "revision"])
        histCat.append([version, updateDate, comment])
        header.append(histCat)
        return header
    def __writeModel(self, targetId, targetObj, fitFD, fitXyzMapD,
                     fitAtomUnMappedL, matchD, modelId, modelPath):
        """Write the chemical component model for the input chemical component Id and associated atom mapping and
        feature details --

            ComponentAtomDetails = namedtuple("ComponentAtomDetails", "index atNo name aType x y z fCharge")
            AlignAtomMap = namedtuple("AlignAtomMap", "refId refAtIdx refAtNo refAtName fitId fitAtIdx fitAtNo fitAtName")
            AlignAtomUnMapped = namedtuple("AlignAtomUnMapped", "fitId fitAtIdx fitAtNo fitAtType fitAtName fitAtFormalCharge x y z fitNeighbors")
        """
        try:
            unMappedTypeD = defaultdict(int)
            hAtomPrefix = "HEX"
            variantType = self.__getBuildVariant(targetId)
            #
            if not self.__testUnMappedProtonation(fitAtomUnMappedL):
                logger.info(
                    "Unmapped non-hydrogen atoms target %r model %r unMapped count (%d)",
                    targetId, modelId, len(fitAtomUnMappedL))
                return False, variantType
            # Get atom partners for the unmapped atoms
            fitAtMapD = {}
            for refAtName, fAtTup in fitXyzMapD.items():
                fitAtMapD[fAtTup.atName] = refAtName
            if fitAtomUnMappedL:
                #  Check if neighbors are all mapped
                ok = True
                for fitUnTup in fitAtomUnMappedL:
                    for nAtName in fitUnTup.fitNeighbors:
                        if nAtName not in fitAtMapD:
                            ok = False
                            logger.info(
                                "Missing mapped neighbor for %r target %r model %r",
                                nAtName, targetId, modelId)
                            break
                if not ok:
                    return False, variantType
                else:
                    logger.debug("%s match has unmapped protonation", modelId)
                    variantType = "tautomer_protomer"
            #
            #
            kList = ["xyz", "SMILES", "SMILES_STEREO", "InChI", "InChIKey"]
            for k in kList:
                if k not in fitFD:
                    logger.error(
                        "Fit feature dictionary for %s missing key %s",
                        targetId, k)
                    return False, variantType
            # ------------
            dataContainer = DataContainer(modelId)
            #
            myContainer = targetObj
            dbName = myContainer.getName()
            if dbName.upper() != targetId.upper():
                logger.info("mismatch datablock (%r) and targetId (%r)",
                            dbName, targetId)
            cObj = None
            if myContainer.exists("chem_comp"):
                cObj = myContainer.getObj("chem_comp")
            #
            #
            catName = "pdbx_chem_comp_model"
            if not dataContainer.exists(catName):
                dataContainer.append(
                    DataCategory(catName, attributeNameList=["id", "comp_id"]))
            #
            parentId = targetId.split("|")[0]
            wObj = dataContainer.getObj(catName)
            wObj.setValue(modelId, "id", 0)
            wObj.setValue(parentId, "comp_id", 0)
            #
            # --------  ---------
            catName = "pdbx_chem_comp_model_atom"
            if not dataContainer.exists(catName):
                dataContainer.append(
                    DataCategory(catName,
                                 attributeNameList=[
                                     "model_id", "atom_id", "type_symbol",
                                     "charge", "model_Cartn_x",
                                     "model_Cartn_y", "model_Cartn_z",
                                     "ordinal_id"
                                 ]))
            wObj = dataContainer.getObj(catName)
            #
            if myContainer.exists("chem_comp_atom"):
                cObj = myContainer.getObj("chem_comp_atom")
            #
            #  Only write the mapped atoms in case we are missing hydrogens in the mapping
            #
            jj = 0
            for ii in range(cObj.getRowCount()):
                atName = cObj.getValue("atom_id", ii)
                atType = cObj.getValue("type_symbol", ii)
                if atName not in fitXyzMapD:
                    unMappedTypeD[atType] += 1
                    continue
                fitXyz = fitXyzMapD[atName]
                #
                # fCharge = cObj.getValue("charge", ii)
                #
                wObj.setValue(modelId, "model_id", jj)
                wObj.setValue(atName, "atom_id", jj)
                wObj.setValue(atType, "type_symbol", jj)
                #
                wObj.setValue(fitXyz.atFormalCharge, "charge", jj)
                wObj.setValue("%.4f" % fitXyz.x, "model_Cartn_x", jj)
                wObj.setValue("%.4f" % fitXyz.y, "model_Cartn_y", jj)
                wObj.setValue("%.4f" % fitXyz.z, "model_Cartn_z", jj)
                wObj.setValue(jj + 1, "ordinal_id", jj)
                jj += 1
            #
            # Add the unmapped atoms ...
            # AlignAtomUnMapped = namedtuple("AlignAtomUnMapped", "fitId fitAtIdx fitAtNo fitAtType fitAtName fitNeighbors")
            ii = wObj.getRowCount()
            for jj, uTup in enumerate(fitAtomUnMappedL):
                refAtomName = hAtomPrefix + str(jj)
                wObj.setValue(modelId, "model_id", ii)
                wObj.setValue(refAtomName, "atom_id", ii)
                wObj.setValue(uTup.fitAtType, "type_symbol", ii)
                wObj.setValue(uTup.fitAtFormalCharge, "charge", ii)
                wObj.setValue("%.4f" % uTup.x, "model_Cartn_x", ii)
                wObj.setValue("%.4f" % uTup.y, "model_Cartn_y", ii)
                wObj.setValue("%.4f" % uTup.z, "model_Cartn_z", ii)
                wObj.setValue(ii + 1, "ordinal_id", ii)
            # --------  ---------
            catName = "pdbx_chem_comp_model_bond"
            if not dataContainer.exists(catName):
                dataContainer.append(
                    DataCategory(catName,
                                 attributeNameList=[
                                     "model_id", "atom_id_1", "atom_id_2",
                                     "value_order", "ordinal_id"
                                 ]))
            wObj = dataContainer.getObj(catName)
            #
            if myContainer.exists("chem_comp_bond"):
                cObj = myContainer.getObj("chem_comp_bond")
            #
            jj = 0
            for ii in range(cObj.getRowCount()):
                at1 = cObj.getValue("atom_id_1", ii)
                if at1 not in fitXyzMapD:
                    continue
                at2 = cObj.getValue("atom_id_2", ii)
                if at2 not in fitXyzMapD:
                    continue
                bType = cObj.getValue("value_order", ii)
                #
                wObj.setValue(modelId, "model_id", jj)
                wObj.setValue(at1, "atom_id_1", jj)
                wObj.setValue(at2, "atom_id_2", jj)
                wObj.setValue(bType, "value_order", jj)
                wObj.setValue(jj + 1, "ordinal_id", jj)
                jj += 1
            #
            ii = wObj.getRowCount()
            for jj, uTup in enumerate(fitAtomUnMappedL):
                at1 = hAtomPrefix + str(jj)
                for nAt in uTup.fitNeighbors:
                    at2 = fitAtMapD[nAt]
                    wObj.setValue(modelId, "model_id", ii)
                    wObj.setValue(at1, "atom_id_1", ii)
                    wObj.setValue(at2, "atom_id_2", ii)
                    wObj.setValue("SING", "value_order", ii)
                    wObj.setValue(ii + 1, "ordinal_id", ii)

            # --------  ---------
            catName = "pdbx_chem_comp_model_descriptor"
            if not dataContainer.exists(catName):
                dataContainer.append(
                    DataCategory(
                        catName,
                        attributeNameList=["model_id", "type", "descriptor"]))
            wObj = dataContainer.getObj(catName)
            #
            ii = 0
            wObj.setValue(modelId, "model_id", ii)
            wObj.setValue("SMILES", "type", ii)
            wObj.setValue(fitFD["SMILES"], "descriptor", ii)
            ii += 1
            wObj.setValue(modelId, "model_id", ii)
            wObj.setValue("SMILES_CANONICAL", "type", ii)
            wObj.setValue(fitFD["SMILES_STEREO"], "descriptor", ii)
            ii += 1
            wObj.setValue(modelId, "model_id", ii)
            wObj.setValue("InChI", "type", ii)
            wObj.setValue(fitFD["InChI"], "descriptor", ii)
            ii += 1
            wObj.setValue(modelId, "model_id", ii)
            wObj.setValue("InChIKey", "type", ii)
            wObj.setValue(fitFD["InChIKey"], "descriptor", ii)
            #
            # --------  ---------
            if matchD["queryId"] is not None:
                catName = "pdbx_chem_comp_model_reference"
                if not dataContainer.exists(catName):
                    dataContainer.append(
                        DataCategory(catName,
                                     attributeNameList=[
                                         "model_id", "db_name", "db_code"
                                     ]))
                wObj = dataContainer.getObj(catName)
                ii = 0
                wObj.setValue(modelId, "model_id", ii)
                wObj.setValue("COD", "db_name", ii)
                wObj.setValue(matchD["queryId"], "db_code", ii)
            #
            featureD = {}
            v = matchD["rValue"]
            vS = str(v)
            if v is not None and len(vS) > 0:
                featureD["r_factor"] = "%.3f" % float(v)
            #
            v = matchD["diffrnTemp"]
            vS = str(v)
            # remove string artifacts from temperature string ...
            if v is not None and len(vS) > 0:
                tV = vS.upper()
                try:
                    if tV.endswith("DEG.C"):
                        tV = tV.replace("AT", "")
                        tV = tV.replace("DEG.C", "")
                        tV = float(tV.strip())
                        tV = tV + 273.15
                    else:
                        tV = tV.replace("AT", "")
                        tV = tV.replace("K", "")
                        tV = float(tV.strip())
                    featureD["experiment_temperature"] = tV
                except Exception as e:
                    logger.exception(
                        "Temperature conversion fails for %s (%r) with %s",
                        modelId, vS, tV)
            #
            v = matchD["publicationDOI"]
            vS = str(v)
            if v is not None and len(vS) > 0:
                featureD["publication_doi"] = v
            #
            v = matchD["version"]
            vS = str(v)
            if v is not None and len(vS) > 0:
                featureD["cod_version"] = v
            #
            if matchD["radiationSource"] and "neutron" in matchD[
                    "radiationSource"]:
                featureD["neutron_radiation_experiment"] = True
            if matchD["hasDisorder"] in ["Y"]:
                featureD["has_disorder"] = True
            #
            if len(unMappedTypeD) == 1 and "H" in unMappedTypeD:
                logger.info("model %r heavy_atoms_only", modelId)
                featureD["heavy_atoms_only"] = True
            else:
                featureD["all_atoms_have_sites"] = True
            # --------  ---------
            catName = "pdbx_chem_comp_model_feature"
            if not dataContainer.exists(catName):
                dataContainer.append(
                    DataCategory(catName,
                                 attributeNameList=[
                                     "model_id", "feature_name",
                                     "feature_value"
                                 ]))
            wObj = dataContainer.getObj(catName)
            #
            fKeyList = [
                "experiment_temperature", "publication_doi", "r_factor",
                "csd_version"
            ]
            ii = 0
            for fKey in fKeyList:
                if fKey in featureD:
                    wObj.setValue(modelId, "model_id", ii)
                    wObj.setValue(fKey, "feature_name", ii)
                    wObj.setValue(str(featureD[fKey]), "feature_value", ii)
                    ii += 1

            #
            boolKeyList = [
                "has_disorder", "neutron_radiation_experiment",
                "heavy_atoms_only", "all_atoms_have_sites"
            ]
            for fKey in boolKeyList:
                if fKey in featureD:
                    if featureD[fKey]:
                        wObj.setValue(modelId, "model_id", ii)
                        wObj.setValue(fKey, "feature_name", ii)
                        wObj.setValue("Y", "feature_value", ii)
                        ii += 1
            #

            if variantType:
                wObj.setValue(modelId, "model_id", ii)
                wObj.setValue(variantType + "_match", "feature_name", ii)
                wObj.setValue("Y", "feature_value", ii)
                ii += 1

            # --------  ---------
            catName = "pdbx_chem_comp_model_audit"
            if not dataContainer.exists(catName):
                dataContainer.append(
                    DataCategory(
                        catName,
                        attributeNameList=["model_id", "action_type", "date"]))
            wObj = dataContainer.getObj(catName)
            #
            ii = 0
            wObj.setValue(modelId, "model_id", ii)
            wObj.setValue("Initial release", "action_type", ii)
            wObj.setValue(self.__getToday(), "date", ii)
            # wObj.setValue('RCSB', 'processing_site',  ii)
            # wObj.setValue('JDW', 'annotator', ii)
            # wObj.setValue('?', 'details', ii)
            #
            mU = MarshalUtil(workPath=self.__cachePath)
            ok = mU.doExport(modelPath, [dataContainer], fmt="mmcif")
            return ok, variantType
        except Exception as e:
            logger.exception("Failing for %r with %s", targetId, str(e))
        return False, ""
    def _createfile2(pathout):
        """Write a small multi-category test CIF file to *pathout*.

        Builds one data block named "test" containing five fixed categories
        and writes it with alignment enabled.
        """
        container = DataContainer("test")

        # (category name, attribute names, rows) for each category to emit.
        spec = [
            ("new", ("item",), [('1',)]),
            ("second_category", ("row", "rowb"), [('1', '2')]),
            ("third", ("id", "val"), [('1', 'a'), ('2', 'b'), ('3', 'c')]),
            ("exptl", ("method", "entry_id"), [('NEW', 'something')]),
            ("struct", ("new", "pdbx_descriptor"),
             [('Something to add', 'Override descriptor')]),
        ]
        for name, attrs, rows in spec:
            cat = DataCategory(name)
            for attr in attrs:
                cat.appendAttribute(attr)
            for row in rows:
                cat.append(row)
            container.append(cat)

        with open(pathout, "w") as ofh:
            writer = PdbxWriter(ofh)
            writer.setAlignmentFlag(flag=True)
            writer.write([container])
Exemple #23
0
 def AddBlock(self, blockID):
     """Create a new data block named *blockID* and make it current.

     The block is appended to the in-memory block list and indexed by name.
     """
     container = DataContainer(blockID)
     # Record the index before appending so the map points at this block.
     self.__dataMap[blockID] = len(self.__dataList)
     self.__dataList.append(container)
     self.__container = container
     self.__blockID = blockID
Exemple #24
0
    def test_gen_ddlm(self, in_tmpdir, test_files):
        """Generate a DDLm-style rendering of the DDL2 PDBx/mmCIF dictionary.

        Reads mmcif_pdbx_v5_next.dic, emits one dictionary header block plus
        a definition container per category, per synthetic category key, and
        per item, then writes mmcif_pdbx_ddlm_auto.dic to the working
        directory.
        """
        myIo = IoAdapterPy()
        containerList = myIo.readFile(
            inputFilePath=str(test_files / 'mmcif_pdbx_v5_next.dic'))
        dApi = DictionaryApi(containerList=containerList, consolidate=True)
        parentD = dApi.getParentDictionary()
        #
        oCList = []
        dDef = DataContainer('mmcif_pdbx_ddlm_auto')
        dc = DataCategory("dictionary")
        dc.appendAttribute("title")
        dc.appendAttribute("class")
        dc.appendAttribute("version")
        dc.appendAttribute("date")
        dc.appendAttribute("ddl_conformance")
        dc.appendAttribute("text")
        dc.append([
            'mmcif_pdbx_ddlm_auto', 'Instance', 'latest', '2018-03-09',
            'ddlm best effort',
            'Software converted PDBx dictionary using DDLm semantics'
        ])
        dDef.append(dc)
        oCList.append(dDef)

        catIdx = dApi.getCategoryIndex()
        for catName in sorted(catIdx.keys()):
            attNameList = catIdx[catName]
            # created definition container -
            cDef = DefinitionContainer(catName)
            oCList.append(cDef)
            #
            dc = DataCategory("definition")
            dc.appendAttribute("id")
            dc.appendAttribute("scope")
            dc.appendAttribute("class")
            dc.appendAttribute("update")
            dc.append([catName, "Category", "Loop", "2018-03-09"])
            cDef.append(dc)
            val = dApi.getCategoryDescription(category=catName)
            dc = DataCategory("description")
            dc.appendAttribute("text")
            dc.append([val])
            cDef.append(dc)
            #
            dc = DataCategory("name")
            dc.appendAttribute("category_id")
            dc.appendAttribute("object_id")

            # Use the first non-inclusive category group as the parent group.
            valList = dApi.getCategoryGroupList(category=catName)
            pcg = catName
            for val in valList:
                if val != 'inclusive_group':
                    pcg = val
                    break
            dc.append([catName, pcg])
            cDef.append(dc)

            valList = dApi.getCategoryKeyList(category=catName)
            if len(valList) < 1:
                # Fixed typo in the original message ("caegory").
                print("Missing category key for category %s\n" % catName)
            else:
                dc = DataCategory("category")
                dc.appendAttribute("key_id")
                kItemName = CifName.itemName(catName, "synthetic_key")
                dc.append([kItemName])
                cDef.append(dc)

                iDef = DefinitionContainer(kItemName)
                self._makeKeyItem(catName, "synthetic_key", valList, iDef)
                oCList.append(iDef)

            for attName in attNameList:
                itemName = CifName.itemName(catName, attName)
                iDef = DefinitionContainer(itemName)

                oCList.append(iDef)

                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([itemName, "Item", "Single", "2013-08-22"])
                iDef.append(dc)
                #
                val = dApi.getDescription(category=catName, attribute=attName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                iDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")
                #
                # Record the first parent item when this item has one.
                if itemName in parentD:
                    dc.appendAttribute("linked_item_id")
                    dc.append([catName, attName, parentD[itemName][0]])
                else:
                    dc.append([catName, attName])
                iDef.append(dc)
                #
                #
                aliasList = dApi.getItemAliasList(category=catName,
                                                  attribute=attName)
                if len(aliasList) > 0:
                    dc = DataCategory("alias")
                    dc.appendAttribute("definition_id")
                    for alias in aliasList:
                        dc.append([alias[0]])
                    iDef.append(dc)

                enList = dApi.getEnumListAltWithDetail(category=catName,
                                                       attribute=attName)

                # Map the DDL2 type code onto DDLm purpose/source/contents.
                tC = dApi.getTypeCode(category=catName, attribute=attName)
                tcontainer = 'Single'
                purpose = 'Describe'
                source = 'Recorded'
                contents = 'Text'
                #
                if tC is None:
                    # Originally wrote to self.__lfh, which this pytest-style
                    # class does not define here; report via print for
                    # consistency with the category-key warning above.
                    print("Missing data type attribute %s\n" % attName)
                elif tC in [
                        'code', 'atcode', 'name', 'idname', 'symop', 'fax',
                        'phone', 'email', 'code30', 'ec-type'
                ]:
                    purpose = 'Encode'
                    contents = 'Text'
                    source = 'Assigned'
                elif tC in ['ucode']:
                    purpose = 'Encode'
                    contents = 'Code'
                    source = 'Assigned'
                elif tC in ['line', 'uline', 'text']:
                    purpose = 'Describe'
                    source = 'Recorded'
                    contents = 'Text'
                elif tC in ['int']:
                    purpose = 'Number'
                    source = 'Recorded'
                    contents = 'Integer'
                elif tC in ['int-range']:
                    purpose = 'Number'
                    source = 'Recorded'
                    contents = 'Range'
                elif tC in ['float']:
                    purpose = 'Measurand'
                    source = 'Recorded'
                    contents = 'Real'
                elif tC in ['float-range']:
                    purpose = 'Measurand'
                    source = 'Recorded'
                    contents = 'Range'
                elif tC.startswith('yyyy'):
                    source = 'Assigned'
                    contents = 'Date'
                    purpose = 'Describe'

                # An enumerated item is a state selector regardless of type.
                if len(enList) > 0:
                    purpose = 'State'

                dc = DataCategory("type")
                dc.appendAttribute("purpose")
                dc.appendAttribute("source")
                dc.appendAttribute("contents")
                dc.appendAttribute("container")
                dc.append([purpose, source, contents, tcontainer])
                iDef.append(dc)
                #
                if len(enList) > 0:
                    dc = DataCategory("enumeration_set")
                    dc.appendAttribute("state")
                    dc.appendAttribute("detail")
                    for en in enList:
                        dc.append([en[0], en[1]])
                    iDef.append(dc)

                dfv = dApi.getDefaultValue(category=catName, attribute=attName)
                bvList = dApi.getBoundaryList(category=catName,
                                              attribute=attName)
                if (((dfv is not None) and (dfv not in ['?', '.']))
                        or len(bvList) > 0):
                    row = []
                    dc = DataCategory("enumeration")
                    if dfv is not None:
                        dc.appendAttribute("default")
                        row.append(dfv)
                    if len(bvList) > 0:
                        # Collapse all boundary pairs into one overall range;
                        # '.' means unbounded on that side and renders as ''.
                        dc.appendAttribute("range")
                        mminVp = -1000000
                        mmaxVp = 10000000
                        mminV = mmaxVp
                        mmaxV = mminVp
                        for bv in bvList:
                            minV = float(bv[0]) if bv[0] != '.' else mminVp
                            maxV = float(bv[1]) if bv[1] != '.' else mmaxVp
                            mminV = min(mminV, minV)
                            mmaxV = max(mmaxV, maxV)
                        if mminV == mminVp:
                            mminV = ''
                        if mmaxV == mmaxVp:
                            mmaxV = ''
                        row.append(str(mminV) + ":" + str(mmaxV))

                    dc.append(row)
                    iDef.append(dc)

        myIo.writeFile(outputFilePath="mmcif_pdbx_ddlm_auto.dic",
                       containerList=oCList)
    def _createfile1(pathout):
        """Write a two-block starting-state test CIF file to *pathout*.

        Block "myblock" holds three categories; block "secondblock" holds a
        single category.  Output is written with alignment enabled.
        """
        def _category(name, attrs, row):
            # Build a single-row category from a name, attributes, and values.
            cat = DataCategory(name)
            for attr in attrs:
                cat.appendAttribute(attr)
            cat.append(row)
            return cat

        first = DataContainer("myblock")
        first.append(_category("pdbx_item_enumeration",
                               ("name", "value", "detail"),
                               ('1', '2', '3')))
        first.append(_category("exptl",
                               ("absorpt_coefficient_mu", "entry_id",
                                "method", "details"),
                               ('?', 'D_12345', 'X-RAY DIFFRACTION',
                                'some details')))
        first.append(_category("struct",
                               ("title", "pdbx_descriptor"),
                               ('Start title', 'Start Descriptor')))

        # Second block
        second = DataContainer("secondblock")
        second.append(_category("pdbx_item_enumeration",
                                ("name", "value", "detail"),
                                ('3', '2', '1')))

        with open(pathout, "w") as ofh:
            writer = PdbxWriter(ofh)
            writer.setAlignmentFlag(flag=True)
            writer.write([first, second])
Exemple #26
0
class mmCIFUtil:
    """Thin convenience wrapper around the pdbx mmCIF reader/writer.

    Parses *filePath* on construction (when given) and exposes simple
    accessors and mutators for the parsed data blocks.
    """
    def __init__(self, verbose=False, log=sys.stderr, filePath=None):  # pylint: disable=unused-argument
        # self.__verbose = verbose
        self.__lfh = log                # diagnostics stream
        self.__filePath = filePath
        self.__dataList = []            # DataContainer objects, in file order
        self.__dataMap = {}             # block name -> index into __dataList
        self.__container = None         # current container (first block read)
        self.__blockID = None           # name of the current container
        self.__read()
        #

    def __read(self):
        """Parse the input file and index its data blocks by name."""
        if not self.__filePath:
            return
        #
        try:
            # Context manager guarantees the handle is closed even when the
            # parse raises (the original leaked it on error).
            with open(self.__filePath, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(self.__dataList)
            if self.__dataList:
                # The first block becomes the current container.
                self.__container = self.__dataList[0]
                self.__blockID = self.__container.getName()
                for idx, container in enumerate(self.__dataList):
                    self.__dataMap[container.getName()] = idx
                #
            #
        except Exception as e:
            self.__lfh.write("Read %s failed %s.\n" %
                             (self.__filePath, str(e)))
        #

    def GetBlockID(self):
        """Return first block ID"""
        return self.__blockID

    def GetValueAndItemByBlock(self, blockName, catName):
        """Return (rows, item names) for category *catName* in *blockName*.

        Rows are dictionaries keyed by item name; '?' and '.' placeholder
        values are omitted.  Empty lists are returned for unknown blocks or
        categories.
        """
        dList = []
        iList = []
        if blockName not in self.__dataMap:
            return dList, iList
        #
        catObj = self.__dataList[self.__dataMap[blockName]].getObj(catName)
        if not catObj:
            return dList, iList
        #
        iList = catObj.getAttributeList()
        for row in catObj.getRowList():
            tD = {}
            for idxIt, itName in enumerate(iList):
                if row[idxIt] != "?" and row[idxIt] != ".":
                    tD[itName] = row[idxIt]
            #
            # Rows that contain only placeholders are dropped entirely.
            if tD:
                dList.append(tD)
            #
        #
        return dList, iList

    def GetValueAndItem(self, catName):
        """Return (rows, item names) for *catName* in the first block."""
        dList, iList = self.GetValueAndItemByBlock(self.__blockID, catName)
        return dList, iList

    def GetValue(self, catName):
        """Get category values based on category name 'catName'. The results are stored
        in a list of dictionaries with item name as key
        """
        dList, _iList = self.GetValueAndItemByBlock(self.__blockID, catName)
        return dList

    def GetSingleValue(self, catName, itemName):
        """Get the first value of item name 'itemName' from 'itemName' item in 'catName' category."""
        text = ""
        dlist = self.GetValue(catName)
        if dlist and itemName in dlist[0]:
            text = dlist[0][itemName]
        return text
        #

    def UpdateSingleRowValue(self, catName, itemName, row, value):
        """Update value in single row"""
        catObj = self.__container.getObj(catName)
        if catObj is None:
            return
        #
        catObj.setValue(value, itemName, row)

    def UpdateMultipleRowsValue(self, catName, itemName, value):
        """Update value in multiple rows"""
        catObj = self.__container.getObj(catName)
        if catObj is None:
            return
        #
        for row in range(catObj.getRowCount()):
            catObj.setValue(value, itemName, row)
        #

    def AddBlock(self, blockID):
        """Add a new data block and make it the current container."""
        self.__container = DataContainer(blockID)
        self.__blockID = blockID
        self.__dataMap[blockID] = len(self.__dataList)
        self.__dataList.append(self.__container)

    def AddCategory(self, categoryID, items):
        """Add a new category with attribute names *items* to the current block."""
        category = DataCategory(categoryID)
        for item in items:
            category.appendAttribute(item)
        #
        self.__container.append(category)

    def RemoveCategory(self, categoryID):
        """Remove *categoryID* from the current block; return removal status."""
        return self.__container.remove(categoryID)

    def InsertData(self, categoryID, dataList):
        """Append each row in *dataList* to existing category *categoryID*."""
        catObj = self.__container.getObj(categoryID)
        if catObj is None:
            return
        #
        for data in dataList:
            catObj.append(data)
        #

    def WriteCif(self, outputFilePath=None):
        """Write out cif file"""
        if not outputFilePath:
            return
        #
        # Context manager closes the handle even if the writer raises.
        with open(outputFilePath, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(self.__dataList)

    def GetCategories(self):
        """Return the category names present in the current block."""
        return self.__container.getObjNameList()

    def GetAttributes(self, category):
        """Return the attribute names of *category* in the current block."""
        return self.__container.getObj(category).getAttributeList()

    def category_as_dict(self, category, block=None):
        """Return {category: {"Items": [...], "Values": [[...]]}} for *block*."""
        if block is None:
            block = self.__blockID
        values, attributes = self.GetValueAndItemByBlock(block, category)
        # Missing items render as None so every row has the same width.
        data = [[x[y] if y in x else None for y in attributes] for x in values]
        return {category: {"Items": attributes, "Values": data}}

    def block_as_dict(self, block=None):
        """Return every category of *block* rendered via category_as_dict."""
        if block is None:
            block = self.__blockID
        data = {}
        for category in self.GetCategories():
            data.update(self.category_as_dict(category, block=block))
        return data
Exemple #27
0
    def __buildCif(self, rD, containerName="vrpt"):
        """ Construct a mmCIF data category objects for the input
            extracted data.

        Args:
            rD (dict): extracted data organized by category.
            containerName (str) : data block name

        Returns:
            containers (list):  data container list
        """
        #
        curContainer = DataContainer(containerName)
        for catName, rowList in rD.items():
            # Skip empty categories, categories without an attribute map,
            # and the "programs" element.
            if (not rowList) or (not self.__attribD[catName]) or catName == "programs":
                continue
            hasOrdinal = "ordinal" in self.__attribD[catName]
            # Collect the union of attributes across the rows while stamping
            # ordinals and normalizing insertion/alternate codes in place.
            seenAttrs = set()
            for ordinal, rowD in enumerate(rowList, 1):
                if hasOrdinal:
                    rowD["ordinal"] = ordinal
                if "icode" in rowD:
                    rowD["icode"] = str(rowD["icode"]).strip()
                if "altcode" in rowD:
                    rowD["altcode"] = str(rowD["altcode"]).strip()
                seenAttrs.update(rowD.keys())
            #
            # Order the attributes by the canonical ordering table.
            #
            srtAtL = sorted(seenAttrs, key=lambda at: self.__atOrdD[at])
            logger.debug("Category %s sorted attributes %r", catName, srtAtL)

            curContainer.append(DataCategory(catName, srtAtL, rowList))
        #
        # Adjust schema names -
        #
        atD = self.__dictionaryMap["attributes"]
        catMapD = self.__dictionaryMap["categories"]
        for catName in curContainer.getObjNameList():
            catObj = curContainer.getObj(catName)
            mapD = {}
            for atName in catObj.getAttributeList():
                key = (catName, atName)
                mapD[atName] = atD[key]["at"] if key in atD else atName
            catObj.renameAttributes(mapD)
            catObj.setName(catMapD.get(catName, catName))
        #
        # Map provenance items from programs.properties -
        #
        catObj = curContainer.getObj("program")
        if catObj and catObj.hasAttribute("properties"):
            for iRow in range(catObj.getRowCount()):
                tokens = [v.strip()
                          for v in catObj.getValue("properties", iRow).split(",")]
                mapped = [self.__atMap.get(ky, ky) for ky in tokens]
                catObj.setValue(",".join(mapped), "properties", iRow)
                # logger.info("Row %r properties %r" % (iRow, pV))
        #
        return [curContainer]
Exemple #28
0
class CIF(object):
    """
    This class uses the mmcif library to create an mmCIF-like object.

    Each object holds one DataContainer (identified by a container ID)
    and a list of DataCategory objects appended to that container.
    """

    # Fallback container ID used by prepare_container() when no ID is given.
    DUMMY_CONTAINER_ID = "emd_0000"

    def __init__(self, cif_name_name):
        """
        :param cif_name_name: path of the mmCIF file that write() will produce
        """
        self.filename = cif_name_name
        # self.__dataList needed for PDBxWriter; holds every container
        # added via add_container(), in insertion order.
        self.__dataList = []
        # ID of the most recently added container (None until add_container()).
        self.__container_id = None
        # Current DataContainer that add_category()/insert_data() operate on.
        self.__container = None
        # Maps container ID -> index of that container within self.__dataList.
        self.__dataMap = {}

    def write(self):
        """
        Given a file name, a pdbx writer is used to write data stored in self.__dataList
        :return written: a boolean; True when pdf writer is finished
        """
        written = False
        if self.filename:
            ofh = open(self.filename, "w")
            pdbx_writer = PdbxWriter(ofh)
            pdbx_writer.write(self.__dataList)
            ofh.close()
            written = True

        return written

    def add_container(self, container_id):
        """
        Create a new DataContainer for container_id, register it in the
        internal bookkeeping structures and make it the current container.
        :param container_id: a string; an mmcif category e.g. 'emd_admin'
        :return: True when the container was created
        """
        self.__container_id = container_id
        self.__container = DataContainer(container_id)
        # Remember where this container lives inside self.__dataList.
        self.__dataMap[container_id] = len(self.__dataList)
        self.__dataList.append(self.__container)
        return self.__container is not None

    def prepare_container(self, container_id):
        """
        Creates a container is it doesn't exist using either provided value or the dummy value
        :param container_id: a string; an mmcif category e.g. 'emd_admin'
        :return:
        """
        if not self.__container:
            if container_id is None:
                container_id = self.DUMMY_CONTAINER_ID

            return self.add_container(container_id)

    def add_category(self, category_id, items):
        """
        Build a DataCategory carrying the given attribute names and append it
        to the current container.
        :param category_id: a string; an mmcif category e.g. 'emd_admin'
        :param items: a list of strings; each element in the list is an item
            of the mmcif category as defined by category_id
        :return: None
        """
        new_category = DataCategory(category_id)
        for attribute_name in items:
            new_category.appendAttribute(attribute_name)
        self.__container.append(new_category)

    #
    # def update_single_row_value(self, category_id, item_name, row, value):
    #     """Update value in single row
    #     """
    #     catObj = self.__container.getObj(category_id)
    #     if catObj is None:
    #         return
    #     #
    #     catObj.setValue(value, item_name, row)
    #
    # def update_multiple_rows_value(self, category_id, item_name, value):
    #     """Update value in multiple rows
    #     """
    #     cat_obj = self.__container.getObj(category_id)
    #     if cat_obj is None:
    #         return
    #     #
    #     row_no = cat_obj.getRowCount()
    #     for row in range(0, row_no):
    #         cat_obj.setValue(value, item_name, row)

    def insert_data(self, category_id, data_list):
        """
        This method appends the data in data_list to the container labeled category_id
        :param category_id: a string; an mmcif category e.g. 'emd_admin'
        :param data_list:
        :return:
        """
        cat_obj = self.__container.getObj(category_id)
        if cat_obj is None:
            return
        if any(isinstance(el, list) for el in data_list):
            # print(data_list)
            for data_ord in data_list[0]:
                new_list = []
                ord_index = data_list[0].index(data_ord)
                new_list.append(ord_index)
                new_list.append(data_list[1][ord_index])
                # print(new_list)
                cat_obj.append(new_list)
        else:
            cat_obj.append(data_list)

    def insert_data_into_category(self, category_id, data_items, data_list):
        """
        Helper method: registers the category and its items on the current
        container, then inserts the corresponding data values.
        :param category_id: a string; an mmcif category e.g. 'emd_admin'
        :param data_items: a list of strings; each element in the list is an
            item of the mmcif category as defined by category_id
        :param data_list: a list of strings; each element represents a value
            for the corresponding element in data_items
        :return: None
        """
        self.add_category(category_id, data_items)
        self.insert_data(category_id, data_list)
    def testGenDDLm(self):
        """Generating alternative DDLm metadata format. (starting point)

        Reads the PDBx dictionary, then emits a leading `dictionary`
        data block followed by one DDLm-style definition container per
        category and per item, and writes the result to
        test-output/mmcif_pdbx_ddlm_auto.dic.
        """
        try:
            myIo = IoAdapterPy(self.__verbose, self.__lfh)
            self.__containerList = myIo.readFile(
                inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(containerList=self.__containerList,
                                 consolidate=True,
                                 verbose=self.__verbose)
            parentD = dApi.getParentDictionary()
            #
            # Leading data block describing the generated dictionary itself -
            oCList = []
            dDef = DataContainer("mmcif_pdbx_ddlm_auto")
            dc = DataCategory("dictionary")
            dc.appendAttribute("title")
            dc.appendAttribute("class")
            dc.appendAttribute("version")
            dc.appendAttribute("date")
            dc.appendAttribute("ddl_conformance")
            dc.appendAttribute("text")
            dc.append([
                "mmcif_pdbx_ddlm_auto", "Instance", "latest", "2018-03-09",
                "ddlm best effort",
                "Software converted PDBx dictionary using DDLm semantics"
            ])
            dDef.append(dc)
            oCList.append(dDef)

            catIdx = dApi.getCategoryIndex()
            for catName in sorted(catIdx.keys()):
                attNameList = catIdx[catName]
                # created definition container -
                cDef = DefinitionContainer(catName)
                oCList.append(cDef)
                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([catName, "Category", "Loop", "2018-03-09"])
                cDef.append(dc)
                val = dApi.getCategoryDescription(category=catName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                cDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")

                # Parent category group: first group that is not the
                # catch-all "inclusive_group"; fall back to the category name.
                valList = dApi.getCategoryGroupList(category=catName)
                pcg = catName
                for val in valList:
                    if val != "inclusive_group":
                        pcg = val
                        break
                dc.append([catName, pcg])
                cDef.append(dc)

                valList = dApi.getCategoryKeyList(category=catName)
                if not valList:
                    # Fixed message typo ("caegory" -> "category").
                    self.__lfh.write("Missing category key for category %s\n" %
                                     catName)
                else:
                    dc = DataCategory("category")
                    dc.appendAttribute("key_id")
                    kItemName = CifName.itemName(catName, "synthetic_key")
                    dc.append([kItemName])
                    cDef.append(dc)

                    iDef = DefinitionContainer(kItemName)
                    self.__makeKeyItem(catName, "synthetic_key", valList, iDef)
                    oCList.append(iDef)

                # One definition container per item of this category -
                for attName in attNameList:
                    itemName = CifName.itemName(catName, attName)
                    iDef = DefinitionContainer(itemName)

                    oCList.append(iDef)

                    #
                    dc = DataCategory("definition")
                    dc.appendAttribute("id")
                    dc.appendAttribute("scope")
                    dc.appendAttribute("class")
                    dc.appendAttribute("update")
                    dc.append([itemName, "Item", "Single", "2013-08-22"])
                    iDef.append(dc)
                    #
                    val = dApi.getDescription(category=catName,
                                              attribute=attName)
                    dc = DataCategory("description")
                    dc.appendAttribute("text")
                    dc.append([val])
                    iDef.append(dc)
                    #
                    dc = DataCategory("name")
                    dc.appendAttribute("category_id")
                    dc.appendAttribute("object_id")
                    #
                    # Record the parent item link when one exists -
                    if itemName in parentD:
                        dc.appendAttribute("linked_item_id")
                        dc.append([catName, attName, parentD[itemName][0]])
                    else:
                        dc.append([catName, attName])
                    iDef.append(dc)
                    #
                    #
                    aliasList = dApi.getItemAliasList(category=catName,
                                                      attribute=attName)
                    if aliasList:
                        dc = DataCategory("alias")
                        dc.appendAttribute("definition_id")
                        for alias in aliasList:
                            dc.append([alias[0]])
                        iDef.append(dc)

                    enList = dApi.getEnumListAltWithDetail(category=catName,
                                                           attribute=attName)

                    # Map the DDL2 type code onto DDLm type attributes
                    # (purpose/source/contents); defaults cover the
                    # unrecognized cases.
                    tC = dApi.getTypeCode(category=catName, attribute=attName)
                    tcontainer = "Single"
                    purpose = "Describe"
                    source = "Recorded"
                    contents = "Text"
                    #
                    if tC is None:
                        self.__lfh.write("Missing data type attribute %s\n" %
                                         attName)
                    elif tC in [
                            "code", "atcode", "name", "idname", "symop", "fax",
                            "phone", "email", "code30", "ec-type"
                    ]:
                        purpose = "Encode"
                        contents = "Text"
                        source = "Assigned"
                    elif tC in ["ucode"]:
                        purpose = "Encode"
                        contents = "Code"
                        source = "Assigned"
                    elif tC in ["line", "uline", "text"]:
                        purpose = "Describe"
                        source = "Recorded"
                        contents = "Text"
                    elif tC in ["int"]:
                        purpose = "Number"
                        source = "Recorded"
                        contents = "Integer"
                    elif tC in ["int-range"]:
                        purpose = "Number"
                        source = "Recorded"
                        contents = "Range"
                    elif tC in ["float"]:
                        purpose = "Measurand"
                        source = "Recorded"
                        contents = "Real"
                    elif tC in ["float-range"]:
                        purpose = "Measurand"
                        source = "Recorded"
                        contents = "Range"
                    elif tC.startswith("yyyy"):
                        source = "Assigned"
                        contents = "Date"
                        purpose = "Describe"

                    # Enumerated items always get purpose "State".
                    if enList:
                        purpose = "State"

                    dc = DataCategory("type")
                    dc.appendAttribute("purpose")
                    dc.appendAttribute("source")
                    dc.appendAttribute("contents")
                    dc.appendAttribute("container")
                    dc.append([purpose, source, contents, tcontainer])
                    iDef.append(dc)
                    #
                    if enList:
                        dc = DataCategory("enumeration_set")
                        dc.appendAttribute("state")
                        dc.appendAttribute("detail")
                        for en in enList:
                            dc.append([en[0], en[1]])
                        iDef.append(dc)

                    dfv = dApi.getDefaultValue(category=catName,
                                               attribute=attName)
                    bvList = dApi.getBoundaryList(category=catName,
                                                  attribute=attName)
                    if ((dfv is not None) and
                        (dfv not in ["?", "."])) or bvList:
                        row = []
                        dc = DataCategory("enumeration")
                        if dfv is not None:
                            dc.appendAttribute("default")
                            row.append(dfv)
                        if bvList:
                            # Collapse all boundary pairs into one overall
                            # min:max range; "." means unbounded and is
                            # rendered as an empty bound.
                            dc.appendAttribute("range")
                            mminVp = -1000000
                            mmaxVp = 10000000
                            mminV = mmaxVp
                            mmaxV = mminVp
                            for bv in bvList:
                                minV = float(bv[0]) if bv[0] != "." else mminVp
                                maxV = float(bv[1]) if bv[1] != "." else mmaxVp
                                mminV = min(mminV, minV)
                                mmaxV = max(mmaxV, maxV)
                            if mminV == mminVp:
                                mminV = ""
                            if mmaxV == mmaxVp:
                                mmaxV = ""
                            row.append(str(mminV) + ":" + str(mmaxV))

                        dc.append(row)
                        iDef.append(dc)

            myIo.writeFile(outputFilePath=os.path.join(
                HERE, "test-output", "mmcif_pdbx_ddlm_auto.dic"),
                           containerList=oCList)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
# Exemple #30
# 0
 def __deserialize(self, fh, storeStringsAsBytes=False):
     """Decode a BinaryCIF (msgpack) stream into DataContainer objects.

     :param fh: open binary file handle positioned at the start of a
         BinaryCIF document
     :param storeStringsAsBytes: when True, map keys are looked up as
         bytes rather than str (forwarded to BinaryCifDecoders and used
         by self.__toBytes/self.__fromBytes)
     :return: list of DataContainer objects (possibly partial if an
         exception interrupted decoding; errors are logged, not raised)
     """
     cL = []
     try:
         dec = BinaryCifDecoders(storeStringsAsBytes=storeStringsAsBytes)
         # msgpack.unpack yields the top-level map of the bcif document.
         bD = msgpack.unpack(fh)
         #
         logger.debug("bD.keys() %r", bD.keys())
         logger.debug("bD['dataBlocks'] %s",
                      bD[self.__toBytes("dataBlocks")])
         #
         # Each data block becomes one DataContainer.
         for dataBlock in bD[self.__toBytes("dataBlocks")]:
             header = self.__fromBytes(
                 dataBlock[self.__toBytes("header")]) if self.__toBytes(
                     "header") in dataBlock else None
             logger.debug("header %r", header)
             logger.debug("dataBlock %r", dataBlock)
             #
             dc = DataContainer(header)
             categoryList = dataBlock[self.__toBytes(
                 "categories")] if self.__toBytes(
                     "categories") in dataBlock else []
             for category in categoryList:
                 # [1:] drops the first character of the stored name --
                 # presumably the leading "_" of bcif category names.
                 catName = self.__fromBytes(
                     category[self.__toBytes("name")])[1:]
                 colList = category[self.__toBytes("columns")]
                 logger.debug("catName %r columns %r", catName, colList)
                 # colD: attribute name -> decoded column values,
                 # in column (attribute) order.
                 colD = OrderedDict()
                 atNameList = []
                 for col in colList:
                     logger.debug("col.keys() %r", col.keys())
                     atName = self.__fromBytes(col[self.__toBytes("name")])
                     atData = col[self.__toBytes("data")]
                     logger.debug("atData encoding (%d) data (%d)",
                                  len(atData[self.__toBytes("encoding")]),
                                  len(atData[self.__toBytes("data")]))
                     atMask = col[self.__toBytes("mask")]
                     logger.debug("catName %r atName %r", catName, atName)
                     logger.debug(" >atData.data    %r",
                                  atData[self.__toBytes("data")])
                     logger.debug(" >atData.encoding (%d) %r",
                                  len(atData[self.__toBytes("encoding")]),
                                  atData[self.__toBytes("encoding")])
                     logger.debug(" >mask %r", atMask)
                     # Decode the raw column bytes using its encoding chain.
                     tVal = dec.decode(
                         col[self.__toBytes("data")][self.__toBytes(
                             "data")], col[self.__toBytes("data")][
                                 self.__toBytes("encoding")])
                     if col[self.__toBytes("mask")]:
                         # Mask values: 2 -> unknown "?", 1 -> missing ".",
                         # 0 -> keep the decoded value.
                         mVal = dec.decode(
                             col[self.__toBytes("mask")][self.__toBytes(
                                 "data")], col[self.__toBytes("mask")][
                                     self.__toBytes("encoding")])
                         tVal = [
                             "?" if m == 2 else "." if m == 1 else d
                             for d, m in zip(tVal, mVal)
                         ]
                     colD[atName] = tVal
                     atNameList.append(atName)
                 #
                 # Transpose column-major data into per-row appends.
                 cObj = DataCategory(catName, attributeNameList=atNameList)
                 genL = [colGen for colGen in colD.values()]
                 for row in zip(*genL):
                     logger.debug("row %r", row)
                     cObj.append(row)
                 #
                 dc.append(cObj)
             cL.append(dc)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return cL