def __buildCif(self, rD, containerName="vrpt"):
    """Assemble the extracted data into mmCIF data categories.

    Every non-empty category in ``rD`` that has a non-empty entry in the
    attribute table (and is not ``programs``) becomes a ``DataCategory``
    in a single ``DataContainer``.  Category and attribute names are then
    translated through the dictionary map, and the ``properties`` values
    of the ``program`` category are translated through the attribute map.

    Note that the row dictionaries in ``rD`` are modified in place: an
    ``ordinal`` is assigned where the category declares one, and any
    ``icode`` / ``altcode`` values are converted to stripped strings.

    Args:
        rD (dict): extracted data organized by category.
        containerName (str): data block name

    Returns:
        containers (list): one-element list holding the data container
    """
    curContainer = DataContainer(containerName)

    for catName, rowList in rD.items():
        # Guard order matters: an empty row list is skipped before the
        # attribute table is consulted for this category.
        if not rowList:
            continue
        declaredAttribs = self.__attribD[catName]
        if not declaredAttribs or catName == "programs":
            continue
        hasOrdinal = "ordinal" in declaredAttribs

        # Collect the union of attribute names used by any row while
        # normalizing the row content.
        seenNames = set()
        for rowNo, rowD in enumerate(rowList, 1):
            if hasOrdinal:
                rowD["ordinal"] = rowNo
            for codeKey in ("icode", "altcode"):
                if codeKey in rowD:
                    rowD[codeKey] = str(rowD[codeKey]).strip()
            seenNames.update(rowD)

        # Put the attributes in their configured display order.
        rankD = {name: self.__atOrdD[name] for name in seenNames}
        orderedNames = sorted(rankD, key=rankD.get)
        logger.debug("Category %s sorted attributes %r", catName, orderedNames)

        curContainer.append(DataCategory(catName, orderedNames, rowList))

    # Translate category and attribute names to the target schema names.
    atD = self.__dictionaryMap["attributes"]
    for catName in curContainer.getObjNameList():
        catObj = curContainer.getObj(catName)
        currentNames = catObj.getAttributeList()
        if catName in self.__dictionaryMap["categories"]:
            newCatName = self.__dictionaryMap["categories"][catName]
        else:
            newCatName = catName
        renameD = {}
        for atName in currentNames:
            mapKey = (catName, atName)
            if mapKey in atD:
                renameD[atName] = atD[mapKey]["at"]
            else:
                renameD[atName] = atName
        catObj.renameAttributes(renameD)
        catObj.setName(newCatName)

    # Translate the comma separated provenance items in program.properties.
    progObj = curContainer.getObj("program")
    if progObj and progObj.hasAttribute("properties"):
        for iRow in range(progObj.getRowCount()):
            rawValue = progObj.getValue("properties", iRow)
            translated = []
            for token in rawValue.split(","):
                token = token.strip()
                if token in self.__atMap:
                    translated.append(self.__atMap[token])
                else:
                    translated.append(token)
            progObj.setValue(",".join(translated), "properties", iRow)

    return [curContainer]
class mmCIFUtil:
    """Using pdbx mmCIF utility to parse mmCIF file

    The instance keeps the list of data blocks read from (or added to) the
    file, an index from block name to position in that list, and a
    "current" block.  The current block is the first block read, or the
    block most recently created with ``AddBlock``; the update/add/remove
    methods operate on it.
    """

    def __init__(self, verbose=False, log=sys.stderr, filePath=None):  # pylint: disable=unused-argument
        """Create the utility and read ``filePath`` when one is given.

        Args:
            verbose (bool): accepted for interface compatibility; not used.
            log (file-like): stream that receives read-failure messages.
            filePath (str): path of the mmCIF file to read, or None to
                start with no data blocks.
        """
        self.__lfh = log
        self.__filePath = filePath
        self.__dataList = []
        self.__dataMap = {}
        self.__container = None
        self.__blockID = None
        self.__read()

    def __read(self):
        """Read all data blocks from the configured file path, if any.

        Failures are reported on the log stream rather than raised, so a
        failed read leaves the instance without a current block.
        """
        if not self.__filePath:
            return
        try:
            # The context manager closes the handle even if the reader raises.
            with open(self.__filePath, "r") as ifh:
                PdbxReader(ifh).read(self.__dataList)
            if self.__dataList:
                self.__container = self.__dataList[0]
                self.__blockID = self.__container.getName()
                for idx, container in enumerate(self.__dataList):
                    self.__dataMap[container.getName()] = idx
        except Exception as e:  # pylint: disable=broad-except
            self.__lfh.write("Read %s failed %s.\n" % (self.__filePath, str(e)))

    def __getBlock(self, blockName):
        """Return the container registered under ``blockName`` or None."""
        idx = self.__dataMap.get(blockName)
        if idx is None:
            return None
        return self.__dataList[idx]

    def GetBlockID(self):
        """Return first block ID"""
        return self.__blockID

    def GetValueAndItemByBlock(self, blockName, catName):
        """Get category values and item names

        Args:
            blockName (str): name of the data block to look in.
            catName (str): category name.

        Returns:
            tuple: ``(dList, iList)`` where ``iList`` is the category's
            attribute list and ``dList`` holds one dictionary per row,
            keyed by item name.  Values equal to "?" or "." are left out,
            and rows with no remaining values are skipped.  Both lists are
            empty when the block or category is not found.
        """
        container = self.__getBlock(blockName)
        if container is None:
            return [], []
        catObj = container.getObj(catName)
        if not catObj:
            return [], []

        iList = catObj.getAttributeList()
        dList = []
        for row in catObj.getRowList():
            tD = {itName: row[idxIt] for idxIt, itName in enumerate(iList) if row[idxIt] not in ("?", ".")}
            if tD:
                dList.append(tD)
        return dList, iList

    def GetValueAndItem(self, catName):
        """Get category values and item names from the current block."""
        return self.GetValueAndItemByBlock(self.__blockID, catName)

    def GetValue(self, catName):
        """Get category values based on category name 'catName'.

        The results are stored in a list of dictionaries with item name as key
        """
        dList, _iList = self.GetValueAndItemByBlock(self.__blockID, catName)
        return dList

    def GetSingleValue(self, catName, itemName):
        """Get the value of item 'itemName' in the first row of category 'catName'.

        Returns an empty string when the category has no usable rows or the
        first row carries no value for the item.
        """
        dlist = self.GetValue(catName)
        if dlist and itemName in dlist[0]:
            return dlist[0][itemName]
        return ""

    def UpdateSingleRowValue(self, catName, itemName, row, value):
        """Update value in single row"""
        catObj = self.__container.getObj(catName)
        if catObj is None:
            return
        catObj.setValue(value, itemName, row)

    def UpdateMultipleRowsValue(self, catName, itemName, value):
        """Update value in multiple rows"""
        catObj = self.__container.getObj(catName)
        if catObj is None:
            return
        for row in range(catObj.getRowCount()):
            catObj.setValue(value, itemName, row)

    def AddBlock(self, blockID):
        """Add Data Block

        The new block becomes the current block.
        """
        self.__container = DataContainer(blockID)
        self.__blockID = blockID
        self.__dataMap[blockID] = len(self.__dataList)
        self.__dataList.append(self.__container)

    def AddCategory(self, categoryID, items):
        """Add Category

        Creates a category named ``categoryID`` with the attribute names in
        ``items`` and appends it to the current block.
        """
        category = DataCategory(categoryID)
        for item in items:
            category.appendAttribute(item)
        self.__container.append(category)

    def RemoveCategory(self, categoryID):
        """Remove a category from the current block and return the container's result."""
        return self.__container.remove(categoryID)

    def InsertData(self, categoryID, dataList):
        """Append each entry of ``dataList`` to category ``categoryID`` of the current block.

        Nothing is done when the category does not exist.
        """
        catObj = self.__container.getObj(categoryID)
        if catObj is None:
            return
        for data in dataList:
            catObj.append(data)

    def WriteCif(self, outputFilePath=None):
        """Write out cif file

        All data blocks are written.  Nothing is done when no output path
        is given.
        """
        if not outputFilePath:
            return
        # The context manager closes the handle even if the writer raises.
        with open(outputFilePath, "w") as ofh:
            PdbxWriter(ofh).write(self.__dataList)

    def GetCategories(self):
        """Return the category names of the current block."""
        return self.__container.getObjNameList()

    def GetAttributes(self, category):
        """Return the attribute list of ``category`` in the current block."""
        return self.__container.getObj(category).getAttributeList()

    def category_as_dict(self, category, block=None):
        """Return one category as ``{category: {"Items": [...], "Values": [[...], ...]}}``.

        Args:
            category (str): category name.
            block (str): block name; defaults to the current block.

        Each row in "Values" follows the order of "Items"; entries that are
        missing from the row ("?" or "." in the file) are None.
        """
        if block is None:
            block = self.__blockID
        values, attributes = self.GetValueAndItemByBlock(block, category)
        data = [[rowD.get(name) for name in attributes] for rowD in values]
        return {category: {"Items": attributes, "Values": data}}

    def block_as_dict(self, block=None):
        """Return every category of a block in the ``category_as_dict`` layout.

        Args:
            block (str): block name; defaults to the current block.

        Returns:
            dict: one entry per category of the requested block, or an
            empty dictionary when the block is not known.
        """
        if block is None:
            block = self.__blockID
        # List the categories of the requested block itself, not of the
        # current block, so that the names and the values always agree.
        container = self.__getBlock(block)
        if container is None:
            return {}
        data = {}
        for category in container.getObjNameList():
            data.update(self.category_as_dict(category, block=block))
        return data