def __getParentCategories(self, itemNameList):
    """Return the unique list of category names containing parent items of the input items."""
    parentCats = set()
    for name in itemNameList:
        catPart = CifName.categoryPart(name)
        attPart = CifName.attributePart(name)
        # Fold every parent item's category into the accumulating set
        parentCats.update(CifName.categoryPart(p) for p in self.__dApi.getFullParentList(catPart, attPart))
    return list(parentCats)
def __getChildCategories(self, itemNameList):
    """Return the unique list of category names containing child items of the input items."""
    childCats = set()
    for name in itemNameList:
        catPart = CifName.categoryPart(name)
        attPart = CifName.attributePart(name)
        # Fold every child item's category into the accumulating set
        childCats.update(CifName.categoryPart(ch) for ch in self.__dApi.getFullChildList(catPart, attPart))
    return list(childCats)
def __assignItemIconType(self, itemNameList):
    """Assign a rendering icon type to each input item.

    All items are assumed to belong to a single category; the category of the
    first item is used for the key and mandatory-code lookups.

    Args:
        itemNameList (list): item names (all from one category)

    Returns:
        (list): an icon type string per input item (e.g. "key+database")
    """
    iconTypeList = []
    # Fix: previously itemNameList[0] raised IndexError for an empty input list.
    if not itemNameList:
        return iconTypeList
    categoryName = CifName.categoryPart(itemNameList[0])
    keyItemNameList = self.__dApi.getCategoryKeyList(categoryName)
    for itemName in itemNameList:
        tType = "none"
        attributeName = CifName.attributePart(itemName)
        # Mandatory codes in the dictionary ("archive") and deposition views -
        aMan = self.__dApi.getMandatoryCode(categoryName, attributeName) in ["yes", "y"]
        dMan = self.__dApi.getMandatoryCodeAlt(categoryName, attributeName, fallBack=False) in ["yes", "y"]
        # Usage flags across the supported delivery types -
        inArchive = self.__getItemCount(itemName, deliveryType="archive") > 0
        inChemDict = self.__getItemCount(itemName, deliveryType="cc") > 0
        inBirdDict = self.__getItemCount(itemName, deliveryType="prd") > 0 or self.__getItemCount(itemName, deliveryType="family") > 0
        isKey = itemName in keyItemNameList
        # Base type: key status takes precedence over mandatory codes
        if isKey:
            tType = "key"
        elif aMan and dMan:
            tType = "all-mandatory"
        elif aMan:
            tType = "mandatory"
        elif dMan:
            tType = "deposit-mandatory"
        # Usage qualifiers are appended to the base type
        if inArchive:
            tType += "+database"
        if inChemDict:
            tType += "+chem-dict"
        if inBirdDict:
            tType += "+bird-dict"
        iconTypeList.append(tType)
    return iconTypeList
def test_consolidate_dictionary(self, api_paths):
    """Exercise parent/child traversal and type-code lookups on a consolidated dictionary."""
    reader = IoAdapter(raiseExceptions=True)
    containers = reader.readFile(inputFilePath=str(api_paths['pathPdbxDictionary']))
    api = DictionaryApi(containerList=containers, consolidate=True)
    probeItems = (
        '_entity.id',
        '_entity_poly_seq.num',
        '_atom_site.label_asym_id',
        '_struct_asym.id',
        '_chem_comp.id',
        'chem_comp_atom.comp_id',
        'chem_comp_bond.comp_id',
    )
    for probe in probeItems:
        catPart = CifName.categoryPart(probe)
        attPart = CifName.attributePart(probe)
        print("Full parent list for %s : %s\n" % (probe, api.getFullParentList(catPart, attPart)))
        print("Full child list for %s : %s\n" % (probe, api.getFullChildList(catPart, attPart)))
        print("Ultimate parent for %s : %s\n" % (probe, api.getUltimateParent(catPart, attPart)))
        print("Type code for %s : %s\n" % (probe, api.getTypeCode(catPart, attPart)))
        # Every probed item is expected to carry a type code in the dictionary
        assert api.getTypeCode(catPart, attPart) is not None
def __itemNameToDictList(self, itemNameList):
    """Map item names to [{"CATEGORY": ..., "ATTRIBUTE": ...}, ...], deduplicated in input order."""
    uniqueNames = OrderedDict.fromkeys(itemNameList)
    return [
        {"CATEGORY": CifName.categoryPart(nm), "ATTRIBUTE": CifName.attributePart(nm)}
        for nm in uniqueNames
    ]
def testGetUcode(self):
    """Test case - Get all data items of type ucode"""
    print("\n")
    try:
        # Read the dictionary and build a consolidated API view
        myIo = IoAdapter(raiseExceptions=True)
        self.__containerList = myIo.readFile(inputFilePath=self.__pathPdbxDictionary)
        dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, verbose=self.__verbose)
        logger.debug("+++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")
        # Scan every item of every category and report those typed as "ucode"
        catNameList = dApi.getCategoryList()
        for catName in catNameList:
            itemNameList = dApi.getItemNameList(catName)
            for itemName in itemNameList:
                categoryName = CifName.categoryPart(itemName)
                attributeName = CifName.attributePart(itemName)
                code = dApi.getTypeCode(categoryName, attributeName)
                if (code == "ucode"):
                    print("Table: ", categoryName, "\tColumn: ", attributeName, "\tType: ", code)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testConsolidateDictionary(self):
    """Test case - dump methods for dictionary metadata"""
    try:
        # Build the consolidated dictionary API (item-linked expansion disabled)
        myIo = IoAdapter(raiseExceptions=True)
        self.__containerList = myIo.readFile(inputFilePath=self.__pathPdbxDictionary)
        dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, expandItemLinked=False, verbose=self.__verbose)
        # Probe a representative set of parent/child related items
        for itemName in [
            "_entity.id",
            "_entity_poly_seq.num",
            "_atom_site.label_asym_id",
            "_struct_asym.id",
            "_chem_comp.id",
            "chem_comp_atom.comp_id",
            "chem_comp_bond.comp_id",
        ]:
            categoryName = CifName.categoryPart(itemName)
            attributeName = CifName.attributePart(itemName)
            logger.debug("Full parent list for %s : %s\n", itemName, dApi.getFullParentList(categoryName, attributeName))
            logger.debug("Full child list for %s : %s\n", itemName, dApi.getFullChildList(categoryName, attributeName))
            logger.debug("Ultimate parent for %s : %s\n", itemName, dApi.getUltimateParent(categoryName, attributeName))
            logger.debug("Type code for %s : %s\n", itemName, dApi.getTypeCode(categoryName, attributeName))
            # Every probed item is expected to carry a type code
            self.assertIsNotNone(dApi.getTypeCode(categoryName, attributeName))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __substituteItemPrefix(self, itemName, curPrefix, newPrefix):
    """Replace the leading namespace prefix in both the category and attribute parts of an item name.

    Bug fix: the category-part guard previously tested ``atName`` rather than
    ``catName``, so the category prefix was skipped whenever the attribute part
    was empty (copy-paste defect).

    Args:
        itemName (str): fully qualified item name (_category.attribute)
        curPrefix (str): namespace prefix to be replaced
        newPrefix (str): replacement namespace prefix

    Returns:
        (str): the item name with substituted prefixes
    """
    atName = CifName.attributePart(itemName)
    # Replace only a leading occurrence of the prefix (count=1)
    atName = atName.replace(curPrefix, newPrefix, 1) if atName and atName.startswith(curPrefix) else atName
    catName = CifName.categoryPart(itemName)
    catName = catName.replace(curPrefix, newPrefix, 1) if catName and catName.startswith(curPrefix) else catName
    return CifName.itemName(catName, atName)
def __getIncludeInstructions(self, containerList):
    """Extract include instructions from categories pdbx_include_dictionary, pdbx_include_category, and pdbx_include_item.

    Args:
        containerList (list): input container objects to scan for include instructions

    Returns:
        (dict): {datablockName: {"dictionaryIncludeDict": {dictionary_id: {...include details...}},
                                 "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
                                 "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}}}}
    """
    includeD = OrderedDict()
    try:
        # Counter used to synthesize a name for unnamed data blocks
        unNamed = 1
        for container in containerList:
            # Only data containers can carry include instruction categories
            if container.getType() == "data":
                dictionaryIncludeDict = OrderedDict()
                categoryIncludeDict = OrderedDict()
                itemIncludeDict = OrderedDict()
                if container.getName():
                    datablockName = container.getName()
                else:
                    datablockName = str(unNamed)
                    unNamed += 1
                logger.debug("Adding data sections from container name %s type %s", datablockName, container.getType())
                # Dictionary-level include instructions, keyed by dictionary id
                tl = container.getObj("pdbx_include_dictionary")
                if tl is not None:
                    for row in tl.getRowList():
                        tD = OrderedDict()
                        for atName in ["dictionary_id", "dictionary_locator", "include_mode", "dictionary_namespace_prefix", "dictionary_namespace_prefix_replace"]:
                            # Missing attributes are recorded as None
                            tD[atName] = row[tl.getIndex(atName)] if tl.hasAttribute(atName) else None
                        dictionaryIncludeDict[tD["dictionary_id"]] = tD
                #
                # Category-level include instructions, keyed by dictionary id then category id
                tl = container.getObj("pdbx_include_category")
                if tl is not None:
                    for row in tl.getRowList():
                        tD = OrderedDict()
                        for atName in ["dictionary_id", "category_id", "include_as_category_id", "include_mode"]:
                            tD[atName] = row[tl.getIndex(atName)] if tl.hasAttribute(atName) else None
                        categoryIncludeDict.setdefault(tD["dictionary_id"], {}).setdefault(tD["category_id"], tD)
                #
                # Item-level include instructions, keyed by dictionary id, category, then item name
                tl = container.getObj("pdbx_include_item")
                if tl is not None:
                    for row in tl.getRowList():
                        tD = OrderedDict()
                        for atName in ["dictionary_id", "item_name", "include_as_item_name", "include_mode"]:
                            tD[atName] = row[tl.getIndex(atName)] if tl.hasAttribute(atName) else None
                        categoryId = CifName.categoryPart(tD["item_name"])
                        itemIncludeDict.setdefault(tD["dictionary_id"], {}).setdefault(categoryId, {}).setdefault(tD["item_name"], tD)
                includeD[datablockName] = {
                    "dictionaryIncludeDict": dictionaryIncludeDict,
                    "categoryIncludeDict": categoryIncludeDict,
                    "itemIncludeDict": itemIncludeDict,
                }
    except Exception as e:
        logger.exception("Include processing failing with %s", str(e))
    return includeD
def setItemCounts(self, itemNameD, deliveryType="archive"):
    """Record per-item counts and roll up a per-category maximum for the given delivery type."""
    itemCountD = self.__itemCounts[deliveryType]
    categoryCountD = self.__categoryCounts[deliveryType]
    for name, count in itemNameD.items():
        itemCountD[name] = count
        catName = CifName.categoryPart(name)
        # Category count is the maximum item count seen within the category
        categoryCountD[catName] = max(count, categoryCountD.get(catName, count))
    logger.debug("+NeighborFigures.setItemCounts() items in archive count %d", len(self.__itemCounts[deliveryType]))
    logger.debug("+NeighborFigures.setItemCounts() categories in archive count %d", len(self.__categoryCounts[deliveryType]))
def __getRelativesAdjacent(self, itemNameList):
    """Return {itemName: {"parentItems": [...], "childItems": [...]}} for each input item."""
    adjacentD = {}
    for name in itemNameList:
        catPart = CifName.categoryPart(name)
        attPart = CifName.attributePart(name)
        adjacentD[name] = {
            "parentItems": self.__dApi.getFullParentList(catPart, attPart),
            "childItems": self.__dApi.getFullChildList(catPart, attPart),
        }
    return adjacentD
def __getRelatedList(self, categoryName, adjacentD):
    """Collect all items related (as parents or children) to the input category."""
    related = set()
    for itemName, relD in adjacentD.items():
        inCategory = CifName.categoryPart(itemName) == categoryName
        # Treat parent and child link lists identically: the item itself is
        # related when it belongs to the target category and has any links;
        # linked items are related when they belong to the target category.
        for linkKey in ("parentItems", "childItems"):
            linkedItems = relD[linkKey]
            if not linkedItems:
                continue
            if inCategory:
                related.add(itemName)
            related.update(ln for ln in linkedItems if CifName.categoryPart(ln) == categoryName)
    relatedList = sorted(related)
    logger.debug("%s items with parent/child relationships %s", categoryName, len(relatedList))
    return relatedList
def __getSliceChildren(self, sliceParentD):
    """Internal method to build data structure containing the parent-child relationships for the input
    slice parent construction.

    Args:
        sliceParentD (dict): {sliceName: [{"CATEGORY_NAME": ..., "ATTRIBUTE_NAME": ...}, ...]}

    Returns:
        (dict): {sliceName: {categoryName: [{"PARENT_CATEGORY_NAME": ..., "PARENT_ATTRIBUTE_NAME": ...,
                                             "CHILD_ATTRIBUTE_NAME": ...}, ...]}}
    """
    retD = OrderedDict()
    for sliceName, sliceParents in sliceParentD.items():
        sD = OrderedDict()
        for pD in sliceParents:
            parentCategoryName = pD["CATEGORY_NAME"]
            parentAttributeName = pD["ATTRIBUTE_NAME"]
            #
            # The parent category maps to itself (parent and child attribute coincide)
            sD[parentCategoryName] = [{"PARENT_CATEGORY_NAME": parentCategoryName, "PARENT_ATTRIBUTE_NAME": parentAttributeName, "CHILD_ATTRIBUTE_NAME": parentAttributeName}]
            #
            # Full descendants (not just direct children) are used here
            # childItems = self.__dApi.getFullChildList(parentCategoryName, parentAttributeName)
            childItems = self.__dApi.getFullDescendentList(parentCategoryName, parentAttributeName)
            # logger.info("Slice parent %s %s %r" % (parentCategoryName, parentAttributeName, childItems))
            for childItem in childItems:
                atName = CifName.attributePart(childItem)
                catName = CifName.categoryPart(childItem)
                # Ignore children in the parent category
                if catName == parentCategoryName:
                    continue
                if catName not in sD:
                    sD[catName] = []
                sD[catName].append({"PARENT_CATEGORY_NAME": parentCategoryName, "PARENT_ATTRIBUTE_NAME": parentAttributeName, "CHILD_ATTRIBUTE_NAME": atName})
        # Sort the list of dictionaries for each category
        for catName in sD:
            sD[catName] = sorted(sD[catName], key=lambda k: (k["PARENT_CATEGORY_NAME"], k["PARENT_ATTRIBUTE_NAME"], k["CHILD_ATTRIBUTE_NAME"]))
        retD[sliceName] = sD
    return retD
def __renameCategory(self, container, newCategoryName):
    """Rename references to the input category definition container's name within its category objects.

    Bug fix: the original guard ``not container and not container.isCategory() or not newCategoryName``
    never performed the intended check -- with a None container it would raise AttributeError
    evaluating ``container.isCategory()``, and with a valid container the ``and`` clause was always
    False so non-category containers were never filtered. The intended no-op cases are: missing
    container, non-category container, or missing replacement name.

    Args:
        container: definition container whose category references are renamed in place
        newCategoryName (str): replacement category name

    Returns:
        The input container (possibly updated in place).
    """
    if not container or not container.isCategory() or not newCategoryName:
        return container
    #
    catNameCur = container.getName()
    if catNameCur == newCategoryName:
        return container
    try:
        # Walk every related category/attribute that may reference the current name
        for item in self.__categoryIdRelatives:
            catName = CifName.categoryPart(item)
            if container.exists(catName):
                cObj = container.getObj(catName)
                atName = CifName.attributePart(item)
                if cObj.hasAttribute(atName):
                    for iRow in range(cObj.getRowCount()):
                        testVal = cObj.getValue(atName, iRow)
                        if testVal == catNameCur:
                            cObj.setValue(newCategoryName, atName, iRow)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return container
def __getUnitCardinalityCategories(self, parentDList):
    """Assign categories with unit cardinality relative to the input list of parent key items.

    parentDList (list): [{'CATEGORY_NAME': xxx, 'ATTRIBUTE_NAME': xxxx}, ...]

    Return: category name list (sorted); a category qualifies when its primary key
            length equals the number of input parent items and all of its key
            items descend from the parents.
    """
    numParents = len(parentDList)
    logger.debug("Parent slice count %d def %r", numParents, parentDList)
    ucL = []
    #
    # Find the common set of child categories for the input parent items
    comCatList = []
    for pD in parentDList:
        # Each parent's own category is always a candidate
        catList = [pD["CATEGORY_NAME"]]
        for childItem in self.__dApi.getFullChildList(pD["CATEGORY_NAME"], pD["ATTRIBUTE_NAME"]):
            childCategoryName = CifName.categoryPart(childItem)
            primaryKeyItemList = self.__dApi.getCategoryKeyList(childCategoryName)
            logger.debug("child category %r primary key items %r", childCategoryName, primaryKeyItemList)
            # child must be part of the primary key to be a candidate
            if childItem in primaryKeyItemList:
                catList.append(childCategoryName)
        # Intersect candidates across parents (first parent seeds the set)
        if comCatList:
            comCatList = list(set(catList) & set(comCatList))
        else:
            comCatList.extend(catList)
    logger.debug("Common category list %r", comCatList)
    # Unit cardinality requires the key length to match the parent count
    for cat in comCatList:
        primaryKeyItemList = self.__dApi.getCategoryKeyList(cat)
        if len(primaryKeyItemList) == numParents:
            ucL.append(cat)
    #
    logger.debug("Slice unit cardinality categories from parent-child relationships %r", ucL)
    return sorted(ucL)
def __getIncludeInstructions(self, containerList, cleanup=False):
    """Extract include instructions from categories pdbx_include_dictionary, pdbx_include_category, and pdbx_include_item.

    Args:
        containerList (list): list of input PdbxContainer data or definition container objects
        cleanup (optional, bool): flag to remove generator category objects after parsing (default: False)

    Returns:
        A dictionary containing the dictionary, category and and item level include details.
        For example,

        ```python
        {
            "dictionaryIncludeDict": {dictionary_id: {...include details...}},
            "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
            "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}},
        }
        ```
    """
    includeD = OrderedDict()
    try:
        # Counter used to synthesize a name for unnamed data blocks
        unNamed = 1
        for container in containerList:
            # Only data containers can carry include instruction categories
            if container.getType() == "data":
                dictionaryIncludeDict = OrderedDict()
                categoryIncludeDict = OrderedDict()
                itemIncludeDict = OrderedDict()
                if container.getName():
                    datablockName = container.getName()
                else:
                    datablockName = str(unNamed)
                    unNamed += 1
                logger.debug("Adding data sections from container name %s type %s", datablockName, container.getType())
                # Dictionary-level include instructions, keyed by dictionary id
                tl = container.getObj("pdbx_include_dictionary")
                if tl is not None:
                    for row in tl.getRowList():
                        tD = OrderedDict()
                        for atName in ["dictionary_id", "dictionary_locator", "include_mode", "dictionary_namespace_prefix", "dictionary_namespace_prefix_replace"]:
                            # Missing attributes are recorded as None
                            tD[atName] = row[tl.getIndex(atName)] if tl.hasAttribute(atName) else None
                        dictionaryIncludeDict[tD["dictionary_id"]] = tD
                #
                # Category-level include instructions, keyed by dictionary id then category id
                tl = container.getObj("pdbx_include_category")
                if tl is not None:
                    for row in tl.getRowList():
                        tD = OrderedDict()
                        for atName in ["dictionary_id", "category_id", "include_as_category_id", "include_mode"]:
                            tD[atName] = row[tl.getIndex(atName)] if tl.hasAttribute(atName) else None
                        categoryIncludeDict.setdefault(tD["dictionary_id"], {}).setdefault(tD["category_id"], tD)
                #
                # Item-level include instructions, keyed by dictionary id, category, then item name
                tl = container.getObj("pdbx_include_item")
                if tl is not None:
                    for row in tl.getRowList():
                        tD = OrderedDict()
                        for atName in ["dictionary_id", "item_name", "include_as_item_name", "include_mode"]:
                            tD[atName] = row[tl.getIndex(atName)] if tl.hasAttribute(atName) else None
                        categoryId = CifName.categoryPart(tD["item_name"])
                        itemIncludeDict.setdefault(tD["dictionary_id"], {}).setdefault(categoryId, {}).setdefault(tD["item_name"], tD)
                # Optionally remove the generator categories once parsed
                if cleanup:
                    for catName in ["pdbx_include_dictionary", "pdbx_include_category", "pdbx_include_item"]:
                        if container.exists(catName):
                            container.remove(catName)
                #
                includeD[datablockName] = {
                    "dictionaryIncludeDict": dictionaryIncludeDict,
                    "categoryIncludeDict": categoryIncludeDict,
                    "itemIncludeDict": itemIncludeDict,
                }
    except Exception as e:
        logger.exception("Include processing failing with %s", str(e))
    return includeD
def getItemRelatedList(self, itemName):
    """Return the related-item details for the input fully qualified item name."""
    return self._dApi.getItemRelatedList(CifName.categoryPart(itemName), CifName.attributePart(itemName))
def getCategoryPdbxItemEnum(self, itemName):
    """Returns any DepUI enum list"""
    catPart = CifName.categoryPart(itemName)
    attPart = CifName.attributePart(itemName)
    return self._dApi.getEnumListAlt(catPart, attPart)
def __fetchIncludedContent(self, includeD):
    """Fetch included content following the instructions encoded in the input data structure.

    Args:
        includeD (dict): {"dictionaryIncludeDict": {dictionary_id: {...include details...}},
                          "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
                          "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}}
                         }

    Returns:
        (dict): {datablockName: {"extend": [container,...], "replace": [container, ...]}, ... }
    """
    includeDataD = {}
    try:
        for datablockName, inclD in includeD.items():
            cL = []
            for dictName, iD in inclD["dictionaryIncludeDict"].items():
                locator = iD["dictionary_locator"]
                # Each locator is fetched at most once across the whole include graph
                if locator in self.__locatorIndexD:
                    logger.info("Skipping redundant include for %r at %r", dictName, locator)
                    continue
                self.__locatorIndexD[locator] = dictName
                #
                # --- Fetch the dictionary component (recursively resolving its own includes) -
                #
                containerList = self.processIncludedContent(self.__fetchLocator(locator))
                #
                nsPrefix = iD["dictionary_namespace_prefix"]
                nsPrefixReplace = iD["dictionary_namespace_prefix_replace"]
                dictInclMode = iD["include_mode"]
                # Data sections default to "extend" when no mode is given
                dataIncludeMode = iD["data_include_mode"] if "data_include_mode" in iD else "extend"
                catInclD = inclD["categoryIncludeDict"][dictName] if dictName in inclD["categoryIncludeDict"] else None
                itemInclD = inclD["itemIncludeDict"][dictName] if dictName in inclD["itemIncludeDict"] else None
                #
                # Do data sections first.
                for container in containerList:
                    if container.getType() == "data":
                        logger.debug("Including data container %r with %r", container.getName(), container.getObjNameList())
                        cL.append((container, dataIncludeMode))
                #
                if catInclD or itemInclD:
                    # Process only explicitly included categories/items in the dictionary component
                    if catInclD:
                        for container in containerList:
                            if container.getType() == "data":
                                continue
                            cName = container.getName()
                            catName = cName if container.isCategory() else CifName.categoryPart(cName)
                            #
                            if catName in catInclD:
                                # Item-level instructions take precedence over the enclosing category instruction
                                if container.isAttribute() and itemInclD and catName in itemInclD and cName in itemInclD[catName]:
                                    inclMode = itemInclD[catName][cName]["include_mode"] if itemInclD[catName][cName]["include_mode"] else dictInclMode
                                    cL.append((self.__renameItem(container, itemInclD[catName][cName]["include_as_item_name"]), inclMode))
                                else:
                                    inclMode = catInclD[catName]["include_mode"] if catInclD[catName]["include_mode"] else dictInclMode
                                    cL.append((self.__renameCategory(container, catInclD[catName]["include_as_category_id"]), inclMode))
                    elif itemInclD:
                        # Process only explicitly included items exclusive of explicitly included categories in the dictionary component
                        for container in containerList:
                            if container.getType() == "data":
                                continue
                            cName = container.getName()
                            catName = cName if container.isCategory() else CifName.categoryPart(cName)
                            #
                            if container.isAttribute() and catName in itemInclD and cName in itemInclD[catName]:
                                inclMode = itemInclD[catName][cName]["include_mode"] if itemInclD[catName][cName]["include_mode"] else dictInclMode
                                cL.append((self.__renameItem(container, itemInclD[catName][cName]["include_as_item_name"]), inclMode))
                else:
                    # Process the full content of the dictionary component
                    for container in containerList:
                        if container.getType() == "data":
                            continue
                        cName = container.getName()
                        catName = cName if container.isCategory() else CifName.categoryPart(cName)
                        #
                        # Namespace prefixes are rewritten when the full component is included
                        if container.isAttribute():
                            newName = self.__substituteItemPrefix(cName, nsPrefix, nsPrefixReplace)
                            cL.append((self.__renameItem(container, newName), dictInclMode))
                        else:
                            newName = self.__substituteCategoryPrefix(catName, nsPrefix, nsPrefixReplace)
                            cL.append((self.__renameCategory(container, newName), dictInclMode))
            #
            # Partition the collected containers by their effective include mode
            for container, inclMode in cL:
                if inclMode == "replace":
                    includeDataD.setdefault(datablockName, {}).setdefault("replace", []).append(container)
                elif inclMode == "extend":
                    logger.debug("%r extending with %r", datablockName, container.getName())
                    includeDataD.setdefault(datablockName, {}).setdefault("extend", []).append(container)
        #
        for nm in includeDataD:
            numReplace = len(includeDataD[nm]["replace"]) if "replace" in includeDataD[nm] else 0
            numExtend = len(includeDataD[nm]["extend"]) if "extend" in includeDataD[nm] else 0
            logger.info("includeDataD %s replace (%d) extend (%d)", nm, numReplace, numExtend)
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return includeDataD
def __generateDotInstructions(
    self,
    categoryName,
    graphTitle=None,
    graphSubTitle=None,
    titleFormat="text",
    maxItems=20,
    filterDelivery=False,
    deliveryType="archive",
    neighborCategoryList=None,
    maxCategories=None,
):
    """Internal method producing GraphViz 'dot' instructions depicting data category relationships between the
    input category and either all of its adjacent neighbors or for selected 'neighborCategoryList'.

    Optionally apply filtering to categories in current use within the archive.

    maxItems controls target maximum number of attributes in any category object depiction.

    maxCategories limits the number of related category objects depicted.

    Returns:
        (list, int): the dot instruction lines and the number of category nodes rendered
    """
    #
    # Skip cases where the principal category is not associated with the input delivery type -
    #
    if filterDelivery and not self.__isCategoryUsed(categoryName=categoryName, deliveryType=deliveryType):
        logger.debug("skipping category %r delivery %r", categoryName, deliveryType)
        return [], 0
    #
    numCategoriesRendered = 0
    itemNameList = self.__dApi.getItemNameList(categoryName)
    aR = self.__getRelativesAdjacent(itemNameList)
    #
    for k, v in aR.items():
        logger.debug("%s relatives %s", k, v)
    #
    # Assemble the category node set: the principal category plus neighbors
    adjacentCategories = []
    adjacentCategories.append(categoryName)
    if neighborCategoryList is None:
        adjacentCategories.extend(self.__getParentCategories(itemNameList))
        adjacentCategories.extend(self.__getChildCategories(itemNameList))
    else:
        adjacentCategories.extend(neighborCategoryList)
    #
    adjacentCategories = sorted(list(set(adjacentCategories)))
    if maxCategories is not None:
        adjacentCategories = adjacentCategories[:maxCategories]
    logger.debug("adjacent categories %s", adjacentCategories)
    #
    oL = []
    oL.append("digraph %s {" % categoryName)
    # Some previous layout parameters --- jdw
    # oL.append('splines=true; overlap=portho; model=subset;')
    # oL.append('splines=ortho; overlap=compress; model=subset; ratio=1.0;')
    oL.append("splines=true; overlap=compress; ")
    #
    # Option graph title -
    if graphTitle is not None:
        if titleFormat == "text":
            # dot instructions do not recognize the font settings.
            # oL.append('graph [labelloc=b, labeljust=left, labelfontname=Helvetica, labelfontsize=18, label="%s"];' % (self.__titleFontFace, self.__titleFontSize, graphTitle))
            oL.append('graph [labelloc=b, labeljust=left, labelfontname=%s, labelfontsize=%s, label="%s"];' % (self.__titleFontFace, self.__titleFontSize, graphTitle))
        else:
            # Title is rendered with this font detail.
            titleText = '<FONT POINT-SIZE="%s" FACE="%s">%s</FONT>' % (self.__titleFontSize, self.__titleFontFace, graphTitle)
            if graphSubTitle is not None:
                titleText += '<FONT POINT-SIZE="%s" FACE="%s"> <br/> %s</FONT>' % (self.__subTitleFontSize, self.__titleFontFace, graphSubTitle)
            oL.append("graph [labelloc=b, label=<%s>];" % (titleText))
    oL.append("node [shape=plaintext]")
    # Render one HTML-like table node per adjacent category
    for catName in adjacentCategories:
        if filterDelivery and not self.__isCategoryUsed(categoryName=catName, deliveryType=deliveryType):
            continue
        if catName == categoryName:
            highLight = "current"
        else:
            highLight = "adjacent"
        relatedList = self.__getRelatedList(categoryName=catName, adjacentD=aR)
        oL.extend(self.__renderCategory(catName, fkList=relatedList, highLight=highLight, maxItems=maxItems, filterDelivery=filterDelivery, deliveryType=deliveryType))
        numCategoriesRendered += 1
    # --------
    # JDW regenerate full item list -
    #
    itemNameList = []
    for catName in adjacentCategories:
        itemNameList.extend(self.__dApi.getItemNameList(catName))
    aR = {}
    aR = self.__getRelativesAdjacent(itemNameList)
    #
    # --------
    # lD records rendered edges (both directions) to avoid duplicates
    lD = {}
    #
    for itemName in itemNameList:
        if filterDelivery and not self.__isItemUsed(itemName=itemName, deliveryType=deliveryType):
            continue
        tD = aR[itemName]
        # Parent edges: child attribute -> parent attribute
        for parentItemName in tD["parentItems"]:
            if filterDelivery and not self.__isItemUsed(itemName=parentItemName, deliveryType=deliveryType):
                continue
            catName = CifName.categoryPart(itemName)
            attName = CifName.attributePart(itemName)
            catParent = CifName.categoryPart(parentItemName)
            attParent = CifName.attributePart(parentItemName)
            #
            if (itemName, parentItemName) in lD:
                continue
            if catParent not in adjacentCategories:
                continue
            # Same-category edges attach on the east side, cross-category on the west
            if catParent != catName:
                oL.append(" _%s:__%s:w -> _%s:__%s:w;" % (catName, attName, catParent, attParent))
            else:
                oL.append(" _%s:__%s:e -> _%s:__%s:e;" % (catName, attName, catParent, attParent))
            lD[(itemName, parentItemName)] = 1
            lD[(parentItemName, itemName)] = 1
        # Child edges: child attribute -> this item's attribute
        for childItemName in tD["childItems"]:
            if filterDelivery and not self.__isItemUsed(itemName=childItemName, deliveryType=deliveryType):
                continue
            catName = CifName.categoryPart(itemName)
            attName = CifName.attributePart(itemName)
            catChild = CifName.categoryPart(childItemName)
            attChild = CifName.attributePart(childItemName)
            #
            if (itemName, childItemName) in lD:
                continue
            if catChild not in adjacentCategories:
                continue
            if catChild != catName:
                oL.append(" _%s:__%s:w -> _%s:__%s:w;" % (catChild, attChild, catName, attName))
            else:
                oL.append(" _%s:__%s:e -> _%s:__%s:e;" % (catChild, attChild, catName, attName))
            lD[(itemName, childItemName)] = 1
            lD[(childItemName, itemName)] = 1
    oL.append("}")
    return oL, numCategoriesRendered
def makeSchemaDef(self):
    """Build a schema definition dictionary {SCHEMA_ID: {...}} from the loaded table/attribute
    definition and mapping lists.
    """
    sD = {}
    for tableName in self.__tableNameList:
        # Skip bookkeeping/meta tables
        if tableName in ["rcsb_columninfo", "columninfo", "tableinfo", "rcsb_tableinfo"]:
            continue
        dD = {}
        tableAbbrev = self.__getTableAbbrev(tableName)
        tU = tableAbbrev.upper()
        dD["SCHEMA_ID"] = tU
        dD["SCHEMA_NAME"] = tableAbbrev
        dD["SCHEMA_TYPE"] = "transactional"
        dD["ATTRIBUTES"] = {}
        dD["ATTRIBUTE_INFO"] = {}
        dD["ATTRIBUTE_MAP"] = {}
        #
        # create a sub list for this table -
        infoL = []
        for atD in self.__atDefList:
            if atD["table_name"] == tableName:
                infoL.append(atD)
        #
        # Build the attribute map (source category/attribute/function) for this table
        mapD = {}
        for atD in self.__atMapList:
            if atD["target_table_name"] == tableName:
                attributeName = atD["target_attribute_name"]
                attributeAbbrev = self.__getAttributeAbbrev(tableName, attributeName)
                atU = attributeAbbrev.upper()
                # "?"/"." are CIF null placeholders
                itN = atD["source_item_name"] if atD["source_item_name"] not in ["?", "."] else None
                if itN is not None:
                    catNameM = CifName.categoryPart(itN)
                    attNameM = CifName.attributePart(itN)
                else:
                    catNameM = None
                    attNameM = None
                # cId = atD['condition_id'] if atD['condition_id'] not in ['?', '.'] else None
                fId = atD["function_id"] if atD["function_id"] not in ["?", "."] else None
                # NOTE(review): both branches below are identical — the condition has no
                # effect; confirm whether the fId/catNameM case was meant to differ.
                if fId is not None and catNameM is None:
                    mapD[atU] = (catNameM, attNameM, fId, None)
                else:
                    mapD[atU] = (catNameM, attNameM, fId, None)
        #
        try:
            indexList = []
            for (ii, atD) in enumerate(infoL):
                attributeName = atD["attribute_name"]
                attributeAbbrev = self.__getAttributeAbbrev(tableName, attributeName)
                atU = attributeAbbrev.upper()
                #
                td = {}
                # 'data_type','index_flag','null_flag','width','precision','populated'
                td["APP_TYPE"] = self.__convertDataType(atD["data_type"], aWidth=int(atD["width"]))
                td["WIDTH"] = int(atD["width"])
                td["PRECISION"] = int(atD["precision"])
                td["NULLABLE"] = not self.__toBool(atD["null_flag"])
                td["PRIMARY_KEY"] = self.__toBool(atD["index_flag"])
                td["ORDER"] = ii + 1
                if td["PRIMARY_KEY"]:
                    indexList.append(atU)
                dD["ATTRIBUTES"][atU] = attributeAbbrev
                dD["ATTRIBUTE_INFO"][atU] = td
                dD["ATTRIBUTE_MAP"][atU] = mapD[atU]
        except Exception as e:
            logger.error("Failing for table %r attribute %r", tableName, attributeName)
            logger.exception("Failing with %s", str(e))
        #
        if self.__verbose and len(indexList) > 16:
            logger.debug("+WARNING - %s index list exceeds MySQL max length %d", tableName, len(indexList))
        mergeDict = {}
        deleteAttributeList = []
        for atU in indexList:
            tN = dD["ATTRIBUTE_MAP"][atU][0]
            aN = dD["ATTRIBUTE_MAP"][atU][1]
            fN = dD["ATTRIBUTE_MAP"][atU][2]
            if aN is not None:
                if tN not in mergeDict:
                    mergeDict[tN] = []
                mergeDict[tN].append(aN)
            #
            # Using RCSB convention of including one attribute in each table corresponding to the datablockId()
            # this attributeId is used a key pre-insert deletions.
            #
            if fN in ["datablockid()"]:
                deleteAttributeList.append(atU)
        #
        # Assign a merge index to this instance category
        #
        # NOTE(review): each iteration overwrites MAP_MERGE_INDICES, so only the
        # last mergeDict entry survives — confirm whether multiple merge indices
        # were intended to accumulate.
        for k, v in mergeDict.items():
            dD["MAP_MERGE_INDICES"] = {k: {"TYPE": "EQUI-JOIN", "ATTRIBUTES": tuple(v)}}
        if deleteAttributeList:
            dD["SCHEMA_DELETE_ATTRIBUTE"] = deleteAttributeList[0]
            dD["INDICES"] = {"p1": {"TYPE": "UNIQUE", "ATTRIBUTES": tuple(indexList)}, "s1": {"TYPE": "SEARCH", "ATTRIBUTES": tuple(deleteAttributeList)}}
        else:
            dD["INDICES"] = {"p1": {"TYPE": "UNIQUE", "ATTRIBUTES": tuple(indexList)}}
            logger.debug("+WARNING - No delete attribute for table %s", tableName)
        if not mergeDict:
            logger.debug("+WARNING - No merge index possible for table %s", tableName)
        sD[tU] = dD
    return sD
def testClassifyByGroup(self):
    """Test case - organize dictionary items by classes: SAMPLE, MX, NMR, EM, STRUCTURE, and DB"""
    try:
        # Build the consolidated dictionary API
        myIo = IoAdapter(raiseExceptions=True)
        self.__containerList = myIo.readFile(inputFilePath=self.__pathPdbxDictionary)
        dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, verbose=self.__verbose)
        #
        # Collect the complete, de-duplicated item inventory
        itemList = []
        groupList = dApi.getCategoryGroups()
        categoryList = dApi.getCategoryList()
        for category in categoryList:
            itemList.extend(dApi.getItemNameList(category))
        itemList = sorted(set(itemList))
        logger.info("Total category length %d", len(categoryList))
        logger.info("Total definition length %d", len(itemList))
        logger.info("group length %s", len(groupList))
        logger.debug("groupList %r", groupList)
        #
        # Optional diagnostic pass for categories with no useful group assignment
        findUnlinked = False
        if findUnlinked:
            tSet = set(["pdbx_group", "inclusive_group"])
            for category in categoryList:
                gList = dApi.getCategoryGroupList(category)
                gSet = set(gList)
                if gSet == tSet:
                    logger.info("unqualified %s", category)
                # logger.info("%s -> %r", category, gList)
                if not gList:
                    logger.info("--- No category group assignment for %s", category)
        #
        classD = {}
        # Add category group members -
        for groupName, className in self.__groupClassTupL:
            categoryL = dApi.getCategoryGroupCategories(groupName, followChildren=True)
            for category in categoryL:
                classD.setdefault(className, []).extend(dApi.getItemNameList(category))
        #
        # Add unlinked categories
        #
        for category, className in self.__unlinkedCategoryClassTup:
            classD.setdefault(className, []).extend(dApi.getItemNameList(category))
        #
        # Tally classified items per class and overall
        sumItem = 0
        classItemD = {}
        for className, itemL in classD.items():
            numItem = len(set(itemL))
            sumItem += numItem
            logger.info("class %s items %d", className, len(set(itemL)))
            for item in itemL:
                classItemD[item] = True
        #
        logger.info("Sum classified items is %d", sumItem)
        logger.info("classified items %d", len(classItemD))
        # logger.debug("classItemD.items() %r", list(classItemD.items())[:10])
        # Report items that did not fall into any class and their groups
        missingGroupL = []
        jj = 0
        for item in itemList:
            if item not in classItemD:
                jj += 1
                category = CifName.categoryPart(item)
                logger.info("%d item %r category %r", jj, item, category)
                missingGroupL.extend(dApi.getCategoryGroupList(category))
        #
        logger.info("missing groups %r", sorted(set(missingGroupL)))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __fetchIncludedContent(self, includeD, cleanup=False):
    """Fetch included content following the instructions encoded in the input data structure.

    Args:
        includeD (dict): {"dictionaryIncludeDict": {dictionary_id: {...include details...}},
                          "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
                          "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}},
                          }
        cleanup (optional, bool): flag to remove generator category objects after parsing (default: false)

    Returns:
        (dict): {datablockName: {"extend": [container,...], "replace": [container, ...]}, ... }
    """
    includeDataD = {}
    try:
        for datablockName, inclD in includeD.items():
            # cL accumulates (container, includeMode) pairs for this datablock and is
            # flushed into includeDataD after all dictionary components are processed.
            cL = []
            for dictName, iD in inclD["dictionaryIncludeDict"].items():
                locator = iD["dictionary_locator"]
                # Guard against pulling the same dictionary component in twice
                # (also breaks include cycles).
                if locator in self.__locatorIndexD:
                    logger.info("Skipping redundant include for %r at %r", dictName, locator)
                    continue
                self.__locatorIndexD[locator] = dictName
                #
                # --- Fetch the dictionary component -
                #
                # For local (file-system) locators, maintain a directory stack so that
                # relative include paths nested inside the fetched component resolve
                # against the including file's directory.
                updateStack = self.__isLocal(locator)
                if updateStack:
                    if not self.__dirStack:
                        # top-level include case
                        self.__dirStack.append(os.path.abspath(self.__dirPath))
                    # embedded include case (push directory containing the locator)
                    if not os.path.isabs(locator):
                        # handle the relative path case -
                        locator = os.path.abspath(os.path.join(self.__dirStack[-1], locator))
                        logger.debug("modified local relative locator is %r", locator)
                    self.__dirStack.append(os.path.dirname(locator))
                    logger.debug("dirStack (%d) top %r", len(self.__dirStack), self.__dirStack[-1])
                # Recursive step: the fetched component may itself contain includes.
                containerList = self.processIncludedContent(self.__fetchLocator(locator), cleanup=cleanup)
                if updateStack:
                    # restore stack context
                    self.__dirStack.pop()
                #
                nsPrefix = iD["dictionary_namespace_prefix"]
                nsPrefixReplace = iD["dictionary_namespace_prefix_replace"]
                dictInclMode = iD["include_mode"]
                # "data_include_mode" is optional in the instruction payload; default to "extend".
                dataIncludeMode = iD["data_include_mode"] if "data_include_mode" in iD else "extend"
                # Per-dictionary category/item selection maps (None when the whole
                # component should be included).
                catInclD = inclD["categoryIncludeDict"][dictName] if dictName in inclD["categoryIncludeDict"] else None
                itemInclD = inclD["itemIncludeDict"][dictName] if dictName in inclD["itemIncludeDict"] else None
                #
                # Do data sections first.
                for container in containerList:
                    if container.getType() == "data":
                        logger.debug("Including data container %r with %r", container.getName(), container.getObjNameList())
                        cL.append((container, dataIncludeMode))
                #
                if catInclD or itemInclD:
                    # Process only explicitly included categories/items in the dictionary component
                    if catInclD:
                        for container in containerList:
                            if container.getType() == "data":
                                continue
                            cName = container.getName()
                            # NOTE(review): presumably definition containers are named by item
                            # name unless they are category definitions — confirm against the
                            # container implementation.
                            catName = cName if container.isCategory() else CifName.categoryPart(cName)
                            #
                            if catName in catInclD:
                                # An item-level instruction (if present) overrides the
                                # category-level one for attribute definitions; a falsy
                                # per-entry include_mode falls back to the dictionary mode.
                                if container.isAttribute() and itemInclD and catName in itemInclD and cName in itemInclD[catName]:
                                    inclMode = itemInclD[catName][cName]["include_mode"] if itemInclD[catName][cName]["include_mode"] else dictInclMode
                                    cL.append((self.__renameItem(container, itemInclD[catName][cName]["include_as_item_name"]), inclMode))
                                else:
                                    inclMode = catInclD[catName]["include_mode"] if catInclD[catName]["include_mode"] else dictInclMode
                                    cL.append((self.__renameCategory(container, catInclD[catName]["include_as_category_id"]), inclMode))
                    elif itemInclD:
                        # Process only explicitly included items exclusive of explicitly included categories in the dictionary component
                        for container in containerList:
                            if container.getType() == "data":
                                continue
                            cName = container.getName()
                            catName = cName if container.isCategory() else CifName.categoryPart(cName)
                            #
                            if container.isAttribute() and catName in itemInclD and cName in itemInclD[catName]:
                                inclMode = itemInclD[catName][cName]["include_mode"] if itemInclD[catName][cName]["include_mode"] else dictInclMode
                                cL.append((self.__renameItem(container, itemInclD[catName][cName]["include_as_item_name"]), inclMode))
                else:
                    # Process the full content of the dictionary component
                    for container in containerList:
                        if container.getType() == "data":
                            continue
                        cName = container.getName()
                        catName = cName if container.isCategory() else CifName.categoryPart(cName)
                        #
                        # Apply the namespace prefix substitution to every definition.
                        if container.isAttribute():
                            newName = self.__substituteItemPrefix(cName, nsPrefix, nsPrefixReplace)
                            cL.append((self.__renameItem(container, newName), dictInclMode))
                        else:
                            newName = self.__substituteCategoryPrefix(catName, nsPrefix, nsPrefixReplace)
                            cL.append((self.__renameCategory(container, newName), dictInclMode))
            #
            # Flush the accumulated containers for this datablock, bucketed by mode.
            # Any mode other than "replace"/"extend" is silently dropped here.
            for container, inclMode in cL:
                if inclMode == "replace":
                    includeDataD.setdefault(datablockName, {}).setdefault("replace", []).append(container)
                elif inclMode == "extend":
                    logger.debug("%r extending with %r", datablockName, container.getName())
                    includeDataD.setdefault(datablockName, {}).setdefault("extend", []).append(container)
        #
        # Summary accounting (debug only).
        for nm in includeDataD:
            numReplace = len(includeDataD[nm]["replace"]) if "replace" in includeDataD[nm] else 0
            numExtend = len(includeDataD[nm]["extend"]) if "extend" in includeDataD[nm] else 0
            logger.debug("includeDataD %s replace (%d) extend (%d)", nm, numReplace, numExtend)
        #
    except Exception as e:
        # Best-effort: on failure, return whatever was assembled so far.
        logger.exception("Failing with %s", str(e))
    return includeDataD