Example #1
0
 def __getParentCategories(self, itemNameList):
     """Return the distinct category names of all parent items of the input items.

     Args:
         itemNameList (list): item names; each is split into its category and attribute parts

     Returns:
         list: unique parent category names (collected in a set, so the order is not defined)
     """
     uniqueNames = set()
     for name in itemNameList:
         catPart = CifName.categoryPart(name)
         atPart = CifName.attributePart(name)
         parents = self.__dApi.getFullParentList(catPart, atPart)
         uniqueNames.update(CifName.categoryPart(parent) for parent in parents)
     return list(uniqueNames)
Example #2
0
 def __getChildCategories(self, itemNameList):
     """Return the distinct category names of all child items of the input items.

     Args:
         itemNameList (list): item names; each is split into its category and attribute parts

     Returns:
         list: unique child category names (collected in a set, so the order is not defined)
     """
     uniqueNames = set()
     for name in itemNameList:
         catPart = CifName.categoryPart(name)
         atPart = CifName.attributePart(name)
         children = self.__dApi.getFullChildList(catPart, atPart)
         uniqueNames.update(CifName.categoryPart(child) for child in children)
     return list(uniqueNames)
Example #3
0
    def __assignItemIconType(self, itemNameList):
        """Assign an icon type label to each item in the input list.

        Each label starts as one of "key", "all-mandatory", "mandatory",
        "deposit-mandatory" or "none", and is then suffixed with "+database",
        "+chem-dict" and/or "+bird-dict" when the item has a non-zero count for the
        "archive", "cc", or "prd"/"family" delivery types respectively.

        The category name is taken from the first item only and reused for every
        item in the list.

        Args:
            itemNameList (list): item names

        Returns:
            list: icon type labels in the same order as the input (empty for empty input)
        """
        # Guard the itemNameList[0] access below against an empty/None list.
        if not itemNameList:
            return []
        #
        mandatoryCodes = ("yes", "y")
        categoryName = CifName.categoryPart(itemNameList[0])
        keyItemNameList = self.__dApi.getCategoryKeyList(categoryName)
        iconTypeList = []
        for itemName in itemNameList:
            attributeName = CifName.attributePart(itemName)
            aMan = self.__dApi.getMandatoryCode(categoryName, attributeName) in mandatoryCodes
            dMan = self.__dApi.getMandatoryCodeAlt(categoryName, attributeName, fallBack=False) in mandatoryCodes
            inArchive = self.__getItemCount(itemName, deliveryType="archive") > 0
            inChemDict = self.__getItemCount(itemName, deliveryType="cc") > 0
            inBirdDict = self.__getItemCount(itemName, deliveryType="prd") > 0 or self.__getItemCount(itemName, deliveryType="family") > 0

            # Key membership takes precedence over the mandatory codes.
            if itemName in keyItemNameList:
                tType = "key"
            elif aMan and dMan:
                tType = "all-mandatory"
            elif aMan:
                tType = "mandatory"
            elif dMan:
                tType = "deposit-mandatory"
            else:
                tType = "none"
            if inArchive:
                tType += "+database"
            if inChemDict:
                tType += "+chem-dict"
            if inBirdDict:
                tType += "+bird-dict"
            iconTypeList.append(tType)

        return iconTypeList
Example #4
0
    def test_consolidate_dictionary(self, api_paths):
        """Print parent/child/type details for selected items of a consolidated dictionary
        and check that each item has a type code."""
        reader = IoAdapter(raiseExceptions=True)
        dictionaryPath = str(api_paths['pathPdbxDictionary'])
        containerList = reader.readFile(inputFilePath=dictionaryPath)
        dApi = DictionaryApi(containerList=containerList, consolidate=True)

        itemNames = (
            '_entity.id',
            '_entity_poly_seq.num',
            '_atom_site.label_asym_id',
            '_struct_asym.id',
            '_chem_comp.id',
            'chem_comp_atom.comp_id',
            'chem_comp_bond.comp_id',
        )
        for itemName in itemNames:
            catName = CifName.categoryPart(itemName)
            atName = CifName.attributePart(itemName)

            parents = dApi.getFullParentList(catName, atName)
            print("Full parent list for  %s : %s\n" % (itemName, parents))

            children = dApi.getFullChildList(catName, atName)
            print("Full child  list for  %s : %s\n" % (itemName, children))

            ultimateParent = dApi.getUltimateParent(catName, atName)
            print("Ultimate parent for  %s : %s\n" % (itemName, ultimateParent))

            typeCode = dApi.getTypeCode(catName, atName)
            print("Type code for  %s : %s\n" % (itemName, typeCode))

            assert dApi.getTypeCode(catName, atName) is not None
Example #5
0
 def __itemNameToDictList(self, itemNameList):
     """Convert item names into a list of category/attribute dictionaries.

     Duplicate item names are dropped, keeping the position of the first occurrence.

     Args:
         itemNameList (list): item names

     Returns:
         list: [{"CATEGORY": <category part>, "ATTRIBUTE": <attribute part>}, ...]
     """
     uniqueNames = OrderedDict.fromkeys(itemNameList)
     return [{"CATEGORY": CifName.categoryPart(name), "ATTRIBUTE": CifName.attributePart(name)} for name in uniqueNames]
Example #6
0
    def testGetUcode(self):
        """Test case - print every data item whose type code is ucode"""
        print("\n")

        try:
            reader = IoAdapter(raiseExceptions=True)
            self.__containerList = reader.readFile(inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, verbose=self.__verbose)

            logger.debug("+++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")
            for catName in dApi.getCategoryList():
                for itemName in dApi.getItemNameList(catName):
                    categoryName = CifName.categoryPart(itemName)
                    attributeName = CifName.attributePart(itemName)
                    code = dApi.getTypeCode(categoryName, attributeName)
                    if code != "ucode":
                        continue
                    print("Table: ", categoryName, "\tColumn: ", attributeName, "\tType: ", code)
        except Exception as err:
            # Any failure while reading or scanning the dictionary fails the test.
            logger.exception("Failing with %s", str(err))
            self.fail()
Example #7
0
    def testConsolidateDictionary(self):
        """Test case -  log relationship and type details for selected items of a consolidated dictionary"""
        itemNames = (
            "_entity.id",
            "_entity_poly_seq.num",
            "_atom_site.label_asym_id",
            "_struct_asym.id",
            "_chem_comp.id",
            "chem_comp_atom.comp_id",
            "chem_comp_bond.comp_id",
        )
        try:
            reader = IoAdapter(raiseExceptions=True)
            self.__containerList = reader.readFile(inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, expandItemLinked=False, verbose=self.__verbose)
            for itemName in itemNames:
                catName = CifName.categoryPart(itemName)
                atName = CifName.attributePart(itemName)

                parents = dApi.getFullParentList(catName, atName)
                logger.debug("Full parent list for  %s : %s\n", itemName, parents)

                children = dApi.getFullChildList(catName, atName)
                logger.debug("Full child  list for  %s : %s\n", itemName, children)

                ultimateParent = dApi.getUltimateParent(catName, atName)
                logger.debug("Ultimate parent for  %s : %s\n", itemName, ultimateParent)

                typeCode = dApi.getTypeCode(catName, atName)
                logger.debug("Type code for  %s : %s\n", itemName, typeCode)

                # Every listed item is expected to resolve to a type code.
                self.assertIsNotNone(dApi.getTypeCode(catName, atName))
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Example #8
0
 def __substituteItemPrefix(self, itemName, curPrefix, newPrefix):
     """Replace a leading prefix in both the category and attribute parts of an item name.

     Each part is changed only when it starts with curPrefix, and only that first
     occurrence is replaced. The two parts are tested independently.

     Args:
         itemName (str): item name to be split into category and attribute parts
         curPrefix (str): prefix to look for at the start of each part
         newPrefix (str): replacement prefix

     Returns:
         str: item name rebuilt with CifName.itemName() from the (possibly substituted) parts
     """
     atName = CifName.attributePart(itemName)
     catName = CifName.categoryPart(itemName)
     # str.startswith()/str.replace() raise TypeError on None, so with a missing
     # prefix the name is rebuilt without substitution.
     if curPrefix is not None and newPrefix is not None:
         if atName and atName.startswith(curPrefix):
             atName = atName.replace(curPrefix, newPrefix, 1)
         # Test the category part itself, not the attribute part.
         if catName and catName.startswith(curPrefix):
             catName = catName.replace(curPrefix, newPrefix, 1)
     return CifName.itemName(catName, atName)
Example #9
0
    def __getIncludeInstructions(self, containerList):
        """Collect the include instructions stored in the data containers of the input list.

        Categories pdbx_include_dictionary, pdbx_include_category and pdbx_include_item are read.
        The category and item level instructions are examined only when the container has a
        pdbx_include_dictionary category.

        Args:
            containerList (list): container objects; only those of type "data" are examined

        Returns:
            (dict): {datablockName: {"dictionaryIncludeDict": {dictionary_id: {...include details...}},
                                     "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
                                     "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}}
                                     }
                    }
                    Unnamed data containers are keyed by a running counter ("1", "2", ...).
                    On any exception the error is logged and the content collected so far is returned.
        """
        dictionaryAttributes = ["dictionary_id", "dictionary_locator", "include_mode", "dictionary_namespace_prefix", "dictionary_namespace_prefix_replace"]
        categoryAttributes = ["dictionary_id", "category_id", "include_as_category_id", "include_mode"]
        itemAttributes = ["dictionary_id", "item_name", "include_as_item_name", "include_mode"]

        def rowToDict(catObj, row, attributeNames):
            # Attributes missing from the category are recorded as None.
            return OrderedDict((name, row[catObj.getIndex(name)] if catObj.hasAttribute(name) else None) for name in attributeNames)

        includeD = OrderedDict()
        try:
            unNamedCount = 1
            for container in containerList:
                if container.getType() != "data":
                    continue
                dictIncl = OrderedDict()
                catIncl = OrderedDict()
                itemIncl = OrderedDict()
                blockName = container.getName()
                if not blockName:
                    blockName = str(unNamedCount)
                    unNamedCount += 1
                logger.debug("Adding data sections from container name %s  type  %s", blockName, container.getType())
                dictObj = container.getObj("pdbx_include_dictionary")
                if dictObj is not None:
                    for row in dictObj.getRowList():
                        details = rowToDict(dictObj, row, dictionaryAttributes)
                        dictIncl[details["dictionary_id"]] = details
                    #
                    catObj = container.getObj("pdbx_include_category")
                    if catObj is not None:
                        for row in catObj.getRowList():
                            details = rowToDict(catObj, row, categoryAttributes)
                            # the first instruction seen for a (dictionary, category) pair is kept
                            catIncl.setdefault(details["dictionary_id"], {}).setdefault(details["category_id"], details)
                    #
                    itemObj = container.getObj("pdbx_include_item")
                    if itemObj is not None:
                        for row in itemObj.getRowList():
                            details = rowToDict(itemObj, row, itemAttributes)
                            categoryId = CifName.categoryPart(details["item_name"])
                            itemIncl.setdefault(details["dictionary_id"], {}).setdefault(categoryId, {}).setdefault(details["item_name"], details)
                includeD[blockName] = {
                    "dictionaryIncludeDict": dictIncl,
                    "categoryIncludeDict": catIncl,
                    "itemIncludeDict": itemIncl,
                }
        except Exception as err:
            logger.exception("Include processing failing with %s", str(err))
        return includeD
Example #10
0
    def setItemCounts(self, itemNameD, deliveryType="archive"):
        """Store item counts for a delivery type and track the largest item count per category.

        Args:
            itemNameD (dict): {itemName: count}
            deliveryType (str, optional): key selecting the count tables to update (default: "archive")

        Note: the debug messages below say "archive" whatever delivery type is passed.
        """
        for itemName, itemCount in itemNameD.items():
            self.__itemCounts[deliveryType][itemName] = itemCount
            categoryName = CifName.categoryPart(itemName)
            catCounts = self.__categoryCounts[deliveryType]
            if categoryName in catCounts:
                # keep the maximum item count seen for the category
                catCounts[categoryName] = max(itemCount, catCounts[categoryName])
            else:
                catCounts[categoryName] = itemCount

        numItems = len(self.__itemCounts[deliveryType])
        logger.debug("+NeighborFigures.setItemCounts() items      in archive count %d", numItems)
        numCategories = len(self.__categoryCounts[deliveryType])
        logger.debug("+NeighborFigures.setItemCounts() categories in archive count %d", numCategories)
Example #11
0
 def __getRelativesAdjacent(self, itemNameList):
     """Look up the full parent and child item lists for each input item.

     Args:
         itemNameList (list): item names

     Returns:
         dict: {itemName: {"parentItems": [...], "childItems": [...]}}
     """
     relativesD = {}
     for name in itemNameList:
         catPart = CifName.categoryPart(name)
         atPart = CifName.attributePart(name)
         relativesD[name] = {
             "parentItems": self.__dApi.getFullParentList(catPart, atPart),
             "childItems": self.__dApi.getFullChildList(catPart, atPart),
         }
     return relativesD
Example #12
0
    def __getRelatedList(self, categoryName, adjacentD):
        """Return the sorted items of a category that take part in parent/child relationships.

        An item is selected when it belongs to categoryName and is either a key of adjacentD
        with a non-empty parent or child list, or is listed as a parent/child of any key.

        Args:
            categoryName (str): category of interest
            adjacentD (dict): {itemName: {"parentItems": [...], "childItems": [...]}}

        Returns:
            list: sorted unique item names
        """
        related = set()
        for itemName, adjD in adjacentD.items():
            ownCategory = CifName.categoryPart(itemName)
            for relationKey in ("parentItems", "childItems"):
                relatives = adjD[relationKey]
                if len(relatives) == 0:
                    continue
                if ownCategory == categoryName:
                    related.add(itemName)
                related.update(relName for relName in relatives if CifName.categoryPart(relName) == categoryName)
        relatedList = sorted(related)

        logger.debug("%s items with parent/child relationships %s", categoryName, len(relatedList))
        return relatedList
Example #13
0
    def __getSliceChildren(self, sliceParentD):
        """Internal method to build data structure containing the parent-child relationships for the
        input slice parent construction.

        Args:
            sliceParentD (dict): {sliceName: [{"CATEGORY_NAME": ..., "ATTRIBUTE_NAME": ...}, ...]}

        Returns:
            OrderedDict: {sliceName: {categoryName: [{"PARENT_CATEGORY_NAME": ...,
                                                      "PARENT_ATTRIBUTE_NAME": ...,
                                                      "CHILD_ATTRIBUTE_NAME": ...}, ...]}}
            with each category's list sorted by (parent category, parent attribute, child attribute).
        """

        def relationKey(rel):
            return (rel["PARENT_CATEGORY_NAME"], rel["PARENT_ATTRIBUTE_NAME"], rel["CHILD_ATTRIBUTE_NAME"])

        retD = OrderedDict()
        for sliceName, sliceParents in sliceParentD.items():
            sD = OrderedDict()
            for pD in sliceParents:
                parentCategoryName = pD["CATEGORY_NAME"]
                parentAttributeName = pD["ATTRIBUTE_NAME"]
                #
                # The parent category maps to itself; this assignment replaces anything
                # previously accumulated under the same category name.
                sD[parentCategoryName] = [
                    {
                        "PARENT_CATEGORY_NAME": parentCategoryName,
                        "PARENT_ATTRIBUTE_NAME": parentAttributeName,
                        "CHILD_ATTRIBUTE_NAME": parentAttributeName,
                    }
                ]
                #
                childItems = self.__dApi.getFullDescendentList(parentCategoryName, parentAttributeName)
                for childItem in childItems:
                    atName = CifName.attributePart(childItem)
                    catName = CifName.categoryPart(childItem)
                    # Ignore children in the parent category
                    if catName == parentCategoryName:
                        continue
                    sD.setdefault(catName, []).append(
                        {
                            "PARENT_CATEGORY_NAME": parentCategoryName,
                            "PARENT_ATTRIBUTE_NAME": parentAttributeName,
                            "CHILD_ATTRIBUTE_NAME": atName,
                        }
                    )
            # Sort each category's relationships once per slice, after all parents have
            # been processed, rather than re-sorting every list for every parent.
            for catName in sD:
                sD[catName] = sorted(sD[catName], key=relationKey)

            retD[sliceName] = sD
        return retD
Example #14
0
 def __renameCategory(self, container, newCategoryName):
     """Update category identifier values inside a category container to a new category name.

     For each item in self.__categoryIdRelatives whose category and attribute exist in the
     container, row values equal to the container's current name are set to newCategoryName.

     NOTE(review): only these values are rewritten here; the container's own name is not
     changed in this method - confirm that is what callers expect.

     Args:
         container: container object to update in place
         newCategoryName (str): replacement category name

     Returns:
         the input container (unchanged when it is missing, not a category container,
         no new name is given, or it already has the new name)
     """
     # Every condition is an independent reason to leave the container alone; chaining
     # the first two with "and" would dereference a missing container and never reject
     # a non-category container.
     if not container or not container.isCategory() or not newCategoryName:
         return container
     #
     catNameCur = container.getName()
     if catNameCur == newCategoryName:
         return container
     try:
         for item in self.__categoryIdRelatives:
             catName = CifName.categoryPart(item)
             if not container.exists(catName):
                 continue
             cObj = container.getObj(catName)
             atName = CifName.attributePart(item)
             if not cObj.hasAttribute(atName):
                 continue
             for iRow in range(cObj.getRowCount()):
                 if cObj.getValue(atName, iRow) == catNameCur:
                     cObj.setValue(newCategoryName, atName, iRow)
     except Exception as e:
         # Best effort: log and return the container, possibly partially updated.
         logger.exception("Failing with %s", str(e))
     return container
Example #15
0
    def __getUnitCardinalityCategories(self, parentDList):
        """Assign categories with unit cardinality relative to the input list of parent key items.

        A category qualifies when, for every parent item, it is either the parent's own
        category or holds a child of that parent within its primary key, and its primary
        key has exactly as many items as there are parents.

        Args:
            parentDList (list): [{'CATEGORY_NAME': xxx, 'ATTRIBUTE_NAME': xxxx}, ...]

        Returns:
            list: sorted category names
        """
        numParents = len(parentDList)
        logger.debug("Parent slice count %d def %r", numParents, parentDList)
        #
        #  Find the common set of child categories for the input parent items.
        #  None marks "no parent processed yet" so that an empty intersection stays
        #  empty instead of being re-seeded by the next parent's categories.
        commonCategories = None
        for pD in parentDList:
            candidates = {pD["CATEGORY_NAME"]}
            for childItem in self.__dApi.getFullChildList(pD["CATEGORY_NAME"], pD["ATTRIBUTE_NAME"]):
                childCategoryName = CifName.categoryPart(childItem)
                primaryKeyItemList = self.__dApi.getCategoryKeyList(childCategoryName)
                logger.debug("child category %r primary key items  %r", childCategoryName, primaryKeyItemList)
                # child must be part of the primary key to be a candidate
                if childItem in primaryKeyItemList:
                    candidates.add(childCategoryName)
            commonCategories = candidates if commonCategories is None else commonCategories & candidates
        comCatList = sorted(commonCategories) if commonCategories else []
        logger.debug("Common category list %r", comCatList)
        #
        # comCatList is sorted, so the filtered result is sorted as well.
        ucL = [cat for cat in comCatList if len(self.__dApi.getCategoryKeyList(cat)) == numParents]
        #
        logger.debug("Slice unit cardinality categories from parent-child relationships %r", ucL)
        return ucL
Example #16
0
    def __getIncludeInstructions(self, containerList, cleanup=False):
        """Collect the include instructions stored in the data containers of the input list.

        Categories pdbx_include_dictionary, pdbx_include_category and pdbx_include_item are read.
        The category and item level instructions are examined only when the container has a
        pdbx_include_dictionary category.

        Args:
          containerList (list): list of input PdbxContainer data or definition container objects
          cleanup (optional, bool): flag to remove generator category objects after parsing (default: False)

        Returns:
          A dictionary keyed by data block name (unnamed blocks are keyed "1", "2", ...) holding the
          dictionary, category and item level include details. For example,

            ```python
            {datablockName: {
                "dictionaryIncludeDict": {dictionary_id: {...include details...}},
                "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
                "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}},
                }
            }
            ```

          On any exception the error is logged and the content collected so far is returned.
        """
        dictionaryAttributes = ["dictionary_id", "dictionary_locator", "include_mode", "dictionary_namespace_prefix", "dictionary_namespace_prefix_replace"]
        categoryAttributes = ["dictionary_id", "category_id", "include_as_category_id", "include_mode"]
        itemAttributes = ["dictionary_id", "item_name", "include_as_item_name", "include_mode"]
        generatorCategories = ["pdbx_include_dictionary", "pdbx_include_category", "pdbx_include_item"]

        def rowToDict(catObj, row, attributeNames):
            # Attributes missing from the category are recorded as None.
            return OrderedDict((name, row[catObj.getIndex(name)] if catObj.hasAttribute(name) else None) for name in attributeNames)

        includeD = OrderedDict()
        try:
            unNamedCount = 1
            for container in containerList:
                if container.getType() != "data":
                    continue
                dictIncl = OrderedDict()
                catIncl = OrderedDict()
                itemIncl = OrderedDict()
                blockName = container.getName()
                if not blockName:
                    blockName = str(unNamedCount)
                    unNamedCount += 1
                logger.debug("Adding data sections from container name %s  type  %s", blockName, container.getType())
                dictObj = container.getObj("pdbx_include_dictionary")
                if dictObj is not None:
                    for row in dictObj.getRowList():
                        details = rowToDict(dictObj, row, dictionaryAttributes)
                        dictIncl[details["dictionary_id"]] = details
                    #
                    catObj = container.getObj("pdbx_include_category")
                    if catObj is not None:
                        for row in catObj.getRowList():
                            details = rowToDict(catObj, row, categoryAttributes)
                            # the first instruction seen for a (dictionary, category) pair is kept
                            catIncl.setdefault(details["dictionary_id"], {}).setdefault(details["category_id"], details)
                    #
                    itemObj = container.getObj("pdbx_include_item")
                    if itemObj is not None:
                        for row in itemObj.getRowList():
                            details = rowToDict(itemObj, row, itemAttributes)
                            categoryId = CifName.categoryPart(details["item_name"])
                            itemIncl.setdefault(details["dictionary_id"], {}).setdefault(categoryId, {}).setdefault(details["item_name"], details)
                if cleanup:
                    # drop the include categories from the container once they have been read
                    for catName in generatorCategories:
                        if container.exists(catName):
                            container.remove(catName)
                #
                includeD[blockName] = {
                    "dictionaryIncludeDict": dictIncl,
                    "categoryIncludeDict": catIncl,
                    "itemIncludeDict": itemIncl,
                }
        except Exception as err:
            logger.exception("Include processing failing with %s", str(err))
        return includeD
Example #17
0
    def getItemRelatedList(self, itemName):
        """Return the result of the dictionary API getItemRelatedList() for the input item name."""
        catPart = CifName.categoryPart(itemName)
        atPart = CifName.attributePart(itemName)
        relatedList = self._dApi.getItemRelatedList(catPart, atPart)
        return relatedList
Example #18
0
    def getCategoryPdbxItemEnum(self, itemName):
        """Return the alternate enumeration list (DepUI) reported by the dictionary API for the input item name."""
        catPart = CifName.categoryPart(itemName)
        atPart = CifName.attributePart(itemName)
        enumList = self._dApi.getEnumListAlt(catPart, atPart)
        return enumList
Example #19
0
    def __fetchIncludedContent(self, includeD):
        """Fetch included content following the instructions encoded in the input data structure.

        For each data block, every dictionary include whose locator has not been seen before is
        fetched and processed, and the resulting containers are collected together with an
        include mode. Containers are then grouped per data block by mode.

        Args:
            includeD (dict): include instructions keyed by data block name,
                             {datablockName: {"dictionaryIncludeDict": {dictionary_id: {...include details...}},
                                              "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
                                              "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}}
                                              }
                             }

        Returns:
            (dict): {datablockName: {"extend": [container,...], "replace": [container, ...]}, ... }
                    A data block or mode key is present only when at least one container was
                    collected for it. Containers whose mode is neither "replace" nor "extend"
                    are not returned. On any exception the error is logged and the content
                    accumulated so far is returned.
        """

        includeDataD = {}
        try:
            for datablockName, inclD in includeD.items():
                # (container, include mode) pairs collected for this data block
                cL = []
                for dictName, iD in inclD["dictionaryIncludeDict"].items():
                    locator = iD["dictionary_locator"]
                    # Each locator is processed once per instance; the index records the first dictionary using it.
                    if locator in self.__locatorIndexD:
                        logger.info("Skipping redundant include for %r at %r", dictName, locator)
                        continue
                    self.__locatorIndexD[locator] = dictName
                    #
                    # --- Fetch the dictionary component -
                    #
                    containerList = self.processIncludedContent(self.__fetchLocator(locator))
                    #
                    nsPrefix = iD["dictionary_namespace_prefix"]
                    nsPrefixReplace = iD["dictionary_namespace_prefix_replace"]
                    dictInclMode = iD["include_mode"]
                    # Data sections default to "extend" when no explicit data include mode is provided.
                    dataIncludeMode = iD["data_include_mode"] if "data_include_mode" in iD else "extend"
                    # Category/item level instructions for this dictionary (None when there are none).
                    catInclD = inclD["categoryIncludeDict"][dictName] if dictName in inclD["categoryIncludeDict"] else None
                    itemInclD = inclD["itemIncludeDict"][dictName] if dictName in inclD["itemIncludeDict"] else None
                    #
                    #  Do data sections first.
                    for container in containerList:
                        if container.getType() == "data":
                            logger.debug("Including data container %r with %r", container.getName(), container.getObjNameList())
                            cL.append((container, dataIncludeMode))
                    #
                    if catInclD or itemInclD:
                        # Process only explicitly included categories/items in the dictionary component
                        if catInclD:
                            # Item level instructions are applied here only for categories that are
                            # themselves explicitly included.
                            for container in containerList:
                                if container.getType() == "data":
                                    continue
                                cName = container.getName()
                                catName = cName if container.isCategory() else CifName.categoryPart(cName)
                                #
                                if catName in catInclD:
                                    if container.isAttribute() and itemInclD and catName in itemInclD and cName in itemInclD[catName]:
                                        # An item level include mode takes precedence; fall back to the dictionary level mode.
                                        inclMode = itemInclD[catName][cName]["include_mode"] if itemInclD[catName][cName]["include_mode"] else dictInclMode
                                        cL.append((self.__renameItem(container, itemInclD[catName][cName]["include_as_item_name"]), inclMode))
                                    else:
                                        # A category level include mode takes precedence; fall back to the dictionary level mode.
                                        inclMode = catInclD[catName]["include_mode"] if catInclD[catName]["include_mode"] else dictInclMode
                                        cL.append((self.__renameCategory(container, catInclD[catName]["include_as_category_id"]), inclMode))
                        elif itemInclD:
                            # Process only explicitly included items exclusive of explicitly included categories in the dictionary component
                            for container in containerList:
                                if container.getType() == "data":
                                    continue
                                cName = container.getName()
                                catName = cName if container.isCategory() else CifName.categoryPart(cName)
                                #
                                if container.isAttribute() and catName in itemInclD and cName in itemInclD[catName]:
                                    inclMode = itemInclD[catName][cName]["include_mode"] if itemInclD[catName][cName]["include_mode"] else dictInclMode
                                    cL.append((self.__renameItem(container, itemInclD[catName][cName]["include_as_item_name"]), inclMode))
                    else:
                        # Process the full content of the dictionary component
                        for container in containerList:
                            if container.getType() == "data":
                                continue
                            cName = container.getName()
                            catName = cName if container.isCategory() else CifName.categoryPart(cName)
                            #
                            # Apply the namespace prefix substitution to the item or category name.
                            if container.isAttribute():
                                newName = self.__substituteItemPrefix(cName, nsPrefix, nsPrefixReplace)
                                cL.append((self.__renameItem(container, newName), dictInclMode))
                            else:
                                newName = self.__substituteCategoryPrefix(catName, nsPrefix, nsPrefixReplace)
                                cL.append((self.__renameCategory(container, newName), dictInclMode))
                #
                # Group the collected containers by include mode; other mode values are not stored.
                for container, inclMode in cL:
                    if inclMode == "replace":
                        includeDataD.setdefault(datablockName, {}).setdefault("replace", []).append(container)
                    elif inclMode == "extend":
                        logger.debug("%r extending with %r", datablockName, container.getName())
                        includeDataD.setdefault(datablockName, {}).setdefault("extend", []).append(container)
                #
            # Summarize the number of containers collected per data block.
            for nm in includeDataD:
                numReplace = len(includeDataD[nm]["replace"]) if "replace" in includeDataD[nm] else 0
                numExtend = len(includeDataD[nm]["extend"]) if "extend" in includeDataD[nm] else 0
                logger.info("includeDataD %s replace (%d) extend (%d)", nm, numReplace, numExtend)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return includeDataD
Example #20
0
    def __generateDotInstructions(
        self,
        categoryName,
        graphTitle=None,
        graphSubTitle=None,
        titleFormat="text",
        maxItems=20,
        filterDelivery=False,
        deliveryType="archive",
        neighborCategoryList=None,
        maxCategories=None,
    ):
        """Internal method producing GraphViz 'dot' instructions depicting data category relationships between the input category and either all
        of its adjacent neighbors or for selected 'neighborCategoryList'.    Optionally apply filtering to categories in current use within the archive.

        maxItems       controls target maximum number of attributes in any category object depiction.
        maxCategories  limits the number of related category objects depicted.
        """
        #
        # Skip cases where the principal category is not associated with the input delivery type -
        #
        if filterDelivery and not self.__isCategoryUsed(categoryName=categoryName, deliveryType=deliveryType):
            logger.debug("skipping category %r delivery %r", categoryName, deliveryType)
            return [], 0
        #
        numCategoriesRendered = 0
        itemNameList = self.__dApi.getItemNameList(categoryName)
        aR = self.__getRelativesAdjacent(itemNameList)

        #
        for k, v in aR.items():
            logger.debug("%s relatives %s", k, v)
        #
        adjacentCategories = []
        adjacentCategories.append(categoryName)
        if neighborCategoryList is None:
            adjacentCategories.extend(self.__getParentCategories(itemNameList))
            adjacentCategories.extend(self.__getChildCategories(itemNameList))
        else:
            adjacentCategories.extend(neighborCategoryList)
            #
        adjacentCategories = sorted(list(set(adjacentCategories)))

        if maxCategories is not None:
            adjacentCategories = adjacentCategories[:maxCategories]

        logger.debug("adjacent categories %s", adjacentCategories)
        #
        oL = []
        oL.append("digraph %s {" % categoryName)
        #  Some previous layout parameters --- jdw
        #        oL.append('splines=true; overlap=portho; model=subset;')
        #        oL.append('splines=ortho; overlap=compress; model=subset; ratio=1.0;')
        oL.append("splines=true; overlap=compress; ")
        #
        # Option graph title -
        if graphTitle is not None:
            if titleFormat == "text":
                # dot instructions do not recognize the font settings.
                # oL.append('graph [labelloc=b, labeljust=left, labelfontname=Helvetica, labelfontsize=18, label="%s"];' % (self.__titleFontFace, self.__titleFontSize, graphTitle))
                oL.append('graph [labelloc=b, labeljust=left, labelfontname=%s, labelfontsize=%s, label="%s"];' % (self.__titleFontFace, self.__titleFontSize, graphTitle))
            else:
                # Title is rendered with this font detail.

                titleText = '<FONT POINT-SIZE="%s" FACE="%s">%s</FONT>' % (self.__titleFontSize, self.__titleFontFace, graphTitle)
                if graphSubTitle is not None:
                    titleText += '<FONT POINT-SIZE="%s" FACE="%s"> <br/> %s</FONT>' % (self.__subTitleFontSize, self.__titleFontFace, graphSubTitle)
                oL.append("graph [labelloc=b, label=<%s>];" % (titleText))

        oL.append("node [shape=plaintext]")

        for catName in adjacentCategories:
            if filterDelivery and not self.__isCategoryUsed(categoryName=catName, deliveryType=deliveryType):
                continue
            if catName == categoryName:
                highLight = "current"
            else:
                highLight = "adjacent"
            relatedList = self.__getRelatedList(categoryName=catName, adjacentD=aR)
            oL.extend(self.__renderCategory(catName, fkList=relatedList, highLight=highLight, maxItems=maxItems, filterDelivery=filterDelivery, deliveryType=deliveryType))
            numCategoriesRendered += 1

        #  --------
        # JDW regenerate full item list -
        #
        itemNameList = []
        for catName in adjacentCategories:
            itemNameList.extend(self.__dApi.getItemNameList(catName))
        aR = {}
        aR = self.__getRelativesAdjacent(itemNameList)
        #
        # --------
        lD = {}
        #
        for itemName in itemNameList:
            if filterDelivery and not self.__isItemUsed(itemName=itemName, deliveryType=deliveryType):
                continue
            tD = aR[itemName]
            for parentItemName in tD["parentItems"]:
                if filterDelivery and not self.__isItemUsed(itemName=parentItemName, deliveryType=deliveryType):
                    continue
                catName = CifName.categoryPart(itemName)
                attName = CifName.attributePart(itemName)
                catParent = CifName.categoryPart(parentItemName)
                attParent = CifName.attributePart(parentItemName)
                #
                if (itemName, parentItemName) in lD:
                    continue
                if catParent not in adjacentCategories:
                    continue
                if catParent != catName:
                    oL.append(" _%s:__%s:w -> _%s:__%s:w;" % (catName, attName, catParent, attParent))
                else:
                    oL.append(" _%s:__%s:e -> _%s:__%s:e;" % (catName, attName, catParent, attParent))

                lD[(itemName, parentItemName)] = 1
                lD[(parentItemName, itemName)] = 1

            for childItemName in tD["childItems"]:
                if filterDelivery and not self.__isItemUsed(itemName=childItemName, deliveryType=deliveryType):
                    continue
                catName = CifName.categoryPart(itemName)
                attName = CifName.attributePart(itemName)
                catChild = CifName.categoryPart(childItemName)
                attChild = CifName.attributePart(childItemName)
                #
                if (itemName, childItemName) in lD:
                    continue
                if catChild not in adjacentCategories:
                    continue
                if catChild != catName:
                    oL.append(" _%s:__%s:w -> _%s:__%s:w;" % (catChild, attChild, catName, attName))
                else:
                    oL.append(" _%s:__%s:e -> _%s:__%s:e;" % (catChild, attChild, catName, attName))

                lD[(itemName, childItemName)] = 1
                lD[(childItemName, itemName)] = 1

        oL.append("}")
        return oL, numCategoriesRendered
Example #21
0
    def makeSchemaDef(self):
        """Assemble a schema definition dictionary for every table in the table name list.

        For each table (excluding the bookkeeping tables listed in ``skipTables``) the
        attribute definition records (``table_name``, ``attribute_name``, ``data_type``,
        ``width``, ``precision``, ``null_flag``, ``index_flag``) and the attribute mapping
        records (``target_table_name``, ``target_attribute_name``, ``source_item_name``,
        ``function_id``) are combined into a single dictionary.

        Returns:
            dict: {SCHEMA_ID: {"SCHEMA_ID": ..., "SCHEMA_NAME": ..., "SCHEMA_TYPE": "transactional",
                               "ATTRIBUTES": {...}, "ATTRIBUTE_INFO": {...}, "ATTRIBUTE_MAP": {...},
                               "MAP_MERGE_INDICES": {...} (only when a merge index exists),
                               "SCHEMA_DELETE_ATTRIBUTE": ... (only when a datablockid() attribute is indexed),
                               "INDICES": {...}}, ...}

        Failures while processing the attributes of a table are logged and the remaining
        attributes of that table are skipped; the (partial) table definition is still returned.
        """
        skipTables = ("rcsb_columninfo", "columninfo", "tableinfo", "rcsb_tableinfo")
        nullValues = ("?", ".")
        sD = {}
        for tableName in self.__tableNameList:
            if tableName in skipTables:
                continue
            tableAbbrev = self.__getTableAbbrev(tableName)
            tU = tableAbbrev.upper()
            dD = {
                "SCHEMA_ID": tU,
                "SCHEMA_NAME": tableAbbrev,
                "SCHEMA_TYPE": "transactional",
                "ATTRIBUTES": {},
                "ATTRIBUTE_INFO": {},
                "ATTRIBUTE_MAP": {},
            }
            #
            # create a sub list for this table -
            infoL = [atD for atD in self.__atDefList if atD["table_name"] == tableName]
            #
            # Mapping details keyed by upper-cased attribute abbreviation:
            #     (source category, source attribute, function id, None)
            mapD = {}
            for atD in self.__atMapList:
                if atD["target_table_name"] != tableName:
                    continue
                attributeAbbrev = self.__getAttributeAbbrev(tableName, atD["target_attribute_name"])
                atU = attributeAbbrev.upper()
                itN = atD["source_item_name"] if atD["source_item_name"] not in nullValues else None
                if itN is not None:
                    catNameM = CifName.categoryPart(itN)
                    attNameM = CifName.attributePart(itN)
                else:
                    catNameM = None
                    attNameM = None
                fId = atD["function_id"] if atD["function_id"] not in nullValues else None
                # The original code branched on (fId, catNameM) but stored the same tuple either way.
                mapD[atU] = (catNameM, attNameM, fId, None)

            #
            indexList = []
            # Reset per table so the failure message never reports a stale or unbound name.
            attributeName = None
            try:
                for ii, atD in enumerate(infoL, start=1):
                    attributeName = atD["attribute_name"]
                    attributeAbbrev = self.__getAttributeAbbrev(tableName, attributeName)
                    atU = attributeAbbrev.upper()
                    #
                    width = int(atD["width"])
                    td = {
                        "APP_TYPE": self.__convertDataType(atD["data_type"], aWidth=width),
                        "WIDTH": width,
                        "PRECISION": int(atD["precision"]),
                        "NULLABLE": not self.__toBool(atD["null_flag"]),
                        "PRIMARY_KEY": self.__toBool(atD["index_flag"]),
                        "ORDER": ii,
                    }
                    # Resolve the mapping before recording anything so that a missing mapping
                    # cannot leave an index attribute without an ATTRIBUTE_MAP entry (which
                    # previously raised an uncaught KeyError in the merge index step below).
                    mapT = mapD[atU]
                    dD["ATTRIBUTES"][atU] = attributeAbbrev
                    dD["ATTRIBUTE_INFO"][atU] = td
                    dD["ATTRIBUTE_MAP"][atU] = mapT
                    if td["PRIMARY_KEY"]:
                        indexList.append(atU)
            except Exception as e:
                logger.error("Failing for table %r attribute %r", tableName,
                             attributeName)
                logger.exception("Failing with %s", str(e))

            #
            if self.__verbose and len(indexList) > 16:
                logger.debug(
                    "+WARNING - %s index list exceeds MySQL max length %d",
                    tableName, len(indexList))
            mergeDict = {}
            deleteAttributeList = []
            for atU in indexList:
                tN, aN, fN = dD["ATTRIBUTE_MAP"][atU][:3]
                if aN is not None:
                    mergeDict.setdefault(tN, []).append(aN)
                #
                # Using RCSB convention of including one attribute in each table corresponding to the datablockId()
                #   this attributeId is used as a key for pre-insert deletions.
                #
                if fN == "datablockid()":
                    deleteAttributeList.append(atU)
            #
            # Assign merge indices for this table - one entry per source category.
            # (Previously the dictionary was reassigned on every iteration so only the
            #  last source category was retained.)
            #
            if mergeDict:
                dD["MAP_MERGE_INDICES"] = {k: {"TYPE": "EQUI-JOIN", "ATTRIBUTES": tuple(v)} for k, v in mergeDict.items()}

            indexD = {"p1": {"TYPE": "UNIQUE", "ATTRIBUTES": tuple(indexList)}}
            if deleteAttributeList:
                dD["SCHEMA_DELETE_ATTRIBUTE"] = deleteAttributeList[0]
                indexD["s1"] = {"TYPE": "SEARCH", "ATTRIBUTES": tuple(deleteAttributeList)}
            else:
                logger.debug("+WARNING - No delete attribute for table %s",
                             tableName)
            dD["INDICES"] = indexD

            if not mergeDict:
                logger.debug("+WARNING - No merge index possible for table %s",
                             tableName)

            sD[tU] = dD
        return sD
    def testClassifyByGroup(self):
        """Test case - classify dictionary items into content classes (SAMPLE, MX, NMR, EM, STRUCTURE, and DB).

        Reads the dictionary, gathers the items belonging to each configured category group
        (following child groups) plus the explicitly listed unlinked categories, and reports
        the per-class item counts together with any items left unclassified and the category
        groups those items belong to. Any exception fails the test.
        """
        try:
            ioAdapter = IoAdapter(raiseExceptions=True)
            self.__containerList = ioAdapter.readFile(inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, verbose=self.__verbose)
            #
            groupList = dApi.getCategoryGroups()
            categoryList = dApi.getCategoryList()
            # Unique, ordered list of every item defined in the dictionary
            allItemS = set()
            for categoryName in categoryList:
                allItemS.update(dApi.getItemNameList(categoryName))
            itemList = sorted(allItemS)

            logger.info("Total category length %d", len(categoryList))
            logger.info("Total definition length %d", len(itemList))

            logger.info("group length %s", len(groupList))
            logger.debug("groupList %r", groupList)
            #
            # Optional diagnostic pass (disabled) reporting categories lacking a specific group assignment
            findUnlinked = False
            if findUnlinked:
                unqualifiedS = {"pdbx_group", "inclusive_group"}
                for categoryName in categoryList:
                    gList = dApi.getCategoryGroupList(categoryName)
                    if set(gList) == unqualifiedS:
                        logger.info("unqualified %s", categoryName)

                    if not gList:
                        logger.info("--- No category group assignment for %s", categoryName)
            #
            # className -> list of item names (duplicates possible)
            classD = {}
            # Items contributed by category group membership -
            for groupName, className in self.__groupClassTupL:
                for categoryName in dApi.getCategoryGroupCategories(groupName, followChildren=True):
                    classD.setdefault(className, []).extend(dApi.getItemNameList(categoryName))
            #
            # Items contributed by explicitly assigned (unlinked) categories
            #
            for categoryName, className in self.__unlinkedCategoryClassTup:
                classD.setdefault(className, []).extend(dApi.getItemNameList(categoryName))
            #
            sumItem = 0
            classItemD = {}
            for className, itemL in classD.items():
                numItem = len(set(itemL))
                sumItem += numItem
                logger.info("class %s items %d", className, numItem)
                classItemD.update(dict.fromkeys(itemL, True))
            #
            logger.info("Sum classified items is %d", sumItem)
            logger.info("classified items %d", len(classItemD))
            #
            logger.debug("classItemD.items() %r", list(classItemD.items())[:10])

            # Report every item that did not land in any class along with its category groups
            missingGroupL = []
            unclassifiedL = [item for item in itemList if item not in classItemD]
            for jj, item in enumerate(unclassifiedL, start=1):
                categoryName = CifName.categoryPart(item)
                logger.info("%d item %r category %r", jj, item, categoryName)
                missingGroupL.extend(dApi.getCategoryGroupList(categoryName))
            #
            logger.info("missing groups %r", sorted(set(missingGroupL)))

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #23
0
    def __fetchIncludedContent(self, includeD, cleanup=False):
        """Fetch included content following the instructions encoded in the input data structure.

        Args:
            includeD (dict):  {"dictionaryIncludeDict": {dictionary_id: {...include details...}},
                               "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
                               "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}},
                               }
            cleanup (optional, bool): flag to remove generator category objects after parsing (default: false)

        Returns:
            (dict): {datablockName: {"extend": [container,...], "replace": [container, ...]}, ... }

        Any exception raised while processing is logged and the content accumulated up to
        that point is returned. Containers whose include mode is neither "replace" nor
        "extend" are not returned. A locator that has already been fetched (tracked in the
        locator index) is skipped.
        """

        includeDataD = {}
        try:
            for datablockName, inclD in includeD.items():
                cL = []
                for dictName, iD in inclD["dictionaryIncludeDict"].items():
                    locator = iD["dictionary_locator"]
                    if locator in self.__locatorIndexD:
                        logger.info("Skipping redundant include for %r at %r", dictName, locator)
                        continue
                    self.__locatorIndexD[locator] = dictName
                    #
                    # --- Fetch the dictionary component -
                    #
                    updateStack = self.__isLocal(locator)
                    if updateStack:
                        if not self.__dirStack:
                            # top-level include case
                            self.__dirStack.append(os.path.abspath(self.__dirPath))

                        # embedded include case (push directory containing the locator)
                        if not os.path.isabs(locator):
                            # handle the relative path case -
                            locator = os.path.abspath(os.path.join(self.__dirStack[-1], locator))
                            logger.debug("modified local relative locator is %r", locator)
                        self.__dirStack.append(os.path.dirname(locator))
                        logger.debug("dirStack (%d) top %r", len(self.__dirStack), self.__dirStack[-1])
                    try:
                        containerList = self.processIncludedContent(self.__fetchLocator(locator), cleanup=cleanup)
                    finally:
                        # Restore the stack context even when the fetch/parse fails; the failure is
                        # swallowed by the outer handler and a stale entry would otherwise corrupt
                        # the resolution of later relative locators.
                        if updateStack:
                            self.__dirStack.pop()
                    #
                    nsPrefix = iD["dictionary_namespace_prefix"]
                    nsPrefixReplace = iD["dictionary_namespace_prefix_replace"]
                    dictInclMode = iD["include_mode"]
                    dataIncludeMode = iD.get("data_include_mode", "extend")
                    catInclD = inclD["categoryIncludeDict"].get(dictName)
                    itemInclD = inclD["itemIncludeDict"].get(dictName)
                    #
                    #  Do data sections first (definition containers keep their relative order and follow).
                    defContainerL = []
                    for container in containerList:
                        if container.getType() == "data":
                            logger.debug("Including data container %r with %r", container.getName(), container.getObjNameList())
                            cL.append((container, dataIncludeMode))
                        else:
                            defContainerL.append(container)
                    #
                    for container in defContainerL:
                        cName = container.getName()
                        catName = cName if container.isCategory() else CifName.categoryPart(cName)
                        #
                        if catInclD:
                            # Process only explicitly included categories (and any explicitly included items within them)
                            if catName not in catInclD:
                                continue
                            if container.isAttribute() and itemInclD and catName in itemInclD and cName in itemInclD[catName]:
                                itD = itemInclD[catName][cName]
                                inclMode = itD["include_mode"] or dictInclMode
                                cL.append((self.__renameItem(container, itD["include_as_item_name"]), inclMode))
                            else:
                                ctD = catInclD[catName]
                                inclMode = ctD["include_mode"] or dictInclMode
                                cL.append((self.__renameCategory(container, ctD["include_as_category_id"]), inclMode))
                        elif itemInclD:
                            # Process only explicitly included items exclusive of explicitly included categories
                            if container.isAttribute() and catName in itemInclD and cName in itemInclD[catName]:
                                itD = itemInclD[catName][cName]
                                inclMode = itD["include_mode"] or dictInclMode
                                cL.append((self.__renameItem(container, itD["include_as_item_name"]), inclMode))
                        else:
                            # Process the full content of the dictionary component
                            if container.isAttribute():
                                newName = self.__substituteItemPrefix(cName, nsPrefix, nsPrefixReplace)
                                cL.append((self.__renameItem(container, newName), dictInclMode))
                            else:
                                newName = self.__substituteCategoryPrefix(catName, nsPrefix, nsPrefixReplace)
                                cL.append((self.__renameCategory(container, newName), dictInclMode))
                #
                for container, inclMode in cL:
                    if inclMode == "replace":
                        includeDataD.setdefault(datablockName, {}).setdefault("replace", []).append(container)
                    elif inclMode == "extend":
                        logger.debug("%r extending with %r", datablockName, container.getName())
                        includeDataD.setdefault(datablockName, {}).setdefault("extend", []).append(container)
                #
            for nm, modeD in includeDataD.items():
                numReplace = len(modeD.get("replace", []))
                numExtend = len(modeD.get("extend", []))
                logger.debug("includeDataD %s replace (%d) extend (%d)", nm, numReplace, numExtend)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return includeDataD