Ejemplo n.º 1
0
    def test_consolidate_dictionary(self, api_paths):
        """Smoke-test relationship and type lookups on a consolidated dictionary.

        Reads the dictionary file located by ``api_paths['pathPdbxDictionary']``,
        builds a ``DictionaryApi`` with ``consolidate=True`` and, for a fixed
        list of item names, prints the full parent list, full child list,
        ultimate parent and type code.  The only assertion is that each item
        has a type code that is not ``None``.
        """
        ioAdapter = IoAdapter(raiseExceptions=True)
        dictionaryPath = str(api_paths['pathPdbxDictionary'])
        containerList = ioAdapter.readFile(inputFilePath=dictionaryPath)
        dApi = DictionaryApi(containerList=containerList, consolidate=True)

        # NOTE(review): the last two names have no leading underscore;
        # presumably intentional to exercise CifName parsing - confirm.
        itemNames = (
            '_entity.id',
            '_entity_poly_seq.num',
            '_atom_site.label_asym_id',
            '_struct_asym.id',
            '_chem_comp.id',
            'chem_comp_atom.comp_id',
            'chem_comp_bond.comp_id',
        )
        # (message template, lookup) pairs, reported in this order per item.
        reports = (
            ("Full parent list for  %s : %s\n", dApi.getFullParentList),
            ("Full child  list for  %s : %s\n", dApi.getFullChildList),
            ("Ultimate parent for  %s : %s\n", dApi.getUltimateParent),
            ("Type code for  %s : %s\n", dApi.getTypeCode),
        )
        for itemName in itemNames:
            categoryName = CifName.categoryPart(itemName)
            attributeName = CifName.attributePart(itemName)
            for template, lookup in reports:
                print(template % (itemName, lookup(categoryName, attributeName)))
            assert dApi.getTypeCode(categoryName, attributeName) is not None
Ejemplo n.º 2
0
    def __assignItemIconType(self, itemNameList):
        """Return an icon-type label for every item name in ``itemNameList``.

        The category is taken from the FIRST item name only, and every item is
        looked up under that category.

        Each label starts with one of ``"key"``, ``"all-mandatory"``,
        ``"mandatory"``, ``"deposit-mandatory"`` or ``"none"`` (checked in that
        priority order) and is then extended with ``"+database"``,
        ``"+chem-dict"`` and/or ``"+bird-dict"`` when the corresponding item
        counts ("archive", "cc", "prd"/"family") are greater than zero.

        Args:
            itemNameList (list): item names.

        Returns:
            list: one label per input item, in input order.  An empty input
            yields an empty list (previously this raised ``IndexError``).
        """
        # Guard: indexing element 0 below would fail on an empty list.
        if not itemNameList:
            return []
        #
        categoryName = CifName.categoryPart(itemNameList[0])
        keyItemNameList = self.__dApi.getCategoryKeyList(categoryName)
        mandatoryValues = ("yes", "y")
        iconTypeList = []
        for itemName in itemNameList:
            attributeName = CifName.attributePart(itemName)
            aMan = self.__dApi.getMandatoryCode(categoryName, attributeName) in mandatoryValues
            dMan = self.__dApi.getMandatoryCodeAlt(categoryName, attributeName, fallBack=False) in mandatoryValues
            inArchive = self.__getItemCount(itemName, deliveryType="archive") > 0
            inChemDict = self.__getItemCount(itemName, deliveryType="cc") > 0
            inBirdDict = self.__getItemCount(itemName, deliveryType="prd") > 0 or self.__getItemCount(itemName, deliveryType="family") > 0

            # Base label: key membership wins over any mandatory status.
            if itemName in keyItemNameList:
                tType = "key"
            elif aMan and dMan:
                tType = "all-mandatory"
            elif aMan:
                tType = "mandatory"
            elif dMan:
                tType = "deposit-mandatory"
            else:
                tType = "none"

            # Usage suffixes are appended in a fixed order.
            if inArchive:
                tType += "+database"
            if inChemDict:
                tType += "+chem-dict"
            if inBirdDict:
                tType += "+bird-dict"
            iconTypeList.append(tType)

        return iconTypeList
Ejemplo n.º 3
0
 def __itemNameToDictList(self, itemNameList):
     """Split item names into ``{"CATEGORY": ..., "ATTRIBUTE": ...}`` dictionaries.

     Duplicate item names are dropped (first occurrence kept) and the input
     order is otherwise preserved.
     """
     uniqueNames = OrderedDict.fromkeys(itemNameList)
     return [
         {
             "CATEGORY": CifName.categoryPart(name),
             "ATTRIBUTE": CifName.attributePart(name),
         }
         for name in uniqueNames
     ]
Ejemplo n.º 4
0
    def testConsolidateDictionary(self):
        """Test case -  dump methods for dictionary metadata

        Builds a consolidated ``DictionaryApi`` (``expandItemLinked=False``)
        from the configured dictionary path, logs parent/child/ultimate-parent
        and type-code lookups for a fixed list of item names at debug level,
        and asserts that each item has a type code.  Any exception is logged
        and converted into a test failure.
        """
        # NOTE(review): the last two names have no leading underscore;
        # presumably intentional to exercise CifName parsing - confirm.
        itemNames = (
            "_entity.id",
            "_entity_poly_seq.num",
            "_atom_site.label_asym_id",
            "_struct_asym.id",
            "_chem_comp.id",
            "chem_comp_atom.comp_id",
            "chem_comp_bond.comp_id",
        )
        try:
            ioAdapter = IoAdapter(raiseExceptions=True)
            self.__containerList = ioAdapter.readFile(inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(
                containerList=self.__containerList,
                consolidate=True,
                expandItemLinked=False,
                verbose=self.__verbose,
            )
            # (log template, lookup) pairs, logged in this order per item.
            reports = (
                ("Full parent list for  %s : %s\n", dApi.getFullParentList),
                ("Full child  list for  %s : %s\n", dApi.getFullChildList),
                ("Ultimate parent for  %s : %s\n", dApi.getUltimateParent),
                ("Type code for  %s : %s\n", dApi.getTypeCode),
            )
            for itemName in itemNames:
                categoryName = CifName.categoryPart(itemName)
                attributeName = CifName.attributePart(itemName)
                for template, lookup in reports:
                    logger.debug(template, itemName, lookup(categoryName, attributeName))
                self.assertIsNotNone(dApi.getTypeCode(categoryName, attributeName))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Ejemplo n.º 5
0
    def testGetUcode(self):
        """Test case - Get all data items of type ucode

        Walks every category and item of the consolidated dictionary and
        prints the category, attribute and type code of each item whose type
        code equals "ucode".  Any exception is logged and fails the test.
        """
        print("\n")

        try:
            ioAdapter = IoAdapter(raiseExceptions=True)
            self.__containerList = ioAdapter.readFile(inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(
                containerList=self.__containerList,
                consolidate=True,
                verbose=self.__verbose,
            )

            logger.debug("+++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")
            for catName in dApi.getCategoryList():
                for itemName in dApi.getItemNameList(catName):
                    # Category/attribute are re-derived from the item name itself.
                    categoryName = CifName.categoryPart(itemName)
                    attributeName = CifName.attributePart(itemName)
                    code = dApi.getTypeCode(categoryName, attributeName)
                    if code == "ucode":
                        print("Table: ", categoryName, "\tColumn: ", attributeName, "\tType: ", code)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Ejemplo n.º 6
0
 def __substituteItemPrefix(self, itemName, curPrefix, newPrefix):
     """Return ``itemName`` with a leading ``curPrefix`` swapped for ``newPrefix``.

     The substitution is applied independently to the attribute part and to
     the category part, and only to the first occurrence at the start of
     each part.  The item name is then rebuilt with ``CifName.itemName``.
     """
     atName = CifName.attributePart(itemName)
     if atName and atName.startswith(curPrefix):
         atName = atName.replace(curPrefix, newPrefix, 1)

     catName = CifName.categoryPart(itemName)
     # NOTE(review): the category substitution is gated on the (already
     # substituted) attribute name being non-empty, not on catName.  This
     # looks like a copy/paste of the attribute guard - confirm intent.
     if atName and catName.startswith(curPrefix):
         catName = catName.replace(curPrefix, newPrefix, 1)

     return CifName.itemName(catName, atName)
Ejemplo n.º 7
0
 def __getChildCategories(self, itemNameList):
     """Return the distinct category names of all child items of the input items.

     Child items are those reported by ``getFullChildList`` for each input
     item.  The returned list has no defined order.
     """
     categorySet = set()
     for name in itemNameList:
         childItems = self.__dApi.getFullChildList(
             CifName.categoryPart(name), CifName.attributePart(name)
         )
         categorySet.update(CifName.categoryPart(child) for child in childItems)
     return list(categorySet)
Ejemplo n.º 8
0
 def __getParentCategories(self, itemNameList):
     """Return the distinct category names of all parent items of the input items.

     Parent items are those reported by ``getFullParentList`` for each input
     item.  The returned list has no defined order.
     """
     categorySet = set()
     for name in itemNameList:
         parentItems = self.__dApi.getFullParentList(
             CifName.categoryPart(name), CifName.attributePart(name)
         )
         categorySet.update(CifName.categoryPart(parent) for parent in parentItems)
     return list(categorySet)
Ejemplo n.º 9
0
 def __getRelativesAdjacent(self, itemNameList):
     """Map each item name to its parent and child item lists.

     Returns:
         dict: ``{itemName: {"parentItems": [...], "childItems": [...]}}``
         where the lists come from ``getFullParentList`` and
         ``getFullChildList`` respectively.
     """
     relativesD = {}
     for name in itemNameList:
         catName = CifName.categoryPart(name)
         atName = CifName.attributePart(name)
         relativesD[name] = {
             "parentItems": self.__dApi.getFullParentList(catName, atName),
             "childItems": self.__dApi.getFullChildList(catName, atName),
         }
     return relativesD
Ejemplo n.º 10
0
    def __getSliceChildren(self, sliceParentD):
        """Internal method to build data structure containing the parent-child relationships for the
        input slice parent construction.

        Args:
            sliceParentD (dict): {sliceName: [{"CATEGORY_NAME": ..., "ATTRIBUTE_NAME": ...}, ...]}

        Returns:
            OrderedDict: {sliceName: OrderedDict({categoryName: [relD, ...]})} where each relD has
            the keys "PARENT_CATEGORY_NAME", "PARENT_ATTRIBUTE_NAME" and "CHILD_ATTRIBUTE_NAME".
            Each per-category list is sorted by those three values.
        """
        retD = OrderedDict()
        for sliceName, sliceParents in sliceParentD.items():
            sD = OrderedDict()
            for pD in sliceParents:
                parentCategoryName = pD["CATEGORY_NAME"]
                parentAttributeName = pD["ATTRIBUTE_NAME"]
                # The parent category maps its slice attribute onto itself.  This is an
                # assignment, so it replaces any list already stored for that category.
                sD[parentCategoryName] = [
                    {
                        "PARENT_CATEGORY_NAME": parentCategoryName,
                        "PARENT_ATTRIBUTE_NAME": parentAttributeName,
                        "CHILD_ATTRIBUTE_NAME": parentAttributeName,
                    }
                ]
                #
                childItems = self.__dApi.getFullDescendentList(parentCategoryName, parentAttributeName)
                for childItem in childItems:
                    catName = CifName.categoryPart(childItem)
                    # Ignore children in the parent category
                    if catName == parentCategoryName:
                        continue
                    sD.setdefault(catName, []).append(
                        {
                            "PARENT_CATEGORY_NAME": parentCategoryName,
                            "PARENT_ATTRIBUTE_NAME": parentAttributeName,
                            "CHILD_ATTRIBUTE_NAME": CifName.attributePart(childItem),
                        }
                    )
            # Sort the list of dictionaries for each category.  Done once per slice, after all
            # parents have been processed: the earlier per-parent re-sorting produced the same
            # final ordering but repeated the work for every parent.
            for catName in sD:
                sD[catName] = sorted(
                    sD[catName],
                    key=lambda k: (k["PARENT_CATEGORY_NAME"], k["PARENT_ATTRIBUTE_NAME"], k["CHILD_ATTRIBUTE_NAME"]),
                )

            retD[sliceName] = sD
        return retD
Ejemplo n.º 11
0
 def __getCategoryKeysWithReplacement(self, categoryName):
     """Return the sorted key item names for ``categoryName``.

     When the category has an entry in the key-replacement mapping, the key
     items are built from the attribute names stored there; otherwise the
     dictionary API's category key list is used.
     """
     if categoryName not in self.__keyReplaceCategoryD:
         return sorted(self.__dApi.getCategoryKeyList(categoryName))

     replacementAttributes = self.__keyReplaceCategoryD[categoryName]
     return sorted(
         CifName.itemName(categoryName, atName)
         for atName in replacementAttributes
     )
Ejemplo n.º 12
0
    def __getIncludeInstructions(self, containerList):
        """Extract include instructions from categories pdbx_include_dictionary, pdbx_include_category, and pdbx_include_item.

        Only containers whose type is "data" are examined.  Unnamed containers are keyed by a
        running counter ("1", "2", ...).  Attributes missing from a category are recorded as None.

        NOTE(review): pdbx_include_category and pdbx_include_item are only read when the container
        also holds pdbx_include_dictionary - confirm this nesting is intentional.

        Returns:
            (dict): {datablockName: {"dictionaryIncludeDict": {dictionary_id: {...include details...}},
                                     "categoryIncludeDict": {dictionary_id: {category_id: {...include details... }}},
                                     "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}}
                                    }
                    }
            Processing errors are logged and whatever was collected so far is returned.
        """

        def rowToDict(catObj, row, attributeNames):
            # Pull the named attributes out of one row; absent attributes map to None.
            rowD = OrderedDict()
            for atName in attributeNames:
                rowD[atName] = row[catObj.getIndex(atName)] if catObj.hasAttribute(atName) else None
            return rowD

        includeD = OrderedDict()
        try:
            unNamed = 1
            for container in containerList:
                if container.getType() != "data":
                    continue
                dictionaryIncludeDict = OrderedDict()
                categoryIncludeDict = OrderedDict()
                itemIncludeDict = OrderedDict()
                datablockName = container.getName()
                if not datablockName:
                    datablockName = str(unNamed)
                    unNamed += 1
                logger.debug("Adding data sections from container name %s  type  %s", datablockName, container.getType())

                dictObj = container.getObj("pdbx_include_dictionary")
                if dictObj is not None:
                    dictAttributes = ["dictionary_id", "dictionary_locator", "include_mode", "dictionary_namespace_prefix", "dictionary_namespace_prefix_replace"]
                    for row in dictObj.getRowList():
                        tD = rowToDict(dictObj, row, dictAttributes)
                        dictionaryIncludeDict[tD["dictionary_id"]] = tD
                    #
                    catObj = container.getObj("pdbx_include_category")
                    if catObj is not None:
                        catAttributes = ["dictionary_id", "category_id", "include_as_category_id", "include_mode"]
                        for row in catObj.getRowList():
                            tD = rowToDict(catObj, row, catAttributes)
                            # First instruction for a given (dictionary, category) wins.
                            categoryIncludeDict.setdefault(tD["dictionary_id"], {}).setdefault(tD["category_id"], tD)
                    #
                    itemObj = container.getObj("pdbx_include_item")
                    if itemObj is not None:
                        itemAttributes = ["dictionary_id", "item_name", "include_as_item_name", "include_mode"]
                        for row in itemObj.getRowList():
                            tD = rowToDict(itemObj, row, itemAttributes)
                            categoryId = CifName.categoryPart(tD["item_name"])
                            # First instruction for a given (dictionary, category, item) wins.
                            itemIncludeDict.setdefault(tD["dictionary_id"], {}).setdefault(categoryId, {}).setdefault(tD["item_name"], tD)

                includeD[datablockName] = {
                    "dictionaryIncludeDict": dictionaryIncludeDict,
                    "categoryIncludeDict": categoryIncludeDict,
                    "itemIncludeDict": itemIncludeDict,
                }
        except Exception as e:
            logger.exception("Include processing failing with %s", str(e))
        return includeD
Ejemplo n.º 13
0
    def setItemCounts(self, itemNameD, deliveryType="archive"):
        """Record item counts and derive per-category counts for a delivery type.

        Args:
            itemNameD (dict): {itemName: count}
            deliveryType (str): key selecting which count tables to update

        Each item's count is stored as-is; a category's count is the largest
        count seen among its items.
        """
        for itemName, itemCount in itemNameD.items():
            self.__itemCounts[deliveryType][itemName] = itemCount
            categoryName = CifName.categoryPart(itemName)
            catCounts = self.__categoryCounts[deliveryType]
            # A category not seen before starts at this item's count.
            catCounts[categoryName] = max(itemCount, catCounts.get(categoryName, itemCount))

        # NOTE(review): both messages say "archive" whatever deliveryType is.
        logger.debug("+NeighborFigures.setItemCounts() items      in archive count %d", len(self.__itemCounts[deliveryType]))
        logger.debug("+NeighborFigures.setItemCounts() categories in archive count %d", len(self.__categoryCounts[deliveryType]))
Ejemplo n.º 14
0
 def __renameCategory(self, container, newCategoryName):
     """Replace references to the container's current name with ``newCategoryName``.

     For every item listed in ``self.__categoryIdRelatives`` whose category
     exists in ``container``, each value of that attribute equal to the
     container's current name is overwritten with ``newCategoryName``.
     No rename call is made on the container object itself here.

     Args:
         container: definition container expected to describe a category
         newCategoryName (str): replacement category name

     Returns:
         The same ``container`` object.  It is returned untouched when it is
         missing, is not a category, no new name is given, or the name is
         already ``newCategoryName``.  Errors during the update are logged,
         not raised.
     """
     # Fixed guard: the original combined the first two tests with ``and``,
     # which dereferenced a missing container (AttributeError on None) and
     # never consulted isCategory() for a real container.
     if not container or not container.isCategory() or not newCategoryName:
         return container
     #
     catNameCur = container.getName()
     if catNameCur == newCategoryName:
         return container
     try:
         for item in self.__categoryIdRelatives:
             catName = CifName.categoryPart(item)
             if not container.exists(catName):
                 continue
             cObj = container.getObj(catName)
             atName = CifName.attributePart(item)
             if not cObj.hasAttribute(atName):
                 continue
             for iRow in range(cObj.getRowCount()):
                 if cObj.getValue(atName, iRow) == catNameCur:
                     cObj.setValue(newCategoryName, atName, iRow)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return container
Ejemplo n.º 15
0
    def __makeKeyItem(self, catName, attName, keyItemList, iDef):
        """Append the categories describing a synthetic composite key item to ``iDef``.

        Five single-row categories are appended, in order: "definition",
        "description", "name", "type" and "method".  The "type" dimension is
        the number of entries in ``keyItemList`` and the "method" expression
        lists those entries, comma separated.

        Args:
            catName (str): category name
            attName (str): attribute name of the key item
            keyItemList (list): item names forming the composite key
            iDef: container receiving the new categories (modified in place)
        """
        itemName = CifName.itemName(catName, attName)

        def addCategory(categoryName, attributeNames, row):
            # Build a one-row category and attach it to the target container.
            dc = DataCategory(categoryName)
            for attributeName in attributeNames:
                dc.appendAttribute(attributeName)
            dc.append(row)
            iDef.append(dc)

        addCategory("definition", ("id", "scope", "class", "update"), [itemName, "Item", "Single", "2013-08-22"])
        addCategory("description", ("text",), ["synthentic componsite key"])
        addCategory("name", ("category_id", "object_id"), [catName, attName])

        dimension = "[%d]" % len(keyItemList)
        addCategory(
            "type",
            ("purpose", "source", "contents", "container", "dimension"),
            ["Composite", "Derived", "Name", "Set", dimension],
        )

        # The whitespace inside this template is part of the emitted text.
        tmpl = """

                      With row as %s

                           %s = [%s]

        """
        mText = tmpl % (catName, itemName, ",".join(keyItemList))
        addCategory("method", ("purpose", "expression"), ["Evaluation", mText])
Ejemplo n.º 16
0
    def _makeKeyItem(self, catName, attName, keyItemList, iDef):
        """Populate ``iDef`` with the definition of a synthetic composite key item.

        Appends, in order, the single-row categories "definition",
        "description", "name", "type" and "method".  The "type" dimension is
        ``len(keyItemList)`` and the "method" expression text enumerates the
        entries of ``keyItemList`` separated by commas.

        Args:
            catName (str): category name
            attName (str): attribute name of the key item
            keyItemList (list): item names forming the composite key
            iDef: container receiving the new categories (modified in place)
        """
        itemName = CifName.itemName(catName, attName)

        def emit(categoryName, attributeNames, values):
            # Create a category with the given attributes and one row of values.
            category = DataCategory(categoryName)
            for name in attributeNames:
                category.appendAttribute(name)
            category.append(values)
            iDef.append(category)

        emit("definition", ["id", "scope", "class", "update"],
             [itemName, "Item", "Single", "2013-08-22"])
        emit("description", ["text"], ['synthentic componsite key'])
        emit("name", ["category_id", "object_id"], [catName, attName])

        keyCount = len(keyItemList)
        emit("type", ["purpose", "source", "contents", "container", "dimension"],
             ['Composite', 'Derived', 'Name', 'Set', '[%d]' % keyCount])

        # The whitespace inside this template is part of the emitted text.
        tmpl = '''

                      With row as %s

                           %s = [%s]

        '''
        joinedKeys = ','.join(keyItemList)
        emit("method", ["purpose", "expression"],
             ['Evaluation', tmpl % (catName, itemName, joinedKeys)])
Ejemplo n.º 17
0
    def __buildCategoryDefinition(self, name, description, keyAttributeNames,
                                  examples, contexts):
        """Construct a category definition container from the input metadata.

        The container receives, in order, the categories "category",
        "category_key", "category_group", "pdbx_category_context" and
        "category_examples".  The category is always marked with
        mandatory code "no" and placed in the groups "inclusive_group" and
        "validation_report_group".

        Args:
            name (str): category name
            description (str): category description
            keyAttributeNames (list): key attribute names
            examples (list): category examples
            contexts (list): category contexts

        Returns:
            Definition container (object):

        """
        defC = DefinitionContainer(name)
        #
        categoryDc = DataCategory(
            "category",
            attributeNameList=["id", "description", "mandatory_code"])
        categoryDc.append([name, description, "no"])
        defC.append(categoryDc)
        #
        keyDc = DataCategory("category_key", attributeNameList=["name"])
        for keyAttributeName in keyAttributeNames:
            keyDc.append([CifName.itemName(name, keyAttributeName)])
        defC.append(keyDc)

        groupDc = DataCategory("category_group", attributeNameList=["id"])
        for groupId in ("inclusive_group", "validation_report_group"):
            groupDc.append([groupId])
        defC.append(groupDc)
        # pdbx_category_context
        contextDc = DataCategory("pdbx_category_context",
                                 attributeNameList=["category_id", "type"])
        for cType in contexts:
            contextDc.append([name, cType])
        defC.append(contextDc)
        #
        exampleDc = DataCategory("category_examples",
                                 attributeNameList=["detail", "case"])
        for example in examples:
            # "." is stored as the detail value for every example.
            exampleDc.append([".", example])
        defC.append(exampleDc)

        return defC
Ejemplo n.º 18
0
    def __getRelatedList(self, categoryName, adjacentD):
        """Return the sorted item names in ``categoryName`` that take part in a parent/child relationship.

        Args:
            categoryName (str): category of interest
            adjacentD (dict): {itemName: {"parentItems": [...], "childItems": [...]}}

        An item of ``adjacentD`` is included when it belongs to the category
        and has at least one parent or child; any parent or child item that
        belongs to the category is included as well.
        """
        relatedSet = set()
        for itemName, adjD in adjacentD.items():
            itemInCategory = CifName.categoryPart(itemName) == categoryName
            for relationKey in ("parentItems", "childItems"):
                relatives = adjD[relationKey]
                if len(relatives) == 0:
                    continue
                if itemInCategory:
                    relatedSet.add(itemName)
                relatedSet.update(
                    relative for relative in relatives
                    if CifName.categoryPart(relative) == categoryName
                )
        relatedList = sorted(relatedSet)

        logger.debug("%s items with parent/child relationships %s", categoryName, len(relatedList))
        #
        return relatedList
Ejemplo n.º 19
0
 def __getCategoryFeatures(self, catName, unitCardinalityList,
                           subCategoryD):
     """Assemble the feature dictionary for category ``catName``.

     Returns:
         dict: with keys
             "KEY_ATTRIBUTES"   - attribute names of the (possibly replaced) category keys
             "UNIT_CARDINALITY" - True when ``catName`` is in ``unitCardinalityList``
             "CONTENT_CLASSES"  - result of ``__getContentClasses(catName)``
             "IS_MANDATORY"     - True when the category mandatory code is "yes" (case-insensitive)
             "SUB_CATEGORIES"   - ``subCategoryD[catName]`` or an empty list
     """
     keyAttributes = [
         CifName.attributePart(keyItem)
         for keyItem in self.__getCategoryKeysWithReplacement(catName)
     ]
     isUnitCardinality = catName in unitCardinalityList
     contentClasses = self.__getContentClasses(catName)
     mandatoryCode = str(self.__dApi.getCategoryMandatoryCode(catName)).lower()
     if catName in subCategoryD:
         subCategories = subCategoryD[catName]
     else:
         subCategories = []
     #
     return {
         "KEY_ATTRIBUTES": keyAttributes,
         "UNIT_CARDINALITY": isUnitCardinality,
         "CONTENT_CLASSES": contentClasses,
         "IS_MANDATORY": mandatoryCode == "yes",
         "SUB_CATEGORIES": subCategories,
     }
Ejemplo n.º 20
0
 def __getCategoryFeatures(self, catName, unitCardinalityList,
                           subCategoryD):
     """Assemble the feature dictionary for category ``catName``.

     Returns:
         dict: with keys
             "KEY_ATTRIBUTES"   - attribute names of the (possibly replaced) category keys
             "UNIT_CARDINALITY" - True when ``catName`` is in ``unitCardinalityList``
             "CONTENT_CLASSES"  - result of ``__getContentClasses(catName)``
             "IS_MANDATORY"     - True when the category mandatory code is "yes"
                                  (case-insensitive) and the name does not start with "ma_"
             "SUB_CATEGORIES"   - ``subCategoryD[catName]`` or an empty list
     """
     keyAttributes = [
         CifName.attributePart(keyItem)
         for keyItem in self.__getCategoryKeysWithReplacement(catName)
     ]
     isUnitCardinality = catName in unitCardinalityList
     contentClasses = self.__getContentClasses(catName)
     #
     # Exclude all categories beginning with "ma_" from being mandatory
     # (temporarily hardcoded here until new configuration file section added to achieve same effect)
     mandatoryCode = str(self.__dApi.getCategoryMandatoryCode(catName)).lower()
     isMandatory = mandatoryCode == "yes" and not catName.startswith("ma_")
     #
     if catName in subCategoryD:
         subCategories = subCategoryD[catName]
     else:
         subCategories = []
     #
     return {
         "KEY_ATTRIBUTES": keyAttributes,
         "UNIT_CARDINALITY": isUnitCardinality,
         "CONTENT_CLASSES": contentClasses,
         "IS_MANDATORY": isMandatory,
         "SUB_CATEGORIES": subCategories,
     }
Ejemplo n.º 21
0
    def __getUnitCardinalityCategories(self, parentDList):
        """Assign categories with unit cardinality relative to the input list of parent key items.

        A category qualifies when, for EVERY parent item, it is either the parent's own category
        or holds a child of that parent within its primary key, and its primary key has exactly
        as many items as there are parents.

        Args:
            parentDList (list):  [{'CATEGORY_NAME':xxx 'ATTRIBUTE_NAME': xxxx}]

        Returns:
            list: sorted category names (each name appears once)
        """
        numParents = len(parentDList)
        logger.debug("Parent slice count %d def %r", numParents, parentDList)
        #
        #  Find the common set of child categories for the input parent items.
        #  None marks "no parent processed yet".  The previous code used an empty list for that,
        #  so an intersection that became empty was silently re-seeded by the next parent.
        commonCategories = None
        for pD in parentDList:
            candidates = {pD["CATEGORY_NAME"]}
            for childItem in self.__dApi.getFullChildList(pD["CATEGORY_NAME"], pD["ATTRIBUTE_NAME"]):
                childCategoryName = CifName.categoryPart(childItem)
                primaryKeyItemList = self.__dApi.getCategoryKeyList(childCategoryName)
                logger.debug("child category %r primary key items  %r", childCategoryName, primaryKeyItemList)
                # child must be part of the primary key to be a candidate
                if childItem in primaryKeyItemList:
                    candidates.add(childCategoryName)
            if commonCategories is None:
                commonCategories = candidates
            else:
                commonCategories &= candidates
        comCatList = sorted(commonCategories) if commonCategories else []
        logger.debug("Common category list %r", comCatList)
        # Keep only categories whose primary key size matches the number of parents.
        ucL = [cat for cat in comCatList if len(self.__dApi.getCategoryKeyList(cat)) == numParents]
        #
        logger.debug("Slice unit cardinality categories from parent-child relationships %r", ucL)
        return sorted(ucL)
Ejemplo n.º 22
0
    def testClassifyByGroup(self):
        """Test case -  organize dictionary items by classes: SAMPLE, MX, NMR, EM, STRUCTURE, and DB

        Items are assigned to classes through the (group, class) and (category, class) pairs
        held on the test instance; items left unclassified are logged together with the category
        groups of their categories.  The test makes no assertions beyond not raising.
        """
        try:
            ioAdapter = IoAdapter(raiseExceptions=True)
            self.__containerList = ioAdapter.readFile(inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(containerList=self.__containerList, consolidate=True, verbose=self.__verbose)
            #
            groupList = dApi.getCategoryGroups()
            categoryList = dApi.getCategoryList()
            allItems = set()
            for category in categoryList:
                allItems.update(dApi.getItemNameList(category))
            itemList = sorted(allItems)

            logger.info("Total category length %d", len(categoryList))
            logger.info("Total definition length %d", len(itemList))

            logger.info("group length %s", len(groupList))
            logger.debug("groupList %r", groupList)
            #
            # Optional diagnostic, disabled by default: report categories that carry only the
            # generic groups or no group at all.
            findUnlinked = False
            if findUnlinked:
                genericGroups = {"pdbx_group", "inclusive_group"}
                for category in categoryList:
                    gList = dApi.getCategoryGroupList(category)
                    if set(gList) == genericGroups:
                        logger.info("unqualified %s", category)

                    if not gList:
                        logger.info("--- No category group assignment for %s", category)
            #
            classD = {}
            # Add category group members -
            for groupName, className in self.__groupClassTupL:
                for category in dApi.getCategoryGroupCategories(groupName, followChildren=True):
                    classD.setdefault(className, []).extend(dApi.getItemNameList(category))
            #
            # Add unlinked categories
            #
            for category, className in self.__unlinkedCategoryClassTup:
                classD.setdefault(className, []).extend(dApi.getItemNameList(category))
            #
            sumItem = 0
            classItemD = {}
            for className, itemL in classD.items():
                numItem = len(set(itemL))
                sumItem += numItem
                logger.info("class %s items %d", className, numItem)
                classItemD.update(dict.fromkeys(itemL, True))
            #
            logger.info("Sum classified items is %d", sumItem)
            logger.info("classified items %d", len(classItemD))
            #
            logger.debug("classItemD.items() %r", list(classItemD.items())[:10])

            missingGroups = set()
            unclassified = (item for item in itemList if item not in classItemD)
            for jj, item in enumerate(unclassified, start=1):
                category = CifName.categoryPart(item)
                logger.info("%d item %r category %r", jj, item, category)
                missingGroups.update(dApi.getCategoryGroupList(category))
            #
            logger.info("missing groups %r", sorted(missingGroups))

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Ejemplo n.º 23
0
    def test_gen_ddlm(self, in_tmpdir, test_files):
        myIo = IoAdapterPy()
        containerList = myIo.readFile(
            inputFilePath=str(test_files / 'mmcif_pdbx_v5_next.dic'))
        dApi = DictionaryApi(containerList=containerList, consolidate=True)
        parentD = dApi.getParentDictionary()
        #
        oCList = []
        dDef = DataContainer('mmcif_pdbx_ddlm_auto')
        dc = DataCategory("dictionary")
        dc.appendAttribute("title")
        dc.appendAttribute("class")
        dc.appendAttribute("version")
        dc.appendAttribute("date")
        dc.appendAttribute("ddl_conformance")
        dc.appendAttribute("text")
        dc.append([
            'mmcif_pdbx_ddlm_auto', 'Instance', 'latest', '2018-03-09',
            'ddlm best effort',
            'Software converted PDBx dictionary using DDLm semantics'
        ])
        dDef.append(dc)
        oCList.append(dDef)

        catIdx = dApi.getCategoryIndex()
        for catName in sorted(catIdx.keys()):
            attNameList = catIdx[catName]
            # created definition container -
            cDef = DefinitionContainer(catName)
            oCList.append(cDef)
            #
            dc = DataCategory("definition")
            dc.appendAttribute("id")
            dc.appendAttribute("scope")
            dc.appendAttribute("class")
            dc.appendAttribute("update")
            dc.append([catName, "Category", "Loop", "2018-03-09"])
            cDef.append(dc)
            val = dApi.getCategoryDescription(category=catName)
            dc = DataCategory("description")
            dc.appendAttribute("text")
            dc.append([val])
            cDef.append(dc)
            #
            dc = DataCategory("name")
            dc.appendAttribute("category_id")
            dc.appendAttribute("object_id")

            valList = dApi.getCategoryGroupList(category=catName)
            pcg = catName
            for val in valList:
                if val != 'inclusive_group':
                    pcg = val
                    break
            dc.append([catName, pcg])
            cDef.append(dc)

            valList = dApi.getCategoryKeyList(category=catName)
            if len(valList) < 1:
                print("Missing caegory key for category %s\n" % catName)
            else:
                dc = DataCategory("category")
                dc.appendAttribute("key_id")
                kItemName = CifName.itemName(catName, "synthetic_key")
                dc.append([kItemName])
                cDef.append(dc)

                iDef = DefinitionContainer(kItemName)
                self._makeKeyItem(catName, "synthetic_key", valList, iDef)
                oCList.append(iDef)

            for attName in attNameList:
                itemName = CifName.itemName(catName, attName)
                iDef = DefinitionContainer(itemName)

                oCList.append(iDef)

                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([itemName, "Item", "Single", "2013-08-22"])
                iDef.append(dc)
                #
                val = dApi.getDescription(category=catName, attribute=attName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                iDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")
                #
                if itemName in parentD:
                    dc.appendAttribute("linked_item_id")
                    dc.append([catName, attName, parentD[itemName][0]])
                else:
                    dc.append([catName, attName])
                iDef.append(dc)
                #
                #
                aliasList = dApi.getItemAliasList(category=catName,
                                                  attribute=attName)
                if len(aliasList) > 0:
                    dc = DataCategory("alias")
                    dc.appendAttribute("definition_id")
                    for alias in aliasList:
                        dc.append([alias[0]])
                    iDef.append(dc)

                enList = dApi.getEnumListAltWithDetail(category=catName,
                                                       attribute=attName)

                tC = dApi.getTypeCode(category=catName, attribute=attName)
                tcontainer = 'Single'
                purpose = 'Describe'
                source = 'Recorded'
                contents = 'Text'
                #
                if tC is None:
                    self.__lfh.write("Missing data type attribute %s\n" %
                                     attName)
                elif tC in [
                        'code', 'atcode', 'name', 'idname', 'symop', 'fax',
                        'phone', 'email', 'code30', 'ec-type'
                ]:
                    purpose = 'Encode'
                    contents = 'Text'
                    source = 'Assigned'
                elif tC in ['ucode']:
                    purpose = 'Encode'
                    contents = 'Code'
                    source = 'Assigned'
                elif tC in ['line', 'uline', 'text']:
                    purpose = 'Describe'
                    source = 'Recorded'
                    contents = 'Text'
                elif tC in ['int']:
                    purpose = 'Number'
                    source = 'Recorded'
                    contents = 'Integer'
                elif tC in ['int-range']:
                    purpose = 'Number'
                    source = 'Recorded'
                    contents = 'Range'
                elif tC in ['float']:
                    purpose = 'Measurand'
                    source = 'Recorded'
                    contents = 'Real'
                elif tC in ['float-range']:
                    purpose = 'Measurand'
                    source = 'Recorded'
                    contents = 'Range'
                elif tC.startswith('yyyy'):
                    source = 'Assigned'
                    contents = 'Date'
                    purpose = 'Describe'

                if len(enList) > 0:
                    purpose = 'State'

                dc = DataCategory("type")
                dc.appendAttribute("purpose")
                dc.appendAttribute("source")
                dc.appendAttribute("contents")
                dc.appendAttribute("container")
                dc.append([purpose, source, contents, tcontainer])
                iDef.append(dc)
                #
                if (len(enList) > 0):
                    dc = DataCategory("enumeration_set")
                    dc.appendAttribute("state")
                    dc.appendAttribute("detail")
                    for en in enList:
                        dc.append([en[0], en[1]])
                    iDef.append(dc)

                dfv = dApi.getDefaultValue(category=catName, attribute=attName)
                bvList = dApi.getBoundaryList(category=catName,
                                              attribute=attName)
                if (((dfv is not None) and (dfv not in ['?', '.']))
                        or len(bvList) > 0):
                    row = []
                    dc = DataCategory("enumeration")
                    if dfv is not None:
                        dc.appendAttribute("default")
                        row.append(dfv)
                    if len(bvList) > 0:
                        dc.appendAttribute("range")
                        mminVp = -1000000
                        mmaxVp = 10000000
                        mminV = mmaxVp
                        mmaxV = mminVp
                        for bv in bvList:
                            minV = float(bv[0]) if bv[0] != '.' else mminVp
                            maxV = float(bv[1]) if bv[1] != '.' else mmaxVp
                            mminV = min(mminV, minV)
                            mmaxV = max(mmaxV, maxV)
                        if mminV == mminVp:
                            mminV = ''
                        if mmaxV == mmaxVp:
                            mmaxV = ''
                        row.append(str(mminV) + ":" + str(mmaxV))

                    dc.append(row)
                    iDef.append(dc)

        myIo.writeFile(outputFilePath="mmcif_pdbx_ddlm_auto.dic",
                       containerList=oCList)
Ejemplo n.º 24
0
    def getItemRelatedList(self, itemName):
        """Return the dictionary API's related-item list for an item name.

        Args:
            itemName: item name; it is split into its category and attribute
                parts with CifName before the lookup.

        Returns:
            The result of self._dApi.getItemRelatedList() for that
            category/attribute pair, passed through unchanged.
        """
        catPart, attPart = CifName.categoryPart(itemName), CifName.attributePart(itemName)
        return self._dApi.getItemRelatedList(catPart, attPart)
Ejemplo n.º 25
0
    def getCategoryPdbxItemEnum(self, itemName):
        """Return the alternate (DepUI) enumeration list for an item, if any.

        Args:
            itemName: item name; it is split into its category and attribute
                parts with CifName before the lookup.

        Returns:
            The result of self._dApi.getEnumListAlt() for that
            category/attribute pair, passed through unchanged.
        """
        nameParts = (CifName.categoryPart(itemName), CifName.attributePart(itemName))
        return self._dApi.getEnumListAlt(*nameParts)
Ejemplo n.º 26
0
    def __fetchIncludedContent(self, includeD):
        """Gather the containers pulled in by the include instructions in the input data structure.

        Args:
            includeD (dict): include instructions keyed by datablock name -
                {datablockName: {"dictionaryIncludeDict": {dictionary_id: {...include details...}},
                                 "categoryIncludeDict": {dictionary_id: {category_id: {...include details...}}},
                                 "itemIncludeDict": {dictionary_id: {category_id: {itemName: {...include details...}}}}
                                 }, ...}

        Returns:
            (dict): {datablockName: {"extend": [container,...], "replace": [container, ...]}, ... }

        Exceptions raised while processing are logged and not propagated; the content
        accumulated for the datablocks completed before the failure is returned.
        """
        resultD = {}
        try:
            for datablockName, blockInclD in includeD.items():
                # (container, include mode) pairs collected for this datablock
                pendingL = []
                for dictName, dictD in blockInclD["dictionaryIncludeDict"].items():
                    locator = dictD["dictionary_locator"]
                    if locator in self.__locatorIndexD:
                        logger.info("Skipping redundant include for %r at %r", dictName, locator)
                        continue
                    self.__locatorIndexD[locator] = dictName
                    #
                    # --- Fetch the dictionary component -
                    #
                    fetchedL = self.processIncludedContent(self.__fetchLocator(locator))
                    #
                    nsPrefix = dictD["dictionary_namespace_prefix"]
                    nsPrefixReplace = dictD["dictionary_namespace_prefix_replace"]
                    defaultMode = dictD["include_mode"]
                    dataMode = "extend"
                    if "data_include_mode" in dictD:
                        dataMode = dictD["data_include_mode"]
                    catInclD = None
                    if dictName in blockInclD["categoryIncludeDict"]:
                        catInclD = blockInclD["categoryIncludeDict"][dictName]
                    itemInclD = None
                    if dictName in blockInclD["itemIncludeDict"]:
                        itemInclD = blockInclD["itemIncludeDict"][dictName]
                    #
                    # Data sections are queued first; definition containers are set aside
                    # and handled afterwards so the queue order is data, then definitions.
                    definitionL = []
                    for container in fetchedL:
                        if container.getType() == "data":
                            logger.debug("Including data container %r with %r", container.getName(), container.getObjNameList())
                            pendingL.append((container, dataMode))
                        else:
                            definitionL.append(container)
                    #
                    for container in definitionL:
                        cName = container.getName()
                        catName = cName if container.isCategory() else CifName.categoryPart(cName)
                        #
                        if catInclD:
                            # Only explicitly included categories (and any items named within them)
                            if catName not in catInclD:
                                continue
                            if container.isAttribute() and itemInclD and catName in itemInclD and cName in itemInclD[catName]:
                                itemD = itemInclD[catName][cName]
                                mode = itemD["include_mode"] or defaultMode
                                pendingL.append((self.__renameItem(container, itemD["include_as_item_name"]), mode))
                            else:
                                catD = catInclD[catName]
                                mode = catD["include_mode"] or defaultMode
                                pendingL.append((self.__renameCategory(container, catD["include_as_category_id"]), mode))
                        elif itemInclD:
                            # Only explicitly included items; no categories were explicitly included
                            if container.isAttribute() and catName in itemInclD and cName in itemInclD[catName]:
                                itemD = itemInclD[catName][cName]
                                mode = itemD["include_mode"] or defaultMode
                                pendingL.append((self.__renameItem(container, itemD["include_as_item_name"]), mode))
                        else:
                            # Full content of the dictionary component, with namespace prefix substitution
                            if container.isAttribute():
                                newName = self.__substituteItemPrefix(cName, nsPrefix, nsPrefixReplace)
                                pendingL.append((self.__renameItem(container, newName), defaultMode))
                            else:
                                newName = self.__substituteCategoryPrefix(catName, nsPrefix, nsPrefixReplace)
                                pendingL.append((self.__renameCategory(container, newName), defaultMode))
                #
                # Sort the queued containers into the "replace"/"extend" buckets; any other mode is dropped.
                for container, mode in pendingL:
                    if mode == "replace":
                        slot = "replace"
                    elif mode == "extend":
                        logger.debug("%r extending with %r", datablockName, container.getName())
                        slot = "extend"
                    else:
                        continue
                    resultD.setdefault(datablockName, {}).setdefault(slot, []).append(container)
                #
            for blockName, modeD in resultD.items():
                logger.info("includeDataD %s replace (%d) extend (%d)", blockName, len(modeD.get("replace", [])), len(modeD.get("extend", [])))
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return resultD
Ejemplo n.º 27
0
    def __getAttributeFeatures(self, catName, iterableD, embeddedIterableD,
                               itemTransformD, methodD):
        """Assemble a feature dictionary for every attribute of a category.

        Args:
            catName (string): Category name
            iterableD (dict): value stored as ITERABLE_DELIMITER, looked up by (catName, atName)
            embeddedIterableD (dict): value stored as EMBEDDED_ITERABLE_DELIMITER, looked up by (catName, atName)
            itemTransformD (dict): dictionary of data transform filters  itd[(catName,atName)] = [f1,f2,...];
                                   the (catName, "__all__") entry is used when the attribute has none of its own
            methodD (dict): value stored as METHODS, looked up by (catName, atName)

        Returns:
            dict: {atName: feature dictionary} for each attribute in self.__categorySchema[catName]
        """
        featuresD = {}
        dApi = self.__dApi
        keyAtNames = [CifName.attributePart(kyItem) for kyItem in self.__getCategoryKeysWithReplacement(catName)]

        for atName in self.__categorySchema[catName]:
            atKey = (catName, atName)
            allKey = (catName, "__all__")
            itemName = CifName.itemName(catName, atName)
            # Defaults first: this literal fixes the key order of the feature dictionary.
            fD = {
                "CATEGORY_NAME": catName,
                "ATTRIBUTE_NAME": atName,
                "TYPE_CODE": None,
                "TYPE_CODE_ALT": None,
                "IS_MANDATORY": False,
                "CHILD_ITEMS": [],
                "CHILDREN": [],
                "ROOT_PARENT_ITEM": None,
                "ROOT_PARENT": None,
                "PARENT": None,
                "DESCRIPTION": None,
                "DESCRIPTION_ANNOTATED": [],
                "IS_KEY": False,
                "ITERABLE_DELIMITER": None,
                "EMBEDDED_ITERABLE_DELIMITER": None,
                "FILTER_TYPES": [],
                "IS_CHAR_TYPE": False,
                "METHODS": [],
                "CONTENT_CLASSES": [],
                "UNITS": None,
                "ENUMS": None,
                "ENUMS_ANNOTATED": None,
                "SEARCH_CONTEXTS": None,
            }
            fD["TYPE_CODE"] = dApi.getTypeCode(catName, atName)
            fD["TYPE_CODE_ALT"] = dApi.getTypeCodeAlt(catName, atName)
            fD["IS_MANDATORY"] = str(dApi.getMandatoryCode(catName, atName)).lower() in ["y", "yes"]
            fD["DESCRIPTION"] = textwrap.dedent(dApi.getDescription(catName, atName)).strip()
            #
            # Dictionary description always; deposition description only when present.
            annotatedL = [{"text": fD["DESCRIPTION"], "context": "dictionary"}]
            pdbxText = dApi.getDescriptionPdbx(catName, atName)
            if pdbxText:
                annotatedL.append({"text": textwrap.dedent(pdbxText).strip(), "context": "deposition"})
            fD["DESCRIPTION_ANNOTATED"] = annotatedL
            #
            fD["UNITS"] = dApi.getUnits(catName, atName)
            #
            fD["CHILD_ITEMS"] = dApi.getFullChildList(catName, atName)
            fD["CHILDREN"] = self.__itemNameToDictList(dApi.getFullChildList(catName, atName))
            #
            # An item that is its own ultimate parent has no root parent.
            ultimateParent = dApi.getUltimateParent(catName, atName)
            rootName = None if ultimateParent == itemName else ultimateParent
            fD["ROOT_PARENT_ITEM"] = rootName
            if rootName:
                fD["ROOT_PARENT"] = self.__itemNameToDictList([rootName])[0]
            else:
                fD["ROOT_PARENT"] = None
            #
            parentL = dApi.getFullParentList(catName, atName, stripSelfParent=True)
            if parentL:
                parentDictL = self.__itemNameToDictList(parentL)
                fD["PARENT"] = parentDictL[0] if parentDictL else None
                if len(parentDictL) > 1:
                    logger.warning("Unexpected multiple parent definition for %s %s : %r", catName, atName, parentDictL)
            #
            fD["IS_KEY"] = atName in keyAtNames
            pType = dApi.getTypePrimitive(catName, atName)
            fD["IS_CHAR_TYPE"] = str(pType).lower() in ["char", "uchar"]
            #
            fD["ITERABLE_DELIMITER"] = iterableD[atKey] if atKey in iterableD else None
            fD["EMBEDDED_ITERABLE_DELIMITER"] = embeddedIterableD[atKey] if atKey in embeddedIterableD else None
            #
            # Attribute-specific filters take precedence over the category-wide "__all__" entry.
            filterTypes = itemTransformD[allKey] if allKey in itemTransformD else []
            if atKey in itemTransformD:
                filterTypes = itemTransformD[atKey]
            fD["FILTER_TYPES"] = filterTypes
            #
            fD["METHODS"] = methodD[atKey] if atKey in methodD else []
            fD["CONTENT_CLASSES"] = self.__getContentClasses(catName, atName)
            #
            useInternalEnums = atKey in self.__intEnumD
            if useInternalEnums:
                rawEnumL = dApi.getEnumListPdbx(catName, atName)
            else:
                rawEnumL = dApi.getEnumList(catName, atName)
            fD["ENUMS"] = sorted(self.__assignEnumTypes(rawEnumL, pType))
            if useInternalEnums:
                logger.debug("Using internal enums for %s %s %d", catName, atName, len(fD["ENUMS"]))
                enumTupList = dApi.getEnumListAltWithFullDetails(catName, atName)
            else:
                enumTupList = dApi.getEnumListWithFullDetails(catName, atName)
            #
            if self.__hasEnumDetails(enumTupList):
                annotatedEnumL = []
                for eTup in self.__assignEnumTupTypes(enumTupList, pType):
                    entryD = {"value": eTup[0]}
                    # Optional fields are added only when they carry a truthy value.
                    for idx, label in ((1, "detail"), (2, "name"), (3, "units")):
                        if eTup[idx]:
                            entryD[label] = eTup[idx]
                    annotatedEnumL.append(entryD)
                fD["ENUMS_ANNOTATED"] = annotatedEnumL
            # -----
            exampleL = self.__assignExampleTupTypes(catName, atName, dApi.getExampleListPdbx(catName, atName), pType)
            exampleL.extend(self.__assignExampleTupTypes(catName, atName, dApi.getExampleList(catName, atName), pType))
            fD["EXAMPLES"] = exampleL
            # -----
            subCategoryL = []
            for scTup in dApi.getItemSubCategoryList(catName, atName):
                subD = {"id": scTup[0]}
                if scTup[1] is not None:
                    subD["label"] = scTup[1]
                subCategoryL.append(subD)
            fD["SUB_CATEGORIES"] = subCategoryL
            if len(subCategoryL) > 1:
                logger.debug("Multiple subcategories for %r %r %r", catName, atName, subCategoryL)
            #
            bdList = dApi.getBoundaryList(catName, atName)
            if bdList:
                # Bound value -> True when a (v, v) pair marks that bound as inclusive.
                minD = {}
                maxD = {}
                for lowV, highV in bdList:
                    if lowV != highV:
                        if lowV not in [".", "?"]:
                            minD[lowV] = False
                        if highV not in [".", "?"]:
                            maxD[highV] = False
                for lowV, highV in bdList:
                    if lowV == highV:
                        if lowV in minD:
                            minD[lowV] = True
                        if highV in maxD:
                            maxD[highV] = True
                boundSpecs = (
                    (minD, "MIN_VALUE", "MIN_VALUE_EXCLUSIVE"),
                    (maxD, "MAX_VALUE", "MAX_VALUE_EXCLUSIVE"),
                )
                for boundD, inclusiveKey, exclusiveKey in boundSpecs:
                    for ky, isInclusive in boundD.items():
                        # Values written with a decimal point become floats, all others ints.
                        kyV = float(ky) if "." in ky else int(ky)
                        if isInclusive:
                            fD[inclusiveKey] = kyV
                        else:
                            fD[exclusiveKey] = kyV
            #
            featuresD[atName] = fD
        #
        return featuresD
Ejemplo n.º 28
0
    def testGenDDLm(self):
        """Generating alternative DDLm metadata format. (starting point)

        Reads the dictionary at self.__pathPdbxDictionary, builds one definition
        container per category and per item from DictionaryApi lookups, and writes
        the collected containers to test-output/mmcif_pdbx_ddlm_auto.dic under HERE.
        Any exception is logged and fails the test.
        """

        def makeCategory(name, attributeNames, rows):
            """Build a DataCategory with the given attributes, then the given rows, in order."""
            dc = DataCategory(name)
            for attributeName in attributeNames:
                dc.appendAttribute(attributeName)
            for row in rows:
                dc.append(row)
            return dc

        def rangeText(boundaryList):
            """Collapse (min, max) boundary pairs into one "min:max" string.

            A side is left empty when any pair marks it open with "." or "?";
            otherwise it is the smallest minimum / largest maximum as a float.
            Tracking openness explicitly (rather than with numeric sentinels)
            keeps very large or very small real bounds from being misreported.
            """
            openMarks = (".", "?")
            lowL = [bv[0] for bv in boundaryList]
            highL = [bv[1] for bv in boundaryList]
            lowText = "" if any(v in openMarks for v in lowL) else str(min(float(v) for v in lowL))
            highText = "" if any(v in openMarks for v in highL) else str(max(float(v) for v in highL))
            return lowText + ":" + highText

        # Type code -> (purpose, source, contents); anything unlisted uses defaultTriple,
        # except codes starting with "yyyy", which are handled as dates below.
        defaultTriple = ("Describe", "Recorded", "Text")
        typeTriples = {
            tc: ("Encode", "Assigned", "Text")
            for tc in ("code", "atcode", "name", "idname", "symop", "fax", "phone", "email", "code30", "ec-type")
        }
        typeTriples.update({
            "ucode": ("Encode", "Assigned", "Code"),
            "line": ("Describe", "Recorded", "Text"),
            "uline": ("Describe", "Recorded", "Text"),
            "text": ("Describe", "Recorded", "Text"),
            "int": ("Number", "Recorded", "Integer"),
            "int-range": ("Number", "Recorded", "Range"),
            "float": ("Measurand", "Recorded", "Real"),
            "float-range": ("Measurand", "Recorded", "Range"),
        })
        definitionAttributes = ["id", "scope", "class", "update"]

        try:
            myIo = IoAdapterPy(self.__verbose, self.__lfh)
            self.__containerList = myIo.readFile(
                inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(containerList=self.__containerList,
                                 consolidate=True,
                                 verbose=self.__verbose)
            parentD = dApi.getParentDictionary()
            #
            oCList = []
            dDef = DataContainer("mmcif_pdbx_ddlm_auto")
            dDef.append(makeCategory(
                "dictionary",
                ["title", "class", "version", "date", "ddl_conformance", "text"],
                [[
                    "mmcif_pdbx_ddlm_auto", "Instance", "latest", "2018-03-09",
                    "ddlm best effort",
                    "Software converted PDBx dictionary using DDLm semantics"
                ]],
            ))
            oCList.append(dDef)

            catIdx = dApi.getCategoryIndex()
            for catName in sorted(catIdx.keys()):
                attNameList = catIdx[catName]
                # created definition container -
                cDef = DefinitionContainer(catName)
                oCList.append(cDef)
                #
                cDef.append(makeCategory("definition", definitionAttributes,
                                         [[catName, "Category", "Loop", "2018-03-09"]]))
                val = dApi.getCategoryDescription(category=catName)
                cDef.append(makeCategory("description", ["text"], [[val]]))
                #
                # object_id is the first group other than inclusive_group, else the category name.
                pcg = catName
                for val in dApi.getCategoryGroupList(category=catName):
                    if val != "inclusive_group":
                        pcg = val
                        break
                cDef.append(makeCategory("name", ["category_id", "object_id"], [[catName, pcg]]))

                valList = dApi.getCategoryKeyList(category=catName)
                if not valList:
                    self.__lfh.write("Missing category key for category %s\n" %
                                     catName)
                else:
                    kItemName = CifName.itemName(catName, "synthetic_key")
                    cDef.append(makeCategory("category", ["key_id"], [[kItemName]]))

                    iDef = DefinitionContainer(kItemName)
                    self.__makeKeyItem(catName, "synthetic_key", valList, iDef)
                    oCList.append(iDef)

                for attName in attNameList:
                    itemName = CifName.itemName(catName, attName)
                    iDef = DefinitionContainer(itemName)
                    oCList.append(iDef)
                    #
                    iDef.append(makeCategory("definition", definitionAttributes,
                                             [[itemName, "Item", "Single", "2013-08-22"]]))
                    #
                    val = dApi.getDescription(category=catName,
                                              attribute=attName)
                    iDef.append(makeCategory("description", ["text"], [[val]]))
                    #
                    # Items with a parent additionally link to their first listed parent.
                    if itemName in parentD:
                        iDef.append(makeCategory(
                            "name", ["category_id", "object_id", "linked_item_id"],
                            [[catName, attName, parentD[itemName][0]]]))
                    else:
                        iDef.append(makeCategory("name", ["category_id", "object_id"],
                                                 [[catName, attName]]))
                    #
                    aliasList = dApi.getItemAliasList(category=catName,
                                                      attribute=attName)
                    if aliasList:
                        iDef.append(makeCategory("alias", ["definition_id"],
                                                 [[alias[0]] for alias in aliasList]))

                    enList = dApi.getEnumListAltWithDetail(category=catName,
                                                           attribute=attName)

                    tC = dApi.getTypeCode(category=catName, attribute=attName)
                    tcontainer = "Single"
                    if tC is None:
                        self.__lfh.write("Missing data type attribute %s\n" %
                                         attName)
                        purpose, source, contents = defaultTriple
                    elif tC in typeTriples:
                        purpose, source, contents = typeTriples[tC]
                    elif tC.startswith("yyyy"):
                        purpose, source, contents = "Describe", "Assigned", "Date"
                    else:
                        purpose, source, contents = defaultTriple

                    # Any enumerated item is a State, whatever its type code.
                    if enList:
                        purpose = "State"

                    iDef.append(makeCategory("type",
                                             ["purpose", "source", "contents", "container"],
                                             [[purpose, source, contents, tcontainer]]))
                    #
                    if enList:
                        iDef.append(makeCategory("enumeration_set", ["state", "detail"],
                                                 [[en[0], en[1]] for en in enList]))

                    dfv = dApi.getDefaultValue(category=catName,
                                               attribute=attName)
                    bvList = dApi.getBoundaryList(category=catName,
                                                  attribute=attName)
                    if ((dfv is not None) and
                            (dfv not in ["?", "."])) or bvList:
                        enumAttributes = []
                        row = []
                        # When boundaries exist, a "?"/"." default is still written through unchanged.
                        if dfv is not None:
                            enumAttributes.append("default")
                            row.append(dfv)
                        if bvList:
                            enumAttributes.append("range")
                            row.append(rangeText(bvList))
                        iDef.append(makeCategory("enumeration", enumAttributes, [row]))

            myIo.writeFile(outputFilePath=os.path.join(
                HERE, "test-output", "mmcif_pdbx_ddlm_auto.dic"),
                           containerList=oCList)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Ejemplo n.º 29
0
    def makeSchemaDef(self):
        """Build a schema definition dictionary for each mapped table.

        Every name in the table name list is processed except the
        bookkeeping tables ``rcsb_columninfo``, ``columninfo``,
        ``tableinfo`` and ``rcsb_tableinfo``.  For each remaining table a
        dictionary is assembled with the keys ``SCHEMA_ID``, ``SCHEMA_NAME``,
        ``SCHEMA_TYPE``, ``ATTRIBUTES``, ``ATTRIBUTE_INFO``,
        ``ATTRIBUTE_MAP`` and ``INDICES``, plus ``MAP_MERGE_INDICES`` when at
        least one key attribute maps to a source item and
        ``SCHEMA_DELETE_ATTRIBUTE`` when a key attribute is populated by the
        ``datablockid()`` function.

        A failure while processing the attribute definitions of a table is
        logged and the remaining attributes of that table are skipped; the
        table is still returned with whatever was collected up to that point.

        Returns:
            dict: schema definitions keyed by the upper-cased table
            abbreviation.
        """
        skipTables = ("rcsb_columninfo", "columninfo", "tableinfo",
                      "rcsb_tableinfo")
        # Placeholder values that stand for "no value" in the map/definition rows.
        nullValues = ("?", ".")

        sD = {}
        for tableName in self.__tableNameList:
            if tableName in skipTables:
                continue
            tableAbbrev = self.__getTableAbbrev(tableName)
            tU = tableAbbrev.upper()
            dD = {
                "SCHEMA_ID": tU,
                "SCHEMA_NAME": tableAbbrev,
                "SCHEMA_TYPE": "transactional",
                "ATTRIBUTES": {},
                "ATTRIBUTE_INFO": {},
                "ATTRIBUTE_MAP": {},
            }
            #
            # Attribute definitions belonging to this table -
            infoL = [
                atD for atD in self.__atDefList
                if atD["table_name"] == tableName
            ]
            #
            # Attribute mapping details keyed by upper-cased attribute abbreviation -
            #    value is (source category, source attribute, function id, None)
            mapD = {}
            for atD in self.__atMapList:
                if atD["target_table_name"] != tableName:
                    continue
                attributeAbbrev = self.__getAttributeAbbrev(
                    tableName, atD["target_attribute_name"])
                atU = attributeAbbrev.upper()
                itN = atD["source_item_name"]
                if itN in nullValues:
                    itN = None
                if itN is not None:
                    catNameM = CifName.categoryPart(itN)
                    attNameM = CifName.attributePart(itN)
                else:
                    catNameM = None
                    attNameM = None
                fId = atD["function_id"]
                if fId in nullValues:
                    fId = None
                mapD[atU] = (catNameM, attNameM, fId, None)

            #
            indexList = []
            # Pre-bind so the error handler can always report a value, even if
            # the very first definition row fails before the name is read.
            attributeName = None
            try:
                for ii, atD in enumerate(infoL, start=1):
                    attributeName = atD["attribute_name"]
                    attributeAbbrev = self.__getAttributeAbbrev(
                        tableName, attributeName)
                    atU = attributeAbbrev.upper()
                    #
                    # 'data_type','index_flag','null_flag','width','precision','populated'
                    td = {
                        "APP_TYPE": self.__convertDataType(
                            atD["data_type"], aWidth=int(atD["width"])),
                        "WIDTH": int(atD["width"]),
                        "PRECISION": int(atD["precision"]),
                        "NULLABLE": not self.__toBool(atD["null_flag"]),
                        "PRIMARY_KEY": self.__toBool(atD["index_flag"]),
                        "ORDER": ii,
                    }
                    dD["ATTRIBUTES"][atU] = attributeAbbrev
                    dD["ATTRIBUTE_INFO"][atU] = td
                    dD["ATTRIBUTE_MAP"][atU] = mapD[atU]
                    # Register the key only once its mapping entry exists, so
                    # the index pass below never looks up a missing entry.
                    if td["PRIMARY_KEY"]:
                        indexList.append(atU)
            except Exception as e:
                logger.error("Failing for table %r attribute %r", tableName,
                             attributeName)
                logger.exception("Failing with %s", str(e))

            #
            if self.__verbose and len(indexList) > 16:
                logger.debug(
                    "+WARNING - %s index list exceeds MySQL max length %d",
                    tableName, len(indexList))
            mergeDict = {}
            deleteAttributeList = []
            for atU in indexList:
                tN, aN, fN = dD["ATTRIBUTE_MAP"][atU][:3]
                if aN is not None:
                    mergeDict.setdefault(tN, []).append(aN)
                #
                # Using RCSB convention of including one attribute in each table corresponding to the datablockId()
                #   this attributeId is used as a key for pre-insert deletions.
                #
                if fN == "datablockid()":
                    deleteAttributeList.append(atU)
            #
            # Assign a merge index for every source category contributing key
            # attributes (all categories are kept, not only the last one).
            #
            if mergeDict:
                dD["MAP_MERGE_INDICES"] = {
                    k: {
                        "TYPE": "EQUI-JOIN",
                        "ATTRIBUTES": tuple(v)
                    }
                    for k, v in mergeDict.items()
                }

            indices = {
                "p1": {
                    "TYPE": "UNIQUE",
                    "ATTRIBUTES": tuple(indexList)
                }
            }
            if deleteAttributeList:
                dD["SCHEMA_DELETE_ATTRIBUTE"] = deleteAttributeList[0]
                indices["s1"] = {
                    "TYPE": "SEARCH",
                    "ATTRIBUTES": tuple(deleteAttributeList)
                }
            else:
                logger.debug("+WARNING - No delete attribute for table %s",
                             tableName)
            dD["INDICES"] = indices

            if not mergeDict:
                logger.debug("+WARNING - No merge index possible for table %s",
                             tableName)

            sD[tU] = dD
        return sD
    def test_markup_category_group(self, in_tmpdir, test_files, out_file_name,
                                   groupSelectList):
        """Render dictionary category groups as a Markdown document.

        Reads ``mmcif_pdbx_v5_next.dic`` from ``test_files`` and, for every
        category group (restricted to ``groupSelectList`` when it is
        non-empty), emits a group header followed, for each category in
        case-insensitive order, by a category header, any category examples,
        an attribute summary table and a detail section per attribute
        (description, allowed values and boundary values).  The collected
        lines are joined with newlines and written to ``out_file_name``.

        Args:
            in_tmpdir: not referenced in the body (presumably a fixture used
                for its side effect - confirm against the fixture definition).
            test_files: path-like object; ``/`` is used to locate the
                dictionary file beneath it.
            out_file_name: path of the Markdown file to write.
            groupSelectList: group names to include; a falsy value selects
                every group.
        """
        rL = []

        myIo = IoAdapter()
        containerList = myIo.readFile(
            str(test_files / "mmcif_pdbx_v5_next.dic"))
        dApi = DictionaryApi(containerList=containerList, consolidate=True)
        #
        groupList = dApi.getCategoryGroups()
        print('groupList %s\n' % groupList)
        for groupName in groupList:
            if groupSelectList and groupName not in groupSelectList:
                continue

            #
            # Group header details
            #
            rL.append("# Category Group %s" % groupName)
            rL.append("")
            rL.append("")
            rL.append("%s" % dApi.getCategoryGroupDescription(groupName))
            rL.append("")
            rL.append("---")
            rL.append("")
            catNameList = dApi.getCategoryGroupCategories(groupName=groupName)
            #
            for catName in self.__sortIgnoreCase(catNameList):
                print('Group %s category %s\n' % (groupName, catName))
                catDescription = dApi.getCategoryDescription(category=catName)
                catExTupList = dApi.getCategoryExampleList(category=catName)
                keyItemNameList = dApi.getCategoryKeyList(category=catName)
                keyAttNameList = [
                    CifName.attributePart(k) for k in keyItemNameList
                ]
                #
                # Category header details
                #
                rL.append("## Category %s" % catName)
                rL.append("")
                rL.append("")
                rL.append(" %s" % catDescription)
                rL.append("")
                rL.append("---")
                rL.append("")
                if catExTupList:
                    rL.extend(
                        self.__formatTupListInset(catExTupList, tab='     '))
                #
                # Attribute summary table
                #
                rL.append("")
                rL.append("---")
                rL.append("")
                rL.append(
                    "| Attribute | Key | Required | Type | Units | Enumerated | Bounded |"
                )
                rL.append(
                    "| --------- | --- | -------- | ---- | ----- | ---------- | ------- |"
                )
                aList = self.__sortIgnoreCase(
                    dApi.getAttributeNameList(category=catName))
                for attName in aList:
                    isKey = attName in keyAttNameList
                    attUnits = dApi.getUnits(category=catName,
                                             attribute=attName)
                    attMandatory = dApi.getMandatoryCode(category=catName,
                                                         attribute=attName)
                    attTypeCode = dApi.getTypeCode(category=catName,
                                                   attribute=attName)
                    isEnum = len(
                        dApi.getEnumListWithDetail(category=catName,
                                                   attribute=attName)) > 0
                    isBounded = len(
                        dApi.getBoundaryList(category=catName,
                                             attribute=attName)) > 0
                    rL.append(
                        '| %s | %s | %s | %s | %s | %s | %s |' %
                        (attName, self.__trB(isKey), attMandatory, attTypeCode,
                         attUnits, self.__trB(isEnum), self.__trB(isBounded)))
                #
                rL.append("")
                rL.append("---")
                rL.append("")
                #
                # Per-attribute detail sections
                #
                for attName in aList:
                    attMandatory = dApi.getMandatoryCode(category=catName,
                                                         attribute=attName)
                    #
                    # Heading is the full item name, tagged as key or required.
                    tN = '_' + catName + '.' + attName
                    if attName in keyAttNameList:
                        tN = tN + ' (key)'
                    elif attMandatory.upper() in ['YES', 'Y']:
                        tN = tN + ' (required)'
                    #
                    rL.append("#### %s\n" % tN)
                    rL.append("")
                    attDescription = dApi.getDescription(category=catName,
                                                         attribute=attName)
                    rL.append(" %s\n" % attDescription)
                    rL.append("")

                    enumTupList = dApi.getEnumListWithDetail(category=catName,
                                                             attribute=attName)
                    if len(enumTupList) > 0:
                        rL.append("")
                        rL.append("---")
                        rL.append("")
                        rL.append("| Allowed Values | Detail |")
                        rL.append("| -------------- | ------ |")
                        for tup in enumTupList:
                            # A missing/empty detail is rendered as a blank cell.
                            if tup[1] and len(tup[1]) > 0:
                                detail = tup[1]
                            else:
                                detail = ' '
                            rL.append("| %s | %s |" % (tup[0], detail))
                        rL.append("")

                    #
                    bL = dApi.getBoundaryList(category=catName,
                                              attribute=attName)
                    btL = self.__processbounds(bL)
                    if len(btL) > 0:
                        # First tuple supplies the table header row.
                        tup = btL[0]
                        rL.append("")
                        rL.append("---")
                        rL.append("")
                        rL.append("| %s | %s |" % (tup[0], tup[1]))
                        #
                        rL.append("| ------------- | ------ |")
                        for tup in btL[1:]:
                            rL.append("| %s | %s |" % (tup[0], tup[1]))
                        rL.append("")
                    rL.append("")
        with open(str(out_file_name), 'w') as ofh:
            ofh.write('\n'.join(rL))