Python xmlstring Examples

Programming Language: Python

Namespace/Package Name: arelle.XmlUtil

Method/Function: xmlstring

Examples at hotexamples.com: 10

Python xmlstring - 10 examples found. These are the top-rated real-world Python examples of arelle.XmlUtil.xmlstring, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: XbrlDpmSqlDB.py Project: vincejolivet/Arelle

 def dimValKey(cntx, typedDim=False, behaveAsTypedDims=emptySet, restrictToDims=None):
     """Return a sorted, '|'-joined key of "dimension(member)" entries for a context.

     cntx               -- object whose ``qnameDims`` mapping supplies the dimension values
     typedDim           -- when true, typed members are serialized with ``xmlstring``;
                           when false they are represented by '*'
     behaveAsTypedDims  -- explicit dimensions in this collection are treated like
                           typed ones instead of always showing their member QName
     restrictToDims     -- optional collection of dimension QName strings; when
                           non-empty only those dimensions contribute to the key
     """
     def _memberText(dim):
         # Ordinary explicit dimension: always identified by its member QName.
         if dim.isExplicit and dim not in behaveAsTypedDims:
             return dim.memberQname
         # Remaining dimensions are wildcarded unless typed values were requested.
         if not typedDim:
             return '*'
         if not dim.isTyped:
             return dim.memberQname
         return xmlstring(dim.typedMember, stripXmlns=True)

     entries = []
     for dim in cntx.qnameDims.values():
         if restrictToDims and str(dim.dimensionQname) not in restrictToDims:
             continue
         entries.append("{}({})".format(dim.dimensionQname, _memberText(dim)))
     entries.sort()
     return '|'.join(entries)

Example #2

Show file

File: dpmSignature.py Project: PabTorre/Arelle

def metDimTypedKey(fact):
    """Return a key string made of the fact's QName and its context dimensions.

    The key starts with "MET(<fact qname>)".  When the fact has a context with
    dimensions, one "<dimension>(<member>)" entry per dimension is appended in
    sorted order, all separated by '|'.  Explicit dimensions show their member
    QName, typed members flagged xsi:nil show "nil", and any other typed
    member is serialized with ``xmlstring``.
    """
    xsiNil = "{http://www.w3.org/2001/XMLSchema-instance}nil"

    def _memberText(dim):
        if dim.isExplicit:
            return dim.memberQname
        if dim.typedMember.get(xsiNil) in ("true", "1"):
            return "nil"
        return xmlstring(dim.typedMember, stripXmlns=True)

    cntx = fact.context
    parts = ["MET({})".format(fact.qname)]
    if cntx is not None and cntx.qnameDims:
        dimEntries = sorted("{}({})".format(dim.dimensionQname, _memberText(dim))
                            for dim in cntx.qnameDims.values())
        parts.append('|'.join(dimEntries))
    return '|'.join(parts)

Example #3

Show file

def metDimTypedKey(fact):
    """Build the "MET(qname)|dim(member)|..." key identifying a fact.

    Facts without a context, or whose context has no dimensions, yield just
    "MET(<fact qname>)".  Otherwise each dimension adds a
    "<dimension>(<member>)" entry; the entries are sorted and joined with '|'.
    The member text is the member QName for explicit dimensions, "nil" for
    typed members whose xsi:nil attribute is "true" or "1", and the
    ``xmlstring`` serialization of the typed member in every other case.
    """
    nilAttr = "{http://www.w3.org/2001/XMLSchema-instance}nil"
    cntx = fact.context
    key = "MET({})".format(fact.qname)
    if cntx is None or not cntx.qnameDims:
        return key

    dimTexts = []
    for dim in cntx.qnameDims.values():
        dimQn = dim.dimensionQname
        if dim.isExplicit:
            member = dim.memberQname
        elif dim.typedMember.get(nilAttr) in ("true", "1"):
            member = "nil"
        else:
            member = xmlstring(dim.typedMember, stripXmlns=True)
        dimTexts.append("{}({})".format(dimQn, member))
    dimTexts.sort()
    return key + '|' + '|'.join(dimTexts)

Example #4

Show file

File: saveLoadableOIM.py Project: brianneal-wf/Arelle

 def factFootnotes(fact):
     """Return a list of OrderedDicts describing the footnotes attached to ``fact``.

     One entry is produced per relationship that ``footnotesRelationshipSet``
     reports from the fact.  Each entry carries the relationship's link role
     ("group") and arc role ("footnoteType"); when the target is another fact
     a "factRef" is recorded, otherwise the footnote content (and its language,
     if any) is recorded.
     """
     def _refId(obj):
         # facts without an id are referred to by a generated "f<objectIndex>"
         return obj.id or "f{}".format(obj.objectIndex)

     collected = []
     for rel in footnotesRelationshipSet.fromModelObject(fact):
         entry = OrderedDict()
         entry["group"] = rel.linkrole
         entry["footnoteType"] = rel.arcrole
         if isCSVorXL:
             entry["factId"] = _refId(fact)
         target = rel.toModelObject
         if isinstance(target, ModelFact):
             entry["factRef"] = _refId(target)
         else:
             entry["footnote"] = xmlstring(target, stripXmlns=True, contentsOnly=True, includeText=True)
             lang = target.xmlLang
             if lang:
                 entry["language"] = lang
         collected.append(entry)
     return collected

Example #5

Show file

File: saveLoadableOIM.py Project: asteria277/Arelle

 def factFootnotes(fact):
     """Collect footnote descriptions for ``fact`` as a list of OrderedDicts.

     Every relationship returned by ``footnotesRelationshipSet.fromModelObject``
     yields one entry whose "group" is the relationship's arc role.  A target
     that is a fact is recorded as "factRef"; any other target contributes its
     role ("footnoteType"), its serialized content ("footnote") and, when
     present, its language.
     """
     result = []
     for rel in footnotesRelationshipSet.fromModelObject(fact):
         entry = OrderedDict([("group", rel.arcrole)])
         if isCSV:
             # CSV output repeats the owning fact's id (real or generated) on each row
             entry["factId"] = fact.id or "f{}".format(fact.objectIndex)
         target = rel.toModelObject
         if not isinstance(target, ModelFact):
             entry["footnoteType"] = target.role
             entry["footnote"] = xmlstring(target, stripXmlns=True, contentsOnly=True, includeText=True)
             lang = target.xmlLang
             if lang:
                 entry["language"] = lang
         else:
             entry["factRef"] = target.id or "f{}".format(target.objectIndex)
         result.append(entry)
     return result

Example #6

Show file

File: saveLoadableOIM.py Project: PopeyeSurfer/Arelle

 def factAspects(fact):
     """Return an OrderedDict of the OIM aspects (and footnotes) of ``fact``.

     The result may contain, in insertion order: "id", "baseType", the language
     and type aspects, "value" / "numericValue" / "accuracy" / "booleanValue",
     the concept, entity, period and dimension aspects, the unit aspect(s),
     the tuple parent/order aspects and a "footnotes" list.

     Names such as ``hasId``, ``isJSON``, ``footnotesRelationshipSet``,
     ``oimValue``, ``oimPeriodValue`` and the ``qnOim*`` constants are not
     defined in this function; they are taken from the enclosing scope.
     """
     aspects = OrderedDict()
     if hasId and fact.id:
         aspects["id"] = fact.id
     elif fact.isTuple or footnotesRelationshipSet.toModelObject(fact):
         # same generated "f<objectIndex>" form that tuple-parent and factRef
         # entries below use when the referenced fact has no id
         aspects["id"] = "f{}".format(fact.objectIndex)
     parent = fact.getparent()
     concept = fact.concept
     if not fact.isTuple:
         if concept is not None:
             _baseXsdType = concept.baseXsdType
             if _baseXsdType == "XBRLI_DATEUNION":
                 # a date union is reported as date or dateTime depending on the value
                 if getattr(fact.xValue, "dateOnly", False):
                     _baseXsdType = "date"
                 else:
                     _baseXsdType = "dateTime"
             aspects["baseType"] = "xs:{}".format(_baseXsdType)
             if concept.baseXbrliType in ("string", "normalizedString", "token") and fact.xmlLang:
                 aspects[qnOimLangAspect] = fact.xmlLang
             aspects[qnOimTypeAspect] = concept.baseXbrliType
     if fact.isItem:
         if fact.isNil:
             _value = None
             _strValue = "nil"
         else:
             _inferredDecimals = inferredDecimals(fact)
             _value = oimValue(fact.xValue, _inferredDecimals)
             _strValue = str(_value)
         aspects["value"] = _strValue
         if fact.concept is not None and fact.concept.isNumeric:
             _numValue = fact.xValue
             if isinstance(_numValue, Decimal) and not isinf(_numValue) and not isnan(_numValue):
                 # finite Decimals become int when integral, otherwise float
                 if _numValue == _numValue.to_integral():
                     _numValue = int(_numValue)
                 else:
                     _numValue = float(_numValue)
             aspects["numericValue"] = _numValue
             if not fact.isNil:
                 # _inferredDecimals is only bound on the non-nil path above
                 aspects["accuracy"] = "infinity" if isinf(_inferredDecimals) else _inferredDecimals
         elif isinstance(_value, bool):
             aspects["booleanValue"] = _value
     aspects[qnOimConceptAspect] = oimValue(fact.qname)
     cntx = fact.context
     if cntx is not None:
         if cntx.entityIdentifierElement is not None:
             aspects[qnOimEntityAspect] = oimValue(qname(*cntx.entityIdentifier))
         if cntx.period is not None:
             aspects[qnOimPeriodAspect] = oimPeriodValue(cntx)
         for _qn, dim in sorted(cntx.qnameDims.items(), key=lambda item: item[0]):
             # explicit -> member QName; typed xsi:nil -> None; other typed -> its string value
             aspects[dim.dimensionQname] = (oimValue(dim.memberQname) if dim.isExplicit
                                            else None if dim.typedMember.get("{http://www.w3.org/2001/XMLSchema-instance}nil") in ("true", "1")
                                            else dim.typedMember.stringValue)
     unit = fact.unit
     if unit is not None:
         _mMul, _mDiv = unit.measures
         if isJSON:
             aspects[qnOimUnitAspect] = { # use tuple instead of list for hashability
                 "numerators": tuple(oimValue(m) for m in sorted(_mMul, key=lambda m: oimValue(m)))
             }
             if _mDiv:
                 aspects[qnOimUnitAspect]["denominators"] = tuple(oimValue(m) for m in sorted(_mDiv, key=lambda m: oimValue(m)))
         else: # CSV
             if _mMul:
                 # Sort key was ``q(m)``; no ``q`` is defined in the visible code,
                 # so sort numerators by str() exactly as the denominators are.
                 # NOTE(review): confirm no module-level ``q`` helper was intended.
                 aspects[qnOimUnitMulAspect] = ",".join(oimValue(m)
                                                     for m in sorted(_mMul, key=lambda m: str(m)))
             if _mDiv:
                 aspects[qnOimUnitDivAspect] = ",".join(oimValue(m)
                                                     for m in sorted(_mDiv, key=lambda m: str(m)))
     if parent.qname != XbrlConst.qnXbrliXbrl:
         # parent is not the xbrli:xbrl element, so record the parent and position
         aspects[qnOimTupleParentAspect] = parent.id if parent.id else "f{}".format(parent.objectIndex)
         aspects[qnOimTupleOrderAspect] = elementIndex(fact)

     footnotes = []
     for footnoteRel in footnotesRelationshipSet.fromModelObject(fact):
         footnote = {"group": footnoteRel.arcrole}
         footnotes.append(footnote)
         toObj = footnoteRel.toModelObject
         if isinstance(toObj, ModelFact):
             footnote["factRef"] = toObj.id if toObj.id else "f{}".format(toObj.objectIndex)
         else:
             footnote["footnoteType"] = toObj.role
             footnote["footnote"] = xmlstring(toObj, stripXmlns=True, contentsOnly=True, includeText=True)
             if toObj.xmlLang:
                 footnote["language"] = toObj.xmlLang
     if footnotes:
         aspects["footnotes"] = footnotes
     return aspects

Example #7

Show file

File: saveLoadableOIM.py Project: davidjbell/Arelle

def saveLoadableOIM(modelXbrl, oimFile, oimStyle, oimQNameSeparator):
    """Save the facts, footnotes and relationships of an instance in OIM form.

    The output format is chosen from the extension of ``oimFile``:

    * ``.json`` -- a single JSON report is written to ``oimFile``; ``oimStyle``
      selects the "flat" or "clustered" fact layout.
    * ``.csv``  -- the facts are written to ``oimFile`` and companion files
      named ``*-dts.csv``, ``*-relationships.csv``, ``*-footnotes.csv`` and,
      conditionally, ``*-prefixMap.csv`` and ``*-roleTypes.csv`` are written
      next to it.

    Any other extension writes nothing.

    Args:
        modelXbrl: the loaded instance; its facts, contexts, units, footnote
            relationships and document references are read.
        oimFile: output file name.
        oimStyle: JSON fact layout, "flat" or "clustered" (not used for CSV).
        oimQNameSeparator: "clark" to emit QNames in Clark notation, otherwise
            the string placed between prefix and local name.
    """
    isJSON = oimFile.endswith(".json")
    isCSV = oimFile.endswith(".csv")

    namespacePrefixes = {}  # namespace URI -> prefix

    def compileQname(qname):
        # first prefix seen for a namespace wins
        if qname.namespaceURI not in namespacePrefixes:
            namespacePrefixes[qname.namespaceURI] = qname.prefix or ""

    aspectsDefined = {
        qnOimConceptAspect,
        qnOimLocationAspect,
        qnOimValueAspect,
        qnOimPeriodAspect,
        qnOimEntityAspect}

    def oimValue(object, decimals=None):
        """Convert a QName, Decimal or date/time-like value to its output form.

        Other values are returned unchanged.
        """
        if isinstance(object, QName):
            if oimQNameSeparator == "clark":
                return object.clarkNotation
            if object.namespaceURI not in namespacePrefixes:
                if object.prefix:
                    namespacePrefixes[object.namespaceURI] = object.prefix
                else:
                    # invent a "_<n>" prefix for namespaces that have none
                    _prefix = "_{}".format(sum(1 for p in namespacePrefixes if p.startswith("_")))
                    namespacePrefixes[object.namespaceURI] = _prefix
            return "{}{}{}".format(namespacePrefixes[object.namespaceURI],
                                   oimQNameSeparator,
                                   object.localName)
        if isinstance(object, Decimal):
            try:
                if decimals is not None and not isnan(decimals) and not isinf(decimals):
                    if decimals != 0:
                        object = object / (TEN ** -decimals)
                    return "{}e{}".format(object, -decimals)
                else:
                    return "{}".format(object) # force to string to prevent json floating error
            except Exception:
                # best effort: fall back to the plain string form of the value
                return str(object)
        if isinstance(object, (DateTime, YearMonthDuration, DayTimeDuration, Time,
                               gYearMonth, gMonthDay, gYear, gMonth, gDay)):
            return str(object)
        return object

    def oimPeriodValue(cntx):
        if cntx.isForeverPeriod:
            return "forever"
        elif cntx.isStartEndPeriod:
            return "{}/{}".format(dateunionValue(cntx.startDatetime, dateOnlyHour=0),
                                  dateunionValue(cntx.endDatetime, subtractOneDay=True, dateOnlyHour=24))
        else: # instant
            return "PT0S/{}".format(dateunionValue(cntx.endDatetime, subtractOneDay=True, dateOnlyHour=24))

    hasId = False
    hasLocation = False # may be optional based on style?
    hasType = True
    hasLang = False
    hasUnits = False
    hasUnitMulMeasures = False
    hasUnitDivMeasures = False
    hasTuple = False

    # compile QNames in instance for OIM
    for fact in modelXbrl.factsInInstance:
        if fact.id:
            hasId = True
        concept = fact.concept
        if concept is not None:
            if concept.baseXbrliType in ("string", "normalizedString", "token") and fact.xmlLang:
                hasLang = True
        compileQname(fact.qname)
        if hasattr(fact, "xValue") and isinstance(fact.xValue, QName):
            compileQname(fact.xValue)
        unit = fact.unit
        if unit is not None:
            hasUnits = True
            if unit.measures[0]:
                hasUnitMulMeasures = True
            if unit.measures[1]:
                hasUnitDivMeasures = True
        if fact.modelTupleFacts:
            hasTuple = True

    entitySchemePrefixes = {}
    for cntx in modelXbrl.contexts.values():
        if cntx.entityIdentifierElement is not None:
            scheme = cntx.entityIdentifier[0]
            if scheme not in entitySchemePrefixes:
                if not entitySchemePrefixes: # first one is just scheme
                    if scheme == "http://www.sec.gov/CIK":
                        _schemePrefix = "cik"
                    elif scheme == "http://standard.iso.org/iso/17442":
                        _schemePrefix = "lei"
                    else:
                        _schemePrefix = "scheme"
                else:
                    _schemePrefix = "scheme-{}".format(len(entitySchemePrefixes) + 1)
                entitySchemePrefixes[scheme] = _schemePrefix
                namespacePrefixes[scheme] = _schemePrefix
        for dim in cntx.qnameDims.values():
            compileQname(dim.dimensionQname)
            aspectsDefined.add(dim.dimensionQname)
            if dim.isExplicit:
                compileQname(dim.memberQname)

    for unit in modelXbrl.units.values():
        if unit is not None:
            for measures in unit.measures:
                for measure in measures:
                    compileQname(measure)

    if XbrlConst.xbrli in namespacePrefixes and namespacePrefixes[XbrlConst.xbrli] != "xbrli":
        namespacePrefixes[XbrlConst.xbrli] = "xbrli" # normalize xbrli prefix

    if hasId: aspectsDefined.add(qnOimIdAspect)
    if hasLang: aspectsDefined.add(qnOimLangAspect)
    if hasTuple: aspectsDefined.add(qnOimTupleAspect)
    if hasUnits: aspectsDefined.add(qnOimUnitAspect)
    if hasUnitMulMeasures: aspectsDefined.add(qnOimUnitMulAspect)
    if hasUnitDivMeasures: aspectsDefined.add(qnOimUnitDivAspect)

    # compile footnotes and relationships
    factRelationships = []
    factFootnotes = []
    # NOTE(review): modelXbrl is passed as the first argument of its own
    # relationshipSet method -- confirm this matches that method's signature.
    for rel in modelXbrl.relationshipSet(modelXbrl, "XBRL-footnotes").modelRelationships:
        oimRel = {"linkrole": rel.linkrole, "arcrole": rel.arcrole}
        factRelationships.append(oimRel)
        oimRel["fromIds"] = [obj.id if obj.id
                             else elementChildSequence(obj)
                             for obj in rel.fromModelObjects]
        oimRel["toIds"] = [obj.id if obj.id
                           else elementChildSequence(obj)
                           for obj in rel.toModelObjects]
        _order = rel.arcElement.get("order")
        if _order is not None:
            oimRel["order"] = _order
        for obj in rel.toModelObjects:
            if isinstance(obj, ModelResource): # footnote
                oimFootnote = {"role": obj.role,
                               "id": obj.id if obj.id
                                     else elementChildSequence(obj),
                                # value needs work for html elements and for inline footnotes
                               "value": xmlstring(obj, stripXmlns=True)}
                if obj.xmlLang:
                    oimFootnote["lang"] = obj.xmlLang
                factFootnotes.append(oimFootnote)

    dtsReferences = [
        {"type": "schema" if doc.type == ModelDocument.Type.SCHEMA
                 else "linkbase" if doc.type == ModelDocument.Type.LINKBASE
                 else "other",
         "href": doc.basename}
        for doc,ref in modelXbrl.modelDocument.referencesDocument.items()
        if ref.referringModelObject.qname in SCHEMA_LB_REFS]

    roleTypes = [
        {"type": "role" if ref.referringModelObject.localName == "roleRef" else "arcroleRef",
         "href": ref.referringModelObject["href"]}
        for doc,ref in modelXbrl.modelDocument.referencesDocument.items()
        if ref.referringModelObject.qname in ROLE_REFS]

    def factAspects(fact):
        """Return a dict mapping aspect QNames to the output values for ``fact``."""
        aspects = {qnOimConceptAspect: oimValue(fact.qname)}
        if hasId and fact.id:
            aspects[qnOimIdAspect] = fact.id
        if hasLocation:
            aspects[qnOimLocationAspect] = elementChildSequence(fact)
        concept = fact.concept
        if concept is not None:
            if concept.baseXbrliType in ("string", "normalizedString", "token") and fact.xmlLang:
                aspects[qnOimLangAspect] = fact.xmlLang
        # the type aspect is always present; it is None when the fact has no
        # concept (the unguarded attribute access used to raise AttributeError)
        aspects[qnOimTypeAspect] = concept.baseXbrliType if concept is not None else None
        if fact.isItem:
            aspects[qnOimValueAspect] = (NILVALUE if fact.isNil else
                                         oimValue(fact.xValue, inferredDecimals(fact)))
        cntx = fact.context
        if cntx is not None:
            if cntx.entityIdentifierElement is not None:
                aspects[qnOimEntityAspect] = oimValue(qname(*cntx.entityIdentifier))
            if cntx.period is not None:
                aspects[qnOimPeriodAspect] = oimPeriodValue(cntx)
            for dim in cntx.qnameDims.values():
                aspects[dim.dimensionQname] = (oimValue(dim.memberQname) if dim.isExplicit
                                               else dim.typedMember.stringValue)
        unit = fact.unit
        if unit is not None:
            _mMul, _mDiv = unit.measures
            if isJSON:
                aspects[qnOimUnitAspect] = ( # use tuple instead of list for hashability
                    tuple(oimValue(m) for m in sorted(_mMul, key=lambda m: str(m))),
                    tuple(oimValue(m) for m in sorted(_mDiv, key=lambda m: str(m))))
            else: # CSV
                if _mMul:
                    aspects[qnOimUnitMulAspect] = ",".join(oimValue(m)
                                                        for m in sorted(_mMul, key=lambda m: str(m)))
                if _mDiv:
                    aspects[qnOimUnitDivAspect] = ",".join(oimValue(m)
                                                        for m in sorted(_mDiv, key=lambda m: str(m)))
        return aspects

    if isJSON:
        # save JSON

        oimFacts = []
        oimReport = []
        oimReport.append({"url": modelXbrl.modelDocument.uri})
        if oimQNameSeparator != "clark":
            oimReport.append({"prefixMap": dict((p,ns) for ns,p in namespacePrefixes.items())})
        oimReport.append({"DTSreferences": dtsReferences})
        oimReport.append({"roleTypes": roleTypes})
        oimReport.append({"facts": oimFacts})
        oimReport.append({"footnotes": factFootnotes})
        oimReport.append({"relationships": factRelationships})

        if oimStyle == "flat":

            def saveFlatJsonFacts(facts, oimFacts):
                for fact in facts:
                    oimFact = factAspects(fact)
                    if fact.modelTupleFacts:
                        tupleFacts = []
                        oimFact[qnOimTupleAspect] = tupleFacts
                        saveFlatJsonFacts(fact.modelTupleFacts, tupleFacts)
                    oimFacts.append(dict((oimValue(k),v) for k,v in oimFact.items()))

            saveFlatJsonFacts(modelXbrl.facts, oimFacts)

        elif oimStyle == "clustered":

            # build aspect-value usage per fact for every fact
            categoricalAspectValueSets = {} # for each aspect, value facts-set
            aspectIndex = {}
            indexAspect = {}
            def addCategoricalAspect(aspectQn):
                i = len(aspectIndex)
                aspectIndex[aspectQn] = i
                indexAspect[i] = oimValue(aspectQn)
                categoricalAspectValueSets[i] = defaultdict(set)

            addCategoricalAspect(qnOimConceptAspect)
            addCategoricalAspect(qnOimEntityAspect)
            addCategoricalAspect(qnOimPeriodAspect)
            for aspectQn in aspectsDefined:
                if aspectQn.namespaceURI != nsOim or aspectQn in (
                    qnOimIdAspect, qnOimLangAspect, qnOimUnitAspect):
                    addCategoricalAspect(aspectQn)

            for fact in modelXbrl.facts:
                fact._factAspectValues = {}
                fact._factAspectSet = set()
                for aspectQn, value in factAspects(fact).items():
                    if aspectQn in aspectIndex:
                        i = aspectIndex[aspectQn]
                        v = oimValue(value)
                        categoricalAspectValueSets[i][v].add(fact)
                        fact._factAspectValues[i] = v
                        fact._factAspectSet.add(i)

            # order aspectValues by largest population; an aspect that no fact
            # uses has no value sets, so default its population to 0 instead
            # of letting max() raise ValueError on an empty sequence
            maxAspectValuePopulation = [
                (_index, max((len(factSet) for factSet in oimValueFacts.values()), default=0))
                for _index, oimValueFacts in categoricalAspectValueSets.items()]

            maxAspectValuePopulation.sort(key=lambda ai_max: -ai_max[1])

            factsClustered = set()
            _aspectValue = {}

            def clusterAspect(_avpi, _data):
                if _avpi >= len(maxAspectValuePopulation):
                    return # end of aspects
                _ai = maxAspectValuePopulation[_avpi][0]
                for _v, _vFactsSet in categoricalAspectValueSets[_ai].items():
                    _aspectValue[_ai] = _v
                    _nestedData = []
                    _nestedAspect = {indexAspect[_ai]: _v, "data": _nestedData}
                    for _fact in _vFactsSet - factsClustered:
                        if (_fact._factAspectSet == _aspectValue.keys() and
                            all(_fact._factAspectValues[__ai] == _aspectValue[__ai]
                                for __ai in _aspectValue)):
                            _factAspects = factAspects(_fact)
                            # NOTE(review): the value aspect is only set for item
                            # facts; confirm non-item facts cannot reach this lookup.
                            _oimFactItem = {oimValue(qnOimValueAspect): _factAspects[qnOimValueAspect]}
                            if hasLocation:
                                _oimFactItem[oimValue(qnOimLocationAspect)] = _factAspects[qnOimLocationAspect]
                            if hasType:
                                _oimFactItem[oimValue(qnOimTypeAspect)] = _factAspects[qnOimTypeAspect]
                            _nestedData.append(_oimFactItem)
                            factsClustered.add(_fact)
                    clusterAspect(_avpi+1, _nestedData)
                    if _nestedData:
                        _data.append(_nestedAspect)
                    del _aspectValue[_ai]
            clusterAspect(0, oimFacts)

        with open(oimFile, "w", encoding="utf-8") as fh:
            fh.write(json.dumps(oimReport, ensure_ascii=False, indent=1, sort_keys=True))

    elif isCSV:
        # save CSV

        def writeCsv(csvPath, headerRow, rows):
            # the with-block closes the file even when writing a row fails
            with open(csvPath, csvOpenMode, newline=csvOpenNewline, encoding='utf-8-sig') as csvFile:
                csvWriter = csv.writer(csvFile, dialect="excel")
                csvWriter.writerow(headerRow)
                for row in rows:
                    csvWriter.writerow(row)

        # levels of tuple nesting
        def tupleDepth(facts, parentDepth):
            _levelDepth = parentDepth
            for fact in facts:
                _factDepth = tupleDepth(fact.modelTupleFacts, parentDepth + 1)
                if _factDepth > _levelDepth:
                    _levelDepth = _factDepth
            return _levelDepth
        maxDepth = tupleDepth(modelXbrl.facts, 0)

        aspectQnCol = {oimValue(qnOimConceptAspect): maxDepth - 1}
        aspectsHeader = [oimValue(qnOimConceptAspect)]

        # one concept column per nesting level; the extra ones have no header text
        for i in range(maxDepth - 1):
            aspectsHeader.append(None)

        def addAspectQnCol(aspectQn):
            aspectQnCol[aspectQn] = len(aspectsHeader)
            aspectsHeader.append(oimValue(aspectQn))

        # pre-ordered aspect columns
        if hasId:
            addAspectQnCol(qnOimIdAspect)
        if hasLocation:
            addAspectQnCol(qnOimLocationAspect)
        if hasType:
            addAspectQnCol(qnOimTypeAspect)
        addAspectQnCol(qnOimValueAspect)
        if qnOimEntityAspect in aspectsDefined:
            addAspectQnCol(qnOimEntityAspect)
        if qnOimPeriodAspect in aspectsDefined:
            addAspectQnCol(qnOimPeriodAspect)
        if qnOimUnitMulAspect in aspectsDefined:
            addAspectQnCol(qnOimUnitMulAspect)
        if qnOimUnitDivAspect in aspectsDefined:
            addAspectQnCol(qnOimUnitDivAspect)
        for aspectQn in sorted(aspectsDefined, key=lambda qn: str(qn)):
            if aspectQn.namespaceURI != nsOim:
                addAspectQnCol(aspectQn)

        def aspectCols(fact, depth):
            cols = [None for i in range(len(aspectsHeader))]
            for aspectQn, aspectValue in factAspects(fact).items():
                if aspectQn == qnOimConceptAspect:
                    # the concept goes in the column matching the fact's nesting depth
                    cols[depth - 1] = aspectValue
                elif aspectQn in aspectQnCol:
                    cols[aspectQnCol[aspectQn]] = aspectValue
            return cols

        def csvFactRows(facts, thisDepth):
            # depth-first: each fact's row is followed by the rows of its tuple children
            for fact in facts:
                yield aspectCols(fact, thisDepth)
                for childRow in csvFactRows(fact.modelTupleFacts, thisDepth + 1):
                    yield childRow

        # save facts
        writeCsv(oimFile, aspectsHeader, csvFactRows(modelXbrl.facts, 1))

        # save namespaces
        # NOTE(review): the JSON branch emits its prefix map when the separator
        # is NOT "clark"; confirm whether this condition is intentionally the
        # opposite.
        if oimQNameSeparator == "clark":
            writeCsv(oimFile.replace(".csv", "-prefixMap.csv"),
                     ("prefix", "mappedURI"),
                     ((prefix, namespaceURI)
                      for namespaceURI, prefix in sorted(namespacePrefixes.items(), key=lambda item: item[1])))

        # save dts references
        writeCsv(oimFile.replace(".csv", "-dts.csv"),
                 ("type", "href"),
                 ((oimRef["type"], oimRef["href"]) for oimRef in dtsReferences))

        # save role and arc type references
        if roleTypes:
            writeCsv(oimFile.replace(".csv", "-roleTypes.csv"),
                     ("type", "href"),
                     ((oimRef["type"], oimRef["href"]) for oimRef in roleTypes))

        # save relationships
        # the relationship entries are dicts, so test for the key directly
        hasOrder = any("order" in oimRel for oimRel in factRelationships)
        writeCsv(oimFile.replace(".csv", "-relationships.csv"),
                 ("fromIds", "toIds", "linkrole", "arcrole") +
                 (("order",) if hasOrder else ()),
                 ((",".join(oimRel["fromIds"]),
                   ",".join(oimRel["toIds"]),
                   oimRel["linkrole"],
                   oimRel["arcrole"]) +
                  ((oimRel.get("order",None),) if hasOrder else ())
                  for oimRel in factRelationships))

        # save footnotes
        footnotesHaveLang = any("lang" in oimFtn for oimFtn in factFootnotes)
        writeCsv(oimFile.replace(".csv", "-footnotes.csv"),
                 ("id", "role") + (("lang",) if footnotesHaveLang else ()) + ("value",),
                 ((oimFtn["id"], oimFtn["role"]) +
                  ((oimFtn.get("lang",None),) if footnotesHaveLang else ()) +
                  (oimFtn["value"],)
                  for oimFtn in factFootnotes))

Example #8

Show file

def saveLoadableOIM(modelXbrl, oimFile, oimStyle, oimQNameSeparator):
    """Export the facts, footnotes and relationships of an instance as OIM.

    The output format is selected by the extension of ``oimFile``:

    * ``.json`` -- one JSON report is written to ``oimFile``.
    * ``.csv``  -- facts are written to ``oimFile`` and the DTS references,
      role types, relationships, footnotes (and prefix map) are written to
      companion files named after ``oimFile`` with ``-dts.csv``,
      ``-roleTypes.csv``, ``-relationships.csv``, ``-footnotes.csv`` and
      ``-prefixMap.csv`` suffixes.

    Any other extension results in nothing being written.

    Args:
        modelXbrl: loaded instance model supplying facts, contexts, units,
            referenced documents and footnote relationships.
        oimFile: path of the primary output file.
        oimStyle: JSON fact layout, ``"flat"`` or ``"clustered"``; any other
            value leaves the JSON facts list empty.  Not consulted for CSV.
        oimQNameSeparator: ``"clark"`` to write QNames in Clark notation,
            otherwise the string placed between prefix and local name.
    """
    isJSON = oimFile.endswith(".json")
    isCSV = oimFile.endswith(".csv")

    # namespace URI (or entity scheme URI) -> prefix used in the output
    namespacePrefixes = {}

    def compileQname(qname):
        """Register the namespace of ``qname`` under its prefix if not yet known."""
        if qname.namespaceURI not in namespacePrefixes:
            namespacePrefixes[qname.namespaceURI] = qname.prefix or ""

    aspectsDefined = {
        qnOimConceptAspect, qnOimLocationAspect, qnOimValueAspect,
        qnOimPeriodAspect, qnOimEntityAspect
    }

    def oimValue(value, decimals=None):
        """Convert ``value`` to its OIM output representation.

        QNames become Clark notation or ``prefix<separator>localName``,
        Decimals become strings (in e-notation when ``decimals`` is a finite
        number), date/time/duration values become ``str(value)``; anything
        else is returned unchanged.
        """
        if isinstance(value, QName):
            if oimQNameSeparator == "clark":
                return value.clarkNotation
            if value.namespaceURI not in namespacePrefixes:
                if value.prefix:
                    namespacePrefixes[value.namespaceURI] = value.prefix
                else:
                    # Synthesize "_<n>"; count existing synthetic *prefixes*
                    # (the dict values -- the keys are namespace URIs) so
                    # each prefix-less namespace gets a distinct one.
                    _prefix = "_{}".format(
                        sum(1 for p in namespacePrefixes.values()
                            if p.startswith("_")))
                    namespacePrefixes[value.namespaceURI] = _prefix
            return "{}{}{}".format(namespacePrefixes[value.namespaceURI],
                                   oimQNameSeparator, value.localName)
        if isinstance(value, Decimal):
            try:
                if (decimals is not None and not isnan(decimals)
                        and not isinf(decimals)):
                    if decimals != 0:
                        value = value / (TEN**-decimals)
                    return "{}e{}".format(value, -decimals)
                # force to string to prevent json floating error
                return "{}".format(value)
            except Exception:
                # best effort: fall back to the plain string form
                return str(value)
        if isinstance(value,
                      (DateTime, YearMonthDuration, DayTimeDuration, Time,
                       gYearMonth, gMonthDay, gYear, gMonth, gDay)):
            return str(value)
        return value

    def oimPeriodValue(cntx):
        """Return the period of ``cntx`` as "forever", "start/end" or "PT0S/end"."""
        if cntx.isForeverPeriod:
            return "forever"
        elif cntx.isStartEndPeriod:
            return "{}/{}".format(
                dateunionValue(cntx.startDatetime, dateOnlyHour=0),
                dateunionValue(cntx.endDatetime,
                               subtractOneDay=True,
                               dateOnlyHour=24))
        else:  # instant
            return "PT0S/{}".format(
                dateunionValue(cntx.endDatetime,
                               subtractOneDay=True,
                               dateOnlyHour=24))

    hasId = False
    hasLocation = False  # may be optional based on style?
    hasType = True
    hasLang = False
    hasUnits = False
    hasUnitMulMeasures = False
    hasUnitDivMeasures = False
    hasTuple = False

    # compile QNames in instance for OIM
    for fact in modelXbrl.factsInInstance:
        if fact.id:
            hasId = True
        concept = fact.concept
        if concept is not None:
            if concept.baseXbrliType in ("string", "normalizedString",
                                         "token") and fact.xmlLang:
                hasLang = True
        compileQname(fact.qname)
        if hasattr(fact, "xValue") and isinstance(fact.xValue, QName):
            compileQname(fact.xValue)
        unit = fact.unit
        if unit is not None:
            hasUnits = True
            if unit.measures[0]:
                hasUnitMulMeasures = True
            if unit.measures[1]:
                hasUnitDivMeasures = True
        if fact.modelTupleFacts:
            hasTuple = True

    # assign a prefix to every entity identifier scheme and register the
    # dimension / member QNames used by the contexts
    entitySchemePrefixes = {}
    for cntx in modelXbrl.contexts.values():
        if cntx.entityIdentifierElement is not None:
            scheme = cntx.entityIdentifier[0]
            if scheme not in entitySchemePrefixes:
                if not entitySchemePrefixes:  # first one is just scheme
                    if scheme == "http://www.sec.gov/CIK":
                        _schemePrefix = "cik"
                    elif scheme == "http://standard.iso.org/iso/17442":
                        _schemePrefix = "lei"
                    else:
                        _schemePrefix = "scheme"
                else:
                    _schemePrefix = "scheme-{}".format(
                        len(entitySchemePrefixes) + 1)
                entitySchemePrefixes[scheme] = _schemePrefix
                namespacePrefixes[scheme] = _schemePrefix
        for dim in cntx.qnameDims.values():
            compileQname(dim.dimensionQname)
            aspectsDefined.add(dim.dimensionQname)
            if dim.isExplicit:
                compileQname(dim.memberQname)

    for unit in modelXbrl.units.values():
        if unit is not None:
            for measures in unit.measures:
                for measure in measures:
                    compileQname(measure)

    if XbrlConst.xbrli in namespacePrefixes and namespacePrefixes[
            XbrlConst.xbrli] != "xbrli":
        namespacePrefixes[XbrlConst.xbrli] = "xbrli"  # normalize xbrli prefix

    if hasId: aspectsDefined.add(qnOimIdAspect)
    if hasLang: aspectsDefined.add(qnOimLangAspect)
    if hasTuple: aspectsDefined.add(qnOimTupleAspect)
    if hasUnits: aspectsDefined.add(qnOimUnitAspect)
    if hasUnitMulMeasures: aspectsDefined.add(qnOimUnitMulAspect)
    if hasUnitDivMeasures: aspectsDefined.add(qnOimUnitDivAspect)

    # compile footnotes and relationships
    factRelationships = []
    factFootnotes = []
    # NOTE(review): modelXbrl is passed explicitly as the first argument of
    # its own relationshipSet() -- confirm against that method's signature.
    for rel in modelXbrl.relationshipSet(modelXbrl,
                                         "XBRL-footnotes").modelRelationships:
        oimRel = {"linkrole": rel.linkrole, "arcrole": rel.arcrole}
        factRelationships.append(oimRel)
        # objects without an id are identified by their child sequence
        oimRel["fromIds"] = [
            obj.id if obj.id else elementChildSequence(obj)
            for obj in rel.fromModelObjects
        ]
        oimRel["toIds"] = [
            obj.id if obj.id else elementChildSequence(obj)
            for obj in rel.toModelObjects
        ]
        _order = rel.arcElement.get("order")
        if _order is not None:
            oimRel["order"] = _order
        for obj in rel.toModelObjects:
            if isinstance(obj, ModelResource):  # footnote
                oimFootnote = {
                    "role": obj.role,
                    "id": obj.id if obj.id else elementChildSequence(obj),
                    # value needs work for html elements and for inline footnotes
                    "value": xmlstring(obj, stripXmlns=True)
                }
                if obj.xmlLang:
                    oimFootnote["lang"] = obj.xmlLang
                factFootnotes.append(oimFootnote)

    dtsReferences = [{
        "type":
        "schema" if doc.type == ModelDocument.Type.SCHEMA else
        "linkbase" if doc.type == ModelDocument.Type.LINKBASE else "other",
        "href":
        doc.basename
    } for doc, ref in modelXbrl.modelDocument.referencesDocument.items()
                     if ref.referringModelObject.qname in SCHEMA_LB_REFS]

    roleTypes = [{
        "type":
        "role"
        if ref.referringModelObject.localName == "roleRef" else "arcroleRef",
        "href":
        ref.referringModelObject["href"]
    } for doc, ref in modelXbrl.modelDocument.referencesDocument.items()
                 if ref.referringModelObject.qname in ROLE_REFS]

    def factAspects(fact):
        """Return a dict mapping aspect QName to output value for ``fact``."""
        aspects = {qnOimConceptAspect: oimValue(fact.qname)}
        if hasId and fact.id:
            aspects[qnOimIdAspect] = fact.id
        if hasLocation:
            aspects[qnOimLocationAspect] = elementChildSequence(fact)
        concept = fact.concept
        if concept is not None:
            if concept.baseXbrliType in ("string", "normalizedString",
                                         "token") and fact.xmlLang:
                aspects[qnOimLangAspect] = fact.xmlLang
        # The type aspect is always present (readers index it directly);
        # it is None when the fact has no concept instead of raising.
        aspects[qnOimTypeAspect] = (concept.baseXbrliType
                                    if concept is not None else None)
        if fact.isItem:
            aspects[qnOimValueAspect] = (NILVALUE if fact.isNil else oimValue(
                fact.xValue, inferredDecimals(fact)))
        cntx = fact.context
        if cntx is not None:
            if cntx.entityIdentifierElement is not None:
                aspects[qnOimEntityAspect] = oimValue(
                    qname(*cntx.entityIdentifier))
            if cntx.period is not None:
                aspects[qnOimPeriodAspect] = oimPeriodValue(cntx)
            for dim in cntx.qnameDims.values():
                aspects[dim.dimensionQname] = (oimValue(dim.memberQname)
                                               if dim.isExplicit else
                                               dim.typedMember.stringValue)
        unit = fact.unit
        if unit is not None:
            _mMul, _mDiv = unit.measures
            if isJSON:
                # use tuple instead of list for hashability
                aspects[qnOimUnitAspect] = (
                    tuple(
                        oimValue(m)
                        for m in sorted(_mMul, key=lambda m: str(m))),
                    tuple(
                        oimValue(m)
                        for m in sorted(_mDiv, key=lambda m: str(m))))
            else:  # CSV
                if _mMul:
                    aspects[qnOimUnitMulAspect] = ",".join(
                        oimValue(m)
                        for m in sorted(_mMul, key=lambda m: str(m)))
                if _mDiv:
                    aspects[qnOimUnitDivAspect] = ",".join(
                        oimValue(m)
                        for m in sorted(_mDiv, key=lambda m: str(m)))
        return aspects

    if isJSON:
        # save JSON

        oimFacts = []
        oimReport = []  # top level of oim json output
        oimReport.append({"url": modelXbrl.modelDocument.uri})
        if oimQNameSeparator != "clark":
            oimReport.append({
                "prefixMap":
                dict((p, ns) for ns, p in namespacePrefixes.items())
            })
        oimReport.append({"DTSreferences": dtsReferences})
        oimReport.append({"roleTypes": roleTypes})
        oimReport.append({"facts": oimFacts})
        oimReport.append({"footnotes": factFootnotes})
        oimReport.append({"relationships": factRelationships})

        if oimStyle == "flat":

            def saveFlatJsonFacts(facts, oimFacts):
                """Append one dict per fact to ``oimFacts``, nesting tuple children."""
                for fact in facts:
                    oimFact = factAspects(fact)
                    if fact.modelTupleFacts:
                        tupleFacts = []
                        oimFact[qnOimTupleAspect] = tupleFacts
                        saveFlatJsonFacts(fact.modelTupleFacts, tupleFacts)
                    oimFacts.append(
                        dict((oimValue(k), v) for k, v in oimFact.items()))

            saveFlatJsonFacts(modelXbrl.facts, oimFacts)

        elif oimStyle == "clustered":

            # build aspect-value usage per fact for every fact
            categoricalAspectValueSets = {}  # for each aspect, value facts-set
            aspectIndex = {}  # aspect QName -> index
            indexAspect = {}  # index -> output name of the aspect

            def addCategoricalAspect(aspectQn):
                """Assign the next index to ``aspectQn`` and create its value map."""
                i = len(aspectIndex)
                aspectIndex[aspectQn] = i
                indexAspect[i] = oimValue(aspectQn)
                categoricalAspectValueSets[i] = defaultdict(set)

            addCategoricalAspect(qnOimConceptAspect)
            addCategoricalAspect(qnOimEntityAspect)
            addCategoricalAspect(qnOimPeriodAspect)
            for aspectQn in aspectsDefined:
                if aspectQn.namespaceURI != nsOim or aspectQn in (
                        qnOimIdAspect, qnOimLangAspect, qnOimUnitAspect):
                    addCategoricalAspect(aspectQn)

            for fact in modelXbrl.facts:
                fact._factAspectValues = {}
                fact._factAspectSet = set()
                for aspectQn, value in factAspects(fact).items():
                    if aspectQn in aspectIndex:
                        i = aspectIndex[aspectQn]
                        v = oimValue(value)
                        categoricalAspectValueSets[i][v].add(fact)
                        fact._factAspectValues[i] = v
                        fact._factAspectSet.add(i)

            # order aspectValues by largest population; an aspect that no
            # fact uses has an empty value map, hence default=0
            maxAspectValuePopulation = [
                (_index,
                 max((len(factSet) for factSet in oimValueFacts.values()),
                     default=0))
                for _index, oimValueFacts in
                categoricalAspectValueSets.items()
            ]

            maxAspectValuePopulation.sort(key=lambda ai_max: -ai_max[1])

            factsClustered = set()
            _aspectValue = {}  # aspect index -> value on the current nesting path

            def clusterAspect(_avpi, _data):
                """Nest facts under each value of the ``_avpi``-th ranked aspect.

                A fact is emitted at the level where the aspects on the
                current path are exactly the fact's own aspects with equal
                values; deeper aspects are handled by recursion.
                """
                if _avpi >= len(maxAspectValuePopulation):
                    return  # end of aspects
                _ai = maxAspectValuePopulation[_avpi][0]
                for _v, _vFactsSet in categoricalAspectValueSets[_ai].items():
                    _aspectValue[_ai] = _v
                    _nestedData = []
                    _nestedAspect = {indexAspect[_ai]: _v, "data": _nestedData}
                    for _fact in _vFactsSet - factsClustered:
                        if (_fact._factAspectSet == _aspectValue.keys()
                                and all(_fact._factAspectValues[__ai]
                                        == _aspectValue[__ai]
                                        for __ai in _aspectValue)):
                            _factAspects = factAspects(_fact)
                            _oimFactItem = {
                                oimValue(qnOimValueAspect):
                                _factAspects[qnOimValueAspect]
                            }
                            if hasLocation:
                                _oimFactItem[oimValue(
                                    qnOimLocationAspect
                                )] = _factAspects[qnOimLocationAspect]
                            if hasType:
                                _oimFactItem[oimValue(
                                    qnOimTypeAspect
                                )] = _factAspects[qnOimTypeAspect]
                            _nestedData.append(_oimFactItem)
                            factsClustered.add(_fact)
                    clusterAspect(_avpi + 1, _nestedData)
                    if _nestedData:
                        _data.append(_nestedAspect)
                    del _aspectValue[_ai]

            clusterAspect(0, oimFacts)

        with open(oimFile, "w", encoding="utf-8") as fh:
            fh.write(
                json.dumps(oimReport,
                           ensure_ascii=False,
                           indent=1,
                           sort_keys=True))

    elif isCSV:
        # save CSV

        # Companion files share the path of oimFile minus its trailing
        # ".csv" (isCSV guarantees the suffix is present).
        csvBase = oimFile[:-len(".csv")]

        def openCsv(path):
            """Open ``path`` for CSV output using the module's CSV open settings."""
            return open(path,
                        csvOpenMode,
                        newline=csvOpenNewline,
                        encoding='utf-8-sig')

        # levels of tuple nesting
        def tupleDepth(facts, parentDepth):
            """Return the deepest nesting level reached below ``facts``."""
            _levelDepth = parentDepth
            for fact in facts:
                _factDepth = tupleDepth(fact.modelTupleFacts, parentDepth + 1)
                if _factDepth > _levelDepth:
                    _levelDepth = _factDepth
            return _levelDepth

        maxDepth = tupleDepth(modelXbrl.facts, 0)

        aspectQnCol = {oimValue(qnOimConceptAspect): maxDepth - 1}
        aspectsHeader = [oimValue(qnOimConceptAspect)]

        # one unnamed concept column per additional tuple nesting level
        aspectsHeader.extend([None] * (maxDepth - 1))

        def addAspectQnCol(aspectQn):
            """Append a header column for ``aspectQn`` and remember its index."""
            aspectQnCol[aspectQn] = len(aspectsHeader)
            aspectsHeader.append(oimValue(aspectQn))

        # pre-ordered aspect columns
        if hasId:
            addAspectQnCol(qnOimIdAspect)
        if hasLocation:
            addAspectQnCol(qnOimLocationAspect)
        if hasType:
            addAspectQnCol(qnOimTypeAspect)
        addAspectQnCol(qnOimValueAspect)
        if qnOimEntityAspect in aspectsDefined:
            addAspectQnCol(qnOimEntityAspect)
        if qnOimPeriodAspect in aspectsDefined:
            addAspectQnCol(qnOimPeriodAspect)
        if qnOimUnitMulAspect in aspectsDefined:
            addAspectQnCol(qnOimUnitMulAspect)
        if qnOimUnitDivAspect in aspectsDefined:
            addAspectQnCol(qnOimUnitDivAspect)
        for aspectQn in sorted(aspectsDefined, key=lambda qn: str(qn)):
            if aspectQn.namespaceURI != nsOim:
                addAspectQnCol(aspectQn)

        def aspectCols(fact, depth):
            """Return the CSV row of ``fact``; the concept goes in column ``depth - 1``."""
            cols = [None] * len(aspectsHeader)
            for aspectQn, aspectValue in factAspects(fact).items():
                if aspectQn == qnOimConceptAspect:
                    cols[depth - 1] = aspectValue
                elif aspectQn in aspectQnCol:
                    cols[aspectQnCol[aspectQn]] = aspectValue
            return cols

        # save facts
        with openCsv(oimFile) as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            csvWriter.writerow(aspectsHeader)

            def saveCSVfacts(facts, thisDepth):
                """Write ``facts`` and, recursively, their tuple children."""
                for fact in facts:
                    csvWriter.writerow(aspectCols(fact, thisDepth))
                    saveCSVfacts(fact.modelTupleFacts, thisDepth + 1)

            saveCSVfacts(modelXbrl.facts, 1)

        # save namespaces
        # NOTE(review): the JSON branch emits its prefixMap when the separator
        # is NOT "clark"; this condition is the opposite -- confirm intent.
        if oimQNameSeparator == "clark":
            with openCsv(csvBase + "-prefixMap.csv") as csvFile:
                csvWriter = csv.writer(csvFile, dialect="excel")
                csvWriter.writerow(("prefix", "mappedURI"))
                for namespaceURI, prefix in sorted(namespacePrefixes.items(),
                                                   key=lambda item: item[1]):
                    csvWriter.writerow((prefix, namespaceURI))

        # save dts references
        with openCsv(csvBase + "-dts.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            csvWriter.writerow(("type", "href"))
            for oimRef in dtsReferences:
                csvWriter.writerow((oimRef["type"], oimRef["href"]))

        # save role and arc type references
        if roleTypes:
            with openCsv(csvBase + "-roleTypes.csv") as csvFile:
                csvWriter = csv.writer(csvFile, dialect="excel")
                csvWriter.writerow(("type", "href"))
                for oimRef in roleTypes:
                    csvWriter.writerow((oimRef["type"], oimRef["href"]))

        # save relationships
        with openCsv(csvBase + "-relationships.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            # the "order" column is only written if some relationship has one
            hasOrder = any("order" in oimRel for oimRel in factRelationships)
            csvWriter.writerow(("fromIds", "toIds", "linkrole", "arcrole") +
                               (("order", ) if hasOrder else ()))
            for oimRel in factRelationships:
                csvWriter.writerow(
                    (",".join(oimRel["fromIds"]), ",".join(oimRel["toIds"]),
                     oimRel["linkrole"], oimRel["arcrole"]) +
                    ((oimRel.get("order", None), ) if hasOrder else ()))

        # save footnotes
        with openCsv(csvBase + "-footnotes.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            # the "lang" column is only written if some footnote has one
            footnotesHaveLang = any("lang" in oimFtn
                                    for oimFtn in factFootnotes)
            csvWriter.writerow(("id", "role") +
                               (("lang", ) if footnotesHaveLang else ()) +
                               ("value", ))
            for oimFtn in factFootnotes:
                csvWriter.writerow(
                    (oimFtn["id"], oimFtn["role"]) +
                    ((oimFtn.get("lang", None), )
                     if footnotesHaveLang else ()) + (oimFtn["value"], ))

Example #9

Show file

File: instanceInfo.py Project: selgamal/Arelle

def showInfo(cntlr, options, modelXbrl, _entrypoint, *args, **kwargs):
    for url, doc in sorted(modelXbrl.urlDocs.items(), key=lambda i: i[0]):
        if not any(url.startswith(w) for w in ("https://xbrl.sec.gov/", "http://xbrl.sec.gov/", "http://xbrl.fasb.org/", "http://www.xbrl.org/",
                                               "http://xbrl.ifrs.org/", "http://www.esma.europa.eu/")):
            if os.path.exists(doc.filepath): # skip if in an archive or stream
                cntlr.addToLog("File {} size {:,}".format(doc.basename, os.path.getsize(doc.filepath)), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Heap memory before loading {:,}".format(memoryAtStartup), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Heap memory after loading {:,}".format(cntlr.memoryUsed), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Time to load {:.2f} seconds".format(time.time() - timeAtStart), messageCode="info", level=logging.DEBUG)
    isInlineXbrl = modelXbrl.modelDocument.type in (ModelDocument.Type.INLINEXBRL, ModelDocument.Type.INLINEXBRLDOCUMENTSET)
    if isInlineXbrl:
        instanceType = "inline XBRL, number of documents {}".format(len(modelXbrl.ixdsHtmlElements))
    else:
        instanceType = "xBRL-XML"
    cntlr.addToLog("Instance type {}".format(instanceType), messageCode="info", level=logging.DEBUG)
    numContexts = len(modelXbrl.contexts)
    numLongContexts = 0
    bytesSaveableInline = 0
    bytesSaveableInlineWithCsv = 0
    frequencyOfDims = {}
    sumNumDims = 0
    distinctDurations = set()
    distinctInstants = set()
    shortContextIdLen = int(math.log10(numContexts or 1)) + 2 # if no contexts, use 1 for log function to work
    xbrlQnameCountInline = 0
    xbrlQnameCountInlineWithCsv = 0
    xbrlQnameLengthsInline = 0
    xbrlQnameLengthsInlineWithCsv = 0
    for c in modelXbrl.contexts.values():
        sumNumDims += len(c.qnameDims)
        for d in c.qnameDims.values():
            dimQname = str(d.dimensionQname)
            frequencyOfDims[dimQname] = frequencyOfDims.get(dimQname,0) + 1
            xbrlQnameCountInline += 1
            xbrlQnameCountInlineWithCsv += 1
            xbrlQnameLengthsInline += len(d.dimensionQname.localName)
            xbrlQnameLengthsInlineWithCsv += len(d.dimensionQname.localName)
        if c.isInstantPeriod:
            distinctInstants.add(c.instantDatetime)
        elif c.isStartEndPeriod:
            distinctDurations.add((c.startDatetime, c.endDatetime))
        if len(c.id) > shortContextIdLen:
            bytesSaveableInline += len(c.id) - shortContextIdLen
            bytesSaveableInlineWithCsv += len(c.id) - shortContextIdLen
    cntlr.addToLog("Number of contexts {:,}".format(numContexts), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of distinct durations {:,}".format(len(distinctDurations)), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of distinct instants {:,}".format(len(distinctInstants)), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Avg number dimensions per contexts {:,.2f}".format(sumNumDims/numContexts if numContexts else 0), messageCode="info", level=logging.DEBUG)
    mostPopularDims = sorted(frequencyOfDims.items(), key=lambda i:"{:0>9},{}".format(999999999-i[1],i[0]))
    for dimName, count in mostPopularDims[0:3]:
        cntlr.addToLog("Dimension {} used in {:,} contexts".format(dimName, count), messageCode="info", level=logging.DEBUG)
        
    # analyze for tables which could be composed from CSV data
    tblFacts = defaultdict(set)
    tblNestedTables = defaultdict(set)
    factSize = {}
    for f in modelXbrl.factsInInstance:
        for tdElt in ancestors(f, xhtml, "td"):
            factSize[f] = len(xmlstring(tdElt,stripXmlns=True))
            break
        childTblElt = None
        for tblElt in ancestors(f, xhtml, "table"):
            tblFacts[tblElt].add(f)
            if childTblElt:
                tblNestedTables[tblElt].add(childTblElt)
    
    # find tables containing only numeric facts
    def tblNestedFactCount(tbl):
        c = len(tblFacts.get(tbl, ()))
        for nestedTbl in tblNestedTables.get(tbl,()):
            c += tblNestedFactCount(nestedTbl)
        return c
    
    factsInInstance = len(modelXbrl.factsInInstance)
    factsInTables = len(set.union(*(fset for fset in tblFacts.values())))
    cntlr.addToLog("Facts in instance: {:,}, facts in tables: {:,}".format(factsInInstance,factsInTables), messageCode="info", level=logging.DEBUG)
    
    numTblsEligible = 0
    numFactsEligible = 0
    bytesCsvSavings = 0
    factsEligibleForCsv = set()
    tablesWithEligibleFacts = set()
    if tblFacts and factSize:
        # find eligible tables, have facts and not nested tables with other facts
        for tbl, facts in tblFacts.items():
            if len(facts) == tblNestedFactCount(tbl):
                s = sum(factSize.get(f,0) for f in facts) - sum(len(str(f.value)) for f in facts)
                if s > 10000:
                    numTblsEligible += 1
                    bytesCsvSavings += s
                    numFactsEligible += len(facts)
                    factsEligibleForCsv |= facts
                    tablesWithEligibleFacts.add(tbl)
    numFacts = 0
    numTableTextBlockFacts = 0
    lenTableTextBlockFacts = 0
    numTextBlockFacts = 0
    lenTextBlockFacts = 0
    distinctElementsInFacts = set()
    factsPerContext = {}
    factForConceptContextUnitHash = defaultdict(list)
    for f in modelXbrl.factsInInstance:
        context = f.context
        concept = f.concept
        distinctElementsInFacts.add(f.qname)
        numFacts += 1
        if f.qname.localName.endswith("TableTextBlock"):
            numTableTextBlockFacts += 1
            lenTableTextBlockFacts += len(f.xValue)
        elif f.qname.localName.endswith("TextBlock"):
            numTextBlockFacts += 1
            lenTextBlockFacts += len(f.xValue)
        if context is not None and concept is not None:
            factsPerContext[context.id] = factsPerContext.get(context.id,0) + 1
            factForConceptContextUnitHash[f.conceptContextUnitHash].append(f)
            bytesSaveableInline += len(context.id) - shortContextIdLen
            if f not in factsEligibleForCsv:
                bytesSaveableInlineWithCsv += len(context.id) - shortContextIdLen
            
            
    if numTblsEligible:
        cntlr.addToLog("Tables eligible for facts in CSV: {:,}, facts eligible for CSV: {:,}, bytes saveable by facts in CSV {:,}".format(numTblsEligible, numFactsEligible, bytesCsvSavings), messageCode="info", level=logging.DEBUG)
    else:
        cntlr.addToLog("No tables eligible for facts in CSV", messageCode="info", level=logging.DEBUG)
        

    mostPopularContexts = sorted(factsPerContext.items(), key=lambda i:"{:0>9},{}".format(999999999-i[1],i[0]))
    cntlr.addToLog("Number of facts {:,}".format(numFacts), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of TableTextBlock facts {:,} avg len {:,.0f}".format(numTableTextBlockFacts, lenTableTextBlockFacts/numTableTextBlockFacts if numTableTextBlockFacts else 0), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of TextBlock facts {:,} avg len {:,.0f}".format(numTextBlockFacts, lenTextBlockFacts/numTableTextBlockFacts if numTableTextBlockFacts else 0), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Max number facts per context {:,}".format(mostPopularContexts[0][1] if mostPopularContexts else 0), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Avg number facts per context {:,.2f}".format(sum([v for v in factsPerContext.values()])/numContexts if numContexts else 0), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Distinct elements in facts {:,}".format(len(distinctElementsInFacts)), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of bytes saveable context id of {} length is {:,}".format(shortContextIdLen, bytesSaveableInline), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Excepting facts eligible for CSV, number of bytes saveable context id of {} length is {:,}".format(shortContextIdLen, bytesSaveableInlineWithCsv), messageCode="info", level=logging.DEBUG)

    aspectEqualFacts = defaultdict(list)
    decVals = {}
    numConsistentDupFacts = numInConsistentDupFacts = 0
    for hashEquivalentFacts in factForConceptContextUnitHash.values():
        if len(hashEquivalentFacts) > 1:
            for f in hashEquivalentFacts:
                aspectEqualFacts[(f.qname,f.contextID,f.unitID,
                                  f.xmlLang.lower() if f.concept.type.isWgnStringFactType else None)].append(f)
            for fList in aspectEqualFacts.values():
                f0 = fList[0]
                if f0.concept.isNumeric:
                    if any(f.isNil for f in fList):
                        _inConsistent = not all(f.isNil for f in fList)
                    else: # not all have same decimals
                        _d = inferredDecimals(f0)
                        _v = f0.xValue
                        _inConsistent = isnan(_v) # NaN is incomparable, always makes dups inconsistent
                        decVals[_d] = _v
                        aMax, bMin = rangeValue(_v, _d)
                        for f in fList[1:]:
                            _d = inferredDecimals(f)
                            _v = f.xValue
                            if isnan(_v):
                                _inConsistent = True
                                break
                            if _d in decVals:
                                _inConsistent |= _v != decVals[_d]
                            else:
                                decVals[_d] = _v
                            a, b = rangeValue(_v, _d)
                            if a > aMax: aMax = a
                            if b < bMin: bMin = b
                        if not _inConsistent:
                            _inConsistent = (bMin < aMax)
                        decVals.clear()
                else:
                    _inConsistent = any(not f.isVEqualTo(f0) for f in fList[1:])
                if _inConsistent:
                    numInConsistentDupFacts += 1
                else:
                    numConsistentDupFacts += 1
                    
            aspectEqualFacts.clear()
    cntlr.addToLog("Number of duplicate facts consistent {:,} inconsistent {:,}".format(numConsistentDupFacts, numInConsistentDupFacts), messageCode="info", level=logging.DEBUG)
    
    styleAttrCountsInline = {}
    styleAttrCountsInlineWithCsv = {}
    totalStyleLenInline = 0
    totalStyleLenInlineWithCsv = 0
    continuationElements = {}
    ixNsPrefix = "{http://www.xbrl.org/2013/inlineXBRL}"
    for ixdsHtmlRootElt in getattr(modelXbrl, "ixdsHtmlElements", ()): # ix root elements if inline
        for ixElt in ixdsHtmlRootElt.iterdescendants():
            inEligibleTableForCsv = any(p in tablesWithEligibleFacts for p in ixElt.iterancestors("{http://www.w3.org/1999/xhtml}table"))
            style = ixElt.get("style")
            ixEltTag = str(ixElt.tag)
            if style:
                styleAttrCountsInline[style] = styleAttrCountsInline.get(style,0) + 1
                if not inEligibleTableForCsv:
                    styleAttrCountsInlineWithCsv[style] = styleAttrCountsInlineWithCsv.get(style,0) + 1
                if styleIxHiddenPattern.match(style) is None:
                    totalStyleLenInline += len(style)
                    if not inEligibleTableForCsv:
                        totalStyleLenInlineWithCsv += len(style)
            if ixEltTag == "{http://www.xbrl.org/2013/inlineXBRL}continuation" and ixElt.id:
                continuationElements[ixElt.id] = ixElt
            if ixEltTag.startswith(ixNsPrefix):
                localName = ixEltTag[len(ixNsPrefix):]
                if localName == "continuation" and ixElt.id:
                    continuationElements[ixElt.id] = ixElt
                elif localName in ("nonFraction", "nonNumeric", "fraction"):
                    xbrlQnameCountInline += 1
                    xbrlQnameLengthsInline += len(ixElt.qname.localName)
                    if not inEligibleTableForCsv:
                        xbrlQnameCountInlineWithCsv += 1
                        xbrlQnameLengthsInlineWithCsv += len(ixElt.qname.localName)
            elif isinstance(ixElt, ModelFact):
                xbrlQnameCountInline += 2
                xbrlQnameLengthsInline += len(ixElt.qname.localName)
                if not inEligibleTableForCsv:
                    xbrlQnameCountInlineWithCsv += 2
                    xbrlQnameLengthsInlineWithCsv += len(ixElt.qname.localName)

    def locateContinuation(element, chain=None):
        """Follow the ``continuedAt`` references starting at *element*.

        Each resolved continuation is looked up by id in the enclosing
        ``continuationElements`` dict, appended to *chain*, and recorded on
        the referring element as ``_continuationElement``.

        The walk stops when an element has no ``continuedAt`` attribute, when
        the referenced id is not in ``continuationElements`` (dangling
        reference), or when the referenced element is already in the chain
        (cycle).

        :param element: element whose ``continuedAt`` attribute starts the walk
        :param chain: optional list of elements already visited; when None a
            new list beginning with *element* is created
        :returns: number of elements in the chain, counting the starting
            element; always an int, so callers can compare it numerically
            even for dangling or cyclic references
        """
        if chain is None:
            chain = [element]
        # Iterative rather than recursive so chain length is not bounded by
        # the interpreter's recursion limit.
        while True:
            contAt = element.get("continuedAt")
            if not contAt or contAt not in continuationElements:
                break  # end of chain, or reference to an unknown continuation id
            contElt = continuationElements[contAt]
            if contElt in chain:
                break  # cycle: do not follow a continuation already visited
            chain.append(contElt)
            element._continuationElement = contElt
            element = contElt
        return len(chain)

    numContinuations = 0
    maxLenLen = 0
    maxLenHops = 0
    maxHops = 0
    maxHopsLen = 0
    for f in modelXbrl.factsInInstance:
        if f.get("continuedAt"):
            numContinuations += 1
            _len = len(f.xValue)
            _hops = locateContinuation(f)
            if _hops > maxHops:
                maxHops = _hops
                maxHopsLen = _len
            if _len > maxLenLen:
                maxLenLen = _len
                maxLenHops = _hops

    cntlr.addToLog("Number of continuation facts {:,}".format(numContinuations), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Longest continuation fact {:,} number of hops {:,}".format(maxLenLen, maxLenHops), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Most continuation hops {:,} fact len {:,}".format(maxHops, maxHopsLen), messageCode="info", level=logging.DEBUG)

    numDupStyles = sum(1 for n in styleAttrCountsInline.values() if n > 1)
    bytesSaveableByCssInline = sum(len(s)*(n-1) for s,n in styleAttrCountsInline.items() if n > 1)
    cntlr.addToLog("Number of duplicate styles {:,}, bytes saveable by CSS {:,}, len of all non-ix-hidden @styles {:,}".format(numDupStyles, bytesSaveableByCssInline, totalStyleLenInline), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of XBRL QNames {:,}, bytes saveable by EBA-style element names {:,}".format(xbrlQnameCountInline, xbrlQnameLengthsInline - (5*xbrlQnameCountInline)), messageCode="info", level=logging.DEBUG)
    numDupStyles = sum(1 for n in styleAttrCountsInlineWithCsv.values() if n > 1)
    bytesSaveableByCssInlineWithCsv = sum(len(s)*(n-1) for s,n in styleAttrCountsInlineWithCsv.items() if n > 1)
    cntlr.addToLog("Excepting facts eligible for CSV, number of duplicate styles {:,}, bytes saveable by CSS {:,}, len of all non-ix-hidden @styles {:,}".format(numDupStyles, bytesSaveableByCssInlineWithCsv, totalStyleLenInlineWithCsv), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Excepting facts eligible for CSV, number of XBRL QNames {:,}, bytes saveable by EBA-style element names {:,}".format(xbrlQnameCountInlineWithCsv, xbrlQnameLengthsInlineWithCsv - (5*xbrlQnameCountInlineWithCsv)), messageCode="info", level=logging.DEBUG)

Example #10

Show file

 def dimValKey(cntx, typedDim=False):
     """Return a '|'-joined, sorted key of "dimension(member)" entries.

     One entry is produced per value in ``cntx.qnameDims``. The member part
     is the dimension's ``memberQname`` when the dimension is explicit;
     otherwise it is the serialized ``typedMember`` (xmlns stripped) when
     *typedDim* is true, or the wildcard ``'*'`` when it is false.
     """
     def memberText(dim):
         # Explicit dimensions always contribute their member QName.
         if dim.isExplicit:
             return dim.memberQname
         # Non-explicit: serialize the typed member only when requested.
         if typedDim:
             return xmlstring(dim.typedMember, stripXmlns=True)
         return '*'

     entries = ["{}({})".format(dim.dimensionQname, memberText(dim))
                for dim in cntx.qnameDims.values()]
     entries.sort()
     return '|'.join(entries)