Ejemplo n.º 1
0
 def dimValKey(cntx, typedDim=False, behaveAsTypedDims=emptySet, restrictToDims=None):
     """Return a sorted, '|'-joined key of "dimension(value)" entries for a context.

     Args:
         cntx: object whose ``qnameDims`` mapping holds the dimension values.
         typedDim: when true, typed dimensions contribute their serialized
             typed member; when false they contribute ``'*'``.
         behaveAsTypedDims: container of dimension values that must not be
             treated as explicit even if they are.
         restrictToDims: optional container of dimension QName strings; when
             non-empty, only dimensions whose ``str(dimensionQname)`` is in
             it are included.

     Returns:
         The entries formatted as ``"{dimensionQname}({value})"``, sorted and
         joined with ``'|'`` (an empty string when nothing qualifies).
     """
     def _memberText(dim):
         # Explicit dimensions report their member unless forced to behave as typed.
         if dim.isExplicit and dim not in behaveAsTypedDims:
             return dim.memberQname
         # Without typed-dimension detail every remaining dimension is a wildcard.
         if not typedDim:
             return '*'
         if not dim.isTyped:
             return dim.memberQname
         return xmlstring(dim.typedMember, stripXmlns=True)

     entries = []
     for dim in cntx.qnameDims.values():
         if restrictToDims and str(dim.dimensionQname) not in restrictToDims:
             continue
         entries.append("{}({})".format(dim.dimensionQname, _memberText(dim)))
     entries.sort()
     return '|'.join(entries)
Ejemplo n.º 2
0
def metDimTypedKey(fact):
    """Return a key made of the fact's QName and its sorted dimension values.

    The key starts with ``"MET({qname})"``.  When the fact has a context
    with dimensions, one ``"{dimensionQname}({value})"`` entry per dimension
    is appended, sorted and separated by ``'|'``.  The value is the member
    QName for explicit dimensions, ``"nil"`` for typed members flagged
    xsi:nil ("true" or "1"), otherwise the serialized typed member.
    """
    cntx = fact.context
    head = "MET({})".format(fact.qname)
    if cntx is None or not cntx.qnameDims:
        return head

    dimEntries = []
    for dim in cntx.qnameDims.values():
        if dim.isExplicit:
            member = dim.memberQname
        elif dim.typedMember.get("{http://www.w3.org/2001/XMLSchema-instance}nil") in ("true", "1"):
            member = "nil"
        else:
            member = xmlstring(dim.typedMember, stripXmlns=True)
        dimEntries.append("{}({})".format(dim.dimensionQname, member))
    dimEntries.sort()
    return '|'.join([head] + dimEntries)
Ejemplo n.º 3
0
def metDimTypedKey(fact):
    """Build a lookup key from a fact's QName and the dimensions of its context.

    Returns ``"MET({qname})"`` alone when the fact has no context or the
    context has no dimensions; otherwise the sorted
    ``"{dimensionQname}({value})"`` entries are appended, all joined by ``'|'``.
    """
    def _dimText(dim):
        # Explicit member QName, "nil" for an xsi:nil typed member,
        # else the serialized typed member element.
        if dim.isExplicit:
            return dim.memberQname
        isNil = dim.typedMember.get(
            "{http://www.w3.org/2001/XMLSchema-instance}nil") in ("true", "1")
        return "nil" if isNil else xmlstring(dim.typedMember, stripXmlns=True)

    cntx = fact.context
    key = "MET({})".format(fact.qname)
    if cntx is None or not cntx.qnameDims:
        return key
    dimKeys = sorted("{}({})".format(dim.dimensionQname, _dimText(dim))
                     for dim in cntx.qnameDims.values())
    return key + '|' + '|'.join(dimKeys)
Ejemplo n.º 4
0
 def factFootnotes(fact):
     """Return one ordered dict per footnote relationship that starts at ``fact``.

     Each entry holds ``group`` (the relationship's link role) and
     ``footnoteType`` (its arc role).  For CSV/XL output the source fact id
     is added as ``factId``.  A relationship to another fact records
     ``factRef``; any other target records its serialized content as
     ``footnote`` and, when present, its ``language``.
     """
     def _refId(obj):
         # Fall back to an id synthesized from the object index.
         return obj.id if obj.id else "f{}".format(obj.objectIndex)

     result = []
     for rel in footnotesRelationshipSet.fromModelObject(fact):
         entry = OrderedDict()
         entry["group"] = rel.linkrole
         entry["footnoteType"] = rel.arcrole
         result.append(entry)
         if isCSVorXL:
             entry["factId"] = _refId(fact)
         target = rel.toModelObject
         if isinstance(target, ModelFact):
             entry["factRef"] = _refId(target)
             continue
         entry["footnote"] = xmlstring(target, stripXmlns=True, contentsOnly=True, includeText=True)
         if target.xmlLang:
             entry["language"] = target.xmlLang
     return result
Ejemplo n.º 5
0
 def factFootnotes(fact):
     """List the footnote relationships leaving ``fact`` as ordered dicts.

     Every entry starts with ``group`` set to the relationship's arc role.
     CSV output also records the source fact id as ``factId``.  When the
     target is a fact, ``factRef`` holds its id; otherwise the entry gets
     the target's role as ``footnoteType``, its serialized content as
     ``footnote`` and, if set, its ``language``.
     """
     footnoteList = []
     for rel in footnotesRelationshipSet.fromModelObject(fact):
         entry = OrderedDict([("group", rel.arcrole)])
         footnoteList.append(entry)
         if isCSV:
             # ids fall back to a value synthesized from the object index
             if fact.id:
                 entry["factId"] = fact.id
             else:
                 entry["factId"] = "f{}".format(fact.objectIndex)
         target = rel.toModelObject
         if not isinstance(target, ModelFact):
             entry["footnoteType"] = target.role
             entry["footnote"] = xmlstring(target, stripXmlns=True, contentsOnly=True, includeText=True)
             if target.xmlLang:
                 entry["language"] = target.xmlLang
         elif target.id:
             entry["factRef"] = target.id
         else:
             entry["factRef"] = "f{}".format(target.objectIndex)
     return footnoteList
Ejemplo n.º 6
0
 def factAspects(fact):
     """Collect the OIM aspects of ``fact`` into an ordered mapping.

     The mapping may contain, in insertion order: ``id``, ``baseType``, the
     language and type aspects, ``value`` (plus ``numericValue``/``accuracy``
     or ``booleanValue``), the concept aspect, entity, period and dimension
     aspects, the unit aspect(s), tuple parent/order aspects and a
     ``footnotes`` list.  Keys are plain strings for the fixed properties
     and aspect QNames for everything else.

     Relies on names from the enclosing scope (``hasId``, ``isJSON``,
     ``footnotesRelationshipSet``, ``oimValue``, ``oimPeriodValue``, ...).

     Args:
         fact: the fact (item or tuple) to describe.

     Returns:
         OrderedDict of aspect name/QName to value.
     """
     aspects = OrderedDict()
     if hasId and fact.id:
         aspects["id"] = fact.id
     elif fact.isTuple or footnotesRelationshipSet.toModelObject(fact):
         # no usable id: synthesize one for tuples and for facts that are
         # the target of a footnote relationship
         aspects["id"] = "f{}".format(fact.objectIndex)
     parent = fact.getparent()
     concept = fact.concept
     if not fact.isTuple:
         if concept is not None:
             _baseXsdType = concept.baseXsdType
             if _baseXsdType == "XBRLI_DATEUNION":
                 # resolve the date/dateTime union from the actual value
                 if getattr(fact.xValue, "dateOnly", False):
                     _baseXsdType = "date"
                 else:
                     _baseXsdType = "dateTime"
             aspects["baseType"] = "xs:{}".format(_baseXsdType)
             if concept.baseXbrliType in ("string", "normalizedString", "token") and fact.xmlLang:
                 aspects[qnOimLangAspect] = fact.xmlLang
             aspects[qnOimTypeAspect] = concept.baseXbrliType
     if fact.isItem:
         if fact.isNil:
             _value = None
             _strValue = "nil"
         else:
             _inferredDecimals = inferredDecimals(fact)
             _value = oimValue(fact.xValue, _inferredDecimals)
             _strValue = str(_value)
         aspects["value"] = _strValue
         if fact.concept is not None and fact.concept.isNumeric:
             _numValue = fact.xValue
             # finite Decimals become int when integral, else float
             if isinstance(_numValue, Decimal) and not isinf(_numValue) and not isnan(_numValue):
                 if _numValue == _numValue.to_integral():
                     _numValue = int(_numValue)
                 else:
                     _numValue = float(_numValue)
             aspects["numericValue"] = _numValue
             if not fact.isNil:
                 # _inferredDecimals is only bound for non-nil facts
                 aspects["accuracy"] = "infinity" if isinf(_inferredDecimals) else _inferredDecimals
         elif isinstance(_value, bool):
             aspects["booleanValue"] = _value
     aspects[qnOimConceptAspect] = oimValue(fact.qname)
     cntx = fact.context
     if cntx is not None:
         if cntx.entityIdentifierElement is not None:
             aspects[qnOimEntityAspect] = oimValue(qname(*cntx.entityIdentifier))
         if cntx.period is not None:
             aspects[qnOimPeriodAspect] = oimPeriodValue(cntx)
         # dimensions in key order; nil typed members are reported as None
         for _qn, dim in sorted(cntx.qnameDims.items(), key=lambda item: item[0]):
             aspects[dim.dimensionQname] = (oimValue(dim.memberQname) if dim.isExplicit
                                            else None if dim.typedMember.get("{http://www.w3.org/2001/XMLSchema-instance}nil") in ("true", "1")
                                            else dim.typedMember.stringValue)
     unit = fact.unit
     if unit is not None:
         _mMul, _mDiv = unit.measures
         if isJSON:
             aspects[qnOimUnitAspect] = { # use tuple instead of list for hashability
                 "numerators": tuple(oimValue(m) for m in sorted(_mMul, key=oimValue))
             }
             if _mDiv:
                 aspects[qnOimUnitAspect]["denominators"] = tuple(oimValue(m) for m in sorted(_mDiv, key=oimValue))
         else: # CSV
             # Both measure lists are ordered by their string form.  The
             # numerator sort previously called an undefined name ``q``.
             if _mMul:
                 aspects[qnOimUnitMulAspect] = ",".join(oimValue(m)
                                                     for m in sorted(_mMul, key=str))
             if _mDiv:
                 aspects[qnOimUnitDivAspect] = ",".join(oimValue(m)
                                                     for m in sorted(_mDiv, key=str))
     if parent.qname != XbrlConst.qnXbrliXbrl:
         # fact is nested in something other than the xbrli:xbrl root
         aspects[qnOimTupleParentAspect] = parent.id if parent.id else "f{}".format(parent.objectIndex)
         aspects[qnOimTupleOrderAspect] = elementIndex(fact)

     footnotes = []
     for footnoteRel in footnotesRelationshipSet.fromModelObject(fact):
         footnote = {"group": footnoteRel.arcrole}
         footnotes.append(footnote)
         toObj = footnoteRel.toModelObject
         if isinstance(toObj, ModelFact):
             footnote["factRef"] = toObj.id if toObj.id else "f{}".format(toObj.objectIndex)
         else:
             footnote["footnoteType"] = toObj.role
             footnote["footnote"] = xmlstring(toObj, stripXmlns=True, contentsOnly=True, includeText=True)
             if toObj.xmlLang:
                 footnote["language"] = toObj.xmlLang
     if footnotes:
         aspects["footnotes"] = footnotes
     return aspects
Ejemplo n.º 7
0
def saveLoadableOIM(modelXbrl, oimFile, oimStyle, oimQNameSeparator):
    """Save the facts of an XBRL instance as an OIM JSON or CSV report.

    The format is chosen from the suffix of ``oimFile``.  ``.json`` writes a
    single JSON document.  ``.csv`` writes the facts file plus companion
    files next to it (``-dts``, ``-relationships``, ``-footnotes`` and,
    conditionally, ``-prefixMap`` and ``-roleTypes``).  Any other suffix
    writes nothing.

    Args:
        modelXbrl: loaded instance whose facts, contexts, units, document
            references and footnote relationships are exported.
        oimFile: output path; must end in ``.json`` or ``.csv``.
        oimStyle: ``"flat"`` or ``"clustered"``; only consulted for JSON.
            Any other value leaves the JSON facts list empty.
        oimQNameSeparator: ``"clark"`` to emit QNames in Clark notation,
            otherwise the text placed between prefix and local name.
    """
    isJSON = oimFile.endswith(".json")
    isCSV = oimFile.endswith(".csv")

    # namespace URI (or entity scheme URI) -> prefix used in the output
    namespacePrefixes = {}

    def compileQname(qname):
        """Register the QName's namespace under its own prefix ("" if none)."""
        if qname.namespaceURI not in namespacePrefixes:
            namespacePrefixes[qname.namespaceURI] = qname.prefix or ""

    # aspects that can occur on facts; dimension QNames are added below
    aspectsDefined = {
        qnOimConceptAspect,
        qnOimLocationAspect,
        qnOimValueAspect,
        qnOimPeriodAspect,
        qnOimEntityAspect}

    def oimValue(object, decimals=None):
        """Convert a QName, Decimal or date/time-like value to its output form.

        QNames become Clark notation or ``prefix + separator + localName``;
        Decimals become strings (scaled ``<mantissa>e<exponent>`` when finite
        ``decimals`` is given); the listed date/time types become ``str``.
        Anything else is returned unchanged.
        """
        if isinstance(object, QName):
            if oimQNameSeparator == "clark":
                return object.clarkNotation
            if object.namespaceURI not in namespacePrefixes:
                if object.prefix:
                    namespacePrefixes[object.namespaceURI] = object.prefix
                else:
                    # Number generated prefixes by counting the ones already
                    # assigned.  Count prefix values, not the URI keys,
                    # otherwise every generated prefix would be "_0".
                    _prefix = "_{}".format(
                        sum(1 for p in namespacePrefixes.values() if p.startswith("_")))
                    namespacePrefixes[object.namespaceURI] = _prefix
            return "{}{}{}".format(namespacePrefixes[object.namespaceURI],
                                   oimQNameSeparator,
                                   object.localName)
        if isinstance(object, Decimal):
            try:
                if decimals is not None and not isnan(decimals) and not isinf(decimals):
                    if decimals != 0:
                        object = object / (TEN ** -decimals)
                    return "{}e{}".format(object, -decimals)
                else:
                    return "{}".format(object) # force to string to prevent json floating error
            except Exception:
                # best effort: fall back to the plain string form
                return str(object)
        if isinstance(object, (DateTime, YearMonthDuration, DayTimeDuration, Time,
                               gYearMonth, gMonthDay, gYear, gMonth, gDay)):
            return str(object)
        return object

    def oimPeriodValue(cntx):
        """Return the period of a context as "forever" or "start/end" text."""
        if cntx.isForeverPeriod:
            return "forever"
        elif cntx.isStartEndPeriod:
            return "{}/{}".format(dateunionValue(cntx.startDatetime, dateOnlyHour=0),
                                  dateunionValue(cntx.endDatetime, subtractOneDay=True, dateOnlyHour=24))
        else: # instant
            return "PT0S/{}".format(dateunionValue(cntx.endDatetime, subtractOneDay=True, dateOnlyHour=24))

    hasId = False
    hasLocation = False # may be optional based on style?
    hasType = True
    hasLang = False
    hasUnits = False
    hasUnitMulMeasures = False
    hasUnitDivMeasures = False
    hasTuple = False

    # compile QNames in instance for OIM
    for fact in modelXbrl.factsInInstance:
        if fact.id:
            hasId = True
        concept = fact.concept
        if concept is not None:
            if concept.baseXbrliType in ("string", "normalizedString", "token") and fact.xmlLang:
                hasLang = True
        compileQname(fact.qname)
        if hasattr(fact, "xValue") and isinstance(fact.xValue, QName):
            compileQname(fact.xValue)
        unit = fact.unit
        if unit is not None:
            hasUnits = True
            if unit.measures[0]:
                hasUnitMulMeasures = True
            if unit.measures[1]:
                hasUnitDivMeasures = True
        if fact.modelTupleFacts:
            hasTuple = True

    # entity schemes get prefixes too and share the namespace prefix map
    entitySchemePrefixes = {}
    for cntx in modelXbrl.contexts.values():
        if cntx.entityIdentifierElement is not None:
            scheme = cntx.entityIdentifier[0]
            if scheme not in entitySchemePrefixes:
                if not entitySchemePrefixes: # first one is just scheme
                    if scheme == "http://www.sec.gov/CIK":
                        _schemePrefix = "cik"
                    elif scheme == "http://standard.iso.org/iso/17442":
                        _schemePrefix = "lei"
                    else:
                        _schemePrefix = "scheme"
                else:
                    _schemePrefix = "scheme-{}".format(len(entitySchemePrefixes) + 1)
                entitySchemePrefixes[scheme] = _schemePrefix
                namespacePrefixes[scheme] = _schemePrefix
        for dim in cntx.qnameDims.values():
            compileQname(dim.dimensionQname)
            aspectsDefined.add(dim.dimensionQname)
            if dim.isExplicit:
                compileQname(dim.memberQname)

    for unit in modelXbrl.units.values():
        if unit is not None:
            for measures in unit.measures:
                for measure in measures:
                    compileQname(measure)

    if XbrlConst.xbrli in namespacePrefixes and namespacePrefixes[XbrlConst.xbrli] != "xbrli":
        namespacePrefixes[XbrlConst.xbrli] = "xbrli" # normalize xbrli prefix

    if hasId: aspectsDefined.add(qnOimIdAspect)
    if hasLang: aspectsDefined.add(qnOimLangAspect)
    if hasTuple: aspectsDefined.add(qnOimTupleAspect)
    if hasUnits: aspectsDefined.add(qnOimUnitAspect)
    if hasUnitMulMeasures: aspectsDefined.add(qnOimUnitMulAspect)
    if hasUnitDivMeasures: aspectsDefined.add(qnOimUnitDivAspect)

    # compile footnotes and relationships
    factRelationships = []
    factFootnotes = []
    # NOTE(review): modelXbrl is passed as the first argument to its own
    # relationshipSet method -- confirm against that method's signature.
    for rel in modelXbrl.relationshipSet(modelXbrl, "XBRL-footnotes").modelRelationships:
        oimRel = {"linkrole": rel.linkrole, "arcrole": rel.arcrole}
        factRelationships.append(oimRel)
        oimRel["fromIds"] = [obj.id if obj.id
                             else elementChildSequence(obj)
                             for obj in rel.fromModelObjects]
        oimRel["toIds"] = [obj.id if obj.id
                           else elementChildSequence(obj)
                           for obj in rel.toModelObjects]
        _order = rel.arcElement.get("order")
        if _order is not None:
            oimRel["order"] = _order
        for obj in rel.toModelObjects:
            if isinstance(obj, ModelResource): # footnote
                oimFootnote = {"role": obj.role,
                               "id": obj.id if obj.id
                                     else elementChildSequence(obj),
                                # value needs work for html elements and for inline footnotes
                               "value": xmlstring(obj, stripXmlns=True)}
                if obj.xmlLang:
                    oimFootnote["lang"] = obj.xmlLang
                factFootnotes.append(oimFootnote)

    dtsReferences = [
        {"type": "schema" if doc.type == ModelDocument.Type.SCHEMA
                 else "linkbase" if doc.type == ModelDocument.Type.LINKBASE
                 else "other",
         "href": doc.basename}
        for doc,ref in modelXbrl.modelDocument.referencesDocument.items()
        if ref.referringModelObject.qname in SCHEMA_LB_REFS]

    # NOTE(review): the referring object is subscripted with "href" here,
    # whereas attributes are read with .get() elsewhere in this function --
    # confirm this lookup works for the referring object's type.
    roleTypes = [
        {"type": "role" if ref.referringModelObject.localName == "roleRef" else "arcroleRef",
         "href": ref.referringModelObject["href"]}
        for doc,ref in modelXbrl.modelDocument.referencesDocument.items()
        if ref.referringModelObject.qname in ROLE_REFS]


    def factAspects(fact):
        """Return a dict of aspect QName -> output value for one fact."""
        aspects = {qnOimConceptAspect: oimValue(fact.qname)}
        if hasId and fact.id:
            aspects[qnOimIdAspect] = fact.id
        if hasLocation:
            aspects[qnOimLocationAspect] = elementChildSequence(fact)
        concept = fact.concept
        if concept is not None:
            if concept.baseXbrliType in ("string", "normalizedString", "token") and fact.xmlLang:
                aspects[qnOimLangAspect] = fact.xmlLang
            # the type is only known when the concept resolved; without a
            # concept the aspect is omitted instead of raising AttributeError
            aspects[qnOimTypeAspect] = concept.baseXbrliType
        if fact.isItem:
            aspects[qnOimValueAspect] = (NILVALUE if fact.isNil else
                                         oimValue(fact.xValue, inferredDecimals(fact)))
        cntx = fact.context
        if cntx is not None:
            if cntx.entityIdentifierElement is not None:
                aspects[qnOimEntityAspect] = oimValue(qname(*cntx.entityIdentifier))
            if cntx.period is not None:
                aspects[qnOimPeriodAspect] = oimPeriodValue(cntx)
            for dim in cntx.qnameDims.values():
                aspects[dim.dimensionQname] = (oimValue(dim.memberQname) if dim.isExplicit
                                               else dim.typedMember.stringValue)
        unit = fact.unit
        if unit is not None:
            _mMul, _mDiv = unit.measures
            if isJSON:
                aspects[qnOimUnitAspect] = ( # use tuple instead of list for hashability
                    tuple(oimValue(m) for m in sorted(_mMul, key=lambda m: str(m))),
                    tuple(oimValue(m) for m in sorted(_mDiv, key=lambda m: str(m))))
            else: # CSV
                if _mMul:
                    aspects[qnOimUnitMulAspect] = ",".join(oimValue(m)
                                                        for m in sorted(_mMul, key=lambda m: str(m)))
                if _mDiv:
                    aspects[qnOimUnitDivAspect] = ",".join(oimValue(m)
                                                        for m in sorted(_mDiv, key=lambda m: str(m)))
        return aspects

    if isJSON:
        # save JSON

        oimFacts = []
        # the report is a list of single-key objects; oimFacts is filled below
        oimReport = []
        oimReport.append({"url": modelXbrl.modelDocument.uri})
        if oimQNameSeparator != "clark":
            oimReport.append({"prefixMap": dict((p,ns) for ns,p in namespacePrefixes.items())})
        oimReport.append({"DTSreferences": dtsReferences})
        oimReport.append({"roleTypes": roleTypes})
        oimReport.append({"facts": oimFacts})
        oimReport.append({"footnotes": factFootnotes})
        oimReport.append({"relationships": factRelationships})

        if oimStyle == "flat":

            def saveFlatJsonFacts(facts, oimFacts):
                """Append one dict per fact, nesting tuple children recursively."""
                for fact in facts:
                    oimFact = factAspects(fact)
                    if fact.modelTupleFacts:
                        tupleFacts = []
                        oimFact[qnOimTupleAspect] = tupleFacts
                        saveFlatJsonFacts(fact.modelTupleFacts, tupleFacts)
                    oimFacts.append(dict((oimValue(k),v) for k,v in oimFact.items()))

            saveFlatJsonFacts(modelXbrl.facts, oimFacts)

        elif oimStyle == "clustered":

            # build aspect-value usage per fact for every fact
            categoricalAspectValueSets = {} # for each aspect, value facts-set
            aspectIndex = {}
            indexAspect = {}
            def addCategoricalAspect(aspectQn):
                """Assign the next index to an aspect and prepare its value sets."""
                i = len(aspectIndex)
                aspectIndex[aspectQn] = i
                indexAspect[i] = oimValue(aspectQn)
                categoricalAspectValueSets[i] = defaultdict(set)

            addCategoricalAspect(qnOimConceptAspect)
            addCategoricalAspect(qnOimEntityAspect)
            addCategoricalAspect(qnOimPeriodAspect)
            for aspectQn in aspectsDefined:
                if aspectQn.namespaceURI != nsOim or aspectQn in (
                    qnOimIdAspect, qnOimLangAspect, qnOimUnitAspect):
                    addCategoricalAspect(aspectQn)


            for fact in modelXbrl.facts:
                fact._factAspectValues = {}
                fact._factAspectSet = set()
                for aspectQn, value in factAspects(fact).items():
                    if aspectQn in aspectIndex:
                        i = aspectIndex[aspectQn]
                        v = oimValue(value)
                        categoricalAspectValueSets[i][v].add(fact)
                        fact._factAspectValues[i] = v
                        fact._factAspectSet.add(i)

            # order aspectValues by largest population; an aspect that no
            # fact uses has no value sets, hence the default of 0
            maxAspectValuePopulation = [
                (_ai, max((len(factSet) for factSet in oimValueFacts.values()), default=0))
                for _ai, oimValueFacts in categoricalAspectValueSets.items()]

            maxAspectValuePopulation.sort(key=lambda ai_max: -ai_max[1])

            factsClustered = set()
            _aspectValue = {} # aspect index -> value currently being nested

            def clusterAspect(_avpi, _data):
                """Nest facts under aspect values, most populous aspect first."""
                if _avpi >= len(maxAspectValuePopulation):
                    return # end of aspects
                _ai = maxAspectValuePopulation[_avpi][0]
                for _v, _vFactsSet in categoricalAspectValueSets[_ai].items():
                    _aspectValue[_ai] = _v
                    _nestedData = []
                    _nestedAspect = {indexAspect[_ai]: _v, "data": _nestedData}
                    for _fact in _vFactsSet - factsClustered:
                        # emit a fact once its aspects exactly match the
                        # aspect values accumulated on the way down
                        if (_fact._factAspectSet == _aspectValue.keys() and
                            all([_fact._factAspectValues[__ai] == _aspectValue[__ai]
                                for __ai in _aspectValue])):
                            _factAspects = factAspects(_fact)
                            # NOTE(review): facts without a value aspect
                            # (non-items) would raise KeyError here -- confirm
                            # whether tuples can reach this point.
                            _oimFactItem = {oimValue(qnOimValueAspect): _factAspects[qnOimValueAspect]}
                            if hasLocation:
                                _oimFactItem[oimValue(qnOimLocationAspect)] = _factAspects[qnOimLocationAspect]
                            # the type aspect is absent when the concept is unresolved
                            if hasType and qnOimTypeAspect in _factAspects:
                                _oimFactItem[oimValue(qnOimTypeAspect)] = _factAspects[qnOimTypeAspect]
                            _nestedData.append(_oimFactItem)
                            factsClustered.add(_fact)
                    clusterAspect(_avpi+1, _nestedData)
                    if _nestedData:
                        _data.append(_nestedAspect)
                    del _aspectValue[_ai]
            clusterAspect(0, oimFacts)

        with open(oimFile, "w", encoding="utf-8") as fh:
            fh.write(json.dumps(oimReport, ensure_ascii=False, indent=1, sort_keys=True))

    elif isCSV:
        # save CSV

        def openCsv(suffix=None):
            """Open the facts file (no suffix) or a companion file for writing."""
            path = oimFile if suffix is None else oimFile.replace(".csv", suffix)
            return open(path, csvOpenMode, newline=csvOpenNewline, encoding='utf-8-sig')

        # levels of tuple nesting
        def tupleDepth(facts, parentDepth):
            """Return the deepest nesting level reached below ``facts``."""
            _levelDepth = parentDepth
            for fact in facts:
                _factDepth = tupleDepth(fact.modelTupleFacts, parentDepth + 1)
                if _factDepth > _levelDepth:
                    _levelDepth = _factDepth
            return _levelDepth
        maxDepth = tupleDepth(modelXbrl.facts, 0)

        aspectQnCol = {oimValue(qnOimConceptAspect): maxDepth - 1}
        aspectsHeader = [oimValue(qnOimConceptAspect)]

        # one concept column per nesting level; only the first has a header
        for i in range(maxDepth - 1):
            aspectsHeader.append(None)

        def addAspectQnCol(aspectQn):
            """Append a column for the aspect and remember its position."""
            aspectQnCol[aspectQn] = len(aspectsHeader)
            aspectsHeader.append(oimValue(aspectQn))

        # pre-ordered aspect columns
        if hasId:
            addAspectQnCol(qnOimIdAspect)
        if hasLocation:
            addAspectQnCol(qnOimLocationAspect)
        if hasType:
            addAspectQnCol(qnOimTypeAspect)
        addAspectQnCol(qnOimValueAspect)
        if qnOimEntityAspect in aspectsDefined:
            addAspectQnCol(qnOimEntityAspect)
        if qnOimPeriodAspect in aspectsDefined:
            addAspectQnCol(qnOimPeriodAspect)
        if qnOimUnitMulAspect in aspectsDefined:
            addAspectQnCol(qnOimUnitMulAspect)
        if qnOimUnitDivAspect in aspectsDefined:
            addAspectQnCol(qnOimUnitDivAspect)
        for aspectQn in sorted(aspectsDefined, key=lambda qn: str(qn)):
            if aspectQn.namespaceURI != nsOim:
                addAspectQnCol(aspectQn)

        def aspectCols(fact, depth):
            """Return the CSV row for a fact; the concept goes in the depth column."""
            cols = [None for i in range(len(aspectsHeader))]
            for aspectQn, aspectValue in factAspects(fact).items():
                if aspectQn == qnOimConceptAspect:
                    cols[depth - 1] = aspectValue
                elif aspectQn in aspectQnCol:
                    cols[aspectQnCol[aspectQn]] = aspectValue
            return cols

        # save facts
        with openCsv() as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            csvWriter.writerow(aspectsHeader)

            def saveCSVfacts(facts, thisDepth):
                """Write each fact followed by its tuple children, depth first."""
                for fact in facts:
                    csvWriter.writerow(aspectCols(fact, thisDepth))
                    saveCSVfacts(fact.modelTupleFacts, thisDepth + 1)

            saveCSVfacts(modelXbrl.facts, 1)

        # save namespaces
        # NOTE(review): the JSON branch writes the prefix map only when the
        # separator is NOT "clark"; this condition is the opposite -- confirm
        # which is intended.
        if oimQNameSeparator == "clark":
            with openCsv("-prefixMap.csv") as csvFile:
                csvWriter = csv.writer(csvFile, dialect="excel")
                csvWriter.writerow(("prefix", "mappedURI"))
                for namespaceURI, prefix in sorted(namespacePrefixes.items(), key=lambda item: item[1]):
                    csvWriter.writerow((prefix, namespaceURI))

        # save dts references
        with openCsv("-dts.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            csvWriter.writerow(("type", "href"))
            for oimRef in dtsReferences:
                csvWriter.writerow((oimRef["type"], oimRef["href"]))

        # save role and arc type references
        if roleTypes:
            with openCsv("-roleTypes.csv") as csvFile:
                csvWriter = csv.writer(csvFile, dialect="excel")
                csvWriter.writerow(("type", "href"))
                for oimRef in roleTypes:
                    csvWriter.writerow((oimRef["type"], oimRef["href"]))

        # save relationships
        with openCsv("-relationships.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            # the relationships are plain dicts, so test for the key
            hasOrder = any("order" in oimRel for oimRel in factRelationships)
            csvWriter.writerow(("fromIds", "toIds", "linkrole", "arcrole") +
                               (("order",) if hasOrder else ()))
            for oimRel in factRelationships:
                csvWriter.writerow((",".join(oimRel["fromIds"]),
                                    ",".join(oimRel["toIds"]),
                                    oimRel["linkrole"],
                                    oimRel["arcrole"]) +
                                   ((oimRel.get("order",None),) if hasOrder else ()))

        # save footnotes
        with openCsv("-footnotes.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            footnotesHaveLang = any("lang" in oimFnt for oimFnt in factFootnotes)
            csvWriter.writerow(("id", "role") + (("lang",) if footnotesHaveLang else ()) + ("value",))
            for oimFnt in factFootnotes:
                csvWriter.writerow((oimFnt["id"], oimFnt["role"]) +
                                   ((oimFnt.get("lang",None),) if footnotesHaveLang else ()) +
                                   (oimFnt["value"],))
Ejemplo n.º 8
0
def saveLoadableOIM(modelXbrl, oimFile, oimStyle, oimQNameSeparator):
    """Write the facts, footnotes and relationships of ``modelXbrl`` to ``oimFile``.

    The output format is chosen from the file-name extension:

    * ``.json`` -- a single JSON document is written to ``oimFile``.
      ``oimStyle`` selects how facts are laid out: ``"flat"`` emits one
      object per fact (tuple children nested under the tuple aspect),
      ``"clustered"`` nests facts under their shared aspect values.  Any
      other style leaves the ``facts`` list empty.
    * ``.csv`` -- facts are written to ``oimFile`` and companion files are
      written next to it by replacing ``".csv"`` in the name with
      ``-prefixMap.csv`` (conditionally), ``-dts.csv``, ``-roleTypes.csv``
      (only when there are role references), ``-relationships.csv`` and
      ``-footnotes.csv``.

    Any other extension results in nothing being written.

    Args:
        modelXbrl: the loaded model; its ``factsInInstance``, ``facts``,
            ``contexts``, ``units``, ``modelDocument`` and the
            ``"XBRL-footnotes"`` relationship set are read.
        oimFile: path of the main output file.
        oimStyle: ``"flat"`` or ``"clustered"`` (only consulted for JSON).
        oimQNameSeparator: ``"clark"`` to render QNames in Clark notation,
            otherwise the string placed between prefix and local name.

    Side effects:
        In clustered JSON mode the attributes ``_factAspectValues`` and
        ``_factAspectSet`` are set on every fact in ``modelXbrl.facts``.
    """
    isJSON = oimFile.endswith(".json")
    isCSV = oimFile.endswith(".csv")

    namespacePrefixes = {}  # namespace URI -> prefix

    def compileQname(qname):
        # Register the QName's own prefix the first time its namespace is seen.
        if qname.namespaceURI not in namespacePrefixes:
            namespacePrefixes[qname.namespaceURI] = qname.prefix or ""

    aspectsDefined = {
        qnOimConceptAspect, qnOimLocationAspect, qnOimValueAspect,
        qnOimPeriodAspect, qnOimEntityAspect
    }

    def oimValue(obj, decimals=None):
        """Convert ``obj`` to the representation used in the output.

        QNames become Clark notation or ``prefix<sep>localName``; Decimals
        become strings (divided by ``TEN**-decimals`` and suffixed with
        ``e<-decimals>`` when a finite ``decimals`` is given); the listed
        date/time/duration types become ``str(obj)``; anything else is
        returned unchanged.
        """
        if isinstance(obj, QName):
            if oimQNameSeparator == "clark":
                return obj.clarkNotation
            if obj.namespaceURI not in namespacePrefixes:
                if obj.prefix:
                    namespacePrefixes[obj.namespaceURI] = obj.prefix
                else:
                    # Synthesize "_<n>" where n is the number of synthetic
                    # prefixes already assigned.  The count must be taken
                    # over the prefixes (dict values), not the namespace
                    # URIs (dict keys), or every namespace would get "_0".
                    _prefix = "_{}".format(
                        sum(1 for p in namespacePrefixes.values()
                            if p.startswith("_")))
                    namespacePrefixes[obj.namespaceURI] = _prefix
            return "{}{}{}".format(namespacePrefixes[obj.namespaceURI],
                                   oimQNameSeparator, obj.localName)
        if isinstance(obj, Decimal):
            try:
                if (decimals is not None and not isnan(decimals)
                        and not isinf(decimals)):
                    if decimals != 0:
                        obj = obj / (TEN**-decimals)
                    return "{}e{}".format(obj, -decimals)
                # force to string to prevent json floating error
                return "{}".format(obj)
            except Exception:
                # Best effort: fall back to the plain string form.
                return str(obj)
        if isinstance(obj,
                      (DateTime, YearMonthDuration, DayTimeDuration, Time,
                       gYearMonth, gMonthDay, gYear, gMonth, gDay)):
            return str(obj)
        return obj

    def oimPeriodValue(cntx):
        """Return the period string for a context: forever, start/end or instant."""
        if cntx.isForeverPeriod:
            return "forever"
        _end = dateunionValue(cntx.endDatetime,
                              subtractOneDay=True,
                              dateOnlyHour=24)
        if cntx.isStartEndPeriod:
            return "{}/{}".format(
                dateunionValue(cntx.startDatetime, dateOnlyHour=0), _end)
        # instant
        return "PT0S/{}".format(_end)

    hasId = False
    hasLocation = False  # may be optional based on style?
    hasType = True
    hasLang = False
    hasUnits = False
    hasUnitMulMeasures = False
    hasUnitDivMeasures = False
    hasTuple = False

    # compile QNames in instance for OIM
    for fact in modelXbrl.factsInInstance:
        if fact.id:
            hasId = True
        concept = fact.concept
        if concept is not None:
            if concept.baseXbrliType in ("string", "normalizedString",
                                         "token") and fact.xmlLang:
                hasLang = True
        compileQname(fact.qname)
        if hasattr(fact, "xValue") and isinstance(fact.xValue, QName):
            compileQname(fact.xValue)
        unit = fact.unit
        if unit is not None:
            hasUnits = True
            if unit.measures[0]:
                hasUnitMulMeasures = True
            if unit.measures[1]:
                hasUnitDivMeasures = True
        if fact.modelTupleFacts:
            hasTuple = True

    # Assign a prefix to every distinct entity identifier scheme and collect
    # the dimension QNames used by contexts.
    entitySchemePrefixes = {}
    for cntx in modelXbrl.contexts.values():
        if cntx.entityIdentifierElement is not None:
            scheme = cntx.entityIdentifier[0]
            if scheme not in entitySchemePrefixes:
                if not entitySchemePrefixes:  # first one is just scheme
                    if scheme == "http://www.sec.gov/CIK":
                        _schemePrefix = "cik"
                    elif scheme == "http://standard.iso.org/iso/17442":
                        _schemePrefix = "lei"
                    else:
                        _schemePrefix = "scheme"
                else:
                    _schemePrefix = "scheme-{}".format(
                        len(entitySchemePrefixes) + 1)
                entitySchemePrefixes[scheme] = _schemePrefix
                namespacePrefixes[scheme] = _schemePrefix
        for dim in cntx.qnameDims.values():
            compileQname(dim.dimensionQname)
            aspectsDefined.add(dim.dimensionQname)
            if dim.isExplicit:
                compileQname(dim.memberQname)

    for unit in modelXbrl.units.values():
        if unit is not None:
            for measures in unit.measures:
                for measure in measures:
                    compileQname(measure)

    if XbrlConst.xbrli in namespacePrefixes and namespacePrefixes[
            XbrlConst.xbrli] != "xbrli":
        namespacePrefixes[XbrlConst.xbrli] = "xbrli"  # normalize xbrli prefix

    if hasId: aspectsDefined.add(qnOimIdAspect)
    if hasLang: aspectsDefined.add(qnOimLangAspect)
    if hasTuple: aspectsDefined.add(qnOimTupleAspect)
    if hasUnits: aspectsDefined.add(qnOimUnitAspect)
    if hasUnitMulMeasures: aspectsDefined.add(qnOimUnitMulAspect)
    if hasUnitDivMeasures: aspectsDefined.add(qnOimUnitDivAspect)

    # compile footnotes and relationships
    factRelationships = []
    factFootnotes = []
    for rel in modelXbrl.relationshipSet(modelXbrl,
                                         "XBRL-footnotes").modelRelationships:
        oimRel = {"linkrole": rel.linkrole, "arcrole": rel.arcrole}
        factRelationships.append(oimRel)
        # Objects without an id are identified by their child sequence.
        oimRel["fromIds"] = [
            obj.id if obj.id else elementChildSequence(obj)
            for obj in rel.fromModelObjects
        ]
        oimRel["toIds"] = [
            obj.id if obj.id else elementChildSequence(obj)
            for obj in rel.toModelObjects
        ]
        _order = rel.arcElement.get("order")
        if _order is not None:
            oimRel["order"] = _order
        for obj in rel.toModelObjects:
            if isinstance(obj, ModelResource):  # footnote
                oimFootnote = {
                    "role": obj.role,
                    "id": obj.id if obj.id else elementChildSequence(obj),
                    # value needs work for html elements and for inline footnotes
                    "value": xmlstring(obj, stripXmlns=True)
                }
                if obj.xmlLang:
                    oimFootnote["lang"] = obj.xmlLang
                factFootnotes.append(oimFootnote)

    dtsReferences = [{
        "type":
        "schema" if doc.type == ModelDocument.Type.SCHEMA else
        "linkbase" if doc.type == ModelDocument.Type.LINKBASE else "other",
        "href":
        doc.basename
    } for doc, ref in modelXbrl.modelDocument.referencesDocument.items()
                     if ref.referringModelObject.qname in SCHEMA_LB_REFS]

    roleTypes = [{
        "type":
        "role"
        if ref.referringModelObject.localName == "roleRef" else "arcroleRef",
        "href":
        ref.referringModelObject["href"]
    } for doc, ref in modelXbrl.modelDocument.referencesDocument.items()
                 if ref.referringModelObject.qname in ROLE_REFS]

    def factAspects(fact):
        """Return a dict mapping aspect QName -> output value for ``fact``."""
        aspects = {qnOimConceptAspect: oimValue(fact.qname)}
        if hasId and fact.id:
            aspects[qnOimIdAspect] = fact.id
        if hasLocation:
            aspects[qnOimLocationAspect] = elementChildSequence(fact)
        concept = fact.concept
        if concept is not None:
            if concept.baseXbrliType in ("string", "normalizedString",
                                         "token") and fact.xmlLang:
                aspects[qnOimLangAspect] = fact.xmlLang
            # The type can only be reported when the concept is known;
            # dereferencing a missing concept would raise AttributeError.
            aspects[qnOimTypeAspect] = concept.baseXbrliType
        if fact.isItem:
            aspects[qnOimValueAspect] = (NILVALUE if fact.isNil else oimValue(
                fact.xValue, inferredDecimals(fact)))
        cntx = fact.context
        if cntx is not None:
            if cntx.entityIdentifierElement is not None:
                aspects[qnOimEntityAspect] = oimValue(
                    qname(*cntx.entityIdentifier))
            if cntx.period is not None:
                aspects[qnOimPeriodAspect] = oimPeriodValue(cntx)
            for dim in cntx.qnameDims.values():
                aspects[dim.dimensionQname] = (oimValue(dim.memberQname)
                                               if dim.isExplicit else
                                               dim.typedMember.stringValue)
        unit = fact.unit
        if unit is not None:
            _mMul, _mDiv = unit.measures
            if isJSON:
                # use tuple instead of list for hashability
                aspects[qnOimUnitAspect] = (
                    tuple(oimValue(m) for m in sorted(_mMul, key=str)),
                    tuple(oimValue(m) for m in sorted(_mDiv, key=str)))
            else:  # CSV
                if _mMul:
                    aspects[qnOimUnitMulAspect] = ",".join(
                        oimValue(m) for m in sorted(_mMul, key=str))
                if _mDiv:
                    aspects[qnOimUnitDivAspect] = ",".join(
                        oimValue(m) for m in sorted(_mDiv, key=str))
        return aspects

    if isJSON:
        # save JSON

        oimFacts = []
        oimReport = []  # top level of oim json output
        oimReport.append({"url": modelXbrl.modelDocument.uri})
        if oimQNameSeparator != "clark":
            oimReport.append({
                "prefixMap": {p: ns for ns, p in namespacePrefixes.items()}
            })
        oimReport.append({"DTSreferences": dtsReferences})
        oimReport.append({"roleTypes": roleTypes})
        oimReport.append({"facts": oimFacts})
        oimReport.append({"footnotes": factFootnotes})
        oimReport.append({"relationships": factRelationships})

        if oimStyle == "flat":

            def saveFlatJsonFacts(facts, oimFacts):
                for fact in facts:
                    oimFact = factAspects(fact)
                    if fact.modelTupleFacts:
                        tupleFacts = []
                        oimFact[qnOimTupleAspect] = tupleFacts
                        saveFlatJsonFacts(fact.modelTupleFacts, tupleFacts)
                    oimFacts.append(
                        {oimValue(k): v for k, v in oimFact.items()})

            saveFlatJsonFacts(modelXbrl.facts, oimFacts)

        elif oimStyle == "clustered":

            # build aspect-value usage per fact for every fact
            categoricalAspectValueSets = {}  # for each aspect, value facts-set
            aspectIndex = {}  # aspect QName -> index
            indexAspect = {}  # index -> output name of aspect

            def addCategoricalAspect(aspectQn):
                i = len(aspectIndex)
                aspectIndex[aspectQn] = i
                indexAspect[i] = oimValue(aspectQn)
                categoricalAspectValueSets[i] = defaultdict(set)

            addCategoricalAspect(qnOimConceptAspect)
            addCategoricalAspect(qnOimEntityAspect)
            addCategoricalAspect(qnOimPeriodAspect)
            for aspectQn in aspectsDefined:
                if aspectQn.namespaceURI != nsOim or aspectQn in (
                        qnOimIdAspect, qnOimLangAspect, qnOimUnitAspect):
                    addCategoricalAspect(aspectQn)

            for fact in modelXbrl.facts:
                fact._factAspectValues = {}
                fact._factAspectSet = set()
                for aspectQn, value in factAspects(fact).items():
                    if aspectQn in aspectIndex:
                        i = aspectIndex[aspectQn]
                        v = oimValue(value)
                        categoricalAspectValueSets[i][v].add(fact)
                        fact._factAspectValues[i] = v
                        fact._factAspectSet.add(i)

            # order aspectValues by largest population; default=0 keeps an
            # aspect that no fact uses from raising on an empty max()
            maxAspectValuePopulation = [
                (_idx,
                 max((len(factSet) for factSet in oimValueFacts.values()),
                     default=0))
                for _idx, oimValueFacts in categoricalAspectValueSets.items()
            ]

            maxAspectValuePopulation.sort(key=lambda ai_max: -ai_max[1])

            factsClustered = set()
            _aspectValue = {}  # aspect index -> value on the current path

            def clusterAspect(_avpi, _data):
                if _avpi >= len(maxAspectValuePopulation):
                    return  # end of aspects
                _ai = maxAspectValuePopulation[_avpi][0]
                for _v, _vFactsSet in categoricalAspectValueSets[_ai].items():
                    _aspectValue[_ai] = _v
                    _nestedData = []
                    _nestedAspect = {indexAspect[_ai]: _v, "data": _nestedData}
                    for _fact in _vFactsSet - factsClustered:
                        # A fact belongs at this level when its categorical
                        # aspects are exactly those on the current path, with
                        # the same values.
                        if (_fact._factAspectSet == _aspectValue.keys()
                                and all(
                                    _fact._factAspectValues[__ai]
                                    == _aspectValue[__ai]
                                    for __ai in _aspectValue)):
                            _factAspects = factAspects(_fact)
                            _oimFactItem = {
                                oimValue(qnOimValueAspect):
                                _factAspects[qnOimValueAspect]
                            }
                            if hasLocation:
                                _oimFactItem[oimValue(
                                    qnOimLocationAspect
                                )] = _factAspects[qnOimLocationAspect]
                            # type is absent when the fact has no concept
                            if hasType and qnOimTypeAspect in _factAspects:
                                _oimFactItem[oimValue(
                                    qnOimTypeAspect
                                )] = _factAspects[qnOimTypeAspect]
                            _nestedData.append(_oimFactItem)
                            factsClustered.add(_fact)
                    clusterAspect(_avpi + 1, _nestedData)
                    if _nestedData:
                        _data.append(_nestedAspect)
                    del _aspectValue[_ai]

            clusterAspect(0, oimFacts)

        with open(oimFile, "w", encoding="utf-8") as fh:
            fh.write(
                json.dumps(oimReport,
                           ensure_ascii=False,
                           indent=1,
                           sort_keys=True))

    elif isCSV:
        # save CSV

        def openCsv(suffix=None):
            # Open the main file, or the companion file named by replacing
            # ".csv" in oimFile with ``suffix``.
            path = oimFile if suffix is None else oimFile.replace(
                ".csv", suffix)
            return open(path,
                        csvOpenMode,
                        newline=csvOpenNewline,
                        encoding='utf-8-sig')

        # levels of tuple nesting
        def tupleDepth(facts, parentDepth):
            _levelDepth = parentDepth
            for fact in facts:
                _factDepth = tupleDepth(fact.modelTupleFacts, parentDepth + 1)
                if _factDepth > _levelDepth:
                    _levelDepth = _factDepth
            return _levelDepth

        maxDepth = tupleDepth(modelXbrl.facts, 0)

        aspectQnCol = {oimValue(qnOimConceptAspect): maxDepth - 1}
        aspectsHeader = [oimValue(qnOimConceptAspect)]

        # one (unlabelled) concept column per additional nesting level
        aspectsHeader.extend([None] * (maxDepth - 1))

        def addAspectQnCol(aspectQn):
            aspectQnCol[aspectQn] = len(aspectsHeader)
            aspectsHeader.append(oimValue(aspectQn))

        # pre-ordered aspect columns
        if hasId:
            addAspectQnCol(qnOimIdAspect)
        if hasLocation:
            addAspectQnCol(qnOimLocationAspect)
        if hasType:
            addAspectQnCol(qnOimTypeAspect)
        addAspectQnCol(qnOimValueAspect)
        for _orderedAspect in (qnOimEntityAspect, qnOimPeriodAspect,
                               qnOimUnitMulAspect, qnOimUnitDivAspect):
            if _orderedAspect in aspectsDefined:
                addAspectQnCol(_orderedAspect)
        # remaining (non-OIM, i.e. dimension) aspects in name order
        for aspectQn in sorted(aspectsDefined, key=str):
            if aspectQn.namespaceURI != nsOim:
                addAspectQnCol(aspectQn)

        def aspectCols(fact, depth):
            cols = [None] * len(aspectsHeader)
            for aspectQn, aspectValue in factAspects(fact).items():
                if aspectQn == qnOimConceptAspect:
                    # concept goes in the column of the fact's nesting depth
                    cols[depth - 1] = aspectValue
                elif aspectQn in aspectQnCol:
                    cols[aspectQnCol[aspectQn]] = aspectValue
            return cols

        # save facts
        with openCsv() as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            csvWriter.writerow(aspectsHeader)

            def saveCSVfacts(facts, thisDepth):
                for fact in facts:
                    csvWriter.writerow(aspectCols(fact, thisDepth))
                    saveCSVfacts(fact.modelTupleFacts, thisDepth + 1)

            saveCSVfacts(modelXbrl.facts, 1)

        # save namespaces
        # NOTE(review): the JSON branch emits its prefix map when the
        # separator is NOT "clark"; this condition is the opposite and may be
        # inverted -- confirm intended behavior before changing.
        if oimQNameSeparator == "clark":
            with openCsv("-prefixMap.csv") as csvFile:
                csvWriter = csv.writer(csvFile, dialect="excel")
                csvWriter.writerow(("prefix", "mappedURI"))
                for namespaceURI, prefix in sorted(namespacePrefixes.items(),
                                                   key=lambda item: item[1]):
                    csvWriter.writerow((prefix, namespaceURI))

        # save dts references
        with openCsv("-dts.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            csvWriter.writerow(("type", "href"))
            for oimRef in dtsReferences:
                csvWriter.writerow((oimRef["type"], oimRef["href"]))

        # save role and arc type references
        if roleTypes:
            with openCsv("-roleTypes.csv") as csvFile:
                csvWriter = csv.writer(csvFile, dialect="excel")
                csvWriter.writerow(("type", "href"))
                for oimRef in roleTypes:
                    csvWriter.writerow((oimRef["type"], oimRef["href"]))

        # save relationships
        with openCsv("-relationships.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            # relationships are dicts, so presence is a key test
            hasOrder = any("order" in oimRel for oimRel in factRelationships)
            csvWriter.writerow(("fromIds", "toIds", "linkrole", "arcrole") +
                               (("order", ) if hasOrder else ()))
            for oimRel in factRelationships:
                csvWriter.writerow(
                    (",".join(oimRel["fromIds"]), ",".join(oimRel["toIds"]),
                     oimRel["linkrole"], oimRel["arcrole"]) +
                    ((oimRel.get("order", None), ) if hasOrder else ()))

        # save footnotes
        with openCsv("-footnotes.csv") as csvFile:
            csvWriter = csv.writer(csvFile, dialect="excel")
            footnotesHaveLang = any("lang" in oimFtn
                                    for oimFtn in factFootnotes)
            csvWriter.writerow(("id", "role") +
                               (("lang", ) if footnotesHaveLang else ()) +
                               ("value", ))
            for oimFtn in factFootnotes:
                csvWriter.writerow(
                    (oimFtn["id"], oimFtn["role"]) +
                    ((oimFtn.get("lang", None), ) if footnotesHaveLang else
                     ()) + (oimFtn["value"], ))
Ejemplo n.º 9
0
def showInfo(cntlr, options, modelXbrl, _entrypoint, *args, **kwargs):
    for url, doc in sorted(modelXbrl.urlDocs.items(), key=lambda i: i[0]):
        if not any(url.startswith(w) for w in ("https://xbrl.sec.gov/", "http://xbrl.sec.gov/", "http://xbrl.fasb.org/", "http://www.xbrl.org/",
                                               "http://xbrl.ifrs.org/", "http://www.esma.europa.eu/")):
            if os.path.exists(doc.filepath): # skip if in an archive or stream
                cntlr.addToLog("File {} size {:,}".format(doc.basename, os.path.getsize(doc.filepath)), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Heap memory before loading {:,}".format(memoryAtStartup), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Heap memory after loading {:,}".format(cntlr.memoryUsed), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Time to load {:.2f} seconds".format(time.time() - timeAtStart), messageCode="info", level=logging.DEBUG)
    isInlineXbrl = modelXbrl.modelDocument.type in (ModelDocument.Type.INLINEXBRL, ModelDocument.Type.INLINEXBRLDOCUMENTSET)
    if isInlineXbrl:
        instanceType = "inline XBRL, number of documents {}".format(len(modelXbrl.ixdsHtmlElements))
    else:
        instanceType = "xBRL-XML"
    cntlr.addToLog("Instance type {}".format(instanceType), messageCode="info", level=logging.DEBUG)
    numContexts = len(modelXbrl.contexts)
    numLongContexts = 0
    bytesSaveableInline = 0
    bytesSaveableInlineWithCsv = 0
    frequencyOfDims = {}
    sumNumDims = 0
    distinctDurations = set()
    distinctInstants = set()
    shortContextIdLen = int(math.log10(numContexts or 1)) + 2 # if no contexts, use 1 for log function to work
    xbrlQnameCountInline = 0
    xbrlQnameCountInlineWithCsv = 0
    xbrlQnameLengthsInline = 0
    xbrlQnameLengthsInlineWithCsv = 0
    for c in modelXbrl.contexts.values():
        sumNumDims += len(c.qnameDims)
        for d in c.qnameDims.values():
            dimQname = str(d.dimensionQname)
            frequencyOfDims[dimQname] = frequencyOfDims.get(dimQname,0) + 1
            xbrlQnameCountInline += 1
            xbrlQnameCountInlineWithCsv += 1
            xbrlQnameLengthsInline += len(d.dimensionQname.localName)
            xbrlQnameLengthsInlineWithCsv += len(d.dimensionQname.localName)
        if c.isInstantPeriod:
            distinctInstants.add(c.instantDatetime)
        elif c.isStartEndPeriod:
            distinctDurations.add((c.startDatetime, c.endDatetime))
        if len(c.id) > shortContextIdLen:
            bytesSaveableInline += len(c.id) - shortContextIdLen
            bytesSaveableInlineWithCsv += len(c.id) - shortContextIdLen
    cntlr.addToLog("Number of contexts {:,}".format(numContexts), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of distinct durations {:,}".format(len(distinctDurations)), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of distinct instants {:,}".format(len(distinctInstants)), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Avg number dimensions per contexts {:,.2f}".format(sumNumDims/numContexts if numContexts else 0), messageCode="info", level=logging.DEBUG)
    mostPopularDims = sorted(frequencyOfDims.items(), key=lambda i:"{:0>9},{}".format(999999999-i[1],i[0]))
    for dimName, count in mostPopularDims[0:3]:
        cntlr.addToLog("Dimension {} used in {:,} contexts".format(dimName, count), messageCode="info", level=logging.DEBUG)
        
    # analyze for tables which could be composed from CSV data
    tblFacts = defaultdict(set)
    tblNestedTables = defaultdict(set)
    factSize = {}
    for f in modelXbrl.factsInInstance:
        for tdElt in ancestors(f, xhtml, "td"):
            factSize[f] = len(xmlstring(tdElt,stripXmlns=True))
            break
        childTblElt = None
        for tblElt in ancestors(f, xhtml, "table"):
            tblFacts[tblElt].add(f)
            if childTblElt:
                tblNestedTables[tblElt].add(childTblElt)
    
    # find tables containing only numeric facts
    def tblNestedFactCount(tbl):
        c = len(tblFacts.get(tbl, ()))
        for nestedTbl in tblNestedTables.get(tbl,()):
            c += tblNestedFactCount(nestedTbl)
        return c
    
    factsInInstance = len(modelXbrl.factsInInstance)
    factsInTables = len(set.union(*(fset for fset in tblFacts.values())))
    cntlr.addToLog("Facts in instance: {:,}, facts in tables: {:,}".format(factsInInstance,factsInTables), messageCode="info", level=logging.DEBUG)
    
    numTblsEligible = 0
    numFactsEligible = 0
    bytesCsvSavings = 0
    factsEligibleForCsv = set()
    tablesWithEligibleFacts = set()
    if tblFacts and factSize:
        # find eligible tables, have facts and not nested tables with other facts
        for tbl, facts in tblFacts.items():
            if len(facts) == tblNestedFactCount(tbl):
                s = sum(factSize.get(f,0) for f in facts) - sum(len(str(f.value)) for f in facts)
                if s > 10000:
                    numTblsEligible += 1
                    bytesCsvSavings += s
                    numFactsEligible += len(facts)
                    factsEligibleForCsv |= facts
                    tablesWithEligibleFacts.add(tbl)
    numFacts = 0
    numTableTextBlockFacts = 0
    lenTableTextBlockFacts = 0
    numTextBlockFacts = 0
    lenTextBlockFacts = 0
    distinctElementsInFacts = set()
    factsPerContext = {}
    factForConceptContextUnitHash = defaultdict(list)
    for f in modelXbrl.factsInInstance:
        context = f.context
        concept = f.concept
        distinctElementsInFacts.add(f.qname)
        numFacts += 1
        if f.qname.localName.endswith("TableTextBlock"):
            numTableTextBlockFacts += 1
            lenTableTextBlockFacts += len(f.xValue)
        elif f.qname.localName.endswith("TextBlock"):
            numTextBlockFacts += 1
            lenTextBlockFacts += len(f.xValue)
        if context is not None and concept is not None:
            factsPerContext[context.id] = factsPerContext.get(context.id,0) + 1
            factForConceptContextUnitHash[f.conceptContextUnitHash].append(f)
            bytesSaveableInline += len(context.id) - shortContextIdLen
            if f not in factsEligibleForCsv:
                bytesSaveableInlineWithCsv += len(context.id) - shortContextIdLen
            
            
    if numTblsEligible:
        cntlr.addToLog("Tables eligible for facts in CSV: {:,}, facts eligible for CSV: {:,}, bytes saveable by facts in CSV {:,}".format(numTblsEligible, numFactsEligible, bytesCsvSavings), messageCode="info", level=logging.DEBUG)
    else:
        cntlr.addToLog("No tables eligible for facts in CSV", messageCode="info", level=logging.DEBUG)
        

    mostPopularContexts = sorted(factsPerContext.items(), key=lambda i:"{:0>9},{}".format(999999999-i[1],i[0]))
    cntlr.addToLog("Number of facts {:,}".format(numFacts), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of TableTextBlock facts {:,} avg len {:,.0f}".format(numTableTextBlockFacts, lenTableTextBlockFacts/numTableTextBlockFacts if numTableTextBlockFacts else 0), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of TextBlock facts {:,} avg len {:,.0f}".format(numTextBlockFacts, lenTextBlockFacts/numTableTextBlockFacts if numTableTextBlockFacts else 0), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Max number facts per context {:,}".format(mostPopularContexts[0][1] if mostPopularContexts else 0), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Avg number facts per context {:,.2f}".format(sum([v for v in factsPerContext.values()])/numContexts if numContexts else 0), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Distinct elements in facts {:,}".format(len(distinctElementsInFacts)), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of bytes saveable context id of {} length is {:,}".format(shortContextIdLen, bytesSaveableInline), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Excepting facts eligible for CSV, number of bytes saveable context id of {} length is {:,}".format(shortContextIdLen, bytesSaveableInlineWithCsv), messageCode="info", level=logging.DEBUG)

    aspectEqualFacts = defaultdict(list)
    decVals = {}
    numConsistentDupFacts = numInConsistentDupFacts = 0
    for hashEquivalentFacts in factForConceptContextUnitHash.values():
        if len(hashEquivalentFacts) > 1:
            for f in hashEquivalentFacts:
                aspectEqualFacts[(f.qname,f.contextID,f.unitID,
                                  f.xmlLang.lower() if f.concept.type.isWgnStringFactType else None)].append(f)
            for fList in aspectEqualFacts.values():
                f0 = fList[0]
                if f0.concept.isNumeric:
                    if any(f.isNil for f in fList):
                        _inConsistent = not all(f.isNil for f in fList)
                    else: # not all have same decimals
                        _d = inferredDecimals(f0)
                        _v = f0.xValue
                        _inConsistent = isnan(_v) # NaN is incomparable, always makes dups inconsistent
                        decVals[_d] = _v
                        aMax, bMin = rangeValue(_v, _d)
                        for f in fList[1:]:
                            _d = inferredDecimals(f)
                            _v = f.xValue
                            if isnan(_v):
                                _inConsistent = True
                                break
                            if _d in decVals:
                                _inConsistent |= _v != decVals[_d]
                            else:
                                decVals[_d] = _v
                            a, b = rangeValue(_v, _d)
                            if a > aMax: aMax = a
                            if b < bMin: bMin = b
                        if not _inConsistent:
                            _inConsistent = (bMin < aMax)
                        decVals.clear()
                else:
                    _inConsistent = any(not f.isVEqualTo(f0) for f in fList[1:])
                if _inConsistent:
                    numInConsistentDupFacts += 1
                else:
                    numConsistentDupFacts += 1
                    
            aspectEqualFacts.clear()
    cntlr.addToLog("Number of duplicate facts consistent {:,} inconsistent {:,}".format(numConsistentDupFacts, numInConsistentDupFacts), messageCode="info", level=logging.DEBUG)
    
    styleAttrCountsInline = {}
    styleAttrCountsInlineWithCsv = {}
    totalStyleLenInline = 0
    totalStyleLenInlineWithCsv = 0
    continuationElements = {}
    ixNsPrefix = "{http://www.xbrl.org/2013/inlineXBRL}"
    for ixdsHtmlRootElt in getattr(modelXbrl, "ixdsHtmlElements", ()): # ix root elements if inline
        for ixElt in ixdsHtmlRootElt.iterdescendants():
            inEligibleTableForCsv = any(p in tablesWithEligibleFacts for p in ixElt.iterancestors("{http://www.w3.org/1999/xhtml}table"))
            style = ixElt.get("style")
            ixEltTag = str(ixElt.tag)
            if style:
                styleAttrCountsInline[style] = styleAttrCountsInline.get(style,0) + 1
                if not inEligibleTableForCsv:
                    styleAttrCountsInlineWithCsv[style] = styleAttrCountsInlineWithCsv.get(style,0) + 1
                if styleIxHiddenPattern.match(style) is None:
                    totalStyleLenInline += len(style)
                    if not inEligibleTableForCsv:
                        totalStyleLenInlineWithCsv += len(style)
            if ixEltTag == "{http://www.xbrl.org/2013/inlineXBRL}continuation" and ixElt.id:
                continuationElements[ixElt.id] = ixElt
            if ixEltTag.startswith(ixNsPrefix):
                localName = ixEltTag[len(ixNsPrefix):]
                if localName == "continuation" and ixElt.id:
                    continuationElements[ixElt.id] = ixElt
                elif localName in ("nonFraction", "nonNumeric", "fraction"):
                    xbrlQnameCountInline += 1
                    xbrlQnameLengthsInline += len(ixElt.qname.localName)
                    if not inEligibleTableForCsv:
                        xbrlQnameCountInlineWithCsv += 1
                        xbrlQnameLengthsInlineWithCsv += len(ixElt.qname.localName)
            elif isinstance(ixElt, ModelFact):
                xbrlQnameCountInline += 2
                xbrlQnameLengthsInline += len(ixElt.qname.localName)
                if not inEligibleTableForCsv:
                    xbrlQnameCountInlineWithCsv += 2
                    xbrlQnameLengthsInlineWithCsv += len(ixElt.qname.localName)

    def locateContinuation(element, chain=None):
        """Follow an element's ``continuedAt`` links and return the chain length.

        Starting at ``element``, each ``continuedAt`` attribute value is looked
        up in ``continuationElements``.  Every continuation that resolves is
        appended to ``chain`` and stored on its predecessor as
        ``_continuationElement``.

        :param element: object exposing ``get("continuedAt")`` and accepting
            attribute assignment.
        :param chain: optional list of elements already visited; created with
            ``element`` as its first entry on the first resolved hop.
        :returns: number of elements in the chain (the starting element
            included), or 0 when no continuation could be resolved.  The result
            is always an int, so callers can compare and format it even when
            the chain is dangling or cyclic.
        """
        # Iterative walk: a long chain cannot exhaust the recursion limit.
        while True:
            contAt = element.get("continuedAt")
            if not contAt:
                break  # normal end of chain
            if contAt not in continuationElements:
                break  # dangling reference: count what was found so far
            if chain is None:
                chain = [element]
            contElt = continuationElements[contAt]
            if contElt in chain:
                break  # cycle: stop instead of looping forever
            chain.append(contElt)
            element._continuationElement = contElt
            element = contElt
        return len(chain) if chain else 0

    numContinuations = 0
    maxLenLen = 0
    maxLenHops = 0
    maxHops = 0
    maxHopsLen = 0
    for f in modelXbrl.factsInInstance:
        if f.get("continuedAt"):
            numContinuations += 1
            _len = len(f.xValue)
            _hops = locateContinuation(f)
            if _hops > maxHops:
                maxHops = _hops
                maxHopsLen = _len
            if _len > maxLenLen:
                maxLenLen = _len
                maxLenHops = _hops

    cntlr.addToLog("Number of continuation facts {:,}".format(numContinuations), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Longest continuation fact {:,} number of hops {:,}".format(maxLenLen, maxLenHops), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Most continuation hops {:,} fact len {:,}".format(maxHops, maxHopsLen), messageCode="info", level=logging.DEBUG)

    numDupStyles = sum(1 for n in styleAttrCountsInline.values() if n > 1)
    bytesSaveableByCssInline = sum(len(s)*(n-1) for s,n in styleAttrCountsInline.items() if n > 1)
    cntlr.addToLog("Number of duplicate styles {:,}, bytes saveable by CSS {:,}, len of all non-ix-hidden @styles {:,}".format(numDupStyles, bytesSaveableByCssInline, totalStyleLenInline), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Number of XBRL QNames {:,}, bytes saveable by EBA-style element names {:,}".format(xbrlQnameCountInline, xbrlQnameLengthsInline - (5*xbrlQnameCountInline)), messageCode="info", level=logging.DEBUG)
    numDupStyles = sum(1 for n in styleAttrCountsInlineWithCsv.values() if n > 1)
    bytesSaveableByCssInlineWithCsv = sum(len(s)*(n-1) for s,n in styleAttrCountsInlineWithCsv.items() if n > 1)
    cntlr.addToLog("Excepting facts eligible for CSV, number of duplicate styles {:,}, bytes saveable by CSS {:,}, len of all non-ix-hidden @styles {:,}".format(numDupStyles, bytesSaveableByCssInlineWithCsv, totalStyleLenInlineWithCsv), messageCode="info", level=logging.DEBUG)
    cntlr.addToLog("Excepting facts eligible for CSV, number of XBRL QNames {:,}, bytes saveable by EBA-style element names {:,}".format(xbrlQnameCountInlineWithCsv, xbrlQnameLengthsInlineWithCsv - (5*xbrlQnameCountInlineWithCsv)), messageCode="info", level=logging.DEBUG)
Ejemplo n.º 10
0
 def dimValKey(cntx, typedDim=False):
     """Return a sorted, '|'-joined key describing the context's dimensions.

     Each entry has the form ``dimensionQname(value)``.  The value is the
     member QName for an explicit dimension.  For a non-explicit dimension it
     is the serialized ``typedMember`` (xmlns stripped) when ``typedDim`` is
     true, and ``'*'`` otherwise.
     """
     def _valueOf(dimension):
         # explicit members are identified by their QName
         if dimension.isExplicit:
             return dimension.memberQname
         # non-explicit members only contribute their content on request
         if typedDim:
             return xmlstring(dimension.typedMember, stripXmlns=True)
         return '*'

     entries = ["{}({})".format(dimension.dimensionQname, _valueOf(dimension))
                for dimension in cntx.qnameDims.values()]
     entries.sort()
     return '|'.join(entries)