Example #1
0
 def start(self, tag, attrib, nsmap=None):
     mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
     mdlObj.sourceline = 1
     if self.newTree:
         self.newTree = False
         self.currentMdlObj = mdlObj
         modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
         modelXbrl.modelDocument = modelDocument # needed for incremental validation
         mdlObj.init(modelDocument)
         modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
         modelDocument.parserLookupName = _parserLookupName
         modelDocument.parserLookupClass = _parserLookupClass
         modelDocument.xmlRootElement = mdlObj
         modelDocument.schemaLocationElements.add(mdlObj)
         modelDocument.documentEncoding = _encoding
         modelDocument._creationSoftwareComment = creationSoftwareComment
         modelXbrl.info("streamingExtensions:streaming",
                        _("Stream processing this instance."),
                        modelObject = modelDocument)    
     else:
         self.currentMdlObj.append(mdlObj)
         self.currentMdlObj = mdlObj
         mdlObj._init()
         ns = mdlObj.namespaceURI
         ln = mdlObj.localName
         if (self.beforeInstanceStream and (
             (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
             (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
             (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
             self.beforeInstanceStream = False
             if _streamingExtensionsValidate:
                 instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
             else: # need default dimensions
                 ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
     return mdlObj
 def start(self, tag, attrib, nsmap=None):
     mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
     mdlObj.sourceline = 1
     if self.newTree:
         self.newTree = False
         self.currentMdlObj = mdlObj
         modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
         modelXbrl.modelDocument = modelDocument # needed for incremental validation
         mdlObj.init(modelDocument)
         modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
         modelDocument.parserLookupName = _parserLookupName
         modelDocument.parserLookupClass = _parserLookupClass
         modelDocument.xmlRootElement = mdlObj
         modelDocument.schemaLocationElements.add(mdlObj)
         modelDocument.documentEncoding = _encoding
         modelDocument._creationSoftwareComment = creationSoftwareComment
         modelXbrl.info("streamingExtensions:streaming",
                        _("Stream processing this instance."),
                        modelObject = modelDocument)    
     else:
         self.currentMdlObj.append(mdlObj)
         self.currentMdlObj = mdlObj
         mdlObj._init()
         ns = mdlObj.namespaceURI
         ln = mdlObj.localName
         if (self.beforeInstanceStream and (
             (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
             (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
             (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
             self.beforeInstanceStream = False
             if _streamingExtensionsValidate:
                 instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
             else: # need default dimensions
                 ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
     return mdlObj
Example #3
0
 def startElementNS(self, name, qname, attrs):
     namespaceURI, localName = name
     prefix = self.prefixmap.get(namespaceURI, None)
     thisQname = QName(prefix, namespaceURI, localName)
     if not self.qnameStack:
         if thisQname != XbrlConst.qnXbrliXbrl:  # not an instance document
             self.modelXbrl = None # dereference
             self.saxParser = None
             raise NotInstanceDocumentException()
         if self.modelDocument is None:
             self.modelDocument = ModelDocument(self.modelXbrl, Type.INSTANCE, self.mappedUri, self.filepath, None)
         parentQname = None
     elif self.qnameStack:
         parentElement = self.qnameStack[0]
         parentQname = parentElement.elementQname
     if namespaceURI in (XbrlConst.xbrli, XbrlConst.link):
         if parentQname == XbrlConst.qnXbrliContext:
             if localName == "identifier":
                 parentElement._entityIdentifier = (attrs.get(None,"scheme"), "")
             elif localName == "forever":
                 parentElement._isForeverPeriod = True
         else:
             if localName == "context":
                 thisModelObject = BigInstContext(self, thisQname, attrs)
                 self.modelXbrl.contexts[thisModelObject.id] = thisModelObject
             elif localName == "unit":
                 thisModelObject = BigInstUnit(self, thisQname, attrs)
                 self.modelXbrl.units[thisModelObject.id] = thisModelObject
             else:
                 thisModelObject = BigInstModelObject(self, thisQname, attrs)
             if localName in ("schemaRef", "linkbaseRef"):
                 self.modelDocument.discoverHref(thisModelObject)
             else:
                 self.qnameStack.insert(0, thisModelObject)
     elif parentQname:
         if parentQname == XbrlConst.qnXbrliContext:
             if namespaceURI == XbrlConst.xbrldi:
                 if localName == "explicitMember":
                     self.dimensionPrefixedName = attrs.get(None,"dimension")
         else: # might be a fact
             thisModelObject = BigInstFact(self, thisQname, attrs)
             if len(self.qnameStack) > 1:
                 tuple = self.qnameStack[0]
                 if not tuple.modelTupleFacts:
                     tuple.modelTupleFacts = [] # allocate unshared list
                 tuple.modelTupleFacts.append(thisModelObject)
             else:
                 self.modelXbrl.facts.append(thisModelObject)
             self.qnameStack.insert(0, thisModelObject)  # build content
     self.currentNamespaceURI = namespaceURI
     self.currentLocalName = localName
Example #4
0
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, *args, **kwargs):
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    if not _streamingExtensionsCheck:
        return None

    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False

    def logSyntaxErrors(parsercontext):
        for error in parsercontext.error_log:
            modelXbrl.error(
                "xmlSchema:syntax",
                _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"
                  ),
                modelObject=modelXbrl,
                fileName=os.path.basename(filepath),
                error=error.message,
                line=error.line,
                column=error.column,
                sourceAction="streaming")

    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    ''' this seems twice as slow as iterparse
    class instInfoTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.streamingAspects = None
            self.foundInstance = False
            self.creationSoftwareComment = ''
            self.currentEltTag = "(before xbrli:xbrl)"
            self.numRootFacts = 0
        def start(self, tag, attrib, nsmap=None):
            if self.newTree:
                if tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.foundInstance = True
                    self.newTree = False
                else: # break 
                    raise NotInstanceDocumentException()
            elif not tag.startswith("{http://www.xbrl.org/"):
                self.numRootFacts += 1
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
            self.currentEltTag = tag
        def end(self, tag):
            pass
        def data(self, data):
            pass
        def comment(self, text):
            if not self.foundInstance: # accumulate comments before xbrli:xbrl
                self.creationSoftwareComment += ('\n' if self.creationSoftwareComment else '') + text
            elif not self.creationSoftwareComment:
                self.creationSoftwareComment = text # or first comment after xbrli:xbrl
        def pi(self, target, data):
            if target == "xbrl-streamable-instance":
                if self.currentEltTag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.streamingAspects = dict(etree.PI(target,data).attrib.copy()) # dereference target results
                else:
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(target)s, must follow xbrli:xbrl element but was found at %(element)s"),
                            modelObject=modelXbrl, target=target, element=self.currentEltTag)
        def close(self):
            if not self.creationSoftwareComment:
                self.creationSoftwareComment = None
            return True
    instInfo = instInfoTarget()
    infoParser = etree.XMLParser(recover=True, huge_tree=True, target=instInfo)
    try:
        etree.parse(_file, parser=infoParser, base_url=filepath)
    except NotInstanceDocumentException:
        pass
    '''
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file,
                                      events=("start", "end"),
                                      huge_tree=True)
    try:
        for event, elt in instInfoContext:
            if event == "start":
                if elt.getparent() is not None:
                    if elt.getparent(
                    ).tag == "{http://www.xbrl.org/2003/instance}xbrl":
                        if not foundInstance:
                            foundInstance = True
                            pi = precedingProcessingInstruction(
                                elt, "xbrl-streamable-instance")
                            if pi is None:
                                break
                            else:
                                streamingAspects = dict(pi.attrib.copy())
                                if creationSoftwareComment is None:
                                    creationSoftwareComment = precedingComment(
                                        elt)
                        if not elt.tag.startswith("{http://www.xbrl.org/"):
                            instInfoNumRootFacts += 1
                            if instInfoNumRootFacts % 1000 == 0:
                                modelXbrl.profileActivity(
                                    "... streaming tree check",
                                    minTimeToShow=20.0)
                    elif not foundInstance:
                        break
                elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    creationSoftwareComment = precedingComment(elt)
                    if precedingProcessingInstruction(
                            elt, "xbrl-streamable-instance") is not None:
                        modelXbrl.error(
                            "streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"
                              ),
                            modelObject=elt)
            elif event == "end":
                elt.clear()
                numElts += 1
                if numElts % 1000 == 0 and elt.getparent() is not None:
                    while elt.getprevious() is not None and elt.getparent(
                    ) is not None:
                        del elt.getparent()[0]
    except etree.XMLSyntaxError as err:
        modelXbrl.error("xmlSchema:syntax",
                        _("Unrecoverable error: %(error)s"),
                        error=err)
        _file.close()
        return err

    _file.seek(0, io.SEEK_SET)  # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error(
                "streamingExtensions:unsupportedVersion",
                _("Streaming version %(version)s, major version number must be 1"
                  ),
                modelObject=elt,
                version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError):
        modelXbrl.error("streamingExtensions:versionError",
                        _("Version %(version)s, number must be 1.n"),
                        modelObject=elt,
                        version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error(
                "streamingExtensions:valueError",
                _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"
                  ),
                modelObject=elt,
                attrib=bufAspect,
                value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.validationType == "EFM":
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.validationType == "GFM":
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.validationType == "HMRC":
            incompatibleValidations.append("HMRC")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error(
                "streamingExtensions:incompatibleValidation",
                _("Streaming instance validation does not support %(incompatibleValidations)s validation"
                  ),
                modelObject=modelXbrl,
                incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
    logSyntaxErrors(instInfoContext)
    del instInfoContext  # dereference

    for pluginMethod in pluginClassMethods("Streaming.BlockStreaming"):
        _blockingPluginName = pluginMethod(modelXbrl)
        if _blockingPluginName:  # name of blocking plugin is returned
            modelXbrl.error(
                "streamingExtensions:incompatiblePlugIn",
                _("Streaming instance not supported by plugin %(blockingPlugin)s"
                  ),
                modelObject=modelXbrl,
                blockingPlugin=_blockingPluginName)
            foundErrors = True

    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0, io.SEEK_SET)  # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    contextBuffer = []
    contextsToDrop = []
    unitBuffer = []
    unitsToDrop = []
    footnoteBuffer = []
    footnoteLinksToDrop = []

    _streamingFactsPlugin = any(
        True for pluginMethod in pluginClassMethods("Streaming.Facts"))
    _streamingValidateFactsPlugin = (_streamingExtensionsValidate and any(
        True
        for pluginMethod in pluginClassMethods("Streaming.ValidateFacts")))
    ''' this is very much slower than iterparse
    class modelLoaderTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.beforeStartStreamingPlugin = True
            self.numRootFacts = 1
            modelXbrl.makeelementParentModelObject = None
            modelXbrl.isStreamingMode = True
            self.factsCheckVersion = None
            self.factsCheckMd5s = Md5Sum()
        def start(self, tag, attrib, nsmap=None):
            modelXbrl.makeelementParentModelObject = self.currentMdlObj # pass parent to makeelement for ModelObjectFactory
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                if (self.beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters(modelXbrl.prefixedNamespaces))
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif not self.beforeInstanceStream and self.beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    self.beforeStartStreamingPlugin = False
            return mdlObj
        def end(self, tag):
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _streamingValidateFactsPlugin:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                                del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        #>>XmlValidate.validate(modelXbrl, mdlObj)
                        #>>modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _streamingValidateFactsPlugin:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                            del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None 
                    #>>XmlValidate.validate(modelXbrl, mdlObj)
                    #>>modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining batched facts if any
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # finish any final batch of facts
                        if len(modelXbrl.facts) > 0:
                            factsToCheck = modelXbrl.facts.copy()
                            factsHaveBeenProcessed = True
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                if not pluginMethod(modelXbrl, factsToCheck):
                                    factsHaveBeenProcessed = False
                            if factsHaveBeenProcessed:
                                for fact in factsToCheck:
                                    dropFact(modelXbrl, fact, modelXbrl.facts)
                                    del parentMdlObj[parentMdlObj.index(fact)]
                                for cntx in contextsToDrop:
                                    dropContext(modelXbrl, cntx)
                                    del parentMdlObj[parentMdlObj.index(cntx)]
                                for unit in unitsToDrop:
                                    dropUnit(modelXbrl, unit)
                                    del parentMdlObj[parentMdlObj.index(unit)]
                                for footnoteLink in footnoteLinksToDrop:
                                    dropFootnoteLink(modelXbrl, footnoteLink)
                                    del parentMdlObj[parentMdlObj.index(footnoteLink)]
                                fact = cntx = unit = footnoteLink = None
                                del contextsToDrop[:]
                                del unitsToDrop[:]
                                del footnoteLinksToDrop[:]
                            del factsToCheck
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    for pluginMethod in pluginClassMethods("Streaming.Finish"):
                        pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                                # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                self.numRootFacts += 1
                #>>XmlValidate.validate(modelXbrl, mdlObj)
                #>>modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if self.factsCheckVersion:
                    self.factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # use batches of 1000 facts
                        if len(modelXbrl.facts) > 1000:
                            factsToCheck = modelXbrl.facts.copy()
                            factsHaveBeenProcessed = True
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                if not pluginMethod(modelXbrl, factsToCheck):
                                    factsHaveBeenProcessed = False
                            if factsHaveBeenProcessed:
                                for fact in factsToCheck:
                                    dropFact(modelXbrl, fact, modelXbrl.facts)
                                    del parentMdlObj[parentMdlObj.index(fact)]
                                for cntx in contextsToDrop:
                                    dropContext(modelXbrl, cntx)
                                    del parentMdlObj[parentMdlObj.index(cntx)]
                                for unit in unitsToDrop:
                                    dropUnit(modelXbrl, unit)
                                    del parentMdlObj[parentMdlObj.index(unit)]
                                for footnoteLink in footnoteLinksToDrop:
                                    dropFootnoteLink(modelXbrl, footnoteLink)
                                    del parentMdlObj[parentMdlObj.index(footnoteLink)]
                                fact = cntx = unit = footnoteLink = None
                                del contextsToDrop[:]
                                del unitsToDrop[:]
                                del footnoteLinksToDrop[:]
                            del factsToCheck # dereference fact or batch of facts
                    else:
                        dropFact(modelXbrl, mdlObj, modelXbrl.facts) # single fact has been processed
                        del parentMdlObj[parentMdlObj.index(mdlObj)]
                if self.numRootFacts % 1000 == 0:
                    pass
                    #modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts, 
                    #                                                                          100.0 * self.numRootFacts / instInfoNumRootFacts), 
                    #                          minTimeToShow=20.0)
                    gc.collect()
                    sys.stdout.write ("\rAt fact {} of {} mem {}".format(self.numRootFacts, instInfoNumRootFacts, modelXbrl.modelManager.cntlr.memoryUsed))
            return mdlObj
        def data(self, data):
            self.currentMdlObj.text = data
        def comment(self, text):
            pass
        def pi(self, target, data):
            if target == "xbrl-facts-check":
                _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']", data)
                if _match:
                    _matchGroups = _match.groups()
                    if len(_matchGroups) == 2:
                        if _matchGroups[0] == "version":
                            self.factsCheckVersion = _matchGroups[1]
                        elif _matchGroups[0] == "sum-of-fact-md5s":
                            try:
                                expectedMd5 = Md5Sum(_matchGroups[1])
                                if self.factsCheckMd5s != expectedMd5:
                                    modelXbrl.warning("streamingExtensions:xbrlFactsCheckWarning",
                                            _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"),
                                            modelObject=modelXbrl, expectedMd5=expectedMd5, actualMd5Sum=self.factsCheckMd5s)
                                else:
                                    modelXbrl.info("info",
                                            _("Successful XBRL facts sum of md5s."),
                                            modelObject=modelXbrl)
                            except ValueError:
                                modelXbrl.error("streamingExtensions:xbrlFactsCheckError",
                                        _("Invalid sum-of-md5s %(sumOfMd5)s"),
                                        modelObject=modelXbrl, sumOfMd5=_matchGroups[1])
        def close(self):
            del modelXbrl.makeelementParentModelObject
            return None
        
        def factCheckFact(self, fact):
            self.factsCheckMd5s += fact.md5sum
            for _tupleFact in fact.modelTupleFacts:
                self.factCheckFact(_tupleFact)
        
    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    etree.parse(_file, parser=_parser, base_url=filepath)
    logSyntaxErrors(_parser)
    '''
    # replace modelLoaderTarget with iterparse (as it now supports CustomElementClassLookup)
    streamingParserContext = etree.iterparse(_file,
                                             events=("start", "end"),
                                             huge_tree=True)
    from arelle.ModelObjectFactory import setParserElementClassLookup
    modelXbrl.isStreamingMode = True  # must be set before setting element class lookup
    (_parser, _parserLookupName,
     _parserLookupClass) = setParserElementClassLookup(streamingParserContext,
                                                       modelXbrl)
    foundInstance = False
    beforeInstanceStream = beforeStartStreamingPlugin = True
    numRootFacts = 0
    factsCheckVersion = None

    def factCheckFact(fact):
        modelDocument._factsCheckMd5s += fact.md5sum
        for _tupleFact in fact.modelTupleFacts:
            factCheckFact(_tupleFact)

    for event, mdlObj in streamingParserContext:
        if event == "start":
            if mdlObj.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE,
                                              mappedUri, filepath,
                                              mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument  # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = precedingComment(
                    mdlObj)
                modelDocument._factsCheckMd5s = Md5Sum()
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject=modelDocument)
            elif mdlObj.getparent() is not None:
                mdlObj._init()  # requires discovery as part of start elements
                if mdlObj.getparent(
                ).tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        foundInstance = True
                        pi = precedingProcessingInstruction(
                            mdlObj, "xbrl-facts-check")
                        if pi is not None:
                            factsCheckVersion = pi.attrib.get("version", None)
                elif not foundInstance:
                    break
                ns = mdlObj.qname.namespaceURI
                ln = mdlObj.qname.localName
                if beforeInstanceStream:
                    if ((ns == XbrlConst.link
                         and ln not in ("schemaRef", "linkbaseRef")) or
                        (ns == XbrlConst.xbrli and ln in ("context", "unit"))
                            or (ns not in (XbrlConst.link, XbrlConst.xbrli))):
                        beforeInstanceStream = False
                        if _streamingExtensionsValidate:
                            instValidator.validate(
                                modelXbrl,
                                modelXbrl.modelManager.formulaOptions.
                                typedParameters(modelXbrl.prefixedNamespaces))
                        else:  # need default dimensions
                            ValidateXbrlDimensions.loadDimensionDefaults(
                                modelXbrl)
                elif not beforeInstanceStream and beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    beforeStartStreamingPlugin = False
        elif event == "end":
            parentMdlObj = mdlObj.getparent()
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj, )
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(
                                contextsToCheck)
                        del contextsToCheck  # dereference
                elif ln == "unit":
                    if len(unitBuffer) >= unitBufferLimit:
                        # drop before additing as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                            #>>del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits((mdlObj, ))
                elif ln == "xbrl":  # end of document
                    # check remaining batched facts if any
                    if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # finish any final batch of facts
                        if len(modelXbrl.facts) > 0:
                            factsToCheck = modelXbrl.facts.copy()
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            if _streamingValidateFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.ValidateFacts"):
                                    pluginMethod(instValidator, factsToCheck)
                            if _streamingFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.Facts"):
                                    pluginMethod(modelXbrl, factsToCheck)
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                #>>del parentMdlObj[parentMdlObj.index(fact)]
                            for cntx in contextsToDrop:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            for unit in unitsToDrop:
                                dropUnit(modelXbrl, unit)
                                #>>del parentMdlObj[parentMdlObj.index(unit)]
                            for footnoteLink in footnoteLinksToDrop:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            fact = cntx = unit = footnoteLink = None
                            del contextsToDrop[:]
                            del unitsToDrop[:]
                            del footnoteLinksToDrop[:]
                            del factsToCheck
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    pi = childProcessingInstruction(mdlObj,
                                                    "xbrl-facts-check",
                                                    reversed=True)
                    if pi is not None:  # attrib is in .text, not attrib, no idea why!!!
                        _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']",
                                           pi.text)
                        if _match:
                            _matchGroups = _match.groups()
                            if len(_matchGroups) == 2:
                                if _matchGroups[0] == "sum-of-fact-md5s":
                                    try:
                                        expectedMd5 = Md5Sum(_matchGroups[1])
                                        if modelDocument._factsCheckMd5s != expectedMd5:
                                            modelXbrl.warning(
                                                "streamingExtensions:xbrlFactsCheckWarning",
                                                _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"
                                                  ),
                                                modelObject=modelXbrl,
                                                expectedMd5=expectedMd5,
                                                actualMd5Sum=modelDocument.
                                                _factsCheckMd5s)
                                        else:
                                            modelXbrl.info(
                                                "info",
                                                _("Successful XBRL facts sum of md5s."
                                                  ),
                                                modelObject=modelXbrl)
                                    except ValueError:
                                        modelXbrl.error(
                                            "streamingExtensions:xbrlFactsCheckError",
                                            _("Invalid sum-of-md5s %(sumOfMd5)s"
                                              ),
                                            modelObject=modelXbrl,
                                            sumOfMd5=_matchGroups[1])
                    if _streamingValidateFactsPlugin:
                        for pluginMethod in pluginClassMethods(
                                "Streaming.ValidateFinish"):
                            pluginMethod(instValidator)
                    if _streamingFactsPlugin:
                        for pluginMethod in pluginClassMethods(
                                "Streaming.Finish"):
                            pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(
                        mdlObj,
                        urlRewritePluginClass=
                        "ModelDocument.InstanceSchemaRefRewriter")
                elif ln in ("roleRef", "arcroleRef"):
                    modelDocument.linkbaseDiscover((mdlObj, ), inInstance=True)
                elif ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj, )
                    modelDocument.linkbaseDiscover(footnoteLinks,
                                                   inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                            # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl and isinstance(
                    mdlObj, ModelFact):
                numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if factsCheckVersion:
                    factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _streamingFactsPlugin or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,
                                    )  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # use batches of 1000 facts
                        if len(modelXbrl.facts) > 1000:
                            factsToCheck = modelXbrl.facts.copy()
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            if _streamingValidateFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.ValidateFacts"):
                                    pluginMethod(instValidator, factsToCheck)
                            if _streamingFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.Facts"):
                                    pluginMethod(modelXbrl, factsToCheck)
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                #>>del parentMdlObj[parentMdlObj.index(fact)]
                            for cntx in contextsToDrop:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            for unit in unitsToDrop:
                                dropUnit(modelXbrl, unit)
                                #>>del parentMdlObj[parentMdlObj.index(unit)]
                            for footnoteLink in footnoteLinksToDrop:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            fact = cntx = unit = footnoteLink = None
                            del contextsToDrop[:]
                            del unitsToDrop[:]
                            del footnoteLinksToDrop[:]
                            del factsToCheck  # dereference fact or batch of facts
                    else:
                        dropFact(
                            modelXbrl, mdlObj,
                            modelXbrl.facts)  # single fact has been processed
                        #>>del parentMdlObj[parentMdlObj.index(mdlObj)]
                if numRootFacts % 1000 == 0:
                    pass
                    #modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts,
                    #                                                                          100.0 * self.numRootFacts / instInfoNumRootFacts),
                    #                          minTimeToShow=20.0)
                    #gc.collect()
                    #sys.stdout.write ("\rAt fact {} of {} mem {}".format(numRootFacts, instInfoNumRootFacts, modelXbrl.modelManager.cntlr.memoryUsed))
    if mdlObj is not None:
        mdlObj.clear()
    del _parser, _parserLookupName, _parserLookupClass

    if _streamingExtensionsValidate and validator is not None:
        _file.close()
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True

    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument
Example #5
0
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, *args, **kwargs):
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    if not _streamingExtensionsCheck:
        return None
    
    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False
        
    def logSyntaxErrors(parsercontext):
        for error in parsercontext.error_log:
            modelXbrl.error("xmlSchema:syntax",
                    _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                    modelObject=modelXbrl, fileName=os.path.basename(filepath), 
                    error=error.message, line=error.line, column=error.column, sourceAction="streaming")
    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    ''' this seems twice as slow as iterparse
    class instInfoTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.streamingAspects = None
            self.foundInstance = False
            self.creationSoftwareComment = ''
            self.currentEltTag = "(before xbrli:xbrl)"
            self.numRootFacts = 0
        def start(self, tag, attrib, nsmap=None):
            if self.newTree:
                if tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.foundInstance = True
                    self.newTree = False
                else: # break 
                    raise NotInstanceDocumentException()
            elif not tag.startswith("{http://www.xbrl.org/"):
                self.numRootFacts += 1
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
            self.currentEltTag = tag
        def end(self, tag):
            pass
        def data(self, data):
            pass
        def comment(self, text):
            if not self.foundInstance: # accumulate comments before xbrli:xbrl
                self.creationSoftwareComment += ('\n' if self.creationSoftwareComment else '') + text
            elif not self.creationSoftwareComment:
                self.creationSoftwareComment = text # or first comment after xbrli:xbrl
        def pi(self, target, data):
            if target == "xbrl-streamable-instance":
                if self.currentEltTag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.streamingAspects = dict(etree.PI(target,data).attrib.copy()) # dereference target results
                else:
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(target)s, must follow xbrli:xbrl element but was found at %(element)s"),
                            modelObject=modelXbrl, target=target, element=self.currentEltTag)
        def close(self):
            if not self.creationSoftwareComment:
                self.creationSoftwareComment = None
            return True
    instInfo = instInfoTarget()
    infoParser = etree.XMLParser(recover=True, huge_tree=True, target=instInfo)
    try:
        etree.parse(_file, parser=infoParser, base_url=filepath)
    except NotInstanceDocumentException:
        pass
    '''
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
    try:
        for event, elt in instInfoContext:
            if event == "start":
                if elt.getparent() is not None:
                    if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                        if not foundInstance:
                            foundInstance = True
                            pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                            if pi is None:
                                break
                            else:
                                streamingAspects = dict(pi.attrib.copy())
                                if creationSoftwareComment is None:
                                    creationSoftwareComment = precedingComment(elt)
                        if not elt.tag.startswith("{http://www.xbrl.org/"):
                            instInfoNumRootFacts += 1
                            if instInfoNumRootFacts % 1000 == 0:
                                modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                    elif not foundInstance:       
                        break
                elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    creationSoftwareComment = precedingComment(elt)
                    if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                        modelXbrl.error("streamingExtensions:headerMisplaced",
                                _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                                modelObject=elt)
            elif event == "end":
                elt.clear()
                numElts += 1
                if numElts % 1000 == 0 and elt.getparent() is not None:
                    while elt.getprevious() is not None and elt.getparent() is not None:
                        del elt.getparent()[0]
    except etree.XMLSyntaxError as err:
        modelXbrl.error("xmlSchema:syntax",
                _("Unrecoverable error: %(error)s"),
                error=err)
        _file.close()
        return err
        
    _file.seek(0,io.SEEK_SET) # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error("streamingExtensions:unsupportedVersion",
                    _("Streaming version %(version)s, major version number must be 1"),
                    modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError):
        modelXbrl.error("streamingExtensions:versionError",
                _("Version %(version)s, number must be 1.n"),
                modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error("streamingExtensions:valueError",
                    _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                    modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.validationType == "EFM":
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.validationType == "GFM":
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.validationType == "HMRC":
            incompatibleValidations.append("HMRC")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error("streamingExtensions:incompatibleValidation",
                    _("Streaming instance validation does not support %(incompatibleValidations)s validation"),
                    modelObject=modelXbrl, incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
    logSyntaxErrors(instInfoContext)
    del instInfoContext # dereference

    for pluginMethod in pluginClassMethods("Streaming.BlockStreaming"):
        _blockingPluginName = pluginMethod(modelXbrl)
        if _blockingPluginName: # name of blocking plugin is returned
            modelXbrl.error("streamingExtensions:incompatiblePlugIn",
                    _("Streaming instance not supported by plugin %(blockingPlugin)s"),
                    modelObject=modelXbrl, blockingPlugin=_blockingPluginName)
            foundErrors = True
    
    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0,io.SEEK_SET) # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    contextBuffer = []
    contextsToDrop = []
    unitBuffer = []
    unitsToDrop = []
    footnoteBuffer = []
    footnoteLinksToDrop = []
    
    _streamingFactsPlugin = any(True for pluginMethod in pluginClassMethods("Streaming.Facts"))
    _streamingValidateFactsPlugin = (_streamingExtensionsValidate and 
                                     any(True for pluginMethod in pluginClassMethods("Streaming.ValidateFacts")))

    ''' this is very much slower than iterparse
    class modelLoaderTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.beforeStartStreamingPlugin = True
            self.numRootFacts = 1
            modelXbrl.makeelementParentModelObject = None
            modelXbrl.isStreamingMode = True
            self.factsCheckVersion = None
            self.factsCheckMd5s = Md5Sum()
        def start(self, tag, attrib, nsmap=None):
            modelXbrl.makeelementParentModelObject = self.currentMdlObj # pass parent to makeelement for ModelObjectFactory
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                if (self.beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters(modelXbrl.prefixedNamespaces))
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif not self.beforeInstanceStream and self.beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    self.beforeStartStreamingPlugin = False
            return mdlObj
        def end(self, tag):
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _streamingValidateFactsPlugin:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                                del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        #>>XmlValidate.validate(modelXbrl, mdlObj)
                        #>>modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _streamingValidateFactsPlugin:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                            del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None 
                    #>>XmlValidate.validate(modelXbrl, mdlObj)
                    #>>modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining batched facts if any
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # finish any final batch of facts
                        if len(modelXbrl.facts) > 0:
                            factsToCheck = modelXbrl.facts.copy()
                            factsHaveBeenProcessed = True
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                if not pluginMethod(modelXbrl, factsToCheck):
                                    factsHaveBeenProcessed = False
                            if factsHaveBeenProcessed:
                                for fact in factsToCheck:
                                    dropFact(modelXbrl, fact, modelXbrl.facts)
                                    del parentMdlObj[parentMdlObj.index(fact)]
                                for cntx in contextsToDrop:
                                    dropContext(modelXbrl, cntx)
                                    del parentMdlObj[parentMdlObj.index(cntx)]
                                for unit in unitsToDrop:
                                    dropUnit(modelXbrl, unit)
                                    del parentMdlObj[parentMdlObj.index(unit)]
                                for footnoteLink in footnoteLinksToDrop:
                                    dropFootnoteLink(modelXbrl, footnoteLink)
                                    del parentMdlObj[parentMdlObj.index(footnoteLink)]
                                fact = cntx = unit = footnoteLink = None
                                del contextsToDrop[:]
                                del unitsToDrop[:]
                                del footnoteLinksToDrop[:]
                            del factsToCheck
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    for pluginMethod in pluginClassMethods("Streaming.Finish"):
                        pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                                # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                self.numRootFacts += 1
                #>>XmlValidate.validate(modelXbrl, mdlObj)
                #>>modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if self.factsCheckVersion:
                    self.factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # use batches of 1000 facts
                        if len(modelXbrl.facts) > 1000:
                            factsToCheck = modelXbrl.facts.copy()
                            factsHaveBeenProcessed = True
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                if not pluginMethod(modelXbrl, factsToCheck):
                                    factsHaveBeenProcessed = False
                            if factsHaveBeenProcessed:
                                for fact in factsToCheck:
                                    dropFact(modelXbrl, fact, modelXbrl.facts)
                                    del parentMdlObj[parentMdlObj.index(fact)]
                                for cntx in contextsToDrop:
                                    dropContext(modelXbrl, cntx)
                                    del parentMdlObj[parentMdlObj.index(cntx)]
                                for unit in unitsToDrop:
                                    dropUnit(modelXbrl, unit)
                                    del parentMdlObj[parentMdlObj.index(unit)]
                                for footnoteLink in footnoteLinksToDrop:
                                    dropFootnoteLink(modelXbrl, footnoteLink)
                                    del parentMdlObj[parentMdlObj.index(footnoteLink)]
                                fact = cntx = unit = footnoteLink = None
                                del contextsToDrop[:]
                                del unitsToDrop[:]
                                del footnoteLinksToDrop[:]
                            del factsToCheck # dereference fact or batch of facts
                    else:
                        dropFact(modelXbrl, mdlObj, modelXbrl.facts) # single fact has been processed
                        del parentMdlObj[parentMdlObj.index(mdlObj)]
                if self.numRootFacts % 1000 == 0:
                    pass
                    #modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts, 
                    #                                                                          100.0 * self.numRootFacts / instInfoNumRootFacts), 
                    #                          minTimeToShow=20.0)
                    gc.collect()
                    sys.stdout.write ("\rAt fact {} of {} mem {}".format(self.numRootFacts, instInfoNumRootFacts, modelXbrl.modelManager.cntlr.memoryUsed))
            return mdlObj
        def data(self, data):
            self.currentMdlObj.text = data
        def comment(self, text):
            pass
        def pi(self, target, data):
            if target == "xbrl-facts-check":
                _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']", data)
                if _match:
                    _matchGroups = _match.groups()
                    if len(_matchGroups) == 2:
                        if _matchGroups[0] == "version":
                            self.factsCheckVersion = _matchGroups[1]
                        elif _matchGroups[0] == "sum-of-fact-md5s":
                            try:
                                expectedMd5 = Md5Sum(_matchGroups[1])
                                if self.factsCheckMd5s != expectedMd5:
                                    modelXbrl.warning("streamingExtensions:xbrlFactsCheckWarning",
                                            _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"),
                                            modelObject=modelXbrl, expectedMd5=expectedMd5, actualMd5Sum=self.factsCheckMd5s)
                                else:
                                    modelXbrl.info("info",
                                            _("Successful XBRL facts sum of md5s."),
                                            modelObject=modelXbrl)
                            except ValueError:
                                modelXbrl.error("streamingExtensions:xbrlFactsCheckError",
                                        _("Invalid sum-of-md5s %(sumOfMd5)s"),
                                        modelObject=modelXbrl, sumOfMd5=_matchGroups[1])
        def close(self):
            del modelXbrl.makeelementParentModelObject
            return None
        
        def factCheckFact(self, fact):
            self.factsCheckMd5s += fact.md5sum
            for _tupleFact in fact.modelTupleFacts:
                self.factCheckFact(_tupleFact)
        
    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    etree.parse(_file, parser=_parser, base_url=filepath)
    logSyntaxErrors(_parser)
    '''
    # replace modelLoaderTarget with iterparse (as it now supports CustomElementClassLookup)
    streamingParserContext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
    from arelle.ModelObjectFactory import setParserElementClassLookup
    modelXbrl.isStreamingMode = True # must be set before setting element class lookup
    (_parser, _parserLookupName, _parserLookupClass) = setParserElementClassLookup(streamingParserContext, modelXbrl)
    foundInstance = False
    beforeInstanceStream = beforeStartStreamingPlugin = True
    numRootFacts = 0
    factsCheckVersion = None
    def factCheckFact(fact):
        modelDocument._factsCheckMd5s += fact.md5sum
        for _tupleFact in fact.modelTupleFacts:
            factCheckFact(_tupleFact)
    for event, mdlObj in streamingParserContext:
        if event == "start":
            if mdlObj.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = precedingComment(mdlObj)
                modelDocument._factsCheckMd5s = Md5Sum()
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            elif mdlObj.getparent() is not None:
                mdlObj._init() # requires discovery as part of start elements
                if mdlObj.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        foundInstance = True
                        pi = precedingProcessingInstruction(mdlObj, "xbrl-facts-check")
                        if pi is not None:
                            factsCheckVersion = pi.attrib.get("version", None)
                elif not foundInstance:       
                    break
                ns = mdlObj.qname.namespaceURI
                ln = mdlObj.qname.localName
                if beforeInstanceStream:
                    if ((ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                        (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                        (ns not in (XbrlConst.link, XbrlConst.xbrli))):
                        beforeInstanceStream = False
                        if _streamingExtensionsValidate:
                            instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters(modelXbrl.prefixedNamespaces))
                        else: # need default dimensions
                            ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif not beforeInstanceStream and beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    beforeStartStreamingPlugin = False
        elif event == "end":
            parentMdlObj = mdlObj.getparent()
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if len(unitBuffer) >= unitBufferLimit:
                        # drop before additing as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                            #>>del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None 
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining batched facts if any
                    if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # finish any final batch of facts
                        if len(modelXbrl.facts) > 0:
                            factsToCheck = modelXbrl.facts.copy()
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            if _streamingValidateFactsPlugin:
                                for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                    pluginMethod(instValidator, factsToCheck)
                            if _streamingFactsPlugin:
                                for pluginMethod in pluginClassMethods("Streaming.Facts"):
                                    pluginMethod(modelXbrl, factsToCheck)
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                #>>del parentMdlObj[parentMdlObj.index(fact)]
                            for cntx in contextsToDrop:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            for unit in unitsToDrop:
                                dropUnit(modelXbrl, unit)
                                #>>del parentMdlObj[parentMdlObj.index(unit)]
                            for footnoteLink in footnoteLinksToDrop:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            fact = cntx = unit = footnoteLink = None
                            del contextsToDrop[:]
                            del unitsToDrop[:]
                            del footnoteLinksToDrop[:]
                            del factsToCheck
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    pi = childProcessingInstruction(mdlObj, "xbrl-facts-check", reversed=True)
                    if pi is not None: # attrib is in .text, not attrib, no idea why!!!
                        _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']", pi.text)
                        if _match:
                            _matchGroups = _match.groups()
                            if len(_matchGroups) == 2:
                                if _matchGroups[0] == "sum-of-fact-md5s":
                                    try:
                                        expectedMd5 = Md5Sum(_matchGroups[1])
                                        if modelDocument._factsCheckMd5s != expectedMd5:
                                            modelXbrl.warning("streamingExtensions:xbrlFactsCheckWarning",
                                                    _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"),
                                                    modelObject=modelXbrl, expectedMd5=expectedMd5, actualMd5Sum=modelDocument._factsCheckMd5s)
                                        else:
                                            modelXbrl.info("info",
                                                    _("Successful XBRL facts sum of md5s."),
                                                    modelObject=modelXbrl)
                                    except ValueError:
                                        modelXbrl.error("streamingExtensions:xbrlFactsCheckError",
                                                _("Invalid sum-of-md5s %(sumOfMd5)s"),
                                                modelObject=modelXbrl, sumOfMd5=_matchGroups[1])
                    if _streamingValidateFactsPlugin:
                        for pluginMethod in pluginClassMethods("Streaming.ValidateFinish"):
                            pluginMethod(instValidator)
                    if _streamingFactsPlugin:
                        for pluginMethod in pluginClassMethods("Streaming.Finish"):
                            pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj, urlRewritePluginClass="ModelDocument.InstanceSchemaRefRewriter")
                elif ln in ("roleRef", "arcroleRef"):
                    modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
                elif ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                                # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl and isinstance(mdlObj, ModelFact):
                numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if factsCheckVersion:
                    factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _streamingFactsPlugin or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # use batches of 1000 facts
                        if len(modelXbrl.facts) > 1000:
                            factsToCheck = modelXbrl.facts.copy()
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            if _streamingValidateFactsPlugin:
                                for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                    pluginMethod(instValidator, factsToCheck)
                            if _streamingFactsPlugin:
                                for pluginMethod in pluginClassMethods("Streaming.Facts"):
                                    pluginMethod(modelXbrl, factsToCheck)
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                #>>del parentMdlObj[parentMdlObj.index(fact)]
                            for cntx in contextsToDrop:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            for unit in unitsToDrop:
                                dropUnit(modelXbrl, unit)
                                #>>del parentMdlObj[parentMdlObj.index(unit)]
                            for footnoteLink in footnoteLinksToDrop:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            fact = cntx = unit = footnoteLink = None
                            del contextsToDrop[:]
                            del unitsToDrop[:]
                            del footnoteLinksToDrop[:]
                            del factsToCheck # dereference fact or batch of facts
                    else:
                        dropFact(modelXbrl, mdlObj, modelXbrl.facts) # single fact has been processed
                        #>>del parentMdlObj[parentMdlObj.index(mdlObj)]
                if numRootFacts % 1000 == 0:
                    pass
                    #modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts, 
                    #                                                                          100.0 * self.numRootFacts / instInfoNumRootFacts), 
                    #                          minTimeToShow=20.0)
                    #gc.collect()
                    #sys.stdout.write ("\rAt fact {} of {} mem {}".format(numRootFacts, instInfoNumRootFacts, modelXbrl.modelManager.cntlr.memoryUsed))
    if mdlObj is not None:
        mdlObj.clear()
    del _parser, _parserLookupName, _parserLookupClass                
                
    if _streamingExtensionsValidate and validator is not None:
        _file.close()
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True
        
    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument
Example #6
0
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath):
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    def logSyntaxErrors(parsercontext):
        for error in parsercontext.error_log:
            modelXbrl.error("xmlSchema:syntax",
                    _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                    modelObject=modelDocument, fileName=os.path.basename(filepath), 
                    error=error.message, line=error.line, column=error.column, sourceAction="streaming")
    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    parsercontext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
    foundInstance = False
    foundErrors = False
    streamingAspects = None
    numRootFacts1 = 0
    numElts = 0
    elt = None
    for event, elt in parsercontext:
        if event == "start":
            if elt.getparent() is not None:
                if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        foundInstance = True
                        pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                        if pi is None:
                            break
                        else:
                            streamingAspects = dict(pi.attrib.copy())
                    if not elt.tag.startswith("{http://www.xbrl.org/"):
                        numRootFacts1 += 1
                        if numRootFacts1 % 1000 == 0:
                            modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                elif not foundInstance:       
                    break
            elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl" and precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                modelXbrl.error("streamingExtensions:headerMisplaced",
                        _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                        modelObject=elt)
        elif event == "end":
            elt.clear()
            numElts += 1
            if numElts % 1000 == 0 and elt.getparent() is not None:
                while elt.getprevious() is not None and elt.getparent() is not None:
                    del elt.getparent()[0]
    if elt is not None:
        elt.clear()
    _file.seek(0,io.SEEK_SET) # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt, parsercontext
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error("streamingExtensions:unsupportedVersion",
                    _("Streaming version %(version)s, major version number must be 1"),
                    modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError):
        modelXbrl.error("streamingExtensions:versionError",
                _("Version %(version)s, number must be 1.n"),
                modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error("streamingExtensions:valueError",
                    _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                    modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if parsercontext.error_log:
        foundErrors = True
    logSyntaxErrors(parsercontext)
    
    if foundErrors:
        _file.close()
        return None
    parsercontext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl,filepath)
    eltMdlObjs = {}
    beforeInstanceStream = True
    validator = None
    contextBuffer = []
    unitBuffer = []
    footnoteBuffer = []
    factBuffer = []
    numFacts = numRootFacts2 = 1
    for event, elt in parsercontext:
        if event == "start":
            mdlObj = _parser.makeelement(elt.tag, attrib=elt.attrib, nsmap=elt.nsmap)
            mdlObj.sourceline = elt.sourceline
            eltMdlObjs[elt] = mdlObj
            if elt.getparent() is None:
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, etree.ElementTree(mdlObj))
                modelDocument.xmlRootElement = mdlObj
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)    
            else:
                eltMdlObjs[elt.getparent()].append(mdlObj)
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                if (beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        validator = Validate(modelXbrl)
                        validator.instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
            mdlObj = None # deref
                        
        elif event == "end":
            mdlObj = eltMdlObjs.pop(elt)
            if elt.text: # text available after child nodes processed
                mdlObj.text = elt.text
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            parentMdlObj = mdlObj.getparent()
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            dropContext(modelXbrl, cntx)
                            del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        validator.instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            validator.instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before additing as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        dropUnit(modelXbrl, unit)
                        del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None 
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        validator.instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                elt.clear()
            elif ns == XbrlConst.link:
                if ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif ln in ("roleRef", "arcroleRef"):
                    modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
                elif ln == "footnoteLink":
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        validator.instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                                # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            dropFootnoteLink(modelXbrl, footnoteLink)
                            del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elt.clear()
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                numRootFacts2 += 1
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                XmlValidate.validate(modelXbrl, mdlObj)
                if _streamingExtensionsValidate:
                    factsToCheck = (mdlObj,)
                    validator.instValidator.checkFacts(factsToCheck)
                    if modelXbrl.hasXDT:
                        validator.instValidator.checkFactsDimensions(factsToCheck)
                    del factsToCheck
                    dropFact(modelXbrl, mdlObj, modelXbrl.facts)
                    del parentMdlObj[parentMdlObj.index(mdlObj)]
                if numRootFacts2 % 1000 == 0:
                    modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(numRootFacts2, numRootFacts1, 100.0 * numRootFacts2 / numRootFacts1), 
                                              minTimeToShow=20.0)
                # get rid of root element from iterparse's tree
                elt.clear()
                while elt.getprevious() is not None:  # cleans up any prior siblings
                    del elt.getparent()[0]
            mdlObj = None # deref
    logSyntaxErrors(parsercontext)
    del parsercontext
    if validator is not None:
        validator.close()
    _file.close()
    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelDocument
Example #7
0
class saxHandler(xml.sax.ContentHandler):
    def __init__(self, saxParser, modelXbrl, mappedUri, filepath):
        self.saxParser = saxParser
        self.modelXbrl = modelXbrl
        self.mappedUri = mappedUri
        self.filepath = filepath
        self.nsmap = {}
        self.prefixmap = {}
        self.qnameStack = []
        self.currentNamespaceURI = None
        self.modelDocument = None
        self.contextRefedFacts = defaultdict(list)
        self.unitRefedFacts = defaultdict(list)
        
    def qname(self, prefixedName):
        prefix, sep, localName = prefixedName.rpartition(":")
        return QName(prefix, self.nsmap.get(prefix,None), localName)
        
    def startPrefixMapping(self, prefix, uri):
        self.nsmap[prefix] = uri
        self.prefixmap[uri] = prefix
        
    def endPrefixMapping(self, prefix):
        if prefix in self.nsmap:
            self.prefixmap.pop(self.nsmap[prefix], None)
        self.nsmap.pop(prefix, None)
        
    def startElementNS(self, name, qname, attrs):
        namespaceURI, localName = name
        prefix = self.prefixmap.get(namespaceURI, None)
        thisQname = QName(prefix, namespaceURI, localName)
        if not self.qnameStack:
            if thisQname != XbrlConst.qnXbrliXbrl:  # not an instance document
                self.modelXbrl = None # dereference
                self.saxParser = None
                raise NotInstanceDocumentException()
            if self.modelDocument is None:
                self.modelDocument = ModelDocument(self.modelXbrl, Type.INSTANCE, self.mappedUri, self.filepath, None)
            parentQname = None
        elif self.qnameStack:
            parentElement = self.qnameStack[0]
            parentQname = parentElement.elementQname
        if namespaceURI in (XbrlConst.xbrli, XbrlConst.link):
            if parentQname == XbrlConst.qnXbrliContext:
                if localName == "identifier":
                    parentElement._entityIdentifier = (attrs.get(None,"scheme"), "")
                elif localName == "forever":
                    parentElement._isForeverPeriod = True
            else:
                if localName == "context":
                    thisModelObject = BigInstContext(self, thisQname, attrs)
                    self.modelXbrl.contexts[thisModelObject.id] = thisModelObject
                elif localName == "unit":
                    thisModelObject = BigInstUnit(self, thisQname, attrs)
                    self.modelXbrl.units[thisModelObject.id] = thisModelObject
                else:
                    thisModelObject = BigInstModelObject(self, thisQname, attrs)
                if localName in ("schemaRef", "linkbaseRef"):
                    self.modelDocument.discoverHref(thisModelObject)
                else:
                    self.qnameStack.insert(0, thisModelObject)
        elif parentQname:
            if parentQname == XbrlConst.qnXbrliContext:
                if namespaceURI == XbrlConst.xbrldi:
                    if localName == "explicitMember":
                        self.dimensionPrefixedName = attrs.get(None,"dimension")
            else: # might be a fact
                thisModelObject = BigInstFact(self, thisQname, attrs)
                if len(self.qnameStack) > 1:
                    tuple = self.qnameStack[0]
                    if not tuple.modelTupleFacts:
                        tuple.modelTupleFacts = [] # allocate unshared list
                    tuple.modelTupleFacts.append(thisModelObject)
                else:
                    self.modelXbrl.facts.append(thisModelObject)
                self.qnameStack.insert(0, thisModelObject)  # build content
        self.currentNamespaceURI = namespaceURI
        self.currentLocalName = localName
 
    def endElementNS(self, name, qname):
        thisQname = QName(None, *name)
        if self.qnameStack and self.qnameStack[0].elementQname == thisQname:
            elt = self.qnameStack.pop(0)
            if elt.namespaceURI == XbrlConst.xbrli:
                if elt.localName == "unit":
                    elt._measures = (sorted(elt._measures[0]), sorted(elt._measures[1]))
                    if elt.id in self.unitRefedFacts:
                        for fact in self.unitRefedFacts[elt.id]:
                            fact._unit = elt
                        del self.unitRefedFacts[elt.id]
                elif elt.localName == "context":
                    if elt.id in self.contextRefedFacts:
                        for fact in self.contextRefedFacts[elt.id]:
                            fact._context = elt
                        del self.contextRefedFacts[elt.id]
            self.currentNamespaceURI = None
            self.currentLocalName = None
            XmlValidate.validate(self.modelXbrl, elt, recurse=False)
            pass
            
    def characters(self, content):
        if self.currentNamespaceURI:
            elt = self.qnameStack[0]
            if self.currentNamespaceURI == XbrlConst.xbrli:
                s = content.strip()
                if s:
                    if self.currentLocalName == "identifier":
                        elt._entityIdentifier = (elt._entityIdentifier[0], elt._entityIdentifier[1] + content)
                    elif self.currentLocalName == "startDate":
                        elt._startDatetime = XmlUtil.datetimeValue(s)
                        elt._isStartEndPeriod = True
                    elif self.currentLocalName == "endDate":
                        elt._endDatetime = XmlUtil.datetimeValue(s, addOneDay=True)
                        elt._isStartEndPeriod = True
                    elif self.currentLocalName == "instant":
                        elt._endDatetime = elt._instantDatetime = XmlUtil.datetimeValue(s, addOneDay=True)
                        elt._isInstantPeriod = True
                    elif self.currentLocalName == "measure":
                        m = self.qname(content)
                        parentEltLocalName = self.qnameStack[1].localName
                        if parentEltLocalName == "unit":
                            self.qnameStack[1]._measures[0].append(m)
                        elif parentEltLocalName == "unitNumerator" and self.qnameStack[2].localName == "unit":
                            self.qnameStack[2]._measures[0].append(m)
                        elif parentEltLocalName == "unitDenominator" and self.qnameStack[2].localName == "unit":
                            self.qnameStack[2]._measures[1].append(m)
            elif self.currentNamespaceURI == XbrlConst.xbrldi:
                s = content.strip()
                if s:
                    if self.currentLocalName == "explicitMember" and self.dimensionPrefixedName:
                        dimQname = self.qname(self.currentLocalName)
                        memQname = self.qname(s)
                        dimConcept = self.modelXbrl.qnameConcepts.get(dimQname)
                        memConcept = self.modelXbrl.qnameConcepts.get(memQname)
            elif elt is not None:
                elt._elementText += content
        
    def skippedEntity(self, name):
        print ("skipped entity={0}".format(name))