def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, *args, **kwargs):
    """Load an XBRL instance in streaming mode if it carries a streaming header.

    The instance is first scanned for an ``xbrl-streamable-instance``
    processing instruction directly following the ``xbrli:xbrl`` start tag.
    When present and valid, the file is parsed a second time with
    ``etree.iterparse`` and contexts, units, footnote links and facts are
    discovered (and optionally validated) as their end tags are reached.

    Args:
        modelXbrl: model into which the instance is loaded.
        mappedUri: URI passed through to ``ModelDocument``.
        filepath: path of the instance; opened via ``modelXbrl.fileSource``.
        *args, **kwargs: accepted for plugin-interface compatibility; unused.

    Returns:
        ``None`` when streaming checks are disabled, the file is not a
        streamable instance, or header/compatibility errors were logged;
        the ``etree.XMLSyntaxError`` instance when the initial scan hits an
        unrecoverable syntax error; otherwise ``modelXbrl.modelDocument``.
    """
    # check if big instance and has header with an initial incomplete tree walk
    if not _streamingExtensionsCheck:
        return None

    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False

    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    try:
        return _streamInstanceFile(modelXbrl, mappedUri, filepath, _file)
    finally:
        # release the handle on every exit path, including exceptions and
        # successful loads performed without validation
        _file.close()


def _streamInstanceFile(modelXbrl, mappedUri, filepath, _file):
    """Run the header scan and the streaming parse on an already-open file.

    The caller owns ``_file`` and is responsible for closing it.  Return
    values are those documented on ``streamingExtensionsLoader``.
    """
    def logSyntaxErrors(parsercontext):
        # report every recoverable syntax error collected by lxml
        for error in parsercontext.error_log:
            modelXbrl.error(
                "xmlSchema:syntax",
                _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                modelObject=modelXbrl, fileName=os.path.basename(filepath),
                error=error.message, line=error.line, column=error.column,
                sourceAction="streaming")

    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    # NOTE: parser-target (SAX style) implementations of both passes were
    # tried and found to be considerably slower than iterparse.
    startedAt = time.time()
    modelXbrl.profileActivity()

    # ---- pass 1: locate the streaming header and count root facts ----
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    # collected during the scan; the streaming pass re-reads the comment
    # from the root element itself
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file, events=("start", "end"), huge_tree=True)
    try:
        for event, elt in instInfoContext:
            if event == "start":
                if elt.getparent() is not None:
                    if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                        if not foundInstance:
                            # first child of xbrli:xbrl: header PI must precede it
                            foundInstance = True
                            pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                            if pi is None:
                                break
                            streamingAspects = dict(pi.attrib.copy())
                            if creationSoftwareComment is None:
                                creationSoftwareComment = precedingComment(elt)
                        if not elt.tag.startswith("{http://www.xbrl.org/"):
                            instInfoNumRootFacts += 1
                            if instInfoNumRootFacts % 1000 == 0:
                                modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                    elif not foundInstance:
                        break
                elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    creationSoftwareComment = precedingComment(elt)
                    if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                        # the message template references %(error)s, so an
                        # argument of that name has to be supplied
                        modelXbrl.error(
                            "streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                            modelObject=elt, error="xbrl-streamable-instance")
            elif event == "end":
                elt.clear()
                numElts += 1
                # periodically prune already-processed preceding siblings
                if numElts % 1000 == 0 and elt.getparent() is not None:
                    while elt.getprevious() is not None and elt.getparent() is not None:
                        del elt.getparent()[0]
    except etree.XMLSyntaxError as err:
        modelXbrl.error("xmlSchema:syntax",
                        _("Unrecoverable error: %(error)s"),
                        error=err)
        return err

    _file.seek(0, io.SEEK_SET)  # allow reparsing
    if not foundInstance or streamingAspects is None:
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()

    # ---- validate the streaming header attributes ----
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error(
                "streamingExtensions:unsupportedVersion",
                _("Streaming version %(version)s, major version number must be 1"),
                modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError, TypeError, ValueError):
        # TypeError: version attribute absent (Decimal(None));
        # ValueError: int() of a NaN decimal
        modelXbrl.error(
            "streamingExtensions:versionError",
            _("Version %(version)s, number must be 1.n"),
            modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error(
                "streamingExtensions:valueError",
                _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if (_validateDisclosureSystem and
                _disclosureSystem.validationType in ("EFM", "GFM", "HMRC")):
            incompatibleValidations.append(_disclosureSystem.validationType)
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error(
                "streamingExtensions:incompatibleValidation",
                _("Streaming instance validation does not support %(incompatibleValidations)s validation"),
                modelObject=modelXbrl,
                incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
        logSyntaxErrors(instInfoContext)
    del instInfoContext  # dereference

    for pluginMethod in pluginClassMethods("Streaming.BlockStreaming"):
        _blockingPluginName = pluginMethod(modelXbrl)
        if _blockingPluginName:  # name of blocking plugin is returned
            modelXbrl.error(
                "streamingExtensions:incompatiblePlugIn",
                _("Streaming instance not supported by plugin %(blockingPlugin)s"),
                modelObject=modelXbrl, blockingPlugin=_blockingPluginName)
            foundErrors = True

    if foundErrors:
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0, io.SEEK_SET)  # allow reparsing

    # ---- pass 2: stream the instance ----
    validator = instValidator = None
    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    contextBuffer = []
    contextsToDrop = []
    unitBuffer = []
    unitsToDrop = []
    footnoteBuffer = []
    footnoteLinksToDrop = []

    _streamingFactsPlugin = any(
        True for pluginMethod in pluginClassMethods("Streaming.Facts"))
    _streamingValidateFactsPlugin = (_streamingExtensionsValidate and any(
        True for pluginMethod in pluginClassMethods("Streaming.ValidateFacts")))
    _factPlugins = _streamingFactsPlugin or _streamingValidateFactsPlugin

    def flushFactBatch():
        # plugins process the batch of all root facts not yet processed
        # (not just the current one); afterwards the facts and any
        # contexts/units/footnote links whose drop was deferred are released
        factsToCheck = modelXbrl.facts.copy()
        if _streamingValidateFactsPlugin:
            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                pluginMethod(instValidator, factsToCheck)
        if _streamingFactsPlugin:
            for pluginMethod in pluginClassMethods("Streaming.Facts"):
                pluginMethod(modelXbrl, factsToCheck)
        for fact in factsToCheck:
            dropFact(modelXbrl, fact, modelXbrl.facts)
        for cntx in contextsToDrop:
            dropContext(modelXbrl, cntx)
        for unit in unitsToDrop:
            dropUnit(modelXbrl, unit)
        for footnoteLink in footnoteLinksToDrop:
            dropFootnoteLink(modelXbrl, footnoteLink)
        del contextsToDrop[:]
        del unitsToDrop[:]
        del footnoteLinksToDrop[:]

    def factCheckFact(fact):
        # accumulate md5 sums of the fact and, recursively, its tuple children
        modelDocument._factsCheckMd5s += fact.md5sum
        for _tupleFact in fact.modelTupleFacts:
            factCheckFact(_tupleFact)

    # iterparse supports CustomElementClassLookup, so model objects are
    # created directly by the parser
    streamingParserContext = etree.iterparse(_file, events=("start", "end"), huge_tree=True)
    from arelle.ModelObjectFactory import setParserElementClassLookup
    modelXbrl.isStreamingMode = True  # must be set before setting element class lookup
    (_parser, _parserLookupName, _parserLookupClass) = setParserElementClassLookup(
        streamingParserContext, modelXbrl)
    foundInstance = False
    beforeInstanceStream = beforeStartStreamingPlugin = True
    factsCheckVersion = None
    mdlObj = None

    for event, mdlObj in streamingParserContext:
        if event == "start":
            if mdlObj.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri,
                                              filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument  # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = precedingComment(mdlObj)
                modelDocument._factsCheckMd5s = Md5Sum()
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject=modelDocument)
            elif mdlObj.getparent() is not None:
                mdlObj._init()  # requires discovery as part of start elements
                if mdlObj.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        foundInstance = True
                        pi = precedingProcessingInstruction(mdlObj, "xbrl-facts-check")
                        if pi is not None:
                            factsCheckVersion = pi.attrib.get("version", None)
                elif not foundInstance:
                    break
                ns = mdlObj.qname.namespaceURI
                ln = mdlObj.qname.localName
                if beforeInstanceStream:
                    # first element that is not a schemaRef/linkbaseRef (or
                    # other non-streamed header content) starts the stream
                    if ((ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                            (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                            (ns not in (XbrlConst.link, XbrlConst.xbrli))):
                        beforeInstanceStream = False
                        if _streamingExtensionsValidate:
                            instValidator.validate(
                                modelXbrl,
                                modelXbrl.modelManager.formulaOptions.typedParameters(
                                    modelXbrl.prefixedNamespaces))
                        else:  # need default dimensions
                            ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    beforeStartStreamingPlugin = False
        elif event == "end":
            parentMdlObj = mdlObj.getparent()
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        # sticky contexts are discovered but never buffered
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _factPlugins:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck  # dereference
                elif ln == "unit":
                    if len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _factPlugins:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                        unit = None
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits((mdlObj,))
                elif ln == "xbrl":  # end of document
                    # finish any final batch of facts
                    if _factPlugins and len(modelXbrl.facts) > 0:
                        flushFactBatch()
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    pi = childProcessingInstruction(mdlObj, "xbrl-facts-check", reversed=True)
                    if pi is not None:
                        # the pseudo-attribute is read from .text rather than
                        # .attrib; .text may be None for an empty PI
                        _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']", pi.text or "")
                        if _match and _match.group(1) == "sum-of-fact-md5s":
                            try:
                                expectedMd5 = Md5Sum(_match.group(2))
                                if modelDocument._factsCheckMd5s != expectedMd5:
                                    modelXbrl.warning(
                                        "streamingExtensions:xbrlFactsCheckWarning",
                                        _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"),
                                        modelObject=modelXbrl, expectedMd5=expectedMd5,
                                        actualMd5Sum=modelDocument._factsCheckMd5s)
                                else:
                                    modelXbrl.info(
                                        "info",
                                        _("Successful XBRL facts sum of md5s."),
                                        modelObject=modelXbrl)
                            except ValueError:
                                modelXbrl.error(
                                    "streamingExtensions:xbrlFactsCheckError",
                                    _("Invalid sum-of-md5s %(sumOfMd5)s"),
                                    modelObject=modelXbrl, sumOfMd5=_match.group(2))
                    if _streamingValidateFactsPlugin:
                        for pluginMethod in pluginClassMethods("Streaming.ValidateFinish"):
                            pluginMethod(instValidator)
                    if _streamingFactsPlugin:
                        for pluginMethod in pluginClassMethods("Streaming.Finish"):
                            pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(
                        mdlObj,
                        urlRewritePluginClass="ModelDocument.InstanceSchemaRefRewriter")
                elif ln in ("roleRef", "arcroleRef"):
                    modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
                elif ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                            # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                            footnoteLink = None
                    footnoteLinks = None
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl and isinstance(mdlObj, ModelFact):
                # root-level fact
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if factsCheckVersion:
                    factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _factPlugins:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    # use batches of 1000 facts
                    if _factPlugins and len(modelXbrl.facts) > 1000:
                        flushFactBatch()
                    del factsToCheck  # dereference fact
                else:
                    # single fact has been processed
                    dropFact(modelXbrl, mdlObj, modelXbrl.facts)
    if mdlObj is not None:
        mdlObj.clear()

    if _streamingExtensionsValidate and validator is not None:
        instValidator = None  # dereference before closing its owner
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True

    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath): # check if big instance and has header with an initial incomplete tree walk (just 2 elements def logSyntaxErrors(parsercontext): for error in parsercontext.error_log: modelXbrl.error("xmlSchema:syntax", _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"), modelObject=modelDocument, fileName=os.path.basename(filepath), error=error.message, line=error.line, column=error.column, sourceAction="streaming") #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ### #### note: iterparse wants a binary file, but file is text mode _file, = modelXbrl.fileSource.file(filepath, binary=True) startedAt = time.time() modelXbrl.profileActivity() parsercontext = etree.iterparse(_file, events=("start","end"), huge_tree=True) foundInstance = False foundErrors = False streamingAspects = None numRootFacts1 = 0 numElts = 0 elt = None for event, elt in parsercontext: if event == "start": if elt.getparent() is not None: if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl": if not foundInstance: foundInstance = True pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance") if pi is None: break else: streamingAspects = dict(pi.attrib.copy()) if not elt.tag.startswith("{http://www.xbrl.org/"): numRootFacts1 += 1 if numRootFacts1 % 1000 == 0: modelXbrl.profileActivity("... 
streaming tree check", minTimeToShow=20.0) elif not foundInstance: break elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl" and precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None: modelXbrl.error("streamingExtensions:headerMisplaced", _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"), modelObject=elt) elif event == "end": elt.clear() numElts += 1 if numElts % 1000 == 0 and elt.getparent() is not None: while elt.getprevious() is not None and elt.getparent() is not None: del elt.getparent()[0] if elt is not None: elt.clear() _file.seek(0,io.SEEK_SET) # allow reparsing if not foundInstance or streamingAspects is None: del elt, parsercontext _file.close() return None modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt) startedAt = time.time() try: version = Decimal(streamingAspects.get("version")) if int(version) != 1: modelXbrl.error("streamingExtensions:unsupportedVersion", _("Streaming version %(version)s, major version number must be 1"), modelObject=elt, version=version) foundErrors = True except (InvalidOperation, OverflowError): modelXbrl.error("streamingExtensions:versionError", _("Version %(version)s, number must be 1.n"), modelObject=elt, version=streamingAspects.get("version", "(none)")) foundErrors = True for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"): try: bufLimit = Decimal(streamingAspects.get(bufAspect, "INF")) if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0): raise InvalidOperation elif bufAspect == "contextBuffer": contextBufferLimit = bufLimit elif bufAspect == "unitBuffer": unitBufferLimit = bufLimit elif bufAspect == "footnoteBuffer": footnoteBufferLimit = bufLimit except InvalidOperation: modelXbrl.error("streamingExtensions:valueError", _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"), modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect)) foundErrors = True if parsercontext.error_log: 
foundErrors = True logSyntaxErrors(parsercontext) if foundErrors: _file.close() return None parsercontext = etree.iterparse(_file, events=("start","end"), huge_tree=True) _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl,filepath) eltMdlObjs = {} beforeInstanceStream = True validator = None contextBuffer = [] unitBuffer = [] footnoteBuffer = [] factBuffer = [] numFacts = numRootFacts2 = 1 for event, elt in parsercontext: if event == "start": mdlObj = _parser.makeelement(elt.tag, attrib=elt.attrib, nsmap=elt.nsmap) mdlObj.sourceline = elt.sourceline eltMdlObjs[elt] = mdlObj if elt.getparent() is None: modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, etree.ElementTree(mdlObj)) modelDocument.xmlRootElement = mdlObj modelXbrl.modelDocument = modelDocument # needed for incremental validation mdlObj.init(modelDocument) modelXbrl.info("streamingExtensions:streaming", _("Stream processing this instance."), modelObject = modelDocument) else: eltMdlObjs[elt.getparent()].append(mdlObj) mdlObj._init() ns = mdlObj.namespaceURI ln = mdlObj.localName if (beforeInstanceStream and ( (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or (ns == XbrlConst.xbrli and ln in ("context", "unit")) or (ns not in (XbrlConst.link, XbrlConst.xbrli)))): beforeInstanceStream = False if _streamingExtensionsValidate: validator = Validate(modelXbrl) validator.instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters()) else: # need default dimensions ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl) mdlObj = None # deref elif event == "end": mdlObj = eltMdlObjs.pop(elt) if elt.text: # text available after child nodes processed mdlObj.text = elt.text ns = mdlObj.namespaceURI ln = mdlObj.localName parentMdlObj = mdlObj.getparent() if ns == XbrlConst.xbrli: if ln == "context": if mdlObj.get("sticky"): del mdlObj.attrib["sticky"] modelDocument.contextDiscover(mdlObj) else: if _streamingExtensionsValidate and 
len(contextBuffer) >= contextBufferLimit: # drop before adding as dropped may have same id as added cntx = contextBuffer.pop(0) dropContext(modelXbrl, cntx) del parentMdlObj[parentMdlObj.index(cntx)] cntx = None modelDocument.contextDiscover(mdlObj) if contextBufferLimit.is_finite(): contextBuffer.append(mdlObj) if _streamingExtensionsValidate: contextsToCheck = (mdlObj,) validator.instValidator.checkContexts(contextsToCheck) if modelXbrl.hasXDT: validator.instValidator.checkContextsDimensions(contextsToCheck) del contextsToCheck # dereference elif ln == "unit": if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit: # drop before additing as dropped may have same id as added unit = unitBuffer.pop(0) dropUnit(modelXbrl, unit) del parentMdlObj[parentMdlObj.index(unit)] unit = None modelDocument.unitDiscover(mdlObj) if unitBufferLimit.is_finite(): unitBuffer.append(mdlObj) if _streamingExtensionsValidate: validator.instValidator.checkUnits( (mdlObj,) ) elif ln == "xbrl": # end of document # check remaining footnote refs for footnoteLink in footnoteBuffer: checkFootnoteHrefs(modelXbrl, footnoteLink) elt.clear() elif ns == XbrlConst.link: if ln in ("schemaRef", "linkbaseRef"): modelDocument.discoverHref(mdlObj) elif ln in ("roleRef", "arcroleRef"): modelDocument.linkbaseDiscover((mdlObj,), inInstance=True) elif ln == "footnoteLink": footnoteLinks = (mdlObj,) modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True) if footnoteBufferLimit.is_finite(): footnoteBuffer.append(mdlObj) if _streamingExtensionsValidate: validator.instValidator.checkLinks(footnoteLinks) if len(footnoteBuffer) > footnoteBufferLimit: # check that hrefObjects for locators were all satisfied # drop before addition as dropped may have same id as added footnoteLink = footnoteBuffer.pop(0) checkFootnoteHrefs(modelXbrl, footnoteLink) dropFootnoteLink(modelXbrl, footnoteLink) del parentMdlObj[parentMdlObj.index(footnoteLink)] footnoteLink = None footnoteLinks = None elt.clear() elif 
parentMdlObj.qname == XbrlConst.qnXbrliXbrl: numRootFacts2 += 1 modelDocument.factDiscover(mdlObj, modelXbrl.facts) XmlValidate.validate(modelXbrl, mdlObj) if _streamingExtensionsValidate: factsToCheck = (mdlObj,) validator.instValidator.checkFacts(factsToCheck) if modelXbrl.hasXDT: validator.instValidator.checkFactsDimensions(factsToCheck) del factsToCheck dropFact(modelXbrl, mdlObj, modelXbrl.facts) del parentMdlObj[parentMdlObj.index(mdlObj)] if numRootFacts2 % 1000 == 0: modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(numRootFacts2, numRootFacts1, 100.0 * numRootFacts2 / numRootFacts1), minTimeToShow=20.0) # get rid of root element from iterparse's tree elt.clear() while elt.getprevious() is not None: # cleans up any prior siblings del elt.getparent()[0] mdlObj = None # deref logSyntaxErrors(parsercontext) del parsercontext if validator is not None: validator.close() _file.close() modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt) return modelDocument
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, **kwargs):
    """Load an XBRL instance in streaming mode when it carries a streaming header.

    The file is read twice.  A first, lightweight iterparse pass looks for the
    ``xbrl-streamable-instance`` processing instruction (via
    ``precedingProcessingInstruction``), captures its attributes, and counts
    the root-level facts for progress reporting.  If the header is present and
    valid, a second pass feeds the file through a parser target that builds
    model objects element by element and, when streaming validation is
    enabled, validates contexts, units, footnote links and facts as they
    complete and drops them again to bound memory use.

    Args:
        modelXbrl: model being loaded; provides the file source, logging,
            profiling and the model manager settings consulted here.
        mappedUri: URI handed to ``ModelDocument`` for the created document.
        filepath: path of the instance file to open and parse.
        **kwargs: accepted for loader-interface compatibility; not used here.

    Returns:
        ``modelXbrl.modelDocument`` after a successful streaming load, or
        ``None`` when streaming is disabled, the file is not a streamable
        instance, or any header/compatibility/syntax error was reported.
    """
    if not _streamingExtensionsCheck:
        return None

    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False

    def logSyntaxErrors(parsercontext):
        # report every entry of the parser's error log as an xmlSchema:syntax error
        for error in parsercontext.error_log:
            modelXbrl.error("xmlSchema:syntax",
                            _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                            modelObject=modelXbrl, fileName=os.path.basename(filepath),
                            error=error.message, line=error.line, column=error.column,
                            sourceAction="streaming")

    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()

    # NOTE: a parser-target based header scan was tried for this first pass and
    # seemed about twice as slow as iterparse, so iterparse is used here.
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file, events=("start", "end"), huge_tree=True)
    for event, elt in instInfoContext:
        if event == "start":
            if elt.getparent() is not None:
                if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        # first child of xbrli:xbrl: the header PI must precede it
                        foundInstance = True
                        pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                        if pi is None:
                            break
                        else:
                            streamingAspects = dict(pi.attrib.copy())
                            if creationSoftwareComment is None:
                                creationSoftwareComment = precedingComment(elt)
                    if not elt.tag.startswith("{http://www.xbrl.org/"):
                        instInfoNumRootFacts += 1
                        if instInfoNumRootFacts % 1000 == 0:
                            modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                elif not foundInstance:
                    # nested element reached without an xbrli:xbrl parent: not an instance
                    break
            elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                creationSoftwareComment = precedingComment(elt)
                if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                    # NOTE(review): the message references %(error)s but no "error"
                    # argument is supplied to this call -- confirm intended text.
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                                    _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                                    modelObject=elt)
        elif event == "end":
            elt.clear()
            numElts += 1
            if numElts % 1000 == 0 and elt.getparent() is not None:
                # periodically release already-processed preceding siblings
                while elt.getprevious() is not None and elt.getparent() is not None:
                    del elt.getparent()[0]
    if elt is not None:
        elt.clear()

    _file.seek(0, io.SEEK_SET)  # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()

    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error("streamingExtensions:unsupportedVersion",
                            _("Streaming version %(version)s, major version number must be 1"),
                            modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError, TypeError, ValueError):
        # TypeError: version attribute absent (Decimal(None));
        # ValueError: NaN cannot be converted by int()
        modelXbrl.error("streamingExtensions:versionError",
                        _("Version %(version)s, number must be 1.n"),
                        modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error("streamingExtensions:valueError",
                            _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                            modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.EFM:
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.GFM:
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.EBA:
            incompatibleValidations.append("EBA")
        if _validateDisclosureSystem and _disclosureSystem.HMRC:
            # was mislabelled "EBA", which hid the real cause from the user
            incompatibleValidations.append("HMRC")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error("streamingExtensions:incompatibleValidation",
                            _("Streaming instance validation does not support %(incompatibleValidations)s validation"),
                            modelObject=modelXbrl,
                            incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
        logSyntaxErrors(instInfoContext)
    del instInfoContext  # dereference

    for pluginMethod in pluginClassMethods("Streaming.BlockStreaming"):
        _blockingPluginName = pluginMethod(modelXbrl)
        if _blockingPluginName:  # name of blocking plugin is returned
            modelXbrl.error("streamingExtensions:incompatiblePlugIn",
                            _("Streaming instance not supported by plugin %(blockingPlugin)s"),
                            modelObject=modelXbrl, blockingPlugin=_blockingPluginName)
            foundErrors = True

    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0, io.SEEK_SET)  # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    # buffers of retained model objects, shared with the parser target below
    contextBuffer = []
    unitBuffer = []
    footnoteBuffer = []

    _streamingValidateFactsPlugin = any(True for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"))

    class modelLoaderTarget():
        # parser target: receives start/end/data events and builds model objects
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.beforeStartStreamingPlugin = True
            self.numRootFacts = 1

            modelXbrl.streamingParentModelObject = None
            modelXbrl.isStreamingMode = True

        def start(self, tag, attrib, nsmap=None):
            modelXbrl.streamingParentModelObject = self.currentMdlObj  # pass parent to makeelement for ModelObjectFactory
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                # root element: create the model document around it
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument  # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                if (self.beforeInstanceStream and (
                        (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                        (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                        (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    # first element past the schemaRef/linkbaseRef prologue
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
                    else:  # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif not self.beforeInstanceStream and self.beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    self.beforeStartStreamingPlugin = False
            return mdlObj

        def end(self, tag):
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        # sticky contexts are discovered but never placed in the drop buffer
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            dropContext(modelXbrl, cntx)
                            del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck  # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        dropUnit(modelXbrl, unit)
                        del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl":  # end of document
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    for pluginMethod in pluginClassMethods("Streaming.Finish"):
                        pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                            # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            dropFootnoteLink(modelXbrl, footnoteLink)
                            del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                # root-level fact
                self.numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if _streamingExtensionsValidate or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        factsToCheck = modelXbrl.facts.copy()
                        factsHaveBeenProcessed = True
                        # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                        for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                            if not pluginMethod(modelXbrl, factsToCheck):
                                factsHaveBeenProcessed = False
                        if factsHaveBeenProcessed:
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                del parentMdlObj[parentMdlObj.index(fact)]
                    else:
                        dropFact(modelXbrl, mdlObj, modelXbrl.facts)  # single fact has been processed
                        del parentMdlObj[parentMdlObj.index(mdlObj)]
                    del factsToCheck  # dereference fact or batch of facts
                if self.numRootFacts % 1000 == 0:
                    # the first pass counts facts with a looser namespace test than
                    # this branch, so its total can be zero; avoid dividing by it
                    if instInfoNumRootFacts:
                        pctDone = 100.0 * self.numRootFacts / instInfoNumRootFacts
                    else:
                        pctDone = 0.0
                    modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(
                        self.numRootFacts, instInfoNumRootFacts, pctDone), minTimeToShow=20.0)
            return mdlObj

        def data(self, data):
            self.currentMdlObj.text = data

        def comment(self, text):
            pass

        def pi(self, target, data):
            pass

        def close(self):
            del modelXbrl.streamingParentModelObject
            return None

    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    try:
        etree.parse(_file, parser=_parser, base_url=filepath)
        logSyntaxErrors(_parser)
    finally:
        # always release the file, whether or not validating and even if parsing raises
        _file.close()
    if _streamingExtensionsValidate and validator is not None:
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True

    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, **kwargs):
    """Load an XBRL instance in streaming mode when it carries a streaming header.

    The file is read twice.  A first, lightweight iterparse pass looks for the
    ``xbrl-streamable-instance`` processing instruction (via
    ``precedingProcessingInstruction``), captures its attributes, and counts
    the root-level facts for progress reporting.  If the header is present and
    valid, a second pass feeds the file through a parser target that builds
    model objects element by element and, when streaming validation is
    enabled, validates contexts, units, footnote links and facts as they
    complete and drops them again to bound memory use.

    Args:
        modelXbrl: model being loaded; provides the file source, logging,
            profiling and the model manager settings consulted here.
        mappedUri: URI handed to ``ModelDocument`` for the created document.
        filepath: path of the instance file to open and parse.
        **kwargs: accepted for loader-interface compatibility; not used here.

    Returns:
        ``modelXbrl.modelDocument`` after a successful streaming load, or
        ``None`` when streaming is disabled, the file is not a streamable
        instance, or any header/compatibility/syntax error was reported.
    """
    if not _streamingExtensionsCheck:
        return None

    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False

    def logSyntaxErrors(parsercontext):
        # report every entry of the parser's error log as an xmlSchema:syntax error
        for error in parsercontext.error_log:
            modelXbrl.error("xmlSchema:syntax",
                            _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                            modelObject=modelXbrl, fileName=os.path.basename(filepath),
                            error=error.message, line=error.line, column=error.column,
                            sourceAction="streaming")

    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()

    # NOTE: a parser-target based header scan was tried for this first pass and
    # seemed about twice as slow as iterparse, so iterparse is used here.
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file, events=("start", "end"), huge_tree=True)
    for event, elt in instInfoContext:
        if event == "start":
            if elt.getparent() is not None:
                if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        # first child of xbrli:xbrl: the header PI must precede it
                        foundInstance = True
                        pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                        if pi is None:
                            break
                        else:
                            streamingAspects = dict(pi.attrib.copy())
                            if creationSoftwareComment is None:
                                creationSoftwareComment = precedingComment(elt)
                    if not elt.tag.startswith("{http://www.xbrl.org/"):
                        instInfoNumRootFacts += 1
                        if instInfoNumRootFacts % 1000 == 0:
                            modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                elif not foundInstance:
                    # nested element reached without an xbrli:xbrl parent: not an instance
                    break
            elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                creationSoftwareComment = precedingComment(elt)
                if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                    # NOTE(review): the message references %(error)s but no "error"
                    # argument is supplied to this call -- confirm intended text.
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                                    _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                                    modelObject=elt)
        elif event == "end":
            elt.clear()
            numElts += 1
            if numElts % 1000 == 0 and elt.getparent() is not None:
                # periodically release already-processed preceding siblings
                while elt.getprevious() is not None and elt.getparent() is not None:
                    del elt.getparent()[0]
    if elt is not None:
        elt.clear()

    _file.seek(0, io.SEEK_SET)  # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()

    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error("streamingExtensions:unsupportedVersion",
                            _("Streaming version %(version)s, major version number must be 1"),
                            modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError, TypeError, ValueError):
        # TypeError: version attribute absent (Decimal(None));
        # ValueError: NaN cannot be converted by int()
        modelXbrl.error("streamingExtensions:versionError",
                        _("Version %(version)s, number must be 1.n"),
                        modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error("streamingExtensions:valueError",
                            _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                            modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.EFM:
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.GFM:
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.EBA:
            incompatibleValidations.append("EBA")
        if _validateDisclosureSystem and _disclosureSystem.HMRC:
            # was mislabelled "EBA", which hid the real cause from the user
            incompatibleValidations.append("HMRC")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error("streamingExtensions:incompatibleValidation",
                            _("Streaming instance validation does not support %(incompatibleValidations)s validation"),
                            modelObject=modelXbrl,
                            incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
        logSyntaxErrors(instInfoContext)
    del instInfoContext  # dereference

    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0, io.SEEK_SET)  # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    # buffers of retained model objects, shared with the parser target below
    contextBuffer = []
    unitBuffer = []
    footnoteBuffer = []

    class modelLoaderTarget():
        # parser target: receives start/end/data events and builds model objects
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.numRootFacts = 1

        def start(self, tag, attrib, nsmap=None):
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                # root element: create the model document around it
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument  # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                if (self.beforeInstanceStream and (
                        (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                        (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                        (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    # first element past the schemaRef/linkbaseRef prologue
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
                    else:  # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
            return mdlObj

        def end(self, tag):
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        # sticky contexts are discovered but never placed in the drop buffer
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            dropContext(modelXbrl, cntx)
                            del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck  # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        dropUnit(modelXbrl, unit)
                        del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl":  # end of document
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                            # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            dropFootnoteLink(modelXbrl, footnoteLink)
                            del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                # root-level fact
                self.numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if _streamingExtensionsValidate:
                    factsToCheck = (mdlObj,)
                    instValidator.checkFacts(factsToCheck)
                    if modelXbrl.hasXDT:
                        instValidator.checkFactsDimensions(factsToCheck)
                    del factsToCheck
                    # fact has been validated; release it from the model and the tree
                    dropFact(modelXbrl, mdlObj, modelXbrl.facts)
                    del parentMdlObj[parentMdlObj.index(mdlObj)]
                if self.numRootFacts % 1000 == 0:
                    # the first pass counts facts with a looser namespace test than
                    # this branch, so its total can be zero; avoid dividing by it
                    if instInfoNumRootFacts:
                        pctDone = 100.0 * self.numRootFacts / instInfoNumRootFacts
                    else:
                        pctDone = 0.0
                    modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(
                        self.numRootFacts, instInfoNumRootFacts, pctDone), minTimeToShow=20.0)
            return mdlObj

        def data(self, data):
            self.currentMdlObj.text = data

        def comment(self, text):
            pass

        def pi(self, target, data):
            pass

        def close(self):
            return None

    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    try:
        etree.parse(_file, parser=_parser, base_url=filepath)
        logSyntaxErrors(_parser)
    finally:
        # release the file even if parsing raises
        _file.close()
    if _streamingExtensionsValidate and validator is not None:
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True

    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument