コード例 #1
0
ファイル: FileSource.py プロジェクト: marado/Arelle
 def file(self,filepath):
     """Open ``filepath`` as a text stream and report an encoding for it.

     Returns a ``(stream, encoding)`` tuple.  When
     ``self.fileSourceContainingFilepath`` yields an archive file source, the
     member is read from that zip or XFD archive; ``(None, None)`` is returned
     when an XFD archive holds no matching entry with data.  In every other
     case ``filepath`` itself is opened.
     """
     container = self.fileSourceContainingFilepath(filepath)
     if container is not None:
         # member name = filepath minus the matching prefix and one separator
         if filepath.startswith(container.basefile):
             prefixLen = len(container.basefile)
         else: # filepath.startswith(self.baseurl)
             prefixLen = len(container.baseurl)
         memberName = filepath[prefixLen + 1:]
         if container.isZip:
             # the zip member is looked up with forward slashes
             raw = container.fs.read(memberName.replace("\\","/"))
             zipEncoding = XmlUtil.encoding(raw)
             zipStream = io.TextIOWrapper(io.BytesIO(raw), encoding=zipEncoding)
             return (zipStream, zipEncoding)
         elif container.isXfd:
             for dataElt in container.xfdDocument.iter(tag="data"):
                 if dataElt.findtext("filename") != memberName:
                     continue
                 b64data = dataElt.findtext("mimedata")
                 if not b64data:
                     continue
                 raw = base64.b64decode(b64data.encode("latin-1"))
                 # drop a leading UTF-8 BOM (EF BB BF) when more bytes follow it
                 if len(raw) > 3 and raw[:3] == b"\xef\xbb\xbf":
                     raw = raw[3:]
                 # the stream decodes with the detected encoding, while the
                 # second tuple item is the fixed string "latin-1"
                 xfdStream = io.TextIOWrapper(
                     io.BytesIO(raw),
                     encoding=XmlUtil.encoding(raw))
                 return (xfdStream, "latin-1")
             return (None,None)
     # detect the encoding from the start of the file
     with open(filepath, 'rb') as fb:
         encoding = XmlUtil.encoding(fb.peek(512))
         if encoding.lower() in ('utf-8','utf8'):
             text = None  # marks content that is reopened as utf-8 text below
         else:
             text = fb.read().decode(encoding)
     if text is None:
         return (open(filepath, 'rt', encoding='utf-8'), encoding)
     # non-utf-8 content is handed back already decoded, minus its XML declaration
     declMatch = XMLdeclaration.search(text)
     if declMatch: # remove it for lxml
         start,end = declMatch.span()
         text = text[0:start] + text[end:]
     return (io.StringIO(initial_value=text), encoding)
コード例 #2
0
def openFileStream(cntlr, filepath, mode='r', encoding=None):
    """Open ``filepath`` and return a file-like stream.

    When ``cntlr`` is truthy, HTTP URLs are first translated to a cache file
    name via ``cntlr.webCache.getfilename``, and paths starting with
    ``SERVER_WEB_CACHE`` are served from an in-memory buffer.  Everything
    else is opened with ``io.open``.

    Parameters:
        cntlr: controller providing ``webCache`` and ``isGAE``; may be falsy.
        filepath: URL or file system path.
        mode: ``io.open``-style mode string.
        encoding: text encoding; when ``None`` and ``mode`` has no ``'b'``,
            it is detected from the first 512 bytes of the local file.

    Returns:
        ``io.BytesIO`` or ``FileNamedStringIO`` for web-cache paths,
        otherwise whatever ``io.open`` returns.
    """
    if isHttpUrl(filepath) and cntlr:
        filepath = cntlr.webCache.getfilename(filepath)
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/")
        if cntlr.isGAE:  # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            # not cached: pass an in-memory buffer to the web cache retrieval
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(
                cntlr.webCache.cacheFilepathToUrl(filepath),
                filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            # text requested: swap the byte buffer for a decoded string stream
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(
                filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    if encoding is None and 'b' not in mode:
        # sniff the encoding from the header; the context manager guarantees
        # the probe handle is closed even if the read fails
        with io.open(filepath, mode='rb') as probeStream:
            hdrBytes = probeStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
    return io.open(filepath, mode=mode, encoding=encoding)
コード例 #3
0
ファイル: FileSource.py プロジェクト: andygreener/Arelle
def openXmlFileStream(cntlr, filepath, stripDeclaration=False):
    """Open an XML file and return the tuple ``(fileStream, encoding)``.

    The encoding is detected from the first 512 bytes.  UTF-8 content is
    reopened directly in text mode, unless ``cntlr.isGAE`` is set or
    ``stripDeclaration`` is true.  In all other cases the whole content is
    decoded in memory, any XML declaration found is removed, and an
    ``io.StringIO`` is returned.

    Parameters:
        cntlr: controller passed through to ``openFileStream``; may be None.
        filepath: path or URL of the XML file.
        stripDeclaration: when true, forces the in-memory path.
    """
    openedFileStream = openFileStream(cntlr, filepath, 'rb')
    # try/finally so the byte stream is closed even when reading or decoding
    # raises (e.g. UnicodeDecodeError on malformed content)
    try:
        # check encoding
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes)
        if encoding.lower() in ('utf-8', 'utf8') and (
                cntlr is None or not cntlr.isGAE) and not stripDeclaration:
            text = None
        else:
            openedFileStream.seek(0)
            text = openedFileStream.read().decode(encoding)
    finally:
        # allow filepath to close
        # this may not be needed for Mac or Linux, needs confirmation!!!
        openedFileStream.close()
    if text is None:  # ok to read as utf-8
        return io.open(filepath, 'rt', encoding='utf-8'), encoding
    # strip XML declaration (done for every in-memory decode in this version,
    # regardless of the stripDeclaration flag)
    xmlDeclarationMatch = XMLdeclaration.search(text)
    if xmlDeclarationMatch:  # remove it for lxml
        start, end = xmlDeclarationMatch.span()
        text = text[0:start] + text[end:]
    return (io.StringIO(initial_value=text), encoding)
コード例 #4
0
ファイル: cryptAES_EAX.py プロジェクト: Arelle/Arelle
def securityFileSourceFile(cntlr, ownerObject, filepath, binary, stripDeclaration):
    """Serve a FileSource file request from an encrypted sibling file.

    Returns ``None`` when ``ownerObject`` has no encryption, when
    ``filepath + ENCRYPTED_FILE_SUFFIX`` does not exist, or when no cipher key
    is set.  Otherwise the sibling file is read as a 16-byte nonce, a 16-byte
    tag and the ciphertext, and decrypted with AES in EAX mode.  The result is
    a 1-tuple ``(bytesStream,)`` when ``binary`` is true, else
    ``(textStream, encoding)``.
    """
    if not ownerObject.hasEncryption:
        return None
    # an entry point (or one of its ixds files) matching filepath supplies the key
    for entry in ownerObject.entrypointfiles:
        matchesEntry = filepath == entry.get("file")
        if not matchesEntry:
            matchesEntry = any(filepath == ixfile.get("file") for ixfile in entry.get("ixds",()))
        if matchesEntry and "key" in entry:
            ownerObject.cipherKey = base64.decodebytes(entry["key"].encode())
            break # set new iv, key based on entrypointfiles
    # may be a non-entry file (xsd, linkbase, jpg) using entry's key
    encryptedPath = filepath + ENCRYPTED_FILE_SUFFIX
    if not os.path.exists(encryptedPath) or ownerObject.cipherKey is None:
        return None
    with io.open(encryptedPath, "rb") as fh:
        nonce = fh.read(16)
        tag = fh.read(16)
        cipherText = fh.read()
    cipher = AES.new(ownerObject.cipherKey, AES.MODE_EAX, nonce)
    plainBytes = cipher.decrypt_and_verify(cipherText, tag)
    cipherText = None # dereference before decode operation
    if binary: # return bytes
        # the last byte gives the number of trailing bytes to drop (padding trim)
        return (FileSource.FileNamedBytesIO(filepath, plainBytes[0:-plainBytes[-1]]), )
    # detect encoding if there is an XML header
    if cntlr:
        defaultEncoding = cntlr.modelManager.disclosureSystem.defaultXmlEncoding
    else:
        defaultEncoding = 'utf-8'
    encoding = XmlUtil.encoding(plainBytes[0:512], default=defaultEncoding)
    # trim padding as above, then decode
    text = plainBytes[0:-plainBytes[-1]].decode(encoding or 'utf-8')
    plainBytes = None # dereference before text operation
    if stripDeclaration: # file source may strip XML declaration for libxml
        declMatch = FileSource.XMLdeclaration.search(text)
        if declMatch: # remove it for lxml
            start,end = declMatch.span()
            text = text[0:start] + text[end:]
    return (FileSource.FileNamedStringIO(filepath, initial_value=text), encoding)
コード例 #5
0
ファイル: FileSource.py プロジェクト: camirisk/Arelle
def openXmlFileStream(cntlr, filepath, stripDeclaration=False):
    """Open an XML file and return the tuple ``(fileStream, encoding)``.

    The encoding is detected from the first 512 bytes, falling back to the
    disclosure system's ``defaultXmlEncoding`` (or ``'utf-8'`` without a
    controller).  That default may be ``None``, which is treated as UTF-8
    here and returned unchanged to the caller.

    UTF-8 content is reopened directly in text mode unless ``cntlr.isGAE`` is
    set or ``stripDeclaration`` is true; otherwise the content is decoded in
    memory and returned as a ``FileNamedStringIO``, with the XML declaration
    removed when ``stripDeclaration`` is true.
    """
    openedFileStream = openFileStream(cntlr, filepath, 'rb')
    # try/finally so the byte stream is closed even when reading or decoding raises
    try:
        # check encoding
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(
            hdrBytes,
            default=cntlr.modelManager.disclosureSystem.defaultXmlEncoding
            if cntlr else 'utf-8')
        # encoding default from disclosure system could be None; guard the
        # .lower() call the same way the decode below already does
        isUtf8 = (encoding or 'utf-8').lower() in ('utf-8', 'utf8', 'utf-8-sig')
        if isUtf8 and (
                cntlr is None or not cntlr.isGAE) and not stripDeclaration:
            text = None
        else:
            openedFileStream.seek(0)
            text = openedFileStream.read().decode(encoding or 'utf-8')
    finally:
        # allow filepath to close
        # this may not be needed for Mac or Linux, needs confirmation!!!
        openedFileStream.close()
    if text is None:  # ok to read as utf-8
        return io.open(filepath, 'rt', encoding='utf-8'), encoding
    if stripDeclaration:
        # strip XML declaration
        xmlDeclarationMatch = XMLdeclaration.search(text)
        if xmlDeclarationMatch:  # remove it for lxml
            start, end = xmlDeclarationMatch.span()
            text = text[0:start] + text[end:]
    return (FileNamedStringIO(filepath, initial_value=text), encoding)
コード例 #6
0
def securityFileSourceFile(cntlr, ownerObject, filepath, binary, stripDeclaration):
    """Serve a FileSource file request from an encrypted sibling file.

    Returns ``None`` when ``ownerObject`` has no encryption, when
    ``filepath + ENCRYPTED_FILE_SUFFIX`` does not exist, or when the cipher
    key or iv is unset.  Otherwise the sibling file is decrypted with AES in
    CBC mode and returned as a 1-tuple ``(bytesStream,)`` when ``binary`` is
    true, else as ``(textStream, encoding)``.
    """
    # handle FileSource file requests which can return encrypted contents
    if ownerObject.hasEncryption:
        for entrypointfile in ownerObject.entrypointfiles:
            if (filepath == entrypointfile.get("file") or 
                any(filepath == ixfile.get("file") for ixfile in entrypointfile.get("ixds",()))
                ) and "key" in entrypointfile and "iv" in entrypointfile:
                ownerObject.cipherIv = base64.decodebytes(entrypointfile["iv"].encode())
                ownerObject.cipherKey = base64.decodebytes(entrypointfile["key"].encode())
                break # set new iv, key based on entrypointfiles
        # may be a non-entry file (xsd, linkbase, jpg) using entry's iv, key
        if os.path.exists(filepath + ENCRYPTED_FILE_SUFFIX) and ownerObject.cipherKey is not None and ownerObject.cipherIv is not None:
            # context manager closes the handle right after the read instead
            # of leaving it open until garbage collection
            with io.open(filepath + ENCRYPTED_FILE_SUFFIX, "rb") as fh:
                encrdata = fh.read()
            cipher = AES.new(ownerObject.cipherKey, AES.MODE_CBC, iv=ownerObject.cipherIv)
            bytesdata = cipher.decrypt(encrdata)
            encrdata = None # dereference before decode operation
            if binary: # return bytes
                # last byte gives the number of trailing padding bytes to drop
                return (FileSource.FileNamedBytesIO(filepath, bytesdata[0:-bytesdata[-1]]), ) # trim AES CBC padding
            # detect encoding if there is an XML header
            encoding = XmlUtil.encoding(bytesdata[0:512], 
                                        default=cntlr.modelManager.disclosureSystem.defaultXmlEncoding
                                                if cntlr else 'utf-8')
            # return decoded string
            text = bytesdata[0:-bytesdata[-1]].decode(encoding or 'utf-8') # trim AES CBC padding and decode
            bytesdata = None # dereference before text operation
            if stripDeclaration: # file source may strip XML declaration for libxml
                xmlDeclarationMatch = FileSource.XMLdeclaration.search(text)
                if xmlDeclarationMatch: # remove it for lxml
                    start,end = xmlDeclarationMatch.span()
                    text = text[0:start] + text[end:]
            return (FileSource.FileNamedStringIO(filepath, initial_value=text), encoding)
    return None
コード例 #7
0
ファイル: FileSource.py プロジェクト: javascriptgeek/Arelle
def openXmlFileStream(cntlr, filepath, stripDeclaration=False):
    """Open an XML file and return the tuple ``(fileStream, encoding)``.

    The encoding is detected from the first 512 bytes, with the disclosure
    system's ``defaultXmlEncoding`` (or ``"utf-8"`` when there is no
    controller) as the default.  A ``None`` default is treated as UTF-8 and
    is passed back to the caller unchanged.

    UTF-8 content is reopened in text mode unless ``cntlr.isGAE`` is set or
    ``stripDeclaration`` is true; otherwise a ``FileNamedStringIO`` holding
    the decoded text is returned, minus the XML declaration when
    ``stripDeclaration`` is true.
    """
    openedFileStream = openFileStream(cntlr, filepath, "rb")
    # try/finally so the byte stream is closed even when reading or decoding raises
    try:
        # check encoding
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(
            hdrBytes, default=cntlr.modelManager.disclosureSystem.defaultXmlEncoding if cntlr else "utf-8"
        )
        # encoding default from disclosure system could be None, so fall back
        # to utf-8 before lower-casing (the decode below does the same)
        if (
            (encoding or "utf-8").lower() in ("utf-8", "utf8", "utf-8-sig")
            and (cntlr is None or not cntlr.isGAE)
            and not stripDeclaration
        ):
            text = None
        else:
            openedFileStream.seek(0)
            text = openedFileStream.read().decode(encoding or "utf-8")
    finally:
        # allow filepath to close
        # this may not be needed for Mac or Linux, needs confirmation!!!
        openedFileStream.close()
    if text is None:  # ok to read as utf-8
        return io.open(filepath, "rt", encoding="utf-8"), encoding
    if stripDeclaration:
        # strip XML declaration
        xmlDeclarationMatch = XMLdeclaration.search(text)
        if xmlDeclarationMatch:  # remove it for lxml
            start, end = xmlDeclarationMatch.span()
            text = text[0:start] + text[end:]
    return (FileNamedStringIO(filepath, initial_value=text), encoding)
コード例 #8
0
ファイル: FileSource.py プロジェクト: joshdholtz/Arelle
def openFileStream(cntlr, filepath, mode='r', encoding=None):
    """Return a file-like stream for ``filepath``.

    With a truthy ``cntlr``, HTTP URLs are mapped to a cache file name by
    ``cntlr.webCache.getfilename`` and paths beginning with
    ``SERVER_WEB_CACHE`` are delivered from an in-memory buffer.  Any other
    path is opened through ``io.open``; if neither an ``encoding`` nor a
    binary ``mode`` was requested, the encoding is detected from the first
    512 bytes of the file.
    """
    if isHttpUrl(filepath) and cntlr:
        filepath = cntlr.webCache.getfilename(filepath)
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\","/")
        if cntlr.isGAE: # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            # not cached: pass an in-memory buffer to the web cache retrieval
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath),
                                    filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            # text requested: replace the byte buffer by a decoded string stream
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    if encoding is None and 'b' not in mode:
        # the header probe is wrapped in a context manager so its handle is
        # released even if reading the header raises
        with io.open(filepath, mode='rb') as headerStream:
            hdrBytes = headerStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
    return io.open(filepath, mode=mode, encoding=encoding)
コード例 #9
0
ファイル: FileSource.py プロジェクト: namitkewat/Arelle
def openXmlFileStream(cntlr, filepath, stripDeclaration=False):
    """Open an XML file, returning the tuple ``(fileStream, encoding)``.

    The encoding comes from ``XmlUtil.encoding`` applied to the first 512
    bytes.  UTF-8 files are reopened in text mode when no declaration
    stripping is requested and the controller is absent or not on GAE.
    Otherwise the whole file is decoded into a ``FileNamedStringIO``, with
    the XML declaration cut out when ``stripDeclaration`` is true.
    """
    byteStream = openFileStream(cntlr, filepath, 'rb')
    # check encoding
    encoding = XmlUtil.encoding(byteStream.read(512))
    if (encoding.lower() in ('utf-8','utf8','utf-8-sig')
            and (cntlr is None or not cntlr.isGAE)
            and not stripDeclaration):
        # ok to read as utf-8: drop the byte stream and reopen as text
        byteStream.close()
        return io.open(filepath, 'rt', encoding='utf-8'), encoding
    # decode everything in memory, starting again from the beginning
    byteStream.seek(0)
    text = byteStream.read().decode(encoding)
    byteStream.close()
    if stripDeclaration:
        declMatch = XMLdeclaration.search(text)
        if declMatch: # remove it for lxml
            text = text[:declMatch.start()] + text[declMatch.end():]
    return (FileNamedStringIO(filepath, initial_value=text), encoding)
コード例 #10
0
 def file(self, filepath):
     """Return a text stream for ``filepath``.

     When ``self.fileSourceContainingFilepath`` yields an archive file source,
     the member is read from that zip or XFD archive and wrapped in a
     ``TextIOWrapper`` using the encoding detected from its bytes; ``None`` is
     returned if an XFD archive has no matching entry.  Otherwise the path is
     opened as UTF-8 text.
     """
     container = self.fileSourceContainingFilepath(filepath)
     if container is not None:
         # member name = filepath minus the matching prefix and one separator
         if filepath.startswith(container.basefile):
             prefixLen = len(container.basefile)
         else:  # filepath.startswith(self.baseurl)
             prefixLen = len(container.baseurl)
         memberName = filepath[prefixLen + 1:]
         if container.isZip:
             raw = container.fs.read(memberName)
             return io.TextIOWrapper(io.BytesIO(raw),
                                     encoding=XmlUtil.encoding(raw))
         elif container.isXfd:
             for dataElt in container.xfdDocument.getElementsByTagName("data"):
                 entryName = XmlUtil.text(
                     dataElt.getElementsByTagName("filename")[0])
                 b64data = XmlUtil.text(
                     dataElt.getElementsByTagName("mimedata")[0])
                 wanted = (len(entryName) > 1 and len(b64data) > 1
                           and entryName == memberName)
                 if not wanted:
                     continue
                 raw = base64.b64decode(b64data.encode("latin-1"))
                 # drop a leading UTF-8 BOM (EF BB BF) when more bytes follow it
                 if len(raw) > 3 and raw[:3] == b"\xef\xbb\xbf":
                     raw = raw[3:]
                 return io.TextIOWrapper(io.BytesIO(raw),
                                         encoding=XmlUtil.encoding(raw))
             return None
     return open(filepath, 'rt', encoding='utf-8')
コード例 #11
0
ファイル: FileSource.py プロジェクト: 8maki/Arelle
 def file(self,filepath):
     """Return a text stream for ``filepath``.

     A path covered by an archive file source (as reported by
     ``self.fileSourceContainingFilepath``) is read out of that zip or XFD
     archive and decoded with the encoding detected from its bytes; an XFD
     archive without a matching entry gives ``None``.  Any other path is
     opened as UTF-8 text.
     """
     archive = self.fileSourceContainingFilepath(filepath)
     if archive is not None:
         # strip the archive prefix plus one separator to get the member name
         if filepath.startswith(archive.basefile):
             prefix = archive.basefile
         else: # filepath.startswith(self.baseurl)
             prefix = archive.baseurl
         memberName = filepath[len(prefix) + 1:]
         if archive.isZip:
             content = archive.fs.read(memberName)
             return io.TextIOWrapper(io.BytesIO(content), encoding=XmlUtil.encoding(content))
         elif archive.isXfd:
             for dataNode in archive.xfdDocument.getElementsByTagName("data"):
                 entryName = XmlUtil.text(dataNode.getElementsByTagName("filename")[0])
                 b64data = XmlUtil.text(dataNode.getElementsByTagName("mimedata")[0])
                 if not (len(entryName) > 1 and len(b64data) > 1 and entryName == memberName):
                     continue
                 content = base64.b64decode(b64data.encode("latin-1"))
                 # remove BOM codes (EF BB BF) if present and followed by data
                 if len(content) > 3 and content[:3] == b"\xef\xbb\xbf":
                     content = content[3:]
                 return io.TextIOWrapper(io.BytesIO(content), encoding=XmlUtil.encoding(content))
             return None
     return open(filepath, 'rt', encoding='utf-8')
コード例 #12
0
ファイル: FileSource.py プロジェクト: fewang0521/python_dart
def openFileStream(cntlr, filepath, mode='r', encoding=None):
    """Open ``filepath`` and return a file-like stream.

    Resolution steps, in order:

    1. URL mapping through ``PackageManager`` or, for HTTP URLs with a
       controller that has a ``modelManager``, the disclosure system.
    2. Paths inside an archive are delegated to ``openFileSource(...).file``.
    3. HTTP URLs are translated to a web-cache file name; ``IOError`` is
       raised when no cache file name is available.
    4. Paths starting with ``SERVER_WEB_CACHE`` are served from an in-memory
       buffer.
    5. Anything else is opened with ``io.open``; when no ``encoding`` is given
       and ``mode`` is not binary, the encoding is detected from the first
       512 bytes.

    Raises:
        IOError: when ``cntlr.webCache.getfilename`` returns ``None``.
    """

    if PackageManager.isMappedUrl(filepath):
        filepath = PackageManager.mappedUrl(filepath)
    elif isHttpUrl(filepath) and cntlr and hasattr(
            cntlr, "modelManager"
    ):  # may be called early in initialization for PluginManager
        filepath = cntlr.modelManager.disclosureSystem.mappedUrl(filepath)
    if archiveFilenameParts(filepath):  # file is in an archive
        return openFileSource(filepath, cntlr).file(filepath,
                                                    binary='b' in mode,
                                                    encoding=encoding)[0]
    if isHttpUrl(filepath) and cntlr:
        _cacheFilepath = cntlr.webCache.getfilename(
            filepath, normalize=True
        )  # normalize is separate step in ModelDocument retrieval, combined here
        if _cacheFilepath is None:
            raise IOError(_("Unable to open file: {0}.").format(filepath))
        filepath = _cacheFilepath
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/")
        if cntlr.isGAE:  # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            # not cached: pass an in-memory buffer to the web cache retrieval
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(
                cntlr.webCache.cacheFilepathToUrl(filepath),
                filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            # text requested: swap the byte buffer for a decoded string stream
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(
                filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    if encoding is None and 'b' not in mode:
        # sniff the encoding; the context manager closes the probe handle
        # even if reading the header raises
        with io.open(filepath, mode='rb') as probeStream:
            hdrBytes = probeStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
    return io.open(filepath, mode=mode, encoding=encoding)
コード例 #13
0
ファイル: FileSource.py プロジェクト: namitkewat/Arelle
def openFileStream(cntlr, filepath, mode='r', encoding=None):
    """Open ``filepath`` and return a file-like stream.

    The path is first mapped through ``PackageManager`` or, failing that, the
    controller's disclosure system.  Archive members are delegated to
    ``openFileSource(...).file``; HTTP URLs are translated to a web-cache file
    name; paths starting with ``SERVER_WEB_CACHE`` are served from an
    in-memory buffer; everything else is opened with ``io.open``, detecting
    the encoding from the first 512 bytes when none is given and ``mode`` is
    not binary.

    Raises:
        IOError: when ``cntlr.webCache.getfilename`` returns ``None``.
    """
    if PackageManager.isMappedUrl(filepath):
        filepath = PackageManager.mappedUrl(filepath)
    elif cntlr and hasattr(cntlr, "modelManager"):
        # the rest of this function tolerates a falsy cntlr, so the
        # disclosure-system mapping must not dereference it unguarded
        filepath = cntlr.modelManager.disclosureSystem.mappedUrl(filepath)
    if archiveFilenameParts(filepath): # file is in an archive
        return openFileSource(filepath, cntlr).file(filepath, binary='b' in mode, encoding=encoding)[0]
    if isHttpUrl(filepath) and cntlr:
        _cacheFilepath = cntlr.webCache.getfilename(filepath)
        if _cacheFilepath is None:
            raise IOError(_("Unable to open file: {0}.").format(filepath))
        filepath = _cacheFilepath
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\","/")
        if cntlr.isGAE: # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            # not cached: pass an in-memory buffer to the web cache retrieval
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath),
                                    filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            # text requested: swap the byte buffer for a decoded string stream
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    if encoding is None and 'b' not in mode:
        # sniff the encoding; the context manager closes the probe handle
        # even if reading the header raises
        with io.open(filepath, mode='rb') as probeStream:
            hdrBytes = probeStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
    return io.open(filepath, mode=mode, encoding=encoding)
コード例 #14
0
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, **kwargs):
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    if not _streamingExtensionsCheck:
        return None
    
    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False
        
    def logSyntaxErrors(parsercontext):
        for error in parsercontext.error_log:
            modelXbrl.error("xmlSchema:syntax",
                    _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                    modelObject=modelXbrl, fileName=os.path.basename(filepath), 
                    error=error.message, line=error.line, column=error.column, sourceAction="streaming")
    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    ''' this seems twice as slow as iterparse
    class instInfoTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.streamingAspects = None
            self.foundInstance = False
            self.creationSoftwareComment = ''
            self.currentEltTag = "(before xbrli:xbrl)"
            self.numRootFacts = 0
        def start(self, tag, attrib, nsmap=None):
            if self.newTree:
                if tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.foundInstance = True
                    self.newTree = False
                else: # break 
                    raise NotInstanceDocumentException()
            elif not tag.startswith("{http://www.xbrl.org/"):
                self.numRootFacts += 1
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
            self.currentEltTag = tag
        def end(self, tag):
            pass
        def data(self, data):
            pass
        def comment(self, text):
            if not self.foundInstance: # accumulate comments before xbrli:xbrl
                self.creationSoftwareComment += ('\n' if self.creationSoftwareComment else '') + text
            elif not self.creationSoftwareComment:
                self.creationSoftwareComment = text # or first comment after xbrli:xbrl
        def pi(self, target, data):
            if target == "xbrl-streamable-instance":
                if self.currentEltTag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.streamingAspects = dict(etree.PI(target,data).attrib.copy()) # dereference target results
                else:
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(target)s, must follow xbrli:xbrl element but was found at %(element)s"),
                            modelObject=modelXbrl, target=target, element=self.currentEltTag)
        def close(self):
            if not self.creationSoftwareComment:
                self.creationSoftwareComment = None
            return True
    instInfo = instInfoTarget()
    infoParser = etree.XMLParser(recover=True, huge_tree=True, target=instInfo)
    try:
        etree.parse(_file, parser=infoParser, base_url=filepath)
    except NotInstanceDocumentException:
        pass
    '''
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
    for event, elt in instInfoContext:
        if event == "start":
            if elt.getparent() is not None:
                if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        foundInstance = True
                        pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                        if pi is None:
                            break
                        else:
                            streamingAspects = dict(pi.attrib.copy())
                            if creationSoftwareComment is None:
                                creationSoftwareComment = precedingComment(elt)
                    if not elt.tag.startswith("{http://www.xbrl.org/"):
                        instInfoNumRootFacts += 1
                        if instInfoNumRootFacts % 1000 == 0:
                            modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                elif not foundInstance:       
                    break
            elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                creationSoftwareComment = precedingComment(elt)
                if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                            modelObject=elt)
        elif event == "end":
            elt.clear()
            numElts += 1
            if numElts % 1000 == 0 and elt.getparent() is not None:
                while elt.getprevious() is not None and elt.getparent() is not None:
                    del elt.getparent()[0]
    if elt is not None:
        elt.clear()
    
    _file.seek(0,io.SEEK_SET) # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error("streamingExtensions:unsupportedVersion",
                    _("Streaming version %(version)s, major version number must be 1"),
                    modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError):
        modelXbrl.error("streamingExtensions:versionError",
                _("Version %(version)s, number must be 1.n"),
                modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error("streamingExtensions:valueError",
                    _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                    modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.EFM:
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.GFM:
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.EBA:
            incompatibleValidations.append("EBA")
        if _validateDisclosureSystem and _disclosureSystem.HMRC:
            incompatibleValidations.append("EBA")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:            
            modelXbrl.error("streamingExtensions:incompatibleValidation",
                    _("Streaming instance validation does not support %(incompatibleValidations)s validation"),
                    modelObject=modelXbrl, incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
    logSyntaxErrors(instInfoContext)
    del instInfoContext # dereference
    
    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0,io.SEEK_SET) # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    eltMdlObjs = {}
    contextBuffer = []
    unitBuffer = []
    footnoteBuffer = []
    factBuffer = []
    numFacts = 1
    
    class modelLoaderTarget():
        """Parser target that builds and validates model objects while streaming.

        An instance is handed to ``parser(..., target=...)`` right after this
        class is defined; start/end/data/comment/pi/close are the callbacks the
        parser drives (presumably lxml's parser-target interface -- confirm
        against the ``parser`` helper).  The methods close over locals of the
        enclosing function: modelXbrl, _parser, the context/unit/footnote
        buffers and their limits, instValidator, _encoding, and so on.
        """
        def __init__(self, element_factory=None, parser=None):
            # element_factory and parser are accepted but not used here.
            self.newTree = True                 # True until the first (root) element has started
            self.currentMdlObj = None           # element currently open, i.e. parent of the next start()
            self.beforeInstanceStream = True    # True until the first streamed (non-header) element starts
            self.numRootFacts = 1               # counter used for the progress message in end()
        def start(self, tag, attrib, nsmap=None):
            """Create the model object for an opening tag and attach it to the tree.

            The first element becomes the root: a ModelDocument is created around
            it and registered on modelXbrl.  Every later element is appended to
            the currently open element.  Returns the new model object.
            """
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            # every element built here reports source line 1
            mdlObj.sourceline = 1
            if self.newTree:
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)    
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                # The first element that is not a schemaRef/linkbaseRef (any other
                # link: element, an xbrli context/unit, or anything outside the
                # link and xbrli namespaces) ends the header part; do the
                # one-time setup at that point.
                if (self.beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
            return mdlObj
        def end(self, tag):
            """Finish the currently open element and process it by kind.

            Contexts, units and footnote links are validated, discovered and
            kept in bounded buffers; schemaRef/linkbaseRef and (unless skipDTS)
            roleRef/arcroleRef are discovered; children of the xbrli:xbrl root
            are treated as facts.  Returns the element that was closed.
            """
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            # the parent becomes the open element again
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        # sticky contexts are never put in the buffer, so they are never dropped
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            dropContext(modelXbrl, cntx)
                            del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        # with an infinite limit nothing is ever dropped, so nothing is buffered
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        dropUnit(modelXbrl, unit)
                        del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None 
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    # with an infinite limit nothing is ever dropped, so nothing is buffered
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        # unlike contexts and units, the new link is buffered first
                        # and the oldest one is dropped afterwards
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                            # before the oldest footnote link is dropped
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            dropFootnoteLink(modelXbrl, footnoteLink)
                            del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                # any other direct child of xbrli:xbrl is handled as a fact
                self.numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if _streamingExtensionsValidate:
                    factsToCheck = (mdlObj,)
                    instValidator.checkFacts(factsToCheck)
                    if modelXbrl.hasXDT:
                        instValidator.checkFactsDimensions(factsToCheck)
                    del factsToCheck
                    # when validating, the fact is dropped from the model and the tree once checked
                    dropFact(modelXbrl, mdlObj, modelXbrl.facts)
                    del parentMdlObj[parentMdlObj.index(mdlObj)]
                if self.numRootFacts % 1000 == 0:
                    # progress report every 1000 root facts, against the count
                    # held in instInfoNumRootFacts by the enclosing function
                    modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts, 
                                                                                              100.0 * self.numRootFacts / instInfoNumRootFacts), 
                                              minTimeToShow=20.0)
            return mdlObj
        def data(self, data):
            """Store character data as the text of the currently open element."""
            # NOTE(review): each call replaces .text, and nothing here sets .tail;
            # if the parser delivers an element's text in several pieces, or text
            # after a child element, earlier text would be overwritten -- confirm.
            self.currentMdlObj.text = data
        def comment(self, text):
            """Ignore comments."""
            pass
        def pi(self, target, data):
            """Ignore processing instructions."""
            pass
        def close(self):
            """Nothing to return at end of parse; the document is reached via modelXbrl."""
            return None
        
    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    etree.parse(_file, parser=_parser, base_url=filepath)
    logSyntaxErrors(_parser)
    _file.close()
    if _streamingExtensionsValidate and validator is not None:
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True
        
    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument
コード例 #15
0
ファイル: FileSource.py プロジェクト: namitkewat/Arelle
 def file(self, filepath, binary=False, stripDeclaration=False, encoding=None):
     '''Open filepath, reading it out of its containing archive when it has one.

         for text, return a tuple of (open file handle, encoding)
         for binary, return a tuple of (open file handle, )

     filepath -- path or URL of the wanted file; may point inside an archive.
     binary -- when true, return the raw bytes and skip encoding detection.
     stripDeclaration -- applied to zip/tar.gz entries and passed on to
         openXmlFileStream for plain files; EIS and XFD entries ignore it.
     encoding -- text encoding to use; detected with XmlUtil.encoding when None.

     Raises ArchiveFileIOError when a zip, tar.gz, EIS or XFD archive does not
     hold a readable entry of that name.
     '''
     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else: # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip or archiveFileSource.isTarGz:
             # only the archive lookup is guarded, so a KeyError raised while
             # decoding is not misreported as a missing entry
             try:
                 if archiveFileSource.isZip:
                     b = archiveFileSource.fs.read(archiveFileName.replace("\\","/"))
                 else:
                     fh = archiveFileSource.fs.extractfile(archiveFileName)
                     if fh is None:
                         # extractfile() gives None for members that carry no
                         # data (e.g. directories); report them as not readable
                         raise KeyError(archiveFileName)
                     try:
                         b = fh.read()
                     finally:
                         fh.close() # doesn't seem to close properly using a with construct
             except KeyError:
                 raise ArchiveFileIOError(self, archiveFileName)
             if binary:
                 return (io.BytesIO(b), )
             if encoding is None:
                 encoding = XmlUtil.encoding(b)
             if stripDeclaration:
                 b = stripDeclarationBytes(b)
             return (FileNamedTextIOWrapper(filepath, io.BytesIO(b), encoding=encoding),
                     encoding)
         elif archiveFileSource.isEis or archiveFileSource.isXfd:
             # both formats keep each file as base64 text inside an XML entry;
             # only the document and the element names differ
             if archiveFileSource.isEis:
                 entries = self.eisDocument.iter(tag="{http://www.sec.gov/edgar/common}document")
                 nameTag = "{http://www.sec.gov/edgar/common}conformedName"
                 contentsTag = "{http://www.sec.gov/edgar/common}contents"
             else:
                 entries = archiveFileSource.xfdDocument.iter(tag="data")
                 nameTag = "filename"
                 contentsTag = "mimedata"
             for entry in entries:
                 if entry.findtext(nameTag) != archiveFileName:
                     continue
                 b64data = entry.findtext(contentsTag)
                 if not b64data:
                     continue # entry without contents, keep looking
                 b = base64.b64decode(b64data.encode("latin-1"))
                 # remove BOM codes if present
                 if b.startswith(b"\xef\xbb\xbf"):
                     b = b[3:]
                 if binary:
                     return (io.BytesIO(b), )
                 if encoding is None:
                     encoding = XmlUtil.encoding(b, default="latin-1")
                 return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                         encoding)
             raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isInstalledTaxonomyPackage:
             # remove TAXONOMY_PACKAGE_FILE_NAME from file path
             if filepath.startswith(archiveFileSource.basefile):
                 baseLen = len(archiveFileSource.basefile)
                 for packageFileName in TAXONOMY_PACKAGE_FILE_NAMES:
                     nameLen = len(packageFileName)
                     if filepath[baseLen - nameLen:baseLen] == packageFileName:
                         filepath = filepath[0:baseLen - nameLen - 1] + filepath[baseLen:]
                         break
     if binary:
         return (openFileStream(self.cntlr, filepath, 'rb'), )
     else:
         return openXmlFileStream(self.cntlr, filepath, stripDeclaration)
コード例 #16
0
ファイル: FileSource.py プロジェクト: namitkewat/Arelle
    def open(self):
        '''Open the source behind self.url unless it is already open.

        Sets self.basefile (the web-cache file name for archive-like sources
        when a controller is present, otherwise the url) and self.baseurl, then
        loads the source according to its kind:

          zip / tar.gz      -> self.fs
          EIS               -> self.eisDocument
          XFD               -> self.xfdDocument
          RSS               -> self.rssDocument
          installed package -> self.mappedPaths

        self.isOpen is set when loading succeeded.  Failures to read or parse
        the source are passed to self.logError and leave the source closed;
        the method returns None in every case.
        '''
        if self.isOpen:
            return
        isArchiveLike = (self.isZip or self.isTarGz or self.isEis or self.isXfd or
                         self.isRss or self.isInstalledTaxonomyPackage)
        if isArchiveLike and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(self.url)
        else:
            self.basefile = self.url
        self.baseurl = self.url # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged
        if self.isZip:
            try:
                self.fs = zipfile.ZipFile(openFileStream(self.cntlr, self.basefile, 'rb'), mode="r")
                self.isOpen = True
            except (EnvironmentError, zipfile.BadZipFile) as err:
                # a file that is not a zip is reported the same way as an unreadable one
                self.logError(err)
        elif self.isTarGz:
            try:
                self.fs = tarfile.open(self.basefile, "r:gz")
                self.isOpen = True
            except (EnvironmentError, tarfile.TarError) as err:
                # tarfile reports "not a gzip/tar file" as TarError, not as an OS error
                self.logError(err)
        elif self.isEis:
            # the file is either plain XML or a sequence of records, each an
            # 8-byte header whose first 4 bytes give the length of the zlib
            # compressed data that follows
            buf = b''
            try:
                with open(self.basefile, 'rb') as eisFile:
                    while True:
                        header = eisFile.read(8)
                        if len(header) < 8:
                            break
                        if len(buf) == 0 and header.startswith(b"<?xml "): # not compressed
                            buf = header + eisFile.read()
                            break
                        compressedBytes = eisFile.read(struct.unpack(">L", header[0:4])[0])
                        if len(compressedBytes) <= 0:
                            break
                        buf += zlib.decompress(compressedBytes)
            except (EnvironmentError, zlib.error) as err:
                # whatever was read before the failure is still parsed below
                self.logError(err)
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)

            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding
                    text = buf.decode(XmlUtil.encoding(buf))
                    endEncoding = text.index("?>", 0, 128)
                    if endEncoding > 0:
                        text = text[endEncoding+2:]
                    eisParser = etree.XMLParser(recover=True, huge_tree=True)
                    self.eisDocument = etree.parse(io.StringIO(initial_value=text), parser=eisParser)
                    self.isOpen = True
                except (EnvironmentError, ValueError, etree.LxmlError) as err:
                    # ValueError: undecodable bytes or an unterminated XML declaration
                    self.logError(err)
                    return # provide error message later
        elif self.isXfd:
            try:
                gzippedPayload = None
                with open(self.basefile, 'rb') as xfdFile:
                    # check first line of file
                    firstline = xfdFile.readline()
                    if firstline.startswith(b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                        # file has been gzipped
                        gzippedPayload = base64.b64decode(xfdFile.read(-1))
                    else:
                        # position to start of file
                        xfdFile.seek(0,io.SEEK_SET)
                        self.xfdDocument = etree.parse(xfdFile)
                if gzippedPayload is not None:
                    fb = gzippedPayload
                    ungzippedParts = []
                    i = 0
                    while i < len(fb):
                        # each chunk starts with two 16-bit big-endian lengths:
                        # compressed size, then uncompressed size
                        lenCompr = fb[i + 0] * 256 + fb[i + 1]
                        lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                        lenRead = 0

                        # prefix the gzip magic, method and flag bytes so GzipFile accepts the chunk
                        gzchunk = (bytes((31,139,8,0)) + fb[i:i+lenCompr])
                        try:
                            with gzip.GzipFile(fileobj=io.BytesIO(gzchunk)) as gf:
                                while True:
                                    readSize = min(16384, lenUncomp - lenRead)
                                    readBytes = gf.read(size=readSize)
                                    lenRead += len(readBytes)
                                    ungzippedParts.append(readBytes)
                                    if len(readBytes) == 0 or (lenUncomp - lenRead) <= 0:
                                        break
                        except IOError:
                            pass # best effort: the parse below reports unusable content

                        i += lenCompr + 4
                    #for learning the content of xfd file, uncomment this:
                    #with open("c:\\temp\\test.xml", "wb") as fh:
                    #    fh.write(b"".join(ungzippedParts))
                    xfdText = b"".join(ungzippedParts).decode("utf-8")
                    self.xfdDocument = etree.parse(io.StringIO(initial_value=xfdText))
                self.isOpen = True
            except (EnvironmentError, etree.LxmlError) as err:
                self.logError(err)
                return # provide error message later
        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except (EnvironmentError, etree.LxmlError) as err:
                self.logError(err)
                return # provide error message later
        elif self.isInstalledTaxonomyPackage:
            self.isOpen = True
            # load mappings
            try:
                metadataFiles = self.taxonomyPackageMetadataFiles
                if len(metadataFiles) != 1:
                    raise IOError(_("Taxonomy package must contain one and only one metadata file: {0}.")
                                  .format(', '.join(metadataFiles)))
                # HF: this won't work, see DialogOpenArchive for correct code
                # not sure if it is used
                taxonomyPackage = PackageManager.parsePackage(self.cntlr, self.url)
                fileSourceDir = os.path.dirname(self.baseurl) + os.sep
                self.mappedPaths = {
                    prefix: (remapping if isHttpUrl(remapping)
                             else (fileSourceDir + remapping.replace("/", os.sep)))
                    for prefix, remapping in taxonomyPackage["remappings"].items()}
            except EnvironmentError as err:
                self.logError(err)
                return # provide error message later
コード例 #17
0
ファイル: FileSource.py プロジェクト: jaolguin/Arelle
 def file(self,filepath):
     '''Return a tuple of (open text file handle, encoding) for filepath.

     A file inside a zip or XFD archive is decoded from the archive entry;
     (None, None) is returned when an XFD archive has no entry of that name
     with contents.  Any other file is opened from the file system: UTF-8
     files are opened directly, files in another encoding are read whole,
     decoded, and returned as a StringIO with the XML declaration removed.

     The encoding returned is the one the handle actually decodes with,
     except for non-UTF-8 plain files, where it names the encoding the
     already-decoded text came from.
     '''
     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else: # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             b = archiveFileSource.fs.read(archiveFileName.replace("\\","/"))
             encoding = XmlUtil.encoding(b)
             return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding), encoding)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 if data.findtext("filename") != archiveFileName:
                     continue
                 b64data = data.findtext("mimedata")
                 if not b64data:
                     continue # entry without contents, keep looking
                 b = base64.b64decode(b64data.encode("latin-1"))
                 # remove BOM codes if present
                 if b.startswith(b"\xef\xbb\xbf"):
                     b = b[3:]
                 # report the encoding the wrapper really decodes with
                 encoding = XmlUtil.encoding(b)
                 return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding), encoding)
             return (None,None)
     # check encoding
     with open(filepath, 'rb') as fb:
         encoding = XmlUtil.encoding(fb.read(512))
         if encoding.lower() in ('utf-8','utf8'):
             text = None
         else:
             fb.seek(0)
             text = fb.read().decode(encoding)
         # allow filepath to close
     # this may not be needed for Mac or Linux, needs confirmation!!!
     if text is None:  # ok to read as utf-8
         return io.open(filepath, 'rt', encoding='utf-8'), encoding
     # strip XML declaration
     xmlDeclarationMatch = XMLdeclaration.search(text)
     if xmlDeclarationMatch: # remove it for lxml
         start,end = xmlDeclarationMatch.span()
         text = text[0:start] + text[end:]
     return (io.StringIO(initial_value=text), encoding)
コード例 #18
0
ファイル: FileSource.py プロジェクト: camirisk/Arelle
 def file(self,
          filepath,
          binary=False,
          stripDeclaration=False,
          encoding=None):
     '''Open filepath, reading it out of its containing archive when it has one.

         for text, return a tuple of (open file handle, encoding)
         for binary, return a tuple of (open file handle, )

     filepath -- path or URL of the wanted file; may point inside an archive.
     binary -- when true, return the raw bytes and skip encoding detection.
     stripDeclaration -- applied to zip/tar.gz entries and passed on to
         openXmlFileStream for plain files; EIS and XFD entries ignore it.
     encoding -- text encoding to use; detected with XmlUtil.encoding when None.

     Raises ArchiveFileIOError (errno.ENOENT) when a zip, tar.gz, EIS or XFD
     archive does not hold a readable entry of that name.
     '''
     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else:  # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip or archiveFileSource.isTarGz:
             # only the archive lookup is guarded, so a KeyError raised while
             # decoding is not misreported as a missing entry
             try:
                 if archiveFileSource.isZip:
                     b = archiveFileSource.fs.read(
                         archiveFileName.replace("\\", "/"))
                 else:
                     fh = archiveFileSource.fs.extractfile(archiveFileName)
                     if fh is None:
                         # extractfile() gives None for members that carry no
                         # data (e.g. directories); report them as not readable
                         raise KeyError(archiveFileName)
                     try:
                         b = fh.read()
                     finally:
                         # doesn't seem to close properly using a with construct
                         fh.close()
             except KeyError:
                 # same three-argument form as the EIS and XFD branches below
                 raise ArchiveFileIOError(self, errno.ENOENT,
                                          archiveFileName)
             if binary:
                 return (io.BytesIO(b), )
             if encoding is None:
                 encoding = XmlUtil.encoding(b)
             if stripDeclaration:
                 b = stripDeclarationBytes(b)
             return (FileNamedTextIOWrapper(filepath,
                                            io.BytesIO(b),
                                            encoding=encoding),
                     encoding)
         elif archiveFileSource.isEis or archiveFileSource.isXfd:
             # both formats keep each file as base64 text inside an XML entry;
             # only the document and the element names differ
             if archiveFileSource.isEis:
                 entries = self.eisDocument.iter(
                     tag="{http://www.sec.gov/edgar/common}document")
                 nameTag = "{http://www.sec.gov/edgar/common}conformedName"
                 contentsTag = "{http://www.sec.gov/edgar/common}contents"
             else:
                 entries = archiveFileSource.xfdDocument.iter(tag="data")
                 nameTag = "filename"
                 contentsTag = "mimedata"
             for entry in entries:
                 if entry.findtext(nameTag) != archiveFileName:
                     continue
                 b64data = entry.findtext(contentsTag)
                 if not b64data:
                     continue  # entry without contents, keep looking
                 b = base64.b64decode(b64data.encode("latin-1"))
                 # remove BOM codes if present
                 if b.startswith(b"\xef\xbb\xbf"):
                     b = b[3:]
                 if binary:
                     return (io.BytesIO(b), )
                 if encoding is None:
                     encoding = XmlUtil.encoding(b, default="latin-1")
                 return (io.TextIOWrapper(io.BytesIO(b),
                                          encoding=encoding),
                         encoding)
             raise ArchiveFileIOError(self, errno.ENOENT, archiveFileName)
         elif archiveFileSource.isInstalledTaxonomyPackage:
             # remove TAXONOMY_PACKAGE_FILE_NAME from file path
             if filepath.startswith(archiveFileSource.basefile):
                 baseLen = len(archiveFileSource.basefile)
                 for packageFileName in TAXONOMY_PACKAGE_FILE_NAMES:
                     nameLen = len(packageFileName)
                     if filepath[baseLen - nameLen:baseLen] == packageFileName:
                         filepath = (filepath[0:baseLen - nameLen - 1] +
                                     filepath[baseLen:])
                         break
     if binary:
         return (openFileStream(self.cntlr, filepath, 'rb'), )
     else:
         return openXmlFileStream(self.cntlr, filepath, stripDeclaration)
コード例 #19
0
ファイル: FileSource.py プロジェクト: camirisk/Arelle
    def open(self, reloadCache=False):
        """Open this file source unless it is already open.

        ``self.basefile`` is resolved first: for zip, tar.gz, EIS, XFD, RSS
        and installed-taxonomy-package sources that have a controller it
        comes from ``self.cntlr.webCache.getfilename``; otherwise it is
        ``self.url``.  The container matching the source kind is then loaded
        and ``self.isOpen`` is set.  ``EnvironmentError`` and
        ``etree.LxmlError`` failures are handed to ``self.logError`` instead
        of being raised.

        :param reloadCache: forwarded as ``reload`` to the web cache lookup.
        :returns: ``None``; results are stored on the instance (``fs``,
            ``eisDocument``, ``xfdDocument``, ``rssDocument`` or
            ``mappedPaths``).
        """
        if self.isOpen:
            return

        if (self.isZip or self.isTarGz or self.isEis or self.isXfd
                or self.isRss
                or self.isInstalledTaxonomyPackage) and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(
                self.url, reload=reloadCache)
        else:
            self.basefile = self.url
        self.baseurl = self.url  # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged

        if self.isZip:
            try:
                self.fs = zipfile.ZipFile(
                    openFileStream(self.cntlr, self.basefile, 'rb'),
                    mode="r")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)

        elif self.isTarGz:
            try:
                self.fs = tarfile.open(self.basefile, "r:gz")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)

        elif self.isEis:
            # The file is either plain XML or a sequence of records, each
            # with an 8-byte header whose first 4 bytes give the big-endian
            # length of a zlib-compressed payload.
            buf = b''
            try:
                # "with" guarantees the handle is released even when a read
                # or a decompression step fails part way through.
                with open(self.basefile, 'rb') as eisFile:
                    while True:
                        header = eisFile.read(8)
                        if len(header) < 8:
                            break
                        if len(buf) == 0 and header.startswith(b"<?xml "):
                            buf = header + eisFile.read()  # not compressed
                            break
                        compressedBytes = eisFile.read(
                            struct.unpack(">L", header[0:4])[0])
                        if len(compressedBytes) <= 0:
                            break
                        buf += zlib.decompress(compressedBytes)
            except EnvironmentError as err:
                # whatever was read so far is still examined below
                self.logError(err)
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)

            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding
                    xmlText = buf.decode(XmlUtil.encoding(buf))
                    # find() rather than index(): a declaration that is not
                    # terminated within the first 128 characters leaves the
                    # text untouched instead of raising ValueError.
                    endEncoding = xmlText.find("?>", 0, 128)
                    if endEncoding > 0:
                        xmlText = xmlText[endEncoding + 2:]
                    parser = etree.XMLParser(recover=True, huge_tree=True)
                    with io.StringIO(initial_value=xmlText) as textStream:
                        self.eisDocument = etree.parse(textStream,
                                                       parser=parser)
                    self.isOpen = True
                except (EnvironmentError, etree.LxmlError) as err:
                    self.logError(err)
                    return  # provide error message later

        elif self.isXfd:
            try:
                xfdFile = open(self.basefile, 'rb')
            except EnvironmentError as err:
                # report open failures the same way the other branches do
                self.logError(err)
                return  # provide error message later
            try:
                # check first line of file
                firstline = xfdFile.readline()
                if firstline.startswith(
                        b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                    # file has been gzipped: the remainder is base64 text
                    base64input = xfdFile.read(-1)
                    xfdFile.close()

                    fb = base64.b64decode(base64input)
                    # bytearray grows in place; repeated bytes += would copy
                    # the whole buffer for every chunk
                    ungzipped = bytearray()
                    i = 0
                    while i < len(fb):
                        # two 16-bit big-endian lengths precede each chunk
                        lenCompr = fb[i + 0] * 256 + fb[i + 1]
                        lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                        lenRead = 0

                        # prepend gzip magic, deflate method and empty flags
                        gzchunk = (bytes((31, 139, 8, 0))
                                   + fb[i:i + lenCompr])
                        try:
                            with gzip.GzipFile(
                                    fileobj=io.BytesIO(gzchunk)) as gf:
                                while True:
                                    readSize = min(16384,
                                                   lenUncomp - lenRead)
                                    readBytes = gf.read(size=readSize)
                                    lenRead += len(readBytes)
                                    ungzipped += readBytes
                                    if (len(readBytes) == 0
                                            or (lenUncomp - lenRead) <= 0):
                                        break
                        except IOError:
                            pass  # provide error message later

                        i += lenCompr + 4
                    #for learning the content of xfd file, uncomment this:
                    #with open("c:\\temp\\test.xml", "wb") as fh:
                    #    fh.write(ungzipped)
                    xfdFile = io.StringIO(
                        initial_value=ungzipped.decode("utf-8"))
                else:
                    # position to start of file
                    xfdFile.seek(0, io.SEEK_SET)

                self.xfdDocument = etree.parse(xfdFile)
                self.isOpen = True
            except (EnvironmentError, etree.LxmlError) as err:
                self.logError(err)
                return  # provide error message later
            finally:
                # closes whichever stream is current; closing twice is a no-op
                xfdFile.close()

        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except (EnvironmentError, etree.LxmlError) as err:
                self.logError(err)
                return  # provide error message later

        elif self.isInstalledTaxonomyPackage:
            # marked open before the mappings load, so a failure below still
            # leaves the source flagged as open
            self.isOpen = True
            # load mappings
            try:
                metadataFiles = self.taxonomyPackageMetadataFiles
                if len(metadataFiles) != 1:
                    raise IOError(
                        _("Taxonomy package must contain one and only one metadata file: {0}."
                          ).format(', '.join(metadataFiles)))
                # HF: this won't work, see DialogOpenArchive for correct code
                # not sure if it is used
                taxonomyPackage = PackageManager.parsePackage(
                    self.cntlr, self.url)
                fileSourceDir = os.path.dirname(self.baseurl) + os.sep
                # http remappings are kept as-is; relative ones are anchored
                # at the directory of this file source
                self.mappedPaths = {
                    prefix: (remapping if isHttpUrl(remapping)
                             else fileSourceDir
                             + remapping.replace("/", os.sep))
                    for prefix, remapping
                    in taxonomyPackage["remappings"].items()}
            except EnvironmentError as err:
                self.logError(err)
                return  # provide error message later
コード例 #20
0
ファイル: FileSource.py プロジェクト: andygreener/Arelle
    def open(self):
        """Open this file source unless it is already open.

        ``self.basefile`` is resolved first: for zip, EIS, XFD and RSS
        sources that have a controller it comes from
        ``self.cntlr.webCache.getfilename``; otherwise it is ``self.url``.
        The container matching the source kind is then loaded and
        ``self.isOpen`` is set on success.  ``EnvironmentError`` and
        ``etree.LxmlError`` failures are handed to ``self.logError`` instead
        of being raised.

        :returns: ``None``; results are stored on the instance (``fs``,
            ``eisDocument``, ``xfdDocument`` or ``rssDocument``).
        """
        if self.isOpen:
            return

        if (self.isZip or self.isEis or self.isXfd
                or self.isRss) and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(self.url)
        else:
            self.basefile = self.url
        self.baseurl = self.url  # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged

        if self.isZip:
            try:
                self.fs = zipfile.ZipFile(
                    openFileStream(self.cntlr, self.basefile, 'rb'),
                    mode="r")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)

        elif self.isEis:
            # The file is either plain XML or a sequence of records, each
            # with an 8-byte header whose first 4 bytes give the big-endian
            # length of a zlib-compressed payload.
            buf = b''
            try:
                # "with" guarantees the handle is released even when a read
                # or a decompression step fails part way through.
                with open(self.basefile, 'rb') as eisFile:
                    while True:
                        header = eisFile.read(8)
                        if len(header) < 8:
                            break
                        if len(buf) == 0 and header.startswith(b"<?xml "):
                            buf = header + eisFile.read()  # not compressed
                            break
                        compressedBytes = eisFile.read(
                            struct.unpack(">L", header[0:4])[0])
                        if len(compressedBytes) <= 0:
                            break
                        buf += zlib.decompress(compressedBytes)
            except EnvironmentError as err:
                # whatever was read so far is still examined below
                self.logError(err)
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)

            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding
                    xmlText = buf.decode(XmlUtil.encoding(buf))
                    # find() rather than index(): a declaration that is not
                    # terminated within the first 128 characters leaves the
                    # text untouched instead of raising ValueError.
                    endEncoding = xmlText.find("?>", 0, 128)
                    if endEncoding > 0:
                        xmlText = xmlText[endEncoding + 2:]
                    parser = etree.XMLParser(recover=True, huge_tree=True)
                    with io.StringIO(initial_value=xmlText) as textStream:
                        self.eisDocument = etree.parse(textStream,
                                                       parser=parser)
                    self.isOpen = True
                except (EnvironmentError, etree.LxmlError) as err:
                    self.logError(err)
                    return  # provide error message later

        elif self.isXfd:
            try:
                xfdFile = open(self.basefile, 'rb')
            except EnvironmentError as err:
                # report open failures the same way the other branches do
                self.logError(err)
                return  # provide error message later
            try:
                # check first line of file
                firstline = xfdFile.readline()
                if firstline.startswith(
                        b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                    # file has been gzipped: the remainder is base64 text
                    base64input = xfdFile.read(-1)
                    xfdFile.close()

                    fb = base64.b64decode(base64input)
                    # bytearray grows in place; repeated bytes += would copy
                    # the whole buffer for every chunk
                    ungzipped = bytearray()
                    i = 0
                    while i < len(fb):
                        # two 16-bit big-endian lengths precede each chunk
                        lenCompr = fb[i + 0] * 256 + fb[i + 1]
                        lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                        lenRead = 0

                        # prepend gzip magic, deflate method and empty flags
                        gzchunk = (bytes((31, 139, 8, 0))
                                   + fb[i:i + lenCompr])
                        try:
                            with gzip.GzipFile(
                                    fileobj=io.BytesIO(gzchunk)) as gf:
                                while True:
                                    readSize = min(16384,
                                                   lenUncomp - lenRead)
                                    readBytes = gf.read(size=readSize)
                                    lenRead += len(readBytes)
                                    ungzipped += readBytes
                                    if (len(readBytes) == 0
                                            or (lenUncomp - lenRead) <= 0):
                                        break
                        except IOError:
                            pass  # provide error message later

                        i += lenCompr + 4
                    #for learning the content of xfd file, uncomment this:
                    #with open("c:\\temp\\test.xml", "wb") as fh:
                    #    fh.write(ungzipped)
                    xfdFile = io.StringIO(
                        initial_value=ungzipped.decode("utf-8"))
                else:
                    # position to start of file
                    xfdFile.seek(0, io.SEEK_SET)

                self.xfdDocument = etree.parse(xfdFile)
                self.isOpen = True
            except (EnvironmentError, etree.LxmlError) as err:
                self.logError(err)
                return  # provide error message later
            finally:
                # closes whichever stream is current; closing twice is a no-op
                xfdFile.close()

        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except (EnvironmentError, etree.LxmlError) as err:
                self.logError(err)
                return  # provide error message later
コード例 #21
0
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, *args, **kwargs):
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    if not _streamingExtensionsCheck:
        return None

    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False

    def logSyntaxErrors(parsercontext):
        for error in parsercontext.error_log:
            modelXbrl.error(
                "xmlSchema:syntax",
                _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"
                  ),
                modelObject=modelXbrl,
                fileName=os.path.basename(filepath),
                error=error.message,
                line=error.line,
                column=error.column,
                sourceAction="streaming")

    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    ''' this seems twice as slow as iterparse
    class instInfoTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.streamingAspects = None
            self.foundInstance = False
            self.creationSoftwareComment = ''
            self.currentEltTag = "(before xbrli:xbrl)"
            self.numRootFacts = 0
        def start(self, tag, attrib, nsmap=None):
            if self.newTree:
                if tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.foundInstance = True
                    self.newTree = False
                else: # break 
                    raise NotInstanceDocumentException()
            elif not tag.startswith("{http://www.xbrl.org/"):
                self.numRootFacts += 1
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
            self.currentEltTag = tag
        def end(self, tag):
            pass
        def data(self, data):
            pass
        def comment(self, text):
            if not self.foundInstance: # accumulate comments before xbrli:xbrl
                self.creationSoftwareComment += ('\n' if self.creationSoftwareComment else '') + text
            elif not self.creationSoftwareComment:
                self.creationSoftwareComment = text # or first comment after xbrli:xbrl
        def pi(self, target, data):
            if target == "xbrl-streamable-instance":
                if self.currentEltTag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.streamingAspects = dict(etree.PI(target,data).attrib.copy()) # dereference target results
                else:
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(target)s, must follow xbrli:xbrl element but was found at %(element)s"),
                            modelObject=modelXbrl, target=target, element=self.currentEltTag)
        def close(self):
            if not self.creationSoftwareComment:
                self.creationSoftwareComment = None
            return True
    instInfo = instInfoTarget()
    infoParser = etree.XMLParser(recover=True, huge_tree=True, target=instInfo)
    try:
        etree.parse(_file, parser=infoParser, base_url=filepath)
    except NotInstanceDocumentException:
        pass
    '''
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file,
                                      events=("start", "end"),
                                      huge_tree=True)
    try:
        for event, elt in instInfoContext:
            if event == "start":
                if elt.getparent() is not None:
                    if elt.getparent(
                    ).tag == "{http://www.xbrl.org/2003/instance}xbrl":
                        if not foundInstance:
                            foundInstance = True
                            pi = precedingProcessingInstruction(
                                elt, "xbrl-streamable-instance")
                            if pi is None:
                                break
                            else:
                                streamingAspects = dict(pi.attrib.copy())
                                if creationSoftwareComment is None:
                                    creationSoftwareComment = precedingComment(
                                        elt)
                        if not elt.tag.startswith("{http://www.xbrl.org/"):
                            instInfoNumRootFacts += 1
                            if instInfoNumRootFacts % 1000 == 0:
                                modelXbrl.profileActivity(
                                    "... streaming tree check",
                                    minTimeToShow=20.0)
                    elif not foundInstance:
                        break
                elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    creationSoftwareComment = precedingComment(elt)
                    if precedingProcessingInstruction(
                            elt, "xbrl-streamable-instance") is not None:
                        modelXbrl.error(
                            "streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"
                              ),
                            modelObject=elt)
            elif event == "end":
                elt.clear()
                numElts += 1
                if numElts % 1000 == 0 and elt.getparent() is not None:
                    while elt.getprevious() is not None and elt.getparent(
                    ) is not None:
                        del elt.getparent()[0]
    except etree.XMLSyntaxError as err:
        modelXbrl.error("xmlSchema:syntax",
                        _("Unrecoverable error: %(error)s"),
                        error=err)
        _file.close()
        return err

    _file.seek(0, io.SEEK_SET)  # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error(
                "streamingExtensions:unsupportedVersion",
                _("Streaming version %(version)s, major version number must be 1"
                  ),
                modelObject=elt,
                version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError):
        modelXbrl.error("streamingExtensions:versionError",
                        _("Version %(version)s, number must be 1.n"),
                        modelObject=elt,
                        version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error(
                "streamingExtensions:valueError",
                _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"
                  ),
                modelObject=elt,
                attrib=bufAspect,
                value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.validationType == "EFM":
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.validationType == "GFM":
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.validationType == "HMRC":
            incompatibleValidations.append("HMRC")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error(
                "streamingExtensions:incompatibleValidation",
                _("Streaming instance validation does not support %(incompatibleValidations)s validation"
                  ),
                modelObject=modelXbrl,
                incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
    logSyntaxErrors(instInfoContext)
    del instInfoContext  # dereference

    for pluginMethod in pluginClassMethods("Streaming.BlockStreaming"):
        _blockingPluginName = pluginMethod(modelXbrl)
        if _blockingPluginName:  # name of blocking plugin is returned
            modelXbrl.error(
                "streamingExtensions:incompatiblePlugIn",
                _("Streaming instance not supported by plugin %(blockingPlugin)s"
                  ),
                modelObject=modelXbrl,
                blockingPlugin=_blockingPluginName)
            foundErrors = True

    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0, io.SEEK_SET)  # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    contextBuffer = []
    contextsToDrop = []
    unitBuffer = []
    unitsToDrop = []
    footnoteBuffer = []
    footnoteLinksToDrop = []

    _streamingFactsPlugin = any(
        True for pluginMethod in pluginClassMethods("Streaming.Facts"))
    _streamingValidateFactsPlugin = (_streamingExtensionsValidate and any(
        True
        for pluginMethod in pluginClassMethods("Streaming.ValidateFacts")))
    ''' this is very much slower than iterparse
    class modelLoaderTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.beforeStartStreamingPlugin = True
            self.numRootFacts = 1
            modelXbrl.makeelementParentModelObject = None
            modelXbrl.isStreamingMode = True
            self.factsCheckVersion = None
            self.factsCheckMd5s = Md5Sum()
        def start(self, tag, attrib, nsmap=None):
            modelXbrl.makeelementParentModelObject = self.currentMdlObj # pass parent to makeelement for ModelObjectFactory
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                if (self.beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters(modelXbrl.prefixedNamespaces))
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif not self.beforeInstanceStream and self.beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    self.beforeStartStreamingPlugin = False
            return mdlObj
        def end(self, tag):
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _streamingValidateFactsPlugin:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                                del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        #>>XmlValidate.validate(modelXbrl, mdlObj)
                        #>>modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _streamingValidateFactsPlugin:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                            del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None 
                    #>>XmlValidate.validate(modelXbrl, mdlObj)
                    #>>modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining batched facts if any
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # finish any final batch of facts
                        if len(modelXbrl.facts) > 0:
                            factsToCheck = modelXbrl.facts.copy()
                            factsHaveBeenProcessed = True
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                if not pluginMethod(modelXbrl, factsToCheck):
                                    factsHaveBeenProcessed = False
                            if factsHaveBeenProcessed:
                                for fact in factsToCheck:
                                    dropFact(modelXbrl, fact, modelXbrl.facts)
                                    del parentMdlObj[parentMdlObj.index(fact)]
                                for cntx in contextsToDrop:
                                    dropContext(modelXbrl, cntx)
                                    del parentMdlObj[parentMdlObj.index(cntx)]
                                for unit in unitsToDrop:
                                    dropUnit(modelXbrl, unit)
                                    del parentMdlObj[parentMdlObj.index(unit)]
                                for footnoteLink in footnoteLinksToDrop:
                                    dropFootnoteLink(modelXbrl, footnoteLink)
                                    del parentMdlObj[parentMdlObj.index(footnoteLink)]
                                fact = cntx = unit = footnoteLink = None
                                del contextsToDrop[:]
                                del unitsToDrop[:]
                                del footnoteLinksToDrop[:]
                            del factsToCheck
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    for pluginMethod in pluginClassMethods("Streaming.Finish"):
                        pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                                # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                self.numRootFacts += 1
                #>>XmlValidate.validate(modelXbrl, mdlObj)
                #>>modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if self.factsCheckVersion:
                    self.factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # use batches of 1000 facts
                        if len(modelXbrl.facts) > 1000:
                            factsToCheck = modelXbrl.facts.copy()
                            factsHaveBeenProcessed = True
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                if not pluginMethod(modelXbrl, factsToCheck):
                                    factsHaveBeenProcessed = False
                            if factsHaveBeenProcessed:
                                for fact in factsToCheck:
                                    dropFact(modelXbrl, fact, modelXbrl.facts)
                                    del parentMdlObj[parentMdlObj.index(fact)]
                                for cntx in contextsToDrop:
                                    dropContext(modelXbrl, cntx)
                                    del parentMdlObj[parentMdlObj.index(cntx)]
                                for unit in unitsToDrop:
                                    dropUnit(modelXbrl, unit)
                                    del parentMdlObj[parentMdlObj.index(unit)]
                                for footnoteLink in footnoteLinksToDrop:
                                    dropFootnoteLink(modelXbrl, footnoteLink)
                                    del parentMdlObj[parentMdlObj.index(footnoteLink)]
                                fact = cntx = unit = footnoteLink = None
                                del contextsToDrop[:]
                                del unitsToDrop[:]
                                del footnoteLinksToDrop[:]
                            del factsToCheck # dereference fact or batch of facts
                    else:
                        dropFact(modelXbrl, mdlObj, modelXbrl.facts) # single fact has been processed
                        del parentMdlObj[parentMdlObj.index(mdlObj)]
                if self.numRootFacts % 1000 == 0:
                    pass
                    #modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts, 
                    #                                                                          100.0 * self.numRootFacts / instInfoNumRootFacts), 
                    #                          minTimeToShow=20.0)
                    gc.collect()
                    sys.stdout.write ("\rAt fact {} of {} mem {}".format(self.numRootFacts, instInfoNumRootFacts, modelXbrl.modelManager.cntlr.memoryUsed))
            return mdlObj
        def data(self, data):
            self.currentMdlObj.text = data
        def comment(self, text):
            pass
        def pi(self, target, data):
            if target == "xbrl-facts-check":
                _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']", data)
                if _match:
                    _matchGroups = _match.groups()
                    if len(_matchGroups) == 2:
                        if _matchGroups[0] == "version":
                            self.factsCheckVersion = _matchGroups[1]
                        elif _matchGroups[0] == "sum-of-fact-md5s":
                            try:
                                expectedMd5 = Md5Sum(_matchGroups[1])
                                if self.factsCheckMd5s != expectedMd5:
                                    modelXbrl.warning("streamingExtensions:xbrlFactsCheckWarning",
                                            _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"),
                                            modelObject=modelXbrl, expectedMd5=expectedMd5, actualMd5Sum=self.factsCheckMd5s)
                                else:
                                    modelXbrl.info("info",
                                            _("Successful XBRL facts sum of md5s."),
                                            modelObject=modelXbrl)
                            except ValueError:
                                modelXbrl.error("streamingExtensions:xbrlFactsCheckError",
                                        _("Invalid sum-of-md5s %(sumOfMd5)s"),
                                        modelObject=modelXbrl, sumOfMd5=_matchGroups[1])
        def close(self):
            del modelXbrl.makeelementParentModelObject
            return None
        
        def factCheckFact(self, fact):
            self.factsCheckMd5s += fact.md5sum
            for _tupleFact in fact.modelTupleFacts:
                self.factCheckFact(_tupleFact)
        
    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    etree.parse(_file, parser=_parser, base_url=filepath)
    logSyntaxErrors(_parser)
    '''
    # replace modelLoaderTarget with iterparse (as it now supports CustomElementClassLookup)
    streamingParserContext = etree.iterparse(_file,
                                             events=("start", "end"),
                                             huge_tree=True)
    from arelle.ModelObjectFactory import setParserElementClassLookup
    modelXbrl.isStreamingMode = True  # must be set before setting element class lookup
    (_parser, _parserLookupName,
     _parserLookupClass) = setParserElementClassLookup(streamingParserContext,
                                                       modelXbrl)
    foundInstance = False
    beforeInstanceStream = beforeStartStreamingPlugin = True
    numRootFacts = 0
    factsCheckVersion = None

    def factCheckFact(fact):
        """Add the md5sum of ``fact`` and of every nested tuple fact to the running total.

        The total is kept on ``modelDocument._factsCheckMd5s``; the fact itself
        is added first, then each entry of ``fact.modelTupleFacts`` is processed
        recursively in order.
        """
        modelDocument._factsCheckMd5s += fact.md5sum
        for nestedFact in fact.modelTupleFacts:
            factCheckFact(nestedFact)

    for event, mdlObj in streamingParserContext:
        if event == "start":
            if mdlObj.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE,
                                              mappedUri, filepath,
                                              mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument  # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = precedingComment(
                    mdlObj)
                modelDocument._factsCheckMd5s = Md5Sum()
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject=modelDocument)
            elif mdlObj.getparent() is not None:
                mdlObj._init()  # requires discovery as part of start elements
                if mdlObj.getparent(
                ).tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        foundInstance = True
                        pi = precedingProcessingInstruction(
                            mdlObj, "xbrl-facts-check")
                        if pi is not None:
                            factsCheckVersion = pi.attrib.get("version", None)
                elif not foundInstance:
                    break
                ns = mdlObj.qname.namespaceURI
                ln = mdlObj.qname.localName
                if beforeInstanceStream:
                    if ((ns == XbrlConst.link
                         and ln not in ("schemaRef", "linkbaseRef")) or
                        (ns == XbrlConst.xbrli and ln in ("context", "unit"))
                            or (ns not in (XbrlConst.link, XbrlConst.xbrli))):
                        beforeInstanceStream = False
                        if _streamingExtensionsValidate:
                            instValidator.validate(
                                modelXbrl,
                                modelXbrl.modelManager.formulaOptions.
                                typedParameters(modelXbrl.prefixedNamespaces))
                        else:  # need default dimensions
                            ValidateXbrlDimensions.loadDimensionDefaults(
                                modelXbrl)
                elif not beforeInstanceStream and beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    beforeStartStreamingPlugin = False
        elif event == "end":
            parentMdlObj = mdlObj.getparent()
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj, )
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(
                                contextsToCheck)
                        del contextsToCheck  # dereference
                elif ln == "unit":
                    if len(unitBuffer) >= unitBufferLimit:
                        # drop before additing as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                            #>>del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits((mdlObj, ))
                elif ln == "xbrl":  # end of document
                    # check remaining batched facts if any
                    if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # finish any final batch of facts
                        if len(modelXbrl.facts) > 0:
                            factsToCheck = modelXbrl.facts.copy()
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            if _streamingValidateFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.ValidateFacts"):
                                    pluginMethod(instValidator, factsToCheck)
                            if _streamingFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.Facts"):
                                    pluginMethod(modelXbrl, factsToCheck)
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                #>>del parentMdlObj[parentMdlObj.index(fact)]
                            for cntx in contextsToDrop:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            for unit in unitsToDrop:
                                dropUnit(modelXbrl, unit)
                                #>>del parentMdlObj[parentMdlObj.index(unit)]
                            for footnoteLink in footnoteLinksToDrop:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            fact = cntx = unit = footnoteLink = None
                            del contextsToDrop[:]
                            del unitsToDrop[:]
                            del footnoteLinksToDrop[:]
                            del factsToCheck
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    pi = childProcessingInstruction(mdlObj,
                                                    "xbrl-facts-check",
                                                    reversed=True)
                    if pi is not None:  # attrib is in .text, not attrib, no idea why!!!
                        _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']",
                                           pi.text)
                        if _match:
                            _matchGroups = _match.groups()
                            if len(_matchGroups) == 2:
                                if _matchGroups[0] == "sum-of-fact-md5s":
                                    try:
                                        expectedMd5 = Md5Sum(_matchGroups[1])
                                        if modelDocument._factsCheckMd5s != expectedMd5:
                                            modelXbrl.warning(
                                                "streamingExtensions:xbrlFactsCheckWarning",
                                                _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"
                                                  ),
                                                modelObject=modelXbrl,
                                                expectedMd5=expectedMd5,
                                                actualMd5Sum=modelDocument.
                                                _factsCheckMd5s)
                                        else:
                                            modelXbrl.info(
                                                "info",
                                                _("Successful XBRL facts sum of md5s."
                                                  ),
                                                modelObject=modelXbrl)
                                    except ValueError:
                                        modelXbrl.error(
                                            "streamingExtensions:xbrlFactsCheckError",
                                            _("Invalid sum-of-md5s %(sumOfMd5)s"
                                              ),
                                            modelObject=modelXbrl,
                                            sumOfMd5=_matchGroups[1])
                    if _streamingValidateFactsPlugin:
                        for pluginMethod in pluginClassMethods(
                                "Streaming.ValidateFinish"):
                            pluginMethod(instValidator)
                    if _streamingFactsPlugin:
                        for pluginMethod in pluginClassMethods(
                                "Streaming.Finish"):
                            pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(
                        mdlObj,
                        urlRewritePluginClass=
                        "ModelDocument.InstanceSchemaRefRewriter")
                elif ln in ("roleRef", "arcroleRef"):
                    modelDocument.linkbaseDiscover((mdlObj, ), inInstance=True)
                elif ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj, )
                    modelDocument.linkbaseDiscover(footnoteLinks,
                                                   inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                            # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl and isinstance(
                    mdlObj, ModelFact):
                numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if factsCheckVersion:
                    factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _streamingFactsPlugin or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,
                                    )  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # use batches of 1000 facts
                        if len(modelXbrl.facts) > 1000:
                            factsToCheck = modelXbrl.facts.copy()
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            if _streamingValidateFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.ValidateFacts"):
                                    pluginMethod(instValidator, factsToCheck)
                            if _streamingFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.Facts"):
                                    pluginMethod(modelXbrl, factsToCheck)
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                #>>del parentMdlObj[parentMdlObj.index(fact)]
                            for cntx in contextsToDrop:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            for unit in unitsToDrop:
                                dropUnit(modelXbrl, unit)
                                #>>del parentMdlObj[parentMdlObj.index(unit)]
                            for footnoteLink in footnoteLinksToDrop:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            fact = cntx = unit = footnoteLink = None
                            del contextsToDrop[:]
                            del unitsToDrop[:]
                            del footnoteLinksToDrop[:]
                            del factsToCheck  # dereference fact or batch of facts
                    else:
                        dropFact(
                            modelXbrl, mdlObj,
                            modelXbrl.facts)  # single fact has been processed
                        #>>del parentMdlObj[parentMdlObj.index(mdlObj)]
                if numRootFacts % 1000 == 0:
                    pass
                    #modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts,
                    #                                                                          100.0 * self.numRootFacts / instInfoNumRootFacts),
                    #                          minTimeToShow=20.0)
                    #gc.collect()
                    #sys.stdout.write ("\rAt fact {} of {} mem {}".format(numRootFacts, instInfoNumRootFacts, modelXbrl.modelManager.cntlr.memoryUsed))
    if mdlObj is not None:
        mdlObj.clear()
    del _parser, _parserLookupName, _parserLookupClass

    if _streamingExtensionsValidate and validator is not None:
        _file.close()
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True

    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument
コード例 #22
0
ファイル: FileSource.py プロジェクト: benrosemeyer-wf/Arelle
    def open(self):
        """Open this file source and, for container formats, load its index.

        Calling this on a source whose ``isOpen`` is already true does nothing.
        Otherwise ``self.basefile`` is set to
        ``self.cntlr.webCache.getfilename(self.url)`` when the source is a
        zip/EIS/XFD/RSS container and a controller is present, else to
        ``self.url``; ``self.baseurl`` is set to ``self.url``.  If no basefile
        results, the method returns without opening anything.

        Depending on the source kind:

        * zip -- ``self.fs`` becomes a read-only ``zipfile.ZipFile``.
        * EIS -- the file is read (either plain XML or a sequence of
          zlib-compressed chunks) and parsed into ``self.eisDocument``.
        * XFD -- the file is read (either plain XML or base64 text wrapping
          gzip chunks) and parsed into ``self.xfdDocument``.
        * RSS -- the file is parsed into ``self.rssDocument``.

        ``self.isOpen`` is set to True only when the container was loaded.
        ``EnvironmentError`` and ``etree.LxmlError`` raised while reading or
        parsing EIS, XFD document and RSS content, and ``zlib.error`` raised
        while decompressing EIS chunks, are passed to ``self.logError``
        instead of being raised.
        """
        if self.isOpen:
            return
        if (self.isZip or self.isEis or self.isXfd or self.isRss) and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(self.url)
        else:
            self.basefile = self.url
        self.baseurl = self.url # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged
        if self.isZip:
            self.fs = zipfile.ZipFile(self.basefile, mode="r")
            self.isOpen = True
        elif self.isEis:
            # The file is either plain XML or a series of chunks, each with an
            # 8-byte header whose first 4 bytes are the big-endian length of
            # the zlib-compressed data that follows.
            buf = b''
            try:
                # "with" guarantees the handle is closed even if a read or a
                # decompression fails part way through.
                with io.open(self.basefile, 'rb') as eisFile:
                    while True:
                        header = eisFile.read(8)
                        if len(header) < 8:
                            break
                        if len(buf) == 0 and header.startswith(b"<?xml "): # not compressed
                            buf = header + eisFile.read()
                            break
                        compressedBytes = eisFile.read(struct.unpack(">L", header[0:4])[0])
                        if len(compressedBytes) <= 0:
                            break
                        buf += zlib.decompress(compressedBytes)
            except (EnvironmentError, zlib.error) as err:
                # corrupt chunk data is reported the same way as an I/O error;
                # whatever was read so far is still examined below
                self.logError(err)
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)

            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding
                    text = buf.decode(XmlUtil.encoding(buf))
                    # find() returns -1 when the declaration terminator is not
                    # within the first 128 characters (index() would raise)
                    endEncoding = text.find("?>", 0, 128)
                    if endEncoding > 0:
                        text = text[endEncoding+2:]
                    with io.StringIO(initial_value=text) as eisText:
                        self.eisDocument = etree.parse(eisText)
                    self.isOpen = True
                except (EnvironmentError, etree.LxmlError) as err:
                    self.logError(err)
                    return # provide error message later

        elif self.isXfd:
            # check first line of file
            xfdFile = io.open(self.basefile, 'rb')
            try:
                firstline = xfdFile.readline()
                if firstline.startswith(b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                    # file has been gzipped
                    base64input = xfdFile.read(-1)
                    xfdFile.close()

                    fb = base64.b64decode(base64input)
                    ungzippedBytes = b""
                    i = 0
                    while i < len(fb):
                        # each chunk starts with two 16-bit big-endian lengths:
                        # compressed size, then uncompressed size
                        lenCompr = fb[i + 0] * 256 + fb[i + 1]
                        lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                        lenRead = 0

                        # prefix the first four gzip header bytes so GzipFile
                        # accepts the chunk
                        gzchunk = (bytes((31,139,8,0)) + fb[i:i+lenCompr])
                        try:
                            with gzip.GzipFile(fileobj=io.BytesIO(gzchunk)) as gf:
                                while True:
                                    readSize = min(16384, lenUncomp - lenRead)
                                    readBytes = gf.read(size=readSize)
                                    lenRead += len(readBytes)
                                    ungzippedBytes += readBytes
                                    if len(readBytes) == 0 or (lenUncomp - lenRead) <= 0:
                                        break
                        except IOError:
                            pass # provide error message later

                        i += lenCompr + 4
                    #for learning the content of xfd file, uncomment this:
                    #with open("c:\\temp\\test.xml", "wb") as fh:
                    #    fh.write(ungzippedBytes)
                    xfdFile = io.StringIO(initial_value=ungzippedBytes.decode("utf-8"))
                else:
                    # position to start of file
                    xfdFile.seek(0,io.SEEK_SET)

                try:
                    self.xfdDocument = etree.parse(xfdFile)
                    self.isOpen = True
                except (EnvironmentError, etree.LxmlError) as err:
                    self.logError(err)
                    return # provide error message later
            finally:
                # closes whichever stream is current (the raw file or the
                # decoded text), including on the error paths above
                xfdFile.close()

        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except (EnvironmentError, etree.LxmlError) as err:
                self.logError(err)
                return # provide error message later
コード例 #23
0
ファイル: FileSource.py プロジェクト: benrosemeyer-wf/Arelle
 def file(self, filepath, binary=False):
     ''' 
         Open filepath, which may name a member of an archive file source
         (zip, EIS or XFD) or an ordinary file.

         for text, return a tuple of (open file handle, encoding)
         for binary, return a tuple of (open file handle, )

         Raises ArchiveFileIOError when an EIS or XFD archive has no
         non-empty entry whose name matches the requested member.
     '''
     UTF8_BOM = b"\xef\xbb\xbf"

     def embeddedFileStream(b64data):
         # EIS and XFD containers carry each file as base64 text
         b = base64.b64decode(b64data.encode("latin-1"))
         # remove BOM codes if present (also when the payload is nothing but a BOM)
         if b.startswith(UTF8_BOM):
             b = b[len(UTF8_BOM):]
         if binary:
             return (io.BytesIO(b), )
         encoding = XmlUtil.encoding(b, default="latin-1")
         return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                 encoding)

     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         # member name is the part of filepath after the archive's own path/url and a separator
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else: # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             # member names are looked up with forward slashes
             b = archiveFileSource.fs.read(archiveFileName.replace("\\","/"))
             if binary:
                 return (io.BytesIO(b), )
             encoding = XmlUtil.encoding(b)
             return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                     encoding)
         elif archiveFileSource.isEis:
             # NOTE(review): reads self.eisDocument whereas the XFD branch reads
             # archiveFileSource.xfdDocument -- confirm this asymmetry is intended
             for docElt in self.eisDocument.iter(tag="{http://www.sec.gov/edgar/common}document"):
                 outfn = docElt.findtext("{http://www.sec.gov/edgar/common}conformedName")
                 if outfn == archiveFileName:
                     b64data = docElt.findtext("{http://www.sec.gov/edgar/common}contents")
                     if b64data:
                         return embeddedFileStream(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 outfn = data.findtext("filename")
                 if outfn == archiveFileName:
                     b64data = data.findtext("mimedata")
                     if b64data:
                         return embeddedFileStream(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
     if binary:
         return (io.open(filepath, 'rb'), )
     # check encoding from the first bytes of the file
     with open(filepath, 'rb') as fb:
         hdrBytes = fb.read(512)
         encoding = XmlUtil.encoding(hdrBytes)
         if encoding.lower() in ('utf-8','utf8'):
             text = None
         else:
             # non-utf-8 content is decoded here in full
             fb.seek(0)
             text = fb.read().decode(encoding)
         # allow filepath to close
     # this may not be needed for Mac or Linux, needs confirmation!!!
     if text is None:  # ok to read as utf-8
         return io.open(filepath, 'rt', encoding='utf-8'), encoding
     # strip XML declaration
     xmlDeclarationMatch = XMLdeclaration.search(text)
     if xmlDeclarationMatch: # remove it for lxml
         start, end = xmlDeclarationMatch.span()
         text = text[0:start] + text[end:]
     return (io.StringIO(initial_value=text), encoding)
コード例 #24
0
ファイル: FileSource.py プロジェクト: tyrose1214/Arelle
 def file(self, filepath, binary=False):
     ''' 
         Open filepath, which may name a member of an archive file source
         (zip, EIS or XFD) or an ordinary file.

         for text, return a tuple of (open file handle, encoding)
         for binary, return a tuple of (open file handle, )

         Raises ArchiveFileIOError when the archive has no entry for the
         requested member (zip), or no non-empty entry with a matching
         name (EIS, XFD).
     '''
     UTF8_BOM = b"\xef\xbb\xbf"

     def embeddedFileStream(b64data):
         # EIS and XFD containers carry each file as base64 text
         b = base64.b64decode(b64data.encode("latin-1"))
         # remove BOM codes if present (also when the payload is nothing but a BOM)
         if b.startswith(UTF8_BOM):
             b = b[len(UTF8_BOM):]
         if binary:
             return (io.BytesIO(b), )
         encoding = XmlUtil.encoding(b, default="latin-1")
         return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                 encoding)

     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         # member name is the part of filepath after the archive's own path/url and a separator
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else: # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             # only the member lookup is guarded, so a KeyError raised while
             # detecting the encoding is not misreported as a missing member
             try:
                 b = archiveFileSource.fs.read(archiveFileName.replace("\\","/"))
             except KeyError:
                 raise ArchiveFileIOError(self, archiveFileName)
             if binary:
                 return (io.BytesIO(b), )
             encoding = XmlUtil.encoding(b)
             return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                     encoding)
         elif archiveFileSource.isEis:
             # NOTE(review): reads self.eisDocument whereas the XFD branch reads
             # archiveFileSource.xfdDocument -- confirm this asymmetry is intended
             for docElt in self.eisDocument.iter(tag="{http://www.sec.gov/edgar/common}document"):
                 outfn = docElt.findtext("{http://www.sec.gov/edgar/common}conformedName")
                 if outfn == archiveFileName:
                     b64data = docElt.findtext("{http://www.sec.gov/edgar/common}contents")
                     if b64data:
                         return embeddedFileStream(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 outfn = data.findtext("filename")
                 if outfn == archiveFileName:
                     b64data = data.findtext("mimedata")
                     if b64data:
                         return embeddedFileStream(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
     # not inside a recognized archive: delegate to the stream openers
     if binary:
         return (openFileStream(self.cntlr, filepath, 'rb'), )
     else:
         return openXmlFileStream(self.cntlr, filepath)
コード例 #25
0
ファイル: FileSource.py プロジェクト: andygreener/Arelle
 def file(self, filepath, binary=False):
     ''' 
         Open filepath, which may name a member of an archive file source
         (zip, EIS or XFD) or an ordinary file.

         for text, return a tuple of (open file handle, encoding)
         for binary, return a tuple of (open file handle, )

         Raises ArchiveFileIOError when an EIS or XFD archive has no
         non-empty entry whose name matches the requested member.
     '''
     UTF8_BOM = b"\xef\xbb\xbf"

     def embeddedFileStream(b64data):
         # EIS and XFD containers carry each file as base64 text
         b = base64.b64decode(b64data.encode("latin-1"))
         # remove BOM codes if present (also when the payload is
         # nothing but a BOM)
         if b.startswith(UTF8_BOM):
             b = b[len(UTF8_BOM):]
         if binary:
             return (io.BytesIO(b), )
         encoding = XmlUtil.encoding(b, default="latin-1")
         return (io.TextIOWrapper(io.BytesIO(b),
                                  encoding=encoding), encoding)

     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         # member name is the part of filepath after the archive's own
         # path/url and a separator
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) +
                                        1:]
         else:  # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             # member names are looked up with forward slashes
             b = archiveFileSource.fs.read(
                 archiveFileName.replace("\\", "/"))
             if binary:
                 return (io.BytesIO(b), )
             encoding = XmlUtil.encoding(b)
             return (io.TextIOWrapper(io.BytesIO(b),
                                      encoding=encoding), encoding)
         elif archiveFileSource.isEis:
             # NOTE(review): reads self.eisDocument whereas the XFD branch
             # reads archiveFileSource.xfdDocument -- confirm this
             # asymmetry is intended
             for docElt in self.eisDocument.iter(
                     tag="{http://www.sec.gov/edgar/common}document"):
                 outfn = docElt.findtext(
                     "{http://www.sec.gov/edgar/common}conformedName")
                 if outfn == archiveFileName:
                     b64data = docElt.findtext(
                         "{http://www.sec.gov/edgar/common}contents")
                     if b64data:
                         return embeddedFileStream(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 outfn = data.findtext("filename")
                 if outfn == archiveFileName:
                     b64data = data.findtext("mimedata")
                     if b64data:
                         return embeddedFileStream(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
     # not inside a recognized archive: delegate to the stream openers
     if binary:
         return (openFileStream(self.cntlr, filepath, 'rb'), )
     else:
         return openXmlFileStream(self.cntlr, filepath)
コード例 #26
0
ファイル: FileSource.py プロジェクト: sternshus/not_arelle2.7
         compressedBytes = file.read( struct.unpack(u">L", l[0:4])[0])
         if len(compressedBytes) <= 0:
             break
         buf += zlib.decompress(compressedBytes)
     file.close()
 except EnvironmentError, err:
     self.logError(err)
     pass
 #uncomment to save for debugging
 #with open("c:/temp/test.xml", "wb") as f:
 #    f.write(buf)
 
 if buf.startswith("<?xml "):
     try:
         # must strip encoding
         unicode = buf.decode(XmlUtil.encoding(buf))
         endEncoding = unicode.index(u"?>", 0, 128)
         if endEncoding > 0:
             unicode = unicode[endEncoding+2:]
         file = io.StringIO(initial_value=unicode)
         parser = etree.XMLParser(recover=True, huge_tree=True)
         self.eisDocument = etree.parse(file, parser=parser)
         file.close()
         self.isOpen = True
     except EnvironmentError, err:
         self.logError(err)
         return # provide error message later
     except etree.LxmlError, err:
         self.logError(err)
         return # provide error message later
 
コード例 #27
0
ファイル: streamingExtensions.py プロジェクト: joyanta/Arelle
def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, **kwargs):
    """Load an XBRL instance in streaming mode when it carries a streaming header.

    The file is read twice.  A first iterparse pass looks for an
    xbrl-streamable-instance processing instruction directly inside the
    xbrli:xbrl root element and counts root-level facts.  If the header is
    present and acceptable, a second pass feeds the file to a parser target
    that builds model objects element by element and discovers (and, when
    streaming validation is enabled, validates) contexts, units, footnote
    links and facts as each element ends, dropping buffered objects once the
    header's buffer limits are exceeded.

    modelXbrl: model the instance is loaded into; its fileSource opens filepath
    mappedUri: URI passed to the created ModelDocument
    filepath: path of the instance, also used as the parser base_url
    kwargs: accepted but not used here

    Returns modelXbrl.modelDocument after streaming, or None when streaming
    checks are disabled, the file is not an instance with a streaming header,
    or an error was reported (header values, incompatible validation or
    plugin, syntax errors in the first pass).

    modelXbrl._streamingExtensionValidated is set to False on entry and to
    True only after streaming validation completed.
    """
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    if not _streamingExtensionsCheck:
        return None

    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False

    def logSyntaxErrors(parsercontext):
        # report every entry of the parser's error log as an xmlSchema:syntax error
        for error in parsercontext.error_log:
            modelXbrl.error("xmlSchema:syntax",
                    _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                    modelObject=modelXbrl, fileName=os.path.basename(filepath),
                    error=error.message, line=error.line, column=error.column, sourceAction="streaming")
    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    # (an earlier parser-target based implementation of this first pass was
    #  noted as seeming twice as slow as iterparse, hence iterparse below)
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
    for event, elt in instInfoContext:
        if event == "start":
            if elt.getparent() is not None:
                if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        # first child of xbrli:xbrl: the streaming header must precede it
                        foundInstance = True
                        pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                        if pi is None:
                            break
                        else:
                            streamingAspects = dict(pi.attrib.copy())
                            if creationSoftwareComment is None:
                                creationSoftwareComment = precedingComment(elt)
                    if not elt.tag.startswith("{http://www.xbrl.org/"):
                        instInfoNumRootFacts += 1
                        if instInfoNumRootFacts % 1000 == 0:
                            modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                elif not foundInstance:
                    break
            elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                creationSoftwareComment = precedingComment(elt)
                if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                    # the message format needs an "error" argument, supply the header's name
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                            modelObject=elt, error="xbrl-streamable-instance")
        elif event == "end":
            # release content already seen so the first pass stays small
            elt.clear()
            numElts += 1
            if numElts % 1000 == 0 and elt.getparent() is not None:
                while elt.getprevious() is not None and elt.getparent() is not None:
                    del elt.getparent()[0]
    if elt is not None:
        elt.clear()

    _file.seek(0,io.SEEK_SET) # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error("streamingExtensions:unsupportedVersion",
                    _("Streaming version %(version)s, major version number must be 1"),
                    modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError, TypeError, ValueError):
        # TypeError: version attribute absent (Decimal(None)); ValueError: int() of a NaN
        modelXbrl.error("streamingExtensions:versionError",
                _("Version %(version)s, number must be 1.n"),
                modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            # an absent buffer attribute means unlimited
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error("streamingExtensions:valueError",
                    _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                    modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.EFM:
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.GFM:
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.EBA:
            incompatibleValidations.append("EBA")
        if _validateDisclosureSystem and _disclosureSystem.HMRC:
            incompatibleValidations.append("HMRC")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error("streamingExtensions:incompatibleValidation",
                    _("Streaming instance validation does not support %(incompatibleValidations)s validation"),
                    modelObject=modelXbrl, incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
    logSyntaxErrors(instInfoContext)
    del instInfoContext # dereference

    for pluginMethod in pluginClassMethods("Streaming.BlockStreaming"):
        _blockingPluginName = pluginMethod(modelXbrl)
        if _blockingPluginName: # name of blocking plugin is returned
            modelXbrl.error("streamingExtensions:incompatiblePlugIn",
                    _("Streaming instance not supported by plugin %(blockingPlugin)s"),
                    modelObject=modelXbrl, blockingPlugin=_blockingPluginName)
            foundErrors = True

    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0,io.SEEK_SET) # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    # objects retained so they can be dropped once a buffer limit is exceeded
    contextBuffer = []
    unitBuffer = []
    footnoteBuffer = []

    _streamingValidateFactsPlugin = any(True for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"))


    class modelLoaderTarget():
        # parser target for the second pass: builds model objects as elements
        # start and performs discovery/validation/dropping as they end
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.beforeStartStreamingPlugin = True
            self.numRootFacts = 1
            modelXbrl.streamingParentModelObject = None
            modelXbrl.isStreamingMode = True
        def start(self, tag, attrib, nsmap=None):
            modelXbrl.streamingParentModelObject = self.currentMdlObj # pass parent to makeelement for ModelObjectFactory
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                # root element: create the instance document around it
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                # first element that is not a schemaRef/linkbaseRef ends the
                # DTS-reference prologue of the instance
                if (self.beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif not self.beforeInstanceStream and self.beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    self.beforeStartStreamingPlugin = False
            return mdlObj
        def end(self, tag):
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        # sticky contexts are never buffered, so never dropped
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            dropContext(modelXbrl, cntx)
                            del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        dropUnit(modelXbrl, unit)
                        del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    for pluginMethod in pluginClassMethods("Streaming.Finish"):
                        pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                                # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            dropFootnoteLink(modelXbrl, footnoteLink)
                            del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                # a root-level fact (child of xbrli:xbrl outside the xbrli/link namespaces)
                self.numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if _streamingExtensionsValidate or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        factsToCheck = modelXbrl.facts.copy()
                        factsHaveBeenProcessed = True
                        # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                        for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                            if not pluginMethod(modelXbrl, factsToCheck):
                                factsHaveBeenProcessed = False
                        if factsHaveBeenProcessed:
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                del parentMdlObj[parentMdlObj.index(fact)]
                    else:
                        dropFact(modelXbrl, mdlObj, modelXbrl.facts) # single fact has been processed
                        del parentMdlObj[parentMdlObj.index(mdlObj)]
                    del factsToCheck # dereference fact or batch of facts
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts,
                                                                                              100.0 * self.numRootFacts / instInfoNumRootFacts),
                                              minTimeToShow=20.0)
            return mdlObj
        def data(self, data):
            self.currentMdlObj.text = data
        def comment(self, text):
            pass
        def pi(self, target, data):
            pass
        def close(self):
            del modelXbrl.streamingParentModelObject
            return None

    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    try:
        etree.parse(_file, parser=_parser, base_url=filepath)
    finally:
        # the stream is not needed after the second pass, whether or not validating
        _file.close()
    logSyntaxErrors(_parser)
    if _streamingExtensionsValidate and validator is not None:
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True

    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument