Python XmlUtil.encodingの例

プログラミング言語: Python

名前空間/パッケージ名: arelle

クラス/型: XmlUtil

メソッド/関数: encoding

hotexamples.comのコード掲載数: 27

Python XmlUtil.encoding - 27件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのarelle.XmlUtil.encodingの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

descendant(30)

child(30)

descendants(28)

innerText(19)

childAttr(16)

children(16)

dateunionValue(15)

hasChild(15)

encoding(13)

addChild(12)

xmlnsprefix(12)

writexml(12)

elementFragmentIdentifier(8)

datetimeValue(8)

xmlstring(8)

descendantAttr(7)

text(7)

xmlns(5)

addQnameValue(5)

parent(5)

clarkNotationToPrefixedName(4)

ancestors(4)

schemaLocation(3)

hasAncestor(3)

lastChild(2)

prefixedNameToNamespaceLocalname(2)

markIdAttributes(2)

xpointerSchemes(2)

copyNodes(2)

clarkNotationToPrefixNsLocalname(2)

innerTextNodes(2)

ixToXhtml(1)

ancestor(1)

childText(1)

childrenAttrs(1)

textNotStripped(1)

copyChildren(1)

setXmlns(1)

schemaFacets(1)

schemaDescendantsNames(1)

schemaDescendant(1)

schemaBaseTypeDerivedFrom(1)

schemaAttributesGroups(1)

emptyContentModel(1)

parentId(1)

innerTextList(1)

hasDescendant(1)

コード例 #1

ファイルを表示

ファイル: FileSource.py プロジェクト: marado/Arelle

 def file(self,filepath):
     """Open ``filepath`` for text reading, looking inside a containing archive first.

     Returns a ``(stream, encoding)`` tuple.  For a zip entry the stream wraps
     the entry's bytes; for an XFD document the matching base64 ``mimedata``
     payload is decoded; ``(None, None)`` is returned when the XFD document
     has no usable entry.  Anything else is read from the local file system.
     """
     container = self.fileSourceContainingFilepath(filepath)
     if container is not None:
         # the entry name is whatever follows the archive's base file (or base url) plus a separator
         if filepath.startswith(container.basefile):
             prefixLength = len(container.basefile)
         else: # filepath.startswith(self.baseurl)
             prefixLength = len(container.baseurl)
         entryName = filepath[prefixLength + 1:]
         if container.isZip:
             raw = container.fs.read(entryName.replace("\\","/"))
             detected = XmlUtil.encoding(raw)
             return (io.TextIOWrapper(io.BytesIO(raw), encoding=detected), detected)
         elif container.isXfd:
             for dataElt in container.xfdDocument.iter(tag="data"):
                 if dataElt.findtext("filename") != entryName:
                     continue
                 encodedPayload = dataElt.findtext("mimedata")
                 if not encodedPayload:
                     continue
                 raw = base64.b64decode(encodedPayload.encode("latin-1"))
                 # drop a leading UTF-8 byte order mark (EF BB BF) if present
                 if len(raw) > 3 and raw[:3] == b"\xef\xbb\xbf":
                     raw = raw[3:]
                 # NOTE(review): the wrapper uses the sniffed encoding but the
                 # tuple reports "latin-1" -- confirm callers rely on this
                 return (io.TextIOWrapper(
                     io.BytesIO(raw),
                     encoding=XmlUtil.encoding(raw)), "latin-1")
             return (None,None)
     # local file: sniff the encoding from the first bytes without consuming them
     with open(filepath, 'rb') as fb:
         encoding = XmlUtil.encoding(fb.peek(512))
         isUtf8 = encoding.lower() in ('utf-8','utf8')
         # non-utf-8 content is decoded here so the binary handle can be closed
         text = None if isUtf8 else fb.read().decode(encoding)
     if text is None:  # ok to read as utf-8
         return (open(filepath, 'rt', encoding='utf-8'), encoding)
     # already-decoded text: cut out the XML declaration (removed for lxml)
     declaration = XMLdeclaration.search(text)
     if declaration:
         declStart, declEnd = declaration.span()
         text = text[:declStart] + text[declEnd:]
     return (io.StringIO(initial_value=text), encoding)

コード例 #2

ファイルを表示

def openFileStream(cntlr, filepath, mode='r', encoding=None):
    """Open ``filepath`` from the server web cache or the local file system.

    Args:
        cntlr: controller providing ``webCache`` and ``isGAE``; may be falsy,
            in which case only the local file system is used.
        filepath: local path or http url (urls are mapped to a cache file
            name when a controller is available).
        mode: file mode; text modes trigger encoding detection when no
            ``encoding`` is given.
        encoding: explicit text encoding, or None to detect it.

    Returns:
        An open stream: ``io.BytesIO`` / ``FileNamedStringIO`` for server
        cache content, otherwise the result of ``io.open``.
    """
    if isHttpUrl(filepath) and cntlr:
        filepath = cntlr.webCache.getfilename(filepath)
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/")
        if cntlr.isGAE:  # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(
                cntlr.webCache.cacheFilepathToUrl(filepath),
                filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            # text requested: replace the byte buffer by a decoded string stream
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(
                filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    elif encoding is None and 'b' not in mode:
        # sniff the encoding from the first 512 bytes; the context manager
        # guarantees the probe handle is closed even if reading fails
        with io.open(filepath, mode='rb') as headerStream:
            hdrBytes = headerStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
        return io.open(filepath, mode=mode, encoding=encoding)
    else:
        # local file system
        return io.open(filepath, mode=mode, encoding=encoding)

コード例 #3

ファイルを表示

ファイル: FileSource.py プロジェクト: andygreener/Arelle

def openXmlFileStream(cntlr, filepath, stripDeclaration=False):
    """Open an XML file for text reading and report its detected encoding.

    Returns a tuple ``(fileStream, encoding)``.  utf-8 content is reopened
    directly from ``filepath`` (unless running on GAE or ``stripDeclaration``
    is set); otherwise the content is decoded in memory, its XML declaration
    is removed, and an ``io.StringIO`` is returned.
    """
    openedFileStream = openFileStream(cntlr, filepath, 'rb')
    try:
        # check encoding
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes)
        if encoding.lower() in ('utf-8', 'utf8') and (
                cntlr is None or not cntlr.isGAE) and not stripDeclaration:
            text = None
        else:
            openedFileStream.seek(0)
            text = openedFileStream.read().decode(encoding)
    finally:
        # always release the probe stream, including when read/decode raises
        openedFileStream.close()
    # this may not be needed for Mac or Linux, needs confirmation!!!
    if text is None:  # ok to read as utf-8
        return io.open(filepath, 'rt', encoding='utf-8'), encoding
    else:
        # strip XML declaration (done whenever the in-memory text path is taken)
        xmlDeclarationMatch = XMLdeclaration.search(text)
        if xmlDeclarationMatch:  # remove it for lxml
            start, end = xmlDeclarationMatch.span()
            text = text[0:start] + text[end:]
        return (io.StringIO(initial_value=text), encoding)

コード例 #4

ファイルを表示

ファイル: cryptAES_EAX.py プロジェクト: Arelle/Arelle

def securityFileSourceFile(cntlr, ownerObject, filepath, binary, stripDeclaration):
    """Serve a FileSource file request from its encrypted companion file.

    Returns None when ``ownerObject`` has no encryption, when no encrypted
    companion file exists, or when no cipher key is known.  Otherwise returns
    a 1-tuple with a bytes stream (``binary``) or a ``(stream, encoding)``
    tuple with the decoded text.
    """
    if not ownerObject.hasEncryption:
        return None
    for entry in ownerObject.entrypointfiles:
        # the request matches an entry point itself or one of its ixds members
        isEntryFile = filepath == entry.get("file") or any(
            filepath == ixfile.get("file") for ixfile in entry.get("ixds", ()))
        if isEntryFile and "key" in entry:
            ownerObject.cipherKey = base64.decodebytes(entry["key"].encode())
            break # set new iv, key based on entrypointfiles
    # may be a non-entry file (xsd, linkbase, jpg) using entry's key
    encryptedPath = filepath + ENCRYPTED_FILE_SUFFIX
    if not os.path.exists(encryptedPath) or ownerObject.cipherKey is None:
        return None
    # file layout as read here: 16 bytes nonce, 16 bytes tag, then the ciphertext
    with io.open(encryptedPath, "rb") as fh:
        nonce = fh.read(16)
        tag = fh.read(16)
        encrdata = fh.read()
    cipher = AES.new(ownerObject.cipherKey, AES.MODE_EAX, nonce)
    bytesdata = cipher.decrypt_and_verify(encrdata, tag)
    encrdata = None # dereference before decode operation
    if binary: # return bytes
        # the last byte gives the number of trailing padding bytes to drop
        return (FileSource.FileNamedBytesIO(filepath, bytesdata[0:-bytesdata[-1]]), )
    # detect encoding if there is an XML header
    if cntlr:
        fallbackEncoding = cntlr.modelManager.disclosureSystem.defaultXmlEncoding
    else:
        fallbackEncoding = 'utf-8'
    encoding = XmlUtil.encoding(bytesdata[0:512], default=fallbackEncoding)
    # trim padding and decode; a None encoding falls back to utf-8
    text = bytesdata[0:-bytesdata[-1]].decode(encoding or 'utf-8')
    bytesdata = None # dereference before text operation
    if stripDeclaration: # file source may strip XML declaration for libxml
        declaration = FileSource.XMLdeclaration.search(text)
        if declaration: # remove it for lxml
            declStart, declEnd = declaration.span()
            text = text[:declStart] + text[declEnd:]
    return (FileSource.FileNamedStringIO(filepath, initial_value=text), encoding)

コード例 #5

ファイルを表示

ファイル: FileSource.py プロジェクト: camirisk/Arelle

def openXmlFileStream(cntlr, filepath, stripDeclaration=False):
    """Open an XML file for text reading and report its detected encoding.

    Returns a tuple ``(fileStream, encoding)``.  The encoding default comes
    from the controller's disclosure system (``'utf-8'`` without a
    controller) and may be None; a None encoding is decoded as utf-8 in
    memory instead of raising on ``None.lower()``.
    """
    openedFileStream = openFileStream(cntlr, filepath, 'rb')
    try:
        # check encoding
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(
            hdrBytes,
            default=cntlr.modelManager.disclosureSystem.defaultXmlEncoding
            if cntlr else 'utf-8')
        # encoding default from disclosure system could be None, so guard
        # the .lower() call; None takes the decode path with its utf-8 fallback
        if (encoding is not None
                and encoding.lower() in ('utf-8', 'utf8', 'utf-8-sig')
                and (cntlr is None or not cntlr.isGAE)
                and not stripDeclaration):
            text = None
        else:
            openedFileStream.seek(0)
            text = openedFileStream.read().decode(encoding or 'utf-8')
    finally:
        # always release the probe stream, including when read/decode raises
        openedFileStream.close()
    # this may not be needed for Mac or Linux, needs confirmation!!!
    if text is None:  # ok to read as utf-8
        return io.open(filepath, 'rt', encoding='utf-8'), encoding
    else:
        if stripDeclaration:
            # strip XML declaration
            xmlDeclarationMatch = XMLdeclaration.search(text)
            if xmlDeclarationMatch:  # remove it for lxml
                start, end = xmlDeclarationMatch.span()
                text = text[0:start] + text[end:]
        return (FileNamedStringIO(filepath, initial_value=text), encoding)

コード例 #6

ファイルを表示

def securityFileSourceFile(cntlr, ownerObject, filepath, binary, stripDeclaration):
    """Serve a FileSource file request from its AES-CBC encrypted companion file.

    Returns None when ``ownerObject`` has no encryption, when no encrypted
    companion file exists, or when key/iv are unknown.  Otherwise returns a
    1-tuple with a bytes stream (``binary``) or a ``(stream, encoding)``
    tuple with the decoded text.
    """
    # handle FileSource file requests which can return encrypted contents
    if ownerObject.hasEncryption:
        for entrypointfile in ownerObject.entrypointfiles:
            if (filepath == entrypointfile.get("file") or 
                any(filepath == ixfile.get("file") for ixfile in entrypointfile.get("ixds",()))
                ) and "key" in entrypointfile and "iv" in entrypointfile:
                ownerObject.cipherIv = base64.decodebytes(entrypointfile["iv"].encode())
                ownerObject.cipherKey = base64.decodebytes(entrypointfile["key"].encode())
                break # set new iv, key based on entrypointfiles
        # may be a non-entry file (xsd, linkbase, jpg) using entry's iv, key
        if os.path.exists(filepath + ENCRYPTED_FILE_SUFFIX) and ownerObject.cipherKey is not None and ownerObject.cipherIv is not None:
            # read inside a context manager so the handle is closed deterministically
            with io.open(filepath + ENCRYPTED_FILE_SUFFIX, "rb") as fh:
                encrdata = fh.read()
            cipher = AES.new(ownerObject.cipherKey, AES.MODE_CBC, iv=ownerObject.cipherIv)
            bytesdata = cipher.decrypt(encrdata)
            encrdata = None # dereference before decode operation
            if binary: # return bytes
                # the last byte gives the number of trailing padding bytes to drop
                return (FileSource.FileNamedBytesIO(filepath, bytesdata[0:-bytesdata[-1]]), ) # trim AES CBC padding
            # detect encoding if there is an XML header
            encoding = XmlUtil.encoding(bytesdata[0:512], 
                                        default=cntlr.modelManager.disclosureSystem.defaultXmlEncoding
                                                if cntlr else 'utf-8')
            # return decoded string
            text = bytesdata[0:-bytesdata[-1]].decode(encoding or 'utf-8') # trim AES CBC padding and decode
            bytesdata = None # dereference before text operation
            if stripDeclaration: # file source may strip XML declaration for libxml
                xmlDeclarationMatch = FileSource.XMLdeclaration.search(text)
                if xmlDeclarationMatch: # remove it for lxml
                    start,end = xmlDeclarationMatch.span()
                    text = text[0:start] + text[end:]
            return (FileSource.FileNamedStringIO(filepath, initial_value=text), encoding)
    return None

コード例 #7

ファイルを表示

ファイル: FileSource.py プロジェクト: javascriptgeek/Arelle

def openXmlFileStream(cntlr, filepath, stripDeclaration=False):
    """Open an XML file for text reading and report its detected encoding.

    Returns a tuple ``(fileStream, encoding)``.  The encoding default comes
    from the controller's disclosure system (``"utf-8"`` without a
    controller) and may be None; a None encoding is decoded as utf-8 in
    memory instead of raising on ``None.lower()``.
    """
    openedFileStream = openFileStream(cntlr, filepath, "rb")
    try:
        # check encoding
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(
            hdrBytes, default=cntlr.modelManager.disclosureSystem.defaultXmlEncoding if cntlr else "utf-8"
        )
        # encoding default from disclosure system could be None, so guard
        # the .lower() call; None takes the decode path with its utf-8 fallback
        if (
            encoding is not None
            and encoding.lower() in ("utf-8", "utf8", "utf-8-sig")
            and (cntlr is None or not cntlr.isGAE)
            and not stripDeclaration
        ):
            text = None
        else:
            openedFileStream.seek(0)
            text = openedFileStream.read().decode(encoding or "utf-8")
    finally:
        # always release the probe stream, including when read/decode raises
        openedFileStream.close()
    # this may not be needed for Mac or Linux, needs confirmation!!!
    if text is None:  # ok to read as utf-8
        return io.open(filepath, "rt", encoding="utf-8"), encoding
    else:
        if stripDeclaration:
            # strip XML declaration
            xmlDeclarationMatch = XMLdeclaration.search(text)
            if xmlDeclarationMatch:  # remove it for lxml
                start, end = xmlDeclarationMatch.span()
                text = text[0:start] + text[end:]
        return (FileNamedStringIO(filepath, initial_value=text), encoding)

コード例 #8

ファイルを表示

ファイル: FileSource.py プロジェクト: joshdholtz/Arelle

def openFileStream(cntlr, filepath, mode='r', encoding=None):
    """Open ``filepath`` from the server web cache or the local file system.

    Args:
        cntlr: controller providing ``webCache`` and ``isGAE``; may be falsy,
            in which case only the local file system is used.
        filepath: local path or http url (urls are mapped to a cache file
            name when a controller is available).
        mode: file mode; text modes trigger encoding detection when no
            ``encoding`` is given.
        encoding: explicit text encoding, or None to detect it.

    Returns:
        An open stream: ``io.BytesIO`` / ``FileNamedStringIO`` for server
        cache content, otherwise the result of ``io.open``.
    """
    if isHttpUrl(filepath) and cntlr:
        filepath = cntlr.webCache.getfilename(filepath)
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\","/")
        if cntlr.isGAE: # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath),
                                    filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            # text requested: replace the byte buffer by a decoded string stream
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    elif encoding is None and 'b' not in mode:
        # sniff the encoding from the first 512 bytes; the context manager
        # guarantees the probe handle is closed even if reading fails
        with io.open(filepath, mode='rb') as headerStream:
            hdrBytes = headerStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
        return io.open(filepath, mode=mode, encoding=encoding)
    else:
        # local file system
        return io.open(filepath, mode=mode, encoding=encoding)

コード例 #9

ファイルを表示

ファイル: FileSource.py プロジェクト: namitkewat/Arelle

def openXmlFileStream(cntlr, filepath, stripDeclaration=False):
    """Open an XML file for text reading and report its detected encoding.

    Returns a tuple ``(fileStream, encoding)``.  utf-8 content is reopened
    directly from ``filepath`` (unless running on GAE or ``stripDeclaration``
    is set); otherwise the content is decoded in memory and returned as a
    ``FileNamedStringIO``, with the XML declaration removed on request.
    """
    openedFileStream = openFileStream(cntlr, filepath, 'rb')
    try:
        # check encoding
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes)
        if encoding.lower() in ('utf-8','utf8','utf-8-sig') and (cntlr is None or not cntlr.isGAE) and not stripDeclaration:
            text = None
        else:
            openedFileStream.seek(0)
            text = openedFileStream.read().decode(encoding)
    finally:
        # always release the probe stream, including when read/decode raises
        openedFileStream.close()
    # this may not be needed for Mac or Linux, needs confirmation!!!
    if text is None:  # ok to read as utf-8
        return io.open(filepath, 'rt', encoding='utf-8'), encoding
    else:
        if stripDeclaration:
            # strip XML declaration
            xmlDeclarationMatch = XMLdeclaration.search(text)
            if xmlDeclarationMatch: # remove it for lxml
                start,end = xmlDeclarationMatch.span()
                text = text[0:start] + text[end:]
        return (FileNamedStringIO(filepath, initial_value=text), encoding)

コード例 #10

ファイルを表示

 def file(self, filepath):
     """Open ``filepath`` as a text stream, looking inside a containing archive first.

     Zip entries and XFD ``mimedata`` payloads are wrapped with the encoding
     sniffed from their bytes.  Returns None when an XFD document has no
     matching entry; a path without a zip/XFD container is opened as utf-8
     text from the local file system.
     """
     container = self.fileSourceContainingFilepath(filepath)
     if container is not None:
         # the entry name is whatever follows the archive's base file (or base url) plus a separator
         if filepath.startswith(container.basefile):
             prefixLength = len(container.basefile)
         else:  # filepath.startswith(self.baseurl)
             prefixLength = len(container.baseurl)
         entryName = filepath[prefixLength + 1:]
         if container.isZip:
             raw = container.fs.read(entryName)
             return io.TextIOWrapper(io.BytesIO(raw),
                                     encoding=XmlUtil.encoding(raw))
         elif container.isXfd:
             for dataElt in container.xfdDocument.getElementsByTagName("data"):
                 # both child texts are read before the entry is tested
                 entryFilename = XmlUtil.text(
                     dataElt.getElementsByTagName("filename")[0])
                 encodedPayload = XmlUtil.text(
                     dataElt.getElementsByTagName("mimedata")[0])
                 if not (len(entryFilename) > 1 and len(encodedPayload) > 1
                         and entryFilename == entryName):
                     continue
                 raw = base64.b64decode(encodedPayload.encode("latin-1"))
                 # drop a leading UTF-8 byte order mark (EF BB BF) if present
                 if len(raw) > 3 and raw[:3] == b"\xef\xbb\xbf":
                     raw = raw[3:]
                 return io.TextIOWrapper(io.BytesIO(raw),
                                         encoding=XmlUtil.encoding(raw))
             return None
     return open(filepath, 'rt', encoding='utf-8')

コード例 #11

ファイルを表示

ファイル: FileSource.py プロジェクト: 8maki/Arelle

 def file(self,filepath):
     """Open ``filepath`` as a text stream, looking inside a containing archive first.

     Zip entries and XFD ``mimedata`` payloads are wrapped with the encoding
     sniffed from their bytes.  Returns None when an XFD document has no
     matching entry; a path without a zip/XFD container is opened as utf-8
     text from the local file system.
     """
     container = self.fileSourceContainingFilepath(filepath)
     if container is None:
         return open(filepath, 'rt', encoding='utf-8')
     # the entry name is whatever follows the archive's base file (or base url) plus a separator
     if filepath.startswith(container.basefile):
         entryName = filepath[len(container.basefile) + 1:]
     else: # filepath.startswith(self.baseurl)
         entryName = filepath[len(container.baseurl) + 1:]
     if container.isZip:
         zipBytes = container.fs.read(entryName)
         return io.TextIOWrapper(io.BytesIO(zipBytes), encoding=XmlUtil.encoding(zipBytes))
     elif container.isXfd:
         for dataElt in container.xfdDocument.getElementsByTagName("data"):
             # both child texts are read before the entry is tested
             entryFilename = XmlUtil.text(dataElt.getElementsByTagName("filename")[0])
             encodedPayload = XmlUtil.text(dataElt.getElementsByTagName("mimedata")[0])
             isWanted = (len(entryFilename) > 1 and len(encodedPayload) > 1
                         and entryFilename == entryName)
             if isWanted:
                 payload = base64.b64decode(encodedPayload.encode("latin-1"))
                 # drop a leading UTF-8 byte order mark (EF BB BF) if present
                 if len(payload) > 3 and payload[:3] == b"\xef\xbb\xbf":
                     payload = payload[3:]
                 return io.TextIOWrapper(io.BytesIO(payload), encoding=XmlUtil.encoding(payload))
         return None
     # container that is neither zip nor XFD: read from the local file system
     return open(filepath, 'rt', encoding='utf-8')

コード例 #12

ファイルを表示

ファイル: FileSource.py プロジェクト: fewang0521/python_dart

def openFileStream(cntlr, filepath, mode='r', encoding=None):
    """Open ``filepath`` from a package mapping, archive, web cache or local file.

    Args:
        cntlr: controller providing ``modelManager``, ``webCache`` and
            ``isGAE``; may be falsy, in which case url mapping and the web
            cache are skipped.
        filepath: local path or url; remapped through ``PackageManager`` or
            the disclosure system when applicable.
        mode: file mode; text modes trigger encoding detection when no
            ``encoding`` is given.
        encoding: explicit text encoding, or None to detect it.

    Returns:
        An open stream for the requested file.

    Raises:
        IOError: when an http url cannot be resolved to a cache file.
    """
    if PackageManager.isMappedUrl(filepath):
        filepath = PackageManager.mappedUrl(filepath)
    elif isHttpUrl(filepath) and cntlr and hasattr(
            cntlr, "modelManager"
    ):  # may be called early in initialization for PluginManager
        filepath = cntlr.modelManager.disclosureSystem.mappedUrl(filepath)
    if archiveFilenameParts(filepath):  # file is in an archive
        return openFileSource(filepath, cntlr).file(filepath,
                                                    binary='b' in mode,
                                                    encoding=encoding)[0]
    if isHttpUrl(filepath) and cntlr:
        _cacheFilepath = cntlr.webCache.getfilename(
            filepath, normalize=True
        )  # normalize is separate step in ModelDocument retrieval, combined here
        if _cacheFilepath is None:
            raise IOError(_("Unable to open file: {0}.").format(filepath))
        filepath = _cacheFilepath
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/")
        if cntlr.isGAE:  # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(
                cntlr.webCache.cacheFilepathToUrl(filepath),
                filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            # text requested: replace the byte buffer by a decoded string stream
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(
                filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    elif encoding is None and 'b' not in mode:
        # sniff the encoding from the first 512 bytes; the context manager
        # guarantees the probe handle is closed even if reading fails
        with io.open(filepath, mode='rb') as headerStream:
            hdrBytes = headerStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
        return io.open(filepath, mode=mode, encoding=encoding)
    else:
        # local file system
        return io.open(filepath, mode=mode, encoding=encoding)

コード例 #13

ファイルを表示

ファイル: FileSource.py プロジェクト: namitkewat/Arelle

def openFileStream(cntlr, filepath, mode='r', encoding=None):
    """Open ``filepath`` from a package mapping, archive, web cache or local file.

    Args:
        cntlr: controller providing ``modelManager``, ``webCache`` and
            ``isGAE``; may be falsy, in which case disclosure-system url
            mapping and the web cache are skipped.
        filepath: local path or url; remapped through ``PackageManager`` or
            the disclosure system when applicable.
        mode: file mode; text modes trigger encoding detection when no
            ``encoding`` is given.
        encoding: explicit text encoding, or None to detect it.

    Returns:
        An open stream for the requested file.

    Raises:
        IOError: when an http url cannot be resolved to a cache file.
    """
    if PackageManager.isMappedUrl(filepath):
        filepath = PackageManager.mappedUrl(filepath)
    elif cntlr and hasattr(cntlr, "modelManager"):
        # the rest of this function treats cntlr as optional, so only consult
        # the disclosure system when a controller with a model manager exists
        filepath = cntlr.modelManager.disclosureSystem.mappedUrl(filepath)
    if archiveFilenameParts(filepath): # file is in an archive
        return openFileSource(filepath, cntlr).file(filepath, binary='b' in mode, encoding=encoding)[0]
    if isHttpUrl(filepath) and cntlr:
        _cacheFilepath = cntlr.webCache.getfilename(filepath)
        if _cacheFilepath is None:
            raise IOError(_("Unable to open file: {0}.").format(filepath))
        filepath = _cacheFilepath
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\","/")
        if cntlr.isGAE: # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath),
                                    filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            # text requested: replace the byte buffer by a decoded string stream
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    elif encoding is None and 'b' not in mode:
        # sniff the encoding from the first 512 bytes; the context manager
        # guarantees the probe handle is closed even if reading fails
        with io.open(filepath, mode='rb') as headerStream:
            hdrBytes = headerStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
        return io.open(filepath, mode=mode, encoding=encoding)
    else:
        # local file system
        return io.open(filepath, mode=mode, encoding=encoding)

コード例 #14

ファイルを表示

def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, **kwargs):
    """Load an XBRL instance in streaming mode when it has a streaming header.

    The file is read in two passes:

    1. An iterparse pass looks for an ``xbrl-streamable-instance`` processing
       instruction preceding the first child of the ``xbrli:xbrl`` root,
       picks up the preceding comment and counts the root-level facts.
       If the root is not ``xbrli:xbrl`` or the header is absent, ``None`` is
       returned.
    2. After the header attributes (version and the context/unit/footnote
       buffer limits) have been checked, the file is parsed again with a
       parser target that builds model objects element by element, runs
       discovery and (when ``_streamingExtensionsValidate`` is set)
       validation on them, and drops buffered objects that exceed the
       declared buffer limits.

    modelXbrl -- model receiving the document, messages and profile stats;
                 its ``_streamingExtensionValidated`` attribute is set False
                 on entry and True once streaming validation has completed.
    mappedUri -- uri passed on to the ModelDocument constructor.
    filepath  -- path of the instance, opened in binary mode through
                 ``modelXbrl.fileSource``.
    kwargs    -- accepted but not used by this function.

    Returns ``modelXbrl.modelDocument`` after a streamed load, or ``None``
    when streaming is disabled, the file is not a streamable instance, or
    errors were found in the header or parser set-up.
    """
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    if not _streamingExtensionsCheck:
        return None

    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False

    def logSyntaxErrors(parsercontext):
        # report every entry of the parser's error log as an xmlSchema:syntax error
        for error in parsercontext.error_log:
            modelXbrl.error("xmlSchema:syntax",
                    _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                    modelObject=modelXbrl, fileName=os.path.basename(filepath),
                    error=error.message, line=error.line, column=error.column, sourceAction="streaming")
    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    # NOTE: an XMLParser target-class implementation of this first pass was
    # tried earlier and seemed about twice as slow as iterparse.
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
    for event, elt in instInfoContext:
        if event == "start":
            if elt.getparent() is not None:
                if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        # first child of xbrli:xbrl: the streaming header must precede it
                        foundInstance = True
                        pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                        if pi is None:
                            break
                        else:
                            streamingAspects = dict(pi.attrib.copy())
                            if creationSoftwareComment is None:
                                creationSoftwareComment = precedingComment(elt)
                    if not elt.tag.startswith("{http://www.xbrl.org/"):
                        instInfoNumRootFacts += 1
                        if instInfoNumRootFacts % 1000 == 0:
                            modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                elif not foundInstance:
                    break
            elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                creationSoftwareComment = precedingComment(elt)
                if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                    # the message template expects an "error" argument, which
                    # was not supplied before; name the misplaced header
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                            modelObject=elt, error="xbrl-streamable-instance")
        elif event == "end":
            # release parsed content so the first pass does not hold the whole tree
            elt.clear()
            numElts += 1
            if numElts % 1000 == 0 and elt.getparent() is not None:
                while elt.getprevious() is not None and elt.getparent() is not None:
                    del elt.getparent()[0]
    if elt is not None:
        elt.clear()

    _file.seek(0,io.SEEK_SET) # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error("streamingExtensions:unsupportedVersion",
                    _("Streaming version %(version)s, major version number must be 1"),
                    modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError):
        modelXbrl.error("streamingExtensions:versionError",
                _("Version %(version)s, number must be 1.n"),
                modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            # an absent buffer attribute means unlimited (INF)
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error("streamingExtensions:valueError",
                    _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                    modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.EFM:
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.GFM:
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.EBA:
            incompatibleValidations.append("EBA")
        if _validateDisclosureSystem and _disclosureSystem.HMRC:
            # was reported as "EBA", which mislabelled the disclosure system
            incompatibleValidations.append("HMRC")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error("streamingExtensions:incompatibleValidation",
                    _("Streaming instance validation does not support %(incompatibleValidations)s validation"),
                    modelObject=modelXbrl, incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
    logSyntaxErrors(instInfoContext)
    del instInfoContext # dereference

    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0,io.SEEK_SET) # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    eltMdlObjs = {}
    contextBuffer = []
    unitBuffer = []
    footnoteBuffer = []
    factBuffer = []
    numFacts = 1

    class modelLoaderTarget():
        """Parser target that builds, discovers and validates model objects as elements stream by."""
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.numRootFacts = 1
        def start(self, tag, attrib, nsmap=None):
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                # root element: create the model document around it
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                # the first element that is not a schemaRef/linkbaseRef ends
                # the DTS references section and starts the instance stream
                if (self.beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
            return mdlObj
        def end(self, tag):
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        # sticky contexts are discovered but never buffered
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            dropContext(modelXbrl, cntx)
                            del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        dropUnit(modelXbrl, unit)
                        del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                            # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            dropFootnoteLink(modelXbrl, footnoteLink)
                            del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                # root-level fact
                self.numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if _streamingExtensionsValidate:
                    factsToCheck = (mdlObj,)
                    instValidator.checkFacts(factsToCheck)
                    if modelXbrl.hasXDT:
                        instValidator.checkFactsDimensions(factsToCheck)
                    del factsToCheck
                    # validated facts are dropped right away to bound memory use
                    dropFact(modelXbrl, mdlObj, modelXbrl.facts)
                    del parentMdlObj[parentMdlObj.index(mdlObj)]
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts,
                                                                                              100.0 * self.numRootFacts / instInfoNumRootFacts),
                                              minTimeToShow=20.0)
            return mdlObj
        def data(self, data):
            self.currentMdlObj.text = data
        def comment(self, text):
            pass
        def pi(self, target, data):
            pass
        def close(self):
            return None

    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    etree.parse(_file, parser=_parser, base_url=filepath)
    logSyntaxErrors(_parser)
    _file.close()
    if _streamingExtensionsValidate and validator is not None:
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True

    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument

コード例 #15

ファイルを表示

ファイル: FileSource.py プロジェクト: namitkewat/Arelle

 def file(self, filepath, binary=False, stripDeclaration=False, encoding=None):
     '''Open filepath, reading it out of its containing archive when there is one.

         for text, return a tuple of (open file handle, encoding)
         for binary, return a tuple of (open file handle, )

         filepath         -- path of the file, possibly inside an archive known
                             to self.fileSourceContainingFilepath
         binary           -- return a bytes stream instead of a text stream
         stripDeclaration -- for zip and tar.gz entries, pass the bytes through
                             stripDeclarationBytes before decoding; for files
                             outside archives it is handed to openXmlFileStream
         encoding         -- text encoding; detected with XmlUtil.encoding when
                             None (EIS and XFD entries default to latin-1)

         Raises ArchiveFileIOError when a zip or tar.gz archive reports a
         KeyError for the entry, or when an EIS or XFD archive has no
         matching entry with content.
     '''
     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else: # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip or archiveFileSource.isTarGz:
             try:
                 if archiveFileSource.isZip:
                     b = archiveFileSource.fs.read(archiveFileName.replace("\\","/"))
                 else:
                     fh = archiveFileSource.fs.extractfile(archiveFileName)
                     b = fh.read()
                     fh.close() # doesn't seem to close properly using a with construct
                 if binary:
                     return (io.BytesIO(b), )
                 if encoding is None:
                     encoding = XmlUtil.encoding(b)
                 if stripDeclaration:
                     b = stripDeclarationBytes(b)
                 return (FileNamedTextIOWrapper(filepath, io.BytesIO(b), encoding=encoding),
                         encoding)
             except KeyError:
                 raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isEis or archiveFileSource.isXfd:
             # both formats are XML documents holding base64 encoded files;
             # only the element names differ
             if archiveFileSource.isEis:
                 # read the document of the archive that contains the file,
                 # which is not necessarily this file source itself
                 entries = archiveFileSource.eisDocument.iter(tag="{http://www.sec.gov/edgar/common}document")
                 nameTag = "{http://www.sec.gov/edgar/common}conformedName"
                 contentTag = "{http://www.sec.gov/edgar/common}contents"
             else:
                 entries = archiveFileSource.xfdDocument.iter(tag="data")
                 nameTag = "filename"
                 contentTag = "mimedata"
             for entry in entries:
                 if entry.findtext(nameTag) == archiveFileName:
                     b64data = entry.findtext(contentTag)
                     if b64data:
                         b = base64.b64decode(b64data.encode("latin-1"))
                         # remove BOM codes if present
                         if len(b) > 3 and b.startswith(b"\xef\xbb\xbf"):
                             b = b[3:]
                         if binary:
                             return (io.BytesIO(b), )
                         if encoding is None:
                             encoding = XmlUtil.encoding(b, default="latin-1")
                         return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                                 encoding)
             raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isInstalledTaxonomyPackage:
             # remove TAXONOMY_PACKAGE_FILE_NAME from file path
             if filepath.startswith(archiveFileSource.basefile):
                 l = len(archiveFileSource.basefile)
                 for f in TAXONOMY_PACKAGE_FILE_NAMES:
                     if filepath[l - len(f):l] == f:
                         filepath = filepath[0:l - len(f) - 1] + filepath[l:]
                         break
     # not in an archive (or installed taxonomy package): open from file system / web cache
     if binary:
         return (openFileStream(self.cntlr, filepath, 'rb'), )
     return openXmlFileStream(self.cntlr, filepath, stripDeclaration)

コード例 #16

ファイルを表示

ファイル: FileSource.py プロジェクト: namitkewat/Arelle

    def open(self):
        """Open this file source according to its type, if not already open.

        Sets ``self.basefile`` (the web cache file name of ``self.url`` for
        archive-like sources when a controller is present, otherwise the url
        itself) and ``self.baseurl``, then:

        * zip: opens ``self.fs`` as a ``zipfile.ZipFile``;
        * tar.gz: opens ``self.fs`` with ``tarfile.open``;
        * EIS: reads the file (either plain XML or length-prefixed zlib
          chunks), strips the XML declaration and parses it into
          ``self.eisDocument``;
        * XFD: reads the file (optionally base64 + gzip chunk encoded) and
          parses it into ``self.xfdDocument``;
        * RSS: parses the file into ``self.rssDocument``;
        * installed taxonomy package: builds ``self.mappedPaths`` from the
          package remappings.

        ``self.isOpen`` is set True on success.  EnvironmentError and lxml
        errors are reported through ``self.logError`` rather than raised.
        Returns None.
        """
        if self.isOpen:
            return
        isArchiveLike = (self.isZip or self.isTarGz or self.isEis or self.isXfd or
                         self.isRss or self.isInstalledTaxonomyPackage)
        if isArchiveLike and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(self.url)
        else:
            self.basefile = self.url
        self.baseurl = self.url # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged
        if self.isZip:
            try:
                self.fs = zipfile.ZipFile(openFileStream(self.cntlr, self.basefile, 'rb'), mode="r")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
        elif self.isTarGz:
            try:
                self.fs = tarfile.open(self.basefile, "r:gz")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
        elif self.isEis:
            # check first line of file
            buf = b''
            try:
                # with block closes the file even when a read fails part way
                with io.open(self.basefile, 'rb') as eisFile:
                    while True:
                        hdr = eisFile.read(8)
                        if len(hdr) < 8:
                            break
                        if len(buf) == 0 and hdr.startswith(b"<?xml "): # not compressed
                            buf = hdr + eisFile.read()  # not compressed
                            break
                        # first 4 header bytes give the big-endian length of the compressed chunk
                        compressedBytes = eisFile.read(struct.unpack(">L", hdr[0:4])[0])
                        if len(compressedBytes) <= 0:
                            break
                        buf += zlib.decompress(compressedBytes)
            except EnvironmentError as err:
                self.logError(err)
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)

            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding
                    xmlText = buf.decode(XmlUtil.encoding(buf))
                    # find() instead of index(): a declaration that is not
                    # terminated within the first 128 characters is left in
                    # place rather than raising an uncaught ValueError
                    endEncoding = xmlText.find("?>", 0, 128)
                    if endEncoding > 0:
                        xmlText = xmlText[endEncoding+2:]
                    xmlStream = io.StringIO(initial_value=xmlText)
                    parser = etree.XMLParser(recover=True, huge_tree=True)
                    self.eisDocument = etree.parse(xmlStream, parser=parser)
                    xmlStream.close()
                    self.isOpen = True
                except EnvironmentError as err:
                    self.logError(err)
                    return # provide error message later
                except etree.LxmlError as err:
                    self.logError(err)
                    return # provide error message later

        elif self.isXfd:
            try:
                xfdStream = io.open(self.basefile, 'rb')
            except EnvironmentError as err:
                # report like the other source types instead of raising
                self.logError(err)
                return # provide error message later
            try:
                # check first line of file
                firstline = xfdStream.readline()
                if firstline.startswith(b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                    # file has been gzipped
                    base64input = xfdStream.read(-1)
                    xfdStream.close()

                    fb = base64.b64decode(base64input)
                    ungzippedChunks = []
                    i = 0
                    while i < len(fb):
                        # each chunk starts with two 2-byte big-endian lengths
                        lenCompr = fb[i + 0] * 256 + fb[i + 1]
                        lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                        lenRead = 0

                        gzchunk = (bytes((31,139,8,0)) + fb[i:i+lenCompr])
                        try:
                            with gzip.GzipFile(fileobj=io.BytesIO(gzchunk)) as gf:
                                while True:
                                    readSize = min(16384, lenUncomp - lenRead)
                                    readBytes = gf.read(size=readSize)
                                    lenRead += len(readBytes)
                                    ungzippedChunks.append(readBytes)
                                    if len(readBytes) == 0 or (lenUncomp - lenRead) <= 0:
                                        break
                        except IOError:
                            # best effort: keep what was read from this chunk
                            pass # provide error message later

                        i += lenCompr + 4
                    #for learning the content of xfd file, uncomment this:
                    #with open("c:\\temp\\test.xml", "wb") as fh:
                    #    fh.write(ungzippedBytes)
                    xfdStream = io.StringIO(initial_value=b"".join(ungzippedChunks).decode("utf-8"))
                else:
                    # position to start of file
                    xfdStream.seek(0,io.SEEK_SET)

                self.xfdDocument = etree.parse(xfdStream)
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
                return # provide error message later
            except etree.LxmlError as err:
                self.logError(err)
                return # provide error message later
            finally:
                # closes whichever stream is current, also on failure
                xfdStream.close()

        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
                return # provide error message later
            except etree.LxmlError as err:
                self.logError(err)
                return # provide error message later

        elif self.isInstalledTaxonomyPackage:
            self.isOpen = True
            # load mappings
            try:
                metadataFiles = self.taxonomyPackageMetadataFiles
                if len(metadataFiles) != 1:
                    raise IOError(_("Taxonomy package must contain one and only one metadata file: {0}.")
                                  .format(', '.join(metadataFiles)))
                # HF: this won't work, see DialogOpenArchive for correct code
                # not sure if it is used
                taxonomyPackage = PackageManager.parsePackage(self.cntlr, self.url)
                fileSourceDir = os.path.dirname(self.baseurl) + os.sep
                # http remappings are kept as is, others are made relative
                # to the directory of this file source
                self.mappedPaths = {
                    prefix: (remapping if isHttpUrl(remapping)
                             else (fileSourceDir + remapping.replace("/", os.sep)))
                    for prefix, remapping in taxonomyPackage["remappings"].items()}
            except EnvironmentError as err:
                self.logError(err)
                return # provide error message later

コード例 #17

ファイルを表示

ファイル: FileSource.py プロジェクト: jaolguin/Arelle

 def file(self,filepath):
     """Open filepath as text and return a tuple of (stream, encoding).

     When the file is inside a zip or XFD archive known to
     self.fileSourceContainingFilepath, its bytes are read from the archive
     and wrapped in a text stream decoded with the encoding detected by
     XmlUtil.encoding.  (None, None) is returned when an XFD archive has no
     matching entry with content.

     Otherwise the file is read from the file system: a UTF-8 file is
     opened directly, any other encoding is decoded in memory and its XML
     declaration is stripped before it is returned as a StringIO.
     """
     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else: # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             b = archiveFileSource.fs.read(archiveFileName.replace("\\","/"))
             encoding = XmlUtil.encoding(b)
             return (io.TextIOWrapper(
                     io.BytesIO(b),
                     encoding=encoding), encoding)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 outfn = data.findtext("filename")
                 if outfn == archiveFileName:
                     b64data = data.findtext("mimedata")
                     if b64data:
                         b = base64.b64decode(b64data.encode("latin-1"))
                         # remove BOM codes if present
                         if len(b) > 3 and b.startswith(b"\xef\xbb\xbf"):
                             b = b[3:]
                         # report the encoding the stream is actually decoded
                         # with (a fixed "latin-1" was reported before even
                         # though the wrapper used the detected encoding)
                         encoding = XmlUtil.encoding(b)
                         return (io.TextIOWrapper(
                             io.BytesIO(b),
                             encoding=encoding), encoding)
             return (None,None)
     # check encoding
     with open(filepath, 'rb') as fb:
         hdrBytes = fb.read(512)
         encoding = XmlUtil.encoding(hdrBytes)
         if encoding.lower() in ('utf-8','utf8'):
             text = None
         else:
             fb.seek(0)
             text = fb.read().decode(encoding)
         # allow filepath to close
     # this may not be needed for Mac or Linux, needs confirmation!!!
     if text is None:  # ok to read as utf-8
         return io.open(filepath, 'rt', encoding='utf-8'), encoding
     # strip XML declaration
     xmlDeclarationMatch = XMLdeclaration.search(text)
     if xmlDeclarationMatch: # remove it for lxml
         start,end = xmlDeclarationMatch.span()
         text = text[0:start] + text[end:]
     return (io.StringIO(initial_value=text), encoding)

コード例 #18

ファイルを表示

ファイル: FileSource.py プロジェクト: camirisk/Arelle

 def file(self,
          filepath,
          binary=False,
          stripDeclaration=False,
          encoding=None):
     '''Open ``filepath``, reading it out of its containing archive if any.

     filepath: path (or url) of the wanted file; when it lies inside an
         archive it is prefixed by the archive's basefile or baseurl.
     binary: when true, return a one-tuple holding a binary stream.
     stripDeclaration: for text read from zip/tar.gz archives, pass the
         bytes through stripDeclarationBytes first; for plain files the
         flag is forwarded to openXmlFileStream.
     encoding: text encoding to use; when None it is detected from the
         content with XmlUtil.encoding.

     Returns:
         for text, a tuple of (open file handle, encoding)
         for binary, a tuple of (open file handle, )

     Raises:
         ArchiveFileIOError (errno.ENOENT) when the archive has no such
         entry (zip, tar.gz, EIS and XFD archives).
     '''

     def archiveEntryStream(entryBytes):
         # wrap bytes read from a zip / tar.gz member as the result tuple
         if binary:
             return (io.BytesIO(entryBytes), )
         textEncoding = encoding
         if textEncoding is None:
             textEncoding = XmlUtil.encoding(entryBytes)
         if stripDeclaration:
             entryBytes = stripDeclarationBytes(entryBytes)
         return (FileNamedTextIOWrapper(filepath,
                                        io.BytesIO(entryBytes),
                                        encoding=textEncoding),
                 textEncoding)

     def base64EntryStream(b64data):
         # decode a base64 payload embedded in an EIS / XFD document
         entryBytes = base64.b64decode(b64data.encode("latin-1"))
         # remove UTF-8 BOM (EF BB BF) if present and followed by content
         if len(entryBytes) > 3 and entryBytes[:3] == b"\xef\xbb\xbf":
             entryBytes = entryBytes[3:]
         if binary:
             return (io.BytesIO(entryBytes), )
         textEncoding = encoding
         if textEncoding is None:
             textEncoding = XmlUtil.encoding(entryBytes, default="latin-1")
         return (io.TextIOWrapper(io.BytesIO(entryBytes),
                                  encoding=textEncoding),
                 textEncoding)

     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         # entry name is whatever follows the archive prefix and separator
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else:  # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             try:
                 # zip member names always use forward slashes
                 return archiveEntryStream(archiveFileSource.fs.read(
                     archiveFileName.replace("\\", "/")))
             except KeyError:
                 raise ArchiveFileIOError(self, errno.ENOENT,
                                          archiveFileName)
         elif archiveFileSource.isTarGz:
             try:
                 fh = archiveFileSource.fs.extractfile(archiveFileName)
                 # doesn't seem to close properly using a with construct
                 try:
                     entryBytes = fh.read()
                 finally:
                     fh.close()
                 return archiveEntryStream(entryBytes)
             except KeyError:
                 # same (fileSource, errno, fileName) arguments as the
                 # other archive branches of this method
                 raise ArchiveFileIOError(self, errno.ENOENT,
                                          archiveFileName)
         elif archiveFileSource.isEis:
             # NOTE(review): this reads self.eisDocument whereas the XFD
             # branch reads archiveFileSource.xfdDocument - confirm intended
             for docElt in self.eisDocument.iter(
                     tag="{http://www.sec.gov/edgar/common}document"):
                 outfn = docElt.findtext(
                     "{http://www.sec.gov/edgar/common}conformedName")
                 if outfn == archiveFileName:
                     b64data = docElt.findtext(
                         "{http://www.sec.gov/edgar/common}contents")
                     if b64data:
                         return base64EntryStream(b64data)
             raise ArchiveFileIOError(self, errno.ENOENT, archiveFileName)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 outfn = data.findtext("filename")
                 if outfn == archiveFileName:
                     b64data = data.findtext("mimedata")
                     if b64data:
                         return base64EntryStream(b64data)
             raise ArchiveFileIOError(self, errno.ENOENT, archiveFileName)
         elif archiveFileSource.isInstalledTaxonomyPackage:
             # remove TAXONOMY_PACKAGE_FILE_NAME from file path
             if filepath.startswith(archiveFileSource.basefile):
                 baseLen = len(archiveFileSource.basefile)
                 for pkgFileName in TAXONOMY_PACKAGE_FILE_NAMES:
                     if filepath[baseLen - len(pkgFileName):baseLen] == pkgFileName:
                         filepath = (filepath[0:baseLen - len(pkgFileName) - 1]
                                     + filepath[baseLen:])
                         break
     # not in an archive (or an installed package): open from the file system
     if binary:
         return (openFileStream(self.cntlr, filepath, 'rb'), )
     else:
         return openXmlFileStream(self.cntlr, filepath, stripDeclaration)

コード例 #19

ファイルを表示

ファイル: FileSource.py プロジェクト: camirisk/Arelle

    def open(self, reloadCache=False):
        '''Open this file source according to its archive type.

        Does nothing when already open.  Otherwise sets self.basefile
        (resolved through self.cntlr.webCache for archive types when a
        controller is present, else self.url) and self.baseurl, then loads
        whatever the archive type needs: self.fs for zip / tar.gz,
        self.eisDocument, self.xfdDocument or self.rssDocument for the XML
        based types, self.mappedPaths for an installed taxonomy package.
        self.isOpen is set to True on success.

        reloadCache: passed as ``reload`` to webCache.getfilename.

        EnvironmentError and lxml errors raised inside the guarded sections
        are reported through self.logError instead of being raised.
        '''
        if self.isOpen:
            return
        if (self.isZip or self.isTarGz or self.isEis or self.isXfd
                or self.isRss
                or self.isInstalledTaxonomyPackage) and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(
                self.url, reload=reloadCache)
        else:
            self.basefile = self.url
        self.baseurl = self.url  # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged
        if self.isZip:
            try:
                self.fs = zipfile.ZipFile(
                    openFileStream(self.cntlr, self.basefile, 'rb'),
                    mode="r")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
        elif self.isTarGz:
            try:
                self.fs = tarfile.open(self.basefile, "r:gz")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
        elif self.isEis:
            # file is either plain XML or a sequence of 8-byte headers, each
            # followed by a zlib-compressed chunk whose byte length is the
            # big-endian unsigned long in the header's first 4 bytes
            buf = b''
            try:
                # with-block so the handle is released even if a read or
                # decompress step raises
                with open(self.basefile, 'rb') as eisFile:
                    while True:
                        header = eisFile.read(8)
                        if len(header) < 8:
                            break
                        if len(buf) == 0 and header.startswith(
                                b"<?xml "):  # not compressed
                            buf = header + eisFile.read()
                            break
                        compressedBytes = eisFile.read(
                            struct.unpack(">L", header[0:4])[0])
                        if len(compressedBytes) <= 0:
                            break
                        buf += zlib.decompress(compressedBytes)
            except EnvironmentError as err:
                self.logError(err)
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)

            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding (text is parsed as decoded str)
                    xmlText = buf.decode(XmlUtil.encoding(buf))
                    # str.index raises ValueError (not caught here) when
                    # "?>" does not occur within the first 128 characters
                    endEncoding = xmlText.index("?>", 0, 128)
                    if endEncoding > 0:
                        xmlText = xmlText[endEncoding + 2:]
                    parser = etree.XMLParser(recover=True, huge_tree=True)
                    with io.StringIO(initial_value=xmlText) as xmlStream:
                        self.eisDocument = etree.parse(xmlStream,
                                                       parser=parser)
                    self.isOpen = True
                except EnvironmentError as err:
                    self.logError(err)
                    return  # provide error message later
                except etree.LxmlError as err:
                    self.logError(err)
                    return  # provide error message later

        elif self.isXfd:
            # check first line of file
            xfdFile = open(self.basefile, 'rb')
            try:
                firstline = xfdFile.readline()
                if firstline.startswith(
                        b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                    # file has been gzipped
                    base64input = xfdFile.read(-1)
                    xfdFile.close()

                    fb = base64.b64decode(base64input)
                    ungzippedBytes = bytearray()
                    i = 0
                    while i < len(fb):
                        # each chunk starts with two 16-bit big-endian
                        # lengths: compressed, then uncompressed
                        lenCompr = fb[i + 0] * 256 + fb[i + 1]
                        lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                        lenRead = 0

                        # prefix the chunk with gzip magic/method/flags bytes
                        gzchunk = (bytes((31, 139, 8, 0)) + fb[i:i + lenCompr])
                        try:
                            with gzip.GzipFile(
                                    fileobj=io.BytesIO(gzchunk)) as gf:
                                while True:
                                    readSize = min(16384, lenUncomp - lenRead)
                                    readBytes = gf.read(size=readSize)
                                    lenRead += len(readBytes)
                                    ungzippedBytes += readBytes
                                    if len(readBytes) == 0 or (lenUncomp -
                                                               lenRead) <= 0:
                                        break
                        except IOError:
                            pass  # provide error message later

                        i += lenCompr + 4
                    #for learning the content of xfd file, uncomment this:
                    #with open("c:\\temp\\test.xml", "wb") as fh:
                    #    fh.write(ungzippedBytes)
                    xfdFile = io.StringIO(
                        initial_value=ungzippedBytes.decode("utf-8"))
                else:
                    # position to start of file
                    xfdFile.seek(0, io.SEEK_SET)

                try:
                    self.xfdDocument = etree.parse(xfdFile)
                    self.isOpen = True
                except EnvironmentError as err:
                    self.logError(err)
                    return  # provide error message later
                except etree.LxmlError as err:
                    self.logError(err)
                    return  # provide error message later
            finally:
                # release the stream on every path, including parse errors
                xfdFile.close()

        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
                return  # provide error message later
            except etree.LxmlError as err:
                self.logError(err)
                return  # provide error message later

        elif self.isInstalledTaxonomyPackage:
            self.isOpen = True
            # load mappings
            try:
                metadataFiles = self.taxonomyPackageMetadataFiles
                if len(metadataFiles) != 1:
                    raise IOError(
                        _("Taxonomy package must contain one and only one metadata file: {0}."
                          ).format(', '.join(metadataFiles)))
                # HF: this won't work, see DialogOpenArchive for correct code
                # not sure if it is used
                taxonomyPackage = PackageManager.parsePackage(
                    self.cntlr, self.url)
                fileSourceDir = os.path.dirname(self.baseurl) + os.sep
                # http remappings are kept; others become paths beside baseurl
                self.mappedPaths = {
                    prefix: (remapping if isHttpUrl(remapping)
                             else (fileSourceDir + remapping.replace("/", os.sep)))
                    for prefix, remapping in taxonomyPackage["remappings"].items()}
            except EnvironmentError as err:
                self.logError(err)
                return  # provide error message later

コード例 #20

ファイルを表示

ファイル: FileSource.py プロジェクト: andygreener/Arelle

    def open(self):
        '''Open this file source according to its archive type.

        Does nothing when already open.  Otherwise sets self.basefile
        (resolved through self.cntlr.webCache for archive types when a
        controller is present, else self.url) and self.baseurl, then loads
        whatever the archive type needs: self.fs for zip, self.eisDocument,
        self.xfdDocument or self.rssDocument for the XML based types.
        self.isOpen is set to True on success.

        EnvironmentError and lxml errors raised inside the guarded sections
        are reported through self.logError instead of being raised.
        '''
        if self.isOpen:
            return
        if (self.isZip or self.isEis or self.isXfd
                or self.isRss) and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(self.url)
        else:
            self.basefile = self.url
        self.baseurl = self.url  # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged
        if self.isZip:
            try:
                self.fs = zipfile.ZipFile(
                    openFileStream(self.cntlr, self.basefile, 'rb'),
                    mode="r")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
        elif self.isEis:
            # file is either plain XML or a sequence of 8-byte headers, each
            # followed by a zlib-compressed chunk whose byte length is the
            # big-endian unsigned long in the header's first 4 bytes
            buf = b''
            try:
                # with-block so the handle is released even if a read or
                # decompress step raises
                with open(self.basefile, 'rb') as eisFile:
                    while True:
                        header = eisFile.read(8)
                        if len(header) < 8:
                            break
                        if len(buf) == 0 and header.startswith(
                                b"<?xml "):  # not compressed
                            buf = header + eisFile.read()
                            break
                        compressedBytes = eisFile.read(
                            struct.unpack(">L", header[0:4])[0])
                        if len(compressedBytes) <= 0:
                            break
                        buf += zlib.decompress(compressedBytes)
            except EnvironmentError as err:
                self.logError(err)
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)

            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding (text is parsed as decoded str)
                    xmlText = buf.decode(XmlUtil.encoding(buf))
                    # str.index raises ValueError (not caught here) when
                    # "?>" does not occur within the first 128 characters
                    endEncoding = xmlText.index("?>", 0, 128)
                    if endEncoding > 0:
                        xmlText = xmlText[endEncoding + 2:]
                    parser = etree.XMLParser(recover=True, huge_tree=True)
                    with io.StringIO(initial_value=xmlText) as xmlStream:
                        self.eisDocument = etree.parse(xmlStream,
                                                       parser=parser)
                    self.isOpen = True
                except EnvironmentError as err:
                    self.logError(err)
                    return  # provide error message later
                except etree.LxmlError as err:
                    self.logError(err)
                    return  # provide error message later

        elif self.isXfd:
            # check first line of file
            xfdFile = open(self.basefile, 'rb')
            try:
                firstline = xfdFile.readline()
                if firstline.startswith(
                        b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                    # file has been gzipped
                    base64input = xfdFile.read(-1)
                    xfdFile.close()

                    fb = base64.b64decode(base64input)
                    ungzippedBytes = bytearray()
                    i = 0
                    while i < len(fb):
                        # each chunk starts with two 16-bit big-endian
                        # lengths: compressed, then uncompressed
                        lenCompr = fb[i + 0] * 256 + fb[i + 1]
                        lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                        lenRead = 0

                        # prefix the chunk with gzip magic/method/flags bytes
                        gzchunk = (bytes((31, 139, 8, 0)) + fb[i:i + lenCompr])
                        try:
                            with gzip.GzipFile(
                                    fileobj=io.BytesIO(gzchunk)) as gf:
                                while True:
                                    readSize = min(16384, lenUncomp - lenRead)
                                    readBytes = gf.read(size=readSize)
                                    lenRead += len(readBytes)
                                    ungzippedBytes += readBytes
                                    if len(readBytes) == 0 or (lenUncomp -
                                                               lenRead) <= 0:
                                        break
                        except IOError:
                            pass  # provide error message later

                        i += lenCompr + 4
                    #for learning the content of xfd file, uncomment this:
                    #with open("c:\\temp\\test.xml", "wb") as fh:
                    #    fh.write(ungzippedBytes)
                    xfdFile = io.StringIO(
                        initial_value=ungzippedBytes.decode("utf-8"))
                else:
                    # position to start of file
                    xfdFile.seek(0, io.SEEK_SET)

                try:
                    self.xfdDocument = etree.parse(xfdFile)
                    self.isOpen = True
                except EnvironmentError as err:
                    self.logError(err)
                    return  # provide error message later
                except etree.LxmlError as err:
                    self.logError(err)
                    return  # provide error message later
            finally:
                # release the stream on every path, including parse errors
                xfdFile.close()

        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
                return  # provide error message later
            except etree.LxmlError as err:
                self.logError(err)
                return  # provide error message later

コード例 #21

ファイルを表示

def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, *args, **kwargs):
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    if not _streamingExtensionsCheck:
        return None

    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False

    def logSyntaxErrors(parsercontext):
        for error in parsercontext.error_log:
            modelXbrl.error(
                "xmlSchema:syntax",
                _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"
                  ),
                modelObject=modelXbrl,
                fileName=os.path.basename(filepath),
                error=error.message,
                line=error.line,
                column=error.column,
                sourceAction="streaming")

    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    ''' this seems twice as slow as iterparse
    class instInfoTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.streamingAspects = None
            self.foundInstance = False
            self.creationSoftwareComment = ''
            self.currentEltTag = "(before xbrli:xbrl)"
            self.numRootFacts = 0
        def start(self, tag, attrib, nsmap=None):
            if self.newTree:
                if tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.foundInstance = True
                    self.newTree = False
                else: # break 
                    raise NotInstanceDocumentException()
            elif not tag.startswith("{http://www.xbrl.org/"):
                self.numRootFacts += 1
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
            self.currentEltTag = tag
        def end(self, tag):
            pass
        def data(self, data):
            pass
        def comment(self, text):
            if not self.foundInstance: # accumulate comments before xbrli:xbrl
                self.creationSoftwareComment += ('\n' if self.creationSoftwareComment else '') + text
            elif not self.creationSoftwareComment:
                self.creationSoftwareComment = text # or first comment after xbrli:xbrl
        def pi(self, target, data):
            if target == "xbrl-streamable-instance":
                if self.currentEltTag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.streamingAspects = dict(etree.PI(target,data).attrib.copy()) # dereference target results
                else:
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(target)s, must follow xbrli:xbrl element but was found at %(element)s"),
                            modelObject=modelXbrl, target=target, element=self.currentEltTag)
        def close(self):
            if not self.creationSoftwareComment:
                self.creationSoftwareComment = None
            return True
    instInfo = instInfoTarget()
    infoParser = etree.XMLParser(recover=True, huge_tree=True, target=instInfo)
    try:
        etree.parse(_file, parser=infoParser, base_url=filepath)
    except NotInstanceDocumentException:
        pass
    '''
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file,
                                      events=("start", "end"),
                                      huge_tree=True)
    try:
        for event, elt in instInfoContext:
            if event == "start":
                if elt.getparent() is not None:
                    if elt.getparent(
                    ).tag == "{http://www.xbrl.org/2003/instance}xbrl":
                        if not foundInstance:
                            foundInstance = True
                            pi = precedingProcessingInstruction(
                                elt, "xbrl-streamable-instance")
                            if pi is None:
                                break
                            else:
                                streamingAspects = dict(pi.attrib.copy())
                                if creationSoftwareComment is None:
                                    creationSoftwareComment = precedingComment(
                                        elt)
                        if not elt.tag.startswith("{http://www.xbrl.org/"):
                            instInfoNumRootFacts += 1
                            if instInfoNumRootFacts % 1000 == 0:
                                modelXbrl.profileActivity(
                                    "... streaming tree check",
                                    minTimeToShow=20.0)
                    elif not foundInstance:
                        break
                elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    creationSoftwareComment = precedingComment(elt)
                    if precedingProcessingInstruction(
                            elt, "xbrl-streamable-instance") is not None:
                        modelXbrl.error(
                            "streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"
                              ),
                            modelObject=elt)
            elif event == "end":
                elt.clear()
                numElts += 1
                if numElts % 1000 == 0 and elt.getparent() is not None:
                    while elt.getprevious() is not None and elt.getparent(
                    ) is not None:
                        del elt.getparent()[0]
    except etree.XMLSyntaxError as err:
        modelXbrl.error("xmlSchema:syntax",
                        _("Unrecoverable error: %(error)s"),
                        error=err)
        _file.close()
        return err

    _file.seek(0, io.SEEK_SET)  # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error(
                "streamingExtensions:unsupportedVersion",
                _("Streaming version %(version)s, major version number must be 1"
                  ),
                modelObject=elt,
                version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError):
        modelXbrl.error("streamingExtensions:versionError",
                        _("Version %(version)s, number must be 1.n"),
                        modelObject=elt,
                        version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error(
                "streamingExtensions:valueError",
                _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"
                  ),
                modelObject=elt,
                attrib=bufAspect,
                value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.validationType == "EFM":
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.validationType == "GFM":
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.validationType == "HMRC":
            incompatibleValidations.append("HMRC")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error(
                "streamingExtensions:incompatibleValidation",
                _("Streaming instance validation does not support %(incompatibleValidations)s validation"
                  ),
                modelObject=modelXbrl,
                incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
    logSyntaxErrors(instInfoContext)
    del instInfoContext  # dereference

    for pluginMethod in pluginClassMethods("Streaming.BlockStreaming"):
        _blockingPluginName = pluginMethod(modelXbrl)
        if _blockingPluginName:  # name of blocking plugin is returned
            modelXbrl.error(
                "streamingExtensions:incompatiblePlugIn",
                _("Streaming instance not supported by plugin %(blockingPlugin)s"
                  ),
                modelObject=modelXbrl,
                blockingPlugin=_blockingPluginName)
            foundErrors = True

    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0, io.SEEK_SET)  # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    contextBuffer = []
    contextsToDrop = []
    unitBuffer = []
    unitsToDrop = []
    footnoteBuffer = []
    footnoteLinksToDrop = []

    _streamingFactsPlugin = any(
        True for pluginMethod in pluginClassMethods("Streaming.Facts"))
    _streamingValidateFactsPlugin = (_streamingExtensionsValidate and any(
        True
        for pluginMethod in pluginClassMethods("Streaming.ValidateFacts")))
    ''' this is very much slower than iterparse
    class modelLoaderTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.beforeStartStreamingPlugin = True
            self.numRootFacts = 1
            modelXbrl.makeelementParentModelObject = None
            modelXbrl.isStreamingMode = True
            self.factsCheckVersion = None
            self.factsCheckMd5s = Md5Sum()
        def start(self, tag, attrib, nsmap=None):
            modelXbrl.makeelementParentModelObject = self.currentMdlObj # pass parent to makeelement for ModelObjectFactory
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                if (self.beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters(modelXbrl.prefixedNamespaces))
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif not self.beforeInstanceStream and self.beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    self.beforeStartStreamingPlugin = False
            return mdlObj
        def end(self, tag):
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            parentMdlObj = mdlObj.getparent()
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _streamingValidateFactsPlugin:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                                del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        #>>XmlValidate.validate(modelXbrl, mdlObj)
                        #>>modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _streamingValidateFactsPlugin:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                            del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None 
                    #>>XmlValidate.validate(modelXbrl, mdlObj)
                    #>>modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining batched facts if any
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # finish any final batch of facts
                        if len(modelXbrl.facts) > 0:
                            factsToCheck = modelXbrl.facts.copy()
                            factsHaveBeenProcessed = True
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                if not pluginMethod(modelXbrl, factsToCheck):
                                    factsHaveBeenProcessed = False
                            if factsHaveBeenProcessed:
                                for fact in factsToCheck:
                                    dropFact(modelXbrl, fact, modelXbrl.facts)
                                    del parentMdlObj[parentMdlObj.index(fact)]
                                for cntx in contextsToDrop:
                                    dropContext(modelXbrl, cntx)
                                    del parentMdlObj[parentMdlObj.index(cntx)]
                                for unit in unitsToDrop:
                                    dropUnit(modelXbrl, unit)
                                    del parentMdlObj[parentMdlObj.index(unit)]
                                for footnoteLink in footnoteLinksToDrop:
                                    dropFootnoteLink(modelXbrl, footnoteLink)
                                    del parentMdlObj[parentMdlObj.index(footnoteLink)]
                                fact = cntx = unit = footnoteLink = None
                                del contextsToDrop[:]
                                del unitsToDrop[:]
                                del footnoteLinksToDrop[:]
                            del factsToCheck
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    for pluginMethod in pluginClassMethods("Streaming.Finish"):
                        pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                                # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                self.numRootFacts += 1
                #>>XmlValidate.validate(modelXbrl, mdlObj)
                #>>modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if self.factsCheckVersion:
                    self.factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # use batches of 1000 facts
                        if len(modelXbrl.facts) > 1000:
                            factsToCheck = modelXbrl.facts.copy()
                            factsHaveBeenProcessed = True
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                                if not pluginMethod(modelXbrl, factsToCheck):
                                    factsHaveBeenProcessed = False
                            if factsHaveBeenProcessed:
                                for fact in factsToCheck:
                                    dropFact(modelXbrl, fact, modelXbrl.facts)
                                    del parentMdlObj[parentMdlObj.index(fact)]
                                for cntx in contextsToDrop:
                                    dropContext(modelXbrl, cntx)
                                    del parentMdlObj[parentMdlObj.index(cntx)]
                                for unit in unitsToDrop:
                                    dropUnit(modelXbrl, unit)
                                    del parentMdlObj[parentMdlObj.index(unit)]
                                for footnoteLink in footnoteLinksToDrop:
                                    dropFootnoteLink(modelXbrl, footnoteLink)
                                    del parentMdlObj[parentMdlObj.index(footnoteLink)]
                                fact = cntx = unit = footnoteLink = None
                                del contextsToDrop[:]
                                del unitsToDrop[:]
                                del footnoteLinksToDrop[:]
                            del factsToCheck # dereference fact or batch of facts
                    else:
                        dropFact(modelXbrl, mdlObj, modelXbrl.facts) # single fact has been processed
                        del parentMdlObj[parentMdlObj.index(mdlObj)]
                if self.numRootFacts % 1000 == 0:
                    pass
                    #modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts, 
                    #                                                                          100.0 * self.numRootFacts / instInfoNumRootFacts), 
                    #                          minTimeToShow=20.0)
                    gc.collect()
                    sys.stdout.write ("\rAt fact {} of {} mem {}".format(self.numRootFacts, instInfoNumRootFacts, modelXbrl.modelManager.cntlr.memoryUsed))
            return mdlObj
        def data(self, data):
            self.currentMdlObj.text = data
        def comment(self, text):
            pass
        def pi(self, target, data):
            if target == "xbrl-facts-check":
                _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']", data)
                if _match:
                    _matchGroups = _match.groups()
                    if len(_matchGroups) == 2:
                        if _matchGroups[0] == "version":
                            self.factsCheckVersion = _matchGroups[1]
                        elif _matchGroups[0] == "sum-of-fact-md5s":
                            try:
                                expectedMd5 = Md5Sum(_matchGroups[1])
                                if self.factsCheckMd5s != expectedMd5:
                                    modelXbrl.warning("streamingExtensions:xbrlFactsCheckWarning",
                                            _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"),
                                            modelObject=modelXbrl, expectedMd5=expectedMd5, actualMd5Sum=self.factsCheckMd5s)
                                else:
                                    modelXbrl.info("info",
                                            _("Successful XBRL facts sum of md5s."),
                                            modelObject=modelXbrl)
                            except ValueError:
                                modelXbrl.error("streamingExtensions:xbrlFactsCheckError",
                                        _("Invalid sum-of-md5s %(sumOfMd5)s"),
                                        modelObject=modelXbrl, sumOfMd5=_matchGroups[1])
        def close(self):
            del modelXbrl.makeelementParentModelObject
            return None
        
        def factCheckFact(self, fact):
            self.factsCheckMd5s += fact.md5sum
            for _tupleFact in fact.modelTupleFacts:
                self.factCheckFact(_tupleFact)
        
    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    etree.parse(_file, parser=_parser, base_url=filepath)
    logSyntaxErrors(_parser)
    '''
    # replace modelLoaderTarget with iterparse (as it now supports CustomElementClassLookup)
    streamingParserContext = etree.iterparse(_file,
                                             events=("start", "end"),
                                             huge_tree=True)
    from arelle.ModelObjectFactory import setParserElementClassLookup
    modelXbrl.isStreamingMode = True  # must be set before setting element class lookup
    (_parser, _parserLookupName,
     _parserLookupClass) = setParserElementClassLookup(streamingParserContext,
                                                       modelXbrl)
    foundInstance = False
    beforeInstanceStream = beforeStartStreamingPlugin = True
    numRootFacts = 0
    factsCheckVersion = None

    def factCheckFact(fact):
        """Accumulate md5 sums of ``fact`` and all facts nested beneath it.

        Adds ``fact.md5sum`` to ``modelDocument._factsCheckMd5s`` and then
        does the same for every entry reachable through ``modelTupleFacts``,
        parents before their children and siblings in the order that
        ``modelTupleFacts`` yields them.
        """
        # Explicit stack instead of recursion; children are pushed reversed
        # so that they are popped (and summed) in their original order.
        pending = [fact]
        while pending:
            current = pending.pop()
            modelDocument._factsCheckMd5s += current.md5sum
            pending.extend(reversed(list(current.modelTupleFacts)))

    for event, mdlObj in streamingParserContext:
        if event == "start":
            if mdlObj.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE,
                                              mappedUri, filepath,
                                              mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument  # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = precedingComment(
                    mdlObj)
                modelDocument._factsCheckMd5s = Md5Sum()
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject=modelDocument)
            elif mdlObj.getparent() is not None:
                mdlObj._init()  # requires discovery as part of start elements
                if mdlObj.getparent(
                ).tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        foundInstance = True
                        pi = precedingProcessingInstruction(
                            mdlObj, "xbrl-facts-check")
                        if pi is not None:
                            factsCheckVersion = pi.attrib.get("version", None)
                elif not foundInstance:
                    break
                ns = mdlObj.qname.namespaceURI
                ln = mdlObj.qname.localName
                if beforeInstanceStream:
                    if ((ns == XbrlConst.link
                         and ln not in ("schemaRef", "linkbaseRef")) or
                        (ns == XbrlConst.xbrli and ln in ("context", "unit"))
                            or (ns not in (XbrlConst.link, XbrlConst.xbrli))):
                        beforeInstanceStream = False
                        if _streamingExtensionsValidate:
                            instValidator.validate(
                                modelXbrl,
                                modelXbrl.modelManager.formulaOptions.
                                typedParameters(modelXbrl.prefixedNamespaces))
                        else:  # need default dimensions
                            ValidateXbrlDimensions.loadDimensionDefaults(
                                modelXbrl)
                elif not beforeInstanceStream and beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    beforeStartStreamingPlugin = False
        elif event == "end":
            parentMdlObj = mdlObj.getparent()
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                                contextsToDrop.append(cntx)
                            else:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        contextsToCheck = (mdlObj, )
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(
                                contextsToCheck)
                        del contextsToCheck  # dereference
                elif ln == "unit":
                    if len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                            unitsToDrop.append(unit)
                        else:
                            dropUnit(modelXbrl, unit)
                            #>>del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits((mdlObj, ))
                elif ln == "xbrl":  # end of document
                    # check remaining batched facts if any
                    if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # finish any final batch of facts
                        if len(modelXbrl.facts) > 0:
                            factsToCheck = modelXbrl.facts.copy()
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            if _streamingValidateFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.ValidateFacts"):
                                    pluginMethod(instValidator, factsToCheck)
                            if _streamingFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.Facts"):
                                    pluginMethod(modelXbrl, factsToCheck)
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                #>>del parentMdlObj[parentMdlObj.index(fact)]
                            for cntx in contextsToDrop:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            for unit in unitsToDrop:
                                dropUnit(modelXbrl, unit)
                                #>>del parentMdlObj[parentMdlObj.index(unit)]
                            for footnoteLink in footnoteLinksToDrop:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            fact = cntx = unit = footnoteLink = None
                            del contextsToDrop[:]
                            del unitsToDrop[:]
                            del footnoteLinksToDrop[:]
                            del factsToCheck
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    pi = childProcessingInstruction(mdlObj,
                                                    "xbrl-facts-check",
                                                    reversed=True)
                    if pi is not None:  # attrib is in .text, not attrib, no idea why!!!
                        _match = re.search("([\\w-]+)=[\"']([^\"']+)[\"']",
                                           pi.text)
                        if _match:
                            _matchGroups = _match.groups()
                            if len(_matchGroups) == 2:
                                if _matchGroups[0] == "sum-of-fact-md5s":
                                    try:
                                        expectedMd5 = Md5Sum(_matchGroups[1])
                                        if modelDocument._factsCheckMd5s != expectedMd5:
                                            modelXbrl.warning(
                                                "streamingExtensions:xbrlFactsCheckWarning",
                                                _("XBRL facts sum of md5s expected %(expectedMd5)s not matched to actual sum %(actualMd5Sum)s"
                                                  ),
                                                modelObject=modelXbrl,
                                                expectedMd5=expectedMd5,
                                                actualMd5Sum=modelDocument.
                                                _factsCheckMd5s)
                                        else:
                                            modelXbrl.info(
                                                "info",
                                                _("Successful XBRL facts sum of md5s."
                                                  ),
                                                modelObject=modelXbrl)
                                    except ValueError:
                                        modelXbrl.error(
                                            "streamingExtensions:xbrlFactsCheckError",
                                            _("Invalid sum-of-md5s %(sumOfMd5)s"
                                              ),
                                            modelObject=modelXbrl,
                                            sumOfMd5=_matchGroups[1])
                    if _streamingValidateFactsPlugin:
                        for pluginMethod in pluginClassMethods(
                                "Streaming.ValidateFinish"):
                            pluginMethod(instValidator)
                    if _streamingFactsPlugin:
                        for pluginMethod in pluginClassMethods(
                                "Streaming.Finish"):
                            pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(
                        mdlObj,
                        urlRewritePluginClass=
                        "ModelDocument.InstanceSchemaRefRewriter")
                elif ln in ("roleRef", "arcroleRef"):
                    modelDocument.linkbaseDiscover((mdlObj, ), inInstance=True)
                elif ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj, )
                    modelDocument.linkbaseDiscover(footnoteLinks,
                                                   inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # check that hrefObjects for locators were all satisfied
                            # drop before addition as dropped may have same id as added
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            if _streamingValidateFactsPlugin:
                                footnoteLinksToDrop.append(footnoteLink)
                            else:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl and isinstance(
                    mdlObj, ModelFact):
                numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if factsCheckVersion:
                    factCheckFact(mdlObj)
                if _streamingExtensionsValidate or _streamingFactsPlugin or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,
                                    )  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingFactsPlugin or _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        # use batches of 1000 facts
                        if len(modelXbrl.facts) > 1000:
                            factsToCheck = modelXbrl.facts.copy()
                            # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                            if _streamingValidateFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.ValidateFacts"):
                                    pluginMethod(instValidator, factsToCheck)
                            if _streamingFactsPlugin:
                                for pluginMethod in pluginClassMethods(
                                        "Streaming.Facts"):
                                    pluginMethod(modelXbrl, factsToCheck)
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                #>>del parentMdlObj[parentMdlObj.index(fact)]
                            for cntx in contextsToDrop:
                                dropContext(modelXbrl, cntx)
                                #>>del parentMdlObj[parentMdlObj.index(cntx)]
                            for unit in unitsToDrop:
                                dropUnit(modelXbrl, unit)
                                #>>del parentMdlObj[parentMdlObj.index(unit)]
                            for footnoteLink in footnoteLinksToDrop:
                                dropFootnoteLink(modelXbrl, footnoteLink)
                                #>>del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            fact = cntx = unit = footnoteLink = None
                            del contextsToDrop[:]
                            del unitsToDrop[:]
                            del footnoteLinksToDrop[:]
                            del factsToCheck  # dereference fact or batch of facts
                    else:
                        dropFact(
                            modelXbrl, mdlObj,
                            modelXbrl.facts)  # single fact has been processed
                        #>>del parentMdlObj[parentMdlObj.index(mdlObj)]
                if numRootFacts % 1000 == 0:
                    pass
                    #modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts,
                    #                                                                          100.0 * self.numRootFacts / instInfoNumRootFacts),
                    #                          minTimeToShow=20.0)
                    #gc.collect()
                    #sys.stdout.write ("\rAt fact {} of {} mem {}".format(numRootFacts, instInfoNumRootFacts, modelXbrl.modelManager.cntlr.memoryUsed))
    if mdlObj is not None:
        mdlObj.clear()
    del _parser, _parserLookupName, _parserLookupClass

    if _streamingExtensionsValidate and validator is not None:
        _file.close()
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True

    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument

コード例 #22

ファイルを表示

ファイル: FileSource.py プロジェクト: benrosemeyer-wf/Arelle

    def open(self):
        '''Open this file source and, for archive types, load its index.

        Does nothing when ``isOpen`` is already set.  ``basefile`` is resolved
        through the controller's web cache for zip/EIS/XFD/RSS sources when a
        controller is present, otherwise it is the url itself; an empty
        ``basefile`` ends the call without opening anything.  On success
        ``isOpen`` becomes True and ``fs`` (zip), ``eisDocument``,
        ``xfdDocument`` or ``rssDocument`` holds the opened archive.

        EnvironmentError raised while reading an EIS file, and
        EnvironmentError / etree.LxmlError raised while parsing EIS, XFD or
        RSS content, are passed to ``logError``; a failed parse leaves
        ``isOpen`` unset.  Every file handle opened here is closed before
        returning, whether or not parsing succeeded.
        '''
        if self.isOpen:
            return
        if (self.isZip or self.isEis or self.isXfd or self.isRss) and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(self.url)
        else:
            self.basefile = self.url
        self.baseurl = self.url # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged
        if self.isZip:
            self.fs = zipfile.ZipFile(self.basefile, mode="r")
            self.isOpen = True
        elif self.isEis:
            # file is either plain XML or a sequence of 8-byte headers, each
            # followed by a zlib-compressed chunk whose length is the
            # big-endian unsigned int in the header's first 4 bytes
            buf = b''
            try:
                with open(self.basefile, 'rb') as eisFile:
                    while True:
                        header = eisFile.read(8)
                        if len(header) < 8:
                            break
                        if not buf and header.startswith(b"<?xml "):
                            buf = header + eisFile.read()  # not compressed
                            break
                        compressedBytes = eisFile.read(struct.unpack(">L", header[0:4])[0])
                        if len(compressedBytes) <= 0:
                            break
                        buf += zlib.decompress(compressedBytes)
            except EnvironmentError as err:
                self.logError(err)
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)

            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding
                    text = buf.decode(XmlUtil.encoding(buf))
                    # NOTE(review): index() raises ValueError (not caught
                    # here) when "?>" is absent from the first 128 characters
                    endEncoding = text.index("?>", 0, 128)
                    if endEncoding > 0:
                        text = text[endEncoding+2:]
                    with io.StringIO(initial_value=text) as textFile:
                        self.eisDocument = etree.parse(textFile)
                    self.isOpen = True
                except (EnvironmentError, etree.LxmlError) as err:
                    self.logError(err)
                    return # provide error message later

        elif self.isXfd:
            # check first line of file
            xfdFile = open(self.basefile, 'rb')
            try:
                firstline = xfdFile.readline()
                if firstline.startswith(b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                    # file has been gzipped
                    base64input = xfdFile.read(-1)
                    xfdFile.close()

                    fb = base64.b64decode(base64input)
                    ungzippedBytes = b""
                    i = 0
                    while i < len(fb):
                        # each chunk starts with two 16-bit big-endian
                        # lengths: compressed, then uncompressed
                        lenCompr = fb[i + 0] * 256 + fb[i + 1]
                        lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                        lenRead = 0

                        gzchunk = (bytes((31,139,8,0)) + fb[i:i+lenCompr])
                        try:
                            with gzip.GzipFile(fileobj=io.BytesIO(gzchunk)) as gf:
                                while True:
                                    readSize = min(16384, lenUncomp - lenRead)
                                    readBytes = gf.read(size=readSize)
                                    lenRead += len(readBytes)
                                    ungzippedBytes += readBytes
                                    if len(readBytes) == 0 or (lenUncomp - lenRead) <= 0:
                                        break
                        except IOError:
                            pass # provide error message later

                        i += lenCompr + 4
                    #for learning the content of xfd file, uncomment this:
                    #with open("c:\\temp\\test.xml", "wb") as fh:
                    #    fh.write(ungzippedBytes)
                    xfdFile = io.StringIO(initial_value=ungzippedBytes.decode("utf-8"))
                else:
                    # position to start of file
                    xfdFile.seek(0,io.SEEK_SET)

                try:
                    self.xfdDocument = etree.parse(xfdFile)
                    self.isOpen = True
                except (EnvironmentError, etree.LxmlError) as err:
                    self.logError(err)
                    return # provide error message later
            finally:
                # closes whichever handle is current (raw file or StringIO);
                # closing an already closed handle is harmless
                xfdFile.close()

        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except (EnvironmentError, etree.LxmlError) as err:
                self.logError(err)
                return # provide error message later

コード例 #23

ファイルを表示

ファイル: FileSource.py プロジェクト: benrosemeyer-wf/Arelle

 def file(self, filepath, binary=False):
     '''Open ``filepath``, looking inside an archive when one contains it.

     For text, returns a tuple of (open file handle, encoding); for binary,
     returns a one-tuple of (open file handle, ).

     Entries of EIS and XFD archives are base64-decoded and a leading UTF-8
     byte order mark is removed.  Raises ArchiveFileIOError when an EIS or
     XFD archive has no non-empty entry with the requested name.  Plain text
     files that are not UTF-8 are decoded fully and returned as a StringIO
     with the XML declaration removed.
     '''
     utf8Bom = b"\xef\xbb\xbf"

     def openEmbedded(b64data):
         # shared tail of the EIS and XFD branches: decode one archive entry
         b = base64.b64decode(b64data.encode("latin-1"))
         # remove BOM codes if present, also when the payload is only a BOM
         if b.startswith(utf8Bom):
             b = b[len(utf8Bom):]
         if binary:
             return (io.BytesIO(b), )
         encoding = XmlUtil.encoding(b, default="latin-1")
         return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                 encoding)

     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else: # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             # zip member names always use forward slashes
             b = archiveFileSource.fs.read(archiveFileName.replace("\\","/"))
             if binary:
                 return (io.BytesIO(b), )
             encoding = XmlUtil.encoding(b)
             return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                     encoding)
         elif archiveFileSource.isEis:
             for docElt in self.eisDocument.iter(tag="{http://www.sec.gov/edgar/common}document"):
                 outfn = docElt.findtext("{http://www.sec.gov/edgar/common}conformedName")
                 if outfn == archiveFileName:
                     b64data = docElt.findtext("{http://www.sec.gov/edgar/common}contents")
                     if b64data:
                         return openEmbedded(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 outfn = data.findtext("filename")
                 if outfn == archiveFileName:
                     b64data = data.findtext("mimedata")
                     if b64data:
                         return openEmbedded(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
     if binary:
         return (io.open(filepath, 'rb'), )
     # check encoding
     with open(filepath, 'rb') as fb:
         encoding = XmlUtil.encoding(fb.read(512))
         if encoding.lower() in ('utf-8','utf8'):
             text = None
         else:
             fb.seek(0)
             text = fb.read().decode(encoding)
         # allow filepath to close
     # this may not be needed for Mac or Linux, needs confirmation!!!
     if text is None:  # ok to read as utf-8
         return io.open(filepath, 'rt', encoding='utf-8'), encoding
     # strip XML declaration
     xmlDeclarationMatch = XMLdeclaration.search(text)
     if xmlDeclarationMatch: # remove it for lxml
         start, end = xmlDeclarationMatch.span()
         text = text[0:start] + text[end:]
     return (io.StringIO(initial_value=text), encoding)

コード例 #24

ファイルを表示

ファイル: FileSource.py プロジェクト: tyrose1214/Arelle

 def file(self, filepath, binary=False):
     '''Open ``filepath``, looking inside an archive when one contains it.

     For text, returns a tuple of (open file handle, encoding); for binary,
     returns a one-tuple of (open file handle, ).

     Entries of EIS and XFD archives are base64-decoded and a leading UTF-8
     byte order mark is removed.  Raises ArchiveFileIOError when a zip
     lookup raises KeyError, or when an EIS or XFD archive has no non-empty
     entry with the requested name.  Paths outside any archive are
     delegated to openFileStream / openXmlFileStream.
     '''
     utf8Bom = b"\xef\xbb\xbf"

     def openDecodedEntry(b64data):
         # shared tail of the EIS and XFD branches: decode one archive entry
         content = base64.b64decode(b64data.encode("latin-1"))
         # remove BOM codes if present, also when the payload is only a BOM
         if content.startswith(utf8Bom):
             content = content[len(utf8Bom):]
         if binary:
             return (io.BytesIO(content), )
         encoding = XmlUtil.encoding(content, default="latin-1")
         return (io.TextIOWrapper(io.BytesIO(content), encoding=encoding),
                 encoding)

     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else: # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             try:
                 # zip member names always use forward slashes
                 b = archiveFileSource.fs.read(archiveFileName.replace("\\","/"))
                 if binary:
                     return (io.BytesIO(b), )
                 encoding = XmlUtil.encoding(b)
                 return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                         encoding)
             except KeyError:
                 raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isEis:
             for docElt in self.eisDocument.iter(tag="{http://www.sec.gov/edgar/common}document"):
                 outfn = docElt.findtext("{http://www.sec.gov/edgar/common}conformedName")
                 if outfn == archiveFileName:
                     b64data = docElt.findtext("{http://www.sec.gov/edgar/common}contents")
                     if b64data:
                         return openDecodedEntry(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 outfn = data.findtext("filename")
                 if outfn == archiveFileName:
                     b64data = data.findtext("mimedata")
                     if b64data:
                         return openDecodedEntry(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
     if binary:
         return (openFileStream(self.cntlr, filepath, 'rb'), )
     else:
         return openXmlFileStream(self.cntlr, filepath)

コード例 #25

ファイルを表示

ファイル: FileSource.py プロジェクト: andygreener/Arelle

 def file(self, filepath, binary=False):
     '''Open ``filepath``, looking inside an archive when one contains it.

     For text, returns a tuple of (open file handle, encoding); for binary,
     returns a one-tuple of (open file handle, ).

     Entries of EIS and XFD archives are base64-decoded and a leading UTF-8
     byte order mark is removed.  Raises ArchiveFileIOError when an EIS or
     XFD archive has no non-empty entry with the requested name.  Paths
     outside any archive are delegated to openFileStream /
     openXmlFileStream.
     '''
     utf8Bom = b"\xef\xbb\xbf"
     edgarNs = "{http://www.sec.gov/edgar/common}"

     def wrapBytes(b, defaultEncoding=None):
         # build the (handle,) or (handle, encoding) tuple for raw bytes
         if binary:
             return (io.BytesIO(b), )
         if defaultEncoding is None:
             encoding = XmlUtil.encoding(b)
         else:
             encoding = XmlUtil.encoding(b, default=defaultEncoding)
         return (io.TextIOWrapper(io.BytesIO(b), encoding=encoding),
                 encoding)

     def openEmbedded(b64data):
         # decode one base64 archive entry
         b = base64.b64decode(b64data.encode("latin-1"))
         # remove BOM codes if present, also when the payload is only a BOM
         if b.startswith(utf8Bom):
             b = b[len(utf8Bom):]
         return wrapBytes(b, "latin-1")

     archiveFileSource = self.fileSourceContainingFilepath(filepath)
     if archiveFileSource is not None:
         if filepath.startswith(archiveFileSource.basefile):
             archiveFileName = filepath[len(archiveFileSource.basefile) + 1:]
         else:  # filepath.startswith(self.baseurl)
             archiveFileName = filepath[len(archiveFileSource.baseurl) + 1:]
         if archiveFileSource.isZip:
             # zip member names always use forward slashes
             return wrapBytes(archiveFileSource.fs.read(
                 archiveFileName.replace("\\", "/")))
         elif archiveFileSource.isEis:
             for docElt in self.eisDocument.iter(tag=edgarNs + "document"):
                 outfn = docElt.findtext(edgarNs + "conformedName")
                 if outfn == archiveFileName:
                     b64data = docElt.findtext(edgarNs + "contents")
                     if b64data:
                         return openEmbedded(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
         elif archiveFileSource.isXfd:
             for data in archiveFileSource.xfdDocument.iter(tag="data"):
                 outfn = data.findtext("filename")
                 if outfn == archiveFileName:
                     b64data = data.findtext("mimedata")
                     if b64data:
                         return openEmbedded(b64data)
             raise ArchiveFileIOError(self, archiveFileName)
     if binary:
         return (openFileStream(self.cntlr, filepath, 'rb'), )
     else:
         return openXmlFileStream(self.cntlr, filepath)

コード例 #26

ファイルを表示

ファイル: FileSource.py プロジェクト: sternshus/not_arelle2.7

         compressedBytes = file.read( struct.unpack(u">L", l[0:4])[0])
         if len(compressedBytes) <= 0:
             break
         buf += zlib.decompress(compressedBytes)
     file.close()
 except EnvironmentError, err:
     self.logError(err)
     pass
 #uncomment to save for debugging
 #with open("c:/temp/test.xml", "wb") as f:
 #    f.write(buf)
 
 if buf.startswith("<?xml "):
     try:
         # must strip encoding
         unicode = buf.decode(XmlUtil.encoding(buf))
         endEncoding = unicode.index(u"?>", 0, 128)
         if endEncoding > 0:
             unicode = unicode[endEncoding+2:]
         file = io.StringIO(initial_value=unicode)
         parser = etree.XMLParser(recover=True, huge_tree=True)
         self.eisDocument = etree.parse(file, parser=parser)
         file.close()
         self.isOpen = True
     except EnvironmentError, err:
         self.logError(err)
         return # provide error message later
     except etree.LxmlError, err:
         self.logError(err)
         return # provide error message later

コード例 #27

ファイルを表示

ファイル: streamingExtensions.py プロジェクト: joyanta/Arelle

def streamingExtensionsLoader(modelXbrl, mappedUri, filepath, **kwargs):
    # check if big instance and has header with an initial incomplete tree walk (just 2 elements
    if not _streamingExtensionsCheck:
        return None
    
    # track whether modelXbrl has been validated by this streaming extension
    modelXbrl._streamingExtensionValidated = False
        
    def logSyntaxErrors(parsercontext):
        for error in parsercontext.error_log:
            modelXbrl.error("xmlSchema:syntax",
                    _("%(error)s, %(fileName)s, line %(line)s, column %(column)s, %(sourceAction)s source element"),
                    modelObject=modelXbrl, fileName=os.path.basename(filepath), 
                    error=error.message, line=error.line, column=error.column, sourceAction="streaming")
    #### note: written for iterparse of lxml prior to version 3.3, otherwise rewrite to use XmlPullParser ###
    #### note: iterparse wants a binary file, but file is text mode
    _file, = modelXbrl.fileSource.file(filepath, binary=True)
    startedAt = time.time()
    modelXbrl.profileActivity()
    ''' this seems twice as slow as iterparse
    class instInfoTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.streamingAspects = None
            self.foundInstance = False
            self.creationSoftwareComment = ''
            self.currentEltTag = "(before xbrli:xbrl)"
            self.numRootFacts = 0
        def start(self, tag, attrib, nsmap=None):
            if self.newTree:
                if tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.foundInstance = True
                    self.newTree = False
                else: # break 
                    raise NotInstanceDocumentException()
            elif not tag.startswith("{http://www.xbrl.org/"):
                self.numRootFacts += 1
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
            self.currentEltTag = tag
        def end(self, tag):
            pass
        def data(self, data):
            pass
        def comment(self, text):
            if not self.foundInstance: # accumulate comments before xbrli:xbrl
                self.creationSoftwareComment += ('\n' if self.creationSoftwareComment else '') + text
            elif not self.creationSoftwareComment:
                self.creationSoftwareComment = text # or first comment after xbrli:xbrl
        def pi(self, target, data):
            if target == "xbrl-streamable-instance":
                if self.currentEltTag == "{http://www.xbrl.org/2003/instance}xbrl":
                    self.streamingAspects = dict(etree.PI(target,data).attrib.copy()) # dereference target results
                else:
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(target)s, must follow xbrli:xbrl element but was found at %(element)s"),
                            modelObject=modelXbrl, target=target, element=self.currentEltTag)
        def close(self):
            if not self.creationSoftwareComment:
                self.creationSoftwareComment = None
            return True
    instInfo = instInfoTarget()
    infoParser = etree.XMLParser(recover=True, huge_tree=True, target=instInfo)
    try:
        etree.parse(_file, parser=infoParser, base_url=filepath)
    except NotInstanceDocumentException:
        pass
    '''
    foundErrors = False
    foundInstance = False
    streamingAspects = None
    creationSoftwareComment = None
    instInfoNumRootFacts = 0
    numElts = 0
    elt = None
    instInfoContext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
    for event, elt in instInfoContext:
        if event == "start":
            if elt.getparent() is not None:
                if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
                    if not foundInstance:
                        foundInstance = True
                        pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
                        if pi is None:
                            break
                        else:
                            streamingAspects = dict(pi.attrib.copy())
                            if creationSoftwareComment is None:
                                creationSoftwareComment = precedingComment(elt)
                    if not elt.tag.startswith("{http://www.xbrl.org/"):
                        instInfoNumRootFacts += 1
                        if instInfoNumRootFacts % 1000 == 0:
                            modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
                elif not foundInstance:       
                    break
            elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
                creationSoftwareComment = precedingComment(elt)
                if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
                    modelXbrl.error("streamingExtensions:headerMisplaced",
                            _("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
                            modelObject=elt)
        elif event == "end":
            elt.clear()
            numElts += 1
            if numElts % 1000 == 0 and elt.getparent() is not None:
                while elt.getprevious() is not None and elt.getparent() is not None:
                    del elt.getparent()[0]
    if elt is not None:
        elt.clear()
    
    _file.seek(0,io.SEEK_SET) # allow reparsing
    if not foundInstance or streamingAspects is None:
        del elt
        _file.close()
        return None
    modelXbrl.profileStat(_("streaming tree check"), time.time() - startedAt)
    startedAt = time.time()
    try:
        version = Decimal(streamingAspects.get("version"))
        if int(version) != 1:
            modelXbrl.error("streamingExtensions:unsupportedVersion",
                    _("Streaming version %(version)s, major version number must be 1"),
                    modelObject=elt, version=version)
            foundErrors = True
    except (InvalidOperation, OverflowError):
        modelXbrl.error("streamingExtensions:versionError",
                _("Version %(version)s, number must be 1.n"),
                modelObject=elt, version=streamingAspects.get("version", "(none)"))
        foundErrors = True
    for bufAspect in ("contextBuffer", "unitBuffer", "footnoteBuffer"):
        try:
            bufLimit = Decimal(streamingAspects.get(bufAspect, "INF"))
            if bufLimit < 1 or (bufLimit.is_finite() and bufLimit % 1 != 0):
                raise InvalidOperation
            elif bufAspect == "contextBuffer":
                contextBufferLimit = bufLimit
            elif bufAspect == "unitBuffer":
                unitBufferLimit = bufLimit
            elif bufAspect == "footnoteBuffer":
                footnoteBufferLimit = bufLimit
        except InvalidOperation:
            modelXbrl.error("streamingExtensions:valueError",
                    _("Streaming %(attrib)s %(value)s, number must be a positive integer or INF"),
                    modelObject=elt, attrib=bufAspect, value=streamingAspects.get(bufAspect))
            foundErrors = True
    if _streamingExtensionsValidate:
        incompatibleValidations = []
        _validateDisclosureSystem = modelXbrl.modelManager.validateDisclosureSystem
        _disclosureSystem = modelXbrl.modelManager.disclosureSystem
        if _validateDisclosureSystem and _disclosureSystem.EFM:
            incompatibleValidations.append("EFM")
        if _validateDisclosureSystem and _disclosureSystem.GFM:
            incompatibleValidations.append("GFM")
        if _validateDisclosureSystem and _disclosureSystem.EBA:
            incompatibleValidations.append("EBA")
        if _validateDisclosureSystem and _disclosureSystem.HMRC:
            incompatibleValidations.append("EBA")
        if modelXbrl.modelManager.validateCalcLB:
            incompatibleValidations.append("calculation LB")
        if incompatibleValidations:
            modelXbrl.error("streamingExtensions:incompatibleValidation",
                    _("Streaming instance validation does not support %(incompatibleValidations)s validation"),
                    modelObject=modelXbrl, incompatibleValidations=', '.join(incompatibleValidations))
            foundErrors = True
    if instInfoContext.error_log:
        foundErrors = True
    logSyntaxErrors(instInfoContext)
    del instInfoContext # dereference

    for pluginMethod in pluginClassMethods("Streaming.BlockStreaming"):
        _blockingPluginName = pluginMethod(modelXbrl)
        if _blockingPluginName: # name of blocking plugin is returned
            modelXbrl.error("streamingExtensions:incompatiblePlugIn",
                    _("Streaming instance not supported by plugin %(blockingPlugin)s"),
                    modelObject=modelXbrl, blockingPlugin=_blockingPluginName)
            foundErrors = True
    
    if foundErrors:
        _file.close()
        return None

    _encoding = XmlUtil.encoding(_file.read(512))
    _file.seek(0,io.SEEK_SET) # allow reparsing

    if _streamingExtensionsValidate:
        validator = Validate(modelXbrl)
        instValidator = validator.instValidator

    eltMdlObjs = {}
    contextBuffer = []
    unitBuffer = []
    footnoteBuffer = []
    factBuffer = []
    numFacts = 1
    
    _streamingValidateFactsPlugin = any(True for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"))

    
    class modelLoaderTarget():
        def __init__(self, element_factory=None, parser=None):
            self.newTree = True
            self.currentMdlObj = None
            self.beforeInstanceStream = True
            self.beforeStartStreamingPlugin = True
            self.numRootFacts = 1
            modelXbrl.streamingParentModelObject = None
            modelXbrl.isStreamingMode = True
        def start(self, tag, attrib, nsmap=None):
            modelXbrl.streamingParentModelObject = self.currentMdlObj # pass parent to makeelement for ModelObjectFactory
            mdlObj = _parser.makeelement(tag, attrib=attrib, nsmap=nsmap)
            mdlObj.sourceline = 1
            if self.newTree:
                self.newTree = False
                self.currentMdlObj = mdlObj
                modelDocument = ModelDocument(modelXbrl, Type.INSTANCE, mappedUri, filepath, mdlObj.getroottree())
                modelXbrl.modelDocument = modelDocument # needed for incremental validation
                mdlObj.init(modelDocument)
                modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
                modelDocument.parserLookupName = _parserLookupName
                modelDocument.parserLookupClass = _parserLookupClass
                modelDocument.xmlRootElement = mdlObj
                modelDocument.schemaLocationElements.add(mdlObj)
                modelDocument.documentEncoding = _encoding
                modelDocument._creationSoftwareComment = creationSoftwareComment
                modelXbrl.info("streamingExtensions:streaming",
                               _("Stream processing this instance."),
                               modelObject = modelDocument)
            else:
                self.currentMdlObj.append(mdlObj)
                self.currentMdlObj = mdlObj
                mdlObj._init()
                ns = mdlObj.namespaceURI
                ln = mdlObj.localName
                if (self.beforeInstanceStream and (
                    (ns == XbrlConst.link and ln not in ("schemaRef", "linkbaseRef")) or
                    (ns == XbrlConst.xbrli and ln in ("context", "unit")) or
                    (ns not in (XbrlConst.link, XbrlConst.xbrli)))):
                    self.beforeInstanceStream = False
                    if _streamingExtensionsValidate:
                        instValidator.validate(modelXbrl, modelXbrl.modelManager.formulaOptions.typedParameters())
                    else: # need default dimensions
                        ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
                elif not self.beforeInstanceStream and self.beforeStartStreamingPlugin:
                    for pluginMethod in pluginClassMethods("Streaming.Start"):
                        pluginMethod(modelXbrl)
                    self.beforeStartStreamingPlugin = False
            return mdlObj
        def end(self, tag):
            """Handle the close of the element currently held in ``self.currentMdlObj``.

            The ``tag`` argument is not used; the element being closed is taken
            from ``self.currentMdlObj``, which is then reset to that element's
            parent.  Depending on the element's namespace and local name it is
            validated, discovered into the model document, optionally checked
            by ``instValidator`` and, where buffering limits apply, older
            buffered elements are dropped from the model and removed from the
            parent element.

            Returns the element that was just closed.
            """
            modelDocument = modelXbrl.modelDocument
            mdlObj = self.currentMdlObj
            # NOTE(review): getparent() presumably returns None for the document
            # root; the branches below that index or query parentMdlObj assume a
            # real parent -- confirm the root is always xbrli:xbrl.
            parentMdlObj = mdlObj.getparent()
            # subsequent data()/end() calls apply to the parent element
            self.currentMdlObj = parentMdlObj
            ns = mdlObj.namespaceURI
            ln = mdlObj.localName
            if ns == XbrlConst.xbrli:
                if ln == "context":
                    if mdlObj.get("sticky"):
                        # sticky contexts: the marker attribute is removed and the
                        # context is validated and discovered, but it is never put
                        # in contextBuffer, so it is never dropped by this method
                        del mdlObj.attrib["sticky"]
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                    else:
                        if _streamingExtensionsValidate and len(contextBuffer) >= contextBufferLimit:
                            # drop before adding as dropped may have same id as added
                            cntx = contextBuffer.pop(0)
                            dropContext(modelXbrl, cntx)
                            del parentMdlObj[parentMdlObj.index(cntx)]
                            cntx = None
                        XmlValidate.validate(modelXbrl, mdlObj)
                        modelDocument.contextDiscover(mdlObj)
                        # only buffer when a finite limit applies; with a non-finite
                        # limit nothing is buffered and nothing is dropped above
                        if contextBufferLimit.is_finite():
                            contextBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        # validate just this context (sticky or not) by itself
                        contextsToCheck = (mdlObj,)
                        instValidator.checkContexts(contextsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkContextsDimensions(contextsToCheck)
                        del contextsToCheck # dereference
                elif ln == "unit":
                    if _streamingExtensionsValidate and len(unitBuffer) >= unitBufferLimit:
                        # drop before adding as dropped may have same id as added
                        unit = unitBuffer.pop(0)
                        dropUnit(modelXbrl, unit)
                        del parentMdlObj[parentMdlObj.index(unit)]
                        unit = None 
                    XmlValidate.validate(modelXbrl, mdlObj)
                    modelDocument.unitDiscover(mdlObj)
                    # only buffer when a finite limit applies (same scheme as contexts)
                    if unitBufferLimit.is_finite():
                        unitBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkUnits( (mdlObj,) )
                elif ln == "xbrl": # end of document
                    # check remaining footnote refs
                    for footnoteLink in footnoteBuffer:
                        checkFootnoteHrefs(modelXbrl, footnoteLink)
                    # let plugins know streaming has finished
                    for pluginMethod in pluginClassMethods("Streaming.Finish"):
                        pluginMethod(modelXbrl)
            elif ns == XbrlConst.link:
                if ln == "footnoteLink":
                    XmlValidate.validate(modelXbrl, mdlObj)
                    footnoteLinks = (mdlObj,)
                    modelDocument.linkbaseDiscover(footnoteLinks, inInstance=True)
                    if footnoteBufferLimit.is_finite():
                        footnoteBuffer.append(mdlObj)
                    if _streamingExtensionsValidate:
                        instValidator.checkLinks(footnoteLinks)
                        if len(footnoteBuffer) > footnoteBufferLimit:
                            # Unlike contexts and units, the new link has already been
                            # appended above, so the buffer is over its limit here.
                            # Check the oldest link's hrefs (that hrefObjects for
                            # locators were all satisfied) and then drop it.
                            footnoteLink = footnoteBuffer.pop(0)
                            checkFootnoteHrefs(modelXbrl, footnoteLink)
                            dropFootnoteLink(modelXbrl, footnoteLink)
                            del parentMdlObj[parentMdlObj.index(footnoteLink)]
                            footnoteLink = None
                    footnoteLinks = None
                elif ln in ("schemaRef", "linkbaseRef"):
                    modelDocument.discoverHref(mdlObj)
                elif not modelXbrl.skipDTS:
                    if ln in ("roleRef", "arcroleRef"):
                        modelDocument.linkbaseDiscover((mdlObj,), inInstance=True)
            elif parentMdlObj.qname == XbrlConst.qnXbrliXbrl:
                # any other element directly under xbrli:xbrl is handled as a root fact
                self.numRootFacts += 1
                XmlValidate.validate(modelXbrl, mdlObj)
                modelDocument.factDiscover(mdlObj, modelXbrl.facts)
                if _streamingExtensionsValidate or _streamingValidateFactsPlugin:
                    factsToCheck = (mdlObj,)  # validate current fact by itself
                    if _streamingExtensionsValidate:
                        instValidator.checkFacts(factsToCheck)
                        if modelXbrl.hasXDT:
                            instValidator.checkFactsDimensions(factsToCheck)
                    if _streamingValidateFactsPlugin:
                        # plugin attempts to process batch of all root facts not yet processed (not just current one)
                        factsToCheck = modelXbrl.facts.copy()
                        factsHaveBeenProcessed = True
                        # can block facts deletion if required data not yet available, such as numeric unit for DpmDB
                        for pluginMethod in pluginClassMethods("Streaming.ValidateFacts"):
                            if not pluginMethod(modelXbrl, factsToCheck):
                                factsHaveBeenProcessed = False
                        if factsHaveBeenProcessed:
                            # iterate the copy: dropFact is handed modelXbrl.facts itself
                            for fact in factsToCheck:
                                dropFact(modelXbrl, fact, modelXbrl.facts)
                                del parentMdlObj[parentMdlObj.index(fact)]
                    else:
                        dropFact(modelXbrl, mdlObj, modelXbrl.facts) # single fact has been processed
                        del parentMdlObj[parentMdlObj.index(mdlObj)]
                    del factsToCheck # dereference fact or batch of facts
                # progress report every 1000 root facts
                # NOTE(review): the percentage divides by instInfoNumRootFacts --
                # confirm it is always a nonzero number by the time this runs.
                if self.numRootFacts % 1000 == 0:
                    modelXbrl.profileActivity("... streaming fact {0} of {1} {2:.2f}%".format(self.numRootFacts, instInfoNumRootFacts, 
                                                                                              100.0 * self.numRootFacts / instInfoNumRootFacts), 
                                              minTimeToShow=20.0)
            return mdlObj
        def data(self, data):
            """Store character data reported by the parser on the current element.

            A parser target's ``data`` callback may be invoked several times for
            a single text node (for example when the text is split around an
            entity reference), so chunks received before the element has any
            children are concatenated instead of each chunk replacing the
            previous one.

            Once the element has children the incoming text is assigned as
            before (replacing ``text``), so whitespace between child elements
            does not accumulate on long-lived parents during streaming.

            Assumes ``self.currentMdlObj`` is an lxml-style element whose
            ``len()`` is its number of child elements.
            """
            element = self.currentMdlObj
            textSoFar = element.text
            if textSoFar is not None and len(element) == 0:
                # continuation chunk of the same leading text node
                element.text = textSoFar + data
            else:
                element.text = data
        def comment(self, text):
            """Ignore an XML comment reported by the parser; nothing is recorded."""
            return None
        def pi(self, target, data):
            """Ignore a processing instruction reported by the parser; nothing is recorded."""
            return None
        def close(self):
            """Finish the parse by removing ``streamingParentModelObject`` from modelXbrl.

            Raises AttributeError if the attribute is not present, exactly as a
            ``del`` statement would.  Always returns None.
            """
            delattr(modelXbrl, "streamingParentModelObject")
            return
        
    _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath, target=modelLoaderTarget())
    etree.parse(_file, parser=_parser, base_url=filepath)
    logSyntaxErrors(_parser)
    if _streamingExtensionsValidate and validator is not None:
        _file.close()
        del instValidator
        validator.close()
        # track that modelXbrl has been validated by this streaming extension
        modelXbrl._streamingExtensionValidated = True
        
    modelXbrl.profileStat(_("streaming complete"), time.time() - startedAt)
    return modelXbrl.modelDocument