Ejemplo n.º 1
0
def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
    if type == Type.INSTANCE:
        # modelXbrl.uriDir = os.path.dirname(normalizedUri)
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>'
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(
                    schemaRef.replace("\\", "/"))
        Xml += '</xbrl>'
    elif type == Type.SCHEMA:
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>'
               '<schema xmlns="http://www.w3.org/2001/XMLSchema" />')
    elif type == Type.RSSFEED:
        Xml = '<?xml version="1.0" encoding="UTF-8"?><rss version="2.0" />'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        Xml = '<?xml version="1.0" encoding="UTF-8"?>'
    if Xml:
        xmlDocument = xml.dom.minidom.parseString(Xml)
    else:
        xmlDocument = None
    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath,
                                       xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath,
                                      xmlDocument)
    if xmlDocument:
        modelDocument.xmlRootElement = modelDocument.xmlDocument.documentElement
    if type == Type.INSTANCE:
        modelDocument.instanceDiscover(modelDocument.xmlRootElement)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
    elif type == Type.SCHEMA:
        modelDocument.targetNamespace = None
    return modelDocument
Ejemplo n.º 2
0
def companiesHouseLoader(modelXbrl, mappedUri, filepath, **kwargs):
    if not (mappedUri.startswith("http://download.companieshouse.gov.uk/")
            and mappedUri.endswith(".zip")):
        return None  # not a companies houst zip file

    rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)

    # find <table> with <a>Download in it
    for instanceFile in modelXbrl.fileSource.dir:
        rssObject.rssItems.append(
            CompaniesHouseItem(modelXbrl, instanceFile,
                               mappedUri + '/' + instanceFile))
    return rssObject
Ejemplo n.º 3
0
def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
    if type == Type.INSTANCE:
        # modelXbrl.uriDir = os.path.dirname(normalizedUri)
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>' 
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(schemaRef.replace("\\","/"))
        Xml += '</xbrl>'
    elif type == Type.SCHEMA:
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>'
               '<schema xmlns="http://www.w3.org/2001/XMLSchema" />')
    elif type == Type.RSSFEED:
        Xml = '<?xml version="1.0" encoding="UTF-8"?><rss version="2.0" />'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        Xml = '<?xml version="1.0" encoding="UTF-8"?>'
    if Xml:
        xmlDocument = xml.dom.minidom.parseString(Xml)
    else:
        xmlDocument = None
    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject 
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath, xmlDocument)
    if xmlDocument:
        modelDocument.xmlRootElement = modelDocument.xmlDocument.documentElement
    if type == Type.INSTANCE:
        modelDocument.instanceDiscover(modelDocument.xmlRootElement)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
    elif type == Type.SCHEMA:
        modelDocument.targetNamespace = None
    return modelDocument
Ejemplo n.º 4
0
def secCorrespondenceLoader(modelXbrl, mappedUri, filepath, **kwargs):
    if (mappedUri.startswith("http://www.sec.gov/Archives/edgar/Feed/")
            and mappedUri.endswith(".nc.tar.gz")):

        # daily feed loader (the rss object)
        rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)

        # location for expanded feed files
        tempdir = os.path.join(modelXbrl.modelManager.cntlr.userAppDir, "tmp",
                               "edgarFeed")

        # remove prior files
        if os.path.exists(tempdir):
            os.system("rm -fr {}".format(
                tempdir))  # rmtree does not work with this many files!
        os.makedirs(tempdir, exist_ok=True)
        # untar to /temp/arelle/edgarFeed for faster operation
        startedAt = time.time()
        modelXbrl.fileSource.open()
        modelXbrl.fileSource.fs.extractall(tempdir)
        modelXbrl.info("info",
                       "untar edgarFeed temp files in %.2f sec" %
                       (time.time() - startedAt),
                       modelObject=modelXbrl)

        # find <table> with <a>Download in it
        for instanceFile in sorted(
                os.listdir(tempdir)):  # modelXbrl.fileSource.dir:
            if instanceFile != ".":
                rssObject.rssItems.append(
                    SECCorrespondenceItem(modelXbrl, instanceFile,
                                          mappedUri + '/' + instanceFile))
        return rssObject
    elif "rssItem" in kwargs and ".nc.tar.gz/" in mappedUri:
        rssItem = kwargs["rssItem"]
        text = None  # no instance information
        # parse document
        try:
            startedAt = time.time()
            file, encoding = modelXbrl.fileSource.file(
                os.path.join(modelXbrl.modelManager.cntlr.userAppDir, "tmp",
                             "edgarFeed", os.path.basename(rssItem.url)))
            s = file.read()
            file.close()
            for match in re.finditer(r"[<]([^>]+)[>]([^<\n\r]*)", s,
                                     re.MULTILINE):
                tag = match.group(1).lower()
                v = match.group(2)
                if tag == "accession-number":
                    rssItem.accessionNumber = v
                elif tag == "form-type":
                    rssItem.formType = v
                    if v != "UPLOAD":
                        rssItem.doNotProcessRSSitem = True  # skip this RSS item in validate loop, don't load DB
                elif tag == "filing-date":
                    try:
                        rssItem.filingDate = datetime.date(
                            int(v[0:4]), int(v[4:6]), int(v[6:8]))
                    except (ValueError, IndexError):
                        pass
                elif tag == "conformed-name":
                    rssItem.companyName = v
                elif tag == "cik":
                    rssItem.cikNumber = v
                elif tag == "assigned-sic":
                    rssItem.assignedSic = v
                elif tag == "fiscal-year-end":
                    try:
                        rssItem.fiscalYearEnd = v[0:2] + '-' + v[2:4]
                    except (IndexError, TypeError):
                        pass
            match = re.search("<PDF>(.*)</PDF>", s, re.DOTALL)
            if match:
                import uu, io
                pageText = []
                uuIn = io.BytesIO(match.group(1).encode(encoding))
                uuOut = io.BytesIO()
                uu.decode(uuIn, uuOut)
                from pyPdf import PdfFileReader
                uuOut.seek(0, 0)
                try:
                    pdfIn = PdfFileReader(uuOut)
                    for pageNum in range(pdfIn.getNumPages()):
                        pageText.append(pdfIn.getPage(pageNum).extractText())
                except:
                    # do we want a warning here that the PDF can't be read with this library?
                    pass
                uuIn.close()
                uuOut.close()
                text = ''.join(pageText)
            else:
                match = re.search("<TEXT>(.*)</TEXT>", s, re.DOTALL)
                if match:
                    text = match.group(1)
        except (IOError, EnvironmentError):
            pass  # give up, no instance
        # daily rss item loader, provide unpopulated instance document to be filled in by RssItem.Xbrl.Loaded
        if not text:
            rssItem.doNotProcessRSSitem = True  # skip this RSS item in validate loop, don't load DB
            instDoc = ModelDocument.create(
                modelXbrl,
                ModelDocument.Type.UnknownXML,
                rssItem.url,
                isEntry=True,
                base='',  # block pathname from becomming absolute
                initialXml='<DummyXml/>')
        else:
            instDoc = ModelDocument.create(
                modelXbrl,
                ModelDocument.Type.INSTANCE,
                rssItem.url,
                isEntry=True,
                base='',  # block pathname from becomming absolute
                initialXml='''
<xbrli:xbrl xmlns:doc="http://arelle.org/doc/2014-01-31" 
    xmlns:link="http://www.xbrl.org/2003/linkbase" 
    xmlns:xlink="http://www.w3.org/1999/xlink" 
    xmlns:xbrli="http://www.xbrl.org/2003/instance">
    <link:schemaRef xlink:type="simple" xlink:href="http://arelle.org/2014/doc-2014-01-31.xsd"/>
   <xbrli:context id="pubDate">
      <xbrli:entity>
         <xbrli:identifier scheme="http://www.sec.gov/CIK">{cik}</xbrli:identifier>
      </xbrli:entity>
      <xbrli:period>
         <xbrli:instant>{pubDate}</xbrli:instant>
      </xbrli:period>
    </xbrli:context>
    <doc:Correspondence contextRef="pubDate">{text}</doc:Correspondence>
</xbrli:xbrl>
            '''.format(cik=rssItem.cikNumber,
                       pubDate=rssItem.pubDate.date(),
                       text=text.strip().replace("&",
                                                 "&amp;").replace("<",
                                                                  "&lt;")))
            #modelXbrl.info("info", "loaded in %.2f sec" % (time.time() - startedAt),
            #               modelDocument=instDoc)
        return instDoc

    return None
Ejemplo n.º 5
0
def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.entryLoadingUrl = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
    # XML document has nsmap root element to replace nsmap as new xmlns entries are required
    if type == Type.INSTANCE:
        # modelXbrl.uriDir = os.path.dirname(normalizedUri)
        Xml = ('<nsmap>'
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(schemaRef.replace("\\","/"))
        Xml += '</xbrl></nsmap>'
    elif type == Type.SCHEMA:
        Xml = ('<nsmap><schema xmlns="http://www.w3.org/2001/XMLSchema" /></nsmap>')
    elif type == Type.RSSFEED:
        Xml = '<nsmap><rss version="2.0" /></nsmap>'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        Xml = '<nsmap/>'
    if Xml:
        import io
        file = io.StringIO(Xml)
        _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl,filepath)
        xmlDocument = etree.parse(file,parser=_parser,base_url=filepath)
        file.close()
    else:
        xmlDocument = None
    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject 
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath, xmlDocument)
    if Xml:
        modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.documentEncoding = "utf-8"
        rootNode = xmlDocument.getroot()
        rootNode.init(modelDocument)
        if xmlDocument:
            for semanticRoot in rootNode.iterchildren():
                if isinstance(semanticRoot, ModelObject):
                    modelDocument.xmlRootElement = semanticRoot
                    break
    if type == Type.INSTANCE:
        modelDocument.instanceDiscover(modelDocument.xmlRootElement)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
    elif type == Type.SCHEMA:
        modelDocument.targetNamespace = None
        modelDocument.isQualifiedElementFormDefault = False
        modelDocument.isQualifiedAttributeFormDefault = False
    return modelDocument
Ejemplo n.º 6
0
def tdNetLoader(modelXbrl, mappedUri, filepath, **kwargs):
    if not (mappedUri.startswith("https://www.release.tdnet.info/inbs/I_")
            and mappedUri.endswith(".html")):
        return None  # not a td net info file

    rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)

    hasMoreSections = True
    while hasMoreSections:
        # treat tdnet as an RSS feed object
        try:
            tdInfoDoc = html.parse(filepath)
        except (IOError, EnvironmentError):
            return None  # give up, use ordinary loader

        # find date
        date = None
        for elt in tdInfoDoc.iter():
            if elt.tag == "table":
                break  # no date portion, probably wrong document
            if elt.text and datePattern.match(elt.text):
                g = datePattern.match(elt.text).groups()
                date = datetime.date(int(g[0]), int(g[1]), int(g[2]))
                break
        if not date:
            return None  # give up, not a TDnet index document

        urlDir = os.path.dirname(mappedUri)

        # find <table> with <a>Download in it
        for tableElt in tdInfoDoc.iter(tag="table"):
            useThisTableElt = False
            for aElt in tableElt.iterdescendants(tag="a"):
                if "download" in aElt.text.lower():
                    useThisTableElt = True
                    break
            if useThisTableElt:
                cols = {}
                for trElt in tableElt.iter(tag="tr"):
                    col = 0
                    rowData = {}
                    for tdElt in trElt.iter(tag="td"):
                        text = ''.join(t.strip() for t in tdElt.itertext())
                        if tdElt.get("class") == "tableh":  #header
                            type = {
                                "時刻": "time",
                                "コード": "code",
                                "会社名": "companyName",
                                "表題": "title",
                                "XBRL": "zipUrl",
                                "上場取引所": "stockExchange",
                                "更新履歴": "changeLog"
                            }.get(text, None)
                            if type:
                                cols[col] = type
                                cols[type] = col
                        elif col == cols["title"]:
                            rowData["title"] = text
                            rowData["pdfUrl"] = descendantAttr(
                                tdElt, "a", "href")
                        elif col == cols["zipUrl"]:
                            rowData["zipUrl"] = descendantAttr(
                                tdElt, "a", "href")
                        elif col in cols:  # body
                            rowData[cols[col]] = text
                        col += int(tdElt.get("colspan", 1))
                    if rowData:
                        time = rowData.get("time", "")
                        if timePattern.match(time):
                            g = timePattern.match(time).groups()
                            dateTime = datetime.datetime(
                                date.year, date.month, date.day, int(g[0]),
                                int(g[1]))
                        else:
                            dateTime = datetime.datetime.now()
                        filingCode = rowData.get("code")
                        companyName = rowData.get("companyName")
                        stockExchange = rowData.get("stockExchange")
                        title = rowData.get("title")
                        pdfUrl = rowData.get("pdfUrl")
                        if pdfUrl:
                            pdfUrl = urlDir + "/" + pdfUrl
                        zipUrl = rowData.get("zipUrl")
                        if zipUrl:
                            zipUrl = urlDir + "/" + zipUrl
                        changeLog = rowData.get("changeLog")
                        # find instance doc in file
                        instanceUrls = []
                        if zipUrl:
                            try:
                                normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
                                    zipUrl)
                                filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
                                    normalizedUri)
                                filesource = FileSource.FileSource(filepath)
                                dir = filesource.dir
                                filesource.close()
                                if dir:
                                    for file in dir:
                                        if "ixbrl" in file or file.endswith(
                                                ".xbrl") or "instance" in file:
                                            instanceUrls.append(zipUrl + "/" +
                                                                file)
                            except:
                                continue  # forget this filing
                        for instanceUrl in instanceUrls:
                            rssObject.rssItems.append(
                                TDnetItem(modelXbrl, date, dateTime,
                                          filingCode, companyName, title,
                                          pdfUrl, instanceUrl, stockExchange))
        # next screen if continuation
        hasMoreSections = False
        for elt in tdInfoDoc.iter(tag="input"):
            if elt.value == "次画面":  # next screen button
                nextLocation = elt.get("onclick")
                if nextLocation and nextLocationPattern.match(nextLocation):
                    hasMoreSections = True
                    nextUrl = urlDir + "/" + nextLocationPattern.match(
                        nextLocation).groups()[0]
                    mappedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
                        nextUrl)
                    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
                        mappedUri)
    return rssObject
Ejemplo n.º 7
0
def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.entryLoadingUrl = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
    # XML document has nsmap root element to replace nsmap as new xmlns entries are required
    if type == Type.INSTANCE:
        # modelXbrl.uriDir = os.path.dirname(normalizedUri)
        Xml = ('<nsmap>'
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(
                    schemaRef.replace("\\", "/"))
        Xml += '</xbrl></nsmap>'
    elif type == Type.SCHEMA:
        Xml = (
            '<nsmap><schema xmlns="http://www.w3.org/2001/XMLSchema" /></nsmap>'
        )
    elif type == Type.RSSFEED:
        Xml = '<nsmap><rss version="2.0" /></nsmap>'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.UnknownXML
        Xml = '<nsmap/>'
    if Xml:
        import io
        file = io.StringIO(Xml)
        _parser, _parserLookupName, _parserLookupClass = parser(
            modelXbrl, filepath)
        xmlDocument = etree.parse(file, parser=_parser, base_url=filepath)
        file.close()
    else:
        xmlDocument = None
    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath,
                                       xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath,
                                      xmlDocument)
    if Xml:
        modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.documentEncoding = "utf-8"
        rootNode = xmlDocument.getroot()
        rootNode.init(modelDocument)
        if xmlDocument:
            for semanticRoot in rootNode.iterchildren():
                if isinstance(semanticRoot, ModelObject):
                    modelDocument.xmlRootElement = semanticRoot
                    break
    if type == Type.INSTANCE:
        modelDocument.instanceDiscover(modelDocument.xmlRootElement)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
    elif type == Type.SCHEMA:
        modelDocument.targetNamespace = None
        modelDocument.isQualifiedElementFormDefault = False
        modelDocument.isQualifiedAttributeFormDefault = False
    modelDocument.definesUTR = False
    return modelDocument
Ejemplo n.º 8
0
def load(modelXbrl,
         uri,
         base=None,
         referringElement=None,
         isEntry=False,
         isDiscovered=False,
         isIncluded=None,
         namespace=None,
         reloadCache=False):
    if referringElement is None:  # used for error messages
        referringElement = modelXbrl
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, base)
    if isEntry:
        modelXbrl.entryLoadingUrl = normalizedUri  # for error loggiong during loading
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            ("EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06"
             if normalizedUri.startswith("http") else "SBR.NL.2.2.0.17"),
            _("Prohibited file for filings %(blockedIndicator)s: %(url)s"),
            modelObject=referringElement,
            url=normalizedUri,
            blockedIndicator=_(" blocked") if blocked else "")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[
            normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if isEntry:
        modelXbrl.entryLoadingUrl = mappedUri  # for error loggiong during loading
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
            mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("FileNotLoadable",
                        _("File can not be loaded: %(fileName)s"),
                        modelObject=referringElement,
                        fileName=mappedUri)
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if (modelXbrl.modelManager.validateDisclosureSystem
                and modelXbrl.modelManager.disclosureSystem.validateFileText):
            file, _encoding = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file, _encoding = modelXbrl.fileSource.file(filepath)
        _parser, _parserLookupName, _parserLookupClass = parser(
            modelXbrl, filepath)
        xmlDocument = etree.parse(file, parser=_parser, base_url=filepath)
        file.close()
    except (EnvironmentError,
            KeyError) as err:  # missing zip file raises KeyError
        if file:
            file.close()
        # retry in case of well known schema locations
        if not isIncluded and namespace and namespace in XbrlConst.standardNamespaceSchemaLocations and uri != XbrlConst.standardNamespaceSchemaLocations[
                namespace]:
            return load(modelXbrl,
                        XbrlConst.standardNamespaceSchemaLocations[namespace],
                        base, referringElement, isEntry, isDiscovered,
                        isIncluded, namespace, reloadCache)
        modelXbrl.error("IOerror",
                        _("%(fileName)s: file error: %(error)s"),
                        modelObject=referringElement,
                        fileName=os.path.basename(uri),
                        error=str(err))
        return None
    except (
            etree.LxmlError, ValueError
    ) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        if file:
            file.close()
        if not isEntry and str(
                err) == "Start tag expected, '<' not found, line 1, column 1":
            return ModelDocument(modelXbrl, Type.UnknownNonXML, mappedUri,
                                 filepath, None)
        else:
            modelXbrl.error(
                "xmlSchema:syntax",
                _("%(error)s, %(fileName)s, %(sourceAction)s source element"),
                modelObject=referringElement,
                fileName=os.path.basename(uri),
                error=str(err),
                sourceAction=("including" if isIncluded else "importing"))
            return None

    # identify document
    #modelXbrl.modelManager.addToLog("discovery: {0}".format(
    #            os.path.basename(uri)))
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None

    rootNode = xmlDocument.getroot()
    if rootNode is not None:
        ln = rootNode.localName
        ns = rootNode.namespaceURI

        # type classification
        if ns == XbrlConst.xsd and ln == "schema":
            type = Type.SCHEMA
        elif ns == XbrlConst.link:
            if ln == "linkbase":
                type = Type.LINKBASE
            elif ln == "xbrl":
                type = Type.INSTANCE
        elif ns == XbrlConst.xbrli:
            if ln == "xbrl":
                type = Type.INSTANCE
        elif ns == XbrlConst.xhtml and \
             (ln == "html" or ln == "xhtml"):
            type = Type.UnknownXML
            if XbrlConst.ixbrl in rootNode.nsmap.values():
                type = Type.INLINEXBRL
        elif ln == "report" and ns == XbrlConst.ver:
            type = Type.VERSIONINGREPORT
        elif ln == "testcases" or ln == "documentation":
            type = Type.TESTCASESINDEX
        elif ln == "testcase":
            type = Type.TESTCASE
        elif ln == "registry" and ns == XbrlConst.registry:
            type = Type.REGISTRY
        elif ln == "rss":
            type = Type.RSSFEED
        elif ln == "ptvl":
            type = Type.ARCSINFOSET
        elif ln == "facts":
            type = Type.FACTDIMSINFOSET
        else:
            type = Type.UnknownXML
            nestedInline = None
            for htmlElt in rootNode.iter(
                    tag="{http://www.w3.org/1999/xhtml}html"):
                nestedInline = htmlElt
                break
            if nestedInline is None:
                for htmlElt in rootNode.iter(
                        tag="{http://www.w3.org/1999/xhtml}xhtml"):
                    nestedInline = htmlElt
                    break
            if nestedInline is not None:
                if XbrlConst.ixbrl in nestedInline.nsmap.values():
                    type = Type.INLINEXBRL
                    rootNode = nestedInline

        #create modelDocument object or subtype as identified
        if type == Type.VERSIONINGREPORT:
            from arelle.ModelVersReport import ModelVersReport
            modelDocument = ModelVersReport(modelXbrl, type, mappedUri,
                                            filepath, xmlDocument)
        elif type == Type.RSSFEED:
            from arelle.ModelRssObject import ModelRssObject
            modelDocument = ModelRssObject(modelXbrl, type, mappedUri,
                                           filepath, xmlDocument)
        else:
            modelDocument = ModelDocument(modelXbrl, type, mappedUri, filepath,
                                          xmlDocument)
        rootNode.init(modelDocument)
        modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.xmlRootElement = rootNode
        modelDocument.schemaLocationElements.add(rootNode)
        modelDocument.documentEncoding = _encoding

        if isEntry or isDiscovered:
            modelDocument.inDTS = True

        # discovery (parsing)
        if type == Type.SCHEMA:
            modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
        elif type == Type.LINKBASE:
            modelDocument.linkbaseDiscover(rootNode)
        elif type == Type.INSTANCE:
            modelDocument.instanceDiscover(rootNode)
        elif type == Type.INLINEXBRL:
            modelDocument.inlineXbrlDiscover(rootNode)
        elif type == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif type == Type.TESTCASESINDEX:
            modelDocument.testcasesIndexDiscover(xmlDocument)
        elif type == Type.TESTCASE:
            modelDocument.testcaseDiscover(rootNode)
        elif type == Type.REGISTRY:
            modelDocument.registryDiscover(rootNode)
        elif type == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif type == Type.RSSFEED:
            modelDocument.rssFeedDiscover(rootNode)
    return modelDocument
Ejemplo n.º 9
0
def load(modelXbrl,
         uri,
         base=None,
         isEntry=False,
         isIncluded=None,
         namespace=None,
         reloadCache=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, base)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            "Prohibited file for filings{1}: {0}".format(
                normalizedUri,
                _(" blocked") if blocked else ""), "err", "EFM.6.22.02",
            "GFM.1.1.3", "SBR.NL.2.1.0.06")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[
            normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
            mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("File can not be loaded: {0}".format(mappedUri), "err",
                        "FileNotLoadable")
        type = Type.Unknown
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if modelXbrl.modelManager.validateDisclosureSystem:
            file = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file = modelXbrl.fileSource.file(filepath)
        xmlDocument = xml.dom.minidom.parse(file)
        file.close()
    except EnvironmentError as err:
        modelXbrl.error(
            "{0}: file error: {1}".format(os.path.basename(uri), err), "err",
            "IOerror")
        type = Type.Unknown
        if file:
            file.close()
        return None
    except (
            xml.parsers.expat.ExpatError, xml.dom.DOMException, ValueError
    ) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error(
            "{0}: import error: {1}".format(os.path.basename(uri), err), "err",
            "XMLsyntax")
        type = Type.Unknown
        if file:
            file.close()
        return None

    # identify document
    #modelXbrl.modelManager.addToLog("discovery: {0}".format(
    #            os.path.basename(uri)))
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None

    for rootNode in xmlDocument.childNodes:
        if rootNode.nodeType == 1:  #element
            ln = rootNode.localName
            ns = rootNode.namespaceURI

            # type classification
            if ns == XbrlConst.xsd and ln == "schema":
                type = Type.SCHEMA
            elif ns == XbrlConst.link:
                if ln == "linkbase":
                    type = Type.LINKBASE
                elif ln == "xbrl":
                    type = Type.INSTANCE
            elif ns == XbrlConst.xbrli:
                if ln == "xbrl":
                    type = Type.INSTANCE
            elif ns == XbrlConst.xhtml and \
                 ln == "html" or ln == "xhtml":
                type = Type.Unknown
                for i in range(len(rootNode.attributes)):
                    if rootNode.attributes.item(i).value == XbrlConst.ixbrl:
                        type = Type.INLINEXBRL
                        break
                XmlUtil.markIdAttributes(
                    rootNode)  # required for minidom searchability
            elif ln == "report" and ns == XbrlConst.ver:
                type = Type.VERSIONINGREPORT
            elif ln == "testcases" or ln == "documentation":
                type = Type.TESTCASESINDEX
            elif ln == "testcase":
                type = Type.TESTCASE
            elif ln == "registry" and ns == XbrlConst.registry:
                type = Type.REGISTRY
            elif ln == "rss":
                type = Type.RSSFEED
            else:
                type = Type.Unknown
                nestedInline = XmlUtil.descendant(rootNode, XbrlConst.xhtml,
                                                  ("html", "xhtml"))
                if nestedInline:
                    for i in range(len(nestedInline.attributes)):
                        if nestedInline.attributes.item(
                                i).value == XbrlConst.ixbrl:
                            type = Type.INLINEXBRL
                            rootNode = nestedInline
                            break
                XmlUtil.markIdAttributes(
                    rootNode)  # required for minidom searchability

            #create modelDocument object or subtype as identified
            if type == Type.VERSIONINGREPORT:
                from arelle.ModelVersReport import ModelVersReport
                modelDocument = ModelVersReport(modelXbrl, type, mappedUri,
                                                filepath, xmlDocument)
            elif type == Type.RSSFEED:
                from arelle.ModelRssObject import ModelRssObject
                modelDocument = ModelRssObject(modelXbrl, type, mappedUri,
                                               filepath, xmlDocument)
            else:
                modelDocument = ModelDocument(modelXbrl, type, mappedUri,
                                              filepath, xmlDocument)
            modelDocument.xmlRootElement = rootNode
            modelDocument.schemaLocationElements.add(rootNode)

            if isEntry:
                modelDocument.inDTS = True

            # discovery (parsing)
            if type == Type.SCHEMA:
                modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
            elif type == Type.LINKBASE:
                modelDocument.linkbaseDiscover(rootNode)
            elif type == Type.INSTANCE:
                modelDocument.instanceDiscover(rootNode)
            elif type == Type.INLINEXBRL:
                modelDocument.inlineXbrlDiscover(rootNode)
            elif type == Type.VERSIONINGREPORT:
                modelDocument.versioningReportDiscover(rootNode)
            elif type == Type.TESTCASESINDEX:
                modelDocument.testcasesIndexDiscover(xmlDocument)
            elif type == Type.TESTCASE:
                modelDocument.testcaseDiscover(rootNode)
            elif type == Type.REGISTRY:
                modelDocument.registryDiscover(rootNode)
            elif type == Type.VERSIONINGREPORT:
                modelDocument.versioningReportDiscover(rootNode)
            elif type == Type.RSSFEED:
                modelDocument.rssFeedDiscover(rootNode)
            break
    return modelDocument