Python ModelRssObject Examples

Programming Language: Python

Namespace/Package Name: arelle.ModelRssObject

Class/Type: ModelRssObject

Examples at hotexamples.com: 9

Python ModelRssObject - 9 examples found. These are the top rated real world Python examples of arelle.ModelRssObject.ModelRssObject extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

ModelRssObject(7)

Example #1

Show file

File: ModelDocument.py Project: gplehmann/Arelle

def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
    if type == Type.INSTANCE:
        # modelXbrl.uriDir = os.path.dirname(normalizedUri)
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>'
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(
                    schemaRef.replace("\\", "/"))
        Xml += '</xbrl>'
    elif type == Type.SCHEMA:
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>'
               '<schema xmlns="http://www.w3.org/2001/XMLSchema" />')
    elif type == Type.RSSFEED:
        Xml = '<?xml version="1.0" encoding="UTF-8"?><rss version="2.0" />'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        Xml = '<?xml version="1.0" encoding="UTF-8"?>'
    if Xml:
        xmlDocument = xml.dom.minidom.parseString(Xml)
    else:
        xmlDocument = None
    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath,
                                       xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath,
                                      xmlDocument)
    if xmlDocument:
        modelDocument.xmlRootElement = modelDocument.xmlDocument.documentElement
    if type == Type.INSTANCE:
        modelDocument.instanceDiscover(modelDocument.xmlRootElement)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
    elif type == Type.SCHEMA:
        modelDocument.targetNamespace = None
    return modelDocument

Example #2

Show file

def companiesHouseLoader(modelXbrl, mappedUri, filepath, **kwargs):
    if not (mappedUri.startswith("http://download.companieshouse.gov.uk/")
            and mappedUri.endswith(".zip")):
        return None  # not a companies houst zip file

    rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)

    # find <table> with <a>Download in it
    for instanceFile in modelXbrl.fileSource.dir:
        rssObject.rssItems.append(
            CompaniesHouseItem(modelXbrl, instanceFile,
                               mappedUri + '/' + instanceFile))
    return rssObject

Example #3

Show file

File: ModelDocument.py Project: 8maki/Arelle

def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
    if type == Type.INSTANCE:
        # modelXbrl.uriDir = os.path.dirname(normalizedUri)
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>' 
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(schemaRef.replace("\\","/"))
        Xml += '</xbrl>'
    elif type == Type.SCHEMA:
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>'
               '<schema xmlns="http://www.w3.org/2001/XMLSchema" />')
    elif type == Type.RSSFEED:
        Xml = '<?xml version="1.0" encoding="UTF-8"?><rss version="2.0" />'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        Xml = '<?xml version="1.0" encoding="UTF-8"?>'
    if Xml:
        xmlDocument = xml.dom.minidom.parseString(Xml)
    else:
        xmlDocument = None
    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject 
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath, xmlDocument)
    if xmlDocument:
        modelDocument.xmlRootElement = modelDocument.xmlDocument.documentElement
    if type == Type.INSTANCE:
        modelDocument.instanceDiscover(modelDocument.xmlRootElement)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
    elif type == Type.SCHEMA:
        modelDocument.targetNamespace = None
    return modelDocument

Example #4

Show file

def secCorrespondenceLoader(modelXbrl, mappedUri, filepath, **kwargs):
    if (mappedUri.startswith("http://www.sec.gov/Archives/edgar/Feed/")
            and mappedUri.endswith(".nc.tar.gz")):

        # daily feed loader (the rss object)
        rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)

        # location for expanded feed files
        tempdir = os.path.join(modelXbrl.modelManager.cntlr.userAppDir, "tmp",
                               "edgarFeed")

        # remove prior files
        if os.path.exists(tempdir):
            os.system("rm -fr {}".format(
                tempdir))  # rmtree does not work with this many files!
        os.makedirs(tempdir, exist_ok=True)
        # untar to /temp/arelle/edgarFeed for faster operation
        startedAt = time.time()
        modelXbrl.fileSource.open()
        modelXbrl.fileSource.fs.extractall(tempdir)
        modelXbrl.info("info",
                       "untar edgarFeed temp files in %.2f sec" %
                       (time.time() - startedAt),
                       modelObject=modelXbrl)

        # find <table> with <a>Download in it
        for instanceFile in sorted(
                os.listdir(tempdir)):  # modelXbrl.fileSource.dir:
            if instanceFile != ".":
                rssObject.rssItems.append(
                    SECCorrespondenceItem(modelXbrl, instanceFile,
                                          mappedUri + '/' + instanceFile))
        return rssObject
    elif "rssItem" in kwargs and ".nc.tar.gz/" in mappedUri:
        rssItem = kwargs["rssItem"]
        text = None  # no instance information
        # parse document
        try:
            startedAt = time.time()
            file, encoding = modelXbrl.fileSource.file(
                os.path.join(modelXbrl.modelManager.cntlr.userAppDir, "tmp",
                             "edgarFeed", os.path.basename(rssItem.url)))
            s = file.read()
            file.close()
            for match in re.finditer(r"[<]([^>]+)[>]([^<\n\r]*)", s,
                                     re.MULTILINE):
                tag = match.group(1).lower()
                v = match.group(2)
                if tag == "accession-number":
                    rssItem.accessionNumber = v
                elif tag == "form-type":
                    rssItem.formType = v
                    if v != "UPLOAD":
                        rssItem.doNotProcessRSSitem = True  # skip this RSS item in validate loop, don't load DB
                elif tag == "filing-date":
                    try:
                        rssItem.filingDate = datetime.date(
                            int(v[0:4]), int(v[4:6]), int(v[6:8]))
                    except (ValueError, IndexError):
                        pass
                elif tag == "conformed-name":
                    rssItem.companyName = v
                elif tag == "cik":
                    rssItem.cikNumber = v
                elif tag == "assigned-sic":
                    rssItem.assignedSic = v
                elif tag == "fiscal-year-end":
                    try:
                        rssItem.fiscalYearEnd = v[0:2] + '-' + v[2:4]
                    except (IndexError, TypeError):
                        pass
            match = re.search("<PDF>(.*)</PDF>", s, re.DOTALL)
            if match:
                import uu, io
                pageText = []
                uuIn = io.BytesIO(match.group(1).encode(encoding))
                uuOut = io.BytesIO()
                uu.decode(uuIn, uuOut)
                from pyPdf import PdfFileReader
                uuOut.seek(0, 0)
                try:
                    pdfIn = PdfFileReader(uuOut)
                    for pageNum in range(pdfIn.getNumPages()):
                        pageText.append(pdfIn.getPage(pageNum).extractText())
                except:
                    # do we want a warning here that the PDF can't be read with this library?
                    pass
                uuIn.close()
                uuOut.close()
                text = ''.join(pageText)
            else:
                match = re.search("<TEXT>(.*)</TEXT>", s, re.DOTALL)
                if match:
                    text = match.group(1)
        except (IOError, EnvironmentError):
            pass  # give up, no instance
        # daily rss item loader, provide unpopulated instance document to be filled in by RssItem.Xbrl.Loaded
        if not text:
            rssItem.doNotProcessRSSitem = True  # skip this RSS item in validate loop, don't load DB
            instDoc = ModelDocument.create(
                modelXbrl,
                ModelDocument.Type.UnknownXML,
                rssItem.url,
                isEntry=True,
                base='',  # block pathname from becomming absolute
                initialXml='<DummyXml/>')
        else:
            instDoc = ModelDocument.create(
                modelXbrl,
                ModelDocument.Type.INSTANCE,
                rssItem.url,
                isEntry=True,
                base='',  # block pathname from becomming absolute
                initialXml='''
<xbrli:xbrl xmlns:doc="http://arelle.org/doc/2014-01-31" 
    xmlns:link="http://www.xbrl.org/2003/linkbase" 
    xmlns:xlink="http://www.w3.org/1999/xlink" 
    xmlns:xbrli="http://www.xbrl.org/2003/instance">
    <link:schemaRef xlink:type="simple" xlink:href="http://arelle.org/2014/doc-2014-01-31.xsd"/>
   <xbrli:context id="pubDate">
      <xbrli:entity>
         <xbrli:identifier scheme="http://www.sec.gov/CIK">{cik}</xbrli:identifier>
      </xbrli:entity>
      <xbrli:period>
         <xbrli:instant>{pubDate}</xbrli:instant>
      </xbrli:period>
    </xbrli:context>
    <doc:Correspondence contextRef="pubDate">{text}</doc:Correspondence>
</xbrli:xbrl>
            '''.format(cik=rssItem.cikNumber,
                       pubDate=rssItem.pubDate.date(),
                       text=text.strip().replace("&",
                                                 "&amp;").replace("<",
                                                                  "&lt;")))
            #modelXbrl.info("info", "loaded in %.2f sec" % (time.time() - startedAt),
            #               modelDocument=instDoc)
        return instDoc

    return None

Example #5

Show file

File: ModelDocument.py Project: marado/Arelle

def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.entryLoadingUrl = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
    # XML document has nsmap root element to replace nsmap as new xmlns entries are required
    if type == Type.INSTANCE:
        # modelXbrl.uriDir = os.path.dirname(normalizedUri)
        Xml = ('<nsmap>'
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(schemaRef.replace("\\","/"))
        Xml += '</xbrl></nsmap>'
    elif type == Type.SCHEMA:
        Xml = ('<nsmap><schema xmlns="http://www.w3.org/2001/XMLSchema" /></nsmap>')
    elif type == Type.RSSFEED:
        Xml = '<nsmap><rss version="2.0" /></nsmap>'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        Xml = '<nsmap/>'
    if Xml:
        import io
        file = io.StringIO(Xml)
        _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl,filepath)
        xmlDocument = etree.parse(file,parser=_parser,base_url=filepath)
        file.close()
    else:
        xmlDocument = None
    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject 
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath, xmlDocument)
    if Xml:
        modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.documentEncoding = "utf-8"
        rootNode = xmlDocument.getroot()
        rootNode.init(modelDocument)
        if xmlDocument:
            for semanticRoot in rootNode.iterchildren():
                if isinstance(semanticRoot, ModelObject):
                    modelDocument.xmlRootElement = semanticRoot
                    break
    if type == Type.INSTANCE:
        modelDocument.instanceDiscover(modelDocument.xmlRootElement)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
    elif type == Type.SCHEMA:
        modelDocument.targetNamespace = None
        modelDocument.isQualifiedElementFormDefault = False
        modelDocument.isQualifiedAttributeFormDefault = False
    return modelDocument

Example #6

Show file

File: TDnetLoader.py Project: spinbris/Arelle

def tdNetLoader(modelXbrl, mappedUri, filepath, **kwargs):
    if not (mappedUri.startswith("https://www.release.tdnet.info/inbs/I_")
            and mappedUri.endswith(".html")):
        return None  # not a td net info file

    rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)

    hasMoreSections = True
    while hasMoreSections:
        # treat tdnet as an RSS feed object
        try:
            tdInfoDoc = html.parse(filepath)
        except (IOError, EnvironmentError):
            return None  # give up, use ordinary loader

        # find date
        date = None
        for elt in tdInfoDoc.iter():
            if elt.tag == "table":
                break  # no date portion, probably wrong document
            if elt.text and datePattern.match(elt.text):
                g = datePattern.match(elt.text).groups()
                date = datetime.date(int(g[0]), int(g[1]), int(g[2]))
                break
        if not date:
            return None  # give up, not a TDnet index document

        urlDir = os.path.dirname(mappedUri)

        # find <table> with <a>Download in it
        for tableElt in tdInfoDoc.iter(tag="table"):
            useThisTableElt = False
            for aElt in tableElt.iterdescendants(tag="a"):
                if "download" in aElt.text.lower():
                    useThisTableElt = True
                    break
            if useThisTableElt:
                cols = {}
                for trElt in tableElt.iter(tag="tr"):
                    col = 0
                    rowData = {}
                    for tdElt in trElt.iter(tag="td"):
                        text = ''.join(t.strip() for t in tdElt.itertext())
                        if tdElt.get("class") == "tableh":  #header
                            type = {
                                "時刻": "time",
                                "コード": "code",
                                "会社名": "companyName",
                                "表題": "title",
                                "XBRL": "zipUrl",
                                "上場取引所": "stockExchange",
                                "更新履歴": "changeLog"
                            }.get(text, None)
                            if type:
                                cols[col] = type
                                cols[type] = col
                        elif col == cols["title"]:
                            rowData["title"] = text
                            rowData["pdfUrl"] = descendantAttr(
                                tdElt, "a", "href")
                        elif col == cols["zipUrl"]:
                            rowData["zipUrl"] = descendantAttr(
                                tdElt, "a", "href")
                        elif col in cols:  # body
                            rowData[cols[col]] = text
                        col += int(tdElt.get("colspan", 1))
                    if rowData:
                        time = rowData.get("time", "")
                        if timePattern.match(time):
                            g = timePattern.match(time).groups()
                            dateTime = datetime.datetime(
                                date.year, date.month, date.day, int(g[0]),
                                int(g[1]))
                        else:
                            dateTime = datetime.datetime.now()
                        filingCode = rowData.get("code")
                        companyName = rowData.get("companyName")
                        stockExchange = rowData.get("stockExchange")
                        title = rowData.get("title")
                        pdfUrl = rowData.get("pdfUrl")
                        if pdfUrl:
                            pdfUrl = urlDir + "/" + pdfUrl
                        zipUrl = rowData.get("zipUrl")
                        if zipUrl:
                            zipUrl = urlDir + "/" + zipUrl
                        changeLog = rowData.get("changeLog")
                        # find instance doc in file
                        instanceUrls = []
                        if zipUrl:
                            try:
                                normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
                                    zipUrl)
                                filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
                                    normalizedUri)
                                filesource = FileSource.FileSource(filepath)
                                dir = filesource.dir
                                filesource.close()
                                if dir:
                                    for file in dir:
                                        if "ixbrl" in file or file.endswith(
                                                ".xbrl") or "instance" in file:
                                            instanceUrls.append(zipUrl + "/" +
                                                                file)
                            except:
                                continue  # forget this filing
                        for instanceUrl in instanceUrls:
                            rssObject.rssItems.append(
                                TDnetItem(modelXbrl, date, dateTime,
                                          filingCode, companyName, title,
                                          pdfUrl, instanceUrl, stockExchange))
        # next screen if continuation
        hasMoreSections = False
        for elt in tdInfoDoc.iter(tag="input"):
            if elt.value == "次画面":  # next screen button
                nextLocation = elt.get("onclick")
                if nextLocation and nextLocationPattern.match(nextLocation):
                    hasMoreSections = True
                    nextUrl = urlDir + "/" + nextLocationPattern.match(
                        nextLocation).groups()[0]
                    mappedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
                        nextUrl)
                    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
                        mappedUri)
    return rssObject

Example #7

Show file

def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.entryLoadingUrl = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
    # XML document has nsmap root element to replace nsmap as new xmlns entries are required
    if type == Type.INSTANCE:
        # modelXbrl.uriDir = os.path.dirname(normalizedUri)
        Xml = ('<nsmap>'
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(
                    schemaRef.replace("\\", "/"))
        Xml += '</xbrl></nsmap>'
    elif type == Type.SCHEMA:
        Xml = (
            '<nsmap><schema xmlns="http://www.w3.org/2001/XMLSchema" /></nsmap>'
        )
    elif type == Type.RSSFEED:
        Xml = '<nsmap><rss version="2.0" /></nsmap>'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.UnknownXML
        Xml = '<nsmap/>'
    if Xml:
        import io
        file = io.StringIO(Xml)
        _parser, _parserLookupName, _parserLookupClass = parser(
            modelXbrl, filepath)
        xmlDocument = etree.parse(file, parser=_parser, base_url=filepath)
        file.close()
    else:
        xmlDocument = None
    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath,
                                       xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath,
                                      xmlDocument)
    if Xml:
        modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.documentEncoding = "utf-8"
        rootNode = xmlDocument.getroot()
        rootNode.init(modelDocument)
        if xmlDocument:
            for semanticRoot in rootNode.iterchildren():
                if isinstance(semanticRoot, ModelObject):
                    modelDocument.xmlRootElement = semanticRoot
                    break
    if type == Type.INSTANCE:
        modelDocument.instanceDiscover(modelDocument.xmlRootElement)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
    elif type == Type.SCHEMA:
        modelDocument.targetNamespace = None
        modelDocument.isQualifiedElementFormDefault = False
        modelDocument.isQualifiedAttributeFormDefault = False
    modelDocument.definesUTR = False
    return modelDocument

Example #8

Show file

def load(modelXbrl,
         uri,
         base=None,
         referringElement=None,
         isEntry=False,
         isDiscovered=False,
         isIncluded=None,
         namespace=None,
         reloadCache=False):
    if referringElement is None:  # used for error messages
        referringElement = modelXbrl
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, base)
    if isEntry:
        modelXbrl.entryLoadingUrl = normalizedUri  # for error loggiong during loading
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            ("EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06"
             if normalizedUri.startswith("http") else "SBR.NL.2.2.0.17"),
            _("Prohibited file for filings %(blockedIndicator)s: %(url)s"),
            modelObject=referringElement,
            url=normalizedUri,
            blockedIndicator=_(" blocked") if blocked else "")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[
            normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if isEntry:
        modelXbrl.entryLoadingUrl = mappedUri  # for error loggiong during loading
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
            mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("FileNotLoadable",
                        _("File can not be loaded: %(fileName)s"),
                        modelObject=referringElement,
                        fileName=mappedUri)
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if (modelXbrl.modelManager.validateDisclosureSystem
                and modelXbrl.modelManager.disclosureSystem.validateFileText):
            file, _encoding = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file, _encoding = modelXbrl.fileSource.file(filepath)
        _parser, _parserLookupName, _parserLookupClass = parser(
            modelXbrl, filepath)
        xmlDocument = etree.parse(file, parser=_parser, base_url=filepath)
        file.close()
    except (EnvironmentError,
            KeyError) as err:  # missing zip file raises KeyError
        if file:
            file.close()
        # retry in case of well known schema locations
        if not isIncluded and namespace and namespace in XbrlConst.standardNamespaceSchemaLocations and uri != XbrlConst.standardNamespaceSchemaLocations[
                namespace]:
            return load(modelXbrl,
                        XbrlConst.standardNamespaceSchemaLocations[namespace],
                        base, referringElement, isEntry, isDiscovered,
                        isIncluded, namespace, reloadCache)
        modelXbrl.error("IOerror",
                        _("%(fileName)s: file error: %(error)s"),
                        modelObject=referringElement,
                        fileName=os.path.basename(uri),
                        error=str(err))
        return None
    except (
            etree.LxmlError, ValueError
    ) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        if file:
            file.close()
        if not isEntry and str(
                err) == "Start tag expected, '<' not found, line 1, column 1":
            return ModelDocument(modelXbrl, Type.UnknownNonXML, mappedUri,
                                 filepath, None)
        else:
            modelXbrl.error(
                "xmlSchema:syntax",
                _("%(error)s, %(fileName)s, %(sourceAction)s source element"),
                modelObject=referringElement,
                fileName=os.path.basename(uri),
                error=str(err),
                sourceAction=("including" if isIncluded else "importing"))
            return None

    # identify document
    #modelXbrl.modelManager.addToLog("discovery: {0}".format(
    #            os.path.basename(uri)))
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None

    rootNode = xmlDocument.getroot()
    if rootNode is not None:
        ln = rootNode.localName
        ns = rootNode.namespaceURI

        # type classification
        if ns == XbrlConst.xsd and ln == "schema":
            type = Type.SCHEMA
        elif ns == XbrlConst.link:
            if ln == "linkbase":
                type = Type.LINKBASE
            elif ln == "xbrl":
                type = Type.INSTANCE
        elif ns == XbrlConst.xbrli:
            if ln == "xbrl":
                type = Type.INSTANCE
        elif ns == XbrlConst.xhtml and \
             (ln == "html" or ln == "xhtml"):
            type = Type.UnknownXML
            if XbrlConst.ixbrl in rootNode.nsmap.values():
                type = Type.INLINEXBRL
        elif ln == "report" and ns == XbrlConst.ver:
            type = Type.VERSIONINGREPORT
        elif ln == "testcases" or ln == "documentation":
            type = Type.TESTCASESINDEX
        elif ln == "testcase":
            type = Type.TESTCASE
        elif ln == "registry" and ns == XbrlConst.registry:
            type = Type.REGISTRY
        elif ln == "rss":
            type = Type.RSSFEED
        elif ln == "ptvl":
            type = Type.ARCSINFOSET
        elif ln == "facts":
            type = Type.FACTDIMSINFOSET
        else:
            type = Type.UnknownXML
            nestedInline = None
            for htmlElt in rootNode.iter(
                    tag="{http://www.w3.org/1999/xhtml}html"):
                nestedInline = htmlElt
                break
            if nestedInline is None:
                for htmlElt in rootNode.iter(
                        tag="{http://www.w3.org/1999/xhtml}xhtml"):
                    nestedInline = htmlElt
                    break
            if nestedInline is not None:
                if XbrlConst.ixbrl in nestedInline.nsmap.values():
                    type = Type.INLINEXBRL
                    rootNode = nestedInline

        #create modelDocument object or subtype as identified
        if type == Type.VERSIONINGREPORT:
            from arelle.ModelVersReport import ModelVersReport
            modelDocument = ModelVersReport(modelXbrl, type, mappedUri,
                                            filepath, xmlDocument)
        elif type == Type.RSSFEED:
            from arelle.ModelRssObject import ModelRssObject
            modelDocument = ModelRssObject(modelXbrl, type, mappedUri,
                                           filepath, xmlDocument)
        else:
            modelDocument = ModelDocument(modelXbrl, type, mappedUri, filepath,
                                          xmlDocument)
        rootNode.init(modelDocument)
        modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.xmlRootElement = rootNode
        modelDocument.schemaLocationElements.add(rootNode)
        modelDocument.documentEncoding = _encoding

        if isEntry or isDiscovered:
            modelDocument.inDTS = True

        # discovery (parsing)
        if type == Type.SCHEMA:
            modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
        elif type == Type.LINKBASE:
            modelDocument.linkbaseDiscover(rootNode)
        elif type == Type.INSTANCE:
            modelDocument.instanceDiscover(rootNode)
        elif type == Type.INLINEXBRL:
            modelDocument.inlineXbrlDiscover(rootNode)
        elif type == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif type == Type.TESTCASESINDEX:
            modelDocument.testcasesIndexDiscover(xmlDocument)
        elif type == Type.TESTCASE:
            modelDocument.testcaseDiscover(rootNode)
        elif type == Type.REGISTRY:
            modelDocument.registryDiscover(rootNode)
        elif type == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif type == Type.RSSFEED:
            modelDocument.rssFeedDiscover(rootNode)
    return modelDocument

Example #9

Show file

File: ModelDocument.py Project: gplehmann/Arelle

def load(modelXbrl,
         uri,
         base=None,
         isEntry=False,
         isIncluded=None,
         namespace=None,
         reloadCache=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, base)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            "Prohibited file for filings{1}: {0}".format(
                normalizedUri,
                _(" blocked") if blocked else ""), "err", "EFM.6.22.02",
            "GFM.1.1.3", "SBR.NL.2.1.0.06")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[
            normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
            mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("File can not be loaded: {0}".format(mappedUri), "err",
                        "FileNotLoadable")
        type = Type.Unknown
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if modelXbrl.modelManager.validateDisclosureSystem:
            file = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file = modelXbrl.fileSource.file(filepath)
        xmlDocument = xml.dom.minidom.parse(file)
        file.close()
    except EnvironmentError as err:
        modelXbrl.error(
            "{0}: file error: {1}".format(os.path.basename(uri), err), "err",
            "IOerror")
        type = Type.Unknown
        if file:
            file.close()
        return None
    except (
            xml.parsers.expat.ExpatError, xml.dom.DOMException, ValueError
    ) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error(
            "{0}: import error: {1}".format(os.path.basename(uri), err), "err",
            "XMLsyntax")
        type = Type.Unknown
        if file:
            file.close()
        return None

    # identify document
    #modelXbrl.modelManager.addToLog("discovery: {0}".format(
    #            os.path.basename(uri)))
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None

    for rootNode in xmlDocument.childNodes:
        if rootNode.nodeType == 1:  #element
            ln = rootNode.localName
            ns = rootNode.namespaceURI

            # type classification
            if ns == XbrlConst.xsd and ln == "schema":
                type = Type.SCHEMA
            elif ns == XbrlConst.link:
                if ln == "linkbase":
                    type = Type.LINKBASE
                elif ln == "xbrl":
                    type = Type.INSTANCE
            elif ns == XbrlConst.xbrli:
                if ln == "xbrl":
                    type = Type.INSTANCE
            elif ns == XbrlConst.xhtml and \
                 ln == "html" or ln == "xhtml":
                type = Type.Unknown
                for i in range(len(rootNode.attributes)):
                    if rootNode.attributes.item(i).value == XbrlConst.ixbrl:
                        type = Type.INLINEXBRL
                        break
                XmlUtil.markIdAttributes(
                    rootNode)  # required for minidom searchability
            elif ln == "report" and ns == XbrlConst.ver:
                type = Type.VERSIONINGREPORT
            elif ln == "testcases" or ln == "documentation":
                type = Type.TESTCASESINDEX
            elif ln == "testcase":
                type = Type.TESTCASE
            elif ln == "registry" and ns == XbrlConst.registry:
                type = Type.REGISTRY
            elif ln == "rss":
                type = Type.RSSFEED
            else:
                type = Type.Unknown
                nestedInline = XmlUtil.descendant(rootNode, XbrlConst.xhtml,
                                                  ("html", "xhtml"))
                if nestedInline:
                    for i in range(len(nestedInline.attributes)):
                        if nestedInline.attributes.item(
                                i).value == XbrlConst.ixbrl:
                            type = Type.INLINEXBRL
                            rootNode = nestedInline
                            break
                XmlUtil.markIdAttributes(
                    rootNode)  # required for minidom searchability

            #create modelDocument object or subtype as identified
            if type == Type.VERSIONINGREPORT:
                from arelle.ModelVersReport import ModelVersReport
                modelDocument = ModelVersReport(modelXbrl, type, mappedUri,
                                                filepath, xmlDocument)
            elif type == Type.RSSFEED:
                from arelle.ModelRssObject import ModelRssObject
                modelDocument = ModelRssObject(modelXbrl, type, mappedUri,
                                               filepath, xmlDocument)
            else:
                modelDocument = ModelDocument(modelXbrl, type, mappedUri,
                                              filepath, xmlDocument)
            modelDocument.xmlRootElement = rootNode
            modelDocument.schemaLocationElements.add(rootNode)

            if isEntry:
                modelDocument.inDTS = True

            # discovery (parsing)
            if type == Type.SCHEMA:
                modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
            elif type == Type.LINKBASE:
                modelDocument.linkbaseDiscover(rootNode)
            elif type == Type.INSTANCE:
                modelDocument.instanceDiscover(rootNode)
            elif type == Type.INLINEXBRL:
                modelDocument.inlineXbrlDiscover(rootNode)
            elif type == Type.VERSIONINGREPORT:
                modelDocument.versioningReportDiscover(rootNode)
            elif type == Type.TESTCASESINDEX:
                modelDocument.testcasesIndexDiscover(xmlDocument)
            elif type == Type.TESTCASE:
                modelDocument.testcaseDiscover(rootNode)
            elif type == Type.REGISTRY:
                modelDocument.registryDiscover(rootNode)
            elif type == Type.VERSIONINGREPORT:
                modelDocument.versioningReportDiscover(rootNode)
            elif type == Type.RSSFEED:
                modelDocument.rssFeedDiscover(rootNode)
            break
    return modelDocument