def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    """Create a new in-memory model document of the requested type.

    A skeleton XML document is generated for instances, schemas and RSS
    feeds, parsed with minidom and wrapped in a ``ModelDocument`` (or a
    ``ModelRssObject`` for RSS feeds); the matching discovery step is then
    run on the new root element.

    Args:
        modelXbrl: owning model; its web cache is used to normalize ``uri``
            and to resolve the local file path.
        type: one of the ``Type`` constants. Values other than INSTANCE,
            SCHEMA, RSSFEED and DTSENTRIES are replaced by ``Type.Unknown``.
        uri: location of the document being created.
        schemaRefs: optional iterable of schema locations; each becomes a
            ``link:schemaRef`` child of a new instance (backslashes are
            converted to forward slashes).
        isEntry: when true, ``modelXbrl.uri`` and ``modelXbrl.uriDir`` are
            set from the normalized uri.

    Returns:
        The newly created model document.
    """
    from xml.sax.saxutils import quoteattr

    webCache = modelXbrl.modelManager.cntlr.webCache
    normalizedUri = webCache.normalizeUrl(uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        # climb one directory per permitted level of submission nesting
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = webCache.getfilename(normalizedUri)

    if type == Type.INSTANCE:
        pieces = ['<?xml version="1.0" encoding="UTF-8"?>'
                  '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
                  ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
                  ' xmlns:xlink="http://www.w3.org/1999/xlink">']
        for schemaRef in schemaRefs or ():
            # quoteattr escapes &, <, > and quotes so that any href yields
            # well-formed XML; for ordinary hrefs the output is unchanged
            pieces.append('<link:schemaRef xlink:type="simple" xlink:href={0}/>'.format(
                quoteattr(schemaRef.replace("\\", "/"))))
        pieces.append('</xbrl>')
        Xml = ''.join(pieces)
    elif type == Type.SCHEMA:
        Xml = ('<?xml version="1.0" encoding="UTF-8"?>'
               '<schema xmlns="http://www.w3.org/2001/XMLSchema" />')
    elif type == Type.RSSFEED:
        Xml = '<?xml version="1.0" encoding="UTF-8"?><rss version="2.0" />'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        # NOTE(review): a declaration-only document has no root element, so
        # minidom's parseString is expected to raise ExpatError here --
        # confirm whether unknown types are ever created through this path.
        Xml = '<?xml version="1.0" encoding="UTF-8"?>'

    xmlDocument = xml.dom.minidom.parseString(Xml) if Xml else None

    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        # NOTE(review): the RSS object receives the caller's uri rather than
        # normalizedUri -- confirm this is intended.
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath, xmlDocument)

    if xmlDocument:
        modelDocument.xmlRootElement = modelDocument.xmlDocument.documentElement
        if type == Type.INSTANCE:
            modelDocument.instanceDiscover(modelDocument.xmlRootElement)
        elif type == Type.RSSFEED:
            modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
        elif type == Type.SCHEMA:
            modelDocument.targetNamespace = None
    return modelDocument
def companiesHouseLoader(modelXbrl, mappedUri, filepath, **kwargs):
    """Expose a Companies House zip download as an RSS-like feed object.

    Args:
        modelXbrl: model whose ``fileSource`` lists the archive contents.
        mappedUri: URL being loaded; only zip files under
            ``http://download.companieshouse.gov.uk/`` are handled.
        filepath: local path of the downloaded archive.
        **kwargs: accepted for loader-interface compatibility; unused here.

    Returns:
        A ``ModelRssObject`` with one ``CompaniesHouseItem`` per archive
        entry, or ``None`` when ``mappedUri`` is not a Companies House zip.
    """
    isCompaniesHouseZip = (
        mappedUri.startswith("http://download.companieshouse.gov.uk/")
        and mappedUri.endswith(".zip"))
    if not isCompaniesHouseZip:
        return None  # not a Companies House zip file

    rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)
    # one feed item per file in the archive; a missing/empty listing simply
    # yields a feed without items instead of failing on iteration
    for instanceFile in modelXbrl.fileSource.dir or ():
        rssObject.rssItems.append(
            CompaniesHouseItem(modelXbrl, instanceFile, mappedUri + '/' + instanceFile))
    return rssObject
def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    """Build an empty model document of ``type`` without reading a file.

    The document's XML skeleton is synthesized as a string, parsed with
    minidom, and handed to ``ModelDocument`` (``ModelRssObject`` for RSS
    feeds). Instance and RSS documents then run their discovery method on
    the root element; schemas get ``targetNamespace`` set to ``None``.

    Args:
        modelXbrl: owning model, providing the web cache and disclosure
            system settings.
        type: a ``Type`` constant; unrecognised values fall back to
            ``Type.Unknown``.
        uri: location for the new document.
        schemaRefs: optional iterable of schema locations to reference
            from a new instance via ``link:schemaRef``.
        isEntry: when true, record the normalized uri and its directory
            on ``modelXbrl``.

    Returns:
        The created model document.
    """
    from xml.sax.saxutils import escape

    XML_DECL = '<?xml version="1.0" encoding="UTF-8"?>'
    cache = modelXbrl.modelManager.cntlr.webCache
    normalizedUri = cache.normalizeUrl(uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        entryDir = os.path.dirname(normalizedUri)
        modelXbrl.uriDir = entryDir
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = cache.getfilename(normalizedUri)

    if type == Type.INSTANCE:
        Xml = (XML_DECL +
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                # escape XML-significant characters so hrefs containing
                # &, < or " cannot produce a malformed document
                href = escape(schemaRef.replace("\\", "/"), {'"': "&quot;"})
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(href)
        Xml += '</xbrl>'
    elif type == Type.SCHEMA:
        Xml = XML_DECL + '<schema xmlns="http://www.w3.org/2001/XMLSchema" />'
    elif type == Type.RSSFEED:
        Xml = XML_DECL + '<rss version="2.0" />'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        # NOTE(review): this text has no root element; minidom is expected
        # to reject it with ExpatError -- confirm this path is really used.
        Xml = XML_DECL

    if Xml:
        xmlDocument = xml.dom.minidom.parseString(Xml)
    else:
        xmlDocument = None

    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath, xmlDocument)

    if xmlDocument:
        root = modelDocument.xmlDocument.documentElement
        modelDocument.xmlRootElement = root
        if type == Type.INSTANCE:
            modelDocument.instanceDiscover(root)
        elif type == Type.RSSFEED:
            modelDocument.rssFeedDiscover(root)
        elif type == Type.SCHEMA:
            modelDocument.targetNamespace = None
    return modelDocument
def secCorrespondenceLoader(modelXbrl, mappedUri, filepath, **kwargs):
    """Load an SEC EDGAR daily correspondence feed or one item from it.

    Two cases are handled:

    * ``mappedUri`` is a daily feed archive (``.nc.tar.gz`` under the EDGAR
      Feed directory): the archive is extracted to a temporary directory
      and a ``ModelRssObject`` with one ``SECCorrespondenceItem`` per
      extracted file is returned.
    * ``kwargs`` carries an ``rssItem`` and ``mappedUri`` points inside such
      an archive: the extracted file is parsed for header fields and its
      PDF or text body, and an instance document wrapping that text is
      returned (or a dummy XML document when no text was found).

    Args:
        modelXbrl: owning model (file source, controller, logging).
        mappedUri: URL being loaded.
        filepath: local path of the feed archive.
        **kwargs: may contain ``rssItem``, the feed item to populate.

    Returns:
        A ``ModelRssObject``, a model document, or ``None`` when the URL is
        not handled by this loader.
    """
    if (mappedUri.startswith("http://www.sec.gov/Archives/edgar/Feed/") and
            mappedUri.endswith(".nc.tar.gz")):
        # daily feed loader (the rss object)
        rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)
        # location for expanded feed files
        tempdir = os.path.join(modelXbrl.modelManager.cntlr.userAppDir, "tmp", "edgarFeed")
        # remove prior files
        if os.path.exists(tempdir):
            import subprocess
            try:
                # rm is kept because rmtree does not work with this many
                # files; an argument list (no shell) keeps paths containing
                # spaces or shell metacharacters intact
                subprocess.call(["rm", "-fr", tempdir])
            except OSError:
                # rm could not be run on this platform: best-effort fallback
                import shutil
                shutil.rmtree(tempdir, ignore_errors=True)
        os.makedirs(tempdir, exist_ok=True)
        # untar to the temp directory for faster operation
        startedAt = time.time()
        modelXbrl.fileSource.open()
        # NOTE(review): if fs is a tarfile, extractall trusts member paths
        # from the downloaded archive -- confirm the source is trusted.
        modelXbrl.fileSource.fs.extractall(tempdir)
        modelXbrl.info("info", "untar edgarFeed temp files in %.2f sec" % (time.time() - startedAt),
                       modelObject=modelXbrl)
        # one feed item per extracted file
        for instanceFile in sorted(os.listdir(tempdir)):
            if instanceFile != ".":
                rssObject.rssItems.append(
                    SECCorrespondenceItem(modelXbrl, instanceFile, mappedUri + '/' + instanceFile))
        return rssObject

    elif "rssItem" in kwargs and ".nc.tar.gz/" in mappedUri:
        rssItem = kwargs["rssItem"]
        text = None  # no instance information
        # parse document
        try:
            file, encoding = modelXbrl.fileSource.file(
                os.path.join(modelXbrl.modelManager.cntlr.userAppDir, "tmp", "edgarFeed",
                             os.path.basename(rssItem.url)))
            try:
                s = file.read()
            finally:
                file.close()
            # header lines look like <TAG>value
            for match in re.finditer(r"[<]([^>]+)[>]([^<\n\r]*)", s, re.MULTILINE):
                tag = match.group(1).lower()
                v = match.group(2)
                if tag == "accession-number":
                    rssItem.accessionNumber = v
                elif tag == "form-type":
                    rssItem.formType = v
                    if v != "UPLOAD":
                        rssItem.doNotProcessRSSitem = True  # skip this RSS item in validate loop, don't load DB
                elif tag == "filing-date":
                    try:
                        rssItem.filingDate = datetime.date(int(v[0:4]), int(v[4:6]), int(v[6:8]))
                    except (ValueError, IndexError):
                        pass
                elif tag == "conformed-name":
                    rssItem.companyName = v
                elif tag == "cik":
                    rssItem.cikNumber = v
                elif tag == "assigned-sic":
                    rssItem.assignedSic = v
                elif tag == "fiscal-year-end":
                    try:
                        rssItem.fiscalYearEnd = v[0:2] + '-' + v[2:4]
                    except (IndexError, TypeError):
                        pass
            match = re.search("<PDF>(.*)</PDF>", s, re.DOTALL)
            if match:
                import uu, io
                pageText = []
                # the PDF is embedded uuencoded; decode it into memory
                uuIn = io.BytesIO(match.group(1).encode(encoding))
                uuOut = io.BytesIO()
                uu.decode(uuIn, uuOut)
                from pyPdf import PdfFileReader
                uuOut.seek(0, 0)
                try:
                    pdfIn = PdfFileReader(uuOut)
                    for pageNum in range(pdfIn.getNumPages()):
                        pageText.append(pdfIn.getPage(pageNum).extractText())
                except Exception:
                    # PDF can't be read with this library; keep whatever
                    # pages were extracted before the failure
                    pass
                uuIn.close()
                uuOut.close()
                text = ''.join(pageText)
            else:
                match = re.search("<TEXT>(.*)</TEXT>", s, re.DOTALL)
                if match:
                    text = match.group(1)
        except (IOError, EnvironmentError):
            pass  # give up, no instance

        # daily rss item loader, provide unpopulated instance document to be filled in by RssItem.Xbrl.Loaded
        if not text:
            rssItem.doNotProcessRSSitem = True  # skip this RSS item in validate loop, don't load DB
            instDoc = ModelDocument.create(modelXbrl,
                                           ModelDocument.Type.UnknownXML,
                                           rssItem.url,
                                           isEntry=True,
                                           base='',  # block pathname from becoming absolute
                                           initialXml='<DummyXml/>')
        else:
            # the text becomes XML element content, so & and < must be
            # escaped (& first, so the inserted entities are not re-escaped)
            escapedText = text.strip().replace("&", "&amp;").replace("<", "&lt;")
            instDoc = ModelDocument.create(modelXbrl,
                                           ModelDocument.Type.INSTANCE,
                                           rssItem.url,
                                           isEntry=True,
                                           base='',  # block pathname from becoming absolute
                                           initialXml='''
<xbrli:xbrl xmlns:doc="http://arelle.org/doc/2014-01-31"
    xmlns:link="http://www.xbrl.org/2003/linkbase"
    xmlns:xlink="http://www.w3.org/1999/xlink"
    xmlns:xbrli="http://www.xbrl.org/2003/instance">
    <link:schemaRef xlink:type="simple" xlink:href="http://arelle.org/2014/doc-2014-01-31.xsd"/>
    <xbrli:context id="pubDate">
        <xbrli:entity>
            <xbrli:identifier scheme="http://www.sec.gov/CIK">{cik}</xbrli:identifier>
        </xbrli:entity>
        <xbrli:period>
            <xbrli:instant>{pubDate}</xbrli:instant>
        </xbrli:period>
    </xbrli:context>
    <doc:Correspondence contextRef="pubDate">{text}</doc:Correspondence>
</xbrli:xbrl>
'''.format(cik=rssItem.cikNumber,
           pubDate=rssItem.pubDate.date(),
           text=escapedText))
        return instDoc
    return None
def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    """Create a new in-memory model document of the requested type (lxml).

    The generated XML is wrapped in an ``<nsmap>`` root element; the first
    ``ModelObject`` child of that wrapper becomes the document's semantic
    root (``xmlRootElement``).

    Args:
        modelXbrl: owning model; supplies the web cache, disclosure system
            settings and is passed to ``parser``.
        type: a ``Type`` constant; values other than INSTANCE, SCHEMA,
            RSSFEED and DTSENTRIES are replaced by ``Type.Unknown``.
        uri: location of the document being created.
        schemaRefs: optional iterable of schema locations emitted as
            ``link:schemaRef`` elements in a new instance.
        isEntry: when true, ``uri``, ``entryLoadingUrl`` and ``uriDir`` are
            recorded on ``modelXbrl``.

    Returns:
        The newly created model document.
    """
    from xml.sax.saxutils import escape

    webCache = modelXbrl.modelManager.cntlr.webCache
    normalizedUri = webCache.normalizeUrl(uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.entryLoadingUrl = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = webCache.getfilename(normalizedUri)

    # XML document has nsmap root element to replace nsmap as new xmlns entries are required
    if type == Type.INSTANCE:
        Xml = ('<nsmap>'
               '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
               ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink">')
        if schemaRefs:
            for schemaRef in schemaRefs:
                # escape &, <, > and " so any href yields well-formed XML
                href = escape(schemaRef.replace("\\", "/"), {'"': "&quot;"})
                Xml += '<link:schemaRef xlink:type="simple" xlink:href="{0}"/>'.format(href)
        Xml += '</xbrl></nsmap>'
    elif type == Type.SCHEMA:
        Xml = '<nsmap><schema xmlns="http://www.w3.org/2001/XMLSchema" /></nsmap>'
    elif type == Type.RSSFEED:
        Xml = '<nsmap><rss version="2.0" /></nsmap>'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.Unknown
        Xml = '<nsmap/>'

    if Xml:
        import io
        _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath)
        with io.StringIO(Xml) as xmlSource:
            xmlDocument = etree.parse(xmlSource, parser=_parser, base_url=filepath)
    else:
        xmlDocument = None

    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath, xmlDocument)

    if Xml:
        modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.documentEncoding = "utf-8"
        rootNode = xmlDocument.getroot()
        rootNode.init(modelDocument)
    if xmlDocument:
        # the semantic root is the first model object beneath <nsmap>
        for semanticRoot in rootNode.iterchildren():
            if isinstance(semanticRoot, ModelObject):
                modelDocument.xmlRootElement = semanticRoot
                break
        if type == Type.INSTANCE:
            modelDocument.instanceDiscover(modelDocument.xmlRootElement)
        elif type == Type.RSSFEED:
            modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
        elif type == Type.SCHEMA:
            modelDocument.targetNamespace = None
            modelDocument.isQualifiedElementFormDefault = False
            modelDocument.isQualifiedAttributeFormDefault = False
    return modelDocument
def tdNetLoader(modelXbrl, mappedUri, filepath, **kwargs):
    """Treat a TDnet disclosure index page as an RSS-like feed.

    The HTML index is parsed for its date and for the table containing
    "download" links; each table row with a zip URL is opened to find
    instance files, and one ``TDnetItem`` is added per instance found.
    Continuation pages ("next screen" button) are followed in turn.

    Args:
        modelXbrl: owning model (web cache access).
        mappedUri: URL being loaded; only ``I_*.html`` pages under
            ``https://www.release.tdnet.info/inbs/`` are handled.
        filepath: local path of the downloaded index page.
        **kwargs: accepted for loader-interface compatibility; unused here.

    Returns:
        A ``ModelRssObject`` holding the discovered items, or ``None`` when
        the URL/page is not a TDnet index or cannot be read.
    """
    if not (mappedUri.startswith("https://www.release.tdnet.info/inbs/I_") and
            mappedUri.endswith(".html")):
        return None  # not a td net info file

    headerNames = {"時刻": "time",
                   "コード": "code",
                   "会社名": "companyName",
                   "表題": "title",
                   "XBRL": "zipUrl",
                   "上場取引所": "stockExchange",
                   "更新履歴": "changeLog"}
    webCache = modelXbrl.modelManager.cntlr.webCache
    rssObject = ModelRssObject(modelXbrl, uri=mappedUri, filepath=filepath)

    hasMoreSections = True
    while hasMoreSections:
        # treat tdnet as an RSS feed object
        try:
            tdInfoDoc = html.parse(filepath)
        except (IOError, EnvironmentError):
            return None  # give up, use ordinary loader

        # find date
        date = None
        for elt in tdInfoDoc.iter():
            if elt.tag == "table":
                break  # no date portion, probably wrong document
            dateMatch = datePattern.match(elt.text) if elt.text else None
            if dateMatch:
                g = dateMatch.groups()
                date = datetime.date(int(g[0]), int(g[1]), int(g[2]))
                break
        if not date:
            return None  # give up, not a TDnet index document

        urlDir = os.path.dirname(mappedUri)

        # find <table> with <a>Download in it
        for tableElt in tdInfoDoc.iter(tag="table"):
            # anchors without text (e.g. wrapping only an image) have
            # text None, so treat missing text as empty
            if not any("download" in (aElt.text or "").lower()
                       for aElt in tableElt.iterdescendants(tag="a")):
                continue
            cols = {}  # maps column index -> name and name -> column index
            for trElt in tableElt.iter(tag="tr"):
                col = 0
                rowData = {}
                for tdElt in trElt.iter(tag="td"):
                    text = ''.join(t.strip() for t in tdElt.itertext())
                    if tdElt.get("class") == "tableh":  # header
                        colName = headerNames.get(text, None)
                        if colName:
                            cols[col] = colName
                            cols[colName] = col
                    # .get: these headers may not have been seen (yet)
                    elif col == cols.get("title"):
                        rowData["title"] = text
                        rowData["pdfUrl"] = descendantAttr(tdElt, "a", "href")
                    elif col == cols.get("zipUrl"):
                        rowData["zipUrl"] = descendantAttr(tdElt, "a", "href")
                    elif col in cols:  # body
                        rowData[cols[col]] = text
                    col += int(tdElt.get("colspan", 1))
                if not rowData:
                    continue
                timeMatch = timePattern.match(rowData.get("time", ""))
                if timeMatch:
                    g = timeMatch.groups()
                    dateTime = datetime.datetime(date.year, date.month, date.day,
                                                 int(g[0]), int(g[1]))
                else:
                    dateTime = datetime.datetime.now()
                filingCode = rowData.get("code")
                companyName = rowData.get("companyName")
                stockExchange = rowData.get("stockExchange")
                title = rowData.get("title")
                pdfUrl = rowData.get("pdfUrl")
                if pdfUrl:
                    pdfUrl = urlDir + "/" + pdfUrl
                zipUrl = rowData.get("zipUrl")
                if zipUrl:
                    zipUrl = urlDir + "/" + zipUrl
                # find instance doc in file
                instanceUrls = []
                if zipUrl:
                    try:
                        normalizedUri = webCache.normalizeUrl(zipUrl)
                        # separate name: must not clobber the index page's filepath
                        zipFilepath = webCache.getfilename(normalizedUri)
                        filesource = FileSource.FileSource(zipFilepath)
                        try:
                            archiveEntries = filesource.dir
                        finally:
                            filesource.close()
                        if archiveEntries:
                            for entryName in archiveEntries:
                                if ("ixbrl" in entryName or entryName.endswith(".xbrl")
                                        or "instance" in entryName):
                                    instanceUrls.append(zipUrl + "/" + entryName)
                    except Exception:
                        continue  # forget this filing
                for instanceUrl in instanceUrls:
                    rssObject.rssItems.append(
                        TDnetItem(modelXbrl, date, dateTime, filingCode, companyName,
                                  title, pdfUrl, instanceUrl, stockExchange))

        # next screen if continuation
        hasMoreSections = False
        for elt in tdInfoDoc.iter(tag="input"):
            if elt.value == "次画面":  # next screen button
                nextLocation = elt.get("onclick")
                nextMatch = nextLocationPattern.match(nextLocation) if nextLocation else None
                if nextMatch:
                    hasMoreSections = True
                    nextUrl = urlDir + "/" + nextMatch.groups()[0]
                    mappedUri = webCache.normalizeUrl(nextUrl)
                    filepath = webCache.getfilename(mappedUri)
    return rssObject
def create(modelXbrl, type, uri, schemaRefs=None, isEntry=False):
    """Synthesize an empty model document of ``type`` using lxml.

    The skeleton XML is wrapped in an ``<nsmap>`` element and parsed with
    the parser returned by ``parser(modelXbrl, filepath)``. The first
    ``ModelObject`` child of ``<nsmap>`` is stored as ``xmlRootElement``
    and the type-specific discovery step is run on it.

    Args:
        modelXbrl: owning model, providing the web cache and disclosure
            system settings.
        type: a ``Type`` constant; unrecognised values fall back to
            ``Type.UnknownXML``.
        uri: location for the new document.
        schemaRefs: optional iterable of schema locations to reference
            from a new instance via ``link:schemaRef``.
        isEntry: when true, record ``uri``, ``entryLoadingUrl`` and
            ``uriDir`` on ``modelXbrl``.

    Returns:
        The created model document.
    """
    from xml.sax.saxutils import quoteattr

    cache = modelXbrl.modelManager.cntlr.webCache
    normalizedUri = cache.normalizeUrl(uri, None)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.entryLoadingUrl = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    filepath = cache.getfilename(normalizedUri)

    # XML document has nsmap root element to replace nsmap as new xmlns entries are required
    if type == Type.INSTANCE:
        fragments = ['<nsmap>'
                     '<xbrl xmlns="http://www.xbrl.org/2003/instance"'
                     ' xmlns:link="http://www.xbrl.org/2003/linkbase"'
                     ' xmlns:xlink="http://www.w3.org/1999/xlink">']
        for schemaRef in schemaRefs or ():
            # quoteattr supplies the surrounding quotes and escapes any
            # XML-significant characters in the href
            fragments.append('<link:schemaRef xlink:type="simple" xlink:href={0}/>'.format(
                quoteattr(schemaRef.replace("\\", "/"))))
        fragments.append('</xbrl></nsmap>')
        Xml = ''.join(fragments)
    elif type == Type.SCHEMA:
        Xml = '<nsmap><schema xmlns="http://www.w3.org/2001/XMLSchema" /></nsmap>'
    elif type == Type.RSSFEED:
        Xml = '<nsmap><rss version="2.0" /></nsmap>'
    elif type == Type.DTSENTRIES:
        Xml = None
    else:
        type = Type.UnknownXML
        Xml = '<nsmap/>'

    if Xml:
        import io
        _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath)
        with io.StringIO(Xml) as xmlSource:
            xmlDocument = etree.parse(xmlSource, parser=_parser, base_url=filepath)
    else:
        xmlDocument = None

    if type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, uri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, normalizedUri, filepath, xmlDocument)

    if Xml:
        modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.documentEncoding = "utf-8"
        rootNode = xmlDocument.getroot()
        rootNode.init(modelDocument)
    if xmlDocument:
        # pick the first model object under the <nsmap> wrapper as root
        for semanticRoot in rootNode.iterchildren():
            if isinstance(semanticRoot, ModelObject):
                modelDocument.xmlRootElement = semanticRoot
                break
        if type == Type.INSTANCE:
            modelDocument.instanceDiscover(modelDocument.xmlRootElement)
        elif type == Type.RSSFEED:
            modelDocument.rssFeedDiscover(modelDocument.xmlRootElement)
        elif type == Type.SCHEMA:
            modelDocument.targetNamespace = None
            modelDocument.isQualifiedElementFormDefault = False
            modelDocument.isQualifiedAttributeFormDefault = False
            modelDocument.definesUTR = False
    return modelDocument
def load(modelXbrl, uri, base=None, referringElement=None, isEntry=False, isDiscovered=False, isIncluded=None, namespace=None, reloadCache=False):
    """Load a document from ``uri``, classify it and run its discovery step.

    The uri is normalized, checked against the disclosure system's allowed
    references, mapped through ``mappedFiles``/``mappedPaths``, fetched via
    the file source or web cache, parsed with lxml, classified by its root
    element and wrapped in the matching model document class.

    Args:
        modelXbrl: owning model.
        uri: document location, resolved against ``base``.
        base: base location passed to the web cache's ``normalizeUrl``.
        referringElement: object reported in error messages; defaults to
            ``modelXbrl``.
        isEntry: true for the entry document; records uri/uriDir on the
            model and marks the document as in the DTS.
        isDiscovered: true when reached by DTS discovery; marks the
            document as in the DTS.
        isIncluded: passed to ``schemaDiscover``; also selects the
            "including"/"importing" wording of syntax errors and disables
            the standard-location retry.
        namespace: expected namespace; passed to ``schemaDiscover`` and
            used to look up a standard schema location for the retry.
        reloadCache: passed to the web cache as ``reload``.

    Returns:
        The model document (an already loaded one is reused), or ``None``
        when the file is blocked, cannot be loaded or cannot be parsed.
    """
    if referringElement is None:  # used for error messages
        referringElement = modelXbrl
    modelManager = modelXbrl.modelManager
    disclosureSystem = modelManager.disclosureSystem
    webCache = modelManager.cntlr.webCache

    normalizedUri = webCache.normalizeUrl(uri, base)
    if isEntry:
        modelXbrl.entryLoadingUrl = normalizedUri  # for error logging during loading
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not disclosureSystem.hrefValid(normalizedUri):
        blocked = disclosureSystem.blockDisallowedReferences
        modelXbrl.error(("EFM.6.22.02", "GFM.1.1.3",
                         "SBR.NL.2.1.0.06" if normalizedUri.startswith("http") else "SBR.NL.2.2.0.17"),
                        _("Prohibited file for filings %(blockedIndicator)s: %(url)s"),
                        modelObject=referringElement, url=normalizedUri,
                        blockedIndicator=_(" blocked") if blocked else "")
        if blocked:
            return None

    if normalizedUri in disclosureSystem.mappedFiles:
        mappedUri = disclosureSystem.mappedFiles[normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if isEntry:
        modelXbrl.entryLoadingUrl = mappedUri  # for error logging during loading
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = webCache.getfilename(mappedUri, reload=reloadCache)
        if filepath:
            # from here on uri is the normalized local (cache) path
            uri = webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("FileNotLoadable",
                        _("File can not be loaded: %(fileName)s"),
                        modelObject=referringElement, fileName=mappedUri)
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        try:
            if (modelManager.validateDisclosureSystem and
                    disclosureSystem.validateFileText):
                file, _encoding = ValidateFilingText.checkfile(modelXbrl, filepath)
            else:
                file, _encoding = modelXbrl.fileSource.file(filepath)
            _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath)
            xmlDocument = etree.parse(file, parser=_parser, base_url=filepath)
        finally:
            # close on success and on every failure, before error handling
            if file is not None:
                file.close()
    except (EnvironmentError, KeyError) as err:  # missing zip file raises KeyError
        # retry in case of well known schema locations
        if not isIncluded and namespace and namespace in XbrlConst.standardNamespaceSchemaLocations:
            standardLocation = XbrlConst.standardNamespaceSchemaLocations[namespace]
            # uri may already have been replaced by the cache path, so also
            # compare the normalized and mapped locations; otherwise a
            # failing standard location would be retried without end
            if standardLocation not in (uri, normalizedUri, mappedUri):
                return load(modelXbrl, standardLocation, base, referringElement,
                            isEntry, isDiscovered, isIncluded, namespace, reloadCache)
        modelXbrl.error("IOerror",
                        _("%(fileName)s: file error: %(error)s"),
                        modelObject=referringElement, fileName=os.path.basename(uri), error=str(err))
        return None
    except (etree.LxmlError,
            ValueError) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        if not isEntry and str(err) == "Start tag expected, '<' not found, line 1, column 1":
            # a referenced file that is not XML at all
            return ModelDocument(modelXbrl, Type.UnknownNonXML, mappedUri, filepath, None)
        modelXbrl.error("xmlSchema:syntax",
                        _("%(error)s, %(fileName)s, %(sourceAction)s source element"),
                        modelObject=referringElement, fileName=os.path.basename(uri),
                        error=str(err), sourceAction=("including" if isIncluded else "importing"))
        return None

    # identify document
    modelManager.showStatus(_("loading {0}").format(uri))
    rootNode = xmlDocument.getroot()
    if rootNode is None:
        return None

    ln = rootNode.localName
    ns = rootNode.namespaceURI

    # type classification; the default covers link:/xbrli: roots whose
    # local name is not recognised (previously left unassigned)
    type = Type.UnknownXML
    if ns == XbrlConst.xsd and ln == "schema":
        type = Type.SCHEMA
    elif ns == XbrlConst.link:
        if ln == "linkbase":
            type = Type.LINKBASE
        elif ln == "xbrl":
            type = Type.INSTANCE
    elif ns == XbrlConst.xbrli:
        if ln == "xbrl":
            type = Type.INSTANCE
    elif ns == XbrlConst.xhtml and \
         (ln == "html" or ln == "xhtml"):
        type = Type.UnknownXML
        if XbrlConst.ixbrl in rootNode.nsmap.values():
            type = Type.INLINEXBRL
    elif ln == "report" and ns == XbrlConst.ver:
        type = Type.VERSIONINGREPORT
    elif ln == "testcases" or ln == "documentation":
        type = Type.TESTCASESINDEX
    elif ln == "testcase":
        type = Type.TESTCASE
    elif ln == "registry" and ns == XbrlConst.registry:
        type = Type.REGISTRY
    elif ln == "rss":
        type = Type.RSSFEED
    elif ln == "ptvl":
        type = Type.ARCSINFOSET
    elif ln == "facts":
        type = Type.FACTDIMSINFOSET
    else:
        type = Type.UnknownXML
        # look for an (x)html element nested inside some other wrapper
        nestedInline = next(rootNode.iter(tag="{http://www.w3.org/1999/xhtml}html"), None)
        if nestedInline is None:
            nestedInline = next(rootNode.iter(tag="{http://www.w3.org/1999/xhtml}xhtml"), None)
        if nestedInline is not None:
            if XbrlConst.ixbrl in nestedInline.nsmap.values():
                type = Type.INLINEXBRL
                rootNode = nestedInline

    # create modelDocument object or subtype as identified
    if type == Type.VERSIONINGREPORT:
        from arelle.ModelVersReport import ModelVersReport
        modelDocument = ModelVersReport(modelXbrl, type, mappedUri, filepath, xmlDocument)
    elif type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, mappedUri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, mappedUri, filepath, xmlDocument)
    rootNode.init(modelDocument)
    modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
    modelDocument.parserLookupName = _parserLookupName
    modelDocument.parserLookupClass = _parserLookupClass
    modelDocument.xmlRootElement = rootNode
    modelDocument.schemaLocationElements.add(rootNode)
    modelDocument.documentEncoding = _encoding

    if isEntry or isDiscovered:
        modelDocument.inDTS = True

    # discovery (parsing)
    if type == Type.SCHEMA:
        modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
    elif type == Type.LINKBASE:
        modelDocument.linkbaseDiscover(rootNode)
    elif type == Type.INSTANCE:
        modelDocument.instanceDiscover(rootNode)
    elif type == Type.INLINEXBRL:
        modelDocument.inlineXbrlDiscover(rootNode)
    elif type == Type.VERSIONINGREPORT:
        modelDocument.versioningReportDiscover(rootNode)
    elif type == Type.TESTCASESINDEX:
        modelDocument.testcasesIndexDiscover(xmlDocument)
    elif type == Type.TESTCASE:
        modelDocument.testcaseDiscover(rootNode)
    elif type == Type.REGISTRY:
        modelDocument.registryDiscover(rootNode)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(rootNode)
    return modelDocument
def load(modelXbrl, uri, base=None, isEntry=False, isIncluded=None, namespace=None, reloadCache=False):
    """Load a document from ``uri`` with minidom and run its discovery step.

    The uri is normalized, checked against the disclosure system's allowed
    references, mapped through ``mappedFiles``/``mappedPaths``, fetched via
    the file source or web cache, parsed, classified by its first element
    node and wrapped in the matching model document class.

    Args:
        modelXbrl: owning model.
        uri: document location, resolved against ``base``.
        base: base location passed to the web cache's ``normalizeUrl``.
        isEntry: true for the entry document; records uri/uriDir on the
            model and marks the document as in the DTS.
        isIncluded: passed through to ``schemaDiscover``.
        namespace: passed through to ``schemaDiscover``.
        reloadCache: passed to the web cache as ``reload``.

    Returns:
        The model document (an already loaded one is reused), or ``None``
        when the file is blocked, cannot be loaded, cannot be parsed or
        contains no element node.
    """
    modelManager = modelXbrl.modelManager
    disclosureSystem = modelManager.disclosureSystem
    webCache = modelManager.cntlr.webCache

    normalizedUri = webCache.normalizeUrl(uri, base)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not disclosureSystem.hrefValid(normalizedUri):
        blocked = disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            "Prohibited file for filings{1}: {0}".format(
                normalizedUri, _(" blocked") if blocked else ""),
            "err", "EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06")
        if blocked:
            return None

    if normalizedUri in disclosureSystem.mappedFiles:
        mappedUri = disclosureSystem.mappedFiles[normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = webCache.getfilename(mappedUri, reload=reloadCache)
        if filepath:
            uri = webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("File can not be loaded: {0}".format(mappedUri),
                        "err", "FileNotLoadable")
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        try:
            if modelManager.validateDisclosureSystem:
                file = ValidateFilingText.checkfile(modelXbrl, filepath)
            else:
                file = modelXbrl.fileSource.file(filepath)
            xmlDocument = xml.dom.minidom.parse(file)
        finally:
            # close on success and on every failure, before error reporting
            if file is not None:
                file.close()
    except EnvironmentError as err:
        modelXbrl.error(
            "{0}: file error: {1}".format(os.path.basename(uri), err),
            "err", "IOerror")
        return None
    except (xml.parsers.expat.ExpatError,
            xml.dom.DOMException,
            ValueError) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error(
            "{0}: import error: {1}".format(os.path.basename(uri), err),
            "err", "XMLsyntax")
        return None

    # identify document
    modelManager.showStatus(_("loading {0}").format(uri))

    def declaresInlineXbrl(element):
        # true when any attribute value (e.g. an xmlns declaration) is the
        # inline XBRL namespace
        attrs = element.attributes
        return any(attrs.item(i).value == XbrlConst.ixbrl
                   for i in range(len(attrs)))

    # the document's root is its first element node (nodeType 1)
    rootNode = next((node for node in xmlDocument.childNodes
                     if node.nodeType == 1), None)
    if rootNode is None:
        return None

    ln = rootNode.localName
    ns = rootNode.namespaceURI

    # type classification; the default covers link:/xbrli: roots whose
    # local name is not recognised (previously left unassigned)
    type = Type.Unknown
    if ns == XbrlConst.xsd and ln == "schema":
        type = Type.SCHEMA
    elif ns == XbrlConst.link:
        if ln == "linkbase":
            type = Type.LINKBASE
        elif ln == "xbrl":
            type = Type.INSTANCE
    elif ns == XbrlConst.xbrli:
        if ln == "xbrl":
            type = Type.INSTANCE
    elif ns == XbrlConst.xhtml and \
         (ln == "html" or ln == "xhtml"):
        # parenthesised: without it, "and" binds tighter than "or" and any
        # root named "xhtml" matched regardless of its namespace
        type = Type.INLINEXBRL if declaresInlineXbrl(rootNode) else Type.Unknown
        XmlUtil.markIdAttributes(rootNode)  # required for minidom searchability
    elif ln == "report" and ns == XbrlConst.ver:
        type = Type.VERSIONINGREPORT
    elif ln == "testcases" or ln == "documentation":
        type = Type.TESTCASESINDEX
    elif ln == "testcase":
        type = Type.TESTCASE
    elif ln == "registry" and ns == XbrlConst.registry:
        type = Type.REGISTRY
    elif ln == "rss":
        type = Type.RSSFEED
    else:
        type = Type.Unknown
        # look for an (x)html element nested inside some other wrapper
        nestedInline = XmlUtil.descendant(rootNode, XbrlConst.xhtml, ("html", "xhtml"))
        if nestedInline and declaresInlineXbrl(nestedInline):
            type = Type.INLINEXBRL
            rootNode = nestedInline
        XmlUtil.markIdAttributes(rootNode)  # required for minidom searchability

    # create modelDocument object or subtype as identified
    if type == Type.VERSIONINGREPORT:
        from arelle.ModelVersReport import ModelVersReport
        modelDocument = ModelVersReport(modelXbrl, type, mappedUri, filepath, xmlDocument)
    elif type == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, type, mappedUri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, type, mappedUri, filepath, xmlDocument)
    modelDocument.xmlRootElement = rootNode
    modelDocument.schemaLocationElements.add(rootNode)

    if isEntry:
        modelDocument.inDTS = True

    # discovery (parsing)
    if type == Type.SCHEMA:
        modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
    elif type == Type.LINKBASE:
        modelDocument.linkbaseDiscover(rootNode)
    elif type == Type.INSTANCE:
        modelDocument.instanceDiscover(rootNode)
    elif type == Type.INLINEXBRL:
        modelDocument.inlineXbrlDiscover(rootNode)
    elif type == Type.VERSIONINGREPORT:
        modelDocument.versioningReportDiscover(rootNode)
    elif type == Type.TESTCASESINDEX:
        modelDocument.testcasesIndexDiscover(xmlDocument)
    elif type == Type.TESTCASE:
        modelDocument.testcaseDiscover(rootNode)
    elif type == Type.REGISTRY:
        modelDocument.registryDiscover(rootNode)
    elif type == Type.RSSFEED:
        modelDocument.rssFeedDiscover(rootNode)
    return modelDocument