def normalizeUrl(self, url, base=None):
    if url and not (isHttpUrl(url) or os.path.isabs(url)):
        if base is not None and not isHttpUrl(base) and u'%' in url:
            url = unquote(url)
        if base:
            if isHttpUrl(base):
                scheme, sep, path = base.partition(u"://")
                normedPath = scheme + sep + posixpath.normpath(os.path.dirname(path) + u"/" + url)
            else:
                if u'%' in base:
                    base = unquote(base)
                normedPath = os.path.normpath(os.path.join(os.path.dirname(base), url))
        else:  # includes base == '' (for forcing relative path)
            normedPath = url
        if normedPath.startswith(u"file://"):
            normedPath = normedPath[7:]
        elif normedPath.startswith(u"file:\\"):
            normedPath = normedPath[6:]
        # no base, not normalized, must be relative to current working directory
        if base is None and not os.path.isabs(url):
            normedPath = os.path.abspath(normedPath)
    else:
        normedPath = url
    if normedPath:
        if isHttpUrl(normedPath):
            scheme, sep, pathpart = normedPath.partition(u"://")
            pathpart = pathpart.replace(u'\\', u'/')
            endingSep = u'/' if pathpart[-1] == u'/' else u''  # normpath drops ending directory separator
            return scheme + u"://" + posixpath.normpath(pathpart) + endingSep
        normedPath = os.path.normpath(normedPath)
        if normedPath.startswith(self.cacheDir):
            normedPath = self.cacheFilepathToUrl(normedPath)
    return normedPath
def ok(self, event=None):
    selection = self.treeView.selection()
    if len(selection) > 0:
        if self.openType in (ARCHIVE, DISCLOSURE_SYSTEM):
            filename = self.filenames[int(selection[0][4:])]
            if isinstance(filename, tuple):
                if self.isRss:
                    filename = filename[4]
                else:
                    filename = filename[0]
            if not filename.endswith("/"):
                self.filesource.select(filename)
                self.accepted = True
                self.close()
        elif self.openType == ENTRY_POINTS:
            epName = selection[0]
            # index 0 is the remapped Url, as opposed to the canonical one used for display
            urlOrFile = self.nameToUrls[epName][0]
            # load file source remappings
            self.filesource.mappedPaths = dict(
                (prefix,
                 remapping if isHttpUrl(remapping)
                 else (self.filesource.baseurl + os.sep + self.metadataFilePrefix + remapping.replace("/", os.sep)))
                for prefix, remapping in self.remappings.items())
            if not urlOrFile.endswith("/"):
                # check if it's an absolute URL rather than a path into the archive
                if isHttpUrl(urlOrFile):
                    self.filesource.select(urlOrFile)  # absolute path selection
                else:
                    # assume it's a path inside the archive:
                    self.filesource.select(self.metadataFilePrefix + urlOrFile)
                self.accepted = True
                self.close()
def ok(self, event=None): selection = self.treeView.selection() if len(selection) > 0: if hasattr(self, "taxonomyPackage"): # load file source remappings self.filesource.mappedPaths = self.taxonomyPackage["remappings"] filename = None if self.openType in (ARCHIVE, DISCLOSURE_SYSTEM): filename = self.filenames[int(selection[0][4:])] if isinstance(filename,tuple): if self.isRss: filename = filename[4] else: filename = filename[0] elif self.openType == ENTRY_POINTS: epName = selection[0] #index 0 is the remapped Url, as opposed to the canonical one used for display # Greg Acsone reports [0] does not work for Corep 1.6 pkgs, need [1], old style packages filenames = [] for _url, _type in self.packageContainedInstances: # check if selection was an inline instance if _type == epName: filenames.append(_url) if not filenames: # else if it's a named taxonomy entry point for url in self.taxonomyPackage["entryPoints"][epName]: filename = url[0] if not filename.endswith("/"): # check if it's an absolute URL rather than a path into the archive if not isHttpUrl(filename) and self.metadataFilePrefix != self.taxonomyPkgMetaInf: # assume it's a path inside the archive: filename = self.metadataFilePrefix + filename filenames.append(filename) if filenames: self.filesource.select(filenames) self.accepted = True self.close() return elif self.openType in (PLUGIN, PACKAGE): filename = self.filenames[int(selection[0][4:])][2] if filename is not None and not filename.endswith("/"): if hasattr(self, "taxonomyPackage"): # attempt to unmap the filename to original file # will be mapped again in loading, but this allows schemaLocation to be unmapped for prefix, remapping in self.taxonomyPackage["remappings"].items(): if isHttpUrl(remapping): remapStart = remapping else: remapStart = self.metadataFilePrefix + remapping if filename.startswith(remapStart): # set unmmapped file filename = prefix + filename[len(remapStart):] break if self.openType in (PLUGIN, PACKAGE): self.filesource.selection = filename else: self.filesource.select(filename) self.accepted = True self.close()
def ok(self, event=None): selection = self.treeView.selection() if len(selection) > 0: if hasattr(self, "taxonomyPackage"): # load file source remappings self.filesource.mappedPaths = self.taxonomyPackage[ "remappings"] filename = None if self.openType in (ARCHIVE, DISCLOSURE_SYSTEM): filename = self.filenames[int(selection[0][4:])] if isinstance(filename, tuple): if self.isRss: filename = filename[4] else: filename = filename[0] elif self.openType == ENTRY_POINTS: epName = selection[0] #index 0 is the remapped Url, as opposed to the canonical one used for display # Greg Acsone reports [0] does not work for Corep 1.6 pkgs, need [1], old style packages filenames = [] for url in self.taxonomyPackage["entryPoints"][epName]: filename = url[0] if not filename.endswith("/"): # check if it's an absolute URL rather than a path into the archive if not isHttpUrl( filename ) and self.metadataFilePrefix != self.taxonomyPkgMetaInf: # assume it's a path inside the archive: filename = self.metadataFilePrefix + filename filenames.append(filename) if filenames: self.filesource.select(filenames) self.accepted = True self.close() return elif self.openType == PLUGIN: filename = self.filenames[int(selection[0][4:])][2] if filename is not None and not filename.endswith("/"): if hasattr(self, "taxonomyPackage"): # attempt to unmap the filename to original file # will be mapped again in loading, but this allows schemaLocation to be unmapped for prefix, remapping in self.taxonomyPackage[ "remappings"].items(): if isHttpUrl(remapping): remapStart = remapping else: remapStart = self.metadataFilePrefix + remapping if filename.startswith(remapStart): # set unmmapped file filename = prefix + filename[len(remapStart):] break if self.openType == PLUGIN: self.filesource.selection = filename else: self.filesource.select(filename) self.accepted = True self.close()
def openFileStream(cntlr, filepath, mode='r', encoding=None): if PackageManager.isMappedUrl(filepath): filepath = PackageManager.mappedUrl(filepath) elif isHttpUrl(filepath) and cntlr and hasattr( cntlr, "modelManager" ): # may be called early in initialization for PluginManager filepath = cntlr.modelManager.disclosureSystem.mappedUrl(filepath) if archiveFilenameParts(filepath): # file is in an archive return openFileSource(filepath, cntlr).file(filepath, binary='b' in mode, encoding=encoding)[0] if isHttpUrl(filepath) and cntlr: _cacheFilepath = cntlr.webCache.getfilename( filepath, normalize=True ) # normalize is separate step in ModelDocument retrieval, combined here if _cacheFilepath is None: raise IOError(_("Unable to open file: {0}.").format(filepath)) filepath = _cacheFilepath # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE) and cntlr: filestream = None cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/") if cntlr.isGAE: # check if in memcache cachedBytes = gaeGet(cacheKey) if cachedBytes: filestream = io.BytesIO(cachedBytes) if filestream is None: filestream = io.BytesIO() cntlr.webCache.retrieve( cntlr.webCache.cacheFilepathToUrl(filepath), filestream=filestream) if cntlr.isGAE: gaeSet(cacheKey, filestream.getvalue()) if mode.endswith('t') or encoding: contents = filestream.getvalue() filestream.close() filestream = FileNamedStringIO( filepath, contents.decode(encoding or 'utf-8')) return filestream # local file system elif encoding is None and 'b' not in mode: openedFileStream = io.open(filepath, mode='rb') hdrBytes = openedFileStream.read(512) encoding = XmlUtil.encoding(hdrBytes, default=None) openedFileStream.close() return io.open(filepath, mode=mode, encoding=encoding) else: # local file system return io.open(filepath, mode=mode, encoding=encoding)
def basedUrl(self, selection):
    if isHttpUrl(selection) or os.path.isabs(selection):
        return selection
    elif self.baseIsHttp or os.sep == '/':
        return self.baseurl + "/" + selection
    else:  # MSFT os.sep == '\\'
        return self.baseurl + os.sep + selection.replace("/", os.sep)
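# A minimal usage sketch for basedUrl above (hypothetical `fs` object with
# baseurl "C:\\data\\pkg.zip" on Windows, where os.sep == '\\' and baseIsHttp
# is False; names and values are illustrative, not from the source):
#
#   fs.basedUrl("reports/ixbrl.xhtml")
#   -> "C:\\data\\pkg.zip\\reports\\ixbrl.xhtml"   # "/" remapped to os.sep
#
#   fs.basedUrl("http://example.com/a.xsd")
#   -> "http://example.com/a.xsd"                  # absolute URLs pass through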
def __init__(self, url, cntlr=None, checkIfXmlIsEis=False):
    self.url = str(url)  # allow either string or FileNamedStringIO
    self.baseIsHttp = isHttpUrl(self.url)
    self.cntlr = cntlr
    self.type = self.url.lower()[-4:]
    self.isZip = self.type == ".zip"
    self.isEis = self.type == ".eis"
    self.isXfd = (self.type == ".xfd" or self.type == ".frm")
    self.isRss = (self.type == ".rss" or self.url.endswith(".rss.xml"))
    self.isOpen = False
    self.fs = None
    self.selection = None
    self.filesDir = None
    self.referencedFileSources = {}  # archive file name, fileSource object
    self.mappedPaths = {}  # remappings of path segments may be loaded by taxonomyPackage manifest

    # for SEC xml files, check if it's an EIS anyway
    if (not (self.isZip or self.isEis or self.isXfd or self.isRss)
            and self.type == ".xml" and checkIfXmlIsEis):
        try:
            file = open(self.cntlr.webCache.getfilename(self.url), 'r', errors='replace')
            l = file.read(128)
            file.close()
            if re.match(r"\s*(<[?]xml[^?]+[?]>)?\s*<cor[a-z]*:edgarSubmission", l):
                self.isEis = True
        except EnvironmentError as err:
            if self.cntlr:
                self.cntlr.addToLog(_("[{0}] {1}").format(type(err).__name__, err))
            pass
def openFileStream(cntlr, filepath, mode='r', encoding=None): if isHttpUrl(filepath) and cntlr: filepath = cntlr.webCache.getfilename(filepath) # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE) and cntlr: filestream = None cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/") if cntlr.isGAE: # check if in memcache cachedBytes = gaeGet(cacheKey) if cachedBytes: filestream = io.BytesIO(cachedBytes) if filestream is None: filestream = io.BytesIO() cntlr.webCache.retrieve( cntlr.webCache.cacheFilepathToUrl(filepath), filestream=filestream) if cntlr.isGAE: gaeSet(cacheKey, filestream.getvalue()) if mode.endswith('t') or encoding: contents = filestream.getvalue() filestream.close() filestream = FileNamedStringIO( filepath, contents.decode(encoding or 'utf-8')) return filestream # local file system elif encoding is None and 'b' not in mode: openedFileStream = io.open(filepath, mode='rb') hdrBytes = openedFileStream.read(512) encoding = XmlUtil.encoding(hdrBytes, default=None) openedFileStream.close() return io.open(filepath, mode=mode, encoding=encoding) else: # local file system return io.open(filepath, mode=mode, encoding=encoding)
def package(dts): if dts.fileSource.isArchive: return import os from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED from arelle.UrlUtil import isHttpUrl try: import zlib compression = ZIP_DEFLATED except ImportError: compression = ZIP_STORED dts.info("info:packageDTS", _("Python's zlib module is not available, output is not compressed."), modelObject=dts) entryFilename = dts.fileSource.url pkgFilename = entryFilename + ".zip" with ZipFile(pkgFilename, 'w', compression) as zipFile: numFiles = 0 for fileUri in sorted(dts.urlDocs.keys()): if not isHttpUrl(fileUri): numFiles += 1 # this has to be a relative path because the hrefs will break zipFile.write(fileUri, os.path.basename(fileUri)) dts.info("info:packageDTS", _("DTS of %(entryFile)s has %(numberOfFiles)s files packaged into %(packageOutputFile)s."), modelObject=dts, entryFile=entryFilename, numberOfFiles=numFiles, packageOutputFile=pkgFilename)
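# Hypothetical invocation of package() above (not from the source; assumes
# `dts` is a loaded model whose fileSource is a local, non-archive entry point):
#
#   package(dts)   # writes "<entry-file>.zip" beside the entry file, holding
#                  # every non-HTTP document of dts.urlDocs flattened by basename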
def openFileStream(cntlr, filepath, mode='r', encoding=None):
    if isHttpUrl(filepath) and cntlr:
        _cacheFilepath = cntlr.webCache.getfilename(filepath)
        if _cacheFilepath is None:
            raise IOError(_("Unable to open file: {0}.").format(filepath))
        filepath = _cacheFilepath
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/")
        if cntlr.isGAE:  # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath),
                                    filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    elif encoding is None and 'b' not in mode:
        openedFileStream = io.open(filepath, mode='rb')
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
        openedFileStream.close()
        return io.open(filepath, mode=mode, encoding=encoding)
    else:  # local file system
        return io.open(filepath, mode=mode, encoding=encoding)
def addLocallyReferencedFile(elt): if elt.tag in ("a", "img", "{http://www.w3.org/1999/xhtml}a", "{http://www.w3.org/1999/xhtml}img"): for attrTag, attrValue in elt.items(): if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue): file = os.path.join(sourceDir,attrValue) if os.path.exists(file): self.reportedFiles.add(os.path.join(sourceDir,attrValue))
def openFileStream(cntlr, filepath, mode='r', encoding=None):
    if isHttpUrl(filepath) and cntlr:
        filepath = cntlr.webCache.getfilename(filepath)
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr and not cntlr.localOnly:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/")
        if cntlr.isGAE:  # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath),
                                    filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    # local file system
    elif encoding is None and 'b' not in mode:
        openedFileStream = io.open(filepath, mode='rb')
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
        openedFileStream.close()
        return io.open(filepath, mode=mode, encoding=encoding)
    else:  # local file system
        if cntlr and cntlr.isGAE and cntlr.localOnly and filepath[0] == '/':
            # can't do an absolute path here, so presume it is relative to the path of the app source
            filepath = '.%s' % filepath
        return io.open(filepath, mode=mode, encoding=encoding)
def createFacts(facts, parent): for fact in facts: if fact.isItem: attrs = {"contextRef": fact.contextID} if fact.id: attrs["id"] = fact.id if fact.isNumeric: attrs["unitRef"] = fact.unitID if fact.get("decimals"): attrs["decimals"] = fact.get("decimals") if fact.get("precision"): attrs["precision"] = fact.get("precision") if fact.isNil: attrs[XbrlConst.qnXsiNil] = "true" text = None else: text = fact.xValue if fact.xValid else fact.textValue newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent) newFactForOldObjId[fact.objectIndex] = newFact if filingFiles and fact.concept is not None and fact.concept.isTextBlock: # check for img and other filing references for xmltext in [text] + CDATApattern.findall(text): try: for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)): if elt.tag in ("a", "img") and not isHttpUrl(attrValue) and not os.path.isabs(attrvalue): for attrTag, attrValue in elt.items(): if attrTag in ("href", "src"): filingFiles.add(attrValue) except (XMLSyntaxError, UnicodeDecodeError): pass elif fact.isTuple: newTuple = targetInstance.createFact(fact.qname, parent=parent) newFactForOldObjId[fact.objectIndex] = newTuple createFacts(fact.modelTupleFacts, newTuple)
def openFileStream(cntlr, filepath, mode='r', encoding=None): if isHttpUrl(filepath) and cntlr: filepath = cntlr.webCache.getfilename(filepath) # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE) and cntlr: filestream = None cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\","/") if cntlr.isGAE: # check if in memcache cachedBytes = gaeGet(cacheKey) if cachedBytes: filestream = io.BytesIO(cachedBytes) if filestream is None: filestream = io.BytesIO() cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath), filestream=filestream) if cntlr.isGAE: gaeSet(cacheKey, filestream.getvalue()) if mode.endswith('t') or encoding: contents = filestream.getvalue() filestream.close() filestream = io.StringIO(contents.decode(encoding or 'utf-8')) return filestream else: # local file system return io.open(filepath, mode=mode, encoding=encoding)
def __init__(self, url, cntlr=None, checkIfXmlIsEis=False): self.url = str(url) # allow either string or FileNamedStringIO self.baseIsHttp = isHttpUrl(self.url) self.cntlr = cntlr self.type = self.url.lower()[-7:] self.isTarGz = self.type == ".tar.gz" if not self.isTarGz: self.type = self.type[3:] self.isZip = self.type == ".zip" self.isEis = self.type == ".eis" self.isXfd = self.type == ".xfd" or self.type == ".frm" self.isRss = self.type == ".rss" or self.url.endswith(".rss.xml") self.isInstalledTaxonomyPackage = False self.isOpen = False self.fs = None self.selection = None self.filesDir = None self.referencedFileSources = {} # archive file name, fileSource object self.mappedPaths = None # remappings of path segments may be loaded by taxonomyPackage manifest # for SEC xml files, check if it's an EIS anyway if not (self.isZip or self.isEis or self.isXfd or self.isRss) and self.type == ".xml": if os.path.split(self.url)[-1] in TAXONOMY_PACKAGE_FILE_NAMES: self.isInstalledTaxonomyPackage = True elif checkIfXmlIsEis: try: file = open(self.cntlr.webCache.getfilename(self.url), "r", errors="replace") l = file.read(128) file.close() if re.match(r"\s*(<[?]xml[^?]+[?]>)?\s*<(cor[a-z]*:)?edgarSubmission", l): self.isEis = True except EnvironmentError as err: if self.cntlr: self.cntlr.addToLog(_("[{0}] {1}").format(type(err).__name__, err)) pass
def package(dts):
    if dts.fileSource.isArchive:
        return
    import os
    from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
    from arelle.UrlUtil import isHttpUrl
    try:
        import zlib
        compression = ZIP_DEFLATED
    except ImportError:
        compression = ZIP_STORED
        dts.info("info:packageDTS",
                 _("Python's zlib module is not available, output is not compressed."),
                 modelObject=dts)
    entryFilename = dts.fileSource.url
    pkgFilename = entryFilename + ".zip"
    with ZipFile(pkgFilename, 'w', compression) as zipFile:
        numFiles = 0
        for fileUri in sorted(dts.urlDocs.keys()):
            if not isHttpUrl(fileUri):
                numFiles += 1
                # this has to be a relative path because the hrefs will break
                zipFile.write(fileUri, os.path.basename(fileUri))
    dts.info("info:packageDTS",
             _("DTS of %(entryFile)s has %(numberOfFiles)s files packaged into %(packageOutputFile)s."),
             modelObject=dts, entryFile=entryFilename,
             numberOfFiles=numFiles, packageOutputFile=pkgFilename)
def ok(self, event=None): selection = self.treeView.selection() if len(selection) > 0: if hasattr(self, "taxonomyPackage"): # load file source remappings self.filesource.mappedPaths = self.taxonomyPackage["remappings"] filename = None if self.openType in (ARCHIVE, DISCLOSURE_SYSTEM): filename = self.filenames[int(selection[0][4:])] if isinstance(filename,tuple): if self.isRss: filename = filename[4] else: filename = filename[0] elif self.openType == ENTRY_POINTS: epName = selection[0] self.filesource.reportName = epName #index 0 is the remapped Url, as opposed to the canonical one used for display # Greg Acsone reports [0] does not work for Corep 1.6 pkgs, need [1], old style packages filename = self.taxonomyPackage["nameToUrls"][epName][1] if not filename.endswith("/"): # check if it's an absolute URL rather than a path into the archive if not isHttpUrl(filename) and self.metadataFilePrefix != self.taxonomyPkgMetaInf: # assume it's a path inside the archive: filename = self.metadataFilePrefix + filename if filename is not None and not filename.endswith("/"): if hasattr(self, "taxonomyPackage"): # attempt to unmap the filename to original file # will be mapped again in loading, but this allows schemaLocation to be unmapped for prefix, remapping in self.taxonomyPackage["remappings"].items(): if isHttpUrl(remapping): remapStart = remapping elif self.filesource.isArchive: remapStart = remapping[len(self.filesource.baseurl):] if remapStart.startswith('\\'): remapStart = remapStart[1:] if remapStart.endswith('\\'): remapStart = remapStart[:-1]+'/' else: remapStart = self.metadataFilePrefix + remapping if filename.startswith(remapStart): # set unmmapped file filename = prefix + filename[len(remapStart):] break self.filesource.select(filename) self.accepted = True self.close()
def select(self, selection):
    self.selection = selection
    if isHttpUrl(selection) or os.path.isabs(selection):
        self.url = selection
    elif self.baseIsHttp or os.sep == '/':
        self.url = self.baseurl + "/" + selection
    else:  # MSFT os.sep == '\\'
        self.url = self.baseurl + os.sep + selection.replace("/", os.sep)
def getAttachmentFilename(self, url):  # get the filename attachment from the header
    if url and isHttpUrl(url):
        try:
            fp = self.opener.open(url, timeout=self.timeout)
            return cgi.parse_header(fp.headers.get("Content-Disposition"))[1]["filename"]
        except Exception:
            pass
    return None
def packageInfo(URL, reload=False):
    # TODO several directories, eg User Application Data
    packageFilename = _cntlr.webCache.getfilename(URL, reload=reload, normalize=True)
    if packageFilename:
        filesource = None
        try:
            global openFileSource
            if openFileSource is None:
                from arelle.FileSource import openFileSource
            filesource = openFileSource(packageFilename, _cntlr)
            if filesource.isZip:
                metadataFiles = filesource.taxonomyPackageMetadataFiles
                if len(metadataFiles) != 1:
                    raise IOError(_("Taxonomy package contained more than one metadata file: {0}.").format(
                        ", ".join(metadataFiles)))
                metadataFile = metadataFiles[0]
                metadata = filesource.file(filesource.url + os.sep + metadataFile)[0]
                metadataFilePrefix = os.sep.join(os.path.split(metadataFile)[:-1])
                if metadataFilePrefix:
                    metadataFilePrefix += os.sep
                metadataFilePrefix = filesource.baseurl + os.sep + metadataFilePrefix
            elif os.path.basename(filesource.url) == ".taxonomyPackage.xml":  # individual manifest file
                metadataFile = metadata = filesource.url
                metadataFilePrefix = os.sep.join(os.path.split(metadataFile)[:-1])
                if metadataFilePrefix:
                    metadataFilePrefix += os.sep
            else:
                raise IOError(_("File must be a taxonomy package (zip file) or manifest (.taxonomyPackage.xml): {0}.").format(
                    packageFilename))
            parsedPackage = parsePackage(_cntlr, metadata)
            package = {
                "name": parsedPackage["name"],
                "status": "enabled",
                "version": parsedPackage["version"],
                "fileDate": time.strftime("%Y-%m-%dT%H:%M:%S UTC",
                                          time.gmtime(os.path.getmtime(packageFilename))),
                "URL": URL,
                "description": parsedPackage["description"],
                "remappings": dict(
                    (prefix,
                     remapping if isHttpUrl(remapping)
                     else (metadataFilePrefix + remapping.replace("/", os.sep)))
                    for prefix, remapping in parsedPackage["remappings"].items()),
            }
            filesource.close()
            return package
        except EnvironmentError:
            pass
        if filesource:
            filesource.close()
    return None
def ok(self, event=None): selection = self.treeView.selection() if len(selection) > 0: if hasattr(self, "taxonomyPackage"): # load file source remappings self.filesource.mappedPaths = \ dict((prefix, remapping if isHttpUrl(remapping) else (self.filesource.baseurl + os.sep + self.metadataFilePrefix +remapping.replace("/", os.sep))) for prefix, remapping in self.taxonomyPackage["remappings"].items()) filename = None if self.openType in (ARCHIVE, DISCLOSURE_SYSTEM): filename = self.filenames[int(selection[0][4:])] if isinstance(filename, tuple): if self.isRss: filename = filename[4] else: filename = filename[0] elif self.openType == ENTRY_POINTS: epName = selection[0] #index 0 is the remapped Url, as opposed to the canonical one used for display filename = self.taxonomyPackage["nameToUrls"][epName][0] if not filename.endswith("/"): # check if it's an absolute URL rather than a path into the archive if not isHttpUrl(filename): # assume it's a path inside the archive: filename = self.metadataFilePrefix + filename if filename is not None and not filename.endswith("/"): if hasattr(self, "taxonomyPackage"): # attempt to unmap the filename to original file # will be mapped again in loading, but this allows schemaLocation to be unmapped for prefix, remapping in self.taxonomyPackage[ "remappings"].items(): if isHttpUrl(remapping): remapStart = remapping else: remapStart = self.metadataFilePrefix + remapping if filename.startswith(remapStart): # set unmmapped file filename = prefix + filename[len(remapStart):] break self.filesource.select(filename) self.accepted = True self.close()
def archiveFilenameParts(filename, checkIfXmlIsEis=False):
    # check if path has an archive file plus appended in-archive content reference
    for archiveSep in archivePathSeparators:
        if (filename and archiveSep in filename
                and (not archiveSep.startswith(".xml") or checkIfXmlIsEis)):
            filenameParts = filename.partition(archiveSep)
            fileDir = filenameParts[0] + archiveSep[:-1]
            if (isHttpUrl(fileDir)
                    or os.path.isfile(fileDir)):  # if local, be sure it is not a directory name
                return (fileDir, filenameParts[2])
    return None
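# Illustrative behavior of archiveFilenameParts above (assumes ".zip/" is among
# the module's archivePathSeparators; the paths are hypothetical):
#
#   archiveFilenameParts("filings/pkg.zip/reports/instance.xml")
#   -> ("filings/pkg.zip", "reports/instance.xml")   # when filings/pkg.zip exists locally
#
#   archiveFilenameParts("filings/plain-dir/instance.xml")
#   -> None                                          # no archive separator in the path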
def disallowedHrefOfNamespace(self, href, namespaceUri): if namespaceUri in self.standardTaxonomiesDict: if href in self.standardTaxonomiesDict[namespaceUri]: return False if namespaceUri in self.standardLocalHrefs and not isHttpUrl(href): normalizedHref = href.replace("\\", "/") if any(normalizedHref.endswith(localHref) for localHref in self.standardLocalHrefs[namespaceUri]): return False return False
def ok(self, event=None): selection = self.treeView.selection() if len(selection) > 0: if hasattr(self, "taxonomyPackage"): # load file source remappings self.filesource.mappedPaths = \ dict((prefix, remapping if isHttpUrl(remapping) else (self.filesource.baseurl + os.sep + self.metadataFilePrefix +remapping.replace("/", os.sep))) for prefix, remapping in self.taxonomyPackage["remappings"].items()) filename = None if self.openType in (ARCHIVE, DISCLOSURE_SYSTEM): filename = self.filenames[int(selection[0][4:])] if isinstance(filename,tuple): if self.isRss: filename = filename[4] else: filename = filename[0] elif self.openType == ENTRY_POINTS: epName = selection[0] #index 0 is the remapped Url, as opposed to the canonical one used for display filename = self.taxonomyPackage["nameToUrls"][epName][0] if not filename.endswith("/"): # check if it's an absolute URL rather than a path into the archive if not isHttpUrl(filename): # assume it's a path inside the archive: filename = self.metadataFilePrefix + filename if filename is not None and not filename.endswith("/"): if hasattr(self, "taxonomyPackage"): # attempt to unmap the filename to original file # will be mapped again in loading, but this allows schemaLocation to be unmapped for prefix, remapping in self.taxonomyPackage["remappings"].items(): if isHttpUrl(remapping): remapStart = remapping else: remapStart = self.metadataFilePrefix + remapping if filename.startswith(remapStart): # set unmmapped file filename = prefix + filename[len(remapStart):] break self.filesource.select(filename) self.accepted = True self.close()
def getheaders(self, url):
    if url and isHttpUrl(url):
        try:
            fp = self.opener.open(url, timeout=self.timeout)
            headers = fp.info()
            fp.close()
            return headers
        except Exception:
            pass
    return {}
def addLocallyReferencedFile(elt): if elt.tag in ("a", "img", "{http://www.w3.org/1999/xhtml}a", "{http://www.w3.org/1999/xhtml}img"): for attrTag, attrValue in elt.items(): if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue): attrValue = attrValue.partition('#')[0] # remove anchor if attrValue: # ignore anchor references to base document attrValue = os.path.normpath(attrValue) # change url path separators to host separators file = os.path.join(sourceDir,attrValue) if modelXbrl.fileSource.isInArchive(file, checkExistence=True) or os.path.exists(file): self.reportedFiles.add(attrValue) # add file name within source directory
def geturl(self, url):  # get the url that the argument url redirects or resolves to
    if url and isHttpUrl(url):
        try:
            fp = self.opener.open(url, timeout=self.timeout)
            actualurl = fp.geturl()
            fp.close()
            return actualurl
        except Exception:
            pass
    return None
def addLocallyReferencedFile(elt,filingFiles): if elt.tag in ("a", "img"): for attrTag, attrValue in elt.items(): if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue): attrValue = attrValue.partition('#')[0] # remove anchor if attrValue: # ignore anchor references to base document attrValue = os.path.normpath(attrValue) # change url path separators to host separators file = os.path.join(sourceDir,attrValue) if modelXbrl.fileSource.isInArchive(file, checkExistence=True) or os.path.exists(file): filingFiles.add(file)
def disallowedHrefOfNamespace(self, href, namespaceUri): if namespaceUri in self.standardTaxonomiesDict: if href in self.standardTaxonomiesDict[namespaceUri]: return False if namespaceUri in self.standardLocalHrefs and not isHttpUrl(href): normalizedHref = href.replace("\\","/") if any(normalizedHref.endswith(localHref) for localHref in self.standardLocalHrefs[namespaceUri]): return False return False
def addLocallyReferencedFile(elt): if elt.tag in ("a", "img", "{http://www.w3.org/1999/xhtml}a", "{http://www.w3.org/1999/xhtml}img"): for attrTag, attrValue in elt.items(): if attrTag in ("href", "src") and not isHttpUrl( attrValue) and not os.path.isabs(attrValue): file = os.path.join(sourceDir, attrValue) if os.path.exists(file): self.reportedFiles.add( os.path.join(sourceDir, attrValue))
def getAttachmentFilename(self, url):  # get the filename attachment from the header
    if url and isHttpUrl(url):
        try:
            fp = self.opener.open(url, timeout=self.timeout)
            return cgi.parse_header(fp.headers.get("Content-Disposition"))[1]["filename"]
        except Exception:
            pass
    return None
def inlineXbrlDocumentSetLoader(modelXbrl, normalizedUri, filepath, isEntry=False, namespace=None, **kwargs):
    if isEntry:
        try:
            if "entrypoint" in kwargs:
                _target = kwargs["entrypoint"]["ixdsTarget"]
            elif "ixdsTarget" in kwargs:  # passed from validate (multio test cases)
                _target = kwargs["ixdsTarget"]
            else:
                _target = modelXbrl.modelManager.formulaOptions.parameterValues.get("ixdsTarget")[1]
        except (KeyError, AttributeError, IndexError, TypeError):
            _target = None
        modelXbrl.ixdsTarget = _target or None  # None if an empty string specified
    if IXDS_SURROGATE in normalizedUri:
        # create surrogate entry object for inline document set which references ix documents
        xml = ["<instances>\n"]
        modelXbrl.ixdsDocUrls = []
        schemeFixup = isHttpUrl(normalizedUri)  # schemes after separator have // normalized to single /
        msUNCfixup = modelXbrl.modelManager.cntlr.isMSW and normalizedUri.startswith("\\\\")  # path starts with double backslash \\
        if schemeFixup:
            defectiveScheme = normalizedUri.partition("://")[0] + ":/"
            fixupPosition = len(defectiveScheme)
        for i, url in enumerate(normalizedUri.split(IXDS_DOC_SEPARATOR)):
            if schemeFixup and url.startswith(defectiveScheme) and url[len(defectiveScheme)] != "/":
                url = url[:fixupPosition] + "/" + url[fixupPosition:]
            if i == 0:
                docsetUrl = url
            else:
                if msUNCfixup and not url.startswith("\\\\") and url.startswith("\\"):
                    url = "\\" + url
                xml.append("<instance>{}</instance>\n".format(url))
                modelXbrl.ixdsDocUrls.append(url)
        xml.append("</instances>\n")
        ixdocset = create(modelXbrl, Type.INLINEXBRLDOCUMENTSET, docsetUrl, isEntry=True, initialXml="".join(xml))
        ixdocset.type = Type.INLINEXBRLDOCUMENTSET
        ixdocset.targetDocumentSchemaRefs = set()  # union all the instance schemaRefs
        _firstdoc = True
        for elt in ixdocset.xmlRootElement.iter(tag="instance"):
            # load ix document
            ixdoc = load(modelXbrl, elt.text, referringElement=elt)
            if ixdoc is not None and ixdoc.type == Type.INLINEXBRL:
                # set reference to ix document in document set surrogate object
                referencedDocument = ModelDocumentReference("inlineDocument", elt)
                ixdocset.referencesDocument[ixdoc] = referencedDocument
                for referencedDoc in ixdoc.referencesDocument.keys():
                    if referencedDoc.type == Type.SCHEMA:
                        ixdocset.targetDocumentSchemaRefs.add(ixdoc.relativeUri(referencedDoc.uri))
                ixdocset.ixNS = ixdoc.ixNS  # set docset ixNS
                if _firstdoc:
                    _firstdoc = False
                    ixdocset.targetDocumentPreferredFilename = os.path.splitext(ixdoc.uri)[0] + ".xbrl"
                ixdoc.inDTS = True  # behaves like an entry
        if hasattr(modelXbrl, "ixdsHtmlElements"):  # has any inline root elements
            inlineIxdsDiscover(modelXbrl, ixdocset)  # compile cross-document IXDS references
        return ixdocset
    return None
def saveFile(cntlr, filepath, contents, encoding=None): if isHttpUrl(filepath): filepath = cntlr.webCache.getfilename(filepath) # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE): cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\","/") if cntlr.isGAE: # check if in memcache gaeSet(cacheKey, contents.encode(encoding or 'utf-8')) else: with io.open(filepath, 'wt', encoding=(encoding or 'utf-8')) as f: f.write(contents)
def archiveFilenameParts(filename, checkIfXmlIsEis=False):
    # check if path has an archive file plus appended in-archive content reference
    for archiveSep in archivePathSeparators:
        if (filename and archiveSep in filename
                and (not archiveSep.startswith(".xml") or checkIfXmlIsEis)):
            filenameParts = filename.partition(archiveSep)
            fileDir = filenameParts[0] + archiveSep[:-1]
            if (isHttpUrl(fileDir)
                    or os.path.isfile(fileDir)):  # if local, be sure it is not a directory name
                return (fileDir, filenameParts[2])
    return None
def saveFile(cntlr, filepath, contents, encoding=None): if isHttpUrl(filepath): filepath = cntlr.webCache.getfilename(filepath) # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE): cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/") if cntlr.isGAE: # check if in memcache gaeSet(cacheKey, contents.encode(encoding or 'utf-8')) else: with io.open(filepath, 'wt', encoding=(encoding or 'utf-8')) as f: f.write(contents)
def openFileStream(cntlr, filepath, mode='r', encoding=None): if PackageManager.isMappedUrl(filepath): filepath = PackageManager.mappedUrl(filepath) elif isHttpUrl(filepath) and cntlr and hasattr(cntlr, "modelManager"): # may be called early in initialization for PluginManager filepath = cntlr.modelManager.disclosureSystem.mappedUrl(filepath) if archiveFilenameParts(filepath): # file is in an archive return openFileSource(filepath, cntlr).file(filepath, binary='b' in mode, encoding=encoding)[0] if isHttpUrl(filepath) and cntlr: _cacheFilepath = cntlr.webCache.getfilename(filepath, normalize=True) # normalize is separate step in ModelDocument retrieval, combined here if _cacheFilepath is None: raise IOError(_("Unable to open file: {0}.").format(filepath)) filepath = _cacheFilepath # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE) and cntlr: filestream = None cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\","/") if cntlr.isGAE: # check if in memcache cachedBytes = gaeGet(cacheKey) if cachedBytes: filestream = io.BytesIO(cachedBytes) if filestream is None: filestream = io.BytesIO() cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath), filestream=filestream) if cntlr.isGAE: gaeSet(cacheKey, filestream.getvalue()) if mode.endswith('t') or encoding: contents = filestream.getvalue() filestream.close() filestream = FileNamedStringIO(filepath, contents.decode(encoding or 'utf-8')) return filestream # local file system elif encoding is None and 'b' not in mode: openedFileStream = io.open(filepath, mode='rb') hdrBytes = openedFileStream.read(512) encoding = XmlUtil.encoding(hdrBytes, default=None) openedFileStream.close() return io.open(filepath, mode=mode, encoding=encoding) else: # local file system return io.open(filepath, mode=mode, encoding=encoding)
def isStandardUri(val, uri):
    try:
        return val._isStandardUri[uri]
    except KeyError:
        isStd = (uri in val.disclosureSystem.standardTaxonomiesDict
                 or (not isHttpUrl(uri) and
                     # try 2011-12-23 RH: if works, remove the localHrefs
                     # any(u.endswith(e) for u in (uri.replace("\\","/"),) for e in disclosureSystem.standardLocalHrefs)
                     "/basis/sbr/" in uri.replace("\\", "/")))
        val._isStandardUri[uri] = isStd
        return isStd
def isStandardUri(val, uri):
    try:
        return val._isStandardUri[uri]
    except KeyError:
        isStd = (uri in val.disclosureSystem.standardTaxonomiesDict
                 or (not isHttpUrl(uri) and
                     # try 2011-12-23 RH: if works, remove the localHrefs
                     # any(u.endswith(e) for u in (uri.replace("\\","/"),) for e in disclosureSystem.standardLocalHrefs)
                     "/basis/sbr/" in uri.replace("\\", "/")))
        val._isStandardUri[uri] = isStd
        return isStd
def normalizeUrl(self, url, base=None): if url: if url.startswith("file://"): url = url[7:] elif url.startswith("file:\\"): url = url[6:] if url and not (isHttpUrl(url) or os.path.isabs(url)): if base is not None and not isHttpUrl(base) and '%' in url: url = unquote(url) if base: if isHttpUrl(base): scheme, sep, path = base.partition("://") normedPath = scheme + sep + posixpath.normpath( os.path.dirname(path) + "/" + url) else: if '%' in base: base = unquote(base) normedPath = os.path.normpath( os.path.join(os.path.dirname(base), url)) else: # includes base == '' (for forcing relative path) normedPath = url if normedPath.startswith("file://"): normedPath = normedPath[7:] elif normedPath.startswith("file:\\"): normedPath = normedPath[6:] # no base, not normalized, must be relative to current working directory if base is None and not os.path.isabs(url): normedPath = os.path.abspath(normedPath) else: normedPath = url if normedPath: if isHttpUrl(normedPath): scheme, sep, pathpart = normedPath.partition("://") pathpart = pathpart.replace('\\', '/') endingSep = '/' if pathpart[ -1] == '/' else '' # normpath drops ending directory separator return scheme + "://" + posixpath.normpath( pathpart) + endingSep normedPath = os.path.normpath(normedPath) if normedPath.startswith(self.cacheDir): normedPath = self.cacheFilepathToUrl(normedPath) return normedPath
def packageInfo(URL, reload=False): #TODO several directories, eg User Application Data packageFilename = _cntlr.webCache.getfilename(URL, reload=reload, normalize=True) if packageFilename: from arelle.FileSource import TAXONOMY_PACKAGE_FILE_NAMES filesource = None try: global openFileSource if openFileSource is None: from arelle.FileSource import openFileSource filesource = openFileSource(packageFilename, _cntlr) if filesource.isZip: metadataFiles = filesource.taxonomyPackageMetadataFiles ''' allow multiple if len(metadataFiles) != 1: raise IOError(_("Taxonomy package contained more than one metadata file: {0}.") .format(', '.join(metadataFiles))) ''' metadataFile = metadataFiles[0] metadata = filesource.file(filesource.url + os.sep + metadataFile)[0] metadataFilePrefix = os.sep.join(os.path.split(metadataFile)[:-1]) if metadataFilePrefix: metadataFilePrefix += os.sep metadataFilePrefix = filesource.baseurl + os.sep + metadataFilePrefix elif os.path.basename(filesource.url) in TAXONOMY_PACKAGE_FILE_NAMES: # individual manifest file metadataFile = metadata = filesource.url metadataFilePrefix = os.sep.join(os.path.split(metadataFile)[:-1]) if metadataFilePrefix: metadataFilePrefix += os.sep else: raise IOError(_("File must be a taxonomy package (zip file), catalog file, or manifest (): {0}.") .format(metadataFile, ', '.join(TAXONOMY_PACKAGE_FILE_NAMES))) parsedPackage = parsePackage(_cntlr, metadata) package = {'name': parsedPackage['name'], 'status': 'enabled', 'version': parsedPackage['version'], 'fileDate': time.strftime('%Y-%m-%dT%H:%M:%S UTC', time.gmtime(os.path.getmtime(packageFilename))), 'URL': URL, 'description': parsedPackage['description'], 'remappings': dict( (prefix, remapping if isHttpUrl(remapping) else (metadataFilePrefix +remapping.replace("/", os.sep))) for prefix, remapping in parsedPackage["remappings"].items()), } filesource.close() return package except EnvironmentError: pass if filesource: filesource.close() return None
def packageInfo(URL, reload=False): #TODO several directories, eg User Application Data packageFilename = _cntlr.webCache.getfilename(URL, reload=reload, normalize=True) if packageFilename: from arelle.FileSource import TAXONOMY_PACKAGE_FILE_NAMES filesource = None try: global openFileSource if openFileSource is None: from arelle.FileSource import openFileSource filesource = openFileSource(packageFilename, _cntlr) if filesource.isZip: metadataFiles = filesource.taxonomyPackageMetadataFiles ''' allow multiple if len(metadataFiles) != 1: raise IOError(_("Taxonomy package contained more than one metadata file: {0}.") .format(', '.join(metadataFiles))) ''' metadataFile = metadataFiles[0] metadata = filesource.file(filesource.url + os.sep + metadataFile)[0] metadataFilePrefix = os.sep.join(os.path.split(metadataFile)[:-1]) if metadataFilePrefix: metadataFilePrefix += os.sep metadataFilePrefix = filesource.baseurl + os.sep + metadataFilePrefix elif os.path.basename(filesource.url) in TAXONOMY_PACKAGE_FILE_NAMES: # individual manifest file metadataFile = metadata = filesource.url metadataFilePrefix = os.sep.join(os.path.split(metadataFile)[:-1]) if metadataFilePrefix: metadataFilePrefix += os.sep else: raise IOError(_("File must be a taxonomy package (zip file), catalog file, or manifest (): {0}.") .format(packageFilename, ', '.join(TAXONOMY_PACKAGE_FILE_NAMES))) parsedPackage = parsePackage(_cntlr, metadata) package = {'name': parsedPackage['name'], 'status': 'enabled', 'version': parsedPackage['version'], 'fileDate': time.strftime('%Y-%m-%dT%H:%M:%S UTC', time.gmtime(os.path.getmtime(packageFilename))), 'URL': URL, 'description': parsedPackage['description'], 'remappings': dict( (prefix, remapping if isHttpUrl(remapping) else (metadataFilePrefix +remapping.replace("/", os.sep))) for prefix, remapping in parsedPackage["remappings"].items()), } filesource.close() return package except EnvironmentError: pass if filesource: filesource.close() return None
def saveFile(cntlr, filepath, contents, encoding=None, mode='wt'): if isHttpUrl(filepath): _cacheFilepath = cntlr.webCache.getfilename(filepath) if _cacheFilepath is None: raise IOError(_("Unable to open file: {0}.").format(filepath)) filepath = _cacheFilepath # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE): cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/") if cntlr.isGAE: # check if in memcache gaeSet(cacheKey, contents.encode(encoding or 'utf-8')) else: with io.open(filepath, mode, encoding=(encoding or 'utf-8')) as f: f.write(contents)
def addReferencedFile(docElt, elt): if elt.tag in ("a", "img", "{http://www.w3.org/1999/xhtml}a", "{http://www.w3.org/1999/xhtml}img"): for attrTag, attrValue in elt.items(): if attrTag in ("href", "src") and ( not localFilesOnly or (not isHttpUrl(attrValue) and not os.path.isabs(attrValue))): attrValue = attrValue.partition('#')[0] # remove anchor if attrValue: # ignore anchor references to base document base = docElt.modelDocument.baseForElement(docElt) normalizedUri = docElt.modelXbrl.modelManager.cntlr.webCache.normalizeUrl(attrValue, base) if not docElt.modelXbrl.fileSource.isInArchive(normalizedUri): normalizedUri = docElt.modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri) if modelXbrl.fileSource.isInArchive(normalizedUri, checkExistence=True) or os.path.exists(normalizedUri): referencedFiles.add(attrValue) # add file name within source directory
def saveFile(cntlr, filepath, contents, encoding=None, mode='wt'): if isHttpUrl(filepath): _cacheFilepath = cntlr.webCache.getfilename(filepath) if _cacheFilepath is None: raise IOError(_("Unable to open file: {0}.").format(filepath)) filepath = _cacheFilepath # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE): cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\","/") if cntlr.isGAE: # check if in memcache gaeSet(cacheKey, contents.encode(encoding or 'utf-8')) else: with io.open(filepath, mode, encoding=(encoding or 'utf-8')) as f: f.write(contents)
def __init__(self, url, cntlr=None, checkIfXmlIsEis=False): global pluginClassMethods if pluginClassMethods is None: # dynamic import from arelle.PluginManager import pluginClassMethods self.url = str(url) # allow either string or FileNamedStringIO self.baseIsHttp = isHttpUrl(self.url) self.cntlr = cntlr self.type = self.url.lower()[-7:] self.isTarGz = self.type == ".tar.gz" if not self.isTarGz: self.type = self.type[3:] self.isZip = self.type == ".zip" self.isZipBackslashed = False # windows style backslashed paths self.isEis = self.type == ".eis" self.isXfd = (self.type == ".xfd" or self.type == ".frm") self.isRss = (self.type == ".rss" or self.url.endswith(".rss.xml")) self.isInstalledTaxonomyPackage = False self.isOpen = False self.fs = None self.selection = None self.filesDir = None self.referencedFileSources = {} # archive file name, fileSource object self.mappedPaths = None # remappings of path segments may be loaded by taxonomyPackage manifest # for SEC xml files, check if it's an EIS anyway if (not (self.isZip or self.isEis or self.isXfd or self.isRss) and self.type == ".xml"): if os.path.split(self.url)[-1] in TAXONOMY_PACKAGE_FILE_NAMES: self.isInstalledTaxonomyPackage = True elif checkIfXmlIsEis: try: file = open(self.cntlr.webCache.getfilename(self.url), 'r', errors='replace') l = file.read( 256) # may have comments before first element file.close() if re.match( r"\s*(<[?]xml[^?]+[?]>)?\s*(<!--.*-->\s*)*<(cor[a-z]*:|sdf:)?edgarSubmission", l): self.isEis = True except EnvironmentError as err: if self.cntlr: self.cntlr.addToLog( _("[{0}] {1}").format(type(err).__name__, err)) pass
def __init__(self, url, cntlr=None, checkIfXmlIsEis=False): global pluginClassMethods if pluginClassMethods is None: # dynamic import from arelle.PluginManager import pluginClassMethods self.url = str(url) # allow either string or FileNamedStringIO self.baseIsHttp = isHttpUrl(self.url) self.cntlr = cntlr self.type = self.url.lower()[-7:] self.isTarGz = self.type == ".tar.gz" if not self.isTarGz: self.type = self.type[3:] self.isZip = self.type == ".zip" self.isZipBackslashed = False # windows style backslashed paths self.isEis = self.type == ".eis" self.isXfd = (self.type == ".xfd" or self.type == ".frm") self.isRss = (self.type == ".rss" or self.url.endswith(".rss.xml")) self.isInstalledTaxonomyPackage = False self.isOpen = False self.fs = None self.selection = None self.filesDir = None self.referencedFileSources = {} # archive file name, fileSource object self.mappedPaths = None # remappings of path segments may be loaded by taxonomyPackage manifest # for SEC xml files, check if it's an EIS anyway if (not (self.isZip or self.isEis or self.isXfd or self.isRss) and self.type == ".xml"): if os.path.split(self.url)[-1] in TAXONOMY_PACKAGE_FILE_NAMES: self.isInstalledTaxonomyPackage = True elif checkIfXmlIsEis: try: file = open(self.cntlr.webCache.getfilename(self.url), 'r', errors='replace') l = file.read(256) # may have comments before first element file.close() if re.match(r"\s*(<[?]xml[^?]+[?]>)?\s*(<!--.*-->\s*)*<(cor[a-z]*:|sdf:)?edgarSubmission", l): self.isEis = True except EnvironmentError as err: if self.cntlr: self.cntlr.addToLog(_("[{0}] {1}").format(type(err).__name__, err)) pass
def createFacts(facts, parent): for fact in facts: if fact.isItem: attrs = {"contextRef": fact.contextID} if fact.id: attrs["id"] = fact.id if fact.isNumeric: attrs["unitRef"] = fact.unitID if fact.get("decimals"): attrs["decimals"] = fact.get("decimals") if fact.get("precision"): attrs["precision"] = fact.get("precision") if fact.isNil: attrs[XbrlConst.qnXsiNil] = "true" text = None else: text = fact.xValue if fact.xValid else fact.textValue newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent) newFactForOldObjId[fact.objectIndex] = newFact if filingFiles and fact.concept is not None and fact.concept.isTextBlock: # check for img and other filing references for xmltext in [text] + CDATApattern.findall(text): try: for elt in XML( "<body>\n{0}\n</body>\n".format(xmltext)): if elt.tag in ("a", "img") and not isHttpUrl( attrValue) and not os.path.isabs( attrvalue): for attrTag, attrValue in elt.items(): if attrTag in ("href", "src"): filingFiles.add(attrValue) except (XMLSyntaxError, UnicodeDecodeError): pass elif fact.isTuple: newTuple = targetInstance.createFact(fact.qname, parent=parent) newFactForOldObjId[fact.objectIndex] = newTuple createFacts(fact.modelTupleFacts, newTuple)
def __init__(self, url, cntlr=None, checkIfXmlIsEis=False): self.url = str(url) # allow either string or FileNamedStringIO self.baseIsHttp = isHttpUrl(self.url) self.cntlr = cntlr self.type = self.url.lower()[-7:] self.isTarGz = self.type == ".tar.gz" if not self.isTarGz: self.type = self.type[3:] self.isZip = self.type == ".zip" self.isEis = self.type == ".eis" self.isXfd = (self.type == ".xfd" or self.type == ".frm") self.isRss = (self.type == ".rss" or self.url.endswith(".rss.xml")) self.isInstalledTaxonomyPackage = False self.isOpen = False self.fs = None self.selection = None self.filesDir = None self.referencedFileSources = {} # archive file name, fileSource object self.mappedPaths = None # remappings of path segments may be loaded by taxonomyPackage manifest self.reportName = None # used when selecting an entry from a taxonomy package # for SEC xml files, check if it's an EIS anyway if (not (self.isZip or self.isEis or self.isXfd or self.isRss) and self.type == ".xml"): if os.path.split(self.url)[-1] in TAXONOMY_PACKAGE_FILE_NAMES: self.isInstalledTaxonomyPackage = True elif checkIfXmlIsEis: try: file = open(self.cntlr.webCache.getfilename(self.url), 'r', errors='replace') l = file.read(128) file.close() if re.match(r"\s*(<[?]xml[^?]+[?]>)?\s*<cor[a-z]*:edgarSubmission", l): self.isEis = True except EnvironmentError as err: if self.cntlr: self.cntlr.addToLog(_("[{0}] {1}").format(type(err).__name__, err)) pass
def open(self, reloadCache=False):
    if not self.isOpen:
        if (self.isZip or self.isTarGz or self.isEis or self.isXfd or
                self.isRss or self.isInstalledTaxonomyPackage) and self.cntlr:
            self.basefile = self.cntlr.webCache.getfilename(self.url, reload=reloadCache)
        else:
            self.basefile = self.url
        self.baseurl = self.url  # url gets changed by selection
        if not self.basefile:
            return  # an error should have been logged
        if self.isZip:
            try:
                self.fs = zipfile.ZipFile(openFileStream(self.cntlr, self.basefile, 'rb'), mode="r")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
                pass
        elif self.isTarGz:
            try:
                self.fs = tarfile.open(self.basefile, "r:gz")
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
                pass
        elif self.isEis:
            # check first line of file
            buf = b''
            try:
                file = open(self.basefile, 'rb')
                more = True
                while more:
                    l = file.read(8)
                    if len(l) < 8:
                        break
                    if len(buf) == 0 and l.startswith(b"<?xml "):  # not compressed
                        buf = l + file.read()  # not compressed
                        break
                    compressedBytes = file.read(struct.unpack(">L", l[0:4])[0])
                    if len(compressedBytes) <= 0:
                        break
                    buf += zlib.decompress(compressedBytes)
                file.close()
            except EnvironmentError as err:
                self.logError(err)
                pass
            #uncomment to save for debugging
            #with open("c:/temp/test.xml", "wb") as f:
            #    f.write(buf)
            if buf.startswith(b"<?xml "):
                try:
                    # must strip encoding
                    str = buf.decode(XmlUtil.encoding(buf))
                    endEncoding = str.index("?>", 0, 128)
                    if endEncoding > 0:
                        str = str[endEncoding + 2:]
                    file = io.StringIO(initial_value=str)
                    parser = etree.XMLParser(recover=True, huge_tree=True)
                    self.eisDocument = etree.parse(file, parser=parser)
                    file.close()
                    self.isOpen = True
                except EnvironmentError as err:
                    self.logError(err)
                    return  # provide error message later
                except etree.LxmlError as err:
                    self.logError(err)
                    return  # provide error message later
        elif self.isXfd:
            # check first line of file
            file = open(self.basefile, 'rb')
            firstline = file.readline()
            if firstline.startswith(b"application/x-xfdl;content-encoding=\"asc-gzip\""):
                # file has been gzipped
                base64input = file.read(-1)
                file.close()
                file = None
                fb = base64.b64decode(base64input)
                ungzippedBytes = b""
                totalLenUncompr = 0
                i = 0
                while i < len(fb):
                    lenCompr = fb[i + 0] * 256 + fb[i + 1]
                    lenUncomp = fb[i + 2] * 256 + fb[i + 3]
                    lenRead = 0
                    totalLenUncompr += lenUncomp
                    gzchunk = (bytes((31, 139, 8, 0)) + fb[i:i + lenCompr])
                    try:
                        with gzip.GzipFile(fileobj=io.BytesIO(gzchunk)) as gf:
                            while True:
                                readSize = min(16384, lenUncomp - lenRead)
                                readBytes = gf.read(size=readSize)
                                lenRead += len(readBytes)
                                ungzippedBytes += readBytes
                                if len(readBytes) == 0 or (lenUncomp - lenRead) <= 0:
                                    break
                    except IOError as err:
                        pass  # provide error message later
                    i += lenCompr + 4
                #for learning the content of xfd file, uncomment this:
                #with open("c:\\temp\\test.xml", "wb") as fh:
                #    fh.write(ungzippedBytes)
                file = io.StringIO(initial_value=ungzippedBytes.decode("utf-8"))
            else:
                # position to start of file
                file.seek(0, io.SEEK_SET)
            try:
                self.xfdDocument = etree.parse(file)
                file.close()
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
                return  # provide error message later
            except etree.LxmlError as err:
                self.logError(err)
                return  # provide error message later
        elif self.isRss:
            try:
                self.rssDocument = etree.parse(self.basefile)
                self.isOpen = True
            except EnvironmentError as err:
                self.logError(err)
                return  # provide error message later
            except etree.LxmlError as err:
                self.logError(err)
                return  # provide error message later
        elif self.isInstalledTaxonomyPackage:
            self.isOpen = True
            # load mappings
            try:
                metadataFiles = self.taxonomyPackageMetadataFiles
                if len(metadataFiles) != 1:
                    raise IOError(_("Taxonomy package must contain one and only one metadata file: {0}.")
                                  .format(', '.join(metadataFiles)))
                # HF: this won't work, see DialogOpenArchive for correct code
                # not sure if it is used
                taxonomyPackage = PackageManager.parsePackage(self.cntlr, self.url)
                fileSourceDir = os.path.dirname(self.baseurl) + os.sep
                self.mappedPaths = dict(
                    (prefix,
                     remapping if isHttpUrl(remapping)
                     else (fileSourceDir + remapping.replace("/", os.sep)))
                    for prefix, remapping in taxonomyPackage["remappings"].items())
            except EnvironmentError as err:
                self.logError(err)
                return  # provide error message later
def validateXbrlFinally(val, *args, **kwargs):
    if not (val.validateESEFplugin):
        return
    _xhtmlNs = "{{{}}}".format(xhtml)
    _xhtmlNsLen = len(_xhtmlNs)
    modelXbrl = val.modelXbrl
    modelDocument = modelXbrl.modelDocument

    _statusMsg = _("validating {0} filing rules").format(val.disclosureSystem.name)
    modelXbrl.profileActivity()
    modelXbrl.modelManager.showStatus(_statusMsg)

    reportXmlLang = None
    firstRootmostXmlLangDepth = 9999999

    _ifrsNs = None
    for targetNs in modelXbrl.namespaceDocs.keys():
        if ifrsNsPattern.match(targetNs):
            _ifrsNs = targetNs
    if not _ifrsNs:
        modelXbrl.error("ESEF.RTS.ifrsRequired",
                        _("RTS on ESEF requires IFRS taxonomy."),
                        modelObject=modelXbrl)
        return
    esefPrimaryStatementPlaceholders = set(qname(_ifrsNs, n) for n in esefPrimaryStatementPlaceholderNames)
    esefMandatoryElements2020 = set(qname(_ifrsNs, n) for n in esefMandatoryElementNames2020)

    if modelDocument.type == ModelDocument.Type.INSTANCE:
        modelXbrl.error("ESEF.I.1.instanceShallBeInlineXBRL",
                        _("RTS on ESEF requires inline XBRL instances."),
                        modelObject=modelXbrl)

    checkFilingDimensions(val)  # sets up val.primaryItems and val.domainMembers
    val.hasExtensionSchema = val.hasExtensionPre = val.hasExtensionCal = val.hasExtensionDef = val.hasExtensionLbl = False
    checkFilingDTS(val, modelXbrl.modelDocument, [])
    modelXbrl.profileActivity("... filer DTS checks", minTimeToShow=1.0)

    if not (val.hasExtensionSchema and val.hasExtensionPre and val.hasExtensionCal and val.hasExtensionDef and val.hasExtensionLbl):
        missingFiles = []
        if not val.hasExtensionSchema: missingFiles.append("schema file")
        if not val.hasExtensionPre: missingFiles.append("presentation linkbase")
        if not val.hasExtensionCal: missingFiles.append("calculation linkbase")
        if not val.hasExtensionDef: missingFiles.append("definition linkbase")
        if not val.hasExtensionLbl: missingFiles.append("label linkbase")
        modelXbrl.warning("ESEF.3.1.1.extensionTaxonomyWrongFilesStructure",
            _("Extension taxonomies MUST consist of at least a schema file and presentation, calculation, definition and label linkbases"
              ": missing %(missingFiles)s"),
            modelObject=modelXbrl, missingFiles=", ".join(missingFiles))

    #if modelDocument.type == ModelDocument.Type.INLINEXBRLDOCUMENTSET:
    #    # reports only under reports, none elsewhere
    #    modelXbrl.fileSource.dir

    if modelDocument.type in (ModelDocument.Type.INLINEXBRL,
                              ModelDocument.Type.INLINEXBRLDOCUMENTSET,
                              ModelDocument.Type.INSTANCE):
        footnotesRelationshipSet = modelXbrl.relationshipSet("XBRL-footnotes")
        orphanedFootnotes = set()
        noLangFootnotes = set()
        factLangFootnotes = defaultdict(set)
        footnoteRoleErrors = set()
        transformRegistryErrors = set()

        def checkFootnote(elt, text):
            if text:  # non-empty footnote must be linked to a fact if not empty
                if not any(isinstance(rel.fromModelObject, ModelFact)
                           for rel in footnotesRelationshipSet.toModelObject(elt)):
                    orphanedFootnotes.add(elt)
            lang = elt.xmlLang
            if not lang:
                noLangFootnotes.add(elt)
            else:
                for rel in footnotesRelationshipSet.toModelObject(elt):
                    if rel.fromModelObject is not None:
                        factLangFootnotes[rel.fromModelObject].add(lang)
            if elt.role != XbrlConst.footnote or not all(
                    rel.arcrole == XbrlConst.factFootnote and rel.linkrole == XbrlConst.defaultLinkRole
                    for rel in footnotesRelationshipSet.toModelObject(elt)):
                footnoteRoleErrors.add(elt)

        # check file name of each inline document (which might be below a top-level IXDS)
        for doc in modelXbrl.urlDocs.values():
            if doc.type == ModelDocument.Type.INLINEXBRL:
                _baseName, _baseExt = os.path.splitext(doc.basename)
                if _baseExt not in (".xhtml", ".html"):
                    modelXbrl.warning("ESEF.RTS.Art.3.fileNameExtension",
                        _("FileName SHOULD have the extension .xhtml or .html: %(fileName)s"),
                        modelObject=doc, fileName=doc.basename)
                docinfo = doc.xmlRootElement.getroottree().docinfo
                if " html" in docinfo.doctype:
                    modelXbrl.warning("ESEF.RTS.Art.3.htmlDoctype",
                        _("Doctype SHOULD NOT be html: %(fileName)s"),
                        modelObject=doc, fileName=doc.basename)

        if modelDocument.type in (ModelDocument.Type.INLINEXBRL,
                                  ModelDocument.Type.INLINEXBRLDOCUMENTSET):
            hiddenEltIds = {}
            presentedHiddenEltIds = defaultdict(list)
            eligibleForTransformHiddenFacts = []
            requiredToDisplayFacts = []
            requiredToDisplayFactIds = {}
            firstIxdsDoc = True
            for ixdsHtmlRootElt in modelXbrl.ixdsHtmlElements:  # ix root elements for all ix docs in IXDS
                ixNStag = ixdsHtmlRootElt.modelDocument.ixNStag
                ixTags = set(ixNStag + ln for ln in ("nonNumeric", "nonFraction", "references", "relationship"))
                ixTextTags = set(ixNStag + ln for ln in ("nonFraction", "continuation", "footnote"))
                ixExcludeTag = ixNStag + "exclude"
                ixTupleTag = ixNStag + "tuple"
                ixFractionTag = ixNStag + "fraction"
                for elt, depth in etreeIterWithDepth(ixdsHtmlRootElt):
                    eltTag = elt.tag
                    if isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
                        continue  # comment or other non-parsed element
                    else:
                        eltTag = elt.tag
                        if eltTag.startswith(_xhtmlNs):
                            eltTag = eltTag[_xhtmlNsLen:]
                        if firstIxdsDoc and (not reportXmlLang or depth < firstRootmostXmlLangDepth):
                            xmlLang = elt.get("{http://www.w3.org/XML/1998/namespace}lang")
                            if xmlLang:
                                reportXmlLang = xmlLang
                                firstRootmostXmlLangDepth = depth
                        if ((eltTag in ("object", "script")) or
                            (eltTag == "a" and "javascript:" in elt.get("href", "")) or
                            (eltTag == "img" and "javascript:" in elt.get("src", ""))):
                            modelXbrl.error("ESEF.2.5.1.executableCodePresent",
                                _("Inline XBRL documents MUST NOT contain executable code: %(element)s"),
                                modelObject=elt, element=eltTag)
                        elif eltTag == "img":
                            src = elt.get("src", "").strip()
                            hasParentIxTextTag = False  # check if image is in an ix text-bearing element
                            _ancestorElt = elt
                            while (_ancestorElt is not None):
                                if _ancestorElt.tag == ixExcludeTag:  # excluded from any parent text-bearing ix element
                                    break
                                if _ancestorElt.tag in ixTextTags:
                                    hasParentIxTextTag = True
                                    break
                                _ancestorElt = _ancestorElt.getparent()
                            if scheme(src) in ("http", "https", "ftp"):
                                modelXbrl.error("ESEF.3.5.1.inlinXbrlContainsExternalReferences",
                                    _("Inline XBRL instance documents MUST NOT contain any reference pointing to resources outside the reporting package: %(element)s"),
                                    modelObject=elt, element=eltTag)
                            elif not src.startswith("data:image"):
                                if hasParentIxTextTag:
                                    modelXbrl.error("ESEF.2.5.1.imageInIXbrlElementNotEmbedded",
                                        _("Images appearing within an inline XBRL element MUST be embedded regardless of their size."),
                                        modelObject=elt)
                                else:
                                    # presume it to be an image file, check image contents
                                    try:
                                        base = elt.modelDocument.baseForElement(elt)
                                        normalizedUri = elt.modelXbrl.modelManager.cntlr.webCache.normalizeUrl(src, base)
                                        if not elt.modelXbrl.fileSource.isInArchive(normalizedUri):
                                            normalizedUri = elt.modelXbrl.modelManager.cntlr.webCache.getfilename(normalizedUri)
                                        imglen = 0
                                        with elt.modelXbrl.fileSource.file(normalizedUri, binary=True)[0] as fh:
                                            imglen += len(fh.read())
                                        if imglen < browserMaxBase64ImageLength:
                                            modelXbrl.error("ESEF.2.5.1.embeddedImageNotUsingBase64Encoding",
                                                _("Images MUST be included in the XHTML document as a base64 encoded string unless their size exceeds support of browsers (%(maxImageSize)s): %(file)s."),
                                                modelObject=elt,
                                                maxImageSize=browserMaxBase64ImageLength,
                                                file=os.path.basename(normalizedUri))
                                    except IOError as err:
                                        modelXbrl.error("ESEF.2.5.1.imageFileCannotBeLoaded",
                                            _("Image file which isn't openable '%(src)s', error: %(error)s"),
                                            modelObject=elt, src=src, error=err)
                            elif not any(src.startswith(m) for m in allowedImgMimeTypes):
                                modelXbrl.error("ESEF.2.5.1.embeddedImageNotUsingBase64Encoding",
                                    _("Images MUST be included in the XHTML document as a base64 encoded string, encoding disallowed: %(src)s."),
                                    modelObject=elt, src=src[:128])  # was undefined attrValue; truncate src for the message
                        elif eltTag == "a":
                            href = elt.get("href", "").strip()
                            if scheme(href) in ("http", "https", "ftp"):
                                modelXbrl.error("ESEF.3.5.1.inlinXbrlContainsExternalReferences",
                                    _("Inline XBRL instance documents MUST NOT contain any reference pointing to resources outside the reporting package: %(element)s"),
                                    modelObject=elt, element=eltTag)
                        elif eltTag == "base" or elt.tag == "{http://www.w3.org/XML/1998/namespace}base":
                            modelXbrl.error("ESEF.2.4.2.htmlOrXmlBaseUsed",
                                _("The HTML <base> elements and xml:base attributes MUST NOT be used in the Inline XBRL document."),
                                modelObject=elt, element=eltTag)
                        elif eltTag == "link" and elt.get("type") == "text/css":
                            if len(modelXbrl.ixdsHtmlElements) > 1:
                                f = elt.get("href")
                                if not f or isHttpUrl(f) or os.path.isabs(f):
                                    modelXbrl.warning("ESEF.2.5.4.externalCssReportPackage",
                                        _("The CSS file should be physically stored within the report package: %(file)s."),
                                        modelObject=elt, file=f)
                            else:
                                modelXbrl.error("ESEF.2.5.4.externalCssFileForSingleIXbrlDocument",
                                    _("Where an Inline XBRL document set contains a single document, the CSS MUST be embedded within the document."),
                                    modelObject=elt, element=eltTag)
                        elif eltTag == "style" and elt.get("type") == "text/css":
                            if len(modelXbrl.ixdsHtmlElements) > 1:
                                modelXbrl.warning("ESEF.2.5.4.embeddedCssForMultiHtmlIXbrlDocumentSets",
                                    _("Where an Inline XBRL document set contains multiple documents, the CSS SHOULD be defined in a separate file."),
                                    modelObject=elt, element=eltTag)

                        if eltTag in ixTags and elt.get("target"):
                            modelXbrl.error("ESEF.2.5.3.targetAttributeUsed",
                                _("Target attribute MUST not be used: element %(localName)s, target attribute %(target)s."),
                                modelObject=elt, localName=elt.elementQname, target=elt.get("target"))
                        if eltTag == ixTupleTag:
                            modelXbrl.error("ESEF.2.4.1.tupleElementUsed",
                                _("The ix:tuple element MUST not be used in the Inline XBRL document: %(qname)s."),
                                modelObject=elt, qname=elt.qname)
                        if eltTag == ixFractionTag:
                            modelXbrl.error("ESEF.2.4.1.fractionElementUsed",
                                _("The ix:fraction element MUST not be used in the Inline XBRL document."),
                                modelObject=elt)
                        if elt.get("{http://www.w3.org/XML/1998/namespace}base") is not None:
                            modelXbrl.error("ESEF.2.4.1.xmlBaseUsed",
                                _("xml:base attributes MUST NOT be used in the Inline XBRL document: element %(localName)s, base attribute %(base)s."),
                                modelObject=elt, localName=elt.elementQname,
                                base=elt.get("{http://www.w3.org/XML/1998/namespace}base"))
                        if isinstance(elt, ModelInlineFootnote):
                            checkFootnote(elt, elt.value)
                        elif isinstance(elt, ModelResource) and elt.qname == XbrlConst.qnLinkFootnote:
                            checkFootnote(elt, elt.value)
                        elif isinstance(elt, ModelInlineFact):
                            if elt.format is not None and elt.format.namespaceURI not in IXT_NAMESPACES:
                                transformRegistryErrors.add(elt)
                for ixHiddenElt in ixdsHtmlRootElt.iterdescendants(tag=ixNStag + "hidden"):
                    for tag in (ixNStag + "nonNumeric", ixNStag + "nonFraction"):
                        for ixElt in ixHiddenElt.iterdescendants(tag=tag):
                            if (getattr(ixElt, "xValid", 0) >= VALID  # may not be validated
                                    ):  # add future "and" conditions on elements which can be in hidden
                                if (ixElt.concept.baseXsdType not in untransformableTypes and
                                        not ixElt.isNil):
                                    eligibleForTransformHiddenFacts.append(ixElt)
                                elif ixElt.id is None:
                                    requiredToDisplayFacts.append(ixElt)
                            if ixElt.id:
                                hiddenEltIds[ixElt.id] = ixElt
                firstIxdsDoc = False

            if eligibleForTransformHiddenFacts:
                modelXbrl.warning("ESEF.2.4.1.transformableElementIncludedInHiddenSection",
                    _("The ix:hidden section of Inline XBRL document MUST not include elements eligible for transformation. "
                      "%(countEligible)s fact(s) were eligible for transformation: %(elements)s"),
                    modelObject=eligibleForTransformHiddenFacts,
                    countEligible=len(eligibleForTransformHiddenFacts),
                    elements=", ".join(sorted(set(str(f.qname) for f in eligibleForTransformHiddenFacts))))
            for ixdsHtmlRootElt in modelXbrl.ixdsHtmlElements:
                for ixElt in ixdsHtmlRootElt.getroottree().iterfind("//{http://www.w3.org/1999/xhtml}*[@style]"):
                    hiddenFactRefMatch = styleIxHiddenPattern.match(ixElt.get("style", ""))
                    if hiddenFactRefMatch:
                        hiddenFactRef = hiddenFactRefMatch.group(2)
                        if hiddenFactRef not in hiddenEltIds:
                            modelXbrl.error("ESEF.2.4.1.esefIxHiddenStyleNotLinkingFactInHiddenSection",
                                _("\"-esef-ix-hidden\" style identifies @id, %(id)s of a fact that is not in ix:hidden section."),
                                modelObject=ixElt, id=hiddenFactRef)
                        else:
                            presentedHiddenEltIds[hiddenFactRef].append(ixElt)
            for hiddenEltId, ixElt in hiddenEltIds.items():
                if (hiddenEltId not in presentedHiddenEltIds and
                        getattr(ixElt, "xValid", 0) >= VALID and  # may not be validated
                        (ixElt.concept.baseXsdType in untransformableTypes or ixElt.isNil)):
                    requiredToDisplayFacts.append(ixElt)
            if requiredToDisplayFacts:
                modelXbrl.warning("ESEF.2.4.1.factInHiddenSectionNotInReport",
                    _("The ix:hidden section contains %(countUnreferenced)s fact(s) whose @id is not applied on any \"-esef-ix-hidden\" style: %(elements)s"),
                    modelObject=requiredToDisplayFacts,
                    countUnreferenced=len(requiredToDisplayFacts),
                    elements=", ".join(sorted(set(str(f.qname) for f in requiredToDisplayFacts))))
            del eligibleForTransformHiddenFacts, hiddenEltIds, presentedHiddenEltIds, requiredToDisplayFacts
        elif modelDocument.type == ModelDocument.Type.INSTANCE:
            for elt in modelDocument.xmlRootElement.iter():
                if elt.qname == XbrlConst.qnLinkFootnote:  # for now assume no private elements extend link:footnote
                    checkFootnote(elt, elt.stringValue)

        contextsWithDisallowedOCEs = []
        contextsWithDisallowedOCEcontent = []
        contextsWithPeriodTime = []
        contextsWithPeriodTimeZone = []
        contextIdentifiers = defaultdict(list)
        nonStandardTypedDimensions = defaultdict(set)
        for context in modelXbrl.contexts.values():
            for elt in context.iterdescendants(
                    "{http://www.xbrl.org/2003/instance}startDate",
                    "{http://www.xbrl.org/2003/instance}endDate",
                    "{http://www.xbrl.org/2003/instance}instant"):
                m = datetimePattern.match(elt.stringValue)
                if m:
                    if m.group(1):
                        contextsWithPeriodTime.append(context)
                    if m.group(3):
                        contextsWithPeriodTimeZone.append(context)
            for elt in context.iterdescendants("{http://www.xbrl.org/2003/instance}segment"):
                contextsWithDisallowedOCEs.append(context)
                break
            for elt in context.iterdescendants("{http://www.xbrl.org/2003/instance}scenario"):
                if isinstance(elt, ModelObject):
                    if any(True for child in elt.iterchildren()
                           if isinstance(child, ModelObject) and
                              child.tag not in ("{http://xbrl.org/2006/xbrldi}explicitMember",
                                                "{http://xbrl.org/2006/xbrldi}typedMember")):
                        contextsWithDisallowedOCEcontent.append(context)
            # check periods here
            contextIdentifiers[context.entityIdentifier].append(context)

        if contextsWithDisallowedOCEs:
            modelXbrl.error("ESEF.2.1.3.segmentUsed",
                _("xbrli:segment container MUST NOT be used in contexts: %(contextIds)s"),
                modelObject=contextsWithDisallowedOCEs,
                contextIds=", ".join(c.id for c in contextsWithDisallowedOCEs))
        if contextsWithDisallowedOCEcontent:
            modelXbrl.error("ESEF.2.1.3.scenarioContainsNonDimensionalContent",
                _("xbrli:scenario in contexts MUST NOT contain any other content than defined in XBRL Dimensions specification: %(contextIds)s"),
                modelObject=contextsWithDisallowedOCEcontent,
                contextIds=", ".join(c.id for c in contextsWithDisallowedOCEcontent))
        if len(contextIdentifiers) > 1:
            modelXbrl.error("ESEF.2.1.4.multipleIdentifiers",
                _("All entity identifiers in contexts MUST have identical content: %(contextIdentifiers)s"),
                modelObject=modelXbrl,
                contextIdentifiers=", ".join(i[1] for i in contextIdentifiers))
        for (contextScheme, contextIdentifier), contextElts in contextIdentifiers.items():
            if contextScheme != iso17442:
                modelXbrl.warning("ESEF.2.1.1.nonLEIContextScheme",
                    _("The scheme attribute of the xbrli:identifier element should have \"%(leiScheme)s\" as its content: %(contextScheme)s"),
                    modelObject=contextElts, contextScheme=contextScheme, leiScheme=iso17442)
            else:
                leiValidity = LeiUtil.checkLei(contextIdentifier)
                if leiValidity == LeiUtil.LEI_INVALID_LEXICAL:
                    modelXbrl.warning("ESEF.2.1.1.invalidIdentifierFormat",
                        _("The LEI context identifier has an invalid format: %(identifier)s"),
                        modelObject=contextElts, identifier=contextIdentifier)
                elif leiValidity == LeiUtil.LEI_INVALID_CHECKSUM:
                    modelXbrl.warning("ESEF.2.1.1.invalidIdentifier",
                        _("The LEI context identifier has checksum error: %(identifier)s"),
                        modelObject=contextElts, identifier=contextIdentifier)
        if contextsWithPeriodTime:
            modelXbrl.warning("ESEF.2.1.2.periodWithTimeContent",
                _("Context period startDate, endDate and instant elements should be in whole days without time: %(contextIds)s"),
                modelObject=contextsWithPeriodTime,
                contextIds=", ".join(c.id for c in contextsWithPeriodTime))
        if contextsWithPeriodTimeZone:
            modelXbrl.warning("ESEF.2.1.2.periodWithTimeZone",
                _("Context period startDate, endDate and instant elements should be in whole days without a timezone: %(contextIds)s"),
                modelObject=contextsWithPeriodTimeZone,
                contextIds=", ".join(c.id for c in contextsWithPeriodTimeZone))

        # identify unique contexts and units
        mapContext = {}
        mapUnit = {}
        uniqueContextHashes = {}
        for context in modelXbrl.contexts.values():
            h = context.contextDimAwareHash
            if h in uniqueContextHashes:
                if context.isEqualTo(uniqueContextHashes[h]):
                    mapContext[context] = uniqueContextHashes[h]
            else:
                uniqueContextHashes[h] = context
        del uniqueContextHashes
        uniqueUnitHashes = {}
        utrValidator = ValidateUtr(modelXbrl)
        utrUnitIds = set(u.unitId
                         for unitItemType in utrValidator.utrItemTypeEntries.values()
                         for u in unitItemType.values())
        for unit in modelXbrl.units.values():
            h = unit.hash
            if h in uniqueUnitHashes:
                if unit.isEqualTo(uniqueUnitHashes[h]):
                    mapUnit[unit] = uniqueUnitHashes[h]
            else:
                uniqueUnitHashes[h] = unit
            # check if any custom measure is in UTR
            for measureTerm in unit.measures:
                for measure in measureTerm:
                    ns = measure.namespaceURI
                    if ns != XbrlConst.iso4217 and not ns.startswith("http://www.xbrl.org/"):
                        if measure.localName in utrUnitIds:
                            modelXbrl.error("ESEF.RTS.III.1.G1-7-1.customUnitInUtr",
                                _("Custom measure SHOULD NOT duplicate a UnitID of UTR: %(measure)s"),
                                modelObject=unit, measure=measure)
        del uniqueUnitHashes

        reportedMandatory = set()
        precisionFacts = set()
        numFactsByConceptContextUnit = defaultdict(list)
        textFactsByConceptContext = defaultdict(list)
        footnotesRelationshipSet = modelXbrl.relationshipSet(XbrlConst.factFootnote, XbrlConst.defaultLinkRole)
        noLangFacts = []
        textFactsMissingReportLang = []
        conceptsUsed = set()

        for qn, facts in modelXbrl.factsByQname.items():
            if qn in mandatory:
                reportedMandatory.add(qn)
            for f in facts:
                if f.precision is not None:
                    precisionFacts.add(f)
                if f.isNumeric:
                    numFactsByConceptContextUnit[(f.qname,
                                                  mapContext.get(f.context, f.context),
                                                  mapUnit.get(f.unit, f.unit))].append(f)
                    if (f.concept is not None and not f.isNil and f.xValid >= VALID and f.xValue > 1 and
                            f.concept.type is not None and
                            (f.concept.type.qname == PERCENT_TYPE or f.concept.type.isDerivedFrom(PERCENT_TYPE))):
                        modelXbrl.warning("ESEF.2.2.2.percentGreaterThan100",
                            _("A percent fact should have value <= 100: %(element)s in context %(context)s value %(value)s"),
                            modelObject=f, element=f.qname, context=f.context.id, value=f.xValue)
                elif f.concept is not None and f.concept.type is not None:
                    if f.concept.type.isOimTextFactType:
                        if not f.xmlLang:
                            noLangFacts.append(f)
                        elif f.context is not None:
                            textFactsByConceptContext[(f.qname, mapContext.get(f.context, f.context))].append(f)
                conceptsUsed.add(f.concept)
                if f.context is not None:
                    for dim in f.context.qnameDims.values():
                        conceptsUsed.add(dim.dimension)
                        if dim.isExplicit:
                            conceptsUsed.add(dim.member)
                        elif dim.isTyped:
                            conceptsUsed.add(dim.typedMember)

        if noLangFacts:
            modelXbrl.error("ESEF.2.5.2.undefinedLanguageForTextFact",
                _("Each tagged text fact MUST have the 'xml:lang' attribute assigned or inherited."),
                modelObject=noLangFacts)

        # missing report lang text facts
        if reportXmlLang:
            for fList in textFactsByConceptContext.values():
                if not any(f.xmlLang == reportXmlLang for f in fList):
                    modelXbrl.error("ESEF.2.5.2.taggedTextFactOnlyInLanguagesOtherThanLanguageOfAReport",
                        _("Each tagged text fact MUST have the 'xml:lang' provided in at least the language of the report: %(element)s"),
                        modelObject=fList, element=fList[0].qname)

        # 2.2.4 test
        for fList in numFactsByConceptContextUnit.values():
            if len(fList) > 1:
                f0 = fList[0]
                if any(f.isNil for f in fList):
                    _inConsistent = not all(f.isNil for f in fList)
                elif all(inferredDecimals(f) == inferredDecimals(f0) for f in fList[1:]):  # same decimals
                    v0 = rangeValue(f0.value)
                    _inConsistent = not all(rangeValue(f.value) == v0 for f in fList[1:])
                else:  # not all have same decimals
                    aMax, bMin = rangeValue(f0.value, inferredDecimals(f0))
                    for f in fList[1:]:
                        a, b = rangeValue(f.value, inferredDecimals(f))
                        if a > aMax: aMax = a
                        if b < bMin: bMin = b
                    _inConsistent = (bMin < aMax)
                if _inConsistent:
                    modelXbrl.error("ESEF.2.2.4.inconsistentDuplicateNumericFactInInlineXbrlDocument",
                        _("Inconsistent duplicate numeric facts MUST NOT appear in the content of an inline XBRL document. "
                          "%(fact)s that was used more than once in contexts equivalent to %(contextID)s: values %(values)s."),
                        modelObject=fList, fact=f0.qname, contextID=f0.contextID,
                        values=", ".join(strTruncate(f.value, 128) for f in fList))

        if precisionFacts:
            modelXbrl.warning("ESEF.2.2.1.precisionAttributeUsed",
                _("The accuracy of numeric facts SHOULD be defined with the 'decimals' attribute rather than the 'precision' attribute: %(elements)s."),
                modelObject=precisionFacts,
                elements=", ".join(sorted(str(e.qname) for e in precisionFacts)))

        missingElements = (mandatory - reportedMandatory)
        if missingElements:
            modelXbrl.error("ESEF.???.missingRequiredElements",
                _("Required elements missing from document: %(elements)s."),
                modelObject=modelXbrl,
                elements=", ".join(sorted(str(qn) for qn in missingElements)))

        if transformRegistryErrors:
            modelXbrl.warning("ESEF.2.2.3.transformRegistry",
                _("ESMA recommends applying the latest available version of the Transformation Rules Registry marked with 'Recommendation' status for these elements: %(elements)s."),
                modelObject=transformRegistryErrors,
                elements=", ".join(sorted(str(fact.qname) for fact in transformRegistryErrors)))

        if orphanedFootnotes:
            modelXbrl.error("ESEF.2.3.1.unusedFootnote",
                _("Non-empty footnotes must be connected to fact(s)."),
                modelObject=orphanedFootnotes)
        if noLangFootnotes:
            modelXbrl.error("ESEF.2.3.1.undefinedLanguageForFootnote",
                _("Each footnote MUST have the 'xml:lang' attribute whose value corresponds to the language of the text in the content of the respective footnote."),
                modelObject=noLangFootnotes)
        nonDefLangFtFacts = set(f for f, langs in factLangFootnotes.items() if reportXmlLang not in langs)
        if nonDefLangFtFacts:
            modelXbrl.error("ESEF.2.3.1.footnoteOnlyInLanguagesOtherThanLanguageOfAReport",
                _("Each fact MUST have at least one footnote with 'xml:lang' attribute whose value corresponds to the language of the text in the content of the respective footnote: %(qnames)s."),
                modelObject=nonDefLangFtFacts,
                qnames=", ".join(sorted(str(f.qname) for f in nonDefLangFtFacts)))
        del nonDefLangFtFacts
        if footnoteRoleErrors:
            modelXbrl.error("ESEF.2.3.1.nonStandardRoleForFootnote",
                _("The xlink:role attribute of a link:footnote and link:footnoteLink element as well as xlink:arcrole attribute of a link:footnoteArc MUST be defined in the XBRL Specification 2.1."),
                modelObject=footnoteRoleErrors)

        nonStdFootnoteElts = list()
        for modelLink in modelXbrl.baseSets[("XBRL-footnotes", None, None, None)]:
            for elt in modelLink.iterchildren():
                if isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
                    continue  # comment or other non-parsed element
                if elt.qname not in FOOTNOTE_LINK_CHILDREN:
                    nonStdFootnoteElts.append(elt)
        if nonStdFootnoteElts:
            modelXbrl.error("ESEF.2.3.2.nonStandardElementInFootnote",
                _("A link:footnoteLink element MUST have no children other than link:loc, link:footnote, and link:footnoteArc."),
                modelObject=nonStdFootnoteElts)

        for qn in modelXbrl.qnameDimensionDefaults.values():
            conceptsUsed.add(modelXbrl.qnameConcepts.get(qn))

        # unused elements in linkbases
        for arcroles, err in (
                ((parentChild,), "elementsNotUsedForTaggingAppliedInPresentationLinkbase"),
                ((summationItem,), "elementsNotUsedForTaggingAppliedInCalculationLinkbase"),
                ((dimensionDomain, domainMember), "elementsNotUsedForTaggingAppliedInDefinitionLinkbase")):
            unreportedLbElts = set()
            for arcrole in arcroles:
                for rel in modelXbrl.relationshipSet(arcrole).modelRelationships:
                    fr = rel.fromModelObject
                    to = rel.toModelObject
                    if arcrole in (parentChild, summationItem):
                        if fr is not None and not fr.isAbstract and fr not in conceptsUsed and isExtension(val, rel):
                            unreportedLbElts.add(fr)
                        if to is not None and not to.isAbstract and to not in conceptsUsed and isExtension(val, rel):
                            unreportedLbElts.add(to)
                    elif arcrole == dimensionDomain:  # dimension, always abstract
                        if fr is not None and fr not in conceptsUsed and isExtension(val, rel):
                            unreportedLbElts.add(fr)
                        if to is not None and rel.isUsable and to not in conceptsUsed and isExtension(val, rel):
                            unreportedLbElts.add(to)
                    elif arcrole == domainMember:
                        if to is not None and not fr.isAbstract and rel.isUsable and to not in conceptsUsed and isExtension(val, rel):
                            unreportedLbElts.add(to)
            if unreportedLbElts:
                modelXbrl.error("ESEF.3.4.6." + err,
                    _("All usable concepts in extension taxonomy relationships MUST be applied by tagged facts: %(elements)s."),
                    modelObject=unreportedLbElts,
                    elements=", ".join(sorted((str(c.qname) for c in unreportedLbElts))))

        # 3.4.4 check for presentation preferred labels
        missingConceptLabels = defaultdict(set)  # by role
        pfsConceptsRootInPreLB = set()

        def checkLabels(parent, relSet, labelrole, visited):
            if not parent.label(labelrole, lang=reportXmlLang, fallbackToQname=False):
                if parent.name != "NotesAccountingPoliciesAndMandatoryTags":  # TEMPORARY TBD remove
                    missingConceptLabels[labelrole].add(parent)
            visited.add(parent)
            conceptRels = defaultdict(list)  # counts for concepts without preferred label role
            for rel in relSet.fromModelObject(parent):
                child = rel.toModelObject
                if child is not None:
                    labelrole = rel.preferredLabel
                    if not labelrole:
                        conceptRels[child].append(rel)
                    if child not in visited:
                        checkLabels(child, relSet, labelrole, visited)
            for concept, rels in conceptRels.items():
                if len(rels) > 1:
                    modelXbrl.warning("ESEF.3.4.4.missingPreferredLabelRole",
                        _("Preferred label role SHOULD be used when concept is duplicated in same presentation tree location: %(qname)s."),
                        modelObject=rels + [concept], qname=concept.qname)
            visited.remove(parent)

        for ELR in modelXbrl.relationshipSet(parentChild).linkRoleUris:
            relSet = modelXbrl.relationshipSet(parentChild, ELR)
            for rootConcept in relSet.rootConcepts:
                checkLabels(rootConcept, relSet, None, set())
                # check for PFS element which isn't an orphan
                if rootConcept.qname in esefPrimaryStatementPlaceholders and relSet.fromModelObject(rootConcept):
                    pfsConceptsRootInPreLB.add(rootConcept)

        for labelrole, concepts in missingConceptLabels.items():
            modelXbrl.warning("ESEF.3.4.5.missingLabelForRoleInReportLanguage",
                _("Label for %(role)s role SHOULD be available in report language for concepts: %(qnames)s."),
                modelObject=concepts,
                qnames=", ".join(str(c.qname) for c in concepts),
                role=os.path.basename(labelrole) if labelrole else "standard")
        if not pfsConceptsRootInPreLB:
            # no PFS statements were recognized
            modelXbrl.error("ESEF.RTS.Annex.II.Par.1.Par.7.missingPrimaryFinancialStatement",
                _("A primary financial statement placeholder element MUST be a root of a presentation linkbase tree."),
                modelObject=modelXbrl)
        # dereference
        del missingConceptLabels, pfsConceptsRootInPreLB

        # mandatory facts RTS Annex II
        missingMandatoryElements = esefMandatoryElements2020 - modelXbrl.factsByQname.keys()
        if missingMandatoryElements:
            modelXbrl.error("ESEF.RTS.Annex.II.Par.2.missingMandatoryMarkups",
                _("Mandatory elements to be marked up are missing: %(qnames)s."),
                modelObject=missingMandatoryElements,
                qnames=", ".join(sorted(str(qn) for qn in missingMandatoryElements)))

        # duplicated core taxonomy elements
        for name, concepts in modelXbrl.nameConcepts.items():
            if len(concepts) > 1:
                i = None  # ifrs Concept
                for c in concepts:
                    if c.qname.namespaceURI == _ifrsNs:
                        i = c
                        break
                if i is not None:
                    for c in concepts:
                        if c != i and c.balance == i.balance and c.periodType == i.periodType:
                            modelXbrl.error("ESEF.RTS.Annex.IV.Par.4.2.extensionElementDuplicatesCoreElement",
                                _("Extension elements must not duplicate the existing elements from the core taxonomy and be identifiable %(qname)s."),
                                modelObject=(c, i), qname=c.qname)

    modelXbrl.profileActivity(_statusMsg, minTimeToShow=0.0)
    modelXbrl.modelManager.showStatus(None)
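# Illustrative sketch (not Arelle code) of the interval logic behind the "2.2.4 test"
# above: a reported value with decimals d is treated as the interval value +/- 0.5 * 10**-d
# (mirroring what rangeValue/inferredDecimals provide), and duplicates are inconsistent
# when the intersection of all their intervals is empty. Helper names and sample inputs
# are hypothetical.
from decimal import Decimal

def _valueInterval(value, decimals):
    half = Decimal(10) ** (-decimals) / 2  # half a unit in the last significant place
    v = Decimal(value)
    return (v - half, v + half)

def _duplicatesConsistent(valueDecimalsPairs):
    lowerMax = max(_valueInterval(v, d)[0] for v, d in valueDecimalsPairs)
    upperMin = min(_valueInterval(v, d)[1] for v, d in valueDecimalsPairs)
    return lowerMax <= upperMin  # non-empty intersection => consistent duplicates

# _duplicatesConsistent([("1000", 0), ("1000.4", 1)]) -> True  (intervals overlap)
# _duplicatesConsistent([("1000", 0), ("1002", 0)])   -> False (inconsistent duplicates)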
def packageInfo(URL, reload=False, packageManifestName=None):
    #TODO several directories, eg User Application Data
    packageFilename = _cntlr.webCache.getfilename(URL, reload=reload, normalize=True)
    if packageFilename:
        from arelle.FileSource import TAXONOMY_PACKAGE_FILE_NAMES
        filesource = None
        try:
            global openFileSource
            if openFileSource is None:
                from arelle.FileSource import openFileSource
            filesource = openFileSource(packageFilename, _cntlr)
            # allow multiple manifests [[metadata, prefix]...] for multiple catalogs
            packages = []
            if filesource.isZip:
                if packageManifestName:
                    packageFiles = [fileName
                                    for fileName in filesource.dir
                                    if fnmatch(fileName, packageManifestName)]
                else:
                    packageFiles = filesource.taxonomyPackageMetadataFiles
                if len(packageFiles) < 1:
                    raise IOError(_("Taxonomy package contained no metadata file: {0}.")
                                  .format(', '.join(packageFiles)))
                for packageFile in packageFiles:
                    packageFileUrl = filesource.file(filesource.url + os.sep + packageFile)[0]
                    packageFilePrefix = os.sep.join(os.path.split(packageFile)[:-1])
                    if packageFilePrefix:
                        packageFilePrefix += os.sep
                    packageFilePrefix = filesource.baseurl + os.sep + packageFilePrefix
                    packages.append([packageFileUrl, packageFilePrefix])
            elif os.path.basename(filesource.url) in TAXONOMY_PACKAGE_FILE_NAMES:  # individual manifest file
                packageFile = packageFileUrl = filesource.url
                packageFilePrefix = os.sep.join(os.path.split(packageFile)[:-1])
                if packageFilePrefix:
                    packageFilePrefix += os.sep
                packages.append([packageFileUrl, packageFilePrefix])
            else:
                raise IOError(_("File must be a taxonomy package (zip file), catalog file, or manifest ({1}): {0}.")
                              .format(packageFilename, ', '.join(TAXONOMY_PACKAGE_FILE_NAMES)))
            remappings = {}
            packageNames = []
            descriptions = []
            for packageFileUrl, packageFilePrefix in packages:
                parsedPackage = parsePackage(_cntlr, packageFileUrl)
                packageNames.append(parsedPackage['name'])
                if parsedPackage.get('description'):
                    descriptions.append(parsedPackage['description'])
                for prefix, remapping in parsedPackage["remappings"].items():
                    remappings[prefix] = (remapping if isHttpUrl(remapping)
                                          else (packageFilePrefix + remapping.replace("/", os.sep)))
            package = {'name': ", ".join(packageNames),
                       'status': 'enabled',
                       'version': parsedPackage['version'],
                       'fileDate': time.strftime('%Y-%m-%dT%H:%M:%S UTC',
                                                 time.gmtime(os.path.getmtime(packageFilename))),
                       'URL': URL,
                       'manifestName': packageManifestName,
                       'description': "; ".join(descriptions),
                       'remappings': remappings,
                       }
            filesource.close()
            return package
        except EnvironmentError:
            pass
        if filesource:
            filesource.close()
    return None
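# Hypothetical usage sketch of packageInfo (the path is made up): the returned dict is
# what the package manager records for an installed package; 'remappings' maps catalog
# URL prefixes to locations inside (or outside) the package archive.
#
#   pkg = packageInfo("/downloads/ifrs-taxonomy.zip")
#   if pkg is not None:
#       print(pkg['name'], pkg['version'], pkg['status'])
#       for prefix, target in pkg['remappings'].items():
#           print(prefix, "->", target)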
def ok(self, event=None):
    selection = self.treeView.selection()
    if len(selection) > 0:
        filename = None
        if self.openType in (ARCHIVE, DISCLOSURE_SYSTEM):
            if self.multiselect:
                filenames = []
            for _selection in selection:
                filename = self.filenames[int(_selection[4:])]
                if isinstance(filename, tuple):
                    if self.isRss:
                        filename = filename[4]
                    else:
                        filename = filename[0]
                if self.multiselect:
                    filenames.append(filename)
                else:
                    break
            if self.multiselect and filenames:
                self.filesource.select(filenames)  # array of file names
                self.accepted = True
                self.close()
        elif self.openType == ENTRY_POINTS:
            epName = selection[0]
            #index 0 is the remapped Url, as opposed to the canonical one used for display
            # Greg Acsone reports [0] does not work for Corep 1.6 pkgs, need [1], old style packages
            filenames = []
            for _url, _type in self.packageContainedInstances:  # check if selection was an inline instance
                if _type in selection:
                    if _url in self.packageContainedIXDSes:
                        # taxonomy package
                        filenames.extend(self.packageContainedIXDSes[_url])
                    else:
                        # single instance
                        filenames.append(_url)
            if not filenames:  # else if it's a named taxonomy entry point of an installed package
                for url in self.taxonomyPackage["entryPoints"][epName]:
                    filename = url[1]  # use unmapped file name
                    if not filename.endswith("/"):
                        # check if it's an absolute URL rather than a path into the archive
                        if not isHttpUrl(filename) and self.metadataFilePrefix != self.taxonomyPkgMetaInf:
                            # assume it's a path inside the archive:
                            filename = self.metadataFilePrefix + filename
                        filenames.append(filename)
            if filenames:
                self.filesource.select(filenames)
                self.accepted = True
                self.close()
            return
        elif self.openType in (PLUGIN, PACKAGE):
            filename = self.filenames[int(selection[0][4:])][2]
        if filename is not None and not self.multiselect and not filename.endswith("/"):
            if hasattr(self, "taxonomyPackage"):
                # attempt to unmap the filename to original file
                # will be mapped again in loading, but this allows schemaLocation to be unmapped
                for prefix, remapping in self.taxonomyPackage["remappings"].items():
                    if isHttpUrl(remapping):
                        remapStart = remapping
                    else:
                        remapStart = self.metadataFilePrefix + remapping
                    if filename.startswith(remapStart):
                        # set unmapped file
                        filename = prefix + filename[len(remapStart):]
                        break
                    if (self.metadataFilePrefix.endswith("/META-INF/") and isHttpUrl(prefix) and
                            filename.startswith(self.metadataFilePrefix[:-10]) and
                            filename.startswith(remapping[len(self.filesource.url) + 1:])):
                        # recover unmapped file name for chosen in-archive relative file
                        filename = prefix + filename[len(remapping) - len(self.filesource.url) - 1:]
            if self.openType in (PLUGIN, PACKAGE):
                self.filesource.selection = filename
            else:
                self.filesource.select(filename)
            self.accepted = True
            self.close()
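# Illustrative sketch (not Arelle code) of the unmapping step above: reversing one
# catalog remapping so a user-selected in-archive path is reported under its canonical
# URL prefix. It mirrors the loop in ok() but as a standalone helper; the sample
# prefix/remapping values in the usage comment are hypothetical.
def _unmapFilename(filename, remappings, metadataFilePrefix):
    for prefix, remapping in remappings.items():
        # a remapping target is either an absolute URL or a path under the metadata prefix
        remapStart = remapping if isHttpUrl(remapping) else metadataFilePrefix + remapping
        if filename.startswith(remapStart):
            return prefix + filename[len(remapStart):]  # restore canonical prefix
    return filename  # not produced by any remapping

# e.g. with remappings {"http://example.com/taxonomy/": "taxonomy/"} and
# metadataFilePrefix "pkg.zip/META-INF/", a selection of
# "pkg.zip/META-INF/taxonomy/core.xsd" unmaps to "http://example.com/taxonomy/core.xsd".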
def saveTargetDocument(modelXbrl, targetDocumentFilename, targetDocumentSchemaRefs,
                       outputZip=None, filingFiles=None, *args, **kwargs):
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(targetDocumentFilename,
                                                                   modelXbrl.modelDocument.filepath)
    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(_("Extracting instance ") + os.path.basename(targetUrl))
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True)
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance)  # need dimension defaults
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
                     attributes=roleRefElt.items())
    # contexts
    for context in modelXbrl.contexts.values():
        newCntx = targetInstance.createContext(
            context.entityIdentifier[0], context.entityIdentifier[1],
            'instant' if context.isInstantPeriod else 'duration' if context.isStartEndPeriod else 'forever',
            context.startDatetime, context.endDatetime,
            None, context.qnameDims, [], [],
            id=context.id)
    for unit in modelXbrl.units.values():
        measures = unit.measures
        newUnit = targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}

    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem:
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newFact
                if filingFiles and fact.concept is not None and fact.concept.isTextBlock:
                    # check for img and other filing references
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)):
                                if elt.tag in ("a", "img"):
                                    for attrTag, attrValue in elt.items():
                                        if (attrTag in ("href", "src") and
                                                not isHttpUrl(attrValue) and
                                                not os.path.isabs(attrValue)):
                                            filingFiles.add(attrValue)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)
    # footnote links
    footnoteIdCount = {}
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes & relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and
                any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, attributes=attributes)
                    copyIxFootnoteHtml(linkChild, newChild,
                                       targetModelDocument=targetInstance.modelDocument,
                                       withText=True)
                    if filingFiles and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            if elt.tag in ("a", "img"):
                                for attrTag, attrValue in elt.items():
                                    if (attrTag in ("href", "src") and
                                            not isHttpUrl(attrValue) and
                                            not os.path.isabs(attrValue)):
                                        filingFiles.add(attrValue)
    targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
    if getattr(modelXbrl, "isTestcaseVariation", False):
        modelXbrl.extractedInlineInstance = True  # for validation comparison
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
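# Usage note: the extracted instance name is derived from the target document name by
# inserting "_extracted" before the extension (the targetUrl.rpartition(".") step
# above), e.g.:
#
#   "reports/annual.xhtml" -> "reports/annual_extracted.xhtml"
#
# A caller would typically pass outputZip to add the extracted instance to a package
# being written; filingFiles, when provided, collects relative href/src references
# found in text blocks and footnotes so those resources can be copied alongside the
# extracted instance.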