def func_json_data(xule_context, *args):
    """Read a json file/url.

    Arguments:
        file_url (string or url)

    Returns a dictionary/list of the json data.
    """
    import json

    file_url = args[0]

    if file_url.type not in ('string', 'uri'):
        raise XuleProcessingError(_("The file url argument of the json-data() function must be a string or uri, found '{}'.".format(file_url.value)),
                                  xule_context)

    # Use the FileSource object in arelle. This will open the file and handle taxonomy package mappings.
    from arelle import FileSource
    file_source = FileSource.openFileSource(file_url.value, xule_context.global_context.cntlr)
    file = file_source.file(file_url.value, binary=True)
    # file is a tuple of one item as a BytesIO stream. Since this is in bytes, it needs to be
    # converted to text via a decoder. Assuming the file is in utf-8.
    data_source = [x.decode('utf-8') for x in file[0].readlines()]

    try:
        json_source = json.loads(''.join(data_source))
    except ValueError:  # json.JSONDecodeError is a subclass of ValueError
        raise XuleProcessingError(_("The file '{}' is not a valid JSON file.".format(file_url.value)), xule_context)

    return xv.system_collection_to_xule(json_source, xule_context)
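# Hedged illustration (not from the source): the decode-then-parse pattern used by
# func_json_data above, shown standalone on an in-memory BytesIO stream. The stream
# stands in for file[0] returned by FileSource.file(); _example_json_decode is a
# hypothetical name.
def _example_json_decode():
    import io
    import json
    stream = io.BytesIO(b'{"concept": "us-gaap:Assets", "value": 100}')
    # Each line of the binary stream is decoded as utf-8 before being joined and parsed.
    text = ''.join(line.decode('utf-8') for line in stream.readlines())
    return json.loads(text)  # -> {'concept': 'us-gaap:Assets', 'value': 100}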
def openFileStream(cntlr, filepath, mode='r', encoding=None):
    if PackageManager.isMappedUrl(filepath):
        filepath = PackageManager.mappedUrl(filepath)
    elif isHttpUrl(filepath) and cntlr and hasattr(cntlr, "modelManager"):
        # may be called early in initialization for PluginManager
        filepath = cntlr.modelManager.disclosureSystem.mappedUrl(filepath)
    if archiveFilenameParts(filepath):  # file is in an archive
        return openFileSource(filepath, cntlr).file(filepath, binary='b' in mode, encoding=encoding)[0]
    if isHttpUrl(filepath) and cntlr:
        # normalize is a separate step in ModelDocument retrieval, combined here
        _cacheFilepath = cntlr.webCache.getfilename(filepath, normalize=True)
        if _cacheFilepath is None:
            raise IOError(_("Unable to open file: {0}.").format(filepath))
        filepath = _cacheFilepath
    # file path may be server (or memcache) or local file system
    if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
        filestream = None
        cacheKey = filepath[len(SERVER_WEB_CACHE) + 1:].replace("\\", "/")
        if cntlr.isGAE:  # check if in memcache
            cachedBytes = gaeGet(cacheKey)
            if cachedBytes:
                filestream = io.BytesIO(cachedBytes)
        if filestream is None:
            filestream = io.BytesIO()
            cntlr.webCache.retrieve(cntlr.webCache.cacheFilepathToUrl(filepath), filestream=filestream)
            if cntlr.isGAE:
                gaeSet(cacheKey, filestream.getvalue())
        if mode.endswith('t') or encoding:
            contents = filestream.getvalue()
            filestream.close()
            filestream = FileNamedStringIO(filepath, contents.decode(encoding or 'utf-8'))
        return filestream
    elif encoding is None and 'b' not in mode:
        # local file system: sniff the encoding from the first 512 bytes of the file
        openedFileStream = io.open(filepath, mode='rb')
        hdrBytes = openedFileStream.read(512)
        encoding = XmlUtil.encoding(hdrBytes, default=None)
        openedFileStream.close()
        return io.open(filepath, mode=mode, encoding=encoding)
    else:  # local file system
        return io.open(filepath, mode=mode, encoding=encoding)
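# Minimal sketch (an assumption, not arelle code): the text-mode branch of
# openFileStream above reads the first 512 bytes and asks XmlUtil.encoding() for the
# declared encoding, e.g. from <?xml version="1.0" encoding="ISO-8859-1"?>. This is
# a standalone analogue of that sniffing step; _sniff_xml_encoding is a hypothetical
# helper, not part of arelle.
def _sniff_xml_encoding(filepath, default=None):
    import re
    with open(filepath, mode='rb') as f:
        hdr_bytes = f.read(512)  # same header window openFileStream reads
    match = re.search(br'encoding=["\']([A-Za-z0-9._-]+)["\']', hdr_bytes)
    return match.group(1).decode('ascii') if match else default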
def func_csv_data(xule_context, *args):
    """Read a csv file/url.

    Arguments:
        file_url (string or url)
        has_headers (boolean) - determines if the first line of the csv file has headers
        type list (list) - list of xule types in the order of the columns of the csv file.
            This is optional. If not provided, then all the data will be treated as strings.
        as_dictionary (boolean) - return the row as a dictionary instead of a list. This is optional.
    """
    if len(args) < 2:
        raise XuleProcessingError(_("The csv-data() function requires at least 2 arguments (file url, has headers), found {} arguments.".format(len(args))),
                                  xule_context)
    if len(args) > 4:
        raise XuleProcessingError(_("The csv-data() function takes no more than 4 arguments (file url, has headers, column types, as dictionary), found {} arguments.".format(len(args))),
                                  xule_context)

    file_url = args[0]
    has_headers = args[1]

    if file_url.type not in ('string', 'uri'):
        raise XuleProcessingError(_("The file url argument (1st argument) of the csv-data() function must be a string or uri, found '{}'.".format(file_url.value)),
                                  xule_context)
    if has_headers.type != 'bool':
        raise XuleProcessingError(_("The has headers argument (2nd argument) of the csv-data() function must be a boolean, found '{}'.".format(has_headers.type)),
                                  xule_context)

    if len(args) >= 3:
        column_types = args[2]
        if column_types.type == 'none':
            ordered_cols = None
        elif column_types.type == 'list':
            ordered_cols = list()
            for col in column_types.value:
                if col.type != 'string':
                    raise XuleProcessingError(_("The type list argument (3rd argument) of the csv-data() function must be a list of strings, found '{}'.".format(col.type)),
                                              xule_context)
                ordered_cols.append(col.value)
        else:
            raise XuleProcessingError(_("The type list argument (3rd argument) of the csv-data() function must be a list, found '{}'.".format(column_types.type)),
                                      xule_context)
    else:
        ordered_cols = None

    if len(args) == 4:
        if args[3].type != 'bool':
            raise XuleProcessingError(_("The as dictionary argument (4th argument) of the csv-data() function must be a boolean, found '{}'.".format(args[3].type)),
                                      xule_context)
        return_row_type = 'dictionary' if args[3].value else 'list'
    else:
        return_row_type = 'list'

    if return_row_type == 'dictionary' and not has_headers.value:
        raise XuleProcessingError(_("When the csv-data() function is returning the rows as dictionaries (4th argument), the has headers argument (2nd argument) must be true."),
                                  xule_context)

    result = list()
    result_shadow = list()

    from arelle import PackageManager
    mapped_file_url = PackageManager.mappedUrl(file_url.value)

    # Use the FileSource object in arelle. This will open the file and handle taxonomy package mappings.
    from arelle import FileSource
    file_source = FileSource.openFileSource(file_url.value, xule_context.global_context.cntlr)
    file = file_source.file(file_url.value, binary=True)
    # file is a tuple of one item as a BytesIO stream. Since this is in bytes, it needs to be
    # converted to text via a decoder. Assuming the file is in utf-8.
    data_source = [x.decode('utf-8') for x in file[0].readlines()]

    import csv
    reader = csv.reader(data_source)
    first_line = True
    row_num = 0
    for line in reader:
        row_num += 1
        if first_line and has_headers.value:
            first_line = False  # skip the headers line
            if return_row_type == 'dictionary':
                # Need to get the names from the first row
                column_names = [x for x in line]
                if len(column_names) != len(set(column_names)):
                    raise XuleProcessingError(_("There are duplicate column names in the csv file. This is not allowed when returning rows as dictionaries. File: {}".format(file_url.value)),
                                              xule_context)
            continue

        if return_row_type == 'list':
            result_line = list()
            result_line_shadow = list()
        else:  # dictionary
            result_line = dict()
            result_line_shadow = dict()

        for col_num, item in enumerate(line):
            if ordered_cols is not None and col_num >= len(ordered_cols):
                raise XuleProcessingError(_("The number of columns on row {} is greater than the number of column types provided in the third argument of the csv-data() function. File: {}".format(row_num, file_url.value)),
                                          xule_context)
            item_value = convert_file_data_item(item,
                                                ordered_cols[col_num] if ordered_cols is not None else None,
                                                xule_context)
            if return_row_type == 'list':
                result_line.append(item_value)
                result_line_shadow.append(item_value.value)
            else:  # dictionary
                if col_num >= len(column_names):
                    raise XuleProcessingError(_("The number of columns on row {} is greater than the number of headers in the csv file. File: {}".format(
                        row_num,
                        file_url.value if mapped_file_url == file_url.value else file_url.value + ' --> ' + mapped_file_url)),
                                              xule_context)
                result_line[xv.XuleValue(xule_context, column_names[col_num], 'string')] = item_value
                result_line_shadow[column_names[col_num]] = item_value.value

        if return_row_type == 'list':
            result.append(xv.XuleValue(xule_context, tuple(result_line), 'list', shadow_collection=tuple(result_line_shadow)))
            result_shadow.append(tuple(result_line_shadow))
        else:  # dictionary
            result.append(xv.XuleValue(xule_context, frozenset(result_line.items()), 'dictionary', shadow_collection=frozenset(result_line_shadow.items())))
            result_shadow.append(frozenset(result_line_shadow.items()))

    return xv.XuleValue(xule_context, tuple(result), 'list', shadow_collection=tuple(result_shadow))
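# Hedged illustration (not from the source): the two row shapes func_csv_data builds,
# shown with the csv module on in-memory lines. XuleValue wrapping and type conversion
# are omitted; the "shadow" collections above carry raw values like these alongside
# the wrapped XuleValues. _example_csv_rows is a hypothetical name.
def _example_csv_rows(as_dictionary=True):
    import csv
    data_source = ['name,assets\n', 'CompanyA,100\n', 'CompanyB,250\n']
    reader = csv.reader(data_source)
    column_names = next(reader)  # first row holds the headers
    if as_dictionary:
        # -> [{'name': 'CompanyA', 'assets': '100'}, {'name': 'CompanyB', 'assets': '250'}]
        return [dict(zip(column_names, row)) for row in reader]
    # -> [['CompanyA', '100'], ['CompanyB', '250']]
    return [row for row in reader]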