def load_test():
    """ Returns a small test MRIO

    The test system contains:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    The test system only contains Z, Y, F, F_Y. The rest can be calculated
    with calc_all()

    Notes
    -----
        For development: This function can be used as an example of how to
        parse an IOSystem

    Returns
    -------
    IOSystem
    """

    # row_header:
    #    number of rows containing the header at the top of the file (for
    #    the columns)
    # col_header:
    #    number of cols containing the header at the beginning of the file
    #    (for the rows)
    # The row and column headers also contain the row for the units; these
    # are afterwards saved in an extra dataframe.
    #
    # unit_col: column containing the unit for the table
    file_data = collections.namedtuple(
        'file_data', ['file_name', 'row_header', 'col_header', 'unit_col'])

    # file names and header specs of the system
    test_system = dict(
        Z=file_data(file_name='trade_flows_Z.txt',
                    row_header=2, col_header=3, unit_col=2),
        Y=file_data(file_name='finald_demand_Y.txt',
                    row_header=2, col_header=3, unit_col=2),
        fac=file_data(file_name='factor_input.txt',
                      row_header=2, col_header=2, unit_col=1),
        emissions=file_data(file_name='emissions.txt',
                            row_header=2, col_header=3, unit_col=2),
        FDemissions=file_data(file_name='FDemissions.txt',
                              row_header=2, col_header=3, unit_col=2),
    )

    meta_rec = MRIOMetaData(location=PYMRIO_PATH['test_mrio'])

    # read the data into a dict of pandas.DataFrames
    data = {key: pd.read_csv(
        os.path.join(PYMRIO_PATH['test_mrio'], test_system[key].file_name),
        index_col=list(range(test_system[key].col_header)),
        header=list(range(test_system[key].row_header)),
        sep='\t')
        for key in test_system}

    meta_rec._add_fileio('Load test_mrio from {}'.format(
        PYMRIO_PATH['test_mrio']))

    # distribute the data into dicts which can be passed to the IOSystem.
    # To do so, some preparation is necessary:
    # - name the header data
    # - save the unit in its own dataframe and drop it from the tables
    trade = dict(Z=data['Z'], Y=data['Y'])
    factor_inputs = dict(F=data['fac'])
    emissions = dict(F=data['emissions'], F_Y=data['FDemissions'])

    trade['Z'].index.names = ['region', 'sector', 'unit']
    trade['Z'].columns.names = ['region', 'sector']
    trade['unit'] = pd.DataFrame(
        trade['Z'].iloc[:, 0].reset_index(level='unit').unit)
    trade['Z'].reset_index(level='unit', drop=True, inplace=True)

    trade['Y'].index.names = ['region', 'sector', 'unit']
    trade['Y'].columns.names = ['region', 'category']
    trade['Y'].reset_index(level='unit', drop=True, inplace=True)

    factor_inputs['name'] = 'Factor Inputs'
    factor_inputs['F'].index.names = ['inputtype', 'unit']
    factor_inputs['F'].columns.names = ['region', 'sector']
    factor_inputs['unit'] = pd.DataFrame(
        factor_inputs['F'].iloc[:, 0].reset_index(level='unit').unit)
    factor_inputs['F'].reset_index(level='unit', drop=True, inplace=True)

    emissions['name'] = 'Emissions'
    emissions['F'].index.names = ['stressor', 'compartment', 'unit']
    emissions['F'].columns.names = ['region', 'sector']
    emissions['unit'] = pd.DataFrame(
        emissions['F'].iloc[:, 0].reset_index(level='unit').unit)
    emissions['F'].reset_index(level='unit', drop=True, inplace=True)

    emissions['F_Y'].index.names = ['stressor', 'compartment', 'unit']
    emissions['F_Y'].columns.names = ['region', 'category']
    emissions['F_Y'].reset_index(level='unit', drop=True, inplace=True)

    # the population data - this is optional (None can be passed if no data
    # is available)
    popdata = pd.read_csv(
        os.path.join(PYMRIO_PATH['test_mrio'], './population.txt'),
        index_col=0, sep='\t').astype(float)

    return IOSystem(Z=data['Z'],
                    Y=data['Y'],
                    unit=trade['unit'],
                    meta=meta_rec,
                    factor_inputs=factor_inputs,
                    emissions=emissions,
                    population=popdata)
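# Usage sketch (added for illustration, not part of the original source;
# assumes calc_all() and the standard pymrio account names such as D_cba):
#
#     io = load_test()
#     io.calc_all()
#     print(io.emissions.D_cba)   # consumption-based emission accounts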
def _load_ini_based_io(path, recursive=False, ini=None,
                       subini={}, include_core=True,
                       only_coefficients=False):   # pragma: no cover
    """ DEPRECATED: for converting a previous version to the new json format

    Loads an IOSystem or Extension from ini files

    This function can be used to load an IOSystem or Extension specified in
    an ini file. DataFrames (tables) are loaded from text or binary pickle
    files. For the latter, the extension .pkl or .pickle is assumed, in all
    other cases the tables are assumed to be in .txt format.

    Parameters
    ----------
    path : string
        path or ini file name for the data to load

    recursive : boolean, optional
        If True, load also the data in the subfolders and add them as
        extensions to the IOSystem (in that case path must point to the
        root). Only first order subfolders are considered (no subfolders in
        subfolders) and if a folder does not contain an ini file it is
        skipped. Use the subini parameter in case of multiple ini files in
        a subfolder. Attribute names of the extensions in the IOSystem are
        based on the subfolder name. Default is False.

    ini : string, optional
        If there are several ini files in the root folder, take this one
        for loading the data. If None (default) take the ini found in the
        folder, error if several are found.

    subini : dict, optional
        If there are multiple inis in a subfolder, use the ini given in the
        dict. Format: 'subfoldername': 'ininame'. If a key for a subfolder
        is not found or None (default), the ini found in the folder will be
        taken, error if several are found.

    include_core : boolean, optional
        If False the load method does not include the A, L and Z matrix.
        This significantly reduces the required memory if the purpose is
        only to analyse the results calculated beforehand.

    Returns
    -------
    IOSystem or Extension class depending on systemtype in the ini file
    None in case of errors
    """

    # check path and given parameters
    ini_file_name = None

    path = os.path.abspath(os.path.normpath(path))

    if os.path.splitext(path)[1] == '.ini':
        (path, ini_file_name) = os.path.split(path)

    if ini:
        ini_file_name = ini

    if not os.path.exists(path):
        raise ReadError('Given path does not exist')

    if not ini_file_name:
        _inifound = False
        for file in os.listdir(path):
            if os.path.splitext(file)[1] == '.ini':
                if _inifound:
                    raise ReadError(
                        'Found multiple ini files in folder - specify one')
                ini_file_name = file
                _inifound = True

    # read the ini
    io_ini = configparser.RawConfigParser()
    io_ini.optionxform = lambda option: option

    io_ini.read(os.path.join(path, ini_file_name))

    systemtype = io_ini.get('systemtype', 'systemtype', fallback=None)
    name = io_ini.get('meta', 'name',
                      fallback=os.path.splitext(ini_file_name)[0])

    if systemtype == 'IOSystem':
        ret_system = IOSystem(name=name)
    elif systemtype == 'Extension':
        ret_system = Extension(name=name)
    else:
        raise ReadError('System not defined in ini')

    for key in io_ini['meta']:
        setattr(ret_system, key, io_ini.get('meta', key, fallback=None))

    for key in io_ini['files']:
        if '_nr_index_col' in key:
            continue
        if '_nr_header' in key:
            continue

        if not include_core:
            not_to_load = ['A', 'L', 'Z']
            if key in not_to_load:
                continue

        if only_coefficients:
            _io = IOSystem()
            if key not in _io.__coefficients__ + ['unit']:
                continue

        file_name = io_ini.get('files', key)
        nr_index_col = io_ini.get(
            'files', key + '_nr_index_col', fallback=None)
        nr_header = io_ini.get('files', key + '_nr_header', fallback=None)

        if (nr_index_col is None) or (nr_header is None):
            raise ReadError(
                'Index or column specification missing for {}'.format(
                    str(file_name)))

        _index_col = list(range(int(nr_index_col)))
        _header = list(range(int(nr_header)))

        if _index_col == [0]:
            _index_col = 0
        if _header == [0]:
            _header = 0

        file = os.path.join(path, file_name)
        logging.info('Load data from {}'.format(file))

        if (os.path.splitext(file)[1] == '.pkl' or
                os.path.splitext(file)[1] == '.pickle'):
            setattr(ret_system, key, pd.read_pickle(file))
        else:
            setattr(ret_system, key,
                    pd.read_csv(file,
                                index_col=_index_col,
                                header=_header,
                                sep='\t'))

    if recursive:
        # look for subfolders in the given path
        subfolder_list = os.walk(path).__next__()[1]

        # loop over all subfolders and append extensions based on the
        # ini file in each subfolder
        for subfolder in subfolder_list:
            subini_file_name = subini.get(subfolder)
            subpath = os.path.abspath(os.path.join(path, subfolder))

            if not subini_file_name:
                _inifound = False
                for file in os.listdir(subpath):
                    if os.path.splitext(file)[1] == '.ini':
                        if _inifound:
                            raise ReadError(
                                'Found multiple ini files in subfolder '
                                '{} - specify one'.format(subpath))
                        subini_file_name = file
                        _inifound = True
            if not _inifound:
                continue

            # read the ini
            subio_ini = configparser.RawConfigParser()
            subio_ini.optionxform = lambda option: option

            subio_ini.read(os.path.join(subpath, subini_file_name))

            systemtype = subio_ini.get('systemtype', 'systemtype',
                                       fallback=None)
            name = subio_ini.get('meta', 'name',
                                 fallback=os.path.splitext(
                                     subini_file_name)[0])

            if systemtype == 'IOSystem':
                raise ReadError('IOSystem found in subfolder {} - '
                                'only extensions expected'.format(subpath))
            elif systemtype == 'Extension':
                sub_system = Extension(name=name)
            else:
                raise ReadError('System not defined in ini')

            for key in subio_ini['meta']:
                setattr(sub_system, key,
                        subio_ini.get('meta', key, fallback=None))

            for key in subio_ini['files']:
                if '_nr_index_col' in key:
                    continue
                if '_nr_header' in key:
                    continue

                if only_coefficients:
                    _ext = Extension('temp')
                    if key not in _ext.__coefficients__ + ['unit']:
                        continue

                file_name = subio_ini.get('files', key)
                nr_index_col = subio_ini.get('files',
                                             key + '_nr_index_col',
                                             fallback=None)
                nr_header = subio_ini.get('files', key + '_nr_header',
                                          fallback=None)

                if (nr_index_col is None) or (nr_header is None):
                    raise ReadError('Index or column specification missing '
                                    'for {}'.format(str(file_name)))

                _index_col = list(range(int(nr_index_col)))
                _header = list(range(int(nr_header)))

                if _index_col == [0]:
                    _index_col = 0
                if _header == [0]:
                    _header = 0

                file = os.path.join(subpath, file_name)
                logging.info('Load data from {}'.format(file))

                if (os.path.splitext(file)[1] == '.pkl' or
                        os.path.splitext(file)[1] == '.pickle'):
                    setattr(sub_system, key, pd.read_pickle(file))
                else:
                    setattr(sub_system, key,
                            pd.read_csv(file,
                                        index_col=_index_col,
                                        header=_header,
                                        sep='\t'))

            # get a valid python name from the folder name
            def clean(varStr):
                return re.sub(r'\W|^(?=\d)', '_', str(varStr))

            setattr(ret_system, clean(subfolder), sub_system)

    return ret_system
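# Conversion sketch (added for illustration; './old_mrio' and './new_mrio'
# are placeholder paths and save_all() is assumed from the IOSystem API):
#
#     io = _load_ini_based_io('./old_mrio', recursive=True)
#     io.save_all('./new_mrio')   # re-save in the new json-based format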
def load(path, include_core=True, path_in_arc=''):
    """ Loads an IOSystem or Extension previously saved with pymrio

    This function can be used to load an IOSystem or Extension specified in
    a metadata file (as defined in DEFAULT_FILE_NAMES['filepara']:
    metadata.json)

    DataFrames (tables) are loaded from text or binary pickle files.
    For the latter, the extension .pkl or .pickle is assumed, in all other
    cases the tables are assumed to be in .txt format.

    Parameters
    ----------
    path : pathlib.Path or string
        Path or path with para file name for the data to load. This must
        either point to the directory containing the uncompressed data or
        the location of a compressed zip file with the data. In the latter
        case the parameter 'path_in_arc' needs to be given to further
        indicate the location of the data in the compressed file.

    include_core : boolean, optional
        If False the load method does not include the A, L and Z matrix.
        This significantly reduces the required memory if the purpose is
        only to analyse the results calculated beforehand.

    path_in_arc : string, optional
        Path to the data in the zip file (where the fileparameters file is
        located). path_in_arc must be given without leading dot and slash;
        thus to point to the data in the root of the compressed file pass
        '', for data in e.g. the folder 'emissions' pass 'emissions/'. Only
        used if parameter 'path' points to a compressed zip file.

    Returns
    -------
    IOSystem or Extension class depending on systemtype in the json file
    None in case of errors
    """
    path = Path(path)

    if not path.exists():
        raise ReadError('Given path does not exist')

    file_para = get_file_para(path=path, path_in_arc=path_in_arc)

    if file_para.content['systemtype'] == GENERIC_NAMES['iosys']:
        if zipfile.is_zipfile(str(path)):
            ret_system = IOSystem(meta=MRIOMetaData(
                location=path,
                path_in_arc=os.path.join(file_para.folder,
                                         DEFAULT_FILE_NAMES['metadata'])))
            ret_system.meta._add_fileio(
                "Loaded IO system from {} - {}".format(path, path_in_arc))
        else:
            ret_system = IOSystem(meta=MRIOMetaData(
                location=path / DEFAULT_FILE_NAMES['metadata']))
            ret_system.meta._add_fileio(
                "Loaded IO system from {}".format(path))
    elif file_para.content['systemtype'] == GENERIC_NAMES['ext']:
        ret_system = Extension(file_para.content['name'])
    else:
        raise ReadError('Type of system not defined in the file parameters')

    for key in file_para.content['files']:
        if not include_core and key in ['A', 'L', 'Z']:
            continue

        file_name = file_para.content['files'][key]['name']
        nr_index_col = file_para.content['files'][key]['nr_index_col']
        nr_header = file_para.content['files'][key]['nr_header']

        _index_col = list(range(int(nr_index_col)))
        _header = list(range(int(nr_header)))
        _index_col = 0 if _index_col == [0] else _index_col
        _header = 0 if _header == [0] else _header

        if key == 'FY':
            # Legacy code to read data saved with version < 0.4
            key = 'F_Y'

        if zipfile.is_zipfile(str(path)):
            full_file_name = os.path.join(file_para.folder, file_name)
            logging.info('Load data from {}'.format(full_file_name))
            with zipfile.ZipFile(file=str(path)) as zf:
                if (os.path.splitext(str(full_file_name))[1] == '.pkl' or
                        os.path.splitext(str(full_file_name))[1] ==
                        '.pickle'):
                    setattr(ret_system, key,
                            pd.read_pickle(zf.open(full_file_name)))
                else:
                    setattr(ret_system, key,
                            pd.read_csv(zf.open(full_file_name),
                                        index_col=_index_col,
                                        header=_header,
                                        sep='\t'))
        else:
            full_file_name = path / file_name
            logging.info('Load data from {}'.format(full_file_name))
            if (os.path.splitext(str(full_file_name))[1] == '.pkl' or
                    os.path.splitext(str(full_file_name))[1] == '.pickle'):
                setattr(ret_system, key, pd.read_pickle(full_file_name))
            else:
                setattr(ret_system, key,
                        pd.read_csv(full_file_name,
                                    index_col=_index_col,
                                    header=_header,
                                    sep='\t'))

    return ret_system
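# Usage sketch (added for illustration; the paths are placeholders):
#
#     io = load('/tmp/mrio_dir')                      # uncompressed folder
#     io = load('/tmp/mrio.zip', path_in_arc='')      # data in the zip root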
def parse_exiobase22(path, charact=None, iosystem=None,
                     version='exiobase 2.2', popvector='exio2'):
    """ Parse the exiobase 2.2 source files for the IOSystem

    The function parses the product by product and industry by industry
    source files with flow matrices (Z)

    Parameters
    ----------
    path : string
        Path to the EXIOBASE source files
    charact : string, optional
        Filename with path to the characterisation matrices for the
        extensions (xls). This is provided together with the EXIOBASE
        system and given as a xls file. The four sheets Q_factorinputs,
        Q_emission, Q_materials and Q_resources are read and used to
        generate one new extension with the impacts
    iosystem : string, optional
        Note for the IOSystem, recommended to be 'pxp' or 'ixi' for
        product by product or industry by industry. However, this can be
        any string and can have more information if needed (e.g. for
        different technology assumptions). The string will be passed to
        the IOSystem.
    version : string, optional
        This can be used as a version tracking system.
        Default: exiobase 2.2
    popvector : string or pd.DataFrame, optional
        The population vector for the countries. This can be given as
        pd.DataFrame(index=population, columns=countrynames) or (default)
        will be taken from the pymrio module. If popvector=None no
        population data will be passed to the IOSystem.

    Returns
    -------
    IOSystem
        An IOSystem with the parsed exiobase 2 data

    Raises
    ------
    EXIOError
        If the exiobase source files are not complete in the given path
    """
    path = path.rstrip('\\')
    path = os.path.abspath(path)

    # standard file names in exiobase
    files_exio = dict(
        # exiobase 2.2
        Z='mrIot_version2.2.0.txt',
        Y='mrFinalDemand_version2.2.0.txt',
        F_fac='mrFactorInputs_version2.2.0.txt',
        F_emissions='mrEmissions_version2.2.0.txt',
        F_materials='mrMaterials_version2.2.0.txt',
        F_resources='mrResources_version2.2.0.txt',
        FY_emissions='mrFDEmissions_version2.2.0.txt',
        FY_materials='mrFDMaterials_version2.2.0.txt',

        # old exiobase 2.1 filenames
        # Z='mrIot.txt',
        # Y='mrFinalDemand.txt',
        # F_fac='mrFactorInputs.txt',
        # F_emissions='mrEmissions.txt',
        # F_materials='mrMaterials.txt',
        # F_resources='mrResources.txt',
        # FY_emissions='mrFDEmissions.txt',
        # FY_materials='mrFDMaterials.txt',
    )

    # check if the exiobase source is complete
    _intersect = [val for val in files_exio.values()
                  if val in os.listdir(path)]
    if len(_intersect) != len(files_exio.values()):
        raise pymrio.core.EXIOError('EXIOBASE files missing')

    # number of row and column headers in EXIOBASE
    head_col = dict()
    head_row = dict()
    # head_col: number of cols containing row headers at the beginning
    # head_row: number of rows containing col headers at the top
    head_col['Z'] = 3
    head_row['Z'] = 2
    head_col['Y'] = 3
    head_row['Y'] = 2
    head_col['F_fac'] = 2
    head_row['F_fac'] = 2
    head_col['F_emissions'] = 3
    head_row['F_emissions'] = 2
    head_col['F_materials'] = 2
    head_row['F_materials'] = 2
    head_col['F_resources'] = 3
    head_row['F_resources'] = 2
    head_col['FY_emissions'] = 3
    head_row['FY_emissions'] = 2
    head_col['FY_materials'] = 2
    head_row['FY_materials'] = 2

    # read the data into pandas
    logging.info('Read exiobase2 from {}'.format(path))
    data = {key: pd.read_table(
        os.path.join(path, files_exio[key]),
        index_col=list(range(head_col[key])),
        header=list(range(head_row[key])))
        for key in files_exio}

    # refine multiindex and save units
    data['Z'].index.names = ['region', 'sector', 'unit']
    data['Z'].columns.names = ['region', 'sector']
    data['unit'] = pd.DataFrame(
        data['Z'].iloc[:, 0].reset_index(level='unit').unit)
    data['Z'].reset_index(level='unit', drop=True, inplace=True)
    data['Y'].index.names = ['region', 'sector', 'unit']
    data['Y'].columns.names = ['region', 'category']
    data['Y'].reset_index(level='unit', drop=True, inplace=True)

    ext_unit = dict()
    for key in ['F_fac', 'F_emissions', 'F_materials',
                'F_resources', 'FY_emissions', 'FY_materials']:
        if head_col[key] == 3:
            data[key].index.names = ['stressor', 'compartment', 'unit']
        if head_col[key] == 2:
            data[key].index.names = ['stressor', 'unit']
        if 'FY' in key:
            data[key].columns.names = ['region', 'category']
            data[key].reset_index(level='unit', drop=True, inplace=True)
        else:
            data[key].columns.names = ['region', 'sector']
            ext_unit[key] = pd.DataFrame(
                data[key].iloc[:, 0].reset_index(level='unit').unit)
            data[key].reset_index(level='unit', drop=True, inplace=True)
            if key == 'F_resources':
                data[key].reset_index(level='compartment',
                                      drop=True, inplace=True)
                ext_unit[key].reset_index(level='compartment',
                                          drop=True, inplace=True)

    # build the extensions
    ext = dict()
    ext['factor_inputs'] = {'F': data['F_fac'],
                            'unit': ext_unit['F_fac'],
                            'name': 'factor input'}
    ext['emissions'] = {'F': data['F_emissions'],
                        'FY': data['FY_emissions'],
                        'unit': ext_unit['F_emissions'],
                        'name': 'emissions'}
    ext['materials'] = {'F': data['F_materials'],
                        'FY': data['FY_materials'],
                        'unit': ext_unit['F_materials'],
                        'name': 'material extraction'}
    ext['resources'] = {'F': data['F_resources'],
                        'unit': ext_unit['F_resources'],
                        'name': 'resources'}

    # read the characterisation matrices if available
    # and build one extension with the impacts
    if charact:
        # dict with correspondence to the extensions
        Qsheets = {'Q_factorinputs': 'factor_inputs',
                   'Q_emission': 'emissions',
                   'Q_materials': 'materials',
                   'Q_resources': 'resources'}

        Q_head_col = dict()
        Q_head_row = dict()
        Q_head_col_rowname = dict()
        Q_head_col_rowunit = dict()
        Q_head_col_metadata = dict()

        # number of cols containing row headers at the beginning
        Q_head_col['Q_emission'] = 4
        # number of rows containing col headers at the top - this will be
        # skipped
        Q_head_row['Q_emission'] = 3
        # assuming the same classification as in the extensions
        Q_head_col['Q_factorinputs'] = 2
        Q_head_row['Q_factorinputs'] = 2
        Q_head_col['Q_resources'] = 2
        Q_head_row['Q_resources'] = 3
        Q_head_col['Q_materials'] = 2
        Q_head_row['Q_materials'] = 2

        # column to use as name for the rows
        Q_head_col_rowname['Q_emission'] = 1
        Q_head_col_rowname['Q_factorinputs'] = 0
        Q_head_col_rowname['Q_resources'] = 0
        Q_head_col_rowname['Q_materials'] = 0

        # column to use as unit for the rows which also gives the last
        # column before the data
        Q_head_col_rowunit['Q_emission'] = 3
        Q_head_col_rowunit['Q_factorinputs'] = 1
        Q_head_col_rowunit['Q_resources'] = 1
        Q_head_col_rowunit['Q_materials'] = 1

        charac_data = {Qname: pd.read_excel(
            charact,
            sheet_name=Qname,
            skiprows=list(range(0, Q_head_row[Qname])),
            header=None)
            for Qname in Qsheets}

        _units = dict()
        # temp dict for the calculated impacts which then
        # get summarized in the 'impact' extension
        _impact = dict()
        impact = dict()
        for Qname in Qsheets:
            # unfortunately the names in Q_emissions are
            # not completely unique - fix that
            _index = charac_data[Qname][Q_head_col_rowname[Qname]]
            if Qname == 'Q_emission':
                _index[42] = _index[42] + ' 2008'
                _index[43] = _index[43] + ' 2008'
                _index[44] = _index[44] + ' 2010'
                _index[45] = _index[45] + ' 2010'
            charac_data[Qname].index = (
                charac_data[Qname][Q_head_col_rowname[Qname]])

            _units[Qname] = pd.DataFrame(
                charac_data[Qname].iloc[:, Q_head_col_rowunit[Qname]])
            _units[Qname].columns = ['unit']
            _units[Qname].index.name = 'impact'
            charac_data[Qname] = charac_data[Qname].iloc[
                :, Q_head_col_rowunit[Qname] + 1:]
            charac_data[Qname].index.name = 'impact'

            if 'FY' in ext[Qsheets[Qname]]:
                _FY = ext[Qsheets[Qname]]['FY'].values
            else:
                _FY = np.zeros([ext[Qsheets[Qname]]['F'].shape[0],
                                data['Y'].shape[1]])
            _impact[Qname] = {
                'F': charac_data[Qname].dot(ext[Qsheets[Qname]]['F'].values),
                'FY': charac_data[Qname].dot(_FY),
                'unit': _units[Qname]}

        impact['F'] = pd.concat([_impact['Q_factorinputs']['F'],
                                 _impact['Q_emission']['F'],
                                 _impact['Q_materials']['F'],
                                 _impact['Q_resources']['F']])
        impact['FY'] = pd.concat([_impact['Q_factorinputs']['FY'],
                                  _impact['Q_emission']['FY'],
                                  _impact['Q_materials']['FY'],
                                  _impact['Q_resources']['FY']])
        impact['F'].columns = ext['emissions']['F'].columns
        impact['FY'].columns = ext['emissions']['FY'].columns
        impact['unit'] = pd.concat([_impact['Q_factorinputs']['unit'],
                                    _impact['Q_emission']['unit'],
                                    _impact['Q_materials']['unit'],
                                    _impact['Q_resources']['unit']])
        impact['name'] = 'impact'
        ext['impacts'] = impact

    if popvector == 'exio2':
        popdata = pd.read_table(
            os.path.join(PYMRIO_PATH['exio20'], './misc/population.txt'),
            index_col=0).astype(float)
    else:
        popdata = popvector

    return IOSystem(Z=data['Z'], Y=data['Y'], unit=data['unit'],
                    population=popdata, **ext)
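# Usage sketch (added for illustration; the source folder and the
# characterisation xls file name are placeholders):
#
#     exio = parse_exiobase22('/data/exiobase/2.2/pxp',
#                             charact='/data/exiobase/2.2/Q_matrices.xls',
#                             iosystem='pxp')
#     exio.calc_all()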
def load(path, include_core=True):
    """ Loads an IOSystem or Extension previously saved with pymrio

    This function can be used to load an IOSystem or Extension specified in
    a file parameter file (DEFAULT_FILE_NAMES['filepara']). DataFrames
    (tables) are loaded from text or binary pickle files. For the latter,
    the extension .pkl or .pickle is assumed, in all other cases the tables
    are assumed to be in .txt format.

    Parameters
    ----------
    path : string
        path or file parameter file name for the data to load

    include_core : boolean, optional
        If False the load method does not include the A, L and Z matrix.
        This significantly reduces the required memory if the purpose is
        only to analyse the results calculated beforehand.

    Returns
    -------
    IOSystem or Extension class depending on systemtype in the json file
    None in case of errors
    """
    path = path.rstrip('\\')
    path = os.path.abspath(path)

    if not os.path.exists(path):
        raise ReadError('Given path does not exist')

    para_file_path = os.path.join(path, DEFAULT_FILE_NAMES['filepara'])
    if not os.path.isfile(para_file_path):
        raise ReadError('No file parameter file found')

    with open(para_file_path, 'r') as pf:
        file_para = json.load(pf)

    if file_para['systemtype'] == GENERIC_NAMES['iosys']:
        meta_file_path = os.path.join(path, DEFAULT_FILE_NAMES['metadata'])
        ret_system = IOSystem(meta=MRIOMetaData(location=meta_file_path))
        ret_system.meta._add_fileio(
            "Loaded IO system from {}".format(path))
    elif file_para['systemtype'] == GENERIC_NAMES['ext']:
        ret_system = Extension(file_para['name'])
    else:
        raise ReadError('Type of system not defined in the file parameters')

    for key in file_para['files']:
        if not include_core:
            if key in ['A', 'L', 'Z']:
                continue

        file_name = file_para['files'][key]['name']
        full_file_name = os.path.join(path, file_name)
        nr_index_col = file_para['files'][key]['nr_index_col']
        nr_header = file_para['files'][key]['nr_header']

        logging.info('Load data from {}'.format(full_file_name))

        _index_col = list(range(int(nr_index_col)))
        _header = list(range(int(nr_header)))

        if _index_col == [0]:
            _index_col = 0
        if _header == [0]:
            _header = 0

        if (os.path.splitext(full_file_name)[1] == '.pkl' or
                os.path.splitext(full_file_name)[1] == '.pickle'):
            setattr(ret_system, key, pd.read_pickle(full_file_name))
        else:
            setattr(ret_system, key,
                    pd.read_table(full_file_name,
                                  index_col=_index_col,
                                  header=_header))

    return ret_system
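# Usage sketch (added for illustration; shows the memory-saving variant
# which skips the A, L and Z matrices, '/tmp/mrio_dir' is a placeholder):
#
#     results_only = load('/tmp/mrio_dir', include_core=False)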