コード例 #1
0
ファイル: fileio.py プロジェクト: nigolred/pymrio
def load_test():
    """ Build and return the small test MRIO

    The test system is read from the folder PYMRIO_PATH['test_mrio'] and
    contains:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    Only Z, Y, F and F_Y are read from disk. The rest can be calculated
    with calc_all()

    Notes
    -----

        For development: This function can be used as an example of
        how to parse an IOSystem

    Returns
    -------

    IOSystem

    """

    # Description of one source file:
    #   file_name:  name of the tab separated text file
    #   row_header: number of header rows at the top of the file (these
    #               become the column labels)
    #   col_header: number of header columns at the beginning of the file
    #               (these become the row labels)
    #   unit_col:   column containing the unit for the table
    # The header rows/columns also contain the units; these are split off
    # into a separate dataframe further down.
    file_data = collections.namedtuple(
        'file_data', ['file_name', 'row_header', 'col_header', 'unit_col'])

    # file names and header specs of the system
    test_system = {
        'Z': file_data('trade_flows_Z.txt', 2, 3, 2),
        'Y': file_data('finald_demand_Y.txt', 2, 3, 2),
        'fac': file_data('factor_input.txt', 2, 2, 1),
        'emissions': file_data('emissions.txt', 2, 3, 2),
        'FDemissions': file_data('FDemissions.txt', 2, 3, 2),
    }

    test_root = PYMRIO_PATH['test_mrio']
    meta_rec = MRIOMetaData(location=test_root)

    # read every table into a pandas.DataFrame, keyed like test_system
    data = {}
    for key, spec in test_system.items():
        data[key] = pd.read_csv(
            os.path.join(test_root, spec.file_name),
            index_col=list(range(spec.col_header)),
            header=list(range(spec.row_header)),
            sep='\t')

    meta_rec._add_fileio('Load test_mrio from {}'.format(test_root))

    def _label_and_strip_unit(table, index_names, column_names,
                              keep_unit=True):
        # Name the header levels of `table` (in place), optionally copy the
        # 'unit' index level into its own dataframe, then drop that level
        # from the table.
        table.index.names = index_names
        table.columns.names = column_names
        unit = None
        if keep_unit:
            first_column = table.iloc[:, 0]
            unit = pd.DataFrame(first_column.reset_index(level='unit').unit)
        table.reset_index(level='unit', drop=True, inplace=True)
        return unit

    # core tables - the unit of Z is used for the whole system
    trade_unit = _label_and_strip_unit(
        data['Z'], ['region', 'sector', 'unit'], ['region', 'sector'])
    _label_and_strip_unit(
        data['Y'], ['region', 'sector', 'unit'], ['region', 'category'],
        keep_unit=False)

    # extension: factor inputs
    factor_inputs = {'F': data['fac'], 'name': 'Factor Inputs'}
    factor_inputs['unit'] = _label_and_strip_unit(
        factor_inputs['F'], ['inputtype', 'unit'], ['region', 'sector'])

    # extension: emissions (with final demand emissions F_Y)
    emissions = {
        'F': data['emissions'],
        'F_Y': data['FDemissions'],
        'name': 'Emissions',
    }
    emissions['unit'] = _label_and_strip_unit(
        emissions['F'],
        ['stressor', 'compartment', 'unit'], ['region', 'sector'])
    _label_and_strip_unit(
        emissions['F_Y'],
        ['stressor', 'compartment', 'unit'], ['region', 'category'],
        keep_unit=False)

    # the population data - this is optional (None can be passed if no data
    # is available)
    population_file = os.path.join(test_root, './population.txt')
    popdata = pd.read_csv(population_file, index_col=0, sep='\t')
    popdata = popdata.astype(float)

    return IOSystem(Z=data['Z'],
                    Y=data['Y'],
                    unit=trade_unit,
                    meta=meta_rec,
                    factor_inputs=factor_inputs,
                    emissions=emissions,
                    population=popdata)
コード例 #2
0
ファイル: fileio.py プロジェクト: nigolred/pymrio
def _find_single_ini(folder, multiple_msg):
    """ Return the name of the only .ini file in folder (None if none exists)

    Raises ReadError(multiple_msg) if the folder holds more than one ini file.
    """
    ini_files = [entry for entry in os.listdir(folder)
                 if os.path.splitext(entry)[1] == '.ini']
    if len(ini_files) > 1:
        raise ReadError(multiple_msg)
    return ini_files[0] if ini_files else None


def _ini_header_positions(count):
    """ Convert a header count from the ini into a pandas index_col/header value

    A count of 1 becomes the scalar 0, everything else a list of positions.
    """
    positions = list(range(int(count)))
    return 0 if positions == [0] else positions


def _read_ini_tables(target, parser, folder, skip_keys=(), allowed_keys=None):
    """ Load all tables of the 'files' section of parser as attributes of target

    Keys in skip_keys are ignored; if allowed_keys is not None only keys
    contained in it are loaded.
    """
    for key in parser['files']:
        # these entries describe the layout of a table, they are not tables
        if '_nr_index_col' in key or '_nr_header' in key:
            continue
        if key in skip_keys:
            continue
        if allowed_keys is not None and key not in allowed_keys:
            continue

        file_name = parser.get('files', key)
        nr_index_col = parser.get(
            'files', key + '_nr_index_col', fallback=None)
        nr_header = parser.get('files', key + '_nr_header', fallback=None)

        if (nr_index_col is None) or (nr_header is None):
            raise ReadError(
                'Index or column specification missing for {}'.format(
                    str(file_name)))

        file = os.path.join(folder, file_name)
        logging.info('Load data from {}'.format(file))

        if os.path.splitext(file)[1] in ('.pkl', '.pickle'):
            # NOTE: unpickling can execute arbitrary code - only load data
            # from trusted sources
            table = pd.read_pickle(file)
        else:
            table = pd.read_csv(file,
                                index_col=_ini_header_positions(nr_index_col),
                                header=_ini_header_positions(nr_header),
                                sep='\t')
        setattr(target, key, table)


def _load_ini_based_io(path, recursive=False, ini=None,
                       subini=None, include_core=True,
                       only_coefficients=False):    # pragma: no cover
    """ DEPRECATED: For convert a previous version to the new json format

    Loads a IOSystem or Extension from a ini files

    This function can be used to load a IOSystem or Extension specified in a
    ini file. DataFrames (tables) are loaded from text or binary pickle files.
    For the latter, the extension .pkl or .pickle is assumed, in all other case
    the tables are assumed to be in .txt format.

    Parameters
    ----------

    path : string
        path or ini file name for the data to load

    recursive : boolean, optional
        If True, load also the data in the subfolders and add them as
        extensions to the IOSystem (in that case path must point to the root).
        Only first order subfolders are considered (no subfolders in
        subfolders) and if a folder does not contain a ini file it's skipped.
        Use the subini parameter in case of multiple ini files in a subfolder.
        Attribute name of the extension in the IOSystem are based on the
        subfolder name.  Default is False

    ini : string, optional
        If there are several ini files in the root folder, take this one for
        loading the data If None (default) take the ini found in the folder,
        error if several are found

    subini : dict, optional
        If there are multiple ini in the subfolder, use the ini given in the
        dict.  Format: 'subfoldername':'ininame' If a key for a subfolder is
        not found or None (default), the ini found in the folder will be taken,
        error if several are found

    include_core : boolean, optional
        If False the load method does not include A, L and Z matrix. This
        significantly reduces the required memory if the purpose is only
        to analyse the results calculated beforehand.

    only_coefficients : boolean, optional
        If True, only the tables named in the __coefficients__ attribute of
        the IOSystem (respectively Extension) and 'unit' are loaded.
        Default is False

    Returns
    -------

        IOSystem or Extension class depending on systemtype in the ini file

    Raises
    ------
    ReadError
        If the path does not exist, no or several ini files are found, the
        systemtype is not defined or the index/header specification of a
        table is missing.

    """
    subini = {} if subini is None else subini

    # check path and given parameter
    ini_file_name = None

    path = os.path.abspath(os.path.normpath(path))

    if os.path.splitext(path)[1] == '.ini':
        (path, ini_file_name) = os.path.split(path)

    if ini:
        ini_file_name = ini

    if not os.path.exists(path):
        raise ReadError('Given path does not exist')

    if not ini_file_name:
        ini_file_name = _find_single_ini(
            path, 'Found multiple ini files in folder - specify one')
    if not ini_file_name:
        # without this check os.path.join below fails with a TypeError
        raise ReadError('No ini file found in {}'.format(path))

    # read the ini
    io_ini = configparser.RawConfigParser()
    # keep the option names case sensitive
    io_ini.optionxform = lambda option: option

    io_ini.read(os.path.join(path, ini_file_name))

    systemtype = io_ini.get('systemtype', 'systemtype', fallback=None)
    name = io_ini.get('meta', 'name',
                      fallback=os.path.splitext(ini_file_name)[0])

    if systemtype == 'IOSystem':
        ret_system = IOSystem(name=name)
    elif systemtype == 'Extension':
        ret_system = Extension(name=name)
    else:
        raise ReadError('System not defined in ini')

    for key in io_ini['meta']:
        setattr(ret_system, key, io_ini.get('meta', key, fallback=None))

    _read_ini_tables(
        ret_system, io_ini, path,
        skip_keys=() if include_core else ('A', 'L', 'Z'),
        allowed_keys=(IOSystem().__coefficients__ + ['unit']
                      if only_coefficients else None))

    if recursive:
        # look for subfolder in the given path
        subfolder_list = next(os.walk(path))[1]

        # loop all subfolder and append extension based on
        # ini file in subfolder
        for subfolder in subfolder_list:
            subpath = os.path.abspath(os.path.join(path, subfolder))

            subini_file_name = subini.get(subfolder)
            if not subini_file_name:
                subini_file_name = _find_single_ini(
                    subpath,
                    'Found multiple ini files in subfolder '
                    '{} - specify one'.format(subpath))
            if not subini_file_name:
                # folder without ini file - nothing to load here
                continue

            # read the ini
            subio_ini = configparser.RawConfigParser()
            subio_ini.optionxform = lambda option: option

            subio_ini.read(os.path.join(subpath, subini_file_name))

            systemtype = subio_ini.get('systemtype', 'systemtype',
                                       fallback=None)
            name = subio_ini.get('meta', 'name',
                                 fallback=os.path.splitext(
                                     subini_file_name)[0])

            if systemtype == 'IOSystem':
                raise ReadError('IOSystem found in subfolder {} - '
                                'only extensions expected'.format(subpath))
            elif systemtype == 'Extension':
                sub_system = Extension(name=name)
            else:
                raise ReadError('System not defined in ini')

            for key in subio_ini['meta']:
                setattr(sub_system, key, subio_ini.get('meta', key,
                                                       fallback=None))

            _read_ini_tables(
                sub_system, subio_ini, subpath,
                allowed_keys=(Extension('temp').__coefficients__ + ['unit']
                              if only_coefficients else None))

            # get valid python name from folder and attach the extension
            # once, after all of its tables are loaded
            attribute_name = re.sub(r'\W|^(?=\d)', '_', str(subfolder))
            setattr(ret_system, attribute_name, sub_system)

    return ret_system
コード例 #3
0
ファイル: fileio.py プロジェクト: nigolred/pymrio
def load(path, include_core=True, path_in_arc=''):
    """ Loads a IOSystem or Extension previously saved with pymrio

    This function can be used to load a IOSystem or Extension specified in a
    metadata file (as defined in DEFAULT_FILE_NAMES['filepara']: metadata.json)

    DataFrames (tables) are loaded from text or binary pickle files.
    For the latter, the extension .pkl or .pickle is assumed, in all other case
    the tables are assumed to be in .txt format.

    Parameters
    ----------
    path : pathlib.Path or string
        Path or path with para file name for the data to load. This must
        either point to the directory containing the uncompressed data or
        the location of a compressed zip file with the data. In the
        later case the parameter 'path_in_arc' need to be specific to
        further indicate the location of the data in the compressed file.

    include_core : boolean, optional
        If False the load method does not include A, L and Z matrix. This
        significantly reduces the required memory if the purpose is only
        to analyse the results calculated beforehand.

    path_in_arc: string, optional
        Path to the data in the zip file (where the fileparameters file is
        located). path_in_arc must be given without leading dot and slash;
        thus to point to the data in the root of the compressed file pass '',
        for data in e.g. the folder 'emissions' pass 'emissions/'.  Only used
        if parameter 'path' points to an compressed zip file.

    Returns
    -------

        IOSystem or Extension class depending on systemtype in the json file

    Raises
    ------
    ReadError
        If the given path does not exist or the systemtype in the file
        parameters is neither an IOSystem nor an Extension.

    """
    path = Path(path)

    if not path.exists():
        raise ReadError('Given path does not exist')

    file_para = get_file_para(path=path, path_in_arc=path_in_arc)

    # the archive type does not change while loading - check it only once
    from_zip = zipfile.is_zipfile(str(path))

    if file_para.content['systemtype'] == GENERIC_NAMES['iosys']:
        if from_zip:
            ret_system = IOSystem(meta=MRIOMetaData(
                location=path,
                path_in_arc=os.path.join(file_para.folder,
                                         DEFAULT_FILE_NAMES['metadata'])))
            ret_system.meta._add_fileio(
                "Loaded IO system from {} - {}".format(path, path_in_arc))
        else:
            ret_system = IOSystem(meta=MRIOMetaData(
                location=path / DEFAULT_FILE_NAMES['metadata']))
            ret_system.meta._add_fileio(
                "Loaded IO system from {}".format(path))

    elif file_para.content['systemtype'] == GENERIC_NAMES['ext']:
        ret_system = Extension(file_para.content['name'])

    else:
        raise ReadError('Type of system no defined in the file parameters')

    for key in file_para.content['files']:
        # skip the (large) core matrices if they are not requested
        if not include_core and key in ['A', 'L', 'Z']:
            continue

        table_para = file_para.content['files'][key]
        file_name = table_para['name']
        _index_col = list(range(int(table_para['nr_index_col'])))
        _header = list(range(int(table_para['nr_header'])))
        # a single header row/column is passed to pandas as scalar
        _index_col = 0 if _index_col == [0] else _index_col
        _header = 0 if _header == [0] else _header

        if key == 'FY':  # Legacy code to read data saved with version < 0.4
            key = 'F_Y'

        if from_zip:
            full_file_name = os.path.join(file_para.folder, file_name)
        else:
            full_file_name = path / file_name
        logging.info('Load data from {}'.format(full_file_name))

        is_pickle = (os.path.splitext(str(full_file_name))[1]
                     in ('.pkl', '.pickle'))

        # NOTE: unpickling can execute arbitrary code - only load data from
        # trusted sources
        if from_zip:
            with zipfile.ZipFile(file=str(path)) as zf:
                with zf.open(full_file_name) as member:
                    if is_pickle:
                        table = pd.read_pickle(member)
                    else:
                        table = pd.read_csv(member,
                                            index_col=_index_col,
                                            header=_header, sep='\t')
        elif is_pickle:
            table = pd.read_pickle(full_file_name)
        else:
            table = pd.read_csv(full_file_name,
                                index_col=_index_col,
                                header=_header,
                                sep='\t')

        setattr(ret_system, key, table)

    return ret_system
コード例 #4
0
def parse_exiobase22(path,
                     charact=None,
                     iosystem=None,
                     version='exiobase 2.2',
                     popvector='exio2'):
    """ Parse the exiobase 2.2 source files for the IOSystem

    The function parse product by product and industry by industry source file
    with flow matrices (Z)

    Parameters
    ----------
    path : string
        Path to the EXIOBASE source files
    charact : string, optional
        Filename with path to the characterisation matrices for the extensions
        (xls).  This is provided together with the EXIOBASE system and given as
        a xls file. The four sheets  Q_factorinputs, Q_emission, Q_materials and
        Q_resources are read and used to generate one new extensions with the
        impacts
    iosystem : string, optional
        Note for the IOSystem, recommended to be 'pxp' or 'ixi' for
        product by product or industry by industry.
        However, this can be any string and can have more information if needed
        (eg for different technology assumptions)
        NOTE(review): this argument is not used in the function body.
    version : string, optional
        This can be used as a version tracking system. Default: exiobase 2.2
        NOTE(review): this argument is not used in the function body.
    popvector : string or pd.DataFrame, optional
        The population vector for the countries.  This can be given as
        pd.DataFrame(index = population, columns = countrynames) or, (default)
        will be taken from the pymrio module. If popvector = None no population
        data will be passed to the IOSystem.

    Returns
    -------
    IOSystem
        A IOSystem with the parsed exiobase 2 data

    Raises
    ------
    EXIOError
        If the exiobase source files are not complete in the given path

    """
    path = path.rstrip('\\')
    path = os.path.abspath(path)

    # standard file names in exiobase 2.2 (exiobase 2.1 used the same names
    # without the '_version2.2.0' suffix)
    files_exio = dict(
        Z='mrIot_version2.2.0.txt',
        Y='mrFinalDemand_version2.2.0.txt',
        F_fac='mrFactorInputs_version2.2.0.txt',
        F_emissions='mrEmissions_version2.2.0.txt',
        F_materials='mrMaterials_version2.2.0.txt',
        F_resources='mrResources_version2.2.0.txt',
        FY_emissions='mrFDEmissions_version2.2.0.txt',
        FY_materials='mrFDMaterials_version2.2.0.txt',
    )

    # check if source exiobase is complete (list the folder only once)
    available_files = set(os.listdir(path))
    if any(name not in available_files for name in files_exio.values()):
        raise pymrio.core.EXIOError('EXIOBASE files missing')

    # number of cols containing row headers at the beginning of each file
    head_col = {
        'Z': 3,
        'Y': 3,
        'F_fac': 2,
        'F_emissions': 3,
        'F_materials': 2,
        'F_resources': 3,
        'FY_emissions': 3,
        'FY_materials': 2,
    }
    # number of rows containing col headers at the top of each file
    head_row = {key: 2 for key in files_exio}

    # read the data into pandas
    logging.info('Read exiobase2 from {}'.format(path))
    data = {
        key: pd.read_table(os.path.join(path, files_exio[key]),
                           index_col=list(range(head_col[key])),
                           header=list(range(head_row[key])))
        for key in files_exio
    }

    # refine multiindex and save units
    data['Z'].index.names = ['region', 'sector', 'unit']
    data['Z'].columns.names = ['region', 'sector']
    data['unit'] = pd.DataFrame(
        data['Z'].iloc[:, 0].reset_index(level='unit').unit)
    data['Z'].reset_index(level='unit', drop=True, inplace=True)
    data['Y'].index.names = ['region', 'sector', 'unit']
    data['Y'].columns.names = ['region', 'category']
    data['Y'].reset_index(level='unit', drop=True, inplace=True)
    ext_unit = dict()
    for key in [
            'F_fac', 'F_emissions', 'F_materials', 'F_resources',
            'FY_emissions', 'FY_materials'
    ]:
        if head_col[key] == 3:
            data[key].index.names = ['stressor', 'compartment', 'unit']
        if head_col[key] == 2:
            data[key].index.names = ['stressor', 'unit']
        if 'FY' in key:
            data[key].columns.names = ['region', 'category']
            data[key].reset_index(level='unit', drop=True, inplace=True)
        else:
            data[key].columns.names = ['region', 'sector']
            ext_unit[key] = pd.DataFrame(
                data[key].iloc[:, 0].reset_index(level='unit').unit)
            data[key].reset_index(level='unit', drop=True, inplace=True)
            # compare by value - identity of string literals is an
            # implementation detail
            if key == 'F_resources':
                data[key].reset_index(level='compartment',
                                      drop=True,
                                      inplace=True)
                ext_unit[key].reset_index(level='compartment',
                                          drop=True,
                                          inplace=True)

    # build the extensions
    ext = dict()
    ext['factor_inputs'] = {
        'F': data['F_fac'],
        'unit': ext_unit['F_fac'],
        'name': 'factor input'
    }
    ext['emissions'] = {
        'F': data['F_emissions'],
        'FY': data['FY_emissions'],
        'unit': ext_unit['F_emissions'],
        'name': 'emissons'
    }
    ext['materials'] = {
        'F': data['F_materials'],
        'FY': data['FY_materials'],
        'unit': ext_unit['F_materials'],
        'name': 'material extraction'
    }
    ext['resources'] = {
        'F': data['F_resources'],
        'unit': ext_unit['F_resources'],
        'name': 'resources'
    }

    # read the characterisation matrices if available
    # and build one extension with the impacts
    if charact:
        # dict with correspondence to the extensions
        Qsheets = {
            'Q_factorinputs': 'factor_inputs',
            'Q_emission': 'emissions',
            'Q_materials': 'materials',
            'Q_resources': 'resources'
        }
        # number of rows containing col headers at the top - these are
        # skipped when reading the sheet
        Q_head_row = {
            'Q_emission': 3,
            'Q_factorinputs': 2,
            'Q_resources': 3,
            'Q_materials': 2,
        }
        # column to use as name for the rows
        Q_head_col_rowname = {
            'Q_emission': 1,
            'Q_factorinputs': 0,
            'Q_resources': 0,
            'Q_materials': 0,
        }
        # column to use as unit for the rows which gives also the last column
        # before the data
        Q_head_col_rowunit = {
            'Q_emission': 3,
            'Q_factorinputs': 1,
            'Q_resources': 1,
            'Q_materials': 1,
        }

        charac_data = {
            Qname: pd.read_excel(charact,
                                 sheet_name=Qname,
                                 skiprows=list(range(0, Q_head_row[Qname])),
                                 header=None)
            for Qname in Qsheets
        }

        # temp for the calculated impacts which then
        # get summarized in the 'impact'
        _impact = dict()
        for Qname in Qsheets:
            sheet = charac_data[Qname]
            unit_col = Q_head_col_rowunit[Qname]

            # work on a copy of the name column so that the fix below does
            # not depend on chained assignment writing through to the sheet
            row_names = sheet[Q_head_col_rowname[Qname]].copy()
            if Qname == 'Q_emission':
                # unfortunately the names in Q_emissions are
                # not completely unique - fix that
                for row, suffix in ((42, ' 2008'), (43, ' 2008'),
                                    (44, ' 2010'), (45, ' 2010')):
                    row_names[row] = row_names[row] + suffix
            sheet.index = row_names

            units = pd.DataFrame(sheet.iloc[:, unit_col])
            units.columns = ['unit']
            units.index.name = 'impact'

            # everything after the unit column is data
            sheet = sheet.iloc[:, unit_col + 1:]
            sheet.index.name = 'impact'
            charac_data[Qname] = sheet

            extension = ext[Qsheets[Qname]]
            if 'FY' in extension:
                _FY = extension['FY'].values
            else:
                # extension without final demand accounts
                _FY = np.zeros(
                    [extension['F'].shape[0], data['Y'].shape[1]])
            _impact[Qname] = {
                'F': sheet.dot(extension['F'].values),
                'FY': sheet.dot(_FY),
                'unit': units
            }

        # stack the impacts of all extensions, always in the same order
        impact_order = ['Q_factorinputs', 'Q_emission',
                        'Q_materials', 'Q_resources']
        impact = {
            part: pd.concat([_impact[Qname][part] for Qname in impact_order])
            for part in ('F', 'FY', 'unit')
        }
        impact['F'].columns = ext['emissions']['F'].columns
        impact['FY'].columns = ext['emissions']['FY'].columns
        impact['name'] = 'impact'
        ext['impacts'] = impact

    # popvector may be a DataFrame - only compare it if it is a string
    if isinstance(popvector, str) and popvector == 'exio2':
        popdata = pd.read_table(os.path.join(PYMRIO_PATH['exio20'],
                                             './misc/population.txt'),
                                index_col=0).astype(float)
    else:
        popdata = popvector

    return IOSystem(Z=data['Z'],
                    Y=data['Y'],
                    unit=data['unit'],
                    population=popdata,
                    **ext)
コード例 #5
0
ファイル: fileio.py プロジェクト: thomasgibon/pymrio
def load(path, include_core=True):
    """ Load an IOSystem or Extension that was saved with pymrio before

    The folder given by path must contain the file parameter file
    (DEFAULT_FILE_NAMES['filepara'], read as json) which lists the tables to
    load. Tables with the extension .pkl or .pickle are read as binary
    pickle files, all other tables are read as text tables.

    Parameters
    ----------

    path : string
        folder containing the data to load

    include_core : boolean, optional
        If False the A, L and Z matrices are not loaded. This
        significantly reduces the required memory if the purpose is only
        to analyse the results calculated beforehand.

    Returns
    -------

        IOSystem or Extension class depending on systemtype in the json file

    Raises
    ------
    ReadError
        If the path or the file parameter file does not exist or if the
        systemtype is not defined in the file parameters.

    """
    path = os.path.abspath(path.rstrip('\\'))

    if not os.path.exists(path):
        raise ReadError('Given path does not exist')

    para_file_path = os.path.join(path, DEFAULT_FILE_NAMES['filepara'])
    if not os.path.isfile(para_file_path):
        raise ReadError('No file parameter file found')

    with open(para_file_path, 'r') as pf:
        file_para = json.load(pf)

    systemtype = file_para['systemtype']
    if systemtype == GENERIC_NAMES['iosys']:
        meta_file_path = os.path.join(path, DEFAULT_FILE_NAMES['metadata'])
        ret_system = IOSystem(meta=MRIOMetaData(location=meta_file_path))
        ret_system.meta._add_fileio("Loaded IO system from {}".format(path))
    elif systemtype == GENERIC_NAMES['ext']:
        ret_system = Extension(file_para['name'])
    else:
        raise ReadError('Type of system no defined in the file parameters')

    def _positions(count):
        # pandas gets the scalar 0 for a single header row/column and a
        # list of positions otherwise
        count = int(count)
        return 0 if count == 1 else list(range(count))

    for key in file_para['files']:
        if key in ['A', 'L', 'Z'] and not include_core:
            continue

        table_para = file_para['files'][key]
        full_file_name = os.path.join(path, table_para['name'])
        nr_index_col = table_para['nr_index_col']
        nr_header = table_para['nr_header']

        logging.info('Load data from {}'.format(full_file_name))

        index_positions = _positions(nr_index_col)
        header_positions = _positions(nr_header)

        if os.path.splitext(full_file_name)[1] in ('.pkl', '.pickle'):
            table = pd.read_pickle(full_file_name)
        else:
            table = pd.read_table(full_file_name,
                                  index_col=index_positions,
                                  header=header_positions)
        setattr(ret_system, key, table)

    return ret_system
コード例 #6
0
def load_test():
    """Build and return the small test MRIO stored under PYMRIO_PATH.

    The test system consists of:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    Only Z, Y, F and F_Y are read from disk. Everything else can be
    derived afterwards with calc_all()

    Notes
    -----

        For development: this function doubles as a worked example of
        how to parse an IOSystem

    Returns
    -------

    IOSystem

    """

    test_dir = PYMRIO_PATH["test_mrio"]

    # Layout description of one tab separated input table:
    #   file_name  - name of the file inside the test directory
    #   row_header - number of leading rows that form the column header
    #   col_header - number of leading columns that form the row header
    #   unit_col   - column containing the unit for the table
    # The row header also carries the unit; it is split off into its own
    # dataframe further below.
    table_spec = collections.namedtuple(
        "file_data", ["file_name", "row_header", "col_header", "unit_col"])

    # file names and header specs of the system
    layout = {
        "Z": table_spec("trade_flows_Z.txt", 2, 3, 2),
        "Y": table_spec("finald_demand_Y.txt", 2, 3, 2),
        "fac": table_spec("factor_input.txt", 2, 2, 1),
        "emissions": table_spec("emissions.txt", 2, 3, 2),
        "FDemissions": table_spec("FDemissions.txt", 2, 3, 2),
    }

    meta_rec = MRIOMetaData(location=test_dir)

    # read every table into a pandas.DataFrame, keyed like ``layout``
    tables = {}
    for label, spec in layout.items():
        tables[label] = pd.read_csv(
            os.path.join(test_dir, spec.file_name),
            index_col=list(range(spec.col_header)),
            header=list(range(spec.row_header)),
            sep="\t",
        )

    meta_rec._add_fileio("Load test_mrio from {}".format(test_dir))

    def _unit_table(frame):
        # The unit lives in the "unit" index level; take it from the first
        # data column and wrap it in a dataframe of its own.
        first_column = frame.iloc[:, 0]
        return pd.DataFrame(first_column.reset_index(level="unit").unit)

    def _strip_unit_level(frame):
        # Remove the "unit" index level from the table, in place.
        frame.reset_index(level="unit", drop=True, inplace=True)

    # Prepare the tables for the IOSystem:
    # - name the header levels
    # - keep the unit in a separate dataframe and drop it from the table

    # --- core trade data -------------------------------------------------
    z_flows = tables["Z"]
    z_flows.index.names = ["region", "sector", "unit"]
    z_flows.columns.names = ["region", "sector"]
    trade_unit = _unit_table(z_flows)
    _strip_unit_level(z_flows)

    y_demand = tables["Y"]
    y_demand.index.names = ["region", "sector", "unit"]
    y_demand.columns.names = ["region", "category"]
    _strip_unit_level(y_demand)

    # --- factor input extension ------------------------------------------
    fac_table = tables["fac"]
    fac_table.index.names = ["inputtype", "unit"]
    fac_table.columns.names = ["region", "sector"]
    fac_unit = _unit_table(fac_table)
    _strip_unit_level(fac_table)
    factor_inputs = {
        "F": fac_table,
        "name": "Factor Inputs",
        "unit": fac_unit,
    }

    # --- emission extension ----------------------------------------------
    emis_table = tables["emissions"]
    emis_table.index.names = ["stressor", "compartment", "unit"]
    emis_table.columns.names = ["region", "sector"]
    emis_unit = _unit_table(emis_table)
    _strip_unit_level(emis_table)

    emis_fd_table = tables["FDemissions"]
    emis_fd_table.index.names = ["stressor", "compartment", "unit"]
    emis_fd_table.columns.names = ["region", "category"]
    _strip_unit_level(emis_fd_table)
    emissions = {
        "F": emis_table,
        "F_Y": emis_fd_table,
        "name": "Emissions",
        "unit": emis_unit,
    }

    # the population data - this is optional (None can be passed if no data
    # is available)
    population_file = os.path.join(test_dir, "./population.txt")
    popdata = pd.read_csv(population_file, index_col=0, sep="\t")
    popdata = popdata.astype(float)

    return IOSystem(
        Z=z_flows,
        Y=y_demand,
        unit=trade_unit,
        meta=meta_rec,
        factor_inputs=factor_inputs,
        emissions=emissions,
        population=popdata,
    )