Example #1
0
def download_oecd(storage_folder,
                  version='v2018',
                  years=None,
                  overwrite_existing=False):
    """ Downloads the OECD ICIO tables

    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if
        not existing. If the file is already present in the folder,
        the download of the specific file will be skipped.

    version: string or int, optional
        Two versions of the ICIO OECD tables are currently available:
        Version >v2016<: based on >SNA93< / >ISIC Rev.3<
        Version >v2018<: based on >SNA08< / >ISIC Rev.4< (default)
        Pass any of the identifiers between >< to specify the
        version to be downloaded.

    years: list of int (4 digit) or str, optional
        If years is given only downloads the specific years.
        A single int or str is accepted as well. If not given,
        2005 to 2015 are used for v2018 and 1995 to 2011 for v2016.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    Returns
    -------

    Meta data of the downloaded MRIOs

    Raises
    ------

    ValueError
        If the version identifier is not understood, if no datafile
        is configured for a requested year or if the configured
        datafile is not referenced on the current OECD ICIO webpage.

    """
    # Implementation Notes:
    # For OECD the generic download routines can not be used
    # b/c the 2018 version is coded as aspx fileview property
    # in the html source - instead a hardcoded dict is used
    # to select the url for download

    os.makedirs(storage_folder, exist_ok=True)

    if isinstance(version, int):
        version = str(version)

    # Any of the documented identifiers is mapped to the canonical
    # version key by looking for its distinguishing digit.
    if ('8' in version) or ('4' in version):
        version = 'v2018'
    elif ('3' in version) or ('6' in version):
        version = 'v2016'
    else:
        raise ValueError('Version not understood')

    if isinstance(years, (int, str)):
        years = [years]
    if not years:
        if version == 'v2018':
            years = range(2005, 2016)
        else:
            years = range(1995, 2012)
    # The keys of the configured datafiles are strings
    years = [str(yy) for yy in years]

    meta = MRIOMetaData(location=storage_folder,
                        description='OECD-ICIO download',
                        name='OECD-ICIO',
                        system='IxI',
                        version=version)

    datafiles = OECD_CONFIG['datafiles'][version]
    oecd_webcontent = requests.get(OECD_CONFIG['url_db_view']).text
    for yy in years:
        if yy not in datafiles:
            raise ValueError(
                'Datafile for {} not specified or available.'.format(yy))
        file_url = datafiles[yy]

        # Guard against stale hardcoded urls: the configured file must
        # still be referenced in the html source of the OECD page. For
        # v2016 only the file name is compared, otherwise the full entry.
        if version == 'v2016':
            url_to_check = os.path.basename(file_url)
        else:
            url_to_check = file_url
        if url_to_check not in oecd_webcontent:
            raise ValueError(
                'Specified datafile for {} ({}) not found in the current '
                'OECD ICIO webpage.\n'
                'Perhaps filenames have been changed - update OECD_CONFIG '
                'to the new filenames'.format(yy, url_to_check))

        filename = 'ICIO' + version.lstrip('v') + '_' + yy + '.zip'
        storage_file = os.path.join(storage_folder, filename)

        # Honour the documented contract: keep files which are already
        # present unless the caller explicitly asks for a replacement.
        if not overwrite_existing and os.path.isfile(storage_file):
            continue

        req = requests.get(file_url, stream=True)
        try:
            with open(storage_file, 'wb') as lf:
                for chunk in req.iter_content(1024 * 5):
                    lf.write(chunk)
        finally:
            # Release the streamed connection even if writing fails
            req.close()

        meta._add_fileio('Downloaded {} to {}'.format(file_url, filename))

    meta.save()
    return meta
Example #2
0
def load_test():
    """ Returns a small test MRIO

    Content of the test system:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    Only Z, Y, F and F_Y are read from disk. Everything else can be
    derived afterwards with calc_all()

    Notes
    -----

        For development: This function doubles as a worked example of
        how to parse the data for an IOSystem

    Returns
    -------

    IOSystem

    """

    # Layout description of one data file:
    # row_header:
    #    number of header rows at the top of the file (these become the
    #    column labels)
    # col_header:
    #    number of header columns at the start of each line (these become
    #    the row labels)
    # Both counts include the entry holding the unit, which is split off
    # into a separate dataframe further below.
    #
    # unit_col: column containing the unit for the table
    table_spec = collections.namedtuple(
        'file_data', ['file_name', 'row_header', 'col_header', 'unit_col'])

    # file names and header specs of the system
    test_system = {
        'Z': table_spec(file_name='trade_flows_Z.txt',
                        row_header=2, col_header=3, unit_col=2),
        'Y': table_spec(file_name='finald_demand_Y.txt',
                        row_header=2, col_header=3, unit_col=2),
        'fac': table_spec(file_name='factor_input.txt',
                          row_header=2, col_header=2, unit_col=1),
        'emissions': table_spec(file_name='emissions.txt',
                                row_header=2, col_header=3, unit_col=2),
        'FDemissions': table_spec(file_name='FDemissions.txt',
                                  row_header=2, col_header=3, unit_col=2),
    }

    meta_rec = MRIOMetaData(location=PYMRIO_PATH['test_mrio'])

    # read every file into a pandas.DataFrame, keyed like test_system
    data = {}
    for key, spec in test_system.items():
        data[key] = pd.read_csv(
            os.path.join(PYMRIO_PATH['test_mrio'], spec.file_name),
            index_col=list(range(spec.col_header)),
            header=list(range(spec.row_header)),
            sep='\t')

    meta_rec._add_fileio('Load test_mrio from {}'.format(
        PYMRIO_PATH['test_mrio']))

    def _split_off_unit(frame):
        """ Returns the unit column as dataframe and drops the level

        The unit dataframe is built from the first data column before
        the 'unit' index level is removed (in place) from frame.
        """
        unit_frame = pd.DataFrame(
            frame.iloc[:, 0].reset_index(level='unit').unit)
        frame.reset_index(level='unit', drop=True, inplace=True)
        return unit_frame

    # Prepare the tables for the IOSystem:
    # - name the index/column levels
    # - keep the unit in its own dataframe and drop it from the table

    flows = data['Z']
    flows.index.names = ['region', 'sector', 'unit']
    flows.columns.names = ['region', 'sector']
    trade_unit = _split_off_unit(flows)

    final_demand = data['Y']
    final_demand.index.names = ['region', 'sector', 'unit']
    final_demand.columns.names = ['region', 'category']
    final_demand.reset_index(level='unit', drop=True, inplace=True)

    factor_inputs = {'F': data['fac'], 'name': 'Factor Inputs'}
    factor_inputs['F'].index.names = ['inputtype', 'unit']
    factor_inputs['F'].columns.names = ['region', 'sector']
    factor_inputs['unit'] = _split_off_unit(factor_inputs['F'])

    emissions = {'F': data['emissions'],
                 'F_Y': data['FDemissions'],
                 'name': 'Emissions'}
    emissions['F'].index.names = ['stressor', 'compartment', 'unit']
    emissions['F'].columns.names = ['region', 'sector']
    emissions['unit'] = _split_off_unit(emissions['F'])
    emissions['F_Y'].index.names = ['stressor', 'compartment', 'unit']
    emissions['F_Y'].columns.names = ['region', 'category']
    emissions['F_Y'].reset_index(level='unit', drop=True, inplace=True)

    # population data is optional for the IOSystem (None can be passed if
    # no data is available)
    population_file = os.path.join(
        PYMRIO_PATH['test_mrio'], './population.txt')
    popdata = pd.read_csv(population_file, index_col=0, sep='\t')
    popdata = popdata.astype(float)

    return IOSystem(Z=flows,
                    Y=final_demand,
                    unit=trade_unit,
                    meta=meta_rec,
                    factor_inputs=factor_inputs,
                    emissions=emissions,
                    population=popdata)
Example #3
0
def download_exiobase3(
    storage_folder,
    years=None,
    system=None,
    overwrite_existing=False,
    doi="10.5281/zenodo.3583070",
):
    """
    Downloads EXIOBASE 3 files from Zenodo

    Since version 3.7 EXIOBASE gets published on the Zenodo scientific data
    repository.  This function downloads the latest available version from
    Zenodo, for previous versions the corresponding DOI (parameter 'doi')
    needs to be specified.

    Version 3.7: 10.5281/zenodo.3583071
    Version 3.8: 10.5281/zenodo.4277368


    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if
        not existing. If the file is already present in the folder,
        the download of the specific file will be skipped.


    years: list of int or str, optional
        If years is given only downloads the specific years (by default
        all years will be downloaded). A single int or str is accepted
        as well. Years must be given in 4 digits.

    system: string or list of strings, optional
        'pxp': download product by product classification
        'ixi': download industry by industry classification
        ['ixi', 'pxp'] or None (default): download both classifications

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    doi: string, optional.
        The EXIOBASE DOI to be downloaded. By default that resolves
        to the DOI citing the latest available version. For the previous DOI
        see the block 'Versions' on the right hand side of
        https://zenodo.org/record/4277368.

    Returns
    -------

    Meta data of the downloaded MRIOs

    """

    os.makedirs(storage_folder, exist_ok=True)

    doi_url = "https://doi.org/" + doi
    # NOTE: this writes the resolved url into the shared EXIOBASE3_CONFIG
    # dict, the entry stays set after the function returns.
    EXIOBASE3_CONFIG["url_db_view"] = doi_url

    exio_web_content = _get_url_datafiles(**EXIOBASE3_CONFIG)

    # Expected file names look like IOT_1995_pxp.zip. Urls which do not
    # contain such a name are ignored instead of failing on a missing match.
    file_pattern = re.compile(r"IOT_[12]\d{3}_[pi]x[pi]\.zip")
    url_matches = (
        file_pattern.search(url) for url in exio_web_content.data_urls
    )
    available_files = [found.group() for found in url_matches if found]

    if isinstance(years, (int, str)):
        years = [years]
    if not years:
        # sorted to get a reproducible download order
        years = sorted(
            {filename.split("_")[1] for filename in available_files})

    if not system:
        system = ["pxp", "ixi"]
    elif isinstance(system, str):
        system = [system]

    meta = MRIOMetaData(
        location=storage_folder,
        description="EXIOBASE3 metadata file for pymrio",
        name="EXIO3",
        system=",".join(system),
        version=doi,
    )

    requested_urls = []
    for year, classification in itertools.product(years, system):
        # a file is selected if its name contains both the year and the
        # classification identifier
        matching_files = [
            filename for filename in available_files
            if str(year) in filename and str(classification) in filename
        ]

        if not matching_files:
            meta._add_fileio(
                "Could not find EXIOBASE 3 source file with >{}< and >{}<".
                format(year, classification))
            continue
        requested_urls += [
            url
            for url in exio_web_content.data_urls
            for filename in matching_files
            if filename in url
        ]

    meta = _download_urls(
        url_list=requested_urls,
        storage_folder=storage_folder,
        overwrite_existing=overwrite_existing,
        meta_handler=meta,
    )

    meta.save()
    return meta
Example #4
0
def load_test():
    """ Returns a small test MRIO

    Content of the test system:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    Only Z, Y, F and FY are read from disk. Everything else can be
    derived afterwards with calc_all()

    Notes
    -----

        For development: This function doubles as a worked example of
        how to parse the data for an IOSystem

    Returns
    -------

    IOSystem

    """

    # Description of the header layout of one file:
    # row_header:
    #    number of header rows at the top of the file (labels of the
    #    columns)
    # col_header:
    #    number of header columns at the start of the file (labels of the
    #    rows)
    # The unit entry is counted as part of these headers, it gets stored
    # in a separate dataframe later on.
    #
    # unit_col: column containing the unit for the table
    header_spec = collections.namedtuple(
        'file_data', ['file_name', 'row_header', 'col_header', 'unit_col'])

    # file names and header specs of the system
    three_col_header = dict(row_header=2, col_header=3, unit_col=2)
    test_system = dict(
        Z=header_spec(file_name='trade_flows_Z.txt', **three_col_header),
        Y=header_spec(file_name='finald_demand_Y.txt', **three_col_header),
        fac=header_spec(file_name='factor_input.txt',
                        row_header=2, col_header=2, unit_col=1),
        emissions=header_spec(file_name='emissions.txt',
                              **three_col_header),
        FDemissions=header_spec(file_name='FDemissions.txt',
                                **three_col_header),
        )

    meta_rec = MRIOMetaData(location=PYMRIO_PATH['test_mrio'])

    def _read_table(spec):
        """ Reads one file of the test system as pandas.DataFrame """
        return pd.read_table(
            os.path.join(PYMRIO_PATH['test_mrio'], spec.file_name),
            index_col=list(range(spec.col_header)),
            header=list(range(spec.row_header)))

    # all tables of the test system, keyed like test_system
    data = {key: _read_table(test_system[key]) for key in test_system}

    meta_rec._add_fileio('Load test_mrio from {}'.format(
        PYMRIO_PATH['test_mrio']))

    # Bring the tables into the form expected by the IOSystem:
    # - name the index/column levels
    # - store the unit in an own dataframe and drop it from the tables

    # transaction matrix, its unit column is the unit of the system
    z_table = data['Z']
    z_table.index.names = ['region', 'sector', 'unit']
    z_table.columns.names = ['region', 'sector']
    z_first_column = z_table.iloc[:, 0]
    system_unit = pd.DataFrame(
        z_first_column.reset_index(level='unit').unit)
    z_table.reset_index(level='unit', drop=True, inplace=True)

    # final demand
    y_table = data['Y']
    y_table.index.names = ['region', 'sector', 'unit']
    y_table.columns.names = ['region', 'category']
    y_table.reset_index(level='unit', drop=True, inplace=True)

    # extension: factor inputs
    fac_table = data['fac']
    factor_inputs = dict(F=fac_table)
    factor_inputs['name'] = 'Factor Inputs'
    fac_table.index.names = ['inputtype', 'unit']
    fac_table.columns.names = ['region', 'sector']
    fac_first_column = fac_table.iloc[:, 0]
    factor_inputs['unit'] = pd.DataFrame(
        fac_first_column.reset_index(level='unit').unit)
    fac_table.reset_index(level='unit', drop=True, inplace=True)

    # extension: emissions (by sector and by final demand)
    emis_table = data['emissions']
    fd_emis_table = data['FDemissions']
    emissions = dict(F=emis_table, FY=fd_emis_table)
    emissions['name'] = 'Emissions'
    emis_table.index.names = ['stressor', 'compartment', 'unit']
    emis_table.columns.names = ['region', 'sector']
    emis_first_column = emis_table.iloc[:, 0]
    emissions['unit'] = pd.DataFrame(
        emis_first_column.reset_index(level='unit').unit)
    emis_table.reset_index(level='unit', drop=True, inplace=True)
    fd_emis_table.index.names = ['stressor', 'compartment', 'unit']
    fd_emis_table.columns.names = ['region', 'category']
    fd_emis_table.reset_index(level='unit', drop=True, inplace=True)

    # population data is optional for the IOSystem (None can be passed if
    # no data is available)
    popdata = pd.read_table(
        os.path.join(PYMRIO_PATH['test_mrio'], './population.txt'),
        index_col=0)
    popdata = popdata.astype(float)

    return IOSystem(Z=z_table,
                    Y=y_table,
                    unit=system_unit,
                    meta=meta_rec,
                    factor_inputs=factor_inputs,
                    emissions=emissions,
                    population=popdata)
def load_test():
    """Returns a small test MRIO

    Content of the test system:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    Only Z, Y, F and F_Y are read from disk. Everything else can be
    derived afterwards with calc_all()

    Notes
    -----

        For development: This function doubles as a worked example of
        how to parse the data for an IOSystem

    Returns
    -------

    IOSystem

    """

    # Header layout of one file:
    # row_header:
    #    number of header rows at the top of the file (labels of the
    #    columns)
    # col_header:
    #    number of header columns at the start of the file (labels of the
    #    rows)
    # The unit entry is part of these headers and is moved into a separate
    # dataframe below.
    #
    # unit_col: column containing the unit for the table
    layout = collections.namedtuple(
        "file_data", ["file_name", "row_header", "col_header", "unit_col"])

    # file names and header specs of the system
    test_system = {
        "Z": layout("trade_flows_Z.txt", 2, 3, 2),
        "Y": layout("finald_demand_Y.txt", 2, 3, 2),
        "fac": layout("factor_input.txt", 2, 2, 1),
        "emissions": layout("emissions.txt", 2, 3, 2),
        "FDemissions": layout("FDemissions.txt", 2, 3, 2),
    }

    meta_rec = MRIOMetaData(location=PYMRIO_PATH["test_mrio"])

    # read each file into a pandas.DataFrame, keyed like test_system
    data = {}
    for key in test_system:
        source_file = os.path.join(PYMRIO_PATH["test_mrio"],
                                   test_system[key].file_name)
        data[key] = pd.read_csv(
            source_file,
            index_col=list(range(test_system[key].col_header)),
            header=list(range(test_system[key].row_header)),
            sep="\t",
        )

    meta_rec._add_fileio("Load test_mrio from {}".format(
        PYMRIO_PATH["test_mrio"]))

    # Step 1: name the index and column levels of every table
    axis_names = (
        ("Z", ["region", "sector", "unit"], ["region", "sector"]),
        ("Y", ["region", "sector", "unit"], ["region", "category"]),
        ("fac", ["inputtype", "unit"], ["region", "sector"]),
        ("emissions", ["stressor", "compartment", "unit"],
         ["region", "sector"]),
        ("FDemissions", ["stressor", "compartment", "unit"],
         ["region", "category"]),
    )
    for key, index_names, column_names in axis_names:
        data[key].index.names = index_names
        data[key].columns.names = column_names

    # Step 2: keep the units in own dataframes - this has to happen while
    # the 'unit' level is still part of the index
    units = {
        key: pd.DataFrame(
            data[key].iloc[:, 0].reset_index(level="unit").unit)
        for key in ("Z", "fac", "emissions")
    }

    # Step 3: drop the unit level from all tables
    for key, _, _ in axis_names:
        data[key].reset_index(level="unit", drop=True, inplace=True)

    # Step 4: collect the extension data in the dicts passed to IOSystem
    factor_inputs = {
        "F": data["fac"],
        "name": "Factor Inputs",
        "unit": units["fac"],
    }
    emissions = {
        "F": data["emissions"],
        "F_Y": data["FDemissions"],
        "name": "Emissions",
        "unit": units["emissions"],
    }

    # population data is optional for the IOSystem (None can be passed if
    # no data is available)
    population_file = os.path.join(PYMRIO_PATH["test_mrio"],
                                   "./population.txt")
    popdata = pd.read_csv(population_file, index_col=0, sep="\t")
    popdata = popdata.astype(float)

    return IOSystem(
        Z=data["Z"],
        Y=data["Y"],
        unit=units["Z"],
        meta=meta_rec,
        factor_inputs=factor_inputs,
        emissions=emissions,
        population=popdata,
    )