def test_cn_parser_main_1():
    """Check that get_comptes_nationaux_data(2013) runs to completion.

    Only ordinary errors are turned into a test failure; KeyboardInterrupt and SystemExit are left to propagate
    so that the test run can still be interrupted. The underlying error is appended to the failure message.
    """
    try:
        get_comptes_nationaux_data(2013)
    except Exception as error:
        raise AssertionError(
            "The final table of comptabilite nationale could not be generated in cn_parser_main: %r" % (error,)
            )
def test_get_or_construct_value1():
    """Check get_or_construct_value on a variable read by code and on variables defined by nested formulas.

    'complicated_var' is built from 'pib' and 'very_complicated_var' is built from 'complicated_var'; the test
    expects the resulting column to be zero for every year.
    NOTE(review): this presumes '^' means exponentiation in the formula syntax, so that the formula of
    'complicated_var' cancels out -- confirm against the formula parser.
    """
    folder_year = 2013
    overall_dict = {
        'pib': {
            'code': 'B1g/PIB',
            'institution': 'S1',
            'description': 'PIB'
            },
        'complicated_var': {
            'code': None,
            'description': 'PIB0',
            'formula': '2*pib - pib - pib + pib*pib - pib^2'
            },
        'very_complicated_var': {
            'code': None,
            'description': 'PIB0',
            'formula': 'complicated_var^2'
            }
        }
    df = get_comptes_nationaux_data(folder_year)

    # Smoke check only: the call for a variable defined by its code must not raise; its result is not inspected.
    get_or_construct_value(df, 'pib', overall_dict, years = range(1949, 2014))

    variable_name = 'very_complicated_var'
    serie, _formula = get_or_construct_value(df, variable_name, overall_dict, years = range(1949, 2014))
    assert isinstance(serie, pandas.DataFrame)
    # Compare plain lists so that a mismatch fails as an ordinary assertion instead of depending on the truth
    # value of an elementwise Index comparison.
    assert list(serie.columns) == [variable_name]
    assert (serie[variable_name] == 0).all(), serie[variable_name]
Beispiel #3
0
def test_get_or_construct_data_CN1():  # copied on the one in cn_test
    """Check that the values built for the CN1 variables equal the frame returned by read_CN1(2013)."""
    national_accounts = get_comptes_nationaux_data(2013)
    expected_values = read_CN1(2013)
    cn1_variables = generate_CN1_variables(2013)
    # get_or_construct_data returns a (values, formulas) pair; only the values are compared.
    computed_values, _formulas = get_or_construct_data(national_accounts, cn1_variables, range(1949, 2014))
    assert_frame_equal(computed_values, expected_values)
def test_get_or_construct_data_CN1():  # copied on the one in cn_test
    """Check that the values built for the CN1 variables equal the frame returned by read_CN1(2013).

    The columns of both frames are printed before the comparison to help diagnose a mismatch.
    """
    df = get_comptes_nationaux_data(2013)
    values_CN1_target = read_CN1(2013)
    variables_CN1 = generate_CN1_variables(2013)
    # The second element of the returned pair (the formulas) is not needed here.
    values_CN1, _formulas_CN1 = get_or_construct_data(df, variables_CN1, range(1949, 2014))
    # Parenthesised single-argument form: prints the same text under Python 2 and is valid Python 3.
    print(values_CN1.columns)
    print(values_CN1_target.columns)
    assert_frame_equal(values_CN1, values_CN1_target)
def test_get_or_construct_data_profits():  # copied on the one in cn_test
    """Check that the values built from create_dict_profits() equal the frame from read_profits_societes()."""
    national_accounts = get_comptes_nationaux_data(2013)

    expected_profits = read_profits_societes()
    profits_variables = create_dict_profits()
    construction_result = get_or_construct_data(national_accounts, profits_variables)
    # Only the first element of the result (the values) is compared.
    computed_profits = construction_result[0]

    assert_frame_equal(computed_profits, expected_profits)
Beispiel #6
0
def output_for_sheets(entry_by_index_list, version_year, csv_file_name):
    """
    Output the final data needed to recreate a sheet of "Agrégats IPP - Comptabilité nationale" into a csv file.

    Parameters
    ----------
    entry_by_index_list : list
        Variables to extract, passed unchanged to look_many. Presumably a list of dictionaries with keys
        'code', 'institution', 'ressources', 'year', 'description' (to be confirmed against look_many).
    version_year : int
        Year of the version of Comptabilité Nationale data the user wishes to have (most often the latest version).
    csv_file_name : string
        path to the output csv file. Extension should be .txt (and not .csv) in order to be read by Excel.

    Returns
    -------
    The object produced by reshape_to_long_for_output, i.e. the same data that is written to csv_file_name.

    Example
    --------
    >>> CN1 = cn_output.output_for_sheets(
    ...    cn_sheets_lists.list_CN1, 2013,
    ...    os.path.join(cn_directory, u'Agrégats IPP - Comptabilité nationale.txt')
    ...    )

    Creates a csv file containing the values of all the variables needed to construct sheet CN1, for all
    years. Each column is a variable, i.e. a tuple containing the aggregate's code, the institution concerned,
    whether it is Ressources or Emplois, and the file from where the data was extracted.

    Note
    ------
    The first drop_duplicates() should be unnecessary : the presence of drop_duplicates in the parsers should
    eliminate the need for that.

    The second drop_duplicates(), i.e. a drop_duplicates on all variables except those referring to the source file,
    is essential to avoid the same data being repeated in different columns when it is sourced from different files
    (typically, TEE and a Comptes nationaux file).
    """
    table = get_comptes_nationaux_data(version_year)

    extract = look_many(table, entry_by_index_list)
    extract = extract.drop_duplicates()
    # Rows that agree on every column listed here are doubles: identical info coming from distinct sources
    # (eg. TEE and Compte). The column list is passed by keyword so it cannot be mistaken for a single label.
    extract = extract.drop_duplicates(
        subset = [u'code', u'institution', u'ressources', u'value', u'year']
        )

    df = reshape_to_long_for_output(extract)

    df_long_to_csv(df, csv_file_name)
    return df
def output_for_sheets(entry_by_index_list, version_year, csv_file_name):
    """
    Output the final data needed to recreate a sheet of "Agrégats IPP - Comptabilité nationale" into a csv file.

    Parameters
    ----------
    entry_by_index_list : list
        Variables to extract, passed unchanged to look_many. Presumably a list of dictionaries with keys
        'code', 'institution', 'ressources', 'year', 'description' (to be confirmed against look_many).
    version_year : int
        Year of the version of Comptabilité Nationale data the user wishes to have (most often the latest version).
    csv_file_name : string
        path to the output csv file. Extension should be .txt (and not .csv) in order to be read by Excel.

    Returns
    -------
    The object produced by reshape_to_long_for_output, i.e. the same data that is written to csv_file_name.

    Example
    --------
    >>> CN1 = cn_output.output_for_sheets(
    ...    cn_sheets_lists.list_CN1, 2013,
    ...    os.path.join(cn_directory, u'Agrégats IPP - Comptabilité nationale.txt')
    ...    )

    Creates a csv file containing the values of all the variables needed to construct sheet CN1, for all
    years. Each column is a variable, i.e. a tuple containing the aggregate's code, the institution concerned,
    whether it is Ressources or Emplois, and the file from where the data was extracted.

    Note
    ------
    The first drop_duplicates() should be unnecessary : the presence of drop_duplicates in the parsers should
    eliminate the need for that.

    The second drop_duplicates(), i.e. a drop_duplicates on all variables except those referring to the source file,
    is essential to avoid the same data being repeated in different columns when it is sourced from different files
    (typically, TEE and a Comptes nationaux file).
    """
    table = get_comptes_nationaux_data(version_year)

    extract = look_many(table, entry_by_index_list)
    extract = extract.drop_duplicates()
    # Columns identifying one piece of data regardless of the file it was read from; rows equal on all of them
    # are doubles, i.e. identical info coming from distinct sources (eg. TEE and Compte).
    identifying_columns = [u'code', u'institution', u'ressources', u'value', u'year']
    extract = extract.drop_duplicates(subset = identifying_columns)

    df = reshape_to_long_for_output(extract)

    df_long_to_csv(df, csv_file_name)
    return df
# -*- coding: utf-8 -*-


import os
import pandas
import pkg_resources
from ipp_macro_series_parser.config import Config

from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import (
    look_many, look_up, get_or_construct_value, get_or_construct_data)
from ipp_macro_series_parser.comptes_nationaux.sheets_lists import variables_CN1, variables_CN2

# Configuration is looked up in the directory where the 'ipp-macro-series-parser' distribution is installed.
parser = Config(
    config_files_directory = os.path.join(
        pkg_resources.get_distribution('ipp-macro-series-parser').location
        )
    )

# Directories declared in the 'data' section of the configuration, read in the same order as listed.
cn_directory, cn_hdf, cn_csv, tests_directory = (
    parser.get('data', option)
    for option in ('cn_directory', 'cn_hdf_directory', 'cn_csv_directory', 'tests_directory')
    )

# Test data folder shipped inside the installed package.
tests_data = os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location,
    'ipp_macro_series_parser/tests/data'
    )

# National accounts table, 2013 version.
df = get_comptes_nationaux_data(2013)

# Values and formulas for the CN1 and CN2 variable sets over the years 1949 to 2013 inclusive.
values_CN1, formulas_CN1 = get_or_construct_data(
    df, variables_CN1, range(1949, 2014)
    )
values_CN2, formulas_CN2 = get_or_construct_data(
    df, variables_CN2, range(1949, 2014)
    )
Beispiel #9
0
def get_tidy_data(year):
    """Return the result of get_comptes_nationaux_data for ``year``, unchanged."""
    return get_comptes_nationaux_data(year)
# -*- coding: utf-8 -*-

import os
import pkg_resources

from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import get_or_construct_data
from ipp_macro_series_parser.comptes_nationaux.sheets_lists import variables_CN1, variables_CN2

# Configuration object built with the defaults of Config.
parser = Config()

# Directories declared in the 'data' section of the configuration, read in the same order as listed.
cn_directory, cn_hdf, cn_csv, tests_directory = (
    parser.get('data', option)
    for option in ('cn_directory', 'cn_hdf_directory', 'cn_csv_directory', 'tests_directory')
    )

# Test data folder shipped inside the installed package.
tests_data = os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location,
    'ipp_macro_series_parser/tests/data'
    )

# National accounts table, 2013 version.
df = get_comptes_nationaux_data(2013)

# Values and formulas for the CN1 and CN2 variable sets over the years 1949 to 2013 inclusive.
values_CN1, formulas_CN1 = get_or_construct_data(df, variables_CN1, range(1949, 2014))
values_CN2, formulas_CN2 = get_or_construct_data(df, variables_CN2, range(1949, 2014))
def test_cn_parser_main_2():
    """Check that the table returned by get_comptes_nationaux_data(2013) has no duplicated rows."""
    df = get_comptes_nationaux_data(2013)
    # duplicated() yields one boolean per row; a single vectorized check replaces the per-row Python loop.
    assert not df.duplicated().any(), "The final table of comptabilite nationale contains duplicates"
def get_tidy_data(year):
    """Return whatever get_comptes_nationaux_data produces for ``year``, without any further processing."""
    return get_comptes_nationaux_data(year)