def test_cn_parser_main_1():
    """Check that the 2013 national accounts table can be generated.

    The test fails with a fixed message when ``get_comptes_nationaux_data(2013)``
    raises an ordinary exception.
    """
    try:
        get_comptes_nationaux_data(2013)
    except Exception:
        # Only ordinary errors count as a failed generation; KeyboardInterrupt
        # and SystemExit are left to propagate instead of being reported as a
        # test failure.
        result = False
    else:
        result = True
    assert result, "The final table of comptabilite nationale could not be generated in cn_parser_main"
def test_get_or_construct_value1():
    """Check ``get_or_construct_value`` on a variable defined through nested formulas.

    ``complicated_var`` is declared with a formula over ``pib`` and
    ``very_complicated_var`` with a formula over ``complicated_var``. The test
    expects the result for ``very_complicated_var`` to be a one-column
    DataFrame, named after the variable, whose values all equal 0.
    """
    folder_year = 2013
    overall_dict = {
        'pib': {
            'code': 'B1g/PIB',
            'institution': 'S1',
            'description': 'PIB'
            },
        'complicated_var': {
            'code': None,
            'description': 'PIB0',
            'formula': '2*pib - pib - pib + pib*pib - pib^2'
            },
        'very_complicated_var': {
            'code': None,
            'description': 'PIB0',
            'formula': 'complicated_var^2'
            }
        }
    df = get_comptes_nationaux_data(folder_year)
    years = range(1949, 2014)

    # Smoke check: a variable declared with a code (no formula) must be
    # retrievable without raising; the result itself is not inspected.
    get_or_construct_value(df, 'pib', overall_dict, years = years)

    variable_name = 'very_complicated_var'
    # Unpacking also checks that a (serie, formula) pair is returned.
    serie, _ = get_or_construct_value(df, variable_name, overall_dict, years = years)

    assert isinstance(serie, pandas.DataFrame)
    # Compare as plain lists: `columns == [...]` is element-wise and would only
    # work by accident when there is exactly one column.
    assert list(serie.columns) == [variable_name]
    assert (serie[variable_name] == 0).all(), serie[variable_name]
def test_get_or_construct_data_CN1():
    """Compare the CN1 values built by ``get_or_construct_data`` with the reference table.

    The reference comes from ``read_CN1(2013)`` and the variable definitions
    from ``generate_CN1_variables(2013)``; both frames must be equal according
    to ``assert_frame_equal``.
    """
    # copied on the one in cn_test
    national_accounts = get_comptes_nationaux_data(2013)
    expected_values = read_CN1(2013)
    cn1_variables = generate_CN1_variables(2013)
    # Years 1949 through 2013 inclusive.
    year_span = range(1949, 2014)
    computed_values, _formulas = get_or_construct_data(national_accounts, cn1_variables, year_span)
    assert_frame_equal(computed_values, expected_values)
def test_get_or_construct_data_CN1():
    """Compare the CN1 values built by ``get_or_construct_data`` with the reference table.

    The reference comes from ``read_CN1(2013)`` and the variable definitions
    from ``generate_CN1_variables(2013)``. The columns of both frames are
    printed before the comparison to help diagnose a mismatch.
    """
    # copied on the one in cn_test
    df = get_comptes_nationaux_data(2013)
    values_CN1_target = read_CN1(2013)
    variables_CN1 = generate_CN1_variables(2013)
    values_CN1, formulas_CN1 = get_or_construct_data(df, variables_CN1, range(1949, 2014))
    # Single-argument print(...) gives the same output on Python 2 and is valid
    # syntax on Python 3, unlike the bare print statement.
    print(values_CN1.columns)
    print(values_CN1_target.columns)
    assert_frame_equal(values_CN1, values_CN1_target)
def test_get_or_construct_data_profits():
    """Compare the company-profits values built by ``get_or_construct_data`` with the reference table.

    The reference comes from ``read_profits_societes()`` and the variable
    definitions from ``create_dict_profits()``; only the first element returned
    by ``get_or_construct_data`` is compared.
    """
    # copied on the one in cn_test
    national_accounts = get_comptes_nationaux_data(2013)
    expected_values = read_profits_societes()
    profits_variables = create_dict_profits()
    outcome = get_or_construct_data(national_accounts, profits_variables)
    computed_values = outcome[0]
    assert_frame_equal(computed_values, expected_values)
def output_for_sheets(entry_by_index_list, version_year, csv_file_name):
    """
    Output the final data needed to recreate a sheet of "Agrégats IPP - Comptabilité nationale" into a csv file.

    Parameters
    ----------
    entry_by_index_list : list
        Variables to extract; passed unchanged to look_many. Presumably a list of dictionaries with keys
        'code', 'institution', 'ressources', 'year', 'description' (to be confirmed against look_many).
    version_year : int
        Year of the version of Comptabilité Nationale data the user wishes to have (most often the latest version).
    csv_file_name : string
        Path to the output csv file. Extension should be .txt (and not .csv) in order to be read by Excel.

    Example
    --------
    >>> CN1 = cn_output.output_for_sheets(
    ...     cn_sheets_lists.list_CN1, 2013,
    ...     os.path.join(cn_directory, u'Agrégats IPP - Comptabilité nationale.txt')
    ...     )

    Returns
    -------
    The object produced by reshape_to_long_for_output for the extracted rows. The same object is handed to
    df_long_to_csv together with csv_file_name, which is expected to write the csv file.

    Note
    ------
    The first drop_duplicates() should be unnecessary : the presence of drop_duplicates in the parsers should
    eliminate the need for that.
    The second drop_duplicates(), i.e. a drop_duplicates on all variables except those referring to the source file,
    is essential to avoid the same data being repeated in different columns when it is sourced from different files
    (typically, TEE and a Comptes nationaux file).
    """
    table = get_comptes_nationaux_data(version_year)
    extract = look_many(table, entry_by_index_list)
    extract = extract.drop_duplicates()
    # This eliminates doubles, i.e. identical info coming from distinct sources (eg. TEE and Compte).
    # The key columns are given as an explicit list through `subset`, so they can never be read as one tuple label.
    extract = extract.drop_duplicates(
        subset = [u'code', u'institution', u'ressources', u'value', u'year']
        )
    df = reshape_to_long_for_output(extract)
    df_long_to_csv(df, csv_file_name)
    return df
def output_for_sheets(entry_by_index_list, version_year, csv_file_name):
    """
    Output the final data needed to recreate a sheet of "Agrégats IPP - Comptabilité nationale" into a csv file.

    Parameters
    ----------
    entry_by_index_list : list
        Variables to extract; passed unchanged to look_many. Presumably a list of dictionaries with keys
        'code', 'institution', 'ressources', 'year', 'description' (to be confirmed against look_many).
    version_year : int
        Year of the version of Comptabilité Nationale data the user wishes to have (most often the latest version).
    csv_file_name : string
        Path to the output csv file. Extension should be .txt (and not .csv) in order to be read by Excel.

    Example
    --------
    >>> CN1 = cn_output.output_for_sheets(
    ...     cn_sheets_lists.list_CN1, 2013,
    ...     os.path.join(cn_directory, u'Agrégats IPP - Comptabilité nationale.txt')
    ...     )

    Returns
    -------
    The object produced by reshape_to_long_for_output for the extracted rows. The same object is handed to
    df_long_to_csv together with csv_file_name, which is expected to write the csv file.

    Note
    ------
    The first drop_duplicates() should be unnecessary : the presence of drop_duplicates in the parsers should
    eliminate the need for that.
    The second drop_duplicates(), i.e. a drop_duplicates on all variables except those referring to the source file,
    is essential to avoid the same data being repeated in different columns when it is sourced from different files
    (typically, TEE and a Comptes nationaux file).
    """
    table = get_comptes_nationaux_data(version_year)
    extract = look_many(table, entry_by_index_list)
    extract = extract.drop_duplicates()
    # Rows that agree on these columns carry the same information coming from distinct sources
    # (eg. TEE and Compte). An explicit list passed as `subset` cannot be read as one tuple label.
    dedup_columns = [u'code', u'institution', u'ressources', u'value', u'year']
    extract = extract.drop_duplicates(subset = dedup_columns)
    df = reshape_to_long_for_output(extract)
    df_long_to_csv(df, csv_file_name)
    return df
# -*- coding: utf-8 -*- import os import pandas import pkg_resources from ipp_macro_series_parser.config import Config from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data from ipp_macro_series_parser.data_extraction import ( look_many, look_up, get_or_construct_value, get_or_construct_data) from ipp_macro_series_parser.comptes_nationaux.sheets_lists import variables_CN1, variables_CN2 parser = Config( config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location) ) cn_directory = parser.get('data', 'cn_directory') cn_hdf = parser.get('data', 'cn_hdf_directory') cn_csv = parser.get('data', 'cn_csv_directory') tests_directory = parser.get('data', 'tests_directory') tests_data = os.path.join( pkg_resources.get_distribution('ipp-macro-series-parser').location, 'ipp_macro_series_parser/tests/data') df = get_comptes_nationaux_data(2013) values_CN1, formulas_CN1 = get_or_construct_data(df, variables_CN1, range(1949, 2014)) values_CN2, formulas_CN2 = get_or_construct_data(df, variables_CN2, range(1949, 2014))
def get_tidy_data(year):
    """Return the national accounts table for ``year``.

    Thin wrapper: ``year`` is forwarded to ``get_comptes_nationaux_data`` and
    its result is returned unchanged.
    """
    return get_comptes_nationaux_data(year)
# -*- coding: utf-8 -*- import os import pkg_resources from ipp_macro_series_parser.config import Config from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data from ipp_macro_series_parser.data_extraction import get_or_construct_data from ipp_macro_series_parser.comptes_nationaux.sheets_lists import variables_CN1, variables_CN2 parser = Config() cn_directory = parser.get('data', 'cn_directory') cn_hdf = parser.get('data', 'cn_hdf_directory') cn_csv = parser.get('data', 'cn_csv_directory') tests_directory = parser.get('data', 'tests_directory') tests_data = os.path.join( pkg_resources.get_distribution('ipp-macro-series-parser').location, 'ipp_macro_series_parser/tests/data') df = get_comptes_nationaux_data(2013) values_CN1, formulas_CN1 = get_or_construct_data(df, variables_CN1, range(1949, 2014)) values_CN2, formulas_CN2 = get_or_construct_data(df, variables_CN2, range(1949, 2014))
def test_cn_parser_main_2():
    """Check that the 2013 national accounts table contains no duplicated rows."""
    df = get_comptes_nationaux_data(2013)
    # duplicated() flags every row that repeats an earlier one; a single
    # vectorised any() replaces the element-by-element Python loop.
    assert not df.duplicated().any(), "The final table of comptabilite nationale contains duplicates"