def test_get_or_construct_value1():
    """Check that a variable defined by a formula over another formula is built.

    ``very_complicated_var`` is defined from ``complicated_var``, which is
    itself defined from ``pib``.  The assertions expect a one-column
    DataFrame named after the requested variable whose values are all 0.
    """
    folder_year = 2013
    overall_dict = {
        'pib': {
            'code': 'B1g/PIB',
            'institution': 'S1',
            'description': 'PIB',
        },
        'complicated_var': {
            'code': None,
            'description': 'PIB0',
            'formula': '2*pib - pib - pib + pib*pib - pib^2',
        },
        'very_complicated_var': {
            'code': None,
            'description': 'PIB0',
            'formula': 'complicated_var^2',
        },
    }
    df = get_comptes_nationaux_data(folder_year)

    # Smoke check only: the directly coded variable must be retrievable.
    # The result is not inspected.
    get_or_construct_value(df, 'pib', overall_dict, years=range(1949, 2014))

    variable_name = 'very_complicated_var'
    serie, formula = get_or_construct_value(df, variable_name, overall_dict, years=range(1949, 2014))

    assert isinstance(serie, pandas.DataFrame)
    # Compare plain lists: `Index == list` is element-wise, so its truth value
    # is ambiguous (or the comparison itself errors) unless the frame has
    # exactly one column.  A list comparison fails cleanly in every case.
    assert list(serie.columns) == [variable_name], serie.columns
    assert all(serie[variable_name] == 0), serie[variable_name]
def test_run_through():
    """Smoke test: build several variables for the years 2006-2009.

    Nothing is asserted on the results; the test only fails if one of the
    ``get_or_construct_value`` calls raises.
    """
    years = [2006, 2007, 2008, 2009]
    df = get_denombrements_fiscaux_data_frame(years=years)
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    # (variable name, extra keyword arguments), processed in this order.
    cases = (
        ('interets_imposes_au_prelevement_liberatoire', {}),
        ('dividendes_imposes_au_prelevement_liberatoire', {}),
        ('revenus_imposes_au_prelevement_liberatoire', {'fill_value': 0}),
        ('assurances_vie_imposees_au_prelevement_liberatoire', {}),
        ('f2da', {}),
    )
    for variable_name, extra_kwargs in cases:
        get_or_construct_value(df, variable_name, index_by_variable_name, years=years, **extra_kwargs)
def test_run_through():
    """Smoke test: build several variables for the years 2006-2009.

    No assertion is made on the values; a failure can only come from an
    exception raised by ``get_or_construct_value``.
    """
    years = [2006, 2007, 2008, 2009]
    df = get_denombrements_fiscaux_data_frame(years=years)
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    # Each entry maps to one call; only the third one overrides fill_value.
    requests = [
        ("interets_imposes_au_prelevement_liberatoire", {}),
        ("dividendes_imposes_au_prelevement_liberatoire", {}),
        ("revenus_imposes_au_prelevement_liberatoire", {"fill_value": 0}),
        ("assurances_vie_imposees_au_prelevement_liberatoire", {}),
        ("f2da", {}),
    ]
    for variable_name, options in requests:
        get_or_construct_value(df, variable_name, index_by_variable_name, years=years, **options)
def build_aggregates(raw_data, formula_by_variable_name, level_2_formula_by_variable_name=None, years=None,
                     fill_value=numpy.nan):
    """Build one column per variable and concatenate them into a single frame.

    Every variable named in ``formula_by_variable_name`` and in
    ``level_2_formula_by_variable_name`` is obtained through
    ``get_or_construct_value``, de-duplicated, re-indexed on ``'year'`` and
    concatenated column-wise.

    :param raw_data: data passed unchanged to ``get_or_construct_value``.
    :param formula_by_variable_name: mapping whose keys are variable names.
    :param level_2_formula_by_variable_name: optional second mapping of
        variable names; ``None`` is treated as "no level-2 variables" when
        iterating.  NOTE(review): it is still forwarded as-is to
        ``create_index_by_variable_name`` -- confirm that helper accepts None.
    :param years: years to build; required despite the ``None`` default.
    :param fill_value: forwarded to ``get_or_construct_value``.
    :returns: the concatenated aggregates, or ``None`` if there was no
        variable to build.
    :raises AssertionError: if ``years`` is None or a built series has a
        duplicated index.
    """
    assert years is not None
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    # list(...) + list(...) works on both Python 2 and 3 (dict views cannot be
    # added), and `or {}` avoids calling .keys() on the default None.
    variable_names = list(formula_by_variable_name) + list(level_2_formula_by_variable_name or {})

    aggregates = None
    for variable_name in variable_names:
        serie, formula = get_or_construct_value(
            raw_data, variable_name, index_by_variable_name, years=years, fill_value=fill_value)
        serie = serie.reset_index().drop_duplicates().set_index('year')
        assert not numpy.any(serie.index.duplicated()), 'Duplicated index for {} : {}'.format(
            variable_name, serie)

        if aggregates is None:
            aggregates = serie
            continue

        try:
            aggregates = pandas.concat([aggregates, serie], axis=1, verify_integrity=True)
        except Exception:
            # Dump both operands to help diagnose the failed concatenation,
            # then re-raise with the original traceback intact.
            print("aggregates {}".format(aggregates))
            print("serie {}".format(serie))
            raise

    return aggregates
def test_get_or_construct_value_empty():
    """Request a variable from an incomplete index and return the value obtained.

    Per the original author's note, this lookup should end in a KeyError
    because the components of the formula are not in the dict.
    TODO: assert the KeyError instead of returning the value.
    """
    tidy_data = get_tidy_data(2013)
    incomplete_index = create_dict_for_test_get_value_empty()
    # The formula part of the result is not used.
    empty_value, _unused_formula = get_or_construct_value(tidy_data, 'notfound_arg', incomplete_index)
    return empty_value
def test_get_or_construct_value_empty():
    """Look up ``'notfound_arg'`` in an incomplete index and return its value.

    The original note states that a KeyError is the expected outcome, since
    the components of the formula are missing from the dict.
    TODO: assert KeyError...
    """
    data_frame = get_tidy_data(2013)
    partial_dict = create_dict_for_test_get_value_empty()
    # Only the first element of the (value, formula) pair is returned.
    value, _formula = get_or_construct_value(data_frame, 'notfound_arg', partial_dict)
    return value
def assert_value_construction(variable_name, test):
    """Compare the constructed value of a variable with a target for one year.

    ``df``, ``index_by_variable_name`` and ``years`` are not parameters: they
    are read from the surrounding scope.  The comparison is only asserted for
    years from 2009 onwards; earlier years are built but not checked.

    :param variable_name: name of the variable to build.
    :param test: mapping providing the ``'year'`` and ``'target'`` entries.
    """
    year = test['year']
    target = test['target']
    constructed = get_or_construct_value(
        df, variable_name, index_by_variable_name, years=years, fill_value=0)
    # First element of the result is the data; select the row of that year.
    value = constructed[0].loc[year]
    if year >= 2009:
        assert all(value == target), "{} for {}: got {} instead of {}".format(
            variable_name, year, value.values, target)
def assert_value_construction(variable_name, test):
    """Assert that the constructed value of a variable matches a target.

    ``df``, ``index_by_variable_name`` and ``years`` come from the
    surrounding scope rather than from the arguments.

    :param variable_name: name of the variable to build.
    :param test: mapping providing the ``'year'`` and ``'target'`` entries.
    """
    year = test['year']
    target = test['target']
    constructed = get_or_construct_value(df, variable_name, index_by_variable_name, years=years)
    # Element 0 of the result holds the data; keep only the requested year.
    value = constructed[0].loc[year]
    assert all(value == target), "{} for {}: got {} instead of {}".format(
        variable_name, year, value.values, target)
def test_get_or_construct_value_working():
    """Build six variables from several index dicts and return their values.

    Nothing is asserted; the test fails only if a call raises.  The values
    are returned in the order in which they are built.
    """
    df = get_tidy_data(2013)

    simple_dict = {
        'Interets verses par rdm': {'code': 'D41', 'institution': 'S2', 'ressources': False},
    }
    dict_with_div = {
        'Interets_verses_par_rdm': {'code': 'D41', 'institution': 'S2', 'ressources': False},
        'Interets_verses_par_rdm / 100': {
            'code': 'D41',
            'institution': 'S2',
            'ressources': False,
            'formula': 'Interets_verses_par_rdm / 100',
        },
    }
    dict_sal_cot_soc = create_dict_sal_cot_soc()
    dict_revenus_rdm = create_dict_revenus_rdm()
    dict_with_squares = create_dict_w_squares()
    dict_profits = create_dict_profits()

    def construct(variable_name, index, first_year=1949):
        # A fresh range is built for every call; the formula is discarded.
        value, _formula = get_or_construct_value(df, variable_name, index, years=range(first_year, 2014))
        return value

    # Tuple items are evaluated left to right, i.e. in the original call order.
    return (
        construct('Interets verses par rdm', simple_dict),
        construct('Interets_verses_par_rdm / 100', dict_with_div),
        construct('Square_of_sum', dict_with_squares),
        construct('Interets_et_dividendes_verses_par_rdm_nets', dict_revenus_rdm),
        construct('Profits_des_societes', dict_profits),
        construct('Sal_cs_verses_societes', dict_sal_cot_soc, first_year=1978),
    )
def build_aggregates(raw_data, formula_by_variable_name, level_2_formula_by_variable_name=None, years=None,
                     fill_value=numpy.nan):
    """Assemble the aggregates frame, one column per requested variable.

    Each variable named in ``formula_by_variable_name`` or
    ``level_2_formula_by_variable_name`` is fetched with
    ``get_or_construct_value``, stripped of duplicated rows, indexed on
    ``'year'`` and appended as a new column.

    :param raw_data: data handed over to ``get_or_construct_value``.
    :param formula_by_variable_name: mapping whose keys are variable names.
    :param level_2_formula_by_variable_name: optional second mapping of
        variable names; ``None`` contributes no variable to the loop.
        NOTE(review): ``create_index_by_variable_name`` still receives the
        raw value -- confirm it tolerates None.
    :param years: years to build; must not be None.
    :param fill_value: forwarded to ``get_or_construct_value``.
    :returns: the column-wise concatenation of all series, or ``None`` when
        there is no variable to build.
    :raises AssertionError: if ``years`` is None or a series has a duplicated
        index.
    """
    assert years is not None
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    # Materialize the keys as lists so they can be added on Python 3 as well,
    # and guard against the default None for the level-2 mapping.
    level_2_names = list(level_2_formula_by_variable_name or {})
    variable_names = list(formula_by_variable_name) + level_2_names

    aggregates = None
    for variable_name in variable_names:
        serie, formula = get_or_construct_value(
            raw_data, variable_name, index_by_variable_name, years=years, fill_value=fill_value)
        serie = serie.reset_index().drop_duplicates().set_index('year')
        assert not numpy.any(serie.index.duplicated()), 'Duplicated index for {} : {}'.format(
            variable_name, serie)

        if aggregates is None:
            aggregates = serie
        else:
            try:
                aggregates = pandas.concat([aggregates, serie], axis=1, verify_integrity=True)
            except Exception:
                # Show both operands before propagating the error; a bare
                # `raise` keeps the original traceback.
                print("aggregates {}".format(aggregates))
                print("serie {}".format(serie))
                raise

    return aggregates
def test_get_or_construct_value_working():
    """Build six variables from several index dicts and return their values.

    There is no assertion: the test only fails if one of the
    ``get_or_construct_value`` calls raises.  The returned tuple follows the
    order of the calls.
    """
    df = get_tidy_data(2013)

    simple_dict = {'Interets verses par rdm': {'code': 'D41', 'institution': 'S2', 'ressources': False}}
    dict_with_div = {
        'Interets_verses_par_rdm': {
            'code': 'D41',
            'institution': 'S2',
            'ressources': False,
        },
        'Interets_verses_par_rdm / 100': {
            'code': 'D41',
            'institution': 'S2',
            'ressources': False,
            'formula': 'Interets_verses_par_rdm / 100',
        },
    }
    dict_sal_cot_soc = create_dict_sal_cot_soc()
    dict_revenus_rdm = create_dict_revenus_rdm()
    dict_with_squares = create_dict_w_squares()
    dict_profits = create_dict_profits()

    # (variable name, index dict, first year of the requested range)
    requests = [
        ('Interets verses par rdm', simple_dict, 1949),
        ('Interets_verses_par_rdm / 100', dict_with_div, 1949),
        ('Square_of_sum', dict_with_squares, 1949),
        ('Interets_et_dividendes_verses_par_rdm_nets', dict_revenus_rdm, 1949),
        ('Profits_des_societes', dict_profits, 1949),
        ('Sal_cs_verses_societes', dict_sal_cot_soc, 1978),
    ]

    values = []
    for variable_name, index, first_year in requests:
        # Only the value is kept; the formula is discarded.
        value, _formula = get_or_construct_value(df, variable_name, index, years=range(first_year, 2014))
        values.append(value)
    return tuple(values)
def assert_value_construction(variable_name, test):
    """Check the constructed value of a variable against a target for one year.

    ``df``, ``index_by_variable_name`` and ``years`` are taken from the
    surrounding scope, not from the arguments.

    :param variable_name: name of the variable to build.
    :param test: mapping providing the ``"year"`` and ``"target"`` entries.
    """
    year = test["year"]
    target = test["target"]
    result = get_or_construct_value(df, variable_name, index_by_variable_name, years=years)
    # result[0] is the data part; .loc selects the row of the tested year.
    value = result[0].loc[year]
    assert all(value == target), "{} for {}: got {} instead of {}".format(
        variable_name, year, value.values, target)
def test_run_through():
    """Smoke test: build a set of variables over 2006-2011 and sub-periods.

    Nothing is asserted on the results; the test only fails if one of the
    ``get_or_construct_value`` calls raises.
    """
    years = [2006, 2007, 2008, 2009, 2010, 2011]
    df = get_denombrements_fiscaux_data_frame(years=years)
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    # (variable name, keyword arguments), processed in this order.
    cases = (
        ('interets_imposes_au_prelevement_liberatoire', {'years': years}),
        ('dividendes_imposes_au_prelevement_liberatoire', {'years': years}),
        ('revenus_imposes_au_prelevement_liberatoire', {'years': years, 'fill_value': 0}),
        ('assurances_vie_imposees_au_prelevement_liberatoire', {'years': years}),
        ('f2da', {'years': years}),
        (u'f5he', {'years': range(2010, 2012)}),
        (u'f5jr', {'years': range(2007, 2012), 'fill_value': 0}),
        ('plus_values_professionnelles_regime_normal', {'years': range(2007, 2012), 'fill_value': 0}),
    )
    for variable_name, keyword_arguments in cases:
        get_or_construct_value(df, variable_name, index_by_variable_name, **keyword_arguments)
def test_run_through():
    """Smoke test: build a set of variables over 2006-2011 and sub-periods.

    No assertion is made on the values; a failure can only come from an
    exception raised by ``get_or_construct_value``.
    """
    years = [2006, 2007, 2008, 2009, 2010, 2011]
    df = get_denombrements_fiscaux_data_frame(years=years)
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    # Variables built over the full list of years; only one sets fill_value.
    full_period_requests = [
        ('interets_imposes_au_prelevement_liberatoire', {}),
        ('dividendes_imposes_au_prelevement_liberatoire', {}),
        ('revenus_imposes_au_prelevement_liberatoire', {'fill_value': 0}),
        ('assurances_vie_imposees_au_prelevement_liberatoire', {}),
        ('f2da', {}),
    ]
    for variable_name, options in full_period_requests:
        get_or_construct_value(df, variable_name, index_by_variable_name, years=years, **options)

    # Variables built over a narrower range of years.
    sub_period_requests = [
        (u'f5he', range(2010, 2012), {}),
        (u'f5jr', range(2007, 2012), {'fill_value': 0}),
        ('plus_values_professionnelles_regime_normal', range(2007, 2012), {'fill_value': 0}),
    ]
    for variable_name, sub_years, options in sub_period_requests:
        get_or_construct_value(df, variable_name, index_by_variable_name, years=sub_years, **options)