def test_homemade_nbptr_function(
    reform_config_base_2020, nbptr_parametres_par_defaut, various_cas_types
):
    """Check that the default nbptr parameters reproduce the baseline results.

    Two 2020 reforms are simulated on the same case types: the base reform
    configuration, and the same configuration whose "impot_revenu" section
    is extended with ``nbptr_parametres_par_defaut``.  Both must yield
    identical ``nbptr`` and ``irpp`` arrays.
    """
    period = "2020"
    data = dataframe_from_cas_types_description(various_cas_types)

    tbs_reforme_sans_nbptr = IncomeTaxReform(
        FranceTaxBenefitSystem(), reform_config_base_2020, period
    )

    # Base "impot_revenu" section overlaid with the default nbptr parameters.
    impot_revenu_avec_nbptr = {
        **(reform_config_base_2020["impot_revenu"]),
        **nbptr_parametres_par_defaut,
    }
    tbs_reforme_avec_nbptr = IncomeTaxReform(
        FranceTaxBenefitSystem(),
        {"impot_revenu": impot_revenu_avec_nbptr},
        period,
    )

    sim_sans_nbptr, _ = simulation(period, data, tbs_reforme_sans_nbptr)
    sim_avec_nbptr, _ = simulation(period, data, tbs_reforme_avec_nbptr)

    # Debug output, shown by pytest when the test fails.
    print("sans", sim_sans_nbptr.calculate("nbptr", period))
    print("avec", sim_avec_nbptr.calculate("nbptr", period))

    for variable in ("nbptr", "irpp"):
        assert array_equal(
            sim_sans_nbptr.calculate(variable, period),
            sim_avec_nbptr.calculate(variable, period),
        )
def test_h5_input(
    input_h5="./Simulation_engine/dummy_data.h5",
    name_variables=("rfr", "irpp", "nbptr"),
    aggfunc="sum",
    compdic=None,
    is_plf=False,
):
    """Print aggregates of a simulation run on an HDF5 input file.

    Args:
        input_h5: path of the HDF5 file read with ``pandas.read_hdf``.
        name_variables: variables aggregated in addition to ``wprm``,
            ``salaire_de_base`` and ``retraite_brute``.
        aggfunc: ``"sum"`` (weighted totals) or ``"countnonzero"`` (summed
            weights of the rows where the variable is non-zero).
        compdic: optional mapping ``variable -> reference value``; when a
            reference exists, it is printed with the relative error.
        is_plf: use ``TBS_PLF`` instead of a fresh ``FranceTaxBenefitSystem``.

    Raises:
        ValueError: if ``aggfunc`` is not one of the two supported values.
    """
    # Fail fast, before any data is loaded.  The previous code did
    # ``raise ("...")`` inside the loop, which is itself a TypeError because
    # a plain string is not an exception.
    if aggfunc not in ("sum", "countnonzero"):
        raise ValueError(
            "Only aggregation functions supported are sum and countnonzero. "
            "The rest is not very good if you want my opinion"
        )

    PERIOD = "2018"
    TBS = TBS_PLF if is_plf else FranceTaxBenefitSystem()
    DUMMY_DATA = pandas.read_hdf(input_h5)
    simulation_base_deciles = simulation(PERIOD, DUMMY_DATA, TBS)
    df = aggregats_ff(PERIOD, simulation_base_deciles,
                      name_variables).sort_values(by="rfr")
    if aggfunc == "sum":
        # For sums, also compute the error percentages on the distribution.
        testerrorvalues(df)

    reference = {} if compdic is None else compdic
    codes_pac = ["nbF", "nbG", "nbH", "nbJ", "nbR"]
    aggs_to_compute = ["wprm", "salaire_de_base", "retraite_brute"
                       ] + list(name_variables)

    # Cumulative dependants count: simulated value and reference value.
    val_donnees_pac_agg = 0
    trpac_agg = [reference[ag] for ag in codes_pac if ag in reference]
    val_reelle_pac_agg = sum(trpac_agg) if trpac_agg else None

    for ag in aggs_to_compute:
        if aggfunc == "sum":
            nom_a_afficher = "Total aggrégé"
            if ag != "wprm":
                val_donnees = (df[ag] * df["wprm"]).sum()
            else:
                # The weight column is summed as is, not weighted by itself.
                val_donnees = (df[ag]).sum()
        else:  # "countnonzero"
            if ag != "wprm":
                nom_a_afficher = "Non nuls"
                val_donnees = (df[df[ag] != 0]["wprm"]).sum()
            else:
                nom_a_afficher = "Nombre FF (c'est comme ça le count sur wprm)"
                val_donnees = df[ag].count()

        val_reelle = reference[ag] if ag in reference else None
        print("{} {} : {:.0f} {} {}".format(
            nom_a_afficher,
            ag,
            val_donnees,
            val_reelle if val_reelle is not None else "",
            "{:.2f}%".format((val_donnees / val_reelle - 1) * 100)
            if val_reelle is not None else "",
        ))
        if ag in codes_pac:
            val_donnees_pac_agg += val_donnees

    if val_reelle_pac_agg is not None:
        print("{} {} : {:.0f} {} {}".format(
            nom_a_afficher,
            "Enfants cumules",
            val_donnees_pac_agg,
            val_reelle_pac_agg,
            "{:.2f}%".format(
                (val_donnees_pac_agg / val_reelle_pac_agg - 1) * 100),
        ))
def test_coefficient_proratisation_only_contract_periods_wide():
    """Check coefficient_proratisation for a contract running 2017-11-1 to 2017-12-01.

    "Wide" variant: the scenario is initialised for the whole year 2017.
    """
    parent1 = dict(
        salaire_de_base={'2017-11': 2300},
        effectif_entreprise=1,
        code_postal_entreprise="75001",
        categorie_salarie=u'prive_non_cadre',
        contrat_de_travail_debut='2017-11-1',
        contrat_de_travail_fin='2017-12-01',
        allegement_fillon_mode_recouvrement=u'progressif',
    )

    scenario = FranceTaxBenefitSystem().new_scenario()
    # wide: we simulate for the year
    scenario.init_single_entity(period='2017', parent1=parent1)
    simulation = scenario.new_simulation()

    # Expected monthly coefficient: 1 for the contract month, 0 otherwise.
    expected_by_month = (
        ('2017-11', 1),
        ('2017-12', 0),
        ('2017-10', 0),
    )
    for month, expected in expected_by_month:
        assert_equal(
            simulation.calculate('coefficient_proratisation', month), expected)

    # Summed over the year, only the contract month contributes.
    assert_equal(
        simulation.calculate_add('coefficient_proratisation', '2017'), 1)
def test_coefficient_proratisation_only_contract_periods_wide():
    """Check coefficient_proratisation for a contract running 2017-11-01 to 2017-12-01.

    "Wide" variant: the scenario is initialised for the whole year 2017.
    """
    parent1 = dict(
        salaire_de_base={'2017-11': 2300},
        effectif_entreprise=1,
        code_postal_entreprise="75001",
        categorie_salarie=u'prive_non_cadre',
        contrat_de_travail_debut={2017: '2017-11-01'},
        contrat_de_travail_fin={2017: '2017-12-01'},
        allegement_fillon_mode_recouvrement=u'progressif',
    )

    scenario = FranceTaxBenefitSystem().new_scenario()
    # wide: we simulate for the year
    init_single_entity(scenario, period='2017', parent1=parent1)
    simulation = scenario.new_simulation()

    # Expected monthly coefficient: 1 for the contract month, 0 otherwise.
    expected_by_month = (
        ('2017-11', 1),
        ('2017-12', 0),
        ('2017-10', 0),
    )
    for month, expected in expected_by_month:
        assert simulation.calculate('coefficient_proratisation', month) == expected

    # Summed over the year, only the contract month contributes.
    assert simulation.calculate_add('coefficient_proratisation', '2017') == 1
def compare_input_data(
    input_h5="./Simulation_engine/dummy_data.h5",
    input_h5_b="./Simulation_engine/dummy_data.h5",
    name_variables=("rfr", "irpp", "nbptr"),
    PERIOD=None,
):
    """Tell whether two HDF5 inputs give the same simulation results.

    Both files are simulated with a fresh ``FranceTaxBenefitSystem`` and the
    variables in ``name_variables`` are compared row by row.

    Args:
        input_h5: path of the reference HDF5 file.
        input_h5_b: path of the HDF5 file compared against the reference.
        name_variables: names of the variables to compare.
        PERIOD: simulation period; defaults to ``annee_de_calcul``.

    Returns:
        bool: ``True`` when no compared value differs by more than 0.01 in
        absolute value, ``False`` otherwise.
    """
    if PERIOD is None:
        PERIOD = annee_de_calcul
    tolerance = 0.01

    TBS = FranceTaxBenefitSystem()
    DUMMY_DATA = pandas.read_hdf(input_h5)
    simulation_base_deciles, dictionnaire_datagrouped = simulation(
        PERIOD, DUMMY_DATA, TBS)

    # Work on an explicit copy: columns are added below, and writing into a
    # column selection of the grouped frame is a chained-assignment hazard.
    df = dictionnaire_datagrouped["foyer_fiscal"][["wprm"]].copy()
    for nv in name_variables:
        df["{}_base".format(nv)] = simulation_base_deciles.calculate(
            nv, PERIOD)

    data2 = pandas.read_hdf(input_h5_b)
    newsim, _ = simulation(PERIOD, data2, TBS)
    for nv in name_variables:
        ecart = newsim.calculate(nv, PERIOD) - df["{}_base".format(nv)]
        # Same test as (ecart > 0.01) or (ecart < -0.01); NaN never counts
        # as a difference, exactly as before.
        if (ecart.abs() > tolerance).any():
            return False
    return True
def test_decomposition_variables():
    """Check that every node of the decomposition file names a known variable.

    The YAML decomposition is walked recursively; each node's ``code`` must
    be a key of ``tbs.variables``.
    """
    tbs = FranceTaxBenefitSystem()
    path = Path('openfisca_dash_ui/decomposition.yaml')
    yaml = YAML(typ='safe')
    decomposition = yaml.load(path)

    def check(tree):
        for node in tree:
            assert node['code'] in tbs.variables, node['code']
            # Tolerate leaf nodes without a 'children' entry.
            check(node.get('children') or ())

    # The helper used to be defined but never invoked, so the test passed
    # without checking anything.
    # NOTE(review): assumes the file is a list of nodes, or a single root
    # node with 'code'/'children' keys — confirm against decomposition.yaml.
    check(decomposition if isinstance(decomposition, list) else [decomposition])
def test_coefficient_proratisation_only_contract_periods_narrow():
    """Check coefficient_proratisation for a contract running 2017-11-01 to 2017-12-01.

    "Narrow" variant: the scenario is initialised for the month 2017-11 only.
    """
    parent1 = dict(
        salaire_de_base={'2017-11': 2300},
        effectif_entreprise=1,
        code_postal_entreprise="75001",
        categorie_salarie='prive_non_cadre',
        contrat_de_travail_debut={2017: '2017-11-01'},
        contrat_de_travail_fin={2017: '2017-12-01'},
        allegement_fillon_mode_recouvrement='progressif',
    )

    scenario = FranceTaxBenefitSystem().new_scenario()
    # narrow: we simulate for the month
    init_single_entity(scenario, period='2017-11', parent1=parent1)
    simulation = scenario.new_simulation()

    # Expected monthly coefficient: 1 for the contract month, 0 otherwise.
    expected_by_month = (
        ('2017-11', 1),
        ('2017-12', 0),
        ('2017-10', 0),
    )
    for month, expected in expected_by_month:
        assert simulation.calculate('coefficient_proratisation', month) == expected

    # Summed over the year, only the contract month contributes.
    assert simulation.calculate_add('coefficient_proratisation', '2017') == 1
def test_zero_nbptr(reform_config_base_2020, nbptr_zero, various_cas_types):
    """Check that nbptr is zero for everybody when all its parameters are zero."""
    period = "2020"
    data = dataframe_from_cas_types_description(various_cas_types)

    # Base "impot_revenu" section overlaid with the all-zero nbptr parameters.
    impot_revenu_config = dict(reform_config_base_2020["impot_revenu"])
    impot_revenu_config.update(nbptr_zero)

    tbs_reforme_avec_nbptr = IncomeTaxReform(
        FranceTaxBenefitSystem(),
        {"impot_revenu": impot_revenu_config},
        period,
    )
    sim_avec_nbptr, _ = simulation(period, data, tbs_reforme_avec_nbptr)

    resultats_nbptr = sim_avec_nbptr.calculate("nbptr", period)
    assert not resultats_nbptr.any()
def scenar_values(
    minv, maxv, var_brute, var_nette, pourcentage_hausse=0.001, valeur_hausse=100
):
    """Tabulate ``var_nette`` against ``var_brute`` over a grid of values.

    The grid is built by ``calcule_maillage_intervalle`` from ``var_brute``,
    ``minv``, ``maxv``, ``pourcentage_hausse`` and ``valeur_hausse``.  It is
    simulated for the period ``str(annee_de_calcul)`` with a fresh
    ``FranceTaxBenefitSystem``.

    Returns:
        The grid restricted to the two columns ``[var_brute, var_nette]``,
        where ``var_nette`` holds the ``calculate_add`` result.
    """
    maillage = calcule_maillage_intervalle(
        var_brute, minv, maxv, pourcentage_hausse, valeur_hausse
    )
    periode = str(annee_de_calcul)

    # Defines one household per row.
    resultat = simulation(periode, maillage, FranceTaxBenefitSystem())

    maillage[var_nette] = resultat[0].calculate_add(var_nette, periode)
    return maillage[[var_brute, var_nette]]
def test_deux_adultes_ancien_combattants_deux_enfants(reform_config_base_2020):
    """Two veteran declarants with two dependants must get nbptr == 3.5."""
    # Input data: both declarants share one profile, as do both dependants.
    declarant = {
        "ancienCombattant": True,
        "invalide": False,
        "parentIsole": False,
        "retraite": False,
        "veuf": False,
    }
    personne_a_charge = {"chargePartagee": False, "invalide": False}
    foyer = {
        "declarants": [dict(declarant) for _ in range(2)],
        "personnesACharge": [dict(personne_a_charge) for _ in range(2)],
        "residence": "metropole",
        "revenuImposable": 120000,
    }
    data = dataframe_from_cas_types_description([foyer])
    period = "2020"

    # French law + income tax reform
    tbs_reforme_impot_revenu = IncomeTaxReform(
        FranceTaxBenefitSystem(), reform_config_base_2020, period
    )
    built_simulation, _dict_data_by_entity = simulation(
        period, data, tbs_reforme_impot_revenu
    )

    nbptr = built_simulation.calculate("nbptr", period)
    assert nbptr == [3.5]
apresy = dfv[bestsol + 1][0] avantx = dfv[bestsol][1] apresx = dfv[bestsol + 1][1] lambda_ = (val_brute - avanty) / (apresy - avanty) return lambda_ * apresx + (1 - lambda_) * avantx conversion_variables = {} conversion_variables["salaire_de_base_to_salaire_imposable"] = scenar_values( 0, 12_000_000, "salaire_de_base", "salaire_imposable") conversion_variables["retraite_brute_to_retraite_imposable"] = scenar_values( 0, 12_000_000, "retraite_brute", "retraite_imposable") PERIOD = "2018" TBS = FranceTaxBenefitSystem() TBS_PLF = IncomeTaxReform(TBS, reformePLF, PERIOD) CAS_TYPE = load_data("DCT.csv") SIMCAT = partial(simulation, period=PERIOD, data=CAS_TYPE) SIMCAT_BASE = SIMCAT(tbs=TBS) if not version_beta_sans_simu_pop: # Initialisation des données utilisées pour le calcul sur la population DUMMY_DATA = load_data(data_path).sort_values(by="idfoy") print( "Dummy Data loaded", len(DUMMY_DATA), "lines", len(DUMMY_DATA["idfoy"].unique()), "foyers fiscaux", )
import sys

# NOTE(review): machine-specific checkout paths; the imports below are
# resolved against them first.
sys.path.insert(0, '/home/giuliano/Documents/openfisca/openfisca-core')
import openfisca_core
sys.path.insert(0, '/home/giuliano/Documents/openfisca/openfisca-france')

# Module describing the French system
from openfisca_france import FranceTaxBenefitSystem

# Initialize the legislation
tax_benefit_system = FranceTaxBenefitSystem()

from openfisca_france.reforms import plf2018

reform = plf2018.plf2018(tax_benefit_system)


def init_profile(scenario):
    """Initialise ``scenario`` with a single 40-year-old adult for 2018.

    Calls ``scenario.init_single_entity`` on the object it receives and
    returns that same object.
    """
    adulte = {
        'age': 40,
        'salaire_de_base': 1671 * 12,
    }
    logement = {
        'loyer': 5000,  # Annual basis
        'statut_occupation_logement': 3,
        'taxe_habitation': -600,
    }
    scenario.init_single_entity(period=2018, parent1=adulte, menage=logement)
    return scenario
from copy import deepcopy
import pprint

from openfisca_france import FranceTaxBenefitSystem

from tests.test_entities import TEST_CASE_AGES

# Script: compute 'revenu_disponible' for the shared TEST_CASE_AGES test case.
tbs = FranceTaxBenefitSystem()
period = '2020'

# Work on a private copy so the imported test case is left untouched.
test_case = deepcopy(TEST_CASE_AGES)
test_case.update(period=period)
# pprint.pprint(test_case)

simulation = (
    tbs
    .new_scenario()
    .init_from_dict(test_case)
    .new_simulation()
)
print(simulation.calculate('revenu_disponible', period))
# return list(x) # return x # with Path("decomposition.json").open('w') as fd: # json.dump(decomposition_tree, fd, indent=2, default=serialize) return decomposition_tree decomposition_file_path = Path("decomposition.json") if decomposition_file_path.is_file(): print("Loading decomposition from file...") with decomposition_file_path.open() as fd: decomposition_tree = json.load(fd) else: print("Initializing France tax and benefit system...") tbs = FranceTaxBenefitSystem() print("Pre-calculating decomposition...") decomposition_tree = precalculate_decomposition_json(tbs) app = dash.Dash() server = app.server # Referenced by Procfile app.layout = html.Div(children=[ html.H1(children='OpenFisca'), html.P(children=[ "Salaire de base : ", html.Span(id="salaire-de-base-value"), " € / an", ]), dcc.Slider( id="salaire-de-base",
def main():
    """Associate IPP table fields with OpenFisca parameters by matching values.

    Steps, as implemented below:

    1. Walk ``--source-dir`` and load every ``.yaml`` file whose sheet name is
       not upper-case, indexing each value found in its "Valeurs" rows by the
       value itself, together with its IPP path and start date.
    2. Iterate over the OpenFisca legislation values and keep the ones whose
       value and start date match exactly one IPP entry.
    3. Dump the resulting tree (IPP path -> list of dotted OpenFisca paths)
       to ``--target`` as YAML.

    Relies on names defined elsewhere in the file: ``log``, ``date_names``,
    ``iter_ipp_values`` and ``iter_openfisca_values``.

    NOTE(review): this is Python 2 code (``str.decode``, ``iteritems``,
    ``basestring``, ``unicode``); it will not run unchanged on Python 3.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-s',
        '--source-dir',
        default='yaml-clean',
        help='path of source directory containing clean IPP YAML files')
    parser.add_argument(
        '-t',
        '--target',
        default='ipp-tax-and-benefit-tables-to-openfisca-parameters.yaml',
        help=
        'path of generated YAML file containing the association between IPP fields to OpenFisca parameters'
    )
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help="increase output verbosity")
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.WARNING,
        stream=sys.stdout)

    file_system_encoding = sys.getfilesystemencoding()

    # Step 1: index every IPP value -> list of {path, start}.
    ipp_infos_by_value = {}
    for source_dir_encoded, directories_name_encoded, filenames_encoded in os.walk(
            args.source_dir):
        # Sorting in place makes os.walk visit sub-directories in a stable order.
        directories_name_encoded.sort()
        for filename_encoded in sorted(filenames_encoded):
            if not filename_encoded.endswith('.yaml'):
                continue
            filename = filename_encoded.decode(file_system_encoding)
            sheet_name = os.path.splitext(filename)[0]
            source_file_path_encoded = os.path.join(source_dir_encoded,
                                                    filename_encoded)
            # Path of the file relative to the source directory.
            relative_file_path_encoded = source_file_path_encoded[
                len(args.source_dir):].lstrip(os.sep)
            relative_file_path = relative_file_path_encoded.decode(
                file_system_encoding)
            # Upper-case sheets are skipped; every other sheet must be lower-case.
            if sheet_name.isupper():
                continue
            assert sheet_name.islower(), sheet_name
            log.info(u'Loading file {}'.format(relative_file_path))
            with open(source_file_path_encoded) as source_file:
                # NOTE(review): yaml.load without an explicit Loader can
                # build arbitrary objects; prefer a safe loader if these
                # files are not fully trusted.
                data = yaml.load(source_file)
            rows = data.get(u"Valeurs")
            if rows is None:
                log.info(u' Skipping file {} without "Valeurs"'.format(
                    relative_file_path))
                continue
            for row in rows:
                # Find the start date of the row: "Date d'effet" first, then
                # the first non-empty field among date_names.
                start = row.get(u"Date d'effet")
                if start is None:
                    for date_name in date_names:
                        start = row.get(date_name)
                        if start is not None:
                            break
                    else:
                        # No date found. Skip row.
                        continue
                elif not isinstance(start, datetime.date):
                    # A non-date "Date d'effet" is indexed by its
                    # "Année Revenus" entry.
                    start = start[u"Année Revenus"]

                for name, child in row.iteritems():
                    if name in date_names:
                        continue
                    for path, value in iter_ipp_values(child):
                        if isinstance(value, basestring):
                            # "<number> <unit>" strings are reduced to the number.
                            split_value = value.split()
                            if len(split_value) == 2 and split_value[1] in (
                                    u'%',
                                    u'AF',  # old francs
                                    u'CFA',  # CFA francs
                                    u'COTISATIONS',
                                    u'EUR',
                                    u'FRF',
                            ):
                                value = float(split_value[0])
                        # Integral floats are stored as int.
                        if isinstance(value, float) and value == int(value):
                            value = int(value)
                        full_path = tuple(
                            relative_file_path.split(os.sep)[:-1]) + (
                                sheet_name, name) + tuple(path)
                        ipp_infos_by_value.setdefault(value, []).append(
                            dict(
                                path=full_path,
                                start=start,
                            ))

    # print yaml.dump(ipp_infos_by_value, allow_unicode = True, default_flow_style = False, indent = 2, width = 120)

    tax_benefit_system = FranceTaxBenefitSystem()

    # print yaml.dump(tax_benefit_system.legislation_json, allow_unicode = True, default_flow_style = False, indent = 2,
    #     width = 120)

    # openfisca_infos_by_value = {}
    # for path, start, value in iter_openfisca_values(tax_benefit_system.legislation_json):
    #     openfisca_infos_by_value.setdefault(value, []).append(dict(
    #         path = tuple(path),
    #         start = start,
    #         ))
    # print yaml.dump(openfisca_infos_by_value, allow_unicode = True, default_flow_style = False, indent = 2, width = 120)

    # ipp_count = {}
    # for path, start, value in iter_openfisca_values(tax_benefit_system.legislation_json):
    #     ipp_infos = ipp_infos_by_value.get(value)
    #     if ipp_infos is None:
    #         # OpenFisca parameter doesn't exist in IPP.
    #         continue
    #     for ipp_info in ipp_infos:
    #         if ipp_info['start'] == start:
    #             ipp_child = ipp_count
    #             ipp_path = ipp_info['path']
    #             for name in path:
    #                 ipp_child = ipp_child.setdefault(name, {})
    #             ipp_child_count = ipp_child.setdefault('count_by_path', {})
    #             for ipp_index in range(len(ipp_path)):
    #                 ipp_sub_path = ipp_path[:ipp_index + 1]
    #                 ipp_child_count[ipp_sub_path] = ipp_child_count.get(ipp_sub_path, 0) + 1
    # print yaml.dump(ipp_count, allow_unicode = True, default_flow_style = False, indent = 2, width = 120)

    # Step 2: match OpenFisca values against the IPP index.  Both directions
    # are recorded; only the IPP -> OpenFisca one is written out below.
    starts_by_ipp_path_by_openfisca_path = {}
    starts_by_openfisca_path_by_ipp_path = {}
    for path, start, value in iter_openfisca_values(
            tax_benefit_system.legislation_json):
        ipp_infos = ipp_infos_by_value.get(value)
        if ipp_infos is None:
            # OpenFisca parameter doesn't exist in IPP.
            continue
        same_start_ipp_paths = [
            ipp_info['path'] for ipp_info in ipp_infos
            if ipp_info['start'] == start
        ]
        # Keep unambiguous matches only: same value, same start date and
        # exactly one IPP candidate.
        if len(same_start_ipp_paths) == 1:
            ipp_path = same_start_ipp_paths[0]
            starts_by_ipp_path_by_openfisca_path.setdefault(
                tuple(path), {}).setdefault(ipp_path, set()).add(start)
            starts_by_openfisca_path_by_ipp_path.setdefault(
                ipp_path, {}).setdefault(tuple(path), set()).add(start)

    # for openfisca_path, starts_by_ipp_path in sorted(starts_by_ipp_path_by_openfisca_path.iteritems()):
    ##     if len(starts_by_ipp_path) == 1:
    ##         print u'.'.join(openfisca_path), '->', u' / '.join(starts_by_ipp_path.keys()[0])
    #     if len(starts_by_ipp_path) > 1:
    #         print u'.'.join(openfisca_path), '->', starts_by_ipp_path

    # for ipp_path, starts_by_openfisca_path in sorted(starts_by_openfisca_path_by_ipp_path.iteritems()):
    #     if len(starts_by_openfisca_path) == 1:
    #         print u' / '.join(ipp_path), '->', u'.'.join(
    #             unicode(fragment)
    #             for fragment in starts_by_openfisca_path.keys()[0]
    #             )
    ##     if len(starts_by_openfisca_path) > 1:
    ##         print u' / '.join(ipp_path), '->', u'.'.join(
    ##             unicode(fragment)
    ##             for fragment in starts_by_openfisca_path.keys()[0]
    ##             )

    # Step 3: build the nested tree keyed by the IPP path components; each
    # leaf is the sorted list of matching dotted OpenFisca paths.
    openfisca_path_by_ipp_tree = collections.OrderedDict()
    for ipp_path, starts_by_openfisca_path in sorted(
            starts_by_openfisca_path_by_ipp_path.iteritems()):
        openfisca_path_by_ipp_sub_tree = openfisca_path_by_ipp_tree
        for ipp_name in ipp_path[:-1]:
            openfisca_path_by_ipp_sub_tree = openfisca_path_by_ipp_sub_tree.setdefault(
                ipp_name, collections.OrderedDict())
        ipp_name = ipp_path[-1]
        openfisca_path_by_ipp_sub_tree[ipp_name] = [
            u'.'.join(unicode(fragment) for fragment in openfisca_name)
            for openfisca_name in sorted(starts_by_openfisca_path)
        ]
    with open(args.target, 'w') as target_file:
        yaml.dump(openfisca_path_by_ipp_tree,
                  target_file,
                  allow_unicode=True,
                  default_flow_style=False,
                  indent=2,
                  width=120)

    return 0
def ajustement_h5(
    input_h5="./Simulation_engine/dummy_data.h5",
    output_h5="./Simulation_engine/dummy_data_ajuste.h5",
    distribution_rfr_population="./Simulation_engine/Calib/ResFinalCalibSenat.csv",
    PERIOD=None,
):
    """Calibrate the weights and incomes of an HDF5 population file.

    The input file is simulated, then adjusted in two steps:

    1. weights: rows with ``rfr < 0.01`` are reweighted so that they target
       6% of the total weight, then all weights are rescaled to a fixed
       total number of tax households;
    2. incomes: a target ``rfr`` is derived for each row from the official
       distribution read from ``distribution_rfr_population`` (first bracket
       scaled linearly, the others through per-bracket Pareto parameters).

    The resulting per-household ratios are applied to the weight and income
    columns of the input file, which is written to ``output_h5`` under the
    key "input".

    Args:
        input_h5: path of the HDF5 file to calibrate.
        output_h5: path of the calibrated HDF5 file to write.
        distribution_rfr_population: CSV with the official distribution; the
            code reads its ``Rk``, ``Nk`` and ``dArk`` columns.
        PERIOD: simulation period; defaults to ``annee_de_calcul``.
    """
    if PERIOD is None:
        PERIOD = annee_de_calcul
    ajuste_h5 = output_h5
    TBS = FranceTaxBenefitSystem()
    DUMMY_DATA = pandas.read_hdf(input_h5)
    # Keeping computations short with option to keep file under 1000 FF
    # DUMMY_DATA = DUMMY_DATA[DUMMY_DATA["idmen"] < 1000]
    simulation_base_deciles = simulation(PERIOD, DUMMY_DATA, TBS)
    df = aggregats_ff(PERIOD, simulation_base_deciles).sort_values(by="rfr")
    # Unweighted share of tax households with no income.
    print("{} FF sur {} ont un revenu>0 , donc {:.2f}% ont que dalle ".format(
        len(df[df["rfr"] > 0.01]),
        len(df),
        100 - 100 * len(df[df["rfr"] > 0.01]) / len(df),
    ))
    # Step 1: adjust the weight of the households with zero income.
    # oldweight is the weighted share of rows with rfr <= 0.01.
    # NOTE(review): redweightifrfr0 divides by oldweight, so this fails when
    # no row has a zero income.
    oldweight = 1 - df[df["rfr"] > 0.01]["wprm"].sum() / df["wprm"].sum()
    targetweight = 0.06
    redweightifrfr0 = targetweight * (1 - oldweight) / oldweight / (
        1 - targetweight)
    # Same statistic as above, weighted this time.
    print(
        "Non en fait {} FF sur {} ont un revenu>0 , donc {:.2f}% ont que dalle. Je vais les ajuster."
        .format(
            df[df["rfr"] > 0.01]["wprm"].sum(),
            df["wprm"].sum(),
            100 - 100 * df[df["rfr"] > 0.01]["wprm"].sum() / df["wprm"].sum(),
        ))
    print("old : {} new : {} adj : {}".format(oldweight, targetweight,
                                              redweightifrfr0))
    # Weight reduction for the zero-income rows.
    df["adjwstep0"] = 1
    df["realwprm"] = df["wprm"]
    df.loc[df["rfr"] < 0.01, "adjwstep0"] = redweightifrfr0
    df.loc[df["rfr"] < 0.01, "realwprm"] = df["wprm"] * redweightifrfr0
    # Calibration of the total number of tax households.
    target_foyers_fiscaux = 38_332_977  # src : https://www.impots.gouv.fr/portail/statistiques (2018)
    adjust_wprm = target_foyers_fiscaux / df["realwprm"].sum()
    df["realwprm"] = df["realwprm"] * adjust_wprm
    # NOTE(review): this still prints statistics on "wprm" (not the adjusted
    # "realwprm") and its last term uses rfr > 0 where the rest uses
    # rfr > 0.01 — confirm this is intended.
    print(
        "Non en fait {} FF sur {} ont un revenu>0 , donc {:.2f}% ont que dalle "
        .format(
            df[df["rfr"] > 0.01]["wprm"].sum(),
            df["wprm"].sum(),
            100 - 100 * df[df["rfr"] > 0]["wprm"].sum() / df["wprm"].sum(),
        ))
    # Step 1.1: adjust the first decile (nothing is done for now, the impact
    # is negligible).
    # Step 2: PBP (Pareto by parts)
    # Official statistics
    so = pandas.read_csv(distribution_rfr_population)
    # Must contain:
    # Column Rk : reference taxable income (RFR)
    # Column Nk : percentage of tax households with an RFR >= column Rk
    # Column Ark : average RFR of the tax households with an RFR >= column Rk
    #              (only used for the law of the highest decile)
    # NOTE(review): the code below reads a column named "dArk" (times 1000),
    # not "Ark" — confirm the CSV schema.
    # Now determine everybody's distribution:
    # 2.0 - attach its running weight to each row.
    totw = df["realwprm"].sum()
    df = df.sort_values(by="rfr")
    df["nw"] = df["realwprm"] / totw  # normalized weight (total = 1)
    df["rsnw"] = df["nw"].cumsum(
    ) - df["nw"] / 2  # cumulative sum of nw, taken at the middle of each row's own weight
    # 2.1 - in the first decile: the exact ERFS values times a scalar factor
    # that brings the first decile to the wanted value.
    targetFirstDec = so["Rk"][1]
    limWeightFirstDec = so["Nk"][1]
    limOrigFirstDec = max(df[df["rsnw"] <= 1 - limWeightFirstDec]["rfr"])
    df["adjrevstep2"] = 1
    df.loc[df["rsnw"] <= 1 - limWeightFirstDec,
           "adjrevstep2"] = (targetFirstDec / limOrigFirstDec)
    # 2.2 - in every other category (except the last one): the distribution
    # restricted to an interval is a Pareto law whose first parameter is the
    # start of the interval and whose second parameter is the one that yields
    # the right number of people in the interval.
    # Computation of that parameter:
    sonk = so["Nk"].values  # plain arrays, to index rows by position
    sork = so["Rk"].values
    paramsPareto = [-1]  # placeholder entry for the first row
    for i in range(1, len(sonk) - 1):
        n0 = sonk[i]
        n1 = sonk[i + 1]
        r0 = sork[i]
        r1 = sork[i + 1]
        newparam = math.log(n1 / n0) / math.log(r0 / r1)
        paramsPareto += [newparam]
    # 2.3 - in the last category: take the Pareto parameter that matches the
    # average of the last bracket.
    # The mean of a Pareto law is: esp = (1 + 1/(k-1)) * xm
    # hence k = 1/(esp/xm - 1) + 1
    lastaverage = so["dArk"].values[-1] * 1000
    lastthresh = sork[-1]
    paramsPareto += [1 / (lastaverage / lastthresh - 1) + 1]
    so["paramPareto"] = paramsPareto
    df["realrfr"] = df.apply(reverseCDF(so), axis=1)
    df["realrfrw"] = df["realrfr"] * df["realwprm"]
    # OK now that this great function works (does it? Why not try it? comparing it now to the original function??)
    # I can generate the REAL rfr
    # End of step 2.
    testerrorvalues(df, "rfr", "wprm")
    aa = testerrorvalues(df, "realrfr", "realwprm")
    print("Aggregated Error % after calibration :", aa)
    # df now holds the calibrated rfr and realwprm: derive the per-household
    # adjustment ratios (income ratio left at 1 where rfr is not positive).
    df["total_ajust_revenu"] = 1
    df.loc[df["rfr"] > 0, "total_ajust_revenu"] = df["realrfr"] / df["rfr"]
    df["total_ajust_poids"] = df["realwprm"] / df["wprm"]
    # Apply the ratios to the .h5 data.
    to_transform = pandas.read_hdf(input_h5)
    tt_colonnes = to_transform.columns
    df_changes = df[["idfoy", "total_ajust_revenu", "total_ajust_poids"]]
    # NOTE(review): merge() defaults to an inner join, so input rows whose
    # idfoy is absent from df are dropped — confirm this is intended.
    to_transform = to_transform.merge(df_changes, on="idfoy")
    colspoids = ["wprm"]
    colsrevenus = [
        "chomage_brut",
        "pensions_alimentaires_percues",
        "rag",
        "ric",
        "rnc",
        "salaire_de_base",
        "f4ba",
        # "loyer",
        # "taxe_habitation",
    ]
    # Only scale the income columns that exist in the input file.
    colsrevenus = [col for col in colsrevenus if col in to_transform.columns]
    for cp in colspoids:
        to_transform[cp] = to_transform[cp] * to_transform["total_ajust_poids"]
    for cp in colsrevenus:
        to_transform[
            cp] = to_transform[cp] * to_transform["total_ajust_revenu"]
    # Drop the helper ratio columns: keep the original columns only.
    to_transform = to_transform[tt_colonnes]
    to_transform.to_hdf(ajuste_h5, key="input")
from openfisca_core.parameters import ParameterNode, Scale from openfisca_france import FranceTaxBenefitSystem tax_benefit_system = FranceTaxBenefitSystem() parameters = tax_benefit_system.parameters def get_parameters_by_unit(parameter, parameters_by_unit=None): """ Build a dictionnary collecting the legislation parameters according to their units """ if parameters_by_unit is None: parameters_by_unit = dict( scale_none=list(), scale_currency=list(), none=list(), currency=list(), rate=list(), year=list(), ) for sub_parameter in parameter.children.values(): if isinstance(sub_parameter, ParameterNode): get_parameters_by_unit(sub_parameter, parameters_by_unit) else: if isinstance(sub_parameter, Scale): unit = sub_parameter.metadata.get('unit') rate_unit = sub_parameter.metadata.get('rate_unit') threshold_unit = sub_parameter.metadata.get('threshold_unit') if unit is not None: raise ValueError(
from openfisca_france import FranceTaxBenefitSystem

tax_benefit_system = FranceTaxBenefitSystem()

# Describe a household for 2015: one parent and two children.
scenario = tax_benefit_system.new_scenario()
scenario.init_single_entity(
    period=2015,
    parent1={'age': 30, 'salaire_de_base': 50000},
    enfants=[dict(age=age_enfant) for age_enfant in (12, 18)],
)

simulation = scenario.new_simulation()

# Some variables can only be computed on a monthly basis, so
# simulation.calculate('af', '2015') is not possible:
print(simulation.calculate('af', '2015-01'))  # variable af for January 2015
simulation.calculate_add("af", "2015")  # sum over the whole year

# Some variables can only be computed on an annual basis.
simulation.calculate('irpp', period='2015')

# Re-initialise the scenario with another household, for 2019.
scenario.init_single_entity(
    period=2019,
    parent1={'age': 30, 'salaire_de_base': 70000},
)
def create_individu_for_inversion(year, revenu_type='net'):
    """Prepare the individual-level table used for the income inversion.

    Reads ``individus_{year}_post_01`` from the 'erfs_fpr' store, renames the
    ERFS income columns to their OpenFisca names (net or taxable, depending
    on ``revenu_type``), runs the ``create_*`` helpers on it and stores the
    created columns plus ``salaire_{revenu_type}`` under
    ``individu_for_inversion_{year}``.

    Args:
        year: survey year; must not be None.
        revenu_type: 'net' or 'imposable'.
    """
    assert revenu_type in ['net', 'imposable']
    assert year is not None

    # Using data produced by preprocessing.build_merged_dataframes
    temporary_store = get_store(file_name='erfs_fpr')
    individus = temporary_store['individus_{}_post_01'.format(year)]

    # Only three target names depend on the income concept.
    if revenu_type == 'net':
        chomage, retraite, salaire = (
            'chomage_net', 'retraite_nette', 'salaire_net')
    elif revenu_type == 'imposable':
        chomage, retraite, salaire = (
            'chomage_imposable', 'retraite_imposable', 'salaire_imposable')

    # ERFS column name -> OpenFisca variable name.
    old_by_new_variables = {
        'chomage_i': chomage,
        'pens_alim_recue_i': 'pensions_alimentaires_percues',
        'rag_i': 'rag_net',
        'retraites_i': retraite,
        'ric_i': 'ric_net',
        'rnc_i': 'rnc_net',
        'salaires_i': salaire,
    }

    colonnes_presentes = individus.columns.tolist()
    for variable in old_by_new_variables:
        assert variable in colonnes_presentes, \
            "La variable {} n'est pas présente".format(variable)

    individus.rename(columns=old_by_new_variables, inplace=True)

    period = None  # set below, once the birth dates are created

    create_ages(individus, year)
    create_date_naissance(
        individus,
        age_variable=None,
        annee_naissance_variable='naia',
        mois_naissance='naim',
        year=year,
    )

    period = periods.period(year)
    # create_revenus(individus, revenu_type = revenu_type)
    # created_variables.append('taux_csg_remplacement')

    create_contrat_de_travail(
        individus, period=period, salaire_type=revenu_type)
    create_categorie_salarie(individus, period=period)

    tax_benefit_system = FranceTaxBenefitSystem()
    create_salaire_de_base(
        individus,
        period=period,
        revenu_type=revenu_type,
        tax_benefit_system=tax_benefit_system,
    )
    create_effectif_entreprise(individus, period=period)
    create_traitement_indiciaire_brut(
        individus,
        period=period,
        revenu_type=revenu_type,
        tax_benefit_system=tax_benefit_system,
    )

    # Columns stored for the inversion, in the order the helpers ran.
    created_variables = [
        'age',
        'age_en_mois',
        'date_naissance',
        'contrat_de_travail',
        'heures_remunerees_volume',
        'categorie_salarie',
        'salaire_de_base',
        'effectif_entreprise',
        'traitement_indiciaire_brut',
        'primes_fonction_publique',
    ]
    other_variables = ['salaire_{}'.format(revenu_type)]

    colonnes_conservees = created_variables + other_variables
    temporary_store['individu_for_inversion_{}'.format(year)] = \
        individus[colonnes_conservees]
def test_useless_variables(
    input_h5="./Simulation_engine/dummy_data.h5",
    outfile_path=None,
    name_variables=("rfr", "irpp", "nbptr"),
    PERIOD=None,
):
    """Find the input columns that have no effect on the simulated variables.

    Each column of the input file except ``wprm`` is removed in turn and the
    simulation is rerun.  A column is "useless" when none of the variables
    in ``name_variables`` changes.  All useless columns are then removed at
    once; if the results still match the baseline, the reduced data set is
    written to ``outfile_path``.

    Args:
        input_h5: path of the HDF5 file read with ``pandas.read_hdf``.
        outfile_path: where to write the reduced file; defaults to
            ``input_h5`` with ".h5" replaced by "_useful.h5".
        name_variables: variables whose values are compared.
        PERIOD: simulation period; defaults to ``annee_de_calcul``.

    Returns:
        list: the names of the columns found useless one at a time.
    """
    if PERIOD is None:
        PERIOD = annee_de_calcul
    # Disable pandas' chained-assignment warning (columns are added to a
    # selection of the grouped frame below).
    pandas.options.mode.chained_assignment = None
    list_useless_variables = []
    TBS = FranceTaxBenefitSystem()
    DUMMY_DATA = pandas.read_hdf(input_h5)
    simulation_base_deciles, dictionnaire_datagrouped = simulation(
        PERIOD, DUMMY_DATA, TBS)
    df = dictionnaire_datagrouped["foyer_fiscal"][["wprm"]]
    for nv in name_variables:
        df["{}_base".format(nv)] = simulation_base_deciles.calculate(
            nv, PERIOD)

    for col in DUMMY_DATA.columns:
        if col == "wprm":  # we don't want to remove this one
            continue
        isdif = False
        data_wo_column = DUMMY_DATA[[
            k for k in DUMMY_DATA.columns if k != col
        ]]
        try:
            newsim, _ = simulation(PERIOD, data_wo_column, TBS)
            resvar = {nv: {} for nv in name_variables}
            for nv in name_variables:
                df["{}_{}".format(nv, col)] = newsim.calculate(nv, PERIOD)
                resvar[nv]["countdif"] = len(df[
                    df["{}_{}".format(nv, col)] != df["{}_base".format(nv)]])
                isdif |= resvar[nv]["countdif"]
            if not isdif:
                list_useless_variables += [col]
            print(
                col,
                "is",
                "not" if isdif else "",
                "useless",
                "{}".format([resvar[nv]["countdif"] for nv in name_variables])
                if isdif else "",
            )
        except Exception:
            # Deliberate best effort: if the simulation cannot even run
            # without the column, the column is needed.
            print(col, "is definitely not useless")

    data_wo_useless = DUMMY_DATA[[
        k for k in DUMMY_DATA.columns if k not in list_useless_variables
    ]]
    # Rerun on the data stripped of ALL useless columns.  The previous code
    # simulated ``data_wo_column`` (the last loop iteration's frame) and then
    # compared stale columns, so this check never tested the combined removal.
    newsim, _ = simulation(PERIOD, data_wo_useless, TBS)
    isdif = False
    for nv in name_variables:
        resultats = newsim.calculate(nv, PERIOD)
        isdif |= bool((df["{}_base".format(nv)] != resultats).any())

    if isdif:
        print(
            "Removing all variables at once didn't work, good luck with that")
    else:
        if outfile_path is None:
            outfile_path = input_h5.replace(".h5", "_useful.h5")
        data_wo_useless.to_hdf(outfile_path, key="input")
        print(
            "It seems lots of columns don't do anything. Data with only useful columns was exported to {}"
            .format(outfile_path))
    return list_useless_variables