def load_dataset(self, dataset, year: int) -> None:
    """Load one year of ``dataset`` into a fresh simulation.

    Rebuilds ``self.system`` (then applies ``self.reforms`` through
    ``self.apply_reforms``), records the entity id arrays in
    ``self.relations`` and stores the populated simulation in
    ``self.simulation``.

    Args:
        dataset: object whose ``load(year)`` returns a mapping of
            ``variable -> {period: values}`` that also exposes ``close()``.
        year: year to load; its string form is the key used to read the
            id and role arrays.

    Warns:
        UserWarning: when ``set_input`` failed for at least one variable;
            the message lists each skipped variable once.
    """
    data = dataset.load(year)
    # try/finally: the data handle is released even when building the
    # simulation fails part-way through.
    try:
        year_key = str(year)

        def column(name):
            # Materialise one stored array for the requested year.
            return np.array(data[name][year_key])

        self.system = openfisca_uk.CountryTaxBenefitSystem()
        self.apply_reforms(self.reforms)
        builder = SimulationBuilder()
        builder.create_entities(self.system)

        self.relations = {
            "person": column("P_person_id"),
            "benunit": column("B_benunit_id"),
            "household": column("H_household_id"),
            "person-benunit": column("P_benunit_id"),
            "person-household": column("P_household_id"),
        }

        builder.declare_person_entity("person", column("P_person_id"))
        benunits = builder.declare_entity("benunit", column("B_benunit_id"))
        households = builder.declare_entity(
            "household", column("H_household_id"))
        person_roles = column("P_role").astype(str)
        # define person-benunit memberships
        builder.join_with_persons(
            benunits, column("P_benunit_id"), person_roles)
        # define person-household memberships
        builder.join_with_persons(
            households, column("P_household_id"), person_roles)
        model = builder.build(self.system)

        skipped = []
        for variable in data.keys():
            if variable not in self.system.variables:
                continue
            target_dtype = self.system.variables[variable].value_type
            for period in data[variable].keys():
                values = np.array(data[variable][period])
                if target_dtype in (Enum, str):
                    values = values.astype(str)
                else:
                    values = values.astype(target_dtype)
                try:
                    model.set_input(variable, period, values)
                except Exception:
                    # Best-effort load: remember the variable (once, even if
                    # several periods fail) and keep going.
                    if variable not in skipped:
                        skipped.append(variable)

        if skipped:
            warnings.warn(
                f"Incomplete initialisation: skipped {len(skipped)} variables: "
                + ", ".join(map(str, skipped))
            )
        self.simulation = model
    finally:
        data.close()
def build_simulation(tax_benefit_system, period, titres_ids, communes_ids):
    """Build a simulation whose persons are 'article' and whose group is 'commune'.

    Args:
        tax_benefit_system: system used to create the entities and to build
            the simulation.
        period: not used by this function.
        titres_ids: ids declared for the 'article' person entity (titres,
            without duplicates thanks to the multi-commune renaming).
        communes_ids: one commune id per existing titre, in the order of
            ``titres_ids``; used both to declare the 'commune' entity and to
            attach each titre to its commune.

    Returns:
        The simulation produced by ``SimulationBuilder.build``.
    """
    builder = SimulationBuilder()
    builder.create_entities(tax_benefit_system)

    # Titres are the person-level entity.
    builder.declare_person_entity('article', titres_ids)

    # Attach the communes to the titres.
    communes = builder.declare_entity('commune', communes_ids)

    # Every titre has the role 'article' inside its commune.
    roles = ['article' for _ in range(len(communes_ids))]
    builder.join_with_persons(communes, communes_ids, roles=roles)

    return builder.build(tax_benefit_system)
def simulation_from_dgcl_csv(period, data, tbs, data_previous_year=None):
    """Build a simulation of communes (all belonging to one 'france' etat).

    Args:
        period: period under which the columns of ``data`` are entered; it is
            converted with ``int`` to derive the previous period.
        data: DataFrame with one row per commune; its index gives the
            commune ids. Columns whose name contains a space are skipped.
        tbs: tax and benefit system used to build the simulation.
        data_previous_year: optional DataFrame whose columns carry the
            previous year's values; merged onto ``data`` with a left join on
            ``code_comm`` (a name defined outside this function —
            NOTE(review): confirm it is in scope).

    Returns:
        The populated simulation.
    """
    builder = SimulationBuilder()
    builder.create_entities(tbs)
    builder.declare_person_entity("commune", data.index)

    commune_count = len(data.index)
    etat = builder.declare_entity('etat', ['france'])
    builder.join_with_persons(
        etat,
        ['france'] * commune_count,
        [None] * commune_count,
    )

    simulation = builder.build(tbs)
    simulation.max_spiral_loops = 10

    for column in data.columns:
        # A space in the name is the (crude) marker for a column that is not
        # an OpenFisca variable.
        if " " in column:
            continue
        simulation.set_input(column, period, data[column])

    if data_previous_year is None:
        return simulation

    # Gather last year's values for the communes that exist this year; new
    # communes have no match and get zero below.
    merged = data.merge(
        data_previous_year,
        on=code_comm,
        how='left',
        suffixes=["_currentyear", ""],
    )
    for column in data_previous_year.columns:
        # Same space-in-name filter as for the current year.
        if " " in column:
            continue
        simulation.set_input(
            column,
            str(int(period) - 1),
            merged[column].fillna(0),
        )
    return simulation
def simulation(period, data, tbs):
    """Build a simulation from a person-level DataFrame.

    Args:
        period: period under which every input column is entered.
        data: one row per individu, with the columns ``idfoy``/``idmen``/
            ``idfam`` and ``quifoy``/``quifam``. The frame is modified in
            place: the role columns ``quimenof``, ``quifoyof`` and
            ``quifamof`` are added.
        tbs: tax and benefit system used to build the simulation and to look
            up the entity of each variable.

    Returns:
        A tuple ``(simulation, dictionnaire_datagrouped)`` where the second
        item maps each entity key to the DataFrame its inputs were read from
        (``data`` itself for "individu", one row per entity, sorted by id,
        for the group entities).
    """
    # Translate the role codes into OpenFisca role names.
    data["quimenof"] = "enfant"
    # NOTE(review): the menage role is derived from "quifoy", not "quimen" —
    # confirm this is intended.
    data.loc[data["quifoy"] == 1, "quimenof"] = "conjoint"
    data.loc[data["quifoy"] == 0, "quimenof"] = "personne_de_reference"

    data["quifoyof"] = "personne_a_charge"
    data.loc[data["quifoy"] == 1, "quifoyof"] = "conjoint"
    data.loc[data["quifoy"] == 0, "quifoyof"] = "declarant_principal"

    data["quifamof"] = "enfant"
    data.loc[data["quifam"] == 1, "quifamof"] = "conjoint"
    data.loc[data["quifam"] == 0, "quifamof"] = "demandeur"

    sb = SimulationBuilder()
    sb.create_entities(tbs)
    sb.declare_person_entity("individu", data.index)

    # Create the OpenFisca group entities and the per-entity grouped frames.
    listentities = {"foy": "foyer_fiscal", "men": "menage", "fam": "famille"}
    instances = {}
    dictionnaire_datagrouped = {"individu": data}

    for ent, ofent in listentities.items():
        id_column = "id" + ent
        persons_ent = data[id_column].values
        persons_ent_roles = data["qui" + ent + "of"].values

        # Declare the ids in ascending order so that they line up, position
        # by position, with the id-sorted grouped frame used for the inputs
        # below (first-appearance order would misalign them whenever `data`
        # is not already sorted by this id).
        ent_ids = data[id_column].drop_duplicates().sort_values().values
        instances[ofent] = sb.declare_entity(ofent, ent_ids)
        sb.join_with_persons(instances[ofent], persons_ent,
                             roles=persons_ent_roles)

        # The following assumes data defined for an entity are the same for
        # all rows in the same entity. Or at least that the first non null
        # value found for an entity will always be the total value for an
        # entity (which is the case for f4ba). Per the original note, these
        # checks are performed in a separate checkdata function.
        dictionnaire_datagrouped[ofent] = (
            data.groupby(id_column, as_index=False)
            .first()
            .sort_values(by=id_column)
        )

    # These columns should not be attributed to any OpenFisca entity.
    columns_not_OF_variables = {
        "idmen",
        "idfoy",
        "idfam",
        "noindiv",
        "level_0",
        "quifam",
        "quifoy",
        "quimen",
        "idmen_x",
        "idmen_y",
        "wprm",
        "index",
        "idmen_original",
        "idfoy_original",
        "idfam_original",
        "quifamof",
        "quifoyof",
        "quimenof",
    }

    simulation = sb.build(tbs)
    memory_config = MemoryConfig(
        # When 95% of the virtual memory is full, switch to disk storage
        max_memory_occupation=0.95,
        # Always store these variables in memory
        priority_variables=["salary", "age"],
        # `non_cached_variables` is defined outside this function.
        variables_to_drop=non_cached_variables,
    )
    simulation.memory_config = memory_config

    # Assign each remaining column to the OpenFisca entity of its variable.
    for colonne in data.columns:
        if colonne in columns_not_OF_variables:
            continue
        entity_key = tbs.get_variable(colonne).entity.key
        simulation.set_input(
            colonne,
            period,
            dictionnaire_datagrouped[entity_key][colonne],
        )
    return simulation, dictionnaire_datagrouped
def load_dataset(self, entity_dfs: Tuple[pd.DataFrame], verbose: bool = False) -> None:
    """Build a simulation from person, benunit and household DataFrames.

    The three frames are modified in place: each is sorted by its id column,
    gains an ``id`` column and has its index reset. ``self.system`` is
    rebuilt (with ``self.reforms`` applied) and ``self.relations`` receives
    the id arrays.

    Args:
        entity_dfs: the ``(person, benunit, household)`` frames, in that
            order.
        verbose: when true, print the columns that could not be entered.

    Returns:
        The built simulation. NOTE(review): the signature is annotated
        ``-> None`` although the model is returned.
    """
    person, benunit, household = entity_dfs
    self.system = openfisca_uk.CountryTaxBenefitSystem()
    self.apply_reforms(self.reforms)
    sim_builder = SimulationBuilder()
    sim_builder.create_entities(self.system)

    frames_and_keys = (
        (person, "P_person_id"),
        (benunit, "B_benunit_id"),
        (household, "H_household_id"),
    )
    # Three passes, in the same order as the original statements.
    for frame, key in frames_and_keys:
        frame.sort_values(key, inplace=True)
    for frame, key in frames_and_keys:
        frame["id"] = frame[key]
    for frame, _ in frames_and_keys:
        frame.sort_values("id", inplace=True)
        frame.reset_index(inplace=True, drop=True)

    self.relations = {
        "person": np.array(person["P_person_id"]),
        "benunit": np.array(benunit["B_benunit_id"]),
        "household": np.array(household["H_household_id"]),
        "person-benunit": np.array(person["P_benunit_id"]),
        "person-household": np.array(person["P_household_id"]),
    }

    sim_builder.declare_person_entity(
        "person", np.array(person["P_person_id"]))
    benunit_entity = sim_builder.declare_entity(
        "benunit", np.array(benunit["B_benunit_id"]))
    household_entity = sim_builder.declare_entity(
        "household", np.array(household["H_household_id"]))

    roles = np.array(person["P_role"])
    # Person -> benunit memberships.
    sim_builder.join_with_persons(
        benunit_entity, person["P_benunit_id"], roles)
    # Person -> household memberships.
    sim_builder.join_with_persons(
        household_entity, np.array(person["P_household_id"]), roles)

    model = sim_builder.build(self.system)

    skipped = []
    for frame in (person, benunit, household):
        for name in frame.columns:
            if name == "P_role":
                continue
            try:
                def_period = self.system.get_variable(name).definition_period
                # Eternity/year variables are entered once under the input
                # year; others once per sub-period of that year.
                input_periods = (
                    [self.input_year]
                    if def_period in ["eternity", "year"]
                    else period(self.input_year).get_subperiods(def_period)
                )
                for subperiod in input_periods:
                    model.set_input(name, subperiod, np.array(frame[name]))
            except Exception:
                skipped.append(name)

    if verbose and skipped:
        print(
            f"Incomplete initialisation: skipped {len(skipped)} variables:"
        )
        for var in skipped:
            print(f"{var}")
    return model
def load_frs(self, *reforms, verbose=False, bonus=None):
    """
    Create and populate a tax-benefit simulation model from OpenFisca.

    Reads ``person.csv``, ``benunit.csv`` and ``household.csv`` from
    ``self.data_dir``, stores the id arrays in ``self.relations`` and enters
    every column it can under ``self.input_period`` (or under each of its
    sub-periods, depending on the variable's definition period).

    Arguments:
        reforms: any reforms to apply, in order.
        verbose: when true, print the columns that could not be entered.
        bonus: optional mapping ``column -> amount`` added to that column
            before it is entered; defaults to no adjustment.

    Returns:
        A Simulation object.
    """
    # None instead of a shared mutable default.
    bonus = {} if bonus is None else bonus

    system = CountryTaxBenefitSystem()
    for reform in reforms:
        system = reform(system)  # apply each reform in order
    builder = SimulationBuilder()
    builder.create_entities(
        system)  # create the entities (person, benunit, etc.)

    person_file = pd.read_csv(os.path.join(self.data_dir, "person.csv"),
                              low_memory=False)
    benunit_file = pd.read_csv(os.path.join(self.data_dir, "benunit.csv"),
                               low_memory=False)
    household_file = pd.read_csv(os.path.join(self.data_dir,
                                              "household.csv"),
                                 low_memory=False)
    # Sort by id so every column is in the same order as the declared ids.
    # (A former `input_file = input_file.sort_index()` loop only rebound its
    # loop variable and had no effect, so it has been removed.)
    person_file.sort_values(by=["person_id"], inplace=True)
    benunit_file.sort_values(by=["benunit_id"], inplace=True)
    household_file.sort_values(by=["household_id"], inplace=True)

    self.relations = {
        "person": np.array(person_file["person_id"]),
        "benunit": np.array(benunit_file["benunit_id"]),
        "household": np.array(household_file["household_id"]),
        "person-benunit": np.array(person_file["benunit_id"]),
        "person-household": np.array(person_file["household_id"]),
    }
    person_ids = np.array(person_file["person_id"])
    benunit_ids = np.array(benunit_file["benunit_id"])
    household_ids = np.array(household_file["household_id"])
    builder.declare_person_entity("person", person_ids)
    benunits = builder.declare_entity("benunit", benunit_ids)
    households = builder.declare_entity("household", household_ids)
    person_roles = person_file["role"]
    # define person-benunit memberships
    builder.join_with_persons(
        benunits, np.array(person_file["benunit_id"]), person_roles)
    # define person-household memberships
    builder.join_with_persons(
        households, np.array(person_file["household_id"]), person_roles)
    person_file["index"] = np.arange(start=0, stop=len(person_file))
    model = builder.build(system)

    skipped = []
    for input_file in [person_file, benunit_file, household_file]:
        for column in input_file.columns:
            if column in bonus:
                input_file[column] += bonus[column]
            if column == "role":
                continue
            try:
                def_period = system.get_variable(column).definition_period
                if def_period in ["eternity", "year"]:
                    input_periods = [self.input_period]
                else:
                    input_periods = period(
                        self.input_period).get_subperiods(def_period)
                for subperiod in input_periods:
                    model.set_input(column, subperiod,
                                    np.array(input_file[column]))
            except Exception:
                # Best-effort load: columns that cannot be entered are
                # reported below instead of aborting.
                skipped.append(column)

    if skipped and verbose:
        print(
            f"Incomplete initialisation: skipped {len(skipped)} variables:"
        )
        for var in skipped:
            print(f"{var}")
    return model
# READ DATA data_persons = pandas.read_csv('./data_persons.csv') data_households = pandas.read_csv('./data_households.csv') # SIMULATION BUILDER sb = SimulationBuilder() sb.create_entities(tax_benefit_system) persons_ids = data_persons.person_id sb.declare_person_entity('person', persons_ids) # Instanciate the household entity: households_ids = data_households.household_id household_instance = sb.declare_entity('household', households_ids) # Join households data on persons: persons_households = data_persons.household_id persons_households_roles = data_persons.person_role_in_household sb.join_with_persons(household_instance, persons_households, persons_households_roles) # SIMULATION simulation = sb.build(tax_benefit_system) period = '2019-03' simulation.set_input('salary', period, data_persons.person_salary) total_taxes = simulation.calculate('total_taxes', period)