Exemple #1
0
 def load_dataset(self, dataset, year: int) -> None:
     """Build an OpenFisca simulation from a stored dataset.

     The dataset is opened with ``dataset.load(year)`` and always closed
     again, even when building the simulation fails. The resulting
     simulation is stored on ``self.simulation``; the entity id arrays and
     the person-to-group membership arrays are stored on ``self.relations``.

     Args:
         dataset: object exposing ``load(year)``; the loaded handle is read
             as ``data[variable][period]`` and must provide ``keys()`` and
             ``close()``.
         year: year whose entity structure (ids, memberships, roles) is
             used; it is converted to ``str`` to index the dataset.

     Warns:
         UserWarning: when one or more variables could not be set as
             inputs; the message lists the variable names.
     """
     data = dataset.load(year)
     try:
         year = str(year)
         self.system = openfisca_uk.CountryTaxBenefitSystem()
         self.apply_reforms(self.reforms)
         builder = SimulationBuilder()
         builder.create_entities(self.system)
         # Read every id / membership array once and reuse it below.
         self.relations = {
             "person": np.array(data["P_person_id"][year]),
             "benunit": np.array(data["B_benunit_id"][year]),
             "household": np.array(data["H_household_id"][year]),
             "person-benunit": np.array(data["P_benunit_id"][year]),
             "person-household": np.array(data["P_household_id"][year]),
         }
         builder.declare_person_entity("person", self.relations["person"])
         benunits = builder.declare_entity("benunit",
                                           self.relations["benunit"])
         households = builder.declare_entity("household",
                                             self.relations["household"])
         person_roles = np.array(data["P_role"][year]).astype(str)
         # The same role vector is used for both group entities.
         builder.join_with_persons(
             benunits, self.relations["person-benunit"],
             person_roles)  # define person-benunit memberships
         builder.join_with_persons(
             households, self.relations["person-household"],
             person_roles)  # define person-household memberships
         model = builder.build(self.system)
         skipped = []
         for variable in data.keys():
             if variable not in self.system.variables:
                 continue
             target_dtype = self.system.variables[variable].value_type
             for period in data[variable].keys():
                 values = np.array(data[variable][period])
                 if target_dtype in (Enum, str):
                     values = values.astype(str)
                 else:
                     values = values.astype(target_dtype)
                 try:
                     model.set_input(variable, period, values)
                 except Exception:
                     # Best effort: remember the variable (once) and carry on.
                     if variable not in skipped:
                         skipped.append(variable)
         if skipped:
             warnings.warn(
                 f"Incomplete initialisation: skipped {len(skipped)} variables: "
                 f"{', '.join(skipped)}")
         self.simulation = model
     finally:
         data.close()
def build_simulation(tax_benefit_system, period, titres_ids, communes_ids):
    """Build a simulation where each titre is an 'article' attached to a 'commune'.

    Args:
        tax_benefit_system: system used to create the entities and to build
            the simulation.
        period: not used by this function.
        titres_ids: ids declared for the person entity 'article'.
        communes_ids: used both as the ids of the 'commune' entity and as
            the per-titre commune assignment, so it is expected to hold one
            entry per titre, in the same order as ``titres_ids``.

    Returns:
        The simulation returned by ``SimulationBuilder.build``.
    """
    builder = SimulationBuilder()
    builder.create_entities(tax_benefit_system)

    # Titres are the "person" level of the model
    # (presumably de-duplicated upstream by the multi-commune renaming).
    builder.declare_person_entity('article', titres_ids)

    # Attach every titre to its commune.
    communes = builder.declare_entity('commune', communes_ids)

    # Every titre plays the single role 'article' within its commune.
    roles = ['article'] * len(communes_ids)
    builder.join_with_persons(communes, communes_ids, roles=roles)

    return builder.build(tax_benefit_system)
Exemple #3
0
def simulation_from_dgcl_csv(period, data, tbs, data_previous_year=None):
    """Build a commune-level simulation from a DataFrame of inputs.

    Every row of ``data`` becomes one 'commune' (ids taken from
    ``data.index``), and all communes belong to a single 'etat' entity
    named 'france'.

    Args:
        period: period under which the columns of ``data`` are entered;
            must be convertible with ``int()`` when ``data_previous_year``
            is given.
        data: DataFrame whose columns without a space in their name are
            entered as inputs.
        tbs: tax-benefit system used to build the simulation.
        data_previous_year: optional DataFrame of last year's values. It is
            left-merged onto ``data`` on the ``code_comm`` column, and its
            space-free columns are entered for ``period - 1`` with missing
            values replaced by 0.

    Returns:
        The populated simulation.
    """
    builder = SimulationBuilder()
    builder.create_entities(tbs)
    builder.declare_person_entity("commune", data.index)

    commune_count = len(data.index)
    etat = builder.declare_entity('etat', ['france'])
    builder.join_with_persons(etat, ['france'] * commune_count,
                              [None] * commune_count)

    simulation = builder.build(tbs)
    simulation.max_spiral_loops = 10

    for column in data.columns:
        # A column name containing a space is taken to mean
        # "not an OpenFisca variable" and is skipped.
        if " " in column:
            continue
        simulation.set_input(column, period, data[column])

    if data_previous_year is None:
        return simulation

    # Bring last year's values next to this year's communes; communes that
    # have no match in last year's data end up with 0 after fillna.
    data = data.merge(data_previous_year, on=code_comm, how='left', suffixes=["_currentyear", ""])
    for column in data_previous_year.columns:
        # Same space-based filter as above.
        if " " in column:
            continue
        simulation.set_input(column, str(int(period) - 1),
                             data[column].fillna(0))
    return simulation
Exemple #4
0
def simulation(period, data, tbs):
    """Build a simulation from an individual-level DataFrame.

    Role-name columns ("quimenof", "quifoyof", "quifamof") are added to
    ``data`` in place. One group entity is declared per id column
    ("idfoy", "idmen", "idfam"), and every remaining column is entered as
    an input on the entity that owns the corresponding variable.

    Args:
        period: period under which every input is entered.
        data: DataFrame with one row per individual; ``data.index`` is used
            as the individual ids. Modified in place.
        tbs: tax-benefit system used to build the simulation and to look up
            the entity of each variable.

    Returns:
        A ``(simulation, dictionnaire_datagrouped)`` tuple. The dict maps
        each entity key to the DataFrame its inputs were read from
        ("individu" maps to ``data`` itself; group entities map to one row
        per id, sorted by id).
    """
    # Translate the numeric role codes into OpenFisca role names.
    # NOTE(review): the menage role is derived from "quifoy", not "quimen";
    # this looks like a copy-paste slip -- confirm against the data schema.
    data["quimenof"] = "enfant"
    data.loc[data["quifoy"] == 1, "quimenof"] = "conjoint"
    data.loc[data["quifoy"] == 0, "quimenof"] = "personne_de_reference"

    data["quifoyof"] = "personne_a_charge"
    data.loc[data["quifoy"] == 1, "quifoyof"] = "conjoint"
    data.loc[data["quifoy"] == 0, "quifoyof"] = "declarant_principal"

    data["quifamof"] = "enfant"
    data.loc[data["quifam"] == 1, "quifamof"] = "conjoint"
    data.loc[data["quifam"] == 0, "quifamof"] = "demandeur"

    sb = SimulationBuilder()
    sb.create_entities(tbs)

    sb.declare_person_entity("individu", data.index)

    # Short entity suffix used in column names -> OpenFisca entity key.
    listentities = {"foy": "foyer_fiscal", "men": "menage", "fam": "famille"}

    instances = {}
    dictionnaire_datagrouped = {"individu": data}

    for ent, ofent in listentities.items():
        id_column = "id" + ent

        # One row per entity, sorted by id. This assumes the data defined
        # for an entity are the same on all of its rows, or at least that
        # the first non-null value found is the entity's value (presumably
        # validated by a separate checkdata step -- not visible here).
        grouped = (data.groupby(id_column,
                                as_index=False).first().sort_values(by=id_column))
        dictionnaire_datagrouped[ofent] = grouped

        # Declare the entity with the ids in the SAME order as the grouped
        # rows: entity-level inputs are set positionally from `grouped`, so
        # using order-of-appearance ids would misalign values whenever
        # `data` is not already sorted by this id.
        ent_ids = grouped[id_column].values
        instances[ofent] = sb.declare_entity(ofent, ent_ids)
        sb.join_with_persons(instances[ofent],
                             data[id_column].values,
                             roles=data["qui" + ent + "of"].values)

    # These columns are bookkeeping, not OpenFisca variables.
    columns_not_OF_variables = {
        "idmen",
        "idfoy",
        "idfam",
        "noindiv",
        "level_0",
        "quifam",
        "quifoy",
        "quimen",
        "idmen_x",
        "idmen_y",
        "wprm",
        "index",
        "idmen_original",
        "idfoy_original",
        "idfam_original",
        "quifamof",
        "quifoyof",
        "quimenof",
    }

    simulation = sb.build(tbs)
    memory_config = MemoryConfig(
        max_memory_occupation=
        0.95,  # When 95% of the virtual memory is full, switch to disk storage
        priority_variables=["salary",
                            "age"],  # Always store these variables in memory
        variables_to_drop=non_cached_variables,
    )
    simulation.memory_config = memory_config

    # Enter each variable on the entity it belongs to.
    for colonne in data.columns:
        if colonne in columns_not_OF_variables:
            continue
        entity_key = tbs.get_variable(colonne).entity.key
        simulation.set_input(
            colonne,
            period,
            dictionnaire_datagrouped[entity_key][colonne],
        )
    return simulation, dictionnaire_datagrouped
Exemple #5
0
 def load_dataset(self,
                  entity_dfs: Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame],
                  verbose: bool = False):
     """Build and return a simulation from person/benunit/household frames.

     The three input frames are modified in place: each is sorted by its
     id column, its index is reset, and an "id" column holding that id is
     added. ``self.system`` and ``self.relations`` are (re)assigned.

     Args:
         entity_dfs: ``(person, benunit, household)`` DataFrames. The person
             frame must contain "P_person_id", "P_benunit_id",
             "P_household_id" and "P_role"; the others "B_benunit_id" and
             "H_household_id" respectively.
         verbose: when true, print the columns that could not be entered
             as inputs.

     Returns:
         The simulation built by ``SimulationBuilder.build``.
     """
     person, benunit, household = entity_dfs
     self.system = openfisca_uk.CountryTaxBenefitSystem()
     self.apply_reforms(self.reforms)
     builder = SimulationBuilder()
     builder.create_entities(self.system)
     # Sort each frame once by its id so that row order matches the order
     # in which the ids are declared below.
     for frame, id_column in (
         (person, "P_person_id"),
         (benunit, "B_benunit_id"),
         (household, "H_household_id"),
     ):
         frame.sort_values(id_column, inplace=True)
         frame.reset_index(inplace=True, drop=True)
         frame["id"] = frame[id_column]
     self.relations = {
         "person": np.array(person["P_person_id"]),
         "benunit": np.array(benunit["B_benunit_id"]),
         "household": np.array(household["H_household_id"]),
         "person-benunit": np.array(person["P_benunit_id"]),
         "person-household": np.array(person["P_household_id"]),
     }
     builder.declare_person_entity("person", self.relations["person"])
     benunits = builder.declare_entity("benunit", self.relations["benunit"])
     households = builder.declare_entity("household",
                                         self.relations["household"])
     person_roles = np.array(person["P_role"])
     builder.join_with_persons(
         benunits, self.relations["person-benunit"],
         person_roles)  # define person-benunit memberships
     builder.join_with_persons(
         households, self.relations["person-household"],
         person_roles)  # define person-household memberships
     model = builder.build(self.system)
     # Helper columns that are not model inputs; trying to enter them
     # would only add noise to the "skipped" report.
     non_input_columns = {"P_role", "id"}
     skipped = []
     for input_file in (person, benunit, household):
         for column in input_file.columns:
             if column in non_input_columns:
                 continue
             try:
                 def_period = self.system.get_variable(
                     column).definition_period
                 if def_period in ["eternity", "year"]:
                     input_periods = [self.input_year]
                 else:
                     # Enter the same values for every sub-period of the year.
                     input_periods = period(
                         self.input_year).get_subperiods(def_period)
                 for subperiod in input_periods:
                     model.set_input(column, subperiod,
                                     np.array(input_file[column]))
             except Exception:
                 # Best effort: unknown or rejected columns are recorded
                 # and reported when ``verbose`` is set.
                 skipped.append(column)
     if skipped and verbose:
         print(
             f"Incomplete initialisation: skipped {len(skipped)} variables:"
         )
         for var in skipped:
             print(f"{var}")
     return model
Exemple #6
0
    def load_frs(self, *reforms, verbose=False, bonus=None):
        """
        Create and populate a tax-benefit simulation model from OpenFisca.

        The person, benefit unit and household tables are read from
        "person.csv", "benunit.csv" and "household.csv" under
        ``self.data_dir``. All data is entered under ``self.input_period``
        (or each of its sub-periods for variables defined on a shorter
        period). ``self.relations`` is (re)assigned with the id and
        membership arrays.

        Arguments:
            reforms: any reforms to apply, in order.
            verbose: if true, print the columns that could not be entered.
            bonus: optional mapping of column name to an amount added to
                that column before it is entered.

        Returns:
            A Simulation object.
        """
        if bonus is None:
            bonus = {}
        system = CountryTaxBenefitSystem()
        for reform in reforms:
            system = reform(system)  # apply each reform in order
        builder = SimulationBuilder()
        builder.create_entities(
            system)  # create the entities (person, benunit, etc.)
        person_file = pd.read_csv(os.path.join(self.data_dir, "person.csv"),
                                  low_memory=False)
        benunit_file = pd.read_csv(os.path.join(self.data_dir, "benunit.csv"),
                                   low_memory=False)
        household_file = pd.read_csv(os.path.join(self.data_dir,
                                                  "household.csv"),
                                     low_memory=False)
        # Row order must follow the ids, since inputs are set positionally.
        person_file.sort_values(by=["person_id"], inplace=True)
        benunit_file.sort_values(by=["benunit_id"], inplace=True)
        household_file.sort_values(by=["household_id"], inplace=True)
        self.relations = {
            "person": np.array(person_file["person_id"]),
            "benunit": np.array(benunit_file["benunit_id"]),
            "household": np.array(household_file["household_id"]),
            "person-benunit": np.array(person_file["benunit_id"]),
            "person-household": np.array(person_file["household_id"]),
        }
        person_ids = np.array(person_file["person_id"])
        benunit_ids = np.array(benunit_file["benunit_id"])
        household_ids = np.array(household_file["household_id"])
        builder.declare_person_entity("person", person_ids)
        benunits = builder.declare_entity("benunit", benunit_ids)
        households = builder.declare_entity("household", household_ids)
        person_roles = person_file["role"]
        builder.join_with_persons(
            benunits, np.array(person_file["benunit_id"]),
            person_roles)  # define person-benunit memberships
        builder.join_with_persons(households,
                                  np.array(person_file["household_id"]),
                                  person_roles)  # define person-household memberships
        person_file["index"] = np.arange(start=0, stop=len(person_file))
        model = builder.build(system)
        skipped = []
        for input_file in [person_file, benunit_file, household_file]:
            for column in input_file.columns:
                if column in bonus:
                    input_file[column] += bonus[column]
                if column == "role":
                    continue
                try:
                    def_period = system.get_variable(
                        column).definition_period
                    if def_period in ["eternity", "year"]:
                        input_periods = [self.input_period]
                    else:
                        # Enter the same values for every sub-period.
                        input_periods = period(
                            self.input_period).get_subperiods(def_period)
                    for subperiod in input_periods:
                        model.set_input(column, subperiod,
                                        np.array(input_file[column]))
                except Exception:
                    # Best effort: columns that cannot be entered are
                    # recorded and reported when ``verbose`` is set.
                    skipped += [column]
        if skipped and verbose:
            print(
                f"Incomplete initialisation: skipped {len(skipped)} variables:"
            )
            for var in skipped:
                print(f"{var}")
        return model
Exemple #7
0
# Load the person-level and household-level input tables.
data_persons = pandas.read_csv('./data_persons.csv')
data_households = pandas.read_csv('./data_households.csv')

# Pick out the id, membership and role columns used to describe the population.
persons_ids = data_persons['person_id']
households_ids = data_households['household_id']
persons_households = data_persons['household_id']
persons_households_roles = data_persons['person_role_in_household']

# Describe the population to the simulation builder: persons first, then the
# household entity, then which household (and role) each person has.
sb = SimulationBuilder()
sb.create_entities(tax_benefit_system)
sb.declare_person_entity('person', persons_ids)
household_instance = sb.declare_entity('household', households_ids)
sb.join_with_persons(
    household_instance,
    persons_households,
    persons_households_roles,
)

# Build the simulation, enter the salaries for the period and compute taxes.
simulation = sb.build(tax_benefit_system)

period = '2019-03'
simulation.set_input('salary', period, data_persons['person_salary'])

total_taxes = simulation.calculate('total_taxes', period)