Exemple #1
0
def get_data():
    """Generate key household-level datasets for UBI reforms.

    Two microsimulations are run: the default baseline, and the reform
    produced by ``ubi_reform`` with all four scalar arguments and every
    per-region value set to zero (``reform_no_ubi`` - presumably the tax
    side of the reform without any UBI payments; confirm against
    ``ubi_reform``).

    Returns:
        DataFrame: Baseline DataFrame with core variables (all of
            ``BASELINE_COLS`` except ``is_disabled_for_ubi``) plus a
            ``household_weight`` column.
        DataFrame: UBI tax reform DataFrame with core variables
            (``BASELINE_COLS``) plus a ``household_weight`` column.
        float: Yearly revenue raised by the UBI tax reform, computed as
            total baseline household net income minus total household net
            income under the zero-UBI reform.
    """
    baseline = Microsimulation()
    # "is_disabled_for_ubi" is left out of the baseline request; presumably
    # it is only defined once the reform is applied - TODO confirm.
    baseline_cols = [
        col for col in BASELINE_COLS if col != "is_disabled_for_ubi"
    ]
    baseline_df = baseline.df(baseline_cols, map_to="household")

    # A scalar broadcasts over the index, so the series always matches
    # REGIONS instead of relying on a hard-coded region count.
    zero_by_region = pd.Series(0, index=REGIONS)
    reform_no_ubi = ubi_reform(0, 0, 0, 0, zero_by_region)
    reform_no_ubi_sim = Microsimulation(reform_no_ubi)
    reform_base_df = reform_no_ubi_sim.df(BASELINE_COLS, map_to="household")

    # Net income lost by households under the reform is the amount
    # available to fund the UBI.
    budget = (
        baseline.calc("net_income", map_to="household").sum()
        - reform_no_ubi_sim.calc("net_income", map_to="household").sum()
    )

    # Convert to plain DataFrames, carrying the weights over as a column.
    baseline_df_pd = pd.DataFrame(baseline_df)
    baseline_df_pd["household_weight"] = baseline_df.weights
    reform_base_df_pd = pd.DataFrame(reform_base_df)
    reform_base_df_pd["household_weight"] = reform_base_df.weights
    return baseline_df_pd, reform_base_df_pd, budget
def impute_incomes(dataset: Dataset = FRS, year: int = 2022) -> MicroDataFrame:
    """Imputation of high incomes from the SPI.

    Trains ``si.rf_impute`` on SPI records, using ``PREDICTORS`` as inputs
    and ``IMPUTATIONS`` as targets, and applies it to the ``PREDICTORS``
    of ``dataset``.

    Args:
        dataset (Dataset): The dataset to impute onto. Defaults to ``FRS``.
        year (int): The year passed to the ``dataset`` microsimulation.
            Defaults to 2022.

    Returns:
        MicroDataFrame: The output of ``si.rf_impute`` for the ``dataset``
            records (presumably one column per ``IMPUTATIONS`` variable -
            confirm against ``si.rf_impute``).
    """
    from openfisca_uk import Microsimulation

    # Most recent SPI used - if it's before the FRS year then data will be
    # uprated automatically by OpenFisca-UK
    spi = Microsimulation(dataset=SPI)
    frs = Microsimulation(
        dataset=dataset,
        year=year,
    )

    regions = spi.calc("region").unique()

    spi_df = spi.df(PREDICTORS + IMPUTATIONS)
    frs_df = frs.df(PREDICTORS)

    # Encode region names as floats using the SPI's ordering, so that the
    # training and prediction frames share one set of codes. The mapping is
    # built once and applied to both frames.
    region_codes = {name: float(code) for code, name in enumerate(regions)}
    frs_df.region = frs_df.region.map(region_codes)
    spi_df.region = spi_df.region.map(region_codes)

    return si.rf_impute(
        x_train=spi_df.drop(IMPUTATIONS, axis=1),
        y_train=spi_df[IMPUTATIONS],
        x_new=frs_df,
        verbose=True,
    )
Exemple #3
0
    "in_poverty_bhc",
    "in_deep_poverty_bhc",
]

# Household columns requested from the baseline simulation.
BASELINE_HH_COLS = [
    "household_weight",
    "people",
    "region",
]

# Household outcome columns; these are extracted for the baseline too.
REFORM_HH_COLS = [
    "household_net_income",
    "equiv_household_net_income",
    "poverty_gap_bhc",
    "poverty_gap_ahc",
]

# Person-level baseline frame, weighted by the household weight column.
p_base = mdf.MicroDataFrame(
    baseline_sim.df(
        BASELINE_PERSON_COLS + REFORM_PERSON_COLS,
        map_to="person",
    ),
    weights="household_weight",
)
# Suffix the outcome columns with "_base" to mark them as baseline values.
p_base.rename(
    {col: f"{col}_base" for col in REFORM_PERSON_COLS},
    axis=1,
    inplace=True,
)

# Household-level baseline frame, weighted by the household weight column.
hh_base = mdf.MicroDataFrame(
    baseline_sim.df(
        BASELINE_HH_COLS + REFORM_HH_COLS,
        map_to="household",
    ),
    weights="household_weight",
)
hh_base.rename(
    dict(zip(REFORM_HH_COLS, [i + "_base" for i in REFORM_HH_COLS])),
    axis=1,