def get_data():
    """Generate key datasets for UBI reforms.

    Runs two household-level microsimulations: the baseline, and the reform
    built by ``ubi_reform`` with every amount set to zero (including a zero
    entry for each region in ``REGIONS``).

    Returns:
        DataFrame: Baseline DataFrame with core variables (``BASELINE_COLS``
            except ``is_disabled_for_ubi``) plus ``household_weight``.
        DataFrame: UBI tax reform DataFrame with core variables (all of
            ``BASELINE_COLS``) plus ``household_weight``.
        float: Yearly revenue raised by the UBI tax reform, computed as total
            baseline household net income minus total household net income
            under the zero-amount reform.
    """
    baseline = Microsimulation()
    # "is_disabled_for_ubi" is left out of the baseline extract only;
    # presumably it is defined by the reform - TODO confirm.
    baseline_df = baseline.df(
        [var for var in BASELINE_COLS if var != "is_disabled_for_ubi"],
        map_to="household",
    )

    # A scalar broadcasts over the index, so the zero regional series always
    # matches REGIONS instead of relying on a hard-coded region count.
    zero_by_region = pd.Series(0, index=REGIONS)
    reform_no_ubi = ubi_reform(0, 0, 0, 0, zero_by_region)
    reform_no_ubi_sim = Microsimulation(reform_no_ubi)
    reform_base_df = reform_no_ubi_sim.df(BASELINE_COLS, map_to="household")

    # Net income lost by households when moving from baseline to the
    # zero-amount reform.
    budget = (
        baseline.calc("net_income", map_to="household").sum()
        - reform_no_ubi_sim.calc("net_income", map_to="household").sum()
    )

    # Convert to plain pandas DataFrames, carrying the weights over as an
    # explicit column.
    baseline_df_pd = pd.DataFrame(baseline_df)
    baseline_df_pd["household_weight"] = baseline_df.weights
    reform_base_df_pd = pd.DataFrame(reform_base_df)
    reform_base_df_pd["household_weight"] = reform_base_df.weights
    return baseline_df_pd, reform_base_df_pd, budget
def impute_incomes(dataset: Dataset = FRS, year: int = 2022) -> MicroDataFrame:
    """Imputation of high incomes from the SPI.

    Trains ``si.rf_impute`` on the SPI (``PREDICTORS`` as inputs,
    ``IMPUTATIONS`` as targets) and applies it to the ``PREDICTORS`` computed
    from ``dataset`` for ``year``.

    Args:
        dataset (Dataset): The dataset to impute onto; passed to
            ``Microsimulation`` together with ``year``.
        year (int): The year of ``dataset`` to simulate.

    Returns:
        MicroDataFrame: The output of ``si.rf_impute`` for the records of
            ``dataset`` (type as per this function's annotation).
    """
    from openfisca_uk import Microsimulation

    # Most recent SPI used - if it's before the FRS year then data will be uprated
    # automatically by OpenFisca-UK
    spi = Microsimulation(dataset=SPI)
    frs = Microsimulation(
        dataset=dataset,
        year=year,
    )

    # Encode region names as floats, numbered in the order the SPI reports
    # them. The same mapping is applied to both frames so the codes agree;
    # a region name absent from the SPI maps to NaN.
    regions = spi.calc("region").unique()
    region_codes = {name: float(i) for i, name in enumerate(regions)}

    spi_df = spi.df(PREDICTORS + IMPUTATIONS)
    frs_df = frs.df(PREDICTORS)
    frs_df.region = frs_df.region.map(region_codes)
    spi_df.region = spi_df.region.map(region_codes)

    return si.rf_impute(
        x_train=spi_df.drop(IMPUTATIONS, axis=1),
        y_train=spi_df[IMPUTATIONS],
        x_new=frs_df,
        verbose=True,
    )
"in_poverty_bhc", "in_deep_poverty_bhc", ] BASELINE_HH_COLS = ["household_weight", "people", "region"] # Extract these for baseline too. REFORM_HH_COLS = [ "household_net_income", "equiv_household_net_income", "poverty_gap_bhc", "poverty_gap_ahc", ] p_base = mdf.MicroDataFrame( baseline_sim.df(BASELINE_PERSON_COLS + REFORM_PERSON_COLS, map_to="person"), weights="household_weight", ) p_base.rename( dict(zip(REFORM_PERSON_COLS, [i + "_base" for i in REFORM_PERSON_COLS])), axis=1, inplace=True, ) hh_base = mdf.MicroDataFrame( baseline_sim.df(BASELINE_HH_COLS + REFORM_HH_COLS, map_to="household"), weights="household_weight", ) hh_base.rename( dict(zip(REFORM_HH_COLS, [i + "_base" for i in REFORM_HH_COLS])), axis=1,