def child_benefits_data(start, end):
    """
    Collect the "kindergeld" policy parameters for every year of a period.

    Parameters:
    start (Int): First year of the simulated period
    end (Int):  Last year of the simulated period (inclusive)

    Returns a dataframe with one row per year and the four columns
    "First child", "Second child", "Third child" and "Fourth child".
    """
    child_rank_labels = [
        "First child",
        "Second child",
        "Third child",
        "Fourth child",
    ]

    # While collecting, every year is a column; the frame is flipped afterwards
    # so that every year becomes a row.
    per_year = pd.DataFrame()
    for year in range(start, end + 1):
        params, _ = set_up_policy_environment(year)
        per_year[year] = params["kindergeld"]["kindergeld"].values()

    kindergeld_df = per_year.T
    # Assigning four labels requires exactly four kindergeld entries per year.
    kindergeld_df.columns = child_rank_labels

    return kindergeld_df
def social_assistance_data(start, end):
    """
    For a year range returns the "regelsatz" amounts of the "arbeitsl_geld_2"
    policy parameters, one column per household member type.

    start (Int):
        Defines the start of the simulated period
    end (Int):
        Defines the end of the simulated period (inclusive)

    returns:
        soz_ass_out: pd.DataFrame
            One row per year; columns are the member types listed below.
    """
    member_types = [
        "ein_erwachsener",
        "zwei_erwachsene",
        "weitere_erwachsene",
        "kinder_14_24",
        "kinder_7_13",
        "kinder_0_6",
    ]

    rates_by_year = {}
    for year in range(start, end + 1):
        params, _ = set_up_policy_environment(year)
        alg2_params = params["arbeitsl_geld_2"]

        if year > 2010:
            # Here "regelsatz" is a mapping; its values are paired with the
            # member types purely by position.
            rates_by_year[year] = dict(
                zip(member_types, alg2_params["regelsatz"].values()))
            continue

        # Up to 2010 "regelsatz" is one base amount and "anteil_regelsatz"
        # holds the multiplier per member type. The single adult gets
        # multiplier 1 (written into the params mapping itself).
        shares = alg2_params["anteil_regelsatz"]
        shares["ein_erwachsener"] = 1
        amounts = np.array(list(shares.values())) * alg2_params["regelsatz"]
        rates_by_year[year] = dict(zip(shares.keys(), amounts))

    soz_ass_df = pd.DataFrame.from_dict(rates_by_year, orient="index")
    # Fix the column order independent of the order in the parameter files
    soz_ass_out = soz_ass_df[member_types]
    return soz_ass_out
Beispiel #3
0
def test_pension(input_data, year):
    """Compare the computed "rente_anspr_m" (rounded to two decimals) with
    the expected column of the test data for the given year."""
    target = "rente_anspr_m"
    expected_rows = input_data.loc[input_data["jahr"] == year]

    # Policy environment as of 1 July of the tested year
    params, functions = set_up_policy_environment(date=f"{year}-07-01")

    computed = compute_taxes_and_transfers(
        data=expected_rows[INPUT_COLS].copy(),
        params=params,
        functions=functions,
        targets=target,
    )
    assert_series_equal(computed[target].round(2), expected_rows[target])
Beispiel #4
0
def test_update_earning_points(input_data, year):
    """Compare the computed "entgeltpunkte_update" with the expected
    "EP_end" column of the test data for the given year."""
    target = "entgeltpunkte_update"
    expected_rows = input_data.loc[input_data["jahr"] == year]

    # Policy environment as of 1 July of the tested year
    params, functions = set_up_policy_environment(date=f"{year}-07-01")

    computed = compute_taxes_and_transfers(
        data=expected_rows[INPUT_COLS].copy(),
        params=params,
        functions=functions,
        targets=target,
    )
    # The two series carry different names, so names are not compared
    assert_series_equal(
        computed[target],
        expected_rows["EP_end"],
        check_names=False,
    )
def test_synthetic():
    """
    Test creation of synthetic data
    """
    # run with defaults
    df = create_synthetic_data()
    # rent must be positive
    assert df["kaltmiete_m_hh"].min() > 0
    # heating cost must be positive
    assert df["heizkosten_m_hh"].min() > 0
    # no NaN values
    assert df.notna().all().all()
    # correct dimensions for every household type: the number of rows per
    # household type must equal the number of household members.
    # (The comparison has to happen outside of len(); len() of the boolean
    # frame would be truthy for any non-empty frame.)
    expected_hh_sizes = {
        "couple_0_children": 2,
        "single_2_children": 3,
        "couple_2_children": 4,
    }
    for hh_typ, n_members in expected_hh_sizes.items():
        assert len(df[df["hh_typ"] == hh_typ]) == n_members
    # unique personal id?
    assert df["p_id"].is_unique

    doppelverdiener = create_synthetic_data(hh_typen=["couple"],
                                            n_children=[0],
                                            double_earner=True,
                                            bruttolohn_m=2000)

    # with double_earner=True every person must have positive earnings
    assert (doppelverdiener["bruttolohn_m"] > 0).all()

    # test heterogeneity
    incrange = create_synthetic_data(
        hh_typen=["couple"],
        n_children=0,
        heterogeneous_vars={
            "bruttolohn_m": list(np.arange(0, 6000, 1000)),
            "vermögen_hh": [10_000, 500_000, 1_000_000],
        },
    )
    # is household id unique? Every childless couple must consist of exactly
    # two rows sharing one hh_id.
    assert (incrange.groupby("hh_id").size() == 2).all()

    assert incrange.notna().all().all()

    # finally, run through gettsim
    policy_params, policy_functions = set_up_policy_environment(2020)
    results = compute_taxes_and_transfers(df, policy_params, policy_functions)
    assert len(results) == len(df)
def social_security_data(start, end):
    """
    For a year range returns the "soz_vers_beitr" policy parameters used to
    plot the social security contributions.

    start (Int): Defines the start of the simulated period
    end (Int):  Defines the end of the simulated period (inclusive)

    returns dataframe with one row per year
    """
    contributions_by_year = {
        year: set_up_policy_environment(year)[0]["soz_vers_beitr"][
            "soz_vers_beitr"]
        for year in range(start, end + 1)
    }

    per_year = pd.DataFrame(data=contributions_by_year).transpose()

    # "ges_krankenv" and "pflegev" hold dictionaries; expand each of them into
    # columns and put them next to the two scalar contribution rates.
    flat_parts = [
        per_year[["arbeitsl_v", "rentenv"]],
        per_year["ges_krankenv"].apply(pd.Series),
        per_year["pflegev"].apply(pd.Series),
    ]
    soz_vers_out = pd.concat(flat_parts, axis=1)

    # Labels are assigned by position.
    # NOTE(review): this relies on the order of the nested dictionary entries
    # (employer before employee, care base rate before childless top-up) -
    # confirm against the parameter files.
    soz_vers_out.columns = [
        "unemployment insurance",
        "pension insurance",
        "health insurance employer",
        "health insurance employee",
        "care insurance",
        "additional care insurance no child",
    ]

    # We don't need the top-up for childless persons
    return soz_vers_out.drop(columns=["additional care insurance no child"])
def tax_rate_data(start, end):
    """
    For a given year span returns the data to plot income tax rates per
    income.

    start (Int): Defines the start of the simulated period
    end (Int):  Defines the end of the simulated period (inclusive)

    returns dict keyed by year. Every entry is a dict with the series
    "tax_rate", "overall_tax_rate" (including the result of the "soli_st"
    schedule), "marginal_rate", "overall_marginal_rate" and "income".
    The income grid runs from 0 to 300000 in 601 points; its first point is
    0, so the two average rates divide by zero there.
    """
    income_grid = pd.Series(data=np.linspace(0, 300000, 601))

    rates_by_year = {}
    for year in range(start, end + 1):
        params, _ = set_up_policy_environment(year)
        income_tax_params = params["eink_st"]
        soli_params = params["soli_st"]["soli_st"]

        income_tax = st_tarif(income_grid, income_tax_params)
        # The soli schedule is evaluated on the income tax, not on the income
        soli = piecewise_polynomial(
            income_tax,
            thresholds=soli_params["thresholds"],
            rates=soli_params["rates"],
            intercepts_at_lower_thresholds=soli_params[
                "intercepts_at_lower_thresholds"],
        )

        # Marginal rates are numerical derivatives along the income grid
        marginal = pd.Series(np.gradient(income_tax, income_grid))
        marginal_with_soli = pd.Series(
            np.gradient(income_tax + soli, income_grid))

        rates_by_year[year] = {
            "tax_rate": (income_tax / income_grid),
            "overall_tax_rate": ((soli + income_tax) / income_grid),
            "marginal_rate": marginal,
            "overall_marginal_rate": marginal_with_soli,
            "income": income_grid,
        }

    return rates_by_year
def heatmap_data():
    """
    Prepare the data for the heatmap of the change in tax burden.

    For every combination of labor income (0 to 310000) and capital income
    (0 to 100000), 250 grid points each, the 2020 parameters are used to
    compute: tariff on (labor + capital income) minus (tariff on labor income
    + flat tax on capital income above the "sparerpauschbetrag").

    Returns a dict with
        "heatmap_source": long-format dataframe with the columns
            "Capital income", "Labor income", "Change to tax burden"
        "household_dict": dict with "deciles", "capital_income" and
            "labor_income" used to mark average households in the heatmap
    """
    labor_grid = pd.Series(data=np.linspace(0, 310000, 250))
    capital_grid = pd.Series(data=np.linspace(0, 100000, 250))
    n_points = len(labor_grid)

    # Get relevant policy params from GETTSIM
    policy_params, _ = set_up_policy_environment(2020)
    saver_allowance = policy_params["eink_st_abzuege"]["sparerpauschbetrag"]
    capital_tax_rate = policy_params["abgelt_st"]["abgelt_st_satz"]
    tariff_params = policy_params["eink_st"]

    # Capital income tax: flat rate on capital income above the allowance,
    # never on a negative base
    taxable_capital = (capital_grid - saver_allowance).clip(lower=0)
    capital_tax = taxable_capital * capital_tax_rate

    # One column per labor income level, one row per capital income level
    heatmap_df = pd.DataFrame(columns=labor_grid)
    for position, column_label in enumerate(heatmap_df.columns):
        labor_only = pd.Series(data=[labor_grid[position]] * n_points)
        combined = labor_only + capital_grid
        heatmap_df[column_label] = st_tarif(combined, tariff_params) - (
            st_tarif(labor_only, tariff_params) + capital_tax)

    heatmap_df.index = capital_grid

    # Wide -> long format
    stacked = heatmap_df.stack()
    heatmap_source = pd.DataFrame(
        stacked, columns=["Change to tax burden"]).reset_index()
    heatmap_source.columns = [
        "Capital income",
        "Labor income",
        "Change to tax burden",
    ]

    # Data to show where average household per decile is located in heatmap.
    # Only the upper groups carry a label.
    deciles = [""] * 9 + ["P90", "P95", "P99", "P100"]

    # from Bach & Buslei 2017 table 3-2
    capital_income_tax = pd.Series(
        data=[0, 0, 0, 0, 0, 4, 15, 36, 52, 84, 167, 559, 13873])
    # Back out the capital income from the tax paid on it
    capital_income = capital_income_tax / 0.26375

    # from Bach & Buslei 2017 table 3-2 "Äquivalenzgewichtetes Einkommen"
    total_income = pd.Series(data=[
        0, -868, 4569, 9698, 14050, 18760, 23846, 29577, 36769, 47676,
        63486, 95899, 350423,
    ])
    labor_income = total_income - capital_income

    return {
        "heatmap_source": heatmap_source,
        "household_dict": {
            "deciles": deciles,
            "capital_income": capital_income,
            "labor_income": labor_income,
        },
    }
def individiual_view_data():
    """
    Prepare the bar data for the individual view, comparing separate taxation
    of labor and capital income ("(S)" bars) with taxation of the combined
    income ("(R)" bars), using the 2020 parameters.

    Labor and capital income both run from 0 to 250000 in steps of 500.
    Deductions are assumed to be 20% of the respective income.

    Returns a dict. "x_range" names the six bars; every series key ("CI",
    "LI", ...) maps to a list with one entry per bar, holding either the
    series shown in that bar or a list of zeros as placeholder. "LI_list",
    "CI_list", "Total_list" and "Final_order" group and order the series keys.
    """
    labor_income = pd.Series(data=range(0, 250001, 500))
    capital_income = pd.Series(data=range(0, 250001, 500))
    n_points = len(labor_income)

    # Combined ("integrated") taxation: deduct 20% of total income (assumption)
    labor_deductions = 0.2 * labor_income  # Assumption
    total_income = labor_income + capital_income
    total_deductions = 0.2 * total_income  # Assumption
    taxable_total = total_income - total_deductions

    # Separate taxation: taxable labor income
    taxable_labor = labor_income - labor_deductions

    # Get relevant policy params from GETTSIM
    policy_params, _ = set_up_policy_environment(2020)
    tariff_params = policy_params["eink_st"]

    # Average tax rates, rounded to two decimals; 0/0 at zero income -> 0
    rate_labor_only = (
        (st_tarif(taxable_labor, tariff_params) / taxable_labor)
        .fillna(0)
        .round(2)
    )
    rate_total = (
        (st_tarif(taxable_total, tariff_params) / taxable_total)
        .fillna(0)
        .round(2)
    )

    # Capital income deductions: the same allowance at every grid point
    capital_deductions = pd.Series(
        data=[policy_params["eink_st_abzuege"]["sparerpauschbetrag"]]
        * n_points)
    capital_tax_rate = policy_params["abgelt_st"]["abgelt_st_satz"]

    # Taxable capital income, never negative
    taxable_capital = (capital_income - capital_deductions).clip(lower=0)

    # Taxes
    total_tax = (taxable_total * rate_total).round(2)
    labor_tax = (taxable_labor * rate_labor_only).round(2)
    capital_tax = taxable_capital * capital_tax_rate

    # Net incomes
    net_capital = taxable_capital - capital_tax
    net_labor = (taxable_labor - labor_tax).round(2)
    net_total = (taxable_total - total_tax).round(2)

    x_range = [
        "Gross income (S)",
        "Taxable income (S)",
        "Net income (S)",
        "Gross income (R)",
        "Taxable income (R)",
        "Net income (R)",
    ]

    # blank placeholder shared by all bars a series does not appear in
    blank = [0] * n_points

    def placed(values, *bars):
        """Return one entry per bar: `values` at the given bar positions,
        the blank placeholder everywhere else."""
        return [values if bar in bars else blank
                for bar in range(len(x_range))]

    data_full = {
        "x_range": x_range,
        "CI": placed(capital_income, 0, 3),
        "LI": placed(labor_income, 0, 3),
        "TI": placed(taxable_total, 4),
        "NI": placed(net_total, 5),
        "T": placed(total_tax, 5),
        "CD": placed(capital_deductions, 1, 2),
        "LD": placed(labor_deductions, 1),
        "TCI": placed(taxable_capital, 1),
        "TLI": placed(taxable_labor, 1),
        "CT": placed(capital_tax, 2),
        "LT": placed(labor_tax, 2),
        "NCI": placed(net_capital, 2),
        "NLI": placed(net_labor, 2),
        "TD": placed(total_deductions, 4),
        "LI_list": ["LI", "TLI", "LT", "NLI", "LD"],
        "CI_list": ["CI", "CD", "TCI", "CT", "NCI"],
        "Total_list": ["TI", "NI", "T", "TD"],
        "Final_order": [
            "CI", "LI", "CD", "TCI", "TLI", "TI", "LD",
            "TD", "CT", "NCI", "NLI", "LT", "NI", "T",
        ],
    }

    return data_full
def prepare_wg_data(sel_year, hh_size):
    """
    For a given year and household size, simulate wohngeld over a grid of
    income and rent combinations. The result is later used for plotting.

    Parameters:
    sel_year: Int
        The year for which the wohngeld will be simulated. It also selects
        which of the year-specific rent functions is used.

    hh_size: Int
        The size of the houshold for which wohngeld will be simulated.
        Values between 1 and 13. More than 12 just adds a lump-sum on top

    Returns dataframe with one column per income level (0 to 4000) and one
    row per rent level (0 to 2000), 81 grid points each.
    """
    # Retrieve policy parameters for the selected year
    policy_params, _ = set_up_policy_environment(sel_year)
    params = policy_params["wohngeld"]

    # Range of relevant income and rent combinations for the simulation
    einkommen = pd.Series(data=np.linspace(0, 4000, 81))
    miete = pd.Series(data=np.linspace(0, 2000, 81))
    n_income = len(einkommen)
    n_rent = len(miete)
    household_size = pd.Series(data=[hh_size] * n_income)

    # Miete needs to be corrected acc. to mietstufe and hh size. The rent
    # functions of the three periods share all arguments except that the
    # oldest one takes an additional constant series (1980) in second place.
    # NOTE(review): the constant 3 is presumably the Mietstufe and 1980 a
    # construction year - confirm against the wohngeld_miete_* signatures.
    if sel_year <= 2008:
        miete_func = wohngeld_miete_bis_2008
        leading_args = (
            pd.Series([3] * n_rent),
            pd.Series([1980] * n_rent),
        )
    elif 2009 <= sel_year <= 2020:
        miete_func = wohngeld_miete_ab_2009
        leading_args = (pd.Series([3] * n_rent),)
    elif sel_year >= 2021:
        miete_func = wohngeld_miete_ab_2021
        leading_args = (pd.Series([3] * n_rent),)

    wohngeld_miete = miete_func(
        *leading_args,
        household_size,
        pd.Series(range(n_rent)),
        miete,
        pd.Series([1] * n_rent),
        wohngeld_min_miete(household_size, params),
        params,
    )

    # Create a dataframe for the simulated data
    wohngeld_df = pd.DataFrame(columns=einkommen)

    # To-do think about household["Mietstufe"]

    # Account for minimum income: lower incomes are raised to this value
    min_income = params["min_eink"][hh_size]

    # Each column holds one income level evaluated against all rent levels
    for position, column_label in enumerate(wohngeld_df.columns):
        income_level = pd.Series(data=[einkommen[position]] * n_income)
        wohngeld_df[column_label] = wohngeld_basis(
            haushaltsgröße=household_size,
            wohngeld_eink=np.maximum(income_level, min_income),
            wohngeld_miete=wohngeld_miete,
            wohngeld_params=params,
        )
    wohngeld_df.index = miete

    return wohngeld_df
def deduction_data(start, end):
    """
    Collect the income tax deduction parameters ("eink_st_abzuege") together
    with the grundfreibetrag for every year of a period. Return a dataframe
    with one row per year.

    Parameters:
    start (Int): Defines the start of the simulated period
    end (Int):  Defines the end of the simulated period (inclusive)

    For years before 2002 the grundfreibetrag comes from a hand-entered table
    covering 1975-2001; from 2002 on it is read from the income tax tariff
    thresholds.
    """
    # Older grundfreibetrag values entered by hand. Every amount is divided
    # by the factor below before use.
    # NOTE(review): presumably Deutsche Mark amounts and the DM/EUR
    # conversion rate - confirm.
    conversion_factor = 1.95583
    grundfreibetrag_raw = {
        2001: 14093,
        2000: 13499,
        1999: 13067,
        1998: 12365,
        1997: 12095,
        1996: 12095,
        1995: 5616,
        1994: 5616,
        1993: 5616,
        1992: 5616,
        1991: 5616,
        1990: 5616,
        1989: 4752,
        1988: 4752,
        1987: 4536,
        1986: 4536,
        1985: 4212,
        1984: 4212,
        1983: 4212,
        1982: 4212,
        1981: 4212,
        1980: 3690,
        1979: 3690,
        1978: 3329,
        1977: 3029,
        1976: 3029,
        1975: 3029,
    }

    eink_ab_df = pd.DataFrame()
    # Loop through years to get the policy parameters
    for year in range(start, end + 1):
        policy_params, _ = set_up_policy_environment(year)
        params = policy_params["eink_st_abzuege"]
        # The grundfreibetrag is added to the deduction parameters in place
        if year >= 2002:
            params["grundfreibetrag"] = policy_params["eink_st"][
                "eink_st_tarif"]["thresholds"][1]
        else:
            params["grundfreibetrag"] = round(
                grundfreibetrag_raw[year] / conversion_factor)
        eink_ab_df[year] = params.values()

    # Row labels are taken from the parameters of the last year in the loop
    eink_ab_df.index = params.keys()
    deduction_df = eink_ab_df.transpose()

    # Adjust dictionary entries into columns for kinderfreibetrag
    kinderfreibetrag_columns = deduction_df["kinderfreibetrag"].apply(
        pd.Series)
    remaining_columns = deduction_df.drop(
        ["kinderfreibetrag", "datum"], axis=1)
    deduction_df = pd.concat(
        [remaining_columns, kinderfreibetrag_columns], axis=1)

    # NOTE(review): column 0 presumably stems from expanding kinderfreibetrag
    # entries that are not dictionaries - confirm.
    deduction_df = deduction_df.drop(["behinderten_pauschbetrag", 0], axis=1)

    return deduction_df