def test_fail_if_missing_pid(minimal_input_data):
    """Input data lacking the ``p_id`` column must be rejected.

    ``minimal_input_data`` is received as an argument (presumably a pytest
    fixture defined outside this block). The ``p_id`` column is removed
    and the call is expected to raise ``ValueError`` with a matching message.
    """
    data_without_pid = minimal_input_data.drop(columns="p_id").copy()

    expected_message = "The input data must contain the column p_id"
    with pytest.raises(ValueError, match=expected_message):
        compute_taxes_and_transfers(
            data=data_without_pid,
            params={},
            functions=[],
            targets=[],
        )
def test_fail_if_non_unique_pid(minimal_input_data):
    """Input data with duplicated ``p_id`` values must be rejected.

    Every row gets the same ``p_id`` (1) and the call is expected to raise
    ``ValueError`` with a matching message.
    """
    # ``assign`` returns a new frame, so the object passed in is not modified.
    data_with_constant_pid = minimal_input_data.assign(p_id=1)

    expected_message = "The following p_ids are non-unique"
    with pytest.raises(ValueError, match=expected_message):
        compute_taxes_and_transfers(
            data=data_with_constant_pid,
            params={},
            functions=[],
            targets=[],
        )
def test_function_without_data_dependency_is_not_mistaken_for_data():
    """A function with no arguments must be usable as an input of another.

    The data frame has an index but no columns, so ``a`` can only come from
    the function ``a`` below; the call must complete without raising.

    NOTE(review): a second function with this exact name is defined further
    down in this file. The later definition replaces this one at import
    time, so this version is not collected unless one of them is renamed.
    """
    n_rows = 5
    empty_frame = pd.DataFrame(index=np.arange(n_rows))

    # The names ``a`` and ``b`` are significant: ``b`` requests ``a`` through
    # its parameter name and ``"b"`` is the requested target.
    def a():
        return pd.Series(range(n_rows))

    def b(a):
        return a

    compute_taxes_and_transfers(
        data=empty_frame,
        params={},
        functions=[a, b],
        targets="b",
    )
def test_function_without_data_dependency_is_not_mistaken_for_data(
    minimal_input_data,
):
    """A function with no arguments must be usable as an input of another.

    ``a`` takes no arguments and returns a series with one entry per row of
    ``minimal_input_data``; ``b`` simply forwards ``a``. The call must
    complete without raising.

    NOTE(review): an earlier function in this file carries the same name;
    this later definition replaces it at import time.
    """
    n_rows = len(minimal_input_data)

    # The names ``a`` and ``b`` are significant: ``b`` requests ``a`` through
    # its parameter name and ``"b"`` is the requested target.
    def a():
        return pd.Series(range(n_rows))

    def b(a):
        return a

    compute_taxes_and_transfers(
        data=minimal_input_data,
        params={},
        functions=[a, b],
        targets="b",
    )
def test_missing_root_nodes_raises_error(minimal_input_data):
    """A dependency that is neither a function nor supplied must raise.

    ``c`` needs ``b`` and ``b`` needs ``a``, but no function ``a`` is passed
    in. The call is expected to raise ``ValueError`` with a matching
    message.

    NOTE(review): a second function with this exact name is defined further
    down in this file. The later definition replaces this one at import
    time, so this version is not collected unless one of them is renamed.
    """

    # Function and parameter names encode the dependency chain c -> b -> a.
    def b(a):
        return a

    def c(b):
        return b

    expected_message = "The following data columns are missing"
    with pytest.raises(ValueError, match=expected_message):
        compute_taxes_and_transfers(
            data=minimal_input_data,
            params={},
            functions=[b, c],
            targets="c",
        )
def test_missing_root_nodes_raises_error():
    """A dependency that is neither a function nor supplied must raise.

    The data frame has an index but no columns, and no function ``a`` is
    passed in, so the chain c -> b -> a cannot be resolved. The call is
    expected to raise ``ValueError`` with a matching message.

    NOTE(review): an earlier function in this file carries the same name;
    this later definition replaces it at import time.
    """
    n_rows = 5
    empty_frame = pd.DataFrame(index=np.arange(n_rows))

    # Function and parameter names encode the dependency chain c -> b -> a.
    def b(a):
        return a

    def c(b):
        return b

    expected_message = "The following data columns are missing"
    with pytest.raises(ValueError, match=expected_message):
        compute_taxes_and_transfers(
            data=empty_frame,
            params={},
            functions=[b, c],
            targets="c",
        )
def test_pension(input_data, year):
    """Compare the computed ``rente_anspr_m`` with the expected column.

    Rows of ``input_data`` whose ``jahr`` equals ``year`` are selected, the
    ``INPUT_COLS`` subset is passed to the computation, and the result,
    rounded to two decimals, must equal the expected column of those rows.
    ``input_data`` and ``year`` are supplied from outside this block
    (presumably a fixture and a parametrization).
    """
    column = "rente_anspr_m"

    rows_of_year = input_data.loc[input_data["jahr"] == year]
    model_input = rows_of_year[INPUT_COLS].copy()

    # The policy environment is set up for the first of July of the year.
    policy_params, policy_functions = set_up_policy_environment(
        date=f"{year}-07-01"
    )

    calc_result = compute_taxes_and_transfers(
        data=model_input,
        params=policy_params,
        functions=policy_functions,
        targets=column,
    )

    computed = calc_result[column].round(2)
    expected = rows_of_year[column]
    assert_series_equal(computed, expected)
def test_update_earning_points(input_data, year):
    """Compare the computed ``entgeltpunkte_update`` with column ``EP_end``.

    Rows of ``input_data`` whose ``jahr`` equals ``year`` are selected and
    the ``INPUT_COLS`` subset is passed to the computation. The series names
    differ, so the comparison ignores them.
    """
    target = "entgeltpunkte_update"

    rows_of_year = input_data.loc[input_data["jahr"] == year]
    model_input = rows_of_year[INPUT_COLS].copy()

    # The policy environment is set up for the first of July of the year.
    policy_params, policy_functions = set_up_policy_environment(
        date=f"{year}-07-01"
    )

    calc_result = compute_taxes_and_transfers(
        data=model_input,
        params=policy_params,
        functions=policy_functions,
        targets=target,
    )

    computed = calc_result[target]
    expected = rows_of_year["EP_end"]
    assert_series_equal(computed, expected, check_names=False)
def test_synthetic():
    """Test creation of synthetic data and run the result through gettsim.

    Covers the default data set, a double-earner couple, and a data set
    with heterogeneous wage and wealth values.
    """
    # Run with defaults.
    df = create_synthetic_data()

    # Rent and heating cost must be positive.
    assert df["kaltmiete_m_hh"].min() > 0
    assert df["heizkosten_m_hh"].min() > 0

    # No NaN values.
    assert df.notna().all().all()

    # Correct number of rows for every checked household type. The
    # comparison must sit outside ``len(...)``: ``len(frame == 2)`` is the
    # length of a boolean frame and is truthy for any non-empty selection,
    # so the check would never fail.
    expected_rows_per_hh_typ = {
        "couple_0_children": 2,
        "single_2_children": 3,
        "couple_2_children": 4,
    }
    for hh_typ, expected_rows in expected_rows_per_hh_typ.items():
        assert len(df[df["hh_typ"] == hh_typ]) == expected_rows

    # Personal ids must be unique.
    assert df["p_id"].is_unique

    # With double_earner=True every person must have a positive wage.
    doppelverdiener = create_synthetic_data(
        hh_typen=["couple"],
        n_children=[0],
        double_earner=True,
        bruttolohn_m=2000,
    )
    assert (doppelverdiener["bruttolohn_m"] > 0).all()

    # Test heterogeneity.
    incrange = create_synthetic_data(
        hh_typen=["couple"],
        n_children=0,
        heterogeneous_vars={
            "bruttolohn_m": list(np.arange(0, 6000, 1000)),
            "vermögen_hh": [10_000, 500_000, 1_000_000],
        },
    )
    # Every household id must belong to exactly two rows (a couple).
    assert (incrange.groupby("hh_id").size() == 2).all()
    assert incrange.notna().all().all()

    # Finally, run through gettsim: one result row per input row.
    policy_params, policy_functions = set_up_policy_environment(2020)
    results = compute_taxes_and_transfers(df, policy_params, policy_functions)
    assert len(results) == len(df)