コード例 #1
0
def test_fail_if_missing_pid(minimal_input_data):
    """Input data without a ``p_id`` column must be rejected with a ValueError."""
    without_pid = minimal_input_data.drop(columns="p_id").copy()
    expected_message = "The input data must contain the column p_id"

    with pytest.raises(ValueError, match=expected_message):
        compute_taxes_and_transfers(without_pid, {}, functions=[], targets=[])
コード例 #2
0
def test_fail_if_non_unique_pid(minimal_input_data):
    """Input data whose ``p_id`` values are all identical must raise a ValueError."""
    # ``assign`` returns a new frame, so the fixture itself stays untouched.
    duplicated_ids = minimal_input_data.assign(p_id=1)
    expected_message = "The following p_ids are non-unique"

    with pytest.raises(ValueError, match=expected_message):
        compute_taxes_and_transfers(duplicated_ids, {}, functions=[], targets=[])
コード例 #3
0
ファイル: test_interface.py プロジェクト: davpahl/gettsim
def test_function_without_data_dependency_is_not_mistaken_for_data():
    """A function that takes no arguments must not be looked up as a data column.

    The frame passed in has an index but no columns, so ``a`` is only
    available through the function of the same name.
    """
    row_count = 5
    empty_frame = pd.DataFrame(index=np.arange(row_count))

    # The inner function names and the argument name of ``b`` are kept as-is:
    # the functions themselves are handed to the code under test.
    def a():
        return pd.Series(range(row_count))

    def b(a):
        return a

    compute_taxes_and_transfers(
        empty_frame,
        {},
        functions=[a, b],
        targets="b",
    )
コード例 #4
0
def test_function_without_data_dependency_is_not_mistaken_for_data(
    minimal_input_data,
):
    """A function that takes no arguments must not be looked up as a data column.

    The test passes as long as the computation of target ``b`` does not raise.
    """
    row_count = len(minimal_input_data)

    # The inner function names and the argument name of ``b`` are kept as-is:
    # the functions themselves are handed to the code under test.
    def a():
        return pd.Series(range(row_count))

    def b(a):
        return a

    compute_taxes_and_transfers(
        minimal_input_data,
        {},
        functions=[a, b],
        targets="b",
    )
コード例 #5
0
def test_missing_root_nodes_raises_error(minimal_input_data):
    """Requesting ``c`` without any function named ``a`` must raise a ValueError.

    ``c`` depends on ``b`` and ``b`` on ``a``; no function ``a`` is supplied,
    and the expected error message reports missing data columns.
    """
    expected_message = "The following data columns are missing"

    def b(a):
        return a

    def c(b):
        return b

    with pytest.raises(ValueError, match=expected_message):
        compute_taxes_and_transfers(
            minimal_input_data,
            {},
            functions=[b, c],
            targets="c",
        )
コード例 #6
0
ファイル: test_interface.py プロジェクト: davpahl/gettsim
def test_missing_root_nodes_raises_error():
    """Requesting ``c`` without any source for ``a`` must raise a ValueError.

    ``c`` depends on ``b`` and ``b`` on ``a``; the frame has no columns and
    no function ``a`` is supplied.
    """
    row_count = 5
    empty_frame = pd.DataFrame(index=np.arange(row_count))
    expected_message = "The following data columns are missing"

    def b(a):
        return a

    def c(b):
        return b

    with pytest.raises(ValueError, match=expected_message):
        compute_taxes_and_transfers(
            empty_frame,
            {},
            functions=[b, c],
            targets="c",
        )
コード例 #7
0
def test_pension(input_data, year):
    """Compare the computed ``rente_anspr_m`` with the values stored for ``year``.

    Only the rows whose ``jahr`` equals ``year`` are used; the computed
    column is rounded to two decimals before the comparison.
    """
    target = "rente_anspr_m"
    rows_for_year = input_data.loc[input_data["jahr"] == year]
    policy_date = f"{year}-07-01"

    policy_params, policy_functions = set_up_policy_environment(date=policy_date)

    computed = compute_taxes_and_transfers(
        data=rows_for_year[INPUT_COLS].copy(),
        params=policy_params,
        functions=policy_functions,
        targets=target,
    )

    rounded = computed[target].round(2)
    assert_series_equal(rounded, rows_for_year[target])
コード例 #8
0
def test_update_earning_points(input_data, year):
    """Compare the computed ``entgeltpunkte_update`` with the ``EP_end`` column.

    Only the rows whose ``jahr`` equals ``year`` are used. Series names are
    ignored in the comparison because the two columns are named differently.
    """
    target = "entgeltpunkte_update"
    rows_for_year = input_data.loc[input_data["jahr"] == year]
    policy_date = f"{year}-07-01"

    policy_params, policy_functions = set_up_policy_environment(date=policy_date)

    computed = compute_taxes_and_transfers(
        data=rows_for_year[INPUT_COLS].copy(),
        params=policy_params,
        functions=policy_functions,
        targets=target,
    )

    assert_series_equal(
        computed[target],
        rows_for_year["EP_end"],
        check_names=False,
    )
コード例 #9
0
def test_synthetic():
    """Test creation of synthetic data.

    Checks the frame produced by ``create_synthetic_data`` with default
    arguments, then a double-earner couple, then a call that uses
    ``heterogeneous_vars``, and finally runs the default frame through
    ``compute_taxes_and_transfers``.
    """
    # run with defaults
    df = create_synthetic_data()
    # rent must be positive
    assert df["kaltmiete_m_hh"].min() > 0
    # heating cost must be positive
    assert df["heizkosten_m_hh"].min() > 0
    # no NaN values
    assert df.notna().all().all()
    # correct dimensions for every household type
    # NOTE: the comparison has to sit outside ``len(...)``. With the
    # parenthesis misplaced, ``len(frame == 2)`` is the length of a boolean
    # frame, which is truthy for any non-empty selection and checks nothing.
    assert len(df[df["hh_typ"] == "couple_0_children"]) == 2
    assert len(df[df["hh_typ"] == "single_2_children"]) == 3
    assert len(df[df["hh_typ"] == "couple_2_children"]) == 4
    # unique personal id?
    assert df["p_id"].is_unique

    doppelverdiener = create_synthetic_data(hh_typen=["couple"],
                                            n_children=[0],
                                            double_earner=True,
                                            bruttolohn_m=2000)

    # every row of the double-earner household must have positive earnings
    assert (doppelverdiener["bruttolohn_m"] > 0).all()

    # test heterogeneity
    incrange = create_synthetic_data(
        hh_typen=["couple"],
        n_children=0,
        heterogeneous_vars={
            "bruttolohn_m": list(np.arange(0, 6000, 1000)),
            "vermögen_hh": [10_000, 500_000, 1_000_000],
        },
    )
    # every hh_id must group exactly two rows (couples without children)
    assert (incrange.groupby("hh_id").size() == 2).all()

    assert incrange.notna().all().all()

    # finally, run through gettsim
    policy_params, policy_functions = set_up_policy_environment(2020)
    results = compute_taxes_and_transfers(df, policy_params, policy_functions)
    assert len(results) == len(df)