def test_global_model_shrinkage(shrinkage_data):
    """Compare an explicit first-level group against the global model.

    One estimator lists "Planet" as its first group column with the global
    model disabled; the other omits "Planet", passes ``value_columns=[]`` and
    enables the global model. Both use ``"min_n_obs"`` shrinkage with
    ``min_n_obs=2``, and their predictions on the training frame must be
    equal series.
    """
    frame, _ = shrinkage_data
    X, y = frame.drop(columns="Target"), frame["Target"]

    # Settings shared by both estimators.
    shrink_kwargs = dict(shrinkage="min_n_obs", min_n_obs=2)

    est_planet_as_group = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        use_global_model=False,
        **shrink_kwargs,
    )
    est_global_model = GroupedEstimator(
        DummyRegressor(),
        ["Country", "City"],
        value_columns=[],
        use_global_model=True,
        **shrink_kwargs,
    )

    est_planet_as_group.fit(X, y)
    est_global_model.fit(X, y)

    pred_with_global = est_global_model.predict(X)
    pred_without_global = est_planet_as_group.predict(X)
    pd.testing.assert_series_equal(pred_with_global, pred_without_global)
def test_chickweight_raise_error_cols_missing2():
    """Predicting without the "time" column must raise a ``ValueError``.

    The model is fitted on "time" and "diet" (grouped by "diet") and then
    asked to predict on "diet" and "chick" only. The error message is
    expected to contain "not in columns".
    """
    df = load_chicken(as_frame=True)
    mod = GroupedEstimator(estimator=LinearRegression(), groups="diet")
    mod.fit(df[["time", "diet"]], df["weight"])
    with pytest.raises(ValueError) as e:
        mod.predict(df[["diet", "chick"]])
    # The message check lives outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    # ``e.value`` is the exception instance that was raised.
    assert "not in columns" in str(e.value)
Example #3
0
def test_chickweight_raise_error_cols_missing2():
    """Predicting without the "time" column must raise a ``ValueError``.

    The model is fitted on "time" and "diet" (grouped by "diet") and then
    asked to predict on "diet" and "chick" only. The error message is
    expected to contain "not in columns".
    """
    df = load_chicken(give_pandas=True)
    mod = GroupedEstimator(estimator=LinearRegression(), groups="diet")
    mod.fit(df[['time', 'diet']], df['weight'])
    with pytest.raises(ValueError) as e:
        mod.predict(df[['diet', 'chick']])
    # The message check lives outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    # ``e.value`` is the exception instance that was raised.
    assert "not in columns" in str(e.value)
Example #4
0
def test_chickweight_can_do_fallback():
    """Diets that have no fitted estimator still get a prediction.

    After fitting, the per-group estimators are keyed by diets 1-4. Rows
    with diets 5 and 6 (same "time") must yield a length-2 prediction whose
    two entries are equal.
    """
    chickens = load_chicken(give_pandas=True)
    mod = GroupedEstimator(estimator=LinearRegression(), groups="diet")
    mod.fit(chickens[['time', 'diet']], chickens['weight'])

    fitted_groups = set(mod.estimators_.keys())
    assert fitted_groups == {1, 2, 3, 4}

    # Diets 5 and 6 are not among the fitted group keys checked above.
    to_predict = pd.DataFrame({"time": [21, 21], "diet": [5, 6]})
    assert mod.predict(to_predict).shape == (2, )

    first_pred = mod.predict(to_predict)[0]
    second_pred = mod.predict(to_predict)[1]
    assert first_pred == second_pred
Example #5
0
def test_fallback_can_raise_error():
    """With ``use_fallback=False``, predicting for diets 5 and 6 must raise.

    The model is fitted on the chicken data grouped by "diet"; the frame to
    predict contains diets 5 and 6 (presumably absent from the training
    data) and ``predict`` is expected to raise ``ValueError``.
    """
    chickens = load_chicken(give_pandas=True)
    mod = GroupedEstimator(
        estimator=LinearRegression(),
        groups="diet",
        use_fallback=False,
    )
    train_X = chickens[['time', 'diet']]
    train_y = chickens['weight']
    mod.fit(train_X, train_y)

    to_predict = pd.DataFrame({"time": [21, 21], "diet": [5, 6]})
    with pytest.raises(ValueError):
        mod.predict(to_predict)
Example #6
0
def test_fallback_can_raise_error():
    """Without global model or shrinkage, diets 5 and 6 must raise.

    The model is fitted on the chicken data grouped by "diet" with
    ``use_global_model=False`` and ``shrinkage=None``. Predicting for diets
    5 and 6 is expected to raise a ``ValueError`` whose message contains
    "found a group".
    """
    df = load_chicken(give_pandas=True)
    mod = GroupedEstimator(estimator=LinearRegression(),
                           groups="diet",
                           use_global_model=False,
                           shrinkage=None)
    mod.fit(df[['time', 'diet']], df['weight'])
    to_predict = pd.DataFrame({"time": [21, 21], "diet": [5, 6]})
    with pytest.raises(ValueError) as e:
        mod.predict(to_predict)
    # The message check lives outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    # ``e.value`` is the exception instance that was raised.
    assert "found a group" in str(e.value)
def test_shrinkage_single_group(shrinkage_data):
    """Constant shrinkage with one group column plus the global model.

    With ``alpha=0.1`` the test expects every prediction to equal
    ``0.1 * means["Earth"] + 0.9 * means[<country>]``, for rows ordered
    NL, NL, BE, BE.
    """
    frame, means = shrinkage_data
    X, y = frame.drop(columns="Target"), frame["Target"]

    shrink_est = GroupedEstimator(
        DummyRegressor(),
        "Country",
        value_columns=[],
        shrinkage="constant",
        use_global_model=True,
        alpha=0.1,
    )
    shrink_est.fit(X, y)

    # Weights applied to (means["Earth"], country mean), in that order.
    shrinkage_factors = np.array([0.1, 0.9])
    row_countries = ("NL", "NL", "BE", "BE")
    expected_prediction = [
        np.array([means["Earth"], means[country]]) @ shrinkage_factors
        for country in row_countries
    ]

    assert expected_prediction == shrink_est.predict(X).tolist()
def test_custom_shrinkage(shrinkage_data):
    """A callable can be supplied as the ``shrinkage`` argument.

    The callable returns ``n`` equal weights of ``1 / n`` for ``n`` group
    sizes. The test expects each prediction to be the Planet, Country and
    City means of the row weighted by one third each.
    """
    frame, means = shrinkage_data
    X, y = frame.drop(columns="Target"), frame["Target"]

    def shrinkage_func(group_sizes):
        # Same weight for every level, regardless of the sizes themselves.
        level_count = len(group_sizes)
        return np.repeat(1 / level_count, level_count)

    shrink_est = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage=shrinkage_func,
        use_global_model=False,
    )
    shrink_est.fit(X, y)

    shrinkage_factors = np.array([1, 1, 1]) / 3
    # (country, city) of each row, in the order the predictions are expected.
    row_groups = [
        ("NL", "Amsterdam"),
        ("NL", "Rotterdam"),
        ("BE", "Antwerp"),
        ("BE", "Brussels"),
    ]
    expected_prediction = [
        np.array([means["Earth"], means[country], means[city]])
        @ shrinkage_factors
        for country, city in row_groups
    ]

    assert expected_prediction == shrink_est.predict(X).tolist()
def test_relative_shrinkage(shrinkage_data):
    """Check predictions under ``shrinkage="relative"``.

    The test expects the Planet, Country and City means of each row to be
    combined with weights 4/7, 2/7 and 1/7 respectively.
    """
    frame, means = shrinkage_data
    X, y = frame.drop(columns="Target"), frame["Target"]

    shrink_est = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage="relative",
        use_global_model=False,
    )
    shrink_est.fit(X, y)

    shrinkage_factors = np.array([4, 2, 1]) / 7
    # (country, city) of each row, in the order the predictions are expected.
    row_groups = [
        ("NL", "Amsterdam"),
        ("NL", "Rotterdam"),
        ("BE", "Antwerp"),
        ("BE", "Brussels"),
    ]
    expected_prediction = [
        np.array([means["Earth"], means[country], means[city]])
        @ shrinkage_factors
        for country, city in row_groups
    ]

    assert expected_prediction == shrink_est.predict(X).tolist()
Example #10
0
def test_predict_missing_group_column(shrinkage_data):
    """Predicting without one of the group columns must raise ``ValueError``.

    The estimator is fitted with group columns Planet, Country and City,
    then asked to predict on a frame with "Country" dropped. The error
    message is expected to contain "group columns".
    """
    df, means = shrinkage_data

    X, y = df.drop(columns="Target"), df['Target']

    shrink_est = GroupedEstimator(DummyRegressor(),
                                  ["Planet", 'Country', 'City'],
                                  shrinkage="constant",
                                  use_global_model=False,
                                  alpha=0.1)

    shrink_est.fit(X, y)

    with pytest.raises(ValueError) as e:
        shrink_est.predict(X.drop(columns=['Country']))
    # The message check lives outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    # ``e.value`` is the exception instance that was raised.
    assert "group columns" in str(e.value)
Example #11
0
def test_constant_shrinkage(shrinkage_data):
    """Check predictions under ``shrinkage="constant"`` with ``alpha=0.1``.

    The test expects the Planet, Country and City means of each row to be
    combined with weights 0.01, 0.09 and 0.9 respectively.
    """
    frame, means = shrinkage_data
    X, y = frame.drop(columns="Target"), frame['Target']

    shrink_est = GroupedEstimator(
        DummyRegressor(),
        ["Planet", 'Country', 'City'],
        shrinkage="constant",
        use_global_model=False,
        alpha=0.1,
    )
    shrink_est.fit(X, y)

    shrinkage_factors = np.array([0.01, 0.09, 0.9])
    # (country, city) of each row, in the order the predictions are expected.
    row_groups = [
        ("NL", "Amsterdam"),
        ("NL", "Rotterdam"),
        ("BE", "Antwerp"),
        ("BE", "Brussels"),
    ]
    expected_prediction = [
        np.array([means["Earth"], means[country], means[city]])
        @ shrinkage_factors
        for country, city in row_groups
    ]

    assert expected_prediction == shrink_est.predict(X).tolist()
Example #12
0
def test_predict_missing_value_column(shrinkage_data):
    """Predicting without the fitted value column must raise ``ValueError``.

    A random "predictor" column is added to the features before fitting and
    dropped again before predicting. The error message is expected to
    contain "columns to use".
    """
    df, means = shrinkage_data

    value_column = "predictor"

    X, y = df.drop(columns="Target"), df['Target']
    # The values are irrelevant here; only the column's presence matters.
    X = X.assign(**{value_column: np.random.normal(size=X.shape[0])})

    shrink_est = GroupedEstimator(LinearRegression(),
                                  ["Planet", 'Country', 'City'],
                                  shrinkage="constant",
                                  use_global_model=False,
                                  alpha=0.1)

    shrink_est.fit(X, y)

    with pytest.raises(ValueError) as e:
        shrink_est.predict(X.drop(columns=[value_column]))
    # The message check lives outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    # ``e.value`` is the exception instance that was raised.
    assert "columns to use" in str(e.value)
def test_unseen_groups_shrinkage(shrinkage_data):
    """Predicting for a group the test treats as unseen must raise.

    After fitting with constant shrinkage, the estimator is asked to predict
    on four copies of an (Earth, DE, Hamburg) row. A ``ValueError`` whose
    message contains "found a group" is expected.
    """
    df, means = shrinkage_data

    X, y = df.drop(columns="Target"), df["Target"]

    shrink_est = GroupedEstimator(DummyRegressor(),
                                  ["Planet", "Country", "City"],
                                  shrinkage="constant",
                                  alpha=0.1)

    shrink_est.fit(X, y)

    unseen_group = pd.DataFrame({
        "Planet": ["Earth"],
        "Country": ["DE"],
        "City": ["Hamburg"]
    })

    with pytest.raises(ValueError) as e:
        shrink_est.predict(X=pd.concat([unseen_group] * 4, axis=0))
    # The message check lives outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    # ``e.value`` is the exception instance that was raised.
    assert "found a group" in str(e.value)
def test_min_n_obs_shrinkage(shrinkage_data):
    """Check predictions under ``shrinkage="min_n_obs"`` with ``min_n_obs=2``.

    The test expects each row to be predicted with its country mean, for
    rows ordered NL, NL, BE, BE.
    """
    frame, means = shrinkage_data
    X, y = frame.drop(columns="Target"), frame["Target"]

    shrink_est = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage="min_n_obs",
        use_global_model=False,
        min_n_obs=2,
    )
    shrink_est.fit(X, y)

    row_countries = ("NL", "NL", "BE", "BE")
    expected_prediction = [means[country] for country in row_countries]

    assert expected_prediction == shrink_est.predict(X).tolist()
Example #15
0
def test_chickweight_raise_error_cols_missing1():
    """Predicting without the "diet" group column must raise ``KeyError``.

    The model is fitted on "time" and "diet" (grouped by "diet") and then
    asked to predict on "time" and "chick" only.
    """
    chickens = load_chicken(give_pandas=True)
    mod = GroupedEstimator(estimator=LinearRegression(), groups="diet")
    train_X = chickens[['time', 'diet']]
    train_y = chickens['weight']
    mod.fit(train_X, train_y)

    with pytest.raises(KeyError):
        mod.predict(chickens[['time', 'chick']])