Ejemplo n.º 1
0
def test_min_n_obs_shrinkage_too_little_obs(shrinkage_data):
    """Fitting must fail when ``min_n_obs`` exceeds the number of rows available.

    ``min_n_obs`` is set to one more than the row count of ``X``, so no group
    can contain that many observations. ``fit`` is expected to raise a
    ``ValueError`` whose message mentions the requested size.
    """
    df, _ = shrinkage_data  # the fixture's second element is not used here

    X, y = df.drop(columns="Target"), df["Target"]

    too_big_n_obs = X.shape[0] + 1

    shrink_est = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage="min_n_obs",
        use_global_model=False,
        min_n_obs=too_big_n_obs,
    )

    with pytest.raises(ValueError) as e:
        shrink_est.fit(X, y)

    # The message check lives outside the ``with`` block: anything placed after
    # the raising call inside the block is skipped once the exception fires.
    # ``e.value`` is the raised exception itself; ``e`` is only the wrapper.
    assert (
        f"There is no group with size greater than or equal to {too_big_n_obs}"
        in str(e.value)
    )
Ejemplo n.º 2
0
def test_global_model_shrinkage(shrinkage_data):
    """Compare a global-model estimator against one with an explicit top-level group.

    One estimator groups on Planet/Country/City with ``use_global_model=False``;
    the other groups on Country/City only with ``use_global_model=True``. Both
    are fitted on the same data and their predictions on ``X`` must be equal.
    """
    frame, _ = shrinkage_data

    X = frame.drop(columns="Target")
    y = frame['Target']

    explicit_top_level = GroupedEstimator(
        DummyRegressor(),
        ["Planet", 'Country', 'City'],
        shrinkage="min_n_obs",
        use_global_model=False,
        min_n_obs=2,
    )
    implicit_top_level = GroupedEstimator(
        DummyRegressor(),
        ['Country', 'City'],
        value_columns=[],
        shrinkage="min_n_obs",
        use_global_model=True,
        min_n_obs=2,
    )

    explicit_top_level.fit(X, y)
    implicit_top_level.fit(X, y)

    # Predict in the same order as the comparison arguments below.
    global_predictions = implicit_top_level.predict(X)
    grouped_predictions = explicit_top_level.predict(X)

    pd.testing.assert_series_equal(global_predictions, grouped_predictions)
Ejemplo n.º 3
0
def test_shrinkage_single_group(shrinkage_data):
    """Check constant shrinkage with a single grouping column and a global model.

    The expected prediction for each row is the dot product of
    ``[mean of "Earth", mean of the row's country]`` with the weights
    ``[0.1, 0.9]``; the rows are expected in the order NL, NL, BE, BE.
    """
    frame, group_means = shrinkage_data

    X = frame.drop(columns="Target")
    y = frame['Target']

    model = GroupedEstimator(
        DummyRegressor(),
        'Country',
        value_columns=[],
        shrinkage="constant",
        use_global_model=True,
        alpha=0.1,
    )

    weights = np.array([0.1, 0.9])

    model.fit(X, y)

    def blended_mean(country):
        # Weighted combination of the global ("Earth") mean and the country mean.
        return np.array([group_means["Earth"], group_means[country]]) @ weights

    expected_prediction = [
        blended_mean(country) for country in ("NL", "NL", "BE", "BE")
    ]

    assert expected_prediction == model.predict(X).tolist()
Ejemplo n.º 4
0
def test_chickweight_np_keys():
    """Fit on plain arrays, grouping by column positions 1 and 2, and count the groups."""
    chickens = load_chicken(give_pandas=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups=[1, 2])

    features = chickens[['time', 'chick', 'diet']].values
    target = chickens['weight'].values
    model.fit(features, target)

    # there should still only be 50 groups on this dataset
    n_groups = len(model.estimators_.keys())
    assert n_groups == 50
Ejemplo n.º 5
0
def test_chickweight_raise_error_cols_missing2():
    """Predicting on columns that differ from the fitted ones must raise ``ValueError``.

    The model is fitted on ``time`` and ``diet`` but asked to predict on
    ``diet`` and ``chick``.
    """
    chickens = load_chicken(give_pandas=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="diet")

    fit_columns = ['time', 'diet']
    model.fit(chickens[fit_columns], chickens['weight'])

    predict_columns = ['diet', 'chick']
    with pytest.raises(ValueError):
        model.predict(chickens[predict_columns])
Ejemplo n.º 6
0
def test_chickweight_df2_keys():
    """Grouping by ``chick`` must produce estimator keys 1 through 50."""
    chickens = load_chicken(give_pandas=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="chick")

    features = chickens[['time', 'chick']]
    model.fit(features, chickens['weight'])

    fitted_keys = set(model.estimators_.keys())
    assert fitted_keys == set(range(1, 50 + 1))
Ejemplo n.º 7
0
def test_chickweight_df1_keys():
    """Grouping by ``diet`` must produce estimator keys 1, 2, 3 and 4."""
    chickens = load_chicken(give_pandas=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="diet")

    features = chickens[['time', 'diet']]
    model.fit(features, chickens['weight'])

    fitted_keys = set(model.estimators_.keys())
    assert fitted_keys == {1, 2, 3, 4}
Ejemplo n.º 8
0
def test_chickweight_df2_keys():
    """Grouping by ``chick`` must produce estimator keys 1 through 50."""
    chickens = load_chicken(as_frame=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="chick")

    features = chickens[["time", "chick"]]
    model.fit(features, chickens["weight"])

    fitted_keys = set(model.estimators_.keys())
    assert fitted_keys == set(range(1, 50 + 1))
Ejemplo n.º 9
0
def test_chickweight_df1_keys():
    """Grouping by ``diet`` must produce estimator keys 1, 2, 3 and 4."""
    chickens = load_chicken(as_frame=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="diet")

    features = chickens[["time", "diet"]]
    model.fit(features, chickens["weight"])

    fitted_keys = set(model.estimators_.keys())
    assert fitted_keys == {1, 2, 3, 4}
Ejemplo n.º 10
0
def test_chickweight_np_keys():
    """Fit on plain arrays, grouping by column positions 1 and 2, and count the groups."""
    chickens = load_chicken(as_frame=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups=[1, 2])

    features = chickens[["time", "chick", "diet"]].values
    target = chickens["weight"].values
    model.fit(features, target)

    # there should still only be 50 groups on this dataset
    n_groups = len(model.estimators_.keys())
    assert n_groups == 50