Beispiel #1
0
def test_grouping_column_not_in_df(penguins_df):
    """Fitting on a DataFrame must fail when a requested grouping column is unknown."""
    grouping_columns = ["island", "unexisting_column"]
    grouped_scaler = GroupedTransformer(StandardScaler(),
                                        groups=grouping_columns)

    # "unexisting_column" is deliberately a name the frame is not expected to
    # contain, so fitting has to be rejected with a ValueError.
    with pytest.raises(ValueError):
        grouped_scaler.fit_transform(penguins_df)
Beispiel #2
0
def test_df(penguins_df):
    """The transformed DataFrame input keeps its rows and loses the grouping columns."""
    grouping_columns = ["island", "sex"]
    grouped_scaler = GroupedTransformer(StandardScaler(),
                                        groups=grouping_columns)

    result = grouped_scaler.fit_transform(penguins_df)

    n_rows, n_cols = penguins_df.shape
    # One output column fewer for every column that was used for grouping
    assert result.shape == (n_rows, n_cols - len(grouping_columns))
Beispiel #3
0
def test_grouping_column_not_in_array(penguins):
    """Fitting on an array must fail when a group index is beyond its columns."""
    grouped_scaler = GroupedTransformer(StandardScaler(), groups=[0, 5])

    # Only the first three columns are passed in, so index 5 cannot be
    # resolved and a ValueError is expected.
    with pytest.raises(ValueError):
        grouped_scaler.fit_transform(penguins[:, :3])
Beispiel #4
0
def test_df_missing_group(penguins_df):
    """A missing value inside a grouping column must make fitting fail."""
    # Mutate a copy so the fixture object itself is left unchanged
    frame_with_gap = penguins_df.copy()
    frame_with_gap.loc[0, "island"] = None

    grouped_scaler = GroupedTransformer(StandardScaler(),
                                        groups=["island", "sex"])

    with pytest.raises(ValueError):
        grouped_scaler.fit_transform(frame_with_gap)
Beispiel #5
0
def test_array_with_multiple_string_cols(penguins):
    """Grouping an array by its first and last column drops both from the output."""
    group_indices = [0, -1]
    grouped_scaler = GroupedTransformer(StandardScaler(), groups=group_indices)

    result = grouped_scaler.fit_transform(penguins)

    n_rows, n_cols = penguins.shape
    # Row count is preserved; the grouping columns are not part of the result
    assert result.shape == (n_rows, n_cols - len(group_indices))
Beispiel #6
0
def test_non_transformer(dataset_with_single_grouping):
    """Wrapping an estimator that is not a transformer must fail on ``fit``."""
    features, target, _, _, group_columns = dataset_with_single_grouping

    # LinearRegression is used here as an example of a non-transformer
    wrapped = GroupedTransformer(LinearRegression(), groups=group_columns)

    with pytest.raises(ValueError):
        wrapped.fit(features, target)
Beispiel #7
0
def test_no_grouping(penguins_df):
    """With ``groups=None`` the wrapper must reproduce the bare transformer's output."""
    numeric_columns = [
        "bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g"
    ]
    numeric_frame = penguins_df[numeric_columns]

    grouped_output = GroupedTransformer(
        StandardScaler(), groups=None).fit_transform(numeric_frame)
    plain_output = StandardScaler().fit_transform(numeric_frame)

    # Element-wise equality: both paths should run the same computation
    assert (grouped_output == plain_output).all()
Beispiel #8
0
def test_with_y(penguins_df):
    """Supplying ``y`` next to a DataFrame works and drops the one grouping column."""
    target = penguins_df["sex"]
    features = penguins_df.drop(columns=["sex"])

    grouped_scaler = GroupedTransformer(StandardScaler(), groups="island")

    # Must not raise even though a target is passed along
    result = grouped_scaler.fit_transform(features, target)

    n_rows, n_cols = features.shape
    # Only "island" was used for grouping, so exactly one column disappears
    assert result.shape == (n_rows, n_cols - 1)
Beispiel #9
0
def test_get_params():
    """``get_params`` reports the wrapper's own settings and the nested transformer's."""
    inner = StandardScaler(with_std=False)
    wrapper = GroupedTransformer(inner, groups=0)

    expected_params = {
        # Settings of the wrapper itself
        "groups": 0,
        "use_global_model": True,
        "transformer": inner,
        # Settings of the wrapped scaler, exposed with the double-underscore prefix
        "transformer__copy": True,
        "transformer__with_mean": True,
        "transformer__with_std": False,
    }

    assert wrapper.get_params() == expected_params
Beispiel #10
0
def test_array_with_strings():
    """Smoke test: an object array with string group labels can be fit and transformed."""
    rows = [
        ("group0", 2),
        ("group0", 0),
        ("group1", 1),
        ("group1", 3),
    ]
    X = np.array(rows, dtype='object')

    grouped_scaler = GroupedTransformer(MinMaxScaler(),
                                        groups=0,
                                        use_global_model=False)

    # No assertion on the values; the call simply has to complete
    grouped_scaler.fit_transform(X)
Beispiel #11
0
def test_all_groups_scaled(dataset_with_single_grouping, scaling_range):
    """Each group's transformed values must span exactly ``scaling_range``."""
    _, y, groups, X_with_groups, grouper = dataset_with_single_grouping

    grouped_scaler = GroupedTransformer(MinMaxScaler(scaling_range),
                                        groups=grouper)
    fitted = grouped_scaler.fit(X_with_groups, y)
    scaled = fitted.transform(X_with_groups)

    # Re-attach the group labels so the output can be aggregated per group
    group_labels = pd.Series(groups.flatten(), name="G")
    per_group = pd.concat([group_labels, pd.DataFrame(scaled)],
                          axis=1).groupby("G")

    lower, upper = scaling_range[0], scaling_range[1]
    assert np.allclose(per_group.min(), lower)
    assert np.allclose(per_group.max(), upper)
Beispiel #12
0
def test_exception_in_group(multiple_obs_fitter):
    """A failure while fitting one group must surface as a ValueError naming that group.

    The first column holds the group label: group 1 has two rows and group 2
    has a single row.
    """
    X = np.array([
        [1, 2],
        [1, 0],
        [2, 1],
    ])

    # The fixture is meant to work only on groups with more than one
    # observation, so fitting is expected to fail on group 2.
    transformer = GroupedTransformer(multiple_obs_fitter,
                                     groups=0,
                                     use_global_model=False)

    with pytest.raises(ValueError) as excinfo:
        transformer.fit(X)

    # This check has to live outside the ``with`` block: anything placed after
    # the raising call inside the block is never executed. The message is read
    # from ``excinfo.value`` (the exception itself), not from the ExceptionInfo
    # wrapper.
    assert "group 2" in str(excinfo.value)
Beispiel #13
0
def test_group_correlation_minmaxscaler(dataset_with_single_grouping,
                                        scaling_range):
    """Within every group, each transformed column must correlate perfectly with its original."""
    X, y, groups, X_with_groups, grouper = dataset_with_single_grouping

    grouped_scaler = GroupedTransformer(MinMaxScaler(scaling_range),
                                        groups=grouper)
    scaled = grouped_scaler.fit(X_with_groups, y).transform(X_with_groups)

    group_labels = groups.flatten()

    # MinMaxScaler is a linear rescaling, so per group the correlation between
    # a column and its scaled version should be 1 everywhere.
    for col in range(X.shape[1]):
        paired = pd.concat(
            [
                pd.Series(group_labels, name="group"),
                pd.Series(X[:, col], name="original"),
                pd.Series(scaled[:, col], name="transformed"),
            ],
            axis=1,
        )
        per_group_corr = paired.groupby("group").corr()
        assert np.allclose(per_group_corr, 1)
Beispiel #14
0
def test_missing_groups_transform_global(dataset_with_single_grouping,
                                         scaling_range):
    """Rows of a group not seen during fit are scaled to the bounds of ``scaling_range``.

    ``use_global_model`` is left at its default here.
    """
    X, y, groups, X_with_groups, grouper = dataset_with_single_grouping

    grouped_scaler = GroupedTransformer(MinMaxScaler(scaling_range),
                                        groups=grouper)
    grouped_scaler.fit(X_with_groups, y)

    # Two rows labelled with group 3 (intended to be a group absent from the
    # training data): the first carries the column-wise minima of X, the
    # second the column-wise maxima.
    unseen_group = np.array([[3], [3]])
    extremes = np.stack([X.min(axis=0), X.max(axis=0)], axis=0)
    X_test = np.concatenate([unseen_group, extremes], axis=1)

    scaled = grouped_scaler.transform(X_test)

    # Minima map to the lower bound of the range, maxima to the upper bound
    assert np.allclose(scaled[0, :], scaling_range[0])
    assert np.allclose(scaled[1, :], scaling_range[1])
Beispiel #15
0
def test_multiple_grouping_columns(dataset_with_multiple_grouping,
                                   scaling_range):
    """Scaling is applied per combination of two grouping columns."""
    _, y, groups, X_with_groups, grouper = dataset_with_multiple_grouping

    grouped_scaler = GroupedTransformer(MinMaxScaler(scaling_range),
                                        groups=grouper)
    scaled = grouped_scaler.fit(X_with_groups, y).transform(X_with_groups)

    # Re-attach both grouping columns so the output can be aggregated per pair
    group_frame = pd.DataFrame(groups, columns=["A", "B"])
    per_group = pd.concat([group_frame, pd.DataFrame(scaled)],
                          axis=1).groupby(["A", "B"])

    lower, upper = scaling_range[0], scaling_range[1]
    assert np.allclose(per_group.min(), lower)

    maxes = per_group.max()
    hits_upper = np.isclose(maxes, upper)
    hits_lower = np.isclose(maxes, lower)

    # A group with a single element ends up at the lower bound, so every
    # group maximum has to be one of the two bounds ...
    assert np.all(hits_upper | hits_lower)
    # ... and since some groups hold more than one element, the upper bound
    # has to show up at least once.
    assert np.any(hits_upper)
Beispiel #16
0
def test_missing_groups_transform_noglobal(dataset_with_single_grouping,
                                           scaling_range):
    """Without a global model, transforming rows of an unseen group must fail."""
    X, y, groups, X_with_groups, grouper = dataset_with_single_grouping

    grouped_scaler = GroupedTransformer(MinMaxScaler(scaling_range),
                                        groups=grouper,
                                        use_global_model=False)
    grouped_scaler.fit(X_with_groups, y)

    # Two rows labelled with group 3 (intended to be a group absent from the
    # training data); the feature values lie one unit beyond the column-wise
    # minima and maxima of X.
    unseen_group = np.array([[3], [3]])
    beyond_range = np.stack([X.min(axis=0) - 1, X.max(axis=0) + 1], axis=0)
    X_test = np.concatenate([unseen_group, beyond_range], axis=1)

    with pytest.raises(ValueError):
        grouped_scaler.transform(X_test)
Beispiel #17
0
def test_estimator_checks(test_fn):
    """Run the check callable supplied as ``test_fn`` against a ``GroupedTransformer``."""
    estimator = GroupedTransformer(StandardScaler(), groups=0)
    estimator_name = GroupedTransformer.__name__

    # The check is called with the estimator's class name and an instance
    test_fn(estimator_name, estimator)
Beispiel #18
0
def test_set_params():
    """``set_params`` with a ``transformer__`` key must update the wrapped transformer."""
    inner = StandardScaler(with_std=False)
    wrapper = GroupedTransformer(inner, groups=0)

    wrapper.set_params(transformer__with_std=True)

    # The change has to be visible on the very instance handed to the wrapper
    assert inner.with_std