예제 #1
0
def test_non_transformer(dataset_with_single_grouping):
    """Fitting must raise ``ValueError`` when the wrapped estimator is not a transformer."""
    features, target, _, _, group_cols = dataset_with_single_grouping

    # LinearRegression is deliberately used here because it is not a transformer.
    grouped = GroupedTransformer(LinearRegression(), groups=group_cols)

    with pytest.raises(ValueError):
        grouped.fit(features, target)
예제 #2
0
def test_exception_in_group(multiple_obs_fitter):
    """A failure while fitting one group must surface as a ``ValueError`` naming that group.

    ``multiple_obs_fitter`` is a fixture defined outside this block; it is
    presumably an estimator that refuses to fit on a single observation
    (confirm against the fixture definition).
    """
    # Column 0 holds the group label: group 1 has two rows, group 2 only one.
    X = np.array([
        [1, 2],
        [1, 0],
        [2, 1],
    ])

    # Only works on groups greater than 1, so will raise an error in group 2
    transformer = GroupedTransformer(multiple_obs_fitter,
                                     groups=0,
                                     use_global_model=False)

    with pytest.raises(ValueError) as excinfo:
        transformer.fit(X)

    # The check must live outside the ``with`` block: statements placed after
    # the raising call inside the block are never executed. ``excinfo.value``
    # is the actual exception instance, whose message is what we inspect.
    assert "group 2" in str(excinfo.value)
예제 #3
0
def test_missing_groups_transform_noglobal(dataset_with_single_grouping,
                                           scaling_range):
    """Transforming a group not seen during fit raises when no global model is used."""
    X, y, _, X_with_groups, grouper = dataset_with_single_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range),
                                     groups=grouper,
                                     use_global_model=False)
    transformer.fit(X_with_groups, y)

    # Two test rows whose first column is group 3, intended to be a group the
    # fitted transformer has not seen. The remaining columns lie just outside
    # the range of the training features.
    new_group_column = np.array([[3], [3]])
    out_of_range_rows = np.stack(
        [X.min(axis=0) - 1, X.max(axis=0) + 1],
        axis=0,
    )
    X_test = np.concatenate([new_group_column, out_of_range_rows], axis=1)

    with pytest.raises(ValueError):
        transformer.transform(X_test)
예제 #4
0
def test_missing_groups_transform_global(dataset_with_single_grouping,
                                         scaling_range):
    """With the default settings, rows of an unseen group are still scaled onto the range."""
    X, y, _, X_with_groups, grouper = dataset_with_single_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range),
                                     groups=grouper)
    transformer.fit(X_with_groups, y)

    # Two test rows whose first column is group 3, intended to be a group the
    # fitted transformer has not seen. The first row carries the column-wise
    # minima of X, the second row the column-wise maxima.
    new_group_column = np.array([[3], [3]])
    extreme_rows = np.stack([X.min(axis=0), X.max(axis=0)], axis=0)
    X_test = np.concatenate([new_group_column, extreme_rows], axis=1)

    transformed = transformer.transform(X_test)

    # The row of minima must land on the lower bound of the range, the row of
    # maxima on the upper bound.
    lower_bound = scaling_range[0]
    upper_bound = scaling_range[1]
    assert np.allclose(transformed[0, :], lower_bound)
    assert np.allclose(transformed[1, :], upper_bound)
예제 #5
0
def test_all_groups_scaled(dataset_with_single_grouping, scaling_range):
    """Every group's transformed values must span exactly ``scaling_range``."""
    _, y, groups, X_with_groups, grouper = dataset_with_single_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range),
                                     groups=grouper)
    fitted = transformer.fit(X_with_groups, y)
    transformed = fitted.transform(X_with_groups)

    # Put the group label next to the transformed features so that the
    # per-group extremes can be computed with a plain groupby.
    group_labels = pd.Series(groups.flatten(), name="G")
    df_with_groups = pd.concat([group_labels, pd.DataFrame(transformed)],
                               axis=1)
    per_group = df_with_groups.groupby("G")

    assert np.allclose(per_group.min(), scaling_range[0])
    assert np.allclose(per_group.max(), scaling_range[1])
예제 #6
0
def test_group_correlation_minmaxscaler(dataset_with_single_grouping,
                                        scaling_range):
    """Within each group, transformed columns must correlate perfectly with the originals."""
    X, y, groups, X_with_groups, grouper = dataset_with_single_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range),
                                     groups=grouper)
    fitted = transformer.fit(X_with_groups, y)
    transformed = fitted.transform(X_with_groups)

    group_labels = pd.Series(groups.flatten(), name="group")

    # MinMaxScaler scales linearly, so per group every pairwise correlation
    # between an original column and its transformed counterpart should be 1.
    for col in range(X.shape[1]):
        paired = pd.concat(
            [
                group_labels,
                pd.Series(X[:, col], name="original"),
                pd.Series(transformed[:, col], name="transformed"),
            ],
            axis=1,
        )
        correlations = paired.groupby("group").corr()
        assert np.allclose(correlations, 1)
예제 #7
0
def test_multiple_grouping_columns(dataset_with_multiple_grouping,
                                   scaling_range):
    """Scaling must be applied per combination of the two grouping columns."""
    _, y, groups, X_with_groups, grouper = dataset_with_multiple_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range),
                                     groups=grouper)
    fitted = transformer.fit(X_with_groups, y)
    transformed = fitted.transform(X_with_groups)

    group_frame = pd.DataFrame(groups, columns=["A", "B"])
    df_with_groups = pd.concat([group_frame, pd.DataFrame(transformed)],
                               axis=1)
    per_group = df_with_groups.groupby(["A", "B"])

    assert np.allclose(per_group.min(), scaling_range[0])

    # A group with a single element is scaled to the lower bound, so each
    # per-group maximum may equal either bound of the range.
    maxes = per_group.max()
    at_upper_bound = np.isclose(maxes, scaling_range[1])
    at_lower_bound = np.isclose(maxes, scaling_range[0])
    assert np.all(at_upper_bound | at_lower_bound)

    # At least some groups hold more than one element, so the upper bound
    # has to be reached somewhere.
    assert np.any(at_upper_bound)