예제 #1
0
def test_keep():
    """Feature selection on a DataFrame with named columns and a ``keep`` list.

    The frame's column labels mix ints and strings; the columns listed in
    ``keep`` (``2`` and ``"x5"``) are expected among the selected features.
    The assertion compares against string labels only.
    """
    X, target = get_random_data()
    column_labels = [1, 2, "3", "x4", "x5", "eng6", "eng7"]
    kept_columns = [2, "x5"]
    fsel = FeatureSelector(verbose=0, keep=kept_columns)
    named_frame = pd.DataFrame(X, columns=column_labels)
    new_X = fsel.fit_transform(named_frame, target)
    # a DataFrame goes in, so a DataFrame must come out
    assert isinstance(new_X, pd.DataFrame)
    expected_columns = {"1", "eng6", "eng7", "2", "x5"}
    assert set(new_X.columns) == expected_columns, "Wrong features selected (%r)" % new_X.columns
예제 #2
0
def test_regular_df_X_y():
    """Feature selection where both features and target are plain DataFrames.

    No column names are supplied, so pandas' default integer labels are used;
    the columns labelled 0, 5 and 6 are expected to be selected.
    """
    X, target = get_random_data()
    features_df = pd.DataFrame(X)
    target_df = pd.DataFrame(target)
    fsel = FeatureSelector(verbose=0)
    new_X = fsel.fit_transform(features_df, target_df)
    # a DataFrame goes in, so a DataFrame must come out
    assert isinstance(new_X, pd.DataFrame)
    expected_columns = {0, 5, 6}
    assert set(new_X.columns) == expected_columns, "Wrong features selected (%r)" % new_X.columns
예제 #3
0
def test_regular_X_y():
    """Feature selection on the raw arrays returned by ``get_random_data``.

    The result must be a numpy array with exactly three columns.
    """
    X, target = get_random_data()
    selector = FeatureSelector(verbose=0)
    selected = selector.fit_transform(X, target)
    assert isinstance(selected, np.ndarray)
    n_selected = selected.shape[1]
    assert n_selected == 3, "Wrong number of features selected"
예제 #4
0
def test_nans():
    """NaNs must be rejected by ``fit`` but passed through by ``transform``.

    Two NaNs are planted at rows 998 and 999. Fitting on the full data has to
    raise ``ValueError``; fitting on the first 900 rows (which excludes the two
    planted NaNs) and then transforming the full data must keep exactly one
    NaN, in column 0, among the selected columns 0, 5 and 6.
    """
    X, target = get_random_data()
    for row, col in ((998, 0), (999, 1)):
        X[row, col] = np.nan
    fsel = FeatureSelector(verbose=0)

    # fitting on data that contains NaNs has to fail with a ValueError
    fit_rejected = False
    try:
        fsel.fit(pd.DataFrame(X), target)
    except ValueError:
        fit_rejected = True
    if not fit_rejected:
        raise AssertionError("fit with NaNs should throw an error")

    # fit on the slice without the planted NaNs, then transform everything
    fsel.fit_transform(pd.DataFrame(X[:900]), target[:900])
    df = fsel.transform(pd.DataFrame(X))
    assert pd.isna(df[0].iloc[998]), "The first feature should be NaN"
    nan_count = np.sum(pd.isna(df).to_numpy(dtype=int))
    assert nan_count == 1, "only 1 place should be NaN"
    expected_columns = {0, 5, 6}
    assert set(df.columns) == expected_columns, "Wrong features selected (%r)" % df.columns
예제 #5
0
def test_few_runs():
    """Feature selection with ``featsel_runs`` set to 0 and then to 1.

    Both settings are expected to select exactly three features from the
    same arrays.
    """
    X, target = get_random_data()
    for n_runs in (0, 1):
        fsel = FeatureSelector(verbose=0, featsel_runs=n_runs)
        new_X = fsel.fit_transform(X, target)
        assert new_X.shape[1] == 3, "Wrong number of features selected"
예제 #6
0
    except ValueError:
        pass
    else:
        raise AssertionError("fit with NaNs should throw an error")
    _ = fsel.fit_transform(pd.DataFrame(X[:900]), target[:900])
    df = fsel.transform(pd.DataFrame(X))
    assert pd.isna(df[0].iloc[998]), "The first feature should be NaN"
    assert np.sum(pd.isna(df).to_numpy(dtype=int)) == 1, "only 1 place should be NaN"
    assert set(df.columns) == set([0, 5, 6]), "Wrong features selected (%r)" % df.columns


if __name__ == '__main__':
    print("## Running sklearn tests")
    # NaNs are allowed in transform, so check_estimators_nan_inf is registered
    # as already handled up front and therefore never executed.
    successful_tests = {"check_estimators_nan_inf"}
    # First pass (featsel_runs=1) runs the checks; the second pass, with
    # default parameters, only runs checks whose name has not been seen yet.
    for selector_kwargs in ({"featsel_runs": 1}, {}):
        for estimator, check in check_estimator(FeatureSelector(**selector_kwargs), generate_only=True):
            check_name = check.func.__name__
            if check_name in successful_tests:
                continue
            print(check_name)
            successful_tests.add(check_name)
            check(estimator)

    print("## Running custom tests")
    test_regular_X_y()
    test_regular_df_X_y()
    test_df_X_y()
예제 #7
0
    else:
        raise AssertionError("fit with NaNs should throw an error")
    _ = fsel.fit_transform(pd.DataFrame(X[:900]), target[:900])
    df = fsel.transform(pd.DataFrame(X))
    assert pd.isna(df[0].iloc[998]), "The first feature should be NaN"
    assert np.sum(
        pd.isna(df).to_numpy(dtype=int)) == 1, "only 1 place should be NaN"
    assert set(df.columns) == set(
        [0, 5, 6]), "Wrong features selected (%r)" % df.columns


if __name__ == '__main__':
    print("## Running sklearn tests")
    # NaNs are allowed in transform, so check_estimators_nan_inf is registered
    # as already handled up front and therefore never executed.
    successful_tests = set(["check_estimators_nan_inf"])
    for estimator, check in check_estimator(FeatureSelector(featsel_runs=1),
                                            generate_only=True):
        if check.func.__name__ not in successful_tests:
            print(check.func.__name__)
            successful_tests.add(check.func.__name__)
            check(estimator)
    # Additionally check an estimator built with default parameters; only
    # checks whose name has not been seen above are run.
    # check_estimator needs an estimator *instance*: passing the class itself
    # was deprecated in scikit-learn 0.23 and is rejected since 0.24.
    for estimator, check in check_estimator(FeatureSelector(),
                                            generate_only=True):
        if check.func.__name__ not in successful_tests:
            print(check.func.__name__)
            successful_tests.add(check.func.__name__)
            check(estimator)

    print("## Running custom tests")
    test_regular_X_y()