Esempio n. 1
0
def test_transform():
    """Check that transform() removes the column named in columns_to_drop.

    The drop list is assigned directly on the filter rather than learned via
    a fit step, so only the transform behaviour is exercised here.
    """
    input_frame = pd.DataFrame({'A': [0., 0.], 'B': [0., 1.]})
    expected_frame = pd.DataFrame({'A': [0., 0.]})

    selector = VarianceFilter()
    # Column 'A' is constant in the input yet is expected to survive: the
    # assertion below only passes if 'B' alone is removed.
    selector.columns_to_drop = ['B']
    result_frame = selector.transform(input_frame)

    assert result_frame.equals(expected_frame)
Esempio n. 2
0
def test_remove_min_variance_for_categorical():
    """Check that columns chosen on the train set are reused on the test set.

    Both frames hold string (categorical) values. The filter is fitted on the
    train frame only and then applied unchanged to the test frame.
    """
    train_input = pd.DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'a', 'a']})
    test_input = pd.DataFrame({'A': ['a', 'a', 'b'], 'B': ['a', 'b', 'c']})

    expected_train = pd.DataFrame({'A': ['a', 'b', 'c']})
    expected_test = pd.DataFrame({'A': ['a', 'a', 'b']})

    # NOTE(review): unique_cut=50 is presumably a percentage threshold on
    # distinct values -- confirm against VarianceFilter's definition.
    selector = VarianceFilter(unique_cut=50)
    train_output = selector.fit_transform(train_input)
    test_output = selector.transform(test_input)

    # 'B' is constant in the train frame and must be removed from both
    # outputs, although it varies in the test frame.
    assert train_output.equals(expected_train)
    # 'A' must be kept in the test output even though its variance in the
    # test frame is below the threshold: the decision comes from the fit on
    # the train frame.
    assert test_output.equals(expected_test)