def test_transform(): test_df = pd.DataFrame({'A': [0., 0.], 'B': [0., 1.]}) variance_filter = VarianceFilter() variance_filter.columns_to_drop = ['B'] test_df = variance_filter.transform(test_df) assert test_df.equals(pd.DataFrame({'A': [0., 0.]}))
def test_remove_min_variance_for_categorical(): train_df = pd.DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'a', 'a']}) test_df = pd.DataFrame({'A': ['a', 'a', 'b'], 'B': ['a', 'b', 'c']}) variance_filter = VarianceFilter(unique_cut=50) train_df = variance_filter.fit_transform(train_df) test_df = variance_filter.transform(test_df) # Make sure column 'B' is dropped for both train and test set # Also, column 'A' must not be dropped for the test set even though its # variance in the test set is below the threshold assert train_df.equals(pd.DataFrame({'A': ['a', 'b', 'c']})) assert test_df.equals(pd.DataFrame({'A': ['a', 'a', 'b']}))