def test_subset_transformer_mutate(sample_data):
    """Check that SubsetTransformer changes only the column it targets.

    The transformer is built on the 'size' column with an add-one function.
    After fit_transform on the train frame and transform on the test frame,
    'size' must differ from the input while every other column must match
    the input exactly.
    """
    X_tr, X_te = sample_data
    target = 'size'
    transformer = ballet.eng.SubsetTransformer(target, lambda x: x + 1)

    out_tr = transformer.fit_transform(X_tr)
    out_te = transformer.transform(X_te)
    outputs_and_inputs = ((out_tr, X_tr), (out_te, X_te))

    # the targeted column no longer matches the input
    for out, original in outputs_and_inputs:
        assert_series_not_equal(out[target], original[target])

    # all remaining columns come through unchanged
    untouched = [name for name in X_tr.columns if name != target]
    for out, original in outputs_and_inputs:
        assert_frame_equal(out[untouched], original[untouched])
def test_conditional_transformer_both_satisfied(sample_data):
    """Check ConditionalTransformer when both columns meet the condition.

    The condition is ``sum > 0`` and the transform adds one, so both 'value'
    and 'size' are expected to differ from the input, for the train frame and
    for the test frame alike.
    """
    X_tr, X_te = sample_data
    checked_columns = ['value', 'size']

    # every feature is selected by the sum > 0 condition
    transformer = ballet.eng.ConditionalTransformer(
        lambda ser: ser.sum() > 0,
        lambda ser: ser + 1,
    )
    transformer.fit(X_tr)

    # train frame first, then test frame; each is transformed and checked
    # before moving on to the next
    for frame in (X_tr, X_te):
        transformed = transformer.transform(frame)
        for name in checked_columns:
            assert_series_not_equal(transformed[name], frame[name])
def test_conditional_transformer_one_satisfied(sample_data):
    """Check ConditionalTransformer when only one column meets the condition.

    The condition requires every non-null entry to be at least 3. The test
    expects 'size' to be changed by the transform (fill nulls with 0, add
    one) and 'value' to be returned equal to the input.
    """
    X_tr, X_te = sample_data

    def all_at_least_three(ser):
        return (ser.dropna() >= 3).all()

    def fill_then_increment(ser):
        return ser.fillna(0) + 1

    transformer = ballet.eng.ConditionalTransformer(
        all_at_least_three,
        fill_then_increment,
    )
    transformer.fit(X_tr)

    out_tr = transformer.transform(X_tr)
    out_te = transformer.transform(X_te)
    outputs_and_inputs = ((out_tr, X_tr), (out_te, X_te))

    # only 'size' is selected by the condition, so it is modified
    for out, original in outputs_and_inputs:
        assert_series_not_equal(out['size'], original['size'])

    # 'value' has items less than 3, is not selected, and stays as it was
    for out, original in outputs_and_inputs:
        assert_series_equal(out['value'], original['value'])
def test_conditional_transformer_unsatisfy_transform(sample_data):
    """Check the third ConditionalTransformer argument, the unsatisfied case.

    The condition requires every non-null entry to be at least 3. The second
    argument is the identity and the third fills nulls with 0 and subtracts
    one. The test expects 'size' to equal the input and 'value' to differ
    from it.
    """
    X_tr, X_te = sample_data

    def all_at_least_three(ser):
        return (ser.dropna() >= 3).all()

    def identity(ser):
        return ser

    def fill_then_decrement(ser):
        return ser.fillna(0) - 1

    transformer = ballet.eng.ConditionalTransformer(
        all_at_least_three,
        identity,
        fill_then_decrement,
    )
    transformer.fit(X_tr)

    out_tr = transformer.transform(X_tr)
    out_te = transformer.transform(X_te)
    outputs_and_inputs = ((out_tr, X_tr), (out_te, X_te))

    # 'size' meets the condition, so it gets the identity transform and
    # therefore still equals the input
    for out, original in outputs_and_inputs:
        assert_series_equal(out['size'], original['size'])

    # 'value' fails the condition, gets the other transform, and changes
    for out, original in outputs_and_inputs:
        assert_series_not_equal(out['value'], original['value'])
def test_assert_series_not_equal():
    """Exercise assert_series_not_equal on equal and unequal inputs.

    An identical copy of a series must make the helper raise AssertionError.
    A series with different values, a series with a different length, and a
    DataFrame must each be accepted as "not equal".
    """
    a = pd.Series(np.arange(21))

    # an identical copy is equal, so the helper must raise
    b = a.copy()
    with pytest.raises(AssertionError):
        assert_series_not_equal(a, b)

    # same length, different values
    c = a + 1
    assert_series_not_equal(a, c)

    # different length
    d = pd.Series(np.arange(17))
    assert_series_not_equal(a, d)

    # a different container type altogether. The frame is built explicitly
    # and deterministically so the test does not rely on pandas' non-public
    # random-frame testing helpers, which recent pandas releases no longer
    # provide.
    e = pd.DataFrame(
        np.arange(120, dtype=float).reshape(30, 4),
        columns=list('ABCD'),
    )
    assert_series_not_equal(a, e)