Exemple #1
0
def test_grouped_function_transformer():
    """Exercise GroupedFunctionTransformer with and without groupby kwargs.

    With ``groupby_kwargs`` the transformed output is compared, as a frame,
    against applying the function to a pandas groupby on the same level.
    Without them it is compared, as a series, against piping the whole
    frame through the function.
    """
    records = {
        'country': ['USA', 'USA', 'USA', 'Canada', 'Fiji'],
        'year': [2001, 2002, 2003, 2001, 2001],
        'length': [1, 2, 3, 4, 5],
        'width': [1.0, 1.0, 7.5, 9.0, 11.0],
    }
    df = pd.DataFrame(data=records)
    df = df.set_index(['country', 'year'])
    df = df.sort_index()

    # grouped on the 'country' index level: the output is a frame
    grouped_trans = ballet.eng.GroupedFunctionTransformer(
        np.sum, groupby_kwargs={'level': 'country'})
    grouped_trans.fit(df)
    assert_frame_equal(
        grouped_trans.transform(df),
        df.groupby(level='country').apply(np.sum),
    )

    # no grouping at all: the output is a series
    plain_trans = ballet.eng.GroupedFunctionTransformer(np.min)
    plain_trans.fit(df)
    assert_series_equal(
        plain_trans.transform(df),
        df.pipe(np.min),
    )
Exemple #2
0
def test_groupwise_transformer_ignore_on_transform_error(sample_data):
    """With handle_error='ignore', a raising transformer leaves data as-is.

    The wrapped transformer raises on every ``transform`` call; the test
    expects the groupwise transformer to return both the training and the
    test frame unchanged.
    """
    X_tr, X_te = sample_data

    # transformer whose transform step always fails
    class TransformErrorTransformer(ballet.eng.BaseTransformer):
        def transform(self, X, **transform_kwargs):
            raise Exception

    groupwise_transformer = ballet.eng.GroupwiseTransformer(
        TransformErrorTransformer(),
        groupby_kwargs={'level': 'name'},
        handle_error='ignore',
    )
    groupwise_transformer.fit(X_tr)

    # training data first, then test data: each must come back untouched
    for X in (X_tr, X_te):
        assert_frame_equal(groupwise_transformer.transform(X), X)
Exemple #3
0
def test_groupwise_transformer_ignore_on_new_group(sample_data,
                                                   individual_transformer,
                                                   groupby_kwargs):
    """With handle_unknown='ignore', transforming an unseen group succeeds.

    The first row of the test data is relabelled to a group name ('Z') that
    was not present when fitting, and the transformed output is compared
    against hand-written expected values.
    """
    X_tr, X_te = sample_data

    transformer = ballet.eng.GroupwiseTransformer(
        individual_transformer,
        groupby_kwargs=groupby_kwargs,
        handle_unknown='ignore',
    )
    transformer.fit(X_tr)

    # relabel the first row so that it belongs to a new group
    flat = X_te.copy().reset_index()
    flat.loc[0, 'name'] = 'Z'
    X_te = flat.set_index(['name', 'year'])

    result = transformer.transform(X_te)

    # the row belonging to the new group Z is expected to keep a nan
    # 'value'; the remaining expected values are listed explicitly
    expected = X_te.copy()
    expected['value'] = np.array([np.nan, 1.5, 5.0])
    expected['size'] = np.array([4.0, 1.0, 4.0])

    assert_frame_equal(result, expected)
Exemple #4
0
def test_single_lagger():
    """Check SingleLagger on a plain series and on a grouped frame.

    First, lagging an ungrouped time series by one step must equal
    ``Series.shift(1)``. Second, lagging a frame grouped on the 'city'
    index level must shift values within each city only, leaving nan in
    the first row of every group.
    """
    # simple test: ungrouped time series.
    # The series is built explicitly rather than with
    # pd.util.testing.makeTimeSeries, which is deprecated and not available
    # in recent pandas releases; fixed values also keep the test
    # deterministic.
    data = pd.Series(
        np.arange(30, dtype=float),
        index=pd.date_range('2000-01-01', periods=30, freq='B'),
    )

    trans = ballet.eng.ts.SingleLagger(1)
    result = trans.fit_transform(data)
    expected_result = data.shift(1)

    assert_series_equal(result, expected_result)

    # grouped test: the lag is applied separately within each city
    data = pd.DataFrame(
        data={
            'city': ['LA', 'LA', 'LA', 'NYC', 'BOS', 'BOS', 'BOS'],
            'year': [2001, 2002, 2003, 2002, 2003, 2004, 2005],
            'width': [1, 2, 3, 4, 5, 6, 7],
        }).set_index(['city', 'year']).sort_index()
    trans = ballet.eng.ts.SingleLagger(1, groupby_kwargs={'level': 'city'})
    result = trans.fit_transform(data)
    expected_result = pd.DataFrame(
        data={
            'city': ['LA', 'LA', 'LA', 'NYC', 'BOS', 'BOS', 'BOS'],
            'year': [2001, 2002, 2003, 2002, 2003, 2004, 2005],
            'width': [np.nan, 1, 2, np.nan, np.nan, 5, 6],
        }).set_index(['city', 'year']).sort_index()

    assert_frame_equal(result, expected_result)
Exemple #5
0
def test_subset_transformer_identity(sample_data):
    """A SubsetTransformer given None as its transformer returns the frames unchanged"""  # noqa
    X_tr, X_te = sample_data

    passthrough = ballet.eng.SubsetTransformer('value', None)
    fitted_output = passthrough.fit_transform(X_tr)
    heldout_output = passthrough.transform(X_te)

    # both outputs must match their inputs exactly
    assert_frame_equal(fitted_output, X_tr)
    assert_frame_equal(heldout_output, X_te)
Exemple #6
0
def test_groupwise_transformer_can_transform(sample_data,
                                             groupwise_transformer):
    """Fit on the training data, then check transform output on both splits.

    For each split the expected frame is the input with its 'value' column
    replaced by hand-written values and its 'size' column removed.
    """
    X_tr, X_te = sample_data
    groupwise_transformer.fit(X_tr)

    # training split
    actual_tr = groupwise_transformer.transform(X_tr)
    wanted_tr = X_tr.copy()
    wanted_tr['value'] = np.array([1, 2, 1.5, 4, 4, 5, 5])
    assert_frame_equal(actual_tr, wanted_tr.drop('size', axis=1))

    # test split
    actual_te = groupwise_transformer.transform(X_te)
    wanted_te = X_te.copy()
    wanted_te['value'] = np.array([1.5, 1.5, 5])
    assert_frame_equal(actual_te, wanted_te.drop('size', axis=1))
Exemple #7
0
def test_subset_transformer_mutate(sample_data):
    """Transforming one column changes that column and leaves the others untouched"""  # noqa
    X_tr, X_te = sample_data

    target_col = 'size'
    mutator = ballet.eng.SubsetTransformer(target_col, lambda x: x + 1)
    out_tr = mutator.fit_transform(X_tr)
    out_te = mutator.transform(X_te)

    # the targeted column must differ from the original
    assert_series_not_equal(out_tr[target_col], X_tr[target_col])
    assert_series_not_equal(out_te[target_col], X_te[target_col])

    # every other column must be identical to the original
    untouched = [name for name in X_tr.columns if name != target_col]
    assert_frame_equal(out_tr[untouched], X_tr[untouched])
    assert_frame_equal(out_te[untouched], X_te[untouched])
Exemple #8
0
def test_assert_frame_equal():
    """Check that assert_frame_equal passes on equal frames and fails otherwise.

    Covered cases: an identical copy (passes); differing values, differing
    shape, frame vs. time series, and frame vs. series with the same data
    (each must raise AssertionError).

    The fixtures are built with public pandas constructors rather than the
    ``makeCustomDataframe`` / ``makeTimeSeries`` helpers, which belong to
    the deprecated pandas.util.testing module and are not available in
    recent pandas releases.
    """
    # 10 x 7 frame whose cell at (row, col) holds row * col
    a = pd.DataFrame([[row * col for col in range(7)] for row in range(10)])
    b = a.copy()
    assert_frame_equal(a, b)

    # same shape, different values
    c = a + 1
    with pytest.raises(AssertionError):
        assert_frame_equal(a, c)

    # different shape (11 x 9)
    d = pd.DataFrame([[row * col for col in range(9)] for row in range(11)])
    with pytest.raises(AssertionError):
        assert_frame_equal(a, d)

    # a time series is not a frame
    e = pd.Series(
        [float(i) for i in range(30)],
        index=pd.date_range('2000-01-01', periods=30, freq='B'),
    )
    with pytest.raises(AssertionError):
        assert_frame_equal(a, e)

    # same underlying data, but frame vs. series
    f = pd.DataFrame([1, 2, 3, 4])
    g = pd.Series([1, 2, 3, 4])
    with pytest.raises(AssertionError):
        assert_frame_equal(f, g)