Ejemplo n.º 1
0
def test_binning_complex():
    """Bin two columns with a list-valued ``n_bins`` in both overwrite modes."""
    # One bin count per column: 2 bins for "a", 3 bins for "b"
    transformer = BinningTransformer(
        cols=["a", "b"],
        n_bins=[2, 3],
        strategy="uniform",
        return_bin_label=False,
        overwrite=True,
    )
    transformer.fit(iris)
    overwritten = transformer.transform(iris)

    # overwriting in place must leave the column layout untouched
    original_columns = iris.columns.tolist()
    assert overwritten.columns.tolist() == original_columns

    # each column should contain exactly the bin indices 0..n_bins-1
    for col, levels in (("a", [0, 1]), ("b", [0, 1, 2])):
        assert_array_equal(np.unique(overwritten[col].values), levels)

    # with return_bin_label=False both binned columns carry an integer dtype
    for col in ("a", "b"):
        assert overwritten.dtypes[col].name.startswith("int")

    # Flip overwrite off on the already-fitted transformer: the binned
    # values should now be appended as new "<col>_binned" columns
    transformer.overwrite = False
    appended = transformer.transform(iris)
    n_columns = appended.shape[1]
    assert n_columns == 6
    expected_columns = ["a", "b", "c", "d", "a_binned", "b_binned"]
    assert appended.columns.tolist() == expected_columns, appended.columns
Ejemplo n.º 2
0
def test_binning_simple():
    """Bin a single column into three uniform bins, as labels and as ints."""
    transformer = BinningTransformer(
        cols=["a"],
        n_bins=3,
        strategy="uniform",
        return_bin_label=True,
        overwrite=True,
    )
    transformer.fit(iris)
    labeled = transformer.transform(iris)

    # transform must hand back a different object than its input
    assert labeled is not iris

    # ... while the column layout is unchanged, since "a" is overwritten
    original_columns = iris.columns.tolist()
    assert labeled.columns.tolist() == original_columns

    # with bin labels requested, "a" is stored with the object dtype
    label_dtype = labeled.dtypes['a'].name
    assert label_dtype == 'object'

    # Turning return_bin_label off on the fitted transformer and
    # transforming again should yield integer bin indices instead
    transformer.return_bin_label = False
    indexed = transformer.transform(iris)
    index_dtype = indexed.dtypes['a'].name
    assert index_dtype.startswith("int")

    # all three bin levels are present
    observed_levels = np.unique(indexed["a"].values)
    assert_array_equal(observed_levels, [0, 1, 2])
Ejemplo n.º 3
0
def test_binning_pctile():
    """Check the bin labels produced by the "percentile" strategy on "a"."""
    transformer = BinningTransformer(
        cols=["a"],
        n_bins=3,
        strategy="percentile",
        return_bin_label=True,
        overwrite=False,
    )
    transformer.fit(iris)
    binned = transformer.transform(iris)

    # overwrite=False, so the labels are read from the "a_binned" column
    observed_labels = np.unique(binned["a_binned"].values).tolist()
    expected_labels = ["(-Inf, 5.40]", "(5.40, 6.30]", "(6.30, Inf]"]
    assert observed_labels == expected_labels, observed_labels