예제 #1
0
def test_binning_pctile():
    """Check the bin labels produced by 3-bin percentile binning of "a".

    The transformer is built with ``overwrite=False`` and
    ``return_bin_label=True``; the test reads the labels back from the
    "a_binned" column of the transformed frame.
    """
    transformer = BinningTransformer(
        cols=["a"],
        n_bins=3,
        strategy="percentile",
        return_bin_label=True,
        overwrite=False,
    )

    transformer.fit(iris)
    binned = transformer.transform(iris)

    # distinct labels, in the sorted order np.unique returns them
    observed_labels = np.unique(binned["a_binned"].values).tolist()
    expected_labels = ["(-Inf, 5.40]", "(5.40, 6.30]", "(6.30, Inf]"]
    assert observed_labels == expected_labels, observed_labels
예제 #2
0
def test_binning_complex():
    """Uniform binning of "a" and "b" with a list-valued ``n_bins``.

    First transforms with ``overwrite=True`` and integer output, then
    flips ``overwrite`` to False on the fitted transformer and checks
    the resulting column layout.
    """
    # n_bins is a list here: 2 bins for "a", 3 bins for "b"
    transformer = BinningTransformer(
        cols=["a", "b"],
        n_bins=[2, 3],
        strategy="uniform",
        return_bin_label=False,
        overwrite=True,
    )

    transformer.fit(iris)
    overwritten = transformer.transform(iris)

    # the column names are the same as in the input frame
    assert overwritten.columns.tolist() == iris.columns.tolist()

    # each binned column holds exactly the expected integer levels
    for col, levels in (("a", [0, 1]), ("b", [0, 1, 2])):
        assert_array_equal(np.unique(overwritten[col].values), levels)

    # and both binned columns carry an integer dtype
    for col in ("a", "b"):
        assert overwritten.dtypes[col].name.startswith("int")

    # Re-run the transform on the same fitted object with overwrite = False
    transformer.overwrite = False
    appended = transformer.transform(iris)
    assert appended.shape[1] == 6

    expected_columns = ["a", "b", "c", "d", "a_binned", "b_binned"]
    assert appended.columns.tolist() == expected_columns, appended.columns
예제 #3
0
def test_binning_simple():
    """Uniform 3-bin binning of "a" with ``overwrite=True``.

    Checks the transformed frame when bin labels are returned, then
    switches ``return_bin_label`` off on the fitted transformer and
    checks the integer output.
    """
    transformer = BinningTransformer(
        cols=["a"],
        n_bins=3,
        strategy="uniform",
        return_bin_label=True,
        overwrite=True,
    )
    transformer.fit(iris)
    labelled = transformer.transform(iris)

    # transform must hand back a different object than its input
    assert labelled is not iris

    # ... with the very same column names
    assert labelled.columns.tolist() == iris.columns.tolist()

    # the binned column is now of object dtype
    assert labelled.dtypes["a"].name == "object"

    # with return_bin_label turned off, a second transform is expected
    # to yield an integer column instead
    transformer.return_bin_label = False
    coded = transformer.transform(iris)
    assert coded.dtypes["a"].name.startswith("int")

    # exactly three distinct levels are present
    assert_array_equal(np.unique(coded["a"].values), [0, 1, 2])
예제 #4
0
def test_binning_corners():
    """Fitting on iris must raise for each invalid configuration below.

    Every entry pairs constructor keyword arguments with the exception
    type that ``fit`` is expected to raise.
    """
    failing_configs = [
        # n_bins contains a column that is not listed in cols
        (dict(cols=["a"], n_bins={"b": 2}), ValueError),

        # same reason: "b" is in n_bins but not in cols
        (dict(cols=["a", "c"], n_bins={"a": 3, "b": 2}), ValueError),

        # a bad integer in n_bins
        (dict(cols=["a", "c"], n_bins=[2, 1]), ValueError),

        # dim mismatch between cols and n_bins
        (dict(cols=["a", "c"], n_bins=[2]), ValueError),

        # n_bins is illegal
        (dict(cols=["a", "c"], n_bins=None), TypeError),

        # strategy is illegal
        (dict(cols=["a"], n_bins=3, strategy="illegal"), ValueError),
    ]

    for ctor_kwargs, expected_exc in failing_configs:
        candidate = BinningTransformer(**ctor_kwargs)
        assert_raises(expected_exc, candidate.fit, iris)
예제 #5
0
   <br/>
"""
print(__doc__)

# Author: Taylor Smith <*****@*****.**>

from matplotlib import pyplot as plt
from skoot.datasets import load_iris_df
from skoot.preprocessing import BinningTransformer

# #############################################################################
# Load the iris features and configure the binner
iris = load_iris_df(include_tgt=False, names=["a", "b", "c", "d"])
binner = BinningTransformer(
    cols=["a", "b"],
    n_bins=4,
    strategy="uniform",
    return_bin_label=True,
    overwrite=False,
)

# Fit and transform in one step, then print the head of the binned dataset
binned = binner.fit_transform(iris)
print(binned.head())

# #############################################################################
# Show where the boundaries reside

# Drop the first lower bound (the -np.inf), keeping only the remaining ones
a_bins = binner.bins_["a"]
a_lower = a_bins.lower_bounds[1:]

# Histogram of the raw values of feature "a"
plt.hist(iris["a"].values)

# One dashed vertical line per bin boundary
for bound in a_lower:
    plt.axvline(bound, ls="--")

plt.title("Iris feature 'a' + bin markers")
예제 #6
0
def test_binning_persistable():
    """Hand a default BinningTransformer to ``assert_persistable``.

    The helper also receives the path "location.pkl" and the iris frame.
    """
    default_binner = BinningTransformer()
    assert_persistable(default_binner, "location.pkl", iris)
예제 #7
0
def test_binning_asdf():
    """Hand a default BinningTransformer and iris to
    ``assert_transformer_asdf``."""
    default_binner = BinningTransformer()
    assert_transformer_asdf(default_binner, iris)