def test_two_numeric_one_feature_passed(self):
    """Only the column listed in ``features`` should be bucketized.

    With ``bins=1`` and ``features=['x']`` the test expects every value of
    ``x`` to become ``0`` while ``y`` is left unchanged.
    """
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [1, 2, 3]})
    expected = pd.DataFrame({"x": [0, 0, 0], "y": [1, 2, 3]})

    result = Bucketizer(bins=1, features=["x"]).fit_transform(frame)

    self.compare_DataFrame(result, expected)
def test_two_numeric_all_numeric(self):
    """Both columns should be bucketized when ``bin_numeric=True``.

    With ``bins=1`` the test expects every value of ``x`` and ``y`` to
    become ``0``.
    """
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [1, 2, 3]})
    expected = pd.DataFrame({"x": [0, 0, 0], "y": [0, 0, 0]})

    result = Bucketizer(bins=1, bin_numeric=True).fit_transform(frame)

    self.compare_DataFrame(result, expected)
def test_one_numeric_feature_prefix(self):
    """Only ``x`` should be bucketized when ``prefix='x'`` is given.

    ``bin_numeric`` is explicitly disabled, so the test expects ``x`` to
    become ``0`` everywhere (``bins=1``) while ``y`` is left unchanged.
    """
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [1, 2, 3]})
    expected = pd.DataFrame({"x": [0, 0, 0], "y": [1, 2, 3]})

    bucketizer = Bucketizer(bins=1, bin_numeric=False, prefix="x")
    result = bucketizer.fit_transform(frame)

    self.compare_DataFrame(result, expected)
def test_one_numeric_feature_no_transformation(self):
    """A default-constructed Bucketizer should leave the frame unchanged."""
    frame = pd.DataFrame({"x": [1, 2, 3]})
    expected = pd.DataFrame({"x": [1, 2, 3]})

    result = Bucketizer().fit_transform(frame)

    self.compare_DataFrame(result, expected)
def test_no_numeric_feature(self):
    """A non-numeric column listed in ``features`` should be left unchanged.

    The column mixes a missing value with strings, so the test expects the
    output to equal the input.
    """
    # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0 and
    # raises AttributeError there, while ``np.nan`` works on every version.
    df = pd.DataFrame({'x': [np.nan, 'b', 'c']})
    df_transformed_correct = pd.DataFrame({'x': [np.nan, 'b', 'c']})

    bucket = Bucketizer(features=['x'])
    df_transformed = bucket.fit_transform(df)

    self.compare_DataFrame(df_transformed, df_transformed_correct)
def test_no_replace(self):
    """With ``replace=False`` the binned values go into a new column.

    The test expects the original ``x`` to be kept as-is and an added
    ``x_binned`` column holding ``0`` for every row (``bins=1``).
    """
    frame = pd.DataFrame({"x": [1, 2, 3]})
    expected = pd.DataFrame({"x": [1, 2, 3], "x_binned": [0, 0, 0]})

    bucketizer = Bucketizer(bins=1, replace=False, bin_numeric=True)
    result = bucketizer.fit_transform(frame)

    self.compare_DataFrame(result, expected)
def test_empty_init(self):
    """Check the attribute values of a Bucketizer built with no arguments."""
    default_bucketizer = Bucketizer()

    # Exact-value checks.
    assert default_bucketizer.features == []
    assert default_bucketizer.bins == 2

    # Truthiness checks only; the concrete default values are not pinned here.
    assert default_bucketizer.replace
    assert not default_bucketizer.bin_numeric
    assert not default_bucketizer.prefix