Example #1
0
def test_to_categorical_returns_correct_dataframe(categorical):
    """Check the frame produced by ``ToCategorical.fit_transform``.

    The result must be a ``pd.DataFrame`` with as many rows as the
    ``categorical`` fixture, exactly the expected column names, and a
    numeric dtype in each of those columns.
    """
    expected_cols = (
        'category_a_a1',
        'category_a_a2',
        'category_a_a3',
        'category_b_b1',
        'category_b_b2',
        'category_b_b3',
    )

    result = ToCategorical().fit_transform(categorical)

    assert isinstance(result, pd.DataFrame)
    assert len(result) == len(categorical)
    assert set(result.columns) == set(expected_cols)
    for column_name in expected_cols:
        assert pd.api.types.is_numeric_dtype(result[column_name])
Example #2
0
def test_to_categorical_discards_unseen_values(categorical):
    """Check ``transform`` on data containing extra category values.

    The transformer is fitted on the ``categorical`` fixture and then
    applied to a frame whose last row holds "ab1" / "ab2" -- values that
    have no matching expected column (presumably absent from the fixture;
    verify against its definition). The output must contain no missing
    values and exactly the expected columns.
    """
    expected_cols = {
        'category_a_a1',
        'category_a_a2',
        'category_a_a3',
        'category_b_b1',
        'category_b_b2',
        'category_b_b3',
    }
    fitted = ToCategorical()
    fitted.fit(categorical)

    raw_values = {
        "category_a": ["a1", "a2", "ab1"],
        "category_b": ["b1", "b2", "ab2"],
    }
    result = fitted.transform(pd.DataFrame(raw_values))

    assert isinstance(result, pd.DataFrame)
    # Total count of missing cells across every column must be zero.
    assert result.isna().sum().sum() == 0
    assert set(result.columns) == expected_cols
Example #3
0
def test_featureunion_returns_concatenated_df(categorical, numerical):
    """Check that ``DFFeatureUnion`` yields one combined DataFrame.

    The union has a categorical branch (``Select`` followed by
    ``ToCategorical``) and a numeric branch (``Select`` only). The output
    must be a ``pd.DataFrame`` with 8 columns and as many rows as the
    input frame.
    """
    combined_input = pd.concat([categorical, numerical], axis=1)

    category_branch = make_pipeline(
        Select(['category_a', 'category_b']),
        ToCategorical(),
    )
    number_branch = Select(['number_a', 'number_b'])
    feature_union = DFFeatureUnion([
        ('category', category_branch),
        ('number', number_branch),
    ])

    combined_output = feature_union.fit_transform(combined_input)

    assert isinstance(combined_output, pd.DataFrame)
    assert len(combined_output.columns) == 8
    assert len(combined_output) == len(combined_input)
    def test_featureunion_returns_concatenated_df(self,
                                                  categorical: pd.DataFrame,
                                                  numerical: pd.DataFrame):
        """Check that ``DFFeatureUnion`` yields one combined DataFrame.

        A ``Select`` + ``ToCategorical`` pipeline and a plain ``Select``
        are joined in the union. The output must be a ``pd.DataFrame``
        with 8 columns and the same number of rows as the input.
        """
        source = pd.concat([categorical, numerical], axis=1)

        transformers = [
            ("category",
             make_pipeline(Select(["category_a", "category_b"]),
                           ToCategorical())),
            ("number", Select(["number_a", "number_b"])),
        ]
        output = DFFeatureUnion(transformers).fit_transform(source)

        assert isinstance(output, pd.DataFrame)
        assert len(output.columns) == 8
        assert len(output) == len(source)
"""
bed_type
========
What type of bed is available. A better bed should increase the price.

Type of bed
dtype: category
"""

from ml_tooling.transformers import Select, ToCategorical
from sklearn.pipeline import Pipeline

# Two-step pipeline for the "bed_type" feature: Select("bed_type") feeds
# its output into ToCategorical().
bed_type = Pipeline(
    steps=[
        ("select", Select("bed_type")),
        ("categorical", ToCategorical()),
    ]
)
 def test_works_without_args(self):
     """ToCategorical can be built with no arguments and is truthy."""
     transformer = ToCategorical()
     assert transformer
 def test_to_categorical_works_gridsearch(self, train_iris_dataset):
     """Scoring a grid search built around ToCategorical returns a Result."""
     search = create_gridsearch(ToCategorical())
     scored = Model(search).score_estimator(train_iris_dataset)
     assert isinstance(scored, Result)
 def test_to_categorical_works_in_cv(self, train_iris_dataset):
     """Scoring a ToCategorical model with ``cv=2`` returns a Result."""
     cv_model = create_model(ToCategorical())
     scored = cv_model.score_estimator(train_iris_dataset, cv=2)
     assert isinstance(scored, Result)