def test_to_categorical_returns_correct_dataframe(categorical):
    """Check the output of ``ToCategorical.fit_transform`` on the fixture.

    The result must be a DataFrame with the same number of rows as the
    ``categorical`` fixture, exactly the six expected columns, and a
    numeric dtype in every one of them.
    """
    wanted_columns = (
        'category_a_a1',
        'category_a_a2',
        'category_a_a3',
        'category_b_b1',
        'category_b_b2',
        'category_b_b3',
    )

    encoded = ToCategorical().fit_transform(categorical)

    assert isinstance(encoded, pd.DataFrame)
    assert len(encoded) == len(categorical)
    # Compare as sets: column order is not part of what this test checks.
    assert set(encoded.columns) == set(wanted_columns)
    assert all(
        pd.api.types.is_numeric_dtype(encoded[name]) for name in wanted_columns
    )
def test_to_categorical_discards_unseen_values(categorical):
    """Transforming data with values not seen during ``fit`` adds nothing new.

    The transformer is fitted on the ``categorical`` fixture and then applied
    to a frame whose last row holds values ("ab1", "ab2") that are absent from
    the expected column names. The output must be a DataFrame without any
    missing values and with exactly the expected columns.
    """
    encoder = ToCategorical()
    encoder.fit(categorical)

    unseen_frame = pd.DataFrame(
        {
            "category_a": ["a1", "a2", "ab1"],
            "category_b": ["b1", "b2", "ab2"],
        }
    )
    encoded = encoder.transform(unseen_frame)

    wanted_columns = {
        'category_a_a1',
        'category_a_a2',
        'category_a_a3',
        'category_b_b1',
        'category_b_b2',
        'category_b_b3',
    }

    assert isinstance(encoded, pd.DataFrame)
    # Total NaN count over the whole frame must be zero.
    assert encoded.isna().sum().sum() == 0
    assert set(encoded.columns) == wanted_columns
def test_featureunion_returns_concatenated_df(categorical, numerical):
    """``DFFeatureUnion.fit_transform`` returns one combined DataFrame.

    The two fixtures are concatenated column-wise, then passed through a
    union of a select-and-``ToCategorical`` branch and a plain ``Select``
    branch. The output must be a DataFrame with eight columns and the same
    number of rows as the input.
    """
    combined = pd.concat([categorical, numerical], axis=1)

    category_branch = make_pipeline(
        Select(['category_a', 'category_b']),
        ToCategorical(),
    )
    number_branch = Select(['number_a', 'number_b'])
    feature_union = DFFeatureUnion(
        [('category', category_branch), ('number', number_branch)]
    )

    output = feature_union.fit_transform(combined)

    assert isinstance(output, pd.DataFrame)
    # Presumably six encoded category columns plus the two selected number
    # columns -- verify against the fixtures.
    assert len(output.columns) == 8
    assert len(output) == len(combined)
def test_featureunion_returns_concatenated_df(self, categorical: pd.DataFrame, numerical: pd.DataFrame):
    """``DFFeatureUnion.fit_transform`` returns one combined DataFrame.

    The two fixture frames are joined column-wise and fed to a union of a
    select-and-``ToCategorical`` branch and a plain ``Select`` branch. The
    output must be a DataFrame with eight columns and as many rows as the
    joined input.
    """
    joined = pd.concat([categorical, numerical], axis=1)

    encode_categories = make_pipeline(
        Select(["category_a", "category_b"]),
        ToCategorical(),
    )
    pick_numbers = Select(["number_a", "number_b"])
    branches = [("category", encode_categories), ("number", pick_numbers)]

    merged = DFFeatureUnion(branches).fit_transform(joined)

    assert isinstance(merged, pd.DataFrame)
    # Presumably six encoded category columns plus the two selected number
    # columns -- verify against the fixtures.
    assert len(merged.columns) == 8
    assert len(merged) == len(joined)
"""
bed_type
========
What type of bed is available. Better bed should increase price

Type of bed
dtype: category
"""
from ml_tooling.transformers import Select, ToCategorical
from sklearn.pipeline import Pipeline

# Two-step feature pipeline: select the "bed_type" column, then pass it
# through ToCategorical.
bed_type = Pipeline(
    steps=[
        ("select", Select("bed_type")),
        ("categorical", ToCategorical()),
    ]
)
def test_works_without_args(self):
    """``ToCategorical`` can be constructed with no arguments and is truthy."""
    transformer = ToCategorical()
    assert transformer
def test_to_categorical_works_gridsearch(self, train_iris_dataset):
    """Scoring a grid search built around ``ToCategorical`` yields a ``Result``.

    The transformer is wrapped by ``create_gridsearch``, the resulting search
    object is wrapped in a ``Model``, and the model is scored on the
    ``train_iris_dataset`` fixture.
    """
    search = create_gridsearch(ToCategorical())
    outcome = Model(search).score_estimator(train_iris_dataset)
    assert isinstance(outcome, Result)
def test_to_categorical_works_in_cv(self, train_iris_dataset):
    """Scoring a ``ToCategorical`` model with ``cv=2`` yields a ``Result``.

    The model comes from ``create_model`` and is scored on the
    ``train_iris_dataset`` fixture.
    """
    estimator = create_model(ToCategorical())
    outcome = estimator.score_estimator(train_iris_dataset, cv=2)
    assert isinstance(outcome, Result)