def test_feature_selector_category():
    """Pipeline ending in ``FeatureSelector("category")`` yields the expected frame.

    The pipeline is applied with ``transform`` only (no fitting) to
    ``df_input_start`` and the result is compared against
    ``df_output_select_category``.
    """
    steps = (
        InfraCleaner(),
        DomainCleaner(),
        AddFeatures(),
        FeatureSelector("category"),
    )
    result = make_pipeline(*steps).transform(df_input_start)
    assert_frame_equal(result, df_output_select_category)
def test_feature_selector_numeric():
    """Pipeline ending in ``FeatureSelector(np.number)`` yields the expected frame.

    The pipeline is applied with ``transform`` only (no fitting) to
    ``df_input_start`` and the result is compared against
    ``df_output_select_numeric``.
    """
    steps = (
        InfraCleaner(),
        DomainCleaner(),
        AddFeatures(),
        FeatureSelector(np.number),
    )
    result = make_pipeline(*steps).transform(df_input_start)
    assert_frame_equal(result, df_output_select_numeric)
def test_regroup_categories_to_autre():
    """Pipeline ending in ``RegroupeCreateCategoryAutre`` yields the expected frame.

    Unlike the sibling selector tests, this pipeline is run with
    ``fit_transform`` on ``df_input_start``; the result is compared against
    ``df_output_regroup_categories``.
    """
    steps = (
        InfraCleaner(),
        DomainCleaner(),
        AddFeatures(),
        FeatureSelector("category"),
        RegroupeCreateCategoryAutre(),
    )
    result = make_pipeline(*steps).fit_transform(df_input_start)
    assert_frame_equal(result, df_output_regroup_categories)
def create_preprocessing_pipeline():
    """
    Build the pipeline used for the technical cleaning and for
    constructing new features.

    The steps are, in order: ``InfraCleaner``, ``DomainCleaner`` and
    ``AddFeatures``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        pipeline used for the preprocessing
    """
    return make_pipeline(InfraCleaner(), DomainCleaner(), AddFeatures())
def test_add_features():
    """Pipeline ending in ``AddFeatures`` yields the expected frame.

    The pipeline is applied with ``transform`` only to ``df_input_start``
    and the result is compared against ``df_output_addfeatures``.
    """
    steps = (InfraCleaner(), DomainCleaner(), AddFeatures())
    result = make_pipeline(*steps).transform(df_input_start)
    assert_frame_equal(result, df_output_addfeatures)
def test_pipeline():
    """``InfraCleaner`` followed by ``DomainCleaner`` yields the expected frame.

    The pipeline is applied with ``transform`` only to ``df_input_start``
    and the result is compared against ``df_output_transformer``.
    """
    steps = (InfraCleaner(), DomainCleaner())
    result = make_pipeline(*steps).transform(df_input_start)
    assert_frame_equal(result, df_output_transformer)
def test_transform():
    """``DomainCleaner.transform`` on ``df_input`` matches ``df_output_transformer``."""
    cleaned = DomainCleaner().transform(df_input)
    assert_frame_equal(cleaned, df_output_transformer)
def test_correct_values():
    """``DomainCleaner._correct_values`` on ``df_input`` matches ``df_output_values``."""
    corrected = DomainCleaner()._correct_values(df_input)
    assert_frame_equal(corrected, df_output_values)
def test_correct_outliers_errors():
    """``DomainCleaner._correct_outliers_errors`` on ``df_input`` matches ``df_output_outliers``."""
    corrected = DomainCleaner()._correct_outliers_errors(df_input)
    assert_frame_equal(corrected, df_output_outliers)