def test_feature_selector_category():
    """Transform ``df_input_start`` through the cleaners, ``AddFeatures`` and
    ``FeatureSelector("category")``, then compare the result with
    ``df_output_select_category``.
    """
    steps = [
        InfraCleaner(),
        DomainCleaner(),
        AddFeatures(),
        FeatureSelector("category"),
    ]
    pipeline = make_pipeline(*steps)
    # NOTE(review): transform() is called without a prior fit(); confirm every
    # step is stateless and that the pipeline implementation allows this.
    result = pipeline.transform(df_input_start)
    assert_frame_equal(result, df_output_select_category)
def test_feature_selector_numeric():
    """Transform ``df_input_start`` through the cleaners, ``AddFeatures`` and
    ``FeatureSelector(np.number)``, then compare the result with
    ``df_output_select_numeric``.
    """
    steps = [
        InfraCleaner(),
        DomainCleaner(),
        AddFeatures(),
        FeatureSelector(np.number),
    ]
    pipeline = make_pipeline(*steps)
    # NOTE(review): transform() is called without a prior fit(); confirm every
    # step is stateless and that the pipeline implementation allows this.
    result = pipeline.transform(df_input_start)
    assert_frame_equal(result, df_output_select_numeric)
def test_regroup_categories_to_autre():
    """Fit and transform ``df_input_start`` with the full chain ending in
    ``RegroupeCreateCategoryAutre``, then compare the result with
    ``df_output_regroup_categories``.
    """
    steps = [
        InfraCleaner(),
        DomainCleaner(),
        AddFeatures(),
        FeatureSelector("category"),
        RegroupeCreateCategoryAutre(),
    ]
    pipeline = make_pipeline(*steps)
    # Unlike the sibling tests, this one uses fit_transform rather than transform.
    result = pipeline.fit_transform(df_input_start)
    assert_frame_equal(result, df_output_regroup_categories)
# Example no. 4
# 0
def create_preprocessing_pipeline():
    """Build the pipeline used for technical cleaning and feature construction.

    The pipeline chains, in order, ``InfraCleaner``, ``DomainCleaner`` and
    ``AddFeatures``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Pipeline used for the preprocessing.
    """
    stages = (InfraCleaner(), DomainCleaner(), AddFeatures())
    return make_pipeline(*stages)
def test_add_features():
    """Transform ``df_input_start`` through ``InfraCleaner``, ``DomainCleaner``
    and ``AddFeatures``, then compare the result with ``df_output_addfeatures``.
    """
    steps = [InfraCleaner(), DomainCleaner(), AddFeatures()]
    pipeline = make_pipeline(*steps)
    # NOTE(review): transform() is called without a prior fit(); confirm every
    # step is stateless and that the pipeline implementation allows this.
    result = pipeline.transform(df_input_start)
    assert_frame_equal(result, df_output_addfeatures)
def test_pipeline():
    """Transform ``df_input_start`` through ``InfraCleaner`` then
    ``DomainCleaner`` and compare the result with ``df_output_transformer``.
    """
    steps = [InfraCleaner(), DomainCleaner()]
    pipeline = make_pipeline(*steps)
    # NOTE(review): transform() is called without a prior fit(); confirm every
    # step is stateless and that the pipeline implementation allows this.
    result = pipeline.transform(df_input_start)
    assert_frame_equal(result, df_output_transformer)
def test_transform():
    """Apply ``DomainCleaner.transform`` to ``df_input`` and compare the
    result with ``df_output_transformer``.
    """
    cleaned = DomainCleaner().transform(df_input)
    assert_frame_equal(cleaned, df_output_transformer)
def test_correct_values():
    """Apply the private ``DomainCleaner._correct_values`` step to ``df_input``
    and compare the result with ``df_output_values``.
    """
    corrected = DomainCleaner()._correct_values(df_input)
    assert_frame_equal(corrected, df_output_values)
def test_correct_outliers_errors():
    """Apply the private ``DomainCleaner._correct_outliers_errors`` step to
    ``df_input`` and compare the result with ``df_output_outliers``.
    """
    corrected = DomainCleaner()._correct_outliers_errors(df_input)
    assert_frame_equal(corrected, df_output_outliers)