예제 #1
0
파일: test_pipe.py 프로젝트: waszczak/skoot
def test_complex_grid_search():
    """Run a randomized hyper-parameter search over a multi-step pipeline.

    A pipeline of several transformers followed by a random forest
    classifier is searched with ``RandomizedSearchCV`` using two worker
    processes. The fitted search's best estimator is handed to
    ``profile_estimator``, and the pipeline is then passed to
    ``assert_persistable``.

    ``X_train``, ``y_train``, ``cv``, ``subtract_k`` and ``add_k`` are
    module-level names defined outside this function.
    """
    # Pipeline stages, listed in the order they are applied.
    steps = [
        ('dropper', FeatureFilter()),  # won't drop any
        ('collinearity', MultiCorrFilter(threshold=0.85)),
        ('imputer', SelectiveImputer()),  # pass through since all full
        ('scaler', SelectiveMaxAbsScaler()),
        ('boxcox', BoxCoxTransformer(suppress_warnings=True)),
        ('nzv', NearZeroVarianceFilter()),
        ('pca', SelectivePCA(n_components=0.9)),
        ('custom', make_transformer(subtract_k, k=1)),
        ('model', RandomForestClassifier(n_jobs=1)),
    ]
    pipe = Pipeline(steps)

    # Distributions and candidate lists the search samples from, keyed by
    # "<step name>__<parameter name>".
    param_distributions = {
        'collinearity__threshold': uniform(loc=0.8, scale=0.15),
        'collinearity__method': ['pearson', 'kendall', 'spearman'],
        'pca__n_components': uniform(loc=0.75, scale=0.2),
        'pca__whiten': [True, False],
        'custom__k': [1, 2, 3],
        'custom__func': [subtract_k, add_k],
        'model__n_estimators': randint(5, 10),
        'model__max_depth': randint(2, 5),
        'model__min_samples_leaf': randint(1, 5),
        'model__max_features': uniform(loc=0.5, scale=0.5),
        'model__max_leaf_nodes': randint(10, 15),
    }

    # Two iterations are enough: the goal is only to show the search runs.
    # n_jobs=2 runs it in parallel, which exercises pickling of the classes.
    random_search = RandomizedSearchCV(
        estimator=pipe,
        param_distributions=param_distributions,
        n_iter=2,
        scoring='accuracy',
        cv=cv,
        random_state=42,
        n_jobs=2,
    )
    random_search.fit(X_train, y_train)

    # The winning estimator must be profile-able.
    profile_estimator(random_search.best_estimator_)

    # The pipeline object itself must be persistable.
    assert_persistable(pipe, "location.pkl", X_train, y_train)
예제 #2
0
def test_haversine_persistable():
    """Pass a ``HaversineFeatures`` transformer to ``assert_persistable``.

    The transformer is configured with two (lat, lon) column pairs and is
    checked against the module-level ``X`` using "location.pkl".
    """
    coordinate_pairs = [('from_lat', 'from_lon'), ('to_lat', 'to_lon')]
    transformer = HaversineFeatures(cols=coordinate_pairs)
    assert_persistable(transformer, "location.pkl", X)
예제 #3
0
def test_schema_persistable():
    """Pass a ``SchemaNormalizer`` built from ``schema`` to ``assert_persistable``.

    ``schema`` and ``X`` are module-level names defined outside this function.
    """
    normalizer = SchemaNormalizer(schema)
    assert_persistable(normalizer, "location.pkl", X)
예제 #4
0
def test_date_transformer_persistable():
    """Pass a ``DateTransformer`` to ``assert_persistable``.

    The transformer targets columns "b", "c" and "d"; only the first has an
    explicit date format, the other two are given ``None``.
    """
    date_columns = ["b", "c", "d"]
    formats = ["%m/%d/%Y", None, None]
    transformer = DateTransformer(cols=date_columns, date_format=formats)
    assert_persistable(transformer, "location.pkl", df)
예제 #5
0
def test_all_persistable():
    """Pass a default-constructed instance of each power transformer
    class listed below to ``assert_persistable``, using the module-level
    ``X``.
    """
    transformer_classes = (BoxCoxTransformer, YeoJohnsonTransformer)
    for transformer_cls in transformer_classes:
        # Instantiate inside the loop so each class is built just before
        # it is checked.
        assert_persistable(transformer_cls(), "location.pkl", X)
예제 #6
0
def test_all_persistable():
    """Pass a default-constructed instance of each selective scaler class
    listed below to ``assert_persistable``, using the module-level ``X``.
    """
    scaler_classes = (
        SelectiveStandardScaler,
        SelectiveRobustScaler,
        SelectiveMinMaxScaler,
        SelectiveMaxAbsScaler,
    )
    for scaler_cls in scaler_classes:
        scaler = scaler_cls()
        assert_persistable(scaler, "location.pkl", X)
예제 #7
0
def test_all_persistable():
    """Pass a default-constructed instance of each selective decomposition
    class listed below to ``assert_persistable``, using the module-level
    ``X``.
    """
    decomposition_classes = (
        SelectivePCA,
        SelectiveTruncatedSVD,
        SelectiveNMF,
        SelectiveKernelPCA,
        SelectiveIncrementalPCA,
    )
    for decomposition_cls in decomposition_classes:
        assert_persistable(decomposition_cls(),
                           location="location.pkl",
                           X=X)
예제 #8
0
def test_all_persistable():
    """Pass a default-constructed instance of each feature filter class
    listed below to ``assert_persistable``, using the module-level ``iris``.
    """
    filter_classes = (
        FeatureFilter,
        SparseFeatureFilter,
        MultiCorrFilter,
        NearZeroVarianceFilter,
    )
    for filter_cls in filter_classes:
        feature_filter = filter_cls()
        assert_persistable(feature_filter, location="loc.pkl", X=iris)
예제 #9
0
def test_classifier_imputers_persistable():
    """Pass each classifier-based imputer to ``assert_persistable``.

    Every imputer is built for the 'label' column with a fixed random
    state and checked against the module-level ``Y``.
    """
    # Single-element tuple kept so further imputer classes can be appended.
    imputer_classes = (BaggedClassifierImputer,)
    for imputer_cls in imputer_classes:
        imputer = imputer_cls(cols=['label'], random_state=42)
        assert_persistable(imputer, "location.pkl", Y)
예제 #10
0
def test_regressor_imputers_persistable():
    """Pass a default-constructed ``SelectiveImputer`` and
    ``BaggedRegressorImputer`` to ``assert_persistable``, using the
    module-level ``X``.
    """
    imputer_classes = (SelectiveImputer, BaggedRegressorImputer)
    for imputer_cls in imputer_classes:
        imputer = imputer_cls()
        assert_persistable(imputer, "location.pkl", X)
예제 #11
0
def test_binning_persistable():
    """Pass a default ``BinningTransformer`` to ``assert_persistable``,
    using the module-level ``iris``.
    """
    binner = BinningTransformer()
    assert_persistable(binner, "location.pkl", iris)
예제 #12
0
def test_interaction_persistable():
    """Pass an ``InteractionTermTransformer`` over columns 'a' and 'b' to
    ``assert_persistable``, using the module-level ``X_pd``.
    """
    transformer = InteractionTermTransformer(cols=['a', 'b'])
    assert_persistable(transformer, location='loc.pkl', X=X_pd)
예제 #13
0
def test_combos_persistable():
    """Pass a default ``LinearCombinationFilter`` to ``assert_persistable``,
    using the module-level ``X``.
    """
    combo_filter = LinearCombinationFilter()
    assert_persistable(combo_filter, location="loc.pkl", X=X)
예제 #14
0
def test_time_deltas_persistable():
    """Pass a ``TimeDeltaFeatures`` transformer over columns 'b' and 'c'
    to ``assert_persistable``, using the module-level ``df2``.
    """
    delta_features = TimeDeltaFeatures(cols=['b', 'c'])
    assert_persistable(delta_features, location='loc.pkl', X=df2)
예제 #15
0
def test_date_factorizer_persistable():
    """Pass a ``DateFactorizer`` over column 'b' to ``assert_persistable``,
    using the module-level ``df``.
    """
    factorizer = DateFactorizer(cols=['b'])
    assert_persistable(factorizer, location="loc.pkl", X=df)
예제 #16
0
def test_dummy_persistable():
    """Pass a ``DummyEncoder`` covering every column of ``iris`` to
    ``assert_persistable``.
    """
    all_columns = iris.columns.tolist()
    encoder = DummyEncoder(cols=all_columns)
    assert_persistable(encoder, "location.pkl", iris)