Esempi in Python per FeatureFilter

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: skoot.feature_selection

Classe/tipologia: FeatureFilter

Esempi su hotexamples.com: 5

FeatureFilter in Python: 5 esempi trovati. Questi sono i migliori esempi reali in Python per skoot.feature_selection.FeatureFilter, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

FeatureFilter(5)

fit_transform(2)

Metodi utilizzati di frequente

FeatureFilter (5)

fit_transform (2)

Esempio n. 1

Mostra file

def test_feature_filter_none():
    """Fit a ``FeatureFilter`` built with ``cols=None`` and check no drops.

    The transformed frame must equal ``iris`` while being a distinct object,
    and the fitted ``drop_`` attribute must be an empty list.
    """
    feature_filter = FeatureFilter(cols=None)

    # With no columns requested, the output should carry the same data as
    # the input, yet not be the very same object.
    filtered = feature_filter.fit_transform(iris)  # type: pd.DataFrame
    assert filtered.equals(iris)
    assert filtered is not iris

    # Nothing was requested, so the recorded drop list should be empty.
    recorded_drops = feature_filter.drop_
    assert recorded_drops == []

Esempio n. 2

Mostra file

def test_feature_filter_some():
    """Filter columns ``'a'`` and ``'b'`` out of ``iris`` and check the rest.

    After the transform neither column may be present, exactly two columns
    must remain, and the result must equal ``iris[['c', 'd']]``.
    """
    filtered = FeatureFilter(cols=['a', 'b']).fit_transform(iris)

    # Both requested columns should be gone from the output.
    for removed in ('a', 'b'):
        assert removed not in filtered.columns

    # Exactly two columns should be left, matching 'c' and 'd' of the input.
    n_remaining = filtered.shape[1]
    assert n_remaining == 2
    assert filtered.equals(iris[['c', 'd']])

Esempio n. 3

Mostra file

File: test_pipe.py Progetto: waszczak/skoot

def test_complex_grid_search():
    """Run a small randomized search over a multi-stage pipeline.

    The pipeline chains several transformers in front of a random forest.
    It is searched with ``RandomizedSearchCV`` on ``X_train``/``y_train``,
    the best estimator is handed to ``profile_estimator``, and the original
    pipeline is handed to ``assert_persistable``.
    """
    # Stages of the pipeline, listed in the order they are applied.
    stages = [
        ('dropper', FeatureFilter()),  # will not drop anything
        ('collinearity', MultiCorrFilter(threshold=0.85)),
        ('imputer', SelectiveImputer()),  # passes through: data is all full
        ('scaler', SelectiveMaxAbsScaler()),
        ('boxcox', BoxCoxTransformer(suppress_warnings=True)),
        ('nzv', NearZeroVarianceFilter()),
        ('pca', SelectivePCA(n_components=0.9)),
        ('custom', make_transformer(subtract_k, k=1)),
        ('model', RandomForestClassifier(n_jobs=1)),
    ]
    pipeline = Pipeline(stages)

    # Hyper-parameter distributions / candidate lists to sample from, keyed
    # by "<stage name>__<parameter name>".
    param_space = {
        'collinearity__threshold': uniform(loc=0.8, scale=0.15),
        'collinearity__method': ['pearson', 'kendall', 'spearman'],
        'pca__n_components': uniform(loc=0.75, scale=0.2),
        'pca__whiten': [True, False],
        'custom__k': [1, 2, 3],
        'custom__func': [subtract_k, add_k],
        'model__n_estimators': randint(5, 10),
        'model__max_depth': randint(2, 5),
        'model__min_samples_leaf': randint(1, 5),
        'model__max_features': uniform(loc=0.5, scale=0.5),
        'model__max_leaf_nodes': randint(10, 15),
    }

    # Set up the randomized search. Only two iterations are sampled, since
    # the point is merely to show that it runs; two jobs are requested so
    # that the custom classes also get pickled for the parallel workers.
    randomized_search = RandomizedSearchCV(
        pipeline,
        param_space,
        n_iter=2,
        scoring='accuracy',
        cv=cv,
        random_state=42,
        n_jobs=2,
    )

    # Fit the search on the training data.
    randomized_search.fit(X_train, y_train)

    # The best estimator found should be profilable.
    best = randomized_search.best_estimator_
    profile_estimator(best)

    # The pipeline itself should be persistable.
    assert_persistable(pipeline, "location.pkl", X_train, y_train)

Esempio n. 4

Mostra file

def test_complex_grid_search():
    """Run a small randomized search over a multi-stage pipeline.

    The pipeline chains several transformers in front of a random forest
    and is searched with ``RandomizedSearchCV`` on ``X_train``/``y_train``.
    The test passes as long as fitting the search completes.
    """
    # Stages of the pipeline, listed in the order they are applied.
    stages = [
        ('dropper', FeatureFilter()),  # will not drop anything
        ('collinearity', MultiCorrFilter(threshold=0.85)),
        ('imputer', SelectiveImputer()),  # passes through: data is all full
        ('scaler', SelectiveScaler()),
        ('boxcox', BoxCoxTransformer()),
        ('nzv', NearZeroVarianceFilter()),
        ('pca', SelectivePCA(n_components=0.9)),
        ('model', RandomForestClassifier(n_jobs=1)),
    ]
    pipeline = Pipeline(stages)

    # Hyper-parameter distributions / candidate lists to sample from, keyed
    # by "<stage name>__<parameter name>".
    param_space = {
        'collinearity__threshold': uniform(loc=0.8, scale=0.15),
        'collinearity__method': ['pearson', 'kendall', 'spearman'],
        'scaler__scaler': [None, RobustScaler()],
        'pca__n_components': uniform(loc=0.75, scale=0.2),
        'pca__whiten': [True, False],
        'model__n_estimators': randint(5, 10),
        'model__max_depth': randint(2, 5),
        'model__min_samples_leaf': randint(1, 5),
        'model__max_features': uniform(loc=0.5, scale=0.5),
        'model__max_leaf_nodes': randint(10, 15),
    }

    # Set up the randomized search. Only two iterations are sampled, since
    # the point is merely to show that it runs.
    randomized_search = RandomizedSearchCV(
        pipeline,
        param_space,
        n_iter=2,
        scoring='accuracy',
        cv=cv,
        random_state=42,
    )

    # Fit the search on the training data.
    randomized_search.fit(X_train, y_train)

Esempio n. 5

Mostra file

def test_filter_asdf():
    """Run ``assert_transformer_asdf`` on a default ``FeatureFilter``."""
    default_filter = FeatureFilter()
    assert_transformer_asdf(default_filter, iris)