Example No. 1
def seq_feature_selection(data, target, n_features=None):
    predictors = data.drop(columns=target).select_dtypes(np.number)
    selector = SequentialFeatureSelector(estimator=LinearRegression(),
                                         n_features_to_select=n_features)
    selector = selector.fit(predictors, data[target])
    selected = selector.get_support(indices=True)
    return predictors.iloc[:, selected].columns.to_list()
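A minimal usage sketch for the helper above. The DataFrame and column names are made up for illustration, and the imports shown are the ones the function itself relies on:

import numpy as np
import pandas as pd
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(100, 4), columns=['a', 'b', 'c', 'target'])
# returns the names of the two columns SFS keeps, e.g. ['a', 'c']
print(seq_feature_selection(df, 'target', n_features=2))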
Example No. 2
def test_bad_direction():
    X, y = make_regression(n_features=5)
    sfs = SequentialFeatureSelector(LinearRegression(),
                                    n_features_to_select="auto",
                                    direction="bad")
    with pytest.raises(ValueError, match="must be either 'forward' or"):
        sfs.fit(X, y)
Example No. 3
def run_sfs(x, y, output=None, caption=''):
    sfs = SequentialFeatureSelector(estimator=KNeighborsClassifier())
    sfs.fit(x, y)
    x_reduced = pd.DataFrame(sfs.transform(x), columns=x.columns[sfs.support_])
    print(f'reduced columns: {x_reduced.columns}')
    if output is not None:  # avoid writing to a 'None/...' path when no directory is given
        x_reduced.to_csv(f'{output}/{caption}-sfs.csv', index=False)
    return x_reduced
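A hedged usage sketch: iris is loaded as a DataFrame so the column-name indexing above works, and output is left as None so no CSV is written. The imports are those run_sfs itself depends on:

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier

X_df, y = load_iris(return_X_y=True, as_frame=True)
x_small = run_sfs(X_df, y)  # keeps roughly half of the columns by default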
Example No. 4
def test_n_features_to_select_float(direction, n_features_to_select, expected):
    # Test passing a float as n_features_to_select
    X, y = make_regression(n_features=10)
    sfs = SequentialFeatureSelector(LinearRegression(),
                                    n_features_to_select=n_features_to_select,
                                    direction=direction, cv=2)
    sfs.fit(X, y)
    assert sfs.n_features_to_select_ == expected
Example No. 5
    def feature_selection(self, direction=None):
        sfs = SequentialFeatureSelector(self.model,
                                        n_features_to_select=3,
                                        direction=direction)
        self.sfs = sfs.fit(self.X_train, self.Y_train)
        self.X_train_columns = self.X_train.columns[sfs.get_support()]
        self.X_test_columns = self.X_test.columns[sfs.get_support()]
        self.X_train = self.X_train[self.X_train_columns]
        self.X_test = self.X_test[self.X_test_columns]
        self.model = LinearRegression().fit(self.X_train, self.Y_train)
Example No. 6
def test_unsupervised_model_fit(n_features_to_select):
    # Make sure that models without classification labels are not being
    # validated

    X, y = make_blobs(n_features=6)
    sfs = SequentialFeatureSelector(
        KMeans(),
        n_features_to_select=n_features_to_select,
    )
    sfs.fit(X)
    assert sfs.transform(X).shape[1] == n_features_to_select
Example No. 7
def test_no_y_validation_model_fit(y):
    # Make sure that other non-conventional y labels are not accepted

    X, clusters = make_blobs(n_features=6)
    sfs = SequentialFeatureSelector(
        KMeans(),
        n_features_to_select=3,
    )

    with pytest.raises((TypeError, ValueError)):
        sfs.fit(X, y)
Example No. 8
def test_sanity(seed, direction, n_features_to_select,
                expected_selected_features):
    # Basic sanity check: 3 features, only f0 and f2 are correlated with the
    # target, f2 having a stronger correlation than f0. We expect f1 to be
    # dropped, and f2 to always be selected.

    rng = np.random.RandomState(seed)
    n_samples = 100
    X = rng.randn(n_samples, 3)
    y = 3 * X[:, 0] - 10 * X[:, 2]

    sfs = SequentialFeatureSelector(LinearRegression(),
                                    n_features_to_select=n_features_to_select,
                                    direction=direction, cv=2)
    sfs.fit(X, y)
    assert_array_equal(sfs.get_support(indices=True),
                       expected_selected_features)
Example No. 9
    def select_greedy(data):
        X, X_test, y = data

        svr = svm.SVR(kernel="rbf", C=100, tol=1).fit(X, y)
        tic = time()
        select = SequentialFeatureSelector(svr,
                                           direction=direction,
                                           n_features_to_select=n_features,
                                           n_jobs=-1).fit(X, y)
        toc = time()

        joblib.dump(select.get_support(), "joblib/greedy_support")

        print(f"features selected: {select.get_support()}")
        print(f"done in: {toc - tic:.2f}s")

        return select.transform(X), select.transform(X_test), y
Example No. 10
def test_raise_deprecation_warning():
    """Check that we raise a FutureWarning with `n_features_to_select`."""
    n_samples, n_features = 50, 3
    X, y = make_regression(n_samples, n_features, random_state=0)

    warn_msg = "Leaving `n_features_to_select` to None is deprecated"
    with pytest.warns(FutureWarning, match=warn_msg):
        SequentialFeatureSelector(LinearRegression()).fit(X, y)
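For reference, later scikit-learn releases dropped this deprecation path; a hedged sketch of the explicit, non-deprecated spelling (assuming scikit-learn >= 1.1):

sfs = SequentialFeatureSelector(LinearRegression(),
                                n_features_to_select="auto", tol=1e-3)
sfs.fit(X, y)  # stops once the cross-validated score gain falls below tol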
Example No. 11
def evaluate_model_sfs(model,
                       predictors,
                       labels,
                       direction='backward',
                       n_features=50,
                       n_jobs=-1):
    sfs = SequentialFeatureSelector(estimator=model,
                                    n_features_to_select=n_features,
                                    cv=5,
                                    direction=direction,
                                    n_jobs=n_jobs)
    sfs.fit(predictors, labels)
    predictors_reduced = predictors[predictors.columns[sfs.support_]]
    scores = cross_val_score(model,
                             predictors_reduced,
                             labels,
                             scoring="neg_mean_absolute_error",
                             cv=5,
                             n_jobs=n_jobs)
    # scoring='neg_mean_absolute_error' yields negated MAE, so flip the sign
    # (a square root is only meaningful for squared-error scores)
    return -scores
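A small usage sketch with synthetic data; the column names are illustrative and n_features is shrunk to fit the toy problem:

import pandas as pd
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

X_arr, y_arr = make_regression(n_samples=120, n_features=8, random_state=0)
X_df = pd.DataFrame(X_arr, columns=[f'f{i}' for i in range(8)])
mae_scores = evaluate_model_sfs(LinearRegression(), X_df, y_arr,
                                direction='forward', n_features=3)
print(mae_scores.mean())  # average cross-validated MAE on the reduced set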
Example No. 12
def select_features(model_name):
    data = read_data()
    X, y = split_data(data)
    model = joblib.load(pathToModels + model_name + '.pkl')

    all_features = X.columns.values
    print('Original feature set: ' + str(all_features))
    print('--------------------------------')

    print('First')
    sfs1 = SFS(model,
               n_features_to_select=16,
               direction='backward',
               scoring='f1',
               cv=10)
    result1 = sfs1.fit(X, y)
    part_of_features1 = all_features[result1.get_support(indices=True)]
    print(result1.get_support())
    print(result1.get_support(indices=True))
    print(str(part_of_features1))
    print('--------------------------------')

    print('Second')
    sfs2 = SFS(model,
               n_features_to_select=16,
               direction='forward',
               scoring='f1',
               cv=10)
    result2 = sfs2.fit(X, y)
    part_of_features2 = all_features[result2.get_support(indices=True)]
    print(result2.get_support())
    print(result2.get_support(indices=True))
    print(str(part_of_features2))
    print('--------------------------------')

    print('Third')
    sfs3 = SFS(model, direction='forward', scoring='f1', cv=10)
    result3 = sfs3.fit(X, y)
    part_of_features3 = all_features[result3.get_support(indices=True)]
    print(result3.get_support())
    print(result3.get_support(indices=True))
    print(str(part_of_features3))
    print('--------------------------------')

    print('Fourth')
    sfs4 = SFS(model, direction='backward', scoring='f1', cv=10)
    result4 = sfs4.fit(X, y)
    part_of_features4 = all_features[result4.get_support(indices=True)]
    print(result4.get_support())
    print(result4.get_support(indices=True))
    print(str(part_of_features4))
Example No. 13
def test_sparse_support():
    # Make sure sparse data is supported

    X, y = make_regression(n_features=10)
    X = scipy.sparse.csr_matrix(X)
    sfs = SequentialFeatureSelector(LinearRegression(), cv=2)
    sfs.fit(X, y)
    sfs.transform(X)
Example No. 14
    def run_sklearn(self):
        """ Train and evaluate models from sklearn """

        classifier_options = {
            #'lr': linear_model.LogisticRegressionCV(cv=10, n_jobs=10, max_iter=10000, verbose=0),
            'lr':
            linear_model.LogisticRegression(n_jobs=10,
                                            max_iter=100000,
                                            verbose=0),
            'svm':
            model_selection.GridSearchCV(svm.LinearSVC(dual=False,
                                                       max_iter=10000,
                                                       verbose=0),
                                         {
                                             'C': [.01, .1, 1, 10, 100],
                                             'penalty': ['l2']
                                         },
                                         n_jobs=10,
                                         cv=10,
                                         verbose=2),
            'mlp':
            neural_network.MLPClassifier(hidden_layer_sizes=(32, 50),
                                         activation='relu',
                                         early_stopping=True,
                                         verbose=2)
        }
        self.clf = classifier_options[self.clf_type]
        if self.sfs_k > 0:
            # Forward feature selection
            print("Doing forward feature selection...")
            sfs = SequentialFeatureSelector(self.clf,
                                            n_features_to_select=self.sfs_k,
                                            n_jobs=-1)
            sfs.fit(self.data.X_train, self.data.y_train)
            # Save out selected features
            outpath = os.path.join(
                '/projects/tumblr_community_identity/tmp/',
                f'sfs{self.sfs_k}_{self.extractor.select_k}.txt')
            np.savetxt(outpath, sfs.get_support())
            print(f"Saved forward feature selection mask to {outpath}")
            sfs_mask = sfs.get_support()
            #sfs_mask = np.loadtxt(
            #    '/projects/tumblr_community_identity/tmp/sfs20_500.txt').astype(bool)
            X_train = self.data.X_train[:, sfs_mask]
            X_dev = self.data.X_dev[:, sfs_mask]
            X_test = self.data.X_test[:, sfs_mask]
            self.data.X_train, self.data.X_dev, self.data.X_test = X_train, X_dev, X_test

        self.model = self.clf.fit(self.data.X_train, self.data.y_train)
        self.test_score = self.model.score(self.data.X_test, self.data.y_test)
        self.train_pred = self.model.predict(self.data.X_train)
        if self.data.X_dev is not None:
            self.dev_score = self.model.score(self.data.X_dev, self.data.y_dev)
            self.dev_pred = self.model.predict(self.data.X_dev)
        self.test_pred = self.model.predict(self.data.X_test)
Example No. 15
def test_pipeline_support():
    # Make sure that pipelines can be passed into SFS and that SFS can be
    # passed into a pipeline

    n_samples, n_features = 50, 3
    X, y = make_regression(n_samples, n_features, random_state=0)

    # pipeline in SFS
    pipe = make_pipeline(StandardScaler(), LinearRegression())
    sfs = SequentialFeatureSelector(pipe, cv=2)
    sfs.fit(X, y)
    sfs.transform(X)

    # SFS in pipeline
    sfs = SequentialFeatureSelector(LinearRegression(), cv=2)
    pipe = make_pipeline(StandardScaler(), sfs)
    pipe.fit(X, y)
    pipe.transform(X)
Example No. 16
def test_nan_support():
    # Make sure nans are OK if the underlying estimator supports nans

    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 10
    X, y = make_regression(n_samples, n_features, random_state=0)
    nan_mask = rng.randint(0, 2, size=(n_samples, n_features), dtype=bool)
    X[nan_mask] = np.nan
    sfs = SequentialFeatureSelector(HistGradientBoostingRegressor(), cv=2)
    sfs.fit(X, y)
    sfs.transform(X)

    with pytest.raises(ValueError, match='Input contains NaN'):
        # LinearRegression does not support nans
        SequentialFeatureSelector(LinearRegression(), cv=2).fit(X, y)
Example No. 17
def test_n_features_to_select(direction, n_features_to_select):
    # Make sure n_features_to_select is respected

    X, y = make_regression(n_features=10)
    sfs = SequentialFeatureSelector(LinearRegression(),
                                    n_features_to_select=n_features_to_select,
                                    direction=direction, cv=2)
    sfs.fit(X, y)
    if n_features_to_select is None:
        n_features_to_select = 5  # n_features // 2
    assert sfs.get_support(indices=True).shape[0] == n_features_to_select
    assert sfs.n_features_to_select_ == n_features_to_select
    assert sfs.transform(X).shape[1] == n_features_to_select
Example No. 18
def feature_selection_k(k, X, y, model_name=None, dataset_name=None):
    if model_name == "RFE":
        model = RFE(n_features_to_select=k, estimator=DecisionTreeClassifier())
    elif model_name == "FSFS":
        model = SequentialFeatureSelector(n_features_to_select=k,
                                          estimator=DecisionTreeClassifier(),
                                          direction="forward")
    else:
        raise ValueError(f"unknown model_name: {model_name!r}")

    start = time.process_time()
    X_trans = model.fit_transform(X, y)
    end = time.process_time()
    runtime = end - start
    X_re = model.inverse_transform(X_trans)
    error = ((X_re - X)**2).mean().mean()
    return {
        "dataset": dataset_name,
        "model": model_name,
        "k": k,
        "runtime": runtime,
        "reconstruction_error": error
    }, model
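A usage sketch on iris; "FSFS" picks the SequentialFeatureSelector branch above, and the imports the function itself needs (time, RFE, SequentialFeatureSelector, DecisionTreeClassifier) are assumed to be in scope:

from sklearn.datasets import load_iris

X_arr, y_arr = load_iris(return_X_y=True)
info, fitted = feature_selection_k(2, X_arr, y_arr,
                                   model_name="FSFS", dataset_name="iris")
print(info)  # runtime and reconstruction error for k=2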
Example No. 19
def test_n_features_to_select_auto(direction):
    """Check the behaviour of `n_features_to_select="auto"` with different
    values for the parameter `tol`.
    """

    n_features = 10
    tol = 1e-3
    X, y = make_regression(n_features=n_features, random_state=0)
    sfs = SequentialFeatureSelector(
        LinearRegression(),
        n_features_to_select="auto",
        tol=tol,
        direction=direction,
        cv=2,
    )
    sfs.fit(X, y)

    max_features_to_select = n_features - 1

    assert sfs.get_support(indices=True).shape[0] <= max_features_to_select
    assert sfs.n_features_to_select_ <= max_features_to_select
    assert sfs.transform(X).shape[1] <= max_features_to_select
    assert sfs.get_support(indices=True).shape[0] == sfs.n_features_to_select_
Example No. 20
from sklearn.feature_selection import SelectFromModel
model1 = LinearSVC(C=0.01, penalty='l2', dual=False,
                   random_state=0).fit(x, y)  # smaller C means a stronger penalty
sfm = SelectFromModel(model1).fit(x, y)
print('Features selected by SelectFromModel selection l2:',
      names[sfm.get_support()])

# Use LinearSVC with penalty='l2' as the base model and SFS for feature selection

from sklearn.feature_selection import SequentialFeatureSelector
model = LinearSVC(C=0.01, penalty='l2', dual=False,
                  random_state=0).fit(x, y)  # smaller C means a stronger penalty
names = np.array(columns)
sfs_forward = SequentialFeatureSelector(model,
                                        n_features_to_select=7,
                                        scoring='accuracy',
                                        cv=5).fit(x, y)
sfs_backward = SequentialFeatureSelector(model,
                                         n_features_to_select=7,
                                         scoring='accuracy',
                                         direction='backward',
                                         cv=5).fit(x, y)
print('Features selected by forward SFS selection l2:',
      names[sfs_forward.get_support()])
print('Features selected by backward SFS selection l2:',
      names[sfs_backward.get_support()])
'''
L1-penalty dimensionality reduction keeps only one of several features that are
equally correlated with the target, so a feature that was not selected is not
necessarily unimportant. It can therefore be refined with the L2 penalty.
Concretely: if a feature has a weight of 1 under L1, collect the features whose
L2 weights differ little from it but whose L1 weights are 0 into a group, and
split the L1 weight evenly across that group; this requires building a new model:
'''
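A minimal sketch of the grouping idea just described (a hypothetical helper, not a library API; it only computes the expanded support, treating any nonzero L1 weight as "selected", and assumes a binary target so coef_ is a single row):

import numpy as np
from sklearn.svm import LinearSVC

def l1_l2_support(x, y, C=0.01, tol=0.1):
    # hypothetical helper: combine L1 and L2 penalized fits as described above
    w_l1 = LinearSVC(C=C, penalty='l1', dual=False,
                     random_state=0).fit(x, y).coef_.ravel()
    w_l2 = LinearSVC(C=C, penalty='l2', dual=False,
                     random_state=0).fit(x, y).coef_.ravel()
    support = w_l1 != 0
    for i in np.flatnonzero(w_l1):
        # rescue features that L1 zeroed out but whose L2 weight is close
        # to that of an L1-selected feature
        support |= (w_l1 == 0) & (np.abs(w_l2 - w_l2[i]) < tol)
    return support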
Example No. 21
def test_bad_n_features_to_select(n_features_to_select):
    X, y = make_regression(n_features=5)
    sfs = SequentialFeatureSelector(LinearRegression(),
                                    n_features_to_select=n_features_to_select)
    with pytest.raises(ValueError, match="must be either None"):
        sfs.fit(X, y)
Example No. 22
# :class:`~sklearn.feature_selection.SequentialFeatureSelector`
# (SFS). SFS is a greedy procedure where, at each iteration, we choose the best
# new feature to add to our selected features based on a cross-validation score.
# That is, we start with 0 features and choose the best single feature with the
# highest score. The procedure is repeated until we reach the desired number of
# selected features.
#
# We can also go in the reverse direction (backward SFS), *i.e.* start with all
# the features and greedily choose features to remove one by one. We illustrate
# both approaches here.

from sklearn.feature_selection import SequentialFeatureSelector

tic_fwd = time()
sfs_forward = SequentialFeatureSelector(ridge,
                                        n_features_to_select=2,
                                        direction="forward").fit(X, y)
toc_fwd = time()

tic_bwd = time()
sfs_backward = SequentialFeatureSelector(ridge,
                                         n_features_to_select=2,
                                         direction="backward").fit(X, y)
toc_bwd = time()

print("Features selected by forward sequential selection: "
      f"{feature_names[sfs_forward.get_support()]}")
print(f"Done in {toc_fwd - tic_fwd:.3f}s")
print("Features selected by backward sequential selection: "
      f"{feature_names[sfs_backward.get_support()]}")
print(f"Done in {toc_bwd - tic_bwd:.3f}s")
Example No. 23
X = data[:, 0:56]  # x_s, time_s, Temp, ni_n, na_n, rho, v, p, E, H
y = data[:, 56:57]  # rhs[0:50]

print(data.shape)
print("x=", X.shape)
print("y=", y.shape)

feature_names = [f"feature {i}" for i in range(X.shape[1])]

est = ExtraTreesRegressor(n_estimators=50)
est = est.fit(X, y.ravel())

tic_fwd = time()
sfs_forward = SequentialFeatureSelector(est,
                                        n_features_to_select=2,
                                        direction="forward").fit(X, y.ravel())
toc_fwd = time()

tic_bwd = time()
sfs_backward = SequentialFeatureSelector(est,
                                         n_features_to_select=2,
                                         direction="backward").fit(
                                             X, y.ravel())
toc_bwd = time()

print("Features selected by forward sequential selection: "
      f"{feature_names[sfs_forward.get_support()]}")
print(f"Done in {toc_fwd - tic_fwd:.3f}s")
print("Features selected by backward sequential selection: "
      f"{feature_names[sfs_backward.get_support()]}")
X_train, X_test, y_train, y_test = \
    T_T_S(X, y, test_size=0.3, random_state=0, stratify=y)
# stratify ensures same class proportions of training and test data sets
print('Training Data Size = ', len(X_train))
print('Test Data Size = ', len(X_test))
print()

pause()

feat_labels = df_wine.columns[1:]
forest = RFC(n_estimators=500, random_state=1)
forest.fit(X_train, y_train)

tic_fwd = time()
sfs_forward = SequentialFeatureSelector(forest,
                                        n_features_to_select=5,
                                        direction='forward').fit(
                                            X_train, y_train)
toc_fwd = time()

tic_bwd = time()
sfs_backward = SequentialFeatureSelector(forest,
                                         n_features_to_select=5,
                                         direction='backward').fit(
                                             X_train, y_train)
toc_bwd = time()

print("Features selected by forward sequential selection: "
      f"{feat_labels[sfs_forward.get_support()]}")
print(f"Done in {toc_fwd - tic_fwd:.3f}s")
print("Features selected by backward sequential selection: "
      f"{feat_labels[sfs_backward.get_support()]}")
Example No. 25
    # Plot
    scores_train = df_results_train.iloc[:, 0:5].values.tolist()
    scores_test  = df_results_test.iloc[:, 0:5].values.tolist()
    mean_train   = np.mean(scores_train, axis=1)
    sd_train     = np.std(scores_train, axis=1)
    mean_test    = np.mean(scores_test, axis=1)
    sd_test      = np.std(scores_test, axis=1)
    pyplot.plot(df_results_train.n_features, mean_train, 'o-', color='r', label="Training")
    pyplot.plot(df_results_test.n_features,  mean_test,  'o-', color='g', label="Cross-validation")
    pyplot.fill_between(df_results_train.n_features, mean_train - sd_train, mean_train + sd_train, alpha=0.1)
    pyplot.fill_between(df_results_test.n_features,  mean_test  - sd_test,  mean_test  + sd_test,  alpha=0.1)
    pyplot.ylabel('MAE', fontsize=16)
    pyplot.xlabel('N features', fontsize=16)
    pyplot.legend(loc="best")
    # Select n_features:
    sfs = SequentialFeatureSelector(estimator=model, n_features_to_select=26, cv=myCViterator, direction='forward', n_jobs=6)
    sfs.fit(predictors_train, labels_train)
    data_reduced_train = train_prepared[ np.append(np.array(predictors_train.columns[sfs.support_]),['log_visit_rate']) ]
    data_reduced_test  = test_prepared[ np.append(np.array(predictors_test.columns[sfs.support_]),['log_visit_rate']) ]
    data_reduced_train.to_csv('C:/Users/angel/git/Observ_models/data/ML/Regression/train/data_reduced_15.csv', index=False)
    data_reduced_test.to_csv('C:/Users/angel/git/Observ_models/data/ML/Regression/test/data_reduced_15.csv', index=False)

    # #######################################
    # # Permutation importance
    # #######################################
    # model = SVR(C=2.62, epsilon=0.05, gamma=0.21) #{'C': 2.6180503547870377, 'coef0': -0.5901821308327051, 'epsilon': 0.045644987037295054, 'gamma': 0.2112333725279757, 'kernel': 'rbf'}
    # model.fit(predictors_train, labels_train)
    # perm_importance = permutation_importance(model, predictors_train, labels_train, random_state=135, n_jobs=6)
    # feature_names = predictors_train.columns
    # feature_importance = pd.DataFrame(sorted(zip(perm_importance.importances_mean, feature_names), reverse=True))
    # pyplot.barh(feature_importance.loc[:,1], feature_importance.loc[:,0])
Example No. 26
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline

my_url = ('https://raw.githubusercontent.com/taroyabuki'
          '/fromzero/master/data/wine.csv')
my_data = pd.read_csv(my_url)

n = len(my_data)
my_data2 = my_data.assign(v1=[i % 2 for i in range(n)],
                          v2=[i % 3 for i in range(n)])
X, y = my_data2.drop(columns=['LPRICE2']), my_data2['LPRICE2']

my_sfs = SequentialFeatureSelector(
    estimator=LinearRegression(),
    direction='forward',  # forward (variable-addition) selection
    cv=LeaveOneOut(),
    scoring='neg_mean_squared_error')

my_pipeline = Pipeline([  # retrain after the feature selection step
    ('sfs', my_sfs),      # feature selection
    ('lr', LinearRegression())
])                        # regression

my_params = {'sfs__n_features_to_select': range(1, 6)}  # upper bound on how many variables to select
my_search = GridSearchCV(estimator=my_pipeline,
                         param_grid=my_params,
                         cv=LeaveOneOut(),
                         scoring='neg_mean_squared_error',
                         n_jobs=-1).fit(X, y)
my_model = my_search.best_estimator_  # model refit with the best parameters
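A short follow-up sketch to inspect the winning configuration; it uses only attributes that exist on the fitted objects above:

print(my_search.best_params_)  # e.g. {'sfs__n_features_to_select': 3}
selected = my_model.named_steps['sfs'].get_support()
print(X.columns[selected])     # names of the predictors the best model kept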
Example No. 27
def evaluate_model_sfs(model, predictors, labels, direction='backward', n_features=50, n_jobs=-1, cv=5):
    sfs = SequentialFeatureSelector(estimator=model, n_features_to_select=n_features, cv=cv, direction=direction, n_jobs=n_jobs)
    sfs.fit(predictors, labels)
    predictors_reduced = predictors[ predictors.columns[sfs.support_] ]
    return cross_validate(model, predictors_reduced, labels, scoring="neg_mean_absolute_error", cv=cv, n_jobs=n_jobs, return_train_score=True)
Example No. 28

from sklearn.feature_selection import SequentialFeatureSelector

lsvc = LinearSVC(C=0.01, penalty="l1", dual=False)
tree_clf = ExtraTreesClassifier(n_estimators=70)

sfs = SequentialFeatureSelector(lsvc, n_features_to_select=9)
sfs_back = SequentialFeatureSelector(lsvc, n_features_to_select=9, direction='backward')

Sequential_SVC_dataset = sfs.fit_transform(X, y)
Sequential_SVC_dataset = pd.DataFrame(Sequential_SVC_dataset)
Sequential_SVC_dataset.name = 'Sequential_SVC_dataset'


datasets.append(Sequential_SVC_dataset)

Sequential_back_SVC_dataset = sfs_back.fit_transform(X, y) 
Sequential_back_SVC_dataset = pd.DataFrame(Sequential_back_SVC_dataset)
Sequential_back_SVC_dataset.name = 'Sequential_back_SVC_dataset'


datasets.append(Sequential_back_SVC_dataset)
Example No. 29
# :class:`~sklearn.feature_selection.SequentialFeatureSelector`
# (SFS). SFS is a greedy procedure where, at each iteration, we choose the best
# new feature to add to our selected features based on a cross-validation score.
# That is, we start with 0 features and choose the best single feature with the
# highest score. The procedure is repeated until we reach the desired number of
# selected features.
#
# We can also go in the reverse direction (backward SFS), *i.e.* start with all
# the features and greedily choose features to remove one by one. We illustrate
# both approaches here.

from sklearn.feature_selection import SequentialFeatureSelector

tic_fwd = time()
sfs_forward = SequentialFeatureSelector(lasso,
                                        n_features_to_select=2,
                                        direction='forward').fit(X, y)
toc_fwd = time()

tic_bwd = time()
sfs_backward = SequentialFeatureSelector(lasso,
                                         n_features_to_select=2,
                                         direction='backward').fit(X, y)
toc_bwd = time()

print("Features selected by forward sequential selection: "
      f"{feature_names[sfs_forward.get_support()]}")
print(f"Done in {toc_fwd - tic_fwd:.3f}s")
print("Features selected by backward sequential selection: "
      f"{feature_names[sfs_backward.get_support()]}")
print(f"Done in {toc_bwd - tic_bwd:.3f}s")
# -----------------------------------------
# A new iterative transformer to select features is available:
# :class:`~sklearn.feature_selection.SequentialFeatureSelector`.
# Sequential Feature Selection can add features one at a time (forward
# selection) or remove features from the list of the available features
# (backward selection), based on a cross-validated score maximization.
# See the :ref:`User Guide <sequential_feature_selection>`.

from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True, as_frame=True)
feature_names = X.columns
knn = KNeighborsClassifier(n_neighbors=3)
sfs = SequentialFeatureSelector(knn, n_features_to_select=2)
sfs.fit(X, y)
print("Features selected by forward sequential selection: "
      f"{feature_names[sfs.get_support().tolist()]}")

##############################################################################
# New PolynomialCountSketch kernel approximation function
# -------------------------------------------------------
# The new :class:`~sklearn.kernel_approximation.PolynomialCountSketch`
# approximates a polynomial expansion of a feature space when used with linear
# models, but uses much less memory than
# :class:`~sklearn.preprocessing.PolynomialFeatures`.

from sklearn.datasets import fetch_covtype
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
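The snippet breaks off after these imports; a hedged completion in the spirit of the 0.24 release-highlights example might look like this (the subset sizes and solver settings here are illustrative, not the original's):

from sklearn.kernel_approximation import PolynomialCountSketch
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler

X, y = fetch_covtype(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=5000, test_size=10000, random_state=42)
pipe = make_pipeline(MinMaxScaler(),
                     PolynomialCountSketch(degree=2, n_components=300),
                     LogisticRegression(max_iter=1000))
print(pipe.fit(X_train, y_train).score(X_test, y_test))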