def test_rfe_cv_n_jobs():
    """Check that RFECV results do not change when ``n_jobs=2`` is set.

    An ``RFECV`` wrapping a linear-kernel ``SVC`` is fitted once without
    setting ``n_jobs``; its ranking, (deprecated) grid scores and
    ``cv_results_`` are recorded. The same estimator is then refitted on the
    same data with ``n_jobs=2`` and every recorded result must match.
    """
    rng = check_random_state(0)
    iris = load_iris()
    # Pad the iris features with six extra columns drawn from rng.normal.
    noise = rng.normal(size=(len(iris.data), 6))
    X = np.c_[iris.data, noise]
    y = iris.target

    # TODO: Remove in v1.2 when grid_scores_ is removed
    deprecation_msg = (
        r"The `grid_scores_` attribute is deprecated in version 1\.0 in "
        r"favor of `cv_results_` and will be removed in version 1\.2."
    )

    # First fit: n_jobs left untouched.
    selector = RFECV(estimator=SVC(kernel="linear"))
    selector.fit(X, y)
    baseline_ranking = selector.ranking_
    with pytest.warns(FutureWarning, match=deprecation_msg):
        baseline_grid_scores = selector.grid_scores_
    baseline_cv_results = selector.cv_results_

    # Second fit: same estimator and data, now with n_jobs=2.
    selector.set_params(n_jobs=2)
    selector.fit(X, y)

    assert_array_almost_equal(selector.ranking_, baseline_ranking)

    # TODO: Remove in v1.2 when grid_scores_ is removed
    with pytest.warns(FutureWarning, match=deprecation_msg):
        assert_array_almost_equal(selector.grid_scores_, baseline_grid_scores)

    refit_cv_results = selector.cv_results_
    assert baseline_cv_results.keys() == refit_cv_results.keys()
    for key, baseline_value in baseline_cv_results.items():
        assert baseline_value == pytest.approx(refit_cv_results[key])
def test_rfe_cv_n_jobs():
    """Check that RFECV results do not change when ``n_jobs=2`` is set.

    An ``RFECV`` wrapping a linear-kernel ``SVC`` is fitted once without
    setting ``n_jobs`` and its ranking and grid scores are recorded. The same
    estimator is then refitted on the same data with ``n_jobs=2`` and both
    results must match the recorded ones.
    """
    # NOTE(review): `grid_scores_` was deprecated in scikit-learn 1.0 and
    # removed in 1.2 in favor of `cv_results_` -- confirm the targeted version.
    rng = check_random_state(0)
    iris = load_iris()
    # Pad the iris features with six extra columns drawn from rng.normal.
    noise = rng.normal(size=(len(iris.data), 6))
    X = np.c_[iris.data, noise]
    y = iris.target

    # First fit: n_jobs left untouched.
    selector = RFECV(estimator=SVC(kernel='linear'))
    selector.fit(X, y)
    baseline_ranking = selector.ranking_
    baseline_grid_scores = selector.grid_scores_

    # Second fit: same estimator and data, now with n_jobs=2.
    selector.set_params(n_jobs=2)
    selector.fit(X, y)

    assert_array_almost_equal(selector.ranking_, baseline_ranking)
    assert_array_almost_equal(selector.grid_scores_, baseline_grid_scores)
# --- Inspect the fitted feature selector --------------------------------
# Boolean mask over the input features (True/False per feature).
support = model.support_
# Feature ranking; chosen features have rank 1.
ranking = model.ranking_
# Cross-validation scores recorded during the elimination.
# NOTE(review): `grid_scores_` was deprecated in scikit-learn 1.0 and removed
# in 1.2 in favor of `cv_results_` -- migrate when upgrading scikit-learn.
grid_scores = model.grid_scores_
# Number of selected features.
selected_features = model.n_features_

# The expressions below only display something in an interactive session;
# their results are discarded when this runs as a script.
model.get_support()  # same as `support`: mask of selected features
model.get_params()  # information about the model
model.set_params()  # called without arguments, so nothing is changed
model.estimator_.coef_

# --- Plot number of features vs. cross-validation scores ----------------
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score (nb of correct classifications)")
plt.plot(range(1, len(grid_scores) + 1), grid_scores)
plt.show()

# --- Export the selected feature columns together with their labels -----
# Hard-coded positions of the 22 columns of X to keep.
selected_column_positions = [
    1, 2, 3, 4, 7, 8, 9, 10, 12, 13, 15,
    17, 18, 19, 21, 26, 27, 28, 29, 32, 33, 34,
]
# `.ix` was removed in pandas 1.0; `.iloc` is the explicit positional indexer.
# NOTE(review): this assumes the integers are column positions (they would
# have been labels under `.ix` only if X had integer column labels) -- confirm.
# `.copy()` makes the new frame independent of X before a column is added.
new_data = X.iloc[:, selected_column_positions].copy()
new_data['labels'] = df['label']
new_data.to_csv('selected_features_22.csv', index=False, header=True)