Example 1
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal
from sklearn.datasets import load_iris
from sklearn.feature_selection import RFECV
from sklearn.svm import SVC
from sklearn.utils import check_random_state


def test_rfe_cv_n_jobs():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    rfecv = RFECV(estimator=SVC(kernel="linear"))
    rfecv.fit(X, y)
    rfecv_ranking = rfecv.ranking_

    # TODO: Remove in v1.2 when grid_scores_ is removed
    msg = (r"The `grid_scores_` attribute is deprecated in version 1\.0 in "
           r"favor of `cv_results_` and will be removed in version 1\.2.")
    with pytest.warns(FutureWarning, match=msg):
        rfecv_grid_scores = rfecv.grid_scores_

    rfecv_cv_results_ = rfecv.cv_results_

    rfecv.set_params(n_jobs=2)
    rfecv.fit(X, y)
    assert_array_almost_equal(rfecv.ranking_, rfecv_ranking)

    # TODO: Remove in v1.2 when grid_scores_ is removed
    with pytest.warns(FutureWarning, match=msg):
        assert_array_almost_equal(rfecv.grid_scores_, rfecv_grid_scores)

    assert rfecv_cv_results_.keys() == rfecv.cv_results_.keys()
    for key in rfecv_cv_results_.keys():
        assert rfecv_cv_results_[key] == pytest.approx(rfecv.cv_results_[key])
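
As the deprecation warning in Example 1 indicates, newer scikit-learn releases expose the cross-validation scores through cv_results_ instead of grid_scores_. A minimal sketch of the replacement access path, assuming the same X and y as in the test above and scikit-learn 1.0 or later:

# Sketch of the non-deprecated access path (assumes the X and y built in the
# test above, and scikit-learn >= 1.0 where cv_results_ exists).
rfecv = RFECV(estimator=SVC(kernel="linear"), n_jobs=2)
rfecv.fit(X, y)
print(sorted(rfecv.cv_results_.keys()))      # e.g. mean_test_score, std_test_score, split*_test_score
print(rfecv.cv_results_["mean_test_score"])  # one aggregated CV score per subset of features evaluated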
Example 2 (an older variant of the same test, from before grid_scores_ was deprecated; it uses the same imports as Example 1)
def test_rfe_cv_n_jobs():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    rfecv = RFECV(estimator=SVC(kernel='linear'))
    rfecv.fit(X, y)
    rfecv_ranking = rfecv.ranking_
    rfecv_grid_scores = rfecv.grid_scores_

    rfecv.set_params(n_jobs=2)
    rfecv.fit(X, y)
    assert_array_almost_equal(rfecv.ranking_, rfecv_ranking)
    assert_array_almost_equal(rfecv.grid_scores_, rfecv_grid_scores)
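
The snippets that follow inspect a fitted selector called model and finally write the selected columns of a DataFrame X to disk, but model, X and df are never defined in the original code. One possible setup is sketched below; the CSV path, the 'label' column name and the RFECV parameters are assumptions, not part of the original:

# Assumed setup for the inspection, plotting and export snippets below.
# The input file name and the 'label' column name are hypothetical.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFECV
from sklearn.svm import SVC

df = pd.read_csv('data.csv')          # hypothetical input file
X = df.drop(columns=['label'])        # feature columns
y = df['label']                       # target column

model = RFECV(estimator=SVC(kernel='linear'), step=1, cv=5)
model.fit(X, y)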
# Inspecting the fitted selector (called model, see the setup above)
# support_ : boolean mask, True for the features that were kept
support = model.support_
# ranking_ : feature ranking, selected features are ranked 1
ranking = model.ranking_
# grid_scores_ : cross-validation score for each subset of features evaluated
# (deprecated since scikit-learn 1.0 in favour of cv_results_, see Example 1)
grid_scores = model.grid_scores_
# n_features_ : number of selected features
selected_features = model.n_features_
# get_support() returns the same boolean mask as support_
model.get_support()

# Information about the model
model.get_params()       # hyper-parameters of the selector and its estimator
model.set_params()       # updates hyper-parameters (a no-op when called without arguments)
model.estimator_.coef_   # coefficients of the final estimator, fitted on the selected features
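
The boolean mask can also be mapped back to the DataFrame's column names to list the surviving features by name; a small sketch reusing the X assumed above (get_support(indices=True) returns positional indices instead of a mask):

# Names and positions of the selected features (assumes X is the DataFrame used above)
selected_columns = X.columns[model.support_]
print(selected_columns.tolist())
print(model.get_support(indices=True))    # same selection, as integer positions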

# Plot number of features vs. cross-validation score
# (grid_scores_ is deprecated in scikit-learn 1.0+, see the cv_results_ variant below)
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross-validation score (mean accuracy)")
plt.plot(range(1, len(model.grid_scores_) + 1), model.grid_scores_)
plt.show()
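
Because grid_scores_ is deprecated (see Example 1), the same plot can be drawn from cv_results_ on scikit-learn 1.0 and later; a sketch under that assumption:

# Equivalent plot without the deprecated attribute (assumes scikit-learn >= 1.0)
mean_scores = model.cv_results_['mean_test_score']
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Mean cross-validation score")
plt.plot(range(1, len(mean_scores) + 1), mean_scores)
plt.show()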

# Keep the chosen columns by position and write them out together with the labels.
# (.ix was removed from pandas; .iloc is the positional replacement. .copy() avoids
# a SettingWithCopyWarning when the labels column is added.)
new_data = X.iloc[:, [
    1, 2, 3, 4, 7, 8, 9, 10, 12, 13, 15, 17, 18, 19, 21, 26, 27, 28, 29, 32,
    33, 34
]].copy()
new_data['labels'] = df['label']
new_data.to_csv('selected_features_22.csv', index=False, header=True)
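
Hard-coding the 22 column positions is brittle; the same reduced dataset can be built directly from the selector's mask. A sketch reusing the assumed X and df from above (the output file name is hypothetical):

# Mask-based alternative to the hard-coded index list above
selected = X.loc[:, model.support_].copy()
selected['labels'] = df['label']
selected.to_csv('selected_features_mask.csv', index=False)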