def rfecv_credit_example(image="rfecv_credit.png"):
    X, y = load_credit()

    _, ax = plt.subplots()
    cv = StratifiedKFold(5)
    oz = RFECV(RandomForestClassifier(), ax=ax, cv=cv, scoring="f1_weighted")
    oz.fit(X, y)
    oz.show(outpath=os.path.join(IMAGES, image))


def rfecv_sklearn_example(image="rfecv_sklearn_example.png"):
    X, y = make_classification(
        n_samples=1000,
        n_features=25,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        random_state=0,
    )

    _, ax = plt.subplots()

    oz = RFECV(SVC(kernel="linear", C=1), ax=ax)
    oz.fit(X, y)
    oz.show(outpath=os.path.join(IMAGES, image))
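
The two helpers above are excerpted from a larger plotting script; roughly, they assume imports and constants along these lines (a sketch only: IMAGES is a stand-in for the output directory, and load_credit may be the loader shipped in yellowbrick.datasets or a local helper in the original module):

import os

import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold

from yellowbrick.model_selection import RFECV
from yellowbrick.datasets import load_credit  # assumed; the original may define its own loader

IMAGES = "images"  # assumed output directory for the rendered plots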
Example no. 3
# In[24]:


# instantiate the model class
reg = LinearRegression()


# In[25]:


# Rank the features with RFE with cross-validation.
# The dashed line marks the maximum score, reached with 27 features.
rfecv = RFECV(reg, step=1, cv=3)
rfecv.fit(X, y)
rfecv.show()


# In[26]:


# list of features used by the model
list(zip(X.columns, rfecv.support_))


# In[27]:


# select 5 features for the model using plain RFE (no cross-validation)
rfe = RFE(reg, n_features_to_select=5, step=1)
rfe = rfe.fit(X, y)
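
To see which columns the plain RFE kept, the fitted selector can be inspected directly; a small sketch in the spirit of the In[26] cell above, assuming X is still the same pandas DataFrame:

# boolean mask and integer ranking produced by the fitted RFE
list(zip(X.columns, rfe.support_))

# names of the 5 retained features and the reduced design matrix
selected_columns = X.columns[rfe.support_]
X_reduced = rfe.transform(X)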
Example no. 4
    "auc_roc_dtree": 9,
    "auc_roc_bernoulliNB": 10,
    "auc_roc_LDA": 11,
    "auc_roc_gaussianNB": 12
}

df_results = df_results.replace({"Best model ROC": model_dict})
y = df_results["Best model ROC"]

cv = StratifiedKFold(2)
visualizer = RFECV(RandomForestClassifier(n_estimators=10),
                   cv=cv,
                   scoring='accuracy')

visualizer.fit(X, y)  # Fit the data to the visualizer
visualizer.show()

print("Optimal number of features : %d" % visualizer.n_features_)
print(visualizer.ranking_)
print(visualizer.estimator_.feature_importances_)

# indices of the features ranked 1 (i.e. kept) by the RFECV selector
index_list = [index for index, value in enumerate(visualizer.ranking_) if value == 1]

selected_X = df_results.iloc[:, index_list]

with open('RFE_X_AUCROC_202.pickle', 'wb') as handle:
    # body truncated in the original; presumably the selected matrix is serialized, e.g.:
    pickle.dump(selected_X, handle)
Example no. 5

# def check_model(c, n, X, y, X_test, y_test, class_names, outdir):
#     model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
#     rfe = RFE(model, n_features_to_select=n)

#     fit = rfe.fit(X,y)
#     y_predict = fit.predict(X_test)
#     predict_df = pd.DataFrame(y_predict.tolist())
#     predict_df.to_csv(outdir + '/predict_label.csv', sep='\t', index=False)



# test = [[1e-3,12],[1e-6,7],[1.0,32],[1e-6,27]]
# for c,n in test:
#     print(str(c)+ '-' +str(n))
#     this_out = 'figures/eva/c' + str(c) + '_n' + str(n)
#     check_model(c=c, n=n, outdir=this_out, X=X, y=y, X_test=X_test, y_test=y_test,
#               class_names=class_names)


## plot RFECV for LinearSVC at several regularization strengths
for c in [1e-6, 1e-3, 1]:
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    _, ax = plt.subplots()  # fresh axes so curves from different C values do not overlap
    viz = RFECV(model, ax=ax, scoring='f1_weighted')
    viz.fit(X, y)
    # include C in the filename so each iteration writes its own plot instead of overwriting the last
    viz.show(outpath='figures/linear_svc_rfecv_c{}.pdf'.format(c))
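
If the selected subset is needed downstream and not just the plot, the fitted yellowbrick visualizer exposes the usual RFECV attributes; a sketch (run after the loop, so it reports the last C only, and assuming X is a NumPy array):

print("optimal number of features: %d" % viz.n_features_)
X_selected = X[:, viz.support_]        # keep only the surviving columns
# X_selected = X.loc[:, viz.support_]  # equivalent if X is a pandas DataFrame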