def LogisticGridSearch_OLD():
    """Grid-search ``C`` for an L1-penalised logistic regression by test AUC.

    NOTE(review): the ``_OLD`` suffix suggests this was superseded by the
    generic ``GridSearch`` -- confirm before relying on it.

    Reads ``f_train``, ``y_train``, ``f_test`` and ``y_test`` from the
    enclosing scope (they are not parameters).  For every candidate ``C`` a
    classifier is fitted on the training split, scored on the test split and
    its ROC curve is handed to ``st.plotROC``.  The candidate with the highest
    AUC (first one wins on ties) is then refitted, plotted with a legend, and
    the figure is shown.

    Returns:
        The ``LogisticRegression`` instance refitted with the best ``C``.
    """
    # Original author's note: C=1 is best.
    cs = 10.0 ** np.arange(-1, 2, 0.25)
    roc_title = 'Grid Search - Logistic Regression ROC Curve'

    aucs = []
    for c in cs:
        clf = LogisticRegression(penalty='l1', C=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        # Column 1 of predict_proba is used as the score fed to roc_curve.
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
        roc_auc = auc(fpr, tpr)
        st.plotROC(fpr, tpr, roc_auc,
                   figure=False, show=False, returnplt=True,
                   showlegend=False, title=roc_title)
        aucs.append(roc_auc)

    # Index of the highest AUC; max() keeps the first maximal entry, matching
    # a strict ">" scan.
    best = max(range(len(aucs)), key=aucs.__getitem__)
    c = cs[best]

    clf = LogisticRegression(penalty='l1', C=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
    # Recompute the AUC for the refitted model; previously the value left over
    # from the last grid iteration was passed to the final plot.
    roc_auc = auc(fpr, tpr)
    myplt = st.plotROC(fpr, tpr, roc_auc,
                       legendlabel='Best C = %0.2e' % c,
                       figure=False, show=False, returnplt=True,
                       showlegend=True, title=roc_title)
    myplt.show()
    return clf
def MultinomialNaiveBayesGridSearch_OLD():
    """Grid-search ``alpha`` for a multinomial naive Bayes model by test AUC.

    NOTE(review): the ``_OLD`` suffix suggests this was superseded by the
    generic ``GridSearch`` -- confirm before relying on it.

    Reads ``f_train``, ``y_train``, ``f_test`` and ``y_test`` from the
    enclosing scope (they are not parameters).  For every candidate ``alpha``
    a classifier is fitted on the training split, scored on the test split and
    its ROC curve is handed to ``st.plotROC``.  The candidate with the highest
    AUC (first one wins on ties) is then refitted, plotted with a legend, and
    the figure is shown.

    Returns:
        The ``MultinomialNB`` instance refitted with the best ``alpha``.
    """
    # NOTE(review): the original carried the comment "C=1 is best", which
    # looks copy-pasted from the logistic-regression search -- the parameter
    # searched here is alpha.
    cs = 10.0 ** np.arange(-9, 2, 0.5)
    roc_title = 'Grid Search - Multinomial Naive Bayes ROC Curve'

    aucs = []
    for c in cs:
        clf = MultinomialNB(alpha=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        # Column 1 of predict_proba is used as the score fed to roc_curve.
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
        roc_auc = auc(fpr, tpr)
        st.plotROC(fpr, tpr, roc_auc,
                   figure=False, show=False, returnplt=True,
                   showlegend=False, title=roc_title)
        aucs.append(roc_auc)

    # Index of the highest AUC; max() keeps the first maximal entry, matching
    # a strict ">" scan.
    best = max(range(len(aucs)), key=aucs.__getitem__)
    c = cs[best]

    clf = MultinomialNB(alpha=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
    # Recompute the AUC for the refitted model; previously the value left over
    # from the last grid iteration was passed to the final plot.
    roc_auc = auc(fpr, tpr)
    myplt = st.plotROC(fpr, tpr, roc_auc,
                       legendlabel='Best alpha = %0.2e' % c,
                       figure=False, show=False, returnplt=True,
                       showlegend=True, title=roc_title)
    myplt.show()
    return clf
def GridSearch(data,params,classifier,classifier_name,paramname,probstype=1,clf_kwargs={}):
    """Search one hyper-parameter of a classifier by test-set ROC AUC.

    For every value in ``params`` a ``classifier`` is constructed with
    ``clf_kwargs`` plus ``paramname=value``, fitted on the training split and
    scored on the test split; each ROC curve is handed to ``st.plotROC``.
    The value with the highest AUC (first one wins on ties) is refitted and
    plotted with a legend, then a second figure shows AUC against the
    parameter on a logarithmic x-axis.

    Args:
        data: 4-tuple ``(f_train, f_test, y_train, y_test)``.
        params: indexable, sized collection of candidate values.  They are
            formatted with ``%e`` and drawn with ``semilogx``, so numeric
            values are expected.  An empty collection raises ``IndexError``.
        classifier: callable returning an estimator with ``fit`` and either
            ``predict_proba`` or ``decision_function``.
        classifier_name: display name used in the plot titles.
        paramname: keyword name of the hyper-parameter being searched.
        probstype: ``1`` scores with ``predict_proba(...)[:, 1]``; any other
            value scores with ``decision_function(...)``.
        clf_kwargs: extra keyword arguments for ``classifier``.  The dict is
            copied before use, so neither the caller's dict nor the shared
            default is ever mutated.

    Returns:
        The estimator refitted with the best parameter value.
    """
    f_train, f_test, y_train, y_test = data
    roc_title = 'Grid Search: ' + classifier_name + ' ROC Curve'

    def get_roc(clf):
        """Return ``(fpr, tpr)`` for ``clf`` on the test split."""
        if probstype == 1:
            scores = clf.predict_proba(f_test)[:, 1]
        else:
            scores = clf.decision_function(f_test)
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=scores)
        return fpr, tpr

    mykwargs = clf_kwargs.copy()
    aucs = []
    for c in params:
        mykwargs[paramname] = c
        clf = classifier(**mykwargs).fit(f_train, y_train)
        fpr, tpr = get_roc(clf)
        roc_auc = auc(fpr, tpr)
        st.plotROC(fpr, tpr, roc_auc,
                   figure=False, show=False, returnplt=True,
                   showlegend=False, title=roc_title)
        aucs.append(roc_auc)

    # Strict ">" keeps the first candidate on ties.
    best = 0
    for i, score in enumerate(aucs):
        if score > aucs[best]:
            best = i
    optC = params[best]

    mykwargs[paramname] = optC
    clf = classifier(**mykwargs).fit(f_train, y_train)
    fpr, tpr = get_roc(clf)
    # Recompute the AUC for the refitted model; previously the value left over
    # from the last grid iteration was passed to the final plot.
    roc_auc = auc(fpr, tpr)
    myplt = st.plotROC(fpr, tpr, roc_auc,
                       legendlabel='Best ' + paramname + ' = %0.2e' % optC,
                       figure=False, show=False, returnplt=True,
                       showlegend=True, title=roc_title)
    myplt.show()

    # Second figure: AUC as a function of the parameter, with a horizontal
    # reference line at the best AUC seen during the grid loop.
    maxAUC = aucs[best]
    plt.figure()
    maxauclabel = ("Max AUC = %0.2f, " % maxAUC) + paramname + (" =%s" % optC)
    plt.semilogx(params, np.ones(len(params)) * maxAUC, 'r',
                 label=maxauclabel, linewidth=2, zorder=10)
    plt.semilogx(params, aucs, zorder=1)
    # A space was missing between the classifier name and "AUC Scores".
    plt.title('Grid Search: ' + classifier_name + ' AUC Scores')
    plt.xlabel(paramname)
    plt.ylabel('AUC Score')
    plt.legend(loc="lower right")
    plt.show()
    return clf
def SGDGridSearch_OLD():
    """Grid-search ``alpha`` for an L1-penalised SGD classifier by test AUC.

    NOTE(review): the ``_OLD`` suffix suggests this was superseded by the
    generic ``GridSearch`` -- confirm before relying on it.

    Reads ``f_train``, ``y_train``, ``f_test`` and ``y_test`` from the
    enclosing scope (they are not parameters).  For every candidate ``alpha``
    a classifier is fitted on the training split, scored on the test split via
    ``decision_function`` and its ROC curve is handed to ``st.plotROC``.  The
    candidate with the highest AUC (first one wins on ties) is then refitted,
    plotted with a legend, and the figure is shown.

    Returns:
        A tuple ``(clf, aucs)``: the ``SGDClassifier`` refitted with the best
        ``alpha`` and the list of AUC scores, one per candidate, in grid order.
    """
    # NOTE(review): the original carried the comment "C=1 is best", which
    # looks copy-pasted from the logistic-regression search -- the parameter
    # searched here is alpha.
    cs = 10.0 ** np.arange(-9, 9, 1)
    roc_title = 'Grid Search - SGD Classifier ROC Curve'

    aucs = []
    for c in cs:
        clf = SGDClassifier(penalty='l1', alpha=c).fit(f_train, y_train)
        probs = clf.decision_function(f_test)
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs)
        roc_auc = auc(fpr, tpr)
        st.plotROC(fpr, tpr, roc_auc,
                   figure=False, show=False, returnplt=True,
                   showlegend=False, title=roc_title)
        aucs.append(roc_auc)

    # Index of the highest AUC; max() keeps the first maximal entry, matching
    # a strict ">" scan.
    best = max(range(len(aucs)), key=aucs.__getitem__)
    c = cs[best]

    clf = SGDClassifier(penalty='l1', alpha=c).fit(f_train, y_train)
    probs = clf.decision_function(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs)
    # Recompute the AUC for the refitted model; previously the value left over
    # from the last grid iteration was passed to the final plot.
    roc_auc = auc(fpr, tpr)
    # The legend used to read "Best C" although the searched parameter is alpha.
    myplt = st.plotROC(fpr, tpr, roc_auc,
                       legendlabel='Best alpha = %0.2e' % c,
                       figure=False, show=False, returnplt=True,
                       showlegend=True, title=roc_title)
    myplt.show()
    return clf, aucs
sel_bool_test = train == 0 sel_ind_train = np.where(sel_bool_train)[0] sel_ind_test = np.where(sel_bool_test)[0] f_train = features[sel_ind_train] f_test = features[sel_ind_test] # N approved = 1-rejected y_train = np.array(approved[sel_bool_train]).astype(int) y_test = np.array(approved[sel_bool_test]).astype(int) return f_train,f_test,y_train,y_test # CLASSIFIERS ''' clf1 = MultinomialNB().fit(f_train, y_train) probs = clf1.predict_proba(f_test) fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1]) roc_auc = auc(fpr,tpr) st.plotROC(fpr,tpr,roc_auc,"MultinomialNB") clf2 = LogisticRegression(penalty='l1').fit(f_train, y_train) probs = clf2.predict_proba(f_test) fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1]) roc_auc = auc(fpr,tpr) st.plotROC(fpr,tpr,roc_auc,"LogReg") clf3 = SGDClassifier(penalty='l1').fit(f_train, y_train)