def plot_total_correct_cumul(X_total_correct, ctrl):
    """Plot empirical CDFs of total-correct scores, control vs. PD.

    Parameters
    ----------
    X_total_correct : 2-D array whose column 0 holds each subject's
        total-correct score.
    ctrl : boolean array; True marks control subjects, False marks PD.
    """
    scores = X_total_correct[:, 0]
    # One empirical CDF per group: sorted values vs. uniform [0, 1] grid.
    for mask, color, group in ((ctrl == True, 'k', 'Control'),
                               (ctrl == False, 'r', 'PD')):
        vals = np.sort(scores[mask])
        plt.plot(vals, np.linspace(0, 1, len(vals)), color, label=group)
    plt.xlabel('Total Correct')
    plt.ylabel('Cumulative Probability')
    plt.legend(loc=2)
def plot_cumul(X, Y, label):
    """Plot empirical CDFs of X split by binary outcome Y.

    Negatives (Y == False) are drawn in black, positives (Y == True) in red.
    `label` is used as the x-axis label.
    """
    negatives = np.sort(X[Y == False])
    positives = np.sort(X[Y == True])
    plt.plot(negatives, np.linspace(0, 1, len(negatives)), 'k', label='-')
    plt.plot(positives, np.linspace(0, 1, len(positives)), 'r', label='+')
    plt.xlabel(label)
    plt.ylabel('Cumulative Probability')
    plt.ylim(0, 1)
    plt.legend(loc=2)
def plot_roc_curve(Y, p_parks_tc, p_parks_r):
    """Draw ROC curves for two predictors of Y (total-correct vs. responses).

    NOTE(review): a later definition in this file reuses the name
    plot_roc_curve(Y, ..., **ps) and therefore shadows this one; renaming
    would change the interface, so the duplication is only flagged here.
    """
    fpr_tc, tpr_tc, auc_tc = get_roc_curve(Y, p_parks_tc)
    fpr_r, tpr_r, auc_r = get_roc_curve(Y, p_parks_r)
    plt.plot(fpr_tc, tpr_tc, lw=2, color='gray',
             label='AUC using Total Correct = %0.2f' % (auc_tc))
    plt.plot(fpr_r, tpr_r, lw=2, color='g',
             label='AUC using individual responses = %0.2f' % (auc_r))
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curves')
    plt.xticks()
    plt.yticks()
    plt.legend(loc="lower right")
def roc_showdown(p_x, p_y, o_x, o_y, x_diag, y_diag, title='AUC', color='black'):
    """Plot a misdiagnosis trade-off curve between two diagnoses.

    Restricts to discordant cases (exactly one of o_x / o_y equals 1),
    scores them by the difference of predicted probabilities, and plots
    the false-x rate against the false-y rate (i.e. fpr vs. 1 - tpr).
    """
    from sklearn.metrics import roc_curve, auc
    score_diff = p_x - p_y
    outcome_diff = o_x - o_y
    # Only cases where x or y equals 1, but not both.
    discordant = np.abs(outcome_diff) == 1
    score_diff = score_diff[discordant]
    is_x = outcome_diff[discordant] == 1
    fpr, tpr, _ = roc_curve(is_x, score_diff)
    plt.plot(fpr, 1 - tpr, label="%s = %.3f" % (title, auc(fpr, tpr)), c=color)
    # Tidy diagnosis names for the axis labels.
    x_diag = x_diag.replace('Clinpath ', '').replace('Nos', 'NOS')
    y_diag = y_diag.replace('Clinpath ', '').replace('Nos', 'NOS')
    plt.xlabel('False %s rate' % x_diag)
    plt.ylabel('False %s rate' % y_diag)
def plot_roc_curve(Y, n0=None, n1=None, smooth=False, no_plot=False, **ps):
    """Plot ROC curves (or print AUC summaries) for one or more predictors.

    Parameters
    ----------
    Y : array of labels; values > 0 are positives, values == 0 negatives.
    n0, n1 : effective negative / positive counts used in the variance
        formula (default: counted from Y).
    smooth : passed through to get_roc_curve.
    no_plot : if True, print '%s = auc +/- sd' lines instead of plotting.
    ps : keyword arguments mapping a curve title to its predicted scores.

    Returns
    -------
    (aucs, aucs_sd) : AUCs and their standard deviations, one pair per
    predictor, in sorted-title order.
    """
    aucs = []
    aucs_sd = []
    if n0 is None:
        n0 = sum(Y == 0)
    if n1 is None:
        n1 = sum(Y > 0)
    for i, (title, p) in enumerate(sorted(ps.items())):
        fpr, tpr, auc = get_roc_curve(Y, p, smooth=smooth)
        aucs.append(auc)
        # Confidence Intervals for the Area under the ROC Curve,
        # Cortes and Mohri, http://www.cs.nyu.edu/~mohri/pub/area.pdf
        m = n1
        n = n0
        A = auc
        # Hoist the positive/negative score pools out of the Monte Carlo
        # loop; the original reshuffled the full arrays on every iteration
        # just to draw 2 positives and 2 negatives.
        pos = p[Y > 0]
        neg = p[Y == 0]
        # Accumulate as floats: the original used ints, so under Python 2
        # semantics (used elsewhere in this file) Pxxy /= iters truncated
        # to 0 and the variance estimate was wrong.
        Pxxy = 0.0
        Pxyy = 0.0
        iters = 10000
        for j in range(iters):
            # Uniform without-replacement draws — equivalent to shuffling
            # and taking the first two of each class.
            pa, pb = np.random.choice(pos, 2, replace=False)
            na, nb = np.random.choice(neg, 2, replace=False)
            Pxxy += ((pa > na) and (pb > na))
            Pxyy += ((na < pa) and (nb < pa))
        Pxxy /= iters
        Pxyy /= iters
        var = (A*(1-A) + (m-1)*(Pxxy - A**2) + (n-1)*(Pxyy - A**2)) / (m*n)
        sd = np.sqrt(var)
        aucs_sd.append(sd)
        if not no_plot:
            plt.plot(fpr, tpr, lw=2, color=get_colors(i),
                     label='%s = %0.2f' % (title, auc))
        else:
            print('%s = %0.3f +/- %0.3f' % (title, auc, sd))
    if not no_plot:
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC curves')
        plt.xticks()
        plt.yticks()
        plt.xlim(-0.01, 1.01)
        plt.ylim(-0.01, 1.01)
        plt.legend(loc="lower right", fontsize=17)
    return aucs, aucs_sd
def factor_analysis(tests):
    """Cross-validated factor-analysis model selection on the control matrix.

    Fits FactorAnalysis with 1..40 components, scoring each by mean
    cross-validated log-likelihood, and plots score vs. component count.

    Returns
    -------
    (n_components, fa_scores) : the component counts tried and their
    mean cross-validation scores.
    """
    from sklearn.decomposition import FactorAnalysis
    try:
        # sklearn >= 0.18; sklearn.cross_validation was removed in 0.20.
        from sklearn.model_selection import cross_val_score
    except ImportError:
        from sklearn.cross_validation import cross_val_score
    matrix = correct_matrix(tests, kind='ctrl')
    print(matrix.shape)
    # matrix must have a number of rows divisible by 3.
    # if it does not, eliminate some rows, or pass cv=a to cross_val_score,
    # where 'a' is a number by which the number of rows is divisible.
    fa = FactorAnalysis()
    fa_scores = []
    n_components = np.arange(1, 41)
    for n in n_components:
        fa.n_components = n
        fa_scores.append(np.mean(cross_val_score(fa, matrix)))
    plt.plot(n_components, fa_scores)
    return n_components, fa_scores
prob_correct = logit(r_subject + r_q) likelihood = bernoulli.pmf(is_correct,prob_correct) log_l += np.log(likelihood) num_not_held_out = len(np.where(holdout==0)[0]) print "Log-likelihood per sample in sample is %.2f" % (log_l/num_not_held_out) log_ls_in.append(log_l/num_not_held_out) log_l = 0 for subject,question in zip(*np.where(holdout!=0)): r_subject = means['r_subject'][subject] if subject < len(ctrl): r_q = means['r_q'][question] if subject >= len(ctrl): r_q = means['r_q_pd'][question] is_correct = correct[subject][question] prob_correct = logit(r_subject + r_q) likelihood = bernoulli.pmf(is_correct,prob_correct) log_l += np.log(likelihood) num_held_out = len(np.where(holdout!=0)[0]) print "Log-likelihood per sample out of sample is %.2f" % (log_l/num_held_out) log_ls_out.append(log_l/num_held_out) cv_scores.append((log_ls_in,log_ls_out)) print betas print cv_scores plt.plot(betas,[np.mean(x[1]) for x in cv_scores]) plt.show()