Example #1
0
def plot_total_correct_cumul(X_total_correct,ctrl):
    """Plot cumulative curves of column 0 of `X_total_correct` for two groups.

    Rows selected by `ctrl == True` are drawn in black and labelled
    'Control'; rows selected by `ctrl == False` are drawn in red and
    labelled 'PD'.  Everything is drawn through `plt` (presumably
    matplotlib.pyplot -- its import is outside this block); nothing is
    returned.
    """
    # Explicit comparisons are kept (rather than truthiness/negation) so the
    # selection matches element-wise equality with True / False exactly.
    ctrl_mask = ctrl == True
    pd_mask = ctrl == False
    groups = [
        (X_total_correct[ctrl_mask, 0], 'k', 'Control'),
        (X_total_correct[pd_mask, 0], 'r', 'PD'),
    ]
    for values, fmt, name in groups:
        # Sorted values against evenly spaced heights from 0 to 1.
        heights = np.linspace(0, 1, len(values))
        plt.plot(sorted(values), heights, fmt, label=name)
    plt.xlabel('Total Correct')
    plt.ylabel('Cumulative Probability')
    plt.legend(loc=2)
Example #2
0
def plot_cumul(X,Y,label):
    """Plot cumulative curves of `X` split by the labels in `Y`.

    Entries of `X` where `Y == False` are drawn in black with legend
    entry '-'; entries where `Y == True` are drawn in red with legend
    entry '+'.  `label` becomes the x-axis label.  Draws through `plt`
    (presumably matplotlib.pyplot -- imported outside this block) and
    returns nothing.
    """
    positives = X[Y == True]
    negatives = X[Y == False]
    # Negatives are drawn first so the legend order is '-', then '+'.
    for values, fmt, sign in ((negatives, 'k', '-'), (positives, 'r', '+')):
        heights = np.linspace(0, 1, len(values))
        plt.plot(sorted(values), heights, fmt, label=sign)
    plt.xlabel(label)
    plt.ylabel('Cumulative Probability')
    plt.ylim(0,1)
    plt.legend(loc=2)
Example #3
0
def plot_roc_curve(Y,p_parks_tc,p_parks_r):
    """Draw two ROC curves for the same labels `Y` on the current figure.

    `p_parks_tc` is plotted in gray with the legend text
    'AUC using Total Correct = ...' and `p_parks_r` in green with
    'AUC using individual responses = ...'.  The curve points and AUC
    values come from `get_roc_curve`, which is defined outside this block
    and is expected to return a (fpr, tpr, auc) triple.
    """
    # Compute both curves before drawing anything, so a failure in either
    # computation leaves the figure untouched.
    results = [get_roc_curve(Y, scores) for scores in (p_parks_tc, p_parks_r)]
    styles = [
        ('gray', 'AUC using Total Correct = %0.2f'),
        ('g', 'AUC using individual responses = %0.2f'),
    ]
    for (fpr, tpr, roc_auc), (colour, template) in zip(results, styles):
        plt.plot(fpr, tpr, lw=2, color=colour, label=template % (roc_auc))
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curves')
    # Argument-less calls: no tick styling is applied here.
    plt.xticks()
    plt.yticks()
    plt.legend(loc="lower right")
Example #4
0
def roc_showdown(p_x,p_y,o_x,o_y,x_diag,y_diag,title='AUC',color='black'):
    """Plot a head-to-head ROC-style curve for two competing diagnoses.

    The score is the difference `p_x - p_y`; the outcome is the difference
    `o_x - o_y`.  Only entries whose outcome difference has absolute value 1
    are kept, and an entry counts as positive when that difference is
    exactly 1.  The curve drawn is `fpr` against `1 - tpr`; the legend shows
    `title` and the AUC computed from (fpr, tpr).  Axis labels are built
    from `x_diag` / `y_diag` after removing 'Clinpath ' and rewriting 'Nos'
    as 'NOS'.  Draws through `plt`; returns nothing.
    """
    from sklearn.metrics import roc_curve,auc

    score_diff = p_x - p_y
    outcome_diff = o_x - o_y
    # Keep only cases where x or y equals 1, but not both (this reading
    # assumes 0/1 outcomes -- confirm against callers).
    decisive = np.abs(outcome_diff)==1
    score_diff = score_diff[decisive]
    favours_x = outcome_diff[decisive]==1

    fpr,tpr,_ = roc_curve(favours_x, score_diff)
    legend_text = "%s = %.3f" % (title,auc(fpr,tpr))
    plt.plot(fpr,1-tpr,label=legend_text,c=color)

    x_name, y_name = [
        diag.replace('Clinpath ','').replace('Nos','NOS')
        for diag in (x_diag, y_diag)
    ]
    plt.xlabel('False %s rate' % x_name)
    plt.ylabel('False %s rate' % y_name)
Example #5
0
def _draw_distinct_pairs(values, iters):
    """Draw `iters` ordered pairs of two distinct elements of `values`.

    Pairs are uniform over all ordered pairs of different positions.
    Requires `len(values) >= 2`.  Uses the global `np.random` state.
    """
    first = np.random.randint(0, len(values), size=iters)
    # Pick the second position among the remaining len-1 slots, then shift
    # it past `first` so the two positions can never coincide.
    second = np.random.randint(0, len(values) - 1, size=iters)
    second += second >= first
    return values[first], values[second]


def _auc_standard_deviation(Y, p, A, m, n, iters=10000):
    """Monte Carlo estimate of the standard deviation of the AUC `A`.

    Confidence Intervals for the Area under the ROC Curve
    Cortes and Mohri
    http://www.cs.nyu.edu/~mohri/pub/area.pdf

    Args:
        Y: labels; entries > 0 are positives, entries == 0 are negatives.
        p: scores aligned with `Y`.
        A: the AUC whose spread is being estimated.
        m: number of positives used in the variance formula.
        n: number of negatives used in the variance formula.
        iters: number of random draws per probability estimate.

    Returns:
        The estimated standard deviation, or NaN when `m * n` is zero or a
        class has no samples at all.
    """
    labels = np.asarray(Y)
    scores = np.asarray(p)
    pos = scores[labels > 0]
    neg = scores[labels == 0]
    if m * n == 0 or len(pos) == 0 or len(neg) == 0:
        return float('nan')

    A_sq = A ** 2
    var = A * (1 - A)
    if len(pos) >= 2:
        # Pxxy: two distinct positives both outscore one negative.
        pa, pb = _draw_distinct_pairs(pos, iters)
        na = neg[np.random.randint(0, len(neg), size=iters)]
        Pxxy = np.mean((pa > na) & (pb > na))
        var += (m - 1) * (Pxxy - A_sq)
    if len(neg) >= 2:
        # Pxyy: one positive outscores two distinct negatives.
        na, nb = _draw_distinct_pairs(neg, iters)
        pa = pos[np.random.randint(0, len(pos), size=iters)]
        Pxyy = np.mean((na < pa) & (nb < pa))
        var += (n - 1) * (Pxyy - A_sq)
    # With fewer than two samples in a class the matching probability cannot
    # be sampled, so its term is left out instead of raising on unpacking.
    var = var / float(m * n)
    # Sampling noise can push the estimate slightly below zero; clamp so the
    # square root yields 0 rather than NaN.
    return np.sqrt(max(var, 0.0))


def plot_roc_curve(Y,n0=None,n1=None,smooth=False,no_plot=False,**ps):
    """Plot (or print) ROC curves with AUCs and their estimated spread.

    Each keyword argument in `ps` is one curve: the keyword is its title and
    the value is the score vector aligned with `Y`.  Curves are processed in
    sorted title order.  Curve points and AUC come from `get_roc_curve`
    (defined outside this block).

    Args:
        Y: labels; entries > 0 count as positives, entries == 0 as negatives.
        n0: number of negatives for the variance formula; counted from `Y`
            when None.
        n1: number of positives for the variance formula; counted from `Y`
            when None.
        smooth: forwarded to `get_roc_curve`.
        no_plot: when True nothing is drawn and 'title = auc +/- sd' is
            printed for each curve instead.
        **ps: title -> scores.

    Returns:
        (aucs, aucs_sd): two lists in sorted title order.

    Note:
        The standard deviations are Monte Carlo estimates drawn from the
        global `np.random` state, so they vary from run to run unless that
        state is seeded.
    """
    labels = np.asarray(Y)
    if n0 is None:
        n0 = int(np.sum(labels == 0))
    if n1 is None:
        n1 = int(np.sum(labels > 0))

    aucs = []
    aucs_sd = []
    for i,(title,p) in enumerate(sorted(ps.items())):
        fpr,tpr,auc = get_roc_curve(Y,p,smooth=smooth)
        sd = _auc_standard_deviation(Y, p, auc, m=n1, n=n0)
        aucs.append(auc)
        aucs_sd.append(sd)
        if not no_plot:
            plt.plot(fpr, tpr, lw=2, color=get_colors(i), label='%s = %0.2f' % (title,auc))
        else:
            print('%s = %0.3f +/- %0.3f' % (title,auc,sd))

    if not no_plot:
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC curves')
        plt.xticks()
        plt.yticks()
        # Small margins keep curves that hug the axes visible.
        plt.xlim(-0.01,1.01)
        plt.ylim(-0.01,1.01)
        plt.legend(loc="lower right",fontsize=17)
    return aucs,aucs_sd
Example #6
0
def factor_analysis(tests, max_components=40, cv=3):
    """Score FactorAnalysis models of increasing size by cross-validation.

    Builds a matrix with `correct_matrix(tests, kind='ctrl')` (defined
    outside this block), prints its shape, then for every component count
    from 1 to `max_components` records the mean `cross_val_score` of a
    `FactorAnalysis` model on that matrix.  The scores are also plotted
    against the component counts through `plt`.

    Args:
        tests: passed straight to `correct_matrix`.
        max_components: largest number of components to try (default 40,
            the range that was previously hard-coded).
        cv: forwarded to `cross_val_score`.  The default of 3 pins the
            three-fold split this function was written around, instead of
            depending on the installed scikit-learn's default.

    Returns:
        (n_components, fa_scores): the array of component counts tried and
        the list of mean cross-validation scores, in the same order.
    """
    from sklearn.decomposition import FactorAnalysis
    try:
        from sklearn.model_selection import cross_val_score
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.cross_validation import cross_val_score

    matrix = correct_matrix(tests,kind='ctrl')
    print(matrix.shape)

    n_components = np.arange(1, max_components + 1)
    fa_scores = []
    for n in n_components:
        fa = FactorAnalysis(n_components=n)
        fa_scores.append(np.mean(cross_val_score(fa, matrix, cv=cv)))

    plt.plot(n_components,fa_scores)

    return n_components,fa_scores
Example #7
0
            prob_correct = logit(r_subject + r_q)
            likelihood = bernoulli.pmf(is_correct,prob_correct)
            log_l += np.log(likelihood)
        num_not_held_out = len(np.where(holdout==0)[0])
        print "Log-likelihood per sample in sample is %.2f" % (log_l/num_not_held_out)
        log_ls_in.append(log_l/num_not_held_out)

        log_l = 0
        for subject,question in zip(*np.where(holdout!=0)):
            r_subject = means['r_subject'][subject]
            if subject < len(ctrl):
                r_q = means['r_q'][question]
            if subject >= len(ctrl):
                r_q = means['r_q_pd'][question]
            is_correct = correct[subject][question]
            prob_correct = logit(r_subject + r_q)
            likelihood = bernoulli.pmf(is_correct,prob_correct)
            log_l += np.log(likelihood)
        num_held_out = len(np.where(holdout!=0)[0])
        print "Log-likelihood per sample out of sample is %.2f" % (log_l/num_held_out)
        log_ls_out.append(log_l/num_held_out)

    cv_scores.append((log_ls_in,log_ls_out))

# Dump the raw results: `betas` (defined earlier, outside this excerpt) and
# `cv_scores`, whose entries are (log_ls_in, log_ls_out) tuples.
print betas
print cv_scores

# x[1] is the out-of-sample list of per-sample log-likelihoods for one
# cv_scores entry; plot its mean against betas (presumably one entry per
# beta value -- confirm against the enclosing loop).
plt.plot(betas,[np.mean(x[1]) for x in cv_scores])
plt.show()