예제 #1
0
# Score the predictions against the held-out labels.  The helper returns two
# values; their exact definition lives in completeness_contamination itself.
completeness, contamination = completeness_contamination(predictions, y_test)

# Python 2 print statements.
print "completeness", completeness
print "contamination", contamination

#------------------------------------------------------------
# Compute the decision boundary
# Use the second entry of `classifiers` for the boundary plot.
clf = classifiers[1]
xlim = (0.7, 1.35)
ylim = (-0.15, 0.4)

# Evaluation grid: 71 samples across xlim and 81 samples across ylim.
xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 71),
                     np.linspace(ylim[0], ylim[1], 81))

# The grid columns are passed as (yy, xx).  This matches the scatter plot
# below, which draws X[:, 1] on the horizontal axis and X[:, 0] on the
# vertical axis, i.e. feature 0 is the plot's y and feature 1 its x.
Z = clf.predict_proba(np.c_[yy.ravel(), xx.ravel()])
# Keep the probability column at index 1 and fold it back to the grid shape.
Z = Z[:, 1].reshape(xx.shape)

#----------------------------------------------------------------------
# plot the results
fig = plt.figure(figsize=(8, 4))
fig.subplots_adjust(bottom=0.15, top=0.95, hspace=0.0,
                    left=0.1, right=0.95, wspace=0.2)

# left plot: data and decision boundary
ax = fig.add_subplot(121)
# Only the last N_plot rows are drawn, coloured by their label in y.
im = ax.scatter(X[-N_plot:, 1], X[-N_plot:, 0], c=y[-N_plot:],
                s=4, lw=0, cmap=plt.cm.binary, zorder=2)
# Colour limits for the scatter's colormap.
im.set_clim(-0.5, 1)

im = ax.imshow(Z, origin='lower', aspect='auto',
예제 #2
0
# Fraction of the cancer points that were counted in truepos_n.
# float() forces true division: with plain integers under Python 2 the ratio
# would be truncated to 0 or 1.
# NOTE(review): size // ndim equals the number of rows only when the array has
# as many columns as dimensions (e.g. an N x 2 array) -- confirm cancer_pt's
# shape.
sensitivity = truepos_n / float(cancer_pt.size // cancer_pt.ndim)

# In[51]:

#Generate grids for the entire plot
# The y extent depends on the inRedox flag; the x extent and the resolution
# are shared by both cases.
if inRedox:
    xx, yy = np.meshgrid(np.linspace(0, xaxis_range, grid_resol),
                         np.linspace(0, yaxis_range_rdx, grid_resol))
else:
    xx, yy = np.meshgrid(np.linspace(0, xaxis_range, grid_resol),
                         np.linspace(0, yaxis_range, grid_resol))

# One row per grid point, columns ordered (x, y).
plot_grid = np.c_[xx.ravel(), yy.ravel()]

#Calculate the prediction probability for each point on the grid
# Column 1 of predict_proba, folded back to the grid shape for contouring.
grid_z = clf.predict_proba(plot_grid)[:, 1].reshape(xx.shape)

# In[99]:

# Notebook cell that displays the grid; it has no effect when run as a script.
xx

# In[95]:

plt.figure()
# Draw only the 0.5 probability level, i.e. the decision boundary.
plt.contour(xx, yy, grid_z, [0.5], linewidths=2., colors='k')

# Floor division keeps the count an integer in the legend text on both
# Python 2 and Python 3 (true division would render it as e.g. "10.0").
plt.scatter(X,
            Y,
            c='r',
            marker='^',
            label='Cancer (N =' + str(cancer_pt.size // 2) + ')')
예제 #3
0
    completeness, contamination = completeness_contamination(
        predictions, y_test)

    print "completeness", completeness
    print "contamination", contamination

    #------------------------------------------------------------
    # Compute the decision boundary
    clf = classifiers[1]
    xlim = (0.7, 1.35)
    ylim = (-0.15, 0.4)

    xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 71),
                         np.linspace(ylim[0], ylim[1], 81))

    Z = clf.predict_proba(np.c_[yy.ravel(), xx.ravel()])
    Z = Z[:, 1].reshape(xx.shape)

    #----------------------------------------------------------------------
    # plot the results
    fig = plt.figure(figsize=(5, 2.5))
    fig.subplots_adjust(bottom=0.15,
                        top=0.95,
                        hspace=0.0,
                        left=0.1,
                        right=0.95,
                        wspace=0.2)

    # left plot: data and decision boundary
    ax = fig.add_subplot(121)
    im = ax.scatter(X[-N_plot:, 1],
예제 #4
0
def crossValidate(itr):
    """Run one random train/test split, plot it, and return the test scores.

    A QDA classifier is fitted on the rows selected by ``randTestData`` and
    evaluated on the remaining normal and cancer points.  The 0.5 probability
    contour and the four point groups (held-out / training, cancer / normal)
    are drawn on a new figure that is saved as ``"cv" + str(itr) + ".jpg"``.

    The function reads its data and plot settings from module-level names
    (``train_data``, ``labels``, ``normal_pt``, ``cancer_pt``, ``x1``, ``y1``,
    ``x2``, ``y2``, ``inRedox``, the axis ranges, ``grid_resol`` and the
    feature names).

    Args:
        itr: Value embedded in the output file name.

    Returns:
        Tuple ``(specificity, sensitivity)``: the fraction of held-out normal
        points predicted as class 0 and the fraction of held-out cancer
        points predicted as class 1.
    """
    norTrainNum, nor_isTraining = randTestData(t_data_perc, norDataNum)
    cnTrainNum, cn_isTraining = randTestData(t_data_perc, cnDataNum)
    isTraining = np.hstack((nor_isTraining, cn_isTraining))

    # Held-out rows are the complement of the training masks.
    # presumably boolean numpy arrays (they are used as index masks) -- TODO confirm
    nor_isTest = np.logical_not(nor_isTraining)
    cn_isTest = np.logical_not(cn_isTraining)

    # Training
    clf = QDA()
    trained_clf = clf.fit(train_data[isTraining], labels[isTraining])

    # Using the remaining data for testing.  float() forces true division:
    # with integer operands under Python 2 both ratios would be truncated
    # to 0 or 1.
    normal_pred = trained_clf.predict(normal_pt[nor_isTest])
    trueneg_n = (normal_pred == 0).sum()
    specificity = trueneg_n / float(int(norDataNum - norTrainNum))

    cancer_pred = trained_clf.predict(cancer_pt[cn_isTest])
    truepos_n = (cancer_pred == 1).sum()
    sensitivity = truepos_n / float(int(cnDataNum - cnTrainNum))

    # Generate grids for the entire plot; only the y extent depends on inRedox.
    y_extent = yaxis_range_rdx if inRedox else yaxis_range
    xx, yy = np.meshgrid(np.linspace(0, xaxis_range, grid_resol), np.linspace(0, y_extent, grid_resol))
    plot_grid = np.c_[xx.ravel(), yy.ravel()]

    # Calculate the prediction probability for each point on the grid
    grid_z = clf.predict_proba(plot_grid)[:, 1].reshape(xx.shape)

    plt.figure()
    # Only the 0.5 level is drawn, i.e. the decision boundary.
    plt.contour(xx, yy, grid_z, [0.5], linewidths=2.0, colors="k")

    # (x values, y values, row mask, colour, marker, legend label); drawn in
    # this order so the legend entries keep their sequence.
    point_groups = (
        (x1, y1, cn_isTest, "r", "^", "Cancer (N =" + str(cnDataNum - cnTrainNum) + ")"),
        (x2, y2, nor_isTest, "g", "^", "Normal(N =" + str(norDataNum - norTrainNum) + ")"),
        (x1, y1, cn_isTraining, "r", "o", "Trn_Cancer (N =" + str(cnTrainNum) + ")"),
        (x2, y2, nor_isTraining, "g", "o", "Trn_Normal(N =" + str(norTrainNum) + ")"),
    )
    for xs, ys, mask, colour, marker, legend_text in point_groups:
        plt.scatter(xs[mask], ys[mask], c=colour, marker=marker, label=legend_text)

    plt.axis("tight")
    plt.xlabel(feature_x, fontsize="large")
    plt.ylabel(feature_y, fontsize="large")
    plt.legend()
    plt.suptitle(feature_x + " vs. " + feature_y, fontsize=16)
    plt.title(
        "Specificity: {0:.3f} ; Sensitivity:{1:.3f}".format(specificity, sensitivity),
        fontsize=12,
    )

    # NOTE(review): the figure is left open after saving; when this is called
    # for many iterations the caller may want to close it.
    plt.savefig("cv" + str(itr) + ".jpg")

    return specificity, sensitivity
  X_test = pd.concat(test)
  all_ids.append(np.concatenate(idx))
  X_test = X_test.drop(['id'], axis=1)
  X_test = np.asarray(X_test.astype(float))


  current_prediction_lda = np.empty((X_test.shape[0], 6)) # number of test samples X number of labels
  current_prediction_lr = np.empty((X_test.shape[0], 6)) # number of test samples X number of labels
  current_prediction_qda = np.empty((X_test.shape[0], 6)) # number of test samples X number of labels
  X_test = data_preprocess_test(X_test)

  for i in range(6):
    print 'testing subject_id=',subject_id
    current_prediction_lr[:,i] = lr.predict_proba(X_test)[:,1]
    current_prediction_qda[:,i] = qda.predict_proba(X_test)[:,1]
    current_prediction_lda[:,i] = lda.predict_proba(X_test)[:,1]

  	# print 'predicted:',current_prediction[:,i]

  all_predictions_lda.append(current_prediction_lda)
  all_predictions_qda.append(current_prediction_qda)
  all_predictions_lr.append(current_prediction_lr)

  all_predictions_avg.append( (current_prediction_lda+current_prediction_qda+current_prediction_lr)/3 )

print 'testing complete'


print 'ids ',np.concatenate(all_ids).shape
print 'predictions ',np.concatenate(all_predictions_avg).shape
예제 #6
0
        remove = remove.union(redundant)
    
    print("For correlation coefficient = ", coefficient)
    #print(remove)
    #print(add)

    train_data = pd.DataFrame(data=train_data_g, columns = df.columns)[df.columns- remove].values
    test_data = pd.DataFrame(data=test_data_g, columns = df.columns)[df.columns- remove].values
    print("num of featurs = ", train_data.shape[1])

    clf = QDA();

    # This gets the time in ipython shell.
    print("Modelling time:")
    %time clf.fit(train_data, train_labels)
    print("Modelling time ends")

    print("prediction time starts:")
    %time predicted_labels = clf.predict(test_data)
    print("prediction time ends")
    #print(classification_report(test_labels, clf.predict(test_data)))
    print(classification_report(test_labels, predicted_labels))

    print("num of featurs = ", train_data.shape[1])
    y_true = test_labels;
    y_pred_proba = clf.predict_proba(test_data);
    fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba[:, 1])
    roc_auc = auc(fpr, tpr)
    print("ROC AUC =", roc_auc)
    print("\n\n\n")
예제 #7
0
    angle = 180 * angle / np.pi  # convert to degrees
    # filled gaussian at 2 standard deviation
    ell = mpl.patches.Ellipse(mean,
                              2 * v[0]**0.5,
                              2 * v[1]**0.5,
                              180 + angle,
                              color=color)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)


# Evaluation grid: 200 x 200 points over x in [4, 8.5] and y in [1.5, 4.5].
xx, yy = np.meshgrid(np.linspace(4, 8.5, 200),
                     np.linspace(1.5, 4.5, 200))
X_grid = np.column_stack((xx.ravel(), yy.ravel()))

# Probability column 1 of each model, folded back to the grid shape.
zz_lda = lda.predict_proba(X_grid)[:, 1].reshape(xx.shape)
zz_qda = qda.predict_proba(X_grid)[:, 1].reshape(xx.shape)

pl.figure()

# Left panel: LDA regions, samples, 0.5 boundary and per-class ellipses.
splot = pl.subplot(1, 2, 1)
pl.contourf(xx, yy, zz_lda > 0.5, alpha=0.5)
for _cls, _colour in ((0, 'b'), (1, 'r')):
    _members = y == _cls
    pl.scatter(X[_members, 0], X[_members, 1],
               c=_colour, label=target_names[_cls])
pl.contour(xx, yy, zz_lda, [0.5], linewidths=2., colors='k')
for _cls, _colour in ((0, 'b'), (1, 'r')):
    # Both ellipses use the single covariance exposed by the LDA model.
    plot_ellipse(splot, lda.means_[_cls], lda.covariance_, _colour)
pl.legend()
pl.axis('tight')
pl.title('Linear Discriminant Analysis')

# Right panel: start with the QDA decision regions.
splot = pl.subplot(1, 2, 2)
pl.contourf(xx, yy, zz_qda > 0.5, alpha=0.5)
예제 #8
0
def plot_ellipse(splot, mean, cov, color):
    """Add a filled, half-transparent covariance ellipse to *splot*.

    Args:
        splot: Axes-like object; its ``bbox`` clips the patch and its
            ``add_artist`` receives it.
        mean: Centre of the ellipse, passed to ``Ellipse`` as its position.
        cov: Covariance matrix; its eigenvalues set the axis lengths and
            its eigen-decomposition sets the rotation.
        color: Colour passed to the ``Ellipse`` patch.
    """
    v, w = linalg.eigh(cov)
    # NOTE(review): eigh returns eigenvectors as columns (w[:, i]); this reads
    # row w[0] instead -- confirm that is intended.
    u = w[0] / linalg.norm(w[0])
    angle = np.arctan(u[1]/u[0])
    angle = 180 * angle / np.pi # convert to degrees
    # Filled ellipse whose full axis lengths are 2 * sqrt(eigenvalue).
    # The rotation is passed by keyword: newer Matplotlib releases no longer
    # accept it as a positional argument, and older ones accept the keyword.
    ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
                              angle=180 + angle, color=color)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)

# Evaluation grid: 200 x 200 points over x in [4, 8.5] and y in [1.5, 4.5].
xx, yy = np.meshgrid(np.linspace(4, 8.5, 200),
                     np.linspace(1.5, 4.5, 200))
X_grid = np.column_stack((xx.ravel(), yy.ravel()))

# Probability column 1 of each model, folded back to the grid shape.
zz_lda = lda.predict_proba(X_grid)[:,1].reshape(xx.shape)
zz_qda = qda.predict_proba(X_grid)[:,1].reshape(xx.shape)

pl.figure()

# Left panel: LDA regions, samples, 0.5 boundary and per-class ellipses.
splot = pl.subplot(1, 2, 1)
pl.contourf(xx, yy, zz_lda > 0.5, alpha=0.5)
for _cls, _colour in ((0, 'b'), (1, 'r')):
    _members = y == _cls
    pl.scatter(X[_members, 0], X[_members, 1],
               c=_colour, label=target_names[_cls])
pl.contour(xx, yy, zz_lda, [0.5], linewidths=2., colors='k')
for _cls, _colour in ((0, 'b'), (1, 'r')):
    # Both ellipses use the single covariance exposed by the LDA model.
    plot_ellipse(splot, lda.means_[_cls], lda.covariance_, _colour)
pl.legend()
pl.axis('tight')
pl.title('Linear Discriminant Analysis')

# Right panel: start with the QDA decision regions.
splot = pl.subplot(1, 2, 2)
pl.contourf(xx, yy, zz_qda > 0.5, alpha=0.5)
예제 #9
0
def crossValidate(itr):
    """Run one random train/test split, plot it, and return the test scores.

    A QDA classifier is fitted on the rows selected by ``randTestData`` and
    evaluated on the remaining normal and cancer points.  The 0.5 probability
    contour and the four point groups (held-out / training, cancer / normal)
    are drawn on a new figure that is saved as ``'cv' + str(itr) + '.jpg'``.

    The function reads its data and plot settings from module-level names
    (``train_data``, ``labels``, ``normal_pt``, ``cancer_pt``, ``x1``, ``y1``,
    ``x2``, ``y2``, ``inRedox``, the axis ranges, ``grid_resol`` and the
    feature names).

    Args:
        itr: Value embedded in the output file name.

    Returns:
        Tuple ``(specificity, sensitivity)``: the fraction of held-out normal
        points predicted as class 0 and the fraction of held-out cancer
        points predicted as class 1.
    """
    norTrainNum, nor_isTraining = randTestData(t_data_perc, norDataNum)
    cnTrainNum, cn_isTraining = randTestData(t_data_perc, cnDataNum)
    isTraining = np.hstack((nor_isTraining, cn_isTraining))

    # Held-out rows are the complement of the training masks.
    # presumably boolean numpy arrays (they are used as index masks) -- TODO confirm
    nor_isTest = np.logical_not(nor_isTraining)
    cn_isTest = np.logical_not(cn_isTraining)

    #Training
    clf = QDA()
    trained_clf = clf.fit(train_data[isTraining], labels[isTraining])

    #Using the remaining data for testing.  float() forces true division:
    #with integer operands under Python 2 both ratios would be truncated
    #to 0 or 1.
    normal_pred = trained_clf.predict(normal_pt[nor_isTest])
    trueneg_n = (normal_pred == 0).sum()
    specificity = trueneg_n / float(int(norDataNum - norTrainNum))

    cancer_pred = trained_clf.predict(cancer_pt[cn_isTest])
    truepos_n = (cancer_pred == 1).sum()
    sensitivity = truepos_n / float(int(cnDataNum - cnTrainNum))

    #Generate grids for the entire plot; only the y extent depends on inRedox.
    y_extent = yaxis_range_rdx if inRedox else yaxis_range
    xx, yy = np.meshgrid(np.linspace(0, xaxis_range, grid_resol),
                         np.linspace(0, y_extent, grid_resol))
    plot_grid = np.c_[xx.ravel(), yy.ravel()]

    #Calculate the prediction probability for each point on the grid
    grid_z = clf.predict_proba(plot_grid)[:, 1].reshape(xx.shape)

    plt.figure()
    # Only the 0.5 level is drawn, i.e. the decision boundary.
    plt.contour(xx, yy, grid_z, [0.5], linewidths=2., colors='k')

    # (x values, y values, row mask, colour, marker, legend label); drawn in
    # this order so the legend entries keep their sequence.
    point_groups = (
        (x1, y1, cn_isTest, 'r', '^',
         'Cancer (N =' + str(cnDataNum - cnTrainNum) + ')'),
        (x2, y2, nor_isTest, 'g', '^',
         'Normal(N =' + str(norDataNum - norTrainNum) + ')'),
        (x1, y1, cn_isTraining, 'r', 'o',
         'Trn_Cancer (N =' + str(cnTrainNum) + ')'),
        (x2, y2, nor_isTraining, 'g', 'o',
         'Trn_Normal(N =' + str(norTrainNum) + ')'),
    )
    for xs, ys, mask, colour, marker, legend_text in point_groups:
        plt.scatter(xs[mask],
                    ys[mask],
                    c=colour,
                    marker=marker,
                    label=legend_text)

    plt.axis('tight')
    plt.xlabel(feature_x, fontsize='large')
    plt.ylabel(feature_y, fontsize='large')
    plt.legend()
    plt.suptitle(feature_x + ' vs. ' + feature_y, fontsize=16)
    plt.title('Specificity: {0:.3f} ; Sensitivity:{1:.3f}'.format(
                  specificity, sensitivity),
              fontsize=12)

    # NOTE(review): the figure is left open after saving; when this is called
    # for many iterations the caller may want to close it.
    plt.savefig('cv' + str(itr) + '.jpg')

    return specificity, sensitivity
예제 #10
0
# In[119]:

cancer_pred = trained_clf.predict(cancer_pt)
truepos_n = (cancer_pred == 1).sum()
# float() forces true division: with integer operands under Python 2 the
# ratio would be truncated to 0 or 1.
sensitivity = truepos_n / float(int(cancer_ndata))


# In[120]:

#Generate grids for the entire plot
# 100 x 100 x 200 grid: two axes over [0, 255] and a third over [0, 0.2].
xx, yy, zz = np.meshgrid(np.linspace(0, 255, 100), np.linspace(0, 255, 100), np.linspace(0, 0.2, 200))
plot_grid = np.c_[xx.ravel(), yy.ravel(), zz.ravel()]

#Calculate the prediction probability for each point on the grid
grid_result = clf.predict_proba(plot_grid)[:,1].reshape(xx.shape)


# In[124]:

# Distance of every grid point's probability from the 0.5 decision level.
a = abs(grid_result - 0.5)
sur_x, sur_y = np.meshgrid(np.linspace(0, 255, 100), np.linspace(0, 255, 100))

# For each (row, column) cell, take the z value whose probability is closest
# to 0.5.  A single indexing operation replaces the hard-coded 100 x 100
# Python loop; argmin keeps the first minimum, exactly as the loop did.
nearest = a.argmin(axis=2)
rows, cols = np.indices(nearest.shape)
sur_z = zz[rows, cols, nearest]

# Notebook cell output: displays the surface heights.
sur_z