Example 1
test_y = mat["test_y"]  # shape (20730, 1)
training_images = mat["training_images"]  # shape (24, 24, 3, 21910)
training_y = mat["training_y"]  # shape (21910, 1)

montage_n = 300
sort_ix = np.argsort(training_y, axis=0)
sort_ix_low = sort_ix[:montage_n]  # get the 300 smallest
sort_ix_high = sort_ix[-montage_n:]  #Get the 300 largest

# visualize the 300 smallest and the 300 largest nuclei
X_small = training_images[:, :, :, sort_ix_low.ravel()]
X_large = training_images[:, :, :, sort_ix_high.ravel()]
fig = plt.figure(figsize=(16, 8))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
util.montageRGB(X_small, ax1)
ax1.set_title('300 smallest nuclei')
util.montageRGB(X_large, ax2)
ax2.set_title('300 largest nuclei')

# dataset preparation
imageSize = training_images.shape

# every pixel is a feature so the number of features is:
# height x width x color channels
numFeatures = imageSize[0] * imageSize[1] * imageSize[2]

training_x = training_images.reshape(numFeatures, imageSize[3]).T.astype(float)
test_x = test_images.reshape(numFeatures, test_images.shape[3]).T.astype(float)

## training linear regression model
Example 2
def nuclei_measurement():
    """Predict nucleus area from raw pixel values with linear regression.

    Loads the nuclei dataset, shows montages of the 300 smallest and 300
    largest training nuclei, fits ordinary least squares on the full
    training set and on a reduced set (every fourth sample), and plots
    predicted vs. true areas for the test set in both cases.
    """
    fn = '../data/nuclei_data.mat'
    mat = scipy.io.loadmat(fn)
    test_images = mat["test_images"]  # shape (24, 24, 3, 20730)
    test_y = mat["test_y"]  # shape (20730, 1)
    training_images = mat["training_images"]  # shape (24, 24, 3, 21910)
    training_y = mat["training_y"]  # shape (21910, 1)

    montage_n = 300
    # order the sample indices by area so the extremes can be selected
    sort_ix = np.argsort(training_y, axis=0)
    sort_ix_low = sort_ix[:montage_n]  # get the 300 smallest
    sort_ix_high = sort_ix[-montage_n:]  # get the 300 largest

    # visualize the 300 smallest and the 300 largest nuclei
    X_small = training_images[:, :, :, sort_ix_low.ravel()]
    X_large = training_images[:, :, :, sort_ix_high.ravel()]
    fig = plt.figure(figsize=(16, 8))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    util.montageRGB(X_small, ax1)
    ax1.set_title('300 smallest nuclei')
    util.montageRGB(X_large, ax2)
    ax2.set_title('300 largest nuclei')

    # dataset preparation
    imageSize = training_images.shape

    # every pixel is a feature so the number of features is:
    # height x width x color channels
    numFeatures = imageSize[0] * imageSize[1] * imageSize[2]
    training_x = training_images.reshape(numFeatures,
                                         imageSize[3]).T.astype(float)
    test_x = test_images.reshape(numFeatures,
                                 test_images.shape[3]).T.astype(float)

    ## training linear regression model
    # BUGFIX: `predicted_y` was referenced below but never assigned
    # (NameError at runtime). Fit ordinary least squares with an
    # intercept column and predict the test-set areas.
    X_train = np.concatenate(
        (training_x, np.ones((training_x.shape[0], 1))), axis=1)
    X_test = np.concatenate(
        (test_x, np.ones((test_x.shape[0], 1))), axis=1)
    theta, _, _, _ = np.linalg.lstsq(X_train, training_y, rcond=None)
    predicted_y = X_test.dot(theta)

    # visualize the results
    fig2 = plt.figure(figsize=(16, 8))
    ax1 = fig2.add_subplot(121)
    line1, = ax1.plot(test_y, predicted_y, ".g", markersize=3)
    ax1.grid()
    ax1.set_xlabel('Area')
    ax1.set_ylabel('Predicted Area')
    ax1.set_title('Training with full sample')

    # training with smaller number of training samples:
    # refit using every fourth training sample only
    theta_small, _, _, _ = np.linalg.lstsq(X_train[::4], training_y[::4],
                                           rcond=None)
    predicted_y = X_test.dot(theta_small)

    # visualize the results
    ax2 = fig2.add_subplot(122)
    line2, = ax2.plot(test_y, predicted_y, ".g", markersize=3)
    ax2.grid()
    ax2.set_xlabel('Area')
    ax2.set_ylabel('Predicted Area')
    ax2.set_title('Training with smaller sample')
Example 3
def nuclei_measurement(batch_size=1000):
    """Compare nuclei-area regression trained on all samples vs. a subset.

    Loads the nuclei dataset, shows montages of the 300 smallest and 300
    largest training nuclei, trains a linear regression model on the full
    training set and on `batch_size` randomly drawn samples, plots
    predicted vs. true test areas for both models, and saves the figure.

    Parameters
    ----------
    batch_size : int
        Number of training samples drawn for the reduced-data model
        (default 1000; the full training set has 21910 samples).

    Returns
    -------
    tuple
        (E_test, E_test_small): test errors for the full and reduced
        training sets, as reported by sup.linear_regression.
    """
    fn = '../data/nuclei_data.mat'
    mat = scipy.io.loadmat(fn)
    test_images = mat["test_images"]  # shape (24, 24, 3, 20730)
    test_y = mat["test_y"]  # shape (20730, 1)
    training_images = mat["training_images"]  # shape (24, 24, 3, 21910)
    training_y = mat["training_y"]  # shape (21910, 1)

    montage_n = 300
    # order the sample indices by area so the extremes can be selected
    sort_ix = np.argsort(training_y, axis=0)
    sort_ix_low = sort_ix[:montage_n]  # get the 300 smallest
    sort_ix_high = sort_ix[-montage_n:]  # get the 300 largest

    # visualize the 300 smallest and the 300 largest nuclei
    X_small = training_images[:, :, :, sort_ix_low.ravel()]
    X_large = training_images[:, :, :, sort_ix_high.ravel()]
    fig = plt.figure(figsize=(16, 8))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    util.montageRGB(X_small, ax1)
    ax1.set_title('300 smallest nuclei')
    util.montageRGB(X_large, ax2)
    ax2.set_title('300 largest nuclei')

    # dataset preparation
    imageSize = training_images.shape

    # every pixel is a feature so the number of features is:
    # height x width x color channels
    numFeatures = imageSize[0] * imageSize[1] * imageSize[2]
    print(numFeatures)
    training_x = training_images.reshape(numFeatures,
                                         imageSize[3]).T.astype(float)
    test_x = test_images.reshape(numFeatures,
                                 test_images.shape[3]).T.astype(float)

    # Predict y (= area) for the test set; sup.linear_regression also
    # returns the test error.
    # NOTE(review): training_y is never passed to sup.linear_regression —
    # presumably it loads the labels itself; verify against its definition.
    E_test, predicted_y = sup.linear_regression(training_x, test_x,
                                                numFeatures)

    # visualize the results
    fig2 = plt.figure(figsize=(16, 8))
    ax1 = fig2.add_subplot(121)
    line1, = ax1.plot(predicted_y, test_y, ".g", markersize=3)
    ax1.grid()
    # BUGFIX: predicted values are plotted on the x axis and true areas on
    # the y axis, so the original labels were swapped relative to the data.
    ax1.set_xlabel('Predicted Area')
    ax1.set_ylabel('Area')
    ax1.set_title('Training with full sample')

    # training with smaller number of training samples
    # BUGFIX: np.random.randint samples WITH replacement, so the reduced
    # training set could contain duplicate samples; draw unique indices.
    ix = np.random.choice(imageSize[3], size=batch_size, replace=False)

    # select the reduced training data (only the chosen samples)
    training_x = training_images[:, :, :, ix].reshape(numFeatures,
                                                      len(ix)).T.astype(float)

    E_test_small, predicted_y = sup.linear_regression(training_x, test_x,
                                                      batch_size)

    # evaluation
    print(
        "The error for testset using traindata consisting of all samples: {:.2f}"
        .format(E_test))
    print(
        "The error for testset using traindata consisting of less samples: {:.2f}"
        .format(E_test_small))

    # visualize the results
    ax2 = fig2.add_subplot(122)
    line2, = ax2.plot(predicted_y, test_y, ".g", markersize=3)
    ax2.grid()
    # BUGFIX: labels swapped here as well (see the first subplot).
    ax2.set_xlabel('Predicted Area')
    ax2.set_ylabel('Area')
    ax2.set_title('Training with smaller sample')
    fig2.savefig(
        "Predicted area and real area with batch size {}".format(batch_size))

    return E_test, E_test_small