def nuclei_classification(mu=1e-5, batch_size=500, num_iterations=300):
    """Train a logistic regression nuclei classifier with mini-batch gradient descent.

    Loads ``../data/nuclei_data_classification.mat``, turns the image stacks
    into normalised feature matrices with ``util.reshape_and_normalize`` and
    runs ``num_iterations`` gradient-descent steps using the analytical
    gradient ``cad.lr_agrad``. After every step the average batch loss and
    the average validation loss (``cad.lr_nll`` divided by the number of
    samples) are drawn into a live figure.

    The hyper-parameters used to be an unfilled TODO, so the body read
    ``mu``, ``batch_size``, ``num_iterations`` and ``Theta`` before they
    were assigned. They are now keyword arguments with defaults, which keeps
    the zero-argument call working.

    Args:
        mu: Learning rate (step size) of the gradient-descent update.
        batch_size: Number of training samples drawn per step. Samples are
            drawn with ``np.random.randint``, i.e. with replacement.
        num_iterations: Number of gradient-descent steps to run.

    Returns:
        None. The function only trains and visualises.
    """
    ## dataset preparation
    fn = '../data/nuclei_data_classification.mat'
    mat = scipy.io.loadmat(fn)

    test_images = mat["test_images"]  # (24, 24, 3, 20730)
    training_images = mat["training_images"]  # (24, 24, 3, 14607)
    training_y = mat["training_y"]  # (14607, 1)
    validation_images = mat["validation_images"]  # (24, 24, 3, 7303)
    validation_y = mat["validation_y"]  # (7303, 1)

    # The test features are produced by the helper but not needed for training.
    training_x, validation_x, _ = util.reshape_and_normalize(
        training_images, validation_images, test_images)

    ## training logistic regression model
    # One weight per feature plus a bias, initialised with small random values.
    num_features = training_x.shape[1]
    Theta = 0.02 * np.random.rand(num_features + 1, 1)

    # Per-iteration loss buffers; NaN entries are simply not drawn by matplotlib.
    xx = np.arange(num_iterations)
    loss = np.full(num_iterations, np.nan)
    validation_loss = np.full(num_iterations, np.nan)

    fig = plt.figure(figsize=(8, 8))
    ax2 = fig.add_subplot(111)
    ax2.set_xlabel('Iteration')
    ax2.set_ylabel('Loss (average per sample)')
    ax2.set_title('mu = ' + str(mu))
    h1, = ax2.plot(xx, loss, linewidth=2)  # 'Color', [0.0 0.2 0.6],
    h2, = ax2.plot(xx, validation_loss, linewidth=2)  # 'Color', [0.8 0.2 0.8],
    ax2.set_ylim(0, 0.7)
    ax2.set_xlim(0, num_iterations)
    ax2.grid()

    text_str2 = 'iter.: {}, loss: {:.3f}, val. loss: {:.3f}'.format(0, 0, 0)
    txt2 = ax2.text(0.3, 0.95, text_str2,
                    bbox={'facecolor': 'white', 'alpha': 1, 'pad': 10},
                    transform=ax2.transAxes)

    # The validation design matrix never changes, so build it once.
    validation_x_ones = util.addones(validation_x)
    num_validation = validation_x.shape[0]

    for k in range(num_iterations):
        # pick a batch at random
        idx = np.random.randint(training_x.shape[0], size=batch_size)
        batch_x_ones = util.addones(training_x[idx, :])
        batch_y = training_y[idx]

        # gradient descent
        # instead of the numerical gradient, we compute the gradient with
        # the analytical expression, which is much faster
        # (the gradient is transposed to line up with Theta's column shape)
        Theta = np.array(
            Theta - mu * cad.lr_agrad(batch_x_ones, batch_y, Theta).T)

        # Losses are evaluated with the updated parameters and averaged per
        # sample so that batch and validation values are comparable.
        loss[k] = cad.lr_nll(batch_x_ones, batch_y, Theta) / batch_size
        validation_loss[k] = cad.lr_nll(
            validation_x_ones, validation_y, Theta) / num_validation

        # visualize the training
        h1.set_ydata(loss)
        h2.set_ydata(validation_loss)
        text_str2 = 'iter.: {}, loss: {:.3f}, val. loss={:.3f} '.format(
            k, loss[k], validation_loss[k])
        txt2.set_text(text_str2)

        display(fig)
        clear_output(wait=True)
        plt.pause(.005)
def logistic_regression():
    """Demonstrate logistic regression on a synthetic two-class 2-D dataset.

    Generates Gaussian training and validation sets with
    ``util.generate_gaussian_data``, then trains a logistic regression model
    with stochastic (mini-batch) gradient descent. The gradient is obtained
    numerically through ``reg.ngradient``. The left subplot shows the
    training data with the current decision map; the right subplot shows the
    average training and validation loss per iteration.

    Returns:
        None. The function only trains and visualises.
    """
    # dataset preparation
    num_training_samples = 300
    num_validation_samples = 100

    # here we reuse the function from the segmentation practicals
    m1 = [2, 3]
    m2 = [-0, -4]
    s1 = [[8, 7], [7, 8]]
    s2 = [[8, 6], [6, 8]]

    trainingX, trainingY = util.generate_gaussian_data(
        num_training_samples, m1, m2, s1, s2)
    r, c = trainingX.shape
    print('Training sample shape: {}'.format(trainingX.shape))

    # we need a validation set to monitor for overfitting
    validationX, validationY = util.generate_gaussian_data(
        num_validation_samples, m1, m2, s1, s2)
    r_val = validationX.shape[0]
    print('Validation sample shape: {}'.format(validationX.shape))

    validationXones = util.addones(validationX)

    # train a logistic regression model:
    # the learning rate for the gradient descent method
    # (the same as in intensity-based registration)
    mu = 0.001

    # we are actually using stochastic gradient descent
    batch_size = 30

    # initialize the parameters of the model with small random values,
    # we need one parameter for each feature and a bias
    Theta = 0.02 * np.random.rand(c + 1, 1)

    # number of gradient descent iterations
    num_iterations = 300

    # variables to keep the loss at every iteration
    # (needed for visualization)
    iters = np.arange(num_iterations)
    loss = np.full(iters.shape, np.nan)
    validation_loss = np.full(iters.shape, np.nan)

    # Create base figure
    fig = plt.figure(figsize=(15, 8))
    ax1 = fig.add_subplot(121)
    im1, Xh_ones, num_range_points = util.plot_lr(
        trainingX, trainingY, Theta, ax1)
    util.scatter_data(trainingX, trainingY, ax=ax1)
    ax1.grid()
    ax1.set_xlabel('x_1')
    ax1.set_ylabel('x_2')
    ax1.legend()
    ax1.set_title('Training set')
    text_str1 = '{:.4f}; {:.4f}; {:.4f}'.format(0, 0, 0)
    txt1 = ax1.text(0.3, 0.95, text_str1,
                    bbox={'facecolor': 'white', 'alpha': 1, 'pad': 10},
                    transform=ax1.transAxes)

    ax2 = fig.add_subplot(122)
    ax2.set_xlabel('Iteration')
    ax2.set_ylabel('Loss (average per sample)')
    ax2.set_title('mu = ' + str(mu))
    h1, = ax2.plot(iters, loss, linewidth=2, label='Training loss')
    h2, = ax2.plot(iters, validation_loss, linewidth=2,
                   label='Validation loss')
    ax2.set_ylim(0, 0.7)
    ax2.set_xlim(0, num_iterations)
    ax2.grid()
    # The labelled loss curves live on ax2, so the legend belongs there
    # (this used to be a second ax1.legend() call and the loss legend was
    # never drawn).
    ax2.legend()

    text_str2 = 'iter.: {}, loss: {:.3f}, val. loss: {:.3f}'.format(0, 0, 0)
    txt2 = ax2.text(0.3, 0.95, text_str2,
                    bbox={'facecolor': 'white', 'alpha': 1, 'pad': 10},
                    transform=ax2.transAxes)

    # iterate
    for k in range(num_iterations):
        # pick a batch at random
        idx = np.random.randint(r, size=batch_size)

        # Build the batch design matrix once per iteration instead of on
        # every evaluation that the numerical gradient performs.
        batch_x_ones = util.addones(trainingX[idx, :])
        batch_y = trainingY[idx]

        # the loss function for this particular batch
        def loss_fun(theta, x=batch_x_ones, y=batch_y):
            return cad.lr_nll(x, y, theta)

        # gradient descent:
        # here we reuse the code for numerical computation of the gradient
        # of a function
        Theta = Theta - mu * reg.ngradient(loss_fun, Theta)

        # compute the loss for the current model parameters for the
        # training and validation sets
        # note that the loss is divided with the number of samples so
        # it is comparable for different number of samples
        loss[k] = loss_fun(Theta) / batch_size
        validation_loss[k] = cad.lr_nll(
            validationXones, validationY, Theta) / r_val

        # update the visualization
        ph = cad.sigmoid(Xh_ones.dot(Theta)) > 0.5
        decision_map = ph.reshape(num_range_points, num_range_points)
        decision_map_trns = np.flipud(decision_map)
        im1.set_data(decision_map_trns)
        text_str1 = '{:.4f}; {:.4f}; {:.4f}'.format(
            Theta[0, 0], Theta[1, 0], Theta[2, 0])
        txt1.set_text(text_str1)
        h1.set_ydata(loss)
        h2.set_ydata(validation_loss)
        text_str2 = 'iter.={}, loss={:.3f}, val. loss={:.3f} '.format(
            k, loss[k], validation_loss[k])
        txt2.set_text(text_str2)

        display(fig)
        clear_output(wait=True)
# NOTE(review): whitespace-collapsed fragment. The whole snippet sits on one
# physical line, so Python treats everything after the first `#` as a comment:
# only the `Theta` initialisation below is executable as written.
# The commented-out remainder reads like the setup for a numerical-gradient
# exercise (iteration count, NaN-filled loss buffers, a random batch index,
# the batch loss `fun`, the evaluation point `x` and the step `h`), followed
# by a TODO whose implementation is not present here.
# `c`, `r`, `batch_size`, `trainingX` and `trainingY` are not defined in this
# fragment - presumably they come from an enclosing scope; TODO confirm and
# restore the line breaks before relying on this code.
Theta = 0.02 * np.random.rand(c + 1, 1) # number of gradient descent iterations num_iterations = 300 # variables to keep the loss and gradient at every iteration # (needed for visualization) iters = np.arange(num_iterations) loss = np.full(iters.shape, np.nan) validation_loss = np.full(iters.shape, np.nan) # pick a batch at random idx = np.random.randint(r, size=batch_size) # the loss function for this particular batch fun = lambda Theta: cad.lr_nll(util.addones(trainingX[idx, :]), trainingY[idx], Theta) x = Theta h = 1e-3 # Computes the derivative of a function with numerical differentiation. # Input: # fun - function for which the gradient is computed # x - vector of parameter values at which to compute the gradient # h - a small positive number used in the finite difference formula # Output: # g - vector of partial derivatives (gradient) of fun #------------------------------------------------------------------# # TODO: Implement the computation of the partial derivatives of # the function at x with numerical differentiation. # g[k] should store the partial derivative w.r.t. the k-th parameter
# NOTE(review): whitespace-collapsed, truncated fragment. It is not valid
# Python as written (`k = 0 counter = 0` has no statement separator), and
# everything after the first `#` is a comment.
# The text describes a `while` loop that keeps training while `normgradient`
# is above 0.1 and `stopnow` is below 1, where `normgradient` is
# `np.linalg.norm` of the current validation loss (a scalar, so this is its
# absolute value rather than a gradient norm).
# The visible text stops at "# visualize the training": no update of `Theta`,
# `k` or `stopnow` is shown, so the loop cannot be reconstructed from here.
# Presumably an excerpt of the loop in auto_nuclei_classification - TODO
# confirm and remove or restore.
E_validation_new = 0; k = 0 counter = 0; stopnow = 0 normgradient = 1; while normgradient>0.1 and stopnow<1: # pick a batch at random idx = np.random.randint(training_x.shape[0], size=batch_size) training_x_ones = util.addones(training_x[idx,:]) validation_x_ones = util.addones(validation_x) # the loss function for this particular batch loss_fun = lambda Theta: cad.lr_nll(training_x_ones, training_y[idx], Theta) # gradient descent # instead of the numerical gradient, we compute the gradient with # the analytical expression, which is much faster Theta_new = Theta - mu*cad.lr_agrad(training_x_ones, training_y[idx], Theta).T loss[k] = loss_fun(Theta_new)/batch_size validation_loss[k] = cad.lr_nll(validation_x_ones, validation_y, Theta_new)/validation_x.shape[0] #distance to zero (0,0) normgradient = np.linalg.norm(validation_loss[k]) # visualize the training
# NOTE(review): whitespace-collapsed fragment that starts and ends in the
# middle of a routine. It is not valid Python as written (several statements
# share one line without separators).
# The text repeats the tail of the loss-plot setup and the start of the
# numerical-gradient training loop found in logistic_regression, including
# the `ax1.legend()` call issued right after configuring `ax2`.
# It depends on names that are not defined in this fragment (`ax1`, `ax2`,
# `num_iterations`, `r`, `batch_size`, `trainingX`, `trainingY`, `Theta`,
# `mu`, `loss`, `validation_loss`, `validationXones`, `validationY`, `r_val`,
# `Xh_ones`). Presumably a duplicated excerpt - TODO confirm and remove.
ax2.set_ylim(0, 0.7) ax2.set_xlim(0, num_iterations) ax2.grid() ax1.legend() text_str2 = 'iter.: {}, loss: {:.3f}, val. loss: {:.3f}'.format(0, 0, 0) txt2 = ax2.text(0.3, 0.95, text_str2, bbox={'facecolor': 'white', 'alpha': 1, 'pad': 10}, transform=ax2.transAxes) # iterate for k in np.arange(num_iterations): # pick a batch at random idx = np.random.randint(r, size=batch_size) # the loss function for this particular batch loss_fun = lambda Theta: cad.lr_nll(util.addones(trainingX[idx,:]), trainingY[idx], Theta) # gradient descent: # here we reuse the code for numerical computation of the gradient # of a function Theta = Theta - mu*reg.ngradient(loss_fun, Theta) # compute the loss for the current model parameters for the # training and validation sets # note that the loss is divided with the number of samples so # it is comparable for different number of samples loss[k] = loss_fun(Theta)/batch_size validation_loss[k] = cad.lr_nll(validationXones, validationY, Theta)/r_val # upldate the visualization ph = cad.sigmoid(Xh_ones.dot(Theta)) > 0.5
def auto_nuclei_classification(mu, batch_size, max_iterations=100000):
    """Train the nuclei classifier until the validation loss stops improving.

    Loads ``../data/nuclei_data_classification.mat``, flattens every image
    into one feature row, normalises the features with the training-set mean
    and standard deviation, and runs mini-batch gradient descent with the
    analytical gradient ``cad.lr_agrad``. Training stops as soon as one of
    these holds:

    * the average validation loss drops to 0.1 or below,
    * after more than 100 iterations, the validation loss rounded to four
      decimals equals the value from 25 iterations earlier,
    * ``max_iterations`` steps have been taken.

    The loss curves are plotted live and saved to a PNG whose name contains
    ``batch_size`` and ``mu``.

    Args:
        mu: Learning rate of the gradient-descent update. It is formatted
            with ``'{:.2}'``, so it has to be a float.
        batch_size: Number of training samples drawn (with replacement) per
            step.
        max_iterations: Upper bound on the number of steps, which is also
            the length of the loss buffers. The loop used to be unbounded
            while the buffers were fixed at 100000 entries, so a run in
            which neither stopping rule fired ended in an IndexError.

    Returns:
        Tuple ``(predictedY_test, E_test)``: the linear model output
        ``addones(test_x) . Theta`` for the test set, and the sum of squared
        differences between that output and ``test_y``.

    Raises:
        ValueError: If ``max_iterations`` is smaller than 1.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    ## dataset preparation
    fn = '../data/nuclei_data_classification.mat'
    mat = scipy.io.loadmat(fn)

    test_images = mat["test_images"]  # (24, 24, 3, 20730)
    test_y = mat["test_y"]  # (20730, 1)
    training_images = mat["training_images"]  # (24, 24, 3, 14607)
    training_y = mat["training_y"]  # (14607, 1)
    # The validation split must come from its own keys. It used to be read
    # from the training keys, so the "validation" loss and both stopping
    # rules were evaluated on training data.
    validation_images = mat["validation_images"]  # (24, 24, 3, 7303)
    validation_y = mat["validation_y"]  # (7303, 1)

    # every pixel is a feature so the number of features is:
    # height x width x color channels
    image_size = training_images.shape
    num_features = image_size[0] * image_size[1] * image_size[2]

    def flatten(images):
        # One row per image, one column per pixel/channel value.
        return images.reshape(num_features, images.shape[3]).T.astype(float)

    training_x = flatten(training_images)
    validation_x = flatten(validation_images)
    test_x = flatten(test_images)

    # the training will progress much better if we normalize the features;
    # the statistics come from the training set only and broadcast over rows
    mean_train = np.mean(training_x, axis=0, keepdims=True)
    std_train = np.std(training_x, axis=0, keepdims=True)
    training_x = (training_x - mean_train) / std_train
    validation_x = (validation_x - mean_train) / std_train
    test_x = (test_x - mean_train) / std_train

    ## training logistic regression model
    # initial weights: one per feature plus a bias, small random values
    num_weights = training_x.shape[1] + 1
    Theta = 0.02 * np.random.rand(num_weights, 1)

    # Per-iteration loss buffers; NaN entries are not drawn by matplotlib.
    xx = np.arange(max_iterations)
    loss = np.full(max_iterations, np.nan)
    validation_loss = np.full(max_iterations, np.nan)

    # Create base figure
    fig = plt.figure(figsize=(15, 10))
    ax2 = fig.add_subplot(111)
    ax2.set_xlabel('Iteration')
    ax2.set_ylabel('Loss (average per sample)')
    ax2.set_title('mu = {:.2}'.format(mu))
    h1, = ax2.plot(xx, loss, linewidth=2)  # 'Color', [0.0 0.2 0.6],
    h2, = ax2.plot(xx, validation_loss, linewidth=2)  # 'Color', [0.8 0.2 0.8],
    ax2.set_ylim(0, 0.7)
    ax2.grid()

    text_str2 = 'iter.: {}, loss: {:.3f}, val. loss: {:.3f}'.format(0, 0, 0)
    txt2 = ax2.text(0.3, 0.95, text_str2,
                    bbox={'facecolor': 'white', 'alpha': 1, 'pad': 10},
                    transform=ax2.transAxes)

    # The validation design matrix never changes, so build it once.
    validation_x_ones = util.addones(validation_x)
    num_validation = validation_x.shape[0]

    k = 0  # iteration number
    plateau_reached = False  # set when the validation loss stops changing
    current_val_loss = 1.0  # anything above 0.1, so the first step runs

    while k < max_iterations and current_val_loss > 0.1 and not plateau_reached:
        # pick a batch at random
        idx = np.random.randint(training_x.shape[0], size=batch_size)
        batch_x_ones = util.addones(training_x[idx, :])
        batch_y = training_y[idx]

        # gradient descent
        # instead of the numerical gradient, we compute the gradient with
        # the analytical expression, which is much faster
        Theta = np.array(
            Theta - mu * cad.lr_agrad(batch_x_ones, batch_y, Theta).T)

        # Calculate the loss and the validation loss (average per sample)
        loss[k] = cad.lr_nll(batch_x_ones, batch_y, Theta) / batch_size
        validation_loss[k] = cad.lr_nll(
            validation_x_ones, validation_y, Theta) / num_validation

        # magnitude of the validation loss, used as stopping criterion
        current_val_loss = abs(validation_loss[k])

        # visualize the training
        # the x-axis follows the iteration count; max() avoids identical
        # lower/upper limits on the very first iteration
        ax2.set_xlim(0, max(k, 1))
        h1.set_ydata(loss)
        h2.set_ydata(validation_loss)
        text_str2 = 'iter.: {}, loss: {:.3f}, val. loss={:.3f} '.format(
            k, loss[k], validation_loss[k])
        txt2.set_text(text_str2)

        display(fig)
        clear_output(wait=True)
        plt.pause(.005)

        # Stop when the validation loss doesn't decrease any further, by
        # comparing the current value with the one from 25 iterations ago
        if k > 100 and (round(validation_loss[k], 4)
                        == round(validation_loss[k - 25], 4)):
            plateau_reached = True
            print("The validation loss has reached its equilibrium")

        k += 1

    # save the final loss curve
    fig.savefig("Loss curve for batch size {} and init mu {:.2}.png".format(
        batch_size, mu))

    # predict the test data with the final weights
    # NOTE(review): this is the raw linear score, not cad.sigmoid(...) of it,
    # so E_test compares unbounded scores with the labels - confirm intent.
    predictedY_test = util.addones(test_x).dot(Theta)

    # sum of squared errors on the test set
    E_test = np.sum(np.square(np.subtract(predictedY_test, test_y)))

    return predictedY_test, E_test
def nuclei_classification(mu, batch_size, num_iterations):
    """Train the nuclei classifier for a fixed number of gradient-descent steps.

    Loads ``../data/nuclei_data_classification.mat``, flattens every image
    into one feature row, normalises the features with the training-set mean
    and standard deviation, and runs ``num_iterations`` mini-batch
    gradient-descent steps with the analytical gradient ``cad.lr_agrad``.
    The average batch and validation losses are plotted live, and the final
    figure is saved to a PNG whose name contains ``batch_size`` and ``mu``.

    Args:
        mu: Learning rate of the gradient-descent update. It is formatted
            with ``'{:.2}'`` in the file name, so it has to be a float.
        batch_size: Number of training samples drawn (with replacement) per
            step.
        num_iterations: Number of gradient-descent steps.

    Returns:
        Tuple ``(predictedY_test, E_test)``: the linear model output
        ``addones(test_x) . Theta`` for the test set, and the sum of squared
        differences between that output and ``test_y``.
    """
    ## dataset preparation
    fn = '../data/nuclei_data_classification.mat'
    mat = scipy.io.loadmat(fn)

    test_images = mat["test_images"]  # (24, 24, 3, 20730)
    test_y = mat["test_y"]  # (20730, 1)
    training_images = mat["training_images"]  # (24, 24, 3, 14607)
    training_y = mat["training_y"]  # (14607, 1)
    # The validation split must come from its own keys. It used to be read
    # from the training keys, which made the validation curve a second
    # training curve.
    validation_images = mat["validation_images"]  # (24, 24, 3, 7303)
    validation_y = mat["validation_y"]  # (7303, 1)

    # every pixel is a feature so the number of features is:
    # height x width x color channels
    image_size = training_images.shape
    num_features = image_size[0] * image_size[1] * image_size[2]

    def flatten(images):
        # One row per image, one column per pixel/channel value.
        return images.reshape(num_features, images.shape[3]).T.astype(float)

    training_x = flatten(training_images)
    validation_x = flatten(validation_images)
    test_x = flatten(test_images)

    # the training will progress much better if we normalize the features;
    # the statistics come from the training set only and broadcast over rows
    mean_train = np.mean(training_x, axis=0, keepdims=True)
    std_train = np.std(training_x, axis=0, keepdims=True)
    training_x = (training_x - mean_train) / std_train
    validation_x = (validation_x - mean_train) / std_train
    test_x = (test_x - mean_train) / std_train

    ## training logistic regression model
    # one weight per feature plus a bias, small random initial values
    num_weights = training_x.shape[1] + 1
    Theta = 0.02 * np.random.rand(num_weights, 1)

    # Per-iteration loss buffers; NaN entries are not drawn by matplotlib.
    xx = np.arange(num_iterations)
    loss = np.full(num_iterations, np.nan)
    validation_loss = np.full(num_iterations, np.nan)

    fig = plt.figure(figsize=(8, 8))
    ax2 = fig.add_subplot(111)
    ax2.set_xlabel('Iteration')
    ax2.set_ylabel('Loss (average per sample)')
    ax2.set_title('mu = ' + str(mu))
    h1, = ax2.plot(xx, loss, linewidth=2)  # 'Color', [0.0 0.2 0.6],
    h2, = ax2.plot(xx, validation_loss, linewidth=2)  # 'Color', [0.8 0.2 0.8],
    ax2.set_ylim(0, 0.7)
    ax2.set_xlim(0, num_iterations)
    ax2.grid()

    text_str2 = 'iter.: {}, loss: {:.3f}, val. loss: {:.3f}'.format(0, 0, 0)
    txt2 = ax2.text(0.3, 0.95, text_str2,
                    bbox={'facecolor': 'white', 'alpha': 1, 'pad': 10},
                    transform=ax2.transAxes)

    # The validation design matrix never changes, so build it once.
    validation_x_ones = util.addones(validation_x)
    num_validation = validation_x.shape[0]

    for k in range(num_iterations):
        # pick a batch at random
        idx = np.random.randint(training_x.shape[0], size=batch_size)
        batch_x_ones = util.addones(training_x[idx, :])
        batch_y = training_y[idx]

        # gradient descent
        # instead of the numerical gradient, we compute the gradient with
        # the analytical expression, which is much faster
        Theta = np.array(
            Theta - mu * cad.lr_agrad(batch_x_ones, batch_y, Theta).T)

        # average per-sample losses, evaluated with the updated parameters
        loss[k] = cad.lr_nll(batch_x_ones, batch_y, Theta) / batch_size
        validation_loss[k] = cad.lr_nll(
            validation_x_ones, validation_y, Theta) / num_validation

        # visualize the training
        h1.set_ydata(loss)
        h2.set_ydata(validation_loss)
        text_str2 = 'iter.: {}, loss: {:.3f}, val. loss={:.3f}'.format(
            k, loss[k], validation_loss[k])
        txt2.set_text(text_str2)

        display(fig)
        clear_output(wait=True)
        plt.pause(.005)

    # save the final loss curve; saving through the figure object does not
    # depend on which figure pyplot currently considers active
    fig.savefig("Loss curve for batch size {} and init mu {:.2}.png".format(
        batch_size, mu))

    # Compute the error for the trained model.
    # NOTE(review): this is the raw linear score, not cad.sigmoid(...) of it,
    # so E_test compares unbounded scores with the labels - confirm intent.
    predictedY_test = util.addones(test_x).dot(Theta)
    E_test = np.sum(np.square(np.subtract(predictedY_test, test_y)))

    return predictedY_test, E_test