def tt_plot_comp(train_x, train_y, test_x, test_y):
    """Train, test and plot a comparison of update functions.

    For each of ``sgd``, ``sgd_momentum`` and ``rms_prop`` the train and
    predict callables are obtained from ``cnn(update_func=...)`` and run for
    ``NO_ITERS`` epochs of mini-batches of ``BATCH_SIZE`` rows.  The
    per-epoch test accuracy and mean training cost of every update function
    are drawn on two shared figures, saved under ``CUR_DIR`` and shown.

    Arguments:
        train_x, train_y, test_x, test_y: train and test data
            (test_y is reduced with argmax over axis 1 before being compared
            with the predictions - presumably one-hot rows, TODO confirm)

    Returns:
        None.  Prints the best test accuracy per update function and writes
        'project_2a_test.png' and 'project_2a_train.png'.
    """
    funcs = [sgd, sgd_momentum, rms_prop]
    labels = [func.__name__ for func in funcs]

    # One entry per update function; each entry is a per-epoch series.
    # These are kept separate from the per-epoch accumulator below so the
    # accumulator can never clobber the collected results.
    all_costs = []
    all_accrs = []

    for func in funcs:
        # train and test
        train, predict, _ = cnn(update_func=func)
        test_accr = []
        train_cost = []
        for _ in tqdm(range(NO_ITERS)):
            train_x, train_y = shuffle_data(train_x, train_y)
            test_x, test_y = shuffle_data(test_x, test_y)

            # Visit every full batch.  A trailing partial batch is skipped,
            # so the number of updates summed equals the divisor used for
            # the epoch average.
            n_batches = len(train_x) // BATCH_SIZE
            epoch_cost = 0.0
            for batch in range(n_batches):
                start = batch * BATCH_SIZE
                end = start + BATCH_SIZE
                epoch_cost += train(train_x[start:end], train_y[start:end])

            # average out the cost for one epoch
            train_cost.append(epoch_cost / n_batches)
            test_accr.append(
                np.mean(np.argmax(test_y, axis=1) == predict(test_x)))

        # output max accuracy at # iterations
        print('%.1f accuracy at %d iterations' %
              (np.max(test_accr) * 100, np.argmax(test_accr) + 1))
        all_costs.append(train_cost)
        all_accrs.append(test_accr)

    # plot test accuracy
    pylab.figure()
    for label, series in zip(labels, all_accrs):
        pylab.plot(range(NO_ITERS), series, label=label)
    pylab.xlabel('epochs')
    pylab.ylabel('test accuracy')
    pylab.legend()
    pylab.savefig(os.path.join(CUR_DIR, 'project_2a_test.png'))

    # plot training cost
    pylab.figure()
    for label, series in zip(labels, all_costs):
        pylab.plot(range(NO_ITERS), series, label=label)
    pylab.xlabel('epochs')
    pylab.ylabel('training cost')
    pylab.legend()
    pylab.savefig(os.path.join(CUR_DIR, 'project_2a_train.png'))
    pylab.show()
def load_train_test():
    """Load ``cal_housing.data`` and return preprocessed train/test splits.

    The comma-separated file is read from ``DATA_DIR``.  Its first eight
    columns are used as inputs and its last column as the target (turned
    into a column matrix).  Rows are shuffled with ``shuffle_data``, the
    leading 30% become the test set and the remainder the training set.
    Each split's inputs are then passed through ``scale`` (with that
    split's column-wise min and max) and ``normalize`` (with the scaled
    split's column-wise mean and standard deviation).

    Returns:
        (train_x, train_y, test_x, test_y)
    """
    data_path = os.path.join(DATA_DIR, 'cal_housing.data')
    raw = np.loadtxt(data_path, delimiter=',')

    # inputs: columns 0-7, target: last column as an (n, 1) matrix
    features = raw[:, :8]
    targets = np.asmatrix(raw[:, -1]).transpose()
    features, targets = shuffle_data(features, targets)

    # 3:7 test:train split, test rows taken from the front
    n_test = 3 * features.shape[0] // 10
    test_x = features[:n_test]
    test_y = targets[:n_test]
    train_x = features[n_test:]
    train_y = targets[n_test:]

    # NOTE(review): the test split is scaled and normalised with statistics
    # computed from the test split itself rather than from the training
    # split - confirm this is intended.
    train_hi = np.max(train_x, axis=0)
    train_lo = np.min(train_x, axis=0)
    test_hi = np.max(test_x, axis=0)
    test_lo = np.min(test_x, axis=0)
    train_x = scale(train_x, train_lo, train_hi)
    test_x = scale(test_x, test_lo, test_hi)

    # statistics are taken after scaling
    train_mu = np.mean(train_x, axis=0)
    train_sigma = np.std(train_x, axis=0)
    test_mu = np.mean(test_x, axis=0)
    test_sigma = np.std(test_x, axis=0)
    train_x = normalize(train_x, train_mu, train_sigma)
    test_x = normalize(test_x, test_mu, test_sigma)

    return train_x, train_y, test_x, test_y
def train_test(batch_size=4, hl_neuron=10, decay=1e-6, layer_4=False):
    """Train and test the neural network with data.

    The data comes from ``load_train_test()``.  For each of ``EPOCHS``
    epochs the training set is reshuffled, one update is made per full
    mini-batch, and the test accuracy is measured on the whole test set.
    Every call to the train function is timed.

    Arguments:
        batch_size: int - batch size for mini-batch gradient descent
        hl_neuron: int - number of neurons for hidden layer
        decay: float - decay parameter
        layer_4: bool - build the network with ``nn_4_layer`` instead of
            ``nn_3_layer``

    Returns:
        (train_cost, test_accuracy, average_time) where train_cost and
        test_accuracy are arrays with one value per epoch and average_time
        is the mean duration of one update in microseconds.
    """
    # init functions and variables
    # NOTE(review): the builder is called as builder(hl_neuron, decay) and
    # must return a (train, predict) pair - confirm against the nn_3_layer /
    # nn_4_layer definitions that are actually in scope here.
    build_network = nn_4_layer if layer_4 else nn_3_layer
    train, predict = build_network(hl_neuron, decay)

    train_x, train_y, test_x, test_y = load_train_test()
    n_tr = len(train_x)
    # Only full batches are used, so this is both the number of updates per
    # epoch and the divisor for the epoch's mean cost.
    n_batches = n_tr // batch_size

    test_accuracy = []
    train_cost = []
    timings = []

    # train and test
    for _ in tqdm(range(EPOCHS)):
        train_x, train_y = shuffle_data(train_x, train_y)
        cost = 0.0
        for batch in range(n_batches):
            start = batch * batch_size
            end = start + batch_size
            start_time = time.time()
            cost += train(train_x[start:end], train_y[start:end])
            # seconds -> microseconds
            timings.append((time.time() - start_time) * 1e6)
        train_cost = np.append(train_cost, cost / n_batches)
        test_accuracy = np.append(
            test_accuracy,
            np.mean(np.argmax(test_y, axis=1) == predict(test_x)))

    # print results
    print('%.1f accuracy at %d iterations' %
          (np.max(test_accuracy) * 100, np.argmax(test_accuracy) + 1))
    average_time = np.average(timings)
    print('average time per update: {}'.format(average_time))
    return (train_cost, test_accuracy, average_time)
def nn_4_layer(train_x, train_y, test_x, test_y, no_hidden=30,
               learning_rate=1e-4):
    """Train and test the 4-layer neural network.

    The network has two sigmoid hidden layers (``no_hidden`` units, then 20
    units) followed by a linear output.  The cost is the mean squared
    difference between target and output.  Each of the ``EPOCHS`` epochs
    reshuffles the training data, performs a single gradient-descent update
    on the whole training set and then evaluates on the test set.  The
    parameters from the epoch with the lowest test cost are restored at the
    end, re-evaluated and reported.

    Arguments:
        train_x, train_y, test_x, test_y: train and test data; the targets
            are transposed before being fed to the network (presumably
            (n, 1) columns - TODO confirm)
        no_hidden: int - number of units in the first hidden layer
        learning_rate: float - gradient-descent step size

    Returns:
        (train_cost, test_cost, test_accuracy): three arrays of length
        EPOCHS.  Note that "accuracy" here is the mean signed difference
        between target and output, not a classification rate.
    """
    second_width = 20  # units in the second hidden layer
    n_inputs = train_x.shape[1]

    inputs = T.matrix('x')  # data sample
    targets = T.matrix('d')  # desired output

    # Initialise weights and biases.  The order of the randn draws is part
    # of the behaviour under a fixed seed, so it is kept as
    # output, hidden 1, hidden 2.
    # NOTE(review): the second positional parameter of theano.shared is
    # `name`; if FL_X is meant to select a dtype it is not applied as one
    # here - confirm.
    out_w = theano.shared(np.random.randn(second_width) * .01, FL_X)
    out_b = theano.shared(np.random.randn() * .01, FL_X)
    hid1_w = theano.shared(np.random.randn(n_inputs, no_hidden) * .01, FL_X)
    hid1_b = theano.shared(np.random.randn(no_hidden) * 0.01, FL_X)
    hid2_w = theano.shared(
        np.random.randn(no_hidden, second_width) * .01, FL_X)
    hid2_b = theano.shared(np.random.randn(second_width) * 0.01, FL_X)
    params = [out_w, out_b, hid1_w, hid1_b, hid2_w, hid2_b]

    # learning rate
    step = theano.shared(learning_rate, FL_X)

    # forward pass, cost and error measure
    hid1_act = T.nnet.sigmoid(T.dot(inputs, hid1_w) + hid1_b)
    hid2_act = T.nnet.sigmoid(T.dot(hid1_act, hid2_w) + hid2_b)
    output = T.dot(hid2_act, out_w) + out_b
    cost = T.abs_(T.mean(T.sqr(targets - output)))
    accuracy = T.mean(targets - output)

    # one plain gradient-descent update per parameter
    grads = T.grad(cost, params)
    updates = [[param, param - step * grad]
               for param, grad in zip(params, grads)]

    # compile train and test functions
    train = theano.function(
        inputs=[inputs, targets],
        outputs=cost,
        updates=updates,
        allow_input_downcast=True)
    test = theano.function(
        inputs=[inputs, targets],
        outputs=[output, cost, accuracy],
        allow_input_downcast=True)

    # per-epoch records
    train_cost = np.zeros(EPOCHS)
    test_cost = np.zeros(EPOCHS)
    test_accuracy = np.zeros(EPOCHS)

    # Best-so-far snapshot, aligned with `params`.  These zero placeholders
    # are what gets restored if no epoch ever beats the initial bound.
    min_error = 1e+15
    best_iter = 0
    best_values = [
        np.zeros(second_width),
        0,
        np.zeros([n_inputs, no_hidden]),
        np.zeros(no_hidden),
        np.zeros([no_hidden, second_width]),
        np.zeros(second_width),
    ]

    step.set_value(learning_rate)

    for epoch in tqdm(range(EPOCHS)):
        train_x, train_y = shuffle_data(train_x, train_y)
        train_cost[epoch] = train(train_x, np.transpose(train_y))
        _, test_cost[epoch], test_accuracy[epoch] = test(
            test_x, np.transpose(test_y))

        if test_cost[epoch] < min_error:
            best_iter = epoch
            min_error = test_cost[epoch]
            best_values = [param.get_value() for param in params]

    # set weights and biases to values at which performance was best
    for param, value in zip(params, best_values):
        param.set_value(value)

    _, best_cost, best_accuracy = test(test_x, np.transpose(test_y))
    print('Minimum error: %.1f, Best accuracy %.1f, Number of Iterations: %d'
          % (best_cost, best_accuracy, best_iter))
    return train_cost, test_cost, test_accuracy
def _save_feature_maps(maps, count, grid, shape, title, filename):
    """Draw the first `count` feature maps of sample 0 on a grid and save it.

    Arguments:
        maps: array indexed as maps[0, i, :] for feature map i
        count: int - number of feature maps to draw
        grid: (rows, cols) - subplot layout
        shape: (height, width) - shape each map is reshaped to for display
        title: str - figure title
        filename: str - file name, saved under CUR_DIR
    """
    rows, cols = grid
    pylab.figure()
    pylab.gray()
    for idx in range(count):
        pylab.subplot(rows, cols, idx + 1)
        pylab.axis('off')
        pylab.imshow(maps[0, idx, :].reshape(shape))
    pylab.suptitle(title)
    pylab.savefig(os.path.join(CUR_DIR, filename))


def tt_plot_func(train_x, train_y, test_x, test_y, func=sgd):
    """Train, test and plot using a particular update function.

    The network functions come from ``cnn(update_func=func)``.  Training
    runs for ``NO_ITERS`` epochs of mini-batches of ``BATCH_SIZE`` rows.
    The per-epoch test accuracy and mean training cost are plotted and
    saved, then one random test image is pushed through the network and the
    image plus its convolved / pooled feature maps of both layers are saved
    as figures under ``CUR_DIR``.

    Arguments:
        train_x, train_y, test_x, test_y: train and test data
            (test_y is reduced with argmax over axis 1 before being compared
            with the predictions - presumably one-hot rows, TODO confirm)
        func: update function to use, default to nn_cnn.sgd

    Returns:
        None.  Prints the best test accuracy, writes the PNG files and
        shows all figures.
    """
    # train and test
    train, predict, test = cnn(update_func=func)
    test_accr = []
    train_cost = []
    for _ in tqdm(range(NO_ITERS)):
        train_x, train_y = shuffle_data(train_x, train_y)
        test_x, test_y = shuffle_data(test_x, test_y)

        # Visit every full batch.  A trailing partial batch is skipped, so
        # the number of updates summed equals the divisor used for the
        # epoch average.
        n_batches = len(train_x) // BATCH_SIZE
        epoch_cost = 0.0
        for batch in range(n_batches):
            start = batch * BATCH_SIZE
            end = start + BATCH_SIZE
            epoch_cost += train(train_x[start:end], train_y[start:end])

        # average out the cost for one epoch
        train_cost.append(epoch_cost / n_batches)
        test_accr.append(
            np.mean(np.argmax(test_y, axis=1) == predict(test_x)))

    # output max accuracy at # iterations
    print('%.1f accuracy at %d iterations' %
          (np.max(test_accr) * 100, np.argmax(test_accr) + 1))

    # plot test accuracy
    pylab.figure()
    pylab.plot(range(NO_ITERS), test_accr)
    pylab.xlabel('epochs')
    pylab.ylabel('test accuracy')
    pylab.savefig(os.path.join(CUR_DIR, 'project_2a_test.png'))

    # plot training cost
    pylab.figure()
    pylab.plot(range(NO_ITERS), train_cost)
    pylab.xlabel('epochs')
    pylab.ylabel('training cost')
    pylab.savefig(os.path.join(CUR_DIR, 'project_2a_train.png'))

    # pick a random image; the upper bound is capped at the number of test
    # rows so a test set smaller than 2000 cannot yield an invalid index
    ind = np.random.randint(low=0, high=min(2000, len(test_x)))
    conv_1, pool_1, conv_2, pool_2 = test(test_x[ind:ind + 1, :])

    # show input image
    pylab.figure()
    pylab.gray()
    pylab.axis('off')
    pylab.imshow(test_x[ind, :].reshape(28, 28))
    pylab.title('input image')
    pylab.savefig(os.path.join(CUR_DIR, 'img_input.png'))

    # show convolved and pooled feature maps
    _save_feature_maps(conv_1, 15, (3, 5), (20, 20),
                       'layer 1 convolved feature maps', 'img_conv_1.png')
    _save_feature_maps(pool_1, 15, (3, 5), (10, 10),
                       'layer 1 pooled feature maps', 'img_pooled_1.png')
    _save_feature_maps(conv_2, 20, (4, 5), (6, 6),
                       'layer 2 convolved feature maps', 'img_conv_2.png')
    _save_feature_maps(pool_2, 20, (4, 5), (3, 3),
                       'layer 2 pooled feature maps', 'img_pooled_2.png')
    pylab.show()