def test_softmax_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
        # Random batch size and feature dimension for each trial.
        N = np.random.choice(range(1, 20))
        D = np.random.choice(range(1, 100))
        x = np.random.randn(N, D)
        dout = np.random.randn(*x.shape)

        layer = SoftMaxModule()

        # The forward pass caches the activations needed by the backward pass.
        out = layer.forward(x)
        dx = layer.backward(dout)
        dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)

        # Analytical and numerical gradients should agree to within rel_error_max.
        self.assertLess(rel_error(dx, dx_num), rel_error_max)
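# The test above relies on two gradient-checking helpers defined elsewhere in
# the assignment code. Below is a minimal sketch of what they are assumed to
# look like (hypothetical reconstructions matching the calls above, not the
# original implementations):

def rel_error(x, y, eps=1e-8):
    """Maximum elementwise relative error between two arrays (assumed helper)."""
    return np.max(np.abs(x - y) / np.maximum(eps, np.abs(x) + np.abs(y)))


def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """Centered-difference numerical gradient of f at x, contracted with the
    upstream gradient df (assumed helper)."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()   # f evaluated at x + h along this coordinate
        x[ix] = oldval - h
        neg = f(x).copy()   # f evaluated at x - h along this coordinate
        x[ix] = oldval      # restore the original value
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad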
def train():
    """
    Performs training and evaluation of the MLP model.

    TODO:
    Implement training and evaluation of the MLP model. Evaluate your model on
    the whole test set every eval_freq iterations.
    """
    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################

    def reshape_cifar10_mlp(x):
        # Flatten a (batch, channels, height, width) CIFAR-10 batch into
        # (batch, channels * height * width) row vectors for the MLP.
        batch_size = x.shape[0]
        x = x.transpose([2, 3, 1, 0])
        x = x.reshape([-1, batch_size])
        x = x.transpose()
        return x

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)
    x_train = reshape_cifar10_mlp(x_train)

    softmax = SoftMaxModule()
    crossent = CrossEntropyModule()
    mlp = MLP(x_train.shape[1], dnn_hidden_units, y_train.shape[1])

    train_accs = []
    train_losses = []
    eval_accs = []
    eval_losses = []

    for i in np.arange(FLAGS.max_steps):
        print('\nStep: {}\n'.format(i))
        print('Training: ')

        # Forward pass: logits -> softmax probabilities -> cross-entropy loss.
        logits = mlp.forward(x_train)
        softmax_logits = softmax.forward(logits)
        train_loss = crossent.forward(softmax_logits, y_train)
        train_acc = accuracy(softmax_logits, y_train)
        print('loss: {:.4f}, acc: {:.4f}\n'.format(train_loss, train_acc))

        # Backward pass through the loss, the softmax, and the MLP layers.
        dL = crossent.backward(softmax_logits, y_train)
        dL = softmax.backward(dL)
        mlp.backward(dL)

        # Vanilla SGD update on the parameters of every linear layer.
        for layer in mlp.layers:
            if isinstance(layer, LinearModule):
                layer.params['weight'] -= FLAGS.learning_rate * layer.grads['weight']
                layer.params['bias'] -= FLAGS.learning_rate * layer.grads['bias']

        # Fetch the next training batch.
        x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)
        x_train = reshape_cifar10_mlp(x_train)

        # Evaluate on the whole test set every eval_freq iterations.
        if i % FLAGS.eval_freq == 0:
            print('Evaluation: ')
            x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels
            x_eval = reshape_cifar10_mlp(x_eval)

            logits = mlp.forward(x_eval)
            softmax_logits = softmax.forward(logits)
            eval_loss = crossent.forward(softmax_logits, y_eval)
            eval_acc = accuracy(softmax_logits, y_eval)

            train_losses.append(train_loss)
            train_accs.append(train_acc)
            eval_losses.append(eval_loss)
            eval_accs.append(eval_acc)

            print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))

    # Final evaluation on the test set after the last training step.
    print('Evaluation: ')
    x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels
    x_eval = reshape_cifar10_mlp(x_eval)

    logits = mlp.forward(x_eval)
    softmax_logits = softmax.forward(logits)
    eval_loss = crossent.forward(softmax_logits, y_eval)
    eval_acc = accuracy(softmax_logits, y_eval)

    train_losses.append(train_loss)
    train_accs.append(train_acc)
    eval_losses.append(eval_loss)
    eval_accs.append(eval_acc)

    print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))
    print('Finished training.')

    # Plot learning curves (assumes a 'results/' directory exists).
    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_losses)), train_losses, label='training loss')
    plt.plot(np.arange(len(eval_losses)), eval_losses, label='evaluation loss')
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_loss.png', bbox_inches='tight')

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_accs)), train_accs, label='training accuracy')
    plt.plot(np.arange(len(eval_accs)), eval_accs, label='evaluation accuracy')
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_acc.png', bbox_inches='tight')
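# train() also assumes an accuracy(predictions, targets) helper defined elsewhere
# in this file. A minimal sketch under the assumption that both arguments are
# (batch_size, n_classes) arrays with one-hot targets (hypothetical, not the
# original implementation):

def accuracy(predictions, targets):
    """Fraction of samples whose arg-max prediction matches the one-hot label (assumed helper)."""
    return np.mean(np.argmax(predictions, axis=1) == np.argmax(targets, axis=1))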