def test_softmax_backward(self):
    """Gradient-check SoftMaxModule.backward against a numerical gradient."""
    np.random.seed(42)
    max_rel_error = 1e-5

    for _ in range(10):
        # Random batch size and feature dimension for each trial.
        n_samples = np.random.choice(range(1, 20))
        n_features = np.random.choice(range(1, 100))
        x = np.random.randn(n_samples, n_features)
        dout = np.random.randn(*x.shape)

        layer = SoftMaxModule()

        # Forward pass populates the layer's cache used by backward.
        layer.forward(x)
        dx = layer.backward(dout)
        # Numerical gradient re-runs forward on perturbed inputs; safe here
        # because the analytic backward pass has already been computed.
        dx_num = eval_numerical_gradient_array(layer.forward, x, dout)

        self.assertLess(rel_error(dx, dx_num), max_rel_error)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 with plain SGD for ``FLAGS.max_steps``
    iterations, evaluating on the whole test set every
    ``FLAGS.eval_freq`` iterations and once more after the final step.
    Loss and accuracy curves are saved under ``results/``.
    """
    import os  # local import: used only for creating the output directory

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Parse hidden-layer sizes from a comma-separated string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(unit) for unit in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    def reshape_cifar10_mlp(x):
        """Flatten a batch of CIFAR-10 images into per-sample row vectors.

        Assumes x is a 4-D array with the batch on axis 0 — TODO confirm
        the exact (N, C, H, W) layout against cifar10_utils.
        """
        batch_size = x.shape[0]
        # Push the batch axis to the end, collapse all feature axes into
        # one, then transpose back so each row is one flattened sample.
        x = x.transpose([2, 3, 1, 0])
        x = x.reshape([-1, batch_size])
        return x.transpose()

    def evaluate():
        """Forward the whole test set; return (loss, accuracy)."""
        x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels
        x_eval = reshape_cifar10_mlp(x_eval)
        softmax_logits = softmax.forward(mlp.forward(x_eval))
        return (crossent.forward(softmax_logits, y_eval),
                accuracy(softmax_logits, y_eval))

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)
    x_train = reshape_cifar10_mlp(x_train)

    softmax = SoftMaxModule()
    crossent = CrossEntropyModule()
    mlp = MLP(x_train.shape[1], dnn_hidden_units, y_train.shape[1])

    train_accs = []
    train_losses = []
    eval_accs = []
    eval_losses = []
    for i in np.arange(FLAGS.max_steps):
        print('\nStep: {}\n'.format(i))
        print('Training: ')
        # Forward pass: MLP logits -> softmax probabilities -> cross-entropy.
        logits = mlp.forward(x_train)
        softmax_logits = softmax.forward(logits)
        train_loss = crossent.forward(softmax_logits, y_train)
        train_acc = accuracy(softmax_logits, y_train)
        print('loss: {:.4f}, acc: {:.4f}\n'.format(train_loss, train_acc))

        # Backward pass through loss, softmax, and the MLP layers.
        dL = crossent.backward(softmax_logits, y_train)
        dL = softmax.backward(dL)
        mlp.backward(dL)

        # Vanilla SGD update on every linear layer's parameters.
        for layer in mlp.layers:
            if isinstance(layer, LinearModule):
                layer.params[
                    'weight'] -= FLAGS.learning_rate * layer.grads['weight']
                layer.params[
                    'bias'] -= FLAGS.learning_rate * layer.grads['bias']

        # Fetch the next batch before the optional evaluation, matching the
        # original iteration order.
        x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)
        x_train = reshape_cifar10_mlp(x_train)
        if i % FLAGS.eval_freq == 0:
            print('Evaluation: ')
            eval_loss, eval_acc = evaluate()
            train_losses.append(train_loss)
            train_accs.append(train_acc)
            eval_losses.append(eval_loss)
            eval_accs.append(eval_acc)
            print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))

    # Final evaluation after the last training step.
    print('Evaluation: ')
    eval_loss, eval_acc = evaluate()
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    eval_losses.append(eval_loss)
    eval_accs.append(eval_acc)
    print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))

    print('Finished training.')

    # Ensure the output directory exists; savefig does not create it.
    os.makedirs('results', exist_ok=True)

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_losses)), train_losses, label='training loss')
    plt.plot(np.arange(len(eval_losses)), eval_losses, label='evaluation loss')
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_loss.png', bbox_inches='tight')

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_accs)), train_accs, label='training accuracy')
    plt.plot(np.arange(len(eval_accs)), eval_accs, label='evaluation accuracy')
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_acc.png', bbox_inches='tight')