    def calculate_loss(self, gt_verbs, role_label_pred, gt_labels, args):

        batch_size = role_label_pred.size()[0]
        if args.train_all:
            loss = 0
            for i in range(batch_size):
                for index in range(gt_labels.size()[1]):
                    frame_loss = 0
                    #verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
                    #frame_loss = criterion(role_label_pred[i], gt_labels[i,index])
                    for j in range(0, self.max_role_count):
                        frame_loss += utils.cross_entropy_loss(role_label_pred[i][j], gt_labels[i,index,j] ,self.vocab_size)
                    frame_loss = frame_loss/len(self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
                    #print('frame loss', frame_loss, 'verb loss', verb_loss)
                    loss += frame_loss
        else:
            #verb from pre-trained
            loss = 0
            for i in range(batch_size):
                for index in range(gt_labels.size()[1]):
                    frame_loss = 0
                    #verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
                    #frame_loss = criterion(role_label_pred[i], gt_labels[i,index])
                    for j in range(0, self.max_role_count):
                        frame_loss += utils.cross_entropy_loss(role_label_pred[i][j], gt_labels[i,index,j] ,self.vocab_size)
                    frame_loss = frame_loss/len(self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
                    #print('frame loss', frame_loss, 'verb loss', verb_loss)
                    loss += frame_loss


        final_loss = loss/batch_size
        #print('loss :', final_loss)
        return final_loss
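
Every PyTorch snippet on this page delegates the per-role term to a project-specific helper, `utils.cross_entropy_loss(scores, target, pad_idx)`, whose definition is not reproduced here. A minimal sketch of what such a helper could look like, assuming the third argument is a padding index that should contribute zero loss (the callers pass `self.vocab_size`), is:

import torch
import torch.nn.functional as F

def cross_entropy_loss(scores, target, pad_idx=None):
    # scores: 1-D tensor of unnormalised class scores, target: 0-dim tensor with the gold index.
    # Targets equal to pad_idx (unused role slots) are skipped entirely.
    if pad_idx is not None and int(target) == pad_idx:
        return scores.new_zeros(())
    return F.cross_entropy(scores.unsqueeze(0), target.view(1))

This is only an assumption about the helper's behaviour; the actual implementation lives in the project's utils module.
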
Example #2
    def calculate_eval_loss(self, verb_pred, gt_verbs, role_label_pred, gt_labels,args):

        batch_size = verb_pred.size()[0]

        sorted_idx = torch.sort(verb_pred, 1, True)[1]
        pred_verbs = sorted_idx[:,0]
        #print('eval pred verbs :', pred_verbs)
        if args.train_all:
            loss = 0
            for i in range(batch_size):
                for index in range(gt_labels.size()[1]):
                    frame_loss = 0
                    verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
                    gt_role_list = self.encoder.get_role_ids(gt_verbs[i])
                    pred_role_list = self.encoder.get_role_ids(pred_verbs[i])

                    #print ('role list diff :', gt_role_list, pred_role_list)

                    for j in range(0, self.max_role_count):
                        if pred_role_list[j] == len(self.encoder.role_list):
                            continue
                        if pred_role_list[j] in gt_role_list:
                            #print('eval loss :', gt_role_list, pred_role_list[j])
                            g_idx = (gt_role_list == pred_role_list[j]).nonzero()
                            #print('found idx' , g_idx)
                            frame_loss += utils.cross_entropy_loss(role_label_pred[i][j], gt_labels[i,index,g_idx] ,self.vocab_size)

                    frame_loss = verb_loss + frame_loss/len(self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
                    #print('frame loss', frame_loss)
                    loss += frame_loss
        else:
            loss = 0
            for i in range(batch_size):
                for index in range(gt_labels.size()[1]):
                    frame_loss = 0
                    verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
                    gt_role_list = self.encoder.get_role_ids(gt_verbs[i])
                    pred_role_list = self.encoder.get_role_ids(pred_verbs[i])

                    #print ('role list diff :', gt_role_list, pred_role_list)

                    for j in range(0, self.max_role_count):
                        if pred_role_list[j] == len(self.encoder.role_list):
                            continue
                        if pred_role_list[j] in gt_role_list:
                            #print('eval loss :', gt_role_list, pred_role_list[j])
                            g_idx = (gt_role_list == pred_role_list[j]).nonzero()
                            #print('found idx' , g_idx)
                            frame_loss += utils.cross_entropy_loss(role_label_pred[i][j], gt_labels[i,index,g_idx] ,self.vocab_size)

                    frame_loss = frame_loss/len(self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
                    #print('frame loss', frame_loss)
                    loss += frame_loss


        final_loss = loss/batch_size
        #print('loss :', final_loss)
        return final_loss
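
In `calculate_eval_loss`, the top-scoring verb per sample is recovered with `torch.sort` and the verb loss is accumulated one sample at a time. A hedged, vectorised equivalent of those two steps (assuming `verb_pred` is a `(batch, n_verbs)` score tensor and `gt_verbs` holds gold class indices) is:

import torch.nn.functional as F

# same result as torch.sort(verb_pred, 1, True)[1][:, 0]
pred_verbs = verb_pred.argmax(dim=1)
# mean verb loss over the whole batch in one call
verb_loss = F.cross_entropy(verb_pred, gt_verbs)

The per-role masking that follows still needs the predicted/ground-truth role alignment, so only the verb side collapses this cleanly.
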
Example #3
    def fit(self, X_train, y_train, X_test, y_test, batch_size, num_epochs,
            optimizer):
        loss_history = []
        train_accuracy = []
        test_accuracy = []
        self.init()
        data_gen = utils.DataGenerator(X_train, y_train, batch_size)
        itr = 0
        for epoch in range(num_epochs):
            epoch_iter = 0
            epoch_accuracy = []
            for X, Y in data_gen:
                optimizer.zeroGrad()
                probabilities = self.forward(X)
                loss = utils.cross_entropy_loss(probabilities, Y)
                self.backward(Y)
                loss_history += [loss]
                itr += 1
                epoch_iter += 1
                optimizer.step()
                epoch_acc = self.evaluate(X, Y)
                epoch_accuracy.append(epoch_acc)
            train_acc = np.array(epoch_accuracy).sum() / epoch_iter
            train_accuracy.append(train_acc)
            test_acc = self.evaluate(X_test, y_test)
            test_accuracy.append(test_acc)
            print("epoch = {}, train accuracy = {} test accuracy = {}".format(
                epoch, train_acc, test_acc))
        return loss_history, train_accuracy, test_accuracy
    def calculate_loss(self, rot_pred, gt_labels):

        batch_size = rot_pred.size()[0]
        loss = 0
        for i in range(batch_size):
            verb_loss = utils.cross_entropy_loss(rot_pred[i], gt_labels[i])
            loss += verb_loss

        final_loss = loss/batch_size
        return final_loss
    def calculate_loss(self, verb_pred, gt_verbs, role_label_pred, gt_labels):

        batch_size = verb_pred.size()[0]
        loss = 0
        for i in range(batch_size):
            for index in range(gt_labels.size()[1]):
                frame_loss = 0
                verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
                for j in range(0, self.max_role_count):
                    frame_loss += utils.cross_entropy_loss(
                        role_label_pred[i][j], gt_labels[i, index, j],
                        self.vocab_size)
                loss += (verb_loss +
                         frame_loss / len(self.encoder.verb2_role_dict[
                             self.encoder.verb_list[gt_verbs[i]]]))

        final_loss = loss / batch_size
        print('loss :', final_loss)
        return final_loss
Example #6
    def calculate_loss(self, verb_pred, gt_verbs):

        batch_size = verb_pred.size()[0]
        loss = 0
        #print('eval pred verbs :', pred_verbs)
        for i in range(batch_size):
            verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
            loss += verb_loss

        final_loss = loss / batch_size
        return final_loss
    def calculate_loss(self, agent_pred, gt_labels):

        batch_size = agent_pred.size()[0]
        loss = 0
        for i in range(batch_size):
            for index in range(gt_labels.size()[1]):
                loss += utils.cross_entropy_loss(agent_pred[i],
                                                 gt_labels[i, index])

        final_loss = loss / batch_size
        #print('loss :', final_loss)
        return final_loss
Example #8
    def calculate_loss(self, verb_pred, gt_verbs, role_label_pred, gt_labels):

        batch_size = verb_pred.size()[0]
        verb_ref = verb_pred.size(1)
        loss = 0

        for i in range(batch_size):
            for index in range(verb_ref):
                frame_loss = 0
                verb_loss = utils.cross_entropy_loss(verb_pred[i][index],
                                                     gt_verbs[i])
                for j in range(0, self.max_role_count):
                    frame_loss += utils.cross_entropy_loss(
                        role_label_pred[i][j], gt_labels[i, index, j],
                        self.all_nouns_count)
                frame_loss = verb_loss + frame_loss / len(
                    self.encoder.verb2_role_dict[self.encoder.verb_list[
                        gt_verbs[i]]])
                loss += frame_loss

        final_loss = loss / batch_size
        return final_loss
Example #9
File: NN.py Project: Orenmc/NN
    def train(self):
        data = self.train_data
        labels = self.train_labels
        for epoch in range(self.epochs):
            utils.tic()
            total_loss = 0.0  # every epoch loss should start with zero
            good = 0.0
            total_size = 0.0
            # TODO: shuffle?
            data, labels = utils.shuffle(data, labels)
            for d, l in zip(data, labels):
                total_size += 1
                pred, cache = self.fprop(d)
                # check the prediction
                y_hat = np.argmax(pred)
                if y_hat == l:
                    good += 1

                err_cost = float(pred[int(l)])  # loss = -1 * log(err_cost)

                cross_entropy = utils.cross_entropy_loss(err_cost)
                if self.L2:
                    cross_entropy += utils.L2_cost(self.parameters["W"],
                                                   self.L2)
                total_loss += cross_entropy

                grads = self.bprop(cache, d, l)
                self.weights_updates(grads)

            print('epoch {}:'.format(epoch + 1))
            acc = good * 100 / total_size
            train_acc.append(acc)
            avg_loss = total_loss / total_size
            train_loss.append(avg_loss)

            print('train accuracy: {:2.2f}%'.format(acc))
            print('train AVG loss: {:2.2f}'.format(avg_loss))

            self.validation_acc()
            print('time:')
            utils.toc()
            # end of epoch
        # cache all about model
        trained_model = {
            "norm": self.norm,
            "parameters": self.parameters,
            "lr": self.lr
        }
        directory = str(len(self.hidden)) + 'Hidden/L2/'
        np.save(directory + 'model_' + self.model_name, trained_model)
        self.printGraph(directory)
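
In the NN.py snippets (Examples #9 and #15), the same helper name is called with a single scalar: the probability the network assigned to the correct class, matching the `loss = -1 * log(err_cost)` comment. A minimal numpy sketch of that assumed variant:

import numpy as np

def cross_entropy_loss(p_true, eps=1e-12):
    # negative log-likelihood of the probability assigned to the gold class
    return -np.log(max(p_true, eps))
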
Example #10
    def calculate_loss(self, verb_pred, gt_verbs, role_label_pred, gt_labels):

        batch_size = verb_pred.size()[0]
        criterion = nn.CrossEntropyLoss(ignore_index=self.vocab_size)
        loss = 0
        for i in range(batch_size):
            for index in range(gt_labels.size()[1]):
                frame_loss = 0
                verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
                #frame_loss = criterion(role_label_pred[i], gt_labels[i,index])
                for j in range(0, self.max_role_count):
                    frame_loss += utils.cross_entropy_loss(
                        role_label_pred[i][j], gt_labels[i, index, j],
                        self.vocab_size)
                frame_loss = verb_loss + frame_loss / len(
                    self.encoder.verb2_role_dict[self.encoder.verb_list[
                        gt_verbs[i]]])
                #print('frame loss', frame_loss, 'verb loss', verb_loss)
                loss += frame_loss

        final_loss = loss / batch_size
        #print('loss :', final_loss)
        return final_loss
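
Example #10 builds `nn.CrossEntropyLoss(ignore_index=self.vocab_size)` but never calls it; the commented-out line shows the vectorised path it was meant for. A sketch of how the inner role loop could be replaced, assuming unused role slots carry the padding label `vocab_size`:

import torch.nn as nn

def frame_role_loss(role_scores, frame_labels, pad_idx):
    # role_scores: (max_role_count, n_classes); frame_labels: (max_role_count,) gold label ids.
    # Slots labelled pad_idx are ignored; reduction='sum' matches the += accumulation above.
    criterion = nn.CrossEntropyLoss(ignore_index=pad_idx, reduction='sum')
    return criterion(role_scores, frame_labels)
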
Example #11
    def calculate_loss(self, ans_predict, all_answers):

        batch_size = ans_predict.size()[0]
        loss = 0
        for i in range(batch_size):
            frame_loss = 0
            for index in range(all_answers.size()[1]):

                frame_loss += utils.cross_entropy_loss(ans_predict[i],
                                                       all_answers[i][index])
                #frame_loss = criterion(role_label_pred[i], gt_labels[i,index])
            loss += frame_loss

        final_loss = loss / batch_size
        #print('loss :', final_loss)
        return final_loss
    def calculate_loss_mul(self, verb_pred, gt_verbs):

        batch_size = verb_pred.size()[0]
        verb_ref = verb_pred.size(1)
        loss = 0
        #print('eval pred verbs :', pred_verbs)
        for i in range(batch_size):
            verb_loss = 0
            for r in range(verb_ref):
                verb_loss += utils.cross_entropy_loss(verb_pred[i][r],
                                                      gt_verbs[i])
            loss += verb_loss

        final_loss = loss / batch_size
        #print('loss :', final_loss)
        return final_loss
    def calculate_role_loss(self, gt_verbs, role_pred, gt_role, args):

        batch_size = role_pred.size()[0]
        loss = 0  # initialised outside the branch so the division below is defined even when train_all is False
        if args.train_all:
            for i in range(batch_size):
                frame_loss = 0
                for j in range(0, self.max_role_count):
                    frame_loss += utils.cross_entropy_loss(role_pred[i][j], gt_role[i, j], self.n_roles)
                frame_loss = frame_loss / len(self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
                loss += frame_loss

        final_loss = loss / batch_size
        #print('loss :', final_loss)
        return final_loss
    def calculate_eval_loss(self, verb_pred, gt_verbs, gt_labels):

        batch_size = verb_pred.size()[0]
        loss = 0
        #print('eval pred verbs :', pred_verbs)
        for i in range(batch_size):
            for index in range(gt_labels.size()[1]):
                frame_loss = 0
                verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])

                #frame_loss += verb_loss
                #print('frame loss', frame_loss)
                loss += verb_loss

        final_loss = loss / batch_size
        #print('loss :', final_loss)
        return final_loss
Example #15
File: NN.py Project: Orenmc/NN
    def validation_acc(self):
        total = 0.0
        good = 0.0
        total_loss = 0.0

        for d, l in zip(self.val_data, self.val_labels):
            total += 1
            pred, cache = self.fprop(d)
            y_hat = np.argmax(pred)
            if y_hat == int(l):
                good += 1
            err_cost = float(pred[int(l)])
            cross_entropy = utils.cross_entropy_loss(err_cost)
            if self.L2:
                cross_entropy += utils.L2_cost(self.parameters["W"], self.L2)
            total_loss += cross_entropy

        acc = good * 100 / total
        val_acc.append(acc)
        avg_loss = total_loss / total
        val_loss.append(avg_loss)

        print('val acc {:2.2f}%'.format(good / total * 100))
        print('val AVG loss: {:2.2f}'.format(avg_loss))
Example #16
    def add_loss_op(self, pred):
        loss = cross_entropy_loss(self.labels_placeholder, pred)
        return loss
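
Example #16 calls the helper as `cross_entropy_loss(labels, pred)` while Example #20 below calls it as `utils.cross_entropy_loss(output, label)`; neither TensorFlow implementation is shown, and the argument order differs between the two projects. One plausible TF1-style sketch, stated purely as an assumption:

import tensorflow as tf

def cross_entropy_loss(labels, logits):
    # mean softmax cross-entropy over the batch; labels are one-hot, logits are raw scores
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
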
Example #17
    def converged_gradient(self, num_iter, X, V, W, iter_check=50000, threshold=0.005,
                           gradient_v=None, gradient_w=None, error=True, gradient_check=False,
                           epsilon=10.**-5, x_j=None, y_j=None):
        training_error = None
        training_loss = None

        if num_iter > 1000000:
            return (True, training_error, training_loss)
        # There are two ways to determine if the gradient has converged.
        # (1) Use the training error (error=True)
        # (2) Use the magnitude of the gradient (error=False)
        # In both cases, training_error and training_loss are attached to the response
        # for the purposes of plotting.
        if error:
            if num_iter % iter_check != 0:
                return (False, training_error, training_loss)
            else:
                if gradient_check:
                    # Randomly check five weights.
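                    # For each pick, perturb that single entry of W (or V) by +/- epsilon, rerun the
                    # forward pass, and form the central-difference estimate
                    # (f(w + eps) - f(w - eps)) / (2 * eps); if it strays from the analytic gradient
                    # entry by more than `threshold`, an AssertionError is raised below.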
                    for _ in range(5):
                        # import pdb; pdb.set_trace()
                        random_wi = np.random.randint(W.shape[0])
                        random_wj = np.random.randint(W.shape[1])
                        random_vi = np.random.randint(V.shape[0])
                        random_vj = np.random.randint(V.shape[1])

                        W_plus_epsilon = W.copy()
                        W_plus_epsilon[random_wi][random_wj] = W_plus_epsilon[random_wi][random_wj] + epsilon
                        Z_W_plus = self.perform_forward_pass(x_j, V, W_plus_epsilon)[1]

                        W_minus_epsilon = W.copy()
                        W_minus_epsilon[random_wi][random_wj] = W_minus_epsilon[random_wi][random_wj] - epsilon
                        Z_W_minus = self.perform_forward_pass(x_j, V, W_minus_epsilon)[1]

                        V_plus_epsilon = V.copy()
                        V_plus_epsilon[random_vi][random_vj] = V_plus_epsilon[random_vi][random_vj] + epsilon
                        Z_V_plus = self.perform_forward_pass(x_j, V_plus_epsilon, W)[1]

                        V_minus_epsilon = V.copy()
                        V_minus_epsilon[random_vi][random_vj] = V_minus_epsilon[random_vi][random_vj] - epsilon
                        Z_V_minus = self.perform_forward_pass(x_j, V_minus_epsilon, W)[1]

                        y = np.zeros(10)
                        y[y_j] = 1

                        if self.loss_function == "mean-squared-error":
                            W_plus_cost = mean_squared_error(Z_W_plus, y)
                            W_minus_cost = mean_squared_error(Z_W_minus, y)
                            V_plus_cost = mean_squared_error(Z_V_plus, y)
                            V_minus_cost = mean_squared_error(Z_V_minus, y)
                        else:
                            W_plus_cost = cross_entropy_loss(Z_W_plus.T, y)
                            W_minus_cost = cross_entropy_loss(Z_W_minus.T, y)
                            V_plus_cost = cross_entropy_loss(Z_V_plus.T, y)
                            V_minus_cost = cross_entropy_loss(Z_V_minus.T, y)

                        gradient_approx_wij = (W_plus_cost - W_minus_cost) / (2. * epsilon)
                        gradient_approx_vij = (V_plus_cost - V_minus_cost) / (2. * epsilon)

                        if gradient_approx_wij > gradient_w[random_wi][random_wj] + threshold or \
                           gradient_approx_wij < gradient_w[random_wi][random_wj] - threshold or \
                           gradient_approx_vij > gradient_v[random_vi][random_vj] + threshold or \
                           gradient_approx_vij < gradient_v[random_vi][random_vj] - threshold:
                            raise AssertionError("The gradient was incorrectly computed.")

                classifications_training, training_Z = self.predict(X, V, W, return_Z=True)
                training_error, training_indices_error = benchmark(classifications_training, self.labels)

                if self.validation_data is not None and self.validation_labels is not None:
                    classifications_validation = self.predict(self.validation_data, V, W)
                    validation_error, validation_indices_error = benchmark(classifications_validation, self.validation_labels)

                if self.loss_function == "mean-squared-error":
                    training_loss = mean_squared_error(training_Z.T, self.Y)
                else:
                    training_loss = cross_entropy_loss(training_Z.T, self.Y)

                print("Completed %d iterations.\nThe training error is %.2f.\n The training loss is %.2f."
                      % (num_iter, training_error, training_loss))

                if self.validation_data is not None and self.validation_labels is not None:
                    print("The error on the validation set is %.2f." % validation_error)

                if training_error < threshold:
                    return (True, training_error, training_loss)

                return (False, training_error, training_loss)
        else:
            if num_iter % iter_check == 0:
                classifications_training, training_Z = self.predict(X, V, W, return_Z=True)
                training_error, indices_error = benchmark(classifications_training, self.labels)

                if self.validation_data is not None and self.validation_labels is not None:
                    classifications_validation = self.predict(self.validation_data, V, W)
                    validation_error, validation_indices_error = benchmark(classifications_validation, self.validation_labels)

                if self.loss_function == "mean-squared-error":
                    training_loss = mean_squared_error(training_Z.T, self.Y)
                else:
                    training_loss = cross_entropy_loss(training_Z.T, self.Y)

                print("Completed %d iterations. The training error is %.2f. Training loss is %.2f" % (num_iter, training_error))

                if self.validation_data is not None and self.validation_labels is not None:
                    print("The error on the validation set is %.2f." % validation_error)

            if np.linalg.norm(gradient_v) < threshold and np.linalg.norm(gradient_w) < threshold:
                return (True, training_error, training_loss)
            else:
                return (False, training_error, training_loss)
Example #18
    X_train, Y_train = utils.shuffle(X_train, Y_train)

    for i in range(int(np.floor((train_size / batch_size)))):
        X = X_train[i * batch_size:(i + 1) * batch_size]
        Y = Y_train[i * batch_size:(i + 1) * batch_size]

        #w,b = utils.gradient_descent(X,Y,w,b,lr)
        w_grad, b_grad = utils.gradient_descent(X, Y, w, b)
        w -= lr / np.sqrt(step) * w_grad
        b -= lr / np.sqrt(step) * b_grad
        step += 1
    y_train_pred = utils.f(X_train, w, b)
    Y_train_pred = np.round(y_train_pred)
    train_acc.append(utils.accruacy(Y_train_pred, Y_train))
    train_loss.append(
        utils.cross_entropy_loss(y_train_pred, Y_train) / train_size)

    y_valid_pred = utils.f(X_valid, w, b)
    Y_valid_pred = np.round(y_valid_pred)
    valid_acc.append(utils.accruacy(Y_valid_pred, Y_valid))
    valid_loss.append(
        utils.cross_entropy_loss(y_valid_pred, Y_valid) / valid_size)

print('Training loss: {}'.format(train_loss[-1]))
print('Validation loss: {}'.format(valid_loss[-1]))
print('Training accuracy: {}'.format(train_acc[-1]))
print('Validation accuracy: {}'.format(valid_acc[-1]))

# Loss curve
plt.plot(train_loss)
plt.plot(valid_loss)
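
The logistic-regression fragment above divides the helper's return value by the dataset size before logging it, which suggests a summed binary cross entropy. A minimal numpy sketch under that assumption:

import numpy as np

def cross_entropy_loss(y_pred, y_true, eps=1e-12):
    # summed binary cross entropy; callers divide by the dataset size themselves
    y_pred = np.clip(y_pred, eps, 1.0 - eps)
    return -np.sum(y_true * np.log(y_pred) + (1.0 - y_true) * np.log(1.0 - y_pred))
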
    # ---------------Training Network------------------------
    train_cost, val_cost, err_tr, err_val, nn_weight_list = Train_network(
        epochmax, reg_lambda, LearningRate, nnparams, layer_sizes,
        minibatchsize, momentum, activ_func, activ_Grad_func, X_train, Y_train,
        X_val, Y_val)

    print('epochmax:{:3.0f}'.format(epochmax),
          ' L2 Regularization: {:1.3f}'.format(reg_lambda),
          ' Learning rate: {:1.2f}'.format(LearningRate), ' Layer Sizes',
          layer_sizes)

    # ---------------Printing Results------------------------
    activations = forward_prop(layer_sizes, nn_weight_list, X_train, Y_train,
                               activ_func)
    output_p = activations[-1]
    J_train = cross_entropy_loss(num_labels, output_p, Y_train, reg_lambda,
                                 nn_weight_list)
    mean_err = Mean_classification_error(Y_train, output_p)
    print('Train  ', ' Loss: ', J_train, ' Error: ', mean_err)

    activation_val = forward_prop(layer_sizes, nn_weight_list, X_val, Y_val,
                                  activ_func)
    output_p = activation_val[-1]
    J_val = cross_entropy_loss(num_labels, output_p, Y_val, reg_lambda,
                               nn_weight_list)
    mean_err2 = Mean_classification_error(Y_val, output_p)
    print('Validation  ', 'Loss: ', J_val, 'Error: ', mean_err2)

    activation_test = forward_prop(layer_sizes, nn_weight_list, X_test, Y_test,
                                   activ_func)
    output_p = activation_test[-1]
    mean_err = Mean_classification_error(Y_test, output_p)
Example #20
# assumed reconstruction of the snippet's truncated first line: a TF1 filename queue feeding the TFRecord loader
filename_queue = tf.train.string_input_producer(
    [training_data_dir], num_epochs=None)

image_batch, label_batch = load_data_from_tfrecords(filename_queue,
                                                    batch_size)

label = tf.one_hot(label_batch, num_classes, 1, 0)
label = tf.reshape(tf.cast(label, tf.float32), [batch_size, num_classes])
image = tf.cast(image_batch, tf.float32)
image = tf.map_fn(lambda img: tf.image.per_image_standardization(img),
                  image, dtype=tf.float32)


output = model.lenet_advanced(image, num_classes, True, 0.5)
output = tf.reshape(tf.cast(output, tf.float32), [batch_size, num_classes])

loss = utils.cross_entropy_loss(output, label)

train = tf.train.AdamOptimizer(0.001).minimize(loss)

global_vars_init_op = tf.global_variables_initializer()
local_vars_init_op = tf.local_variables_initializer()
combined_op = tf.group(local_vars_init_op, global_vars_init_op)
model_variables = slim.get_model_variables()
saver = tf.train.Saver(model_variables)

with tf.Session() as sess:
    sess.run(combined_op)
    # saver.restore(sess, '/home/kris/PycharmProjects/traffic_sign_recognition/lenet_parameters.ckpt')
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(50000):
Example #21
def grad_test_b(X_train, y_train):
    softmax_in = 2
    softmax_out = 5
    model = models.MyNeuralNetwork()
    model.add(layers.Softmax(softmax_in, softmax_out))
    model.init()
    for p in model.parameters:
        p.grad = 0.

    eps0 = 1
    eps = np.array([(0.5**i) * eps0 for i in range(10)])

    d = np.random.random((1, 5))
    d = d / np.sum(d)
    grad_diff = []

    x_data = np.array([X_train[0]])
    x_label = np.array([y_train[0]])

    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = model_grad.forward(x_data)
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = model2.forward(x_data)
        grad_diff.append(
            np.abs(
                utils.cross_entropy_loss(probabilities_grad2, x_label) -
                utils.cross_entropy_loss(probabilities_grad, x_label)))

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), constrained_layout=True)
    fig.suptitle('Gradient test by b', fontsize=16)

    axs[0, 0].plot(eps, grad_diff)
    axs[0, 0].set_xlabel(r'$\epsilon$')
    axs[0, 0].set_title(r'$|f(x+\epsilon d) - f(x)|$')

    axs[0, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[0, 1].set_xlabel('$i$')
    axs[0, 1].set_title('rate of decrease')
    axs[0, 1].set_ylim([0, 1])

    grad_diff = []
    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = copy.deepcopy(model_grad.forward(x_data))
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = copy.deepcopy(model2.forward(x_data))
        model2.backward(x_label)
        grad_x = model2.graph[0].bias.grad
        grad_diff.append(
            np.abs(
                utils.cross_entropy_loss(probabilities_grad2, x_label) -
                utils.cross_entropy_loss(probabilities_grad, x_label) -
                epss * np.dot(d.flatten().T, grad_x.flatten())))

    axs[1, 0].plot(eps, grad_diff)
    axs[1, 0].set_xlabel(r'$\epsilon$')
    axs[1, 0].set_title(r'$|f(x+\epsilon d) - f(x) - \epsilon d^{T} grad(x)|$')

    axs[1, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[1, 1].set_xlabel('$i$')
    axs[1, 1].set_title('rate of decrease')
    axs[1, 1].set_ylim([0, 1])

    plt.show()
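
A usage note on the plots: by first-order Taylor expansion, f(x + eps*d) - f(x) = eps * d^T grad f(x) + O(eps^2), so the top-row differences should shrink roughly linearly as eps is halved (ratio about 0.5) while the bottom-row differences, which subtract the linear term, should shrink quadratically (ratio about 0.25). That is what the "rate of decrease" panels are checking.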