Example #1
def train_multi_layer_network(data):
    print()
    print("*****************start train multi layer network*****************")

    model = FullyConnectedNet([100, 100, 100, 100], weight_scale=1e-2, use_batchnorm=True)
    solver = Solver(model, data,
                    num_epochs=10, batch_size=100,
                    update_rule="adam",
                    optim_config={
                        "learning_rate": 0.001
                    },
                    lr_decay=0.95,
                    verbose=True)
    solver.train()

    plt.subplot(2, 1, 1)
    plt.title("Training loss")
    plt.plot(solver.loss_history, "o")
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    plt.plot([0.5] * len(solver.val_acc_history), "k--")
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()

    y_test_pred = np.argmax(model.loss(data["X_test"]), axis=1)
    y_val_pred = np.argmax(model.loss(data["X_val"]), axis=1)
    print("Validation set accuracy: ", (y_val_pred == data["y_val"]).mean())
    print("Test set accuracy: ", (y_test_pred == data["y_test"]).mean())
Example #2
def visualize_dropout(data):
    # Train two identical nets, one with dropout and one without
    np.random.seed(231)
    num_train = 500
    small_data = {
        "X_train": data["X_train"][:num_train],
        "y_train": data["y_train"][:num_train],
        "X_val": data["X_val"],
        "y_val": data["y_val"],
    }

    solvers = {}
    dropout_choices = [0, 0.75]
    for dropout in dropout_choices:
        model = FullyConnectedNet([100], dropout=dropout)
        print(dropout)

        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule="adam",
                        optim_config={
                            "learning_rate": 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies of the two models
    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 "o",
                 label="%.2f dropout" % dropout)
    plt.title("Train accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend(ncol=2, loc="lower right")

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 "o",
                 label="%.2f dropout" % dropout)
    plt.title("Val accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend(ncol=2, loc="lower right")

    plt.gcf().set_size_inches(15, 15)
    plt.show()
Example #3
    def test_overfit_small_batch(self):
        num_train = 50
        data = self.data
        small_data = {
            'X_train': data['X_train'][:num_train],
            'y_train': data['y_train'][:num_train],
            'X_val': data['X_val'],
            'y_val': data['y_val'],
        }

        learning_rate = 1e-2
        weight_scale = 6e-2
        model = FullyConnectedNet([100, 100, 100, 100],
                                  weight_scale=weight_scale, dtype=np.float64)

        solver = Solver(model, small_data,
                        print_every=10, num_epochs=20, batch_size=25,
                        update_rule='sgd',
                        optim_config={
                            'learning_rate': learning_rate,
                        })
        solver.train()

        plt.plot(solver.loss_history, 'o')
        plt.title('Training loss history')
        plt.xlabel('Iteration')
        plt.ylabel('Training loss')
        plt.show()
        return
Example #4
def train_two_layer_network(data):
    print()
    print("*****************start train two layer network*****************")

    model = TwoLayerNet()
    solver = Solver(model, data, update_rule="sgd",
                    optim_config={
                        "learning_rate": 1e-3
                    },
                    lr_decay=0.95, num_epochs=10,
                    batch_size=100, print_every=100)
    solver.train()

    plt.subplot(2, 1, 1)
    plt.title("Training loss")
    plt.plot(solver.loss_history, "o")
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    plt.plot([0.5] * len(solver.val_acc_history), "k--")
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()
Example #5
    def train_net(self):
        data = self.data
        num_train = data['X_train'].shape[0]
        small_data = {
          'X_train': data['X_train'][:num_train],
          'y_train': data['y_train'][:num_train],
          'X_val': data['X_val'],
          'y_val': data['y_val'],
        }
        model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001)

        solver = Solver(model, small_data,
                        num_epochs=1, batch_size=50,
                        update_rule='adam',
                        optim_config={
                          'learning_rate': 1e-3,
                        },
                        verbose=True, print_every=20)
        solver.train()
        # Visualize the learned first-layer filters: transpose (F, C, H, W)
        # to (F, H, W, C) so each filter renders as an H x W RGB tile.
        from assignment2.cs231n.vis_utils import visualize_grid

        grid = visualize_grid(model.params['W1'].transpose(0, 2, 3, 1))
        plt.imshow(grid.astype('uint8'))
        plt.axis('off')
        plt.gcf().set_size_inches(5, 5)
        plt.show()

        return
Example #6
def train_three_layer_network(data):
    print()
    print("*****************start train three layer network*****************")

    model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001, filter_size=3, num_filters=4)

    solver = Solver(model, data,
                    num_epochs=5, batch_size=50,
                    update_rule="adam",
                    optim_config={
                        "learning_rate": 1e-3,
                    },
                    verbose=True, print_every=20)
    solver.train()

    plt.subplot(2, 1, 1)
    plt.title("Training loss")
    plt.plot(solver.loss_history, "o")
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    plt.plot([0.5] * len(solver.val_acc_history), "k--")
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()

    y_test_pred = np.argmax(model.loss(data["X_test"]), axis=1)
    y_val_pred = np.argmax(model.loss(data["X_val"]), axis=1)
    print("Validation set accuracy: ", (y_val_pred == data["y_val"]).mean())
    print("Test set accuracy: ", (y_test_pred == data["y_test"]).mean())
Example #7
    def compare_rmsprop_adam(self):
        num_train = 4000
        data = self.data
        small_data = {
            'X_train': data['X_train'][:num_train],
            'y_train': data['y_train'][:num_train],
            'X_val': data['X_val'],
            'y_val': data['y_val'],
        }
        solvers = {}
        learning_rates = {'rmsprop': 1e-4, 'adam': 1e-3}

        for update_rule in ['adam', 'rmsprop']:
            print('running with', update_rule)
            model = FullyConnectedNet([100, 100, 100, 100, 100], weight_scale=5e-2)

            solver = Solver(model, small_data,
                            num_epochs=5, batch_size=100,
                            update_rule=update_rule,
                            optim_config={
                                'learning_rate': learning_rates[update_rule]
                            },
                            verbose=True)
            solvers[update_rule] = solver
            solver.train()
            print()

        plt.subplot(3, 1, 1)
        plt.title('Training loss')
        plt.xlabel('Iteration')

        plt.subplot(3, 1, 2)
        plt.title('Training accuracy')
        plt.xlabel('Epoch')

        plt.subplot(3, 1, 3)
        plt.title('Validation accuracy')
        plt.xlabel('Epoch')

        for update_rule, solver in solvers.items():
            plt.subplot(3, 1, 1)
            plt.plot(solver.loss_history, 'o', label=update_rule)

            plt.subplot(3, 1, 2)
            plt.plot(solver.train_acc_history, '-o', label=update_rule)

            plt.subplot(3, 1, 3)
            plt.plot(solver.val_acc_history, '-o', label=update_rule)

        for i in [1, 2, 3]:
            plt.subplot(3, 1, i)
            plt.legend(loc='upper center', ncol=4)
        plt.gcf().set_size_inches(15, 15)
        plt.show()
        return
Example #8
    def check_best_model(self):
        data = self.data
        num_train = data['X_train'].shape[0]
        small_data = {
            'X_train': data['X_train'][:num_train],
            'y_train': data['y_train'][:num_train],
            'X_val': data['X_val'],
            'y_val': data['y_val'],
        }
        dropout = 0.1
        model = FullyConnectedNet([100, 100, 100], weight_scale=5e-2,
                                  use_batchnorm=True, dropout=dropout)

        update_rule = 'adam'
        learning_rate = 1e-3
        solver = Solver(model, small_data,
                        num_epochs=5, batch_size=100,
                        update_rule=update_rule,
                        optim_config={
                            'learning_rate': learning_rate
                        },
                        verbose=True)
        solver.train()

        test_acc = solver.check_accuracy(self.X_test, self.y_test)
        print("test accuracy: {}".format(test_acc))

        # Visualization
        plt.subplot(2, 1, 1)
        plt.title('Training loss')
        plt.xlabel('Iteration')
        plt.plot(solver.loss_history, 'o', label='training loss')

        plt.subplot(2, 1, 2)
        plt.title('Training/validation accuracy')
        plt.xlabel('Epoch')
        plt.plot(solver.train_acc_history, '-o', label='train accuracy')
        plt.plot(solver.val_acc_history, '-o', label='validation accuracy')

        for i in [1, 2]:
            plt.subplot(2, 1, i)
            plt.legend(loc='upper center', ncol=4)
        plt.gcf().set_size_inches(15, 15)
        plt.show()
        return
Example #9
    def test_solver(self):
        data = self.data
        input_dim = 3 * 32 * 32
        hidden_dim = 100
        num_classes = 10
        weight_scale = 1e-3
        reg = 0.0
        model = TwoLayerNet(input_dim=input_dim, hidden_dim=hidden_dim,
                            num_classes=num_classes,
                            weight_scale=weight_scale, reg=reg)

        solver = Solver(model, data,
                        update_rule='sgd',
                        optim_config={
                            'learning_rate': 1e-3,
                        },
                        lr_decay=0.95,
                        num_epochs=10, batch_size=100,
                        print_every=100)
        solver.train()

        # Visualize training loss and train / val accuracy
        plt.subplot(2, 1, 1)
        plt.title('Training loss')
        plt.plot(solver.loss_history, 'o')
        plt.xlabel('Iteration')

        plt.subplot(2, 1, 2)
        plt.title('Accuracy')
        plt.plot(solver.train_acc_history, '-o', label='train')
        plt.plot(solver.val_acc_history, '-o', label='val')
        plt.plot([0.5] * len(solver.val_acc_history), 'k--')
        plt.xlabel('Epoch')
        plt.legend(loc='lower right')
        plt.gcf().set_size_inches(15, 12)
        plt.show()
        return
Example #10
    def overfit_small_data(self):
        num_train = 100
        data = self.data
        small_data = {
            'X_train': data['X_train'][:num_train],
            'y_train': data['y_train'][:num_train],
            'X_val': data['X_val'],
            'y_val': data['y_val'],
        }

        model = ThreeLayerConvNet(weight_scale=1e-2)

        solver = Solver(model, small_data,
                        num_epochs=10, batch_size=50,
                        update_rule='adam',
                        optim_config={
                            'learning_rate': 1e-3,
                        },
                        verbose=True, print_every=1)
        solver.train()
        return
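A sanity check one might append after solver.train() above (a sketch, assuming only the train_acc_history attribute that the other examples already read): a ThreeLayerConvNet that cannot overfit 100 samples likely has a bug.

# Hypothetical follow-up check; the 0.9 threshold is an assumption.
assert solver.train_acc_history[-1] > 0.9, 'failed to overfit the small sample'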
Example #11
# NOTE: X and the labels y are assumed to be loaded beforehand
# (see Example #14 for the HDF5 loading code).
data = {
    'X_train': X[8000:35117, :],
    'y_train': y[8000:35117],
    'X_val': X[3000:8000, :],
    'y_val': y[3000:8000]
}

num_inputs = 35126
input_dim = (3, 256, 256)
reg = 0.1
num_classes = 5
model = ThreeLayerConvNet(num_filters=5,
                          filter_size=5,
                          input_dim=input_dim,
                          hidden_dim=7,
                          num_classes=num_classes,
                          dtype=np.float64,
                          reg=reg)
solver = Solver(model,
                data,
                num_epochs=1,
                batch_size=5000,
                update_rule='adam',
                optim_config={
                    'learning_rate': 1e-3,
                },
                verbose=True,
                print_every=20)
solver.train()
Example #12
    def weight_initialization_batch_norm(self):
        # Try training a very deep net with batchnorm
        data = self.data
        hidden_dims = [50, 50, 50, 50, 50, 50, 50]

        num_train = 1000
        small_data = {
            'X_train': data['X_train'][:num_train],
            'y_train': data['y_train'][:num_train],
            'X_val': data['X_val'],
            'y_val': data['y_val'],
        }

        bn_solvers = {}
        solvers = {}
        weight_scales = np.logspace(-4, 0, num=20)
        for i, weight_scale in enumerate(weight_scales):
            print('Running weight scale %d / %d' % (i + 1, len(weight_scales)))
            bn_model = FullyConnectedNet(hidden_dims,
                                         weight_scale=weight_scale,
                                         use_batchnorm=True)
            model = FullyConnectedNet(hidden_dims,
                                      weight_scale=weight_scale,
                                      use_batchnorm=False)

            bn_solver = Solver(bn_model,
                               small_data,
                               num_epochs=10,
                               batch_size=50,
                               update_rule='adam',
                               optim_config={
                                   'learning_rate': 1e-3,
                               },
                               verbose=False,
                               print_every=200)
            bn_solver.train()
            bn_solvers[weight_scale] = bn_solver

            solver = Solver(model,
                            small_data,
                            num_epochs=10,
                            batch_size=50,
                            update_rule='adam',
                            optim_config={
                                'learning_rate': 1e-3,
                            },
                            verbose=False,
                            print_every=200)
            solver.train()
            solvers[weight_scale] = solver
        # Plot results of weight scale experiment
        best_train_accs, bn_best_train_accs = [], []
        best_val_accs, bn_best_val_accs = [], []
        final_train_loss, bn_final_train_loss = [], []

        for ws in weight_scales:
            best_train_accs.append(max(solvers[ws].train_acc_history))
            bn_best_train_accs.append(max(bn_solvers[ws].train_acc_history))

            best_val_accs.append(max(solvers[ws].val_acc_history))
            bn_best_val_accs.append(max(bn_solvers[ws].val_acc_history))

            final_train_loss.append(np.mean(solvers[ws].loss_history[-100:]))
            bn_final_train_loss.append(
                np.mean(bn_solvers[ws].loss_history[-100:]))

        plt.subplot(3, 1, 1)
        plt.title('Best val accuracy vs weight initialization scale')
        plt.xlabel('Weight initialization scale')
        plt.ylabel('Best val accuracy')
        plt.semilogx(weight_scales, best_val_accs, '-o', label='baseline')
        plt.semilogx(weight_scales, bn_best_val_accs, '-o', label='batchnorm')
        plt.legend(ncol=2, loc='lower right')

        plt.subplot(3, 1, 2)
        plt.title('Best train accuracy vs weight initialization scale')
        plt.xlabel('Weight initialization scale')
        plt.ylabel('Best training accuracy')
        plt.semilogx(weight_scales, best_train_accs, '-o', label='baseline')
        plt.semilogx(weight_scales,
                     bn_best_train_accs,
                     '-o',
                     label='batchnorm')
        plt.legend()

        plt.subplot(3, 1, 3)
        plt.title('Final training loss vs weight initialization scale')
        plt.xlabel('Weight initialization scale')
        plt.ylabel('Final training loss')
        plt.semilogx(weight_scales, final_train_loss, '-o', label='baseline')
        plt.semilogx(weight_scales,
                     bn_final_train_loss,
                     '-o',
                     label='batchnorm')
        plt.legend()

        plt.gcf().set_size_inches(10, 15)
        plt.show()
        return
Example #13
    def batch_norm_with_deep(self):
        # Try training a very deep net with batchnorm
        data = self.data
        hidden_dims = [100, 100, 100, 100, 100]

        num_train = 1000
        small_data = {
            'X_train': data['X_train'][:num_train],
            'y_train': data['y_train'][:num_train],
            'X_val': data['X_val'],
            'y_val': data['y_val'],
        }

        weight_scale = 2e-2
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     use_batchnorm=True)
        model = FullyConnectedNet(hidden_dims,
                                  weight_scale=weight_scale,
                                  use_batchnorm=False)

        bn_solver = Solver(bn_model,
                           small_data,
                           num_epochs=10,
                           batch_size=50,
                           update_rule='adam',
                           optim_config={
                               'learning_rate': 1e-3,
                           },
                           verbose=True,
                           print_every=200)
        bn_solver.train()

        solver = Solver(model,
                        small_data,
                        num_epochs=10,
                        batch_size=50,
                        update_rule='adam',
                        optim_config={
                            'learning_rate': 1e-3,
                        },
                        verbose=True,
                        print_every=200)
        solver.train()
        plt.subplot(3, 1, 1)
        plt.title('Training loss')
        plt.xlabel('Iteration')

        plt.subplot(3, 1, 2)
        plt.title('Training accuracy')
        plt.xlabel('Epoch')

        plt.subplot(3, 1, 3)
        plt.title('Validation accuracy')
        plt.xlabel('Epoch')

        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label='baseline')
        plt.plot(bn_solver.loss_history, 'o', label='batchnorm')

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label='baseline')
        plt.plot(bn_solver.train_acc_history, '-o', label='batchnorm')

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label='baseline')
        plt.plot(bn_solver.val_acc_history, '-o', label='batchnorm')

        for i in [1, 2, 3]:
            plt.subplot(3, 1, i)
            plt.legend(loc='upper center', ncol=4)
        plt.gcf().set_size_inches(15, 15)
        plt.show()
        return
Example #14
from assignment2.cs231n.solver import Solver
import h5py

# Load the image array from the HDF5 file
h5f = h5py.File('img_data.h5', 'r')
X = h5f['dataset_1'][:]
h5f.close()
print(X.shape)

# Build the data dict (the labels y must be loaded separately;
# see the sketch after this example)
data = {
    'X_train': X[8000:35117, :],
    'y_train': y[8000:35117],
    'X_val': X[3000:8000, :],
    'y_val': y[3000:8000]
}


num_inputs = 35126
input_dim = (3, 256, 256)
reg = 0.1
num_classes = 5
model = ThreeLayerConvNet(num_filters=5, filter_size=5, input_dim=input_dim,
                          hidden_dim=7, num_classes=num_classes,
                          dtype=np.float64, reg=reg)
solver = Solver(model, data,
                num_epochs=1, batch_size=5000,
                update_rule='adam',
                optim_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=20)
solver.train()
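The snippet above never defines y; the original leaves label loading as a comment. A hedged sketch of one way the labels might be loaded, assuming they sit in the same HDF5 file under a hypothetical dataset key (the key name 'dataset_labels' is an assumption, not from the original):

import h5py

# Hypothetical: adjust 'dataset_labels' to the actual layout of img_data.h5.
with h5py.File('img_data.h5', 'r') as h5f:
    y = h5f['dataset_labels'][:]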
Example #15
def visualize_batch_normalization(data):
    print()
    print(
        "*****************start visualizing batch normalization*****************"
    )

    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [50, 50, 50, 50, 50, 50, 50]

    num_train = 1000
    small_data = {
        "X_train": data["X_train"][:num_train],
        "y_train": data["y_train"][:num_train],
        "X_val": data["X_val"],
        "y_val": data["y_val"],
    }

    bn_solvers = {}
    solvers = {}
    weight_scales = np.logspace(-4, 0, num=20)
    for i, weight_scale in enumerate(weight_scales):
        print("Running weight scale %d / %d" % (i + 1, len(weight_scales)))
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     use_batchnorm=True)
        model = FullyConnectedNet(hidden_dims,
                                  weight_scale=weight_scale,
                                  use_batchnorm=False)

        bn_solver = Solver(bn_model,
                           small_data,
                           num_epochs=10,
                           batch_size=50,
                           update_rule="adam",
                           optim_config={"learning_rate": 1e-3},
                           verbose=False,
                           print_every=200)
        bn_solver.train()
        bn_solvers[weight_scale] = bn_solver

        solver = Solver(model,
                        small_data,
                        num_epochs=10,
                        batch_size=50,
                        update_rule="adam",
                        optim_config={"learning_rate": 1e-3},
                        verbose=False,
                        print_every=200)
        solver.train()
        solvers[weight_scale] = solver

    # Plot results of weight scale experiment
    best_train_accs, bn_best_train_accs = [], []
    best_val_accs, bn_best_val_accs = [], []
    final_train_loss, bn_final_train_loss = [], []

    for ws in weight_scales:
        best_train_accs.append(max(solvers[ws].train_acc_history))
        bn_best_train_accs.append(max(bn_solvers[ws].train_acc_history))

        best_val_accs.append(max(solvers[ws].val_acc_history))
        bn_best_val_accs.append(max(bn_solvers[ws].val_acc_history))

        final_train_loss.append(np.mean(solvers[ws].loss_history[-100:]))
        bn_final_train_loss.append(np.mean(bn_solvers[ws].loss_history[-100:]))

    plt.subplot(3, 1, 1)
    plt.title("Best val accuracy vs weight initialization scale")
    plt.xlabel("Weight initialization scale")
    plt.ylabel("Best val accuracy")
    plt.semilogx(weight_scales, best_val_accs, "-o", label="baseline")
    plt.semilogx(weight_scales, bn_best_val_accs, "-o", label="batchnorm")
    plt.legend(ncol=2, loc="lower right")

    plt.subplot(3, 1, 2)
    plt.title("Best train accuracy vs weight initialization scale")
    plt.xlabel("Weight initialization scale")
    plt.ylabel("Best training accuracy")
    plt.semilogx(weight_scales, best_train_accs, "-o", label="baseline")
    plt.semilogx(weight_scales, bn_best_train_accs, "-o", label="batchnorm")
    plt.legend()

    plt.subplot(3, 1, 3)
    plt.title("Final training loss vs weight initialization scale")
    plt.xlabel("Weight initialization scale")
    plt.ylabel("Final training loss")
    plt.semilogx(weight_scales, final_train_loss, "-o", label="baseline")
    plt.semilogx(weight_scales, bn_final_train_loss, "-o", label="batchnorm")
    plt.legend()
    plt.gca().set_ylim(1.0, 3.5)

    plt.gcf().set_size_inches(10, 15)
    plt.show()
Example #16
    def experiment_regularization(self):
        # Train two identical nets, one with dropout and one without
        data = self.data
        num_train = 500
        small_data = {
            'X_train': data['X_train'][:num_train],
            'y_train': data['y_train'][:num_train],
            'X_val': data['X_val'],
            'y_val': data['y_val'],
        }

        solvers = {}
        dropout_choices = [0, 0.75]
        for dropout in dropout_choices:
            model = FullyConnectedNet([500], dropout=dropout)
            print(dropout)

            solver = Solver(model,
                            small_data,
                            num_epochs=25,
                            batch_size=100,
                            update_rule='adam',
                            optim_config={
                                'learning_rate': 5e-4,
                            },
                            verbose=True,
                            print_every=100)
            solver.train()
            solvers[dropout] = solver
        # Plot train and validation accuracies of the two models

        train_accs = []
        val_accs = []
        for dropout in dropout_choices:
            solver = solvers[dropout]
            train_accs.append(solver.train_acc_history[-1])
            val_accs.append(solver.val_acc_history[-1])

        plt.subplot(3, 1, 1)
        for dropout in dropout_choices:
            plt.plot(solvers[dropout].train_acc_history,
                     'o',
                     label='%.2f dropout' % dropout)
        plt.title('Train accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend(ncol=2, loc='lower right')

        plt.subplot(3, 1, 2)
        for dropout in dropout_choices:
            plt.plot(solvers[dropout].val_acc_history,
                     'o',
                     label='%.2f dropout' % dropout)
        plt.title('Val accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend(ncol=2, loc='lower right')

        plt.gcf().set_size_inches(15, 15)
        plt.show()
        return