def over_fit_small_data():
    data = get_CIFAR10_data()
    np.random.seed(231)
    num_train = 100
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    model = ThreeLayerConvNet(weight_scale=1e-2)

    solver = Solver(model,
                    small_data,
                    num_epochs=15,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=1)
    solver.train()
    plot_loss_acc_history(solver)
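# plot_loss_acc_history is not defined in this excerpt. A minimal sketch,
# assuming it follows the inline loss/accuracy plotting pattern used by the
# later examples in this section:
import matplotlib.pyplot as plt

def plot_loss_acc_history(solver):
    # Loss per iteration on top, train/val accuracy per epoch below.
    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history, 'o')
    plt.xlabel('iteration')
    plt.ylabel('loss')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, '-o')
    plt.plot(solver.val_acc_history, '-o')
    plt.legend(['train', 'val'], loc='upper left')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.show()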
Example #2
def overfit_small_data(model=None, epochs=10, num_train=20, verbose=True):

    data = get_CIFAR10_data(dir='datasets/cifar-10-batches-py')
    small_data = {
        'X_train': data['X_train'][:num_train] / 127.0,
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train] / 127.0,  # batch size must be constant
        'y_val': data['y_val'][:num_train],
    }

    if model is None:
        input_dim = small_data['X_train'].shape[1:]
        print(input_dim)
        # spatial size halves at each pooling stage: 32 -> 16 -> 8 -> 4 -> 2
        model = FlexNet(input_dim=input_dim,
                        num_filters=(8, 8, 16, 16),
                        hidden_dim=(100, ))
        model.print_params()

    print('\n--- Training a few epochs ---')

    solver = Solver(model,
                    small_data,
                    num_epochs=epochs,
                    batch_size=np.minimum(50, num_train),
                    update_rule='sgd',
                    optim_config={
                        'learning_rate': 1e-4,
                    },
                    verbose=verbose,
                    print_every=1)
    solver.train()
    print('Train acc:', solver.train_acc_history[-1])
    return model
def RunTwoLayerNet():
    model = TwoLayerNet()
    solver = Solver(model,
                    data,
                    optim_config={'learning_rate': 1e-3,},
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
def RunFullyConnectedNet():
    model = FullyConnectedNet([100, 50], dropout=0.5, use_batchnorm=True)
    #model = FullyConnectedNet([100, 50])
    solver = Solver(model,
                    data,
                    optim_config={'learning_rate': 1e-3,},
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
Example #5
def RunCnnNet():
    model = ThreeLayerConvNet(reg=1e-2)
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-5,
                    },
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
Example #6
def RunFullyConnectedNet():
    model = FullyConnectedNet([100, 50], dropout=0.5, use_batchnorm=True)
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
Example #7
def RunTwoLayerNet():
    model = TwoLayerNet()
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
def run_batchsize_experiments(normalization_mode):
    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    n_epochs = 10
    weight_scale = 2e-2
    batch_sizes = [5, 10, 50]
    lr = 10**(-3.5)
    solver_bsize = batch_sizes[0]

    print('No normalization: batch size = ', solver_bsize)
    model = FullyConnectedNet(hidden_dims,
                              weight_scale=weight_scale,
                              normalization=None)
    solver = Solver(model,
                    small_data,
                    num_epochs=n_epochs,
                    batch_size=solver_bsize,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': lr,
                    },
                    verbose=False)
    solver.train()

    bn_solvers = []
    for i in range(len(batch_sizes)):
        b_size = batch_sizes[i]
        print('Normalization: batch size = ', b_size)
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     normalization=normalization_mode)
        bn_solver = Solver(bn_model,
                           small_data,
                           num_epochs=n_epochs,
                           batch_size=b_size,
                           update_rule='adam',
                           optim_config={
                               'learning_rate': lr,
                           },
                           verbose=False)
        bn_solver.train()
        bn_solvers.append(bn_solver)

    return bn_solvers, solver, batch_sizes
def RunCnnNet():
    model = ThreeLayerConvNet(weight_scale=0.001, reg=0.001)

    solver = Solver(model,
                    data,
                    num_epochs=1,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=20)
    solver.train()
def TwoLayerNetDemo(reg=0.0):
    data = get_CIFAR10_data(9000, 1000)
    model = TwoLayerNet(reg=reg)
    solver = Solver(model, data, update_rule='sgd',
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=10,
                    batch_size=100, print_every=100)

    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
Example #12
def test_mnist(num_epochs=60, batch_size=60, learning_rate=3e-3):
    X_train, y_train = get_mnist_data('mnist_train.csv', 50000)
    X_val, y_val = get_mnist_data('mnist_test.csv', 10000)
    hidden_dims = [100, 100, 100]
    # num_train = 48000
    test_data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val
    }
    weight_scale = 2e-2
    bn_model = FullyConnectedNet(hidden_dims,
                                 input_dim=1 * 784,
                                 weight_scale=weight_scale,
                                 use_batchnorm=True)
    bn_solver = Solver(bn_model,
                       test_data,
                       num_epochs=num_epochs,
                       batch_size=batch_size,
                       update_rule='sgd',
                       optim_config={
                           'learning_rate': learning_rate,
                       },
                       verbose=True,
                       print_every=400)
    step, train_accuracies, val_accuracies, loss = bn_solver.train()
    return bn_model, step, train_accuracies, val_accuracies, loss
def train_net():
    data = get_CIFAR10_data()
    model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001)

    solver = Solver(model,
                    data,
                    num_epochs=1,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=20)
    solver.train()
    visualize_filters(model)
def ThreeLayerConvNetDemo(batch_size=32, num_filters=9, use_batchnorm=False,
                          weight_scale=1e-2, reg=0.0, update_rule='sgd'):
    data = get_CIFAR10_data(1000, 100)
    model = ThreeLayerConvNet(num_filters=num_filters,
                              weight_scale=weight_scale, reg=reg)

    solver = Solver(model, data, update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=10,
                    batch_size=batch_size, print_every=100)

    solver.train()

    X_test = data['X_test'][:100]
    y_test = data['y_test'][:100]
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
def SMALL_CNN():
    num_train = 100
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    model = ThreeLayerConvNet(weight_scale=1e-3)

    solver = Solver(model,
                    small_data,
                    num_epochs=10,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-4,
                    },
                    verbose=True,
                    print_every=20)

    solver.train()
def FullyConnectedNetDemo(dropout=0.5, use_batchnorm=True, HeReLU=False,
                          weight_scale=1e-2, reg=0.0, update_rule='adam',
                          num_epochs=10):
    data = get_CIFAR10_data(19000, 1000)
    hidden_dims = [100, 50]
    model = FullyConnectedNet(hidden_dims=hidden_dims,
                              weight_scale=weight_scale,
                              use_batchnorm=use_batchnorm,
                              HeReLU=HeReLU, reg=reg)

    solver = Solver(model, data, update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=num_epochs,
                    batch_size=100, print_every=100)

    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
Example #17
solver = Solver(model, data,
                num_epochs=1, batch_size=8,
                update_rule='adam',
                lr_decay=1,
                max_jitter=0,
                h5_file='croped2',
                flipOrNot=True,
                optim_config={
                    'learning_rate': learning_rate,  # 1e-4
                    'beta2': 0.999
                },
                verbose=True, print_every=1000)

solver.train()
plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')

#plt.show()

Example #18
#
# loss, grads = model.loss(X, y)
# print('Initial loss (no regularization): ', loss)
#
# for name in sorted(grads):
#   f = lambda _: model.loss(X, y)[0]
#   grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
#   print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))

data = get_CIFAR10_data()
#for k, v in data.items():
#  print('%s: ' % k, v.shape)

model = ConvNet(weight_scale=0.001, hidden_dim=500, reg=0)

print()

solver = Solver(model, data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 5e-3,
                },
                verbose=True, print_every=100)

describe_solver(solver)

print()

solver.train()

num_train = 100
small_data = {
  'X_train': data['X_train'][:num_train],
  'y_train': data['y_train'][:num_train],
  'X_val': data['X_val'],
  'y_val': data['y_val'],
}

model = ThreeLayerConvNet(weight_scale=1e-2)

solver = Solver(model, small_data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=1)
solver.train()


# Plotting the loss, training accuracy, and validation accuracy should show clear overfitting:


plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()

bn_model = FullyConnectedNet(hidden_dims,
                             weight_scale=weight_scale,
                             normalization='batchnorm')
model = FullyConnectedNet(hidden_dims,
                          weight_scale=weight_scale,
                          normalization=None)

bn_solver = Solver(bn_model,
                   small_data,
                   num_epochs=1,
                   batch_size=50,
                   update_rule='adam',
                   optim_config={
                       'learning_rate': 1e-3,
                   },
                   verbose=True,
                   print_every=20)
bn_solver.train()

solver = Solver(model,
                small_data,
                num_epochs=1,
                batch_size=50,
                update_rule='adam',
                optim_config={
                    'learning_rate': 1e-3,
                },
                verbose=True,
                print_every=20)
solver.train()

# Run the following to visualize the results from two networks trained above. You should find that using batch normalization helps the network to converge much faster.
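# The visualization code itself is missing from this excerpt. A minimal sketch,
# assuming it mirrors the baseline-vs-batchnorm plotting used elsewhere in this
# section (solver and bn_solver as trained above):
import matplotlib.pyplot as plt

plt.subplot(3, 1, 1)
plt.title('Training loss')
plt.plot(solver.loss_history, 'o', label='baseline')
plt.plot(bn_solver.loss_history, 'o', label='batchnorm')
plt.xlabel('Iteration')
plt.legend(loc='upper right')

plt.subplot(3, 1, 2)
plt.title('Training accuracy')
plt.plot(solver.train_acc_history, '-o', label='baseline')
plt.plot(bn_solver.train_acc_history, '-o', label='batchnorm')
plt.xlabel('Epoch')
plt.legend(loc='lower right')

plt.subplot(3, 1, 3)
plt.title('Validation accuracy')
plt.plot(solver.val_acc_history, '-o', label='baseline')
plt.plot(bn_solver.val_acc_history, '-o', label='batchnorm')
plt.xlabel('Epoch')
plt.legend(loc='lower right')

plt.gcf().set_size_inches(15, 15)
plt.show()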
Example #21
def regularization_experiment():
    data = get_CIFAR10_data()

    # Train several identical nets with different dropout strengths; dropout=1 disables dropout
    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    dropout_choices = [1, 0.9, 0.75, 0.5, 0.25]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], dropout=dropout)
        print(dropout)

        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule='adam',
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies for each dropout setting
    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 '-o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 '-o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 20)
    plt.show()
def main():
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_raw_data()
    for k, v in data.items():
        print('%s: ' % k, v.shape)

    # Get small data for finetuning
    small_data = get_small_data(data, 5000)

    # Network Architecture
    # {conv- [batch norm] - relu - pool}
    cnn_layer_1 = (64, 3, 1, 1)
    pool_layer_1 = (2, 2, 2)
    layer_1 = (cnn_layer_1, pool_layer_1)
    cnn_layer_2 = (128, 3, 1, 1)
    pool_layer_2 = (2, 2, 2)
    layer_2 = (cnn_layer_2, pool_layer_2)
    cnn_layer_3 = (256, 3, 1, 1)
    pool_layer_3 = (2, 2, 2)
    layer_3 = (cnn_layer_3, pool_layer_3)
    hidden_dims_CNN = (layer_1, layer_2, layer_3)

    # {affine - [batch norm] - relu - [dropout]}
    fc_layer_1 = 256
    drop_layer_1 = 1
    layer_1 = (fc_layer_1, drop_layer_1)
    fc_layer_2 = 128
    drop_layer_2 = 1
    layer_2 = (fc_layer_2, drop_layer_2)
    hidden_dims_FC = (layer_1, layer_2)

    num_classes = 10

    model = ConvNet(input_dim=(3, 32, 32),
                    hidden_dims_CNN=hidden_dims_CNN,
                    hidden_dims_FC=hidden_dims_FC,
                    num_classes=num_classes,
                    weight_scale=1e-2,
                    reg=0.001,
                    dtype=np.float32)

    select_num_train_data = 0
    test_weight_scale = 0
    test_lr = 1

    # Test how much training data is enough
    if select_num_train_data == 1:
        num_train = (500, 1000, 5000, 10000)
        epoch = (20, 10, 2, 1)
        for i in range(0, len(num_train)):
            print('num_train_data : %d' % (num_train[i]))

            small_data = get_small_data(data, num_train[i])
            solver = Solver(model,
                            small_data,
                            num_epochs=epoch[i],
                            batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={
                                'learning_rate': 1e-3,
                            },
                            verbose=False,
                            print_every=20)

            solver.train()
            print('num_train : %d, train_acc : %f, val_acc : %f' % (
                num_train[i], solver.train_acc_history[-1],
                solver.val_acc_history[-1]))

    # Test settings of weight initialization
    if test_weight_scale == 1:
        weight_scale = (1e-2, 1e-3, -1)
        for i in range(0, len(weight_scale)):
            print('weight_scale : %f' % (weight_scale[i]))
            model = ConvNet(input_dim=(3, 32, 32),
                            hidden_dims_CNN=hidden_dims_CNN,
                            hidden_dims_FC=hidden_dims_FC,
                            num_classes=num_classes,
                            weight_scale=weight_scale[i],
                            reg=0.001,
                            dtype=np.float32)

            solver = Solver(model,
                            small_data,
                            num_epochs=2,
                            batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={
                                'learning_rate': 1e-3,
                            },
                            verbose=True,
                            print_every=20)
            solver.train()

            print('weight_scale : %f, train_acc : %f, val_acc : %f' % (
                weight_scale[i], solver.train_acc_history[-1],
                solver.val_acc_history[-1]))
    if test_lr == 1:
        lr = (1e-2, 1e-3, 1e-4)
        for i in range(0, len(lr)):
            print('lr : %f' % (lr[i]))
            model = ConvNet(input_dim=(3, 32, 32),
                            hidden_dims_CNN=hidden_dims_CNN,
                            hidden_dims_FC=hidden_dims_FC,
                            num_classes=num_classes,
                            weight_scale=-1,
                            reg=0.001,
                            dtype=np.float32)

            solver = Solver(model,
                            small_data,
                            num_epochs=10,
                            batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={
                                'learning_rate': lr[i],
                            },
                            verbose=True,
                            print_every=10)
            solver.train()

            print('lr : %f, train_acc : %f, val_acc : %f' % (
                lr[i], solver.train_acc_history[-1],
                solver.val_acc_history[-1]))
Example #23
def batch_normalization_and_initialization():
    """
    We will now run a small experiment to study the interaction of batch normalization
    and weight initialization.

    The first cell will train 8-layer networks both with and without batch normalization,
    using different scales for weight initialization. The second cell will plot training
    accuracy, validation set accuracy, and training loss as a function of the weight
    initialization scale.
    """
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [50, 50, 50, 50, 50, 50, 50]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    bn_solvers_ws = {}
    solvers_ws = {}
    weight_scales = np.logspace(-4, 0, num=20)
    for i, weight_scale in enumerate(weight_scales):
        print('Running weight scale %d / %d' % (i + 1, len(weight_scales)))
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     normalization='batchnorm')
        model = FullyConnectedNet(hidden_dims,
                                  weight_scale=weight_scale,
                                  normalization=None)
        bn_solver = Solver(bn_model,
                           small_data,
                           num_epochs=10,
                           batch_size=50,
                           update_rule='adam',
                           optim_config={'learning_rate': 1e-3},
                           verbose=False,
                           print_every=200)
        bn_solver.train()
        bn_solvers_ws[weight_scale] = bn_solver
        solver = Solver(model,
                        small_data,
                        num_epochs=10,
                        batch_size=50,
                        update_rule='adam',
                        optim_config={'learning_rate': 1e-3},
                        verbose=False,
                        print_every=200)
        solver.train()
        solvers_ws[weight_scale] = solver

    # Plot results of weight scale experiment
    best_train_accs, bn_best_train_accs = [], []
    best_val_accs, bn_best_val_accs = [], []
    final_train_loss, bn_final_train_loss = [], []

    for ws in weight_scales:
        best_train_accs.append(max(solvers_ws[ws].train_acc_history))
        bn_best_train_accs.append(max(bn_solvers_ws[ws].train_acc_history))

        best_val_accs.append(max(solvers_ws[ws].val_acc_history))
        bn_best_val_accs.append(max(bn_solvers_ws[ws].val_acc_history))

        final_train_loss.append(np.mean(solvers_ws[ws].loss_history[-100:]))
        bn_final_train_loss.append(
            np.mean(bn_solvers_ws[ws].loss_history[-100:]))
    """
    semilogx半对数坐标函数:只有一个坐标轴是对数坐标另一个是普通算术坐标。 在下列情况下建议用半对数坐标:
    (1)变量之一在所研究的范围内发生了几个数量级的变化。 
    (2)在自变量由零开始逐渐增大的初始阶段,当自变量的少许变化引起因变量极大变化时,
    此时采用半对数坐标纸,曲线最大变化范围可伸长,使图形轮廓清楚。
    (3)需要将某种函数变换为直线函数关系。
    """
    plt.subplot(3, 1, 1)
    plt.title('Best val accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best val accuracy')
    plt.semilogx(weight_scales, best_val_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_val_accs, '-o', label='batchnorm')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    plt.title('Best train accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best training accuracy')
    plt.semilogx(weight_scales, best_train_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_train_accs, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='upper right')

    plt.subplot(3, 1, 3)
    plt.title('Final training loss vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Final training loss')
    plt.semilogx(weight_scales, final_train_loss, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_final_train_loss, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='lower left')
    plt.gca().set_ylim(1.0, 3.5)

    plt.gcf().set_size_inches(15, 15)
    plt.show()
Example #24
def check_for_deep_network():
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    for k, v in data.items():
        print('%s: ' % k, v.shape)
    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train],
        'y_val': data['y_val'][:num_train]
    }
    weight_scale = 2e-2
    reg = 0.01
    bn_model = FullyConnectedNet(hidden_dims,
                                 reg=reg,
                                 weight_scale=weight_scale,
                                 normalization='batchnorm')
    model = FullyConnectedNet(hidden_dims,
                              reg=reg,
                              weight_scale=weight_scale,
                              normalization=None)

    bn_solver = Solver(bn_model,
                       small_data,
                       num_epochs=10,
                       batch_size=50,
                       update_rule='adam',
                       optim_config={'learning_rate': 1e-3},
                       verbose=True,
                       print_every=20)
    bn_solver.train()

    solver = Solver(model,
                    small_data,
                    num_epochs=10,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True,
                    print_every=20)
    solver.train()

    plt.subplot(3, 1, 1)
    plot_training_history('Training loss',
                          'Iteration',
                          solver, [bn_solver],
                          lambda x: x.loss_history,
                          bl_marker='o',
                          bn_marker='o')
    plt.subplot(3, 1, 2)
    plot_training_history('Training accuracy',
                          'Epoch',
                          solver, [bn_solver],
                          lambda x: x.train_acc_history,
                          bl_marker='-o',
                          bn_marker='-o')
    plt.subplot(3, 1, 3)
    plot_training_history('Validation accuracy',
                          'Epoch',
                          solver, [bn_solver],
                          lambda x: x.val_acc_history,
                          bl_marker='-o',
                          bn_marker='-o')
    plt.show()
Example #25
    # (Iteration 20 / 20) loss: 0.649341
    # (Epoch 10 / 10) train acc: 0.920000; val_acc: 0.237000
    # Train small data set
    model = ThreeLayerConvNet(weight_scale=1e-2)
    solver = Solver(model,
                    small_data,
                    num_epochs=10,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=1)
    solver.train()
    print('Train small data set')
    if do_plotting:
        plt.subplot(2, 1, 1)
        plt.plot(solver.loss_history, 'o')
        plt.xlabel('iteration')
        plt.ylabel('loss')

        plt.subplot(2, 1, 2)
        plt.plot(solver.train_acc_history, '-o')
        plt.plot(solver.val_acc_history, '-o')
        plt.legend(['train', 'val'], loc='upper left')
        plt.xlabel('epoch')
        plt.ylabel('accuracy')
        plt.show()
# lr, mm, and the other short-named hyperparameters used below are set above
# this excerpt
opcon = {'learning_rate': lr, 'momentum': mm}

###############################################################
""" NO USER INPUTS PAST THIS LINE """
###############################################################


# Initialize model and solver
model = FancyNet(num_filters=nfilter, filter_sizes=sfilter, maxpools=mp,
                 use_spatial_batchnorm=sbn, hidden_dims=hd, use_batchnorm=bn,
                 reg=rg, weight_scale=ws)

solver = Solver(model, data, num_epochs=ne, batch_size=bs, update_rule=uprule,
                optim_config=opcon, lr_decay=lrd, verbose=vb, print_every=pe)

# Optimize the model (this is the part that takes a while)
solver.train()


# Check if this beats the previous best accuracy:
train_hist = solver.train_acc_history
val_hist = solver.val_acc_history
train_best = np.max(train_hist)
val_best = np.max(val_hist)

print('Max training accuracy: ' + str(train_best))
print('Max validation accuracy: ' + str(val_best))
if val_best > Best_val_acc:
    Best_model = model
    Best_train_acc = train_best
    Best_val_acc = val_best
    print('  New record!')
def RunCnnNet():
    model = ThreeLayerConvNet(reg=1e-2)
    solver = Solver(model, data, optim_config={'learning_rate': 1e-5,},
                    lr_decay=0.95, print_every=100)
    solver.train()