Code example #1
File: BatchNormalization.py Project: MrTurtleW/code
def fully_connected_nets_with_batch_normalization():
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    for reg in [0, 3.14]:
        print("Running check with reg={}".format(reg))
        model = FullyConnectedNet([H1, H2],
                                  input_dim=D,
                                  num_classes=C,
                                  reg=reg,
                                  weight_scale=5e-2,
                                  dtype=np.float64,
                                  use_batchnorm=True)

        loss, grads = model.loss(X, y)
        print("Initial loss: {}".format(loss))

        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = eval_numerical_gradient(f,
                                               model.params[name],
                                               verbose=False,
                                               h=1e-5)
            print("{} relative error: {}".format(
                name, rel_error(grad_num, grads[name])))
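
The rel_error helper used throughout these examples is not shown on this page; a minimal sketch, assuming the usual definition (maximum elementwise relative error with a small floor to avoid division by zero):

import numpy as np

def rel_error(x, y):
    """Maximum relative error between two arrays."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))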
Code example #2
File: Dropout.py Project: MrTurtleW/code
def fully_connected_nets_with_dropout():
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    for dropout in [0, 0.25, 0.5]:
        print("Running check with dropout={}".format(dropout))
        model = FullyConnectedNet([H1, H2],
                                  input_dim=D,
                                  num_classes=C,
                                  weight_scale=5e-2,
                                  dtype=np.float64,
                                  dropout=dropout,
                                  seed=123)
        loss, grads = model.loss(X, y)
        print("Initial loss: {}".format(loss))

        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = eval_numerical_gradient(f,
                                               model.params[name],
                                               verbose=False,
                                               h=1e-5)
            print("{} relative error: {}".format(
                name, rel_error(grad_num, grads[name])))
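
Likewise, eval_numerical_gradient is assumed to be a standard centered-difference checker; a sketch:

import numpy as np

def eval_numerical_gradient(f, x, verbose=True, h=1e-5):
    """Centered-difference numerical gradient of f at x.
    f takes one array argument; x is perturbed in place and restored."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)        # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)        # f(x - h)
        x[ix] = oldval     # restore
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
        it.iternext()
    return grad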
Code example #3
def train_best_model():
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test
    }
    learning_rate = 3.1e-4
    weight_scale = 2.5e-2  #1e-5
    model = FullyConnectedNet([600, 500, 400, 300, 200, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64,
                              dropout=0.25,
                              use_batchnorm=True,
                              reg=1e-2)
    solver = Solver(model,
                    data,
                    print_every=500,
                    num_epochs=30,
                    batch_size=100,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': learning_rate,
                    },
                    lr_decay=0.9)

    solver.train()
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print('test acc: %f' % (acc))
    best_model = model

    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history)
    plt.title('Loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, label='train')
    plt.plot(solver.val_acc_history, label='val')
    plt.title('Classification accuracy history')
    plt.xlabel('Epoch')
    plt.ylabel('Classification accuracy')
    plt.show()
Code example #4
File: BatchNormalization.py Project: MrTurtleW/code
def batchnorm_for_deep_networks():
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val
    }
    weight_scale = 2e-2
    bn_model = FullyConnectedNet(hidden_dims,
                                 weight_scale=weight_scale,
                                 use_batchnorm=True)
    model = FullyConnectedNet(hidden_dims,
                              weight_scale=weight_scale,
                              use_batchnorm=False)

    bn_solver = Solver(bn_model,
                       small_data,
                       num_epochs=10,
                       batch_size=50,
                       update_rule='adam',
                       optim_config={
                           'learning_rate': 1e-3,
                       },
                       verbose=True,
                       print_every=200)
    bn_solver.train()

    solver = Solver(model,
                    small_data,
                    num_epochs=10,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=200)
    solver.train()
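
For reference, the training-mode forward pass that use_batchnorm=True is assumed to add between layers normalizes each feature with batch statistics, then applies a learned scale and shift; a minimal sketch (running-average bookkeeping for test time omitted):

import numpy as np

def batchnorm_forward_train(x, gamma, beta, eps=1e-5):
    """Training-mode batch normalization (sketch).
    x: (N, D) minibatch; gamma, beta: (D,) learned scale and shift."""
    mu = x.mean(axis=0)                    # per-feature batch mean
    var = x.var(axis=0)                    # per-feature batch variance
    x_hat = (x - mu) / np.sqrt(var + eps)  # normalize
    out = gamma * x_hat + beta             # scale and shift
    cache = (x_hat, gamma, var, eps)       # saved for the backward pass
    return out, cache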
Code example #5
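# dx, x, dout and dropout_param are assumed to be defined by the preceding
# dropout forward/backward check that this snippet continues.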
dx_num = eval_numerical_gradient_array(
    lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print('dx relative error: ', rel_error(dx, dx_num))

np.random.seed(231)
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N, ))

for dropout in [0, 0.25, 0.5]:
    print('Running check with dropout = ', dropout)
    model = FullyConnectedNet([H1, H2],
                              input_dim=D,
                              num_classes=C,
                              weight_scale=5e-2,
                              dtype=np.float64,
                              dropout=dropout,
                              seed=123)

    loss, grads = model.loss(X, y)
    print('Initial loss: ', loss)

    for name in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        grad_num = eval_numerical_gradient(f,
                                           model.params[name],
                                           verbose=False,
                                           h=1e-5)
        print('%s relative error: %.2e' %
              (name, rel_error(grad_num, grads[name])))
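
The dropout_forward being checked above is assumed to implement inverted dropout: units are zeroed at training time and the survivors rescaled, so the test-time forward pass needs no change. A sketch, under the assumption that p is the probability of dropping a unit (conventions vary between keep- and drop-probability):

import numpy as np

def dropout_forward_train(x, p, seed=None):
    """Inverted dropout, training mode (sketch).
    p is assumed to be the drop probability, 0 <= p < 1."""
    if seed is not None:
        np.random.seed(seed)
    mask = (np.random.rand(*x.shape) >= p) / (1.0 - p)  # keep-and-rescale mask
    return x * mask, mask

def dropout_backward_train(dout, mask):
    """Gradient flows only through kept units, with the same rescaling."""
    return dout * mask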
Code example #6
print('')
best_model = None
################################################################################
# TODO: Train the best FullyConnectedNet that you can on CIFAR-10. You might   #
# find batch normalization and dropout useful. Store your best model in the    #
# best_model variable.                                                         #
################################################################################

X_val = data['X_val']
y_val = data['y_val']
X_test = data['X_test']
y_test = data['y_test']
learning_rate = 3.1e-4
weight_scale = 2.5e-2
model = FullyConnectedNet([600, 500, 400, 300, 200, 100],
                          weight_scale=weight_scale,
                          dtype=np.float64,
                          reg=0.02)
solver = Solver(model,
                data,
                print_every=500,
                num_epochs=30,
                batch_size=100,
                update_rule='adam',
                optim_config={
                    'learning_rate': learning_rate,
                },
                lr_decay=0.9)

solver.train()

best_model = model
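
The snippet extracts the validation and test splits but stops after training; a minimal follow-up evaluation, mirroring code example #3 (model.loss called without labels is assumed to return class scores):

y_val_pred = np.argmax(best_model.loss(X_val), axis=1)
y_test_pred = np.argmax(best_model.loss(X_test), axis=1)
print('Validation accuracy: ', np.mean(y_val_pred == y_val))
print('Test accuracy: ', np.mean(y_test_pred == y_test))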
Code example #7
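# dx_num, da_num, db_num, dout and cache are assumed to be defined by the
# preceding forward-pass and numerical-gradient setup that this snippet continues.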
dx, dgamma, dbeta = batchnorm_backward(dout, cache)
print('dx error: ', rel_error(dx_num, dx))
print('dgamma error: ', rel_error(da_num, dgamma))
print('dbeta error: ', rel_error(db_num, dbeta))

np.random.seed(231)
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N, ))

for reg in [0, 3.14]:
    print('Running check with reg = ', reg)
    model = FullyConnectedNet([H1, H2],
                              input_dim=D,
                              num_classes=C,
                              reg=reg,
                              weight_scale=5e-2,
                              dtype=np.float64,
                              use_batchnorm=True)

    loss, grads = model.loss(X, y)
    print('Initial loss: ', loss)

    for name in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        grad_num = eval_numerical_gradient(f,
                                           model.params[name],
                                           verbose=False,
                                           h=1e-5)
        print('%s relative error: %.2e' %
              (name, rel_error(grad_num, grads[name])))
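
For reference, the batchnorm_backward being checked differentiates the normalization step; a sketch of the standard simplified form, assuming the cache layout from the forward sketch under code example #4:

import numpy as np

def batchnorm_backward_sketch(dout, cache):
    """Backward pass for batch normalization (sketch).
    cache = (x_hat, gamma, var, eps) as saved by the forward sketch."""
    x_hat, gamma, var, eps = cache
    N = dout.shape[0]
    dbeta = dout.sum(axis=0)             # gradient of the shift
    dgamma = (dout * x_hat).sum(axis=0)  # gradient of the scale
    dx_hat = dout * gamma
    inv_std = 1.0 / np.sqrt(var + eps)
    # Chain rule through the batch mean and variance, collapsed into one line:
    dx = (inv_std / N) * (N * dx_hat
                          - dx_hat.sum(axis=0)
                          - x_hat * (dx_hat * x_hat).sum(axis=0))
    return dx, dgamma, dbeta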
Code example #8
def neural_network_with_rms_and_adam():
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test
    }
    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    learning_rates = {'rmsprop': 1e-4, 'adam': 1e-3}
    for update_rule in ['adam', 'rmsprop']:
        print('running with ', update_rule)
        model = FullyConnectedNet([100, 100, 100, 100, 100], weight_scale=5e-2)

        solver = Solver(
            model,
            small_data,
            num_epochs=5,
            batch_size=100,
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rates[update_rule]},
            verbose=True)
        solvers[update_rule] = solver
        solver.train()

    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    plt.show()
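
The 'adam' update rule selected here is assumed to be the standard bias-corrected form; a sketch with the usual (w, dw, config) interface these solvers expect:

import numpy as np

def adam(w, dw, config=None):
    """Adam update (sketch): moving averages of the gradient and its
    square, with bias correction for the early steps."""
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-3)
    config.setdefault('beta1', 0.9)
    config.setdefault('beta2', 0.999)
    config.setdefault('epsilon', 1e-8)
    config.setdefault('m', np.zeros_like(w))
    config.setdefault('v', np.zeros_like(w))
    config.setdefault('t', 0)

    config['t'] += 1
    config['m'] = config['beta1'] * config['m'] + (1 - config['beta1']) * dw
    config['v'] = config['beta2'] * config['v'] + (1 - config['beta2']) * dw ** 2
    m_hat = config['m'] / (1 - config['beta1'] ** config['t'])  # bias-corrected
    v_hat = config['v'] / (1 - config['beta2'] ** config['t'])
    next_w = w - config['learning_rate'] * m_hat / (np.sqrt(v_hat) + config['epsilon'])
    return next_w, config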
Code example #9
def sgd_momentum_test():
    N, D = 4, 5
    w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
    dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
    v = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)

    config = {'learning_rate': 1e-3, 'velocity': v}
    next_w, _ = sgd_momentum(w, dw, config=config)

    expected_next_w = np.asarray(
        [[0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
         [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
         [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
         [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096]])
    expected_velocity = np.asarray(
        [[0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
         [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
         [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
         [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096]])

    print("next_w error: {}".format(rel_error(next_w, expected_next_w)))
    print("velocity error: {}".format(
        rel_error(expected_velocity, config['velocity'])))

    # Train a six-layer network with both SGD and SGD+momentum.
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test
    }
    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    for update_rule in ['sgd', 'sgd_momentum']:
        print("Running with {}".format(update_rule))
        model = FullyConnectedNet([100, 100, 100, 100, 100], weight_scale=5e-2)
        solver = Solver(model,
                        small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={'learning_rate': 1e-2},
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()

    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    plt.show()
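
The expected values in this test match the classic momentum update; a sketch of an sgd_momentum consistent with them (the default momentum of 0.9 is what the expected arrays imply):

import numpy as np

def sgd_momentum(w, dw, config=None):
    """SGD with classic momentum (sketch): the velocity is a decaying
    running sum of gradients, and the step follows the velocity."""
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    v = config.get('velocity', np.zeros_like(w))

    v = config['momentum'] * v - config['learning_rate'] * dw  # update velocity
    next_w = w + v                                             # take the step
    config['velocity'] = v
    return next_w, config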
Code example #10
def multilayer_network_test():
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test
    }
    # Initial loss and gradient check
    # N, D, H1, H2, C = 2, 15, 20, 30, 10
    # X = np.random.randn(N, D)
    # y = np.random.randint(C, size=(N, ))

    # print(X.shape)
    # for reg in [0, 3.14]:
    #     print("Running check with reg={}".format(reg))
    #     model = FullyConnectedNet([H1, H2],
    #                               input_dim=D,
    #                               num_classes=C,
    #                               reg=reg,
    #                               weight_scale=5e-2,
    #                               dtype=np.float64)
    #     loss, grads = model.loss(X, y)
    #     print("Initial loss: {}".format(loss))

    #     for name in sorted(grads):
    #         f = lambda _: model.loss(X, y)[0]
    #         grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    #         print("{} relative {}".format(name, rel_error(grad_num, grads[name])))

    # As another sanity check, make sure you can overfit a small dataset of 50 images.
    # First we will try a three-layer network with 100 units in each hidden layer.
    # You will need to tweak the learning rate and initialization scale, but you should
    # be able to overfit and achieve 100% training accuracy within 20 epochs.
    num_train = 50
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    ##########################################################################
    # weight_scale = 5e-2
    # learning_rate = 1e-3

    # model = FullyConnectedNet([100, 100],
    #                         weight_scale=weight_scale,
    #                         dtype=np.float64)
    # solver = Solver(model,
    #                 small_data,
    #                 print_every=10,
    #                 num_epochs=20,
    #                 batch_size=25,
    #                 update_rule='sgd',
    #                 optim_config={'learning_rate': learning_rate})
    # solver.train()

    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Grid Search
    # best_accuracy = 0.0
    # best_solver = None
    # weight_scales = np.linspace(1e-3, 1e-2, 10)
    # learning_rates = np.linspace(1e-4, 1e-2, 100)
    # for w in weight_scales:
    #     for l in learning_rates:
    #         print("Training with weight_scale {} and learning_rate {}".format(w, l))
    #         model = FullyConnectedNet([100, 100],
    #                                 weight_scale=w,
    #                                 dtype=np.float64)
    #         solver = Solver(model,
    #                         small_data,
    #                         print_every=10,
    #                         num_epochs=20,
    #                         batch_size=25,
    #                         update_rule='sgd',
    #                         optim_config={'learning_rate': l})
    #         solver.train()

    #         if solver.best_train_acc > best_accuracy:
    #             best_accuracy = solver.best_train_acc
    #             best_solver = solver

    # plt.plot(best_solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Five layer network
    learning_rate = 8e-4
    weight_scale = 1e-1
    model = FullyConnectedNet([100, 100, 100, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64)
    solver = Solver(model,
                    small_data,
                    print_every=10,
                    num_epochs=20,
                    batch_size=25,
                    update_rule='sgd',
                    optim_config={'learning_rate': learning_rate})
    solver.train()
    plt.plot(solver.loss_history, 'o')
    plt.title('Training loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.show()
Code example #11
File: Dropout.py Project: MrTurtleW/code
def regularization_expriment():
    """
    We will train a pair of two-layer networks on 500 training examples: one will use no dropout,
    and one will use a dropout probability of 0.75.
    """
    num_train = 500
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test
    }

    solvers = {}
    dropout_choices = [0, 0.25, 0.5, 0.75, 0.8, 0.9, 0.99]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], weight_scale=5e-2, dropout=dropout)

        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule="adam",
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies of the models

    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 15)
    plt.show()