예제 #1
0
def batchnorm_forward_test():
    """Sanity-check ``batchnorm_forward`` on synthetic activations.

    Prints the per-feature means and standard deviations of the output of a
    random affine -> ReLU -> affine network:

    1. before normalization,
    2. after a train-mode pass with gamma=1, beta=0,
    3. after a train-mode pass with non-trivial gamma / beta,
    4. after a test-mode pass that follows 50 train-mode passes which all
       share one ``bn_param`` dict.

    Only random data is used, so no dataset is loaded here.
    """
    N, D1, D2, D3 = 200, 50, 60, 3

    # Random input pushed through affine -> ReLU -> affine.
    X = np.random.randn(N, D1)
    W1 = np.random.randn(D1, D2)
    W2 = np.random.randn(D2, D3)
    a = np.maximum(0, X.dot(W1)).dot(W2)

    print("Before batch normalization")
    print("means: {}".format(a.mean(axis=0)))
    print("stds: {}".format(a.std(axis=0)))

    # Expected: means close to zero and stds close to one.
    print("After batch normalization (gamm=1, beta=0)")
    a_norm, _ = batchnorm_forward(a, np.ones(D3), np.zeros(D3),
                                  {'mode': 'train'})
    print("mean: {}".format(a_norm.mean(axis=0)))
    print("std: {}".format(a_norm.std(axis=0)))

    # Expected: means close to beta and stds close to gamma.
    gamma = np.array([1.0, 2.0, 3.0])
    beta = np.array([11.0, 12.0, 13.0])
    a_norm, _ = batchnorm_forward(a, gamma, beta, {'mode': 'train'})
    print("After batch normalization (nontrivial gamma, beta)")
    print("means: {}".format(a_norm.mean(axis=0)))
    print("stds: {}".format(a_norm.std(axis=0)))

    # Check the test-time forward pass: run the training-time forward pass
    # many times with the same bn_param dict to warm up the running averages
    # (presumably stored in bn_param by batchnorm_forward -- confirm against
    # its implementation), then inspect a test-time forward pass.
    W1 = np.random.randn(D1, D2)
    W2 = np.random.randn(D2, D3)
    bn_param = {'mode': 'train'}

    gamma = np.ones(D3)
    beta = np.zeros(D3)

    for _ in range(50):
        X = np.random.randn(N, D1)
        a = np.maximum(0, X.dot(W1)).dot(W2)
        batchnorm_forward(a, gamma, beta, bn_param)

    bn_param['mode'] = 'test'
    X = np.random.randn(N, D1)
    a = np.maximum(0, X.dot(W1)).dot(W2)
    a_norm, _ = batchnorm_forward(a, gamma, beta, bn_param)

    # Expected: means close to zero and stds close to one, but noisier than
    # the training-time forward passes.
    print("After batch normalization (test-time)")
    print("means: {}".format(a_norm.mean(axis=0)))
    print("stds: {}".format(a_norm.std(axis=0)))
예제 #2
0
def train_best_model():
    """Train a six-hidden-layer FullyConnectedNet on CIFAR-10 and plot the run.

    The network uses batch normalization, dropout and L2 regularization and is
    optimised with Adam through ``Solver``. After training, the test-set
    accuracy is printed and the loss / accuracy histories are plotted.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Test images; this entry previously held the labels (y_test).
        'X_test': X_test,
        'y_test': y_test,
    }
    learning_rate = 3.1e-4
    weight_scale = 2.5e-2  #1e-5
    model = FullyConnectedNet([600, 500, 400, 300, 200, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64,
                              dropout=0.25,
                              use_batchnorm=True,
                              reg=1e-2)
    solver = Solver(model,
                    data,
                    print_every=500,
                    num_epochs=30,
                    batch_size=100,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': learning_rate,
                    },
                    lr_decay=0.9)

    solver.train()

    # Calling loss() without labels returns the class scores.
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print('test acc: %f' % (acc))

    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history)
    plt.title('Loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, label='train')
    plt.plot(solver.val_acc_history, label='val')
    plt.title('Classification accuracy history')
    plt.xlabel('Epoch')
    plt.ylabel('Clasification accuracy')
    # The curves carry labels, so render the legend that identifies them.
    plt.legend()
    plt.show()
예제 #3
0
def solver_test():
    """Train a TwoLayerNet with Solver and plot its loss and accuracy curves.

    Prints the test-set accuracy after training. The accuracy plot includes a
    dashed horizontal reference line at 0.5.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Test images; this entry previously held the labels (y_test).
        'X_test': X_test,
        'y_test': y_test,
    }
    model = TwoLayerNet(reg=1e-1)
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    num_epochs=10,
                    batch_size=100,
                    print_every=100)

    solver.train()

    # Calling loss() without labels returns the class scores.
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print("Test acc: {}".format(acc))

    # Visualize training loss and train / val accuracy.
    plt.subplot(2, 1, 1)
    plt.title('Training loss')
    plt.plot(solver.loss_history, 'o')
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    # Dashed reference line at 50% accuracy.
    plt.plot([0.5] * len(solver.val_acc_history), 'k--')
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()
예제 #4
0
def batchnorm_for_deep_networks():
    """Train two five-hidden-layer nets, with and without batch normalization.

    Both networks use the same hidden sizes, weight scale and Solver settings
    and are trained on the first 1000 training examples. The batch-norm
    variant is trained first. Progress is reported by the Solver itself.
    """
    X_train, y_train, X_val, y_val, _, _ = get_CIFAR10_data()

    train_count = 1000
    layer_sizes = [100, 100, 100, 100, 100]
    init_scale = 2e-2

    subset = dict(
        X_train=X_train[:train_count],
        y_train=y_train[:train_count],
        X_val=X_val,
        y_val=y_val,
    )

    # Build both networks up front (batch-norm variant first) so that weight
    # initialisation happens before any training starts.
    networks = [
        FullyConnectedNet(layer_sizes,
                          weight_scale=init_scale,
                          use_batchnorm=with_bn)
        for with_bn in (True, False)
    ]

    for net in networks:
        # Each solver receives its own optim_config dict.
        trainer = Solver(net,
                         subset,
                         num_epochs=10,
                         batch_size=50,
                         update_rule='adam',
                         optim_config={'learning_rate': 1e-3},
                         verbose=True,
                         print_every=200)
        trainer.train()
예제 #5
0
def neural_network_with_rms_and_adam():
    """Compare the Adam and RMSProp update rules on a five-hidden-layer net.

    Each rule trains a fresh FullyConnectedNet on the first 4000 training
    examples with its own learning rate. Training loss, training accuracy and
    validation accuracy of both runs are then plotted in three stacked panels.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Test images; this entry previously held the labels (y_test).
        'X_test': X_test,
        'y_test': y_test,
    }
    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    learning_rates = {'rmsprop': 1e-4, 'adam': 1e-3}
    for update_rule in ['adam', 'rmsprop']:
        print('running with ', update_rule)
        model = FullyConnectedNet([100, 100, 100, 100, 100], weight_scale=5e-2)

        solver = Solver(
            model,
            small_data,
            num_epochs=5,
            batch_size=100,
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rates[update_rule]},
            verbose=True)
        solvers[update_rule] = solver
        solver.train()

    # Set up the three panels first, then add one curve per update rule.
    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    plt.show()
예제 #6
0
def sgd_momentum_test():
    """Check ``sgd_momentum`` numerically, then compare SGD against SGD+momentum.

    Part 1 runs one ``sgd_momentum`` step on fixed inputs and prints the
    relative error of the updated weights and velocity against reference
    values. Part 2 trains a five-hidden-layer FullyConnectedNet with each
    update rule on the first 4000 training examples and plots training loss,
    training accuracy and validation accuracy.
    """
    N, D = 4, 5
    w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
    dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
    v = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)

    config = {'learning_rate': 1e-3, 'velocity': v}
    next_w, _ = sgd_momentum(w, dw, config=config)

    expected_next_w = np.asarray(
        [[0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
         [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
         [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
         [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096]])
    expected_velocity = np.asarray(
        [[0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
         [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
         [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
         [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096]])

    print("next_w error: {}".format(rel_error(next_w, expected_next_w)))
    # The velocity is read back from the config dict passed to sgd_momentum.
    print("velocity error: {}".format(
        rel_error(expected_velocity, config['velocity'])))

    # Train a six-layer network with both SGD and SGD+momentum.
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Test images; this entry previously held the labels (y_test).
        'X_test': X_test,
        'y_test': y_test,
    }
    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    for update_rule in ['sgd', 'sgd_momentum']:
        print("Running with {}".format(update_rule))
        model = FullyConnectedNet([100, 100, 100, 100, 100], weight_scale=5e-2)
        solver = Solver(model,
                        small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={'learning_rate': 1e-2},
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()

    # Set up the three panels first, then add one curve per update rule.
    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    # Display the figure; without this the comparison is built but never shown.
    plt.show()
예제 #7
0
def multilayer_network_test():
    """Overfit a five-layer FullyConnectedNet on 50 CIFAR-10 training images.

    Trains with plain SGD for 20 epochs on a 50-example subset and plots the
    training loss per iteration. Earlier experiments (gradient check,
    three-layer overfit, grid search) are kept below as commented-out code.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Test images; this entry previously held the labels (y_test).
        'X_test': X_test,
        'y_test': y_test,
    }
    # Initial loss and gradient check
    # N, D, H1, H2, C = 2, 15, 20, 30, 10
    # X = np.random.randn(N, D)
    # y = np.random.randint(C, size=(N, ))

    # print(X.shape)
    # for reg in [0, 3.14]:
    #     print("Running check with reg={}".format(reg))
    #     model = FullyConnectedNet([H1, H2],
    #                               input_dim=D,
    #                               num_classes=C,
    #                               reg=reg,
    #                               weight_scale=5e-2,
    #                               dtype=np.float64)
    #     loss, grads = model.loss(X, y)
    #     print("Initial loss: {}".format(loss))

    #     for name in sorted(grads):
    #         f = lambda _: model.loss(X, y)[0]
    #         grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    #         print("{} relative {}".format(name, rel_error(grad_num, grads[name])))

    # As another sanity check, make sure you can overfit a small dataset of
    # 50 images. First we will try a three-layer network with 100 units in
    # each hidden layer. You will need to tweak the learning rate and
    # initialization scale, but you should be able to overfit and achieve
    # 100% training accuracy within 20 epochs.
    num_train = 50
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    ##########################################################################
    # weight_scale = 5e-2
    # learning_rate = 1e-3

    # model = FullyConnectedNet([100, 100],
    #                         weight_scale=weight_scale,
    #                         dtype=np.float64)
    # solver = Solver(model,
    #                 small_data,
    #                 print_every=10,
    #                 num_epochs=20,
    #                 batch_size=25,
    #                 update_rule='sgd',
    #                 optim_config={'learning_rate': learning_rate})
    # solver.train()

    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Grid Search
    # NOTE(review): the comparison `best_accurcy > solver.best_train_acc`
    # below looks inverted (it can never replace the initial 0.0) -- fix
    # before re-enabling this experiment.
    # best_accurcy = 0.0
    # best_solver = None
    # weight_scale = np.linspace(1e-3, 1e-2, 10)
    # learing_rate = np.linspace(1e-4, 1e-2, 100)
    # for w in weight_scale:
    #     for l in learing_rate:
    #         print("Training with weight_scale {} and learning_rate {}".format(w, l))
    #         model = FullyConnectedNet([100, 100],
    #                                 weight_scale=w,
    #                                 dtype=np.float64)
    #         solver = Solver(model,
    #                         small_data,
    #                         print_every=10,
    #                         num_epochs=20,
    #                         batch_size=25,
    #                         update_rule='sgd',
    #                         optim_config={'learning_rate': l})
    #         solver.train()

    #         if best_accurcy > solver.best_train_acc:
    #             best_accurcy = solver.best_train_acc
    #             best_solver = solver

    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Five layer network
    learning_rate = 8e-4
    weight_scale = 1e-1
    model = FullyConnectedNet([100, 100, 100, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64)
    solver = Solver(model,
                    small_data,
                    print_every=10,
                    num_epochs=20,
                    batch_size=25,
                    update_rule='sgd',
                    optim_config={'learning_rate': learning_rate})
    solver.train()
    plt.plot(solver.loss_history, 'o')
    plt.title('Training loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.show()
예제 #8
0
def main():
    """Train a TwoLayerNet on CIFAR-10, then grid-search its hyperparameters.

    Steps:
    1. Load the data, subtract the training mean image and flatten to rows.
    2. Train a baseline network, print its validation accuracy, plot the
       loss / accuracy histories and visualise the learned weights.
    3. Grid-search hidden size, learning rate and regularization strength,
       keeping the network with the best validation accuracy, then print all
       results and visualise the best network's weights.
    """
    # toy_data()

    # Load the data
    # NOTE(review): num_test is not forwarded to get_CIFAR10_data, yet X_test
    # is reshaped to num_test rows below -- confirm the loader's default.
    num_training, num_validation, num_test = 49000, 1000, 1000
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data(
        num_training, num_validation)

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    # Train a network
    input_size = 32 * 32 * 3
    hidden_size = 50
    num_classes = 10
    net = TwoLayerNet(input_size, hidden_size, num_classes)

    # Train the network
    stats = net.train(X_train,
                      y_train,
                      X_val,
                      y_val,
                      num_iters=1000,
                      batch_size=200,
                      learning_rate=1e-4,
                      learning_rate_decay=0.95,
                      reg=0.5,
                      verbose=True)

    val_acc = (net.predict(X_val) == y_val).mean()
    print("Validation accuracy: {}".format(val_acc))

    # Debug the training
    # Plot the loss function and train / validation accuracies
    plt.subplot(2, 1, 1)
    plt.plot(stats['loss_history'], label='train')
    plt.title('Loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')

    plt.tight_layout()

    plt.subplot(2, 1, 2)
    plt.plot(stats['train_acc_history'], label='train')
    plt.plot(stats['val_acc_history'], label='val')
    plt.title('Classification accuracy history')
    plt.xlabel('Epoch')
    plt.ylabel('Classification accuracy')

    plt.tight_layout()
    plt.show()

    # Visualize the weights of the network
    show_net_weights(net)

    # Below, you should experiment with different values of the various
    # hyperparameters, including hidden layer size, learning rate, number
    # of training epochs, and regularization strength.
    hidden_sizes = [75, 100, 125]
    learning_rates = np.array([0.7, 0.8, 0.9, 1, 1.1]) * 1e-3
    regularization_strengths = [0.75, 1, 1.25]

    results = {}
    best_val_acc = 0
    best_net = None

    print("Running...")
    for hs in hidden_sizes:
        for lr in learning_rates:
            for reg in regularization_strengths:
                print("Training with hs={}, lr={}, reg={}".format(hs, lr, reg))
                net = TwoLayerNet(input_size, hs, num_classes)
                # reg must be passed explicitly; otherwise every run in this
                # innermost loop trains with the same (default) strength and
                # the reg axis of the search is meaningless.
                stats = net.train(X_train,
                                  y_train,
                                  X_val,
                                  y_val,
                                  num_iters=1500,
                                  batch_size=200,
                                  learning_rate=lr,
                                  reg=reg)
                val_acc = (net.predict(X_val) == y_val).mean()
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    best_net = net
                results[(hs, lr, reg)] = val_acc

    print('Finished!')
    for hs, lr, reg in sorted(results):
        val_acc = results[(hs, lr, reg)]
        print('hs {} lr {} reg {} val accuracy: {}'.format(
            hs, lr, reg, val_acc))

    print(
        'best validation accuracy achieved during cross-validation: {}'.format(
            best_val_acc))
    # visualize the weights of the best network
    show_net_weights(best_net)
예제 #9
0
파일: softmax.py 프로젝트: MrTurtleW/code
def main():
    """Run the Softmax classifier experiments on CIFAR-10.

    Steps:
    1. Load the data, draw a 500-example development subset, flatten the
       images, subtract the training mean image and append a bias column.
    2. Sanity-check the naive loss and its gradient (with and without
       regularization) and compare the naive and vectorized implementations.
    3. Grid-search learning rate and regularization strength on the
       validation set, report all results and the best model's test accuracy.
    4. Visualise the learned per-class weights as images.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

    num_training = 49000
    num_dev = 500
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # Append a bias column of ones so a single weight matrix covers the bias.
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    # Generate a random softmax weight matrix and use it to compute the loss.
    W = np.random.randn(3073, 10) * 0.0001
    loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

    # As a rough sanity check, our loss should be something close to
    # -log(0.1): W holds tiny random values, so all class scores are nearly
    # equal and each of the 10 classes gets a probability of roughly 1/10.
    print('loss: %f' % loss)
    print('sanity check: %f' % (-np.log(0.1)))

    f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0]
    grad_check_sparse(f, W, grad, 10)

    # similar to SVM case, do another gradient check with regularization
    loss, grad = softmax_loss_naive(W, X_dev, y_dev, 1e2)
    f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 1e2)[0]
    grad_check_sparse(f, W, grad, 10)

    # Time the naive implementation against the vectorized one.
    tic = time.time()
    loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.00001)
    toc = time.time()
    print('Naive loss: {} computed in {}'.format(loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = softmax_loss_vectorized(
        W, X_dev, y_dev, 0.00001)
    toc = time.time()
    # Report the vectorized loss here (this line used to print loss_naive).
    print('Vectorized loss: {} computed in {}'.format(loss_vectorized,
                                                      toc - tic))

    grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
    print('Gradient difference: %f' % grad_difference)

    # Use the validation set to tune hyperparameters (regularization strength and
    # learning rate). You should experiment with different ranges for the learning
    # rates and regularization strengths; if you are careful you should be able to
    # get a classification accuracy of over 0.35 on the validation set.
    results = {}
    best_val = -1
    best_softmax = None
    learning_rates = [1e-7, 2e-7, 5e-7]
    #regularization_strengths = [5e4, 1e8]
    regularization_strengths = [(1 + 0.1 * i) * 1e4 for i in range(-3, 4)
                                ] + [(5 + 0.1 * i) * 1e4 for i in range(-3, 4)]

    for lr in learning_rates:
        for rs in regularization_strengths:
            # The model trained here is Softmax, not an SVM.
            print('Training softmax with rs {} and lr {}'.format(rs, lr))
            softmax = Softmax()
            softmax.train(X_train, y_train, lr, rs, num_iters=2000)
            y_train_pred = softmax.predict(X_train)
            train_accuracy = np.mean(y_train == y_train_pred)
            y_val_pred = softmax.predict(X_val)
            val_accuracy = np.mean(y_val == y_val_pred)
            if val_accuracy > best_val:
                best_val = val_accuracy
                best_softmax = softmax
            results[(lr, rs)] = train_accuracy, val_accuracy

    # Print out results.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy: %f' %
              (lr, reg, train_accuracy, val_accuracy))

    print('best validation accuracy achieved during cross-validation: %f' %
          best_val)

    # Evaluate the best softmax on test set
    y_test_pred = best_softmax.predict(X_test)
    test_accuracy = np.mean(y_test == y_test_pred)
    print('softmax on raw pixels final test set accuracy: %f' %
          (test_accuracy, ))

    # Visualize the learned weights for each class
    w = best_softmax.W[:-1, :]  # strip out the bias
    w = w.reshape(32, 32, 3, 10)

    w_min, w_max = np.min(w), np.max(w)

    classes = [
        'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship',
        'truck'
    ]
    for i in range(10):
        plt.subplot(2, 5, i + 1)

        # Rescale the weights to be between 0 and 255
        wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    # Display the weight images; without this the figure is never shown.
    plt.show()
예제 #10
0
파일: svm.py 프로젝트: MrTurtleW/code
def main():
    """Run the linear SVM experiments on CIFAR-10.

    Steps:
    1. Load the data, draw a 500-example development subset, flatten the
       images, subtract the training mean image and append a bias column.
    2. Compare the naive and vectorized SVM loss / gradient implementations.
    3. Train a LinearSVM with SGD and report train / validation accuracy.
    4. Grid-search learning rate and regularization strength, plot the
       results, report the best model's test accuracy and visualise its
       per-class weights as images.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    # visualize(X_train, y_train, X_test, y_test)

    # We will also make a development set, which is a small subset of
    # the training set.
    num_training = 49000
    num_dev = 500
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Preprocessing: subtract the mean image
    # first: compute the image mean based on the training data
    # (without the axis argument np.mean would average over all elements)
    mean_image = np.mean(X_train, axis=0)
    # plt.figure(figsize=(4, 4))
    # plt.imshow(mean_image.reshape(32, 32, 3).astype('uint8'))
    # plt.show()

    # second: subtract the mean image from train and test data
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # third: append the bias dimension of ones so that our SVM
    # only has to worry about optimizing a single weight matrix W
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    # SVM Classifier
    # Start with random W and find a W that minimizes the loss
    W = np.random.randn(3073, 10) * 0.0001
    loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)
    print('loss: {}'.format(loss))

    # To check that you have implemented the gradient correctly,
    # you can numerically estimate the gradient of the loss function and
    # compare the numeric estimate to the gradient that you computed.
    # f = lambda w: svm_loss_naive(w, X_dev, y_dev, 1e2)[0]
    # grad_numerical = grad_check_sparse(f, W, grad)

    # Time the naive implementation against the vectorized one.
    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.00001)
    toc = time.time()
    print('Naive loss: {} computed in {}'.format(loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = svm_loss_vectorized(
        W, X_dev, y_dev, 0.00001)
    toc = time.time()
    print('Vectorized loss: {} computed in {}'.format(loss_vectorized,
                                                      toc - tic))

    print('difference: {}'.format(loss_naive - loss_vectorized))

    # Compare the gradients of the two implementations (Frobenius norm).
    difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('difference: {}'.format(difference))

    # SGD
    svm = LinearSVM()
    tic = time.time()
    loss_hist = svm.train(X_train,
                          y_train,
                          learning_rate=1e-7,
                          reg=5e4,
                          num_iters=1500,
                          verbose=True)
    toc = time.time()
    print("That took {}".format(toc - tic))

    # plot the loss
    # plt.plot(loss_hist)
    # plt.xlabel("Iteration number")
    # plt.ylabel("Loss value")
    # plt.show()

    # Evaluate the performance on both the training and validation set
    y_train_pred = svm.predict(X_train)
    print('Training accuracy: {}'.format(np.mean(y_train == y_train_pred)))
    y_val_pred = svm.predict(X_val)
    print("Validation accuracy: {}".format(np.mean(y_val == y_val_pred)))

    # Use the validation set to tune hyperparameters
    # NOTE(review): these strengths are on the order of 1e-4 while the SGD
    # run above uses reg=5e4 -- confirm the scale is intended.
    learing_rate = [1.4e-7, 1.5e-7, 1.6e-7]
    regulartization_strengths = [
        (1 + i * 0.1) * 1e-4 for i in range(-3, 3)
    ] + [(2 + 0.1 * i) * 1e-4 for i in range(-3, 3)]

    results = {}
    best_val = -1
    best_svm = None

    for rs in regulartization_strengths:
        for lr in learing_rate:
            print('Traing SVM with rs {} and lr {}'.format(rs, lr))
            svm = LinearSVM()
            svm.train(X_train, y_train, lr, rs, num_iters=3000)
            y_train_pred = svm.predict(X_train)
            train_accuracy = np.mean(y_train == y_train_pred)
            y_val_pred = svm.predict(X_val)
            val_accuracy = np.mean(y_val == y_val_pred)
            if val_accuracy > best_val:
                best_val = val_accuracy
                best_svm = svm
            results[(lr, rs)] = train_accuracy, val_accuracy

    # Print the results
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr {} reg {} train accuracy {} val accuracy: {}'.format(
            lr, reg, train_accuracy, val_accuracy))

    print(
        'best validation accuracy achieved during cross-validation: {}'.format(
            best_val))

    # Visualize the cross-validation results; result keys are (lr, reg).
    x_scatter = [math.log10(x[0]) for x in results]
    y_scatter = [math.log10(x[1]) for x in results]
    # plot training accuracy
    marker_size = 100
    colors = [results[x][0] for x in results]
    plt.subplot(2, 1, 1)
    plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
    plt.colorbar()
    plt.xlabel('log learning rate')
    plt.ylabel('log retgularization strength')
    plt.title('CIFAR-10 training accuracy')

    # plot validation accuracy
    colors = [results[x][1] for x in results]
    plt.subplot(2, 1, 2)
    plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
    plt.colorbar()
    plt.xlabel('log learning rate')
    plt.ylabel('log retgularization strength')
    plt.title('CIFAR-10 validation accuracy')
    plt.show()

    # Evaluate the best svm on test set
    y_test_pred = best_svm.predict(X_test)
    test_accuracy = np.mean(y_test == y_test_pred)
    print('Linear SVM on raw pixels final test set accuracy: {}'.format(
        test_accuracy))

    # Visualize the learned weights for each class.
    # Depending on your choice of learning rate and regularization strength, these may
    # or may not be nice to look at.
    w = best_svm.W[:-1, :]  # strip out the bias
    w = w.reshape(32, 32, 3, 10)
    w_min, w_max = np.min(w), np.max(w)
    classes = [
        'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship',
        'truck'
    ]
    for i in range(10):
        plt.subplot(2, 5, i + 1)

        # Rescale the weights to be between 0 and 255
        wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    # Display the weight images; without this the figure is never shown.
    plt.show()
예제 #11
0
파일: Dropout.py 프로젝트: MrTurtleW/code
def regularization_expriment():
    """Compare dropout settings on a small training set.

    Trains one FullyConnectedNet with a single 500-unit hidden layer per
    value in ``dropout_choices`` on 500 training examples, then plots the
    per-epoch train and validation accuracy of every run.
    """
    num_train = 500
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
        # Test images; this entry previously held the labels (y_test).
        'X_test': X_test,
        'y_test': y_test,
    }

    solvers = {}
    dropout_choices = [0, 0.25, 0.5, 0.75, 0.8, 0.9, 0.99]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], weight_scale=5e-2, dropout=dropout)

        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule="adam",
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies of all models.
    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 15)
    plt.show()
예제 #12
0
def three_layer_convnet_test():
    """Overfit a ThreeLayerConvNet on 100 CIFAR-10 images and plot the run.

    The images are transposed with axes (0, 3, 1, 2) before being handed to
    the Solver. The loss per iteration and the train / validation accuracy per
    epoch are plotted afterwards. Earlier checks (initial loss, gradient
    check) are kept below as commented-out code.
    """
    # model = ThreeLayerConvNet()

    # N = 50
    # X = np.random.randn(N, 3, 32, 32)
    # y = np.random.randint(10, size=N)

    # loss, grads = model.loss(X, y)
    # print('Initial loss (no regularization): {}'.format(loss))

    # model.reg = 0.5
    # loss, grads = model.loss(X, y)
    # print("Initial loss(with regularization: {}".format(loss))

    # # Gradient check
    # num_inputs = 2
    # input_dim = (3, 16, 16)
    # reg = 0.0
    # num_classes = 10
    # X = np.random.randn(num_inputs, *input_dim)
    # y = np.random.randint(num_classes, size=num_inputs)

    # model = ThreeLayerConvNet(num_filters=3, filter_size=3,
    #                         input_dim=input_dim, hidden_dim=7,
    #                         dtype=np.float64)
    # loss, grads = model.loss(X, y)
    # for param_name in sorted(grads):
    #     f = lambda _: model.loss(X, y)[0]
    #     param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    #     e = rel_error(param_grad_num, grads[param_name])
    #     print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

    # Overfit a small subset of the data.
    X_train, y_train, X_val, y_val, _, _ = get_CIFAR10_data()
    sample_count = 100
    axis_order = (0, 3, 1, 2)

    subset = dict(
        X_train=X_train[:sample_count].transpose(*axis_order),
        y_train=y_train[:sample_count],
        X_val=X_val.transpose(*axis_order),
        y_val=y_val,
    )

    net = ThreeLayerConvNet(weight_scale=1e-2)
    trainer = Solver(net,
                     subset,
                     num_epochs=20,
                     batch_size=50,
                     update_rule='adam',
                     optim_config={'learning_rate': 4e-4},
                     verbose=True,
                     print_every=1)
    trainer.train()

    # Top panel: loss per iteration.
    plt.subplot(2, 1, 1)
    plt.plot(trainer.loss_history, 'o')
    plt.xlabel('iteration')
    plt.ylabel('loss')

    # Bottom panel: accuracy per epoch, train curve first so that the legend
    # entries line up with the plotted lines.
    plt.subplot(2, 1, 2)
    for history in (trainer.train_acc_history, trainer.val_acc_history):
        plt.plot(history, '-o')
    plt.legend(['train', 'val'], loc='upper left')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.show()