def batchnorm_forward_test():
    """Print statistics that sanity-check ``batchnorm_forward``.

    Activations of a random two-layer ReLU network are pushed through
    ``batchnorm_forward`` in train mode (with trivial and non-trivial
    gamma/beta) and in test mode, and the per-feature means and standard
    deviations are printed for inspection. Nothing is returned.
    """
    # Only synthetic activations are used, so no dataset is loaded here.
    N, D1, D2, D3 = 200, 50, 60, 3
    X = np.random.randn(N, D1)
    W1 = np.random.randn(D1, D2)
    W2 = np.random.randn(D2, D3)
    a = np.maximum(0, X.dot(W1)).dot(W2)

    print("Before batch normalization")
    print("means: {}".format(a.mean(axis=0)))
    print("stds: {}".format(a.std(axis=0)))

    # Means should be zero and stds close to one
    print("After batch normalization (gamm=1, beta=0)")
    a_norm, _ = batchnorm_forward(a, np.ones(D3), np.zeros(D3),
                                  {'mode': 'train'})
    print("mean: {}".format(a_norm.mean(axis=0)))
    print("std: {}".format(a_norm.std(axis=0)))

    # Now means should be close to beta and stds close to gamma
    gamma = np.array([1.0, 2.0, 3.0])
    beta = np.array([11.0, 12.0, 13.0])
    a_norm, _ = batchnorm_forward(a, gamma, beta, {'mode': 'train'})
    print("After batch normalization (nontrivial gamma, beta)")
    print("means: {}".format(a_norm.mean(axis=0)))
    print("stds: {}".format(a_norm.std(axis=0)))

    # Check the test-time forward pass by running the training-time forward
    # pass many times to warm up the running averages, and then checking the
    # means and variances of activations after a test-time forward pass.
    N, D1, D2, D3 = 200, 50, 60, 3
    W1 = np.random.randn(D1, D2)
    W2 = np.random.randn(D2, D3)
    # The same bn_param dict is reused across calls and then switched to
    # test mode below.
    bn_param = {'mode': 'train'}
    gamma = np.ones(D3)
    beta = np.zeros(D3)
    for _ in range(50):
        X = np.random.randn(N, D1)
        a = np.maximum(0, X.dot(W1)).dot(W2)
        batchnorm_forward(a, gamma, beta, bn_param)

    bn_param['mode'] = 'test'
    X = np.random.randn(N, D1)
    a = np.maximum(0, X.dot(W1)).dot(W2)
    a_norm, _ = batchnorm_forward(a, gamma, beta, bn_param)

    # Means should be close to zero and stds close to one, but will be
    # noisier than training-time forward passes.
    print("After batch normalization (test-time)")
    print("means: {}".format(a_norm.mean(axis=0)))
    print("stds: {}".format(a_norm.std(axis=0)))
def train_best_model():
    """Train a deep ``FullyConnectedNet`` on CIFAR-10 and report test accuracy.

    Builds a network with six hidden layers (batchnorm and dropout enabled),
    trains it with Adam through ``Solver``, prints the accuracy on the test
    split and plots the loss and accuracy histories. Nothing is returned.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # previously bound to y_test by mistake
        'y_test': y_test,
    }

    learning_rate = 3.1e-4
    weight_scale = 2.5e-2  # 1e-5
    model = FullyConnectedNet([600, 500, 400, 300, 200, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64,
                              dropout=0.25,
                              use_batchnorm=True,
                              reg=1e-2)
    solver = Solver(model,
                    data,
                    print_every=500,
                    num_epochs=30,
                    batch_size=100,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': learning_rate,
                    },
                    lr_decay=0.9)
    solver.train()

    # Calling loss() without labels yields class scores; argmax picks the
    # predicted class for each test example.
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print('test acc: %f' % (acc))

    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history)
    plt.title('Loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, label='train')
    plt.plot(solver.val_acc_history, label='val')
    plt.title('Classification accuracy history')
    plt.xlabel('Epoch')
    plt.ylabel('Clasification accuracy')
    # The curves carry labels, so draw the legend that makes them visible.
    plt.legend()
    plt.show()
def solver_test():
    """Train a ``TwoLayerNet`` with ``Solver`` and visualise the run.

    Prints the accuracy on the test split, then plots the training loss and
    the train/validation accuracy histories together with a dashed 0.5
    reference line. Nothing is returned.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # previously bound to y_test by mistake
        'y_test': y_test,
    }

    model = TwoLayerNet(reg=1e-1)
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    num_epochs=10,
                    batch_size=100,
                    print_every=100)
    solver.train()

    # Calling loss() without labels yields class scores.
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print("Test acc: {}".format(acc))

    # Visualize training loss and train / val accuracy
    plt.subplot(2, 1, 1)
    plt.title('Training loss')
    plt.plot(solver.loss_history, 'o')
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    # Dashed horizontal reference line at 0.5 accuracy.
    plt.plot([0.5] * len(solver.val_acc_history), 'k--')
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()
def batchnorm_for_deep_networks():
    """Train two identical deep nets on 1000 images, with and without batchnorm.

    Both networks have five hidden layers of 100 units and are trained with
    Adam for 10 epochs; the batchnorm variant is trained first. Progress is
    reported by the solvers themselves. Nothing is returned.
    """
    X_train, y_train, X_val, y_val, _X_test, _y_test = get_CIFAR10_data()

    subset_size = 1000
    small_data = {
        'X_train': X_train[:subset_size],
        'y_train': y_train[:subset_size],
        'X_val': X_val,
        'y_val': y_val,
    }

    layer_sizes = [100] * 5
    init_scale = 2e-2

    # Build both models up front, in the same order as before, so that any
    # random initialisation is consumed identically.
    with_bn = FullyConnectedNet(layer_sizes,
                                weight_scale=init_scale,
                                use_batchnorm=True)
    without_bn = FullyConnectedNet(layer_sizes,
                                   weight_scale=init_scale,
                                   use_batchnorm=False)

    for net in (with_bn, without_bn):
        # A fresh optim_config dict is created for every solver.
        trainer = Solver(net,
                         small_data,
                         num_epochs=10,
                         batch_size=50,
                         update_rule='adam',
                         optim_config={
                             'learning_rate': 1e-3,
                         },
                         verbose=True,
                         print_every=200)
        trainer.train()
def neural_network_with_rms_and_adam():
    """Compare the Adam and RMSProp update rules on a 4000-image subset.

    For each rule a five-hidden-layer ``FullyConnectedNet`` is trained for
    5 epochs; training loss, training accuracy and validation accuracy of
    both runs are then plotted in three stacked subplots. Nothing is
    returned.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # previously bound to y_test by mistake
        'y_test': y_test,
    }

    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    solvers = {}
    learning_rates = {'rmsprop': 1e-4, 'adam': 1e-3}
    for update_rule in ['adam', 'rmsprop']:
        print('running with ', update_rule)
        model = FullyConnectedNet([100, 100, 100, 100, 100],
                                  weight_scale=5e-2)
        solver = Solver(
            model,
            small_data,
            num_epochs=5,
            batch_size=100,
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rates[update_rule]},
            verbose=True)
        solvers[update_rule] = solver
        solver.train()

    # Set up the three panels first, then add one curve per update rule.
    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    plt.show()
def sgd_momentum_test():
    """Check ``sgd_momentum`` numerically, then compare SGD with SGD+momentum.

    The first part runs one ``sgd_momentum`` step on fixed inputs and prints
    the relative error against hard-coded reference values. The second part
    trains a five-hidden-layer ``FullyConnectedNet`` on 4000 CIFAR-10 images
    with each update rule and plots loss and accuracy histories. Nothing is
    returned.
    """
    N, D = 4, 5
    w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
    dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
    v = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)

    config = {'learning_rate': 1e-3, 'velocity': v}
    next_w, _ = sgd_momentum(w, dw, config=config)

    expected_next_w = np.asarray(
        [[0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
         [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
         [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
         [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096]])
    expected_velocity = np.asarray(
        [[0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
         [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
         [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
         [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096]])

    print("next_w error: {}".format(rel_error(next_w, expected_next_w)))
    # The velocity is read back from the config dict passed to sgd_momentum.
    print("velocity error: {}".format(
        rel_error(expected_velocity, config['velocity'])))

    # Train a six-layer network with both SGD and SGD+momentum.
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # previously bound to y_test by mistake
        'y_test': y_test,
    }

    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    solvers = {}
    for update_rule in ['sgd', 'sgd_momentum']:
        print("Running with {}".format(update_rule))
        model = FullyConnectedNet([100, 100, 100, 100, 100],
                                  weight_scale=5e-2)
        solver = Solver(model,
                        small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={'learning_rate': 1e-2},
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()

    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    # The figure was previously built but never displayed.
    plt.show()
def multilayer_network_test():
    """Overfit a five-layer ``FullyConnectedNet`` on 50 CIFAR-10 images.

    Trains with plain SGD for 20 epochs on a 50-image subset and plots the
    training loss history. Earlier experiments (gradient check, three-layer
    overfit, grid search) are kept as commented-out reference code. Nothing
    is returned.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # previously bound to y_test by mistake
        'y_test': y_test,
    }

    # Initial loss and gradient check
    # N, D, H1, H2, C = 2, 15, 20, 30, 10
    # X = np.random.randn(N, D)
    # y = np.random.randint(C, size=(N, ))
    # print(X.shape)
    # for reg in [0, 3.14]:
    #     print("Running check with reg={}".format(reg))
    #     model = FullyConnectedNet([H1, H2],
    #                               input_dim=D,
    #                               num_classes=C,
    #                               reg=reg,
    #                               weight_scale=5e-2,
    #                               dtype=np.float64)
    #     loss, grads = model.loss(X, y)
    #     print("Initial loss: {}".format(loss))
    #     for name in sorted(grads):
    #         f = lambda _: model.loss(X, y)[0]
    #         grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    #         print("{} relative {}".format(name, rel_error(grad_num, grads[name])))

    # As another sanity check, make sure you can overfit a small dataset of
    # 50 images. First we will try a three-layer network with 100 units in
    # each hidden layer. You will need to tweak the learning rate and
    # initialization scale, but you should be able to overfit and achieve
    # 100% training accuracy within 20 epochs.
    num_train = 50
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    ##########################################################################
    # weight_scale = 5e-2
    # learning_rate = 1e-3
    # model = FullyConnectedNet([100, 100],
    #                           weight_scale=weight_scale,
    #                           dtype=np.float64)
    # solver = Solver(model,
    #                 small_data,
    #                 print_every=10,
    #                 num_epochs=20,
    #                 batch_size=25,
    #                 update_rule='sgd',
    #                 optim_config={'learning_rate': learning_rate})
    # solver.train()
    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Grid Search
    # NOTE(review): the comparison below looks inverted (it never updates
    # when best_accurcy starts at 0.0) - confirm before re-enabling.
    # best_accurcy = 0.0
    # best_solver = None
    # weight_scale = np.linspace(1e-3, 1e-2, 10)
    # learing_rate = np.linspace(1e-4, 1e-2, 100)
    # for w in weight_scale:
    #     for l in learing_rate:
    #         print("Training with weight_scale {} and learning_rate {}".format(w, l))
    #         model = FullyConnectedNet([100, 100],
    #                                   weight_scale=w,
    #                                   dtype=np.float64)
    #         solver = Solver(model,
    #                         small_data,
    #                         print_every=10,
    #                         num_epochs=20,
    #                         batch_size=25,
    #                         update_rule='sgd',
    #                         optim_config={'learning_rate': l})
    #         solver.train()
    #         if best_accurcy > solver.best_train_acc:
    #             best_accurcy = solver.best_train_acc
    #             best_solver = solver
    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Five layer network
    learning_rate = 8e-4
    weight_scale = 1e-1
    model = FullyConnectedNet([100, 100, 100, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64)
    solver = Solver(model,
                    small_data,
                    print_every=10,
                    num_epochs=20,
                    batch_size=25,
                    update_rule='sgd',
                    optim_config={'learning_rate': learning_rate})
    solver.train()

    plt.plot(solver.loss_history, 'o')
    plt.title('Training loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.show()
def main():
    """Train a two-layer network on CIFAR-10, then grid-search hyperparameters.

    Steps: load and mean-centre the data, flatten images to rows, train a
    baseline ``TwoLayerNet`` and plot its training statistics, then sweep
    hidden size, learning rate and regularization strength, reporting the
    validation accuracy of every combination and visualising the weights of
    the best network. Nothing is returned.
    """
    # toy_data()

    # Load the data
    num_training, num_validation, num_test = 49000, 1000, 1000
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data(
        num_training, num_validation)

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    # Train a network
    input_size = 32 * 32 * 3
    hidden_size = 50
    num_classes = 10
    net = TwoLayerNet(input_size, hidden_size, num_classes)

    stats = net.train(X_train,
                      y_train,
                      X_val,
                      y_val,
                      num_iters=1000,
                      batch_size=200,
                      learning_rate=1e-4,
                      learning_rate_decay=0.95,
                      reg=0.5,
                      verbose=True)
    val_acc = (net.predict(X_val) == y_val).mean()
    print("Validation accuracy: {}".format(val_acc))

    # Debug the training
    # Plot the loss function and train / validation accuracies
    plt.subplot(2, 1, 1)
    plt.plot(stats['loss_history'], label='train')
    plt.title('Loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()

    plt.subplot(2, 1, 2)
    plt.plot(stats['train_acc_history'], label='train')
    plt.plot(stats['val_acc_history'], label='val')
    plt.title('Classification accuracy history')
    plt.xlabel('Epoch')
    plt.ylabel('Classification accuracy')
    # The two curves carry labels, so draw the legend to tell them apart.
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Visualize the weights of the network
    show_net_weights(net)

    # Below, you should experiment with different values of the various
    # hyperparameters, including hidden layer size, learning rate, number
    # of training epochs, and regularization strength.
    hidden_sizes = [75, 100, 125]
    learning_rates = np.array([0.7, 0.8, 0.9, 1, 1.1]) * 1e-3
    regularization_strengths = [0.75, 1, 1.25]

    results = {}
    best_val_acc = 0
    best_net = None

    print("Running...")
    for hs in hidden_sizes:
        for lr in learning_rates:
            for reg in regularization_strengths:
                print("Training with hs={}, lr={}, reg={}".format(
                    hs, lr, reg))
                net = TwoLayerNet(input_size, hs, num_classes)
                # reg must be forwarded; it was previously dropped, so every
                # regularization setting trained with the default strength.
                stats = net.train(X_train,
                                  y_train,
                                  X_val,
                                  y_val,
                                  num_iters=1500,
                                  batch_size=200,
                                  learning_rate=lr,
                                  reg=reg)
                val_acc = (net.predict(X_val) == y_val).mean()
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    best_net = net
                results[(hs, lr, reg)] = val_acc
    print('Finished!')

    for hs, lr, reg in sorted(results):
        val_acc = results[(hs, lr, reg)]
        print('hs {} lr {} reg {} val accuracy: {}'.format(
            hs, lr, reg, val_acc))
    print(
        'best validation accuracy achieved during cross-validation: {}'.format(
            best_val_acc))

    # visualize the weights of the best network
    show_net_weights(best_net)
def main():
    """Run the softmax classifier experiment on CIFAR-10.

    Steps: preprocess the data (flatten, mean-centre, append a bias column),
    sanity-check the naive loss and its gradient, compare the naive and
    vectorized implementations, grid-search learning rate and regularization
    strength on the validation split, report test accuracy of the best model
    and visualise its learned per-class weights. Nothing is returned.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

    # Small development subset drawn from the training data.
    num_training = 49000
    num_dev = 500
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # Add the bias dimension (a trailing column of ones)
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    # Generate a random softmax weight matrix and use it to compute the loss.
    W = np.random.randn(3073, 10) * 0.0001
    loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

    # As a rough sanity check, our loss should be something close to
    # -log(0.1). W is drawn from a Gaussian scaled by 1e-4, so all class
    # scores are near zero and the predicted probabilities are close to
    # uniform, i.e. about 1/10 for each of the 10 classes.
    print('loss: %f' % loss)
    print('sanity check: %f' % (-np.log(0.1)))

    # Numerical gradient check without regularization.
    grad_check_sparse(
        lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0], W, grad, 10)

    # Similar to the SVM case, do another gradient check with regularization.
    loss, grad = softmax_loss_naive(W, X_dev, y_dev, 1e2)
    grad_check_sparse(
        lambda w: softmax_loss_naive(w, X_dev, y_dev, 1e2)[0], W, grad, 10)

    # Compare the naive implementation with softmax_loss_vectorized.
    tic = time.time()
    loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.00001)
    toc = time.time()
    print('Naive loss: {} computed in {}'.format(loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = softmax_loss_vectorized(
        W, X_dev, y_dev, 0.00001)
    toc = time.time()
    # Report the vectorized loss here (the naive loss was printed before).
    print('Vectorized loss: {} computed in {}'.format(loss_vectorized,
                                                      toc - tic))

    grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
    print('Gradient difference: %f' % grad_difference)

    # Use the validation set to tune hyperparameters (regularization strength
    # and learning rate). You should experiment with different ranges for the
    # learning rates and regularization strengths; if you are careful you
    # should be able to get a classification accuracy of over 0.35 on the
    # validation set.
    results = {}
    best_val = -1
    best_softmax = None
    learning_rates = [1e-7, 2e-7, 5e-7]
    # regularization_strengths = [5e4, 1e8]
    regularization_strengths = (
        [(1 + 0.1 * i) * 1e4 for i in range(-3, 4)] +
        [(5 + 0.1 * i) * 1e4 for i in range(-3, 4)])

    for lr in learning_rates:
        for rs in regularization_strengths:
            # The message previously claimed an SVM was being trained.
            print('Training softmax with rs {} and lr {}'.format(rs, lr))
            softmax = Softmax()
            softmax.train(X_train, y_train, lr, rs, num_iters=2000)

            y_train_pred = softmax.predict(X_train)
            train_accuracy = np.mean(y_train == y_train_pred)
            y_val_pred = softmax.predict(X_val)
            val_accuracy = np.mean(y_val == y_val_pred)

            if val_accuracy > best_val:
                best_val = val_accuracy
                best_softmax = softmax
            results[(lr, rs)] = train_accuracy, val_accuracy

    # Print out results.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy: %f' %
              (lr, reg, train_accuracy, val_accuracy))
    print('best validation accuracy achieved during cross-validation: %f' %
          best_val)

    # Evaluate the best softmax on test set
    y_test_pred = best_softmax.predict(X_test)
    test_accuracy = np.mean(y_test == y_test_pred)
    print('softmax on raw pixels final test set accuracy: %f' %
          (test_accuracy, ))

    # Visualize the learned weights for each class
    w = best_softmax.W[:-1, :]  # strip out the bias
    w = w.reshape(32, 32, 3, 10)
    w_min, w_max = np.min(w), np.max(w)
    classes = [
        'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship',
        'truck'
    ]
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        # Rescale the weights to be between 0 and 255
        wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    # The weight images were previously built but never displayed.
    plt.show()
def main():
    """Run the linear SVM experiment on CIFAR-10.

    Steps: preprocess the data (flatten, mean-centre, append a bias column),
    compare the naive and vectorized SVM loss/gradient, train a ``LinearSVM``
    with SGD, grid-search learning rate and regularization strength on the
    validation split, plot the search results, report test accuracy of the
    best model and visualise its learned per-class weights. Nothing is
    returned.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    # visualize(X_train, y_train, X_test, y_test)

    # We will also make a development set, which is a small subset of
    # the training set.
    num_training = 49000
    num_dev = 500
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Preprocessing: subtract the mean image
    # first: compute the image mean based on the training data
    # (if the axis argument were omitted, the mean over all elements would
    # be computed instead)
    mean_image = np.mean(X_train, axis=0)
    # plt.figure(figsize=(4, 4))
    # plt.imshow(mean_image.reshape(32, 32, 3).astype('uint8'))
    # plt.show()

    # second: subtract the mean image from train and test data
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # third: append the bias dimension of ones so that our SVM
    # only has to worry about optimizing a single weight matrix W
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    # SVM Classifier
    # Start with random W and find a W that minimizes the loss
    W = np.random.randn(3073, 10) * 0.0001
    loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)
    print('loss: {}'.format(loss))

    # To check that you have implemented the gradient correctly, you can
    # numerically estimate the gradient of the loss function and compare the
    # numeric estimate to the gradient that you computed.
    # f = lambda w: svm_loss_naive(w, X_dev, y_dev, 1e2)[0]
    # grad_numerical = grad_check_sparse(f, W, grad)

    # Compare the naive implementation with svm_loss_vectorized.
    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.00001)
    toc = time.time()
    print('Naive loss: {} computed in {}'.format(loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = svm_loss_vectorized(
        W, X_dev, y_dev, 0.00001)
    toc = time.time()
    print('Vectorized loss: {} computed in {}'.format(loss_vectorized,
                                                      toc - tic))
    print('difference: {}'.format(loss_naive - loss_vectorized))

    # Compare the gradients of the two implementations
    difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('difference: {}'.format(difference))

    # SGD
    svm = LinearSVM()
    tic = time.time()
    loss_hist = svm.train(X_train,
                          y_train,
                          learning_rate=1e-7,
                          reg=5e4,
                          num_iters=1500,
                          verbose=True)
    toc = time.time()
    print("That took {}".format(toc - tic))

    # plot the loss
    # plt.plot(loss_hist)
    # plt.xlabel("Iteration number")
    # plt.ylabel("Loss value")
    # plt.show()

    # Evaluate the performance on both the training and validation set
    y_train_pred = svm.predict(X_train)
    print('Training accuracy: {}'.format(np.mean(y_train == y_train_pred)))
    y_val_pred = svm.predict(X_val)
    print("Validation accuracy: {}".format(np.mean(y_val == y_val_pred)))

    # Use the validation set to tune hyperparameters
    learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
    # NOTE(review): these strengths are on the order of 1e-4, far below the
    # reg=5e4 used for the single run above - confirm this scale is intended.
    regularization_strengths = (
        [(1 + i * 0.1) * 1e-4 for i in range(-3, 3)] +
        [(2 + 0.1 * i) * 1e-4 for i in range(-3, 3)])

    results = {}
    best_val = -1
    best_svm = None
    for rs in regularization_strengths:
        for lr in learning_rates:
            print('Traing SVM with rs {} and lr {}'.format(rs, lr))
            svm = LinearSVM()
            svm.train(X_train, y_train, lr, rs, num_iters=3000)

            y_train_pred = svm.predict(X_train)
            train_accuracy = np.mean(y_train == y_train_pred)
            y_val_pred = svm.predict(X_val)
            val_accuracy = np.mean(y_val == y_val_pred)

            if val_accuracy > best_val:
                best_val = val_accuracy
                best_svm = svm
            results[(lr, rs)] = train_accuracy, val_accuracy

    # Print the results
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr {} reg {} train accuracy {} val accuracy: {}'.format(
            lr, reg, train_accuracy, val_accuracy))
    print(
        'best validation accuracy achieved during cross-validation: {}'.format(
            best_val))

    # Visualize the cross-validation results; keys of results are (lr, rs).
    x_scatter = [math.log10(x[0]) for x in results]
    y_scatter = [math.log10(x[1]) for x in results]

    # plot training accuracy
    marker_size = 100
    colors = [results[x][0] for x in results]
    plt.subplot(2, 1, 1)
    plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
    plt.colorbar()
    plt.xlabel('log learning rate')
    plt.ylabel('log retgularization strength')
    plt.title('CIFAR-10 training accuracy')

    # plot validation accuracy
    colors = [results[x][1] for x in results]
    plt.subplot(2, 1, 2)
    plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
    plt.colorbar()
    plt.xlabel('log learning rate')
    plt.ylabel('log retgularization strength')
    plt.title('CIFAR-10 validation accuracy')
    plt.show()

    # Evaluate the best svm on test set
    y_test_pred = best_svm.predict(X_test)
    test_accuracy = np.mean(y_test == y_test_pred)
    print('Linear SVM on raw pixels final test set accuracy: {}'.format(
        test_accuracy))

    # Visualize the learned weights for each class.
    # Depending on your choice of learning rate and regularization strength,
    # these may or may not be nice to look at.
    w = best_svm.W[:-1, :]  # strip out the bias
    w = w.reshape(32, 32, 3, 10)
    w_min, w_max = np.min(w), np.max(w)
    classes = [
        'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship',
        'truck'
    ]
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        # Rescale the weights to be between 0 and 255
        wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    # The weight images were previously built but never displayed.
    plt.show()
def regularization_expriment():
    """Compare several dropout settings on a 500-example training subset.

    For every value in ``dropout_choices`` a ``FullyConnectedNet`` with one
    hidden layer of 500 units is trained with Adam for 25 epochs. The
    per-epoch training and validation accuracy histories of all runs are
    then plotted. Nothing is returned.
    """
    num_train = 500
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # previously bound to y_test by mistake
        'y_test': y_test,
    }

    solvers = {}
    dropout_choices = [0, 0.25, 0.5, 0.75, 0.8, 0.9, 0.99]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], weight_scale=5e-2, dropout=dropout)
        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule="adam",
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies of all models. Only the first two
    # rows of the 3x1 grid are used.
    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 15)
    plt.show()
def three_layer_convnet_test():
    """Overfit a ``ThreeLayerConvNet`` on 100 CIFAR-10 images and plot the run.

    The images are transposed with axes (0, 3, 1, 2) before being handed to
    the solver. After training with Adam for 20 epochs the loss history and
    the train/validation accuracy histories are plotted. Nothing is
    returned.
    """
    # Disabled checks kept for reference: initial loss and gradient check.
    # model = ThreeLayerConvNet()
    # N = 50
    # X = np.random.randn(N, 3, 32, 32)
    # y = np.random.randint(10, size=N)
    # loss, grads = model.loss(X, y)
    # print('Initial loss (no regularization): {}'.format(loss))
    # model.reg = 0.5
    # loss, grads = model.loss(X, y)
    # print("Initial loss(with regularization: {}".format(loss))

    # # Gradient check
    # num_inputs = 2
    # input_dim = (3, 16, 16)
    # reg = 0.0
    # num_classes = 10
    # X = np.random.randn(num_inputs, *input_dim)
    # y = np.random.randint(num_classes, size=num_inputs)
    # model = ThreeLayerConvNet(num_filters=3, filter_size=3,
    #                           input_dim=input_dim, hidden_dim=7,
    #                           dtype=np.float64)
    # loss, grads = model.loss(X, y)
    # for param_name in sorted(grads):
    #     f = lambda _: model.loss(X, y)[0]
    #     param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    #     e = rel_error(param_grad_num, grads[param_name])
    #     print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

    # Overfit small data
    X_train, y_train, X_val, y_val, _X_test, _y_test = get_CIFAR10_data()

    subset_size = 100
    axis_order = (0, 3, 1, 2)
    train_images = X_train[:subset_size].transpose(*axis_order)
    val_images = X_val.transpose(*axis_order)
    small_data = {
        'X_train': train_images,
        'y_train': y_train[:subset_size],
        'X_val': val_images,
        'y_val': y_val,
    }

    net = ThreeLayerConvNet(weight_scale=1e-2)
    adam_settings = {
        'learning_rate': 4e-4,
    }
    trainer = Solver(net,
                     small_data,
                     num_epochs=20,
                     batch_size=50,
                     update_rule='adam',
                     optim_config=adam_settings,
                     verbose=True,
                     print_every=1)
    trainer.train()

    # Upper panel: per-iteration loss.
    plt.subplot(2, 1, 1)
    plt.plot(trainer.loss_history, 'o')
    plt.xlabel('iteration')
    plt.ylabel('loss')

    # Lower panel: accuracy curves, train first and then validation.
    plt.subplot(2, 1, 2)
    for history in (trainer.train_acc_history, trainer.val_acc_history):
        plt.plot(history, '-o')
    plt.legend(['train', 'val'], loc='upper left')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.show()