def train_best_model():
    """Train a deep fully connected network on CIFAR-10 and report test accuracy.

    Loads the dataset with ``get_CIFAR10_data``, trains a ``FullyConnectedNet``
    with six hidden layers (``use_batchnorm=True``, ``dropout=0.25``,
    ``reg=1e-2``) using the ``'adam'`` update rule, prints the accuracy on
    the test split, and plots the loss and accuracy histories recorded by
    the solver.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # test inputs, not the labels
        'y_test': y_test,
    }

    learning_rate = 3.1e-4
    weight_scale = 2.5e-2  # 1e-5 (alternative value noted by the author)

    model = FullyConnectedNet([600, 500, 400, 300, 200, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64,
                              dropout=0.25,
                              use_batchnorm=True,
                              reg=1e-2)
    solver = Solver(model,
                    data,
                    print_every=500,
                    num_epochs=30,
                    batch_size=100,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': learning_rate,
                    },
                    lr_decay=0.9)
    solver.train()

    # model.loss is called without labels here; its result is treated as
    # per-class scores and the arg-max along axis 1 as the prediction.
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print('test acc: %f' % (acc))

    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history)
    plt.title('Loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, label='train')
    plt.plot(solver.val_acc_history, label='val')
    plt.title('Classification accuracy history')
    plt.xlabel('Epoch')
    plt.ylabel('Clasification accuracy')
    # The curves carry labels, so a legend is needed for them to be shown.
    plt.legend(loc='lower right')
    plt.show()
def solver_test():
    """Train a ``TwoLayerNet`` with ``Solver`` and visualize the run.

    Loads CIFAR-10 with ``get_CIFAR10_data``, trains for 10 epochs, prints
    the accuracy on the test split, and plots the training loss together
    with the train/validation accuracy histories recorded by the solver.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # test inputs, not the labels
        'y_test': y_test,
    }

    model = TwoLayerNet(reg=1e-1)
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    num_epochs=10,
                    batch_size=100,
                    print_every=100)
    solver.train()

    # model.loss is called without labels here; its result is treated as
    # per-class scores and the arg-max along axis 1 as the prediction.
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print("Test acc: {}".format(acc))

    # Visualize training loss and train/val accuracy.
    plt.subplot(2, 1, 1)
    plt.title('Training loss')
    plt.plot(solver.loss_history, 'o')
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    # Dashed horizontal reference line at 0.5 accuracy.
    plt.plot([0.5] * len(solver.val_acc_history), 'k--')
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()
def batchnorm_for_deep_networks():
    """Train the same five-hidden-layer net with and without batch norm.

    Both networks share hidden sizes and weight scale and are trained on
    the first 1000 training examples with identical solver settings. The
    batch-norm variant is trained first, then the plain one.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

    subset_size = 1000
    layer_sizes = [100, 100, 100, 100, 100]
    init_scale = 2e-2

    subset = {
        'X_train': X_train[:subset_size],
        'y_train': y_train[:subset_size],
        'X_val': X_val,
        'y_val': y_val,
    }

    def make_solver(net):
        # A fresh optim_config dict per solver, so the two runs share no state.
        return Solver(net,
                      subset,
                      num_epochs=10,
                      batch_size=50,
                      update_rule='adam',
                      optim_config={'learning_rate': 1e-3},
                      verbose=True,
                      print_every=200)

    # Both models are constructed before any training starts.
    net_with_bn = FullyConnectedNet(layer_sizes,
                                    weight_scale=init_scale,
                                    use_batchnorm=True)
    net_plain = FullyConnectedNet(layer_sizes,
                                  weight_scale=init_scale,
                                  use_batchnorm=False)

    bn_solver = make_solver(net_with_bn)
    bn_solver.train()

    plain_solver = make_solver(net_plain)
    plain_solver.train()
def neural_network_with_rms_and_adam():
    """Compare the ``'adam'`` and ``'rmsprop'`` update rules on a small subset.

    For each update rule a fresh ``FullyConnectedNet`` with five hidden
    layers is trained for 5 epochs on the first 4000 training examples.
    The loss, training-accuracy and validation-accuracy histories of both
    runs are then plotted in three stacked subplots.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # test inputs, not the labels
        'y_test': y_test,
    }

    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    solvers = {}
    # Each update rule gets its own learning rate.
    learning_rates = {'rmsprop': 1e-4, 'adam': 1e-3}
    for update_rule in ['adam', 'rmsprop']:
        print('running with ', update_rule)
        model = FullyConnectedNet([100, 100, 100, 100, 100],
                                  weight_scale=5e-2)
        solver = Solver(
            model,
            small_data,
            num_epochs=5,
            batch_size=100,
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rates[update_rule]},
            verbose=True)
        solvers[update_rule] = solver
        solver.train()

    # Set up the three panels before drawing any curves.
    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    plt.show()
def sgd_momentum_test():
    """Check ``sgd_momentum`` numerically, then compare SGD with SGD+momentum.

    Part one runs a single ``sgd_momentum`` step on fixed inputs and prints
    the relative error of the updated weights and velocity against
    hard-coded reference values. Part two trains one network per update
    rule (``'sgd'`` and ``'sgd_momentum'``) on the first 4000 training
    examples and plots the loss and accuracy histories of both runs.
    """
    N, D = 4, 5
    w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
    dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
    v = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)

    config = {'learning_rate': 1e-3, 'velocity': v}
    next_w, _ = sgd_momentum(w, dw, config=config)

    expected_next_w = np.asarray(
        [[0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
         [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
         [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
         [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096]])
    expected_velocity = np.asarray(
        [[0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
         [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
         [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
         [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096]])

    print("next_w error: {}".format(rel_error(next_w, expected_next_w)))
    # NOTE(review): the returned config is discarded above, so this check
    # relies on sgd_momentum updating config['velocity'] in place - confirm.
    print("velocity error: {}".format(
        rel_error(expected_velocity, config['velocity'])))

    # Train a six-layer network with both SGD and SGD+momentum.
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # test inputs, not the labels
        'y_test': y_test,
    }

    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    solvers = {}
    for update_rule in ['sgd', 'sgd_momentum']:
        print("Running with {}".format(update_rule))
        model = FullyConnectedNet([100, 100, 100, 100, 100],
                                  weight_scale=5e-2)
        solver = Solver(model,
                        small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={'learning_rate': 1e-2},
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()

    # Set up the three panels before drawing any curves.
    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    # Display the figure, as the other plotting experiments in this file do.
    plt.show()
def multilayer_network_test():
    """Overfit a 50-image subset with a five-layer fully connected network.

    Trains a ``FullyConnectedNet`` with four hidden layers of 100 units on
    the first 50 training examples using plain SGD for 20 epochs, then
    plots the training loss history. Earlier experiments (gradient check,
    three-layer overfit, grid search) are kept below as commented-out code.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # test inputs, not the labels
        'y_test': y_test,
    }

    # Initial loss and gradient check
    # N, D, H1, H2, C = 2, 15, 20, 30, 10
    # X = np.random.randn(N, D)
    # y = np.random.randint(C, size=(N, ))
    # print(X.shape)
    # for reg in [0, 3.14]:
    #     print("Running check with reg={}".format(reg))
    #     model = FullyConnectedNet([H1, H2],
    #                               input_dim=D,
    #                               num_classes=C,
    #                               reg=reg,
    #                               weight_scale=5e-2,
    #                               dtype=np.float64)
    #     loss, grads = model.loss(X, y)
    #     print("Initial loss: {}".format(loss))
    #     for name in sorted(grads):
    #         f = lambda _: model.loss(X, y)[0]
    #         grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    #         print("{} relative {}".format(name, rel_error(grad_num, grads[name])))

    # As another sanity check, make sure you can overfit a small dataset of
    # 50 images. First we will try a three-layer network with 100 units in
    # each hidden layer. You will need to tweak the learning rate and
    # initialization scale, but you should be able to overfit and achieve
    # 100% training accuracy within 20 epochs.
    num_train = 50
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    ##########################################################################
    # weight_scale = 5e-2
    # learning_rate = 1e-3
    # model = FullyConnectedNet([100, 100],
    #                           weight_scale=weight_scale,
    #                           dtype=np.float64)
    # solver = Solver(model,
    #                 small_data,
    #                 print_every=10,
    #                 num_epochs=20,
    #                 batch_size=25,
    #                 update_rule='sgd',
    #                 optim_config={'learning_rate': learning_rate})
    # solver.train()
    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Grid Search
    # NOTE(review): the comparison below looks inverted ('>' never updates
    # the best solver when starting from 0.0) - fix before re-enabling.
    # best_accurcy = 0.0
    # best_solver = None
    # weight_scale = np.linspace(1e-3, 1e-2, 10)
    # learing_rate = np.linspace(1e-4, 1e-2, 100)
    # for w in weight_scale:
    #     for l in learing_rate:
    #         print("Training with weight_scale {} and learning_rate {}".format(w, l))
    #         model = FullyConnectedNet([100, 100],
    #                                   weight_scale=w,
    #                                   dtype=np.float64)
    #         solver = Solver(model,
    #                         small_data,
    #                         print_every=10,
    #                         num_epochs=20,
    #                         batch_size=25,
    #                         update_rule='sgd',
    #                         optim_config={'learning_rate': l})
    #         solver.train()
    #         if best_accurcy > solver.best_train_acc:
    #             best_accurcy = solver.best_train_acc
    #             best_solver = solver
    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Five layer network
    learning_rate = 8e-4
    weight_scale = 1e-1
    model = FullyConnectedNet([100, 100, 100, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64)
    solver = Solver(model,
                    small_data,
                    print_every=10,
                    num_epochs=20,
                    batch_size=25,
                    update_rule='sgd',
                    optim_config={'learning_rate': learning_rate})
    solver.train()

    plt.plot(solver.loss_history, 'o')
    plt.title('Training loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.show()
def regularization_expriment():
    """Compare several dropout settings on a small training subset.

    For every value in ``dropout_choices`` a ``FullyConnectedNet`` with a
    single 500-unit hidden layer is trained on the first 500 training
    examples with the ``'adam'`` update rule. The per-epoch training and
    validation accuracy histories of all runs are then plotted, one curve
    per dropout value.
    """
    num_train = 500
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,  # test inputs, not the labels
        'y_test': y_test,
    }

    solvers = {}
    dropout_choices = [0, 0.25, 0.5, 0.75, 0.8, 0.9, 0.99]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], weight_scale=5e-2, dropout=dropout)
        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule="adam",
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies of every trained model.
    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 15)
    plt.show()
def three_layer_convnet_test():
    """Overfit a ``ThreeLayerConvNet`` on 100 training images and plot the run.

    The image arrays are transposed with axes ``(0, 3, 1, 2)`` before being
    handed to the solver. After 20 epochs with the ``'adam'`` update rule,
    the loss history and the train/validation accuracy histories are
    plotted. The earlier sanity checks are kept as commented-out code.
    """
    # model = ThreeLayerConvNet()
    # N = 50
    # X = np.random.randn(N, 3, 32, 32)
    # y = np.random.randint(10, size=N)
    # loss, grads = model.loss(X, y)
    # print('Initial loss (no regularization): {}'.format(loss))
    # model.reg = 0.5
    # loss, grads = model.loss(X, y)
    # print("Initial loss(with regularization: {}".format(loss))

    # # Gradient check
    # num_inputs = 2
    # input_dim = (3, 16, 16)
    # reg = 0.0
    # num_classes = 10
    # X = np.random.randn(num_inputs, *input_dim)
    # y = np.random.randint(num_classes, size=num_inputs)
    # model = ThreeLayerConvNet(num_filters=3, filter_size=3,
    #                           input_dim=input_dim, hidden_dim=7,
    #                           dtype=np.float64)
    # loss, grads = model.loss(X, y)
    # for param_name in sorted(grads):
    #     f = lambda _: model.loss(X, y)[0]
    #     param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    #     e = rel_error(param_grad_num, grads[param_name])
    #     print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

    # Overfit small data
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

    subset_size = 100
    # Axis order applied to the image arrays: the last axis moves to position 1.
    axis_order = (0, 3, 1, 2)
    tiny_data = {
        'X_train': X_train[:subset_size].transpose(*axis_order),
        'y_train': y_train[:subset_size],
        'X_val': X_val.transpose(*axis_order),
        'y_val': y_val,
    }

    convnet = ThreeLayerConvNet(weight_scale=1e-2)
    trainer = Solver(convnet,
                     tiny_data,
                     num_epochs=20,
                     batch_size=50,
                     update_rule='adam',
                     optim_config={'learning_rate': 4e-4},
                     verbose=True,
                     print_every=1)
    trainer.train()

    # Top panel: per-iteration loss.
    plt.subplot(2, 1, 1)
    plt.plot(trainer.loss_history, 'o')
    plt.xlabel('iteration')
    plt.ylabel('loss')

    # Bottom panel: train curve first, then validation, matching the legend.
    plt.subplot(2, 1, 2)
    for accuracy_history in (trainer.train_acc_history,
                             trainer.val_acc_history):
        plt.plot(accuracy_history, '-o')
    plt.legend(['train', 'val'], loc='upper left')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.show()