def over_fit_small_data():
    data = get_CIFAR10_data()
    np.random.seed(231)
    num_train = 100
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    model = ThreeLayerConvNet(weight_scale=1e-2)
    solver = Solver(model, small_data,
                    num_epochs=15, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True, print_every=1)
    solver.train()
    plot_loss_acc_history(solver)
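# plot_loss_acc_history is used above but not defined in this file. A minimal
# sketch of what such a helper might look like, assuming it only reads the
# loss_history / train_acc_history / val_acc_history attributes that Solver
# records during training (the function body here is an assumption):
def plot_loss_acc_history(solver):
    import matplotlib.pyplot as plt
    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history, 'o')
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, '-o', label='train')
    plt.plot(solver.val_acc_history, '-o', label='val')
    plt.legend(loc='upper left')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.show()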
def overfit_small_data(model=None, epochs=10, num_train=20, verbose=True):
    data = get_CIFAR10_data(dir='datasets/cifar-10-batches-py')
    small_data = {
        'X_train': data['X_train'][:num_train] / 127.0,
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train] / 127.0,  # batch size must be constant
        'y_val': data['y_val'][:num_train],
    }
    if model is None:
        input_dim = small_data['X_train'].shape[1:]
        print input_dim
        # spatial size halves at each pool layer: 32 -> 16 -> 8 -> 4 -> 2
        model = FlexNet(input_dim=input_dim,
                        num_filters=(8, 8, 16, 16),
                        hidden_dim=(100,))
    model.print_params()

    print '\n--- Training a few epochs ---'
    solver = Solver(model, small_data,
                    num_epochs=epochs,
                    batch_size=np.minimum(50, num_train),
                    update_rule='sgd',
                    optim_config={'learning_rate': 1e-4},
                    verbose=verbose, print_every=1)
    solver.train()
    print 'Train acc:', solver.train_acc_history[-1]
    return model
def RunTwoLayerNet():
    model = TwoLayerNet()
    solver = Solver(model, data,
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95, print_every=100)
    solver.train()
def RunFullyConnectedNet():
    model = FullyConnectedNet([100, 50], dropout=0.5, use_batchnorm=True)
    # model = FullyConnectedNet([100, 50])
    solver = Solver(model, data,
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95, print_every=100)
    solver.train()
def RunCnnNet():
    model = ThreeLayerConvNet(reg=1e-2)
    solver = Solver(model, data,
                    optim_config={'learning_rate': 1e-5},
                    lr_decay=0.95, print_every=100)
    solver.train()
def run_batchsize_experiments(normalization_mode):
    np.random.seed(231)

    # Try training a very deep net with batchnorm
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    n_epochs = 10
    weight_scale = 2e-2
    batch_sizes = [5, 10, 50]
    lr = 10 ** (-3.5)
    solver_bsize = batch_sizes[0]

    print('No normalization: batch size = ', solver_bsize)
    model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                              normalization=None)
    solver = Solver(model, small_data,
                    num_epochs=n_epochs, batch_size=solver_bsize,
                    update_rule='adam',
                    optim_config={'learning_rate': lr},
                    verbose=False)
    solver.train()

    bn_solvers = []
    for i in range(len(batch_sizes)):
        b_size = batch_sizes[i]
        print('Normalization: batch size = ', b_size)
        bn_model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                                     normalization=normalization_mode)
        bn_solver = Solver(bn_model, small_data,
                           num_epochs=n_epochs, batch_size=b_size,
                           update_rule='adam',
                           optim_config={'learning_rate': lr},
                           verbose=False)
        bn_solver.train()
        bn_solvers.append(bn_solver)

    return bn_solvers, solver, batch_sizes
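# A hedged usage sketch for the function above: run the batch-size experiment
# with batch normalization and compare best validation accuracies against the
# unnormalized baseline. The 'batchnorm' mode string matches the
# FullyConnectedNet calls above; other normalization_mode values would be
# assumptions about that class.
bn_solvers_bsize, baseline_solver, batch_sizes = run_batchsize_experiments('batchnorm')
print('baseline (batch size %d) best val acc: %f'
      % (batch_sizes[0], max(baseline_solver.val_acc_history)))
for b_size, s in zip(batch_sizes, bn_solvers_bsize):
    print('batchnorm, batch size %d, best val acc: %f'
          % (b_size, max(s.val_acc_history)))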
def RunCnnNet():
    model = ThreeLayerConvNet(weight_scale=0.001, reg=0.001)
    solver = Solver(model, data,
                    num_epochs=1,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True, print_every=20)
    solver.train()
def TwoLayerNetDemo(reg=0.0):
    data = get_CIFAR10_data(9000, 1000)
    model = TwoLayerNet(reg=reg)
    solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95,
                    num_epochs=10, batch_size=100,
                    print_every=100)
    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]
    acc = solver.predict(X_test, y_test, num_samples)
    print 'Accuracy:', acc
def test_mnist(num_epochs=60, batch_size=60, learning_rate=3e-3):
    X_train, y_train = get_mnist_data('mnist_train.csv', 50000)
    X_val, y_val = get_mnist_data('mnist_test.csv', 10000)
    hidden_dims = [100, 100, 100]
    # num_train = 48000
    test_data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val
    }
    weight_scale = 2e-2
    bn_model = FullyConnectedNet(hidden_dims, input_dim=1 * 784,
                                 weight_scale=weight_scale,
                                 use_batchnorm=True)
    bn_solver = Solver(bn_model, test_data,
                       num_epochs=num_epochs, batch_size=batch_size,
                       update_rule='sgd',
                       optim_config={'learning_rate': learning_rate},
                       verbose=True, print_every=400)
    step, train_accuracies, val_accuracies, loss = bn_solver.train()
    return bn_model, step, train_accuracies, val_accuracies, loss
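# get_mnist_data is not defined in this file. A minimal sketch under the
# assumption that each CSV row stores the label followed by 784 raw pixel
# values (the usual MNIST-as-CSV layout); the real loader may differ:
def get_mnist_data(csv_path, num_samples):
    import numpy as np
    rows = np.loadtxt(csv_path, delimiter=',', dtype=np.float64,
                      max_rows=num_samples)
    y = rows[:, 0].astype(np.int64)   # first column: class label 0-9
    X = rows[:, 1:] / 255.0           # remaining 784 columns: pixel values
    return X, y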
def train_net():
    data = get_CIFAR10_data()
    model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001)
    solver = Solver(model, data,
                    num_epochs=1, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True, print_every=20)
    solver.train()
    visualize_filters(model)
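# visualize_filters is not defined here. A sketch of one common way to show
# the first-layer conv filters of a three-layer conv net, assuming the weights
# live in model.params['W1'] with shape (F, C, HH, WW); the parameter name and
# layout are assumptions:
def visualize_filters(model):
    import numpy as np
    import matplotlib.pyplot as plt
    W1 = model.params['W1']                               # (F, C, HH, WW)
    F = W1.shape[0]
    cols = int(np.ceil(np.sqrt(F)))
    for i in range(F):
        w = W1[i].transpose(1, 2, 0)                      # -> (HH, WW, C)
        w = (w - w.min()) / (w.max() - w.min() + 1e-8)    # rescale to [0, 1]
        plt.subplot(cols, cols, i + 1)
        plt.imshow(w)
        plt.axis('off')
    plt.show()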
def ThreeLayerConvNetDemo(batch_size=32, num_filters=9, use_batchnorm=False,
                          weight_scale=1e-2, reg=0.0, update_rule='sgd'):
    data = get_CIFAR10_data(1000, 100)
    hidden_dims = [100, 50]  # unused here; ThreeLayerConvNet has a single hidden layer
    # use_batchnorm is accepted but not forwarded to the model here
    model = ThreeLayerConvNet(num_filters=num_filters,
                              weight_scale=weight_scale, reg=reg)
    solver = Solver(model, data,
                    update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95,
                    num_epochs=10, batch_size=batch_size,
                    print_every=100)
    solver.train()

    X_test = data['X_test'][1:100]
    y_test = data['y_test'][1:100]
    num_samples = y_test.shape[0]
    acc = solver.predict(X_test, y_test, num_samples)
    print 'Accuracy:', acc
def SMALL_CNN():
    num_train = 100
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    model = ThreeLayerConvNet(weight_scale=1e-3)
    solver = Solver(model, small_data,
                    num_epochs=10,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-4},
                    verbose=True, print_every=20)
    solver.train()
def FullyConnectedNetDemo(dropout=0.5, use_batchnorm=True, HeReLU=False,
                          weight_scale=1e-2, reg=0.0, update_rule='adam',
                          num_epochs=10):
    data = get_CIFAR10_data(19000, 1000)
    hidden_dims = [100, 50]
    model = FullyConnectedNet(hidden_dims=hidden_dims,
                              weight_scale=weight_scale,
                              dropout=dropout,
                              use_batchnorm=use_batchnorm,
                              HeReLU=HeReLU,  # forward the argument instead of hard-coding False
                              reg=reg)
    solver = Solver(model, data,
                    update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95,
                    num_epochs=num_epochs, batch_size=100,
                    print_every=100)
    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]
    acc = solver.predict(X_test, y_test, num_samples)
    print 'Accuracy:', acc
solver = Solver(model, data,
                num_epochs=1, batch_size=8,
                update_rule='adam',
                lr_decay=1,
                max_jitter=0,
                h5_file='croped2',
                flipOrNot=True,
                optim_config={
                    'learning_rate': learning_rate,  # 1e-4
                    'beta2': 0.999
                },
                verbose=True, print_every=1000)
solver.train()

plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
# plt.show()
# loss, grads = model.loss(X, y)
# print 'Initial loss (no regularization): ', loss
#
# for name in sorted(grads):
#     f = lambda _: model.loss(X, y)[0]
#     grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
#     print '%s relative error: %.2e' % (name, rel_error(grad_num, grads[name]))

data = get_CIFAR10_data()
# for k, v in data.iteritems():
#     print '%s: ' % k, v.shape

model = ConvNet(weight_scale=0.001, hidden_dim=500, reg=0)
print

solver = Solver(model, data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={'learning_rate': 5e-3},
                verbose=True, print_every=100)
describe_solver(solver)
print

solver.train()
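# describe_solver is not defined in this file. A minimal sketch that simply
# echoes the solver's main hyperparameters before training, assuming the
# solver keeps its constructor arguments as attributes; the output format is
# an assumption:
def describe_solver(solver):
    print 'update_rule :', solver.update_rule
    print 'optim_config:', solver.optim_config
    print 'batch_size  :', solver.batch_size
    print 'num_epochs  :', solver.num_epochs
    print 'lr_decay    :', solver.lr_decay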
num_train = 100  # small subset for the overfitting check
small_data = {
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}

model = ThreeLayerConvNet(weight_scale=1e-2)
solver = Solver(model, small_data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={'learning_rate': 1e-3},
                verbose=True, print_every=1)
solver.train()

# Plotting the loss, training accuracy, and validation accuracy should show
# clear overfitting:
plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
bn_model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                             normalization='batchnorm')
model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                          normalization=None)

bn_solver = Solver(bn_model, small_data,
                   num_epochs=1, batch_size=50,
                   update_rule='adam',
                   optim_config={'learning_rate': 1e-3},
                   verbose=True, print_every=20)
bn_solver.train()

solver = Solver(model, small_data,
                num_epochs=1, batch_size=50,
                update_rule='adam',
                optim_config={'learning_rate': 1e-3},
                verbose=True, print_every=20)
solver.train()

# Run the following to visualize the results from the two networks trained
# above. You should find that using batch normalization helps the network
# converge much faster.
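# A hedged sketch of that visualization: overlay the loss and accuracy
# histories of the baseline solver and the batchnorm solver. It relies only
# on the history attributes recorded by Solver; the subplot layout is an
# assumption, and plt is assumed to be matplotlib.pyplot as elsewhere here.
plt.subplot(3, 1, 1)
plt.title('Training loss')
plt.plot(solver.loss_history, 'o', label='baseline')
plt.plot(bn_solver.loss_history, 'o', label='batchnorm')
plt.xlabel('Iteration')
plt.legend(loc='upper right')

plt.subplot(3, 1, 2)
plt.title('Training accuracy')
plt.plot(solver.train_acc_history, '-o', label='baseline')
plt.plot(bn_solver.train_acc_history, '-o', label='batchnorm')
plt.xlabel('Epoch')
plt.legend(loc='lower right')

plt.subplot(3, 1, 3)
plt.title('Validation accuracy')
plt.plot(solver.val_acc_history, '-o', label='baseline')
plt.plot(bn_solver.val_acc_history, '-o', label='batchnorm')
plt.xlabel('Epoch')
plt.legend(loc='lower right')

plt.gcf().set_size_inches(15, 15)
plt.show()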
def regularization_experiment():
    data = get_CIFAR10_data()

    # Train several identical nets that differ only in their dropout probability
    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    solvers = {}
    dropout_choices = [1, 0.9, 0.75, 0.5, 0.25]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], dropout=dropout)
        print(dropout)
        solver = Solver(model, small_data,
                        num_epochs=25, batch_size=100,
                        update_rule='adam',
                        optim_config={'learning_rate': 5e-4},
                        verbose=True, print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies for each dropout setting
    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history, '-o', label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history, '-o', label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 20)
    plt.show()
def main():
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_raw_data()
    for k, v in data.iteritems():
        print '%s: ' % k, v.shape

    # Get a small subset of the data for fine-tuning
    small_data = get_small_data(data, 5000)

    # Network architecture
    # {conv - [batch norm] - relu - pool}
    cnn_layer_1 = (64, 3, 1, 1)
    pool_layer_1 = (2, 2, 2)
    layer_1 = (cnn_layer_1, pool_layer_1)
    cnn_layer_2 = (128, 3, 1, 1)
    pool_layer_2 = (2, 2, 2)
    layer_2 = (cnn_layer_2, pool_layer_2)
    cnn_layer_3 = (256, 3, 1, 1)
    pool_layer_3 = (2, 2, 2)
    layer_3 = (cnn_layer_3, pool_layer_3)
    hidden_dims_CNN = (layer_1, layer_2, layer_3)

    # {affine - [batch norm] - relu - [dropout]}
    fc_layer_1 = 256
    drop_layer_1 = 1
    layer_1 = (fc_layer_1, drop_layer_1)
    fc_layer_2 = 128
    drop_layer_2 = 1
    layer_2 = (fc_layer_2, drop_layer_2)
    hidden_dims_FC = (layer_1, layer_2)

    num_classes = 10
    model = ConvNet(input_dim=(3, 32, 32),
                    hidden_dims_CNN=hidden_dims_CNN,
                    hidden_dims_FC=hidden_dims_FC,
                    num_classes=num_classes,
                    weight_scale=1e-2, reg=0.001, dtype=np.float32)

    select_num_train_data = 0
    test_weight_scale = 0
    test_lr = 1

    # Test how much data is enough for training
    if select_num_train_data == 1:
        num_train = (500, 1000, 5000, 10000)
        epoch = (20, 10, 2, 1)
        for i in range(0, len(num_train)):
            print 'num_train_data : %d' % (num_train[i])
            small_data = get_small_data(data, num_train[i])
            solver = Solver(model, small_data,
                            num_epochs=epoch[i], batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={'learning_rate': 1e-3},
                            verbose=False, print_every=20)
            solver.train()
            print 'num_train : %d, train_acc : %f, val_acc : %f' % (
                num_train[i], solver.train_acc_history[-1], solver.val_acc_history[-1])

    # Test settings of weight initialization
    if test_weight_scale == 1:
        weight_scale = (1e-2, 1e-3, -1)
        for i in range(0, len(weight_scale)):
            print 'weight_scale : %f' % (weight_scale[i])
            model = ConvNet(input_dim=(3, 32, 32),
                            hidden_dims_CNN=hidden_dims_CNN,
                            hidden_dims_FC=hidden_dims_FC,
                            num_classes=num_classes,
                            weight_scale=weight_scale[i], reg=0.001, dtype=np.float32)
            solver = Solver(model, small_data,
                            num_epochs=2, batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={'learning_rate': 1e-3},
                            verbose=True, print_every=20)
            solver.train()
            print 'weight_scale : %f, train_acc : %f, val_acc : %f' % (
                weight_scale[i], solver.train_acc_history[-1], solver.val_acc_history[-1])

    # Test learning rates
    if test_lr == 1:
        lr = (1e-2, 1e-3, 1e-4)
        for i in range(0, len(lr)):
            print 'lr : %f' % (lr[i])
            model = ConvNet(input_dim=(3, 32, 32),
                            hidden_dims_CNN=hidden_dims_CNN,
                            hidden_dims_FC=hidden_dims_FC,
                            num_classes=num_classes,
                            weight_scale=-1, reg=0.001, dtype=np.float32)
            solver = Solver(model, small_data,
                            num_epochs=10, batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={'learning_rate': lr[i]},
                            verbose=True, print_every=10)
            solver.train()
            print 'lr : %f, train_acc : %f, val_acc : %f' % (
                lr[i], solver.train_acc_history[-1], solver.val_acc_history[-1])
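# get_small_data is not defined in this file. A minimal sketch, assuming it
# simply truncates the training split to the first num_train examples while
# keeping the validation split intact (the exact behavior is an assumption,
# but it matches the small_data dicts built elsewhere in this file):
def get_small_data(data, num_train):
    return {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }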
def batch_normalization_and_initialization():
    """
    We will now run a small experiment to study the interaction of batch
    normalization and weight initialization.

    The first cell will train 8-layer networks both with and without batch
    normalization using different scales for weight initialization. The second
    cell will plot training accuracy, validation set accuracy, and training
    loss as a function of the weight initialization scale.
    """
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()

    np.random.seed(231)

    # Try training a very deep net with batchnorm
    hidden_dims = [50, 50, 50, 50, 50, 50, 50]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    bn_solvers_ws = {}
    solvers_ws = {}
    weight_scales = np.logspace(-4, 0, num=20)
    for i, weight_scale in enumerate(weight_scales):
        print('Running weight scale %d / %d' % (i + 1, len(weight_scales)))
        bn_model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                                     normalization='batchnorm')
        model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                                  normalization=None)

        bn_solver = Solver(bn_model, small_data,
                           num_epochs=10, batch_size=50,
                           update_rule='adam',
                           optim_config={'learning_rate': 1e-3},
                           verbose=False, print_every=200)
        bn_solver.train()
        bn_solvers_ws[weight_scale] = bn_solver

        solver = Solver(model, small_data,
                        num_epochs=10, batch_size=50,
                        update_rule='adam',
                        optim_config={'learning_rate': 1e-3},
                        verbose=False, print_every=200)
        solver.train()
        solvers_ws[weight_scale] = solver

    # Plot results of weight scale experiment
    best_train_accs, bn_best_train_accs = [], []
    best_val_accs, bn_best_val_accs = [], []
    final_train_loss, bn_final_train_loss = [], []

    for ws in weight_scales:
        best_train_accs.append(max(solvers_ws[ws].train_acc_history))
        bn_best_train_accs.append(max(bn_solvers_ws[ws].train_acc_history))
        best_val_accs.append(max(solvers_ws[ws].val_acc_history))
        bn_best_val_accs.append(max(bn_solvers_ws[ws].val_acc_history))
        final_train_loss.append(np.mean(solvers_ws[ws].loss_history[-100:]))
        bn_final_train_loss.append(np.mean(bn_solvers_ws[ws].loss_history[-100:]))

    # semilogx draws a semi-log plot: the x axis is logarithmic, the y axis is
    # linear. Semi-log axes are useful when (1) one variable spans several
    # orders of magnitude, (2) small changes in the independent variable near
    # zero cause large changes in the dependent variable, so stretching that
    # region keeps the curve readable, or (3) a functional relationship should
    # appear as a straight line.
    plt.subplot(3, 1, 1)
    plt.title('Best val accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best val accuracy')
    plt.semilogx(weight_scales, best_val_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_val_accs, '-o', label='batchnorm')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    plt.title('Best train accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best training accuracy')
    plt.semilogx(weight_scales, best_train_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_train_accs, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='upper right')

    plt.subplot(3, 1, 3)
    plt.title('Final training loss vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Final training loss')
    plt.semilogx(weight_scales, final_train_loss, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_final_train_loss, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='lower left')
    plt.gca().set_ylim(1.0, 3.5)

    plt.gcf().set_size_inches(15, 15)
    plt.show()
def check_for_deep_network():
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    for k, v in data.items():
        print('%s: ' % k, v.shape)

    np.random.seed(231)

    # Try training a very deep net with batchnorm
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train],
        'y_val': data['y_val'][:num_train]
    }
    weight_scale = 2e-2
    reg = 0.01
    bn_model = FullyConnectedNet(hidden_dims, reg=reg, weight_scale=weight_scale,
                                 normalization='batchnorm')
    model = FullyConnectedNet(hidden_dims, reg=reg, weight_scale=weight_scale,
                              normalization=None)

    bn_solver = Solver(bn_model, small_data,
                       num_epochs=10, batch_size=50,
                       update_rule='adam',
                       optim_config={'learning_rate': 1e-3},
                       verbose=True, print_every=20)
    bn_solver.train()

    solver = Solver(model, small_data,
                    num_epochs=10, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True, print_every=20)
    solver.train()

    plt.subplot(3, 1, 1)
    plot_training_history('Training loss', 'Iteration', solver, [bn_solver],
                          lambda x: x.loss_history, bl_marker='o', bn_marker='o')
    plt.subplot(3, 1, 2)
    plot_training_history('Training accuracy', 'Epoch', solver, [bn_solver],
                          lambda x: x.train_acc_history, bl_marker='-o', bn_marker='-o')
    plt.subplot(3, 1, 3)
    plot_training_history('Validation accuracy', 'Epoch', solver, [bn_solver],
                          lambda x: x.val_acc_history, bl_marker='-o', bn_marker='-o')
    plt.show()
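# plot_training_history is not defined in this file. A minimal sketch that
# matches the calls above: it draws the baseline solver's history next to the
# histories of one or more normalized solvers on the current subplot. Only the
# call signature is taken from the code above; the body is an assumption, and
# plt is assumed to be matplotlib.pyplot as in the function above.
def plot_training_history(title, label, baseline, bn_solvers, plot_fn,
                          bl_marker='.', bn_marker='.'):
    plt.title(title)
    plt.xlabel(label)
    plt.plot(plot_fn(baseline), bl_marker, label='baseline')
    for i, bn_solver in enumerate(bn_solvers):
        plt.plot(plot_fn(bn_solver), bn_marker, label='with norm (%d)' % i)
    plt.legend(loc='lower center', ncol=len(bn_solvers) + 1)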
# (Iteration 20 / 20) loss: 0.649341
# (Epoch 10 / 10) train acc: 0.920000; val_acc: 0.237000

# Train small data set
model = ThreeLayerConvNet(weight_scale=1e-2)
solver = Solver(model, small_data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={'learning_rate': 1e-3},
                verbose=True, print_every=1)
solver.train()
print('Train small data set')

if do_plotting:
    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history, 'o')
    plt.xlabel('iteration')
    plt.ylabel('loss')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, '-o')
    plt.plot(solver.val_acc_history, '-o')
    plt.legend(['train', 'val'], loc='upper left')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.show()
opcon = {'learning_rate': lr, 'momentum': mm}

###############################################################
""" NO USER INPUTS PAST THIS LINE """
###############################################################

# Initialize model and solver
model = FancyNet(num_filters=nfilter, filter_sizes=sfilter, maxpools=mp,
                 use_spatial_batchnorm=sbn, hidden_dims=hd, use_batchnorm=bn,
                 reg=rg, weight_scale=ws)
solver = Solver(model, data,
                num_epochs=ne, batch_size=bs,
                update_rule=uprule,
                optim_config=opcon,
                lr_decay=lrd,
                verbose=vb, print_every=pe)

# Optimize the model (this is the part that takes a while)
solver.train()

# Check if this beats the previous best accuracy:
train_hist = solver.train_acc_history
val_hist = solver.val_acc_history
train_best = np.max(train_hist)
val_best = np.max(val_hist)
print 'Max training accuracy: ' + str(train_best)
print 'Max validation accuracy: ' + str(val_best)
if val_best > Best_val_acc:
    Best_model = model
    Best_train_acc = train_best
    Best_val_acc = val_best
    print ' New record!'