def overfit_small_data(model=None, epochs=10, num_train=20, verbose=True):
    data = get_CIFAR10_data(dir='datasets/cifar-10-batches-py')
    small_data = {
        'X_train': data['X_train'][:num_train] / 127.0,
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train] / 127.0,  # batch size must be constant
        'y_val': data['y_val'][:num_train],
    }
    if model is None:
        input_dim = small_data['X_train'].shape[1:]
        print(input_dim)
        # spatial size: 32 -> 16, 8, 4, 2
        model = FlexNet(input_dim=input_dim,
                        num_filters=(8, 8, 16, 16),
                        hidden_dim=(100,))
    model.print_params()

    print('\n--- Training a few epochs ---')
    solver = Solver(model, small_data,
                    num_epochs=epochs,
                    batch_size=np.minimum(50, num_train),
                    update_rule='sgd',
                    optim_config={'learning_rate': 1e-4},
                    verbose=verbose, print_every=1)
    solver.train()
    print('Train acc:', solver.train_acc_history[-1])
    return model
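
# A minimal usage sketch for the helper above (hypothetical, not from the
# original file): with only 20 training examples the default FlexNet should
# be able to memorize the data, so train accuracy near 1.0 is the expected
# sanity-check outcome.
trained = overfit_small_data(epochs=50, num_train=20, verbose=False)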
def over_fit_small_data():
    data = get_CIFAR10_data()
    np.random.seed(231)
    num_train = 100
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    model = ThreeLayerConvNet(weight_scale=1e-2)
    solver = Solver(model, small_data,
                    num_epochs=15, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True, print_every=1)
    solver.train()
    plot_loss_acc_history(solver)
def run_batchsize_experiments(normalization_mode):
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    np.random.seed(231)

    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    n_epochs = 10
    weight_scale = 2e-2
    batch_sizes = [5, 10, 50]
    learning_rate = 10 ** (-3.5)
    solver_bsize = batch_sizes[0]

    print('No normalization: batch size =', solver_bsize)
    model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                              normalization=None)
    solver = Solver(model, small_data,
                    num_epochs=n_epochs, batch_size=solver_bsize,
                    update_rule='adam',
                    optim_config={'learning_rate': learning_rate},
                    verbose=False)
    solver.train()

    bn_solvers = []
    for b_size in batch_sizes:
        print('Normalization: batch size =', b_size)
        bn_model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                                     normalization=normalization_mode)
        bn_solver = Solver(bn_model, small_data,
                           num_epochs=n_epochs, batch_size=b_size,
                           update_rule='adam',
                           optim_config={'learning_rate': learning_rate},
                           verbose=False)
        bn_solver.train()
        bn_solvers.append(bn_solver)

    return bn_solvers, solver, batch_sizes
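
# Hypothetical usage sketch for run_batchsize_experiments (assumes the
# 'batchnorm' mode used elsewhere in this file): compare the best validation
# accuracy per batch size against the unnormalized baseline.
bn_solvers, baseline, batch_sizes = run_batchsize_experiments('batchnorm')
for bs, s in zip(batch_sizes, bn_solvers):
    print('batchnorm, batch size %2d: best val acc %.3f'
          % (bs, max(s.val_acc_history)))
print('no normalization, batch size %2d: best val acc %.3f'
      % (batch_sizes[0], max(baseline.val_acc_history)))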
def train_net():
    data = get_CIFAR10_data()
    model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001)
    solver = Solver(model, data,
                    num_epochs=1, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True, print_every=20)
    solver.train()
    visualize_filters(model)
def TwoLayerNetDemo(reg=0.0):
    data = get_CIFAR10_data(9000, 1000)
    model = TwoLayerNet(reg=reg)
    solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95,
                    num_epochs=10, batch_size=100,
                    print_every=100)
    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]
    acc = solver.predict(X_test, y_test, num_samples)
    print('Accuracy:', acc)
def ThreeLayerConvNetDemo(batch_size=32, num_filters=9, use_batchnorm=False,
                          weight_scale=1e-2, reg=0.0, update_rule='sgd'):
    data = get_CIFAR10_data(1000, 100)
    # Note: use_batchnorm is accepted for API symmetry with the other demos
    # but ThreeLayerConvNet does not use it.
    model = ThreeLayerConvNet(num_filters=num_filters,
                              weight_scale=weight_scale, reg=reg)
    solver = Solver(model, data,
                    update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95,
                    num_epochs=10, batch_size=batch_size,
                    print_every=100)
    solver.train()

    X_test = data['X_test'][:100]
    y_test = data['y_test'][:100]
    num_samples = y_test.shape[0]
    acc = solver.predict(X_test, y_test, num_samples)
    print('Accuracy:', acc)
def FullyConnectedNetDemo(dropout=0.5, use_batchnorm=True, HeReLU=False,
                          weight_scale=1e-2, reg=0.0, update_rule='adam',
                          num_epochs=10):
    data = get_CIFAR10_data(19000, 1000)
    hidden_dims = [100, 50]
    model = FullyConnectedNet(hidden_dims=hidden_dims,
                              weight_scale=weight_scale,
                              use_batchnorm=use_batchnorm,
                              HeReLU=HeReLU,
                              dropout=dropout,
                              reg=reg)
    solver = Solver(model, data,
                    update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95,
                    num_epochs=num_epochs, batch_size=100,
                    print_every=100)
    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]
    acc = solver.predict(X_test, y_test, num_samples)
    print('Accuracy:', acc)
def regularization_experiment():
    data = get_CIFAR10_data()

    # Train several identical nets, one per dropout setting (1 = no dropout).
    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    solvers = {}
    dropout_choices = [1, 0.9, 0.75, 0.5, 0.25]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], dropout=dropout)
        print(dropout)
        solver = Solver(model, small_data,
                        num_epochs=25, batch_size=100,
                        update_rule='adam',
                        optim_config={'learning_rate': 5e-4},
                        verbose=True, print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Collect the final train and validation accuracies of each model.
    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history, '-o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history, '-o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 20)
    plt.show()
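
# The train_accs/val_accs lists above are collected but never displayed. A
# hypothetical follow-up, meant to sit at the end of regularization_experiment()
# since it uses the function's local variables, could print them as a summary:
for p, tr, va in zip(dropout_choices, train_accs, val_accs):
    print('dropout %.2f: final train acc %.3f, final val acc %.3f' % (p, tr, va))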
predictions_train = predict(train_x, train_y, parameters)
predictions_test = predict(test_x, test_y, parameters)

#%%
# Clean up variables to prevent loading data multiple times
# (which may cause memory issues).
try:
    del X_train, y_train
    del X_test, y_test
    print('Cleared previously loaded data.')
except NameError:
    pass

# X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data(
    num_training=9000, num_validation=1000, num_test=1000)
train_x_orig = X_train
train_y = y_train
test_x_orig = X_test
test_y = y_test

#%%
# Shuffle the data and keep only a subset (the takes are capped by the
# actual dataset sizes loaded above).
m_train_take = 25000
m_test_take = 10000
permutation = list(np.random.permutation(train_x_orig.shape[0]))
train_x_orig = train_x_orig[permutation, :]
train_y = train_y[permutation]
train_x_orig = train_x_orig[0:m_train_take, :]
def batch_normalization_and_initialization():
    """
    We will now run a small experiment to study the interaction of batch
    normalization and weight initialization.

    The first part trains 8-layer networks both with and without batch
    normalization using different scales for weight initialization. The
    second part plots training accuracy, validation set accuracy, and
    training loss as a function of the weight initialization scale.
    """
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    np.random.seed(231)

    # Try training a very deep net with batchnorm.
    hidden_dims = [50, 50, 50, 50, 50, 50, 50]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    bn_solvers_ws = {}
    solvers_ws = {}
    weight_scales = np.logspace(-4, 0, num=20)
    for i, weight_scale in enumerate(weight_scales):
        print('Running weight scale %d / %d' % (i + 1, len(weight_scales)))
        bn_model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                                     normalization='batchnorm')
        model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale,
                                  normalization=None)

        bn_solver = Solver(bn_model, small_data,
                           num_epochs=10, batch_size=50,
                           update_rule='adam',
                           optim_config={'learning_rate': 1e-3},
                           verbose=False, print_every=200)
        bn_solver.train()
        bn_solvers_ws[weight_scale] = bn_solver

        solver = Solver(model, small_data,
                        num_epochs=10, batch_size=50,
                        update_rule='adam',
                        optim_config={'learning_rate': 1e-3},
                        verbose=False, print_every=200)
        solver.train()
        solvers_ws[weight_scale] = solver

    # Plot results of the weight scale experiment.
    best_train_accs, bn_best_train_accs = [], []
    best_val_accs, bn_best_val_accs = [], []
    final_train_loss, bn_final_train_loss = [], []

    for ws in weight_scales:
        best_train_accs.append(max(solvers_ws[ws].train_acc_history))
        bn_best_train_accs.append(max(bn_solvers_ws[ws].train_acc_history))
        best_val_accs.append(max(solvers_ws[ws].val_acc_history))
        bn_best_val_accs.append(max(bn_solvers_ws[ws].val_acc_history))
        final_train_loss.append(np.mean(solvers_ws[ws].loss_history[-100:]))
        bn_final_train_loss.append(np.mean(bn_solvers_ws[ws].loss_history[-100:]))

    # semilogx uses a logarithmic x-axis and a linear y-axis. A semi-log
    # scale is recommended when:
    # (1) one of the variables spans several orders of magnitude over the
    #     range of interest;
    # (2) near zero, small changes in the independent variable cause very
    #     large changes in the dependent variable, so a log axis stretches
    #     the region of greatest change and keeps the curve legible;
    # (3) some functional relationship needs to be transformed into a
    #     straight line.
    plt.subplot(3, 1, 1)
    plt.title('Best val accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best val accuracy')
    plt.semilogx(weight_scales, best_val_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_val_accs, '-o', label='batchnorm')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    plt.title('Best train accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best training accuracy')
    plt.semilogx(weight_scales, best_train_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_train_accs, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='upper right')

    plt.subplot(3, 1, 3)
    plt.title('Final training loss vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Final training loss')
    plt.semilogx(weight_scales, final_train_loss, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_final_train_loss, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='lower left')
    plt.gca().set_ylim(1.0, 3.5)

    plt.gcf().set_size_inches(15, 15)
    plt.show()
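
# A self-contained sketch of the semi-log idea discussed above (assumes only
# numpy and matplotlib, nothing from this repo): x spans four orders of
# magnitude, exactly like weight_scales, so a linear x-axis would crush
# everything below 1e-1 into the left edge of the plot.
import numpy as np
import matplotlib.pyplot as plt

x = np.logspace(-4, 0, num=20)
plt.semilogx(x, np.tanh(10 * x), '-o')
plt.xlabel('x (log scale)')
plt.ylabel('tanh(10x)')
plt.show()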
def check_for_deep_network():
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    for k, v in data.items():
        print('%s: ' % k, v.shape)
    np.random.seed(231)

    # Try training a very deep net with batchnorm.
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train],
        'y_val': data['y_val'][:num_train],
    }
    weight_scale = 2e-2
    reg = 0.01

    bn_model = FullyConnectedNet(hidden_dims, reg=reg,
                                 weight_scale=weight_scale,
                                 normalization='batchnorm')
    model = FullyConnectedNet(hidden_dims, reg=reg,
                              weight_scale=weight_scale,
                              normalization=None)

    bn_solver = Solver(bn_model, small_data,
                       num_epochs=10, batch_size=50,
                       update_rule='adam',
                       optim_config={'learning_rate': 1e-3},
                       verbose=True, print_every=20)
    bn_solver.train()

    solver = Solver(model, small_data,
                    num_epochs=10, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True, print_every=20)
    solver.train()

    plt.subplot(3, 1, 1)
    plot_training_history('Training loss', 'Iteration', solver, [bn_solver],
                          lambda x: x.loss_history,
                          bl_marker='o', bn_marker='o')
    plt.subplot(3, 1, 2)
    plot_training_history('Training accuracy', 'Epoch', solver, [bn_solver],
                          lambda x: x.train_acc_history,
                          bl_marker='-o', bn_marker='-o')
    plt.subplot(3, 1, 3)
    plot_training_history('Validation accuracy', 'Epoch', solver, [bn_solver],
                          lambda x: x.val_acc_history,
                          bl_marker='-o', bn_marker='-o')
    plt.show()
from cs231n.data_utils import get_CIFAR10_data
from cs231n.classifiers.mycnn import CNN
from cs231n.solver import Solver

dataset = get_CIFAR10_data()
train_data = {
    'X_train': dataset['X_train'],
    'y_train': dataset['y_train'],
    'X_val': dataset['X_val'],
    'y_val': dataset['y_val'],
}

model = CNN()
solver = Solver(model, train_data,
                update_rule='adam',
                optim_config={'learning_rate': 0.001},
                lr_decay=0.95,
                num_epochs=50, batch_size=100,
                print_every=100)
solver.train()
solver.check_accuracy(dataset['X_test'], dataset['y_test'])
init_checkpoint = {'model': '',
                   'epoch': 0,
                   'best_val_acc': 0,
                   'best_params': '',
                   'loss_history': [],
                   'train_acc_history': [],
                   'val_acc_history': []}
name = 'check_0'
os.mkdir(os.path.join(folder, 'checkpoints', name))
joblib.dump(init_checkpoint,
            os.path.join(folder, 'checkpoints', name, name + '.pkl'))
path = folder

# Load the (preprocessed) CIFAR10 data.
data = get_CIFAR10_data(DIR_CS231n)
for k, v in data.items():
    print('%s: ' % k, v.shape)

print('The parameters are: ')
for key, value in conf.items():
    print(key + ': ', value, '\n')

# Initialize the model instance.
model = ThreeLayerConvNet(input_dim=input_dim,
                          num_filters=num_filters,
                          filter_size=filter_size,
                          hidden_dim=hidden_dim,
                          num_classes=num_classes,
                          weight_scale=weight_scale,
                          reg=reg)
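
# A minimal reload sketch for the checkpoint written above (hypothetical;
# assumes the same folder/name variables and that joblib is importable).
import os
import joblib

checkpoint = joblib.load(os.path.join(folder, 'checkpoints', name, name + '.pkl'))
print('Resuming from epoch', checkpoint['epoch'],
      '- best val acc so far:', checkpoint['best_val_acc'])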
init_checkpoint = {'model': '',
                   'epoch': 0,
                   'best_val_acc': 0,
                   'best_params': '',
                   'loss_history': [],
                   'train_acc_history': [],
                   'val_acc_history': []}
name = 'check_0'
os.mkdir(os.path.join(folder, 'checkpoints', name))
joblib.dump(init_checkpoint,
            os.path.join(folder, 'checkpoints', name, name + '.pkl'))
path = folder

# Load the (preprocessed) CIFAR10 data.
data = get_CIFAR10_data(DIR_CS231n)
for k, v in data.items():
    print('%s: ' % k, v.shape)

print('The parameters are: ')
for key, value in conf.items():
    print(key + ': ', value, '\n')

# Initialize the model instance.
model = FirstConvNet(input_dim=input_dim,
                     num_filters=num_filters,
                     filter_size=filter_size,
                     hidden_dims=hidden_dims,
                     num_classes=num_classes,
                     weight_scale=weight_scale,
                     reg=reg)
# (tail of the get_CIFAR10_data preprocessing function invoked below)
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)
# (49000, 3073) (1000, 3073) (1000, 3073) (500, 3073)

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)
# =>
# Train data shape:  (49000, 3073)
# Train labels shape:  (49000,)
# Validation data shape:  (1000, 3073)
# Validation labels shape:  (1000,)
# Test data shape:  (1000, 3073)
# Test labels shape:  (1000,)
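
# A toy illustration of the bias trick used above (self-contained numpy
# sketch, not from the original file): appending a constant-1 column to X
# lets a single matrix absorb the bias, since [X, 1] @ [[W], [b]] == X @ W + b.
import numpy as np

X = np.arange(6, dtype=np.float64).reshape(3, 2)     # 3 samples, 2 features
W = np.ones((2, 4))
b = np.full(4, 0.5)
X_biased = np.hstack([X, np.ones((X.shape[0], 1))])  # shape (3, 3)
W_biased = np.vstack([W, b])                         # shape (3, 4)
assert np.allclose(X_biased @ W_biased, X @ W + b)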
# input_dim = (X.shape[1], X.shape[2], X.shape[3])
# model = ConvNet(num_filters=2, input_dim=input_dim, filter_size=5,
#                 hidden_dim=10, use_batchnorm=True, gradcheck=True)
# # model = ThreeLayerConvNet(num_filters=2, input_dim=input_dim,
# #                           filter_size=5, hidden_dim=10)
# loss, grads = model.loss(X, y)
# print('Initial loss (no regularization): ', loss)
#
# for name in sorted(grads):
#     f = lambda _: model.loss(X, y)[0]
#     grad_num = eval_numerical_gradient(f, model.params[name],
#                                        verbose=False, h=1e-5)
#     print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))

data = get_CIFAR10_data()
# for k, v in data.items():
#     print('%s: ' % k, v.shape)

model = ConvNet(weight_scale=0.001, hidden_dim=500, reg=0)
print()

solver = Solver(model, data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={'learning_rate': 5e-3},
                verbose=True, print_every=100)
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# %load_ext autoreload
# %autoreload 2


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


# Load the (preprocessed) CIFAR10 data.
data = get_CIFAR10_data()
for k, v in data.items():
    print('%s: ' % k, v.shape)

# Test the affine_forward function
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape),
                                                    output_dim)
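
# A quick sanity check of rel_error above (hypothetical values): two nearly
# identical arrays should give a relative error well below the 1e-8 floor.
a = np.linspace(0.1, 1.0, num=5)
b = a + 1e-10
print('relative error:', rel_error(a, b))  # on the order of 1e-9 or smaller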
X, y = make_data()

# Build network
model = Sequential(batch_shape=X.shape)
model.add(Dense(num_neurons=10))
model.build(loss=Softmax())

# Forward + Backward
loss, grads = model.loss(X, y)
print('--- Loss sanity check ---')
print(loss)

# loss_sanity_check()
# test.overfit_small_data(model, num_train=num_train, epochs=20)

total_examples = 3
data = get_CIFAR10_data(dir='datasets/cifar-10-batches-py')
X = data['X_train'][:total_examples, :, :8, :8] / 127.0
y = data['y_train'][:total_examples]

model = Sequential(batch_shape=X.shape, weight_scale=1e-3, reg=0.0,
                   dtype=np.float64)
# model.add(ConvBnRelu(2))
# model.add(Pool(pool_factor=8))
model.add(Dense(num_neurons=10))
model.add(Dense(num_neurons=10))
model.build(loss=Softmax())
model.print_params()

print('--- Train a few epochs ---')
solver = Solver(model, {'X_train': X, 'y_train': y, 'X_val': X, 'y_val': y},
                num_epochs=20, batch_size=3,
                update_rule='sgd')