def check_dropout_fc_net():
    """Gradient-check a fully connected net under several dropout settings.

    Builds a tiny two-hidden-layer net, computes analytic gradients via
    ``model.loss`` and compares them against numerical estimates for every
    parameter, for dropout keep probabilities 1, 0.75 and 0.5.
    """
    print_formatted('Fully connected net with dropout', 'bold', 'blue')
    np.random.seed(231)

    num_samples, input_dim, hidden1, hidden2, num_classes = 2, 15, 20, 30, 10
    X = np.random.randn(num_samples, input_dim)
    y = np.random.randint(num_classes, size=(num_samples, ))

    print('Relative errors should be around e-6 or less.')
    print('It is fine if for dropout=1 you have W2 error on the order of e-5.')
    print()

    for dropout in [1, 0.75, 0.5]:
        print('Running check with dropout = ', dropout)
        model = FullyConnectedNet(input_dim=input_dim,
                                  hidden_dims=[hidden1, hidden2],
                                  num_classes=num_classes,
                                  weight_scale=5e-2,
                                  dropout=dropout,
                                  seed=123)
        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)

        # The numerical gradient only needs the scalar loss; the lambda is
        # invariant across parameters, so bind it once per model.
        loss_fn = lambda _: model.loss(X, y)[0]
        for param_name in sorted(grads):
            grad_num = evaluate_numerical_gradient(loss_fn,
                                                   model.params[param_name],
                                                   verbose=False,
                                                   h=1e-5)
            print('%s relative error: %.2e' %
                  (param_name, relative_error(grad_num, grads[param_name])))
        print()
def train_with_layernorm(plot=False):
    """Train the same deep net with and without layer normalization.

    Uses 1000 training examples and a 5x100 hidden architecture; optionally
    saves a comparison plot of the two runs.

    Args:
        plot: if True, write loss/accuracy curves to ``layernorm.png``.
    """
    print_formatted('Layer normalization', 'stage')

    hidden_dims = [100, 100, 100, 100, 100]
    weight_scale = 2e-2
    num_train = 1000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    def _fit(net):
        # Shared training configuration for both runs.
        runner = Solver(net, small_data,
                        update_rule='adam',
                        optim_config={'learning_rate': 1e-3},
                        num_epochs=10,
                        batch_size=50,
                        print_every=20)
        runner.train()
        return runner

    print_formatted('without layernorm', 'bold', 'blue')
    solver = _fit(FullyConnectedNet(input_dim=3072,
                                    hidden_dims=hidden_dims,
                                    num_classes=10,
                                    weight_scale=weight_scale))
    print()

    print_formatted('with layernorm', 'bold', 'blue')
    ln_solver = _fit(FullyConnectedNet(input_dim=3072,
                                       hidden_dims=hidden_dims,
                                       num_classes=10,
                                       weight_scale=weight_scale,
                                       normalization='layernorm'))

    if plot:
        plot_stats('loss', 'train_acc', 'val_acc',
                   solvers={'baseline': solver, 'with_norm': ln_solver},
                   filename='layernorm.png')
def overfit_small_data(plot=False):
    """Sanity check: drive a small net to overfit 50 training examples.

    Args:
        plot: if True, save the loss history to
            ``overfitting_loss_history.png``.
    """
    print_formatted('Overfitting small data', 'stage')

    num_train = 50
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    # Hand-picked hyperparameters for the overfitting run.
    weight_scale, learning_rate, update_rule = 3e-2, 1e-3, 'adam'

    net = FullyConnectedNet(input_dim=3072,
                            hidden_dims=[100, 100],
                            num_classes=10,
                            weight_scale=weight_scale)
    trainer = Solver(net, small_data,
                     update_rule=update_rule,
                     optim_config={'learning_rate': learning_rate},
                     lr_decay=0.95,
                     num_epochs=20,
                     batch_size=25,
                     print_every=10)
    trainer.train()

    if plot:
        plot_stats('loss', solvers={'fc_net': trainer},
                   filename='overfitting_loss_history.png')
def train_best_fc_model(plot=False):
    """Train the best fully connected configuration on the full dataset,
    then report validation and test accuracy.

    Args:
        plot: if True, save loss and train/val accuracy curves.
    """
    print_formatted('Best fully connected net', 'stage')

    hidden_dims = [100, 100, 100]
    weight_scale = 2e-2
    num_epochs = 10
    dropout = 1  # keep probability 1 -> dropout effectively disabled

    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
    }

    print_formatted('training', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=hidden_dims,
                              num_classes=10,
                              weight_scale=weight_scale,
                              normalization='batchnorm',
                              dropout=dropout)
    solver = Solver(model, data,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    num_epochs=num_epochs,
                    batch_size=50,
                    print_every=100)
    solver.train()
    print()

    if plot:
        plot_stats('loss', 'train_val_acc', solvers={'best_fc': solver})

    print_formatted('evaluating', 'bold', 'blue')
    # loss() without labels yields class scores; argmax picks the prediction.
    y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
    y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
    print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
    print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())
def compare_update_rules(plot=False):
    """Train the same architecture under several first-order update rules.

    Runs sgd, sgd_momentum, nesterov_momentum, adagrad, rmsprop and adam
    (each with a hand-tuned learning rate) on 4000 training examples so their
    loss/accuracy histories can be compared.

    Args:
        plot: if True, save the comparison plot to
            ``update_rules_comparison.png``.
    """
    print_formatted('Update rules', 'stage')

    num_train = 4000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    # Per-rule learning rates. Dict insertion order (guaranteed since
    # Python 3.7) also fixes the run order, so the rules no longer need to
    # be duplicated in a separate list.
    learning_rates = {
        'sgd': 1e-2,
        'sgd_momentum': 1e-2,
        'nesterov_momentum': 1e-2,
        'adagrad': 1e-4,
        'rmsprop': 1e-4,
        'adam': 1e-3,
    }

    solvers = {}
    for update_rule, lr in learning_rates.items():
        print_formatted('running with ' + update_rule, 'bold', 'blue')
        model = FullyConnectedNet(input_dim=3072,
                                  hidden_dims=[100] * 5,
                                  num_classes=10,
                                  weight_scale=5e-2)
        solver = Solver(model, small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={'learning_rate': lr},
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()
        print()

    if plot:
        plot_stats('loss', 'train_acc', 'val_acc', solvers=solvers,
                   filename='update_rules_comparison.png')
def check_batchnorm_fc_net():
    """Gradient-check a fully connected net that uses batch normalization.

    Compares analytic gradients against numerical estimates for every
    parameter, with regularization off (reg=0) and on (reg=3.14).
    """
    print_formatted('Fully connected net with batchnorm', 'bold', 'blue')
    np.random.seed(231)

    num_inputs, dim, h1, h2, num_classes = 2, 15, 20, 30, 10
    X = np.random.randn(num_inputs, dim)
    y = np.random.randint(num_classes, size=(num_inputs, ))

    print('Relative errors for W should be between 1e-4 ~ 1e-10.')
    print('Relative errors for b should be between 1e-8 ~ 1e-10.')
    print(
        'Relative errors for gammas and betas should be between 1e-8 ~ 1e-9.')
    print()

    for reg in [0, 3.14]:
        print('Running check with reg = ', reg)
        model = FullyConnectedNet(input_dim=dim,
                                  hidden_dims=[h1, h2],
                                  num_classes=num_classes,
                                  weight_scale=5e-2,
                                  reg=reg,
                                  normalization='batchnorm')
        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)

        # Scalar-loss closure shared by every parameter's numerical check.
        loss_only = lambda _: model.loss(X, y)[0]
        for param in sorted(grads):
            numeric = evaluate_numerical_gradient(loss_only,
                                                  model.params[param],
                                                  verbose=False,
                                                  h=1e-5)
            print('%s relative error: %.2e' %
                  (param, relative_error(numeric, grads[param])))
        # Blank separator only between the two regularization settings.
        if reg == 0:
            print()
def train_with_dropout(plot=False):
    """Compare training with and without dropout on a small training set.

    Trains the same one-hidden-layer net on 500 examples once per keep
    probability in ``dropout_choices`` (1 = no dropout, 0.25 = strong
    dropout), so the solvers' accuracy histories can be compared for a
    regularization effect.

    Args:
        plot: if True, save a train/val accuracy comparison to
            ``dropout.png``.
    """
    print_formatted('Dropout', 'stage')
    np.random.seed(231)

    num_train = 500
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    solvers = {}
    dropout_choices = [1, 0.25]
    for dropout in dropout_choices:
        if dropout == 1:
            print_formatted('without dropout, p = 1', 'bold', 'blue')
        else:
            print_formatted('with dropout, p = %.2f' % dropout, 'bold',
                            'blue')

        model = FullyConnectedNet(input_dim=3072,
                                  hidden_dims=[500],
                                  num_classes=10,
                                  dropout=dropout)
        solver = Solver(model, small_data,
                        update_rule='adam',
                        optim_config={'learning_rate': 5e-4},
                        num_epochs=25,
                        batch_size=100,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver
        if dropout == 1:
            print()

    if plot:
        # Build plot labels from dropout_choices instead of hard-coding the
        # dict keys, so adding a choice above automatically appears in the
        # plot. ('%.2f dropout' % 1) == '1.00 dropout', matching the
        # previous hard-coded labels exactly.
        plot_stats('train_acc', 'val_acc',
                   solvers={'%.2f dropout' % d: solvers[d]
                            for d in dropout_choices},
                   filename='dropout.png')
) ax.set_xlim([-scale, scale]) ax.set_ylim([-scale, scale]) ax.set_axis_off() fig.add_axes(ax) plt.scatter(X[:, 0], X[:, 1], s=50, c='black') if opt.k == 1: plt.quiver(X[:, 0], X[:, 1], T[:, 0], T[:, 1], scale=7.5, color='black') plt.savefig(os.path.join(output_path, outfile), dpi=DPI) gen = FullyConnectedNet(opt.latent_dim, 2, hidden_dim=250, num_hidden=3, z1_circle_topology=True).to(dev) disc = FullyConnectedNet(2, 1, hidden_dim=100, num_hidden=3) disc_tan = FullyConnectedNet(2, 2, hidden_dim=50, num_hidden=2) omega = DifferentialForm(opt.k, 2, alpha=0.1, zero_form=disc, one_form=disc_tan, downsampler=None, d_small=2, use_bound=True).to(dev)
# Script driver: train a fully connected classifier on the Pavia University
# hyperspectral scene (.mat files) and print the resulting accuracy.
# NOTE(review): relies on names defined elsewhere in the file
# (seed_everything, load_mat, prepare_batch_data, train_model_and_predict_2,
# random_seed, test_size, use_gpu, lr, epoch) -- confirm against the rest of
# the module. This chunk may be the body of a main() whose header is outside
# this view.
add_axis = False
data_path = './data2/PaviaU.mat'
label_path = './data2/PaviaU_gt.mat'

# Fix RNG state before the train/test split so runs are reproducible.
seed_everything(random_seed)
train_x, test_x, train_y, test_y = load_mat(data_path,
                                            label_path,
                                            test_rate=test_size)
# Feature count comes from the data's second axis; class count from the
# distinct labels observed in the training split.
n_feature = train_x.shape[1]
n_class = len(np.unique(train_y))

# Convert arrays to batched model inputs (moved to GPU when use_cuda is set).
train_x, test_x, train_y, test_y = prepare_batch_data(train_x,
                                                      test_x,
                                                      train_y,
                                                      test_y,
                                                      use_cuda=use_gpu,
                                                      add_axis=add_axis)

model = FullyConnectedNet(n_feature=n_feature, n_output=n_class)
if use_gpu:
    model.cuda()
print(model)

# Train and evaluate; only the accuracy is kept (first return value dropped).
_, accuracy = train_model_and_predict_2(model, train_x, test_x, train_y,
                                        test_y, lr=lr, epochs=epoch)
print(accuracy)