def check_dropout_fc_net():
    print_formatted('Fully connected net with dropout', 'bold', 'blue')

    np.random.seed(231)
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    print('Relative errors should be around 1e-6 or less.')
    print('For dropout = 1, a W2 error on the order of 1e-5 is fine.')
    print()

    for dropout in [1, 0.75, 0.5]:
        print('Running check with dropout = ', dropout)
        model = FullyConnectedNet(input_dim=D,
                                  hidden_dims=[H1, H2],
                                  num_classes=C,
                                  weight_scale=5e-2,
                                  dropout=dropout,
                                  seed=123)

        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)

        for name in sorted(grads):
            # The lambda ignores its argument: evaluate_numerical_gradient
            # perturbs model.params[name] in place before re-evaluating the loss.
            f = lambda _: model.loss(X, y)[0]
            grad_num = evaluate_numerical_gradient(f,
                                                   model.params[name],
                                                   verbose=False,
                                                   h=1e-5)
            print('%s relative error: %.2e' %
                  (name, relative_error(grad_num, grads[name])))

        print()
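These gradient checks rely on two helpers the snippets do not show. As a point of reference, here is a minimal sketch assuming the standard cs231n-style definitions (max elementwise relative difference, and a centered-difference numerical gradient that perturbs the parameter array in place); the exact implementations behind evaluate_numerical_gradient and relative_error may differ.

import numpy as np

def relative_error(x, y, eps=1e-8):
    # Maximum elementwise relative difference between two arrays.
    return np.max(np.abs(x - y) / np.maximum(eps, np.abs(x) + np.abs(y)))

def evaluate_numerical_gradient(f, x, verbose=False, h=1e-5):
    # Centered differences: nudge each entry of x by +/- h in place,
    # re-evaluate f, then restore the entry.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old_value = x[ix]
        x[ix] = old_value + h
        fxph = f(x)  # f(x + h)
        x[ix] = old_value - h
        fxmh = f(x)  # f(x - h)
        x[ix] = old_value  # restore
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
        it.iternext()
    return grad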
Example #2
def train_with_layernorm(plot=False):
    print_formatted('Layer normalization', 'stage')

    hidden_dims = [100, 100, 100, 100, 100]
    weight_scale = 2e-2

    num_train = 1000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    print_formatted('without layernorm', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=hidden_dims,
                              num_classes=10,
                              weight_scale=weight_scale)
    solver = Solver(model,
                    small_data,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    num_epochs=10,
                    batch_size=50,
                    print_every=20)
    solver.train()
    print()

    print_formatted('with layernorm', 'bold', 'blue')
    ln_model = FullyConnectedNet(input_dim=3072,
                                 hidden_dims=hidden_dims,
                                 num_classes=10,
                                 weight_scale=weight_scale,
                                 normalization='layernorm')
    ln_solver = Solver(ln_model,
                       small_data,
                       update_rule='adam',
                       optim_config={
                           'learning_rate': 1e-3,
                       },
                       num_epochs=10,
                       batch_size=50,
                       print_every=20)
    ln_solver.train()

    if plot:
        plot_stats('loss',
                   'train_acc',
                   'val_acc',
                   solvers={
                       'baseline': solver,
                       'with_norm': ln_solver
                   },
                   filename='layernorm.png')
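For intuition, layer normalization standardizes each sample over its feature dimension (rather than each feature over the batch) before a learned scale and shift. A minimal sketch of the forward pass this option plugs in, with gamma and beta as the per-layer learnable parameters:

import numpy as np

def layernorm_forward(x, gamma, beta, eps=1e-5):
    # Normalize each row (sample) over its features; batchnorm would
    # instead normalize each column (feature) over the batch.
    mu = x.mean(axis=1, keepdims=True)
    var = x.var(axis=1, keepdims=True)
    x_hat = (x - mu) / np.sqrt(var + eps)
    return gamma * x_hat + beta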
Example #3
def overfit_small_data(plot=False):
    print_formatted('Overfitting small data', 'stage')

    num_train = 50
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    weight_scale = 3e-2
    learning_rate = 1e-3
    update_rule = 'adam'

    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=[100, 100],
                              num_classes=10,
                              weight_scale=weight_scale)
    solver = Solver(model,
                    small_data,
                    update_rule=update_rule,
                    optim_config={'learning_rate': learning_rate},
                    lr_decay=0.95,
                    num_epochs=20,
                    batch_size=25,
                    print_every=10)
    solver.train()

    if plot:
        plot_stats('loss',
                   solvers={'fc_net': solver},
                   filename='overfitting_loss_history.png')
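All of these examples drive training through the same Solver pattern: sample a minibatch, ask the model for its loss and gradients, then apply the chosen update rule to each parameter. A stripped-down sketch of a single step, assuming the cs231n-style model.loss and update-rule interfaces (not the actual Solver internals):

import numpy as np

def solver_step(model, X_train, y_train, update_rule, optim_configs, batch_size=25):
    # One iteration: minibatch -> loss/grads -> per-parameter update.
    idx = np.random.choice(X_train.shape[0], batch_size)
    loss, grads = model.loss(X_train[idx], y_train[idx])
    for name, w in model.params.items():
        next_w, next_config = update_rule(w, grads[name], optim_configs[name])
        model.params[name] = next_w
        optim_configs[name] = next_config
    return loss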
Example #4
def train_best_fc_model(plot=False):
    print_formatted('Best fully connected net', 'stage')

    hidden_dims = [100, 100, 100]
    weight_scale = 2e-2
    num_epochs = 10
    dropout = 1  # keep probability; 1 disables dropout

    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
    }

    print_formatted('training', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=hidden_dims,
                              num_classes=10,
                              weight_scale=weight_scale,
                              normalization='batchnorm',
                              dropout=dropout)
    solver = Solver(model,
                    data,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    num_epochs=num_epochs,
                    batch_size=50,
                    print_every=100)
    solver.train()
    print()

    if plot:
        plot_stats('loss',
                   'train_val_acc',
                   solvers={'best_fc': solver})

    print_formatted('evaluating', 'bold', 'blue')
    # In test mode (no labels), model.loss returns class scores.
    y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
    y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
    print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
    print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())
Example #5
def compare_update_rules(plot=False):
    print_formatted('Update rules', 'stage')

    num_train = 4000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    learning_rates = {
        'sgd': 1e-2,
        'sgd_momentum': 1e-2,
        'nesterov_momentum': 1e-2,
        'adagrad': 1e-4,
        'rmsprop': 1e-4,
        'adam': 1e-3
    }
    solvers = {}

    for update_rule in [
            'sgd', 'sgd_momentum', 'nesterov_momentum', 'adagrad', 'rmsprop',
            'adam'
    ]:
        print_formatted('running with ' + update_rule, 'bold', 'blue')
        model = FullyConnectedNet(input_dim=3072,
                                  hidden_dims=[100] * 5,
                                  num_classes=10,
                                  weight_scale=5e-2)

        solver = Solver(model,
                        small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={
                            'learning_rate': learning_rates[update_rule],
                        },
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()
        print()

    if plot:
        plot_stats('loss',
                   'train_acc',
                   'val_acc',
                   solvers=solvers,
                   filename='update_rules_comparison.png')
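Of the rules compared above, adam is the only one that combines momentum-style first moments with rmsprop-style second moments plus bias correction. A sketch of the update in the usual cs231n update-rule signature; the algorithm is standard, but the hyperparameter defaults here are assumptions:

import numpy as np

def adam(w, dw, config=None):
    # Adam: bias-corrected first/second moment estimates per parameter.
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-3)
    config.setdefault('beta1', 0.9)
    config.setdefault('beta2', 0.999)
    config.setdefault('epsilon', 1e-8)
    config.setdefault('m', np.zeros_like(w))
    config.setdefault('v', np.zeros_like(w))
    config.setdefault('t', 0)

    config['t'] += 1
    config['m'] = config['beta1'] * config['m'] + (1 - config['beta1']) * dw
    config['v'] = config['beta2'] * config['v'] + (1 - config['beta2']) * dw**2
    m_hat = config['m'] / (1 - config['beta1']**config['t'])
    v_hat = config['v'] / (1 - config['beta2']**config['t'])
    next_w = w - config['learning_rate'] * m_hat / (np.sqrt(v_hat) + config['epsilon'])
    return next_w, config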
Example #6
def check_batchnorm_fc_net():
    print_formatted('Fully connected net with batchnorm', 'bold', 'blue')

    np.random.seed(231)
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    print('Relative errors for W should be between 1e-10 and 1e-4.')
    print('Relative errors for b should be between 1e-10 and 1e-8.')
    print('Relative errors for gammas and betas should be between'
          ' 1e-9 and 1e-8.')
    print()

    for reg in [0, 3.14]:
        print('Running check with reg = ', reg)
        model = FullyConnectedNet(input_dim=D,
                                  hidden_dims=[H1, H2],
                                  num_classes=C,
                                  weight_scale=5e-2,
                                  reg=reg,
                                  normalization='batchnorm')

        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)

        for name in sorted(grads):
            # The lambda ignores its argument: evaluate_numerical_gradient
            # perturbs model.params[name] in place before re-evaluating the loss.
            f = lambda _: model.loss(X, y)[0]
            grad_num = evaluate_numerical_gradient(f,
                                                   model.params[name],
                                                   verbose=False,
                                                   h=1e-5)
            print('%s relative error: %.2e' %
                  (name, relative_error(grad_num, grads[name])))

        if reg == 0: print()
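The gammas and betas checked above are the extra learnable parameters batchnorm introduces per hidden layer. For reference, a minimal training-mode forward pass (the running mean/variance bookkeeping used at test time is omitted):

import numpy as np

def batchnorm_forward(x, gamma, beta, eps=1e-5):
    # Normalize each feature (column) over the batch, then scale and shift.
    mu = x.mean(axis=0)
    var = x.var(axis=0)
    x_hat = (x - mu) / np.sqrt(var + eps)
    return gamma * x_hat + beta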
Example #7
def train_with_dropout(plot=False):
    print_formatted('Dropout', 'stage')

    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    solvers = {}
    dropout_choices = [1, 0.25]
    for dropout in dropout_choices:
        if dropout == 1:
            print_formatted('without dropout, p = 1', 'bold', 'blue')
        else:
            print_formatted('with dropout, p = %.2f' % dropout, 'bold', 'blue')

        model = FullyConnectedNet(input_dim=3072,
                                  hidden_dims=[500],
                                  num_classes=10,
                                  dropout=dropout)

        solver = Solver(model,
                        small_data,
                        update_rule='adam',
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        num_epochs=25,
                        batch_size=100,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

        if dropout == 1: print()

    if plot:
        plot_stats('train_acc',
                   'val_acc',
                   solvers={
                       '1.00 dropout': solvers[1],
                       '0.25 dropout': solvers[0.25]
                   },
                   filename='dropout.png')
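In these examples dropout is a keep probability, so dropout=1 means no dropout at all. A minimal sketch of inverted dropout under that convention: units are kept with probability p and activations rescaled by 1/p at train time, leaving test time as a plain identity pass:

import numpy as np

def dropout_forward(x, p, mode='train'):
    # Inverted dropout: the 1/p rescaling at train time keeps the
    # expected activation unchanged, so test time needs no scaling.
    if mode == 'train' and p < 1:
        mask = (np.random.rand(*x.shape) < p) / p
        return x * mask
    return x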
Example #8
ax.set_xlim([-scale, scale])
ax.set_ylim([-scale, scale])
ax.set_axis_off()
fig.add_axes(ax)

plt.scatter(X[:, 0], X[:, 1], s=50, c='black')

if opt.k == 1:
    plt.quiver(X[:, 0], X[:, 1], T[:, 0], T[:, 1], scale=7.5, color='black')

plt.savefig(os.path.join(output_path, outfile), dpi=DPI)

gen = FullyConnectedNet(opt.latent_dim,
                        2,
                        hidden_dim=250,
                        num_hidden=3,
                        z1_circle_topology=True).to(dev)

disc = FullyConnectedNet(2, 1, hidden_dim=100, num_hidden=3)

disc_tan = FullyConnectedNet(2, 2, hidden_dim=50, num_hidden=2)

omega = DifferentialForm(opt.k,
                         2,
                         alpha=0.1,
                         zero_form=disc,
                         one_form=disc_tan,
                         downsampler=None,
                         d_small=2,
                         use_bound=True).to(dev)
Example #9
    add_axis = False
    data_path = './data2/PaviaU.mat'
    label_path = './data2/PaviaU_gt.mat'

    seed_everything(random_seed)

    train_x, test_x, train_y, test_y = load_mat(data_path,
                                                label_path,
                                                test_rate=test_size)
    n_feature = train_x.shape[1]
    n_class = len(np.unique(train_y))
    train_x, test_x, train_y, test_y = prepare_batch_data(train_x,
                                                          test_x,
                                                          train_y,
                                                          test_y,
                                                          use_cuda=use_gpu,
                                                          add_axis=add_axis)

    model = FullyConnectedNet(n_feature=n_feature, n_output=n_class)
    if use_gpu:
        model.cuda()
    print(model)
    _, accuracy = train_model_and_predict_2(model,
                                            train_x,
                                            test_x,
                                            train_y,
                                            test_y,
                                            lr=lr,
                                            epochs=epoch)
    print(accuracy)
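The helpers load_mat, prepare_batch_data, and train_model_and_predict_2 are not shown in this snippet. Purely as an illustration, a hypothetical stand-in for the train-and-predict step, assuming a plain full-batch PyTorch classification loop over tensors already placed on the right device (the real helper may differ):

import torch
import torch.nn as nn

def train_and_predict_sketch(model, train_x, test_x, train_y, test_y,
                             lr=1e-3, epochs=100):
    # Hypothetical stand-in for train_model_and_predict_2: full-batch
    # Adam training, then test-set predictions and accuracy.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(train_x), train_y)
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        preds = model(test_x).argmax(dim=1)
        accuracy = (preds == test_y).float().mean().item()
    return preds, accuracy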