Example #1
def test_2layer_net():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)
    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape,
                            reg='frob',
                            reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape,
                            reg='frob',
                            reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(),
                       optim.SGD(lr=1e-5))
    scores = net_2.forward(X)
    correct_scores = np.asarray([[-1.07260209, 0.05083871, -0.87253915],
                                 [-2.02778743, -0.10832494, -1.52641362],
                                 [-0.74225908, 0.15259725, -0.39578548],
                                 [-0.38172726, 0.10835902, -0.17328274],
                                 [-0.64417314, -0.18886813, -0.41106892]])
    diff = np.sum(np.abs(scores - correct_scores))
    assert (np.isclose(diff, 0.0, atol=1e-6))
    loss = net_2.loss(X, Y_enc)
    correct_loss = 1.071696123862817
    assert (np.isclose(loss, correct_loss, atol=1e-8))
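ut.encode_labels is not shown in these snippets; it presumably one-hot encodes the integer labels so they match the targets expected by the CrossEntropy loss. A minimal sketch of such a helper, under that assumption (name and signature illustrative):

import numpy as np

def encode_labels(y, n_classes=None):
    # One-hot encode integer class labels (assumed behaviour of ut.encode_labels).
    y = np.asarray(y, dtype=int)
    if n_classes is None:
        n_classes = int(y.max()) + 1
    onehot = np.zeros((y.size, n_classes))
    onehot[np.arange(y.size), y] = 1.0
    return onehot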
Example #2
        def test_sample(filename):
            # Sample file layout: learning rate, then the dimensions p, q, r,
            # then W (p rows of q values), b (r values), the flattened
            # gradients (p*q + r values), and finally the expected W and b
            # after one SGD step.
            with open(filename, 'r') as f:
                lr = float(f.readline().strip())
                p, q, r = map(int, f.readline().split())
                W = np.zeros([p, q])
                b = np.zeros([r])
                grads_flat = np.zeros([p * q + r])
                W_ans = np.zeros([p, q])
                b_ans = np.zeros([r])

                for i in range(p):
                    W[i, :] = list(map(float, f.readline().split()))
                b[:] = list(map(float, f.readline().split()))
                grads_flat[:] = list(map(float, f.readline().split()))
                for i in range(p):
                    W_ans[i, :] = list(map(float, f.readline().split()))
                b_ans[:] = list(map(float, f.readline().split()))

            model = TestModel(W, b)
            optimizer = optim.SGD(model, lr)
            optimizer.update(grads_flat)

            self.assertTrue(np.isclose(model.params()[0], W_ans).all(), filename)
            self.assertTrue(np.isclose(model.params()[1], b_ans).all(), filename)
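The TestModel and optim.SGD classes are not shown above; under the assumption that the model exposes its parameters as a (W, b) pair and the optimizer consumes one flattened gradient vector (the W entries first, then b, matching the sample-file layout), a plain SGD update could be sketched as:

import numpy as np

class FlatSGD:
    # Hypothetical stand-in for the optim.SGD used above; not the actual implementation.
    def __init__(self, model, lr):
        self.model = model
        self.lr = lr

    def update(self, grads_flat):
        W, b = self.model.params()
        dW = np.asarray(grads_flat[:W.size]).reshape(W.shape)  # dL/dW, row-major
        db = np.asarray(grads_flat[W.size:])                   # dL/db
        W -= self.lr * dW   # vanilla SGD step: theta <- theta - lr * grad
        b -= self.lr * db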
Example #3
def main(args):

    train_cfg = config_from_json(args.train_cfg)
    model_cfg = config_from_json(args.model_cfg)
    model_cfg.block_size = model_cfg.max_len // model_cfg.n_blocks
    set_seeds(train_cfg.seed)

    if model_cfg.projection not in ["dense", "cnn"]:
        if args.max_len == 0:
            model_cfg.reduced_max_len = model_cfg.max_len
        else:
            model_cfg.reduced_max_len = args.max_len
        if args.reduce_block_size:
            assert model_cfg.reduced_max_len % model_cfg.n_blocks == 0, "reduced_max_len is not divisible by n_blocks"
            model_cfg.block_size = model_cfg.reduced_max_len // model_cfg.n_blocks
        else:
            assert model_cfg.reduced_max_len % model_cfg.block_size == 0, "reduced_max_len is not divisible by the initial block_size"
            model_cfg.n_blocks = model_cfg.reduced_max_len // model_cfg.block_size
        print("max_len:", model_cfg.reduced_max_len, "block_size:", model_cfg.block_size, "n_blocks:", model_cfg.n_blocks)
    else:
        if args.max_len != 0:
            warnings.warn("Projection is incompatible with a reduced max len, using default max_len")

    
    print("Loading dataset")
    (data, labels), criterion = get_data_and_optimizer_from_dataset(args.data_file, train_cfg.task)

    loader = GlueDataset(data, labels, train_cfg, model_cfg)
    model = BertInnerForSequenceClassification(model_cfg, loader.get_n_labels(), criterion)

    if train_cfg.optimizer == "lamb":
        if train_cfg.opt_level != "" and train_cfg.opt_level is not None:
            optimizer = apex.optimizers.FusedLAMB(model.parameters(), **train_cfg.optimizer_parameters)
        else:
            optimizer = torch_optimizer.Lamb(model.parameters(), **train_cfg.optimizer_parameters)

    elif train_cfg.optimizer == "radam":
        optimizer = torch_optimizer.RAdam(model.parameters(), **train_cfg.optimizer_parameters)
    elif train_cfg.optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), **train_cfg.optimizer_parameters)
    else:
        optimizer = optim4GPU(train_cfg, model)

    trainer = GlueTrainer(loader, model, optimizer, args.save_dir, get_device(), train_cfg.parallel)

    if args.load_model != "":
        print("Loading checkpoint")
        trainer.load_model(args.load_model, args.load_dataset_state)

    if not args.eval:
        trainer.train(train_cfg)
    else:
        trainer.eval(train_cfg)
Example #4
def train_framework(train_set,
                    val_set,
                    test_set,
                    epochs=1000,
                    mini_batch_size=50,
                    lr=0.001):

    # Turn autograd off
    torch.set_grad_enabled(False)

    # Net definition
    model = nn.Sequential(nn.Linear(2, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 2, activation="relu"))

    # Training params
    opt = optim.SGD(lr, model)
    criterion = losses.LossMSE()

    # Train
    start_time = time.perf_counter()
    history = train_model(model,
                          train_set[0],
                          train_set[1],
                          val_set[0],
                          val_set[1],
                          criterion,
                          opt,
                          epochs,
                          mini_batch_size,
                          pytorch=False,
                          verbose=True)
    end_time = time.perf_counter()

    # Compute final accuracies
    train_acc = compute_accuracy(model,
                                 train_set[0],
                                 train_set[1],
                                 pytorch=False)
    test_acc = compute_accuracy(model, test_set[0], test_set[1], pytorch=False)
    print("\tTraining time : %s s" % (end_time - start_time))
    print("\tAccuracy : train_acc = %s \t test_acc = %s" %
          (train_acc, test_acc))

    return history, end_time - start_time, (train_acc, test_acc)
Example #5
def test_2layer_grad():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)
    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape,
                            reg='frob',
                            reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape,
                            reg='frob',
                            reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(),
                       optim.SGD(lr=1e-5))
    loss = net_2.loss(X, Y_enc)
    net_2.backward()

    # Index convention assumed by this test (and by net_2.grads):
    # 0 -> layer-2 W, 1 -> layer-2 b, 2 -> layer-1 W, 3 -> layer-1 b.
    def f_change_param(param_name, U):
        if param_name == 3:
            net_2.layers[0].params['b'] = U
        if param_name == 2:
            net_2.layers[0].params['W'] = U
        if param_name == 1:
            net_2.layers[2].params['b'] = U
        if param_name == 0:
            net_2.layers[2].params['W'] = U
        return net_2.loss(X, Y_enc)

    rel_errs = np.empty(4)
    for param_name in range(4):
        f = lambda U: f_change_param(param_name, U)
        if param_name == 3:
            pass_pars = net_2.layers[0].params['b']
        if param_name == 2:
            pass_pars = net_2.layers[0].params['W']
        if param_name == 1:
            pass_pars = net_2.layers[2].params['b']
        if param_name == 0:
            pass_pars = net_2.layers[2].params['W']
        param_grad_num = dutil.grad_check(f, pass_pars, epsilon=1e-5)
        rel_errs[param_name] = ut.rel_error(param_grad_num,
                                            net_2.grads[param_name])
    assert (np.allclose(rel_errs, np.zeros(4), atol=1e-7))
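dutil.grad_check is likewise not shown; a typical central-difference numerical gradient, which is presumably what it returns for comparison against the analytic net_2.grads, looks roughly like this (helper name and exact signature assumed):

import numpy as np

def grad_check(f, x, epsilon=1e-5):
    # Central-difference estimate of df/dx, one entry at a time
    # (illustrative stand-in for dutil.grad_check; f maps x to a scalar loss).
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + epsilon
        f_plus = f(x)
        x[idx] = orig - epsilon
        f_minus = f(x)
        x[idx] = orig  # restore the original value
        grad[idx] = (f_plus - f_minus) / (2 * epsilon)
        it.iternext()
    return grad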
Example #6
    if args.dataset == 'linear':
        X_train, Y_train = generate_linear(n=100)
        X_test, Y_test = generate_linear(n=100)
    elif args.dataset == 'xor':
        X_train, Y_train = generate_XOR_easy()
        X_test, Y_test = generate_XOR_easy()
    else:
        raise RuntimeError('Dataset Not Found')

    net = Net()

    if args.criterion == 'mse':
        criterion = nn.MSE()
    elif args.criterion == 'crossentropy':
        criterion = nn.CrossEntropy()
    else:
        raise RuntimeError('Criterion Not Found')

    if args.optimizer == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)
    elif args.optimizer == 'adagrad':
        optimizer = optim.Adagrad(net.parameters(), lr=args.lr)
    else:
        raise RuntimeError('Optimizer Not Found')

    model = Model(net, criterion, optimizer)
    train_history = model.train(X_train, Y_train, epochs=args.epochs)
    test_history = model.test(X_test, Y_test)

    show_history(train_history)
    show_result(X_test, Y_test, test_history['predict'])
Example #7
def run():
    with open(args.cfg_path) as f:
        cfg = json.load(f)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.device_ids
    num_GPU = len(args.device_ids.split(','))
    batch_size_train = cfg['train_batch_size'] * num_GPU
    batch_size_valid = cfg['test_batch_size'] * num_GPU
    num_workers = args.num_workers * num_GPU

    data_path = cfg['data_path_40']

    if cfg['image_size'] % cfg['patch_size'] != 0:
        raise Exception('Image size is not divisible by patch size: {} / {}'.format(
            cfg['image_size'], cfg['patch_size']))

    patch_per_side = cfg['image_size'] // cfg['patch_size']
    grid_size = patch_per_side * patch_per_side
    model = MODELS[cfg['model']](num_classes=1,
                                 num_nodes=grid_size,
                                 use_crf=cfg['use_crf'])
    if args.resume:
        model = load_checkpoint(args, model)
    model = DataParallel(model, device_ids=None)
    model = model.to(device)

    loss_fn = nn.BCEWithLogitsLoss().to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=cfg['lr'],
                          momentum=cfg['momentum'],
                          weight_decay=1e-4)

    summary_train = {
        'epoch': 0,
        'step': 0,
        'fp': 0,
        'tp': 0,
        'Neg': 0,
        'Pos': 0
    }
    summary_valid = {'loss': float('inf'), 'step': 0, 'acc': 0}
    summary_writer = SummaryWriter(log_path)
    loss_valid_best = float('inf')

    tumor_all = []
    paracancerous_all = []
    for epoch in range(args.start_epoch, args.end_epoch):

        dataset_train = GridImageDataset(data_path,
                                         cfg['json_path_train'],
                                         cfg['image_size'],
                                         cfg['patch_size'],
                                         cfg['crop_size'],
                                         rand_list=[])
        dataloader_train = DataLoader(dataset_train,
                                      batch_size=batch_size_train,
                                      num_workers=num_workers,
                                      drop_last=True,
                                      shuffle=True)

        dataset_valid = GridImageDataset(data_path,
                                         cfg['json_path_valid'],
                                         cfg['image_size'],
                                         cfg['patch_size'],
                                         cfg['crop_size'],
                                         way="valid")

        dataloader_valid = DataLoader(dataset_valid,
                                      batch_size=batch_size_valid,
                                      num_workers=num_workers,
                                      drop_last=True,
                                      shuffle=True)

        summary_train = train_epoch(epoch, summary_train, cfg, model, loss_fn,
                                    optimizer, dataloader_train)
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'state_dict': model.module.state_dict()
            }, (ckpt_path_save + '/' + str(epoch) + '.ckpt'))
        summary_writer.add_scalar('train/loss', summary_train['loss'], epoch)
        summary_writer.add_scalar('train/acc', summary_train['acc'], epoch)
        summary_writer.add_scalar('learning_rate', summary_train['lr'], epoch)
        summary_writer.add_scalar('train/Precision',
                                  summary_train['Precision'], epoch)
        summary_writer.add_scalar('train/Recall', summary_train['Recall'],
                                  epoch)
        summary_writer.add_scalar('train/F1', summary_train['F1'], epoch)

        if epoch % 2 == 0:

            summary_valid = valid_epoch(summary_valid, summary_writer, epoch,
                                        model, loss_fn, dataloader_valid)
            summary_writer.add_scalar('valid/loss', summary_valid['loss'],
                                      epoch)
            summary_writer.add_scalar('valid/acc', summary_valid['acc'], epoch)
            summary_writer.add_scalar('valid/Precision',
                                      summary_valid['Precision'], epoch)
            summary_writer.add_scalar('valid/Recall', summary_valid['Recall'],
                                      epoch)
            summary_writer.add_scalar('valid/F1', summary_valid['F1'], epoch)

        # summary_writer.add_scalar('learning_rate', lr, epoch)
        if summary_valid['loss'] < loss_valid_best:
            loss_valid_best = summary_valid['loss']

            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'state_dict': model.module.state_dict()
                }, os.path.join(ckpt_path_save, 'best.ckpt'))

    summary_writer.close()
Example #8
print("Generating dataset...")
nPoints = 1000
train, train_label = generate_disk_dataset(nPoints)
test, test_label = generate_disk_dataset(nPoints)

# Select model
print("Building the model...")
model = m.Sequential(m.Linear(2, 25), m.ReLU(), m.Linear(25, 25), m.ReLU(),
                     m.Linear(25, 25), m.ReLU(), m.Linear(25, 2))
#model = m.Sequential(m.Linear(2,25), m.ReLU(), m.Linear(25,2, bias = False))
#model = m.Sequential(m.Linear(2,128), m.ReLU(), m.Linear(128,2))
#model = m.Sequential(m.Linear(2,2))

# Select optimizer
# optim = o.GradientDescent(0.04)
optim = o.SGD(10, 0.01)
# optim = o.SGDWithRepetition(153,0.05)

# Select loss
loss = l.MSE()
# loss = l.CrossEntropyLoss()

# Train the model and plot the train loss evolution
print("Training the model...")
v = optim.train_model(model, train, train_label, loss, n_epochs=100)
with open('train_loss.out', 'w') as f:
    json.dump(v, f)

fig = plt.figure()
plt.plot(v)