Example #1
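# Runs one training epoch with a decaying LR schedule, evaluates and
# checkpoints periodically, and prints a tabulated progress row.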
import time

import tabulate

import training_utils

# Header row for the tabulated log below; assumed here because the
# module-level definition falls outside this excerpt (names follow Example #6).
columns = ["ep", "lr", "tr_loss", "tr_acc", "te_loss", "te_acc", "time"]


def train_epoch(model,
                loaders,
                criterion,
                optimizer,
                epoch,
                end_epoch,
                eval_freq=1,
                save_freq=10,
                output_dir='./',
                lr_init=0.01):

    time_ep = time.time()

    lr = training_utils.schedule(epoch, lr_init, end_epoch, swa=False)
    training_utils.adjust_learning_rate(optimizer, lr)
    train_res = training_utils.train_epoch(loaders["train"], model, criterion,
                                           optimizer)
    if (epoch == 0 or epoch % eval_freq == eval_freq - 1
            or epoch == end_epoch - 1):
        test_res = training_utils.eval(loaders["test"], model, criterion)
    else:
        test_res = {"loss": None, "accuracy": None}

    if (epoch + 1) % save_freq == 0:
        training_utils.save_checkpoint(
            output_dir,
            epoch + 1,
            state_dict=model.state_dict(),
            optimizer=optimizer.state_dict(),
        )

    time_ep = time.time() - time_ep
    values = [
        epoch + 1,
        lr,
        train_res["loss"],
        train_res["accuracy"],
        test_res["loss"],
        test_res["accuracy"],
        time_ep,
    ]
    table = tabulate.tabulate([values],
                              columns,
                              tablefmt="simple",
                              floatfmt="8.4f")
    if epoch % 40 == 0:
        table = table.split("\n")
        table = "\n".join([table[1]] + table)
    else:
        table = table.split("\n")[2]
    print(table)
Example #2
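# (Excerpt begins mid-call: keyword arguments configuring SWAG/subspace
# training of a regression model; the callee's name falls outside this excerpt.)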
    epochs=args.epochs,
    criterion=criterion,
    batch_size=args.batch_size,
    subspace_type=args.subspace,
    subspace_kwargs={'max_rank': args.max_num_models},
    momentum=args.momentum,
    wd=args.wd,
    lr_init=args.lr_init,
    swag_lr=args.swag_lr,
    swag_freq=1,
    swag_start=args.swag_start,
    use_cuda=torch.cuda.is_available(),
    use_swag=args.swag,
    scale=args.scale,
    num_samples=args.num_samples,
    const_lr=args.no_schedule,
    double_bias_lr=False,
    model_variance=args.model_variance,
    **extra_args,
    input_dim=dataset.D,
    output_dim=output_dim,
    apply_var=args.noise_var,
    **model_cfg.kwargs
)

mname = args.model
if args.swag:
    mname = mname + args.subspace + args.inference

bb_args = argparse.Namespace(model=mname,
                             dataset=args.dataset,
                             split=args.split,
                             seed=args.seed,
                             database_path=args.database_path)

bb_result = run(bb_args,
                data=dataset,
                model=regression_model,
                is_test=(args.database_path == ''))
print(bb_result)

utils.save_checkpoint(
    args.dir,
    args.epochs,
    model_state_dict=regression_model.model.state_dict(),
    optimizer=regression_model.optimizer.state_dict(),
    result=bb_result
)
Example #3
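# Trains a model projected onto a low-dimensional subspace, printing one
# tabulated row per epoch (full header only on the first epoch).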
printf = print
columns = ['ep', 'acc', 'loss', 'prior', 'nll']

for epoch in range(args.epochs):
    train_res = utils.train_epoch(loaders['train'], proj_model, criterion,
                                  optimizer)
    values = [
        '%d/%d' % (epoch + 1, args.epochs), train_res['accuracy'],
        train_res['loss'], train_res['stats']['prior'],
        train_res['stats']['nll']
    ]
    if epoch == 0:
        printf(
            tabulate.tabulate([values],
                              columns,
                              tablefmt='simple',
                              floatfmt='8.4f'))
    else:
        printf(
            tabulate.tabulate([values],
                              columns,
                              tablefmt='plain',
                              floatfmt='8.4f').split('\n')[1])

print(utils.eval(loaders['test'], proj_model, criterion))

utils.save_checkpoint(args.dir,
                      epoch,
                      name='projected',
                      state_dict=proj_model.state_dict())
Example #4
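# (Excerpt begins mid-call: builds a SWAG model from a checkpoint, sets up
# the logging columns, saves an initial checkpoint, and starts the epoch loop.)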
                      *model_cfg.args,
                      num_classes=num_classes,
                      **model_cfg.kwargs)
    swag_model.to(args.device)
    swag_model.load_state_dict(checkpoint["state_dict"])

columns = [
    "ep", "lr", "tr_loss", "tr_acc", "te_loss", "te_acc", "time", "mem_usage"
]
if args.swa:
    columns = columns[:-2] + ["swa_te_loss", "swa_te_acc"] + columns[-2:]
    swag_res = {"loss": None, "accuracy": None}

utils.save_checkpoint(
    args.dir,
    start_epoch,
    state_dict=model.state_dict(),
    optimizer=optimizer.state_dict(),
)

sgd_ens_preds = None
sgd_targets = None
n_ensembled = 0.0

for epoch in range(start_epoch, args.epochs):
    time_ep = time.time()

    if not args.no_schedule:
        lr = schedule(epoch)
        utils.adjust_learning_rate(optimizer, lr)
    else:
        lr = args.lr_init
Example #5
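# (Excerpt: per-epoch logging for an RNVP variational model, a final
# 'vi_rnvp' checkpoint, and optional ensemble evaluation over 30 samples.)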
    ]
    if epoch == 0:
        printf(
            tabulate.tabulate([values],
                              columns,
                              tablefmt='simple',
                              floatfmt='8.4f'))
    else:
        printf(
            tabulate.tabulate([values],
                              columns,
                              tablefmt='plain',
                              floatfmt='8.4f').split('\n')[1])

utils.save_checkpoint(args.dir,
                      epoch,
                      name='vi_rnvp',
                      state_dict=vi_model.state_dict())

if args.eval_ensemble:

    num_samples = 30

    predictions = np.zeros((len(loaders['test'].dataset), num_classes))
    targets = np.zeros(len(loaders['test'].dataset))

    printf, logfile = utils.get_logging_print(
        os.path.join(args.dir, args.log_fname + '-%s.txt'))
    print('Saving logs to: %s' % logfile)
    columns = ['iter ens', 'acc', 'nll']

    for i in range(num_samples):
Example #6
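# (Excerpt: restores SWA state when resuming, saves an initial checkpoint,
# then trains with a scheduled learning rate and periodic evaluation.)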
        swa_state_dict = checkpoint["swa_state_dict"]
        if swa_state_dict is not None:
            swa_model.load_state_dict(swa_state_dict)
        swa_n_ckpt = checkpoint["swa_n"]
        if swa_n_ckpt is not None:
            swa_n = swa_n_ckpt

columns = ["ep", "lr", "tr_loss", "tr_acc", "te_loss", "te_acc", "time"]
if args.swa:
    columns = columns[:-1] + ["swa_te_loss", "swa_te_acc"] + columns[-1:]
    swa_res = {"loss": None, "accuracy": None}

utils.save_checkpoint(
    args.dir,
    start_epoch,
    state_dict=model.state_dict(),
    swa_state_dict=swa_model.state_dict() if args.swa else None,
    swa_n=swa_n if args.swa else None,
    optimizer=optimizer.state_dict(),
)

for epoch in range(start_epoch, args.epochs):
    time_ep = time.time()

    lr = schedule(epoch)
    utils.adjust_learning_rate(optimizer, lr)
    train_res = utils.train_epoch(loaders["train"], model, criterion,
                                  optimizer)
    if (epoch == 0 or epoch % args.eval_freq == args.eval_freq - 1
            or epoch == args.epochs - 1):
        test_res = utils.eval(loaders["test"], model, criterion)
    else:
Example #7
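# (Excerpt from a training loop: collects SWAG iterates after ens_start,
# evaluates the averaged model periodically, and checkpoints both models.)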
                 targets=sgld_targets)

    if args.swag and (epoch + 1) >= args.ens_start and (
            epoch + 1 - args.ens_start) % args.swag_c_epochs == 0:
        swag_model.collect_model(model)
        if (epoch == 0 or epoch % args.eval_freq == args.eval_freq - 1
                or epoch == args.epochs - 1):
            swag_model.set_swa()
            utils.bn_update(loaders['train'], swag_model)
            swag_res = utils.eval(loaders['test'], swag_model, criterion)
        else:
            swag_res = {'loss': None, 'accuracy': None}

    if (epoch + 1) % args.save_freq == 0:
        utils.save_checkpoint(
            args.dir,
            epoch + 1,
            state_dict=model.state_dict(),
        )
        if args.swag and epoch + 1 >= args.ens_start:
            utils.save_checkpoint(
                args.dir,
                epoch + 1,
                name='swag',
                state_dict=swag_model.state_dict(),
            )

    time_ep = time.time() - time_ep
    memory_usage = torch.cuda.memory_allocated() / (1024.0**3)

    values = [
        epoch + 1, lr, train_res['loss'], train_res['accuracy'],
Example #8
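# (Excerpt: collects SWA iterates, periodically evaluates the averaged
# weights on loss/accuracy/IOU, and checkpoints model and SWAG states.)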
        print('Saving SWA model at epoch: ', epoch)
        swag_model.collect_model(model)

        if epoch % args.eval_freq == 0:
            swag_model.sample(0.0)
            bn_update(train_loader, swag_model)
            val_loss, val_err, val_iou = train_utils.test(
                swag_model, loaders['val'], criterion)
            print('SWA Val - Loss: {:.4f} | Acc: {:.4f} | IOU: {:.4f}'.format(
                val_loss, 1 - val_err, val_iou))

    ### Checkpoint ###
    if epoch % args.save_freq == 0:
        print('Saving model at Epoch: ', epoch)
        save_checkpoint(dir=args.dir,
                        epoch=epoch,
                        state_dict=model.state_dict(),
                        optimizer=optimizer.state_dict())
        if args.swa and (epoch + 1) > args.swa_start:
            save_checkpoint(
                dir=args.dir,
                epoch=epoch,
                name='swag',
                state_dict=swag_model.state_dict(),
            )

    if args.optimizer == 'RMSProp':
        ### Adjust Lr ###
        if epoch < args.ft_start:
            scheduler.step(epoch=epoch)
        else:
            scheduler.step(epoch=-1)  # reset to args.lr_init
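Example #9

# (Excerpt: resumes model, optimizer, and optional SWA state from
# checkpoints, then trains with an optional learning-rate schedule.)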
    start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

if args.swa and args.swa_resume is not None:
    checkpoint = torch.load(args.swa_resume)
    swag_model.load_state_dict(checkpoint['state_dict'])

columns = [
    'ep', 'lr', 'tr_loss', 'tr_acc', 'te_loss', 'te_acc', 'time', 'mem_usage'
]
if args.swa:
    columns = columns[:-2] + ['swa_te_loss', 'swa_te_acc'] + columns[-2:]
    swag_res = {'loss': None, 'accuracy': None}

utils.save_checkpoint(
    args.dir,
    start_epoch,
    state_dict=model.state_dict(),
    optimizer=optimizer.state_dict()
)

num_iterates = 0

for epoch in range(start_epoch, args.epochs):
    time_ep = time.time()

    if not args.no_schedule:
        lr = schedule(epoch)
        utils.adjust_learning_rate(optimizer, lr)
    else:
        lr = args.lr_init

    print('EPOCH %d. TRAIN' % (epoch + 1))
Example #10
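# (Excerpt: draws posterior samples with Pyro's NUTS/MCMC, checkpoints the
# model, then sets up an ensemble evaluation over the samples.)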
printf, logfile = utils.get_logging_print(
    os.path.join(args.dir, args.log_fname + '-%s.txt'))
print('Saving logs to: %s' % logfile)

nuts_kernel = NUTS(pyro_model.model, step_size=10.)

num_samples = 30

# x_, y_ = loaders["train"].dataset.tensors
mcmc_run = MCMC(nuts_kernel, num_samples=num_samples,
                warmup_steps=10).run(inpts, trgts)
# mcmc_run = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=100).run(islice(loaders["train"], 1000))
samples = torch.cat(list(mcmc_run.marginal(sites="t").support(
    flatten=True).values()), dim=-1)
print(samples)

utils.save_checkpoint(
    args.dir,
    0,
    name='nuts',
    state_dict=pyro_model.state_dict()
)


predictions = np.zeros((len(loaders['test'].dataset), num_classes))
targets = np.zeros(len(loaders['test'].dataset))

printf, logfile = utils.get_logging_print(
    os.path.join(args.dir, args.log_fname + '-%s.txt'))
print('Saving logs to: %s' % logfile)
columns = ['iter ens', 'acc', 'nll']

for i in range(num_samples):
    # utils.bn_update(loaders['train'], model, subset=args.bn_subset)
    pyro_model.eval()
    k = 0