Example #1
            loss.backward()
            optimizer.step()
            minibatch_iter.set_postfix(loss=loss.item())


def test():
    model.eval()
    likelihood.eval()

    correct = 0
    with torch.no_grad(), num_likelihood_samples(16):
        for data, target in test_loader:
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            output = likelihood(model(data))  # This gives us 16 samples from the predictive distribution
            pred = output.probs.mean(0).argmax(-1)  # Taking the mean over all of the samples we've drawn
            correct += pred.eq(target.view_as(pred)).cpu().sum()
    print('Test set: Accuracy: {}/{} ({}%)'.format(
        correct, len(test_loader.dataset), 100. * correct / float(len(test_loader.dataset))
    ))


for epoch in range(1, n_epochs + 1):
    with use_toeplitz(False):
        train(epoch)
        test()
    scheduler.step()
    state_dict = model.state_dict()
    likelihood_state_dict = likelihood.state_dict()
    torch.save({'model': state_dict, 'likelihood': likelihood_state_dict}, 'dkl_cifar_checkpoint.dat')
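This excerpt relies on model, likelihood, optimizer, scheduler, train_loader, test_loader, and n_epochs that are defined in the omitted part of the script. A minimal sketch of the imports the visible code assumes (the two context managers live in gpytorch.settings):

# Assumed imports for the excerpt above (a sketch, not part of the original source)
import torch
from gpytorch.settings import num_likelihood_samples, use_toeplitz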
Example #2
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item()
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {
            'x': model.cpu().train_inputs[0],
            'y': model.cpu().train_targets
        },
        "results": DataFrame(all_outputs)
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    with fast_pred_var(True), \
            use_toeplitz(args.toeplitz), \
            detach_test_caches(True), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            root_pred_var(True):
        main(args)
Example #3
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(args.data_loc,
                                                    args.num_init,
                                                    args.num_total,
                                                    test_is_year=False)
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    model = FixedNoiseOnlineSKIGP(
        init_x,
        init_y.view(-1, 1),
        init_y_var.view(-1, 1),
        GridInterpolationKernel(
            base_kernel=ScaleKernel(
                MaternKernel(
                    ard_num_dims=2,
                    nu=0.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
            grid_size=30,
            num_dims=2,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        ),
        learn_additional_noise=False,
    ).to(device)

    mll = BatchedWoodburyMarginalLogLikelihood(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    with skip_logdet_forward(True), max_root_decomposition_size(
            args.sketch_size), use_toeplitz(args.toeplitz):
        fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)

    model.zero_grad()
    model.eval()

    print("--- Generating initial predictions on test set ----")
    start = time.time()
    with detach_test_caches(True), max_root_decomposition_size(
            args.sketch_size), max_cholesky_size(
                args.cholesky_size), use_toeplitz(args.toeplitz):
        pred_dist = model(test_x)

        pred_mean = pred_dist.mean.detach()
        # pred_var = pred_dist.variance.detach()
    end = time.time()
    print("Elapsed initial prediction time: ", end - start)

    rmse_initial = ((pred_mean.view(-1) - test_y.view(-1))**2).mean().sqrt()
    print("Initial RMSE: ", rmse_initial.item())

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    mll_time_list = []
    rmse_list = []
    for i in range(500, train_x.shape[0]):
        model.zero_grad()
        model.train()

        start = time.time()
        with skip_logdet_forward(True), max_root_decomposition_size(
                args.sketch_size), max_cholesky_size(
                    args.cholesky_size), use_toeplitz(args.toeplitz):
            loss = -mll(model(train_x[:i]), train_y[:i]).sum()

        loss.backward()
        mll_time = time.time() - start

        optimizer.step()
        model.zero_grad()
        optimizer.zero_grad()
        start = time.time()
        with torch.no_grad():
            model.condition_on_observations(
                train_x[i].unsqueeze(0),
                train_y[i].view(1, 1),
                train_y_var[i].view(-1, 1),
                inplace=True,
            )
        fantasy_time = time.time() - start
        mll_time_list.append([mll_time, fantasy_time])

        if i % 25 == 0:
            start = time.time()
            model.eval()
            model.zero_grad()

            with detach_test_caches(), max_root_decomposition_size(
                    args.sketch_size), max_cholesky_size(args.cholesky_size):
                pred_dist = model(test_x)
            end = time.time()

            rmse = (((pred_dist.mean -
                      test_y.view(-1))**2).mean().sqrt().item())
            rmse_list.append([rmse, end - start])
            print("Current RMSE: ", rmse)
            print("Outputscale: ",
                  model.covar_module.base_kernel.raw_outputscale)
            print(
                "Lengthscale: ",
                model.covar_module.base_kernel.base_kernel.raw_lengthscale,
            )

            print("Step: ", i, "Train Loss: ", loss)
            optimizer.param_groups[0]["lr"] *= 0.9

    torch.save({
        "training": mll_time_list,
        "predictions": rmse_list
    }, args.output)
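The standard components used in this example can be imported as sketched below. This is an assumption about provenance: FixedNoiseOnlineSKIGP, BatchedWoodburyMarginalLogLikelihood, and prepare_data are project-specific helpers that must come from the surrounding repository, and fit_gpytorch_torch is available in older BoTorch releases under botorch.optim.fit.

# Likely imports for the standard names above (a sketch; project-specific helpers omitted)
import time
import torch
from gpytorch.kernels import GridInterpolationKernel, MaternKernel, ScaleKernel
from gpytorch.priors import GammaPrior
from gpytorch.settings import (detach_test_caches, max_cholesky_size,
                               max_root_decomposition_size, skip_logdet_forward,
                               use_toeplitz)
from botorch.optim.fit import fit_gpytorch_torch  # present in older BoTorch versions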
Example #4
def main():
    parser = argparse.ArgumentParser(
        description='Deep Kernel Learning with synthetic data.')
    parser.add_argument('--datapath', type=str, help='Path to data directory.')
    parser.add_argument('--batchsize',
                        type=int,
                        default=10,
                        help='Batch size.')
    parser.add_argument('--n_epochs',
                        type=int,
                        default=10,
                        help='Number of epochs.')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        help='Learning rate.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    traindata = Synthetic(args.datapath, 'train', download=True)
    train_loader = DataLoader(traindata, batch_size=args.batchsize)
    num_classes = len(np.unique(traindata.targets))

    testdata = Synthetic(args.datapath, 'test')
    test_loader = DataLoader(testdata, batch_size=args.batchsize)

    feature_extractor = ConvFeatureExtractor().to(device)
    num_features = feature_extractor._filter_sum

    model = DKLModel(feature_extractor, num_dim=5).to(device)
    likelihood = SoftmaxLikelihood(num_features=model.num_dim,
                                   n_classes=num_classes).to(device)

    optimizer = SGD([
        {
            'params': model.feature_extractor.parameters()
        },
        {
            'params': model.gp_layer.hyperparameters(),
            'lr': args.lr * 0.01
        },
        {
            'params': model.gp_layer.variational_parameters()
        },
        {
            'params': likelihood.parameters()
        },
    ],
                    lr=args.lr,
                    momentum=0.9,
                    nesterov=True,
                    weight_decay=0)

    scheduler = MultiStepLR(
        optimizer,
        milestones=[int(0.5 * args.n_epochs), int(0.75 * args.n_epochs)],
        gamma=0.1)

    for epoch in range(1, args.n_epochs + 1):
        with settings.use_toeplitz(False), settings.max_preconditioner_size(0):
            train(epoch, train_loader, optimizer, likelihood, model, device)
            test(test_loader, likelihood, model, device)
        scheduler.step()

        state_dict = model.state_dict()
        likelihood_state_dict = likelihood.state_dict()
        torch.save({
            'model': state_dict,
            'likelihood': likelihood_state_dict
        }, 'dkl_synthetic_checkpoint.dat')
Example #5
def main_loop(flags):
    random.seed(flags.manual_seed)
    np.random.seed(flags.manual_seed)
    torch.manual_seed(flags.manual_seed)
    torch.cuda.manual_seed_all(flags.manual_seed)
    # Check if CUDA is available
    device_str = f"cuda:{flags.gpu}" if (torch.cuda.is_available()
                                         and flags.gpu > -1) else "cpu"
    flags.device = torch.device(device_str)
    print(flags)  # print the configuration
    # Construct model and all other necessary objects
    model, likelihood, mll, optimizer, train_ds, test_ds = construct_model(
        flags)
    print(f"Number of training samples: {len(train_ds)}")

    # don't shuffle if each batch equals the dataset
    shuff = len(train_ds) != flags.batch_size
    train_loader = torch.utils.data.DataLoader(train_ds,
                                               batch_size=flags.batch_size,
                                               shuffle=shuff)
    test_loader = torch.utils.data.DataLoader(test_ds,
                                              batch_size=flags.batch_size)

    # Set checkpoint path
    if flags.save_dir:
        save_dir = Path(flags.save_dir) / flags.model_name
        save_dir.mkdir(parents=True, exist_ok=True)
    else:
        save_dir = Path(mkdtemp())  # Create temporary directory

    best_loss = np.inf
    start_epoch = 1

    # Restore from checkpoint if one exists
    best_checkpoint = save_dir / "model_best.pth.tar"
    previous_checkpoints = list(save_dir.glob("checkpoint_*.pth.tar"))
    if previous_checkpoints:
        latest_chkpt = max(previous_checkpoints
                           )  # `max()` is here equivalent to `sorted(...)[-1]`
        print(f"===> Restoring from '{latest_chkpt}'")
        start_epoch, best_loss = utils.load_checkpoint(latest_chkpt, model,
                                                       likelihood, mll,
                                                       optimizer)

    print(f"Training for {flags.epochs} epochs")
    # Main training loop
    for epoch in range(start_epoch, start_epoch + flags.epochs):
        print(f"Training on epoch {epoch}")
        start = time.time()
        step_counter = (epoch - 1) * len(train_loader)
        with settings.use_toeplitz(device_str == "cpu"):
            # settings.fast_computations(covar_root_decomposition=False),\
            # settings.lazily_evaluate_kernels(state=False),\
            # settings.tridiagonal_jitter(1e-2),\
            # settings.max_cholesky_numel(4096),\
            # settings.max_preconditioner_size(10),\
            train(model, optimizer, train_loader, mll, step_counter, flags)
        end = time.time()
        print(f"Train time for epoch {epoch}: {end - start:0.2f}s")
        if epoch % flags.eval_epochs == 0:
            # do evaluation and update the best loss
            val_loss = evaluate(model, likelihood, test_loader, mll,
                                step_counter, flags)
            if flags.save_best and val_loss < best_loss:
                best_loss = val_loss
                print(f"Best loss yet. Saving in '{best_checkpoint}'")
                utils.save_checkpoint(best_checkpoint, model, likelihood, mll,
                                      optimizer, epoch, best_loss)

        if epoch % flags.chkpt_epochs == 0:
            # Save checkpoint
            chkpt_path = save_dir / f"checkpoint_{epoch:04d}.pth.tar"
            print(f"===> Saving checkpoint in '{chkpt_path}'")
            utils.save_checkpoint(chkpt_path, model, likelihood, mll,
                                  optimizer, epoch, best_loss)

    # if predictions are to be saved or plotted, make predictions on the test set
    if flags.preds_path or flags.plot:
        # print("Loading best model...")
        # utils.load_checkpoint(best_checkpoint, model, likelihood)
        print("Making predictions...")
        pred_mean, pred_var = predict(model, likelihood, test_loader,
                                      flags.device)
        utils.save_predictions(pred_mean, pred_var, save_dir, flags)
        if flags.plot:
            getattr(plot, flags.plot)(pred_mean, pred_var, train_ds, test_ds)
Example #6
    for key in y_means:
        y_means[key] = y_means[key].cpu()

    output_dict = {
        "observations": {
            "x": train_x.cpu(),
            "y": train_y.cpu(),
            "means": y_means,
            "latent_y": latent_y.cpu(),
        },
        "results": DataFrame(all_outputs),
        "args": args
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    use_fast_pred_var = not args.use_exact

    with use_toeplitz(args.toeplitz), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            cholesky_jitter(1e-3), \
            fast_pred_var(use_fast_pred_var), \
            fast_pred_samples(True):
        main(args)
Example #7
    {'params': model.covar.parameters()},
    {'params': model.mean.parameters()},
    {'params': model.likelihood.parameters()},
], lr=0.01)

# "Loss" for GPs - the marginal log likelihood
mll = ExactMarginalLogLikelihood(likelihood, model)

training_iterations = 60


def train():
    iterator = tqdm(range(training_iterations))
    for i in iterator:
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(x_train)
        # Calc loss and backprop derivatives
        loss = -mll(output, y_train)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        optimizer.step()

train()

model.eval()
likelihood.eval()
with torch.no_grad(), use_toeplitz(False), fast_pred_var():
    preds = model(x_test)
    print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - y_test))))
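
Example #7 is truncated: the optimizer call opens before the excerpt begins, and model, likelihood, x_train, y_train, x_test, and y_test are created earlier. The sketch below shows one plausible setup the excerpt could follow; the attribute names mean and covar are chosen to match the parameter groups above, and the kernel choice, placeholder data, and imports are assumptions rather than the original script.

# Hypothetical setup assumed by the excerpt (a sketch with placeholder data)
import torch
import gpytorch
from gpytorch.mlls import ExactMarginalLogLikelihood
from gpytorch.settings import fast_pred_var, use_toeplitz
from tqdm import tqdm

x_train = torch.linspace(0, 1, 100)                            # placeholder training inputs
y_train = torch.sin(x_train * 6.28) + 0.1 * torch.randn(100)   # placeholder training targets
x_test = torch.linspace(0, 1, 51)                              # placeholder test inputs
y_test = torch.sin(x_test * 6.28)

class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        # attribute names chosen to match model.mean / model.covar in the excerpt
        self.mean = gpytorch.means.ConstantMean()
        self.covar = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean(x), self.covar(x))

likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPRegressionModel(x_train, y_train, likelihood)
model.train()
likelihood.train()

The truncated optimizer definition at the top of the excerpt would then continue directly from this setup, e.g. optimizer = torch.optim.Adam([...], lr=0.01) with the parameter groups shown above.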