Example #1
    def fit(self, inputs, targets, num_epochs, test_dataset=None):
        streaming_state = self.gp.streaming
        self.gp.set_streaming(False)
        dataset = torch.utils.data.TensorDataset(inputs, targets)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=1024, shuffle=True)
        num_batches = len(dataloader)

        self.mll = VariationalELBO(self.gp.likelihood, self.gp, num_data=len(dataset), beta=1.0)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, num_epochs, 1e-4)
        records = []
        for epoch in range(num_epochs):
            self.train()
            avg_loss = 0
            for input_batch, target_batch in dataloader:
                self.optimizer.zero_grad()
                train_dist = self(input_batch)
                target_batch = self._reshape_targets(target_batch)
                loss = -self.mll(train_dist, target_batch).mean()
                loss.backward()
                self.optimizer.step()
                avg_loss += loss.item() / num_batches
            lr_scheduler.step()

            rmse = nll = float('NaN')
            if test_dataset is not None:
                test_x, test_y = test_dataset[:]
                with torch.no_grad():
                    rmse, nll = self.evaluate(test_x, test_y)

            records.append(dict(train_loss=avg_loss, test_rmse=rmse, test_nll=nll,
                                noise=self.gp.likelihood.noise.mean().item(), epoch=epoch + 1))

        self.gp.set_streaming(streaming_state)
        self.eval()
        return records
Example #2
    def _update_hyperparameters(self):

        # Find optimal model hyperparameters
        self.train()
        self.likelihood.train()

        # Use the adam optimizer
        optimizer = Adam(self.parameters(), lr=0.1)

        # "Loss" for GPs - the marginal log likelihood
        # num_data refers to the number of training datapoints
        mll = VariationalELBO(self.likelihood, self,
                              self.train_targets.numel())

        training_iterations = 50
        for i in range(training_iterations):
            # Zero backpropped gradients from previous iteration
            optimizer.zero_grad()
            # Get predictive output
            output = self(self.train_inputs[0])
            # Calc loss and backprop gradients
            loss = -mll(output, self.train_targets)
            loss.backward()
            # print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
            optimizer.step()
Example #3
 def __init__(self, likelihood, model, num_data, base_loss):
     super().__init__()
     self.mll = DeepApproximateMLL(
         VariationalELBO(likelihood=likelihood,
                         model=model,
                         num_data=num_data))
     self.base_loss = base_loss
Example #4
 def _set_mll(self, num_data, mll_conf):
     """mllとしてself._mllの指示の元、インスタンスを立てるメソッド
     """
     # mllのインスタンスを立てる
     if self._mll in variationalelbo:
         return VariationalELBO(
             self.likelihood,
             self.model,
             num_data=num_data,
             **mll_conf
         )
     elif self._mll in predictiveloglikelihood:
         return PredictiveLogLikelihood(
             self.likelihood,
             self.model,
             num_data=num_data,
             **mll_conf
         )
     elif self._mll in gammarobustvariationalelbo:
         return GammaRobustVariationalELBO(
             self.likelihood,
             self.model,
             num_data=num_data,
             **mll_conf
         )
     else:
         raise ValueError(f'mll={self._mll} is not available')
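
# Note (not part of the original snippet above): the membership tests assume module-level
# collections of accepted aliases for self._mll. A minimal, hypothetical sketch of what
# they might look like; the specific alias strings are assumptions:
variationalelbo = ('VariationalELBO', 'variational_elbo')            # hypothetical aliases
predictiveloglikelihood = ('PredictiveLogLikelihood', 'pll')         # hypothetical aliases
gammarobustvariationalelbo = ('GammaRobustVariationalELBO', 'robust')  # hypothetical aliases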
Example #5
    def setUp(self):
        super().setUp()
        train_X = torch.rand(10, 1, device=self.device)
        train_y = torch.sin(train_X) + torch.randn_like(train_X) * 0.2

        self.model = SingleTaskVariationalGP(
            train_X=train_X, likelihood=GaussianLikelihood()
        ).to(self.device)

        mll = VariationalELBO(self.model.likelihood, self.model.model, num_data=10)
        loss = -mll(self.model.likelihood(self.model(train_X)), train_y).sum()
        loss.backward()
Example #6
def train(model, train_loader, n_iter=50):
    num_data = train_loader.dataset.X.shape[0]
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    elbo = DeepApproximateMLL(
        VariationalELBO(model.likelihood, model, num_data=num_data))
    for i in range(n_iter):
        for x, y in train_loader:
            if torch.cuda.is_available():
                x, y = x.cuda(), y.cuda()
            optimizer.zero_grad()
            output = model(x)
            loss = -elbo(output, y)
            loss.backward()
            optimizer.step()
        print("Iter={}\t Loss:{:.3f}".format(i, loss.item()))
Example #7
 def optimize(self, X, y, epochs=500, lr=0.01, samples=10):
     opt = torch.optim.Adam([{'params': self.parameters()}], lr=lr)
     mll = DeepApproximateMLL(
         VariationalELBO(self.likelihood, self, X.shape[-2]))
     self.train()
     self.likelihood.train()
     lls = []
     gpytorch.settings.skip_posterior_variances(state=False)
     for i in range(epochs):
         with gpytorch.settings.num_likelihood_samples(samples):
             opt.zero_grad()
             output = self(X)
             loss = -mll(output, y)
             loss.backward()
             noise = self.likelihood.noise.item()
             print('Iter: %d/%d,   Loss: %.5f,   Likelihood noise: %.3f' %
                   (i + 1, epochs, loss.item(), noise))
             lls.append(loss.item())
             opt.step()
     return lls
Example #8
        def initialize_model(X, Y, old_model=None, **kwargs):
            if old_model is None:
                if args.dim == 3:
                    wiski_grid_size = 10
                elif args.dim == 2:
                    wiski_grid_size = 30

                grid_list = create_grid([wiski_grid_size] * args.dim, grid_bounds=bounds)
                inducing_points = (
                    torch.stack([x.reshape(-1) for x in torch.meshgrid(grid_list)])
                    .t()
                    .contiguous()
                    .clone()
                )

                likelihood = GaussianLikelihood()
                model_base = VariationalGPModel(
                    inducing_points,
                    likelihood=likelihood,
                    beta=1.0,
                    learn_inducing_locations=True,
                )
                model_obj = ApproximateGPyTorchModel(
                    model_base, likelihood, num_outputs=1
                )
                model_base.train_inputs = [X]
                model_base.train_targets = Y.view(-1)

                # we don't implement fixednoiseGaussian likelihoods for the streaming setting
                if args.fixed_noise:
                    model_obj.likelihood.noise = args.noise ** 2
                    model_obj.likelihood.requires_grad = False
            else:
                model_obj = old_model
                model_obj.train_inputs = [X]
                model_obj.train_targets = Y.view(-1)

            mll = VariationalELBO(
                model_obj.likelihood, model_obj.model, num_data=X.shape[-2]
            )
            return model_obj, mll
Example #9
    def _initialize_models(self, ds_train):
        """
        Function to initialize the feature extractor, GP, and the optimizer before training.
        """

        # Initialize Feature Extractor (Residual Net)
        self.feature_extractor = FCResNet(input_dim=self.input_dim,
                                          features=self.features,
                                          depth=self.depth,
                                          spectral_normalization=True,
                                          coeff=self.coeff,
                                          n_power_iterations=FC_N_POWER_ITERATIONS,
                                          dropout_rate=FC_DROPOUT_RATE,
                                          )
        initial_inducing_points, initial_lengthscale = initial_values_for_GP(
            ds_train, self.feature_extractor, self.n_inducing_points
        )

        # Initialize Gaussian Process
        gp = GP(
            num_outputs=self.num_outputs,
            initial_lengthscale=initial_lengthscale,
            initial_inducing_points=initial_inducing_points,
            kernel=self.kernel,
        )

        # Initialize the overall Deep Kernel Learning GP model
        self.model = DKL_GP(self.feature_extractor, gp)

        # Classification task with two classes
        self.likelihood = SoftmaxLikelihood(num_classes=NUM_OUTPUTS, mixing_weights=False)
        self.loss_fn = VariationalELBO(self.likelihood, gp, num_data=len(ds_train))

        # Initialize models' optimizer
        parameters = [
            {"params": self.model.feature_extractor.parameters(), "lr": self.lr},
            {"params": self.model.gp.parameters(), "lr": self.lr},
            {"params": self.likelihood.parameters(), "lr": self.lr},
        ]

        self.optimizer = torch.optim.Adam(parameters, weight_decay=OPTIMIZER_WEIGHT_DECAY)
Example #10
def train(epoch, train_loader, optimizer, likelihood, model, device):
    model.train()
    likelihood.train()

    mll = VariationalELBO(likelihood,
                          model.gp_layer,
                          num_data=len(train_loader.dataset))

    train_loss = 0.
    for idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = -mll(output, target)
        loss.backward()
        optimizer.step()
        if (idx + 1) % 25 == 0:
            current_loss = loss.item()
            print(
                f'Epoch: {epoch} [{idx+1}/{len(train_loader)}], Loss: {current_loss:.6f}'
            )
Example #11
    def fit(self, inputs, targets, num_epochs, test_dataset=None):
        streaming_state = self.gp.streaming
        self.gp.set_streaming(False)
        dataset = torch.utils.data.TensorDataset(inputs, targets)
        dataloader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=256,
                                                 shuffle=True)
        self.mll = VariationalELBO(self.gp.likelihood,
                                   self.gp,
                                   num_data=len(dataset),
                                   beta=1.0)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, num_epochs, 1e-4)
        records = []
        num_batches = len(dataloader)
        for epoch in range(num_epochs):
            self.train()
            avg_loss = 0
            for input_batch, target_batch in dataloader:
                self.optimizer.zero_grad()
                features = self.stem(input_batch)
                train_dist = self.gp(features)
                loss = -self.mll(train_dist, target_batch)
                loss.backward()
                self.optimizer.step()
                avg_loss += loss.item() / num_batches
            lr_scheduler.step()

            test_acc = float('NaN')
            if test_dataset is not None:
                test_x, test_y = test_dataset[:]
                with torch.no_grad():
                    test_pred = self.predict(test_x)
                test_acc = test_pred.eq(test_y).float().mean().item()

            records.append(
                dict(train_loss=avg_loss, test_acc=test_acc, epoch=epoch + 1))
        self.gp.set_streaming(streaming_state)
        self.eval()
        return records
Example #12
    def test_variational_setUp(self):
        for dtype in [torch.float, torch.double]:
            train_X = torch.rand(10, 1, device=self.device, dtype=dtype)
            train_y = torch.randn(10, 3, device=self.device, dtype=dtype)

            for ty, num_out in [[train_y, 3], [train_y, 1], [None, 3]]:
                batched_model = SingleTaskVariationalGP(
                    train_X,
                    train_Y=ty,
                    num_outputs=num_out,
                    learn_inducing_points=False,
                ).to(self.device)
                mll = VariationalELBO(
                    batched_model.likelihood, batched_model.model, num_data=10
                )

                with torch.enable_grad():
                    loss = -mll(
                        batched_model.likelihood(batched_model(train_X)), train_y
                    ).sum()
                    loss.backward()

                # ensure that inducing points do not require grad
                model_var_strat = batched_model.model.variational_strategy
                self.assertEqual(
                    model_var_strat.base_variational_strategy.inducing_points.grad,
                    None,
                )

                # but that the covariance does have a gradient
                self.assertIsNotNone(
                    batched_model.model.covar_module.raw_outputscale.grad
                )

                # check that we always have three outputs
                self.assertEqual(batched_model._num_outputs, 3)
                self.assertIsInstance(
                    batched_model.likelihood, MultitaskGaussianLikelihood
                )
Example #13
    def update(self, inputs, targets, update_stem=True):
        if self.gp.streaming:
            self.gp.register_streaming_loss()
        inputs = inputs.view(-1, self.stem.input_dim)
        targets = targets.view(-1, self.target_dim)
        self.mll = VariationalELBO(self.gp.likelihood, self.gp, num_data=inputs.size(0),
                                   beta=self._prior_beta)
        self.train()
        for _ in range(self.num_update_steps):
            self.optimizer.zero_grad()
            features = self._get_features(inputs)
            features = features if update_stem else features.detach()
            train_dist = self.gp(features)
            targets = self._reshape_targets(targets)
            loss = -self.mll(train_dist, targets).mean()
            loss.backward()
            self.optimizer.step()

        self.eval()
        self._raw_inputs = [torch.cat([*self._raw_inputs, inputs])]
        stem_loss = gp_loss = loss.item()
        return stem_loss, gp_loss
Example #14
    def fit(self,
            train_data,
            holdout_data,
            objective='elbo',
            max_epochs=None,
            normalize=True,
            early_stopping=False,
            pretrain=False,
            reinit_inducing_loc=False,
            verbose=False,
            max_steps=None,
            **kwargs):
        """
        Train the model on `train_data` by maximizing either the `VariationalELBO` or `PredictiveLogLikelihood` objective.
        Args:
            train_data (tuple of np.array objects)
            holdout_data (tuple of np.array objects)
            objective (str): "pll" or "elbo"
            max_epochs (int): max number of epochs to train
            normalize (bool): if True, z-score inputs and targets
            early_stopping (bool): If True, use holdout loss as convergence criterion.
                                   Requires holdout_ratio > 0.
            pretrain (bool): If True, pretrain the feature extractor with the MSE objective.
                             Requires self.feature_dim == self.label_dim.
            reinit_inducing_loc (bool): If True, initialize inducing points with k-means.
            max_steps (int)
            verbose (bool)

        Return:
            metrics (dict)
        """
        train_data = torch.utils.data.TensorDataset(
            torch.tensor(train_data[0], dtype=torch.get_default_dtype()),
            torch.tensor(train_data[1], dtype=torch.get_default_dtype()))
        holdout_data = torch.utils.data.TensorDataset(
            torch.tensor(holdout_data[0], dtype=torch.get_default_dtype()),
            torch.tensor(holdout_data[1], dtype=torch.get_default_dtype()))

        if objective == 'elbo':
            obj_fn = VariationalELBO(self.likelihood,
                                     self,
                                     num_data=len(train_data))
        elif objective == 'pll':
            obj_fn = PredictiveLogLikelihood(self.likelihood,
                                             self,
                                             num_data=len(train_data),
                                             beta=1e-3)
        else:
            raise RuntimeError("unrecognized model objective")

        val_x, val_y = holdout_data[:]
        eval_loss = eval_mse = float('nan')
        if holdout_data and early_stopping:
            eval_loss, eval_mse = self._get_val_metrics(
                obj_fn, torch.nn.MSELoss(), val_x, val_y)
        if eval_loss != eval_loss or not early_stopping:
            snapshot_loss = 1e6
        else:
            snapshot_loss = eval_loss
        snapshot = (0, snapshot_loss)

        if verbose:
            print(
                f"[ SVGP ] initial holdout loss: {eval_loss:.4f}, MSE: {eval_mse:.4f}"
            )
        self.load_state_dict(self._train_ckpt)

        if normalize:
            train_inputs, train_labels = train_data[:]
            self.input_mean, self.input_std = train_inputs.mean(
                0), train_inputs.std(0)
            self.label_mean, self.label_std = train_labels.mean(
                0), train_labels.std(0)
            train_data = TensorDataset(train_inputs,
                                       (train_labels - self.label_mean) /
                                       self.label_std)

        if pretrain:
            if self.feature_dim == self.label_dim:
                if verbose:
                    print("[ SVGP ] pretraining feature extractor")
                self.nn.fit(
                    dataset=train_data,
                    holdout_ratio=0.,
                    early_stopping=False,
                )
            else:
                raise RuntimeError(
                    "features and labels must be the same size to pretrain")

        if reinit_inducing_loc:
            if verbose:
                print(
                    "[ SVGP ] initializing inducing point locations w/ k-means"
                )
            train_inputs, _ = train_data[:]
            self.set_inducing_loc(train_inputs)

        if verbose:
            print(
                f"[ SVGP ] training w/ objective {objective} on {len(train_data)} examples"
            )
        optimizer = Adam(self.optim_param_groups)
        if reinit_inducing_loc:
            temp = self.max_epochs_since_update
            self.max_epochs_since_update = 8
            loop_metrics, snapshot = self._training_loop(
                train_data,
                holdout_data,
                optimizer,
                obj_fn,
                snapshot,
                max_epochs,
                early_stopping,
                max_steps,
            )
            metrics = loop_metrics
            self.max_epochs_since_update = temp
            if verbose:
                print("[ SVGP ] dropping learning rate")

        for group in optimizer.param_groups:
            group['lr'] /= 10
        loop_metrics, snapshot = self._training_loop(train_data, holdout_data,
                                                     optimizer, obj_fn,
                                                     snapshot, max_epochs,
                                                     early_stopping, max_steps)
        if reinit_inducing_loc:
            for key in metrics.keys():
                metrics[key] += (loop_metrics[key])
        else:
            metrics = loop_metrics

        self._train_ckpt = deepcopy(self.state_dict())
        self.load_state_dict(self._eval_ckpt)
        self.train()  # TODO investigate GPyTorch load_state_dict bug
        eval_loss, eval_mse = self._get_val_metrics(obj_fn, torch.nn.MSELoss(),
                                                    val_x, val_y)
        metrics['holdout_mse'] = eval_mse
        metrics['holdout_loss'] = eval_loss

        if verbose:
            print(
                f"[ SVGP ] holdout loss: {metrics['val_loss'][-1]:.4f}, MSE: {metrics['val_mse'][-1]:.4f}"
            )
            print(f"[ SVGP ] loading snapshot from epoch {snapshot[0]}")
            print(
                f"[ SVGP ] final holdout loss: {eval_loss:.4f}, MSE: {eval_mse:.4f}"
            )

        self.eval()
        return metrics
Example #15
    # Setting shapes
    N = len(Y)
    data_dim = Y.shape[1]
    latent_dim = 12
    n_inducing = 25
    pca = True

    # Model
    model = My_GPLVM_Model(N, data_dim, latent_dim, n_inducing, pca=pca)

    # Likelihood
    likelihood = GaussianLikelihood(batch_shape=model.batch_shape)

    # Declaring objective to be optimised along with optimiser
    mll = VariationalELBO(likelihood, model, num_data=len(Y))

    optimizer = torch.optim.Adam([{
        'params': model.parameters()
    }, {
        'params': likelihood.parameters()
    }],
                                 lr=0.01)

    # Training loop - optimises the objective wrt kernel hypers, variational params and inducing inputs
    # using the optimizer provided.

    loss_list = []
    iterator = trange(10000, leave=True)
    batch_size = 100
    for i in iterator:
Example #16
model = DeepGaussianProcess(x_train_shape=x_train.shape)
if torch.cuda.is_available():
    model = model.cuda()

# Because deep GPs use some amount of internal sampling (even in the stochastic variational setting),
# we need to handle the objective function (e.g. the ELBO) in a slightly different way.
num_samples = 10

optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)
'''
DeepApproximateMLL simply sums the ELBO losses of the individual layers.
'''

marginal_loglikelihood = DeepApproximateMLL(
    VariationalELBO(model.likelihood, model, x_train.shape[-2]))

n_epochs = 100
for i in range(n_epochs):

    for x_batch, y_batch in train_loader:
        with num_likelihood_samples(num_samples):
            optimizer.zero_grad()
            output = model(x_batch)
            loss = -marginal_loglikelihood(output, y_batch)
            loss.backward()
            optimizer.step()

    print(f"epochs {i}, loss {loss.item()}")

## test and evaluate the model
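# The evaluation code is not included in the excerpt above. A minimal sketch, assuming a
# `test_loader` built like `train_loader` and the usual GPyTorch deep-GP convention of
# averaging predictions over the likelihood samples drawn inside the model:
model.eval()
means, variances = [], []
with torch.no_grad(), num_likelihood_samples(num_samples):
    for x_batch, y_batch in test_loader:
        preds = model.likelihood(model(x_batch))
        # the leading batch dimension indexes likelihood samples; average it out
        means.append(preds.mean.mean(0))
        variances.append(preds.variance.mean(0))
test_mean = torch.cat(means)
test_variance = torch.cat(variances)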
Example #17
def main():
    # Seed the random number generators
    np.random.seed(0)
    torch.manual_seed(0)
    
    # Create some toy data
    n = 500
    x = np.sort(np.random.uniform(0, 1, n))
    f = true_f(x)
    y = scipy.stats.bernoulli.rvs(scipy.special.expit(f))
    
    ## Uncomment to show raw data
    # plt.scatter(x, y, alpha=0.5)
    # plt.xlabel('$x$')
    # plt.ylabel('$y$')
    # plt.yticks([0, 1])
    # plt.show()

    ## Uncomment to show logits ("f")
    # fig, ax = plt.subplots()
    # x_plot = np.linspace(0, 1, 100)
    # ax.plot(x_plot, true_f(x_plot), alpha=0.5)
    # ax.scatter(x, f, alpha=0.5)
    # plt.show()

    train_x = torch.from_numpy(x.astype(np.float32))
    train_y = torch.from_numpy(y.astype(np.float32))
    
    # Set initial inducing points
    inducing_points = torch.rand(50)

    # Initialize model and likelihood
    model = GPClassificationModel(inducing_points=inducing_points)
    likelihood = BernoulliLikelihood()
    
    # Set number of epochs
    training_iter = 1000

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # num_data refers to the number of training datapoints
    mll = VariationalELBO(likelihood, model, train_y.numel())

    iterator = tqdm(range(training_iter))

    for _ in iterator:

        # Zero backpropped gradients from previous iteration
        optimizer.zero_grad()
        # Get predictive output
        output = model(train_x)
        # Calc loss and backprop gradients
        loss = -mll(output, train_y)
        loss.backward()

        optimizer.step()

        iterator.set_postfix(loss=loss.item())
    
    # Show results
    test_x = torch.linspace(0, 1, 101)
    f_preds = model(test_x)

    pred = f_preds.sample(torch.Size((1000,))).numpy()

    fig, ax = plt.subplots()
    plot_gp_dist(ax, pred, test_x)
    ax.plot(test_x, true_f(test_x), alpha=0.5)
    plt.show()
Example #18
    def fit(self,
            inputs,
            targets,
            covariances=None,
            n_samples=5000,
            max_iter=10000,
            learning_rate=1e-3,
            rtol=1e-4,
            ntol=100,
            auto=True,
            verbose=True):
        '''
        Optimises the hyperparameters of the GP kernel and likelihood.
        inputs: (nx2) numpy array
        targets: (n,) numpy array
        n_samples: number of samples to take from the inputs/targets at every optimisation epoch
        max_iter: maximum number of optimisation epochs
        learning_rate: optimiser step size
        rtol: relative change in -MLL values over ntol epochs that determines termination if auto==True
        ntol: number of epochs required to maintain rtol in order to terminate if auto==True
        auto: if True terminate based on rtol and ntol, else terminate at max_iter
        verbose: if True show progress bar, else nothing
        '''

        # sanity
        assert inputs.shape[0] == targets.shape[0]
        assert inputs.shape[1] == 2
        if covariances is not None:
            assert covariances.shape[0] == inputs.shape[0]
            assert covariances.shape[1] == covariances.shape[2] == 2

        # inducing points randomly distributed over data
        indpts = np.random.choice(inputs.shape[0], self.m, replace=True)
        self.variational_strategy.inducing_points.data = torch.from_numpy(
            inputs[indpts]).to(self.device).float()

        # number of random samples
        n = inputs.shape[0]
        n = n_samples if n >= n_samples else n

        # objective
        mll = VariationalELBO(self.likelihood, self, n, combine_terms=True)

        # stochastic optimiser
        opt = torch.optim.Adam(self.parameters(), lr=learning_rate)

        # convergence criterion
        if auto:
            criterion = ExpMAStoppingCriterion(rel_tol=rtol, n_window=ntol)

        # epoch iterator
        epochs = range(max_iter)
        epochs = tqdm.tqdm(epochs) if verbose else epochs

        # train
        self.train()
        self.likelihood.train()
        for _ in epochs:

            # randomly sample from the dataset
            idx = np.random.choice(inputs.shape[0], n, replace=False)
            input = torch.from_numpy(inputs[idx]).to(self.device).float()

            # if the inputs are distributional, sample them
            if covariances is not None:
                covariance = torch.from_numpy(covariances[idx]).to(
                    self.device).float()
                input = MultivariateNormal(input, covariance).rsample()

            # training targets
            target = torch.from_numpy(targets[idx]).to(self.device).float()

            # compute loss, compute gradient, and update
            loss = -mll(self(input), target)
            opt.zero_grad()
            loss.backward()
            opt.step()

            # verbosity and convergence check
            if verbose:
                epochs.set_description('Loss {:.4f}'.format(loss.item()))
            if auto and criterion.evaluate(loss.detach()):
                break
Example #19
if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

n_epochs = 5
lr = 0.1

optimizer = SGD([{'params': model.feature_extractor.parameters(), 'weight_decay': 1e-4},
                 {'params': model.gp_layer.hyperparameters(), 'lr': lr * 0.01},
                 {'params': model.variational_parameters()},
                 {'params': likelihood.parameters()}],
                lr=lr, momentum=0.9, nesterov=True, weight_decay=0)

scheduler = MultiStepLR(optimizer, milestones=[0.5 * n_epochs, 0.75 * n_epochs], gamma=0.1)

mll = VariationalELBO(likelihood, model.gp_layer, num_data=len(train_loader.dataset))


def train(epoch):
    model.train()
    likelihood.train()

    minibatch_iter = tqdm(train_loader, desc=f"(Epoch {epoch}) Minibatch")
    with num_likelihood_samples(8):
        for data, target in minibatch_iter:
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = -mll(output, target)
            loss.backward()
            optimizer.step()
Example #20
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(args.data_loc,
                                                    args.num_init,
                                                    args.num_total,
                                                    test_is_year=False,
                                                    seed=args.seed)
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    likelihood = FixedNoiseGaussianLikelihood(noise=init_y_var)
    grid_pts = create_grid(grid_sizes=[30, 30],
                           grid_bounds=torch.tensor([[0., 1.], [0., 1.]]))
    induc_points = torch.cat(
        [x.reshape(-1, 1) for x in torch.meshgrid(grid_pts)], dim=-1)

    model = VariationalGPModel(
        inducing_points=induc_points,
        mean_module=gpytorch.means.ZeroMean(),
        covar_module=ScaleKernel(
            MaternKernel(
                ard_num_dims=2,
                nu=0.5,
                lengthscale_prior=GammaPrior(3.0, 6.0),
            ),
            outputscale_prior=GammaPrior(2.0, 0.15),
        ),
        streaming=True,
        likelihood=likelihood,
        beta=args.beta,
        learn_inducing_locations=args.learn_inducing,
    ).to(device)
    mll = VariationalELBO(model.likelihood,
                          model,
                          beta=args.beta,
                          num_data=args.num_init)

    print("---- Fitting initial model ----")
    start = time.time()
    model.train()
    model.zero_grad()
    optimizer = torch.optim.Adam(model.parameters(), lr=10 * args.lr_init)
    model, loss = fit_variational_model(mll,
                                        model,
                                        optimizer,
                                        init_x,
                                        init_y,
                                        maxiter=1000)
    end = time.time()
    print("Elapsed fitting time: ", end - start)

    print("--- Now computing initial RMSE")
    model.eval()
    with gpytorch.settings.skip_posterior_variances(True):
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean - test_y)**2).mean().sqrt()

    print("---- Initial RMSE: ", pred_rmse.item())

    all_outputs = []
    start_ind = init_x.shape[0]
    end_ind = int(start_ind + args.batch_size)

    current_x = init_x
    current_y = init_y
    current_y_var = init_y_var

    for step in range(args.num_steps):
        if step > 0 and step % 25 == 0:
            print("Beginning step ", step)

        total_time_step_start = time.time()

        if step > 0:
            print("---- Fitting model ----")
            start = time.time()
            model.train()
            model.zero_grad()
            model.likelihood = FixedNoiseGaussianLikelihood(current_y_var)
            mll = VariationalELBO(model.likelihood,
                                  model,
                                  beta=args.beta,
                                  num_data=args.num_init)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=args.lr_init * 0.99**step)
            model, loss = fit_variational_model(mll,
                                                model,
                                                optimizer,
                                                current_x,
                                                current_y,
                                                maxiter=300)

            model.zero_grad()
            end = time.time()
            print("Elapsed fitting time: ", end - start)
            # print("Named parameters: ", list(model.named_parameters()))

        if args.acqf == "max_post_var" and not args.random:
            candidates, acq_value = generate_candidates(model,
                                                        args.batch_size,
                                                        device,
                                                        maxiter=300)
        elif args.acqf == "max_test_var" and not args.random:
            model.eval()
            vals, inds = model(test_x).variance.sort()
            acq_value = vals[-args.batch_size:].mean().detach()
            candidates = test_x[inds[-args.batch_size:]]
        else:
            candidates = torch.rand(args.batch_size,
                                    train_x.shape[-1],
                                    device=device,
                                    dtype=train_x.dtype)
            acq_value = torch.zeros(1)
            model.eval()
            _ = model(test_x[:10])  # to init caches

        print("---- Finished optimizing; now querying dataset ---- ")
        with torch.no_grad():
            covar_dists = model.covar_module(candidates, train_x)
            nearest_points = covar_dists.evaluate().argmax(dim=-1)
            new_x = train_x[nearest_points]
            new_y = train_y[nearest_points]
            new_y_var = train_y_var[nearest_points]

            todrop = torch.tensor(
                [x in nearest_points for x in range(train_x.shape[0])])
            train_x, train_y, train_y_var = train_x[~todrop], train_y[
                ~todrop], train_y_var[~todrop]
            print("New train_x shape", train_x.shape)
            print("--- Now updating model with simulator ----")
            current_x = torch.cat((current_x, new_x), dim=0)
            current_y = torch.cat((current_y, new_y), dim=0)
            current_y_var = torch.cat((current_y_var, new_y_var), dim=0)

        print("--- Now computing updated RMSE")
        model.eval()
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean.view(-1) -
                      test_y.view(-1))**2).mean().sqrt()
        pred_avg_variance = test_pred.variance.mean()

        total_time_step_elapsed_time = time.time() - total_time_step_start
        step_output_list = [
            total_time_step_elapsed_time,
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item(),
            loss.item()
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {
            'x': current_x,
            'y': current_y
        },
        "results": DataFrame(all_outputs)
    }
    torch.save(output_dict, args.output)
Example #21
NUM_OUTPUT_DIMS = 1  # (the output/input dimension between hidden layer/last layer)

# Control shapes so the DGP sees the correct input dimensions.
# x should have shape [num_training_points, dimension]
print("x.shape:", x.shape)  # [52, 1]
print("x.shape[0]:", x.shape[0])  # [52] num_training_points
print("x.shape[-1]:", x.shape[-1])  # [1] input dimension
print("x.shape[-2]:", x.shape[-2])  # [52] num_training_points

# Initialize the model with x.shape; requires data of the form [num_training_points, dimension]
model = DGP(x.shape)
# Set up the optimizer; Adam generally works well thanks to its adaptive learning rate.
opt = torch.optim.Adam([{'params': model.parameters()}], lr=LEARNING_RATE)
# Set up the marginal likelihood approximation using the DeepApproximateMLL wrapper.
# The VariationalELBO needs to know the number of training points, i.e. 52.
mll = DeepApproximateMLL(VariationalELBO(model.likelihood, model, x.shape[-2]))

# Set model and likelihood in training mode. For some implementations, gradient tracking
# must be switched on or off depending on whether we are training or evaluating.
model.train()
model.likelihood.train()

lls = []  # to save loss

# Train by iteration and updating "weights" and parameters.
gpytorch.settings.skip_posterior_variances(
    state=False)  # this is False by default, but set it explicitly to be sure
for i in range(EPOCHS):
    # Train with a certain number of samples by specifying SAMPLES; otherwise the default of 10 samples is used.
    with gpytorch.settings.num_likelihood_samples(SAMPLES):
        opt.zero_grad()
Example #22
    # Declaring model with initial inducing inputs and latent prior

    latent_prior = NormalPrior(X_prior_mean, torch.ones_like(X_prior_mean))
    model = GPLVM(Y=Y.T,
                  latent_dim=n_latent_dims,
                  n_inducing=n_inducing,
                  X_init=None,
                  pca=True,
                  latent_prior=None,
                  kernel=None,
                  likelihood=None)

    # Declaring objective to be optimised along with optimiser

    mll = VariationalELBO(model.likelihood, model, num_data=len(Y.T))

    optimizer = torch.optim.Adam([
        {
            'params': model.parameters()
        },
    ], lr=0.01)

    # Training loop

    losses = model.run(mll, optimizer, steps=2000)

    # Plot result

    plt.figure(figsize=(8, 6))
    colors = plt.get_cmap("tab10").colors[::]
Example #23
def main(hparams):
    results_dir = get_results_directory(hparams.output_dir)
    writer = SummaryWriter(log_dir=str(results_dir))

    ds = get_dataset(hparams.dataset, root=hparams.data_root)
    input_size, num_classes, train_dataset, test_dataset = ds

    hparams.seed = set_seed(hparams.seed)

    if hparams.n_inducing_points is None:
        hparams.n_inducing_points = num_classes

    print(f"Training with {hparams}")
    hparams.save(results_dir / "hparams.json")

    if hparams.ard:
        # Hardcoded to WRN output size
        ard = 640
    else:
        ard = None

    feature_extractor = WideResNet(
        spectral_normalization=hparams.spectral_normalization,
        dropout_rate=hparams.dropout_rate,
        coeff=hparams.coeff,
        n_power_iterations=hparams.n_power_iterations,
        batchnorm_momentum=hparams.batchnorm_momentum,
    )

    initial_inducing_points, initial_lengthscale = initial_values_for_GP(
        train_dataset, feature_extractor, hparams.n_inducing_points
    )

    gp = GP(
        num_outputs=num_classes,
        initial_lengthscale=initial_lengthscale,
        initial_inducing_points=initial_inducing_points,
        separate_inducing_points=hparams.separate_inducing_points,
        kernel=hparams.kernel,
        ard=ard,
        lengthscale_prior=hparams.lengthscale_prior,
    )

    model = DKL_GP(feature_extractor, gp)
    model = model.cuda()

    likelihood = SoftmaxLikelihood(num_classes=num_classes, mixing_weights=False)
    likelihood = likelihood.cuda()

    elbo_fn = VariationalELBO(likelihood, gp, num_data=len(train_dataset))

    parameters = [
        {"params": feature_extractor.parameters(), "lr": hparams.learning_rate},
        {"params": gp.parameters(), "lr": hparams.learning_rate},
        {"params": likelihood.parameters(), "lr": hparams.learning_rate},
    ]

    optimizer = torch.optim.SGD(
        parameters, momentum=0.9, weight_decay=hparams.weight_decay
    )

    milestones = [60, 120, 160]

    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.2
    )

    def step(engine, batch):
        model.train()
        likelihood.train()

        optimizer.zero_grad()

        x, y = batch
        x, y = x.cuda(), y.cuda()

        y_pred = model(x)
        elbo = -elbo_fn(y_pred, y)

        elbo.backward()
        optimizer.step()

        return elbo.item()

    def eval_step(engine, batch):
        model.eval()
        likelihood.eval()

        x, y = batch
        x, y = x.cuda(), y.cuda()

        with torch.no_grad():
            y_pred = model(x)

        return y_pred, y

    trainer = Engine(step)
    evaluator = Engine(eval_step)

    metric = Average()
    metric.attach(trainer, "elbo")

    def output_transform(output):
        y_pred, y = output

        # Sample softmax values independently for classification at test time
        y_pred = y_pred.to_data_independent_dist()

        # The mean here is over likelihood samples
        y_pred = likelihood(y_pred).probs.mean(0)

        return y_pred, y

    metric = Accuracy(output_transform=output_transform)
    metric.attach(evaluator, "accuracy")

    metric = Loss(lambda y_pred, y: -elbo_fn(y_pred, y))
    metric.attach(evaluator, "elbo")

    kwargs = {"num_workers": 4, "pin_memory": True}

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=hparams.batch_size,
        shuffle=True,
        drop_last=True,
        **kwargs,
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=512, shuffle=False, **kwargs
    )

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_results(trainer):
        metrics = trainer.state.metrics
        elbo = metrics["elbo"]

        print(f"Train - Epoch: {trainer.state.epoch} ELBO: {elbo:.2f} ")
        writer.add_scalar("Likelihood/train", elbo, trainer.state.epoch)

        if hparams.spectral_normalization:
            for name, layer in model.feature_extractor.named_modules():
                if isinstance(layer, torch.nn.Conv2d):
                    writer.add_scalar(
                        f"sigma/{name}", layer.weight_sigma, trainer.state.epoch
                    )

        if not hparams.ard:
            # Otherwise it's too much to submit to tensorboard
            length_scales = model.gp.covar_module.base_kernel.lengthscale.squeeze()
            for i in range(length_scales.shape[0]):
                writer.add_scalar(
                    f"length_scale/{i}", length_scales[i], trainer.state.epoch
                )

        if trainer.state.epoch > 150 and trainer.state.epoch % 5 == 0:
            _, auroc, aupr = get_ood_metrics(
                hparams.dataset, "SVHN", model, likelihood, hparams.data_root
            )
            print(f"OoD Metrics - AUROC: {auroc}, AUPR: {aupr}")
            writer.add_scalar("OoD/auroc", auroc, trainer.state.epoch)
            writer.add_scalar("OoD/auprc", aupr, trainer.state.epoch)

        evaluator.run(test_loader)
        metrics = evaluator.state.metrics
        acc = metrics["accuracy"]
        elbo = metrics["elbo"]

        print(
            f"Test - Epoch: {trainer.state.epoch} "
            f"Acc: {acc:.4f} "
            f"ELBO: {elbo:.2f} "
        )

        writer.add_scalar("Likelihood/test", elbo, trainer.state.epoch)
        writer.add_scalar("Accuracy/test", acc, trainer.state.epoch)

        scheduler.step()

    pbar = ProgressBar(dynamic_ncols=True)
    pbar.attach(trainer)

    trainer.run(train_loader, max_epochs=200)

    # Done training - time to evaluate
    results = {}

    evaluator.run(train_loader)
    train_acc = evaluator.state.metrics["accuracy"]
    train_elbo = evaluator.state.metrics["elbo"]
    results["train_accuracy"] = train_acc
    results["train_elbo"] = train_elbo

    evaluator.run(test_loader)
    test_acc = evaluator.state.metrics["accuracy"]
    test_elbo = evaluator.state.metrics["elbo"]
    results["test_accuracy"] = test_acc
    results["test_elbo"] = test_elbo

    _, auroc, aupr = get_ood_metrics(
        hparams.dataset, "SVHN", model, likelihood, hparams.data_root
    )
    results["auroc_ood_svhn"] = auroc
    results["aupr_ood_svhn"] = aupr

    print(f"Test - Accuracy {results['test_accuracy']:.4f}")

    results_json = json.dumps(results, indent=4, sort_keys=True)
    (results_dir / "results.json").write_text(results_json)

    torch.save(model.state_dict(), results_dir / "model.pt")
    torch.save(likelihood.state_dict(), results_dir / "likelihood.pt")

    writer.close()
Example #24
    def fit(self,
            epochs=75,
            train_loader=None,
            save_path=None,
            val_loader=None):
        initial_inducing_points, initial_lengthscale = initial_values_for_GP(
            train_loader.dataset, self.feature_extractor,
            self.n_inducing_points)

        self.gp = GP(
            num_outputs=self.num_classes,
            initial_lengthscale=initial_lengthscale,
            initial_inducing_points=initial_inducing_points,
            separate_inducing_points=self.separate_inducing_points,
            kernel=self.kernel,
            ard=self.ard,
            lengthscale_prior=self.lengthscale_prior,
        )

        self.model = DKL_GP(self.feature_extractor, self.gp)
        self.model.to(self.device)

        self.likelihood = SoftmaxLikelihood(num_classes=10,
                                            mixing_weights=False)
        self.likelihood = self.likelihood.to(self.device)

        self.elbo_fn = VariationalELBO(self.likelihood,
                                       self.gp,
                                       num_data=len(train_loader.dataset))

        parameters = [
            {
                "params": self.feature_extractor.parameters(),
                "lr": self.learning_rate
            },
            {
                "params": self.gp.parameters(),
                "lr": self.learning_rate
            },
            {
                "params": self.likelihood.parameters(),
                "lr": self.learning_rate
            },
        ]

        self.optimizer = torch.optim.SGD(parameters,
                                         momentum=0.9,
                                         weight_decay=self.weight_decay)

        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=[25, 50, 75], gamma=0.2)

        self.model.train()
        for epoch in tqdm(range(epochs)):
            running_loss = 0
            for i, (x, y) in enumerate(train_loader):
                self.model.train()

                self.optimizer.zero_grad()

                x, y = x.to(self.device), y.to(self.device)

                y_pred = self.model(x)
                elbo = -self.elbo_fn(y_pred, y)
                running_loss += elbo.item()
                elbo.backward()
                self.optimizer.step()

                if i % 50 == 0:
                    print("Iteration: {}, Loss = {}".format(
                        i, running_loss / (i + 1)))

            if epoch % 1 == 0 and val_loader is not None:
                self.model.eval()
                test_loss = 0
                correct = 0
                total = 0
                with torch.no_grad():
                    for batch_idx, (inputs, targets) in enumerate(val_loader):
                        inputs, y = inputs.to(self.device), F.one_hot(
                            targets, self.num_classes).float().to(self.device)
                        y_pred = self.model(inputs).to_data_independent_dist()
                        output = self.likelihood(y_pred).probs.mean(0)
                        predicted = torch.argmax(output, dim=1)
                        loss = -self.likelihood.expected_log_prob(
                            y, y_pred).mean()
                        test_loss += loss.item()
                        targets = targets.to(self.device)
                        total += targets.size(0)
                        correct += predicted.eq(targets.to(
                            self.device)).sum().item()
                acc = 100. * correct / total
                print("Epoch: {}, test acc: {}, test loss {}".format(
                    epoch, acc, test_loss / total))

            self.scheduler.step()

        if save_path is not None:
            self.save(save_path)