Code example #1
    def kl_divergence(self):
        """Get the KL divergence."""
        variational_dist_u = self.variational_distribution.variational_distribution
        prior_dist = self.prior_distribution

        with settings.max_preconditioner_size(0):
            kl_divergence = torch.distributions.kl.kl_divergence(
                variational_dist_u, prior_dist)
        return kl_divergence
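
The method above belongs to GPyTorch's variational machinery, where `self.variational_distribution` and `self.prior_distribution` are multivariate normals over the inducing values u. A minimal standalone sketch of the same pattern, using plain `torch.distributions` objects with illustrative tensors (not code from the original project):

import torch
from gpytorch import settings

# Two illustrative multivariate normals standing in for q(u) and p(u)
q_u = torch.distributions.MultivariateNormal(torch.zeros(3), torch.eye(3))
p_u = torch.distributions.MultivariateNormal(torch.ones(3), 2.0 * torch.eye(3))

# Disable the preconditioner while the KL term is computed, as in the example
with settings.max_preconditioner_size(0):
    kl = torch.distributions.kl.kl_divergence(q_u, p_u)
print(kl.item())
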
Code example #2
File: gp_list_torch.py  Project: pnickl/reg
    def predict(self, input):
        input = transform(input.reshape((-1, self.input_size)), self.input_trans)

        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
                output = self.likelihood(self.model(input)).mean

        output = inverse_transform(output, self.target_trans)
        if self.incremental:
            return input[..., :self.target_size] + output
        else:
            return output
Code example #3
    def predict(self, input):
        self.device = torch.device('cpu')

        self.model.eval().to(self.device)
        self.likelihood.eval().to(self.device)

        input = transform(torch.reshape(input, (-1, self.input_size)), self.input_trans)

        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
                output = self.likelihood(self.model(input)).mean

        output = inverse_transform(output[:, None], self.target_trans).squeeze()
        return output
Code example #4
File: sparse_gp_list_torch.py  Project: pnickl/reg
    def predict(self, input):
        self.device = torch.device('cpu')

        self.model.eval().to(self.device)
        self.likelihood.eval().to(self.device)

        input = transform(input.reshape((-1, self.input_size)),
                          self.input_trans)

        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
                _input = [input for _ in range(self.target_size)]
                predictions = self.likelihood(*self.model(*_input))
                output = torch.stack([_pred.mean for _pred in predictions]).T

        output = inverse_transform(output, self.target_trans).squeeze()
        return output
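
Examples #2-#4 depend on helpers from the pnickl/reg project (`transform`, `inverse_transform`, `self.input_trans`, and so on). The prediction pattern they share can be reproduced with a plain GPyTorch exact GP; the model below is a minimal self-contained sketch, not code from that project:

import torch
import gpytorch
from gpytorch.settings import (max_preconditioner_size,
                               max_root_decomposition_size, fast_pred_var)


class ToyGP(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x))


train_x = torch.linspace(0, 1, 20)
train_y = torch.sin(6.0 * train_x)
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ToyGP(train_x, train_y, likelihood)

model.eval()
likelihood.eval()
test_x = torch.linspace(0, 1, 50)

# Same nesting of settings as in the predict() methods above
with max_preconditioner_size(10), torch.no_grad():
    with max_root_decomposition_size(30), fast_pred_var():
        mean = likelihood(model(test_x)).mean
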
Code example #5
    def forward(self, x):
        """Forward propagate the module.

        This method determines how to marginalize out the inducing function values.
        Specifically, forward defines how to transform a variational distribution over
        the inducing point values, q(u), into a variational distribution over
        the function values at specified locations x, q(f|x), by integrating
        q(f|x) = ∫ p(f|x, u) q(u) du.

        Parameters
        ----------
        x (torch.tensor):
            Locations x at which to evaluate the variational posterior over the function values.

        Returns
        -------
            The distribution q(f|x)
        """
        variational_dist = self.variational_distribution.approx_variational_distribution
        inducing_points = self.inducing_points
        inducing_batch_shape = inducing_points.shape[:-2]
        if inducing_batch_shape < x.shape[:-2] or len(
                inducing_batch_shape) < len(x.shape[:-2]):
            batch_shape = _mul_broadcast_shape(inducing_points.shape[:-2],
                                               x.shape[:-2])
            inducing_points = inducing_points.expand(
                *batch_shape, *inducing_points.shape[-2:])
            x = x.expand(*batch_shape, *x.shape[-2:])
            variational_dist = variational_dist.expand(batch_shape)

        # If our points equal the inducing points, we're done
        if torch.equal(x, inducing_points):
            return variational_dist

        # Otherwise, we have to marginalize
        else:
            num_induc = inducing_points.size(-2)
            full_inputs = torch.cat([inducing_points, x], dim=-2)
            full_output = self.model.forward(full_inputs)
            full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

            # Mean terms
            test_mean = full_mean[..., num_induc:]
            induc_mean = full_mean[..., :num_induc]
            mean_diff = (variational_dist.mean - induc_mean).unsqueeze(-1)

            # Covariance terms
            induc_induc_covar = full_covar[..., :num_induc, :num_induc].add_jitter()
            induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
            data_data_covar = full_covar[..., num_induc:, num_induc:]
            aux = variational_dist.lazy_covariance_matrix.root_decomposition()
            root_variational_covar = aux.root.evaluate()

            # If we had to expand the inducing points,
            # shrink the inducing mean and induc_induc_covar dimension
            # This makes everything more computationally efficient
            if len(inducing_batch_shape) < len(induc_induc_covar.batch_shape):
                index = tuple(0 for _ in range(
                    len(induc_induc_covar.batch_shape) -
                    len(inducing_batch_shape)))
                repeat_size = torch.Size(
                    tuple(induc_induc_covar.batch_shape[:len(index)]) +
                    tuple(1 for _ in induc_induc_covar.batch_shape[len(index):]))
                induc_induc_covar = BatchRepeatLazyTensor(
                    induc_induc_covar.__getitem__(index), repeat_size)

            # If we're less than a certain size, we'll compute the Cholesky
            # decomposition of induc_induc_covar
            cholesky = False
            if settings.fast_computations.log_prob.off() or (
                    num_induc <= settings.max_cholesky_size.value()):
                induc_induc_covar = CholLazyTensor(
                    induc_induc_covar.cholesky())
                cholesky = True

            # If we are making predictions and don't need variances, we can do things
            # very quickly.
            if not self.training and settings.skip_posterior_variances.on():
                if not hasattr(self, "_mean_cache"):
                    self._mean_cache = induc_induc_covar.inv_matmul(
                        mean_diff).detach()

                predictive_mean = torch.add(
                    test_mean,
                    induc_data_covar.transpose(-2, -1).matmul(
                        self._mean_cache).squeeze(-1))

                predictive_covar = ZeroLazyTensor(test_mean.size(-1),
                                                  test_mean.size(-1))

                return MultivariateNormal(predictive_mean, predictive_covar)

            # Cache the CG results
            # For now: run variational inference without a preconditioner
            # The preconditioner screws things up for some reason
            with settings.max_preconditioner_size(0):
                # Cache the CG results
                left_tensors = torch.cat([mean_diff, root_variational_covar],
                                         -1)
                with torch.no_grad():
                    eager_rhs = torch.cat([left_tensors, induc_data_covar], -1)
                    solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = \
                        CachedCGLazyTensor.precompute_terms(
                            induc_induc_covar, eager_rhs.detach(),
                            logdet_terms=(not cholesky),
                            include_tmats=(not settings.skip_logdet_forward.on() and
                                           not cholesky)
                        )
                    eager_rhss = [
                        eager_rhs.detach(),
                        eager_rhs[..., left_tensors.size(-1):].detach(),
                        eager_rhs[..., :left_tensors.size(-1)].detach()
                    ]
                    solves = [
                        solve.detach(),
                        solve[..., left_tensors.size(-1):].detach(),
                        solve[..., :left_tensors.size(-1)].detach()
                    ]
                    if settings.skip_logdet_forward.on():
                        eager_rhss.append(
                            torch.cat([probe_vecs, left_tensors], -1))
                        solves.append(
                            torch.cat([
                                probe_vec_solves,
                                solve[..., :left_tensors.size(-1)]
                            ], -1))
                induc_induc_covar = CachedCGLazyTensor(
                    induc_induc_covar,
                    eager_rhss=eager_rhss,
                    solves=solves,
                    probe_vectors=probe_vecs,
                    probe_vector_norms=probe_vec_norms,
                    probe_vector_solves=probe_vec_solves,
                    probe_vector_tmats=tmats,
                )

            if self.training:
                self._memoize_cache[
                    "prior_distribution_memo"] = MultivariateNormal(
                        induc_mean, induc_induc_covar)

            # Compute predictive mean/covariance
            inv_products = induc_induc_covar.inv_matmul(
                induc_data_covar, left_tensors.transpose(-1, -2))
            predictive_mean = torch.add(test_mean, inv_products[..., 0, :])
            predictive_covar = RootLazyTensor(
                inv_products[..., 1:, :].transpose(-1, -2))
            if self.training:
                interp_data_data_var, _ = induc_induc_covar.inv_quad_logdet(
                    induc_data_covar, logdet=False, reduce_inv_quad=False)
                data_covariance = DiagLazyTensor(
                    (data_data_covar.diag() - interp_data_data_var).clamp(
                        0, math.inf))
            else:
                neg_induc_data_data_covar = torch.matmul(
                    induc_data_covar.transpose(-1, -2).mul(-1),
                    induc_induc_covar.inv_matmul(induc_data_covar))
                data_covariance = data_data_covar + neg_induc_data_data_covar
            predictive_covar = PsdSumLazyTensor(predictive_covar,
                                                data_covariance)

            return MultivariateNormal(predictive_mean, predictive_covar)
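
The forward above implements a custom (unwhitened) variational strategy in the style of GPyTorch's own `VariationalStrategy`. In user code this marginalization is normally reached indirectly by calling an approximate GP model; the sketch below shows that entry point under current GPyTorch (the class name `ToySVGP` is illustrative, not from the original source):

import torch
import gpytorch


class ToySVGP(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points):
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2))
        variational_strategy = gpytorch.variational.VariationalStrategy(
            self, inducing_points, variational_distribution,
            learn_inducing_locations=True)
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x))


model = ToySVGP(inducing_points=torch.randn(16, 1))
# Calling the model routes x through the variational strategy, which
# integrates p(f|x, u) q(u) du and returns q(f|x) as a MultivariateNormal.
q_f = model(torch.randn(5, 1))
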
Code example #6
File: deepkernellearning.py  Project: yngtodd/nlp
def main():
    parser = argparse.ArgumentParser(
        description='Deep Kernel Learning with synthetic data.')
    parser.add_argument('--datapath', type=str, help='Path to data directory.')
    parser.add_argument('--batchsize',
                        type=int,
                        default=10,
                        help='Batch size.')
    parser.add_argument('--n_epochs',
                        type=int,
                        default=10,
                        help='Number of epochs.')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        help='Learning rate.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='Disable CUDA training.')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    traindata = Synthetic(args.datapath, 'train', download=True)
    train_loader = DataLoader(traindata, batch_size=args.batchsize)
    num_classes = len(np.unique(traindata.targets))

    testdata = Synthetic(args.datapath, 'test')
    test_loader = DataLoader(testdata, batch_size=args.batchsize)

    feature_extractor = ConvFeatureExtractor().to(device)
    num_features = feature_extractor._filter_sum

    model = DKLModel(feature_extractor, num_dim=5).to(device)
    likelihood = SoftmaxLikelihood(num_features=model.num_dim,
                                   n_classes=num_classes).to(device)

    optimizer = SGD([
        {
            'params': model.feature_extractor.parameters()
        },
        {
            'params': model.gp_layer.hyperparameters(),
            'lr': args.lr * 0.01
        },
        {
            'params': model.gp_layer.variational_parameters()
        },
        {
            'params': likelihood.parameters()
        },
    ],
                    lr=args.lr,
                    momentum=0.9,
                    nesterov=True,
                    weight_decay=0)

    scheduler = MultiStepLR(
        optimizer,
        milestones=[0.5 * args.n_epochs, 0.75 * args.n_epochs],
        gamma=0.1)

    for epoch in range(1, args.n_epochs + 1):
        with settings.use_toeplitz(False), settings.max_preconditioner_size(0):
            train(epoch, train_loader, optimizer, likelihood, model, device)
            test(test_loader, likelihood, model, device)
        # Step the learning-rate scheduler after the epoch's optimizer updates
        scheduler.step()

        state_dict = model.state_dict()
        likelihood_state_dict = likelihood.state_dict()
        torch.save({
            'model': state_dict,
            'likelihood': likelihood_state_dict
        }, 'dkl_synthetic_checkpoint.dat')
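
The `train()` and `test()` helpers come from the yngtodd/nlp project and are not shown here. A minimal sketch of what `train()` could look like for this DKL setup, assuming `model.gp_layer` is the variational GP head and a `VariationalELBO` objective:

import gpytorch


def train(epoch, train_loader, optimizer, likelihood, model, device):
    model.train()
    likelihood.train()
    mll = gpytorch.mlls.VariationalELBO(
        likelihood, model.gp_layer, num_data=len(train_loader.dataset))
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = -mll(output, target)   # negative ELBO
        loss.backward()
        optimizer.step()
    print('Epoch %d - loss: %.3f' % (epoch, loss.item()))
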
Code example #7
for i in range(training_iterations):
    # Zero backprop gradients
    optimizer.zero_grad()
    # Get output from model
    output = model(x_train)
    # Calc loss and backprop derivatives
    loss = -mll(output, y_train)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
    optimizer.step()
    torch.cuda.empty_cache()

model.eval()
likelihood.eval()

x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
x_test = x_test.cuda()

with settings.max_preconditioner_size(10), torch.no_grad():
    with settings.max_root_decomposition_size(30), settings.fast_pred_var():
        f_preds = model(x_test)
        y_pred = likelihood(f_preds)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()
    plot_gp(mean, var, x_test.cpu().numpy(), X_train=x_train.cpu().numpy(), Y_train=y_train.cpu().numpy(), samples=samples)
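
`plot_gp` is not defined in this snippet. One possible implementation with matplotlib, assuming the same argument order (mean, variance, test inputs, training data, samples) with NumPy arrays; this is a sketch, not the original project's helper:

import numpy as np
import matplotlib.pyplot as plt


def plot_gp(mean, var, X, X_train=None, Y_train=None, samples=None):
    X, mean = X.ravel(), mean.ravel()
    std = np.sqrt(var.ravel())
    # Predictive mean with a two-standard-deviation band
    plt.fill_between(X, mean - 2 * std, mean + 2 * std, alpha=0.2)
    plt.plot(X, mean, label='predictive mean')
    if samples is not None:
        plt.plot(X, np.atleast_2d(samples).T, lw=0.5, alpha=0.6)
    if X_train is not None and Y_train is not None:
        plt.plot(X_train.ravel(), Y_train.ravel(), 'kx', label='observations')
    plt.legend()
    plt.show()
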