Example #1
    def log_likelihood(self):
        # evaluate the likelihood of the labelling (which is,
        # conveniently, just the likelihood of the current mixture
        # model)

        # FIXME: This should really be cached for the last invocation
        score = Counter()
        for c_idx, cluster in self._cluster_to_datum.items():
            if not cluster: continue
            # Evaluate the likelihood of each individual cluster
            cluster_size = len(cluster)
            # The mean of the data points belonging to this cluster
            cluster_datum_mean = sum(cluster) / cluster_size

            # p(c)
            score += Gaussian.log_prob(cluster_datum_mean, self._prior_mean,
                                       self._prior_precision)
            # p(x|c)
            score += sum(
                Gaussian.log_prob(datum, cluster_datum_mean,
                                  self._cluster_precision)
                for datum in cluster)

            # score => p(x, c)

        # for the gaussian the dimensions are independent so we should
        # just be able to combine them directly
        return score.total_count()
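
Note that `Counter` above is not `collections.Counter`: the snippet relies on element-wise addition of per-dimension log-probabilities (as returned by `Gaussian.log_prob`) and on a `total_count()` reduction, while the standard-library class drops negative values on addition and has no `total_count()` method. Below is a minimal sketch of the interface the code appears to assume; the implementation is illustrative and not taken from the original project.

class Counter(dict):
    """Sketch of a per-dimension log-probability accumulator."""

    def __add__(self, other):
        # element-wise addition; treat 0 as the empty Counter so the
        # built-in sum() (which starts from 0) also works on Counter values
        result = Counter(self)
        if other == 0:
            return result
        for key, value in other.items():
            result[key] = result.get(key, 0.0) + value
        return result

    __radd__ = __add__

    def total_count(self):
        # the dimensions are independent, so summing the per-dimension
        # log-probabilities gives the joint log-likelihood
        return sum(self.values())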
import torch
import torch.nn as nn
import torch.nn.functional as F

# NOTE: `Gaussian` is assumed to be the project's own variational-distribution
# helper (a mu/rho parameterization exposing sample() and log_prob()); it is
# not shown in this snippet.


class BLinear(nn.Module):
    """Bayesian linear layer; the default prior is a Gaussian over weights and bias."""

    def __init__(self, in_features, out_features):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features

        # Weight parameters
        self.weight_mu = nn.Parameter(
            torch.Tensor(out_features, in_features).uniform_(-0.2, 0.2))
        self.weight_rho = nn.Parameter(
            torch.Tensor(out_features, in_features).uniform_(1e-1, 2))
        # variational posterior for the weights
        self.weight = Gaussian(self.weight_mu, self.weight_rho)

        # Bias parameters
        self.bias_mu = nn.Parameter(
            torch.Tensor(out_features).uniform_(-0.2, 0.2))
        self.bias_rho = nn.Parameter(
            torch.Tensor(out_features).uniform_(1e-1, 2))
        # variational posterior for the bias
        self.bias = Gaussian(self.bias_mu, self.bias_rho)

        # Prior distributions
        self.weight_prior = Gaussian(torch.Tensor([0.]), torch.Tensor([1.]))
        self.bias_prior = Gaussian(torch.Tensor([0.]), torch.Tensor([1.]))

        # initialize log_prior and log_variational_posterior to 0
        self.log_prior = 0
        self.log_variational_posterior = 0

    def forward(self, input, sample=False, calculate_log_probs=False):
        # 1. Sample weights and bias from variational posterior
        if self.training or sample:
            weight = self.weight.sample()
            bias = self.bias.sample()
        else:
            weight = self.weight.mu
            bias = self.bias.mu

        # 2. Update log_prior and log_variational_posterior for the sampled weights
        if self.training or calculate_log_probs:
            self.log_prior = self.weight_prior.log_prob(
                weight) + self.bias_prior.log_prob(bias)
            self.log_variational_posterior = self.weight.log_prob(
                weight) + self.bias.log_prob(bias)
        else:
            self.log_prior, self.log_variational_posterior = 0, 0

        # 3. Do a forward pass through the layer
        return F.linear(input, weight, bias)
    def elbo(self, input, target, samples=20):
        outputs = []
        log_priors = torch.zeros(samples)
        log_variational_posteriors = torch.zeros(samples)

        # draw `samples` Monte Carlo samples from the posterior (one forward pass each)
        for i in range(samples):
            outputs.append(self(input, sample=True))
            log_priors[i] = self.log_prior()
            log_variational_posteriors[i] = self.log_variational_posterior()

        log_prior = log_priors.sum()
        log_variational_posterior = log_variational_posteriors.sum()

        outputs = torch.stack(outputs)
        y_dist = Gaussian(outputs.mean(0), self.noise)
        # negative_log_likelihood = self.loss_function(
        #     outputs.mean(0), target, reduction='sum')
        negative_log_likelihood = -y_dist.log_prob(target) / len(input)

        # loss = nll + kl
        loss = negative_log_likelihood
        kl = (log_variational_posterior - log_prior) / self.n_training
        loss += kl
        return loss, log_prior, log_variational_posterior, negative_log_likelihood
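
The elbo method above appears to belong to a full network module rather than to BLinear itself: it assumes self.log_prior() and self.log_variational_posterior() are callables that aggregate the per-layer quantities, and that self.noise (observation scale) and self.n_training (training-set size, used to scale the KL term) are set elsewhere. The following training-step sketch is hypothetical and rests on those assumptions; BayesianRegressor, train_loader and x_test are illustrative names, not part of the original code.

# a model assumed to stack BLinear layers and expose the elbo method above
model = BayesianRegressor(in_features=1, hidden=16, out_features=1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for x_batch, y_batch in train_loader:
    optimizer.zero_grad()
    loss, log_prior, log_post, nll = model.elbo(x_batch, y_batch, samples=20)
    loss.backward()
    optimizer.step()

# At prediction time, sample=True draws fresh weights on every forward pass,
# so repeated passes approximate the posterior predictive distribution.
preds = torch.stack([model(x_test, sample=True) for _ in range(100)])
pred_mean, pred_std = preds.mean(0), preds.std(0)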
Example #5
    def _cluster_log_probs(self, cluster, cluster_size, cluster_mean,
                           cluster_covariance, new_point):
        """ Return the posterior, prior, and likelihood of new_point
		being in the cluster of size cluster_size centered at cluster_mean
		"""
        # the updated mean
        new_mean = (cluster_mean * cluster_size + new_point) / (cluster_size +
                                                                1)

        posterior_precision = self._prior_precision + self._cluster_precision
        # convex combination for mean
        posterior_mean = self._prior_mean * self._prior_precision
        posterior_mean += cluster_mean * self._cluster_precision
        posterior_mean /= posterior_precision

        posterior = Gaussian.log_prob(new_mean, posterior_mean,
                                      posterior_precision)
        # prior is keyed on the (potentially) updated params
        prior = Gaussian.log_prob(new_mean, self._prior_mean,
                                  self._prior_precision)
        likelihood = Gaussian.log_prob(new_point, new_mean,
                                       self._cluster_precision)

        return posterior, prior, likelihood
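
How the caller combines the three returned values is not shown here. One common convention in collapsed Gibbs sampling for a Dirichlet-process Gaussian mixture is the candidate's formula: the log marginal probability of new_point under the cluster is likelihood + prior - posterior, with all three terms evaluated at the same parameter value (new_mean above). The sketch below assumes that convention and that Gaussian.log_prob returns scalar log densities; the helper name is illustrative.

def log_predictive(model, cluster, cluster_size, cluster_mean,
                   cluster_covariance, new_point):
    # Subtracting the posterior term implements the ratio
    # p(x | theta) * p(theta) / p(theta | data), i.e. the marginal
    # p(x | cluster) with the cluster parameter integrated out.
    posterior, prior, likelihood = model._cluster_log_probs(
        cluster, cluster_size, cluster_mean, cluster_covariance, new_point)
    return likelihood + prior - posterior

In a Chinese-restaurant-process Gibbs step, this log marginal would typically be added to log(cluster_size) (or to log(alpha) for a brand-new cluster) before normalizing across clusters.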