def KL_phi(self):
    if self.inference == "collapsed":
        return ELBO_collapsed_Categorical(
            self.qphi_logits, self.alpha_z, K=self.n_basis, N=self.data_dim)
    elif self.inference == "fixed_pi":
        qphi = self.get_phi()
        pi = torch.ones_like(qphi) / self.n_basis
        KL = (qphi * (torch.log(qphi + 1e-16) - torch.log(pi))).sum()
        return KL
    elif self.inference == "non-collapsed":
        qDir = Dirichlet(concentration=self.qalpha_z)
        pDir = Dirichlet(concentration=self.alpha_z)
        # KL(q(pi) || p(pi))
        KL_Dir = torch.distributions.kl_divergence(qDir, pDir)
        # E[log q(phi) - log p(phi | pi)] under q(pi)q(phi)
        qpi = qDir.rsample()
        qphi = self.get_phi()
        # KL categorical
        KL_Cat = (qphi * (torch.log(qphi + 1e-16) - torch.log(qpi[None, :]))).sum()
        return KL_Dir + KL_Cat
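A minimal self-contained sketch of the "fixed_pi" branch above: the KL divergence between a categorical posterior q(phi) and a fixed uniform prior over n_basis categories. The input shapes and the logits tensor are illustrative assumptions, not part of the original module.

import torch

def categorical_kl_to_uniform(qphi_logits):
    # KL(q(phi) || Uniform(n_basis)), summed over all rows of q(phi).
    qphi = torch.softmax(qphi_logits, dim=-1)
    pi = torch.ones_like(qphi) / qphi.size(-1)
    return (qphi * (torch.log(qphi + 1e-16) - torch.log(pi))).sum()

kl = categorical_kl_to_uniform(torch.randn(5, 3))  # e.g. 5 data points, 3 basis functions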
def forward(self, inputs, labels, topics, lengths, sample_topics=False):
    enc_emb = self.lookup(inputs)
    dec_emb = self.lookup(inputs)
    lab_emb = self.label_lookup(labels).unsqueeze(0)  # to match with shape of z
    topics.unsqueeze_(0)
    # prior of z
    mu_pr, logvar_pr = self.z_prior(lab_emb)
    h, _ = self.encoder(enc_emb, lengths)
    if self.is_joint:
        hn = torch.cat([h, topics, lab_emb], dim=2)
    else:
        hn = torch.cat([h, lab_emb], dim=2)
    # posterior of z
    mu_po, logvar_po = self.fcmu(hn), self.fclogvar(hn)
    if self.training:
        z = self.reparameterize(mu_po, logvar_po)
    else:
        z = mu_po
    alphas = self.topic_prior(torch.cat([z, lab_emb], dim=2))
    if sample_topics and not self.is_joint:
        # sampling only valid for marginal model
        dist = Dirichlet((topics * topics.size(2)).cpu())
        topics = dist.rsample().to(alphas.device)
    code = torch.cat([z, topics, lab_emb], dim=2)
    outputs, _ = self.decoder(dec_emb, code, lengths=lengths)
    outputs = self.fcout(outputs)
    bow = self.bow_predictor(torch.cat([z, lab_emb], dim=2))
    return outputs, (mu_pr, mu_po), (logvar_pr, logvar_po), alphas, bow
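The Dirichlet resampling step in the marginal branch above treats each observed topic vector as the mean of a Dirichlet and draws a reparameterized sample around it; the .cpu()/.to(device) round trip looks like a workaround for Dirichlet.rsample lacking CUDA support in older PyTorch, though that is an inference from the code. A shape-only sketch with made-up values:

import torch
from torch.distributions import Dirichlet

topics = torch.softmax(torch.randn(1, 4, 10), dim=2)  # (1, batch, n_topics), rows on the simplex
dist = Dirichlet(topics * topics.size(2))             # concentration = mean scaled by n_topics
topics = dist.rsample()                               # same shape, differentiable draw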
class Beta(Distribution):
    r"""
    Beta distribution parameterized by `concentration1` and `concentration0`.

    Example::

        >>> m = Beta(torch.Tensor([0.5]), torch.Tensor([0.5]))
        >>> m.sample()  # Beta distributed with concentration concentration1 and concentration0
         0.1046
        [torch.FloatTensor of size 1]

    Args:
        concentration1 (float or Tensor or Variable): 1st concentration parameter of the distribution
            (often referred to as alpha)
        concentration0 (float or Tensor or Variable): 2nd concentration parameter of the distribution
            (often referred to as beta)
    """
    params = {'concentration1': constraints.positive, 'concentration0': constraints.positive}
    support = constraints.unit_interval
    has_rsample = True

    def __init__(self, concentration1, concentration0):
        if isinstance(concentration1, Number) and isinstance(concentration0, Number):
            concentration1_concentration0 = torch.Tensor([concentration1, concentration0])
        else:
            concentration1, concentration0 = broadcast_all(concentration1, concentration0)
            concentration1_concentration0 = torch.stack([concentration1, concentration0], -1)
        self._dirichlet = Dirichlet(concentration1_concentration0)
        super(Beta, self).__init__(self._dirichlet._batch_shape)

    def rsample(self, sample_shape=()):
        value = self._dirichlet.rsample(sample_shape).select(-1, 0)
        if isinstance(value, Number):
            value = self._dirichlet.concentration.new([value])
        return value

    def log_prob(self, value):
        self._validate_log_prob_arg(value)
        heads_tails = torch.stack([value, 1.0 - value], -1)
        return self._dirichlet.log_prob(heads_tails)

    def entropy(self):
        return self._dirichlet.entropy()

    @property
    def concentration1(self):
        result = self._dirichlet.concentration[..., 0]
        if isinstance(result, Number):
            return torch.Tensor([result])
        else:
            return result

    @property
    def concentration0(self):
        result = self._dirichlet.concentration[..., 1]
        if isinstance(result, Number):
            return torch.Tensor([result])
        else:
            return result
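A usage sketch of the Beta-via-Dirichlet pattern the class above implements: a Beta(a, b) sample is the first coordinate of a Dirichlet([a, b]) sample, and the Beta log density of x equals the Dirichlet log density of the pair (x, 1 - x). This check uses the current public torch.distributions API rather than the historical class above.

import torch
from torch.distributions import Beta, Dirichlet

a, b = torch.tensor([2.0]), torch.tensor([3.0])
beta = Beta(a, b)
dirich = Dirichlet(torch.stack([a, b], dim=-1))

x = beta.rsample()  # reparameterized, so differentiable w.r.t. a and b
heads_tails = torch.stack([x, 1.0 - x], dim=-1)
assert torch.allclose(beta.log_prob(x), dirich.log_prob(heads_tails))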
class Beta(Distribution):
    r"""
    Creates a Beta distribution parameterized by concentration `alpha` and `beta`.

    Example::

        >>> m = Beta(torch.Tensor([0.5]), torch.Tensor([0.5]))
        >>> m.sample()  # Beta distributed with concentration alpha and beta
         0.1046
        [torch.FloatTensor of size 1]

    Args:
        alpha (float or Tensor or Variable): 1st concentration parameter of the distribution
        beta (float or Tensor or Variable): 2nd concentration parameter of the distribution
    """
    params = {'alpha': constraints.positive, 'beta': constraints.positive}
    support = constraints.unit_interval
    has_rsample = True

    def __init__(self, alpha, beta):
        if isinstance(alpha, Number) and isinstance(beta, Number):
            alpha_beta = torch.Tensor([alpha, beta])
        else:
            alpha, beta = broadcast_all(alpha, beta)
            alpha_beta = torch.stack([alpha, beta], -1)
        self._dirichlet = Dirichlet(alpha_beta)
        super(Beta, self).__init__(self._dirichlet._batch_shape)

    def rsample(self, sample_shape=()):
        value = self._dirichlet.rsample(sample_shape).select(-1, 0)
        if isinstance(value, Number):
            value = self._dirichlet.alpha.new([value])
        return value

    def log_prob(self, value):
        self._validate_log_prob_arg(value)
        heads_tails = torch.stack([value, 1.0 - value], -1)
        return self._dirichlet.log_prob(heads_tails)

    def entropy(self):
        return self._dirichlet.entropy()

    @property
    def alpha(self):
        result = self._dirichlet.alpha[..., 0]
        if isinstance(result, Number):
            return torch.Tensor([result])
        else:
            return result

    @property
    def beta(self):
        result = self._dirichlet.alpha[..., 1]
        if isinstance(result, Number):
            return torch.Tensor([result])
        else:
            return result
def train(self, x, sampling=True, independent=True):
    '''
    Parameters
    ----------
    x : a batch of data
    sampling : whether to sample from the variational posterior
        distributions (if True, the default), or just use the mean of
        the variational distributions

    Return
    ------
    KL : sum of the KL divergences between the variational
        distributions and their priors
    log_prob : log-likelihood of each sample under the mixture
    '''
    # The variational distributions
    mu = Normal(self.locs, self.scales)
    sigma = Gamma(self.alpha, self.beta)
    theta = Dirichlet(self.couts)
    # Sample from the variational distributions
    if sampling:
        # Nb = x.shape[0]
        Nb = 1
        mu_sample = mu.rsample((Nb,))
        sigma_sample = torch.pow(sigma.rsample((Nb,)), -0.5)
        theta_sample = theta.rsample((Nb,))
    else:
        mu_sample = torch.reshape(mu.mean, (1, self.Nc, self.Nd))
        sigma_sample = torch.pow(
            torch.reshape(sigma.mean, (1, self.Nc, self.Nd)), -0.5)
        theta_sample = torch.reshape(theta.mean, (1, self.Nc))  # 1*Nc
    # The mixture density
    log_var = (sigma_sample ** 2).log()
    log_likelihoods = GMM.get_likelihoods(
        x, mu_sample.reshape((self.Nc, self.Nd)),
        log_var.reshape((self.Nc, self.Nd)), log=True)  # Nc*Nb
    log_prob = theta_sample @ log_likelihoods
    # Compute the KL divergence sum
    mu_div = kl_divergence(mu, self.mu_prior)
    sigma_div = kl_divergence(sigma, self.sigma_prior)
    theta_div = kl_divergence(theta, self.theta_prior)
    KL = mu_div + sigma_div + theta_div
    if 0:
        print("mu_div: %f \t sigma_div: %f \t theta_div: %f" %
              (mu_div.sum().detach().numpy(),
               sigma_div.sum().detach().numpy(),
               theta_div.sum().detach().numpy()))
    return KL, log_prob
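A self-contained sketch of the theta term in the KL sum above: the KL divergence between a learned Dirichlet posterior over mixture weights and a symmetric Dirichlet prior. The concentration values here are made up for illustration.

import torch
from torch.distributions import Dirichlet, kl_divergence

Nc = 3  # number of mixture components
theta_posterior = Dirichlet(torch.tensor([4.0, 1.5, 0.5]))  # learned concentrations
theta_prior = Dirichlet(torch.ones(Nc))                     # symmetric, uninformative prior

print(kl_divergence(theta_posterior, theta_prior))  # non-negative scalar tensor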
class Beta(Distribution):
    r"""
    Creates a Beta distribution parameterized by concentrations `alpha` and `beta`.

    Example::

        >>> m = Beta(torch.Tensor([0.5]), torch.Tensor([0.5]))
        >>> m.sample()  # Beta distributed with concentration alpha
         0.1046
        [torch.FloatTensor of size 1]

    Args:
        alpha (Tensor or Variable): 1st concentration parameter of the distribution
        beta (Tensor or Variable): 2nd concentration parameter of the distribution
    """
    params = {'alpha': constraints.positive, 'beta': constraints.positive}
    support = constraints.unit_interval
    has_rsample = True

    def __init__(self, alpha, beta):
        if isinstance(alpha, Number) and isinstance(beta, Number):
            alpha_beta = torch.Tensor([alpha, beta])
        else:
            alpha, beta = broadcast_all(alpha, beta)
            alpha_beta = torch.stack([alpha, beta], -1)
        self._dirichlet = Dirichlet(alpha_beta)
        super(Beta, self).__init__(self._dirichlet._batch_shape)

    def rsample(self, sample_shape=()):
        value = self._dirichlet.rsample(sample_shape).select(-1, 0)
        if isinstance(value, Number):
            value = self._dirichlet.alpha.new([value])
        return value

    def log_prob(self, value):
        self._validate_log_prob_arg(value)
        heads_tails = torch.stack([value, 1.0 - value], -1)
        return self._dirichlet.log_prob(heads_tails)

    def entropy(self):
        return self._dirichlet.entropy()
def forward(self, inputs, topics, lengths, sample_topics=False):
    enc_emb = self.lookup(inputs)
    dec_emb = self.lookup(inputs)
    topics.unsqueeze_(0)
    hn, _ = self.encoder(enc_emb, lengths)
    if self.is_joint:
        hn = torch.cat([hn, topics], dim=2)
    mu, logvar = self.fcmu(hn), self.fclogvar(hn)
    if self.training:
        z = self.reparameterize(mu, logvar)
    else:
        z = mu
    alphas = self.topic_prior(z)
    if sample_topics and not self.is_joint:
        device = topics.device
        dist = Dirichlet((topics * alphas.sum(2, keepdim=True)).cpu())
        topics = dist.rsample().to(device)
    code = torch.cat([z, topics], dim=2)
    outputs, _ = self.decoder(dec_emb, code, lengths=lengths)
    outputs = self.fcout(outputs)
    bow = self.bow_predictor(z).squeeze(0)
    return outputs, mu, logvar, alphas, bow
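Unlike the earlier variant, this forward pass scales the Dirichlet concentration by the total mass of the predicted prior alphas, so a sharper learned prior yields lower-variance resampled topics. A shape-only illustration with arbitrary values (the alphas tensor stands in for topic_prior(z)):

import torch
from torch.distributions import Dirichlet

topics = torch.softmax(torch.randn(1, 4, 10), dim=2)  # (1, batch, n_topics)
alphas = torch.rand(1, 4, 10) + 0.1                   # stand-in for topic_prior(z), positive
dist = Dirichlet(topics * alphas.sum(2, keepdim=True))
topics = dist.rsample()                               # still sums to 1 along dim 2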
class Beta(Distribution):
    r"""
    Creates a Beta distribution parameterized by concentrations `alpha` and `beta`.

    Example::

        >>> m = Beta(torch.Tensor([0.5]), torch.Tensor([0.5]))
        >>> m.sample()  # Beta distributed with concentration alpha
         0.1046
        [torch.FloatTensor of size 1]

    Args:
        alpha (Tensor or Variable): 1st concentration parameter of the distribution
        beta (Tensor or Variable): 2nd concentration parameter of the distribution
    """
    has_rsample = True

    def __init__(self, alpha, beta):
        if isinstance(alpha, Number) and isinstance(beta, Number):
            alpha_beta = torch.Tensor([alpha, beta])
        else:
            alpha, beta = broadcast_all(alpha, beta)
            alpha_beta = torch.stack([alpha, beta], -1)
        self._dirichlet = Dirichlet(alpha_beta)
        super(Beta, self).__init__(self._dirichlet._batch_shape)

    def rsample(self, sample_shape=()):
        value = self._dirichlet.rsample(sample_shape).select(-1, 0)
        if isinstance(value, Number):
            value = self._dirichlet.alpha.new([value])
        return value

    def log_prob(self, value):
        self._validate_log_prob_arg(value)
        heads_tails = torch.stack([value, 1.0 - value], -1)
        return self._dirichlet.log_prob(heads_tails)

    def entropy(self):
        return self._dirichlet.entropy()
class Beta(ExponentialFamily):
    r"""
    Beta distribution parameterized by `concentration1` and `concentration0`.

    Example::

        >>> m = Beta(torch.tensor([0.5]), torch.tensor([0.5]))
        >>> m.sample()  # Beta distributed with concentration concentration1 and concentration0
         0.1046
        [torch.FloatTensor of size 1]

    Args:
        concentration1 (float or Tensor): 1st concentration parameter of the distribution
            (often referred to as alpha)
        concentration0 (float or Tensor): 2nd concentration parameter of the distribution
            (often referred to as beta)
    """
    arg_constraints = {
        'concentration1': constraints.positive,
        'concentration0': constraints.positive
    }
    support = constraints.unit_interval
    has_rsample = True

    def __init__(self, concentration1, concentration0, validate_args=None):
        if isinstance(concentration1, Number) and isinstance(concentration0, Number):
            concentration1_concentration0 = torch.tensor(
                [float(concentration1), float(concentration0)])
        else:
            concentration1, concentration0 = broadcast_all(
                concentration1, concentration0)
            concentration1_concentration0 = torch.stack(
                [concentration1, concentration0], -1)
        self._dirichlet = Dirichlet(concentration1_concentration0)
        super(Beta, self).__init__(self._dirichlet._batch_shape,
                                   validate_args=validate_args)

    @property
    def mean(self):
        return self.concentration1 / (self.concentration1 + self.concentration0)

    @property
    def variance(self):
        total = self.concentration1 + self.concentration0
        return (self.concentration1 * self.concentration0 /
                (total.pow(2) * (total + 1)))

    def rsample(self, sample_shape=()):
        value = self._dirichlet.rsample(sample_shape).select(-1, 0)
        if isinstance(value, Number):
            value = self._dirichlet.concentration.new_tensor(value)
        return value

    def log_prob(self, value):
        if self._validate_args:
            self._validate_sample(value)
        heads_tails = torch.stack([value, 1.0 - value], -1)
        return self._dirichlet.log_prob(heads_tails)

    def entropy(self):
        return self._dirichlet.entropy()

    @property
    def concentration1(self):
        result = self._dirichlet.concentration[..., 0]
        if isinstance(result, Number):
            return torch.Tensor([result])
        else:
            return result

    @property
    def concentration0(self):
        result = self._dirichlet.concentration[..., 1]
        if isinstance(result, Number):
            return torch.Tensor([result])
        else:
            return result

    @property
    def _natural_params(self):
        return (self.concentration1, self.concentration0)

    def _log_normalizer(self, x, y):
        return torch.lgamma(x) + torch.lgamma(y) - torch.lgamma(x + y)
class Beta(ExponentialFamily):
    r"""
    Beta distribution parameterized by `concentration1` and `concentration0`.

    Example::

        >>> m = Beta(torch.tensor([0.5]), torch.tensor([0.5]))
        >>> m.sample()  # Beta distributed with concentration concentration1 and concentration0
        tensor([ 0.1046])

    Args:
        concentration1 (float or Tensor): 1st concentration parameter of the distribution
            (often referred to as alpha)
        concentration0 (float or Tensor): 2nd concentration parameter of the distribution
            (often referred to as beta)
    """
    arg_constraints = {'concentration1': constraints.positive,
                       'concentration0': constraints.positive}
    support = constraints.unit_interval
    has_rsample = True

    def __init__(self, concentration1, concentration0, validate_args=None):
        if isinstance(concentration1, Number) and isinstance(concentration0, Number):
            concentration1_concentration0 = torch.tensor(
                [float(concentration1), float(concentration0)])
        else:
            concentration1, concentration0 = broadcast_all(
                concentration1, concentration0)
            concentration1_concentration0 = torch.stack(
                [concentration1, concentration0], -1)
        self._dirichlet = Dirichlet(concentration1_concentration0)
        super(Beta, self).__init__(self._dirichlet._batch_shape,
                                   validate_args=validate_args)

    @property
    def mean(self):
        return self.concentration1 / (self.concentration1 + self.concentration0)

    @property
    def variance(self):
        total = self.concentration1 + self.concentration0
        return (self.concentration1 * self.concentration0 /
                (total.pow(2) * (total + 1)))

    def rsample(self, sample_shape=()):
        value = self._dirichlet.rsample(sample_shape).select(-1, 0)
        if isinstance(value, Number):
            value = self._dirichlet.concentration.new_tensor(value)
        return value

    def log_prob(self, value):
        if self._validate_args:
            self._validate_sample(value)
        heads_tails = torch.stack([value, 1.0 - value], -1)
        return self._dirichlet.log_prob(heads_tails)

    def entropy(self):
        return self._dirichlet.entropy()

    @property
    def concentration1(self):
        result = self._dirichlet.concentration[..., 0]
        if isinstance(result, Number):
            return torch.tensor([result])
        else:
            return result

    @property
    def concentration0(self):
        result = self._dirichlet.concentration[..., 1]
        if isinstance(result, Number):
            return torch.tensor([result])
        else:
            return result

    @property
    def _natural_params(self):
        return (self.concentration1, self.concentration0)

    def _log_normalizer(self, x, y):
        return torch.lgamma(x) + torch.lgamma(y) - torch.lgamma(x + y)
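A quick numerical check of the exponential-family pieces above: the log-normalizer is log B(x, y) = lgamma(x) + lgamma(y) - lgamma(x + y), so the Beta log density can be reassembled by hand from the natural parameters and compared against log_prob.

import torch
from torch.distributions import Beta

a, b = torch.tensor(2.0), torch.tensor(5.0)
m = Beta(a, b)
x = torch.tensor(0.3)
log_norm = torch.lgamma(a) + torch.lgamma(b) - torch.lgamma(a + b)
manual = (a - 1) * torch.log(x) + (b - 1) * torch.log(1 - x) - log_norm
assert torch.allclose(m.log_prob(x), manual)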
class Beta(Distribution):
    r"""
    Beta distribution parameterized by `concentration1` and `concentration0`.

    Example::

        >>> m = Beta(torch.Tensor([0.5]), torch.Tensor([0.5]))
        >>> m.sample()  # Beta distributed with concentration concentration1 and concentration0
         0.1046
        [torch.FloatTensor of size 1]

    Args:
        concentration1 (float or Tensor or Variable): 1st concentration parameter of the distribution
            (often referred to as alpha)
        concentration0 (float or Tensor or Variable): 2nd concentration parameter of the distribution
            (often referred to as beta)
    """
    params = {'concentration1': constraints.positive, 'concentration0': constraints.positive}
    support = constraints.unit_interval
    has_rsample = True

    def __init__(self, concentration1, concentration0):
        if isinstance(concentration1, Number) and isinstance(concentration0, Number):
            concentration1_concentration0 = variable([concentration1, concentration0])
        else:
            concentration1, concentration0 = broadcast_all(concentration1, concentration0)
            concentration1_concentration0 = torch.stack([concentration1, concentration0], -1)
        self._dirichlet = Dirichlet(concentration1_concentration0)
        super(Beta, self).__init__(self._dirichlet._batch_shape)

    @property
    def mean(self):
        return self.concentration1 / (self.concentration1 + self.concentration0)

    @property
    def variance(self):
        total = self.concentration1 + self.concentration0
        return (self.concentration1 * self.concentration0 /
                (total.pow(2) * (total + 1)))

    def rsample(self, sample_shape=()):
        value = self._dirichlet.rsample(sample_shape).select(-1, 0)
        if isinstance(value, Number):
            value = self._dirichlet.concentration.new([value])
        return value

    def log_prob(self, value):
        self._validate_log_prob_arg(value)
        heads_tails = torch.stack([value, 1.0 - value], -1)
        return self._dirichlet.log_prob(heads_tails)

    def entropy(self):
        return self._dirichlet.entropy()

    @property
    def concentration1(self):
        result = self._dirichlet.concentration[..., 0]
        if isinstance(result, Number):
            return torch.Tensor([result])
        else:
            return result

    @property
    def concentration0(self):
        result = self._dirichlet.concentration[..., 1]
        if isinstance(result, Number):
            return torch.Tensor([result])
        else:
            return result
class Beta(ExponentialFamily):
    r"""
    Beta distribution parameterized by :attr:`concentration1` and :attr:`concentration0`.

    Example::

        >>> # xdoctest: +IGNORE_WANT("non-deterministic")
        >>> m = Beta(torch.tensor([0.5]), torch.tensor([0.5]))
        >>> m.sample()  # Beta distributed with concentration concentration1 and concentration0
        tensor([ 0.1046])

    Args:
        concentration1 (float or Tensor): 1st concentration parameter of the distribution
            (often referred to as alpha)
        concentration0 (float or Tensor): 2nd concentration parameter of the distribution
            (often referred to as beta)
    """
    arg_constraints = {
        'concentration1': constraints.positive,
        'concentration0': constraints.positive
    }
    support = constraints.unit_interval
    has_rsample = True

    def __init__(self, concentration1, concentration0, validate_args=None):
        if isinstance(concentration1, Real) and isinstance(concentration0, Real):
            concentration1_concentration0 = torch.tensor(
                [float(concentration1), float(concentration0)])
        else:
            concentration1, concentration0 = broadcast_all(
                concentration1, concentration0)
            concentration1_concentration0 = torch.stack(
                [concentration1, concentration0], -1)
        self._dirichlet = Dirichlet(concentration1_concentration0,
                                    validate_args=validate_args)
        super(Beta, self).__init__(self._dirichlet._batch_shape,
                                   validate_args=validate_args)

    def expand(self, batch_shape, _instance=None):
        new = self._get_checked_instance(Beta, _instance)
        batch_shape = torch.Size(batch_shape)
        new._dirichlet = self._dirichlet.expand(batch_shape)
        super(Beta, new).__init__(batch_shape, validate_args=False)
        new._validate_args = self._validate_args
        return new

    @property
    def mean(self):
        return self.concentration1 / (self.concentration1 + self.concentration0)

    @property
    def mode(self):
        return self._dirichlet.mode[..., 0]

    @property
    def variance(self):
        total = self.concentration1 + self.concentration0
        return (self.concentration1 * self.concentration0 /
                (total.pow(2) * (total + 1)))

    def rsample(self, sample_shape=()):
        return self._dirichlet.rsample(sample_shape).select(-1, 0)

    def log_prob(self, value):
        if self._validate_args:
            self._validate_sample(value)
        heads_tails = torch.stack([value, 1.0 - value], -1)
        return self._dirichlet.log_prob(heads_tails)

    def entropy(self):
        return self._dirichlet.entropy()

    @property
    def concentration1(self):
        result = self._dirichlet.concentration[..., 0]
        if isinstance(result, Number):
            return torch.tensor([result])
        else:
            return result

    @property
    def concentration0(self):
        result = self._dirichlet.concentration[..., 1]
        if isinstance(result, Number):
            return torch.tensor([result])
        else:
            return result

    @property
    def _natural_params(self):
        return (self.concentration1, self.concentration0)

    def _log_normalizer(self, x, y):
        return torch.lgamma(x) + torch.lgamma(y) - torch.lgamma(x + y)
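A usage sketch for this final version via the public torch.distributions.Beta (expand() and the mode property require a reasonably recent PyTorch; the mode is only well defined for concentrations greater than 1):

import torch
from torch.distributions import Beta

m = Beta(torch.tensor([2.0]), torch.tensor([5.0]))
print(m.mean)                   # c1 / (c1 + c0) = tensor([0.2857...])
print(m.mode)                   # (c1 - 1) / (c1 + c0 - 2) = tensor([0.2000])
expanded = m.expand(torch.Size([3, 1]))
print(expanded.sample().shape)  # torch.Size([3, 1])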