import torch
import torch.nn.functional as F


def transform(self, xb, params, inverse=False):
    # Split the conditioner output into an additive shift and a multiplicative scale.
    shift, scale = chunk_two(params)
    # Sigmoid keeps the scale positive and bounded; the +2.0 bias pushes the initial
    # scale towards 1, and the 1e-3 floor avoids division by (near-)zero in the inverse.
    scale = torch.sigmoid(scale + 2.0) + 1e-3
    if not inverse:
        return scale * xb + shift, sumeb(scale.log())
    else:
        return (xb - shift) / scale, -sumeb(scale.log())
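# `chunk_two` and `sumeb` are not defined in this excerpt. Below is a minimal sketch of
# the behavior the code assumes (these definitions are an assumption, not the
# repository's actual helpers): chunk_two halves a parameter tensor along the channel
# dimension, and sumeb sums every dimension except the batch dimension so that
# log-densities and log-dets come back as one value per sample.
def chunk_two(params):
    # Assumed split along dim=1, i.e. (B, 2C, ...) -> two (B, C, ...) tensors.
    return torch.chunk(params, 2, dim=1)


def sumeb(x):
    # "Sum everything but batch": reduce all non-batch dimensions to shape (B,).
    return x.reshape(x.shape[0], -1).sum(dim=1)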
def log_prob(self, x, context=None):
    if self.use_context and context is not None:
        # Context encoder predicts a diagonal Gaussian; softplus keeps sigma positive.
        mu, sigma = chunk_two(self.context_enc(context))
        sigma = F.softplus(sigma) + 1e-5
        # Change of variables: log N(x; mu, sigma^2) = log N(eps; 0, 1) - log sigma.
        eps = (x - mu) / sigma
        return sumeb(self.unit_log_prob(eps) - sigma.log())
    else:
        return sumeb(self.unit_log_prob(x))
def mean(self, context=None, device='cpu'):
    if self.use_context and context is not None:
        mu, sigma = chunk_two(self.context_enc(context))
        sigma = F.softplus(sigma) + 1e-5
        lp = sumeb(self.unit_log_prob(torch.zeros_like(mu)) - sigma.log())
        return mu, lp
    else:
        mu = torch.zeros(self.infer_shape(), device=device)
        lp = sumeb(self.unit_log_prob(mu))
        return mu, lp
def sample(self, num_samples=None, context=None, device='cpu'):
    if self.use_context and context is not None:
        mu, sigma = chunk_two(self.context_enc(context))
        sigma = F.softplus(sigma) + 1e-5
        # Reparameterized draw x = mu + sigma * eps, returned with its log-density
        # under the context-conditional Gaussian.
        eps = torch.randn_like(mu)
        x = mu + sigma * eps
        lp = sumeb(self.unit_log_prob(eps) - sigma.log())
        return x, lp
    else:
        eps = torch.randn(self.infer_shape(num_samples), device=device)
        lp = sumeb(self.unit_log_prob(eps))
        return eps, lp
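# Standalone check (not part of the model) of the identity that the conditional
# log_prob and sample above rely on: for a diagonal Gaussian,
# log N(x; mu, sigma^2) = log N(eps; 0, 1) - log sigma with eps = (x - mu) / sigma.
def _check_gaussian_reparam():
    mu = torch.randn(4, 3)
    sigma = F.softplus(torch.randn(4, 3)) + 1e-5
    x = mu + sigma * torch.randn(4, 3)
    direct = torch.distributions.Normal(mu, sigma).log_prob(x).sum(dim=1)
    eps = (x - mu) / sigma
    via_eps = (torch.distributions.Normal(0.0, 1.0).log_prob(eps) - sigma.log()).sum(dim=1)
    assert torch.allclose(direct, via_eps, atol=1e-5)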
def log_prob(self, x, context=None):
    if self.use_context and context is not None:
        logits = self.context_enc(context)
    else:
        # No context: zero logits, i.e. an independent Bernoulli(0.5) per element.
        logits = torch.zeros_like(x)
    # Negative BCE-with-logits is exactly the Bernoulli log-likelihood of x
    # under probabilities sigmoid(logits).
    return -sumeb(
        F.binary_cross_entropy_with_logits(logits, x, reduction='none'))
def mean(self, context=None, device='cpu'):
    if self.use_context and context is not None:
        logits = self.context_enc(context)
    else:
        logits = torch.zeros(self.infer_shape(), device=device)
    x = torch.sigmoid(logits)
    lp = -sumeb(
        F.binary_cross_entropy_with_logits(logits, x, reduction='none'))
    return x, lp
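# Standalone check (not part of the model) that the negative BCE-with-logits used
# above equals the Bernoulli log-likelihood x * log(p) + (1 - x) * log(1 - p)
# with p = sigmoid(logits).
def _check_bernoulli_logprob():
    logits = torch.randn(5, 7)
    x = torch.randint(0, 2, (5, 7)).float()
    p = torch.sigmoid(logits)
    manual = x * p.log() + (1 - x) * (1 - p).log()
    via_bce = -F.binary_cross_entropy_with_logits(logits, x, reduction='none')
    assert torch.allclose(manual, via_bce, atol=1e-5)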
def inverse(self, z, context=None):
    """Invert the MADE transform.

    The inverse has to recover one dimension at a time (each shift/scale entry
    depends only on the already-recovered x[..., :i]), so it cannot be
    parallelized; avoid it except in special cases.
    """
    x = torch.zeros_like(z)
    for i in range(self.dim):
        shift, scale = self.get_params(x, context)
        x[..., i] = z[..., i] * scale[..., i] + shift[..., i]
    # The autoregressive masking means scale[..., i] never depends on x[..., i:],
    # so the scale from the final iteration already matches the fully recovered x.
    return x, sumeb(scale.log())
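# Standalone sketch (not part of the model) of why the loop above recovers x in a
# single sweep: with a toy autoregressive transform whose shift for dimension i
# depends only on x[..., :i], filling the dimensions in order inverts it exactly.
def _toy_autoregressive_roundtrip():
    dim = 5
    x = torch.randn(3, dim)

    def toy_params(x):
        shift = torch.cumsum(x, dim=-1) - x   # shift_i = sum of x[..., :i]
        scale = torch.ones_like(x)
        return shift, scale

    shift, scale = toy_params(x)
    z = (x - shift) / scale                   # "forward" direction
    x_rec = torch.zeros_like(z)
    for i in range(dim):
        shift, scale = toy_params(x_rec)      # params depend only on recovered dims
        x_rec[..., i] = z[..., i] * scale[..., i] + shift[..., i]
    assert torch.allclose(x, x_rec, atol=1e-5)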
def get_params(self, x, context=None):
    if x.dim() == 4:
        # Image input (B, C, H, W): one scale/shift per channel, optionally
        # modulated by a linear map of the context.
        log_scale = self.log_scale.view(1, -1, 1, 1)
        shift = self.shift.view(1, -1, 1, 1)
        if context is not None and self.linear is not None:
            ctx_log_scale, ctx_shift = chunk_two(self.linear(context))
            B = x.shape[0]
            log_scale = log_scale + ctx_log_scale.view(B, -1, 1, 1)
            shift = shift + ctx_shift.view(B, -1, 1, 1)
        # Each of the H * W spatial positions contributes the same per-channel factors.
        logdet = x.shape[-2] * x.shape[-1] * sumeb(log_scale)
    else:
        # Flat input (B, D): one scale/shift per feature.
        log_scale = self.log_scale.view(1, -1)
        shift = self.shift.view(1, -1)
        if context is not None and self.linear is not None:
            ctx_log_scale, ctx_shift = chunk_two(self.linear(context))
            B = x.shape[0]
            log_scale = log_scale + ctx_log_scale.view(B, -1)
            shift = shift + ctx_shift.view(B, -1)
        logdet = sumeb(log_scale)
    return log_scale, shift, logdet
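# Standalone check (not part of the model) of the 4D log-det above: scaling every
# element of a (B, C, H, W) tensor by a per-channel factor has Jacobian
# log-determinant H * W * sum_c log_scale_c, since each of the H * W spatial
# positions repeats the same C factors.
def _check_channelwise_logdet():
    B, C, H, W = 2, 3, 4, 5
    log_scale = torch.randn(C)
    per_element = log_scale.view(1, C, 1, 1).expand(B, C, H, W)
    full = per_element.reshape(B, -1).sum(dim=1)       # sum of log-scales over all elements
    compact = H * W * log_scale.sum() * torch.ones(B)  # the shortcut used in get_params
    assert torch.allclose(full, compact, atol=1e-4)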
def inverse(self, Z, context=None):
    X = torch.zeros_like(Z)
    for i in range(self.dim):
        shift, scale = self.get_params(X)
        X[..., i] = Z[..., i] * scale[..., i] + shift[..., i]
    return X, sumeb(scale.log())


def forward(self, X, context=None):
    shift, scale = self.get_params(X)
    return (X - shift) / scale, -sumeb(scale.log())
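# Standalone check (not part of the model) that the `forward` and `inverse` pair
# above are mutually consistent for fixed shift/scale: the maps undo each other
# and their log-dets cancel (+sum log scale for z -> x, -sum log scale for x -> z).
def _check_affine_pair():
    x = torch.randn(2, 6)
    shift = torch.randn(2, 6)
    scale = torch.rand(2, 6) + 0.5
    z = (x - shift) / scale                      # forward: x -> z
    x_rec = z * scale + shift                    # inverse: z -> x
    assert torch.allclose(x, x_rec, atol=1e-5)
    ldj_fwd = -scale.log().sum(dim=1)
    ldj_inv = scale.log().sum(dim=1)
    assert torch.allclose(ldj_fwd + ldj_inv, torch.zeros(2))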