def forward_transform_bruteforce(self, x, logdet=0, context=None):
    warnings.warn('brute force')
    bsz = x.shape[0]
    input_shape = x.shape[1:]
    with torch.enable_grad():
        x.requires_grad_(True)
        F = self.get_potential(x, context)
        f = torch.autograd.grad(F.sum(), x, create_graph=True)[0]

        # TODO: compute Hessian in block mode instead of row-by-row.
        f = f.reshape(bsz, -1)
        H = []
        for i in range(f.shape[1]):
            retain_graph = self.training or (i < (f.shape[1] - 1))
            H.append(
                torch.autograd.grad(f[:, i].sum(), x, create_graph=self.training,
                                    retain_graph=retain_graph)[0])
        # H is (bsz, dim, dim)
        H = torch.stack(H, dim=1)
    f = f.reshape(bsz, *input_shape)
    return f, logdet + torch.slogdet(H).logabsdet
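# Hedged sketch (not part of the original module): the TODO above could be
# addressed with torch.autograd.functional.hessian, computing each sample's
# Hessian as one block instead of row-by-row. `potential_fn` is a hypothetical
# stand-in for self.get_potential with the context argument already bound.
import torch

def batched_hessian(potential_fn, x):
    """x: (bsz, dim) -> stacked per-sample Hessians of shape (bsz, dim, dim)."""
    blocks = []
    for xi in x:
        H_i = torch.autograd.functional.hessian(potential_fn, xi)
        blocks.append(H_i.reshape(xi.numel(), xi.numel()))
    return torch.stack(blocks, dim=0)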
def negative_log_likelihood(self, n_batch=None):
    if n_batch is None:
        ind = torch.arange(self.N)
    else:
        ind = torch.randperm(self.N)[:n_batch]

    sigma = torch.exp(self.log_p_y_sigma)

    K = torch.stack([
        self.kern[m].K(self.X[ind]) + torch.eye(ind.shape[0]) * 1e-5
        for m in range(self.M)
    ])

    q_z_pi = copy.deepcopy(self.q_z_pi)
    q_z_pi[q_z_pi != 0] /= sigma

    B = torch.stack([torch.diag(q_z_pi[m][ind]) for m in range(self.M)])
    sqrtB = torch.sqrt(B)

    R = torch.stack([
        torch.eye(ind.shape[0]) + sqrtB[m].mm(K[m]).mm(sqrtB[m])
        for m in range(self.M)
    ])

    lk_y = torch.zeros(1)
    for m in range(self.M):
        lk_y += self.D * torch.slogdet(R[m])[1]
        lk_y += 0.5 * torch.trace(self.Y[ind].t().mm(sqrtB[m]).mm(
            torch.solve(sqrtB[m], R[m])[0]).mm(self.Y[ind]))

    lk_z = 0.5 * (self.q_z_pi * torch.log(np.pi * 2 * sigma)).sum()

    return (lk_y + lk_z)
def get_weight(self, input, reverse):
    w_shape = self.w_shape
    if not self.LU:
        pixels = thops.pixels(input)
        dlogdet = torch.slogdet(self.weight)[1] * pixels
        if not reverse:
            weight = self.weight.view(w_shape[0], w_shape[1], 1, 1)
        else:
            weight = torch.inverse(self.weight.double()).float()\
                .view(w_shape[0], w_shape[1], 1, 1)
        return weight, dlogdet
    else:
        self.p = self.p.to(input.device)
        self.sign_s = self.sign_s.to(input.device)
        self.l_mask = self.l_mask.to(input.device)
        self.eye = self.eye.to(input.device)
        l = self.l * self.l_mask + self.eye
        u = self.u * self.l_mask.transpose(0, 1).contiguous() + torch.diag(
            self.sign_s * torch.exp(self.log_s))
        dlogdet = thops.sum(self.log_s) * thops.pixels(input)
        if not reverse:
            w = torch.matmul(self.p, torch.matmul(l, u))
        else:
            l = torch.inverse(l.double()).float()
            u = torch.inverse(u.double()).float()
            w = torch.matmul(u, torch.matmul(l, self.p.inverse()))
        return w.view(w_shape[0], w_shape[1], 1, 1), dlogdet
def rm_hamiltonian(params, momentum, log_prob_func, jitter, normalizing_const,
                   softabs_const=1e6, sampler=Sampler.HMC,
                   integrator=Integrator.EXPLICIT, metric=Metric.HESSIAN):
    log_prob = log_prob_func(params)
    ndim = params.nelement()
    pi_term = ndim * torch.log(2. * torch.tensor(pi))

    fish, abs_eigenvalues = fisher(params, log_prob_func, jitter=jitter,
                                   normalizing_const=normalizing_const,
                                   softabs_const=softabs_const, metric=metric)

    if metric == Metric.SOFTABS:
        log_det_abs = abs_eigenvalues.log().sum()
    else:
        log_det_abs = torch.slogdet(fish)[1]

    fish_inverse_momentum = cholesky_inverse(fish, momentum)
    quadratic_term = torch.matmul(momentum.view(1, -1), fish_inverse_momentum)
    hamiltonian = -log_prob + 0.5 * pi_term + 0.5 * log_det_abs + 0.5 * quadratic_term

    if util.has_nan_or_inf(hamiltonian):
        print('Invalid hamiltonian, log_prob: {}, params: {}, momentum: {}'.
              format(log_prob, params, momentum))
        raise util.LogProbError()

    # if NN_flag:
    #     return hamiltonian, params
    # else:
    #     return hamiltonian
    return hamiltonian
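# Hedged standalone check of the momentum terms above: 0.5 * (pi_term +
# log|det fish| + momentum^T fish^{-1} momentum) is exactly the negative
# log-density of momentum ~ N(0, fish), so it can be compared against
# torch.distributions.MultivariateNormal.
import math
import torch

ndim = 5
A = torch.randn(ndim, ndim)
fish = A @ A.t() + ndim * torch.eye(ndim)   # symmetric positive-definite metric
momentum = torch.randn(ndim)

pi_term = ndim * math.log(2.0 * math.pi)
quadratic_term = momentum @ torch.linalg.solve(fish, momentum)
neg_log_density = 0.5 * (pi_term + torch.slogdet(fish)[1] + quadratic_term)

mvn = torch.distributions.MultivariateNormal(torch.zeros(ndim), covariance_matrix=fish)
assert torch.allclose(neg_log_density, -mvn.log_prob(momentum), atol=1e-4)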
def get_weight(self, input, reverse):
    w_shape = self.w_shape
    pixels = list(input.size())[-1]
    if not self.LU:
        #thops.pixels(input)
        dlogdet = (torch.slogdet(self.weight)[1])  #* pixels*pixels
        if not reverse:
            weight = self.weight  #.view(w_shape[0], w_shape[1], 1, 1)
        else:
            weight = torch.inverse(self.weight.double()).float()  #.view(w_shape[0], w_shape[1], 1, 1)
        return weight, dlogdet
    else:
        self.p = self.p.to(input.device)
        self.sign_s = self.sign_s.to(input.device)
        self.l_mask = self.l_mask.to(input.device)
        self.eye = self.eye.to(input.device)
        l = self.l * self.l_mask + self.eye
        u = self.u * self.l_mask.transpose(0, 1).contiguous() + torch.diag(
            self.sign_s * torch.exp(self.log_s))
        dlogdet = cpd_sum(self.log_s)  #* pixels*pixels
        if not reverse:
            w = torch.matmul(self.p, torch.matmul(l, u))
        else:
            l = torch.inverse(l.double().cpu()).float()
            u = torch.inverse(u.double().cpu()).float()
            w = torch.matmul(u, torch.matmul(l, self.p.cpu().inverse())).cuda()
        return w, dlogdet  #view(w_shape[0], w_shape[1], 1, 1)
def get_weight(self, input, reverse):
    b, c, h, w = input.shape

    if not self.LU_decomposed:
        dlogdet = torch.slogdet(self.weight)[1] * h * w
        if reverse:
            weight = torch.inverse(self.weight)
        else:
            weight = self.weight
    else:
        self.l_mask = self.l_mask.to(input.device)
        self.eye = self.eye.to(input.device)

        lower = self.lower * self.l_mask + self.eye
        u = self.upper * self.l_mask.transpose(0, 1).contiguous()
        u += torch.diag(self.sign_s * torch.exp(self.log_s))

        dlogdet = torch.sum(self.log_s) * h * w

        if reverse:
            u_inv = torch.inverse(u)
            l_inv = torch.inverse(lower)
            p_inv = torch.inverse(self.p)
            weight = torch.matmul(u_inv, torch.matmul(l_inv, p_inv))
        else:
            weight = torch.matmul(self.p, torch.matmul(lower, u))

    return weight.view(self.w_shape[0], self.w_shape[1], 1, 1), dlogdet
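# Hedged standalone check of the identity used above: for W = P @ L @ U with a
# permutation matrix P, unit-diagonal lower-triangular L, and
# diag(U) = sign_s * exp(log_s), we have log|det W| = sum(log_s),
# since |det P| = 1 and det L = 1.
import torch

c = 8
l_mask = torch.tril(torch.ones(c, c), -1)
lower = torch.randn(c, c) * l_mask + torch.eye(c)
log_s = torch.randn(c)
sign_s = torch.randint(0, 2, (c,)).float() * 2 - 1        # random +/- 1 signs
upper = torch.randn(c, c) * l_mask.t() + torch.diag(sign_s * torch.exp(log_s))
p = torch.eye(c)[torch.randperm(c)]                        # random permutation matrix

weight = p @ lower @ upper
assert torch.allclose(torch.slogdet(weight)[1], log_s.sum(), atol=1e-4)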
def get_weight_logdet(self, bs, h, w, inverse):
    logdet = (torch.slogdet(self.weight)[1] * h * w).expand(bs)
    if inverse:
        weight = torch.inverse(self.weight)
    else:
        weight = self.weight
    return weight.view(self.w_shape[0], self.w_shape[1], 1, 1), logdet
def __init__(self, dims_in, M, b):
    super().__init__()
    self.M = nn.Parameter(M.t(), requires_grad=False)
    self.M_inv = nn.Parameter(M.t().inverse(), requires_grad=False)
    self.b = nn.Parameter(b, requires_grad=False)
    self.logDetM = nn.Parameter(torch.slogdet(M)[1], requires_grad=False)
def forward(self, input):
    _, _, height, width = input.shape

    out = F.conv2d(input, self.weight)
    logdet = (height * width *
              torch.slogdet(self.weight.squeeze().double())[1].float())

    return out, logdet
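# Hedged standalone check of the scaling above: an invertible 1x1 convolution
# applies the same c-by-c matrix W at every pixel, so the Jacobian
# log-determinant of the whole map is height * width * log|det W|.
import torch
import torch.nn.functional as F

c, h, w = 3, 4, 4
W = torch.randn(c, c) + 3 * torch.eye(c)
x = torch.randn(1, c, h, w)

flat_conv = lambda v: F.conv2d(v.view(1, c, h, w), W.view(c, c, 1, 1)).reshape(-1)
J = torch.autograd.functional.jacobian(flat_conv, x.reshape(-1))

assert torch.allclose(torch.slogdet(J)[1], h * w * torch.slogdet(W)[1], atol=1e-4)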
def inverse(self, x):
    x = x - self.bias
    self.dirty = True
    invlin = self.inv_conv(x)
    logdet = -torch.slogdet(
        torch.squeeze(self.inv_conv.weight.data)
    )[1]  #The log determinant of the inverse matrix is the negative of the forward one.
    #logdet=0
    return (invlin, logdet)
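# Quick standalone check of the comment above: the log-determinant of an
# inverse matrix is the negative of the forward one, log|det(W^-1)| = -log|det W|.
import torch

W = torch.randn(4, 4) + 4 * torch.eye(4)
assert torch.allclose(torch.slogdet(torch.inverse(W))[1], -torch.slogdet(W)[1], atol=1e-5)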
def compute_log_det_per_px(self):
    if self.fixed:
        logdet = self.fixed_log_det
    else:
        if self.use_lu:
            logdet = th.sum(self.w_s)
        else:
            logdet = th.slogdet(self.weight.double())[1].float()
    return logdet
def forward(self, z):
    w_ = torch.inverse(self.w)
    assert torch.det(w_) > 0.5
    assert torch.det(w_) < 1.5
    forwardLogjac = torch.slogdet(
        w_)[1] * z.shape[-1] * z.shape[-2] * torch.ones(z.shape[0])
    z = torch.matmul(z.permute([0, 2, 3, 1]),
                     w_.reshape(1, 1, *w_.shape)).permute(0, 3, 1, 2)
    return z, forwardLogjac
def inverse(self, y):
    assert torch.det(self.w) > 0.5
    assert torch.det(self.w) < 1.5
    inverseLogjac = torch.slogdet(
        self.w)[1] * y.shape[-1] * y.shape[-2] * torch.ones(y.shape[0])
    y = torch.matmul(y.permute([0, 2, 3, 1]),
                     self.w.reshape(1, 1, *self.w.shape)).permute(0, 3, 1, 2)
    return y, inverseLogjac
def forward(ctx, input, C, bias=None, beta=1.0, isorth=True, eps=1e-3,
            margin=-0.5, gap=0.5, negative_slope=0.1):
    if isinstance(bias, tc.Tensor):
        bias_requires_grad = bias.requires_grad
    else:
        bias_requires_grad = False
    if bias is not None:
        if margin is not None:
            bias = bias - margin
    else:
        if margin is not None:
            bias = -margin

    Num = len(input)
    input.reiter()
    obj0 = 0.0
    obj1 = 0.0
    db = 0.0 if bias_requires_grad else None  #tc.Tensor([0.0])
    dQ = 0.0
    for X, _ in input:
        # ipdb.set_trace()
        f = X.mm(C)
        if bias is not None:
            f.add_(bias)
        g = tc.zeros(f.size())
        f[0 <= f] = 1.0
        f[f < -gap] = negative_slope
        g[f < 0] = 1.0
        f[f < 0] = eps
        obj0 += -1.0 / Num * f.log_().sum()
        dQ = dQ - 1.0 / Num * X.t().mm(g)
        if bias_requires_grad:
            db = db - 1.0 / Num * f.sum(0)

    K = C.size(1)
    if K == 1:
        G = C.t().mm(C) + eps
        obj1 = -0.5 * G.log()
    else:
        G = C.t().mm(C) + tc.diag(tc.full((C.size(1), ), eps))
        sign, logdet = tc.slogdet(G)
        obj1 = -0.5 * logdet

    dQ = dQ.mm(C.t().mm(C))
    if isorth:
        dC = dQ - C.mm(dQ.t()).mm(C)
    else:
        dC = dQ - C
    argnum = tc.tensor([9])
    ctx.save_for_backward(dC, db, argnum)
    # ipdb.set_trace()
    return obj0 + obj1
def inverse(self, z):
    if self.use_lu:
        W = self._assemble_W()
        log_det = torch.sum(self.log_S)
    else:
        W = self.W
        log_det = torch.slogdet(self.W)[1]
    z_ = z @ W
    return z_, log_det
def inverse(self, x):
    x = x - self.bias
    invlin = F.conv1d(
        x, torch.unsqueeze(torch.inverse(torch.squeeze(self.linweight)), -1))
    logdet = -torch.slogdet(torch.squeeze(
        self.linweight))[1]  #/self.hparams['dim']
    #print("Conv logdet " + str(logdet))
    return (invlin, logdet)
def forward(self, z: torch.Tensor,
            mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    # z ~ (batch size, seq length, hidden size)
    out = F.linear(z, self._weight)
    _, logdet = torch.slogdet(self._weight)
    if z.dim() > 2:
        num = torch.einsum("b...->b", mask)
        logdet = logdet * num
    return out, logdet
def adj(A: 'Tensor[N, N]') -> 'Tensor[N, N]':
    """
    Compute the adjugate of a matrix A.

    The adjugate can be used for calculating the derivative of a determinant.
    Function has a runtime of O(N^3).
    """
    Ad = torch.slogdet(A)
    Ad = Ad[0] * torch.exp(Ad[1])
    return Ad * torch.inverse(A).t()
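# Hedged standalone check of the docstring's claim: the matrix returned above
# equals the gradient of the determinant, d det(A) / dA = det(A) * inv(A).T.
import torch

A = torch.randn(4, 4) + 4 * torch.eye(4)   # well-conditioned example
A.requires_grad_(True)
grad_det = torch.autograd.grad(torch.det(A), A)[0]
assert torch.allclose(grad_det, adj(A.detach()), atol=1e-4)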
def get_weight(self, x, reverse):
    ldj = torch.slogdet(self.filters)[1] * x.size(2) * x.size(3)

    if reverse:
        weight = torch.inverse(self.filters.double()).float()
    else:
        weight = self.filters

    weight = weight.view(self.num_channels, self.num_channels, 1, 1)
    return weight, ldj
def forward(ctx, input, C, bias=None, beta=1.0, isorth=True, eps=1e-6,
            margin=-1.0, alpha=0.8):
    if isinstance(bias, tc.Tensor):
        bias_requires_grad = bias.requires_grad
    else:
        bias_requires_grad = False
    if bias is not None:
        if margin is not None:
            bias = bias - margin
    else:
        if margin is not None:
            bias = -margin
    if bias is not None:
        bias = beta * bias
    C1 = beta * C

    Num = len(input)
    input.reiter()
    obj0 = 0.0
    obj1 = 0.0
    db = 0.0 if bias_requires_grad else None  #tc.Tensor([0.0])
    dQ = 0.0
    for X, _ in input:
        # ipdb.set_trace()
        f = X.mm(C1)
        if bias is not None:
            f.add_(bias)
        f.sigmoid_()
        g = f.mul(alpha).add_(1.0 - alpha + eps)
        f.mul_(1.0 - f).div_(g)
        dQ = dQ - alpha * beta / Num * X.t().mm(f)
        obj0 += -1.0 / Num * g.log_().sum()
        if bias_requires_grad:
            db = db - alpha * beta / Num * f.sum(0)

    K = C.size(1)
    if K == 1:
        G = C.t().mm(C) + eps
        obj1 = -0.5 * G.log()
    else:
        G = C.t().mm(C) + tc.diag(tc.full((C.size(1), ), eps))
        sign, logdet = tc.slogdet(G)
        obj1 = -0.5 * logdet

    dQ = dQ.mm(C.t().mm(C))
    if isorth:
        dC = dQ - C.mm(dQ.t()).mm(C)
    else:
        dC = dQ - C
    argnum = tc.tensor([8])
    ctx.save_for_backward(dC, db, argnum)
    return obj0 + obj1
def forward(self, X):
    eps = torch.randn_like(X)
    y = self.dense(eps)
    s, jac = torch.slogdet(self.dense.weight)
    y = torch.sigmoid(y)
    jac = torch.log(y).sum(1) + torch.log(1. - y).sum(1) + jac
    log_pdf_eps = -torch.tensor(X.shape[1] * 0.5 * np.log(2 * np.pi),
                                device=X.device) - 0.5 * torch.sum(eps**2, dim=1)
    return y, -log_pdf_eps + jac, eps
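# Hedged standalone check of the change-of-variables term above: for
# y = sigmoid(W @ eps), the Jacobian is diag(y * (1 - y)) @ W, so
# log|det dy/deps| = sum(log y) + sum(log(1 - y)) + log|det W|.
import torch

dim = 4
W = torch.randn(dim, dim) + 3 * torch.eye(dim)
eps = torch.randn(dim)

sig = lambda e: torch.sigmoid(W @ e)
y = sig(eps)
J = torch.autograd.functional.jacobian(sig, eps)

lhs = torch.slogdet(J)[1]
rhs = torch.log(y).sum() + torch.log(1.0 - y).sum() + torch.slogdet(W)[1]
assert torch.allclose(lhs, rhs, atol=1e-4)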
def forward(self, x):
    '''
    z = Dx + UV^Tx
    ldj = sum(log(abs(D))) + log(abs(det(K)))
    '''
    z = self.d * x + torch.einsum('dr,br->bd', self.U,
                                  torch.einsum('rd,bd->br', self.V, x))
    if self.bias is not None:
        z = z + self.bias
    ldj = self.d.abs().log().sum() + torch.slogdet(self.K)[1]
    ldj = ldj.expand([x.shape[0]])
    return z, ldj
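# Hedged standalone check of the log-det formula in the docstring, assuming K
# is the capacitance matrix of the matrix determinant lemma,
# K = I_r + V @ diag(1/d) @ U, so that
# det(diag(d) + U @ V) = det(diag(d)) * det(K).
import torch

dim, rank = 6, 2
d = torch.rand(dim) + 0.5
U = torch.randn(dim, rank)
V = torch.randn(rank, dim)

full = torch.diag(d) + U @ V
K = torch.eye(rank) + V @ torch.diag(1.0 / d) @ U

lhs = torch.slogdet(full)[1]
rhs = d.abs().log().sum() + torch.slogdet(K)[1]
assert torch.allclose(lhs, rhs, atol=1e-4)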
def forward(self, x, sldj, reverse=False):
    # TODO: may not need this if not using InvConv for fc setting.
    if x.ndim == 4:
        ldj = torch.slogdet(self.weight)[1] * x.size(2) * x.size(3)
    else:
        ldj = torch.slogdet(self.weight)[1]

    if reverse:
        weight = torch.inverse(self.weight.double()).float()
        sldj = sldj - ldj
    else:
        weight = self.weight
        sldj = sldj + ldj

    weight = weight.view(self.num_channels, self.num_channels, 1, 1)
    z = F.conv2d(x, weight)

    return z, sldj
def backward(self, z: torch.Tensor,
             mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    weight_inv = torch.inverse(self._weight.double()).float()
    # z ~ (batch size, seq length, hidden size)
    out = F.linear(z, weight_inv)
    _, logdet = torch.slogdet(weight_inv)
    if z.dim() > 2:
        num = torch.einsum("b...->b", mask)
        logdet = logdet * num
    return out, logdet
def setUp(self):
    self.features = 3
    self.bijection = LinearLU(num_features=self.features, bias=True)

    L, U = self.bijection._create_lower_upper()
    self.weight = L @ U
    self.weight_inverse = torch.inverse(self.weight)
    _, self.logabsdet = torch.slogdet(self.weight)

    self.eps = 1e-5
def reverse(self, output):
    _, _, height, width = output.shape

    in_recover = F.conv2d(
        output, self.weight.squeeze().inverse().unsqueeze(2).unsqueeze(3))
    logdet = (-height * width *
              torch.slogdet(self.weight.squeeze().double())[1].float())

    return in_recover, logdet
def get_weight(self, input, reverse):
    w_shape = self.w_shape
    pixels = thops.pixels(input)
    dlogdet = torch.slogdet(self.weight)[1] * pixels
    if not reverse:
        weight = self.weight.view(w_shape[0], w_shape[1], 1, 1)
    else:
        weight = torch.inverse(self.weight.double()).float() \
            .view(w_shape[0], w_shape[1], 1, 1)
    return weight, dlogdet
def test_jacobian(transform):
    x = generate_data(transform)
    try:
        y = transform(x)
        actual = transform.log_abs_det_jacobian(x, y)
    except NotImplementedError:
        pytest.skip('Not implemented.')
    # Test shape
    target_shape = x.shape[:x.dim() - transform.domain.event_dim]
    assert actual.shape == target_shape

    # Expand if required
    transform = reshape_transform(transform, x.shape)
    ndims = len(x.shape)
    event_dim = ndims - transform.domain.event_dim
    x_ = x.view((-1,) + x.shape[event_dim:])
    n = x_.shape[0]
    # Reshape to squash batch dims to a single batch dim
    transform = reshape_transform(transform, x_.shape)

    # 1. Transforms with unit jacobian
    if isinstance(transform, ReshapeTransform) or isinstance(transform.inv, ReshapeTransform):
        expected = x.new_zeros(x.shape[x.dim() - transform.domain.event_dim])
    # 2. Transforms with 0 off-diagonal elements
    elif transform.domain.event_dim == 0:
        jac = jacobian(transform, x_)
        # assert off-diagonal elements are zero
        assert torch.allclose(jac, jac.diagonal().diag_embed())
        expected = jac.diagonal().abs().log().reshape(x.shape)
    # 3. Transforms with non-0 off-diagonal elements
    else:
        if isinstance(transform, CorrCholeskyTransform):
            jac = jacobian(lambda x: tril_matrix_to_vec(transform(x), diag=-1), x_)
        elif isinstance(transform.inv, CorrCholeskyTransform):
            jac = jacobian(lambda x: transform(vec_to_tril_matrix(x, diag=-1)),
                           tril_matrix_to_vec(x_, diag=-1))
        elif isinstance(transform, StickBreakingTransform):
            jac = jacobian(lambda x: transform(x)[..., :-1], x_)
        else:
            jac = jacobian(transform, x_)

        # Note that jacobian will have shape (batch_dims, y_event_dims, batch_dims, x_event_dims)
        # However, batches are independent so this can be converted into a (batch_dims, event_dims, event_dims)
        # after reshaping the event dims (see above) to give a batched square matrix whose determinant
        # can be computed.
        gather_idx_shape = list(jac.shape)
        gather_idx_shape[-2] = 1
        gather_idxs = torch.arange(n).reshape((n,) + (1,) * (len(jac.shape) - 1)).expand(gather_idx_shape)
        jac = jac.gather(-2, gather_idxs).squeeze(-2)
        out_ndims = jac.shape[-2]
        jac = jac[..., :out_ndims]  # Remove extra zero-valued dims (for inverse stick-breaking).
        expected = torch.slogdet(jac).logabsdet

    assert torch.allclose(actual, expected, atol=1e-5)
def backward(self, y: torch.tensor, x: torch.tensor=None,
             x_freqs: torch.tensor=None, require_log_probs=True, var=None,
             y_freqs=None, to_lat=None):
    # from other language to this language
    x_prime = y.mm(self.W)
    if require_log_probs:
        assert x is not None and x_freqs is not None
        log_probs = self.cal_mixture_of_gaussian_fix_var(x_prime, x, x_freqs, var,
                                                         x_prime_freqs=y_freqs)
        _, log_abs_det = torch.slogdet(self.W)
        log_probs = log_probs + log_abs_det
    else:
        log_probs = torch.tensor(0)
    return x_prime, log_probs
def blas_lapack_ops(self):
    m = torch.randn(3, 3)
    a = torch.randn(10, 3, 4)
    b = torch.randn(10, 4, 3)
    v = torch.randn(3)
    return (
        torch.addbmm(m, a, b),
        torch.addmm(torch.randn(2, 3), torch.randn(2, 3), torch.randn(3, 3)),
        torch.addmv(torch.randn(2), torch.randn(2, 3), torch.randn(3)),
        torch.addr(torch.zeros(3, 3), v, v),
        torch.baddbmm(m, a, b),
        torch.bmm(a, b),
        torch.chain_matmul(torch.randn(3, 3), torch.randn(3, 3), torch.randn(3, 3)),
        # torch.cholesky(a), # deprecated
        torch.cholesky_inverse(torch.randn(3, 3)),
        torch.cholesky_solve(torch.randn(3, 3), torch.randn(3, 3)),
        torch.dot(v, v),
        torch.eig(m),
        torch.geqrf(a),
        torch.ger(v, v),
        torch.inner(m, m),
        torch.inverse(m),
        torch.det(m),
        torch.logdet(m),
        torch.slogdet(m),
        torch.lstsq(m, m),
        torch.lu(m),
        torch.lu_solve(m, *torch.lu(m)),
        torch.lu_unpack(*torch.lu(m)),
        torch.matmul(m, m),
        torch.matrix_power(m, 2),
        # torch.matrix_rank(m),
        torch.matrix_exp(m),
        torch.mm(m, m),
        torch.mv(m, v),
        # torch.orgqr(a, m),
        # torch.ormqr(a, m, v),
        torch.outer(v, v),
        torch.pinverse(m),
        # torch.qr(a),
        torch.solve(m, m),
        torch.svd(a),
        # torch.svd_lowrank(a),
        # torch.pca_lowrank(a),
        # torch.symeig(a), # deprecated
        # torch.lobpcg(a, b), # not supported
        torch.trapz(m, m),
        torch.trapezoid(m, m),
        torch.cumulative_trapezoid(m, m),
        # torch.triangular_solve(m, m),
        torch.vdot(v, v),
    )