def kinetic_grad(self, r):
    """
    Computes the gradient of kinetic energy w.r.t. the momentum `r`.
    Equivalent to computing the velocity corresponding to the momentum `r`.

    :param dict r: a dictionary maps site names to a tensor momentum.
    :returns: a dictionary maps site names to the corresponding gradient
    """
    grads = {}
    for names, sqrt_inv in self._mass_matrix_sqrt_inverse.items():
        flat_r = torch.cat([r[name].reshape(-1) for name in names])
        # NB: using inverse_mass_matrix as in BlockMassMatrix would cost
        # O(N^2 x head_size) operations and O(N^2) memory; applying
        # mass_matrix_sqrt_inverse twice instead reduces this to
        # O(N x head_size^2) operations and O(N x head_size) memory.
        unscaled = triu_matvecmul(sqrt_inv, flat_r)
        flat_v = triu_matvecmul(sqrt_inv, unscaled, transpose=True)
        # Scatter the flat velocity back into per-site tensors.
        offset = 0
        for name in names:
            end = offset + r[name].numel()
            grads[name] = flat_v[offset:end].reshape(r[name].shape)
            offset = end
    return grads
def test_utilities(head_size):
    size = 5
    raw = torch.randn(size, size)
    cov = torch.mm(raw, raw.t())
    # Build an arrowhead matrix: keep the top rows/left columns intact and
    # only the diagonal of the bottom-right block.
    mask = torch.ones(size, size)
    mask[head_size:, head_size:] = 0.0
    mask.view(-1)[::size + 1][head_size:] = 1.0
    arrowhead_full = mask * cov
    # Cholesky of the double-flipped matrix, flipped back, yields an
    # upper-triangular factor U with U @ U.T == arrowhead_full.
    flipped = torch.flip(arrowhead_full, (-2, -1))
    expected = torch.flip(torch.linalg.cholesky(flipped), (-2, -1))
    # test if those flip ops give expected upper triangular values
    assert_close(expected.triu(), expected)
    assert_close(expected.matmul(expected.t()), arrowhead_full)

    # test sqrt
    arrowhead = SymmArrowhead(cov[:head_size], cov.diag()[head_size:])
    actual = sqrt(arrowhead)
    assert_close(actual.top, expected[:head_size])
    assert_close(actual.bottom_diag, expected.diag()[head_size:])

    # test triu_inverse
    expected = expected.inverse()
    actual = triu_inverse(actual)
    assert_close(actual.top, expected[:head_size])
    assert_close(actual.bottom_diag, expected.diag()[head_size:])

    # test triu_matvecmul
    vec = torch.randn(size)
    assert_close(triu_matvecmul(actual, vec), expected.matmul(vec))
    assert_close(
        triu_matvecmul(actual, vec, transpose=True), expected.t().matmul(vec)
    )

    # test triu_gram
    actual = triu_gram(actual)
    if head_size > 0:
        expected = arrowhead_full.inverse()
    else:
        expected = arrowhead_full.diag().reciprocal()
    assert_close(actual, expected)
def unscale(self, r):
    """
    Computes `inv(M^{1/2}) @ r`.

    Note that `r` is generated from a gaussian with scale `mass_matrix_sqrt`.
    This method will unscale it.

    :param dict r: a dictionary maps site names to a tensor momentum.
    :returns: a dictionary maps site names to the corresponding tensor
    """
    unscaled = {}
    for names, sqrt_inv in self._mass_matrix_sqrt_inverse.items():
        flat = torch.cat([r[name].reshape(-1) for name in names])
        # The result is keyed by the tuple of site names; `scale` consumes
        # the same key when mapping back to per-site tensors.
        unscaled[names] = triu_matvecmul(sqrt_inv, flat)
    return unscaled
def scale(self, r_unscaled, r_prototype):
    """
    Computes `M^{1/2} @ r_unscaled`.

    Note that `r` is generated from a gaussian with scale `mass_matrix_sqrt`.
    This method will scale it.

    :param dict r_unscaled: a dictionary maps site names to a tensor momentum.
    :param dict r_prototype: a dictionary maps site names to prototype
        momentum. Those prototype values are used to get shapes of the scaled
        version.
    :returns: a dictionary maps site names to the corresponding tensor
    """
    scaled = {}
    for names, sqrt_mass in self._mass_matrix_sqrt.items():
        flat = triu_matvecmul(sqrt_mass, r_unscaled[names])
        # Distribute the flat vector back into per-site tensors, using the
        # prototype values to recover each site's numel and shape.
        start = 0
        for name in names:
            stop = start + r_prototype[name].numel()
            scaled[name] = flat[start:stop].reshape(r_prototype[name].shape)
            start = stop
    return scaled