Example #1
    def _util_newton_updates(self, x0, max_iter=1, xtol=None) -> Tensor:
        r"""Make `max_iter` newton updates on utility.

        This is used in `forward` to calculate and fill in gradient into tensors.
        Instead of doing utility -= H^-1 @ g, use substition method.
        See more explanation in _update_utility_derived_values.dd
        By default only need to run one iteration just to fill the the gradients.

        Args:
            x0: A `batch_size x n` dimension tensor, initial values
            max_iter: Max number of iterations
            xtol: Stopping criterion. If `None`, do not stop until
                `max_iter` updates have finished
        """
        xtol = float("-Inf") if xtol is None else xtol
        dp, D, DT, sn, ch, ci = (
            self.datapoints,
            self.D,
            self.DT,
            self.std_noise,
            self.covar_chol,
            self.covar_inv,
        )
        covar = self.covar
        diff = float("Inf")
        i = 0
        x = x0
        eye = None
        while i < max_iter and diff > xtol:
            hl = self._hess_likelihood_f_sum(x, D, DT, sn)
            cov_hl = covar @ hl
            if eye is None:
                eye = torch.eye(
                    cov_hl.size(-1),
                    dtype=self.datapoints.dtype,
                    device=self.datapoints.device,
                ).expand(cov_hl.shape)
            cov_hl = cov_hl + eye  # add the identity matrix to cov_hl
            g = self._grad_posterior_f(x, dp, D, DT, sn, ch, ci)
            cov_g = covar @ g.unsqueeze(-1)
            x_update = torch.solve(cov_g, cov_hl).solution.squeeze(-1)
            x_next = x - x_update
            diff = torch.norm(x - x_next)
            x = x_next
            i += 1
        return x
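A minimal numeric sketch of the substitution used above (random SPD stand-ins, not the model's actual covariance or likelihood Hessian): with H = K^-1 + H_l, the Newton step H^-1 @ g equals solve(K @ H_l + I, K @ g), so K never has to be inverted explicitly.

import torch

torch.manual_seed(0)
n = 5
A = torch.randn(n, n)
K = A @ A.t() + n * torch.eye(n)        # SPD stand-in for the covariance
hl = torch.diag(torch.rand(n) + 0.1)    # stand-in for the likelihood Hessian
g = torch.randn(n, 1)

direct = torch.inverse(torch.inverse(K) + hl) @ g
substituted = torch.solve(K @ g, K @ hl + torch.eye(n)).solution
print(torch.allclose(direct, substituted, atol=1e-5))  # True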
Example #2
 def inverse_no_cache(self, inputs):
     """Cost:
         output = O(D^3 + D^2N)
         logabsdet = O(D^3)
     where:
         D = num of features
         N = num of inputs
     """
     batch_size = inputs.shape[0]
     outputs = inputs - self.bias
     outputs, lu = torch.solve(outputs.t(), self._weight)  # Linear-system solver.
     outputs = outputs.t()
     # The linear-system solver returns the LU decomposition of the weights, which we
     # can use to obtain the log absolute determinant directly.
     logabsdet = -torch.sum(torch.log(torch.abs(torch.diag(lu))))
     logabsdet = logabsdet * torch.ones(batch_size)
     return outputs, logabsdet
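A small sanity check of the log-determinant trick above, with a made-up weight matrix: the second value returned by torch.solve is the LU factorization of the left-hand-side matrix, and the sum of the log absolute values of its diagonal matches torch.slogdet.

import torch

torch.manual_seed(0)
D, N = 4, 3
weight = torch.randn(D, D)
rhs = torch.randn(D, N)

_, lu = torch.solve(rhs, weight)
logabsdet_lu = torch.sum(torch.log(torch.abs(torch.diag(lu))))
sign, logabsdet_ref = torch.slogdet(weight)
print(torch.allclose(logabsdet_lu, logabsdet_ref, atol=1e-5))  # True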
Example #3
 def solve(self, v, regul=1e-8, impl='solve'):
     """
     solves v = Ax for x
     """
     # TODO: test
     if impl == 'solve':
         # TODO: reuse LU decomposition once it is computed
         inv_v, _ = torch.solve(
             v.get_flat_representation().view(-1, 1), self.data +
             regul * torch.eye(self.size(0), device=self.data.device))
         return PVector(v.layer_collection, vector_repr=inv_v[:, 0])
     elif impl == 'eigendecomposition':
         v_eigenbasis = self.project_to_diag(v)
         inv_v_eigenbasis = v_eigenbasis / (self.evals + regul)
         return self.project_from_diag(inv_v_eigenbasis)
     else:
         raise NotImplementedError
Example #4
def barycentric_coordinates(points, ref_points, triangulation):
    """
    Compute barycentric coordinates of N points w.r.t. L 2D triangles / 3D tetrahedra.
    Args: 
        points: (B,D,N)
        ref_points: (B,D,M)
        triangulation: (B,D+1,L) L triangles (D=2) or L tetrahedra (D=3) indices
    Returns:
        epsilon: (B,N,L,D+1) weights for all triangles
        simplexMask: (B,N,L) mask the enclosing triangle
        pointMask: (B,N) mask the valid points
    """
    L = triangulation.shape[-1]
    D = ref_points.shape[1]
    N = points.shape[-1]
    M = ref_points.shape[-1]
    # find enclosing triangle
    ref_points = ref_points.transpose(2, 1).contiguous()
    triangulation = triangulation.transpose(2, 1).contiguous()
    # (B,L,D+1,D)
    simplexes = torch.gather(
        ref_points.unsqueeze(1).expand(-1, L, -1, -1), 2,
        triangulation.unsqueeze(-1).expand(-1, -1, -1, ref_points.shape[-1]))
    # (B,L,D,D+1)
    simplexes = simplexes.transpose(2, 3)
    # (B,N,1,D) - (B,1,L,D) = (B,N,L,D)
    B = points.transpose(1, 2).unsqueeze(2) - simplexes[:, :, :,
                                                        -1].unsqueeze(1)
    # (B,L,D,D+1) - (B,L,D,1) = (B,L,D,D+1)
    T = (simplexes - simplexes[:, :, :, -1:])[:, :, :, :D]
    # solve T @ epsilon = B for epsilon: (B,N,L,D,D) @ (B,N,L,D,1) = (B,N,L,D,1)
    epsilon, _ = torch.solve(B.unsqueeze(-1),
                             T.unsqueeze(1).expand(-1, N, -1, -1, -1))
    epsilon_last = 1 - torch.sum(epsilon, dim=-2, keepdim=True)
    # (B,N,L,D+1)
    epsilon = torch.cat([epsilon, epsilon_last], dim=-2).squeeze(-1)
    # (B,N,L) enclosing triangle has positive coordinates
    simplexMask = torch.all((epsilon < 1) & (epsilon > 0), dim=-1)
    # cannot be enclosed in multiple simplexes
    assert (torch.all(torch.sum(simplexMask, dim=-1) <= 1)
            ), "detected points enclosed in multiple triangles"
    # (B,N,L,D+1)
    epsilon = epsilon * simplexMask.unsqueeze(-1).to(dtype=epsilon.dtype)
    # (B,N)
    pointMask = torch.eq(torch.sum(simplexMask, dim=-1), 1)
    return epsilon, simplexMask, pointMask
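A scalar-sized illustration of the solve inside barycentric_coordinates, using one hand-picked 2D triangle: T holds the edge vectors relative to the last vertex, B the point offset from that vertex, and torch.solve yields the first D barycentric weights.

import torch

tri = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])  # (3, 2) triangle vertices
p = torch.tensor([0.2, 0.3])                               # query point

T = (tri[:2] - tri[2]).t()         # (2, 2) edge vectors relative to the last vertex
B = (p - tri[2]).unsqueeze(-1)     # (2, 1) offset of the point from the last vertex
eps01 = torch.solve(B, T).solution.squeeze(-1)
eps = torch.cat([eps01, (1 - eps01.sum()).view(1)])
print(eps)  # tensor([0.5000, 0.2000, 0.3000]) -> all positive, point lies inside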
Example #5
def _solve_ABE(A: torch.Tensor, B: torch.Tensor, E: torch.Tensor):
    # A: (*BA, na, na) matrix
    # B: (*BB, na, ncols) matrix
    # E: (*BE, ncols) matrix
    na = A.shape[-1]
    BA, BB, BE = normalize_bcast_dims(A.shape[:-2], B.shape[:-2], E.shape[:-1])
    E = E.reshape(1, *BE, E.shape[-1]).transpose(0, -1)  # (ncols, *BE, 1)
    B = B.reshape(1, *BB, *B.shape[-2:]).transpose(0,
                                                   -1)  # (ncols, *BB, na, 1)

    # NOTE: The line below is very inefficient for large na and ncols
    AE = A - torch.diag_embed(E.repeat_interleave(repeats=na, dim=-1),
                              dim1=-2,
                              dim2=-1)  # (ncols, *BAE, na, na)
    r, _ = torch.solve(B, AE)  # (ncols, *BAEM, na, 1)
    r = r.transpose(0, -1).squeeze(0)  # (*BAEM, na, ncols)
    return r
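A naive per-column reference for the batched routine above, assuming small unbatched shapes: column i of the result solves (A - E[i] * I) x = B[:, i].

import torch

torch.manual_seed(0)
na, ncols = 4, 3
A = torch.randn(na, na) + na * torch.eye(na)
B = torch.randn(na, ncols)
E = torch.randn(ncols)

cols = [torch.solve(B[:, i:i + 1], A - E[i] * torch.eye(na)).solution
        for i in range(ncols)]
r_ref = torch.cat(cols, dim=-1)  # (na, ncols), matching _solve_ABE for this case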
Example #6
    def backward(ctx, dx, dloss):
        """
        \subsubsection{The gradient of the game module}


        Let $g(\theta, a) := \nabla_{a} \potf (\theta, a)$. \todo{check if derivative w.r.t. $a$ makes sense}

        Let $h$ be the local mapping from params $\Theta$ to solution $A$, i.e., solving $g(\theta, h(\theta)) = 0$.
        Let $D g (\theta, a) = [J_1 (\theta, a), J_2 (\theta, a)]$, with $J_1$ corresponding to $\theta$ and $J_2$ to $a$.


        \begin{prop}
        Then, under some conditions,
        \begin{align*}
        \nabla h (\theta) = - [J_2(\theta, h(\theta))]^{-1}  J_1(\theta, h(\theta))
        \end{align*}
        \end{prop}        
        """
        theta, x = ctx.saved_variables
        sv = ctx.sv
        game = ctx.game
        other_params = ctx.other_params

        with torch.enable_grad():
            theta = theta.clone().detach()

            x = x.clone().detach()
            theta.requires_grad = True
            x.requires_grad = True

            pot = game.loss_fun(sv, x, theta, other_params)
            g = torch.autograd.grad(pot,
                                    x,
                                    create_graph=True,
                                    retain_graph=True)[0]

            J2, J1 = jacobian(g, [x, theta], create_graph=False)

            J2 = J2 + torch.diag(torch.ones(J2.shape[0]) * 1e-5)
            X, LU = torch.solve(J1, J2)
            nabla_h = -X.transpose(0, 1)

            assert not torch.isnan(nabla_h).any()
            assert not torch.isinf(nabla_h).any()

        return torch.mm(nabla_h, dx[:, None])[:, 0], None, None, None, None
Example #7
    def update(self, mean, covariance, measurement):
        """Run Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (*, 8).
        covariance : ndarray
            The state's covariance matrix (*,8,8).
        measurement : ndarray
            The 4 dimensional measurement vector (x, y, a, h), where (x, y)
            is the center position, a the aspect ratio, and h the height of the
            bounding box.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.

        """

        projected_mean, projected_cov = self.project(mean, covariance)

        # Unfortunately, torch.cholesky can randomly throw a CUDA error in Linux GPU
        # environments; this affects PyTorch 1.2 through 1.5.
        # chol_factor = torch.cholesky(projected_cov, upper=False)
        # kalman_gain = torch.cholesky_solve(torch.matmul(covariance, self._update_mat).permute(0, 2, 1),
        #                                    chol_factor,
        #                                    upper=False).permute(0, 2, 1)

        # The alternative solution is theoretically slower than cholesky_solve
        kalman_gain = torch.solve(
            torch.matmul(covariance, self._update_mat).permute(0, 2, 1),
            projected_cov)[0].permute(0, 2, 1)

        # (*, 4)
        innovation = measurement.view(-1, 4) - projected_mean

        kalman_gain_t = kalman_gain.permute(0, 2, 1)
        new_mean = mean + torch.bmm(innovation.unsqueeze(1),
                                    kalman_gain_t).view(-1, 8)  # (*, 8)
        new_covariance = covariance - torch.matmul(
            torch.matmul(projected_cov.permute(0, 2, 1),
                         kalman_gain_t).permute(0, 2, 1), kalman_gain_t)

        return new_mean, new_covariance
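A shape-level check of the Kalman-gain line above, using random batched matrices rather than the tracker's state: torch.solve(PHt.permute(0, 2, 1), S)[0].permute(0, 2, 1) equals PHt @ S^-1 per batch element, since S is symmetric.

import torch

torch.manual_seed(0)
b, n, m = 2, 8, 4
PHt = torch.randn(b, n, m)                          # stand-in for covariance @ update matrix
A = torch.randn(b, m, m)
S = A @ A.transpose(-1, -2) + m * torch.eye(m)      # SPD stand-in for projected_cov

gain = torch.solve(PHt.permute(0, 2, 1), S)[0].permute(0, 2, 1)
print(torch.allclose(gain, PHt @ torch.inverse(S), atol=1e-4))  # True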
Example #8
    def _spin_mean_var(self, x, cav_mean, cav_var):
        """
        Compute spin means and variances from cavity means and variances.

        Note:
            These expressions are obtained from integrating the single-site
            partition function with a multivariate Gaussian prior. You should
            change this function if you want to play around with different
            single-site priors for the spins.
        """
        inv_var = self.spin_prior_inv_var - cav_var
        prefactor = torch.solve(batched_eye_like(inv_var), inv_var).solution
        spin_mean = torch.einsum(
            'i d e, b i d -> b i e', prefactor, (cav_mean + x)
        )
        spin_var = prefactor
        return spin_mean, spin_var
Example #9
 def forward(self):
     ################################################################################
     # flow forwards
     self.phii = self.XI.clone().detach(
     )  # recommended way to copy construct from  a tensor
     self.It = torch.zeros((self.nt, self.nxI[0], self.nxI[1], self.nxI[2]),
                           dtype=self.dtype,
                           device=self.device)
     self.It[0] = self.I
     for t in range(self.nt):
         # apply the tform to I0
         if t > 0: self.It[t] = self.interp3(self.xI, self.I, self.phii)
         Xs = self.XI - self.dt * self.v[t]
         self.phii = self.interp3(self.xI, self.phii - self.XI, Xs) + Xs
     # apply deformation including affine
     self.Ai = torch.inverse(self.A)
     X0s = self.Ai[0, 0] * self.XJ[0] + self.Ai[
         0, 1] * self.XJ[1] + self.Ai[0, 2] * self.XJ[2] + self.Ai[0, 3]
     X1s = self.Ai[1, 0] * self.XJ[0] + self.Ai[
         1, 1] * self.XJ[1] + self.Ai[1, 2] * self.XJ[2] + self.Ai[1, 3]
     X2s = self.Ai[2, 0] * self.XJ[0] + self.Ai[
         2, 1] * self.XJ[1] + self.Ai[2, 2] * self.XJ[2] + self.Ai[2, 3]
     self.AiX = torch.stack([X0s, X1s, X2s])
     self.phiiAi = self.interp3(self.xI, self.phii - self.XI,
                                self.AiX) + self.AiX
     self.AphiI = self.interp3(self.xI, self.I, self.phiiAi)
     ################################################################################
     # calculate and apply intensity transform
     AphiIflat = torch.flatten(self.AphiI)
     Jflat = torch.flatten(self.J)
     WMflat = torch.flatten(self.WM)
     # format data into a Nxorder matrix
     B = torch.zeros((self.AphiI.numel(), self.order),
                     device=self.device,
                     dtype=self.dtype)  # B for basis functions
     for o in range(self.order):
         B[:, o] = AphiIflat**o
     BT = torch.transpose(B, 0, 1)
     BTB = torch.matmul(BT * WMflat, B)
     BTJ = torch.matmul(BT * WMflat, Jflat)
     self.coeffs, _ = torch.solve(BTJ[:, None], BTB)
     self.CA = torch.mean(self.J * (1.0 - self.WM))
     # torch.solve(B,A) solves AX=B (note order is opposite what I'd expect)
     self.fAphiI = torch.matmul(B, self.coeffs).reshape(self.nxJ)
     # for convenience set this error to a member
     self.err = self.fAphiI - self.J
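A compact weighted least-squares sketch on synthetic data (not the registration variables above), showing the same normal-equations pattern: torch.solve(BTJ[:, None], BTB) solves BTB @ coeffs = BTJ, with the right-hand side passed first as noted in the comment above.

import torch

torch.manual_seed(0)
n, order = 100, 3
t = torch.rand(n)
w = torch.rand(n)                                      # per-sample weights
target = 1.0 + 2.0 * t + 0.5 * t**2                    # noiseless quadratic signal

B = torch.stack([t**o for o in range(order)], dim=1)   # (n, order) polynomial basis
BT = B.t()
BTB = (BT * w) @ B
BTJ = (BT * w) @ target
coeffs, _ = torch.solve(BTJ[:, None], BTB)
print(coeffs.squeeze())  # approximately tensor([1.0000, 2.0000, 0.5000])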
Example #10
    def test_letkf_analyser_gets_same_solution_as_hunt_07(self):
        hunt_ana_perts = []
        for gp in range(40):
            use_obs, obs_weights = self.localisation.localize_obs(
                [gp, ], self.obs_grid
            )
            use_obs = obs_weights > 0
            obs_weights = torch.from_numpy(obs_weights[use_obs]).float()
            num_obs = len(obs_weights)
            obs_cov = torch.eye(num_obs) * 0.5
            loc_perts = self.normed_perts[..., use_obs]
            loc_obs = self.normed_obs[..., use_obs]

            c_hunt, _ = torch.solve(
                loc_perts.view(1, 10, num_obs).transpose(-1, -2),
                obs_cov.view(1, num_obs, num_obs)
            )
            c_hunt = c_hunt.squeeze(0).t() * obs_weights
            prec_analysed = c_hunt @ loc_perts.t() + torch.eye(10) * 9.
            evals, evects = torch.symeig(prec_analysed, eigenvectors=True,
                                         upper=False)
            evals_inv = 1/evals
            evects_inv = evects.t()
            cov_analysed = torch.mm(evects, torch.diagflat(evals_inv))
            cov_analysed = torch.mm(cov_analysed, evects_inv)

            evals_perts = (9 * evals_inv).sqrt()
            w_perts = torch.mm(evects, torch.diagflat(evals_perts))
            w_perts = torch.mm(w_perts, evects_inv)

            w_mean = cov_analysed @ c_hunt @ loc_obs.t()
            weights = w_mean + w_perts
            tmp_ana_pert = self.state_perts[..., gp].values @ weights.numpy()
            hunt_ana_perts.append(tmp_ana_pert)

        ret_ana_perts = self.analyser.get_analysis_perts(
            torch.from_numpy(self.state_perts.values).float(),
            self.normed_perts*np.sqrt(2), self.normed_obs*np.sqrt(2),
            self.state_grid, self.obs_grid
        ).numpy()

        hunt_ana_perts = np.stack(hunt_ana_perts, axis=-1)

        np.testing.assert_almost_equal(ret_ana_perts, hunt_ana_perts,
                                       decimal=5)
Example #11
def solve_interpolation(train_points, train_values, order,
                        regularization_weight):
    b, n, d = train_points.shape
    k = train_values.shape[-1]

    # First, rename variables so that the notation (c, f, w, v, A, B, etc.)
    # follows https://en.wikipedia.org/wiki/Polyharmonic_spline.
    # To account for python style guidelines we use
    # matrix_a for A and matrix_b for B.

    c = train_points
    f = train_values.float()

    matrix_a = phi(cross_squared_distance_matrix(c, c),
                   order).unsqueeze(0)  # [b, n, n]
    #     if regularization_weight > 0:
    #         batch_identity_matrix = array_ops.expand_dims(
    #           linalg_ops.eye(n, dtype=c.dtype), 0)
    #         matrix_a += regularization_weight * batch_identity_matrix

    # Append ones to the feature values for the bias term in the linear model.
    ones = torch.ones(1, dtype=train_points.dtype).view([-1, 1, 1])
    matrix_b = torch.cat((c, ones), 2).float()  # [b, n, d + 1]

    # [b, n + d + 1, n]
    left_block = torch.cat((matrix_a, torch.transpose(matrix_b, 2, 1)), 1)

    num_b_cols = matrix_b.shape[2]  # d + 1
    #     lhs_zeros = torch.zeros((b, num_b_cols, num_b_cols), dtype=train_points.dtype).float()

    # In TensorFlow, zeros are used here. PyTorch's gesv fails with zeros for some reason we don't understand.
    # So instead we use very tiny values (standard-normal samples scaled down by 1e10) in this block of the system matrix.
    lhs_zeros = torch.randn((b, num_b_cols, num_b_cols)) / 1e10
    right_block = torch.cat((matrix_b, lhs_zeros), 1)  # [b, n + d + 1, d + 1]
    lhs = torch.cat((left_block, right_block), 2)  # [b, n + d + 1, n + d + 1]

    rhs_zeros = torch.zeros((b, d + 1, k), dtype=train_points.dtype).float()
    rhs = torch.cat((f, rhs_zeros), 1)  # [b, n + d + 1, k]

    # Then, solve the linear system and unpack the results.
    X, LU = torch.solve(rhs, lhs)
    w = X[:, :n, :]
    v = X[:, n:, :]

    return w, v
Example #12
    def predict(self, xtest, full_cov=True):
        x = self.x
        y = self.y
        alpha, L = self.forward(x, y)

        Kt = self.kernel(x, xtest)
        Ktt = self.kernel(xtest, xtest)

        m = Kt.transpose(-2, -1) @ alpha
        v, _ = torch.solve(Kt.unsqueeze(0), L.unsqueeze(0))
        v = v.squeeze(0)
        S = Ktt - v.transpose(-2, -1) @ v
        S = S.squeeze()
        # adding noisestd to get the variance of P(y*|f*)
        S = S + torch.eye(S.shape[0]) * self.noisestd**2
        if full_cov is False:
            S = torch.diag(S)
        return m, S
Example #13
 def forward(self, x):
     B, D, H, W = x.shape
     N = H * W
     identity = torch.eye(N).cuda()
     # reshape x, s.t. we can use the gmp formulation as a global pooling operation
     x = x.view(B, D, N)
     x = x.permute(0, 2, 1)
     # compute the linear kernel
     K = torch.bmm(x, x.permute(0, 2, 1))
     # solve the linear system (K + lambda * I) * alpha = ones
     A = K + self.lamb * identity
     o = torch.ones(B, N, 1).cuda()
     #alphas, _ = torch.gesv(o, A) # tested using pytorch 1.0.1
     alphas, _ = torch.solve(o, A)  # tested using pytorch 1.2.0
     alphas = alphas.view(B, 1, -1)
     xi = torch.bmm(alphas, x)
     xi = xi.view(B, -1)
     return xi
Example #14
def multivariate_normal(x, mu, L):
    """
    L is the Cholesky decomposition of the covariance.
    x and mu are either vectors (ndim=1) or matrices. In the matrix case, we
    assume independence over the *columns*: the number of rows must match the
    size of L.
    """
    d = x - mu
    if d.dim() == 1:
        d = d.unsqueeze(1)
    alpha, _ = torch.solve(d, L)
    alpha = alpha.squeeze(1)
    num_col = 1 if x.dim() == 1 else x.size(1)
    num_dims = x.size(0)
    ret = -0.5 * num_dims * num_col * float(numpy.log(2 * numpy.pi))
    ret += -num_col * torch.diag(L).log().sum()
    ret += -0.5 * (alpha**2).sum()
    return ret
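A quick cross-check of the density above against torch.distributions, assuming the multivariate_normal function defined above is in scope (and that numpy is imported in its module); the two should agree for a single vector x.

import numpy
import torch

torch.manual_seed(0)
d = 4
A = torch.randn(d, d)
cov = A @ A.t() + d * torch.eye(d)
L = torch.cholesky(cov)
x, mu = torch.randn(d), torch.randn(d)

ref = torch.distributions.MultivariateNormal(mu, covariance_matrix=cov).log_prob(x)
print(torch.allclose(multivariate_normal(x, mu, L), ref, atol=1e-5))  # True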
Example #15
 def forward(self, X=None, Y=None):
     if X is None:
         X = self.X
         nx = self.nx
     else:
         nx = X.shape[0]
     if Y is None:
         Y = self.Y
         ny = self.ny
     else:
         ny = Y.shape[0]
     cov_X, x_bar = self.calculate_hotelling(X)
     cov_Y, y_bar = self.calculate_hotelling(Y)
     pooled = 1 / (nx + ny - 2) * (cov_X + cov_Y)
     z = (x_bar - y_bar).unsqueeze(1)
     inv_z, _ = torch.solve(z, pooled + self.diag)
     test_statistic = -nx * ny / (nx + ny) * torch.sum(z * inv_z)
     return test_statistic
Example #16
def _torch_solve_cast(input: torch.Tensor,
                      A: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """Helper function to make torch.solve work with other than fp32/64.

    The function torch.solve is only implemented for fp32/64, which makes it
    impossible to use with fp16 or other dtypes. This function casts the input
    data to fp32, applies torch.solve, and casts the result back to the input dtype.
    """
    assert isinstance(
        input,
        torch.Tensor), f"Input must be torch.Tensor. Got: {type(input)}."
    dtype: torch.dtype = input.dtype
    if dtype not in (torch.float32, torch.float64):
        dtype = torch.float32

    out1, out2 = torch.solve(input.to(dtype), A.to(dtype))

    return (out1.to(input.dtype), out2.to(input.dtype))
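A minimal usage sketch with hypothetical fp16 inputs, assuming the _torch_solve_cast helper above is in scope: the solve runs in fp32 internally and both results come back in the original dtype.

import torch

A = (torch.randn(3, 3) + 3 * torch.eye(3)).half()
b = torch.randn(3, 1).half()

x, lu = _torch_solve_cast(b, A)
print(x.dtype, lu.dtype)  # torch.float16 torch.float16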
Example #17
def tt_als_step(X, Y, R, omega, _lambda, ranks):
    for i in range(Y.shape[1]):
        seen = []
        for j in range(len(omega[i, :])):
            if omega[i, j]:
                seen.append(j)
        if len(seen) == 0:
            Y[:, i, :] = torch.zeros(ranks)
            continue
        temp_X = torch.cat([X[j, :].unsqueeze(0) for j in seen], axis=0)
        temp_R = torch.cat([R[:, j].unsqueeze(-1) for j in seen], axis=1)
        XTX = temp_X.T @ temp_X
        lambdaI = torch.eye(XTX.shape[0]) * _lambda
        y = torch.solve(
            torch.matmul(temp_R[i, :], temp_X).unsqueeze(-1),
            XTX + lambdaI).solution.squeeze()
        Y[:, i, :] = y.reshape(ranks)
    return Y
Example #18
    def update_q_f(self):
        sigma = torch.exp(self.log_p_y_sigma)
        K = torch.stack([
            self.kern[m].K(self.X) + torch.eye(self.N) * 1e-5
            for m in range(self.M)
        ])
        # invK = torch.inverse(K)
        sqrtK = torch.sqrt(K)
        B = torch.stack(
            [torch.diag(self.q_z_pi[m] / sigma) for m in range(self.M)])

        # self.q_f_sig = torch.inverse(invK+B)
        self.q_f_sig = sqrtK.bmm(
            torch.solve(
                sqrtK,
                torch.stack([torch.eye(self.N) for _ in range(self.M)]) +
                sqrtK.bmm(B).bmm(sqrtK))[0])
        self.q_f_mean = self.q_f_sig.bmm(B).bmm(self.Y.repeat(self.M, 1, 1))
Example #19
def compute_ME_stat(X, Y, T, X_org, Y_org, T_org, sigma, sigma0, epsilon):
    """compute a deep kernel based ME statistic."""
    # if gwidth is None or gwidth <= 0:
    #     raise ValueError('require gaussian_width > 0. Was %s.' % (str(gwidth)))
    reg = 0  #10**(-8)
    n = X.shape[0]
    J = T.shape[0]
    g = gauss_kernel(X, T, X_org, T_org, sigma, sigma0, epsilon)
    h = gauss_kernel(Y, T, Y_org, T_org, sigma, sigma0, epsilon)
    Z = g - h
    W = Z.mean(0)
    Sig = ((Z - W).transpose(1, 0)).mm((Z - W))
    if is_cuda:
        IJ = torch.eye(J).cuda()
    else:
        IJ = torch.eye(J)
    s = n * W.unsqueeze(0).mm(torch.solve(W.unsqueeze(1), Sig + reg * IJ)[0])
    return s
Example #20
    def post(self, hyp, X, y):
        """ Generic function to compute posterior distribution.

            This function will save the posterior mean and precision matrix as
            self.m and self.A, and will also update internal parameters (e.g.
            N, D, and the prior covariance (Sigma) and precision (iSigma)).
        """

        # make sure all variables are the right type
        X, y, hyp = self._numpy2torch(X, y, hyp)

        self.N, self.Dx = X.shape

        # ensure the number of features is specified (use 75% as a default)
        if self.Nf is None:
            self.Nf = int(0.75 * self.N)

        self.Omega = torch.zeros((self.Dx, self.Nf), dtype=torch.double)
        for f in range(self.Nf):
            self.Omega[:,f] = torch.exp(hyp[1:-1]) * \
            torch.randn((self.Dx, 1), dtype=torch.double).squeeze()

        XO = torch.mm(X, self.Omega)
        self.Phi = torch.exp(hyp[-1])/np.sqrt(self.Nf) *  \
                   torch.cat((torch.cos(XO), torch.sin(XO)), 1)

        # concatenate linear weights
        self.Phi = torch.cat((self.Phi, X), 1)
        self.D = self.Phi.shape[1]

        if self.verbose:
            print("estimating posterior ... | hyp=", hyp)

        self.A = torch.mm(torch.t(self.Phi), self.Phi) / torch.exp(2*hyp[0]) + \
                 torch.eye(self.D, dtype=torch.double)
        self.m = torch.mm(torch.solve(torch.t(self.Phi), self.A)[0], y) / \
                 torch.exp(2*hyp[0])

        # save hyperparameters
        self.hyp = hyp

        # update optimizer iteration count
        if hasattr(self, '_iterations'):
            self._iterations += 1
Example #21
def optimize_labels(X, k, lam, mask, known_values, nmin=None, nmax=None, use_cpu=False):
    """
    Given a matrix X of features and the number of classes, optimize over the labels of the unknown observations.

    :param X: 2D array of features
    :param k: Number of classes
    :param lam: Penalty on the l2 norm of the weights in the loss function
    :param mask: Binary matrix with value 0 in entry (i,j) if it is known whether i and j belong to the same class,
                 and 1 otherwise
    :param known_values: Binary matrix with value 1 in entry (i,j) if it is known that i and j belong to the same class,
                         and 0 otherwise
    :param nmin: Minimum number of points in a class
    :param nmax: Maximum number of points in a class
    :param use_cpu: Whether to perform the computations on the CPU
    :return: M: Estimated equivalence matrix YY^T
    """
    n, d = X.shape
    if nmin is None or nmax is None:
        nmin = nmax = n/k
    if use_cpu:
        orig_device = defaults.device
        defaults.device = torch.device('cpu')
        X = X.cpu()

    PiX = centering(X)
    inv_term = X.t().mm(PiX) + n*lam*torch.eye(d, device=defaults.device)
    M = 1/n*(centering(torch.eye(n, device=defaults.device)) - PiX.mm(torch.solve(PiX.t(), inv_term)[0]))

    M0 = torch.ones(n, n, device=defaults.device)/k
    done = 0
    mu_factor = 1
    while not done:
        try:
            M = matrix_balancing(M, mask, known_values, nmin, nmax, M0, mu_factor=mu_factor)
            done = 1
        except Exception:
            mu_factor *= 2
            if mu_factor > 2**10:
                raise ValueError

    if use_cpu:
        defaults.device = orig_device

    return M
Example #22
    def solve_euler_lagrange(self, q, v, u=None):
        """ Computes `qddot` (generalized acceleration) by solving
        the Euler-Lagrange equation (Eq 7 in the paper)
        \qddot = M^-1 (F - Cv - G)
        """

        B = q.size(0)

        L_params, dLparamsdq = self._mass_matrix_network(q)

        L = self.embed_to_L(L_params, bias=True)

        M = L @ L.transpose(-2, -1)

        pot, gradpot = self._potential_network(q)
        gradpot = gradpot.transpose(-2, -1)

        # Eqn 12 from Deep Lagrangian Networks
        dLparamsdt = (dLparamsdq @ v.unsqueeze(-1)).squeeze(-1)

        dLdt = self.embed_to_L(dLparamsdt)

        # Eqn 10 from Deep Lagrangian Networks
        dMdt = L @ dLdt.transpose(-2, -1) + dLdt @ L.transpose(-2, -1)

        dMdtv = dMdt @ v.unsqueeze(-1)
        # Eqn 14 from Deep Lagrangian Networks
        dKEdq = []
        for i in range(self._qdim):
            dLdqi = self.embed_to_L(dLparamsdq[..., i])
            _mx = L @ dLdqi.transpose(-2, -1) + dLdqi @ L.transpose(-2, -1)
            dKEdqi = ((_mx @ v.unsqueeze(-1)).squeeze(-1) * v).sum(-1)
            dKEdq.append(dKEdqi)

        dKEdq = torch.stack(dKEdq, dim=1).unsqueeze(-1)

        # Eqn 4 from Deep Lagrangian Networks
        corfor = dMdtv - 0.5 * dKEdq

        F = self._forces(q, v, u) if self._forces is not None else 0.

        qdd = torch.solve(F - corfor - gradpot, M)[0].squeeze(-1)

        return qdd
Example #23
def trace_beam_y(z_s, x_s, y_s, z_d, x_d, y_d, d_y_ls):
    if len(d_y_ls) == 0 or y_s == y_d:
        Y = np.stack((np.array([]), np.array([]), np.array([])), axis=-1)
    else:
        A = tc.tensor([[1, 0, 0, z_s - z_d], [0, 1, 0, x_s - x_d],
                       [0, 0, 1, y_s - y_d], [0, 0, 1, 0]])
        A = A.repeat([len(d_y_ls), 1, 1])

        b1 = tc.tensor([[[z_s], [x_s], [y_s]]]).repeat([len(d_y_ls), 1, 1])
        b2 = tc.tensor([[[d_y]] for d_y in d_y_ls])
        b = tc.cat((b1, b2), dim=1)

        Y, LU = tc.solve(b, A)
        Y = np.array(Y[:, :-1].view(len(d_y_ls), 3))


#         t = Z[:,-1]

    return Y
Example #24
def inv_nf_upd(t, y: torch.Tensor, y_guess: torch.Tensor, jacob: torch.Tensor, x_guess: torch.Tensor, d: torch.Tensor,
               idx: torch.Tensor, dtype, dtype_orig, alpha):
    y_idx = y[idx]
    y_guess_idx = y_guess[idx]
    dy = y_idx - y_guess_idx
    jacob_idx = jacob[idx]
    x_guess_idx = x_guess[idx]

    dx = torch.solve(dy.unsqueeze(-1).to(dtype), jacob_idx.to(dtype))[0].to(dtype_orig)
    x_guess.masked_scatter_(idx.unsqueeze(-1).expand_as(x_guess), x_guess_idx+alpha * dx.squeeze(-1))

    _y_guess, _, _jacob = t.forward_transform(x_guess[idx], t.auxiliary_input[idx])
    y_guess.masked_scatter_(idx.unsqueeze(-1).expand_as(y_guess), _y_guess)
    jacob.masked_scatter_(idx.unsqueeze(-1).unsqueeze(-1).expand_as(jacob), _jacob)

    d_new = (y_guess[idx] - y_idx).norm(dim=-1)
    dd = abs(d[idx] - d_new).max()
    d.masked_scatter_(idx, d_new)
    return dd
Example #25
    def solve_euler_lagrange(self, q, v, u=None):
        """ Computes `qddot` (generalized acceleration) by solving
        the Euler-Lagrange equation (Eq 7 in the paper)
        \qddot = M^-1 (F - Cv - G)
        """
        with torch.enable_grad():
            with utils.temp_require_grad((q, v)):
                M = self.mass_matrix(q)
                Cv = self.corriolisforce(q, v, M)
                G = self.gradpotential(q)

        F = torch.zeros_like(Cv)

        if u is not None:
            F = self.generalized_force(q, v, u)

        # Solve M \qddot = F - Cv - G
        qddot = torch.solve(F - Cv - G.unsqueeze(2), M)[0].squeeze(2)
        return qddot
Example #26
    def test_solve_qr_constant_noise(self, dtype=torch.float64, tol=1e-8):
        size = 50
        X = torch.rand((size, 2)).to(dtype=dtype)
        y = torch.sin(torch.sum(X, 1)).unsqueeze(-1).to(dtype=dtype)

        noise = 1e-2 * torch.ones(size, dtype=dtype)
        lazy_tsr = RBFKernel().to(dtype=dtype)(X).evaluate_kernel().add_diag(noise)
        precondition_qr, _, logdet_qr = lazy_tsr._preconditioner()

        F = lazy_tsr._piv_chol_self
        M = noise.diag() + F.matmul(F.t())

        x_exact = torch.solve(y, M)[0]
        x_qr = precondition_qr(y)

        self.assertTrue(approx_equal(x_exact, x_qr, tol))

        logdet = 2 * torch.cholesky(M).diag().log().sum(-1)
        self.assertTrue(approx_equal(logdet, logdet_qr, tol))
Example #27
    def compute_reward(self, m, s):
        '''
        Calculate the expected reward, given the mean and covariance of the state distribution, along with the target state and a weight matrix.
        Args 
        m : [1, k], mean of x
        s : [k, k], cov of x

        Returns
        M : [1, 1], mean reward weighted by W
        '''
        # see (3.43), note reward is -cost
        SW = s @ self.W
        S1 = torch.solve(self.W.T, (torch.eye(self.state_dim) + SW).T)[0].T
        reward = torch.exp(-1 / 2 *
                           (m - self.target) @ S1 @ (m - self.target).T) / (
                               torch.det(torch.eye(self.state_dim) + SW))
        if reward != reward:
            raise RuntimeError('Numeric issues: NaN in reward!')
        return reward.view(1, 1)
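A small identity check (random matrices, not the controller's W and S) of the transposed-solve trick used above: torch.solve(W.T, (I + SW).T)[0].T equals W @ (I + SW)^-1 without forming the inverse.

import torch

torch.manual_seed(0)
k = 4
W = torch.randn(k, k)
SW = 0.1 * torch.randn(k, k)
I = torch.eye(k)

via_solve = torch.solve(W.t(), (I + SW).t())[0].t()
print(torch.allclose(via_solve, W @ torch.inverse(I + SW), atol=1e-5))  # True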
Example #28
def distance_BH(V, T):
    W, S = LBO_slim(V, T)

    n = W.shape[1]
    A = W[0].mm(((1 / (S[:, None] + 1e-6)) * W[0]))
    A[0, :] = 0
    A[:, 0] = 0
    A[0, 0] = 1

    h = torch.eye(n, dtype=W.dtype, device=W.device) - (1 / n) * torch.ones(
        n, n, dtype=W.dtype, device=W.device)
    h[0, :] = 0

    g = torch.solve(h.double(), A.double())[0].float()
    g = g - torch.sum(g, 0, keepdims=True) / n

    v = torch.diag(g)
    D = torch.sqrt(torch.relu(v[None, :] + v[:, None] - 2 * g))
    return D
Example #29
def KRR(kernel, wake_x, sleep_x, *fs, reweight=None):
    '''
    Kernel ridge regression that takes sleep data [`sleep_x`] and potentially more than one function `f`,
    and produces predictions given wake data `wake_x`.

    kernel:     the kernel used for KRR
    wake_x:     real data, size: nwake by Dx
    sleep_x:    simulated data from generative model, size: nsleep by Dx
    fs:         sleep target functions evaluated at latent (sleep_z), size: nsleep (by f.shape[1:])
    
    The formula for KRR prediction given one target `f` from `fs` is:  
                z_pred = k(wake_x, sleep_x) @ k(sleep_x, sleep_x)^{-1} @ f
                       = G @ K^{-1} @ f

    '''

    nsleep = sleep_x.shape[0]
    for fi, f in enumerate(fs):
        assert f.shape[0] == nsleep, "%d'th target has wrong number of sleep samples" % fi

    if hasattr(kernel, "reweight") and reweight is None:
        reweight = kernel.reweight

    if isinstance(reweight, torch.Tensor):
        m = torch.diag(1.0 / reweight)

    elif reweight:
        w = kernel.kmm_weight(sleep_x, wake_x)
        m = torch.diag(1.0 / w)
    else:
        m = torch.eye(nsleep, device=sleep_x.device)

    # regressions share the same inverse of the gram matrix K(sleep_x, sleep_x)
    K = kernel(sleep_x, sleep_x) + m * kernel.log_lam.exp()

    # similarity between sleep and wake data
    G = kernel(sleep_x, wake_x)

    # this computes G @ K^{-1}
    n = K.shape[0]  # + 0.0001 * torch.eye(n, device=sleep_x.device)
    GKinv = torch.solve(G, K)[0].t()

    return (GKinv @ f for f in fs)
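A tiny check (random SPD Gram matrix) of the step above: torch.solve(G, K)[0].t() equals G.t() @ K^-1 for symmetric K, which is the k(wake_x, sleep_x) @ K^-1 factor in the KRR formula.

import torch

torch.manual_seed(0)
nsleep, nwake = 6, 4
A = torch.randn(nsleep, nsleep)
K = A @ A.t() + nsleep * torch.eye(nsleep)   # stand-in for k(sleep_x, sleep_x) + reg
G = torch.randn(nsleep, nwake)               # stand-in for k(sleep_x, wake_x)

GKinv = torch.solve(G, K)[0].t()
print(torch.allclose(GKinv, G.t() @ torch.inverse(K), atol=1e-4))  # True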
Example #30
def cKRR(obs,
         deep_gaussian_gram,
         wake_x,
         sleep_x,
         norm,
         nat,
         log_sigma,
         log_lam,
         noise=0.0,
         bs=1000):
    '''
    Intended for chunking the Gram matrix on the GPU and computing the inverse on the CPU.
    '''

    nsleep = sleep_x.shape[0]
    bs = min(nsleep, bs)

    S_tr = suff(wake_x)
    if noise != 0:
        sleep_x = sleep_x + torch.randn_like(sleep_x) * noise

    nb = nsleep // bs
    K = torch.zeros(nsleep, nsleep)
    for i in range(nb):
        for j in range(i, nb):
            si = slice(i * bs, (i + 1) * bs)
            sj = slice(j * bs, (j + 1) * bs)
            di = sleep_x[si]
            dj = sleep_x[sj]
            K[si, sj] = deep_gaussian_gram(di, dj, log_sigma.exp())
            if i != j:
                K[sj, si] = K[si, sj].t()

    K += torch.eye(nsleep) * log_lam.exp()

    if noise != 0:
        sleep_x = sleep_x + torch.randn_like(sleep_x) * noise
    G = deep_gaussian_gram(sleep_x, wake_x, log_sigma.exp()).cpu()

    GKinv = torch.solve(G, K)[0].t()
    Elogp = torch.mean(GKinv @ (-norm.cpu()) +
                       ((GKinv @ nat.cpu()) * S_tr.cpu()).sum(-1))
    return Elogp.to(sleep_x.device)