Example 1
    def __call__(self, f):
        """
        Compute the following function:
            E(f) = ||f_l - y_l||^2 + mu f^T L f + mu eps ||f||^2,

        :param f: Theano tensor
            Vector of N continuous elements.
        :return: Theano tensor
            Energy (cost) of the vector f.
        """
        # Compute the un-normalized graph Laplacian: L = D - W
        D = T.diag(self.W.sum(axis=0))
        L = D - self.W

        # Compute the label consistency
        S = T.diag(self.L)
        El = (f - self.y).T.dot(S.dot(f - self.y))

        # Compute the smoothness along the similarity graph
        I = T.eye(self.L.shape[0])
        Es = f.T.dot(L.dot(f)) + self.eps * f.T.dot(I.dot(f))

        # Compute the whole cost function
        E = El + self.mu * Es

        return E
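A minimal usage sketch (not part of the original source): it assumes `energy` is an instance of the class this `__call__` belongs to, with `W`, `L`, `y`, `mu` and `eps` already set as Theano shared variables or constants.

import theano
import theano.tensor as T

f = T.vector('f')                      # N continuous label scores
E = energy(f)                          # symbolic energy E(f)
gE = T.grad(E, f)                      # gradient of the energy, e.g. for descent on f

energy_and_grad = theano.function([f], [E, gE])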
Example 2
    def L_op(self, inputs, outputs, gradients):
        # Modified from theano/tensor/slinalg.py
        # No handling for on_error = 'nan'
        dz = gradients[0]
        chol_x = outputs[0]

        # this is for nan mode
        #
        # ok = ~tensor.any(tensor.isnan(chol_x))
        # chol_x = tensor.switch(ok, chol_x, 1)
        # dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return gpu_solve_upper_triangular(
                outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        return [grad]
Example 3
 def SampleKsi(d, u, mu, eps):  # icml14SBP(20)
     dn = 1.0/d
     uDnu = T.sum(u*u*dn)
     coeff = ( 1-1.0/T.sqrt(1.0+uDnu) ) / (uDnu+SMALLNUM)
     u = u.reshape((u.shape[0],1))
     R = T.diag(T.sqrt(dn)) - coeff*T.dot( T.dot(T.diag(dn),T.dot(u,u.T)), T.diag(T.sqrt(dn)) )
     return mu + T.dot(R,eps)
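A quick numpy check (not from the original source) of the identity the code above relies on: the factor R satisfies R R^T = (diag(d) + u u^T)^{-1}, so mu + R eps has that covariance when eps ~ N(0, I).

import numpy as np

d = np.array([2.0, 3.0, 5.0])
u = np.array([0.5, -1.0, 2.0])

dn = 1.0 / d
uDnu = np.sum(u * u * dn)
coeff = (1.0 - 1.0 / np.sqrt(1.0 + uDnu)) / uDnu
R = np.diag(np.sqrt(dn)) - coeff * np.diag(dn).dot(np.outer(u, u)).dot(np.diag(np.sqrt(dn)))

# R.dot(R.T) reproduces the covariance (diag(d) + u u^T)^{-1}
assert np.allclose(R.dot(R.T), np.linalg.inv(np.diag(d) + np.outer(u, u)))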
    def retr(self, X, Z, t=None):
        U, S, V = X
        Up, M, Vp = Z
        if t is None:
            t = 1.0
        Qu, Ru = tensor.nlinalg.qr(Up)

        # we need rq decomposition here
        Qv, Rv = tensor.nlinalg.qr(Vp[::-1].T)
        Rv = Rv.T[::-1]
        Rv = Rv[:, ::-1]
        Qv = Qv.T[::-1]

        # now we have rq decomposition (Rv @ Qv = Z.Vp)
        #Rv, Qv = rq(Z.Vp, mode='economic')


        zero_block = tensor.zeros((Ru.shape[0], Rv.shape[1]))
        block_mat = tensor.stack(
            (
                tensor.stack((S + t * M, t * Rv), 1).reshape((Rv.shape[0], -1)),
                tensor.stack((t * Ru, zero_block), 1).reshape((Ru.shape[0], -1))
            )
        ).reshape((-1, Ru.shape[1] + Rv.shape[1]))

        Ut, St, Vt = tensor.nlinalg.svd(block_mat, full_matrices=False)

        U_res = tensor.stack((U, Qu), 1).reshape((Qu.shape[0], -1)).dot(Ut[:, :self._k])
        V_res = Vt[:self._k, :].dot(tensor.stack((V, Qv), 0).reshape((-1, Qv.shape[1])))
        # add some machinery eps to get a slightly perturbed element of a manifold
        # even if we have some zeros in S
        S_res = tensor.diag(St[:self._k]) + tensor.diag(np.spacing(1) * tensor.ones(self._k))
        return (U_res, S_res, V_res)
Example 5
    def retr(self, X, Z, t=None):
        if t is None:
            t = 1.0
        Qu, Ru = tensor.nlinalg.qr(Z.Up)

        # we need rq decomposition here
        Qv, Rv = tensor.nlinalg.qr(Z.Vp[::-1].T)
        Rv = Rv.T[::-1]
        Rv = Rv[:, ::-1]
        Qv = Qv.T[::-1]

        # now we have rq decomposition (Rv @ Qv = Z.Vp)
        #Rv, Qv = rq(Z.Vp, mode='economic')


        zero_block = tensor.zeros((Ru.shape[0], Rv.shape[1]))
        block_mat = tensor.stack(
            (
                tensor.stack((X.S + t * Z.M, t * Rv), 1).reshape((Rv.shape[0], -1)),
                tensor.stack((t * Ru, zero_block), 1).reshape((Ru.shape[0], -1))
            )
        ).reshape((-1, Ru.shape[1] + Rv.shape[1]))

        Ut, St, Vt = tensor.nlinalg.svd(block_mat, full_matrices=False)

        U = tensor.stack((X.U, Qu), 1).reshape((Qu.shape[0], -1)).dot(Ut[:, :self._k])
        V = Vt[:self._k, :].dot(tensor.stack((X.V, Qv), 0).reshape((-1, Qv.shape[1])))
        # add some machinery eps to get a slightly perturbed element of a manifold
        # even if we have some zeros in S
        S = tensor.diag(St[:self._k]) + tensor.diag(np.spacing(1) * tensor.ones(self._k))
        return ManifoldElementShared.from_vars((U, S, V), shape=(self._m, self._n), r=self._k)
    def ehess2rhess(self, X, egrad, ehess, H):
        # Euclidean part
        rhess = self.proj(X, ehess)
        Sinv = tensor.diag(1.0 / tensor.diag(X.S))

        # Curvature part
        T = self.apply_ambient(egrad, H.Vp.T).dot(Sinv)
        rhess.Up += (T - X.U.dot(X.U.T.dot(T)))
        T = self.apply_ambient_transpose(egrad, H.Up).dot(Sinv)
        rhess.Vp += (T - X.V.T.dot(X.V.dot(T))).T
        return rhess
    def __call__(self, A, b, inference=False):
        dA = T.diagonal(A)
        D = T.diag(dA)
        R = A - D

        iD = T.diag(1.0 / dA)

        x = T.zeros_like(b)
        for i in range(self.iterations):
            x = iD.dot(b - R.dot(x))

        return x
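The loop above unrolls plain Jacobi iteration, x_{k+1} = D^{-1} (b - R x_k), into the graph. A minimal usage sketch follows; the class name `JacobiSolver` and its constructor are assumptions, not part of the source.

import numpy as np
import theano
import theano.tensor as T

A = T.matrix('A')
b = T.vector('b')
solver = JacobiSolver(iterations=25)          # hypothetical constructor setting self.iterations
x = solver(A, b)
solve = theano.function([A, b], x)

A_val = np.array([[4.0, 1.0], [2.0, 5.0]], dtype=theano.config.floatX)  # diagonally dominant
b_val = np.array([1.0, 2.0], dtype=theano.config.floatX)
print(solve(A_val, b_val))                    # close to np.linalg.solve(A_val, b_val)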
Example 8
    def _global_error(self, targetM, i, lastM):
        mask = T.neq(self._y[self._set[:, 1]], self._y[self._set[:, 2]])
        f = T.nnet.sigmoid # T.tanh 
        g = lambda x, y: x*(1-y) #lambda x: T.maximum(x, 0)
        # g(lst_prediction - cur_prediction) 
        # f(T.diag(lossil - lossij))

        if i == 0:
            # pull_error for global 0
            pull_error = 0.
            ivectors = self._stackx[:, i, :][self._neighborpairs[:, 0]]
            jvectors = self._stackx[:, i, :][self._neighborpairs[:, 1]]
            diffv = ivectors - jvectors
            pull_error = linalg.trace(diffv.dot(targetM).dot(diffv.T))
        else:
            ivectors = self._stackx[:, i, :][self._neighborpairs[:, 0]]
            jvectors = self._stackx[:, i, :][self._neighborpairs[:, 1]]
            diffv1 = ivectors - jvectors
            distMcur = diffv1.dot(targetM).dot(diffv1.T)
#           ivectors = self._stackx[:, i-1, :][self._neighborpairs[:, 0]]
#           jvectors = self._stackx[:, i-1, :][self._neighborpairs[:, 1]]
#           diffv2 = ivectors - jvectors
#           distMlast = diffv2.dot(lastM).dot(diffv2.T)
            pull_error = linalg.trace(T.maximum(distMcur, 0))


        push_error = 0.0
        ivectors = self._stackx[:, i, :][self._set[:, 0]]
        jvectors = self._stackx[:, i, :][self._set[:, 1]]
        lvectors = self._stackx[:, i, :][self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        lossij = diffij.dot(targetM).dot(diffij.T)
        lossil = diffil.dot(targetM).dot(diffil.T)
        #cur_prediction = T.diag(lossij - lossil)
        cur_prediction = f(T.diag(lossil - lossij))

        ivectors = self._stackx[:, i-1, :][self._set[:, 0]]
        jvectors = self._stackx[:, i-1, :][self._set[:, 1]]
        lvectors = self._stackx[:, i-1, :][self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        if i == 0:
            lossij = diffij.dot(diffij.T)
            lossil = diffil.dot(diffil.T)
        else:
            lossij = diffij.dot(lastM).dot(diffij.T)
            lossil = diffil.dot(lastM).dot(diffil.T)
        lst_prediction = f(T.diag(lossil - lossij))
        push_error = T.sum(mask*(g(lst_prediction, cur_prediction)))


        return pull_error, push_error 
    def from_partial_old(self, X, dX):
        eps = 1e-10#np.spacing(1)
        U, S, V = X
        dU, dS, dV = dX
        S = tensor.diag(S)
        S_pinv = tensor.switch(tensor.gt(abs(S), eps), 1.0 / S, 0.0)
        S_pinv = tensor.diag(S_pinv)
        ZV = dU.dot(S_pinv)
        UtZV = dS
        ZtU = S_pinv.dot(dV)

        Zproj = (ZV - U.dot(UtZV), UtZV, ZtU - (UtZV.dot(V)))
        return Zproj
Example 10
def diagCholInvLogDet_fromDiag(diag_vec, name):

    diag_mat = T.diag(diag_vec.flatten())
    inv = T.diag(1.0 / diag_vec.flatten())
    chol = T.diag(T.sqrt(diag_vec.flatten()))
    logDet = T.sum(T.log(diag_vec.flatten()))  # scalar

    diag_mat.name = name
    chol.name = "c" + name
    inv.name = "i" + name
    logDet.name = "logDet" + name

    return (diag_mat, chol, inv, logDet)
Example 11
def diagCholInvLogDet_fromLogDiag(logdiag, name):

    diag = T.diag(T.exp(logdiag.flatten()))
    inv = T.diag(T.exp(-logdiag.flatten()))
    chol = T.diag(T.exp(0.5 * logdiag.flatten()))
    logDet = T.sum(logdiag)  # scalar

    diag.name = name
    chol.name = "c" + name
    inv.name = "i" + name
    logDet.name = "logDet" + name

    return (diag, chol, inv, logDet)
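A quick numpy check (not from the original source) of the identities these two helpers rely on for a diagonal matrix built from a log-diagonal vector.

import numpy as np

logdiag = np.array([-0.5, 0.0, 1.2])
d = np.exp(logdiag)

D = np.diag(d)
chol = np.diag(np.sqrt(d))            # for a diagonal matrix, chol.dot(chol.T) == D
inv = np.diag(1.0 / d)
logDet = logdiag.sum()                # log det D is the sum of the log-diagonal entries

assert np.allclose(chol.dot(chol.T), D)
assert np.allclose(inv.dot(D), np.eye(3))
assert np.isclose(np.log(np.linalg.det(D)), logDet)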
Example 12
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # Replace the cholesky decomposition with 1 if there are nans
        # or solve_upper_triangular will throw a ValueError.
        if self.on_error == 'nan':
            ok = ~tensor.any(tensor.isnan(chol_x))
            chol_x = tensor.switch(ok, chol_x, 1)
            dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T, solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        if self.on_error == 'nan':
            return [tensor.switch(ok, grad, np.nan)]
        else:
            return [grad]
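A minimal sketch (not from the original source) that numerically checks a reverse-mode Cholesky gradient of this form, using Theano's stock Cholesky op and verify_grad against finite differences.

import numpy as np
from theano.tensor import slinalg
from theano.gradient import verify_grad

rng = np.random.RandomState(0)
M = rng.randn(4, 4)
spd = M.dot(M.T) + 4.0 * np.eye(4)          # symmetric positive definite test point

verify_grad(slinalg.Cholesky(lower=True), [spd], rng=rng)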
Example 13
 def recurrence(x_t, h_tm1, c_tm1):
     i_t = TT.nnet.sigmoid(TT.dot(x_t, W_xi) +
                           TT.dot(h_tm1, W_hi) +
                           TT.dot(c_tm1, TT.diag(W_ci)) + b_i)
     f_t = TT.nnet.sigmoid(TT.dot(x_t, W_xf) +
                           TT.dot(h_tm1, W_hf) +
                           TT.dot(c_tm1, TT.diag(W_cf)) + b_f)
     c_t = f_t * c_tm1 + i_t * TT.tanh(TT.dot(x_t, W_xc) +
                                       TT.dot(h_tm1, W_hc) + b_c)
     o_t = TT.nnet.sigmoid(TT.dot(x_t, W_xo) +
                           TT.dot(h_tm1, W_ho) +
                           TT.dot(c_t, TT.diag(W_co)) + b_o)
     h_t = o_t * TT.tanh(c_t)
     return h_t, c_t
    def log_p_y_I_zA(self):

        sum_y_outers = T.sum(self.Y**2)
        sum_z_IBP_mean_phi_y = T.sum( T.dot( (T.dot(self.phi_IBP, self.Y.T)).T,self.z_IBP_mean ) )
        # sum_z_IBP_mean_phi_outer = T.tril(T.dot(z_IBP_mean.T, z_IBP_mean)) * T.tril()
        # sum_z_IBP_mean_phi_Phi = T.sum( T.dot(z_IBP_mean.T, (self.Phi_traces+T.sum(self.phi_IBP**2, 1)) )  )
        sum_2ndOrder_term = T.sum( T.dot(self.z_IBP_samp.T, T.dot(T.dot(self.phi_IBP, self.phi_IBP.T)
                          + T.diag(T.diag(self.get_tensor_traces_scan(self.Phi_IBP))), self.z_IBP_samp)) )

        term = -0.5*self.D*self.B*(log2pi*self.sigma_y**2) \
             -0.5*(self.sigma_y**-2)*(sum_y_outers -2*sum_z_IBP_mean_phi_y \
                    + sum_2ndOrder_term)

        return term
Example 15
 def get_output_for(self, input, **kwargs):
     xin_shape = input.shape
     if input.ndim > 2:
         # if the input has more than two dimensions, flatten it into a
         # batch of feature vectors.
         input = input.flatten(2)
     activation = T.zeros((input.shape[0], self.shape1[1] * self.shape2[1]))
     s = T.diag(T.sqrt(T.diag(self.S)))
     u = self.U.dot(s)
     w = s.dot(self.V)
     for i in range(self.manifold._k):
         activation += apply_mat_to_kron(input,
                             u[:, i].reshape((self.shape1[::-1])).T,
                             w[i, :].reshape((self.shape2[::-1])).T)
     return activation
Example 16
def _compile_func():
    beta = T.vector('beta')
    b = T.scalar('b')
    X = T.matrix('X')
    y = T.vector('y')
    C = T.scalar('C')
    params = [beta, b, X, y, C]
    cost = 0.5 * (T.dot(beta, beta) + b * b) + C * T.sum(
        T.nnet.softplus(
            -T.dot(T.diag(y), T.dot(X, beta) + b)
        )
    )
    # Function computing in one go the cost, its gradient
    # with regard to beta and with regard to the bias.
    cost_grad = theano.function(params,[
        cost,
        T.grad(cost, beta),
        T.grad(cost, b)
    ])

    # Function for computing element-wise sigmoid, used for
    # prediction.
    log_predict = theano.function(
        [beta, b, X],
        T.nnet.sigmoid(b + T.dot(X, beta)),
        on_unused_input='warn'
    )

    return (cost_grad, log_predict)
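A minimal usage sketch (not part of the original source) for the two compiled functions; the cost uses T.diag(y) inside a softplus, so it expects labels in {-1, +1}.

import numpy as np
import theano

cost_grad, log_predict = _compile_func()

X = np.random.randn(20, 3).astype(theano.config.floatX)
y = np.sign(np.random.randn(20)).astype(theano.config.floatX)   # labels in {-1, +1}
beta = np.zeros(3, dtype=theano.config.floatX)
b, C = 0.0, 1.0

cost, g_beta, g_b = cost_grad(beta, b, X, y, C)
probs = log_predict(beta, b, X)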
Example 17
def propagate(f, l, R, mu, eps):
    # The similarity matrix W is a linear combination of the slices in R
    W = T.tensordot(R, mu, axes=1)

    # The following indices correspond to labeled and unlabeled examples
    labeled = T.eq(l, 1).nonzero()
    unlabeled = T.eq(l, 0).nonzero()

    # Calculating the graph Laplacian of W
    D = T.diag(W.sum(axis=0))
    L = D - W

    # Computing L_UU (the Laplacian over unlabeled examples)
    L_UU = L[unlabeled][:, unlabeled][:, 0, :]

    # Computing the inverse of the (regularized) Laplacian iA = (L_UU + epsI)^-1
    epsI = eps * T.eye(L_UU.shape[0])
    rL_UU = L_UU + epsI
    iA = nlinalg.matrix_inverse(rL_UU)

    # Computing W_UL (the similarity matrix between unlabeled and labeled examples)
    W_UL = W[unlabeled][:, labeled][:, 0, :]
    f_L = f[labeled]

    # f* = (L_UU + epsI)^-1 W_UL f_L
    f_star = iA.dot(W_UL.dot(f_L))

    return f_star
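A minimal sketch (not part of the original source) of compiling `propagate` into a callable. R is expected as an N x N x K stack of similarity slices, combined by tensordot with the K mixing weights in mu.

import theano
import theano.tensor as T

f = T.vector('f')        # label scores; only the labelled entries are read
l = T.ivector('l')       # 1 for labelled examples, 0 for unlabelled ones
R = T.tensor3('R')       # N x N x K stack of similarity slices
mu = T.vector('mu')      # mixing weights for the K slices
eps = T.scalar('eps')

f_star = propagate(f, l, R, mu, eps)
propagate_fn = theano.function([f, l, R, mu, eps], f_star)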
Example 18
 def dot(self, other):
     if isinstance(other, ManifoldElement):
         mid = self.S.dot(self.V.dot(other.U)).dot(other.S)
         U, S, V = tensor.nlinalg.svd(mid, full_matrices=False)
         return ManifoldElement(self.U.dot(U), tensor.diag(self.S), V.dot(self.V))
     else:
         raise ValueError('dot must be performed on ManifoldElements.')
Example 19
def censor_updates(updates):
    
    w = updates[1][0]
    updated_w = updates[1][1]
    constrained_w = T.dot(updated_w, T.diag(1 / T.sqrt(T.sum(updated_w ** 2, axis=0))))
    new_update = [updates[0], (w, constrained_w)]
    return new_update   
Example 20
def check_jacobian_det(transform, domain,
                       constructor=tt.dscalar,
                       test=0,
                       make_comparable=None,
                       elemwise=False):
    y = constructor('y')
    y.tag.test_value = test

    x = transform.backward(y)
    if make_comparable:
        x = make_comparable(x)

    if not elemwise:
        jac = tt.log(tt.nlinalg.det(jacobian(x, [y])))
    else:
        jac = tt.log(tt.abs_(tt.diag(jacobian(x, [y]))))

    # ljd = log jacobian det
    actual_ljd = theano.function([y], jac)

    computed_ljd = theano.function([y], tt.as_tensor_variable(
        transform.jacobian_det(y)), on_unused_input='ignore')

    for yval in domain.vals:
        close_to(
            actual_ljd(yval),
            computed_ljd(yval), tol)
Example 21
def likelihood(f, l, R, mu, eps, sigma2, lambda_1=1e-4):
    # The similarity matrix W is a linear combination of the slices in R
    W = T.tensordot(R, mu, axes=1)

    # The following indices correspond to labeled and unlabeled examples
    labeled = T.eq(l, 1).nonzero()

    # Calculating the graph Laplacian of W
    D = T.diag(W.sum(axis=0))
    L = D - W

    # The Covariance (or Kernel) matrix is the inverse of the (regularized) Laplacian
    epsI = eps * T.eye(L.shape[0])
    rL = L + epsI
    Sigma = nlinalg.matrix_inverse(rL)

    # The marginal density of labeled examples uses Sigma_LL as covariance (sub-)matrix
    Sigma_LL = Sigma[labeled][:, labeled][:, 0, :]

    # We also consider additive Gaussian noise with variance sigma2
    K_L = Sigma_LL + (sigma2 * T.eye(Sigma_LL.shape[0]))

    # Calculating the inverse and the determinant of K_L
    iK_L = nlinalg.matrix_inverse(K_L)
    dK_L = nlinalg.det(K_L)

    f_L = f[labeled]

    # The (L1-regularized) log-likelihood is given by the summation of the following four terms
    term_A = - (1 / 2) * f_L.dot(iK_L.dot(f_L))
    term_B = - (1 / 2) * T.log(dK_L)
    term_C = - (1 / 2) * T.log(2 * np.pi)
    term_D = - lambda_1 * T.sum(abs(mu))

    return term_A + term_B + term_C + term_D
Example 22
 def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
     sigma2 = tt.square(sigma)
     Kuu = self.cov_func(Xu)
     Kuf = self.cov_func(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = self.cov_func(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
         trace = 0.0
     elif self.approx == "VFE":
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = ((1.0 / (2.0 * sigma2)) *
                  (tt.sum(self.cov_func(X, diag=True)) -
                   tt.sum(tt.sum(A * A, 0))))
     else:  # DTC
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = 0.0
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - self.mean_func(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
     logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
     quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
     return -1.0 * (constant + logdet + quadratic + trace)
Example 23
def mvnorm_logpdf(x, mu = None, Li = None):
    """
    Parameters
    ++++++++++
    mu - mean of MVN, if not given assume zero mean
    Li - inverse of lower cholesky
    """
    
    import autograd.numpy as T
    dim = Li.shape[0]
    Ki = np.dot(Li.T, Li)
    #determinant is just multiplication of diagonal elements of cholesky
    logdet = 2*T.log(1./T.diag(Li)).sum()
    lpdf_const = -0.5 * (dim * T.log(2 * np.pi) + logdet)
    if mu is None:
        d = T.reshape(x, (dim, 1))
    else:
        d = (x - mu.reshape((1 ,dim))).T

    Ki_d = T.dot(Ki, d)        #vector
    
    res_pdf = (lpdf_const - 0.5 * diag_dot(d.T, Ki_d)).T
    if res_pdf.size == 1:
        res_pdf = res_pdf[0]
    return res_pdf 
Example 24
    def log_likelihood(self):
        Users = self.U[:, :-1]
        Middle = self.S
        Items = self.V[:-1, :]
        UserBiases = self.U[:, -1].reshape((-1, 1))
        ItemBiases = self.V[-1, :].reshape((-1, 1))

        A = T.dot(T.dot(self.U[:, :-1], self.S[:-1, :-1]), self.V[:-1, :])
        A = T.inc_subtensor(A[:, :], UserBiases * T.sqrt(self.S[-1, -1]))
        A = T.inc_subtensor(A[:, :], ItemBiases.T * T.sqrt(self.S[-1, -1]))
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(T.diag(self.S)[:-1]))

        # Return negation of LogLikelihood cause we will minimize cost
        return -loglik
 def log_mvn(self, y, mean, beta):  # diagonal noise; y is an N x D data matrix; returns the Gaussian log-likelihood
     N = y.shape[0]
     D = y.shape[1]
     
     LL, updates = theano.scan(fn=lambda a: -0.5 *  D * T.sum(T.log(2 * np.pi*(1/T.diag(beta)))) - 0.5 * T.sum(T.dot(beta,(y - a)**2)),
                           sequences=[mean])
     return T.mean(LL)
Example 26
 def likelihood_domain(self,target,Xlabel):
     self.beta = T.exp(self.ls)
     betaI=T.diag(T.dot(Xlabel,self.beta))
     Covariance = betaI       
     LL = self.log_mvn(target, self.output, Covariance)# - 0.5*T.sum(T.dot(betaI,Ktilda))      
     
     return LL
Example 27
def SPD_Project(mat):
    # force symmetric
    mat = (mat+mat.T)/2.0
    eig, eigv = linalg.eig(mat)
    eig = T.maximum(eig, 0)
    eig = T.diag(eig)
    return eigv.dot(eig).dot(eigv.T)
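A numpy analogue (not from the original source) of the projection above: symmetrise, clip negative eigenvalues to zero, and rebuild, which projects the symmetrised matrix onto the positive semi-definite cone.

import numpy as np

def spd_project_np(mat):
    mat = (mat + mat.T) / 2.0
    eig, eigv = np.linalg.eigh(mat)        # eigh suffices once the matrix is symmetric
    eig = np.maximum(eig, 0)
    return eigv.dot(np.diag(eig)).dot(eigv.T)

M = np.array([[1.0, 2.0], [0.0, -3.0]])
P = spd_project_np(M)
assert np.all(np.linalg.eigvalsh(P) >= -1e-12)   # result is positive semi-definite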
Example 28
def l2ls_learn_basis_dual(X, S, c):
    tX = T.matrix('X')
    tS = T.matrix('S')
    tc = T.scalar('c')
    tlambdas = T.vector('lambdas')

    tXST = T.dot(tX, tS.T)
    tSSTetc = la.matrix_inverse(T.dot(tS, tS.T) + T.diag(tlambdas))

    objective = -(T.dot(tX, tX.T).trace()
                  - reduce(T.dot, [tXST, tSSTetc, tXST.T]).trace()
                  - tc*tlambdas.sum())

    objective_fn = theano.function([tlambdas],
                                   objective,
                                   givens={tX: X, tS: S, tc: c})
    objective_grad_fn = theano.function([tlambdas],
                                        T.grad(objective, tlambdas),
                                        givens={tX: X, tS: S, tc: c})

    initial_lambdas = 10*np.abs(np.random.random((S.shape[0], 1)))
    output = scipy.optimize.fmin_cg(f=objective_fn,
                                    fprime=objective_grad_fn,
                                    x0=initial_lambdas,
                                    maxiter=100,
                                    full_output=True)
    logging.debug("optimizer stats %s" % (output[1:],))
    logging.debug("optimizer lambdas %s" % output[0])

    lambdas = output[0]
    B = np.dot(np.linalg.inv(np.dot(S, S.T) + np.diag(lambdas)),
               np.dot(S, X.T)).T

    return B
Example 29
 def _theano_project_sd(self, mat):
     # force symmetric
     mat = (mat+mat.T)/2.0
     eig, eigv = linalg.eig(mat)
     eig = T.maximum(eig, 0)
     eig = T.diag(eig)
     return eigv.dot(eig).dot(eigv.T) 
Example 30
def forward_batch_step(x_t, H_mask, H_tm1):
    H = TT.dot(W_rec,H_tm1) + W_in[:,x_t]
    H_t = TT.nnet.sigmoid(H)
    Y_t = TT.nnet.softmax(TT.transpose(TT.dot(W_out, H_t)))
    Y_t = -TT.log2(Y_t)
    Y_t = TT.dot(TT.transpose(Y_t), TT.diag(H_mask))
    return [H_t, Y_t]
Example 31
 def make_model(cls):
     with pm.Model() as model:
         sd_mu = np.array([1, 2, 3, 4, 5])
         sd_dist = pm.Lognormal.dist(mu=sd_mu, sigma=sd_mu / 10., shape=5)
         chol_packed = pm.LKJCholeskyCov('chol_packed', eta=3, n=5, sd_dist=sd_dist)
         chol = pm.expand_packed_triangular(5, chol_packed, lower=True)
         cov = tt.dot(chol, chol.T)
         stds = tt.sqrt(tt.diag(cov))
         pm.Deterministic('log_stds', tt.log(stds))
         corr = cov / stds[None, :] / stds[:, None]
         corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2
         pm.Deterministic('corr_entries_unit', corr_entries_unit)
     return model
Example 32
 def merge_factors(self, X, Z=None, diag=False):
     factor_list = []
     for factor in self.factor_list:
         if isinstance(factor, Covariance):
             factor_list.append(factor(X, Z, diag))
         elif hasattr(factor, "ndim"):
             if diag:
                 factor_list.append(tt.diag(factor))
             else:
                 factor_list.append(factor)
         else:
             factor_list.append(factor)
     return factor_list
Example 33
def lanczos(linear_op, z, m, batch_size):
    s = z.norm(2, axis=1)
    v = z / s.dimshuffle(0, 'x')

    alpha = []
    beta = []
    V = []
    V.append(v)
    v_curr = v
    b = None
    v_prev = None

    for j in xrange(m):
        if j == 0:
            r = linear_op(v_curr)
        else:
            r = linear_op(v_curr) - b.dimshuffle(0, 'x') * v_prev
        a = T.batched_dot(v_curr, r)
        r = r - a.dimshuffle(0, 'x') * v_curr
        b = r.norm(2, axis=1)
        v_prev = v_curr
        v_curr = r / b.dimshuffle(0, 'x')
        alpha.append(a)
        if j < m - 1:
            V.append(v_curr)
            beta.append(b)

    Az_list = []
    for idx in xrange(batch_size):
        alpha_diag = T.diag(T.stacklists([a_[idx] for a_ in alpha]))
        beta_diag = T.diag(T.stacklists([b_[idx] for b_ in beta] + [0]))
        M = alpha_diag + T.roll(beta_diag, 1, 0) + T.roll(beta_diag, 1, 1)
        V_matrix = T.stacklists([v_[idx] for v_ in V]).T
        approx_sqrt = s[idx] * V_matrix.dot(theano_sqrtm(M)[:, 0])
        Az_list.append(approx_sqrt)

    Azs = T.stacklists(Az_list)

    return Azs
Example 34
def get_att(X, index):
    """
    Input attention, single sentence.
    Args:
        X: tensor, shape=[n, embed_dim]
        index: int, target index
    Return:
        tensor, shape=[n, embed_dim]
    """
    result, update = theano.scan(lambda v, u: T.dot(v, T.transpose(u)), sequences=X, non_sequences=X[index])
    result_soft = T.nnet.softmax(result)
    A = T.diag(T.flatten(result_soft))  # n×n
    return T.dot(A, X)  # [n, embed_dim]
Example 35
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T,
                solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
        else:
            return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))]
def _create_theano_likelihood_graph(data, t, ind_t, n_time, n_inducing_time, approx='FITC'):
    """ 
    Here we use theano to compile a computational graph defining our discrepancy
    likelihood. Note it just compiles this graph as a C program which will
    get successively called in pints. 
    Thus all the variables defined here are simply placeholders.
    """
    rho = tt.dscalar('rho')
    ker_sigma = tt.dscalar('ker_sigma')
    sigma = tt.dscalar('sigma')
    time = theano.tensor.as_tensor_variable(t)
    inducing_time = theano.tensor.as_tensor_variable(ind_t)
    y = theano.tensor.as_tensor_variable(data)
    current = tt.dvector('current')

    cov_func = RbfKernel(rho, ker_sigma)

    sigma2 = tt.square(sigma)
    Kuu = cov_func(inducing_time)
    Kuf = cov_func(inducing_time, time)
 
    Luu = cholesky(stabilize(Kuu))
    A = solve_lower(Luu, Kuf)
    Qffd = tt.sum(A * A, 0)
    
    if approx == 'FITC':
        Kffd = cov_func(time, diag=True)
        Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
        trace = 0.0
    elif approx == 'VFE':
        Lamd = tt.ones_like(Qffd) * sigma2
        trace = ((1.0 / (2.0 * sigma2)) *
                    (tt.sum(cov_func(time, diag=True)) -
                    tt.sum(tt.sum(A * A, 0))))
    else:  # DTC
        Lamd = tt.ones_like(Qffd) * sigma2
        trace = 0.0
    
    A_l = A / Lamd
    L_B = cholesky(tt.eye(n_inducing_time) + tt.dot(A_l, tt.transpose(A)))
    r = y - current
    
    r_l = r / Lamd
    
    c = solve_lower(L_B, tt.dot(A, r_l))
    
    constant = 0.5 * n_time * tt.log(2.0 * np.pi)
    logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
    quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
    ll = -1.0 * (constant + logdet + quadratic + trace)  
    return theano.function([current,rho,ker_sigma,sigma],ll,on_unused_input='ignore')
Example 37
    def get_model(self, lengthscale_trf, lengthscale_p_trf, sn_trf, sf_trf, S,
                  MU, SIGMA_trf, U, b, X, Q, D, N, M):

        EPhi, EPhiTPhi = self.get_EPhi(lengthscale_trf, lengthscale_p_trf,
                                       sf_trf, S, MU, SIGMA_trf, U, b, N, M)
        EPhiTPhi_reg = self.reg_EPhi(lengthscale_trf, lengthscale_p_trf,
                                     sf_trf, S, MU, SIGMA_trf, U, b, N, M, D)
        K_MM = self.kernel_gauss(U, lengthscale_trf, lengthscale_p_trf, sf_trf)
        XT_EPhi = X.T.dot(EPhi)
        opt_A_mean, cholSigInv, cholK_MM, InvK_MM = self.get_opt_A(
            sn_trf, EPhiTPhi, XT_EPhi, K_MM)

        LL = -0.5 * (D * (
            (N - M) * T.log(sn_trf) + N * np.log(2 * np.pi) - T.sum(2 * T.log(
                T.diag(cholK_MM))) + T.sum(2 * T.log(T.diag(cholSigInv))) +
            (N * sf_trf - T.sum(T.diag(InvK_MM.dot(EPhiTPhi_reg)))) / sn_trf) +
                     T.sum(X**2) / sn_trf -
                     T.sum(opt_A_mean.T * XT_EPhi) / sn_trf)

        KL_X = -0.5 * (T.log(2 * np.pi * SIGMA_trf) + 1).sum() + 0.5 * (np.log(
            2 * np.pi)) + 0.5 * (SIGMA_trf + MU**2).sum()

        return LL, KL_X
Example 38
    def solve(self, X, flux, cho_C, mu, LInv):
        """
        Compute the maximum a posteriori (MAP) prediction for the
        spherical harmonic coefficients of a map given a flux timeseries.

        Args:
            X (matrix): The flux design matrix.
            flux (array): The flux timeseries.
            cho_C (scalar/vector/matrix): The lower cholesky factorization
                of the data covariance.
            mu (array): The prior mean of the spherical harmonic coefficients.
            LInv (scalar/vector/matrix): The inverse prior covariance of the
                spherical harmonic coefficients.

        Returns:
            The vector of spherical harmonic coefficients corresponding to the
            MAP solution and the Cholesky factorization of the corresponding
            covariance matrix.

        """
        # Compute C^-1 . X
        if cho_C.ndim == 0:
            CInvX = X / cho_C**2
        elif cho_C.ndim == 1:
            CInvX = tt.dot(tt.diag(1 / cho_C**2), X)
        else:
            CInvX = _cho_solve(cho_C, X)

        # Compute W = X^T . C^-1 . X + L^-1
        W = tt.dot(tt.transpose(X), CInvX)
        if LInv.ndim == 0:
            W = tt.inc_subtensor(
                W[tuple((tt.arange(W.shape[0]), tt.arange(W.shape[0])))], LInv)
            LInvmu = mu * LInv
        elif LInv.ndim == 1:
            W = tt.inc_subtensor(
                W[tuple((tt.arange(W.shape[0]), tt.arange(W.shape[0])))], LInv)
            LInvmu = mu * LInv
        else:
            W += LInv
            LInvmu = tt.dot(LInv, mu)

        # Compute the max like y and its covariance matrix
        cho_W = sla.cholesky(W)
        M = _cho_solve(cho_W, tt.transpose(CInvX))
        yhat = tt.dot(M, flux) + _cho_solve(cho_W, LInvmu)
        ycov = _cho_solve(cho_W, tt.eye(cho_W.shape[0]))
        cho_ycov = sla.cholesky(ycov)

        return yhat, cho_ycov
Example 39
def GrabProbs(classProbs, target, gRange=None):
    if classProbs.ndim > 2:
        classProbs = classProbs.reshape((classProbs.shape[0] * classProbs.shape[1], classProbs.shape[2]))
    else:
        classProbs = classProbs
    
    if target.ndim > 1:
        tflat = target.flatten()
    else:
        tflat = target

    ### Hack for Theano, much faster than [x, y] indexing 
    ### avoids a copy onto the GPU
    return T.diag(classProbs.T[tflat])
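A small numpy illustration (not from the original source) of what the T.diag(classProbs.T[tflat]) trick computes: the probability of the target class for each row, i.e. probs[i, target[i]].

import numpy as np

probs = np.array([[0.1, 0.9],
                  [0.7, 0.3],
                  [0.2, 0.8]])
target = np.array([1, 0, 1])

picked = np.diag(probs.T[target])        # -> [0.9, 0.7, 0.8]
assert np.allclose(picked, probs[np.arange(3), target])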
Example 40
    def construct_likelihood(self):
        lower_idxs = np.tril_indices(self.data.shape[-1], k=-1)

        L = pm.expand_packed_triangular(self.ndim, self.model['packed_L'])
        Sigma = pm.Deterministic('Sigma', L.dot(L.T))
        std = tt.sqrt(tt.diag(Sigma))
        corr = Sigma / tt.outer(std, std)
        pm.Deterministic('corr_coeffs', corr[lower_idxs])

        if self.data_covariances is None:
            pm.MvNormal('like', mu=self.model['mu'], chol=L, observed=self.residual_data)
        else:
            like = _multivariate_normal_convolution_likelihood(Sigma, self.model['mu'], self.residual_data, self.data_covariances)
            pm.Potential('like', like)
Example 41
def ksd_eval(X0, h0, score_q, **model_params):

    X = sharedX(X0)
    h = sharedX(h0)

    Sqx = score_q(X, **model_params)

    H = sqr_dist(X, X)
    h = T.sqrt(h / 2.)

    V = H.flatten()
    # median distance
    h = T.switch(
        T.eq((V.shape[0] % 2), 0),
        # if even vector
        T.mean(T.sort(V)[((V.shape[0] / 2) - 1):((V.shape[0] / 2) + 1)]),
        # if odd vector
        T.sort(V)[V.shape[0] // 2])

    # compute the rbf kernel
    Kxy = T.exp(-H / h**2 / 2.)

    Sqxdy = -(T.dot(Sqx, X.T) - T.tile(
        T.sum(Sqx * X, axis=1).dimshuffle(0, 'x'), (1, X.shape[0]))) / (h**2)

    dxSqy = T.transpose(Sqxdy)
    dxdy = (-H / (h**4) + X.shape[1].astype(theano.config.floatX) / (h**2))

    M = (T.dot(Sqx, Sqx.T) + Sqxdy + dxSqy + dxdy) * Kxy
    M2 = M - T.diag(T.diag(M))

    ksd_u = T.sum(M2) / (X.shape[0] * (X.shape[0] - 1))
    ksd_v = T.sum(M) / (X.shape[0]**2)

    f = theano.function(inputs=[], outputs=[ksd_u, ksd_v])

    return f()
Example 42
    def L_op(self, inputs, outputs, gradients):
        # Modified from theano/tensor/slinalg.py
        # No handling for on_error = 'nan'
        dz = gradients[0]
        chol_x = outputs[0]

        # this is for nan mode
        #
        # ok = ~tensor.any(tensor.isnan(chol_x))
        # chol_x = tensor.switch(ok, chol_x, 1)
        # dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.0)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return gpu_solve_upper_triangular(
                outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T
            )

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz))
        )

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        return [grad]
Example 43
def compileFun(model_name, dataset_name, pooling_mode):
	print "model_name: ", model_name
	print "dataset_name: ", dataset_name
	print "pooling_mode: ", pooling_mode
	print "Started!"
	rng = numpy.random.RandomState(23455)
	sentenceWordCount = T.ivector("sentenceWordCount")
	corpus = T.matrix("corpus")
# 	docLabel = T.ivector('docLabel') 
	
	# for list-type data
	layer0 = DocEmbeddingNNOneDoc(corpus, sentenceWordCount, rng, wordEmbeddingDim=200, \
													 sentenceLayerNodesNum=100, \
													 sentenceLayerNodesSize=[5, 200], \
													 docLayerNodesNum=100, \
													 docLayerNodesSize=[3, 100],
													 pooling_mode=pooling_mode)

	layer1_output_num = 100
	layer1 = HiddenLayer(
		rng,
		input=layer0.output,
		n_in=layer0.outputDimension,
		n_out=layer1_output_num,
		activation=T.tanh
	)
	
	layer2 = LogisticRegression(input=layer1.output, n_in=100, n_out=2)

	cost = layer2.negative_log_likelihood(1 - layer2.y_pred)
		
	# calculate sentence sentence_score
	sentence_grads = T.grad(cost, layer0.sentenceResults)
	sentence_score = T.diag(T.dot(sentence_grads, T.transpose(layer0.sentenceResults)))
	
	# construct the parameter array.
	params = layer2.params + layer1.params + layer0.params
	
	# Load the parameters last time, optionally.
	model_path = "data/" + dataset_name + "/" + model_name + "/" + pooling_mode + ".model"
	loadParamsVal(model_path, params)
	print "Compiling computing graph."
	output_model = theano.function(
 		[corpus, sentenceWordCount],
 		[layer2.y_pred, sentence_score]
 	)
	
	print "Compiled."
	return output_model
Example 44
    def __init__(self, rng, embedding, idx_context, gamma, dim_emb, dict_size):
        """ Initialize the parameters of the logistic regression

        :type n_in: int
        :param n_in:

        """
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        W_values = numpy.asarray(
            rng.uniform(
                low=-numpy.sqrt(6. / (dict_size + dim_emb)),
                high=numpy.sqrt(6. / (dict_size + dim_emb)),
                size=(dict_size, dim_emb)
            ),
            dtype=theano.config.floatX
        )
        self.W = theano.shared(
            W_values*0.,
#            value=numpy.zeros(
#                (dict_size, dim_emb),
#                dtype=theano.config.floatX
#            ),
            name='SoftmaxW',
            borrow=True
        )
        
        self.params = [self.W]
               
        self.prediction = T.nnet.sigmoid(
                gamma * T.diag(T.dot(embedding, self.W[idx_context].T))
        )
        
        self.regul = T.mean (T.diag(T.dot(embedding, embedding.T) + T.dot(self.W[idx_context], self.W[idx_context].T)))
    
        self.negLogLikelihood = -T.mean(T.log(self.prediction))
        
 def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None):
     super(Coregion, self).__init__(input_dim, active_dims)
     if len(self.active_dims) != 1:
         raise ValueError('Coregion requires exactly one dimension to be active')
     make_B = W is not None or kappa is not None
     if make_B and B is not None:
         raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion')
     if make_B:
         self.W = tt.as_tensor_variable(W)
         self.kappa = tt.as_tensor_variable(kappa)
         self.B = tt.dot(self.W, self.W.T) + tt.diag(self.kappa)
     elif B is not None:
         self.B = tt.as_tensor_variable(B)
     else:
         raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion')
Example 46
 def psd(self, omega):
     ar, cr, ac, bc, cc, dc = self.term.coefficients
     omega = tt.reshape(omega,
                        tt.concatenate([omega.shape, [1]]),
                        ndim=omega.ndim + 1)
     w2 = omega**2
     w02 = cc**2 + dc**2
     power = tt.sum(ar * cr / (cr**2 + w2), axis=-1)
     power += tt.sum(
         ((ac * cc + bc * dc) * w02 + (ac * cc - bc * dc) * w2) /
         (w2 * w2 + 2.0 * (cc**2 - dc**2) * w2 + w02 * w02),
         axis=-1,
     )
     psd = np.sqrt(2.0 / np.pi) * power
     return psd[:, None] * tt.diag(self.R)
Example 47
    def step(self, mask, input_term, forget_term, output_term, cell_term, h_pre, c_pre):
        input_term += T.dot(h_pre, self.lstm_input_wh) + T.dot(c_pre, T.diag(self.lstm_input_wc))
        forget_term += T.dot(h_pre, self.lstm_forget_wh) + T.dot(c_pre, T.diag(self.lstm_forget_wc))

        input_term += self.lstm_input_b
        forget_term += self.lstm_forget_b

        input_gate = T.nnet.sigmoid(input_term)
        forget_gate = T.nnet.sigmoid(forget_term)

        cell_term += T.dot(h_pre, self.lstm_cell_wh)
        cell_term += self.lstm_cell_b

        c = forget_gate * c_pre + input_gate * T.tanh(cell_term)


        output_term += T.dot(h_pre, self.lstm_output_wh) + T.dot(c, T.diag(self.lstm_output_wc))
        output_term += self.lstm_output_b

        output_gate = T.nnet.sigmoid(output_term)

        h = output_gate * T.tanh(c)

        return h, c
Example 48
    def MMD_kenel_Xonly(self,gamma,Label,Knn,Weight):
        Dn=Label.shape[1]
        DD1=T.tile(Label.T, (Dn, 1,1))
        tttt=DD1[:,:,:,None]*DD1.transpose((1,0,2))[:,:,None,:]

        Hh=T.sum(T.sum(tttt*Knn[None,None,:,:],-1),-1)
        Hh=Hh*Weight
        
        GH=T.tile(T.diag(Hh),(Dn,1))
        new=T.exp(-(GH.T+GH-2*Hh)/(2*gamma**2))  # at this point we have the D x D MMD distances; next apply the RBF kernel
        
        KK=tttt*new[:,:,None,None]
        #KK1=T.where(T.eq(KK,0),1,KK)  # this is for Z: X has only one non-zero label, but Z can have several; we want the weighted product of all of them, and any zero would force the product to zero, so zeros are replaced with 1
        KK2=T.sum(T.sum(KK,0),0)
        Kmmd_rbf=KK2*Knn  # apply the RBF kernel
        return Kmmd_rbf
Example 49
def cholInvLogDet(A, dim, jitter, fast=False):

    A_jitter = A + jitter * T.eye(dim)
    cA = myCholesky()(A_jitter)
    cA.name = 'c' + A.name

    if fast:
        (iA, logDetA) = invLogDet(cA)
    else:
        iA = nlinalg.matrix_inverse(A_jitter)
        #logDetA = T.log( nlinalg.Det()(A_jitter) )
        logDetA = 2.0 * T.sum(T.log(T.abs_(T.diag(cA))))
        iA.name = 'i' + A.name
        logDetA.name = 'logDet' + A.name

    return (cA, iA, logDetA)
Example 50
    def evaluateLogDensity(self, X, Y):
        Ypred = theano.clone(self.rate, replace={self.Xsamp: X})
        resY = Y - Ypred
        resX = X[1:] - T.dot(X[:(X.shape[0] - 1)], self.A.T)
        resX0 = X[0] - self.x0

        LogDensity = -(0.5 * T.dot(resY.T, resY) * T.diag(self.Rinv)).sum() - (
            0.5 * T.dot(resX.T, resX) * self.Lambda).sum() - 0.5 * T.dot(
                T.dot(resX0, self.Lambda0), resX0.T)
        LogDensity += 0.5 * (T.log(
            self.Rinv)).sum() * Y.shape[0] + 0.5 * T.log(Tla.det(
                self.Lambda)) * (Y.shape[0] - 1) + 0.5 * T.log(
                    Tla.det(self.Lambda0)) - 0.5 * (
                        self.xDim + self.yDim) * np.log(2 * np.pi) * Y.shape[0]

        return LogDensity
Example 51
    def get_celerite_matrices(self, x, diag):
        x = tt.as_tensor_variable(x)
        ar, cr, ac, bc, cc, dc = self.term.coefficients

        U = tt.concatenate(
            (
                ar[None, :] + tt.zeros_like(x)[:, None],
                ac[None, :] * tt.cos(dc[None, :] * x[:, None]) +
                bc[None, :] * tt.sin(dc[None, :] * x[:, None]),
                ac[None, :] * tt.sin(dc[None, :] * x[:, None]) -
                bc[None, :] * tt.cos(dc[None, :] * x[:, None]),
            ),
            axis=1,
        )
        V = tt.concatenate(
            (
                tt.zeros_like(ar)[None, :] + tt.ones_like(x)[:, None],
                tt.cos(dc[None, :] * x[:, None]),
                tt.sin(dc[None, :] * x[:, None]),
            ),
            axis=1,
        )
        if 'alpha' in vars(self):
            x = tt.reshape(
                tt.tile(x, (self.alpha.shape[0], 1)).T,
                (1, x.size * self.alpha.shape[0]))[0]
            dx = x[1:] - x[:-1]
            a = diag + (self.alpha**2)[:, None] * (tt.sum(ar) + tt.sum(ac))
            a = tt.reshape(a.T, (1, a.size))[0]
            U = tt.slinalg.kron(U, self.alpha[:, None])
            V = tt.slinalg.kron(V, self.alpha[:, None])
            c = tt.concatenate((cr, cc, cc))
            P = tt.exp(-c[None, :] * dx[:, None])
        elif 'R' in vars(self):
            x = tt.reshape(
                tt.tile(x, (self.R.shape[0], 1)).T,
                (1, x.size * self.R.shape[0]))[0]
            dx = x[1:] - x[:-1]
            a = diag + tt.diag(self.R)[:, None] * (tt.sum(ar) + tt.sum(ac))
            a = tt.reshape(a.T, (1, a.size))[0]
            U = tt.slinalg.kron(U, self.R)
            V = tt.slinalg.kron(V, tt.eye(self.R.shape[0]))
            c = tt.concatenate((cr, cc, cc))
            P = tt.exp(-c[None, :] * dx[:, None])
            P = tt.tile(P, (1, self.R.shape[0]))

        return a, U, V, P
Example 52
            def nlml(Y, hyp, X, X_sp, EyeM):
                # TODO allow for different pseudo inputs for each dimension
                # initialise the (before compilation) kernel function
                hyps = [hyp[:idims+1], hyp[idims+1]]
                kernel_func = partial(cov.Sum, hyps, self.covs)

                sf2 = hyp[idims]**2
                sn2 = hyp[idims+1]**2
                N = X.shape[0].astype(theano.config.floatX)

                ridge = 1e-6
                Kmm = kernel_func(X_sp) + ridge*EyeM
                Kmn = kernel_func(X_sp, X)
                Lmm = cholesky(Kmm)
                rhs = tt.concatenate([EyeM, Kmn], axis=1)
                sol = solve_lower_triangular(Lmm, rhs)
                iKmm = solve_upper_triangular(Lmm.T, sol[:, :EyeM.shape[0]])
                Lmn = sol[:, EyeM.shape[0]:]
                diagQnn = (Lmn**2).sum(0)

                # Gamma = diag(Knn - Qnn) + sn2*I
                Gamma = sf2 + sn2 - diagQnn
                Gamma_inv = 1.0/Gamma

                # these operations are done to avoid inverting Qnn+Gamma)
                sqrtGamma_inv = tt.sqrt(Gamma_inv)
                Lmn_ = Lmn*sqrtGamma_inv                      # Kmn_*Gamma^-.5
                Yi = Y*(sqrtGamma_inv)                        # Gamma^-.5* Y
                # I + Lmn * Gamma^-1 * Lnm
                Bmm = tt.eye(Kmm.shape[0]) + (Lmn_).dot(Lmn_.T)
                Amm = cholesky(Bmm)
                LAmm = Lmm.dot(Amm)
                Kmn_dotYi = Kmn.dot(Yi*(sqrtGamma_inv))
                rhs = tt.concatenate([EyeM, Kmn_dotYi[:, None]], axis=1)
                sol = solve_upper_triangular(
                    LAmm.T, solve_lower_triangular(LAmm, rhs))
                iBmm = sol[:, :-1]
                beta_sp = sol[:, -1]

                log_det_K_sp = tt.sum(tt.log(Gamma))
                log_det_K_sp += 2*tt.sum(tt.log(tt.diag(Amm)))

                loss_sp = Yi.dot(Yi) - Kmn_dotYi.dot(beta_sp)
                loss_sp += log_det_K_sp + N*np.log(2*np.pi)
                loss_sp *= 0.5

                return loss_sp, iKmm, Lmm, Amm, iBmm, beta_sp
def _create_theano_likelihood_graph_voltage(data, X, ind_X, n_X, n_inducing_X, approx='FITC'):#<-----New

    rho = tt.dvector('rho')
    ker_sigma = tt.dscalar('ker_sigma')
    sigma = tt.dscalar('sigma')
    time_V = theano.tensor.as_tensor_variable(X)
    inducing_time_V = theano.tensor.as_tensor_variable(ind_X)
    y = theano.tensor.as_tensor_variable(data)

    current = tt.dvector('current')

    cov_func = RbfKernel(rho, ker_sigma)

    sigma2 = tt.square(sigma)
    Kuu = cov_func(inducing_time_V)
    Kuf = cov_func(inducing_time_V, time_V)
 
    Luu = cholesky(stabilize(Kuu))
    A = solve_lower(Luu, Kuf)
    Qffd = tt.sum(A * A, 0)
    
    if approx == 'FITC':
        Kffd = cov_func(time_V, diag=True)
        Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
        trace = 0.0
    elif approx == 'VFE':
        Lamd = tt.ones_like(Qffd) * sigma2
        trace = ((1.0 / (2.0 * sigma2)) *
                    (tt.sum(cov_func(time_V, diag=True)) -
                    tt.sum(tt.sum(A * A, 0))))
    else:  # DTC
        Lamd = tt.ones_like(Qffd) * sigma2
        trace = 0.0
    
    A_l = A / Lamd
    L_B = cholesky(tt.eye(n_inducing_X) + tt.dot(A_l, tt.transpose(A)))
    r = y - current
    
    r_l = r / Lamd
    
    c = solve_lower(L_B, tt.dot(A, r_l))
    
    constant = 0.5 * n_X * tt.log(2.0 * np.pi)
    logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
    quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
    ll = -1.0 * (constant + logdet + quadratic + trace)  
    return theano.function([current,rho,ker_sigma,sigma],ll,on_unused_input='ignore')
Example 54
    def predict(self, mx, Sx=None, **kwargs):
        # by default, sample internal params (e.g. dropout masks)
        # at every evaluation
        kwargs['iid_per_eval'] = kwargs.get('iid_per_eval', True)
        kwargs['whiten_inputs'] = kwargs.get('whiten_inputs', True)
        kwargs['whiten_outputs'] = kwargs.get('whiten_outputs', True)
        kwargs['deterministic'] = kwargs.get('deterministic', False)

        if Sx is not None:
            # generate random samples from input (assuming gaussian
            # distributed inputs)
            # standard uniform samples (one sample per network sample)
            z_std = self.m_rng.normal((self.n_samples, self.D))

            # scale and center particles
            Lx = tt.slinalg.cholesky(Sx)
            x = mx + z_std.dot(Lx.T)
        else:
            x = mx[None, :] if mx.ndim == 1 else mx

        # we are going to apply the saturation function
        # after whitening the outputs
        return_samples = kwargs.get('return_samples', True)
        kwargs['return_samples'] = True

        y, sn = super(NNPolicy, self).predict(x, None, **kwargs)
        if callable(self.sat_func):
            y = self.sat_func(y)

        if return_samples:
            return y, sn
        else:
            n = tt.cast(y.shape[0], dtype=theano.config.floatX)
            # empirical mean
            M = y.mean(axis=0)
            # empirical covariance
            deltay = y - M
            S = deltay.T.dot(deltay) / (n - 1)
            # noise
            S += tt.diag((sn**2).mean(axis=0))
            # empirical input output covariance
            if Sx is not None:
                deltax = x - x.mean(0)
                C = deltax.T.dot(deltay) / (n - 1)
            else:
                C = tt.zeros((self.D, self.E))
            return [M, S, C]
Example 55
    def get_model(self, lengthscale_trf, lengthscale_p_trf, sn_trf, sf_trf,
                  MU_S, SIGMA_S_trf, MU, SIGMA_trf, U, b, X, y, MEAN_MAP, Q, D,
                  D_cum_sum, layers, order, non_rec, N, M):

        X_inputs, SIGMA_inputs = self.update(layers, order, MU, SIGMA_trf, X,
                                             Q, D, D_cum_sum, N, non_rec)
        LL = 0

        for i in range(0, layers + 1):
            EEPhi, EEPhiTPhi = self.get_EPhi(
                lengthscale_trf[D_cum_sum[i]:D_cum_sum[i + 1]],
                lengthscale_p_trf[D_cum_sum[i]:D_cum_sum[i + 1]], sf_trf[i],
                MU_S[:, D_cum_sum[i]:D_cum_sum[i + 1]],
                SIGMA_S_trf[:, D_cum_sum[i]:D_cum_sum[i + 1]],
                X_inputs[:, D_cum_sum[i]:D_cum_sum[i + 1]],
                SIGMA_inputs[:, D_cum_sum[i]:D_cum_sum[i + 1]],
                U[:, D_cum_sum[i]:D_cum_sum[i + 1]], b[:, i], N, M, i, D[i],
                order, non_rec)
            if i == layers:
                z = y[order:]
                SIGMA_trf_LL = 0
            else:
                if layers > 1:
                    z = MU[order:, i] - X.dot(MEAN_MAP)
                    SIGMA_trf_LL = SIGMA_trf[order:, i]
                else:
                    z = MU[order:] - X.dot(MEAN_MAP)
                    SIGMA_trf_LL = SIGMA_trf[order:]
            zT_EEPhi = z.T.dot(EEPhi)
            opt_A_mean, cholSigInv = self.get_opt_A(sn_trf[i], EEPhiTPhi,
                                                    zT_EEPhi)
            LL = LL - 0.5 * (N - M) * T.log(sn_trf[i]) - 0.5 * N * np.log(
                2 * np.pi) - 0.5 * T.sum(
                    2 * T.log(T.diag(cholSigInv))) - 0.5 * T.sum(
                        SIGMA_trf_LL) / sn_trf[i] - 0.5 * T.sum(
                            z**2) / sn_trf[i] + 0.5 * T.sum(
                                opt_A_mean.T * zT_EEPhi) / sn_trf[i]

        KL_S = 0.5 * (SIGMA_S_trf + MU_S**2 - T.log(SIGMA_S_trf) - 1).sum()
        KL_X = -0.5 * (T.log(2 * np.pi * SIGMA_trf) +
                       1).sum() + 0.5 * layers * order * (np.log(
                           2 * np.pi)) + 0.5 * (SIGMA_trf[1:order, ] +
                                                MU[1:order, ]**2).sum()
        KL = KL_S + KL_X

        return LL, KL
Example 56
    def get_model(self, lengthscale_trf, lengthscale_p_trf, sn_trf, sf_trf, S,
                  MU, SIGMA_trf, U, b, X, Q, D, N, M):

        EPhi, EPhiTPhi = self.get_EPhi(lengthscale_trf, lengthscale_p_trf,
                                       sf_trf, S, MU, SIGMA_trf, U, b, N, M)
        XT_EPhi = X.T.dot(EPhi)
        opt_A_mean, cholSigInv = self.get_opt_A(sn_trf, EPhiTPhi, XT_EPhi)

        LL = -0.5 * (D * (
            (N - M) * T.log(sn_trf) + N * np.log(2 * np.pi) +
            T.sum(2 * T.log(T.diag(cholSigInv)))) + T.sum(X**2) / sn_trf -
                     T.sum(opt_A_mean.T * XT_EPhi) / sn_trf)

        KL_X = -0.5 * (T.log(2 * np.pi * SIGMA_trf) + 1).sum() + 0.5 * (np.log(
            2 * np.pi)) + 0.5 * (SIGMA_trf + MU**2).sum()

        return LL, KL_X
Example 57
    def bpr_max_reg(self, pred_mat, y, y_pos):
        loss = 0.5
        softmax_scores = self.softmax_neg(pred_mat.T).T
        loss_part = -T.log(
            T.sum(T.nnet.sigmoid(T.diag(pred_mat.T) - pred_mat) *
                  softmax_scores,
                  axis=0) + 1e-24)
        reg_part = loss * T.sum((pred_mat**2) * softmax_scores, axis=0)
        reg_part2 = (-self.regularization * (self.S[y_pos]**2).sum(axis=1) -
                     self.regularization * (self.I[y_pos]**2).sum(axis=1) -
                     self.regularization * (self.I1[y_pos]**2).sum(axis=1) -
                     self.regularization * (self.I2[y_pos]**2).sum(axis=1) -
                     self.regularization * (self.BI[y_pos]**2) -
                     self.regularization * (self.BS[y_pos]**2) -
                     self.regularization * (self.H[y_pos]**2))

        return T.cast(T.mean(loss_part + reg_part - reg_part2), self.floatX)
Example 58
    def MMD_kenel_ZX(self,Xlabel,Kmn,Weight):
        Dn=self.Zlabel_T.shape[1]
        DDX=T.tile(Xlabel.T, (Dn, 1,1))
        DDZ=T.tile(self.Zlabel_T.T, (Dn, 1,1))
        
        tttt=DDZ[:,:,:,None]*DDX.transpose((1,0,2))[:,:,None,:]  # e.g. 10 x 10 x N_Z x N

        Hh=T.sum(T.sum(tttt*Kmn[None,None,:,:],-1),-1)
        Hh=Hh*Weight
        
        GH=T.tile(T.diag(Hh),(Dn,1))
        new=T.exp(-(GH.T+GH-2*Hh)/(2*self.gamma**2))  # at this point we have the D x D MMD distances; next apply the RBF kernel
        
        KK=tttt*new[:,:,None,None]
        #KK1=T.where(T.eq(KK,0),1,KK)  # this is for Z: X has only one non-zero label, but Z can have several; we want the weighted product of all of them, and any zero would force the product to zero, so zeros are replaced with 1
        KK2=T.sum(T.sum(KK,0),0)
        Kmmd_rbf=KK2*Kmn  # apply the RBF kernel
        return Kmmd_rbf
def low_rank_matrix_approximation_theano(A, k, norm_ord, minstepsize=1e-9):
    manifold, solver = _bootstrap_problem(A, k, minstepsize)

    U, S, V = [T.matrix(sym) for sym in ['U', 'S', 'V']]
    if norm_ord == 'fro':
        cost = T.sum((U.dot(S).dot(V) - A)**2)
    elif norm_ord == 'spectral':
        cost = (U.dot(S).dot(V) - A).norm(2)
    elif norm_ord == 'abs':
        cost = (U.dot(S).dot(V) - A).norm(1)
    else:
        mat = U.dot(S).dot(V) - A
        cost = T.diag(mat.T.dot(mat)).norm(
            L=norm_ord
        )  #T.sum(T.nlinalg.svd(U.dot(S).dot(V) - A, full_matrices=False)[1])

    problem = Problem(man=manifold, theano_cost=cost, theano_arg=[U, S, V])
    return solver.solve(problem)
Example 60
def gSin(m, v, i=None, e=None):
    D = m.shape[0]
    if i is None:
        i = tt.arange(D)
    if e is None:
        e = tt.ones((D, ))
    elif e.__class__ is list:
        e = tt.as_tensor_variable(np.array(e)).flatten()
    elif e.__class__ is np.array:
        e = tt.as_tensor_variable(e).flatten()

    # compute the output mean
    mi = m[i]
    vi = v[i, :][:, i]
    vii = v[i, i]
    exp_vii_h = tt.exp(-vii / 2)
    M = exp_vii_h * tt.sin(mi)

    # output covariance
    vii_c = vii.dimshuffle(0, 'x')
    vii_r = vii.dimshuffle('x', 0)
    lq = -0.5 * (vii_c + vii_r)
    q = tt.exp(lq)
    exp_lq_p_vi = tt.exp(lq + vi)
    exp_lq_m_vi = tt.exp(lq - vi)
    mi_c = mi.dimshuffle(0, 'x')
    mi_r = mi.dimshuffle('x', 0)
    U1 = (exp_lq_p_vi - q) * (tt.cos(mi_c - mi_r))
    U2 = (exp_lq_m_vi - q) * (tt.cos(mi_c + mi_r))

    V = 0.5 * (U1 - U2)

    # inv input covariance dot input output covariance
    C = tt.diag(exp_vii_h * tt.cos(mi))

    # account for the effect of scaling the output
    M = e * M
    V = tt.outer(e, e) * V
    C = e * C

    retvars = [M, V, C]

    return retvars