Code Example #1
File: linalg.py  Project: Theano/Theano
    def L_op(self, inputs, outputs, gradients):
        # Modified from theano/tensor/slinalg.py
        # No handling for on_error = 'nan'
        dz = gradients[0]
        chol_x = outputs[0]

        # this is for nan mode
        #
        # ok = ~tensor.any(tensor.isnan(chol_x))
        # chol_x = tensor.switch(ok, chol_x, 1)
        # dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return gpu_solve_upper_triangular(
                outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        return [grad]
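Note (not part of the excerpt above): the tril_and_halve_diagonal helper shared by the Cholesky-gradient snippets in this listing simply keeps the lower triangle and halves the diagonal. A minimal NumPy sketch of what it computes, with illustrative values:

import numpy as np

# Phi(M): keep the lower triangle, halve the diagonal -- the NumPy equivalent of
# tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.) used above.
M = np.arange(1., 10.).reshape(3, 3)
phi = np.tril(M) - np.diag(np.diagonal(M) / 2.)
print(phi)
# [[0.5 0.  0. ]
#  [4.  2.5 0. ]
#  [7.  8.  4.5]]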
Code Example #2
File: orbm.py  Project: orapradeep/iRBM
    def marginalize_over_v_z(self, h):
        # energy = \sum_{i=1}^{|h|} h_i*b_i - \beta * ln(1 + e^{b_i})

        # In theory should use the following line
        # energy = (h * self.b).T
        # However, when there is broadcasting, the Theano element-wise multiplication between np.NaN and 0 is 0 instead of np.NaN!
        # so we use T.tensordot and T.diagonal instead as a workaround!
        # See Theano issue #3848 (https://github.com/Theano/Theano/issues/3848)
        energy = T.tensordot(h, self.b, axes=0)
        energy = T.diagonal(energy, axis1=1, axis2=2).T

        if self.penalty == "softplus_bi":
            energy = energy - self.beta * T.log(1 + T.exp(self.b))[:, None]

        elif self.penalty == "softplus0":
            energy = energy - self.beta * T.log(1 + T.exp(0))[:, None]

        else:
            raise NameError("Invalid penalty term")

        energy = T.set_subtensor(energy[(T.isnan(energy)).nonzero()],
                                 0)  # Remove NaN
        energy = T.sum(energy, axis=0, keepdims=True).T

        ener = T.tensordot(h, self.W, axes=0)
        ener = T.diagonal(ener, axis1=1, axis2=2)
        ener = T.set_subtensor(ener[(T.isnan(ener)).nonzero()], 0)
        ener = T.sum(ener, axis=2) + self.c[None, :]
        ener = T.sum(T.log(1 + T.exp(ener)), axis=1, keepdims=True)

        return -(energy + ener)
Code Example #3
File: orbm.py  Project: MarcCote/iRBM
    def marginalize_over_v_z(self, h):
        # energy = \sum_{i=1}^{|h|} h_i*b_i - \beta * ln(1 + e^{b_i})

        # In theory should use the following line
        # energy = (h * self.b).T
        # However, when there is broadcasting, the Theano element-wise multiplication between np.NaN and 0 is 0 instead of np.NaN!
        # so we use T.tensordot and T.diagonal instead as a workaround!
        # See Theano issue #3848 (https://github.com/Theano/Theano/issues/3848)
        energy = T.tensordot(h, self.b, axes=0)
        energy = T.diagonal(energy, axis1=1, axis2=2).T

        if self.penalty == "softplus_bi":
            energy = energy - self.beta * T.log(1 + T.exp(self.b))[:, None]

        elif self.penalty == "softplus0":
            energy = energy - self.beta * T.log(1 + T.exp(0))[:, None]

        else:
            raise NameError("Invalid penalty term")

        energy = T.set_subtensor(energy[(T.isnan(energy)).nonzero()], 0)  # Remove NaN
        energy = T.sum(energy, axis=0, keepdims=True).T

        ener = T.tensordot(h, self.W, axes=0)
        ener = T.diagonal(ener, axis1=1, axis2=2)
        ener = T.set_subtensor(ener[(T.isnan(ener)).nonzero()], 0)
        ener = T.sum(ener, axis=2) + self.c[None, :]
        ener = T.sum(T.log(1 + T.exp(ener)), axis=1, keepdims=True)

        return -(energy + ener)
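The workaround in examples #2 and #3 exploits the fact that the diagonal of an outer product is the element-wise product: T.tensordot(h, self.b, axes=0) builds the full (batch, units, units) outer product and T.diagonal(..., axis1=1, axis2=2) reads back the h[n, i] * b[i] terms, sidestepping the broadcast multiplication that the comment (and Theano issue #3848) warns about. A minimal sketch of the pattern on its own, with illustrative variable names:

import numpy as np
import theano
import theano.tensor as T

h = T.matrix('h')   # shape (batch, units)
b = T.vector('b')   # shape (units,)

outer = T.tensordot(h, b, axes=0)            # shape (batch, units, units)
prod = T.diagonal(outer, axis1=1, axis2=2)   # prod[n, i] == h[n, i] * b[i]

f = theano.function([h, b], prod)
hv = np.ones((2, 3), dtype=theano.config.floatX)
bv = np.arange(3).astype(theano.config.floatX)
print(f(hv, bv))   # [[0. 1. 2.], [0. 1. 2.]]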
Code Example #4
File: mmd.py  Project: yux94/opt-mmd
def rbf_mmd2(X, Y, sigma=0, biased=True):
    gamma = 1 / (2 * sigma**2)

    XX = T.dot(X, X.T)
    XY = T.dot(X, Y.T)
    YY = T.dot(Y, Y.T)

    X_sqnorms = T.diagonal(XX)
    Y_sqnorms = T.diagonal(YY)

    K_XY = T.exp(
        -gamma *
        (-2 * XY + X_sqnorms[:, np.newaxis] + Y_sqnorms[np.newaxis, :]))
    K_XX = T.exp(
        -gamma *
        (-2 * XX + X_sqnorms[:, np.newaxis] + X_sqnorms[np.newaxis, :]))
    K_YY = T.exp(
        -gamma *
        (-2 * YY + Y_sqnorms[:, np.newaxis] + Y_sqnorms[np.newaxis, :]))

    if biased:
        mmd2 = K_XX.mean() + K_YY.mean() - 2 * K_XY.mean()
    else:
        m = K_XX.shape[0]
        n = K_YY.shape[0]

        mmd2 = ((K_XX.sum() - m) / (m * (m - 1)) + (K_YY.sum() - n) /
                (n * (n - 1)) - 2 * K_XY.mean())
    return mmd2, mmd2
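In example #4, T.diagonal(XX) and T.diagonal(YY) read the squared norms off the Gram matrices, so each kernel argument is the pairwise squared distance ||x_i - y_j||^2 = ||x_i||^2 + ||y_j||^2 - 2 x_i·y_j. A stripped-down sketch of just that distance computation (no MMD terms, variable names are illustrative):

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')   # shape (m, d)
Y = T.matrix('Y')   # shape (n, d)

XY = T.dot(X, Y.T)
X_sqnorms = T.diagonal(T.dot(X, X.T))   # ||x_i||^2, shape (m,)
Y_sqnorms = T.diagonal(T.dot(Y, Y.T))   # ||y_j||^2, shape (n,)

# Pairwise squared Euclidean distances, shape (m, n).
sqdist = X_sqnorms[:, np.newaxis] + Y_sqnorms[np.newaxis, :] - 2 * XY
dist_fn = theano.function([X, Y], sqdist)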
Code Example #5
File: mmd.py  Project: yux94/opt-mmd
def _mmd2_and_variance(K_XX, K_XY, K_YY, unit_diagonal=False, biased=False):
    m = K_XX.shape[0]  # Assumes X, Y are same shape

    ### Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if unit_diagonal:
        diag_X = diag_Y = 1
        sum_diag_X = sum_diag_Y = m
        sum_diag2_X = sum_diag2_Y = m
    else:
        diag_X = T.diagonal(K_XX)
        diag_Y = T.diagonal(K_YY)

        sum_diag_X = diag_X.sum()
        sum_diag_Y = diag_Y.sum()

        sum_diag2_X = diag_X.dot(diag_X)
        sum_diag2_Y = diag_Y.dot(diag_Y)

    Kt_XX_sums = K_XX.sum(axis=1) - diag_X
    Kt_YY_sums = K_YY.sum(axis=1) - diag_Y
    K_XY_sums_0 = K_XY.sum(axis=0)
    K_XY_sums_1 = K_XY.sum(axis=1)

    Kt_XX_sum = Kt_XX_sums.sum()
    Kt_YY_sum = Kt_YY_sums.sum()
    K_XY_sum = K_XY_sums_0.sum()

    # TODO: turn these into dot products?
    # should figure out if that's faster or not on GPU / with theano...
    Kt_XX_2_sum = (K_XX**2).sum() - sum_diag2_X
    Kt_YY_2_sum = (K_YY**2).sum() - sum_diag2_Y
    K_XY_2_sum = (K_XY**2).sum()

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m) + (Kt_YY_sum + sum_diag_Y) /
                (m * m) - 2 * K_XY_sum / (m * m))
    else:
        mmd2 = (Kt_XX_sum / (m * (m - 1)) + Kt_YY_sum / (m * (m - 1)) -
                2 * K_XY_sum / (m * m))

    var_est = (2 / (m**2 * (m - 1)**2) *
               (2 * Kt_XX_sums.dot(Kt_XX_sums) - Kt_XX_2_sum +
                2 * Kt_YY_sums.dot(Kt_YY_sums) - Kt_YY_2_sum) - (4 * m - 6) /
               (m**3 * (m - 1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2) + 4 *
               (m - 2) / (m**3 * (m - 1)**2) *
               (K_XY_sums_1.dot(K_XY_sums_1) + K_XY_sums_0.dot(K_XY_sums_0)) -
               4 * (m - 3) / (m**3 * (m - 1)**2) * K_XY_2_sum - (8 * m - 12) /
               (m**5 * (m - 1)) * K_XY_sum**2 + 8 / (m**3 * (m - 1)) *
               (1 / m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum -
                Kt_XX_sums.dot(K_XY_sums_1) - Kt_YY_sums.dot(K_XY_sums_0)))

    return mmd2, var_est
Code Example #6
File: layers.py  Project: yux94/opt-mmd
    def get_output_for(self, input, **kwargs):
        gamma = 1 / (2 * T.exp(2 * self.log_sigma))

        XX = T.dot(input, input.T)
        XY = T.dot(input, self.locs.T)
        YY = T.dot(self.locs, self.locs.T)  # cache this somehow?

        X_sqnorms = T.diagonal(XX)
        Y_sqnorms = T.diagonal(YY)
        return T.exp(
            -gamma *
            (-2 * XY + X_sqnorms[:, np.newaxis] + Y_sqnorms[np.newaxis, :]))
Code Example #7
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # Replace the cholesky decomposition with 1 if there are nans
        # or solve_upper_triangular will throw a ValueError.
        if self.on_error == 'nan':
            ok = ~tensor.any(tensor.isnan(chol_x))
            chol_x = tensor.switch(ok, chol_x, 1)
            dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T,
                solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        if self.on_error == 'nan':
            return [tensor.switch(ok, grad, np.nan)]
        else:
            return [grad]
Code Example #8
File: slinalg.py  Project: HapeMask/Theano
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # Replace the cholesky decomposition with 1 if there are nans
        # or solve_upper_triangular will throw a ValueError.
        if self.on_error == 'nan':
            ok = ~tensor.any(tensor.isnan(chol_x))
            chol_x = tensor.switch(ok, chol_x, 1)
            dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T, solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        if self.on_error == 'nan':
            return [tensor.switch(ok, grad, np.nan)]
        else:
            return [grad]
Code Example #9
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.
        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_
        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527
        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T,
                solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
Code Example #10
File: eca.py  Project: afcentry/eca
    def compile_adapt_f(self, signals):
        x = self.signal(signals)
        x_prev = [p.signal(signals) for p in self.prev]
        assert np.all([x.k == xp.k for xp in x_prev])
        assert self.m == [xp.n for xp in x_prev]
        assert x.n == self.n
        k = np.float32(x.k)
        # Modulate x
        if x.modulation is not None:
            x_ = x.var * T.as_tensor_variable(x.modulation)
        else:
            x_ = x.var

        updates = []
        upd = lambda en, old, new: [(old, ifelse(en, new, old))]

        E_XX_new, _, d = lerp(self.E_XX, T.dot(x_, x_.T) / k, self.min_tau)
        updates += upd(self.enabled, self.E_XX, E_XX_new)
        b = 1.
        d = T.diagonal(E_XX_new)
        stiff = T.scalar('stiffnes', dtype=FLOATX)
        Q_new = theano_diag(
            b / T.where(d < stiff * self.stiffx, stiff * self.stiffx, d))
        updates += upd(self.enabled, self.Q, Q_new)

        for i, x_p in enumerate(x_prev):
            E_XU_new, _, d_ = lerp(self.E_XU[i],
                                   T.dot(x_, x_p.var.T) / k, self.min_tau)
            updates += upd(self.enabled, self.E_XU[i], E_XU_new)
            d = T.maximum(d, d_)
            updates += upd(self.enabled, self.phi[i], T.dot(Q_new, E_XU_new).T)

        self.info('Compile layer update between: ' + self.name + ' and ' +
                  ', '.join([p.name for p in self.prev]))
        return theano.function(inputs=[stiff], outputs=d, updates=updates)
Code Example #11
 def free_energy(self, v_sample):
     wx_b = T.dot(v_sample, self.W) + self.hbias
     vbias_term = 0.5 * T.dot((v_sample - self.vbias),
                              (v_sample - self.vbias).T)
     hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
     #return -hidden_term - vbias_term
     return -hidden_term - T.diagonal(vbias_term)
Code Example #12
 def free_energy(self, v_sample):  # overridden
     wx_b = T.dot(v_sample, self.W) + self.hbias
     vbias_term = 0.5 * T.dot((v_sample - self.vbias),
                              (v_sample - self.vbias).T)
     # This term was modified: originally a direct dot product, now replaced with the squared difference.
     hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
     return -hidden_term - T.diagonal(vbias_term)
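A side note on examples #11 and #12 (not from either project): T.diagonal(T.dot(d, d.T)) only uses the per-sample squared norms, so the same visible-bias term can be computed without materializing the full N x N matrix. A sketch of the equivalent form, with illustrative parameter names:

import theano.tensor as T

def free_energy_diag_free(v_sample, W, hbias, vbias):
    """Same value as the methods above, but the vbias term is computed row-wise:
    T.diagonal(T.dot(d, d.T)) == T.sum(T.sqr(d), axis=1) for d = v_sample - vbias."""
    wx_b = T.dot(v_sample, W) + hbias
    vbias_term = 0.5 * T.sum(T.sqr(v_sample - vbias), axis=1)
    hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
    return -hidden_term - vbias_term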
Code Example #13
 def version1(self):
     
     Fhat = self.feedForward(self.dot())
     latFhat = T.dot(T.abs_(Fhat.T), self.distMat.T)
     latFhat = T.dot(latFhat, T.abs_(Fhat))
     self.latFhat = T.diagonal(latFhat)
     
     return self.latFhat
Code Example #14
 def logprob(x, m, S):
     delta = x - m
     L = cholesky(S)
     beta = solve_lower_triangular(L, delta.T).T
     lp = -0.5 * tt.square(beta).sum(-1)
     lp -= tt.sum(tt.log(tt.diagonal(L)))
     lp -= (0.5 * m.size * tt.log(2 * np.pi)).astype(
         theano.config.floatX)
     return lp
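The T.diagonal(L) term in example #14 carries the log-determinant of the covariance: with S = L L^T from the Cholesky factor, log|S| = 2 * sum(log(diag(L))), so subtracting sum(log(diag(L))) contributes the -0.5 * log|S| part of the Gaussian log-density. A quick numeric check of that identity (plain NumPy, illustrative only):

import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(4, 4)
S = A.dot(A.T) + 4 * np.eye(4)      # symmetric positive-definite matrix
L = np.linalg.cholesky(S)

# log|S| equals twice the sum of the log-diagonal of its Cholesky factor.
assert np.allclose(np.log(np.linalg.det(S)), 2 * np.log(np.diagonal(L)).sum())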
Code Example #15
File: SCFGP.py  Project: MaxInGaussian/SCFGP
 def build_theano_models(self, algo, algo_params):
     epsilon = 1e-6
     kl = lambda mu, sig: sig+mu**2-TT.log(sig)
     X, y = TT.dmatrices('X', 'y')
     params = TT.dvector('params')
     a, b, c, l_F, F, l_FC, FC = self.unpack_params(params)
     sig2_n, sig_f = TT.exp(2*a), TT.exp(b)
     l_FF = TT.dot(X, l_F)+l_FC
     FF = TT.concatenate((l_FF, TT.dot(X, F)+FC), 1)
     Phi = TT.concatenate((TT.cos(FF), TT.sin(FF)), 1)
     Phi = sig_f*TT.sqrt(2./self.M)*Phi
     noise = TT.log(1+TT.exp(c))
     PhiTPhi = TT.dot(Phi.T, Phi)
     A = PhiTPhi+(sig2_n+epsilon)*TT.identity_like(PhiTPhi)
     L = Tlin.cholesky(A)
     Li = Tlin.matrix_inverse(L)
     PhiTy = Phi.T.dot(y)
     beta = TT.dot(Li, PhiTy)
     alpha = TT.dot(Li.T, beta)
     mu_f = TT.dot(Phi, alpha)
     var_f = (TT.dot(Phi, Li.T)**2).sum(1)[:, None]
     dsp = noise*(var_f+1)
     mu_l = TT.sum(TT.mean(l_F, axis=1))
     sig_l = TT.sum(TT.std(l_F, axis=1))
     mu_w = TT.sum(TT.mean(F, axis=1))
     sig_w = TT.sum(TT.std(F, axis=1))
     hermgauss = np.polynomial.hermite.hermgauss(30)
     herm_x = Ts(hermgauss[0])[None, None, :]
     herm_w = Ts(hermgauss[1]/np.sqrt(np.pi))[None, None, :]
     herm_f = TT.sqrt(2*var_f[:, :, None])*herm_x+mu_f[:, :, None]
     nlk = (0.5*herm_f**2.-y[:, :, None]*herm_f)/dsp[:, :, None]+0.5*(
         TT.log(2*np.pi*dsp[:, :, None])+y[:, :, None]**2/dsp[:, :, None])
     enll = herm_w*nlk
     nlml = 2*TT.log(TT.diagonal(L)).sum()+2*enll.sum()+1./sig2_n*(
         (y**2).sum()-(beta**2).sum())+2*(X.shape[0]-self.M)*a
     penelty = (kl(mu_w, sig_w)*self.M+kl(mu_l, sig_l)*self.S)/(self.S+self.M)
     cost = (nlml+penelty)/X.shape[0]
     grads = TT.grad(cost, params)
     updates = getattr(OPT, algo)(self.params, grads, **algo_params)
     updates = getattr(OPT, 'apply_nesterov_momentum')(updates, momentum=0.9)
     train_inputs = [X, y]
     train_outputs = [cost, alpha, Li]
     self.train_func = Tf(train_inputs, train_outputs,
         givens=[(params, self.params)])
     self.train_iter_func = Tf(train_inputs, train_outputs,
         givens=[(params, self.params)], updates=updates)
     Xs, Li, alpha = TT.dmatrices('Xs', 'Li', 'alpha')
     l_FFs = TT.dot(Xs, l_F)+l_FC
     FFs = TT.concatenate((l_FFs, TT.dot(Xs, F)+FC), 1)
     Phis = TT.concatenate((TT.cos(FFs), TT.sin(FFs)), 1)
     Phis = sig_f*TT.sqrt(2./self.M)*Phis
     mu_pred = TT.dot(Phis, alpha)
     std_pred = (noise*(1+(TT.dot(Phis, Li.T)**2).sum(1)))**0.5
     pred_inputs = [Xs, alpha, Li]
     pred_outputs = [mu_pred, std_pred]
     self.pred_func = Tf(pred_inputs, pred_outputs,
         givens=[(params, self.params)])
Code Example #16
File: ladder_nets.py  Project: AdrianLsk/Ladder-Nets
def get_mu_sigma_costs(hid):
    shp = hid.shape
    mu = hid.mean(0)
    sigma = T.dot(hid.T, hid) / shp[0]

    C_mu = T.sum(mu**2)
    C_sigma = T.diagonal(sigma - T.log(T.clip(sigma, 1e-15, 1)))
    C_sigma -= -T.ones_like(C_sigma)
    return C_mu, C_sigma.sum()  # trace(C_sigma)
Code Example #17
File: dist_math.py  Project: qinghsui/pymc
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
Code Example #18
File: dist_math.py  Project: leezqcst/pymc3
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
Code Example #19
    def free_energy(self, v_sample):
        """
        Function to compute the free energy, it overwrite free energy function
        (here only v_bias term is different)

        :param v_sample: Sampling values of visible units
        """
        wx_b = T.dot(v_sample, self.W) + self.h_bias
        v_bias_term = 0.5 * T.dot((v_sample - self.v_bias), (v_sample - self.v_bias).T)
        hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
        return -hidden_term - T.diagonal(v_bias_term)
Code Example #20
    def __call__(self, A, b, inference=False):
        dA = T.diagonal(A)
        D = T.diag(dA)
        R = A - D

        iD = T.diag(1.0 / dA)

        x = T.zeros_like(b)
        for i in range(self.iterations):
            x = iD.dot(b - R.dot(x))

        return x
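Example #20 is a fixed-iteration Jacobi solver: T.diagonal(A) splits off the diagonal D, R = A - D holds the rest, and each step applies x <- D^{-1}(b - R x). A plain-NumPy sketch of the same update on a small diagonally dominant system (values are illustrative):

import numpy as np

A = np.array([[4., 1.],
              [2., 5.]])
b = np.array([1., 2.])

D = np.diag(np.diagonal(A))
R = A - D
iD = np.diag(1.0 / np.diagonal(A))

x = np.zeros_like(b)
for _ in range(50):                 # same update rule as the Theano loop above
    x = iD.dot(b - R.dot(x))

print(np.allclose(A.dot(x), b))     # True: the iteration converges for this diagonally dominant A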
Code Example #21
File: mmd.py  Project: yux94/opt-mmd
def rbf_mmd2_and_ratio(X, Y, sigma=0, biased=True):
    gamma = 1 / (2 * sigma**2)

    XX = T.dot(X, X.T)
    XY = T.dot(X, Y.T)
    YY = T.dot(Y, Y.T)

    X_sqnorms = T.diagonal(XX)
    Y_sqnorms = T.diagonal(YY)

    K_XY = T.exp(
        -gamma *
        (-2 * XY + X_sqnorms[:, np.newaxis] + Y_sqnorms[np.newaxis, :]))
    K_XX = T.exp(
        -gamma *
        (-2 * XX + X_sqnorms[:, np.newaxis] + X_sqnorms[np.newaxis, :]))
    K_YY = T.exp(
        -gamma *
        (-2 * YY + Y_sqnorms[:, np.newaxis] + Y_sqnorms[np.newaxis, :]))

    return _mmd2_and_ratio(K_XX, K_XY, K_YY, unit_diagonal=True, biased=biased)
Code Example #22
def fill_diagonal(x, val):
    """Fills in the diagonal of a tensor. """

    if val.size.eval() == 1:
        val = T.extra_ops.repeat(val, x.eval().shape[0])

    # adapted from following theano help topic: https://groups.google.com/forum/#!topic/theano-users/zYD-gsddIYs
    orig_diag = T.diag(T.diagonal(x))
    new_diag = T.diag(val)
    y = x - orig_diag + new_diag

    return y
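A possible usage sketch for the fill_diagonal helper in example #22. Because the helper calls .eval() while the graph is being built, x and val are assumed here to be shared variables (or constants) rather than purely symbolic inputs; the names and values are illustrative:

import numpy as np
import theano

x = theano.shared(np.arange(9., dtype=theano.config.floatX).reshape(3, 3))
val = theano.shared(np.asarray([7.], dtype=theano.config.floatX))

y = fill_diagonal(x, val)   # helper defined in the example above
print(y.eval())             # original matrix with every diagonal entry set to 7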
Code Example #23
File: VAFnet.py  Project: MaxInGaussian/VAFnet
 def compile_theano_funcs(self, opt_algo, opt_params, dropout):
     self.compiled_funcs = {}
     # Compile Train & Optimization Function
     eps = 1e-5
     params = Tt.vector('params')
     X, Y = Tt.matrix('X'), Tt.matrix('Y')
     sig2, F, M, V = self.feature_maps(X, params)
     EPhi = F[-1]
     EPhiPhiT = Tt.dot(EPhi, Tt.transpose(EPhi))
     A = EPhiPhiT + (sig2 + eps) * Tt.identity_like(EPhiPhiT)
     L = Tlin.cholesky(A)
     Linv = Tlin.matrix_inverse(L)
     YPhiT = Tt.dot(Y, Tt.transpose(EPhi))
     beta = Tt.dot(YPhiT, Tt.transpose(Linv))
     alpha = Tt.dot(beta, Linv)
     mu_F = Tt.dot(alpha, EPhi)
     GOF = .5 / sig2 * Tt.sum(Tt.sum(Tt.dot(Y, (Y - mu_F).T)))
     REG = Tt.sum(Tt.log(
         Tt.diagonal(L))) + (self.N - self.D[-2]) / 2 * Tt.log(sig2)
     REG *= self.D[-1]
     KL = 0
     for h in range(self.H):
         KL += Tt.sum(Tt.sum(M[h]**2) + Tt.sum(V[h] - Tt.log(V[h] + eps)))
         KL -= self.D[h + 1] * self.D[h + 2] // 2
     obj = debug('obj', GOF + REG + KL)
     self.compiled_funcs['debug'] = Tf([X, Y], [obj],
                                       givens=[(params, self.params)])
     grads = Tt.grad(obj, params)
     updates = {self.params: grads}
     updates = getattr(Optimizer, opt_algo)(updates, **opt_params)
     updates = getattr(Optimizer, 'nesterov')(updates, momentum=0.9)
     train_inputs = [X, Y]
     train_outputs = [obj, alpha, Linv, mu_F]
     self.compiled_funcs['opt'] = Tf(train_inputs,
                                     train_outputs,
                                     givens=[(params, self.params)],
                                     updates=updates)
     self.compiled_funcs['train'] = Tf(train_inputs,
                                       train_outputs,
                                       givens=[(params, self.params)])
     # Compile Predict Function
     Linv, alpha = Tt.matrix('Linv'), Tt.matrix('alpha')
     Xs = Tt.matrix('Xs')
     sig2, Fs, _, _ = self.feature_maps(Xs, params)
     EPhis = Fs[-1]
     mu_Fs = Tt.dot(alpha, EPhis)
     std_Fs = ((sig2 * (1 + (Tt.dot(Linv, EPhis)**2).sum(0)))**0.5)[:, None]
     pred_inputs = [Xs, alpha, Linv]
     pred_outputs = [mu_Fs, std_Fs]
     self.compiled_funcs['pred'] = Tf(pred_inputs,
                                      pred_outputs,
                                      givens=[(params, self.params)])
Code Example #24
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T, solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
        else:
            return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))]
Code Example #25
File: neural_cluster_model.py  Project: kiankd/events
def prepare_model(x_train, y_train, batchsize, params=None):
    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')
    same_cluster_indices_matrix = T.matrix('same_clusters')
    diff_cluster_indices_matrix = T.matrix('diff_clusters')

    # prepare network
    print '\nPreparing the model with primary hidden layer size %d...'%HOURGLASS_LAYER_SIZE
    print 'X-shape = %d, Num_classes = %d, num_samples = %d'%(x_train[0].shape[0], max(y_train), len(x_train))
    representation_layer, network = build_args_nn(x_train, y_train, batchsize, input_var)

    # loss stuff
    prediction = lasagne.layers.get_output(network)
    get_representations = lasagne.layers.get_output(representation_layer, inputs=input_var, deterministic=True)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)

    if LAMBDA1 == LAMBDA2 == 0.0:
        loss = loss.mean()
    else:
        representations = get_representations
        dot_prods = T.dot(representations, representations.T) # X times X.T
        diag = T.sqrt(T.diagonal(dot_prods)) # sqrt(||ri||^2) = ||ri||

        norms = T.outer(diag, diag.T)
        distances = 0.5*(1 -(dot_prods * (1./norms))) # d(a,b) = 1/2 (1 - dot(a,b) / (||a||*||b||))

        # we want the first sum to be as close to zero as possible, so we add it to the loss.
        # we want the second sum to be as close to 1 as possible, so we want LAMBDA2 * (1 - sum2)
        # to be as close to zero as possible, thus adding that difference to the overall loss.
        loss = loss.mean() \
               + (LAMBDA1 * T.sum(same_cluster_indices_matrix * distances)) \
               + (LAMBDA2 * (1.0 - T.sum(diff_cluster_indices_matrix * distances)))

    # for loading/building the parameters
    if not params:
        params = lasagne.layers.get_all_params(network, trainable=True)
    else:
        lasagne.layers.set_all_param_values(network, params)
        params = lasagne.layers.get_all_params(network, trainable=True)

    updates = lasagne.updates.adam(loss, params, learning_rate=LEARNING_RATE)

    # the final keys
    train_function = theano.function([input_var, target_var, same_cluster_indices_matrix, diff_cluster_indices_matrix],
                                     loss, updates=updates, allow_input_downcast=True, on_unused_input='ignore')
    convert_to_numpy_function = theano.function([input_var], get_representations, allow_input_downcast=True)

    # theano.printing.debugprint(train_function.maker.fgraph.outputs[0])

    return network, train_function, convert_to_numpy_function
Code Example #26
def plot_qXphi(signal, n=int(1e5), axis=None):
    axis, show_it, lim = axis_and_show(axis)
    if axis is None:
        return
    en = np.mean(np.square(signal.val()), axis=1)
    nphi = np.linalg.norm(signal.layer.phi[0].get_value(), axis=0)
    Q = T.diagonal(signal.layer.Q).eval()
    pen, = axis.plot(en[:n], 's-')
    pphi, = axis.plot(nphi[:n], '*-')
    pq, = axis.plot(Q[:n], 'x-')
    axis.legend([pen, pphi, pq], ['E{X^2}', '|phi|', 'q_i'])
    lim([0.0, 5])
    if show_it:
        axis.show()
Code Example #27
File: utils.py  Project: xyang35/CMSC727_project
def cca_loss(y1, y2, lamda=0.1):
    ''' Approximated cca loss of two views '''

    y1_mean = T.mean(y1, axis=0)
    y1_centered = y1 - y1_mean
    y2_mean = T.mean(y2, axis=0)
    y2_centered = y2 - y2_mean

    corr_nr = T.sum(y1_centered * y2_centered, axis=0)
    corr_dr1 = T.sqrt(T.sum(y1_centered * y1_centered, axis=0) + 1e-8)
    corr_dr2 = T.sqrt(T.sum(y2_centered * y2_centered, axis=0) + 1e-8)
    corr_dr = corr_dr1 * corr_dr2
    corr = corr_nr / corr_dr

    #C12 = T.dot(y1_centered.T, y2_centered)# / y1_centered.shape[0]
    #l12 = 0.5*((C12 ** 2).sum() - (T.diagonal(C12) ** 2).sum())
    C11 = T.dot(y1_centered.T, y1_centered) / y1_centered.shape[0]
    l11 = 0.5 * ((C11**2).sum() - (T.diagonal(C11)**2).sum())
    C22 = T.dot(y2_centered.T, y2_centered) / y1_centered.shape[0]
    l22 = 0.5 * ((C22**2).sum() - (T.diagonal(C22)**2).sum())

    #    return -T.sum(corr) + lamda*(l11+l22+l12)
    return -T.sum(corr) + lamda * (l11 + l22)
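In example #27, subtracting the squared diagonal before summing means the l11 and l22 penalties only touch the off-diagonal entries of each view's covariance, i.e. they penalize correlations between different hidden units while leaving per-unit variances alone. A small numeric check of that reading (plain NumPy, illustrative data):

import numpy as np

rng = np.random.RandomState(0)
Y = rng.randn(100, 5)
C = Y.T.dot(Y) / Y.shape[0]

penalty = 0.5 * ((C**2).sum() - (np.diagonal(C)**2).sum())
off_diag = np.triu(C, k=1)                      # C is symmetric
assert np.allclose(penalty, (off_diag**2).sum())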
Code Example #28
File: utils.py  Project: arasmus/eca
def plot_qXphi(signal, n=int(1e5), axis=None):
    axis, show_it, lim = axis_and_show(axis)
    if axis is None:
        return
    en = np.mean(np.square(signal.val()), axis=1)
    nphi = np.linalg.norm(signal.layer.phi[0].get_value(), axis=0)
    Q = T.diagonal(signal.layer.Q).eval()
    pen, = axis.plot(en[:n], 's-')
    pphi, = axis.plot(nphi[:n], '*-')
    pq, = axis.plot(Q[:n], 'x-')
    axis.legend([pen, pphi, pq], ['E{X^2}', '|phi|', 'q_i'])
    lim([0.0, 5])
    if show_it:
        axis.show()
Code Example #29
    def L_op(self, inputs, outputs, gradients):
        # Modified from theano/tensor/slinalg.py
        # No handling for on_error = 'nan'
        dz = gradients[0]
        chol_x = outputs[0]

        # this is for nan mode
        #
        # ok = ~tensor.any(tensor.isnan(chol_x))
        # chol_x = tensor.switch(ok, chol_x, 1)
        # dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.0)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return gpu_solve_upper_triangular(
                outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T
            )

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz))
        )

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        return [grad]
Code Example #30
File: eca.py  Project: afcentry/eca
def lerp(old, new, min_tau=0.0, en=None):
    """
    Return new interpolated value and a relative difference
    """
    diff = T.mean(T.sqr(new) - T.sqr(old), axis=1, keepdims=True)
    rel_diff = diff / (T.mean(T.sqr(old), axis=1, keepdims=True) + 1e-5)
    t = rel_diff * 20.
    t = T.where(t < 5, 5, t)
    t = T.where(t > 100, 100, t)
    t = t + min_tau
    if en is not None:
        lmbd = T.diagonal(en).dimshuffle(0, 'x') * (1. / t)
    else:
        lmbd = 1. / t
    return ((1 - lmbd) * old + lmbd * new, t, rel_diff)
Code Example #31
File: eca.py  Project: arasmus/eca
def lerp(old, new, min_tau=0.0, en=None):
    """
    Return new interpolated value and a relative difference
    """
    diff = T.mean(T.sqr(new) - T.sqr(old), axis=1, keepdims=True)
    rel_diff = diff / (T.mean(T.sqr(old), axis=1, keepdims=True) + 1e-5)
    t = rel_diff * 20.
    t = T.where(t < 5, 5, t)
    t = T.where(t > 100, 100, t)
    t = t + min_tau
    if en is not None:
        lmbd = T.diagonal(en).dimshuffle(0, 'x') * (1. / t)
    else:
        lmbd = 1. / t
    return ((1 - lmbd) * old + lmbd * new,
            t, rel_diff)
Code Example #32
File: autoencoder.py  Project: ganguli-lab/pylearn
    def __call__(self, model, X):
        # Corrupt X
        corrupted_inputs = model.corruptor(X)
        hidden = model.encode(corrupted_inputs)

        ex_corrupted_hidden = (1 - self.q) * hidden
        ex_recon = model.decode(ex_corrupted_hidden)

        # Trace term depends on variance
        # var_cost = (tensor.diagonal(theano.dot(model.w_prime, model.w_prime.T)) * (self.q * (1-self.q)**2 * hidden**2).mean(axis=0)).sum()
        var_cost = (
            tensor.diagonal(theano.dot(model.w_prime, model.w_prime.T))
            * (self.q * (1 - self.q) * hidden ** 2).mean(axis=0)
        ).sum()
        recon_cost = ((ex_recon - X) ** 2).sum(axis=1).mean()
        cost = var_cost + recon_cost
        return cost
Code Example #33
 def __init__(self,
              X_u,
              X_v,
              y,
              Uinit,
              Vinit,
              loss=loss_squared,
              regularization=(1, 1),
              reg_type=(lambda x: T.mean(x**2), lambda x: T.mean(x**2))):
     self.U = theano.shared(Uinit)
     self.V = theano.shared(Vinit)
     self.X_u = X_u
     self.X_v = X_v
     self.y = y
     self.prediction_matrix = X_u.dot(self.U).dot((X_v.dot(self.V)).T)
     self.prediction_pairs = T.diagonal(self.prediction_matrix)
     self.cost = loss(y, self.prediction_pairs)
     self.cost += regularization[0] * reg_type[0](self.U)
     self.cost += regularization[1] * reg_type[1](self.V)
Code Example #34
        def step(input_n, hid_prevprev, hid_previous, *args):
            # Compute the hidden-to-hidden activation
            hid_pre = helper.get_output(self.hidden_to_hidden, hid_previous, **kwargs)

            # If the dot product is precomputed then add it, otherwise
            # calculate the input_to_hidden values and add them
            if self.precompute_input:
                hid_pre += input_n
            else:
                hid_pre += helper.get_output(
                    self.input_to_hidden, input_n, **kwargs)

            # Clip gradients
            if self.grad_clipping:
                hid_pre = theano.gradient.grad_clip(hid_pre, -self.grad_clipping, self.grad_clipping)

            hid_pre += self.gamma * hid_prevprev * T.clip(T.tile(T.reshape(T.diagonal(T.dot(hid_prevprev, hid_previous.T)),
                                                      (1,hid_previous.shape[0])), (hid_previous.shape[1],1)).T, 0.0, 100.0)

            return self.nonlinearity( hid_pre )
Code Example #35
File: orbm.py  Project: vikkamath/iRBM
    def marginalize_over_v_z(self, h):
        # energy = \sum_{i=1}^{|h|} h_i*b_i - \beta * ln(1 + e^{b_i})

        if self.penalty == "softplus_bi":
            energy = (h * self.b).T - self.beta * T.log(1 + T.exp(self.b))[:, None]
        elif self.penalty == "softplus0":
            energy = (h * self.b).T - self.beta * T.log(1 + T.exp(0))[:, None]
        else:
            raise NameError("Invalid penalty term")

        energy = T.set_subtensor(energy[(T.isnan(energy)).nonzero()], 0)  # Remove
        energy = T.sum(energy, axis=0, keepdims=True).T

        ener = T.tensordot(h, self.W, axes=0)
        ener = T.diagonal(ener, axis1=1, axis2=2)
        ener = T.set_subtensor(ener[(T.isnan(ener)).nonzero()], 0)
        ener = T.sum(ener, axis=2) + self.c[None, :]
        ener = T.sum(T.log(1 + T.exp(ener)), axis=1, keepdims=True)

        return -(energy + ener)
Code Example #36
File: eca.py  Project: arasmus/eca
    def compile_adapt_f(self, signals):
        x = self.signal(signals)
        x_prev = [p.signal(signals) for p in self.prev]
        assert np.all([x.k == xp.k for xp in x_prev])
        assert self.m == [xp.n for xp in x_prev]
        assert x.n == self.n
        k = np.float32(x.k)
        # Modulate x
        if x.modulation is not None:
            x_ = x.var * T.as_tensor_variable(x.modulation)
        else:
            x_ = x.var

        updates = []
        upd = lambda en, old, new: [(old, ifelse(en, new, old))]

        E_XX_new, _, d = lerp(self.E_XX, T.dot(x_, x_.T) / k, self.min_tau)
        updates += upd(self.enabled, self.E_XX, E_XX_new)
        b = 1.
        d = T.diagonal(E_XX_new)
        stiff = T.scalar('stiffnes', dtype=FLOATX)
        Q_new = theano_diag(b / T.where(d < stiff * self.stiffx,
                                        stiff * self.stiffx, d))
        updates += upd(self.enabled, self.Q, Q_new)

        for i, x_p in enumerate(x_prev):
            E_XU_new, _, d_ = lerp(self.E_XU[i], T.dot(x_, x_p.var.T) / k,
                                   self.min_tau)
            updates += upd(self.enabled, self.E_XU[i], E_XU_new)
            d = T.maximum(d, d_)
            updates += upd(self.enabled, self.phi[i], T.dot(Q_new, E_XU_new).T)

        self.info('Compile layer update between: ' + self.name + ' and '
                  + ', '.join([p.name for p in self.prev]))
        return theano.function(
            inputs=[stiff],
            outputs=d,
            updates=updates)
Code Example #37
    def __mapper(self, train_example):
        pos_triple, neg_triple = train_example[0:3], train_example[3:]

        unconstrained_objective = self.margin - self.__objective_triple(neg_triple) \
                                  + self.__objective_triple(pos_triple)

        entity_normalize = T.sum(T.square(self.Entity.norm(2, axis=0)) - 1)
        relation_normalize = T.square(self.Relation.norm(2, axis=0))
        surface_normalize = T.square(T.diagonal(T.dot(self.RelationNormal.T, self.Relation))) / relation_normalize

        surface_normalize = T.sum(surface_normalize - self.epsilon ** 2)

        unconstrained_objective_positive = ifelse(T.gt(unconstrained_objective, theano.shared(0.0)),
                                                  unconstrained_objective, theano.shared(0.0))

        entity_normalize_positive = ifelse(T.gt(entity_normalize, theano.shared(0.0)),
                                           entity_normalize, theano.shared(0.0))

        surface_normalize_positive = ifelse(T.gt(surface_normalize, theano.shared(0.0)),
                                            surface_normalize, theano.shared(0.0))

        return unconstrained_objective_positive + self.regularize_factor \
                                                  * (surface_normalize_positive + entity_normalize_positive)
Code Example #38
    def get_mapping(self, pr):
        X = T.transpose(self.input)

        # X=[X;ones(1,size(X,2))];
        X = T.concatenate([X, T.ones((1, X.shape[1]))], axis=0)

        #d=size(X,1);
        d = X.shape[0]

        #q=[ones(d-1,1).*(1-p); 1];
        q = T.concatenate([T.ones((d - 1, 1)) * (1 - pr),
                           T.ones((1, 1))],
                          axis=0)

        #S=X*X';
        S = T.dot(X, X.T)

        #Q=S.*(q*q');
        Q = S * T.dot(q, q.T)

        #Q(1:d+1:end)=q.*diag(S);
        Q -= (T.eye(Q.shape[0]) * Q.diagonal())
        Q += T.eye(Q.shape[0]) * T.diagonal(q * S.diagonal())

        #P=S.*repmat(q',d,1);
        P = S * T.extra_ops.repeat(q.T, d, 0)

        #W=P(1:end-1,:)/(Q+1e-5*eye(d));

        A = Q + 10**-5 * T.eye(d)
        B = P

        self.W = T.slinalg.solve(A.T, B.T)[:-1, :]
        self.Xh = T.tanh(T.dot(self.W, X)).T

        return self.W, self.Xh
Code Example #39
File: losses.py  Project: entn-at/mdgan
def get_gaussian_likelihood(comps, X_, mu_, S_, w_, feat_dim):
    _2PI = 2. * np.pi
    comps = T.cast(comps, 'int32')

    mu = mu_[comps, :]
    w = w_[comps]
    S = S_[comps, :, :]

    mu = T.cast(mu, "float32")
    w = T.cast(w, "float32")
    S = T.cast(S, "float32")
    X = T.cast(X_, "float32")
    feat_dim = T.cast(feat_dim, "float32")

    residuals_t = X - mu

    maha_t = T.diagonal(residuals_t.dot(T.nlinalg.matrix_inverse(S)).dot(residuals_t.T))

    likelihood_t = (
            T.nlinalg.det(_2PI * S) ** -0.5
            * (T.exp(-0.5 * maha_t))
    )
    likelihood_t += feat_dim * np.float32(0.)
    return likelihood_t * w
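The T.diagonal(...) in example #39 keeps only the per-row Mahalanobis terms r_i^T S^{-1} r_i of the matrix residuals_t · S^{-1} · residuals_t^T; the off-diagonal cross terms are never needed. A numeric check of that equivalence (plain NumPy, illustrative shapes):

import numpy as np

rng = np.random.RandomState(0)
R = rng.randn(5, 3)                              # residuals, one row per sample
S = np.cov(rng.randn(100, 3), rowvar=False) + np.eye(3)
Sinv = np.linalg.inv(S)

maha_diag = np.diagonal(R.dot(Sinv).dot(R.T))    # what the Theano code extracts
maha_direct = np.einsum('ij,jk,ik->i', R, Sinv, R)
assert np.allclose(maha_diag, maha_direct)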
Code Example #40
def dot(x,y):
	return T.sum(T.diagonal(T.dot(x, T.transpose(y))))
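Example #40 computes the Frobenius inner product via a trace: sum(diagonal(X Y^T)) equals sum(X * Y), so an equivalent and cheaper Theano expression would be T.sum(x * y), with no full matrix product. A one-line numeric check (plain NumPy):

import numpy as np

rng = np.random.RandomState(0)
X, Y = rng.randn(3, 4), rng.randn(3, 4)
assert np.allclose(np.trace(X.dot(Y.T)), np.sum(X * Y))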
Code Example #41
 def tril_and_halve_diagonal(mtx):
     """Extracts lower triangle of square matrix and halves diagonal."""
     return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.0)
Code Example #42
File: linalg.py  Project: Theano/Theano
 def tril_and_halve_diagonal(mtx):
     """Extracts lower triangle of square matrix and halves diagonal."""
     return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)
Code Example #43
def OptimalGaussian(x_train, y_train, Regression=True, Classification=False,
                    bias=False, n_iter=5, alpha=0.01, minibatch=False):
    '''
    inputs
        x_train: training features
        y_train: response variable
        n_iter: # of iterations for SGD
        alpha: strength of L2 penalty (default penalty for now)
    outputs
        Gaussian Node: dictionary with Node parameters an predict method
    '''

    rng = numpy.random

    feats = len(x_train[0, :])
    N = len(x_train[:, 0])
    D = [x_train, y_train]
    training_steps = n_iter
    #print "training steps: ", training_steps
    #print "penalty strength: ", alpha
    #print "Uses bias: ", bias

    # Declare Theano symbolic variables
    x = T.matrix("x")
    y = T.vector("y")
    w = theano.shared(rng.uniform(low=-0.25, high=0.25, size=feats), name="w")
    b = theano.shared(abs(rng.randn(1)[0]), name="b")
    a = theano.shared(abs(rng.randn(1)[0]), name="a")
    rep = theano.shared(numpy.asarray([1]*N), name="rep")
    #print "Initialize node as:"
    #print w.get_value(), b.get_value(), a.get_value()

    # Construct Theano expression graph
    W = T.outer(rep, w)
    if bias:
        p_1 = a * T.exp(-0.5 / (b**2) * T.dot((x - w).T, (x - w)))
    else:
        p_1 = a * T.exp(-0.5 / (1**2) * T.diagonal(T.dot((x - W), (x - W).T)))
    prediction = p_1 > 0.5
    if Regression:
        xent = 0.5 * (y - p_1)**2
    if alpha == 0:
        cost = xent.mean()  # The cost to minimize
    else:
        cost = xent.mean() + alpha * ((w ** 2).sum())
    if bias:
        gw, gb, ga = T.grad(cost, [w, b, a])
    else:
        gw, ga = T.grad(cost, [w, a])  # Compute the gradient of the cost

    # Compile
    Node = {}
    Node['Path'] = {}
    NodePath = Node['Path']
    if bias:
        train = theano.function(inputs=[x, y], outputs=[prediction, xent],
                                updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb),
                                         (a, a - 0.1 * ga)))
    else:
        train = theano.function(inputs=[x, y], outputs=[prediction, xent],
                                updates=((w, w - 0.1 * gw), (a, a - 0.1 * ga)))

    predict = theano.function(inputs=[x], outputs=p_1)

    # Train
    for i in range(training_steps):
        if minibatch:
            batch_split = train_test_split(x_train, y_train, test_size=0.2)
            _, D[0], _, D[1] = batch_split
            #IPython.embed()
            pred, err = train(D[0], D[1])

        elif not minibatch:
            pred, err = train(D[0], D[1])
        NodePath[str(i)] = {}
        NodePath[str(i)]['w'] = w.get_value()
        NodePath[str(i)]['b'] = b.get_value()
        NodePath[str(i)]['a'] = a.get_value()

    Node['w'] = w.get_value()
    Node['b'] = b.get_value()
    Node['a'] = a.get_value()
    Node['predict'] = predict

    return Node
Code Example #44
File: constraints.py  Project: BenJamesbabala/GOL
def no_linear_dependencies_constraint(W, k):
    M=T.dot(W, W.T)
    M=M-T.diag(T.diagonal(M))
    cost = -T.sum(T.log(1.0 - M ** 2))/2
    return cost
Code Example #45
File: constraints.py  Project: Licht-T/GOL
def no_linear_dependencies_constraint(W, k):
    M=T.dot(W, W.T)
    M=M-T.diag(T.diagonal(M))
    cost = -T.sum(T.log(1.0 - M ** 2))/2
    return cost
Code Example #46
 def free_energy(self, v_sample):
     wx_b = T.dot(v_sample, self.W) + self.hbias
     vbias_term = 0.5 * T.dot((v_sample - self.vbias), (v_sample - self.vbias).T)
     hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
     # return -hidden_term - vbias_term
     return -hidden_term - T.diagonal(vbias_term)
Code Example #47
 def free_energy(self, v_sample):     # overridden
     wx_b = T.dot(v_sample, self.W) + self.hbias
     vbias_term = 0.5 * T.dot((v_sample - self.vbias), (v_sample - self.vbias).T)
     # This term was modified: originally a direct dot product, now replaced with the squared difference.
     hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
     return -hidden_term - T.diagonal(vbias_term)
Code Example #48
def test_jax_basic():
    x = tt.matrix("x")
    y = tt.matrix("y")
    b = tt.vector("b")

    # `ScalarOp`
    z = tt.cosh(x**2 + y / 3.0)

    # `[Inc]Subtensor`
    out = tt.set_subtensor(z[0], -10.0)
    out = tt.inc_subtensor(out[0, 1], 2.0)
    out = out[:5, :3]

    out_fg = theano.gof.FunctionGraph([x, y], [out])

    test_input_vals = [
        np.tile(np.arange(10), (10, 1)).astype(theano.config.floatX),
        np.tile(np.arange(10, 20), (10, 1)).astype(theano.config.floatX),
    ]
    (jax_res, ) = compare_jax_and_py(out_fg, test_input_vals)

    # Confirm that the `Subtensor` slice operations are correct
    assert jax_res.shape == (5, 3)

    # Confirm that the `IncSubtensor` operations are correct
    assert jax_res[0, 0] == -10.0
    assert jax_res[0, 1] == -8.0

    out = tt.clip(x, y, 5)
    out_fg = theano.gof.FunctionGraph([x, y], [out])
    compare_jax_and_py(out_fg, test_input_vals)

    out = tt.diagonal(x, 0)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [np.arange(10 * 10).reshape((10, 10)).astype(theano.config.floatX)])

    out = tt.slinalg.cholesky(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )

    # not sure why this isn't working yet with lower=False
    out = tt.slinalg.Cholesky(lower=False)(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )

    out = tt.slinalg.solve(x, b)
    out_fg = theano.gof.FunctionGraph([x, b], [out])
    compare_jax_and_py(
        out_fg,
        [
            np.eye(10).astype(theano.config.floatX),
            np.arange(10).astype(theano.config.floatX),
        ],
    )

    out = tt.nlinalg.alloc_diag(b)
    out_fg = theano.gof.FunctionGraph([b], [out])
    compare_jax_and_py(out_fg, [np.arange(10).astype(theano.config.floatX)])

    out = tt.nlinalg.det(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [np.arange(10 * 10).reshape((10, 10)).astype(theano.config.floatX)])

    out = tt.nlinalg.matrix_inverse(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )
Code Example #49
File: grbm.py  Project: jminyu/DL-ANNs
 def free_energy_grbm(self, v_sample):
     ''' Function to compute the free energy '''
     wx_b = T.dot(v_sample,self.W)+self.hbias
     vbias_term = 0.5*T.dot((v_sample-self.vbias),(v_sample-self.vbias).T)
     hidden_term = T.sum(T.log(1+T.exp(wx_b)),axis=1)
     return -hidden_term-T.diagonal(vbias_term)