Example #1
	def rbm_K(self, X, effective_batch_size):
		D, DH = self.n_visible, self.n_hidden
		W, bh, bv = self.W, self.bh, self.bv

		#one bit flipped connected states
		Y = X.reshape((effective_batch_size, 1, D), 3) * T.ones((1, D, 1)) #tile out data vectors (repeat each one D times)
		Y1 = (Y + T.eye(D).reshape((1, D, D), 3))%2 # flip each bit once 

		# minimal activation connected states
		onehots = T.eye(D)
		blanks = T.zeros(D)
	
		eX = energy(X, W, bh, bv)	
		eY = energy(Y1, W, bh, bv)
		eO = energy(onehots, W, bh, bv)
		eB = energy(blanks, W, bh, bv)
		
		edif = eX.dimshuffle(0, 'x') - eY #- eB #- eO.dimshuffle('x', 0) - eB

		Z = T.exp(0.5*edif)
		K = T.sum(Z) / effective_batch_size
		K.name = 'K'

		K = T.cast(K, 'float32')
		return K 
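A minimal NumPy sketch of the bit-flip trick used above: tiling each visible vector D times and adding the identity matrix modulo 2 flips exactly one bit per copy. The array values are illustrative and the sketch stands outside the class.

import numpy as np

X = np.array([[1, 0, 1, 0]])                 # one visible configuration, D = 4
D = X.shape[1]
Y = np.repeat(X[:, None, :], D, axis=1)      # tile each vector D times -> (batch, D, D)
Y1 = (Y + np.eye(D)) % 2                     # adding eye(D) mod 2 flips bit d in copy d
print(Y1[0])
# [[0. 0. 1. 0.]
#  [1. 1. 1. 0.]
#  [1. 0. 0. 0.]
#  [1. 0. 1. 1.]]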
Example #2
def recurrence_relation(y, y_mask):
    # with blank symbol of -1 this falls back to the recurrence that fails
    # with repeating symbols!
    blank_symbol = 2
    n_y = y.shape[0]
    blanks = tensor.zeros((2, y.shape[1])) + blank_symbol
    ybb = tensor.concatenate((y, blanks), axis=0).T
    # ybb = B x (L'+2)   L'=2*label_noblank_length+1
    # ybb[:,:-2] == y.T   ybb[:,2:] = y.T move along 2 label
    # see Alex's paper:
    # tensor.neq(ybb[:, :-2], ybb[:, 2:]) -> set 0 where l' = b or l'_u == l'_(u-2)
    # tensor.eq(ybb[:, 1:-1], blank_symbol) -> [0,?,0,?,...,0,1,0]; ? depends on whether
    # l'_u == l'_(u-2); result[0,i] == 1 means ybb[0,i] != ybb[0,i+2]
    # sec_diag = B x L'
    sec_diag = (tensor.neq(ybb[:, :-2], ybb[:, 2:]) *
                tensor.eq(ybb[:, 1:-1], blank_symbol) *
                y_mask.T)

    # r2: L'xL'
    # r3: L'xL'xB
    r2 = tensor.eye(n_y, k=1)
    # tensor.eye(n_y, k=2).dimshuffle(0, 1, 'x')   L' x L' x 1 
    # sec_diag.dimshuffle(1, 'x', 0)  L' x 1 x B
    r3 = (tensor.eye(n_y, k=2).dimshuffle(0, 1, 'x') *
          sec_diag.dimshuffle(1, 'x', 0))
    return r2, r3
Example #3
        def each_loss(outpt, inpt):
            # y is the answer sequence (ans) after padding with blanks
            blank = 26
            y_nblank = T.neq(inpt, blank)
            n = T.dot(y_nblank, y_nblank)  # true (non-blank) label length
            N = 2 * n + 1  # length after blank padding, excluding the extra trailing padding
            labels = inpt[:N]
            labels2 = T.concatenate((labels, [blank, blank]))
            sec_diag = T.neq(labels2[:-2], labels2[2:]) * T.eq(labels2[1:-1], blank)
            recurrence_relation = \
                T.eye(N) + \
                T.eye(N, k=1) + \
                T.eye(N, k=2) * sec_diag.dimshuffle((0, 'x'))

            pred_y = outpt[:, labels]

            fwd_pbblts, _ = theano.scan(
                lambda curr, accum: T.switch(T.eq(curr*T.dot(accum, recurrence_relation), 0.0),
                                             T.dot(accum, recurrence_relation)
                                             , curr*T.dot(accum, recurrence_relation)),
                sequences=[pred_y],
                outputs_info=[T.eye(N)[0]]
            )
            #return fwd_pbblts
            #liklihood = fwd_pbblts[0, 0]
            liklihood = fwd_pbblts[-1, -1] + fwd_pbblts[-1, -2]
            #liklihood = T.switch(T.lt(liklihood, 1e-35), 1e-35, liklihood)
            #loss = -T.log(T.cast(liklihood, "float32"))
            #loss = 10 * (liklihood - 1) * (liklihood - 100)
            loss = (T.le(liklihood, 1.0)*(10*(liklihood-1)*(liklihood-100)))+(T.gt(liklihood, 1.0)*(-T.log(T.cast(liklihood, "float32"))))
            return loss
Example #4
    def vanilla_ctc(self, ):
        my_labels = TT.concatenate((self.labels, [self.blank, self.blank]))
        pre_V = TT.neq(my_labels[:-2], my_labels[2:]) * \
                   TT.eq(my_labels[1:-1], self.blank)

        capLambda = \
            TT.eye(self.n) + \
            TT.eye(self.n, k=1) + \
            TT.eye(self.n, k=2) * pre_V.dimshuffle((0, 'x'))

        softmax_outputs = self.inpt[:, self.labels]

        alphas, _ = theano.scan(
            lambda outPuts, old_alpha: outPuts * TT.dot(old_alpha, capLambda),
            sequences=[softmax_outputs],
            outputs_info=[TT.eye(self.n)[0]]
        )

        # TODO: This is what we really should use for the initialization.
        # Need to debug and make sure there are no errors.
        # initial_alphas = TT.zeros(n)
        # initial_alphas[0]=inpt[0][-1]
        # initial_alphas[1]=inpt[0][labels[1]]
        # alphas, _ = theano.scan(
        #     lambda outPuts, old_alpha: outPuts * TT.dot(old_alpha, capLambda),
        #     sequences=[softmax_outputs],
        #     outputs_info=[initial_alphas]
        # )

        transcript_prob = TT.sum(alphas[-1, -2:])
        self.cost = -TT.log(transcript_prob)
        self.debug = alphas.T
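A small NumPy walk-through of the same forward recursion, assuming the blanked target [blank, a, blank] with blank index 0 and three illustrative softmax frames; it mirrors the scan body above rather than the class itself.

import numpy as np

labels = np.array([0, 1, 0])                          # blanked target: [blank, a, blank]
blank = 0
n = len(labels)
labels2 = np.concatenate([labels, [blank, blank]])
sec_diag = (labels2[:-2] != labels2[2:]) * (labels2[1:-1] == blank)
R = np.eye(n) + np.eye(n, k=1) + np.eye(n, k=2) * sec_diag[:, None]

probs = np.array([[0.6, 0.3, 0.1],                    # per-frame softmax over [blank, a, b]
                  [0.2, 0.7, 0.1],
                  [0.5, 0.4, 0.1]])
pred_y = probs[:, labels]                             # (T, 2L+1)

alpha = np.eye(n)[0]                                  # start mass in the first entry
for t in range(len(probs)):
    alpha = pred_y[t] * (alpha @ R)                   # one step of the scan above
print(alpha[-1] + alpha[-2])                          # p(labelling | input), the summed tail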
Example #5
    def __init__(self, inpt, labels):
        '''
        Recurrent Relation:
        A matrix that specifies allowed transitions in paths.
        At any time, one could
        0) Stay at the same label (diagonal is identity)
        1) Move to the next label (first upper diagonal is identity)
        2) Skip to the next to next label if
            a) next label is blank and
            b) the next to next label is different from the current
            (Second upper diagonal is product of conditions a & b)
        '''
        n_labels = labels.shape[0]

        big_I = T.cast(T.eye(n_labels+2), 'float64')
        recurrence_relation1 = T.cast(T.eye(n_labels), 'float64') + big_I[2:,1:-1] + big_I[2:,:-2] * T.cast((T.arange(n_labels) % 2), 'float64')
        recurrence_relation = T.cast(recurrence_relation1, 'float64')

        '''
        Forward path probabilities
        '''
        pred_y = inpt[:, labels]

        probabilities, _ = theano.scan(
            lambda curr, prev: curr * T.dot(prev, recurrence_relation),
            sequences=[pred_y],
            outputs_info=[T.cast(T.eye(n_labels)[0], 'float64')]
        )

        # Final Costs
        labels_probab = T.sum(probabilities[-1, -2:])
        self.cost = -T.log(labels_probab)
        self.params = []
Example #6
    def _plain_ctc(self, ):
        labels2 = tt.concatenate((self.labels, [self.blank, self.blank]))
        sec_diag = tt.neq(labels2[:-2], labels2[2:]) * tt.eq(labels2[1:-1], self.blank)
        # Last two entries of sec_diag do not matter as they multiply zero rows below.

        recurrence_relation = \
            tt.eye(self.n) + \
            tt.eye(self.n, k=1) + \
            tt.eye(self.n, k=2) * sec_diag.dimshuffle((0, 'x'))

        pred_y = self.inpt[:, self.labels]

        fwd_pbblts, _ = th.scan(
            lambda curr, accum: curr * tt.dot(accum, recurrence_relation),
            sequences=[pred_y],
            outputs_info=[tt.eye(self.n)[0]]
        )

        # TODO: Add probabilities[-1, -2] only if last label is blank.
        # liklihood = ifelse(tt.eq(self.n, 1), fwd_pbblts[-1, -1],
        #                        ifelse(tt.neq(self.labels[-1], self.blank), fwd_pbblts[-1, -1],
        #                               fwd_pbblts[-1, -1] + fwd_pbblts[-1, -2]))
        liklihood = fwd_pbblts[-1, -1]
        self.cost = -tt.log(liklihood)
        self.debug = fwd_pbblts.T
Example #7
def likelihood(f, l, R, mu, eps, sigma2, lambda_1=1e-4):
    # The similarity matrix W is a linear combination of the slices in R
    W = T.tensordot(R, mu, axes=1)

    # The following indices correspond to labeled and unlabeled examples
    labeled = T.eq(l, 1).nonzero()

    # Calculating the graph Laplacian of W
    D = T.diag(W.sum(axis=0))
    L = D - W

    # The Covariance (or Kernel) matrix is the inverse of the (regularized) Laplacian
    epsI = eps * T.eye(L.shape[0])
    rL = L + epsI
    Sigma = nlinalg.matrix_inverse(rL)

    # The marginal density of labeled examples uses Sigma_LL as covariance (sub-)matrix
    Sigma_LL = Sigma[labeled][:, labeled][:, 0, :]

    # We also consider additive Gaussian noise with variance sigma2
    K_L = Sigma_LL + (sigma2 * T.eye(Sigma_LL.shape[0]))

    # Calculating the inverse and the determinant of K_L
    iK_L = nlinalg.matrix_inverse(K_L)
    dK_L = nlinalg.det(K_L)

    f_L = f[labeled]

    # The (L1-regularized) log-likelihood is given by the summation of the following four terms
    term_A = - (1 / 2) * f_L.dot(iK_L.dot(f_L))
    term_B = - (1 / 2) * T.log(dK_L)
    term_C = - (1 / 2) * T.log(2 * np.pi)
    term_D = - lambda_1 * T.sum(abs(mu))

    return term_A + term_B + term_C + term_D
Example #8
def recurrence(blanked_label,blank_symbol):
    '''
    A(s) = alpha(t,s) + alpha(t,s-1)  if l_s = blank or l_s = l_{s-2}
           = alpha(t,s) + alpha(t,s-1) + alpha(t,s-2) if l_s != l_{s-2}
    we can define a L' x L' matrix R to help do this(L' = 2L+1)
    A = alpha(t,:) * R
    '''
    length = blanked_label.shape[1]
    blanks = T.zeros((1,2)) + blank_symbol
    ybb = T.concatenate((blanked_label, blanks), axis=1)
    '''
    ybb: 1 x L'+2    L' = 2*L+1
    ybb[0,:-2] == blanked_label  ybb[0,2:] = blanked_label move along 2 label
    T.neq(ybb[:, :-2], ybb[:, 2:]) -> l'=b or l'_u == l'_(u-2) set 0
    T.eq(ybb[:, 1:-1], blank_symbol) -> [0,?,0,?,...0,1,0] ? depends on 
    whether l_s = l_{s-2}  result[0,i]==1 means ybb[0,i] != ybb[0,i+2]
    '''
    setDiagMatrix = T.neq(ybb[:,:-2],ybb[:,2:])*T.eq(ybb[:,1:-1],blank_symbol)
    '''
    r2: L' x L'
    r3: L' x L'
    '''
    r2 = T.eye(length,k=1)
    r3 = T.eye(length,k=2)*(setDiagMatrix.T)
    return r2,r3
Example #9
def recurrence_relation(size):
    """
    Based on code from Shawn Tan
    """

    eye2 = T.eye(size + 2)
    return T.eye(size) + eye2[2:, 1:-1] + eye2[2:, :-2] * (T.arange(size) % 2)
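For intuition, this is the matrix the helper builds for a blanked label of length 5, written with NumPy; the parity shortcut assumes non-blank labels sit at odd positions and consecutive labels differ (see the note in Example #2).

import numpy as np

size = 5                                   # blanked sequence [blank, l1, blank, l2, blank]
eye2 = np.eye(size + 2)
R = np.eye(size) + eye2[2:, 1:-1] + eye2[2:, :-2] * (np.arange(size) % 2)
print(R)
# Row s marks the positions reachable from s in one step: stay (diagonal),
# advance by one (first superdiagonal), or skip over a blank into the next
# label (second superdiagonal, allowed only into odd, i.e. non-blank, columns).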
Example #10
	def recurrence_relation(self, y):
		def sec_diag_i(yt, ytp1, ytp2):
			return T.neq(yt, ytp2) * T.eq(ytp1, self.n_out)

		y_extend = T.concatenate((y, [self.n_out, self.n_out]))
		sec_diag, _ = theano.scan(sec_diag_i,
				sequences={'input':y_extend, 'taps':[0, 1, 2]})

		y_sz = y.shape[0]
		return T.eye(y_sz) + \
			T.eye(y_sz, k=1) + \
			T.eye(y_sz, k=2) * sec_diag.dimshuffle((0, 'x'))
Example #11
 def __init__(self, v=None, **kwargs):
     super(HouseholderFlow, self).__init__(**kwargs)
     v = self.add_param(v, 'v')
     self.shared_params = dict(v=v)
     if self.batched:
         vv = v.dimshuffle(0, 1, 'x') * v.dimshuffle(0, 'x', 1)
         I = tt.eye(self.dim).dimshuffle('x', 0, 1)
         vvn = (1e-10+(v**2).sum(-1)).dimshuffle(0, 'x', 'x')
     else:
         vv = tt.outer(v, v)
         I = tt.eye(self.dim)
         vvn = ((v**2).sum(-1)+1e-10)
     self.H = I - 2. * vv / vvn
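A quick NumPy check of the Householder construction above: H = I - 2 v v^T / (v^T v) is an orthogonal reflection, so H H^T = I and H v = -v. The vector v is an arbitrary illustrative value.

import numpy as np

v = np.array([0.5, -1.0, 2.0])
H = np.eye(3) - 2.0 * np.outer(v, v) / (v @ v + 1e-10)           # same 1e-10 guard as above
print(np.allclose(H @ H.T, np.eye(3)), np.allclose(H @ v, -v))   # True True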
Example #12
    def _mb_normal_ctc(self, network_output,   labels, mask):


        n_y = labels.shape[1] / 2
        y = labels[:,:n_y]
        y = y.dimshuffle(1,0)
        y_mask = labels[:,n_y:].astype(theano.config.floatX)

        # y_row = labels.dimshuffle(1,0)
        # n_y = y_row.shape[0] / 2
        # y = y_row[:n_y,:]
        # y_mask = y_row[n_y:,:].astype(theano.config.floatX)

        y_hat = network_output.dimshuffle(0, 2, 1)

        pred_y = y_hat[:, y.astype('int32'), T.arange(self.tpo["batch_size"])]


        ybb = T.concatenate((y, self.blanks), axis=0).T
        sec_diag = (T.neq(ybb[:, :-2], ybb[:, 2:]) *
                    T.eq(ybb[:, 1:-1], self.tpo["CTC_blank"]) *
                    y_mask)



        # r1: LxL
        # r2: LxL
        # r3: LxLxB
        r2 = T.eye(n_y, k=1)
        r3 = (T.eye(n_y, k=2).dimshuffle(0, 1, 'x') *
              sec_diag.dimshuffle(1, 'x', 0))



        def step(p_curr, p_prev):
            # instead of a dot product, we multiply first
            # and then sum over one dimension.
            # objective: T.dot((p_prev) BxL, LxLxB)
            # solution: Lx1xB * LxLxB --> LxLxB --> (sum over)xLxB
            dotproduct = (p_prev + T.dot(p_prev, r2) +
                          (p_prev.dimshuffle(1, 'x', 0) * r3).sum(axis=0).T)
            return p_curr.T * dotproduct * y_mask  # B x L

        probabilities, _ = theano.scan(
            step,
            sequences=[pred_y],
            outputs_info=[T.eye(n_y)[0] * T.ones([self.tpo["batch_size"], n_y])])


        labels_probab = T.sum(probabilities[-1,:, -2:])
        return T.mean(-T.log(labels_probab))
Example #13
    def grad(self, inputs, cost_grad):
        """
        Notes:
        1. The gradient is computed under the assumption that perturbations
        of the input array respect triangularity, i.e. partial derivatives wrt
        triangular region are zero.
        2. In contrast with the usual mathematical presentation, in order to
        apply theano's 'reshape' function which implements row-order (i.e. C
        order), the differential expressions below have been derived based on
        the row-vectorizations of inputs 'a' and 'b'.

        See The Matrix Reference Manual,
        Copyright 1998-2011 Mike Brookes, Imperial College, London, UK
        """

        a, b = inputs
        ingrad = cost_grad
        ingrad = tensor.as_tensor_variable(ingrad)
        shp_a = (tensor.shape(inputs[0])[1],
                               tensor.shape(inputs[0])[1])
        I_M = tensor.eye(*shp_a)
        if self.lower:
            inv_a = solve_triangular(a, I_M, lower=True)
            tri_M = tril(tensor.ones(shp_a))
        else:
            inv_a = solve_triangular(a, I_M, lower=False)
            tri_M = triu(tensor.ones(shp_a))
        if b.ndim == 1:
            prod_a_b = tensor.tensordot(-b.T, inv_a.T, axes=1)
            prod_a_b = tensor.shape_padleft(prod_a_b)
            jac_veca = kron(inv_a, prod_a_b)
            jac_b = inv_a
            outgrad_veca = tensor.tensordot(ingrad, jac_veca, axes=1)
            outgrad_a = tensor.reshape(outgrad_veca,
                        (inputs[0].shape[0], inputs[0].shape[0])) * tri_M
            outgrad_b = tensor.tensordot(ingrad, jac_b, axes=1).flatten(ndim=1)
        else:
            ingrad_vec = ingrad.flatten(ndim=1)
            prod_a_b = tensor.tensordot(-b.T, inv_a.T, axes=1)
            jac_veca = kron(inv_a, prod_a_b)
            I_N = tensor.eye(tensor.shape(inputs[1])[1],
                               tensor.shape(inputs[1])[1])
            jac_vecb = kron(inv_a, I_N)
            outgrad_veca = tensor.tensordot(ingrad_vec, jac_veca, axes=1)
            outgrad_a = tensor.reshape(outgrad_veca,
                        (inputs[0].shape[0], inputs[0].shape[0])) * tri_M
            outgrad_vecb = tensor.tensordot(ingrad_vec, jac_vecb, axes=1)
            outgrad_b = tensor.reshape(outgrad_vecb,
                        (inputs[1].shape[0], inputs[1].shape[1]))
        return [outgrad_a, outgrad_b]
Example #14
    def compute_log_averaged_ei(self, x, X, randomness, incumbent):

        # We compute the old predictive mean at x
        
        Kzz = compute_kernel(self.lls, self.lsf, self.z, self.z) + T.eye(self.z.shape[ 0 ]) * self.jitter * T.exp(self.lsf)
        KzzInv = T.nlinalg.MatrixInversePSD()(Kzz)
        LLt = T.dot(self.LParamPost, T.transpose(self.LParamPost))
        covCavityInv = KzzInv + LLt * casting(self.n_points - self.set_for_training) / casting(self.n_points)
        covCavity = T.nlinalg.MatrixInversePSD()(covCavityInv)
        meanCavity = T.dot(covCavity, casting(self.n_points - self.set_for_training) / casting(self.n_points) * self.mParamPost)
        KzzInvmeanCavity = T.dot(KzzInv, meanCavity)
        Kxz = compute_kernel(self.lls, self.lsf, x, self.z)
        m_old_x = T.dot(Kxz, KzzInvmeanCavity)

        # We compute the old predictive mean at X

        KXz = compute_kernel(self.lls, self.lsf, X, self.z)
        m_old_X = T.dot(KXz, KzzInvmeanCavity)

        # We compute the required cross covariance matrices

        KXX = compute_kernel(self.lls, self.lsf, X, X) - T.dot(T.dot(KXz, KzzInv), KXz.T) + T.eye(X.shape[ 0 ]) * self.jitter * T.exp(self.lsf)
        KXXInv = T.nlinalg.MatrixInversePSD()(KXX)

        KxX = compute_kernel(self.lls, self.lsf, x, X)
        xX = T.concatenate([ x, X ], 0)
        KxXz = compute_kernel(self.lls, self.lsf, xX, self.z)
        KxX = KxX - T.dot(T.dot(KxXz[ 0 : x.shape[ 0], : ], KzzInv), KxXz[ x.shape[ 0 ] : xX.shape[ 0 ], : ].T)

        # We compute the new posterior mean

        samples_internal = T.dot(MatrixChol()(KXX), randomness)

        new_predictive_mean = T.tile(m_old_x, [ 1, randomness.shape[ 1 ] ]) + T.dot(KxX, T.dot(KXXInv, samples_internal))

        # We compute the new posterior variance

        z_expanded = T.concatenate([ self.z, X ], 0)
        Kxz_expanded = compute_kernel(self.lls, self.lsf, x, z_expanded)
        Kzz_expanded = compute_kernel(self.lls, self.lsf, z_expanded, z_expanded) + T.eye(z_expanded.shape[ 0 ]) * self.jitter * T.exp(self.lsf)
        Kzz_expandedInv = T.nlinalg.MatrixInversePSD()(Kzz_expanded)
        v_out = T.exp(self.lsf) - T.dot(Kxz_expanded * T.dot(Kxz_expanded, Kzz_expandedInv), T.ones_like(z_expanded[ : , 0 : 1 ]))
        new_predictive_var = T.tile(v_out, [ 1, randomness.shape[ 1 ] ])

        s = (incumbent - new_predictive_mean) / T.sqrt(new_predictive_var)

        log_ei = T.log((incumbent - new_predictive_mean) * ratio(s) + T.sqrt(new_predictive_var)) + log_n_pdf(s)

        return T.mean(LogSumExp(log_ei, 1), 1)
Example #15
    def recurrence_relation_(y_, blank_symbol):
        y = y_.dimshuffle(0,'x')
        n_y = y.shape[0]
        blanks = T.zeros((2, y.shape[1])) + blank_symbol
        ybb = T.concatenate((y, blanks), axis=0).T
        sec_diag = (T.neq(ybb[:, :-2], ybb[:, 2:]) *
                    T.eq(ybb[:, 1:-1], blank_symbol))

        # r1: LxL
        # r2: LxL
        # r3: LxL
        r2 = T.eye(n_y, k=1)
        r3 = (T.eye(n_y, k=2) * sec_diag)

        return r2, r3
Example #16
def recurrence_relation(y, y_mask):
    # with blank symbol of -1 this falls back to the recurrence that fails
    # with repeating symbols!
    blank_symbol = -1
    n_y = y.shape[0]
    blanks = tensor.zeros((2, y.shape[1])) + blank_symbol
    ybb = tensor.concatenate((y, blanks), axis=0).T
    sec_diag = tensor.neq(ybb[:, :-2], ybb[:, 2:]) * tensor.eq(ybb[:, 1:-1], blank_symbol) * y_mask.T

    # r1: LxL
    # r2: LxL
    # r3: LxLxB
    r2 = tensor.eye(n_y, k=1)
    r3 = tensor.eye(n_y, k=2).dimshuffle(0, 1, "x") * sec_diag.dimshuffle(1, "x", 0)
    return r2, r3
Example #17
 def __init__(self,
              atomc,
              dist,
              atom_mask,
              num_hidden=60,
              num_passes=2,
              include_diagonal=False,
              nonlinearity=lasagne.nonlinearities.tanh,
              Wcf=lasagne.init.GlorotNormal(1.0),
              Wfc=lasagne.init.GlorotNormal(1.0),
              Wdf=lasagne.init.GlorotNormal(1.0),
              bcf=lasagne.init.Constant(0.0),
              bdf=lasagne.init.Constant(0.0),
              **kwargs):
     super(RecurrentLayer, self).__init__([atomc, dist, atom_mask],
                                          **kwargs)
     num_atoms = self.input_shapes[0][1]
     c_len = self.input_shapes[0][2]
     d_len = self.input_shapes[1][3]
     self.Wcf = self.add_param(Wcf, (c_len, num_hidden), name="W_atom_c")
     self.bcf = self.add_param(bcf, (num_hidden, ), name="b_atom_c")
     self.Wdf = self.add_param(Wdf, (d_len, num_hidden), name="W_dist")
     self.bdf = self.add_param(bdf, (num_hidden, ), name="b_dist")
     self.Wfc = self.add_param(Wfc, (num_hidden, c_len),
                               name="W_hidden_to_c")
     self.num_passes = num_passes
     self.nonlin = nonlinearity
     if include_diagonal:
         self.inv_eye_mask = None
     else:
         self.inv_eye_mask = (T.eye(num_atoms, num_atoms) < 1).dimshuffle(
             "x", 0, 1, "x")
Example #18
def scale(x):
    """Returns a transform to represent a scaling"""

    x = T.as_tensor_variable(x)

    m = T.eye(4, 4)
    m = T.set_subtensor(m[0, 0], x[0])
    m = T.set_subtensor(m[1, 1], x[1])
    m = T.set_subtensor(m[2, 2], x[2])

    mInv = T.eye(4, 4)
    mInv = T.set_subtensor(mInv[0, 0], 1. / x[0])
    mInv = T.set_subtensor(mInv[1, 1], 1. / x[1])
    mInv = T.set_subtensor(mInv[2, 2], 1. / x[2])

    return Transform(m, mInv)
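What the two 4x4 matrices encode, checked with plain NumPy on a homogeneous point; the Transform wrapper itself belongs to the surrounding renderer code and is not shown here.

import numpy as np

m = np.eye(4)
m[0, 0], m[1, 1], m[2, 2] = 2.0, 3.0, 4.0            # scale factors
m_inv = np.eye(4)
m_inv[0, 0], m_inv[1, 1], m_inv[2, 2] = 1/2.0, 1/3.0, 1/4.0
p = np.array([1.0, 1.0, 1.0, 1.0])                   # homogeneous point
print(m @ p)                                         # [2. 3. 4. 1.]
print(np.allclose(m_inv @ (m @ p), p))               # True: the inverse undoes the scaling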
Example #19
 def __init__(self, z0=None, dim=None, v=None, jitter=.1):
     super(HouseholderFlow, self).__init__(dim=dim, z0=z0, jitter=jitter)
     if v is None:
         v = self.add_param(dim, 'v')
     self.shared_params = dict(v=v)
     v = v.dimshuffle(0, 'x')
     self.H = tt.eye(dim) - 2. * v.dot(v.T) / ((v**2).sum() + 1e-10)
Example #20
File: gp.py Project: zhuyiche/pymc3
 def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
     sigma2 = tt.square(sigma)
     Kuu = self.cov_func(Xu)
     Kuf = self.cov_func(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = self.cov_func(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
         trace = 0.0
     elif self.approx == "VFE":
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = ((1.0 / (2.0 * sigma2)) *
                  (tt.sum(self.cov_func(X, diag=True)) -
                   tt.sum(tt.sum(A * A, 0))))
     else:  # DTC
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = 0.0
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - self.mean_func(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
     logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
     quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
     return -1.0 * (constant + logdet + quadratic + trace)
Example #21
    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        mse_for_each_sample = T.mean((network_output - prediction_func)**2,
                                     axis=1)

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        J = compute_jaccobian(mse_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - T.nlinalg.matrix_inverse(
            J.T.dot(J) + new_mu * T.eye(n_params)).dot(
                J.T).dot(mse_for_each_sample)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates
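The update above is the damped Gauss-Newton (Levenberg-Marquardt) step; here is a NumPy sketch with an illustrative Jacobian, error vector and damping factor standing in for the symbolic variables of the method.

import numpy as np

J = np.array([[1.0, 2.0], [0.5, -1.0], [3.0, 0.2]])    # Jacobian, (n_samples, n_params)
e = np.array([0.1, -0.3, 0.05])                        # per-sample errors
mu = 0.01                                              # damping factor
step = np.linalg.inv(J.T @ J + mu * np.eye(J.shape[1])) @ J.T @ e
print(step)                                            # subtracted from the parameter vector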
Example #22
def neumann_inv_batch(A, it=10, F=None, reg=0.0):
    N, d, _ = A.shape
    if F is None:
        F = T.sqrt(T.sum(A * A, axis=(1, 2)))
        F = T.reshape(F, (N, 1, 1))
        F = T.tile(F, (1, d, d))

    G = A / F
    Y = T.tile(T.eye(d), (N, 1, 1)) * (1 - reg / F) - G
    Z = [None] * it
    Z[0] = T.tile(T.eye(d), (N, 1, 1))
    for i in xrange(1, it):
        Z[i] = T.batched_dot(Z[i - 1], Y)

    output = sum(Z) / F
    return output
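A single-matrix NumPy check of the truncated Neumann series used above (reg = 0); it assumes the spectral radius of I - A/F is below one, so the partial sums converge to the inverse.

import numpy as np

A = np.array([[2.0, 0.3], [0.3, 1.5]])
F = np.sqrt((A * A).sum())                                   # Frobenius norm, as in the code
Y = np.eye(2) - A / F
approx = sum(np.linalg.matrix_power(Y, i) for i in range(15)) / F
print(np.allclose(approx, np.linalg.inv(A), atol=1e-4))      # True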
Example #23
def run_irl(world, car, reward, theta, data):
    def gen():
        for point in data:
            for c, x0, u in zip(world.cars, point['x0'], point['u']):
                c.traj.x0.set_value(x0)
                for cu, uu in zip(c.traj.u, u):
                    cu.set_value(uu)
            yield

    r = car.traj.reward(reward)
    g = utils.grad(r, car.traj.u)
    H = utils.hessian(r, car.traj.u)
    I = tt.eye(utils.shape(H)[0])
    reg = utils.vector(1)
    reg.set_value([1e-1])
    H = H - reg[0] * I
    L = tt.dot(g, tt.dot(tn.MatrixInverse()(H), g)) + tt.log(tn.Det()(-H))
    for _ in gen():
        pass
    optimizer = utils.Maximizer(L, [theta],
                                gen=gen,
                                method='gd',
                                eps=0.1,
                                debug=True,
                                iters=1000,
                                inf_ignore=10)
    optimizer.maximize()
    print theta.get_value()
Example #24
    def compute_log_ei(self, x, incumbent):

        Kzz = compute_kernel(self.lls, self.lsf, self.z, self.z) + \
            T.eye(self.z.shape[0]) * self.jitter * T.exp(self.lsf)
        KzzInv = T.nlinalg.MatrixInversePSD()(Kzz)
        LLt = T.dot(self.LParamPost, T.transpose(self.LParamPost))
        covCavityInv = KzzInv + LLt * \
            casting(self.n_points - self.set_for_training) / \
            casting(self.n_points)
        covCavity = T.nlinalg.MatrixInversePSD()(covCavityInv)
        meanCavity = T.dot(
            covCavity,
            casting(self.n_points - self.set_for_training) /
            casting(self.n_points) * self.mParamPost)
        KzzInvcovCavity = T.dot(KzzInv, covCavity)
        KzzInvmeanCavity = T.dot(KzzInv, meanCavity)
        Kxz = compute_kernel(self.lls, self.lsf, x, self.z)
        B = T.dot(KzzInvcovCavity, KzzInv) - KzzInv
        v_out = T.exp(self.lsf) + T.dot(Kxz * T.dot(Kxz, B),
                                        T.ones_like(self.z[:, 0:1]))
        m_out = T.dot(Kxz, KzzInvmeanCavity)
        s = (incumbent - m_out) / T.sqrt(v_out)

        log_ei = T.log((incumbent - m_out) * ratio(s) +
                       T.sqrt(v_out)) + log_n_pdf(s)

        return log_ei
Example #25
def log_path_probs(y_hat, y):
    eye = tensor.eye(y.shape[0])
    first = eye[0]
    mask0 = 1 - eye[0]
    mask1 = 1 - eye[1]
    alt_mask = tensor.cast(tensor.arange(y.shape[0]) % 2, theano.config.floatX)
    skip_mask = mask0 * mask1 * alt_mask
    prev_idx = tensor.arange(-1, y.shape[0] - 1)
    prev_prev_idx = tensor.arange(-2, y.shape[0] - 2)
    log_mask0 = log_(mask0)
    log_skip_mask = log_(skip_mask)
    log_first = log_(first)

    def step(log_p_curr, log_p_prev):
        log_after_trans = logplus_(
            log_p_prev,
            logplus_(log_mask0 + log_p_prev[prev_idx],
                     log_skip_mask + log_p_prev[prev_prev_idx]))
        log_p_next = log_p_curr + log_after_trans
        return log_p_next

    L = tensor.log(y_hat[:, y])
    log_f_probs, _ = theano.scan(step, sequences=[L], outputs_info=[log_first])
    log_b_probs, _ = theano.scan(step,
                                 sequences=[L[::-1, ::-1]],
                                 outputs_info=[log_first])

    log_probs = log_f_probs + log_b_probs[::-1, ::-1]
    return log_probs, prev_idx, prev_prev_idx
Example #26
 def feedback(self, outputs):
     assert self.output_dim == 0
     eye = tensor.eye(self.num_outputs)
     check_theano_variable(outputs, None, "int")
     output_shape = [outputs.shape[i]
                     for i in range(outputs.ndim)] + [self.feedback_dim]
     return eye[outputs.flatten()].reshape(output_shape)
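The same identity-row lookup written with NumPy: rows of the identity matrix act as a one-hot table, and the reshape restores the original batch dimensions plus a feedback axis. num_outputs and the index array are illustrative.

import numpy as np

num_outputs = 4
outputs = np.array([[1, 3], [0, 2]])                 # any integer array of class indices
eye = np.eye(num_outputs)
one_hot = eye[outputs.flatten()].reshape(outputs.shape + (num_outputs,))
print(one_hot.shape)                                 # (2, 2, 4)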
Example #27
 def RBF(self, X1, X2=None):
     _X2 = X1 if X2 is None else X2
     dist = ((X1 / self.l)**2).sum(1)[:, None] + (
         (_X2 / self.l)**2).sum(1)[None, :] - 2 * (X1 / self.l).dot(
             (_X2 / self.l).T)
     RBF = self.sf2 * T.exp(-dist / 2.0)
     return (RBF + eps * T.eye(X1.shape[0])) if X2 is None else RBF
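The pairwise term above uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b; a NumPy check with random data (the length-scale division and the eps jitter on the diagonal are omitted for brevity):

import numpy as np

X1 = np.random.randn(5, 3)
X2 = np.random.randn(4, 3)
dist = (X1**2).sum(1)[:, None] + (X2**2).sum(1)[None, :] - 2 * X1 @ X2.T
brute = ((X1[:, None, :] - X2[None, :, :])**2).sum(-1)
print(np.allclose(dist, brute))                      # True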
Example #28
def _recurrence_relation(y, y_mask, blank_symbol):
    """
    Construct a permutation matrix and tensor for computing CTC transitions.
    Parameters
    ----------
    y : matrix (L, B)
        the target label sequences
    y_mask : matrix (L, B)
        indicates which values of y to use
    blank_symbol: integer
        indicates the symbol that signifies a blank label.
    Returns
    -------
    matrix (L, L)
    tensor3 (L, L, B)
    """
    n_y = y.shape[0]
    blanks = tensor.zeros((2, y.shape[1])) + blank_symbol
    ybb = tensor.concatenate((y, blanks), axis=0).T
    sec_diag = (tensor.neq(ybb[:, :-2], ybb[:, 2:]) *
                tensor.eq(ybb[:, 1:-1], blank_symbol) * y_mask.T)

    # r1: LxL
    # r2: LxL
    # r3: LxLxB
    eye2 = tensor.eye(n_y + 2)
    r2 = eye2[2:, 1:-1]  # tensor.eye(n_y, k=1)
    r3 = (eye2[2:, :-2].dimshuffle(0, 1, 'x') * sec_diag.dimshuffle(1, 'x', 0))

    return r2, r3
Example #29
def tiled_eye(n1, n2, dtype="float32"):
    r1 = T.maximum((n1 - 1) / n2 + 1, 1)
    r2 = T.maximum((n2 - 1) / n1 + 1, 1)
    small_eye = T.eye(T.minimum(n1, n2), dtype=dtype)
    tiled_big = T.tile(small_eye, (r1, r2))
    tiled_part = tiled_big[:n1, :n2]
    return tiled_part
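For example, a 2 x 5 request tiles a 2 x 2 identity until it covers the rectangle and then crops; the same construction with NumPy:

import numpy as np

small = np.eye(2)
print(np.tile(small, (1, 3))[:2, :5])
# [[1. 0. 1. 0. 1.]
#  [0. 1. 0. 1. 0.]]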
Example #30
def parser_loss(energies, heads, types, masks):
    """
    compute minus log likelihood of parser as parser loss.
    :param energies: Theano 4D tensor
        energies of each edge. the shape is [batch_size, n_steps, n_steps, num_labels],
        where the dummy root is at index 0.
    :param heads: Theano 2D tensor
        heads in the shape [batch_size, n_steps].
    :param types: Theano 2D tensor
        types in the shape [batch_size, n_steps].
    :param masks: Theano 2D tensor
        masks in the shape [batch_size, n_steps].
    :return: Theano 1D tensor
        an expression for minus log likelihood loss.
    """
    input_shape = energies.shape
    batch_size = input_shape[0]
    length = input_shape[1]
    # get the exp of energies, and add along the label axis.
    # the shape is [batch_size, n, n].
    E = T.exp(energies).sum(axis=3)

    # zero out the elements out the length of each sentence.
    if masks is not None:
        masks_shuffled = masks.dimshuffle(0, 1, 'x')
        E = E * masks_shuffled
        masks_shuffled = masks.dimshuffle(0, 'x', 1)
        E = E * masks_shuffled

    # compute the D tensor.
    # the shape is [batch_size, n, n]
    D = E.sum(axis=1)
    D = T.zeros_like(E) + D.dimshuffle(0, 1, 'x')
    # zeros out all elements except diagonal.
    D = D * T.eye(length, length, 0).dimshuffle('x', 0, 1)

    # compute lengths
    lengths = T.cast(masks, dtype='int32').sum(axis=1)
    # compute laplacian matrix
    L = D - E

    # compute partition Z(x)
    partitions, _ = theano.scan(
        fn=lambda laps, length: nlinalg.logabsdet(laps[1:length, 1:length]),
        outputs_info=None,
        sequences=[L, lengths])

    # compute targets energy
    # first create the index matrix
    indices = T.zeros_like(heads) + T.arange(length).dimshuffle('x', 0)
    # compute loss matrix shape = [n_steps, batch_size]
    target_energy = energies[T.arange(batch_size), heads.T, indices.T, types.T]
    # shuffle loss to [batch_size, n_steps]
    target_energy = target_energy.dimshuffle(1, 0)
    # remove the first element [batch, n_steps -1]
    target_energy = target_energy[:, 1:]
    # sum over n_step shape = [batch_size]
    target_energy = target_energy.sum(axis=1)

    return partitions - target_energy  #, E, D, L, partitions, target_energy
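The partition computed by the scan above is an instance of the directed matrix-tree theorem: with edge weights E[h, m] (head h to modifier m, node 0 the root) and D the diagonal of column sums, the determinant of the Laplacian minor that drops the root row and column sums the weights of all spanning arborescences. A tiny NumPy check with made-up weights:

import numpy as np

E = np.array([[0.0, 1.0, 2.0],      # E[h, m]: weight of attaching modifier m to head h
              [0.0, 0.0, 3.0],
              [0.0, 4.0, 0.0]])
D = np.diag(E.sum(axis=0))
L = D - E
print(np.linalg.det(L[1:, 1:]))     # 13.0 = 1*2 + 1*3 + 2*4, the three trees rooted at 0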
Example #31
    def log_likelihood(self):

        # sequence step for each sentence is word_vec*M + rest where M moves the word vector to sentence embedding space
        def seq_step(words_vectors, seq_sum_vectors):
            return T.dot(words_vectors, self.M_word_to_sent) + seq_sum_vectors

        # 3-Tensor for word embeddings for a minibatch. After dimshuffle, the first dimension is that of the word sequence for sentences. So we want to iterate over each of the sentences in the minibatch simultaneously
        word_emb_tensor = T.concatenate([
            T.dot(
                T.eye(self.max_words,
                      self.num_words)[self.one_hot_sent_matrix[i], :],
                self.Wemb) for i in xrange(
                    self.one_hot_sent_matrix.get_value(
                        borrow=True, return_internal_type=True).shape[0])
        ]).dimshuffle(2, 1, 0)

        # use scan to generate the sequence rnn
        sent_emb, _ = theano.scan(fn=seq_step,
                                  sequences=[word_emb_tensor],
                                  outputs_info=[np.zeros(self.sent_dim)])

        return T.mean(
            T.sum(T.log(
                np.ones(self.num_entities) - self.answer_matrix -
                T.nnet.softmax(T.dot(sent_emb[-1], self.M_softmax))),
                  axis=1))
Example #32
 def tangent2ambient(self, X, Z):
     U = tensor.stack((X.U.dot(Z.M) + Z.Up, X.U), 0).reshape((-1, X.U.shape[1]))
     #U = np.hstack((X.U.dot(Z.M) + Z.Up, X.U))
     S = tensor.eye(2*self._k)
     V = tensor.stack((X.V, Z.Vp), 1).reshape((X.V.shape[0], -1))
     #V = np.vstack((X.V, Z.Vp))
     return ManifoldElementShared.from_vars((U, S, V), shape=(self._m, self._n), r=self._k)
Example #33
    def generate_samples(self,name, X_new, n_samples = 500):
        with self.model as model:

            Kuu = pm.gp.util.stabilize(self.cov(self.Xu))
            Kuf = self.cov(self.Xu, self.X)
            Luu = tt.slinalg.cholesky(Kuu)
            A = pm.gp.util.solve_lower(Luu, Kuf)
            Qff = tt.dot(tt.transpose(A),A)
            Kffd = self.cov(self.X, diag=True)
            Lamd_inv = tt.diag(1./tt.clip(Kffd - tt.diag(Qff) + self.sigma**2, 0, np.inf))

            Sigma = pm.gp.util.stabilize(Kuu + tt.dot(Kuf.dot(Lamd_inv),tt.transpose(Kuf)))
            L_Sigma = tt.slinalg.cholesky(Sigma)


            Kus = self.cov(self.Xu,X_new)

            m1 = pm.gp.util.solve_lower(L_Sigma, Kus)
            m2 = pm.gp.util.solve_lower(L_Sigma, Kuf)

            mu_pred = tt.dot(tt.dot(tt.transpose(m1),m2),tt.dot(Lamd_inv,model.fp))

            Kss = self.cov(X_new) + 1e-6 * tt.eye(X_new.shape[0])
            As = pm.gp.util.solve_lower(Luu, Kus)
            Qss = tt.dot(tt.transpose(As),As)


            cov_pred = Kss - Qss + tt.dot(tt.transpose(m1),m1)

            f_pred = pm.MvNormal(name, mu=mu_pred, cov=cov_pred, shape=pm.gp.util.infer_shape(X_new))

        with self.model:
            pred_samples = pm.sample_ppc(self.trace, vars=[f_pred], samples=n_samples)

        return pred_samples
Example #34
    def get_output_for(self, input, init=False, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)
        
        activation = T.tensordot(input, self.W, [[1], [0]])
        abs_dif = (T.sum(abs(activation.dimshuffle(0,1,2,'x') - activation.dimshuffle('x',1,2,0)),axis=2)
                    + 1e6 * T.eye(input.shape[0]).dimshuffle(0,'x',1))

        if init:
            mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2),axis=0)
            abs_dif /= mean_min_abs_dif.dimshuffle('x',0,'x')
            self.init_updates = [(self.log_weight_scale, self.log_weight_scale-T.log(mean_min_abs_dif).dimshuffle(0,'x'))]
        
        f = T.sum(T.exp(-abs_dif),axis=2)

        if init:
            mf = T.mean(f,axis=0)
            f -= mf.dimshuffle('x',0)
            self.init_updates.append((self.b, -mf))
        else:
            f += self.b.dimshuffle('x',0)

        return T.concatenate([input, f], axis=1)
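The 1e6 * T.eye(...) term above masks each sample's comparison with itself: after exp(-abs_dif) the diagonal underflows to zero, so a sample never counts as its own minibatch neighbour. A reduced NumPy analogue with a single kernel dimension:

import numpy as np

act = np.random.randn(3, 2)                              # (batch, features), one kernel only
abs_dif = np.abs(act[:, None, :] - act[None, :, :]).sum(-1) + 1e6 * np.eye(3)
f = np.exp(-abs_dif).sum(1)                              # minibatch features, self term excluded
print(np.diag(np.exp(-abs_dif)))                         # ~0 on the diagonal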
Example #35
 def __init__(self, loss=None, inputs=None, C=None):
     symmetrize = False
     A, b = inputs
     if A.shape[0] <> A.shape[1]:
         symmetrize = True
     elif not T.allclose(A.T, A):
         print('not sym th')
         symmetrize = True
     if symmetrize:
         print('symetrize thean')
         self._A = T.dot(A.T, A)
         self._b = T.dot(A.T, b)
     else:
         self._A = A
         self._b = b
     #         self._A = theano.shared(A)
     #         self._b = theano.shared(b)
     if C is None:
         self._C = T.eye(self._A.shape[1])
     else:
         self._C = C
     b = self._b.eval()
     A = self._A.eval()
     self._x0 = np.zeros(b.shape[0])
     self._r0 = b - np.dot(A, self._x0)
     # self._z = T.dot(self._C,theano.shared(self._x0))
     self._t_x = theano.shared(self._x0)  # T.vector('x')
     self._output_tf = loss
     if loss is None:
         self._output_tf = self._tf_CG_loss()
Example #36
    def _initialize_posterior_distribution(self, RecognitionParams):

        ################## put together the total precision matrix ######################

        # Diagonals must be PSD
        diagsquare = T.batched_dot(self.AAChol,
                                   self.AAChol.dimshuffle(0, 2, 1))
        odsquare = T.batched_dot(self.BBChol, self.BBChol.dimshuffle(0, 2, 1))
        self.AA = diagsquare + T.concatenate([
            T.shape_padleft(T.zeros([self.xDim, self.xDim])), odsquare
        ]) + 1e-6 * T.eye(self.xDim)
        self.BB = T.batched_dot(self.AAChol[:-1],
                                self.BBChol.dimshuffle(0, 2, 1))

        # compute Cholesky decomposition
        self.the_chol = blk_tridag_chol(self.AA, self.BB)

        # symbolic recipe for computing the diagonal (V) and
        # off-diagonal (VV) blocks of the posterior covariance
        self.V, self.VV, self.S = compute_sym_blk_tridiag(self.AA, self.BB)
        self.postX = self.Mu

        # The determinant of the covariance is the square of the determinant of the cholesky factor (twice the log).
        # Determinant of the Cholesky factor is the product of the diagonal elements of the block-diagonal.
        def comp_log_det(L):
            return T.log(T.diag(L)).sum()

        self.ln_determinant = -2 * theano.scan(
            fn=comp_log_det, sequences=self.the_chol[0])[0].sum()
Example #37
    def mk_training_fn(self):

        n = self.batch_size
        N = self.total_size
        q_size = self.q_size
        B = self.B
        gamma = self.gamma
        avg_I = self.avg_I
        t = self.t
        updates = self.updates
        epsilon = self.step_size / pow(2.0, t // self.step_size_decay)
        random = self.random
        inarray = self.inarray
        gt, dlog_prior = self.dlogp_elemwise, self.dlog_prior

        # 5. Calculate mean dlogp
        avg_gt = gt.mean(axis=0)

        # 6. Calculate approximate Fisher Score
        gt_diff = (gt - avg_gt)

        V = (1. / (n - 1)) * tt.dot(gt_diff.T, gt_diff)

        # 7. Update moving average
        I_t = (1. - 1. / t) * avg_I + (1. / t) * V

        if B is None:
            # if B is not specified
            # B \propto I_t as given in
            # http://www.ics.uci.edu/~welling/publications/papers/SGFS_v10_final.pdf
            # after iterating over the data few times to get a good approximation of I_N
            B = tt.switch(t <= int(N / n) * 50, tt.eye(q_size), gamma * I_t)

        # 8. Noise Term
        # The noise term is sampled from a normal distribution
        # of mean 0 and std_dev = sqrt(4B/step_size)
        # In order to generate the noise term, a standard
        # normal dist. is scaled with 2B_ch/sqrt(step_size)
        # where B_ch is cholesky decomposition of B
        # i.e. B = dot(B_ch, B_ch^T)
        B_ch = tt.slinalg.cholesky(B)
        noise_term = tt.dot((2.*B_ch)/tt.sqrt(epsilon), \
                random.normal((q_size,), dtype=theano.config.floatX))
        # 9.
        # Inv. Fisher Cov. Matrix
        cov_mat = (gamma * I_t * N) + ((4. / epsilon) * B)
        inv_cov_mat = tt.nlinalg.matrix_inverse(cov_mat)
        # Noise Coefficient
        noise_coeff = (dlog_prior + (N * avg_gt) + noise_term)
        dq = 2 * tt.dot(inv_cov_mat, noise_coeff)

        updates.update({avg_I: I_t, t: t + 1})

        f = theano.function(
            outputs=dq,
            inputs=inarray,
            updates=updates,
            allow_input_downcast=True)

        return f
Example #38
 def __init__(self, z0=None, dim=None, v=None, jitter=.1):
     super(HouseholderFlow, self).__init__(dim=dim, z0=z0, jitter=jitter)
     if v is None:
         v = self.add_param(dim, 'v')
     self.shared_params = dict(v=v)
     v = v.dimshuffle(0, 'x')
     self.H = tt.eye(dim) - 2. * v.dot(v.T) / ((v**2).sum()+1e-10)
Example #39
File: gp.py Project: zhuyiche/pymc3
 def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma,
                        cov_total, mean_total):
     sigma2 = tt.square(sigma)
     Kuu = cov_total(Xu)
     Kuf = cov_total(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = cov_total(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
     else:  # VFE or DTC
         Lamd = tt.ones_like(Qffd) * sigma2
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - mean_total(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     Kus = self.cov_func(Xu, Xnew)
     As = solve_lower(Luu, Kus)
     mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As),
                                        solve_upper(tt.transpose(L_B), c))
     C = solve_lower(L_B, As)
     if diag:
         Kss = self.cov_func(Xnew, diag=True)
         var = Kss - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0)
         if pred_noise:
             var += sigma2
         return mu, var
     else:
         cov = (self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) +
                tt.dot(tt.transpose(C), C))
         if pred_noise:
             cov += sigma2 * tt.identity_like(cov)
         return mu, cov if pred_noise else stabilize(cov)
Example #40
 def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
     sigma2 = tt.square(sigma)
     Kuu = cov_total(Xu)
     Kuf = cov_total(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = cov_total(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
     else:  # VFE or DTC
         Lamd = tt.ones_like(Qffd) * sigma2
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - mean_total(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     Kus = self.cov_func(Xu, Xnew)
     As = solve_lower(Luu, Kus)
     mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c))
     C = solve_lower(L_B, As)
     if diag:
         Kss = self.cov_func(Xnew, diag=True)
         var = Kss - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0)
         if pred_noise:
             var += sigma2
         return mu, var
     else:
         cov = (self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) +
                tt.dot(tt.transpose(C), C))
         if pred_noise:
             cov += sigma2 * tt.identity_like(cov)
         return mu, stabilize(cov)
Example #41
def translate(x):
    """Returns a transform to represent a translation"""

    x = T.as_tensor_variable(x)

    m = T.eye(4, 4)
    m = T.set_subtensor(m[0, 3], x[0])
    m = T.set_subtensor(m[1, 3], x[1])
    m = T.set_subtensor(m[2, 3], x[2])

    mInv = T.eye(4, 4)
    mInv = T.set_subtensor(mInv[0, 3], -x[0])
    mInv = T.set_subtensor(mInv[1, 3], -x[1])
    mInv = T.set_subtensor(mInv[2, 3], -x[2])

    return Transform(m, mInv)
Example #42
 def cal_encoder_step(self, encoder_val):
     '''
         Calculate the weight ratios in encoder.
         
          :type encoder_val: class
          :param encoder_val: the class which stores the intermediate variables in encoder
         
         :returns: R_h_x, R_h_h are theano variables, weight ratios in encoder
     '''
     encoder_val.x = encoder_val.x.dimshuffle(0, 1, 'x')
     R_state_in_x = (encoder_val.x * self.input_emb + self.input_emb_offset
                     ) / (self.ep * TT.sgn(encoder_val.state_in) +
                          encoder_val.state_in).dimshuffle(0, 'x', 1)
     R_state_in_x = R_state_in_x.dimshuffle(0, 2, 1)
     R_reset_in_x = encoder_val.x * self.reset_emb / (
         encoder_val.reset_in +
         self.ep * TT.sgn(encoder_val.reset_in)).dimshuffle(0, 'x', 1)
     R_reset_in_x = R_reset_in_x.dimshuffle(0, 2, 1)
     R_gate_in_x = encoder_val.x * self.gate_emb / (
         encoder_val.gate_in +
         self.ep * TT.sgn(encoder_val.gate_in)).dimshuffle(0, 'x', 1)
     R_gate_in_x = R_gate_in_x.dimshuffle(0, 2, 1)
     h_before = encoder_val.h_before.dimshuffle(0, 1, 'x')
     R_gate_h = h_before * self.gate_hidden / (
         encoder_val.gate + self.ep * TT.sgn(encoder_val.gate)).dimshuffle(
             0, 'x', 1)
     R_gate_x = R_gate_in_x * (encoder_val.gate_in / (
         encoder_val.gate + self.ep * TT.sgn(encoder_val.gate))).dimshuffle(
             0, 1, 'x')
     R_reset_h = h_before * self.reset_hidden / (
         encoder_val.reset +
         self.ep * TT.sgn(encoder_val.reset)).dimshuffle(0, 'x', 1)
     R_reset_x = R_reset_in_x * (
         encoder_val.reset_in /
         (encoder_val.reset +
          self.ep * TT.sgn(encoder_val.reset))).dimshuffle(0, 1, 'x')
     R_reseted_h = R_reset_h * self.weight + TT.eye(self.dim,
                                                    self.dim) * self.weight
     R_reseted_x = R_reset_x * self.weight
     encoder_val.reseted = encoder_val.reseted.dimshuffle(0, 1, 'x')
     R_state_reseted = encoder_val.reseted * self.input_hidden / (
         encoder_val.state +
         self.ep * TT.sgn(encoder_val.state)).dimshuffle(0, 'x', 1)
     R_state_reseted = R_state_reseted.dimshuffle(0, 2, 1)
     R_state_h = TT.batched_dot(R_state_reseted, R_reseted_h)
     R_state_x = TT.batched_dot(R_state_reseted, R_reseted_x)
     R_state_x += R_state_in_x * (
         (encoder_val.state_in /
          (encoder_val.state +
           self.ep * TT.sgn(encoder_val.state))).dimshuffle(0, 1, 'x'))
     R_h = (encoder_val.gate * encoder_val.state /
            (encoder_val.h + self.ep * TT.sgn(encoder_val.h))).dimshuffle(
                0, 1, 'x') * self.weight
     R_h_h = R_state_h * R_h + R_gate_h * R_h
     R_h2 = ((1 - encoder_val.gate) * encoder_val.h_before /
             (encoder_val.h + self.ep * TT.sgn(encoder_val.h))).dimshuffle(
                 0, 1, 'x')
     R_h_h = TT.identity_like(R_h_h[0]) * R_h2
     R_h_x = R_gate_x * R_h + R_state_x * R_h
     return R_h_x, R_h_h
Example #43
    def path_probability(self,
                         queryseq_padded,
                         scorematrix,
                         queryseq_mask_padded=None,
                         scorematrix_mask=None,
                         blank_symbol=None,
                         align='pre'):
        """
        Compute p(l|x) using only the forward variable and log scale
        :param queryseq_padded: (2L+1, B)
        :param scorematrix: (T, C+1, B)
        :param queryseq_mask_padded: (2L+1, B)
        :param scorematrix_mask: (T, B)
        :param blank_symbol: = C by default
        :return:
        """
        if blank_symbol is None:
            # blank_symbol = scorematrix.shape[1] - 1.0
            blank_symbol = tensor.cast(scorematrix.shape[1], floatX) - 1.0
        if queryseq_mask_padded is None:
            queryseq_mask_padded = tensor.ones_like(queryseq_padded,
                                                    dtype=floatX)
        if scorematrix_mask is None:
            scorematrix_mask = tensor.ones(
                [scorematrix.shape[0], scorematrix.shape[2]])

        pred_y = self._class_batch_to_labeling_batch(
            queryseq_padded, scorematrix,
            scorematrix_mask)  # (T, 2L+1, B), reshaped scorematrix
        r2, r3 = self._recurrence_relation(
            queryseq_padded, queryseq_mask_padded,
            blank_symbol)  # r2 (2L+1, 2L+1), r3 (2L+1, 2L+1, B)

        def step(p_curr, p_prev):
            p1 = p_prev
            p2 = self._log_dot_matrix(p1, r2)
            p3 = self._log_dot_tensor(p1, r3)
            p123 = self._log_add(p3, self._log_add(p1, p2))
            return p_curr.T + p123 + self._epslog(queryseq_mask_padded.T)

        alphas, _ = theano.scan(
            step,
            sequences=[self._epslog(pred_y)],
            outputs_info=[
                self._epslog(
                    tensor.eye(queryseq_padded.shape[0])[0] *
                    tensor.ones(queryseq_padded.T.shape))
            ])

        B = alphas.shape[1]
        LL = tensor.sum(queryseq_mask_padded, axis=0, dtype='int32')
        if align == 'pre':
            TL = tensor.sum(scorematrix_mask, axis=0, dtype='int32')
            NLL = -self._log_add(
                alphas[TL - 1, tensor.arange(B), LL - 1],
                alphas[TL - 1, tensor.arange(B), LL - 2])
        else:  # align == 'post'
            NLL = -self._log_add(alphas[-1, tensor.arange(B), LL - 1],
                                 alphas[-1, tensor.arange(B), LL - 2])
        return NLL, alphas
Example #44
    def get_output_for(self, input, init=False, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.tensordot(input, self.W, [[1], [0]])
        abs_dif = (T.sum(abs(
            activation.dimshuffle(0, 1, 2, 'x') -
            activation.dimshuffle('x', 1, 2, 0)),
                         axis=2) +
                   1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1))

        if init:
            mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
            abs_dif /= mean_min_abs_dif.dimshuffle('x', 0, 'x')
            self.init_updates = [
                (self.log_weight_scale, self.log_weight_scale -
                 T.log(mean_min_abs_dif).dimshuffle(0, 'x'))
            ]

        f = T.sum(T.exp(-abs_dif), axis=2)

        if init:
            mf = T.mean(f, axis=0)
            f -= mf.dimshuffle('x', 0)
            self.init_updates.append((self.b, -mf))
        else:
            f += self.b.dimshuffle('x', 0)

        return T.concatenate([input, f], axis=1)
Example #45
    def dlogp(inputs, gradients):
        g_logp, = gradients
        cov, delta = inputs

        g_logp.tag.test_value = floatX(np.array(1.))
        n, k = delta.shape

        chol_cov = cholesky(cov)
        diag = tt.nlinalg.diag(chol_cov)
        ok = tt.all(diag > 0)

        chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
        delta_trans = solve_lower(chol_cov, delta.T).T

        inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans)
        g_cov = solve_upper(chol_cov.T, inner)
        g_cov = solve_upper(chol_cov.T, g_cov.T)

        tau_delta = solve_upper(chol_cov.T, delta_trans.T)
        g_delta = tau_delta.T

        g_cov = tt.switch(ok, g_cov, -np.nan)
        g_delta = tt.switch(ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
Example #46
def _path_probabs(y, y_hat, y_mask, y_hat_mask, blank_symbol):
    """Compute the probabilities of the paths that are compatible with the
    sequence y.

    This function uses scan to get the forward probabilities (often denoted
    with the symbol alpha in the literature).

    See _log_path_probabs for a version that works in log domain.
    """

    pred_y = _class_batch_to_labeling_batch(y, y_hat, y_hat_mask)
    pred_y = pred_y.dimshuffle(0, 2, 1)
    n_labels = y.shape[0]

    r2, r3 = _recurrence_relation(y, y_mask, blank_symbol)

    def step(p_curr, p_prev):
        # instead of a dot product, we multiply first
        # and then sum over one dimension.
        # objective: T.dot((p_prev) BxL, LxLxB)
        # solution: Lx1xB * LxLxB --> LxLxB --> (sum over)xLxB
        dotproduct = p_prev + tensor.dot(p_prev, r2) + (p_prev.dimshuffle(1, "x", 0) * r3).sum(axis=0).T
        return p_curr.T * dotproduct * y_mask.T  # B x L

    probabilities, _ = theano.scan(
        step, sequences=[pred_y], outputs_info=[tensor.eye(n_labels)[0] * tensor.ones(y.T.shape)]
    )
    return probabilities
Example #47
 def __init__(self, incoming, dimension, params_init=(GlorotUniform(),
                                                      GlorotUniform(),
                                                      Uniform([0, 0.1])),
              addition_parameters=[False], **kwargs):
     '''
     init parameters
     :param incoming: input to the LISTA layer
     :param dimension: 2 numbers list.
      dimension[0] is dict_size, length of dictionary vector in LISTA. dimension[1] is T a.k.a depth
     :param params_init: init value or init method for LISTA
     :transposed: = True if the input dictionary D is the transpose matrix of a theano.compile.SharedVariable V.
      In that case self.W = D^T = V^T^T = V
     :param kwargs: parameters of super class
     :return:
     '''
     super(LISTA, self).__init__(incoming, **kwargs)
     self.transposed = addition_parameters[0]
     num_inputs = incoming.output_shape[-1]
     self.dict_size = dimension[0]
     self.T = dimension[1]
     self.W = self.add_param(params_init[0], [num_inputs, self.dict_size], name='W',
                             lista=True, lista_weight_W=True, sparse_dictionary=True, regularizable=True)
     # self.S = self.add_param(params_init[1], [self.dict_size, self.dict_size], name='S',
     #                         lista=True, lista_weight_W=True, regularizable=True)
     if T > 0:
         self.S = T.eye(self.dict_size) - T.dot(self.get_dictionary(), self.get_dictionary().T)
         self.S = self.add_param(theano.shared(floatX(self.S.eval())), [self.dict_size, self.dict_size], name='S',
                                 lista=True, lista_weight_S=True, regularizable=True)
     self.theta = self.add_param(theano.shared(floatX(0.01 * np.ones([self.dict_size, ]))), [self.dict_size, ],
                                 name='theta',
                                 lista=True, lista_fun_param=True, regularizable=False)
     self.eps = 1e-6
     self.clipped_theta = T.clip(self.theta, self.eps, 10)
Example #48
    def _log_ctc(self, ):
        _1000 = tt.eye(self.n, dtype=th.config.floatX)[0]
        prev_mask = 1 - _1000
        prev_mask = safe_log(prev_mask)
        prevprev_mask = tt.neq(self.labels[:-2], self.labels[2:]) * \
                        tt.eq(self.labels[1:-1], self.blank)
        prevprev_mask = tt.concatenate(
            ([0, 0], prevprev_mask)).astype(th.config.floatX)
        prevprev_mask = safe_log(prevprev_mask)
        prev = tt.arange(-1, self.n - 1)
        prevprev = tt.arange(-2, self.n - 2)
        log_pred_y = tt.log(self.inpt[:, self.labels])

        def step(curr, accum):
            return logmul(
                curr,
                logadd(accum, logmul(prev_mask, accum[prev]),
                       logmul(prevprev_mask, accum[prevprev])))

        log_fwd_pbblts, _ = th.scan(step,
                                    sequences=[log_pred_y],
                                    outputs_info=[safe_log(_1000)])

        # TODO: Add probabilities[-1, -2] only if last label is blank.
        #     If length = 1, skip the scan process.
        # log_liklihood = ifelse(tt.eq(self.n, 1), tt.sum(log_pred_y),
        #                    ifelse(tt.eq(self.labels[-1], self.blank),
        #                           logadd(log_fwd_pbblts[-1, -1], log_fwd_pbblts[-1, -2]),
        #                           log_fwd_pbblts[-1, -1]))
        log_liklihood = log_fwd_pbblts[-1, -1]
        self.cost = -log_liklihood
        self.debug = log_fwd_pbblts.T
Example #49
def quadratic_saturating_loss(mx, Sx, target, Q, *args, **kwargs):
    '''
        Squashing loss penalty function
        c(x) = ( 1 - e^(-0.5*quadratic_loss(x, target)) )
    '''
    if Sx is None:
        if mx.ndim == 1:
            mx = mx[None, :]
        delta = mx - target[None, :]
        deltaQ = delta.dot(Q)
        cost = 1.0 - tt.exp(-0.5 * tt.batched_dot(deltaQ, delta))
        return cost
    else:
        # stochastic case (moment matching)
        delta = mx - target
        SxQ = Sx.dot(Q)
        EyeM = tt.eye(mx.shape[0])
        IpSxQ = EyeM + SxQ
        Ip2SxQ = EyeM + 2 * SxQ
        S1 = tt.dot(Q, matrix_inverse(IpSxQ))
        S2 = tt.dot(Q, matrix_inverse(Ip2SxQ))
        # S1 = solve(IpSxQ.T, Q.T).T
        # S2 = solve(Ip2SxQ.T, Q.T).T
        # mean
        m_cost = -tt.exp(-0.5 * delta.dot(S1).dot(delta)) / tt.sqrt(det(IpSxQ))
        # var
        s_cost = tt.exp(-delta.dot(S2).dot(delta)) / tt.sqrt(
            det(Ip2SxQ)) - m_cost**2

        return 1.0 + m_cost, s_cost
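Evaluating the deterministic branch above for a single state with NumPy (target and Q are illustrative): the cost 1 - exp(-0.5 * delta^T Q delta) is close to 0 near the target and saturates at 1 far away.

import numpy as np

mx = np.array([0.5, -0.2])
target = np.array([0.0, 0.0])
Q = np.eye(2)
delta = mx - target
print(1.0 - np.exp(-0.5 * delta @ Q @ delta))            # ~0.135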
Example #50
def one_hot(x, m=None):
    """One-hot representation of integer vector.
    Given a vector of integers from 0 to m-1, returns a matrix
    with a one-hot representation, where each row corresponds
    to an element of x.
    Parameters
    ----------
    x : integer vector
        The integer vector to convert to a one-hot representation.
    m : int, optional
        The number of different columns for the one-hot representation. This
        needs to be strictly greater than the maximum value of `x`.
        Defaults to ``max(x) + 1``.
    Returns
    -------
    Theano tensor variable
        A Theano tensor variable of shape (``n``, `m`), where ``n`` is the
        length of `x`, with the one-hot representation of `x`.
    Notes
    -----
    If your integer vector represents target class memberships, and you wish to
    compute the cross-entropy between predictions and the target class
    memberships, then there is no need to use this function, since the function
    :func:`lasagne.objectives.categorical_crossentropy()` can compute the
    cross-entropy from the integer vector directly.
    """
    if m is None:
        m = T.cast(T.max(x) + 1, 'int32')

    return T.eye(m)[T.cast(x, 'int32')]
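A quick usage sketch (input values chosen arbitrarily):

import numpy as np
import theano
import theano.tensor as T

x = T.ivector('x')
to_one_hot = theano.function([x], one_hot(x))
print(to_one_hot(np.array([0, 2, 1], dtype='int32')))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]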
Ejemplo n.º 51
0
    def __init__(self,
                 generative_model,
                 recognition_model,
                 z_dim,
                 max_length,
                 vocab_size,
                 dist_z_gen,
                 dist_x_gen,
                 dist_z_rec,
                 gen_nn_kwargs,
                 rec_nn_kwargs,
                 iwae=False):

        self.vocab_size = vocab_size
        self.max_length = max_length

        self.generative_model = generative_model(z_dim, max_length, vocab_size,
                                                 dist_z_gen, dist_x_gen,
                                                 gen_nn_kwargs)
        self.recognition_model = recognition_model(z_dim, max_length,
                                                   vocab_size, dist_z_rec,
                                                   rec_nn_kwargs)

        self.iwae = iwae

        self.one_hot_encoder = T.concatenate(
            [T.zeros((1, self.vocab_size)),
             T.eye(self.vocab_size)], axis=0)
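The one_hot_encoder above prepends an all-zero row to the identity, presumably so that index 0 (e.g. padding beyond the sentence length) maps to a zero vector while a 1-based symbol index maps to its one-hot row. A small lookup sketch, written standalone outside the class:

import numpy as np
import theano
import theano.tensor as T

vocab_size = 3
enc = T.concatenate([T.zeros((1, vocab_size)), T.eye(vocab_size)], axis=0)
idx = T.ivector('idx')
lookup = theano.function([idx], enc[idx])
print(lookup(np.array([0, 1, 3], dtype='int32')))
# [[0. 0. 0.]    index 0 -> all-zero "padding" row
#  [1. 0. 0.]    index 1 -> one-hot for symbol 1
#  [0. 0. 1.]]   index 3 -> one-hot for symbol 3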
Ejemplo n.º 52
0
    def logp(self, Y, X=None):
        if X is None:
            X = self.X
        mu = self.M(X).squeeze()
        Sigma = self.K(X) + tt.eye(X.shape[0]) * self.sigma**2

        return MvNormal.dist(mu, Sigma).logp(Y)
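Here the tt.eye term adds i.i.d. observation noise sigma^2 to the diagonal of the GP covariance before evaluating the multivariate-normal log density. A minimal sketch of what the mean and covariance callables M and K could look like (a zero mean and an RBF kernel with a hypothetical lengthscale `ls`; the class internals are not shown in this snippet):

import theano.tensor as tt

def M(X):
    # zero mean function
    return tt.zeros((X.shape[0], 1))

def K(X, ls=1.0):
    # squared-exponential covariance between the rows of X
    d2 = (tt.sum(X ** 2, 1)[:, None] + tt.sum(X ** 2, 1)[None, :]
          - 2.0 * X.dot(X.T))
    return tt.exp(-0.5 * d2 / ls ** 2)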
Ejemplo n.º 53
0
    def __call__(self, f):
        """
        Compute the following function:
            E(f) = ||f_l - y_l||^2 + mu f^T L f + mu eps ||f||^2,

        :param f: Theano tensor
            Vector of N continuous elements.
        :return: Theano tensor
            Energy (cost) of the vector f.
        """
        # Compute the un-normalized graph Laplacian: L = D - W
        D = T.diag(self.W.sum(axis=0))
        L = D - self.W

        # Compute the label consistency
        S = T.diag(self.L)
        El = (f - self.y).T.dot(S.dot(f - self.y))

        # Compute the smoothness along the similarity graph
        I = T.eye(self.L.shape[0])
        Es = f.T.dot(L.dot(f)) + self.eps * f.T.dot(I.dot(f))

        # Compute the whole cost function
        E = El + self.mu * Es

        return E
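A direct NumPy restatement of the same energy, useful as a sanity check; the similarity graph W, the label indicator, y, mu and eps below are made up:

import numpy as np

W = np.array([[0., 1.], [1., 0.]])     # similarity graph
lbl = np.array([1., 0.])               # 1 where a node is labelled (plays the role of self.L)
y = np.array([1., 0.])                 # observed labels (0 for unlabelled nodes)
mu, eps = 0.5, 1e-3
f = np.array([0.9, 0.4])

Lap = np.diag(W.sum(axis=0)) - W                   # un-normalized graph Laplacian
El = (f - y).dot(np.diag(lbl)).dot(f - y)          # label consistency term
Es = f.dot(Lap).dot(f) + eps * f.dot(f)            # smoothness + small ridge
E = El + mu * Es
print(E)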
Ejemplo n.º 54
0
def one_hot(x, m=None):
    """One-hot representation of integer vector.

    Given a vector of integers from 0 to m-1, returns a matrix
    with a one-hot representation, where each row corresponds
    to an element of x.

    Parameters
    ----------
    x : integer vector
        The integer vector to convert to a one-hot representation.
    m : int, optional
        The number of different columns for the one-hot representation. This
        needs to be strictly greater than the maximum value of `x`.
        Defaults to ``max(x) + 1``.

    Returns
    -------
    Theano tensor variable
        A Theano tensor variable of shape (``n``, `m`), where ``n`` is the
        length of `x`, with the one-hot representation of `x`.

    Notes
    -----
    If your integer vector represents target class memberships, and you wish to
    compute the cross-entropy between predictions and the target class
    memberships, then there is no need to use this function, since the function
    :func:`lasagne.objectives.categorical_crossentropy()` can compute the
    cross-entropy from the integer vector directly.

    """
    if m is None:
        m = T.cast(T.max(x) + 1, "int32")

    return T.eye(m)[T.cast(x, "int32")]
Ejemplo n.º 55
0
def log_path_probs(blanked_label, y, blank_symbol):
    '''
    table: per-timestep probabilities of the blanked label sequence, shape T x (2L+1)
    '''
    table = feature_table(blanked_label,y)
    r2, r3 = recurrence(blanked_label,blank_symbol)

    '''
    log_p_curr: 2L+1 x 1
    log_p_prev: 1 x 2L+1
    '''
    def step(log_p_curr, log_p_prev):
        p1 = log_p_prev
        p2 = _log_dot_matrix(p1, r2)
        p3 = _log_dot_matrix(p1, r3)
        p123 = _log_add(p3, _log_add(p1, p2))

        return (log_p_curr + p123 )

    '''
    T.eye(blanked_label.shape[1])[0] = [ 1.  0.  0.  0.  0.  0.  0.  0.  0.]
    T.eye(blanked_label.shape[1])[0]*T.ones(blanked_label.T.shape):  1 x 2L+1
    '''
    log_probabilities, _ = theano.scan(
        step,
        sequences=[_epslog(table)],
        outputs_info=[_epslog(T.eye(blanked_label.shape[1])[0])])
    return log_probabilities
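The helpers _epslog, _log_dot_matrix and _log_add are not shown above. A plausible 2-D sketch consistent with how they are used here, assuming the 0/1 transition matrices r2 and r3 have at most one nonzero entry per column (which holds for shifted identities built from T.eye):

import theano.tensor as T

def _epslog(x, eps=1e-30):
    # elementwise log, clipping so that exact zeros become a large negative number
    return T.log(T.clip(x, eps, 1e30))

def _log_add(a, b):
    # log(exp(a) + exp(b)), computed stably
    m = T.maximum(a, b)
    return m + T.log1p(T.exp(-abs(a - b)))

def _log_dot_matrix(log_x, A):
    # log(exp(log_x).dot(A)) for a 0/1 matrix A with at most one nonzero
    # entry per column; all-zero columns are pushed to "log 0"
    return T.dot(log_x, A) + (A.max(axis=0) - 1) * 1e30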
Ejemplo n.º 56
0
 def TzscorrCols(Xn):
     """
     Theano expression which returns Fisher-transformed correlations between the columns of a
     normalized input `Xn` (columns scaled so that Xn.T.dot(Xn) yields Pearson correlations).
     Diagonal is set to zero.
     """
     C_X = T.dot(Xn.T, Xn)-T.eye(Xn.shape[1])
     return 0.5*T.log((1+C_X)/(1-C_X))
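For Xn.T.dot(Xn) to be a correlation matrix, each column of Xn needs zero mean and unit sum of squares. A hypothetical preprocessing and usage sketch, calling TzscorrCols as a standalone function:

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
Xn = (X - T.mean(X, axis=0)) / T.std(X, axis=0)   # z-score each column
Xn = Xn / T.sqrt(X.shape[0])                      # now Xn.T.dot(Xn) gives Pearson r
fisher_z = theano.function([X], TzscorrCols(Xn))
print(fisher_z(np.random.randn(100, 5).astype(theano.config.floatX)))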
Ejemplo n.º 57
0
 def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
     sigma2 = tt.square(sigma)
     Kuu = self.cov_func(Xu)
     Kuf = self.cov_func(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = self.cov_func(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
         trace = 0.0
     elif self.approx == "VFE":
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = ((1.0 / (2.0 * sigma2)) *
                  (tt.sum(self.cov_func(X, diag=True)) -
                   tt.sum(tt.sum(A * A, 0))))
     else:  # DTC
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = 0.0
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - self.mean_func(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
     logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
     quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
     return -1.0 * (constant + logdet + quadratic + trace)
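A reading of the code above (a sketch, not part of the original source): with A = Luu^-1 Kuf and Qff = Kfu Kuu^-1 Kuf, the returned value appears to be the collapsed sparse-GP marginal likelihood

    log p(y) = -0.5 * [ N*log(2*pi) + log|Qff + Lam| + r^T (Qff + Lam)^-1 r ] - trace,

where r = y - mean_func(X), Lam is the FITC/VFE/DTC diagonal, and the trace correction is nonzero only for VFE. The log-determinant and quadratic form are evaluated through the Cholesky factor L_B of I + A Lam^-1 A^T (matrix determinant lemma and Woodbury identity), so the dense N x N covariance is never formed.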
Ejemplo n.º 58
0
 def get_blender_proj(self, camera):
     deg2rad = lambda angle: (angle / 180.) * np.pi
     sa = tensor.sin(deg2rad(-camera[0]))
     ca = tensor.cos(deg2rad(-camera[0]))
     se = tensor.sin(deg2rad(-camera[1]))
     ce = tensor.cos(deg2rad(-camera[1]))
     R_world2obj = tensor.eye(3)
     R_world2obj = tensor.set_subtensor(R_world2obj[0, 0], ca * ce)
     R_world2obj = tensor.set_subtensor(R_world2obj[0, 1], sa * ce)
     R_world2obj = tensor.set_subtensor(R_world2obj[0, 2], -se)
     R_world2obj = tensor.set_subtensor(R_world2obj[1, 0], -sa)
     R_world2obj = tensor.set_subtensor(R_world2obj[1, 1], ca)
     R_world2obj = tensor.set_subtensor(R_world2obj[2, 0], ca * se)
     R_world2obj = tensor.set_subtensor(R_world2obj[2, 1], sa * se)
     R_world2obj = tensor.set_subtensor(R_world2obj[2, 2], ce)
     R_obj2cam = np.array(
         ((1.910685676922942e-15, 4.371138828673793e-08, 1.0),
          (1.0, -4.371138828673793e-08, -0.0),
          (4.371138828673793e-08, 1.0, -4.371138828673793e-08))).T
     R_world2cam = tensor.dot(R_obj2cam, R_world2obj)
     cam_location = tensor.zeros((3, 1))
     cam_location = tensor.set_subtensor(cam_location[0, 0],
                                         camera[2] * 1.75)
     T_world2cam = -1 * tensor.dot(R_obj2cam, cam_location)
     R_camfix = np.array(((1, 0, 0), (0, -1, 0), (0, 0, -1)))
     R_world2cam = tensor.dot(R_camfix, R_world2cam)
     T_world2cam = tensor.dot(R_camfix, T_world2cam)
     RT = tensor.concatenate([R_world2cam, T_world2cam], axis=1)
     return RT
Ejemplo n.º 59
0
    def _compute_local_cn_acts(self, input, W):
        # Without Scan (Faster than scan, but still way too slow)
        shuffledIn    = input.dimshuffle(0,1,'x')
        shuffledMasks = self.localmask.dimshuffle('x',0,1)

        # cubeIn = T.repeat(shuffledIn,self.localmask.shape[1],2)
        # cubeMasks = T.repeat(shuffledMasks,input.shape[0],0)

        maskedIn     = shuffledIn * shuffledMasks
        maskedInMean = T.sum(maskedIn,axis=1,keepdims=True) / T.sum(shuffledMasks,axis=1,keepdims=True)
        maskedInVar  = T.sum(T.sqr((maskedIn-maskedInMean)*shuffledMasks),axis=1,keepdims=True)/T.sum(shuffledMasks,axis=1,keepdims=True)
        maskedInSTD  = T.sqrt(maskedInVar)

        maskedInSubMean = maskedIn - maskedInMean
        maskedCN        = maskedInSubMean / maskedInSTD
        # maskedCN = maskedInSubMean

        shuffledInCN = maskedCN.dimshuffle(2,0,1)

        allOuts      = T.dot(shuffledInCN, W)

        diagMask     = T.eye(self.localmask.shape[1],self.localmask.shape[1]).dimshuffle(0,'x',1)
        diagMaskAll  = allOuts * diagMask

        activation   = T.sum(diagMaskAll,axis=0)
        return activation
Ejemplo n.º 60
0
 def tangent2ambient(self, X, Z):
     U = tensor.stack((X.U.dot(Z.M) + Z.Up, X.U), 0).reshape((-1, X.U.shape[1]))
     #U = np.hstack((X.U.dot(Z.M) + Z.Up, X.U))
     S = tensor.eye(2*self._k)
     V = tensor.stack((X.V, Z.Vp), 1).reshape((X.V.shape[0], -1))
     #V = np.vstack((X.V, Z.Vp))
     return (U, S, V)