Example #1
 def grad(self, inputs, g_outputs):
     (rho, ) = inputs
     (gz,) = g_outputs
     A = self.Id - tt.mul(rho, self.Wd)
     dinv = tt.nlinalg.matrix_inverse(A).T
     out = tt.mul(dinv, - self.Wd)
     return [tt.as_tensor(tt.sum(tt.mul(out, gz)), ndim=1)]
Example #2
	def NLL(self, y, useMeanOnly=False, sampleWeight=None):

		assert (y.ndim == 2)
		pi = numpy.pi

		err = y - self.mean
		cos_err = 1. -  T.cos(err)

		if useMeanOnly==True or (self.sigma_sqr is None):
			sig_sqr = T.ones_like(cos_err)
		else:
			sig_sqr = self.sigma_sqr

		e = T.sum(cos_err/sig_sqr, axis=1, keepdims=True )

		sig = T.sqrt(sig_sqr)
		sin_err = T.sin(err)
		f = T.prod(sin_err/sig, axis=1, keepdims=True)

		if useMeanOnly==True or (self.corr is None):
			rho = T.zeros_like(f)
		else:
			rho = self.corr

		g = e - T.mul(rho, f)
		rho_sqr = T.sqr(rho)
		h = g/(1 - rho_sqr )

		nll = h + numpy.log(2*pi) + T.sum(T.log(sig_sqr), axis=1, keepdims=True)/2. + T.log(1 - rho_sqr)/2.

		if sampleWeight is None:
			return T.mean(nll)
		return T.sum(T.mul(nll, sampleWeight) )/T.sum(sampleWeight)
Example #3
def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar


    Returns
    -------
    div : Theano scalar
        beta divergence D(X|WH)"""
    div = ifelse(
      T.eq(beta, 2),
      T.sum(1. / 2 * T.power(X - T.dot(H, W), 2)),
      ifelse(
        T.eq(beta, 0),
        T.sum(X / T.dot(H, W) - T.log(X / T.dot(H, W)) - 1),
        ifelse(
          T.eq(beta, 1),
          T.sum(T.mul(X, (T.log(X) - T.log(T.dot(H, W)))) + T.dot(H, W) - X),
          T.sum(1. / (beta * (beta - 1.)) * (T.power(X, beta) +
                (beta - 1.) * T.power(T.dot(H, W), beta) -
                beta * T.mul(X, T.power(T.dot(H, W), beta - 1)))))))
    return div
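The divergence above reduces to the Euclidean cost (beta = 2), generalized Kullback-Leibler (beta = 1) and Itakura-Saito (beta = 0) divergences. A minimal usage sketch, assuming beta_div is defined as above together with its imports (theano, theano.tensor as T, ifelse from theano.ifelse) and that W is laid out as components x features, as T.dot(H, W) implies:

import numpy as np
import theano
import theano.tensor as T

X, W, H = T.matrix('X'), T.matrix('W'), T.matrix('H')
beta = T.scalar('beta')
# ifelse is lazy, so only the branch selected by beta is evaluated at run time.
div_fn = theano.function([X, W, H, beta], beta_div(X, W, H, beta),
                         allow_input_downcast=True)

rng = np.random.RandomState(0)
Xv, Hv, Wv = rng.rand(4, 6), rng.rand(4, 3), rng.rand(3, 6)
print(div_fn(Xv, Wv, Hv, 2.0))               # beta = 2 ...
print(0.5 * np.sum((Xv - Hv.dot(Wv)) ** 2))  # ... equals half the squared Frobenius norm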
Example #4
def ConvByPattern(x, patterns, mask=None):
    W = np.transpose(patterns, (3, 0, 1, 2))
    out2 = T.nnet.conv2d(x.dimshuffle(0, 3, 1, 2),
                         W,
                         filter_shape=W.shape,
                         border_mode='half')
    if mask is not None:
        ## mask has shape (batchSize, #rows_to_be_masked, nCols)

        ## a subtensor of out2 along the horiz direction
        out2_sub_horiz = out2[:, :, :mask.shape[1], :]
        mask_horiz = mask.dimshuffle(0, 'x', 1, 2)
        out3 = T.set_subtensor(out2_sub_horiz, T.mul(out2_sub_horiz,
                                                     mask_horiz))

        ## a subtensor of out3 along the vertical direction
        out3_sub_vertical = out3[:, :, :, :mask.shape[1]]
        mask_vertical = mask.dimshuffle(0, 'x', 2, 1)
        y = T.set_subtensor(out3_sub_vertical,
                            T.mul(out3_sub_vertical, mask_vertical))
    else:
        y = out2

    y = y.dimshuffle(0, 2, 3, 1)

    return y / np.prod(patterns.shape[1:3])
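A short usage sketch without masking, assuming the layout implied by the transpose/dimshuffle calls above: x as (batch, rows, cols, channels) and patterns as (channels, patRows, patCols, nPatterns); the shapes below are made up for illustration:

import numpy as np
import theano
import theano.tensor as T

x = T.tensor4('x')
# 5 patterns of size 3x3 over 2 channels
patterns = np.random.rand(2, 3, 3, 5).astype(theano.config.floatX)
conv_fn = theano.function([x], ConvByPattern(x, patterns))

xv = np.random.rand(4, 10, 10, 2).astype(theano.config.floatX)  # (batch, rows, cols, channels)
print(conv_fn(xv).shape)  # (4, 10, 10, 5): spatial size is preserved by border_mode='half'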
Example #5
    def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3,
                       t_n_out):

        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

        pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
        pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

        preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)
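        # preact packs the four gates column-wise as
        # [input | forget | cell candidate | output], each block t_n_out columns
        # wide; the slices below pull them apart.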



        inner_act = self.activation # T.nnet.hard_sigmoid #T.tanh # T.nnet.hard_sigmoid T.tanh
        gate_act = self.sigmoid()  # T.nnet.hard_sigmoid #T.nnet.sigmoid

        # Input Gate
        ig_t1 = gate_act(preact[:, 0:t_n_out])
        # Forget Gate
        fg_t1 = gate_act(preact[:, 1 * t_n_out:2 * t_n_out])
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(preact[:, 2 * t_n_out:3 * t_n_out])))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig

        cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)
        # Output Gate
        og_t1 = gate_act(preact[:, 3 * t_n_out:4 * t_n_out])
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1]
Example #6
 def create_weight_update_functions(self):
     updates = []
     for i in range(len(self.error_gradients)):
         updates.append((self.weights[i],
                         g(T.sub(self.weights[i],
                                 T.mul(T.mul(self.error_gradients[-(i+1)], self.alpha),
                                       self.batch_size_divisor)))))
         updates.append((self.biases[i],
                         g(T.sub(self.biases[i],
                                 T.mul(T.mul(self.errors[-(i+1)], self.alpha),
                                       self.batch_size_divisor)))))
         
     self.update_weight_function = function(inputs=[self.idx,self.alpha],updates= updates) 
Example #7
    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one trainng
        step of the dA """

        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)

        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # Calculate cross-entropy cost (as alternative to MSE) of the reconstruction of the minibatch.

        weight_decay = 0.5 * self.lamda * (T.sum(T.mul(self.W, self.W)) + T.sum(T.mul(self.W_prime, self.W_prime)))
        # Calculate weight decay term to prevent overfitting

        rho_hat = T.sum(y, axis=1) / tilde_x.shape[1]
        KL_divergence = self.beta * T.sum(self.rho * T.log(self.rho / rho_hat) + (1-self.rho) * T.log((1 - self.rho)/(1-rho_hat)))
        # KL divergence sparsity term

        # Calculate overall errors
        cost = T.mean(L) + weight_decay + KL_divergence

        # Compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)

        # Generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
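The weight-decay term uses T.mul(W, W) as an elementwise square. A tiny standalone check, assuming only Theano/NumPy (lamda here is an illustrative value, not taken from the class):

import numpy as np
import theano
import theano.tensor as T

W = T.matrix('W')
lamda = 0.1  # illustrative value
decay_fn = theano.function([W], 0.5 * lamda * T.sum(T.mul(W, W)))

Wv = np.random.randn(3, 4).astype(theano.config.floatX)
print(np.allclose(decay_fn(Wv), 0.5 * lamda * np.sum(Wv ** 2)))  # True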
Example #8
    def recurrence(self, inp, prev_hidden, prev_cell):
        """
            LSTM.recurrence(input_, prev_hidden, prev_cell) -> hidden, cell (batchsize x hidden_size)

            Produces the new hidden and cell state, acting as a single computation step of an LSTM

            @param input_: a batchsize x input_size matrix that represents the new data to input into the LSTM
            @param prev_hidden: a batchsize x hidden_size matrix that represents the previous hidden state of the network
            @param prev_cell: a batchsize x hidden_size matrix that represents the previous cell state of the network
        """

        forget = T.nnet.sigmoid(T.dot(inp, self.weights["f:x"]) +\
                                T.dot(prev_hidden, self.weights["f:h"]) +\
                                self.weights["f:b"])

        input_ = T.nnet.sigmoid(T.dot(inp, self.weights["i:x"]) +\
                                T.dot(prev_hidden, self.weights["i:h"]) +\
                                self.weights["i:b"])

        output = T.nnet.sigmoid(T.dot(inp, self.weights["o:x"]) +\
                                T.dot(prev_hidden, self.weights["o:h"]) +\
                                self.weights["o:b"])

        cell = T.mul(forget, prev_cell) + T.mul(input_, T.tanh(T.dot(inp, self.weights["c:x"]) +\
                                                                     T.dot(prev_hidden, self.weights["c:h"]) +\
                                                                     self.weights["c:b"]))

        hidden = T.mul(output, cell)

        return hidden, cell
Example #9
def H_beta_sub(X, W, Wsub, H, Hsub, beta):
    """Update group activation with beta divergence

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    Wsub : Theano tensor
        group Bases        
    H : Theano tensor
        activation matrix
    Hsub : Theano tensor
        group activation matrix
    beta : Theano scalar

    Returns
    -------
    Hsub : Theano tensor
        Updated version of the group activations
    """
    up = ifelse(T.eq(beta, 2), (T.dot(X, Wsub)) / (T.dot(T.dot(H, W.T), Wsub)),
                (T.dot(T.mul(T.power(T.dot(H, W.T), (beta - 2)), X), Wsub)) /
                (T.dot(T.power(T.dot(H, W.T), (beta-1)), Wsub)))
    return T.mul(Hsub, up)
Example #10
    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one trainng
        step of the dA """

        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)

        L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # Calculate cross-entropy cost (as alternative to MSE) of the reconstruction of the minibatch.

        weight_decay = 0.5 * self.lamda * (T.sum(T.mul(
            self.W, self.W)) + T.sum(T.mul(self.W_prime, self.W_prime)))
        # Calculate weight decay term to prevent overfitting

        rho_hat = T.sum(y, axis=1) / tilde_x.shape[1]
        KL_divergence = self.beta * T.sum(
            self.rho * T.log(self.rho / rho_hat) +
            (1 - self.rho) * T.log((1 - self.rho) / (1 - rho_hat)))
        # KL divergence sparsity term

        # Calculate overall errors
        cost = T.mean(L) + weight_decay + KL_divergence

        # Compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)

        # Generate the list of updates
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(self.params, gparams)]

        return (cost, updates)
Example #11
    def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,
                       t_n_out):

        ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

        inner_act = self.activation
        gate_act = self.sigmoid()

        # Input Gate
        ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out], cur_w_in_sig[:, 0:t_n_out]))
        # Forget Gate
        fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out],
                               cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(
            T.add(ifco[:, 2 * t_n_out:3 * t_n_out], cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
        # functionality: cs_t1 =   T.switch(mask , cs_t1, pre_cell_sig)

        # Output Gate
        og_t1 = gate_act(
            T.add(ifco[:, 3 * t_n_out:4 * t_n_out], cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1]
Example #12
    def log_loss(self, y, L_input):
        #return -T.dot(T.log(self.p_y_given_x), T.transpose(y))[T.arange(y.shape[0]), T.arange(y.shape[0])]

        y_train_standard = y

        #tomm5_pos_range = numpy.concatenate([numpy.arange(30, 80), numpy.arange(185, 186)]).tolist()
        #y_train_tomm5 = self.partial_pos(y, tomm5_pos_range, y.shape[0])

        tomm5_range = numpy.arange(55, 85).tolist()
        tomm5_range_versus = numpy.concatenate(
            [numpy.arange(30, 55),
             numpy.arange(185, 186)]).tolist()
        y_train_tomm5 = self.partial_versus(y, tomm5_range, tomm5_range_versus,
                                            y.shape[0])

        symprx_range = numpy.arange(60, 80).tolist()
        symprx_range_versus = numpy.concatenate(
            [numpy.arange(145, 165),
             numpy.arange(185, 186)]).tolist()
        y_train_symprx = self.partial_versus(y, symprx_range,
                                             symprx_range_versus, y.shape[0])


        return - ( T.switch(self.L_standard.shape[0] > 0, T.sum(T.sum(T.mul(T.log(self.p_train_standard[self.L_standard, :]), y_train_standard[self.L_standard, :]), axis=1)), 0) \
         + T.switch(self.L_tomm5.shape[0] > 0, T.sum(T.sum(T.mul(T.log(self.p_train_tomm5[self.L_tomm5, :]), y_train_tomm5[self.L_tomm5, :]), axis=1)), 0) \
         + T.switch(self.L_symprx.shape[0] > 0, T.sum(T.sum(T.mul(T.log(self.p_train_symprx[self.L_symprx, :]), y_train_symprx[self.L_symprx, :]), axis=1)), 0))
        # sym_prx_y = T.concatenate([ (T.sum(y[:, 60:80], axis=1) / ( T.sum(y[:, 60:80], axis=1) + T.sum(y[:, 145:165], axis=1) + y[:, 185] )).reshape((50, 1)) , (1.0 - T.sum(y[:, 60:80], axis=1) / ( T.sum(y[:, 60:80], axis=1) + T.sum(y[:, 145:165], axis=1) + y[:, 185] )).reshape((50, 1)) ], axis=1)
Example #13
def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar


    Returns
    -------
    div : Theano scalar
        beta divergence D(X|WH)"""
    div = ifelse(
        T.eq(beta, 2), T.sum(1. / 2 * T.power(X - T.dot(H, W), 2)),
        ifelse(
            T.eq(beta, 0), T.sum(X / T.dot(H, W) - T.log(X / T.dot(H, W)) - 1),
            ifelse(
                T.eq(beta, 1),
                T.sum(
                    T.mul(X, (T.log(X) - T.log(T.dot(H, W)))) + T.dot(H, W) -
                    X),
                T.sum(1. / (beta * (beta - 1.)) *
                      (T.power(X, beta) +
                       (beta - 1.) * T.power(T.dot(H, W), beta) -
                       beta * T.mul(X, T.power(T.dot(H, W), beta - 1)))))))
    return div
Example #14
def beta_H_groupSparse(X, W, H, beta, l_sp, start, stop):
    """Update activation with beta divergence

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar

    Returns
    -------
    H : Theano tensor
        Updated version of the activations
    """
    results, _ = theano.scan(fn=lambda start_i, stop_i, prior_results, H:
                             T.set_subtensor(
                                prior_results[:, start_i:stop_i].T,
                                H[:, start_i:stop_i].T /
                                H[:, start_i:stop_i].norm(2, axis=1)).T,
                             outputs_info=T.zeros_like(H),
                             sequences=[start, stop],
                             non_sequences=H)
    cst = results[-1]
    up = ifelse(T.eq(beta, 2), (T.dot(X, W)) / (T.dot(T.dot(H, W.T), W) +
                                                l_sp * cst),
                               (T.dot(T.mul(T.power(T.dot(H, W.T),
                                            (beta - 2)), X), W)) /
                               (T.dot(T.power(T.dot(H, W.T), (beta-1)), W) +
                                l_sp * cst))
    return T.mul(H, up)
Example #15
def beta_H_Sparse(X, W, H, beta, l_sp):
    """Update activation with beta divergence

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar

    Returns
    -------
    H : Theano tensor
        Updated version of the activations
    """
    up = ifelse(T.eq(beta, 2), (T.dot(X, W)) / (T.dot(T.dot(H, W.T), W) +
                                                l_sp),
                               (T.dot(T.mul(T.power(T.dot(H, W.T),
                                            (beta - 2)), X), W)) /
                               (T.dot(T.power(T.dot(H, W.T), (beta-1)), W) +
                                l_sp))
    return T.mul(H, up)
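A minimal sketch of running the multiplicative update, assuming beta_H_Sparse is defined as above with its imports (theano, theano.tensor as T, ifelse); W is taken as features x components, which is what the W.T usages imply, and the sparsity weight is arbitrary:

import numpy as np
import theano
import theano.tensor as T

X, W, H = T.matrix('X'), T.matrix('W'), T.matrix('H')
beta, l_sp = T.scalar('beta'), T.scalar('l_sp')
update_H = theano.function([X, W, H, beta, l_sp],
                           beta_H_Sparse(X, W, H, beta, l_sp),
                           allow_input_downcast=True)

rng = np.random.RandomState(0)
Xv = rng.rand(20, 8)   # 20 frames x 8 features
Wv = rng.rand(8, 4)    # 8 features x 4 components
Hv = rng.rand(20, 4)   # activations
for _ in range(10):    # a few multiplicative updates keep H non-negative
    Hv = update_H(Xv, Wv, Hv, 2.0, 0.1)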
Example #16
def W_beta_sub_withcst(X, W, Wsub, H, Hsub, beta, sum_grp, lambda_grp, card_grp):
    """Update group activation with beta divergence

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    Wsub : Theano tensor
        group Bases        
    H : Theano tensor
        activation matrix
    Hsub : Theano tensor
        group activation matrix
    beta : Theano scalar

    Returns
    -------
    Wsub : Theano tensor
        Updated version of the group bases
    """
    up = ifelse(T.eq(beta, 2), (T.dot(X.T, Hsub) + lambda_grp * sum_grp) /
                               (T.dot(T.dot(H, W.T).T, Hsub) + lambda_grp * card_grp * Wsub),
                (T.dot(T.mul(T.power(T.dot(H, W.T), (beta - 2)), X).T, Hsub)+
                 lambda_grp * sum_grp) /
                (T.dot(T.power(T.dot(H, W.T), (beta-1)).T, Hsub) +
                 lambda_grp * card_grp * Wsub))
    return T.mul(Wsub, up)
Example #17
 def get_output_for(self, inputs, **kwargs):
     mod1 = T.clip(T.sqrt(T.sum(T.sqr(inputs[0]), axis=self.axis)),
                   self.tol, 1000.)
     mod2 = T.clip(T.sqrt(T.sum(T.sqr(inputs[1]), axis=self.axis)),
                   self.tol, 1000.)
     return T.sum(T.mul(inputs[0], inputs[1]), axis=self.axis) / T.mul(
         mod1, mod2)
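The layer above computes a row-wise cosine similarity between its two inputs. A standalone sketch of the same computation on plain matrices, assuming axis=1 and the same norm clipping (it does not depend on the Lasagne layer machinery):

import numpy as np
import theano
import theano.tensor as T

a, b = T.matrix('a'), T.matrix('b')
tol = 1e-6
mod_a = T.clip(T.sqrt(T.sum(T.sqr(a), axis=1)), tol, 1000.)
mod_b = T.clip(T.sqrt(T.sum(T.sqr(b), axis=1)), tol, 1000.)
cos_sim = theano.function([a, b], T.sum(T.mul(a, b), axis=1) / T.mul(mod_a, mod_b))

av = np.random.randn(5, 3).astype(theano.config.floatX)
print(cos_sim(av, av))  # ~1.0 for every row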
Example #18
 def create_backprop_gradient_functions(self):
     self.errors =[]
     self.error_gradients = []
     error_function = None
     error_gradient = None
     for i in range(len(self.weights)):
         if len(self.errors) == 0:
             #this is the last layer of the net: The error is X - t because of 
             #the combination of softmax and cross entropy cost function
             error_function = g(T.sub(self.feedforward,self.t[self.idx]))  
             self.errors.append(error_function)
             error_gradient = g(T.dot(self.z[-2].T,self.errors[i]))       
             error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, -1)        
             self.error_gradients.append(error_gradient)
             
         elif (len(self.weights) - 1) == i:  
             #this involves the input X instead of z-values as it is the first weights that
             #need to be updated                   
             self.errors.append(g(T.mul(T.dot(self.errors[-1],self.weights[1].T),
                                      self.layers[1].activation_derivative(self.z[0])))) 
             
             error_gradient = g(T.dot(self.X[self.idx].T,self.errors[-1]))      
             #error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, 0)  
             self.error_gradients.append(error_gradient)
         else:
             self.errors.append(g(T.mul(T.dot(self.errors[-1],self.weights[-i].T),
                                      self.layers[-(i+1)].activation_derivative(self.z[-(i+1)]))))
             
             error_gradient = g(T.dot(self.z[-(i+2)].T,self.errors[-1]))     
             #error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, -(i+1))  
             self.error_gradients.append(error_gradient)     
Example #19
def minus_corr(u, v):
    um = T.sub(u, T.mean(u))
    vm = T.sub(v, T.mean(v))
    r_num = T.sum(T.mul(um, vm))
    r_den = T.sqrt(T.mul(T.sum(T.sqr(um)), T.sum(T.sqr(vm))))
    r = T.true_div(r_num, r_den)
    r = T.neg(r)
    return r
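A quick check of the sign convention, assuming only Theano/NumPy: minus_corr returns the negated Pearson correlation, so strongly correlated inputs give a value near -1.

import numpy as np
import theano
import theano.tensor as T

u, v = T.vector('u'), T.vector('v')
neg_corr = theano.function([u, v], minus_corr(u, v), allow_input_downcast=True)

rng = np.random.RandomState(0)
a = rng.randn(100)
b = 2.0 * a + 0.1 * rng.randn(100)
print(neg_corr(a, b))            # close to -1
print(-np.corrcoef(a, b)[0, 1])  # reference value from NumPy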
Example #20
def pearson_correlation(x, y):
    print('Using PLCC metric')
    muy_ypred = T.mean(x)
    muy_y = T.mean(y)
    numerator = T.sum(T.mul(x - muy_ypred, y - muy_y))
    denominator = T.mul(T.sqrt(T.sum(T.square(x - muy_ypred))),
                        T.sqrt(T.sum(T.sqr(y - muy_y)))) + 1e-10
    return numerator / denominator
Example #21
 def create_momentum_weight_update_functions(self):
     momentum_updates = []
     for i in range(len(self.H.L.momentum_weights)):
         momentum_updates.append(
             (self.H.L.momentum_weights[i],
              g(T.mul(self.batch_size_divisor,
                      T.sub(T.mul(self.M, self.H.L.momentum_weights[i]),
                            T.mul(self.alpha, self.error_gradients[-(i+1)]))))))
         
     self.H.L.momentum_update_function = function(inputs=[self.idx, self.M, self.alpha],                  
       updates=momentum_updates) 
Example #22
 def grad(self, inputs, g_outputs):
     (rho, ) = inputs
     (gz,) = g_outputs
     A = self.Id - tt.mul(rho, self.Wd)
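     # dinv below approximates (Id - rho*Wd)^(-1) by a truncated Neumann series,
     # assuming self.WW and self.WWW hold the precomputed powers W^2 and W^3.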
     dinv = self.I + ts.mul_s_d(self.W, rho) 
     dinv += ts.mul_s_d(self.WW, rho**2)
     dinv += ts.mul_s_d(self.WWW, rho**3)
     out = tt.mul(dinv, - self.Wd)
     return [tt.as_tensor(tt.sum(tt.mul(out, gz)), ndim=1)]
Example #23
 def lmul_T(self, x):
     CC, RR = self.split_right_shape(tuple(x.shape), T=True)
     x_WT = theano.dot(
             x.reshape((tensor.mul(*CC), tensor.mul(*RR))),
             self._W.T)
     cshape = self.col_shape()
     yshp = tensor.stack(*(CC + cshape))
     rval = x_WT.reshape(yshp, ndim=len(CC) + len(cshape))
     return rval
Example #24
 def f1_score(self, y):
     n_total = y.shape[0]
     n_relevant_documents_predicted = T.sum(T.eq(T.ones(self.y_pred.shape), self.y_pred))
     two_vector = T.add(T.ones(self.y_pred.shape), T.ones(self.y_pred.shape))
     n_relevant_predicted_correctly = T.sum(T.eq(T.add(self.y_pred, y), two_vector))
     precision = T.true_div(n_relevant_predicted_correctly, n_relevant_documents_predicted)
     recall = T.true_div(n_relevant_predicted_correctly, n_total)
     f1_score =  T.mul(2.0, T.true_div(T.mul(precision, recall), T.add(precision, recall)))
     return [f1_score, precision, recall]
Example #25
 def lmul(self, x):
     # dot(x, A)
     RR, CC = self.split_left_shape(tuple(x.shape), T=False)
     xW = theano.dot(
             x.reshape((tensor.mul(*RR), tensor.mul(*CC))),
             self._W)
     rshape = self.row_shape()
     yshp = tensor.stack(*(RR + rshape))
     rval = xW.reshape(yshp, ndim=len(RR) + len(rshape))
     return rval
Example #26
 def __objective_triple(self, triple):
     """
     form the objective function value of a triple
     :param triple: (entity_l, entity_r, relation)
     :return:
     """
     l_index, r_index, relation_index = triple
     return T.nlinalg.norm(T.mul(self.Relation_L[relation_index, :, :], self.Entity[:, l_index]) -
                           T.mul(self.Relation_R[relation_index, :, :], self.Entity[:, r_index]),
                           ord=1)
Example #27
 def set_dropout(self, dropout, activation_function):
     action_with_drop = None
     if dropout > 0:
         action_with_drop = lambda X: T.mul(activation_function(X),self.dropout_function)            
         self.activation_cv_dropout = lambda X: T.mul(activation_function(X),self.dropout_function_cv)
     else:
         action_with_drop = activation_function
         self.activation_cv_dropout = activation_function
         
     return action_with_drop
Example #28
 def square_dist(self, X, Z):
     X = tt.mul(X, 1.0)
     Xs = tt.sum(tt.square(X), 1)
     if Z is None:
         return -2.0 * tt.dot(X, tt.transpose(X)) +\
                (tt.reshape(Xs, (-1, 1)) + tt.reshape(Xs, (1, -1)))
     else:
         Z = tt.mul(Z, 1.0)
         Zs = tt.sum(tt.square(Z), 1)
         return -2.0 * tt.dot(X, tt.transpose(Z)) +\
                (tt.reshape(Xs, (-1, 1)) + tt.reshape(Zs, (1, -1)))
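square_dist expands ||x_i - z_j||^2 as ||x_i||^2 + ||z_j||^2 - 2 x_i.z_j. A standalone numeric check of that identity (a sketch that only assumes tt is theano.tensor, as in the code above); tiny negative values can appear from floating-point cancellation, which is why the variants further below clip at 0:

import numpy as np
import theano
import theano.tensor as tt

X = tt.matrix('X')
Xs = tt.sum(tt.square(X), 1)
sqd = -2.0 * tt.dot(X, tt.transpose(X)) + (tt.reshape(Xs, (-1, 1)) + tt.reshape(Xs, (1, -1)))
sq_dist = theano.function([X], sqd, allow_input_downcast=True)

Xv = np.random.rand(6, 3)
ref = ((Xv[:, None, :] - Xv[None, :, :]) ** 2).sum(-1)  # brute-force pairwise squared distances
print(np.allclose(sq_dist(Xv), ref, atol=1e-5))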
Example #29
    def __init__(self,
                 nnet,
                 dataset=None,
                 learning_rate=0.01,
                 beta=0.0,
                 sparsity=0.01,
                 weight_decay=0.0):
        if len(dataset) < 2:
            print "Error dataset must contain tuple (train_data,train_target)"
        train_data, train_target = dataset

        target = T.matrix('y')

        square_error = T.mean(0.5 *
                              T.sum(T.pow(target - nnet.output, 2), axis=1))

        avg_activate = T.mean(nnet.hiddenLayer[0].output, axis=0)
        sparsity_penalty = beta * T.sum(
            T.mul(T.log(sparsity / avg_activate), sparsity) +
            T.mul(T.log((1 - sparsity) / T.sub(1, avg_activate)),
                  (1 - sparsity)))

        regularization = 0.5 * weight_decay * (
            T.sum(T.pow(nnet.params[0], 2)) + T.sum(T.pow(nnet.params[2], 2)))

        cost = square_error + sparsity_penalty + regularization

        gparams = [T.grad(cost, param) for param in nnet.params]

        new_params = [
            param - (learning_rate * gparam)
            for param, gparam in zip(nnet.params, gparams)
        ]

        updates = [(param, new_param)
                   for param, new_param in zip(nnet.params, new_params)]

        index = T.lscalar()
        self.train = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                input: train_data[index * batch_size:(index + 1) * batch_size],
                target:
                train_target[index * batch_size:(index + 1) * batch_size]
            })

        self.cost = theano.function(inputs=[],
                                    outputs=cost,
                                    givens={
                                        input: train_data,
                                        target: train_target
                                    })
Example #30
 def __init__(self, x, marginal_flag=None):
     self._params = theano.shared(np.random.randn())
     self.name = x.name
     self._prob_succes = T.nnet.sigmoid(self._params)
     self.pdf = T.mul((self._prob_succes**x),
                      (1.0 - self._prob_succes)**(1 - x))
     self.pdf_function = theano.function([x], self.pdf)
     self.pdf_marg = T.mul((self._prob_succes**x),
                           (1.0 - self._prob_succes)**(1 - x))
     if marginal_flag:
         self.pdf_marg **= marginal_flag
Example #31
    def sequence_iteration(self, output, mask, use_dropout=0, dropout_value=0.5):

        dot_product = T.dot(output, self.t_w_out)

        linear_o = T.add(dot_product, self.t_b_out)


        mask = T.addbroadcast(mask, 2)  # TODO: necessary?
        output = T.mul(mask, linear_o) + T.mul((1. - mask), 1e-6)

        return output  # result
Example #32
 def square_dist(self, X, Xs):
     X = tt.mul(X, 1.0 / self.ls)
     X2 = tt.sum(tt.square(X), 1)
     if Xs is None:
         sqd = (-2.0 * tt.dot(X, tt.transpose(X)) +
                (tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1))))
     else:
         Xs = tt.mul(Xs, 1.0 / self.ls)
         Xs2 = tt.sum(tt.square(Xs), 1)
         sqd = (-2.0 * tt.dot(X, tt.transpose(Xs)) +
                (tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))))
     return tt.clip(sqd, 0.0, np.inf)
Example #33
 def square_dist(self, X, Z):
     X = tt.mul(X, 1.0 / self.lengthscales)
     Xs = tt.sum(tt.square(X), 1)
     if Z is None:
         sqd = -2.0 * tt.dot(X, tt.transpose(X)) +\
               (tt.reshape(Xs, (-1, 1)) + tt.reshape(Xs, (1, -1)))
     else:
         Z = tt.mul(Z, 1.0 / self.lengthscales)
         Zs = tt.sum(tt.square(Z), 1)
         sqd = -2.0 * tt.dot(X, tt.transpose(Z)) +\
               (tt.reshape(Xs, (-1, 1)) + tt.reshape(Zs, (1, -1)))
     return tt.clip(sqd, 0.0, np.inf)
Example #34
 def square_dist(self, X, Xs):
     X = tt.mul(X, 1.0 / self.ls)
     X2 = tt.sum(tt.square(X), 1)
     if Xs is None:
         sqd = (-2.0 * tt.dot(X, tt.transpose(X))
                + (tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1))))
     else:
         Xs = tt.mul(Xs, 1.0 / self.ls)
         Xs2 = tt.sum(tt.square(Xs), 1)
         sqd = (-2.0 * tt.dot(X, tt.transpose(Xs))
                + (tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))))
     return tt.clip(sqd, 0.0, np.inf)
Example #35
    def set_dropout(self, dropout, activation_function):
        action_with_drop = None
        if dropout > 0:
            action_with_drop = lambda X: T.mul(activation_function(X), self.
                                               dropout_function)
            self.activation_cv_dropout = lambda X: T.mul(
                activation_function(X), self.dropout_function_cv)
        else:
            action_with_drop = activation_function
            self.activation_cv_dropout = activation_function

        return action_with_drop
Example #36
    def get_output_for(self, input, **kwargs):
        num_leading_axes = self.num_leading_axes
        if num_leading_axes < 0:
            num_leading_axes += input.ndim
        if input.ndim > num_leading_axes + 1:
            # flatten trailing axes (into (n+1)-tensor for num_leading_axes=n)
            input = input.flatten(num_leading_axes + 1)

        t = lasagne.nonlinearities.sigmoid(T.dot(input, self.W_t) + self.b_t)
        g = self.nonlinearity(T.dot(input, self.W_h) + self.b_h)

        return T.mul(t,g) + T.mul(1-t, input)
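The return line is the highway-network mixing rule: the transform gate t blends the candidate activation g with the untouched input. A minimal standalone sketch of that blend, assuming plain Theano matrices with the gate already squashed into (0, 1):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')   # carried input
g = T.matrix('g')   # candidate activation
t = T.matrix('t')   # transform gate in (0, 1)
mix = theano.function([x, g, t], T.mul(t, g) + T.mul(1 - t, x))

xv = np.zeros((2, 3), dtype=theano.config.floatX)
gv = np.ones((2, 3), dtype=theano.config.floatX)
tv = np.full((2, 3), 0.25, dtype=theano.config.floatX)
print(mix(xv, gv, tv))  # 0.25 everywhere: 25% transformed, 75% carried through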
Example #37
 def square_dist(self, X, Z):
     X = tt.mul(X, 1.0 / self.lengthscales)
     Xs = tt.sum(tt.square(X), 1)
     if Z is None:
         sqd = -2.0 * tt.dot(X, tt.transpose(X)) +\
               (tt.reshape(Xs, (-1, 1)) + tt.reshape(Xs, (1, -1)))
     else:
         Z = tt.mul(Z, 1.0 / self.lengthscales)
         Zs = tt.sum(tt.square(Z), 1)
         sqd = -2.0 * tt.dot(X, tt.transpose(Z)) +\
               (tt.reshape(Xs, (-1, 1)) + tt.reshape(Zs, (1, -1)))
     return tt.clip(sqd, 0.0, np.inf)
Example #38
    def sequence_iteration(self,
                           output,
                           mask,
                           use_dropout=0,
                           dropout_value=0.5):

        dot_product = T.dot(output, self.t_w_out)

        linear_o = T.add(dot_product, self.t_b_out)

        mask = T.addbroadcast(mask, 2)  # TODO: necessary?
        output = T.mul(mask, linear_o) + T.mul((1. - mask), 1e-6)

        return output  # result
Example #39
    def sequence_iteration(self, output, mask,use_dropout=0,dropout_value=0.5):

        dot_product = T.dot(output , self.t_w_out)

        net_o = T.add( dot_product , self.t_b_out )

        ex_net = T.exp(net_o)
        sum_net = T.sum(ex_net, axis=2, keepdims=True)
        softmax_o = ex_net / sum_net


        mask = T.addbroadcast(mask, 2)  # TODO: necessary?
        output = T.mul(mask, softmax_o)   + T.mul( (1. - mask) , 1e-6 )

        return output #result
Example #40
def beta_div(X, W, H, beta):
    """Compute betat divergence"""
    div = ifelse(
        T.eq(beta, 2),
        T.sum(1. / 2 * T.power(X - T.dot(H, W), 2)),
        ifelse(
            T.eq(beta, 0),
            T.sum(X / T.dot(H, W) - T.log(X / T.dot(H, W)) - 1),
            ifelse(
                T.eq(beta, 1),
                T.sum(T.mul(X, (T.log(X) - T.log(T.dot(H, W)))) + T.dot(H, W) - X),
                T.sum(1. / (beta * (beta - 1.)) *
                      (T.power(X, beta) +
                       (beta - 1.) * T.power(T.dot(H, W), beta) -
                       beta * T.mul(X, T.power(T.dot(H, W), beta - 1)))))))
    return div
Example #41
    def NLL(self, y, useMeanOnly=False, sampleWeight=None):

        assert (y.ndim == 2)

        pi = numpy.pi

        if self.n_variables == 1:
            e = T.sqr(y - self.mean) / 2.
            nll = numpy.log(2 * pi) / 2.

            if useMeanOnly or (self.sigma_sqr is None):
                nll = nll + e
            else:
                e = e / self.sigma_sqr
                nll = nll + e + T.log(self.sigma_sqr) / 2.

        else:
            err = y - self.mean
            err_sqr = T.sqr(err)

            if useMeanOnly or (self.sigma_sqr is None):
                sig_sqr = T.ones_like(err_sqr)
            else:
                sig_sqr = self.sigma_sqr

            nll = T.sum(
                T.log(sig_sqr) + numpy.log(2 * pi), axis=1, keepdims=True) / 2.

            e = T.sum(err_sqr / sig_sqr, axis=1, keepdims=True)

            sig = T.sqrt(sig_sqr)
            f = T.prod(err / sig, axis=1, keepdims=True)

            if useMeanOnly or (self.corr is None):
                rho = T.zeros_like(e)
            else:
                rho = self.corr

            g = e - T.mul(rho, f) * 2.

            rho_sqr = T.sqr(rho)
            h = g / (2 * (1 - rho_sqr))

            nll = nll + h + T.log(1 - rho_sqr) / 2.

        if sampleWeight is None:
            return T.mean(nll)
        return T.sum(T.mul(nll, sampleWeight)) / T.sum(sampleWeight)
Example #42
def __init():
    dataset = T.matrix("dataset", dtype=config.globalFloatType())
    trans_dataset = T.transpose(dataset)
    dot_mul = T.dot(dataset, trans_dataset)
    l2 = T.sqrt(T.sum(T.square(dataset), axis=1))
    
#     p =printing.Print("l2")
#     l2 = p(l2)
    
    l2_inv2 = T.inv(l2).dimshuffle(['x', 0])
#     p =printing.Print("l2_inv2")
#     l2_inv2 = p(l2_inv2)
    
    l2_inv1 = T.transpose(l2_inv2)
#     p =printing.Print("l2_inv1")
#     l2_inv1 = p(l2_inv1)
    
    l2_inv = T.dot(l2_inv1, l2_inv2)
    
#     p =printing.Print("l2_inv")
#     l2_inv = p(l2_inv)
    
    affinty = (T.mul(dot_mul, l2_inv) + 1) / 2
    globals()['__affinty_fun'] = theano.function(
             [dataset],
             [affinty],
             allow_input_downcast=True
             )
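The graph above builds a pairwise cosine-similarity matrix rescaled to [0, 1]. The NumPy sketch below (for illustration only, not part of the original module) computes the same quantity as the compiled __affinty_fun:

import numpy as np

def cosine_affinity_np(data):
    # dot products divided by the outer product of row norms, then mapped to [0, 1]
    norms = np.sqrt((data ** 2).sum(axis=1))
    cos = data.dot(data.T) / np.outer(norms, norms)
    return (cos + 1.) / 2.

data = np.random.rand(5, 8)
print(cosine_affinity_np(data))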
Example #43
    def __call__(self, X):
        XY = X.dot(X.T)
        x2 = tt.sum(X**2, axis=1).dimshuffle(0, 'x')
        X2e = tt.repeat(x2, X.shape[0], axis=1)
        H = X2e + X2e.T - 2. * XY

        V = tt.sort(H.flatten())
        length = V.shape[0]
        # median distance
        m = tt.switch(
            tt.eq((length % 2), 0),
            # if even vector
            tt.mean(V[((length // 2) - 1):((length // 2) + 1)]),
            # if odd vector
            V[length // 2])

        h = .5 * m / tt.log(floatX(H.shape[0]) + floatX(1))

        #  RBF
        Kxy = tt.exp(-H / h / 2.0)

        # Derivative
        dxkxy = -tt.dot(Kxy, X)
        sumkxy = tt.sum(Kxy, axis=-1, keepdims=True)
        dxkxy = tt.add(dxkxy, tt.mul(X, sumkxy)) / h

        return Kxy, dxkxy
Example #44
    def create_cost_function(self):
        cost_function = None

        if self.cost_function == Cost_function.cross_entropy:
            cost_function = -T.mean(T.sum(T.mul(self.t[self.idx], T.log(self.feedforward)), axis=0))

        self.cost_function = cost_function
Example #45
    def output_error(self, input_sequence,   true_output, mask):

        outputs = T.nnet.categorical_crossentropy(input_sequence, true_output)

        outputs = T.mul(outputs.dimshuffle(0,1,'x'), mask)

        return T.sum(outputs) / T.sum(mask)
Example #46
    def t_forward_step(self,mask, rzup_in_sig, h_pre,b_rzup, u_rz, u_up,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3, t_n_out):



        signal_act = self.activation
        gate_act = self.sigmoid()

        rzup_in_sig_ln = self.ln(rzup_in_sig, ln_b1, ln_s1)

        rzup_b_in_sig_ln = T.add(rzup_in_sig_ln, b_rzup)

        preact = T.dot( h_pre, u_rz)

        preact_ln = self.ln(preact, ln_b2, ln_s2)

        r = gate_act( T.add( rzup_b_in_sig_ln[:, 0:t_n_out] , preact_ln[:, 0:t_n_out] ))
        z = gate_act( T.add( rzup_b_in_sig_ln[:, t_n_out:2 * t_n_out] , preact_ln[:, t_n_out:2 * t_n_out] ))

        preactx = T.dot(h_pre , u_up)
        preactx_ln = self.ln(preactx, ln_b3, ln_s3)
        h_pre_r_ln = T.mul( preactx_ln, r)

        h_update = signal_act( T.add( rzup_b_in_sig_ln[:, 2*t_n_out:3*t_n_out] , h_pre_r_ln ))

        h_new = T.add( (1.-z) * h_update , z * h_pre )

        mask = T.addbroadcast(mask, 1)
        out_sig =  T.add( mask * h_new   , (1. - mask) * h_pre )

        return out_sig
Example #47
    def output_error(self, input_sequence, true_output, mask):

        outputs = T.nnet.categorical_crossentropy(input_sequence, true_output)

        outputs = T.mul(outputs.dimshuffle(0, 1, 'x'), mask)

        return T.sum(outputs) / T.sum(mask)
Example #48
def rbf_kernel(X):

    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e +  X2e.T - 2. * XY

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # if even vector
        T.mean(T.sort(V)[ ((V.shape[0] // 2) - 1) : ((V.shape[0] // 2) + 1) ]),
        # if odd vector
        T.sort(V)[V.shape[0] // 2])

    h = .5 * h / T.log(T.cast(H.shape[0] + 1., theano.config.floatX))

    # compute the rbf kernel
    kxy = T.exp(-H / h / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / h

    return kxy, dxkxy
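A short usage sketch, assuming rbf_kernel is defined as above with its imports: kxy is the RBF Gram matrix under the median-distance bandwidth heuristic, and dxkxy collects the summed kernel gradients used in SVGD-style updates.

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
kxy, dxkxy = rbf_kernel(X)
kernel_fn = theano.function([X], [kxy, dxkxy], allow_input_downcast=True)

Xv = np.random.randn(10, 2)
kxy_v, dxkxy_v = kernel_fn(Xv)
print(kxy_v.shape, dxkxy_v.shape)        # (10, 10) and (10, 2)
print(np.allclose(np.diag(kxy_v), 1.0))  # k(x, x) = 1 for an RBF kernel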
Example #49
File: normal.py Project: ibab/carl
    def __init__(self, mu, sigma, random_state=None):
        super(MultivariateNormal, self).__init__(mu=mu,
                                                 sigma=sigma,
                                                 random_state=random_state,
                                                 optimizer=None)
        # XXX: The SDP-ness of sigma should be check upon changes

        # ndim
        self.ndim_ = self.mu.shape[0]
        self.make_(self.ndim_, "ndim_func_", args=[])

        # pdf
        L = linalg.cholesky(self.sigma)
        sigma_det = linalg.det(self.sigma)  # XXX: compute from L instead
        sigma_inv = linalg.matrix_inverse(self.sigma)  # XXX: idem

        self.pdf_ = (
            (1. / T.sqrt((2. * np.pi) ** self.ndim_ * T.abs_(sigma_det))) *
            T.exp(-0.5 * T.sum(T.mul(T.dot(self.X - self.mu,
                                           sigma_inv),
                                     self.X - self.mu),
                               axis=1))).ravel()
        self.make_(self.pdf_, "pdf")

        # -log pdf
        self.nnlf_ = -T.log(self.pdf_)  # XXX: for sure this can be better
        self.make_(self.nnlf_, "nnlf")

        # self.rvs_
        self.make_(T.dot(L, self.X.T).T + self.mu, "rvs_func_")
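The pdf_ expression above is the standard multivariate normal density. A quick NumPy/SciPy cross-check of the same closed form, independent of the carl distribution machinery (the values below are made up):

import numpy as np
from scipy.stats import multivariate_normal

mu = np.zeros(3)
sigma = 2.0 * np.eye(3)
x = np.random.randn(5, 3)

manual = (1. / np.sqrt((2. * np.pi) ** 3 * np.abs(np.linalg.det(sigma))) *
          np.exp(-0.5 * np.sum((x - mu).dot(np.linalg.inv(sigma)) * (x - mu), axis=1)))
print(np.allclose(manual, multivariate_normal(mean=mu, cov=sigma).pdf(x)))  # True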
Example #50
def ctc_loss(y_true, y_pred):
	
	def path_probs(predict, y_sym):
		pred_y = predict[:, y_sym]
		rr = recurrence_relation(y_sym.shape[0])

		def step(p_curr, p_prev,rr):
			return p_curr * T.dot(p_prev, rr)

		probabilities, _ = theano.scan(
			step,
			sequences=[pred_y],
			outputs_info=[T.eye(y_sym.shape[0])[0]],
			non_sequences=[rr]
			)
		return probabilities
	
	y_sym_a=T.argmax(y_true,axis=-1)
	n=T.cast(T.add(T.mul(2, y_true.shape[0] - T.sum(y_true[:,-1])),1),'int16')
	y_sym=T.cast(y_sym_a[:n],'int16')
	y_pred = T.clip(y_pred, epsilon, 1.0-epsilon)
	
	forward_probs = path_probs(y_pred, y_sym)
	backward_probs = path_probs(y_pred[::-1], y_sym[::-1])[::-1, ::-1]
	probs = forward_probs * backward_probs / y_pred[:, y_sym]
	total_probs = T.sum(probs)
	#total_probs=T.sum(forward_probs[-1,-2:])
	return -T.log(total_probs)
Example #51
    def negative_log_likelihood(self, y,misClassCost):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y] + T.mul(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y],misClassCost) )
Example #52
File: main.py Project: bengioe/ccae
    def recon_from(self, s):
        # s = (bs, n_out)
        self.mu = theano.shared(
            rng.uniform(0,1,size=(self.n_in,self.n_out)),
            name = 'mu')
        D = numpy.zeros((self.n_in,self.n_out,self.n_out))
        for i in range(self.n_in):
            numpy.fill_diagonal(D[i], 1)
        self.D = theano.shared(D, name='D')
        self.params += [self.mu, self.D]
        """
        r = []
        for i in range(self.n_in):
            k = (s-self.mu[i].reshape((1,self.n_out)))
            l = T.dot(k, self.D[i])
            # v = T.exp(-T.dot(l, k.T)).diagonal()
            # but the dot is expensive for nothing since we're only taking the diagonal
            v = T.exp(-T.mul(l, k).sum(axis=1))
            r.append(v)      

        recon = T.as_tensor_variable(r).T"""

        K = s.dimshuffle('x',0,1) - self.mu.dimshuffle(0,'x',1)
        #numpy.sum(a[:,:,:,numpy.newaxis]*b[:,numpy.newaxis,:,:],axis=-2)
        L = T.sum(K.dimshuffle(0,1,2,'x')*self.D.dimshuffle(0,'x',1,2),axis=-2)
        V = T.exp(-T.mul(L, K).sum(axis=2))
        R = V.T
        self.recon = R
Example #53
 def loss_t(self):
     # equiv to sum_i || Xi^T U g( U^T Xi r_i) - r_i ||^2 
     # X is [d,m,n]
     # I is [m,d]
     I = self.decode_t()
     Rhat = TT.sum(TT.mul(self.X_t.T, I), axis=2).T
     return TT.sum(( Rhat - self.R_t - self.bias_recon_t) ** 2) / (self.m * self.n)