Code example #1
File: regression_test.py Project: vr367305/s4r_metal
    def prepareTraining(self):
        '''
        Prepares the relevant functions
        (details on neural_net_creator's prepareTraining)
        '''
        #loss objective to minimize
        self.prediction = lasagne.layers.get_output(self.network)
        self.prediction=self.prediction[:,0]
        #self.loss = lasagne.objectives.categorical_crossentropy(self.prediction, self.target_var)
        #the loss is now the squared error in the output
        self.loss =  lasagne.objectives.squared_error(self.prediction, self.target_var)
        self.loss = self.loss.mean()

        self.params = lasagne.layers.get_all_params(self.network, trainable=True)
        self.updates = lasagne.updates.nesterov_momentum(
                self.loss, self.params, learning_rate=0.01, momentum=0.9)

        self.test_prediction = lasagne.layers.get_output(self.network, deterministic=True)
        self.test_prediction=self.test_prediction[:,0]
        self.test_loss = lasagne.objectives.squared_error(self.test_prediction, self.target_var)
        self.test_loss = self.test_loss.mean()
        #the accuracy is now the number of sample that achieve a 0.01 precision (can be changed)
        self.test_acc = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.01)
                            , dtype=theano.config.floatX)
        self.test_acc2 = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.05)
                            , dtype=theano.config.floatX)
        self.test_acc3 = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.1)
                            , dtype=theano.config.floatX)

        self.train_fn = theano.function([self.input_var, self.target_var], self.loss, updates=self.updates)

        self.val_fn = theano.function([self.input_var, self.target_var], [self.test_loss,self.test_acc,self.test_acc2,self.test_acc3])

        self.use = theano.function([self.input_var],[self.test_prediction])
Code example #2
File: mlp.py Project: zjh-nudger/BioNLP-ST2016
    def f_score(self,y,label):
        #print dir(x)
        y=T.cast(y,'int32')
        new_y_pred=T.sub(self.y_pred,label)
        new_y=T.sub(y,label)
        
        pre_pos_num=new_y_pred.shape[0]-new_y_pred.nonzero()[0].shape[0]#number of predicted positives
        
        real_pos=new_y.shape[0]-new_y.nonzero()[0].shape[0]
        
        new_y_pred=T.set_subtensor(new_y_pred[new_y_pred.nonzero()[0]],1)
        new_y=T.set_subtensor(new_y[new_y.nonzero()[0]],2)

        r=T.neq(new_y_pred,new_y)
        true_pos=self.y_pred.shape[0]-r.sum()
        #printed_recall=theano.printing.Print('rec:')(pre_pos_num)
        #printed=theano.printing.Print('pre:')(real_pos)
        precision=true_pos / (T.cast(pre_pos_num,'float32')+0.0000001)
        recall=true_pos / (T.cast(real_pos,'float32')+0.0000001)
        
        f_score=(2 * precision * recall) / (precision + recall)
        
        return f_score,precision,recall
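
The subtraction trick above counts positives by turning entries equal to `label` into zeros, then relabels the remaining entries so that only true positives stay equal. A minimal NumPy check of that idea (hypothetical values, not part of the project):

import numpy as np

label = 3
y_pred = np.array([3, 1, 3, 2, 3])
y_true = np.array([3, 3, 1, 2, 3])

p = y_pred - label              # 0 where the prediction equals `label`
t = y_true - label              # 0 where the target equals `label`
pred_pos = (p == 0).sum()       # predicted positives: 3
real_pos = (t == 0).sum()       # actual positives: 3
p[p != 0] = 1                   # non-matching predictions -> 1
t[t != 0] = 2                   # non-matching targets -> 2
true_pos = (p == t).sum()       # equal only where both were `label`: 2
precision = true_pos / float(pred_pos)   # 2/3
recall = true_pos / float(real_pos)      # 2/3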
        
        
Code example #3
def minus_corr(u, v):
    um = T.sub(u, T.mean(u))
    vm = T.sub(v, T.mean(v))
    r_num = T.sum(T.mul(um, vm))
    r_den = T.sqrt(T.mul(T.sum(T.sqr(um)), T.sum(T.sqr(vm))))
    r = T.true_div(r_num, r_den)
    r = T.neg(r)
    return r
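
A hedged usage sketch for minus_corr (the symbolic inputs and test values below are assumptions, not from the original project): compile it into a Theano function and check that perfectly correlated vectors give roughly -1.

import numpy as np
import theano
import theano.tensor as T

u = T.fvector('u')
v = T.fvector('v')
neg_corr = theano.function([u, v], minus_corr(u, v))

a = np.arange(5, dtype='float32')
print neg_corr(a, 2 * a + 1)    # perfectly correlated inputs -> about -1.0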
Code example #4
    def get_output_for(self, inputs, **kwargs):
        #input[0]:(BS,max_senlen,emb_size),input[1]:(BS,1,emb_size),input[2]:(BS,max_sentlen)
        # activation0=(T.dot(inputs[0],self.W_h)).reshape([self.batch_size,self.max_sentlen])+self.b_h.repeat(self.batch_size,0).repeat(self.max_sentlen,1)
        # activation1=T.dot(inputs[1],self.W_q).reshape([self.batch_size]).dimshuffle(0,'x')
        # activation2=T.batched_dot(T.dot(inputs[0],self.W_o),inputs[1].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])

        #the standard dot-product method
        # activation2=T.batched_dot(inputs[0],inputs[1].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])

        #the standard dot-product method
        # activation2=T.batched_dot(inputs[0],inputs[1].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])
        # norm1=T.sqrt(T.sum(T.square(inputs[0]),axis=2))+1e-15
        # norm2=T.sqrt(T.sum(T.square(inputs[1]),axis=2))
        # activation2=activation2/(norm1+norm2)

        # if scoring by the negative Euclidean distance instead:
        activation2=-T.sqrt(T.sum(T.square(T.sub(inputs[0],inputs[1].repeat(self.max_sentlen,1))),axis=2))



        # norm2=T.sqrt(T.sum(T.mul(inputs[0],inputs[0]),axis=2))+1e-15
        # activation2=activation2/norm2
        # activation=(self.nonlinearity(activation0)+self.nonlinearity(activation1)+activation2).reshape([self.batch_size,self.max_sentlen])#.dimshuffle(0,'x',2)#.repeat(self.max_sentlen,axis=1)
        # activation2=(activation2).reshape([self.batch_size,self.max_sentlen])#.dimshuffle(0,'x',2)#.repeat(self.max_sentlen,axis=1)
        # final=T.dot(activation,self.W_o) #(BS,max_sentlen)
        # activation3=T.batched_dot(inputs[0],inputs[1].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])
        # if inputs[2] is not None:
        #     final=inputs[2]*final-(1-inputs[2])*1000000
        alpha=lasagne.nonlinearities.softmax(activation2) #(BS,max_sentlen)
        return alpha
Code example #5
File: dA_modified.py Project: nglazyrin/chordest-py
    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one trainng
        step of the dA """

        # this is how if-then-else is written in Theano
        tilde_x = T.switch(T.gt(corruption_level, 0), self.get_corrupted_input(self.x, corruption_level), self.x)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        act = T.dot(tilde_x, self.W) + self.b
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        # L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        
        L = T.sqrt(T.sum(T.sqr(T.sub(self.x, z)), axis=1))
        reg = T.sum(y, axis=0) / T.shape(y)[0] # sum over training set
        rho = T.constant(0.05)
        beta = T.constant(self.beta)
        reg1 = T.sum(rho * T.log(rho / reg) + (1-rho) * T.log((1-rho) / (1-reg)))
        cost = T.mean(L) + beta * reg1

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = {}
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - learning_rate * gparam
        
        return (cost, collections.OrderedDict(updates.items()))
Code example #6
File: cov.py Project: springcoil/pymc3
 def full(self, X, Xs=None):
     X, Xc, Xs = self._common(X, Xs)
     if Xs is None:
         return tt.dot(Xc, tt.transpose(Xc))
     else:
         Xsc = tt.sub(Xs, self.c)
         return tt.dot(Xc, tt.transpose(Xsc))
Code example #7
File: cov.py Project: aasensio/pymc3
 def full(self, X, Z=None):
     X, Xc, Z = self._common(X, Z)
     if Z is None:
         return tt.dot(Xc, tt.transpose(Xc))
     else:
         Zc = tt.sub(Z, self.c)
         return tt.dot(Xc, tt.transpose(Zc))
Code example #8
File: ConvAE.py Project: npow/cae
    def get_cost_update(self, learning_rate=0.1):
        """Get cost updates
        
        Parameters
        ----------
        learning_rate : float
            learning rate of sgd
        """
        L=T.sum(T.pow(T.sub(self.get_decode(), self.input),2), axis=1);
        cost = 0.5*T.mean(L);

        d_b=T.grad(cost, self.BIAS);
        d_net_out=T.grad(cost, self.decode_layer.pooled_out);

        d_b_decode=T.grad(cost, self.decode_layer.b);
        d_W_decode=T.grad(cost, self.decode_layer.W);
        print d_b_decode.type();
        print d_W_decode.type();
        #d_b_encode=T.sum(d_net_out, axis=[0,1,2]);
        #print d_b_encode.type();
        #d_W_decode=self.decode_layer.getCP(data_in=self.encode_layer.output,
        #                                   filters=d_net_out);
        #print d_W_decode.shape;

        d_net_in=self.decode_layer.getConvPoolB(data_in=d_net_out,
                                                filters=self.decode_layer.B);
        #T.dot(self.decode_layer.B, d_net_out);
        #print d_net_in.type();
        d_net_in_delta=d_net_in*self.encode_layer.d_activation(self.encode_layer.pooled_out);
        print d_net_in_delta.type();
        d_b_encode=T.sum(d_net_in_delta, axis=[0,1,2]);
        d_W_encode=T.dot(d_net_in_delta, self.input.T);
        print d_W_encode.type();

        d_W=[d_W_encode, d_W_decode];

        updates_weights=[(param_i, param_i-learning_rate*d_W_i)
                         for param_i, d_W_i in zip (self.WEIGHTS, d_W)];

        updates_bias=[(param_i, param_i-learning_rate*d_b_i)
                      for param_i, d_b_i in zip(self.BIAS, d_b)];

        updates=updates_weights+updates_bias;

        #L_B=T.sum(T.pow(T.sub(self.recon_layer.output_B, self.input),2), axis=1);
        #cost_B = 0.5*T.mean(L_B);

        #grad_weights=T.grad(cost, self.WEIGHTS);

        #updates_weights=[(param_i, param_i-learning_rate*(grad_i+learning_rate*rw_i))
        #                for param_i, grad_i, rw_i in zip(self.WEIGHTS, grad_weights, self.RW)];
        
        #grad_bias=T.grad(cost, self.BIAS);

        #updates_bias=[(param_i, param_i-learning_rate*grad_i)
        #              for param_i, grad_i in zip(self.BIAS, grad_bias)];

        #updates=updates_weights+updates_bias;
           
        return (cost, updates);
Code example #9
File: conv_ae.py Project: RitchieWang23/telauges
  def get_updates(self,
                  learning_rate,
                  corruption_level=None,
                  L1_rate=0.000,
                  L2_rate=0.000):
    
    if corruption_level is not None:
      x=self.get_corruption_input(self.input, corruption_level);
      y=self.decode_layer.get_output(self.encode_layer.get_output(x));
    else:
      y=self.decode_layer.out_feature_maps;
      
    cost=T.sum(T.pow(T.sub(self.decode_layer.out_feature_maps, self.feature_maps),2), axis=1);
    
    #cost=self.get_cost(self.feature_maps, y);
    cost+=0.001*((self.encode_layer.filters**2).sum()+(self.decode_layer.filters**2).sum());    
    cost=T.mean(cost);
    
    params=self.encode_layer.params+self.decode_layer.params;
    gparams=T.grad(cost, params);

    updates=[(param_i, param_i-learning_rate*grad_i)
             for param_i, grad_i in zip(params, gparams)];
             
    return cost, updates;
Code example #10
File: dA.py Project: floatdrop/chordest
    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one trainng
        step of the dA """

        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        # L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        
        L = T.sqrt(T.sum(T.sqr(T.sub(self.x, z)), axis=1))
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = {}
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - learning_rate * gparam

        return (cost, updates)
Code example #11
File: grbm.py Project: boundles/DBN
 def free_energy(self, v_sample):
     ''' Function to compute the free energy '''
     wx_b = T.dot(v_sample, self.W) + self.hbias
     diff_v_vbias = T.sub(v_sample, self.vbias)
     diff_v_vbias_T = T.transpose(diff_v_vbias)
     vbias_term = T.dot(diff_v_vbias, diff_v_vbias_T)
     hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
     
     return 0.5 * vbias_term - hidden_term 
Code example #12
File: convae.py Project: Song-Tu/DeepHash
 def get_cost_update(self, learningrate=0.1):
     '''
     '''
     L = T.sum(T.pow(T.sub(self.recon_layer.outputs, self.inputs), 2), axis=1)
     cost = 0.5*T.mean(L)
     grads = T.grad(cost, self.params)
     
     updates = [(param_i, param_i-learningrate*grad_i) 
                for param_i, grad_i in zip(self.params, grads)]
     
     return (cost, updates)
Code example #13
File: nn.py Project: CollinM/asap-sas
def quadratic_weighted_kappa_loss(y_true, y_pred):
    min_rating = T.minimum(T.min(y_true), T.min(y_pred))
    max_rating = T.maximum(T.max(y_true), T.max(y_pred))

    hist_true = T.bincount(y_true, minlength=max_rating)
    hist_pred = T.bincount(y_pred, minlength=max_rating)
    num_ratings = (max_rating - min_rating) + 1
    num_scored = float(len(y_true))

    numerator = T.zeros(1)
    denominator = T.zeros(1)
    z = T.zeros(len(y_true))
    for i_true in range(min_rating, max_rating + 1):
        for j_pred in range(min_rating, max_rating + 1):
            expected = T.true_div(T.mul(hist_true[i_true], hist_pred[j_pred]), num_scored)
            d = T.true_div(T.sqr(i_true - j_pred), T.sqr(num_ratings - 1.))
            conf_mat_cell = T.sum(T.and_(T.eq(T.sub(y_true, i_true), z), T.eq(T.sub(y_pred, j_pred), z)))
            numerator = T.add(numerator, T.true_div(T.mul(d, conf_mat_cell), num_scored))
            denominator = T.add(denominator, T.true_div(T.mul(d, expected), num_scored))

    return T.true_div(numerator, denominator)
Code example #14
File: mlp.py Project: UCSD-AUVSI/Heimdall
def _add_noise_to_input(rng, layer, p):
    """ p is the probablity of replacing a unit with a random number (from 0 to 255)
    """
    if p > 0:
        srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
        # p=1-p because 1's indicate keep and p is prob of dropping
        dropmask = T.cast(srng.binomial(n=1, p=1-p, size=layer.shape),theano.config.floatX)
        noise = T.cast(srng.uniform(low=0., high=255., size=layer.shape),theano.config.floatX)
        # The cast is important because
        # int * float32 = float64 which pulls things off the gpu
        output = (layer*dropmask) + (noise * T.sub(1,dropmask))
        return output
    return layer
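
A small usage sketch for _add_noise_to_input, assuming a float32 matrix of pixel activations (the names and shapes here are illustrative only):

import numpy as np
import theano
import theano.tensor as T
import theano.tensor.shared_randomstreams  # used inside the helper above

rng = np.random.RandomState(1234)
layer = T.fmatrix('layer')                        # activations in [0, 255]
noisy = _add_noise_to_input(rng, layer, p=0.2)    # replace ~20% of units with noise
f = theano.function([layer], noisy)

x = np.zeros((2, 5), dtype='float32')
print f(x)    # roughly 20% of the entries become uniform values in [0, 255)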
Code example #15
File: convnet.py Project: zjh-nudger/BioNLP-ST2016
    def f_score(self, y, label):
        # print dir(x)
        y = T.cast(y, "int32")
        new_y_pred = T.sub(self.y_pred, label)
        new_y = T.sub(y, label)

        pre_pos_num = new_y_pred.shape[0] - new_y_pred.nonzero()[0].shape[0]  # number of predicted positives

        real_pos = new_y.shape[0] - new_y.nonzero()[0].shape[0]

        new_y_pred = T.set_subtensor(new_y_pred[new_y_pred.nonzero()[0]], 1)
        new_y = T.set_subtensor(new_y[new_y.nonzero()[0]], 2)

        r = T.neq(new_y_pred, new_y)
        true_pos = self.y_pred.shape[0] - r.sum()

        precision = true_pos / T.cast(pre_pos_num, "float32")
        recall = true_pos / T.cast(real_pos, "float32")

        f_score = (2 * precision * recall) / (precision + recall)

        return f_score, precision, recall
Code example #16
    def create_weight_update_functions(self):
        updates = []
        for i in range(len(self.error_gradients)):
            updates.append(
                (
                    self.weights[i],
                    g(
                        T.sub(
                            self.weights[i],
                            T.mul(T.mul(self.error_gradients[-(i + 1)], self.alpha), self.batch_size_divisor),
                        )
                    ),
                )
            )
            updates.append(
                (
                    self.biases[i],
                    g(T.sub(self.biases[i], T.mul(T.mul(self.errors[-(i + 1)], self.alpha), self.batch_size_divisor))),
                )
            )

        self.update_weight_function = function(inputs=[self.idx, self.alpha], updates=updates)
Code example #17
    def euclidean_loss(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
                \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a matrix where 1 indicates which class the sample belongs to

        """

        return T.mean(T.sub(y, self.p_y_given_x) ** 2)
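
A standalone sketch of the same expression (hypothetical symbols; `p` stands in for self.p_y_given_x), checked on a tiny one-hot example:

import numpy as np
import theano
import theano.tensor as T

y = T.fmatrix('y')    # one-hot targets
p = T.fmatrix('p')    # predicted class probabilities
loss = theano.function([y, p], T.mean(T.sub(y, p) ** 2))

targets = np.eye(3, dtype='float32')
preds = np.full((3, 3), 1.0 / 3, dtype='float32')
print loss(targets, preds)    # 2/9 ~= 0.222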
Code example #18
    def get_output_for(self, inputs, **kwargs):
        #input[0]:(BS,max_senlen,emb_size),input[1]:(BS,1,emb_size),input[2]:(BS,max_sentlen)
        # activation0=(T.dot(inputs[0],self.W_h)).reshape([self.batch_size,self.max_sentlen])+self.b_h.repeat(self.batch_size,0).repeat(self.max_sentlen,1)
        # activation1=T.dot(inputs[1],self.W_q).reshape([self.batch_size]).dimshuffle(0,'x')
        # activation2=T.batched_dot(T.dot(inputs[0],self.W_o),inputs[1].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])
        #data preprocessing and normalization options

        # inputs[0]=inputs[0]/(T.sqrt(T.sum(T.square(inputs[0]),axis=2)).reshape([self.batch_size,self.max_sentlen,1]).repeat(self.embedding_size,2))-1
        # inputs[1]=inputs[1]/(T.sqrt(T.sum(T.square(inputs[1]),axis=2)).reshape([self.batch_size,1,1]).repeat(self.embedding_size,2))-1

        # aver0=T.mean(inputs[0],-1).reshape([self.batch_size,self.max_sentlen,1]).repeat(self.embedding_size,2)
        # var0=T.sqrt(T.var(inputs[0],-1)).reshape([self.batch_size,self.max_sentlen,1]).repeat(self.embedding_size,2)
        # inputs[0]=(inputs[0]-aver0)/var0
        # aver1=T.mean(inputs[1],-1).reshape([self.batch_size,1,1]).repeat(self.embedding_size,2)
        # var1=T.sqrt(T.var(inputs[1],-1)).reshape([self.batch_size,1,1]).repeat(self.embedding_size,2)
        # inputs[1]=(inputs[1]-aver1)/var1


        #the standard dot-product method
        # activation2=T.batched_dot(inputs[0],inputs[1].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])
        # print 'metric:dot'

        #the standard dot-product method
        # activation2=T.batched_dot(inputs[0],inputs[1].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])
        # norm1=T.sqrt(T.sum(T.square(inputs[0]),axis=2))+1e-15
        # norm2=T.sqrt(T.sum(T.square(inputs[1]),axis=2))
        # activation2=activation2/(norm1*norm2)
        # print 'metric:cos'

        # if scoring by the negative Euclidean distance instead:
        activation2=-T.sqrt(T.sum(T.square(T.sub(inputs[0],inputs[1].repeat(self.max_sentlen,1))),axis=2)+1e-15)
        print 'metric:distance'



        # norm2=T.sqrt(T.sum(T.mul(inputs[0],inputs[0]),axis=2))+1e-15
        # activation2=activation2/norm2
        # activation=(self.nonlinearity(activation0)+self.nonlinearity(activation1)+activation2).reshape([self.batch_size,self.max_sentlen])#.dimshuffle(0,'x',2)#.repeat(self.max_sentlen,axis=1)
        # activation2=(activation2).reshape([self.batch_size,self.max_sentlen])#.dimshuffle(0,'x',2)#.repeat(self.max_sentlen,axis=1)
        # final=T.dot(activation,self.W_o) #(BS,max_sentlen)
        # activation3=T.batched_dot(inputs[0],inputs[1].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])
        # if inputs[2] is not None:
        #     final=inputs[2]*final-(1-inputs[2])*1000000
        alpha=lasagne.nonlinearities.softmax(activation2) #(BS,max_sentlen)
        return alpha
Code example #19
File: regression_test.py Project: vr367305/s4r_metal
def generateData(dim,num):
    names=[]
    data={}
    regions={}
    targets={}
    srng = RandomStreams()
    rv_u = srng.uniform(dim)
    rv_u = T.mul(rv_u,50.0)
    rv_u = T.sub(rv_u,25.0)
    f = function([], rv_u)
    for i in range(num):
        name=str(i)
        names.append(name)
        data[name]=f()
        regions[name]=(0,0,dim[0]-1,dim[1]-1)
        #targets[name]=np.sum(data[name],dtype=np.float32)
        targets[name]=np.amax(data[name])
    return names,data,regions,targets
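
A hedged usage sketch for generateData, assuming the imports used by the example (numpy as np, theano.tensor as T, theano's function, RandomStreams); `dim` is a 2-D shape tuple:

names, data, regions, targets = generateData((4, 4), num=10)
print len(names), data[names[0]].shape, targets[names[0]]
# -> 10, (4, 4), and the per-sample maximum of values drawn from [-25, 25)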
Code example #20
    def create_momentum_weight_update_functions(self):
        momentum_updates = []
        for i in range(len(self.H.L.momentum_weights)):
            momentum_updates.append(
                (
                    self.H.L.momentum_weights[i],
                    g(
                        T.mul(
                            self.batch_size_divisor,
                            T.sub(
                                T.mul(self.M, self.H.L.momentum_weights[i]),
                                T.mul(self.alpha, self.error_gradients[-(i + 1)]),
                            ),
                        )
                    ),
                )
            )

        self.H.L.momentum_update_function = function(inputs=[self.idx, self.M, self.alpha], updates=momentum_updates)
Code example #21
File: ConvAE.py Project: npow/cae
    def get_cost_update(self, learning_rate=0.1):
        """Get cost updates
        
        Parameters
        ----------
        learning_rate : float
            learning rate of sgd
        """
        L=T.sum(T.pow(T.sub(self.get_reconstruction(), self.input),2), axis=1);
        
        cost = 0.5*T.mean(L);

        grads=T.grad(cost, self.params);
        
        updates = [
                   (param_i, param_i-learning_rate*grad_i)
                   for param_i, grad_i in zip(self.params, grads)
                   ];
           
        return (cost, updates);
Code example #22
    def create_backprop_gradient_functions(self):
        self.errors = []
        self.error_gradients = []
        error_function = None
        error_gradient = None
        for i in range(len(self.weights)):
            if len(self.errors) == 0:
                # this is the last layer of the net: The error is X - t because of
                # the combination of softmax and cross entropy cost function
                error_function = g(T.sub(self.feedforward, self.t[self.idx]))
                self.errors.append(error_function)
                error_gradient = g(T.dot(self.z[-2].T, self.errors[i]))
                error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, -1)
                self.error_gradients.append(error_gradient)

            elif (len(self.weights) - 1) == i:
                # this involves the input X instead of z-values as it is the first weights that
                # need to be updated
                self.errors.append(
                    g(T.mul(T.dot(self.errors[-1], self.weights[1].T), self.layers[1].activation_derivative(self.z[0])))
                )

                error_gradient = g(T.dot(self.X[self.idx].T, self.errors[-1]))
                # error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, 0)
                self.error_gradients.append(error_gradient)
            else:
                self.errors.append(
                    g(
                        T.mul(
                            T.dot(self.errors[-1], self.weights[-i].T),
                            self.layers[-(i + 1)].activation_derivative(self.z[-(i + 1)]),
                        )
                    )
                )

                error_gradient = g(T.dot(self.z[-(i + 2)].T, self.errors[-1]))
                # error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, -(i+1))
                self.error_gradients.append(error_gradient)
Code example #23
    def get_output_for(self, inputs, **kwargs):
        '''input[0] is the memory, shape [bs*path_length,n_classes,h_dim];
           input[1] is the hidden state, shape [bs*path_length,1,h_dim]'''
        '''computation of the content part'''
        # activation0=(T.dot(inputs[0][:,:,:self.h_dim],self.W_h)).reshape([self.batch_size,self.max_sentlen])+self.b_h.repeat(self.batch_size,0).repeat(self.max_sentlen,1)
        # activation1=T.dot(inputs[1][:,:,:self.h_dim],self.W_q).reshape([self.batch_size]).dimshuffle(0,'x')
        # activation2=T.batched_dot(inputs[0][:,:,:self.h_dim],inputs[1][:,:,:self.h_dim].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])
        activation2=-T.sqrt(T.sum(T.square(T.sub(inputs[0],inputs[1].repeat(self.max_sentlen,1))),axis=2))
        # activation2=T.batched_dot(T.dot(inputs[0][:,:,:self.h_dim],self.W_o),inputs[1][:,:,:self.h_dim].reshape([self.batch_size,self.embedding_size,1])).reshape([self.batch_size,self.max_sentlen])
        # norm2=T.sqrt(T.sum(T.mul(inputs[0][:,:,:self.h_dim],inputs[0][:,:,:self.h_dim]),axis=2))+0.0000001
        # activation2=activation2/norm2
        activation=(activation2).reshape([self.batch_size,self.max_sentlen])#.dimshuffle(0,'x',2)#.repeat(self.max_sentlen,axis=1)
        alpha=lasagne.nonlinearities.softmax(activation) #(BS,max_sentlen)

        '''computation of the label part'''
        # activation0=(T.dot(inputs[0][:,:,self.h_dim:],self.W_h_label)).reshape([self.batch_size,self.max_sentlen])+self.b_h_label.repeat(self.batch_size,0).repeat(self.max_sentlen,1)
        # activation1=T.dot(inputs[1][:,:,self.h_dim:],self.W_q_label).reshape([self.batch_size]).dimshuffle(0,'x')
        activation2=T.batched_dot(T.dot(inputs[0][:,:,self.h_dim:],self.W_o_label),inputs[1][:,:,self.h_dim:].reshape([self.batch_size,self.n_classes,1])).reshape([self.batch_size,self.max_sentlen])
        activation=(activation2).reshape([self.batch_size,self.max_sentlen])#.dimshuffle(0,'x',2)#.repeat(self.max_sentlen,axis=1)
        beta=lasagne.nonlinearities.softmax(activation) #(BS,max_sentlen)

        alpha=lasagne.nonlinearities.softmax(alpha+5*beta)
        return beta
        return alpha
Code example #24
def w_brier_loss(o, f, class_w):
    """f is the forecast and o is the original outcome"""
    print class_w
    return T.mean(T.dot(T.square(T.sub(f, o)), class_w), axis=-1)
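
A hedged usage sketch for w_brier_loss (symbolic variables and test values below are assumptions): o and f are (n_samples, n_classes) matrices and class_w is a per-class weight vector.

import numpy as np
import theano
import theano.tensor as T

o = T.fmatrix('o')              # one-hot outcomes
f = T.fmatrix('f')              # forecast probabilities
class_w = T.fvector('class_w')  # per-class weights
loss = theano.function([o, f, class_w], w_brier_loss(o, f, class_w))

outcome = np.eye(2, dtype='float32')
forecast = np.array([[0.8, 0.2], [0.4, 0.6]], dtype='float32')
print loss(outcome, forecast, np.ones(2, dtype='float32'))    # -> 0.2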
Code example #25
    def __init__(self, data, image_shape, filter_shape, poolsize, sparse_coeff, activation='sigmoid',
                 tied_weight=False, is_linear=False, do_max_pool=False):
        rng = np.random.RandomState(None)
        self.data = data
        self.batchsize = image_shape[0]
        self.in_channels   = image_shape[1]
        self.in_height     = image_shape[2]
        self.in_width      = image_shape[3]
        self.flt_channels  = filter_shape[0]
        self.flt_height    = filter_shape[2]
        self.flt_width     = filter_shape[3]
        self.input = T.ftensor4('input')
        # self.input = input.reshape(image_shape)
        hidden_layer=ConvolutionLayer(rng,
                                      input=self.input,
                                      filter_shape=filter_shape,
                                      act=activation,
                                      border_mode='full',
                                      if_pool=do_max_pool)

        self.hidden_image_shape = (self.batchsize,
                                   self.flt_channels,
                                   self.in_height+self.flt_height-1,
                                   self.in_width+self.flt_width-1)

        self.hidden_pooled_image_shape = (self.batchsize,
                                          self.flt_channels,
                                          (self.in_height+self.flt_height-1)/2,
                                          (self.in_width+self.flt_width-1)/2)

        self.hidden_filter_shape = (self.in_channels,
                                    self.flt_channels,
                                    self.flt_height,
                                    self.flt_width)
        if sparse_coeff == 0:
            if do_max_pool:
                hidden_layer_output = repeat(hidden_layer.output,
                                             repeats=2,
                                             axis=2)
                hidden_layer_output = repeat(hidden_layer_output,
                                             repeats=2,
                                             axis=3)
            else:
                hidden_layer_output = hidden_layer.output
        else:
            feature_map = hidden_layer.output

            # first per featuremap, then across featuremap
            # feature_map_vec = feature_map.reshape((feature_map.shape[0],
            #                                        feature_map.shape[1], feature_map.shape[2]*feature_map.shape[3]))
            # feat_sparsity = feature_map_vec.norm(2, axis=2)
            # feat_sparsity = feat_sparsity.dimshuffle(0, 1, 'x', 'x')
            # feature_map1 = np.divide(feature_map, feat_sparsity+1e-9)
            # examp_sparsity = feature_map1.norm(2, axis=1)
            # examp_sparsity = examp_sparsity.dimshuffle(0, 'x', 1, 2)
            # feature_map2 = np.divide(feature_map1, examp_sparsity+1e-9)

            # first across featuremap, then per featuremap
            examp_sparsity = feature_map.norm(2, axis=1)
            examp_sparsity = examp_sparsity.dimshuffle(0, 'x', 1, 2)
            feature_map1 = np.divide(feature_map, examp_sparsity+1e-9)
            feature_map1_vec = feature_map1.reshape((feature_map1.shape[0],
                                                   feature_map1.shape[1], feature_map1.shape[2]*feature_map1.shape[3]))
            feat_sparsity = feature_map1_vec.norm(2, axis=2)
            feat_sparsity = feat_sparsity.dimshuffle(0, 1, 'x', 'x')
            feature_map2 = np.divide(feature_map1, feat_sparsity+1e-9)

            if do_max_pool:
                hidden_layer_output = repeat(feature_map2,
                                             repeats=2,
                                             axis=2)
                hidden_layer_output = repeat(hidden_layer_output,
                                             repeats=2,
                                             axis=3)
            else:
                hidden_layer_output = feature_map2

        # recon_layer_input = hidden_layer_output

        if is_linear:
            recon_layer=ConvolutionLayer(rng,
                                         input=hidden_layer_output,
                                         filter_shape=self.hidden_filter_shape,
                                         act='linear',
                                         border_mode='valid')
        else:
            recon_layer=ConvolutionLayer(rng,
                                         input=hidden_layer_output,
                                         filter_shape=self.hidden_filter_shape,
                                         act=activation,
                                         border_mode='valid')


        self.tied_weight = tied_weight
        if self.tied_weight:
            # recon_layer.W = hidden_layer.W
            # recon_layer.W = recon_layer.W.dimshuffle(1,0,2,3)
            weight = hidden_layer.W.get_value()
            recon_layer.W.set_value(weight.transpose(1,0,2,3), borrow=True)

        self.layers = [hidden_layer, recon_layer]
        self.params = sum([layer.params for layer in self.layers], [])

        # self.params = hidden_layer.params + recon_layer.params


        L1_sparsity = hidden_layer_output.norm(1, axis=(2, 3))
        # L1_sparsity = T.sum(np.abs(feature_map2), axis=(2, 3))

        # sparse_filter = T.mean(L1_sparsity.sum(axis=1), axis=(0))
        sparse_filter = T.mean(L1_sparsity, axis=(0, 1))

        # sparsity = T.mean(feature_map2, axis=(2,3))
        # sparse_filter = T.mean(sparsity, axis=(0, 1))

        # L=T.sum(T.pow(T.sub(recon_layer.output, self.input), 2), axis=0)
        L=T.sum(T.pow(T.sub(recon_layer.output, self.input), 2), axis=(1,2,3)) # sum over channel,height, width

        cost = 0.5*T.mean(L) + sparse_coeff * sparse_filter

        grads = T.grad(cost, self.params)

        # learning_rate = 0.1
        # updates = [(param_i, param_i-learning_rate*grad_i)
        #            for param_i, grad_i in zip(self.params, grads)]

        updates = adadelta_updates(self.params, grads, rho=0.95, eps=1e-6)

        # self.train = theano.function(
        # [self.input],
        # cost,
        # updates=updates,
        # name="train cae model")
        index = T.lscalar('index')
        batch_begin = index * self.batchsize
        batch_end = batch_begin + self.batchsize

        self.train = theano.function(
                    inputs=[index],
                    outputs=cost,
                    updates=updates,
                    givens={
                        self.input: self.data[batch_begin:batch_end]
                    },
                    name="train cae model")

        self.activation = downsample.max_pool_2d(
                input=hidden_layer.output,
                ds=poolsize,
                ignore_border=True)

        # self.get_activation = theano.function(
        #     [self.input],
        #     self.activation,
        #     updates=None,
        #     name='get hidden activation')
        # num = T.bscalar
        self.get_activation = theano.function(
            inputs=[index],
            # outputs=self.activation,
            outputs=hidden_layer.output if do_max_pool else self.activation,
            updates=None,
            givens={
                self.input: self.data[batch_begin:batch_end]
            },
            name='get hidden activation')

        # self.get_reconstruction = theano.function(
        #                     inputs=[self.input],
        #                     outputs=recon_layer.output,
        #                     updates=None,
        #                     name='get reconstruction')
        self.get_reconstruction = theano.function(
                            inputs=[index],
                            outputs=recon_layer.output,
                            updates=None,
                            givens={
                                self.input: self.data[batch_begin:batch_end]
                            },
                            name='get reconstruction')
Code example #26
    def get_cost_update(self, learning_rate=0.1):
        """Get cost updates
        
        Parameters
        ----------
        learning_rate : float
            learning rate of sgd
        """
        L = T.sum(T.pow(T.sub(self.get_decode(), self.input), 2), axis=1)
        cost = 0.5 * T.mean(L)

        d_b = T.grad(cost, self.BIAS)
        d_net_out = T.grad(cost, self.decode_layer.pooled_out)

        d_b_decode = T.grad(cost, self.decode_layer.b)
        d_W_decode = T.grad(cost, self.decode_layer.W)
        print d_b_decode.type()
        print d_W_decode.type()
        #d_b_encode=T.sum(d_net_out, axis=[0,1,2]);
        #print d_b_encode.type();
        #d_W_decode=self.decode_layer.getCP(data_in=self.encode_layer.output,
        #                                   filters=d_net_out);
        #print d_W_decode.shape;

        d_net_in = self.decode_layer.getConvPoolB(data_in=d_net_out,
                                                  filters=self.decode_layer.B)
        #T.dot(self.decode_layer.B, d_net_out);
        #print d_net_in.type();
        d_net_in_delta = d_net_in * self.encode_layer.d_activation(
            self.encode_layer.pooled_out)
        print d_net_in_delta.type()
        d_b_encode = T.sum(d_net_in_delta, axis=[0, 1, 2])
        d_W_encode = T.dot(d_net_in_delta, self.input.T)
        print d_W_encode.type()

        d_W = [d_W_encode, d_W_decode]

        updates_weights = [(param_i, param_i - learning_rate * d_W_i)
                           for param_i, d_W_i in zip(self.WEIGHTS, d_W)]

        updates_bias = [(param_i, param_i - learning_rate * d_b_i)
                        for param_i, d_b_i in zip(self.BIAS, d_b)]

        updates = updates_weights + updates_bias

        #L_B=T.sum(T.pow(T.sub(self.recon_layer.output_B, self.input),2), axis=1);
        #cost_B = 0.5*T.mean(L_B);

        #grad_weights=T.grad(cost, self.WEIGHTS);

        #updates_weights=[(param_i, param_i-learning_rate*(grad_i+learning_rate*rw_i))
        #                for param_i, grad_i, rw_i in zip(self.WEIGHTS, grad_weights, self.RW)];

        #grad_bias=T.grad(cost, self.BIAS);

        #updates_bias=[(param_i, param_i-learning_rate*grad_i)
        #              for param_i, grad_i in zip(self.BIAS, grad_bias)];

        #updates=updates_weights+updates_bias;

        return (cost, updates)
Code example #27
activation_k = T.dmatrix('Layer3 outputs')

t = T.dmatrix('Actual output')
delta_w1 = T.dmatrix('Delta w1')
delta_w2 = T.dmatrix('Delta w2')

eta = 0.1

#equations
h = T.dot(x, w1)
activation_h = T.nnet.sigmoid(h)

k = T.dot(activation_h, w2)
activation_k = T.nnet.softmax(k)

cost = (T.sum(T.sub(activation_k, t)**2)) / (2 * X.shape[0])

delta_w1 = T.grad(cost, w1)
delta_w2 = T.grad(cost, w2)

#w1 = w1 - eta * delta w
update_w1 = (w1, w1 - eta * delta_w1)
update_w2 = (w2, w2 - eta * delta_w2)

updates = [update_w1, update_w2]

for i in range (Y.shape[0]):
	value = Y[i]
	target[i][value] = value
	
Code example #28
def MSE_tensor(y, y_pred):
    return T.mean(T.pow(T.sub(y, y_pred), 2))
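
A minimal usage sketch for MSE_tensor (not part of the original file):

import numpy as np
import theano
import theano.tensor as T

y = T.fvector('y')
y_pred = T.fvector('y_pred')
mse = theano.function([y, y_pred], MSE_tensor(y, y_pred))

print mse(np.array([1., 2., 3.], dtype='float32'),
          np.array([1., 2., 4.], dtype='float32'))    # -> 1/3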
Code example #29
    def __init__(self,
                 data,
                 image_shape,
                 filter_shape,
                 poolsize,
                 sparse_coeff,
                 activation='sigmoid',
                 tied_weight=False,
                 is_linear=False,
                 do_max_pool=False):
        rng = np.random.RandomState(None)
        self.data = data
        self.batchsize = image_shape[0]
        self.in_channels = image_shape[1]
        self.in_height = image_shape[2]
        self.in_width = image_shape[3]
        self.flt_channels = filter_shape[0]
        self.flt_height = filter_shape[2]
        self.flt_width = filter_shape[3]
        self.input = T.ftensor4('input')
        # self.input = input.reshape(image_shape)
        hidden_layer = ConvolutionLayer(rng,
                                        input=self.input,
                                        filter_shape=filter_shape,
                                        act=activation,
                                        border_mode='full',
                                        if_pool=do_max_pool)

        self.hidden_image_shape = (self.batchsize, self.flt_channels,
                                   self.in_height + self.flt_height - 1,
                                   self.in_width + self.flt_width - 1)

        self.hidden_pooled_image_shape = (
            self.batchsize, self.flt_channels,
            (self.in_height + self.flt_height - 1) / 2,
            (self.in_width + self.flt_width - 1) / 2)

        self.hidden_filter_shape = (self.in_channels, self.flt_channels,
                                    self.flt_height, self.flt_width)
        if sparse_coeff == 0:
            if do_max_pool:
                hidden_layer_output = repeat(hidden_layer.output,
                                             repeats=2,
                                             axis=2)
                hidden_layer_output = repeat(hidden_layer_output,
                                             repeats=2,
                                             axis=3)
            else:
                hidden_layer_output = hidden_layer.output
        else:
            feature_map = hidden_layer.output

            # first per featuremap, then across featuremap
            # feature_map_vec = feature_map.reshape((feature_map.shape[0],
            #                                        feature_map.shape[1], feature_map.shape[2]*feature_map.shape[3]))
            # feat_sparsity = feature_map_vec.norm(2, axis=2)
            # feat_sparsity = feat_sparsity.dimshuffle(0, 1, 'x', 'x')
            # feature_map1 = np.divide(feature_map, feat_sparsity+1e-9)
            # examp_sparsity = feature_map1.norm(2, axis=1)
            # examp_sparsity = examp_sparsity.dimshuffle(0, 'x', 1, 2)
            # feature_map2 = np.divide(feature_map1, examp_sparsity+1e-9)

            # first across featuremap, then per featuremap
            examp_sparsity = feature_map.norm(2, axis=1)
            examp_sparsity = examp_sparsity.dimshuffle(0, 'x', 1, 2)
            feature_map1 = np.divide(feature_map, examp_sparsity + 1e-9)
            feature_map1_vec = feature_map1.reshape(
                (feature_map1.shape[0], feature_map1.shape[1],
                 feature_map1.shape[2] * feature_map1.shape[3]))
            feat_sparsity = feature_map1_vec.norm(2, axis=2)
            feat_sparsity = feat_sparsity.dimshuffle(0, 1, 'x', 'x')
            feature_map2 = np.divide(feature_map1, feat_sparsity + 1e-9)

            if do_max_pool:
                hidden_layer_output = repeat(feature_map2, repeats=2, axis=2)
                hidden_layer_output = repeat(hidden_layer_output,
                                             repeats=2,
                                             axis=3)
            else:
                hidden_layer_output = feature_map2

        # recon_layer_input = hidden_layer_output

        if is_linear:
            recon_layer = ConvolutionLayer(
                rng,
                input=hidden_layer_output,
                filter_shape=self.hidden_filter_shape,
                act='linear',
                border_mode='valid')
        else:
            recon_layer = ConvolutionLayer(
                rng,
                input=hidden_layer_output,
                filter_shape=self.hidden_filter_shape,
                act=activation,
                border_mode='valid')

        self.tied_weight = tied_weight
        if self.tied_weight:
            # recon_layer.W = hidden_layer.W
            # recon_layer.W = recon_layer.W.dimshuffle(1,0,2,3)
            weight = hidden_layer.W.get_value()
            recon_layer.W.set_value(weight.transpose(1, 0, 2, 3), borrow=True)

        self.layers = [hidden_layer, recon_layer]
        self.params = sum([layer.params for layer in self.layers], [])

        # self.params = hidden_layer.params + recon_layer.params

        L1_sparsity = hidden_layer_output.norm(1, axis=(2, 3))
        # L1_sparsity = T.sum(np.abs(feature_map2), axis=(2, 3))

        # sparse_filter = T.mean(L1_sparsity.sum(axis=1), axis=(0))
        sparse_filter = T.mean(L1_sparsity, axis=(0, 1))

        # sparsity = T.mean(feature_map2, axis=(2,3))
        # sparse_filter = T.mean(sparsity, axis=(0, 1))

        # L=T.sum(T.pow(T.sub(recon_layer.output, self.input), 2), axis=0)
        L = T.sum(T.pow(T.sub(recon_layer.output, self.input), 2),
                  axis=(1, 2, 3))  # sum over channel,height, width

        cost = 0.5 * T.mean(L) + sparse_coeff * sparse_filter

        grads = T.grad(cost, self.params)

        # learning_rate = 0.1
        # updates = [(param_i, param_i-learning_rate*grad_i)
        #            for param_i, grad_i in zip(self.params, grads)]

        updates = adadelta_updates(self.params, grads, rho=0.95, eps=1e-6)

        # self.train = theano.function(
        # [self.input],
        # cost,
        # updates=updates,
        # name="train cae model")
        index = T.lscalar('index')
        batch_begin = index * self.batchsize
        batch_end = batch_begin + self.batchsize

        self.train = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={self.input: self.data[batch_begin:batch_end]},
            name="train cae model")

        self.activation = downsample.max_pool_2d(input=hidden_layer.output,
                                                 ds=poolsize,
                                                 ignore_border=True)

        # self.get_activation = theano.function(
        #     [self.input],
        #     self.activation,
        #     updates=None,
        #     name='get hidden activation')
        # num = T.bscalar
        self.get_activation = theano.function(
            inputs=[index],
            # outputs=self.activation,
            outputs=hidden_layer.output if do_max_pool else self.activation,
            updates=None,
            givens={self.input: self.data[batch_begin:batch_end]},
            name='get hidden activation')

        # self.get_reconstruction = theano.function(
        #                     inputs=[self.input],
        #                     outputs=recon_layer.output,
        #                     updates=None,
        #                     name='get reconstruction')
        self.get_reconstruction = theano.function(
            inputs=[index],
            outputs=recon_layer.output,
            updates=None,
            givens={self.input: self.data[batch_begin:batch_end]},
            name='get reconstruction')
Code example #30
    def __init__(self, signal_shape, filter_shape, poolsize, activation=None):
        rng = np.random.RandomState(None)
        dtensor5 = T.TensorType('float32', (False,)*5)
        self.inputs = dtensor5(name='inputs')
        self.image_shape = signal_shape
        self.batchsize = signal_shape[0]
        self.in_channels   = signal_shape[2]
        self.in_depth      = signal_shape[1]
        self.in_width      = signal_shape[4]
        self.in_height     = signal_shape[3]
        self.flt_channels  = filter_shape[0]
        self.flt_time      = filter_shape[1]
        self.flt_width     = filter_shape[4]
        self.flt_height    = filter_shape[3]
        self.activation = activation

        self.hidden_layer=ConvolutionLayer3D(rng,
                                             input=self.inputs,
                                             signal_shape=signal_shape,
                                             filter_shape=filter_shape,
                                             act=activation,
                                             border_mode='full',
                                             if_hidden_pool=False)

        self.hidden_image_shape = (self.batchsize,
                                   self.in_depth,
                                   self.flt_channels,
                                   self.in_height+self.flt_height-1,
                                   self.in_width+self.flt_width-1)

        self.hidden_pooled_image_shape = (self.batchsize,
                                          self.in_depth/2,
                                          self.flt_channels,
                                          (self.in_height+self.flt_height-1)/2,
                                          (self.in_width+self.flt_width-1)/2)

        self.hidden_filter_shape = (self.in_channels, self.flt_time, self.flt_channels, self.flt_height,
                                    self.flt_width)

        self.recon_layer=ConvolutionLayer3D(rng,
                                 input=self.hidden_layer.output,
                                 signal_shape=self.hidden_image_shape,
                                 filter_shape=self.hidden_filter_shape,
                                 act=activation,
                                 border_mode='valid')

        self.layers = [self.hidden_layer, self.recon_layer]
        self.params = sum([layer.params for layer in self.layers], [])
        L=T.sum(T.pow(T.sub(self.recon_layer.output, self.inputs), 2), axis=(1,2,3,4))
        self.cost = 0.5*T.mean(L)
        self.grads = T.grad(self.cost, self.params)
        self.updates = adadelta_updates(self.params, self.grads, rho=0.95, eps=1e-6)

        self.train = theano.function(
        [self.inputs],
        self.cost,
        updates=self.updates,
        name = "train cae model"
        )

        self.activation = pools.pool_3d(
                input=self.hidden_layer.output.dimshuffle(0,2,1,3,4),
                ds=poolsize,
                ignore_border=True)
        self.activation = self.activation.dimshuffle(0,2,1,3,4)
        self.get_activation = theano.function(
            [self.inputs],
            self.activation,
            updates=None,
            name='get hidden activation')
Code example #31
def main():
    # load the training and validation data sets
    # labels=int(0.7*image.all_count)
    X = T.tensor4()

    # set up theano functions to generate output by feeding data through network
    output_layer_softmax , output_layer_triplet= lasagne_model()
    output_train = lasagne.layers.ReshapeLayer(output_layer_triplet,(-1,3,[1]))
    output_0= lasagne.layers.helper.get_output(lasagne.layers.SliceLayer(output_train,0,1),X)
    output_1= lasagne.layers.helper.get_output(lasagne.layers.SliceLayer(output_train,1,1),X)
    output_2= lasagne.layers.helper.get_output(lasagne.layers.SliceLayer(output_train,2,1),X)
    output= lasagne.layers.helper.get_output(output_layer_softmax,X)

    # set up the loss that we aim to minimize
    eps=1e-10
    dis_pos=T.sqrt(T.sum(T.square(T.sub(output_0,output_1)),1)+eps)
    dis_neg=T.sqrt(T.sum(T.square(T.sub(output_0,output_2)),1)+eps)
    dis=(dis_pos-dis_neg+alpha)
    # dis=(dis_pos-dis_neg)
    loss_train = T.mean((dis)*(dis>0))
    # loss_train = T.sum(T.nnet.relu(dis))
    # loss_train = T.mean(dis)

    # prediction functions for classifications
    pred = T.argmax(output, axis=1)

    # get parameters from network and set up sgd with nesterov momentum to update parameters
    params = lasagne.layers.get_all_params(output_layer_triplet,trainable=True)
    #params = params[-4:]#TODO: !!!!!!!!!!!!!!!!!!!
    grad=T.grad(loss_train,params)

    # updates = nesterov_momentum(loss_train, params, learning_rate=0.03, momentum=0.9)
    updates =lasagne.updates.rmsprop(loss_train, params, learning_rate=0.0002)
    # updates =lasagne.updates.get_or_compute_grads(loss_train, params)

    # set up training and prediction functions
    train = theano.function(inputs=[X], outputs=[loss_train,pred,dis,dis_pos,dis_neg], updates=updates, allow_input_downcast=True)

    if load_params:
        pre_params=pickle.load(gzip.open(load_params))
        lasagne.layers.set_all_param_values(output_layer_softmax,pre_params)
        print 'load Success.'

    for i in range(4500):
        aver_loss=0
        for idx_batch in range (num_batches):
            train_X=np.zeros([BATCHSIZE,3,PIXELS,PIXELS])
            for iii in range(BATCHSIZE):
                label=random.choice(train_files)
                num_slots=random.randint(1,5)
                im_aim_list=random.sample(np.load(train_load_path+label),num_slots)
                tmp_sum=0
                for iidx,shot in enumerate(im_aim_list):
                    tmp_sum+=shot
                im_aim=tmp_sum/float(num_slots)
                # im_aim=tmp_sum

                im_aim_list=random.sample(np.load(train_load_path+label),num_slots)
                tmp_sum=0
                for iidx,shot in enumerate(im_aim_list):
                    tmp_sum+=shot
                im_pos=tmp_sum/float(num_slots)
                # im_pos=tmp_sum

                while True:
                    label_neg=random.choice(train_files)
                    if label!=label_neg:
                        im_neg_list=random.sample(np.load(train_load_path+label_neg),num_slots)
                        tmp_sum=0
                        for iidx,shot in enumerate(im_neg_list):
                            tmp_sum+=shot
                        im_neg=tmp_sum/float(num_slots)
                        # im_neg=tmp_sum
                        break

                train_X[iii,0]=im_aim
                train_X[iii,1]=im_pos
                train_X[iii,2]=im_neg

            train_X=train_X.reshape(BATCHSIZE*3,1,PIXELS,PIXELS)
            xx_batch = np.float32(train_X)
            # print xx_batch.shape
            # yy_batch = np.float32(train_y[idx_batch * BATCHSIZE:(idx_batch + 1) * BATCHSIZE])

            train_loss ,pred ,dis ,dis1,dis2= train(xx_batch)
            aver_loss+=train_loss
            # count=np.count_nonzero(np.int32(pred ==np.argmax(yy_batch,axis=1)))
            if idx_batch%3==0:
                print i,idx_batch,'| Tloss:', train_loss,pred,'\ndis_pos:{}\ndis_neg:{}\ndis:{}'.format(dis1[:20],dis2[:20],dis[:20])
                # print pred
                # print np.argmax(yy_batch,axis=1)
                print "time:",time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())



        # save weights
        if i%1==0:
            aver_loss=aver_loss/num_batches
            all_params = helper.get_all_param_values(output_layer_softmax)
            f = gzip.open('speech_params/speech_{}_batchnorm_12345aver_{}_triplet_{}.pklz'.format(aver_loss,alpha,time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())), 'wb')
            pickle.dump(all_params, f)
            f.close()
Code example #32
File: cov.py Project: zivtigher/pymc3
 def _common(self, X, Xs=None):
     X, Xs = self._slice(X, Xs)
     Xc = tt.sub(X, self.c)
     return X, Xc, Xs
Code example #33
    def apply(self, inputs, time_step, states, cells, time_scale, time_offset, mask=None):
        """Apply the Long Short Term Memory transition.

        Parameters
        ----------
        states : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of current states in the shape
            (batch_size, features). Required for `one_step` usage.
        cells : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of current cells in the shape
            (batch_size, features). Required for `one_step` usage.
        inputs : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of inputs in the shape (batch_size,
            features * 4). The `inputs` needs to be four times the
            dimension of the LSTM brick to insure each four gates receive
            different transformations of the input. See [Grav13]_
            equations 7 to 10 for more details. The `inputs` are then split
            in this order: Input gates, forget gates, cells and output
            gates.
        mask : :class:`~tensor.TensorVariable`
            A 1D binary array in the shape (batch,) which is 1 if there is
            data available, 0 if not. Assumed to be 1-s only if not given.

        .. [Grav13] Graves, Alex, *Generating sequences with recurrent*
            *neural networks*, arXiv preprint arXiv:1308.0850 (2013).

        Returns
        -------
        states : :class:`~tensor.TensorVariable`
            Next states of the network.
        cells : :class:`~tensor.TensorVariable`
            Next cell activations of the network.

        """
        def activate_lstm(self, inputs, states, cells, mask=None):
            def slice_last(x, no):
                return x[:, no*self.dim: (no+1)*self.dim]
    
            activation = tensor.dot(states, self.W_state) + inputs
            in_gate = self.gate_activation.apply(
                slice_last(activation, 0) + cells * self.W_cell_to_in)
            forget_gate = self.gate_activation.apply(
                slice_last(activation, 1) + cells * self.W_cell_to_forget)
            next_cells = (
                forget_gate * cells +
                in_gate * self.activation.apply(slice_last(activation, 2)))
            out_gate = self.gate_activation.apply(
                slice_last(activation, 3) + next_cells * self.W_cell_to_out)
            next_states = out_gate * self.activation.apply(next_cells)
    
            if mask:
                next_states = (mask[:, None] * next_states +
                               (1 - mask[:, None]) * states)
                next_cells = (mask[:, None] * next_cells +
                              (1 - mask[:, None]) * cells)
    
            return next_states, next_cells
        def do_nothing(states, cells):
            return states, cells
        
        result = ifelse(tensor.eq(tensor.mod(tensor.sub(time_step,time_offset),time_scale),0), activate_lstm(self, inputs, states, cells, mask), do_nothing(states, cells))
        
        return result
Code example #34
cv_size = T.fscalar("cv_size")

drop_input = lambda rand: T.reshape(
    bino_input[rand:rand + (batch_size * dim_visible)],
    (batch_size, dim_visible))
input_drop = drop_input(rdm.random_integers(low=0, high=sample_range_dropout))

h = T.nnet.sigmoid(T.add(T.dot(v, w_vh), w_h))

u_w_plus = function([],
                    updates=[(wu_vh, g(T.add(wu_vh, T.dot(v.T, h)))),
                             (wu_v, g(T.add(T.sum(v[:], axis=0), wu_v))),
                             (wu_h, g(T.add(T.sum(h[:], axis=0), wu_h)))])

u_w_minus = function([],
                     updates=[(wu_vh, g(T.sub(wu_vh, T.dot(v.T, h)))),
                              (wu_v, g(T.sub(T.sum(v[:], axis=0), wu_v))),
                              (wu_h, g(T.sub(T.sum(h[:], axis=0), wu_h)))])

sample = lambda rdm: T.reshape(
    uniform_sample[rdm:rdm + (dim_hidden * batch_size)],
    (batch_size, dim_hidden))

gibbs = T.cast(T.gt(h, sample(rdm.random_integers(low=0, high=sample_range))),
               'float32')

update_v = function(
    [], outputs=[g(T.nnet.sigmoid(T.add(T.dot(gibbs, w_vh.T), w_v)))])

update_w = function([alpha],
                    updates=[(w_vh, g(T.add(w_vh, T.mul(alpha, wu_vh)))),
Code example #35
File: classifier.py Project: mpslxz/LDH_classifier
 def _def_cost_acc(self):
     l2_norm_squared = sum([(p**2).sum() for p in self.to_regularize])
     self.cost = T.sum((T.sub(self.outputs, self.y))**
                       2).mean() + self.lmbd * l2_norm_squared
     diff = abs(T.argmax(self.outputs, axis=1) - T.argmax(self.y, axis=1))
     self.acc = T.sub(1, 1. * T.nonzero(diff)[0].shape[0] / self.y.shape[0])
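The accuracy above is one minus the fraction of samples whose argmax prediction disagrees with the (one-hot) target. A small NumPy sketch with made-up data illustrates the same expression:

import numpy as np

outputs = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])  # hypothetical predictions
y = np.array([[1, 0], [0, 1], [0, 1]])                    # hypothetical one-hot targets

diff = np.abs(outputs.argmax(axis=1) - y.argmax(axis=1))
acc = 1 - 1.0 * np.count_nonzero(diff) / y.shape[0]       # 2 of 3 correct -> 0.666...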
Code example #36
File: tensor.py Project: liqin123/odin
def sub(x, y):
    z = T.sub(x, y)
    if isinstance(get_shape(x), (tuple, list)):
        output_shape = auto_infer_shape(T.sub, x, y)
        add_shape(z, output_shape)
    return z
Code example #37
    def __init__(self, nnet, dataset=None, learning_rate=0.01, beta=0.0, sparsity=0.01, weight_decay=0.0, momentum=0.5):
        if dataset is None or len(dataset) < 2:
            raise ValueError("dataset must contain the tuple (train_data, train_target)")
        train_data, train_target = dataset

        target = T.matrix('y')

        square_error = T.mean(0.5*T.sum(T.pow(target - nnet.output, 2), axis=1))

        avg_activate = T.mean(nnet.hiddenLayer[0].output, axis=0)
        sparsity_penalty = beta*T.sum(T.mul(T.log(sparsity/avg_activate), sparsity) + T.mul(T.log((1-sparsity)/T.sub(1,avg_activate)), (1-sparsity)))

        regularization = 0.5*weight_decay*(T.sum(T.pow(nnet.params[0],2)) + T.sum(T.pow(nnet.params[2],2)))

        cost = square_error + sparsity_penalty + regularization
        
        gparams = [T.grad(cost, param) for param in nnet.params]

        w_deltas = []
        for param in nnet.params:
            w_deltas.append(theano.shared(value=param.get_value()*0, borrow=True))

        new_params = [param - (learning_rate*gparam + momentum*w_delta) for param, gparam, w_delta in zip(nnet.params, gparams, w_deltas)]

        updates = [(param, new_param) for param, new_param in zip(nnet.params, new_params)]
        updates += [(w_delta, learning_rate*gparam + momentum*w_delta) for w_delta, gparam in zip(w_deltas, gparams)]

        index = T.lscalar()
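        # NOTE: `input` and `batch_size` are assumed to be defined in the
        # enclosing module/class; they are not created in this snippet.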
        self.train = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                input: train_data[index * batch_size: (index + 1) * batch_size],
                target: train_target[index * batch_size: (index + 1) * batch_size]
            }
        )

        self.cost = theano.function(
            inputs=[],
            outputs=cost,
            givens={ input: train_data, target: train_target }
        )
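The sparsity_penalty above is the usual KL-divergence penalty for sparse autoencoders, summed over the hidden units. A minimal NumPy sketch of the same expression, with hypothetical values for beta and the average activations:

import numpy as np

sparsity = 0.01
beta = 3.0                                   # hypothetical penalty weight
avg_activate = np.array([0.02, 0.01, 0.05])  # hypothetical mean hidden activations

kl = (sparsity * np.log(sparsity / avg_activate)
      + (1 - sparsity) * np.log((1 - sparsity) / (1 - avg_activate)))
sparsity_penalty = beta * kl.sum()           # same form as the Theano expression above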
Code example #38
f1([[4,3], [1, 3, 3, 2], [1, 2, 2]], 4)


f2 = theano.function([x, max_x], o )
f2([[4,3], [1, 3, 3, 2], [1, 2, 2]], 4)


x = T.imatrix('x')
x_vec = T.ivector('x_vec')
fe = theano.function([x_vec], E[:,x_vec])




shape_sub = shared(0)
a = T.sub(T.shape(x[0]),T.shape(x[1]))

f = func([x], a, updates={(shape_sub, a[0])})

f([[[4,3], [3,7]], 2])
f2 = T.zeros(shape_sub)

T.zeros(a[0]).eval({x:[[4,3,1, 6, 6, 7, 8], [5, 7,6], [4, 7, 1, 1]]}) 
f([[3], [3, 1,3]])[-1]
f([[3, 1, 4]])

x1 = T.ivector('x1')
x2 = T.ivector('x2')
shape_sub = T.sub(T.shape(x1), T.shape(x2))
vec = T.ivector('x1')
Code example #39
def crf_par_01_gpu(aryDpth,
                   vecEmpX,
                   strFunc='power',
                   varNumIt=1000,
                   varNumX=1000,
                   varXmin=0.0,
                   varXmax=1.0,
                   varNumOp=10000):
    """
    Parallelised bootstrapping of contrast response function, level 1.

    Parameters
    ----------
    aryDpth : np.array
        Array with empirical response data, of the form
        aryDpth[idxRoi, idxSub, idxCon, idxDpt].
    vecEmpX : np.array
        Empirical x-values at which model will be fitted (e.g. stimulus
        contrast levels at which stimuli were presented), of the form
        vecEmpX[idxCon].
    strFunc : str
        Which contrast response function to fit. 'power' for power function, or
        'hyper' for hyperbolic ratio function.
    varNumIt : int
        Number of bootstrapping iterations (i.e. how many times to sample).
    varNumX : int
        Number of x-values for which to solve the function when calculating
        model fit.
    varXmin : float
        Minimum x-value for which function will be fitted.
    varXmax : float
        Maximum x-value for which function will be fitted.
    varNumOp: int
        Number of optimisation steps for function fitting.

    Returns
    -------
    aryMdlY : np.array
        Fitted y-values (predicted response based on CRF model), of the form
        aryMdlY[idxRoi, idxIteration, idxDpt, varNumX], where varNumX is the
        number of data points at which the fitted function is evaluated (e.g.
        1000).
    aryHlfMax : np.array
        Predicted response at 50 percent contrast based on CRF model. Array of
        the form aryHlfMax[idxRoi, idxIteration, idxDpt].
    arySemi : np.array
        Semisaturation contrast (predicted contrast needed to elicit 50 percent
        of the response amplitude that would be expected with a 100 percent
        contrast stimulus). Array of the form
        arySemi[idxRoi, idxIteration, idxDpt].
    aryRes : np.array
        Residual variance at empirical contrast levels. Array of the form
        aryRes[idxRoi, idxIteration, idxCondition, idxDpt].

    Notes
    -----
    NOTE: HYPERBOLIC RATIO NOT YET IMPLEMENTED FOR THEANO.
    
    This function vectorises the contrast response function fitting over all
    bootstrap iterations and fits them jointly with Theano (gradient descent).

    Function of the depth sampling pipeline.
    """
    # ------------------------------------------------------------------------
    # *** Prepare bootstrapping
    print('---Preparing bootstrapping')

    # Check time:
    varTme01 = time.time()

    # Number of ROIs:
    varNumIn = aryDpth.shape[0]

    # Number of subjects:
    varNumSubs = aryDpth.shape[1]

    # Number of conditions:
    varNumCon = aryDpth.shape[2]

    # Number of depth levels:
    varNumDpth = aryDpth.shape[3]

    # Random array with subject indices for bootstrapping, of the form
    # aryRnd[varNumIt, varNumSmp]. Each row contains the indices of the
    # subjects to be sampled on that iteration.
    aryRnd = np.random.randint(0, high=varNumSubs, size=(varNumIt, varNumSubs))

    ## Initialise array for random samples:
    #aryDpthRnd = np.zeros((varNumIt, varNumIn, varNumSubs, varNumCon, varNumDpth))
    #
    ## Fill array with resampled samples:
    #for idxIt in range(varNumIt):
    #    aryDpthRnd[idxIt, :, :, :, :] = aryDpth[:, aryRnd[idxIt, :], :, :]
    #
    ## Take mean within random samples:
    #aryDpthRnd = np.mean(aryDpthRnd, axis=2)
    #
    ## Total number of CRF models to fit:
    #varNumTtl = (varNumIn * varNumDpth * varNumIt)
    #
    ## Reshape:
    #aryDpthRnd = np.reshape(aryDpthRnd, (varNumTtl, varNumCon))

    # Total number of CRF models to fit:
    varNumTtl = (varNumIn * varNumDpth * varNumIt)

    # Array for resampled samples:
    aryDpthRnd = np.zeros((varNumTtl, varNumSubs, varNumCon))

    # Fill array with resampled samples:
    varCnt = 0
    for idxIn in range(varNumIn):
        for idxIt in range(varNumIt):
            for idxDpth in range(varNumDpth):
                aryDpthRnd[varCnt, :, :] = aryDpth[idxIn, aryRnd[idxIt, :], :,
                                                   idxDpth]

                varCnt += 1

    # Take mean within random samples:
    aryDpthRnd = np.mean(aryDpthRnd, axis=1)

    # Check time:
    varTme02 = time.time()

    # Report time:
    varTme03 = np.around((varTme02 - varTme01), decimals=3)
    print(('---Elapsed time: ' + str(varTme03) + ' s for ' + str(varNumTtl) +
           ' iterations.'))

    # ------------------------------------------------------------------------
    # *** Theano CRF fitting

    print('---Theano CRF fitting')

    # Check time:
    varTme01 = time.time()

    # Broadcast array with X data, and change data type to float 32:
    aryEmpX = np.broadcast_to(vecEmpX, (varNumTtl, vecEmpX.shape[0]))
    aryEmpX = aryEmpX.astype(th.config.floatX)
    aryDpthRnd = aryDpthRnd.astype(th.config.floatX)

    # The CRF:
    # varR = varA * np.power(varC, varB)
    def model(aryC, vecA, vecB):
        return T.mul(T.pow(aryC, vecB[:, None]), vecA[:, None])

    # Initialise theano arrays for empirical X and Y data:
    TaryEmpX = T.matrix()
    TaryDpthRnd = T.matrix()

    # Initialise model parameters:
    vecA = np.ones((varNumTtl))
    vecB = np.ones((varNumTtl))
    vecA = np.multiply(vecA, 0.5)
    vecB = np.multiply(vecB, 0.5)
    vecA = vecA.astype(dtype=th.config.floatX)
    vecB = vecB.astype(dtype=th.config.floatX)

    # Create shared theano object for model parameters:
    TvecA = th.shared(vecA)
    TvecB = th.shared(vecB)

    # Model prediction for theano:
    TobjMdlPre = model(TaryEmpX, TvecA, TvecB)

    # Learning rate:
    varLrnRt = np.float32(0.0001)

    # Cost function:
    # cost = T.mean(T.sqr(y - Y))
    TobjCst = T.sum(T.sqr(T.sub(TobjMdlPre, TaryDpthRnd)))

    # Gradients for cost function
    TobGrd01 = T.grad(cost=TobjCst, wrt=TvecA)
    TobGrd02 = T.grad(cost=TobjCst, wrt=TvecB)

    # How to update the cost function:
    lstUp = [(TvecA, (TvecA - TobGrd01 * varLrnRt)),
             (TvecB, (TvecB - TobGrd02 * varLrnRt))]

    # Define the theano function that will be optimised:
    TcrfPwOp = th.function(inputs=[TaryEmpX, TaryDpthRnd],
                           outputs=TobjCst,
                           updates=lstUp)  # allow_input_downcast=True)

    # Do not check input data type:
    # train.trust_input = True

    ## Array for theano model parameters, of the form
    ## aryMdlParT[idxTotalIterations, freeModelParameters]:
    aryMdlParT = np.zeros((varNumTtl, 2)).astype(th.config.floatX)

    # Optimise function:
    for idxThn in range(varNumOp):
        TcrfPwOp(aryEmpX, aryDpthRnd)

    # Save model parameter A:
    aryMdlParT[:, 0] = TvecA.get_value()

    # Save model parameter B:
    aryMdlParT[:, 1] = TvecB.get_value()

    # Check time:
    varTme02 = time.time()

    # Report time:
    varTme03 = np.around((varTme02 - varTme01), decimals=3)
    print(('---Elapsed time: ' + str(varTme03) + ' s for ' + str(varNumTtl) +
           ' iterations.'))

    # ------------------------------------------------------------------------
    # *** Apply CRF

    print('---Theano CRF evaluation')

    # Check time:
    varTme01 = time.time()

    # Vector for which the function will be fitted:
    vecMdlX = np.linspace(varXmin, varXmax, num=varNumX, endpoint=True)

    # Broadcast array with X data, and change data type to float 32:
    aryMdlX = np.broadcast_to(vecMdlX, (varNumTtl, varNumX))
    aryMdlX = aryMdlX.astype(th.config.floatX)

    # Change data type to float 32:
    aryMdlParT = aryMdlParT.astype(th.config.floatX)

    # Initialise theano arrays for model X data:
    TaryMdlX = T.matrix()

    # Create shared theano object for fitted model parameters:
    TvecMdlParA = th.shared(aryMdlParT[:, 0])
    TvecMdlParB = th.shared(aryMdlParT[:, 1])

    # Model to evaluate, like before (i.e. similar to the model that was
    # optimised, but this time with the fitted parameter values as input):
    TobjMdlEval = model(TaryMdlX, TvecMdlParA, TvecMdlParB)

    # Function definition for evaluation:
    TcrfPwEval = th.function([TaryMdlX], TobjMdlEval)

    # Evaluate function (get predicted y values of CRF for all resampling
    # iterations). Returns arrays for y-values of fitted function (for each
    # iteration & depth  level), of the form aryMdlY[varNumTtl, varNumX]
    aryMdlY = TcrfPwEval(aryMdlX)

    # Check time:
    varTme02 = time.time()

    # Report time:
    varTme03 = np.around((varTme02 - varTme01), decimals=3)
    print(('---Elapsed time: ' + str(varTme03) + ' s for ' + str(varNumTtl) +
           ' iterations.'))

    # ------------------------------------------------------------------------
    # *** Calculate predicted response at 50% contrast

    # Vector for which the function will be fitted (contrast = 0.5):
    vecMdl50 = np.ones((varNumTtl, 1))
    vecMdl50 = np.multiply(vecMdl50, 0.5)
    vecMdl50 = vecMdl50.astype(dtype=th.config.floatX)

    # Evaluate function. Returns array for responses at half maximum
    # contrast, of the form aryHlfMax[varNumTtl, 1]
    aryHlfMax = TcrfPwEval(vecMdl50)

    # ------------------------------------------------------------------------
    # *** Calculate predicted response at empirical contrast levels

    # We calculate the predicted response at the actually tested empirical
    # contrast levels in order to subsequently calculate the residual variance
    # of the model fit at those contrast levels.

    # Evaluate function (get predicted y values of CRF for empirically
    # measured contrast values):
    aryMdlEmpX = TcrfPwEval(aryEmpX)

    # ------------------------------------------------------------------------
    # *** Calculate semisaturation contrast

    print('---Calculating semisaturation contrast')

    # Check time:
    varTme01 = time.time()

    # We first need to calculate the response at 100% contrast.

    # Vector for which the function will be fitted (contrast = 1.0):
    vecMdl100 = np.ones((varNumTtl, 1))
    vecMdl100 = vecMdl100.astype(dtype=th.config.floatX)

    # Evaluate function. Returns array of responses at 100 percent
    # contrast, of the form aryResp100[varNumTtl, 1]
    aryResp100 = TcrfPwEval(vecMdl100)

    # Half maximum response:
    aryResp50 = np.multiply(aryResp100, 0.5)
    aryResp50 = aryResp50.astype(dtype=th.config.floatX)

    # Initialise theano arrays for half maximum response:
    TaryResp50 = T.matrix()

    # Initialise vector for Semisaturation constant:
    arySemi = np.ones((varNumTtl, 1))
    arySemi = np.multiply(arySemi, 0.2)
    arySemi = arySemi.astype(dtype=th.config.floatX)

    # Create shared theano object for model parameters:
    TarySemi = th.shared(arySemi)

    # Model for finding semisaturation contrast:
    TobjMdlSemi = model(TarySemi, TvecMdlParA, TvecMdlParB)

    # Cost function for finding semisaturation contrast:
    TobjCst = T.sum(T.sqr(T.sub(TobjMdlSemi[:], TaryResp50[:])))

    # Gradient for cost function
    TobGrdSemi = T.grad(cost=TobjCst, wrt=TarySemi)

    # Learning rate:
    varLrnRt = np.float32(0.0001)

    # How to update the cost function:
    lstUp = [(TarySemi, (TarySemi - TobGrdSemi * varLrnRt))]

    # Define the theano function that will be optimised:
    TcrfPwSemi = th.function(inputs=[TaryResp50],
                             outputs=TobjCst,
                             updates=lstUp)  # allow_input_downcast=True)

    # Optimise function:
    for idxThn in range(varNumOp):
        TcrfPwSemi(aryResp50)

    # Save semisaturation contrast:
    arySemi = TarySemi.get_value()

    # Check time:
    varTme02 = time.time()

    # Report time:
    varTme03 = np.around((varTme02 - varTme01), decimals=3)
    print(('---Elapsed time: ' + str(varTme03) + ' s for ' + str(varNumTtl) +
           ' iterations.'))

    # ------------------------------------------------------------------------
    # *** Reshape

    # Reshape array with fitted y-values, from
    #     aryMdlY[varNumTtl, varNumX]
    # to
    #     aryMdlY[idxRoi, idxIteration, idxDpt, varNumX]
    aryMdlYRs = np.zeros((varNumIn, varNumIt, varNumDpth, varNumX),
                         dtype=th.config.floatX)
    varCnt = 0
    for idxIn in range(varNumIn):
        for idxIt in range(varNumIt):
            for idxDpth in range(varNumDpth):
                aryMdlYRs[idxIn, idxIt, idxDpth, :] = aryMdlY[varCnt, :]
                varCnt += 1
    del (aryMdlY)
    aryMdlY = np.copy(aryMdlYRs)
    del (aryMdlYRs)

    # Reshape array with predicted response at 50 percent contrast, from
    #     aryHlfMax[varNumTtl, 1]
    # to
    #     aryHlfMax[idxRoi, idxIteration, idxDpt]
    aryHlfMaxRs = np.zeros((varNumIn, varNumIt, varNumDpth),
                           dtype=th.config.floatX)
    varCnt = 0
    for idxIn in range(varNumIn):
        for idxIt in range(varNumIt):
            for idxDpth in range(varNumDpth):
                aryHlfMaxRs[idxIn, idxIt, idxDpth] = aryHlfMax[varCnt, :]
                varCnt += 1
    del (aryHlfMax)
    aryHlfMax = np.copy(aryHlfMaxRs)
    del (aryHlfMaxRs)

    # Reshape array with semisaturation contrast, from
    #     arySemi[varNumTtl, 1]
    # to
    #     arySemi[idxRoi, idxIteration, idxDpt]
    arySemiRs = np.zeros((varNumIn, varNumIt, varNumDpth),
                         dtype=th.config.floatX)
    varCnt = 0
    for idxIn in range(varNumIn):
        for idxIt in range(varNumIt):
            for idxDpth in range(varNumDpth):
                arySemiRs[idxIn, idxIt, idxDpth] = arySemi[varCnt, :]
                varCnt += 1
    del (arySemi)
    arySemi = np.copy(arySemiRs)
    del (arySemiRs)

    # Reshape array with predicted response at empirical contrast values, from
    #     aryMdlEmpX[varNumTtl, varNumCon]
    # to
    #     aryMdlEmpX[idxRoi, idxIteration, idxCondition, idxDpt]
    aryMdlEmpXRs = np.zeros((varNumIn, varNumIt, varNumCon, varNumDpth),
                            dtype=th.config.floatX)
    varCnt = 0
    for idxIn in range(varNumIn):
        for idxIt in range(varNumIt):
            for idxDpth in range(varNumDpth):
                aryMdlEmpXRs[idxIn, idxIt, :, idxDpth] = aryMdlEmpX[varCnt, :]
                varCnt += 1
    del (aryMdlEmpX)
    aryMdlEmpX = np.copy(aryMdlEmpXRs)
    del (aryMdlEmpXRs)

    # ------------------------------------------------------------------------
    # *** Calculate residual variance

    # Mean across subjects of (full) empirical dataset:
    aryEmpYMne = np.mean(aryDpth, axis=1)
    aryEmpYMne = aryEmpYMne.astype(dtype=th.config.floatX)

    # Residual variance at empirical contrast levels. Array of the form
    # aryRes[idxRoi, idxIteration, idxCondition, idxDpt].
    aryRes = np.absolute(np.subtract(aryMdlEmpX, aryEmpYMne[:, None, :, :]))

    # ------------------------------------------------------------------------
    # *** Return

    return aryMdlY, aryHlfMax, arySemi, aryRes
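For the power function fitted above (response = vecA * contrast ** vecB), the semisaturation contrast that the gradient-descent loop estimates also has a closed form: A * c ** B = 0.5 * A implies c = 0.5 ** (1 / B). A small NumPy sketch with hypothetical parameter values, which could be used to sanity-check the Theano estimate:

import numpy as np

vecA = np.array([1.2, 0.8])   # hypothetical fitted scaling parameters
vecB = np.array([0.4, 0.6])   # hypothetical fitted exponents

resp100 = vecA * 1.0 ** vecB  # response at 100 percent contrast
resp50 = 0.5 * resp100        # half-maximum response

# closed form: vecA * semi**vecB = 0.5 * vecA  =>  semi = 0.5**(1 / vecB)
semi = 0.5 ** (1.0 / vecB)
assert np.allclose(vecA * semi ** vecB, resp50)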
Code example #40
Wlk = shared(np.random.rand(numOutputNeurons, numHiddenNeurons2)*0.01)

# input values
X = T.dmatrix('X')
# output values
t = T.dmatrix('t')

# output of first hidden layer
Aji = T.nnet.sigmoid(T.dot(Wji, X.T))
# output layer used during pre-training of the first stack (prediction)
# Akj = T.nnet.softmax(T.dot(Wo1, Aji))
Akj = T.dot(Wo1, Aji)

# error function of first pre training
# E = T.mean(T.nnet.categorical_crossentropy(Akj.T, t))
E = T.sum(T.sub(Akj.T, t)**2)/(2*trainX.shape[0])

# gradient of error with respect to weights of first hidden layer
gradji = T.grad(E, Wji)
# gradient of error with respect to weights of output layer in pre training when 1 hidden layer present
grado1 = T.grad(E, Wo1)

updates = [(Wji, Wji-pre_eta*gradji),
           (Wo1, Wo1-pre_eta*grado1)]

# pre training function
pre_training_first_stack = function(inputs=[X, t], outputs=[E], updates=updates)

# output of first hidden layer
# Aji = T.nnet.sigmoid(T.dot(Wji, X.T))
Aji = T.tanh(T.dot(Wji, X.T))
Code example #41
def R2loss(y_true, y_pred):
    y_pred = y_pred.flatten()
    y_true = y_true.flatten()
    tot = T.sum(T.sqr(T.sub(y_true, T.mean(y_true))))
    res = T.sum(T.sqr(T.sub(y_true, y_pred)))
    return T.true_div(res, tot)
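The value returned above is the fraction of unexplained variance, so one minus it is the ordinary coefficient of determination R^2. A quick usage sketch with made-up data, compiled through a Theano function:

import numpy as np
import theano
import theano.tensor as T

y_true = T.vector('y_true')
y_pred = T.vector('y_pred')
loss = theano.function([y_true, y_pred], R2loss(y_true, y_pred))

yt = np.asarray([1.0, 2.0, 3.0], dtype=theano.config.floatX)
yp = np.asarray([1.1, 1.9, 3.2], dtype=theano.config.floatX)
print(1.0 - loss(yt, yp))   # coefficient of determination R^2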
Code example #42
File: DNN.py Project: DSLabXXX/DeepNN
 def l2_norm(self, y):
     return T.mean(T.sub(self.predict_y[T.arange(y.shape[0]), y], y) ** 2)
Code example #43
def theanoMatMatSub(In1, In2):
    var1 = T.dmatrix('var1')
    var2 = T.dmatrix('var2')
    var3 = T.sub(var1, var2)
    SubMat = function([var1, var2], var3)
    return SubMat(In1, In2)
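A quick usage sketch of the helper above with made-up matrices; note that it recompiles the Theano function on every call, which is fine for a one-off check but wasteful inside a loop:

import numpy as np

A = np.array([[1.0, 2.0], [3.0, 4.0]])
B = np.array([[0.5, 0.5], [1.0, 1.0]])
print(theanoMatMatSub(A, B))   # [[0.5 1.5]
                               #  [2.  3. ]]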
Code example #44
File: dA.py Project: floatdrop/chordest
 def quadratic_loss(self, y):
     return T.mean(T.sqrt(T.sum(T.sqr(T.sub(self.p_y_given_x, y)), axis=1)))
Code example #45
def theanoVecVecSub(In1, In2):
    var1 = T.dvector('var1')
    var2 = T.dvector('var2')
    var3 = T.sub(var1, var2)
    SubVec = function([var1, var2], var3)
    return SubVec(In1, In2)
Code example #46
File: theano_test.py Project: floatdrop/chordest
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 08 23:10:24 2013

@author: Nikolay
"""

import theano
import theano.tensor as T

a, b, c = T.vector(), T.vector(), T.vector()
W = T.matrix()
x = T.dot(a, W) + b
c = T.sqr(T.sub(x, a))
#c = T.sqrt(T.sum(T.sqr(T.sub(x, a))))
# -T.mean(T.sqrt(T.sum(T.sqr(T.sub(self.p_y_given_x, y)))))
s = T.sum(W, axis=1)

calc = theano.function(
    inputs=[a,b,W],
    outputs=[c])

sss = theano.function(
    inputs=[W],
    outputs=[s])

#print calc((1, 2), (5, 6), ((3,4), (-7,8)))
print sss(((1, 2, 3), (4, 5, 6), (7, 8, 9)))
Code example #47
    def get_output_for(self, grids, **kwargs):
        height = width = depth = self.grid_side

        # np.indices() returns 3 grids, each exactly as big as the original one.
        # The first grid contains the X coordinate of each point at the location of the point.
        # The second grid contains the Y coordinate of each point at the location of the point.
        # The third grid contains the Z coordinate of each point at the location of the point.
        indices_grids = T.as_tensor_variable(np.indices((width, height, depth),
                                                        dtype=floatX),
                                             name="grid_indices")

        # Translate:
        # the translation vector will be broad-casted:
        # t_x will be added to all values in the first indices grid
        # t_y will be added to all values in the second indices grid
        # t_z will be added to all values in the third indices grid
        # resulting in a translation in the direction of translation_vector
        indices_grids = T.add(indices_grids, self._translation_vector())

        # Rotate:
        # the origin is just the center point in the grid
        origin = T.as_tensor_variable(np.array(
            (width // 2, height // 2, depth // 2), dtype=floatX).reshape(
                (3, 1, 1, 1)),
                                      name='origin')
        # We first center all indices, just as in the translation above
        indices_grids = T.sub(indices_grids, origin)

        # T.tensordot is a generalized version of a dot product.
        # The axes parameter is of length 2, and it gives the axis for each of the two tensors
        # passed, over which the summation will occur. Of course, those two axis need to be of the
        # same dimension.
        # Here we have a (3 x 3) matrix <dot product> (3, width, height, depth) grid, and the
        # summation happens over the first axis (index 0). The result is of size
        # (3 x width x height x depth) and again contains 3 grids of, this time,
        # **rotated** indices for each dimension X, Y, Z respectively.
        indices_grids = T.tensordot(self._rotation_matrix(),
                                    indices_grids,
                                    axes=(0, 0))

        # Decenter
        indices_grids = T.add(indices_grids, origin)

        # Since indices_grids was transformed, we now might have indices at certain locations
        # that are out of the range of the original grid. We thus need to clip them to valid values.
        # For the first grid: between 0 and width - 1
        # For the second grid: between 0 and height - 1
        # For the third grid: between 0 and depth - 1
        # Note that now the index grids might contain real numbers (not only integers).
        x_indices = T.clip(indices_grids[0], 0, width - 1 - .001)
        y_indices = T.clip(indices_grids[1], 0, height - 1 - .001)
        z_indices = T.clip(indices_grids[2], 0, depth - 1 - .001)

        if self.interpolation == "nearest":
            # Here we just need to round the indices for each spatial dimension to the closest
            # integer, and then index the original input grid with the 3 index grids
            # (numpy style indexing with arrays) to obtain the final result. Note that here,
            # as usual, the multi-dim array that you index with has the
            # same spatial dimensionality as the multi-dim array being indexed.

            # We intentionally flatten everything before indexing, so that Theano can use
            # AdvancedSubtensor1 instead of AdvancedSubtensor, because only the former can be run
            # on the GPU.
            # https://groups.google.com/forum/#!topic/theano-users/XkPJP6on50Y
            flat_grids, flat_indices = grids.reshape(
                (grids.shape[0], grids.shape[1], -1)), \
                                       width * height * T.iround(
                                           x_indices).flatten() + \
                                       height * T.iround(y_indices).flatten() + \
                                       T.iround(z_indices).flatten()
            output = flat_grids[:, :, flat_indices]
            output = output.reshape(grids.shape)
        else:
            flat_grids = grids.reshape((grids.shape[0], grids.shape[1], -1))
            # For linear interpolation, we use the transformed indices x_indices, y_indices and
            # z_indices to linearly calculate the desired values at each of the original indices
            # in each dimension.

            # Again, everything is flattened so that Theano can put it on the GPU, just as in
            # the other part of this if block.
            # https://groups.google.com/forum/#!topic/theano-users/XkPJP6on50Y
            top = T.cast(y_indices, 'int32').flatten()
            left = T.cast(x_indices, 'int32').flatten()
            forward = T.cast(z_indices, 'int32').flatten()

            x_indices = x_indices.flatten()
            y_indices = y_indices.flatten()
            z_indices = z_indices.flatten()

            # this computes the amount of shift into each direction from the original position
            fraction_y = T.cast(y_indices - top,
                                theano.config.floatX).flatten()
            fraction_x = T.cast(x_indices - left,
                                theano.config.floatX).flatten()
            fraction_z = T.cast(z_indices - forward,
                                theano.config.floatX).flatten()

            # then the new value is the linear combination based on the shifts in all
            # of the 8 possible directions in 3D
            output = flat_grids[:, :, self.grid_side ** 2 * top + self.grid_side * left + forward] \
                     * (1 - fraction_y) * (1 - fraction_x) * (1 - fraction_z) + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * top + self.grid_side * left + (forward + 1)] \
                     * (1 - fraction_y) * (1 - fraction_x) * fraction_z + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * top + self.grid_side * (left + 1) + forward] \
                     * (1 - fraction_y) * fraction_x * (1 - fraction_z) + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * top + self.grid_side * (left + 1) + (forward + 1)] \
                     * (1 - fraction_y) * fraction_x * fraction_z + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * (top + 1) + self.grid_side * left + forward] \
                     * fraction_y * (1 - fraction_x) * (1 - fraction_z) + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * (top + 1) + self.grid_side * left + (forward + 1)] \
                     * fraction_y * (1 - fraction_x) * fraction_z + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * (top + 1) + self.grid_side * (left + 1) + forward] \
                     * fraction_y * fraction_x * (1 - fraction_z) + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * (top + 1) + self.grid_side * (left + 1) + (forward + 1)] \
                     * fraction_y * fraction_x * fraction_z
            output = output.reshape(grids.shape)

        return output
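To make the 8-corner weighting in the linear branch easier to follow, here is a minimal NumPy sketch of trilinear interpolation at a single query point of a toy volume; the three fractions play the same role as fraction_x, fraction_y and fraction_z above:

import numpy as np

grid = np.arange(27.0).reshape(3, 3, 3)   # toy 3 x 3 x 3 volume
x, y, z = 0.3, 1.6, 0.5                   # query point with non-integer coordinates

x0, y0, z0 = int(x), int(y), int(z)       # "left", "top", "forward" corners
fx, fy, fz = x - x0, y - y0, z - z0       # shift into each direction

value = 0.0
for dx, dy, dz in np.ndindex(2, 2, 2):
    weight = ((fx if dx else 1 - fx) *
              (fy if dy else 1 - fy) *
              (fz if dz else 1 - fz))
    value += weight * grid[x0 + dx, y0 + dy, z0 + dz]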
Code example #48
File: debug_lstm.py Project: jrbtaylor/trace-rnn
 def mse(self, y):
     return T.mean(T.sqr(T.sub(self.output, y)))
Code example #49
def RMSE_tensor(y, y_pred):
    return 48 * T.pow(T.mean(T.pow(T.sub(y, y_pred), 2)), 0.5)
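The factor of 48 above looks like a dataset-specific rescaling (for example, targets normalised by an image half-width of 48 pixels), so the returned value is presumably an RMSE in pixel units rather than in normalised coordinates. A quick usage sketch with made-up targets:

import numpy as np
import theano
import theano.tensor as T

y = T.matrix('y')
y_pred = T.matrix('y_pred')
rmse = theano.function([y, y_pred], RMSE_tensor(y, y_pred))

yt = np.zeros((4, 2), dtype=theano.config.floatX)
yp = np.full((4, 2), 0.1, dtype=theano.config.floatX)
print(rmse(yt, yp))   # 48 * 0.1 = 4.8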