Example #1
    def output(self, x, index_selection_func=None):
        if self.n_out > 1:
            iWin = self.k

            if self.n_in == 1:
                iWin = 1

            rnd_proj = T.dot(
                x.reshape((x.shape[0], x.shape[1]*x.shape[2])),
                self.rand_proj_mat
            )

            if index_selection_func is not None:
                self.out_idxs = index_selection_func(rnd_proj)
            else:
                self.out_idxs = T.argsort(rnd_proj)
            self.out_idxs = T.sort(self.out_idxs[:, -self.k:])

            # self.out_idxs.set_value(
            #     np.random.randint(0, self.n_out, (self.batch_size, self.k))
            # )

        sparse = sparse_block_dot_SS(
            self.W,
            x,
            self.in_idxs,
            self.b,
            self.out_idxs
        )

        return (sparse if self.activation is None
                else self.activation(sparse))
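The two steps above (argsort to find the k highest-scoring output blocks, then sort so the selected indices come out in ascending order) can be checked in isolation. A minimal sketch with made-up shapes, not part of the original layer:

import numpy as np
import theano
import theano.tensor as T

scores = T.matrix('scores')          # (batch, n_out) random-projection scores
k = 3
# indices of the k largest entries per row, re-sorted into ascending order
out_idxs = T.sort(T.argsort(scores, axis=1)[:, -k:], axis=1)
f = theano.function([scores], out_idxs)
print(f(np.random.randn(4, 10).astype(theano.config.floatX)))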
Example #2
    def _pooling_function(self, inputs, pool_size, strides, border_mode, dim_ordering):

        if pool_size[0]<-1:
            # k-max pooling
            input_layer = T.transpose(inputs, axes=(0, 1, 3, 2))
            sorted_values = T.argsort(input_layer, axis=3)
            topmax_indexes = sorted_values[:, :, :, -self.k:]
            # sort indexes so that we keep the correct order within the sentence
            topmax_indexes_sorted = T.sort(topmax_indexes)

            # given that topmax only gives the index along the last dimension, we need to generate the other 3 dimensions
            dim0 = T.arange(0, input_layer.shape[0]).repeat(input_layer.shape[1] * input_layer.shape[2] * self.k)
            dim1 = T.arange(0, input_layer.shape[1]).repeat(self.k * input_layer.shape[2]).reshape((1, -1)).repeat(
                input_layer.shape[0],
                axis=0).flatten()
            dim2 = T.arange(0, input_layer.shape[2]).repeat(self.k).reshape((1, -1)).repeat(
                input_layer.shape[0] * input_layer.shape[1],
                axis=0).flatten()
            dim3 = topmax_indexes_sorted.flatten()
            x = T.transpose(
                input_layer[dim0, dim1, dim2, dim3].reshape(
                    (input_layer.shape[0], input_layer.shape[1], input_layer.shape[2], self.k)),
                axes=(0, 1, 3, 2))
            return x
        else:
            return super(MaxPooling2DWrapper, self)._pooling_function(inputs, pool_size, strides, border_mode, dim_ordering)
def keep_max(input, theta, k, sent_mask):
    sig_input = T.nnet.sigmoid(T.dot(input, theta))
    sent_mask = sent_mask.dimshuffle(0, 'x', 1, 'x')
    sig_input = sig_input * sent_mask
    #sig_input = T.dot(input, theta)
    if k == 0:
        result = input * T.addbroadcast(sig_input, 3)
        return result, sig_input

    # get the sorted idx
    sort_idx = T.argsort(sig_input, axis=2)
    k_max_ids = sort_idx[:,:,-k:,:]
    dim0, dim1, dim2, dim3 = k_max_ids.shape
    batchids = T.repeat(T.arange(dim0), dim1*dim2*dim3)
    mapids = T.repeat(T.arange(dim1), dim2*dim3).reshape((1, dim2*dim3))
    mapids = T.repeat(mapids, dim0, axis=0).flatten()
    rowids = k_max_ids.flatten()
    colids = T.arange(dim3).reshape((1, dim3))
    colids = T.repeat(colids, dim0*dim1*dim2, axis=0).flatten()
    sig_mask = T.zeros_like(sig_input)
    choosed = sig_input[batchids, mapids, rowids, colids]
    sig_mask = T.set_subtensor(sig_mask[batchids, mapids, rowids, colids], 1)
    input_mask = sig_mask * sig_input
    result = input * T.addbroadcast(input_mask, 3)
    return result, sig_input
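keep_max builds flat index vectors (batchids, mapids, rowids, colids) so it can mark the k largest rows of sig_input along axis 2. NumPy's np.indices yields the same selection much more directly; a small sanity check with toy shapes (names and shapes are mine, not from the original):

import numpy as np

sig = np.random.rand(2, 3, 5, 1)     # (batch, maps, rows, 1), like sig_input
k = 2
ids = np.sort(np.argsort(sig, axis=2)[:, :, -k:, :], axis=2)
b, m, _, c = np.indices(ids.shape)
mask = np.zeros_like(sig)
mask[b, m, ids, c] = 1               # 1 on the k largest rows, 0 elsewhere
assert mask.sum() == 2 * 3 * k       # exactly k rows marked per feature map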
Example #4
File: nbknca.py Project: Scott-Alex/CCML
    def compute_probabilistic_matrix(self,X, y, num_cases, k=5):

        z       = T.dot(X, self.A) #Transform x into z space 
        dists   = T.sqr(dist2hy(z,z))
        dists   = T.extra_ops.fill_diagonal(dists, T.max(dists)+1)
        nv      = T.min(dists,axis=1) # value of nearest neighbour 
        dists   = (dists.T - nv).T
        d       = T.extra_ops.fill_diagonal(dists, 0)
   
        #Take only k nearest 
        num     = T.zeros((num_cases, self.num_classes))
        denom   = T.zeros((num_cases,))
        for c_i in xrange(self.num_classes):

            #Mask for class i
            mask_i = T.eq(T.outer(T.ones_like(y),y),c_i)

            #K nearest neighbour within a class i 
            dim_ci = T.sum(mask_i[0])
            d_c_i = T.reshape(d[mask_i.nonzero()],(num_cases,dim_ci))
            k_indice = T.argsort(d_c_i, axis=1)[:,0:k]
            
            kd = T.zeros((num_cases,k))
            for it in xrange(k):
                kd = T.set_subtensor(kd[:,it], d_c_i[T.arange(num_cases),k_indice[:,it]]) 

            #Numerator
            value   = T.exp(-T.mean(kd,axis=1))
            num     = T.set_subtensor(num[:,c_i], value) 
            denom   += value 
            

        p = num / denom.dimshuffle(0,'x')    #prob that point i will be correctly classified    
        return p
Example #5
        def kmaxpooling_output(input):
            '''
                Implements k-max pooling:
                    1. sort the values
                    2. then take the top k values
            :param k: the k highest values to keep
            :type k: int
            :return:
            '''
            input = T.transpose(input, axes=(0, 1, 3, 2))
            sorted_values = T.argsort(input, axis=3)
            topmax_indexes = sorted_values[:, :, :, -k:]
            # sort indexes so that we keep the correct order within the sentence
            topmax_indexes_sorted = T.sort(topmax_indexes)

            # given that topmax only gives the index along the last dimension, we need to generate the other 3 dimensions
            dim0 = T.arange(0, input.shape[0]).repeat(input.shape[1] * input.shape[2] * k)
            dim1 = T.arange(0, input.shape[1]).repeat(k * input.shape[2]).reshape((1, -1)).repeat(input.shape[0],
                                                                                                  axis=0).flatten()
            dim2 = T.arange(0, input.shape[2]).repeat(k).reshape((1, -1)).repeat(input.shape[0] * input.shape[1],
                                                                                 axis=0).flatten()
            dim3 = topmax_indexes_sorted.flatten()
            return T.transpose(
                input[dim0, dim1, dim2, dim3].reshape((input.shape[0], input.shape[1], input.shape[2], k)),
                axes=(0, 1, 3, 2))
def top_k_pooling(matrix, sentlength_1, sentlength_2, Np):

    #tensor: (1, feature maps, 66, 66)
    #sentlength_1=dim-left1-right1
    #sentlength_2=dim-left2-right2
    #core=tensor[:,:, left1:(dim-right1),left2:(dim-right2) ]
    '''
    repeat_row=Np/sentlength_1
    extra_row=Np%sentlength_1
    repeat_col=Np/sentlength_2
    extra_col=Np%sentlength_2    
    '''
    #repeat core
    matrix_1=repeat_whole_tensor(matrix, 5, True) 
    matrix_2=repeat_whole_tensor(matrix_1, 5, False)

    list_values=matrix_2.flatten()
    neighborsArgSorted = T.argsort(list_values)
    kNeighborsArg = neighborsArgSorted[-(Np**2):]    
    top_k_values=list_values[kNeighborsArg]
    

    all_max_value=top_k_values.reshape((1, Np**2))
    
    return all_max_value  
Example #7
 def get_best_sense(self, word, curr_sense, context_vector, W_s):
     scores_all_senses = T.dot(context_vector, W_s[word].T)
     sorted_senses = T.argsort(scores_all_senses)
     score_best = scores_all_senses[sorted_senses[-1]]
     score_second_best = scores_all_senses[sorted_senses[-2]]
     new_sense = T.switch(T.gt(score_best-score_second_best, epsilon), sorted_senses[-1], curr_sense)
     return new_sense
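get_best_sense only moves to the top-scoring sense when it beats the runner-up by more than epsilon, otherwise it keeps the current sense. The decision rule in isolation, as a NumPy sketch (epsilon and the scores are made up):

import numpy as np

epsilon = 0.1
scores = np.array([0.2, 0.8, 0.75])  # similarity of the context to each sense
curr_sense = 0

order = np.argsort(scores)
best, second = order[-1], order[-2]
new_sense = best if scores[best] - scores[second] > epsilon else curr_sense
print(new_sense)                     # stays 0 here: 0.8 - 0.75 < epsilon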
Example #8
    def dynamic_kmaxPooling(self, curConv_out, k):
        neighborsForPooling = TSN.images2neibs(ten4=curConv_out, neib_shape=(1,curConv_out.shape[3]), mode='ignore_borders')
        self.neighbors = neighborsForPooling

        neighborsArgSorted = T.argsort(neighborsForPooling, axis=1)
        kNeighborsArg = neighborsArgSorted[:,-k:]
        #self.bestK = kNeighborsArg
        kNeighborsArgSorted = T.sort(kNeighborsArg, axis=1)

        ii = T.repeat(T.arange(neighborsForPooling.shape[0]), k)
        jj = kNeighborsArgSorted.flatten()
        pooledkmaxTmp = neighborsForPooling[ii, jj]
        new_shape = T.cast(T.join(0, 
                           T.as_tensor([neighborsForPooling.shape[0]]),
                           T.as_tensor([k])),
                           'int64')
        pooledkmax_matrix = T.reshape(pooledkmaxTmp, new_shape, ndim=2)

        rightWidth=self.unifiedWidth-k            
        right_padding = T.zeros((neighborsForPooling.shape[0], rightWidth), dtype=theano.config.floatX)
        matrix_padded = T.concatenate([pooledkmax_matrix, right_padding], axis=1)      
        #recover tensor form
        new_shape = T.cast(T.join(0, curConv_out.shape[:-2],
                           T.as_tensor([curConv_out.shape[2]]),
                           T.as_tensor([self.unifiedWidth])),
                           'int64')

        curPooled_out = T.reshape(matrix_padded, new_shape, ndim=4)
                
        return curPooled_out
Example #9
    def link(self, input):
        self.input = input

        # select the lines where we apply k-max pooling
        neighbors_for_pooling = TSN.images2neibs(
            ten4=self.input,
            neib_shape=(self.input.shape[2], 1),  # we look the max on every dimension
            mode='valid'  # 'ignore_borders'
        )

        neighbors_arg_sorted = T.argsort(neighbors_for_pooling, axis=1)
        k_neighbors_arg = neighbors_arg_sorted[:, -self.k_max:]
        k_neighbors_arg_sorted = T.sort(k_neighbors_arg, axis=1)

        ii = T.repeat(T.arange(neighbors_for_pooling.shape[0]), self.k_max)
        jj = k_neighbors_arg_sorted.flatten()
        flattened_pooled_out = neighbors_for_pooling[ii, jj]

        pooled_out_pre_shape = T.join(
            0,
            self.input.shape[:-2],
            [self.input.shape[3]],
            [self.k_max]
        )
        self.output = flattened_pooled_out.reshape(
            pooled_out_pre_shape,
            ndim=self.input.ndim
        ).dimshuffle(0, 1, 3, 2)
        return self.output
Example #10
def keep_max(input, theta, k):
    """
    :type input: theano.tensor.tensor4
    :param input: the input data
                
    :type theta: theano.tensor.matrix
    :param theta: the parameter for sigmoid function
                            
    :type k: int 
    :param k: the number of top sentences to keep
    """
    sig_input = T.nnet.sigmoid(T.dot(input, theta))
    if k == 0: # using all the sentences
        result = input * T.addbroadcast(sig_input, 3)
        return result, sig_input

    # get the sorted idx
    sort_idx = T.argsort(sig_input, axis=2)
    k_max_ids = sort_idx[:,:,-k:,:]
    dim0, dim1, dim2, dim3 = k_max_ids.shape
    batchids = T.repeat(T.arange(dim0), dim1*dim2*dim3)
    mapids = T.repeat(T.arange(dim1), dim2*dim3).reshape((1, dim2*dim3))
    mapids = T.repeat(mapids, dim0, axis=0).flatten()
    rowids = k_max_ids.flatten()
    colids = T.arange(dim3).reshape((1, dim3))
    colids = T.repeat(colids, dim0*dim1*dim2, axis=0).flatten()
    # construct masked data
    sig_mask = T.zeros_like(sig_input)
    choosed = sig_input[batchids, mapids, rowids, colids]
    sig_mask = T.set_subtensor(sig_mask[batchids, mapids, rowids, colids], 1)

    input_mask = sig_mask * sig_input
    result = input * T.addbroadcast(input_mask, 3)
    return result, sig_input
Example #11
    def link(self, input):
        self.input = input.dimshuffle(0, 1, 3, 2)
        # get the indexes that give the max on every line and sort them
        ind = T.argsort(self.input, axis=3)
        sorted_ind = T.sort(ind[:, :, :, -self.k_max:], axis=3)
        dim0, dim1, dim2, dim3 = sorted_ind.shape

        # prepare indices for selection
        indices_dim0 = T.arange(dim0)\
                        .repeat(dim1 * dim2 * dim3)
        indices_dim1 = T.arange(dim1)\
                        .repeat(dim2 * dim3)\
                        .reshape((dim1 * dim2 * dim3, 1))\
                        .repeat(dim0, axis=1)\
                        .T\
                        .flatten()
        indices_dim2 = T.arange(dim2)\
                        .repeat(dim3)\
                        .reshape((dim2 * dim3, 1))\
                        .repeat(dim0 * dim1, axis=1)\
                        .T\
                        .flatten()

        # output
        self.output = self.input[
            indices_dim0,
            indices_dim1,
            indices_dim2,
            sorted_ind.flatten()
        ].reshape(sorted_ind.shape).dimshuffle(0, 1, 3, 2)
        return self.output
Example #12
    def k_max_pool(self, x, k):
        """
        perform k-max pool on the input along the rows

        input: theano.tensor.tensor4
           
        k: theano.tensor.iscalar
            the k parameter

        Returns: 
        4D tensor
        """
        x = T.reshape(x, (x.shape[0], x.shape[1], 1, x.shape[2] * x.shape[3]))
        ind = T.argsort(x, axis=3)

        sorted_ind = T.sort(ind[:, :, :, -k:], axis=3)

        dim0, dim1, dim2, dim3 = sorted_ind.shape

        indices_dim0 = T.arange(dim0).repeat(dim1 * dim2 * dim3)
        indices_dim1 = (
            T.arange(dim1).repeat(dim2 * dim3).reshape((dim1 * dim2 * dim3, 1)).repeat(dim0, axis=1).T.flatten()
        )
        indices_dim2 = T.arange(dim2).repeat(dim3).reshape((dim2 * dim3, 1)).repeat(dim0 * dim1, axis=1).T.flatten()

        result = x[indices_dim0, indices_dim1, indices_dim2, sorted_ind.flatten()].reshape(sorted_ind.shape)
        shape = (result.shape[0], result.shape[1], result.shape[2] * result.shape[3], 1)

        result = T.reshape(result, shape)

        return result
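k_max_pool keeps the k largest values along the last axis while preserving their original order. The same index arithmetic can be sanity-checked against NumPy's take_along_axis (toy shapes and names, not the layer's real inputs):

import numpy as np

x = np.random.rand(2, 3, 1, 7)       # (batch, maps, 1, width) after the reshape
k = 3
ind = np.sort(np.argsort(x, axis=3)[:, :, :, -k:], axis=3)
kmax = np.take_along_axis(x, ind, axis=3)   # (2, 3, 1, k), original order preserved
assert kmax.shape == (2, 3, 1, k)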
Example #13
 def _step(x, k, max_seq_len):
     tmp = x[
         T.arange(x.shape[0])[:, np.newaxis, np.newaxis],
         T.sort(T.argsort(x, axis=1)[:, -k:, :], axis=1),
         T.arange(x.shape[2])[np.newaxis, np.newaxis,:],
     ]
     return T.concatenate([tmp, T.zeros([x.shape[0], max_seq_len-k, x.shape[2]])], axis=1)
Example #14
 def __call__(self,X):
     ind = T.argsort(X, axis = 3)
     sorted_ind = T.sort(ind[:,:,:, -self.poolsize:], axis = 3)
     dim0, dim1, dim2, dim3 = sorted_ind.shape
     indices_dim0 = T.arange(dim0).repeat(dim1 * dim2 * dim3)
     indices_dim1 = T.arange(dim1).repeat(dim2 * dim3).reshape((dim1*dim2*dim3, 1)).repeat(dim0, axis=1).T.flatten()
     indices_dim2 = T.arange(dim2).repeat(dim3).reshape((dim2*dim3, 1)).repeat(dim0 * dim1, axis = 1).T.flatten()
     return X[indices_dim0, indices_dim1, indices_dim2, sorted_ind.flatten()].reshape(sorted_ind.shape)
Example #15
def argtop_k(x, k=1):
    # top-k accuracy
    top = T.argsort(x, axis=-1)
    # (Theano cannot index with [..., -k:], we need to simulate that)
    top = top[[slice(None) for _ in range(top.ndim - 1)] +
              [slice(-k, None)]]
    top = top[(slice(None),) * (top.ndim - 1) + (slice(None, None, -1),)]
    return top
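A possible way to turn argtop_k into a top-k accuracy measure; a sketch with assumed variable names, not taken from the original project:

import theano
import theano.tensor as T

probs = T.matrix('probs')            # (batch, n_classes) predicted scores
y = T.ivector('y')                   # true class per sample
top5 = argtop_k(probs, k=5)          # (batch, 5), best class first
acc = T.mean(T.any(T.eq(top5, y.dimshuffle(0, 'x')), axis=-1))
top5_accuracy = theano.function([probs, y], acc)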
 def _FindB_best(lPLcl, lPprev, dVLcl):
     srtLcl = tensor.argsort(-lPLcl)
     srtLcl = srtLcl[:beam_size]
     deltaVec = tensor.fill( lPLcl[srtLcl], numpy_floatX(-10000.))
     deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
     lProbBest = ifelse(tensor.eq( dVLcl, tensor.zeros_like(dVLcl)), lPLcl[srtLcl] + lPprev, deltaVec)
     xWIdxBest = ifelse(tensor.eq( dVLcl, tensor.zeros_like(dVLcl)), srtLcl, tensor.zeros_like(srtLcl)) 
     return lProbBest, xWIdxBest 
Example #17
 def fix_k_max(self, k, masked_data):
     # @ref: https://github.com/fchollet/keras/issues/373
     result = masked_data[
         T.arange(masked_data.shape[0]).dimshuffle(0, "x", "x"),
         T.sort(T.argsort(masked_data, axis=1)[:, -k:, :], axis=1),
         T.arange(masked_data.shape[2]).dimshuffle("x", "x", 0)
     ]
     return result
Example #18
def _k_max_pooling(input, kmax):
  pool = input.dimshuffle(0, 2, 1, 3).flatten(ndim=3).dimshuffle(1,0,2).flatten(ndim=2).dimshuffle(1,0)
  neighborsArgSorted = T.argsort(pool, axis=1)
  yy = T.sort(neighborsArgSorted[:, -kmax:], axis=1).flatten()
  xx = T.repeat(T.arange(neighborsArgSorted.shape[0]), kmax)
  pool_kmax = pool[xx, yy]
  pool_kmax_shape = T.join(0, T.as_tensor([input.shape[0], input.shape[1], input.shape[3], kmax]))
  pooled_out = pool_kmax.reshape(pool_kmax_shape, ndim=4).dimshuffle(0, 1, 3, 2)
  return pooled_out
Example #19
File: layer.py Project: srivignessh/NICE
    def inv_fprop(self, output_):
        shape = output_.shape[1]
        index = self.dim
        state_below = output_
        state_below = state_below[:, T.argsort(self.permutation)]
        coupling_out = -self.function(state_below[:, :index])
        state_below = T.inc_subtensor(state_below[:, index:],
                                      coupling_out)

        return state_below
Example #20
def arg_sort():
    a, b, c = 2, 4, 4
    input = np.arange(a*b*c).reshape([a, b, c]).astype('float32')
    print input
    print
    x = T.tensor3()
    z = T.argsort(x, axis=2)[:, :, :2].astype('int64')
    z = x[z[0].flatten()]
    # z = x[T.arange(x.shape[0], dtype='int32'), T.arange(x.shape[1], dtype='int32'), z]
    f = theano.function(inputs=[x], outputs=z)
    print f(input)
Example #21
    def kmaxPool(self, conv_out, pool_shape, k):
        '''
        Perform k-max Pooling.
        '''
        n0, n1, d, size = pool_shape
        imgs = images2neibs(conv_out, T.as_tensor_variable((1, size)))

        indices = T.argsort(T.mul(imgs, -1))
        k_max_indices = T.sort(indices[:, :k])
    
        S = T.arange(d*n1*n0).reshape((d*n1*n0, 1))
        return imgs[S, k_max_indices].reshape((n0, n1, d, k))
Example #22
def l2C(curr_word, i, curr_senses, context_vector):
	# theano vector of size (num_senses,)
	scores_all_senses = T.dot(context_vector, W_s[curr_word].T)

	sorted_senses = T.argsort(scores_all_senses)
	score_best = scores_all_senses[sorted_senses[-1]]
	score_second_best = scores_all_senses[sorted_senses[-2]]

	prev_sense  = curr_senses[i]
	context_vector = T.switch(T.gt(score_best-score_second_best, epsilon),  change_context_vec(context_vector, sorted_senses[-1], prev_sense, curr_word), context_vector )
	new_senses = T.set_subtensor(curr_senses[i], sorted_senses[-1])
	return [new_senses, context_vector]
Example #23
 def get_hard_examples(self, _, x, y, batch_size, transformed_x=identity):
     '''
     Returns the set of training cases (above avg reconstruction error)
     :param _:
     :param x:
     :param y:
     :param batch_size:
     :return:
     '''
     # sort the examples by cost and keep the upper (higher-error) half
     indexes = T.argsort(self.cost_vector)[(self.cost_vector.shape[0] // 2):]
     return self.make_func(x=x, y=y, batch_size=batch_size, output=[self._x[indexes], self._y[indexes]], update=None, transformed_x=transformed_x)
Example #24
    def __init__(self, dnodex,inputdim,dim):
        X=T.ivector()
        Y=T.ivector()
        Z=T.lscalar()
        NP=T.ivector()
        lambd = T.scalar()
        eta = T.scalar()
        temperature=T.scalar()
        num_input = inputdim
        self.umatrix=theano.shared(floatX(np.random.rand(dnodex.nuser,inputdim, inputdim)))
        self.pmatrix=theano.shared(floatX(np.random.rand(dnodex.npoi,inputdim)))
        self.p_l2_norm=(self.pmatrix**2).sum()
        self.u_l2_norm=(self.umatrix**2).sum()
        num_hidden = dim
        num_output = inputdim
        inputs = InputPLayer(self.pmatrix[X,:], self.umatrix[Z,:,:], name="inputs")
        lstm1 = LSTMLayer(num_input, num_hidden, input_layer=inputs, name="lstm1")
        #lstm2 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm1, name="lstm2")
        #lstm3 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm2, name="lstm3")
        softmax = SoftmaxPLayer(num_hidden, num_output, self.umatrix[Z,:,:], input_layer=lstm1, name="yhat", temperature=temperature)

        Y_hat = softmax.output()

        self.layers = inputs, lstm1,softmax
        params = get_params(self.layers)
        #caches = make_caches(params)

        tmp_u=T.mean(T.dot(self.pmatrix[X,:],self.umatrix[Z,:,:]),axis=0)
        tr=T.dot(tmp_u,(self.pmatrix[X,:]-self.pmatrix[NP,:]).transpose())
        pfp_loss1=sigmoid(tr)
        pfp_loss=pfp_loss1*(T.ones_like(pfp_loss1)-pfp_loss1)
        tmp_u1=T.reshape(T.repeat(tmp_u,X.shape[0]),(inputdim,X.shape[0])).T
        pfp_lossv=T.reshape(T.repeat(pfp_loss,inputdim),(inputdim,X.shape[0])).T
        cost = lambd*10*T.mean(T.nnet.categorical_crossentropy(Y_hat, T.dot(self.pmatrix[Y,:],self.umatrix[Z,:,:])))+lambd*self.p_l2_norm+lambd*self.u_l2_norm
    #    updates = PerSGD(cost,params,eta,X,Z,dnodex)#momentum(cost, params, caches, eta)
        updates = []
        grads = T.grad(cost=cost, wrt=params)
        updates.append([self.pmatrix,T.set_subtensor(self.pmatrix[X,:],self.pmatrix[X,:]-eta*grads[0])])
        updates.append([self.umatrix,T.set_subtensor(self.umatrix[Z,:,:],self.umatrix[Z,:,:]-eta*grads[1])])
        for p,g in zip(params[2:], grads[2:]):
            updates.append([p, p - eta * g])

        rlist=T.argsort(T.dot(tmp_u,self.pmatrix.T))[::-1]
        n_updates=[(self.pmatrix, T.set_subtensor(self.pmatrix[NP,:],self.pmatrix[NP,:]-eta*pfp_lossv*tmp_u1-eta*lambd*self.pmatrix[NP,:]))]
        p_updates=[(self.pmatrix, T.set_subtensor(self.pmatrix[X,:],self.pmatrix[X,:]+eta*pfp_lossv*tmp_u1-eta*lambd*self.pmatrix[X,:])),(self.umatrix, T.set_subtensor(self.umatrix[Z,:,:],self.umatrix[Z,:,:]+eta*T.mean(pfp_loss)*(T.reshape(tmp_u,(tmp_u.shape[0],1))*T.mean(self.pmatrix[X,:]-self.pmatrix[NP,:],axis=0)))-eta*lambd*self.umatrix[Z,:,:])]
        
        self.train = theano.function([X,Y,Z, eta, lambd, temperature], cost, updates=updates, allow_input_downcast=True)
        self.trainpos=theano.function([X,NP,Z,eta, lambd],tmp_u, updates=p_updates,allow_input_downcast=True)
        self.trainneg=theano.function([X,NP,Z,eta, lambd],T.mean(pfp_loss), updates=n_updates,allow_input_downcast=True)
        
        
        self.predict_pfp = theano.function([X,Z], rlist, allow_input_downcast=True)
Example #25
    def __init__(self, conv_out, k=1):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height,filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows,#cols)
        """
        
        #images2neibs produces a 2D matrix
        neighborsForPooling = TSN.images2neibs(ten4=conv_out, neib_shape=(conv_out.shape[2], 1), mode='ignore_borders')

        #k = poolsize[1]

        neighborsArgSorted = T.argsort(neighborsForPooling, axis=1)
        kNeighborsArg = neighborsArgSorted[:,-k:]
        kNeighborsArgSorted = T.sort(kNeighborsArg, axis=1)

        ii = T.repeat(T.arange(neighborsForPooling.shape[0]), k)
        jj = kNeighborsArgSorted.flatten()
        pooledkmaxTmp = neighborsForPooling[ii, jj]

        # reshape pooledkmaxTmp
        new_shape = T.cast(T.join(0, conv_out.shape[:-2],
                           T.as_tensor([conv_out.shape[3]]),
                           T.as_tensor([k])),
                           'int32')
        pooled_out = T.reshape(pooledkmaxTmp, new_shape, ndim=4)
        
        # downsample each feature map individually, using maxpooling
        '''
        pooled_out = downsample.max_pool_2d(input=conv_out,
                                            ds=poolsize, ignore_border=True)
        '''
        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out)
Example #26
 def errors_top_x(self, y, num_top=5):
     if y.ndim != self.y_pred.ndim:
         raise TypeError('y should have the same shape as self.y_pred',
                         ('y', y.type, 'y_pred', self.y_pred.type))
     # check if y is of the correct datatype
     if y.dtype.startswith('int'):
         # the T.neq operator returns a vector of 0s and 1s, where 1
         # represents a mistake in prediction
         y_pred_top_x = T.argsort(self.p_y_given_x, axis=1)[:, -num_top:]
         y_top_x = y.reshape((y.shape[0], 1)).repeat(num_top, axis=1)
         return T.mean(T.min(T.neq(y_pred_top_x, y_top_x), axis=1))
     else:
         raise NotImplementedError()
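The argsort-based top-5 error above is easy to reproduce in NumPy for a quick check (toy data, not the model's predictions):

import numpy as np

p_y_given_x = np.random.rand(8, 10)  # (batch, n_classes)
y = np.random.randint(0, 10, size=8)
num_top = 5

y_pred_top_x = np.argsort(p_y_given_x, axis=1)[:, -num_top:]   # 5 most likely classes
y_top_x = y.reshape((-1, 1)).repeat(num_top, axis=1)
error = np.mean(np.min(y_pred_top_x != y_top_x, axis=1))       # 1 only when all 5 miss
print(error)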
Example #27
    def kmaxpooling(self,input,k):

        sorted_values = T.argsort(input,axis=3)
        topmax_indexes = sorted_values[:,:,:,-k:]
        # sort indexes so that we keep the correct order within the sentence
        topmax_indexes_sorted = T.sort(topmax_indexes)

        #given that topmax only gives the index along the last dimension, we need to generate the other 3 dimensions
        dim0 = T.arange(0,self.input_shape[0]).repeat(self.input_shape[1]*self.input_shape[2]*k)
        dim1 = T.arange(0,self.input_shape[1]).repeat(k*self.input_shape[2]).reshape((1,-1)).repeat(self.input_shape[0],axis=0).flatten()
        dim2 = T.arange(0,self.input_shape[2]).repeat(k).reshape((1,-1)).repeat(self.input_shape[0]*self.input_shape[1],axis=0).flatten()
        dim3 = topmax_indexes_sorted.flatten()
        return input[dim0,dim1,dim2,dim3].reshape((self.input_shape[0], self.input_shape[1], self.input_shape[2], k))
Example #28
 def errors_top_x(self, p_y_given_x, y, num_top=5):                       
                                 
     if num_top != 5: print 'val errors from top %d' % num_top        
     
     # check if y is of the correct datatype
     if y.dtype.startswith('int'):
         # the T.neq operator returns a vector of 0s and 1s, where 1
         # represents a mistake in prediction
         y_pred_top_x = T.argsort(p_y_given_x, axis=1)[:, -num_top:]
         y_top_x = y.reshape((y.shape[0], 1)).repeat(num_top, axis=1)
         return T.mean(T.min(T.neq(y_pred_top_x, y_top_x), axis=1))
     else:
         raise NotImplementedError()             
Example #29
File: PCA.py Project: fabi92/deeplearning
    def process(self):
        data = self.data - self.data.mean(axis=0)
        cov = T.dot(data.T, data.conj())/ (data.shape[0]-1)
        evals, evecs = T.nlinalg.eig(cov)
        
        if self.components is None and \
                self.threshold is not None:
            self.components = T.gt(evals, self.threshold).sum()

        key = T.argsort(evals)[::-1][:self.components]

        self.evals, self.evecs = evals[key], evecs[:, key]
        self.pca = T.dot(self.evecs.T, data.T).T
        return self.pca
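The argsort line is what keeps the eigenvectors belonging to the largest eigenvalues. The same selection in plain NumPy (illustrative only; eigh is used here because the covariance matrix is symmetric):

import numpy as np

data = np.random.randn(100, 6)
data = data - data.mean(axis=0)
cov = np.dot(data.T, data) / (data.shape[0] - 1)
evals, evecs = np.linalg.eigh(cov)

components = 2
key = np.argsort(evals)[::-1][:components]   # indices of the largest eigenvalues
evals, evecs = evals[key], evecs[:, key]
pca = np.dot(evecs.T, data.T).T              # (100, components) projected data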
   def _stepP(x_, h_, c_, lP_, dV_, xAux):
       preact = tensor.dot(h_, tparams[_p(prefix, 'W_hid')])
       preact += (tensor.dot(x_, tparams[_p(prefix, 'W_inp')]) +
                  tparams[_p(prefix, 'b')])
       if options.get('en_aux_inp',0):
           preact += tensor.dot(xAux,tparams[_p(prefix,'W_aux')])
 
       i = tensor.nnet.sigmoid(sliceT(preact, 0, options['hidden_size']))
       f = tensor.nnet.sigmoid(sliceT(preact, 1, options['hidden_size']))
       o = tensor.nnet.sigmoid(sliceT(preact, 2, options['hidden_size']))
       c = tensor.tanh(sliceT(preact, 3, options['hidden_size']))
 
       c = f * c_ + i * c
 
       h = o * tensor.tanh(c)
 
       p = tensor.dot(h,tparams['Wd']) + tparams['bd']
       p = tensor.nnet.softmax(p)
       lProb = tensor.log(p + 1e-20)
 
       def _FindB_best(lPLcl, lPprev, dVLcl):
           srtLcl = tensor.argsort(-lPLcl)
           srtLcl = srtLcl[:beam_size]
           deltaVec = tensor.fill( lPLcl[srtLcl], numpy_floatX(-10000.))
           deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
           lProbBest = ifelse(tensor.eq( dVLcl, tensor.zeros_like(dVLcl)), lPLcl[srtLcl] + lPprev, deltaVec)
           xWIdxBest = ifelse(tensor.eq( dVLcl, tensor.zeros_like(dVLcl)), srtLcl, tensor.zeros_like(srtLcl)) 
           return lProbBest, xWIdxBest 
 
       rvalLcl, updatesLcl = theano.scan(_FindB_best, sequences = [lProb, lP_, dV_], name=_p(prefix, 'FindBest'), n_steps=x_.shape[0])
       xWIdxBest = rvalLcl[1]
       lProbBest = rvalLcl[0]
 
       xWIdxBest = xWIdxBest.flatten()
       lProb = lProbBest.flatten()
 
       # Now sort and find the best among these best extensions for the current beams
       srtIdx = tensor.argsort(-lProb)
       srtIdx = srtIdx[:beam_size]
       xWlogProb = lProb[srtIdx]
 
       xWIdx = xWIdxBest[srtIdx]
       xCandIdx = srtIdx // beam_size # Floor division 
 
       xW = tparams['Wemb'][xWIdx.flatten()]
       doneVec = tensor.eq(xWIdx,tensor.zeros_like(xWIdx))
       h = h.take(xCandIdx.flatten(),axis=0);
       c = c.take(xCandIdx.flatten(),axis=0)
 
       return [xW, h, c, xWlogProb, doneVec, xWIdx, xCandIdx], theano.scan_module.until(doneVec.all())
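Stripped of the LSTM, the beam update in _stepP boils down to ranking every candidate extension with argsort(-lProb) and keeping the best beam_size of them. A NumPy sketch of just that bookkeeping (scores are random, names are mine):

import numpy as np

beam_size = 3
lProb = np.log(np.random.rand(beam_size * beam_size) + 1e-20)  # flattened extension scores

srtIdx = np.argsort(-lProb)[:beam_size]      # overall best beam_size extensions
xWlogProb = lProb[srtIdx]                    # their log-probabilities
xCandIdx = srtIdx // beam_size               # which beam each winner extends
within_beam = srtIdx % beam_size             # its rank inside that beam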
Example #31
def test_big_and_little_train_both(rng,
                                   batch_size=1,
                                   learning_rate=0.01,
                                   n_epochs=1000,
                                   L1_reg=0.0,
                                   L2_reg=0.0001):
    l_learning_rate = learning_rate
    b_learning_rate = 10 * learning_rate

    index = T.lscalar('index')
    l_x = T.matrix('l_x', dtype=config.floatX)
    b_x = T.tensor3('b_x', dtype=config.floatX)
    y = T.ivector('y')

    print "Loading Data"
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print "Building models"
    print "... Building layers"
    # Create network structure
    x_size = train_set_x.shape[1].eval()
    n_in = x_size
    n_units_per = 32
    n_out = 500
    l_layers = []
    b_layers = []

    l_layers.append(
        HiddenLayer(
            n_in,
            n_out,
            batch_size,
            #k=0.05,
            k=1,
            activation=T.tanh,
            name='l_layer_' + str(len(l_layers))))

    in_idxs_0 = shared(np.zeros((batch_size, 1), dtype='int64'),
                       name='in_idxs_0')
    b_layers.append(
        HiddenBlockLayer((1, x_size), (n_out, n_units_per),
                         in_idxs_0,
                         l_layers[-1].top_active,
                         batch_size,
                         activation=T.tanh,
                         name='b_layer_' + str(len(b_layers))))

    #n_in = n_out
    #n_out = 100
    #k_activations = 0.12
    #l_layers.append(
    #    HiddenLayer(
    #        n_in,
    #        n_out,
    #        k=k_activations,
    #        name='l_layer_' + str(len(l_layers))
    #    )
    #)
    #b_layers.append(HiddenBlockLayer(n_in, n_out, batch_size))

    n_in = n_out
    n_out = 10
    l_layers.append(
        HiddenLayer(n_in,
                    n_out,
                    batch_size,
                    k=1,
                    activation=T.nnet.softmax,
                    name='l_layer_' + str(len(l_layers))))
    l_layers[-1].W.set_value(0 * l_layers[-1].W.get_value())

    # T.nnet.softmax takes a matrix not a tensor so just calculate the linear
    # component in the layer and apply the softmax later
    #out_idxs_n = shared(
    #    np.repeat(
    #        np.arange(n_out, dtype='int64').reshape(1, n_out),
    #        batch_size,
    #        axis=0
    #    ),
    #    name='out_idxs_' + str(len(l_layers))
    #)
    b_layers.append(
        HiddenBlockLayer(
            (n_in, n_units_per),
            (n_out, n_units_per),
            l_layers[-2].top_active,
            l_layers[-1].top_active,
            #out_idxs_n,
            batch_size,
            None,
            name='b_layer_' + str(len(b_layers))))
    #b_layers[-1].W.set_value(0*b_layers[-1].W.get_value())

    print "... Building top active updates"
    top_active = []
    l_activation = l_x
    b_activation = b_x
    b_activations = [b_activation]
    for i in range(len(l_layers)):
        l_activation = l_layers[i].output(l_activation)
        b_activation = b_layers[i].output(b_activation)
        b_activations.append(b_activation)
        top_active.append((l_layers[i].top_active,
                           T.argsort(T.abs_(l_activation))[:, :l_layers[i].k]))

    print "... Building costs and errors"
    l_cost = add_regularization(l_layers, l_layers[-1].cost(l_activation, y),
                                L1_reg, L2_reg)
    l_error = l_layers[-1].error(l_activation, y)

    # T.nnet.softmax takes a matrix not a tensor so we only calculate the
    # linear component at the last layer and here we reshape and then
    # apply the softmax
    #b_activation = T.nnet.softmax(((b_activation*b_activation)**2).sum(axis=2))
    #b_activation = relu_softmax(((b_activation*b_activation)**2).sum(axis=2))
    b_activation = T.nnet.softmax(T.mean(b_activation, axis=2))
    #b_activation = relu_softmax(T.mean(b_activation, axis=2))
    #b_activation = T.nnet.softmax(T.max(b_activation, axis=2))
    #b_activation = relu_softmax(T.max(b_activation, axis=2))
    b_activations.append(b_activation)
    b_cost = add_regularization(b_layers, b_layers[-1].cost(b_activation, y),
                                L1_reg, L2_reg)
    b_error = b_layers[-1].error(b_activation, y)

    print "... Building parameter updates"
    l_grads = []
    l_param_updates = []
    b_grads = []
    b_param_updates = []
    for i in range(len(l_layers)):
        for param in l_layers[i].params:
            gparam = T.grad(l_cost, param)
            l_grads.append(gparam)
            l_param_updates.append((param, param - l_learning_rate * gparam))

        for param in b_layers[i].params:
            gparam = T.grad(
                b_cost,
                param,
                consider_constant=[b_layers[i].in_idxs, b_layers[i].out_idxs])
            b_grads.append(gparam)
            b_param_updates.append((param, param - b_learning_rate * gparam))

    print "... Compiling little net train function"
    l_train_model = function(
        [index], [l_cost, l_x, y],
        updates=top_active + l_param_updates,
        givens={
            l_x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling big net train function"
    temp = train_set_x.get_value(borrow=True, return_internal_type=True)
    train_set_x_b = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                           borrow=True,
                           name='train_set_x_b')
    b_train_model = function(
        [index], [b_cost],
        updates=b_param_updates,
        givens={
            b_x: train_set_x_b[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    #theano.printing.debugprint(b_train_model)
    #ipdb.set_trace()

    #    verify_layers(batch_size, b_layers, train_set_x_b, train_set_y)
    #    temp = verify_cost(
    #        b_cost,
    #        b_layers,
    #        b_x,
    #        y,
    #        batch_size,
    #        train_set_x_b,
    #        train_set_y
    #    )
    #    T.verify_grad(
    #        temp,
    #        [b_layers[0].W.get_value(), b_layers[1].W.get_value()],
    #        rng=rng
    #    )

    print "... Compiling little net test function"
    l_test_model = function(
        [index],
        l_error,
        givens={
            l_x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling big net test function"
    temp = test_set_x.get_value(borrow=True, return_internal_type=True)
    test_set_x_b = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                          borrow=True,
                          name='test_set_x_b')
    b_test_model = function(
        [index],
        b_error,
        givens={
            b_x: test_set_x_b[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling little net validate function"
    l_validate_model = function(
        [index],
        l_error,
        givens={
            l_x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling big net validate function"
    temp = valid_set_x.get_value(borrow=True, return_internal_type=True)
    valid_set_x_b = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                           borrow=True,
                           name='valid_set_x_b')
    b_validate_model = function(
        [index],
        b_error,
        givens={
            b_x: valid_set_x_b[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "Training"

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 100  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    accum = 0
    accum_b = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = l_train_model(minibatch_index)
            # b_train_model takes only the batch index; the learning rate is
            # already compiled into its parameter updates
            minibatch_avg_cost_b = b_train_model(minibatch_index)

            #print "minibatch_avg_cost: " + str(minibatch_avg_cost) + " minibatch_avg_cost_b: " + str(minibatch_avg_cost_b)
            #print l_layers[0].W.get_value().sum(), l_layers[1].W.get_value().sum(), b_layers[0].W.get_value().sum(), b_layers[1].W.get_value().sum()
            #print "A: ", np.max(np.abs(b_layers[0].W.get_value())), np.max(np.abs(b_layers[0].b.get_value())), np.max(np.abs(b_layers[1].W.get_value())), np.max(np.abs(b_layers[1].b.get_value()))
            #print "B: ", np.abs(b_layers[0].W.get_value()).sum(), np.abs(b_layers[0].b.get_value()).sum(), np.abs(b_layers[1].W.get_value()).sum(), np.abs(b_layers[1].b.get_value()).sum()
            #print "C: ", np.abs(np.array(minibatch_avg_cost_b[1])).sum(), np.abs(np.array(minibatch_avg_cost_b[2])).sum(), np.abs(np.array(minibatch_avg_cost_b[3])).sum(), np.abs(np.array(minibatch_avg_cost_b[4])).sum()
            minibatch_avg_cost = minibatch_avg_cost[0]
            minibatch_avg_cost_b = minibatch_avg_cost_b[0]
            accum = accum + minibatch_avg_cost
            accum_b = accum_b + minibatch_avg_cost_b

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                accum = accum / validation_frequency
                accum_b = accum_b / validation_frequency
                print "minibatch_avg_cost: ", accum, \
                    "minibatch_avg_cost_b: ", accum_b
                accum = 0
                accum_b = 0

                # compute zero-one loss on validation set
                validation_losses = [
                    l_validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                validation_losses_b = [
                    b_validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss_b = np.mean(validation_losses_b)
                #this_validation_loss_b = 0

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %% '
                    '(%f %%)' % (epoch, minibatch_index + 1, n_train_batches,
                                 this_validation_loss * 100.,
                                 this_validation_loss_b * 100.))
                #ipdb.set_trace()

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        l_test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    test_losses_b = [
                        b_test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score_b = np.mean(test_losses_b)
                    #test_score_b = 0

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %% (%f %%)') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100., test_score_b * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #32
  def build_model(self, tparams, options, xI=None, prior_inp_list = []):
    trng = RandomStreams()
    rng = np.random.RandomState()

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    xWi = T.matrix('xW', dtype='int64')
    # Now input is transposed compared to the generator!!
    xW = xWi.T
    n_samples = xW.shape[0]
    n_words= xW.shape[1]

    Words = T.concatenate([tparams['Wemb'], T.alloc(numpy_floatX(0.),1,self.word_encoding_size)],axis=0)
    embW = Words[xW.flatten()].reshape([options['batch_size'], 1, n_words, self.word_encoding_size])

    if options.get('use_dropout',0):
        embW = dropout_layer(embW, use_noise, trng, options['drop_prob_encoder'], shp = embW.shape)

    sent_emb, cnn_out , tparams = self.sent_conv_layer(tparams, options, embW, options['batch_size'], use_noise, trng)

    if xI == None:
        xI = T.matrix('xI', dtype=config.floatX)
        xI_is_inp = True
    else:
        xI_is_inp = False


    if options.get('mode','batchtrain') != 'batchtrain':
        posSamp = T.ivector('posSamp')

    if xI_is_inp:
        embImg = T.dot(xI, tparams['WIemb']) + tparams['b_Img']
    else:
        embImg = xI + tparams['b_Img']

    if options.get('use_dropout',0):
        embImg = dropout_layer(embImg, use_noise, trng, options['drop_prob_encoder'], shp = embImg.shape)


    #-------------------------------------------------------------------------------------------------------------#
    # Curr prob is computed by applying softmax over (I0,c0), (I0,c1),... (I0,cn-1) pairs
    # It could also be computed with (I0,c0), (I1,c0),... (In,c0) pairs, but will lead to different discrimination
    # Maybe even sum of the two could be used
    #-------------------------------------------------------------------------------------------------------------#
    probMatchImg, sim_score = multimodal_cosine_sim_softmax(embImg, sent_emb, tparams, options.get('sim_smooth_factor',1.0))
    inp_list = [xWi]
    if xI_is_inp:
        inp_list.append(xI)

    if options.get('en_aux_inp',0):
        xAux = T.matrix('xAux', dtype=config.floatX)
        embAux = T.dot(xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
        xAuxEmb = dropout_layer(embAux, use_noise, trng, options['drop_prob_aux'], shp = embAux.shape)
        inp_list.append(xAux)
        probMatchAux, sim_scoreAux = multimodal_cosine_sim_softmax(embAux, sent_emb, tparams, options.get('sim_smooth_factor',1.0))
    else:
        probMatchAux = T.alloc(numpy_floatX(0.),1,1)

    probMatch = (probMatchImg + probMatchAux) / 2.

    sortedProb = T.argsort(probMatch,axis=1)

    batch_idces = T.arange(probMatch.shape[0])
    opponents = T.switch(T.eq(sortedProb[:,-1], batch_idces), sortedProb[:,-2], sortedProb[:,-1])

    violator_mask = (probMatch.diagonal() - probMatch[batch_idces,opponents]) < (options.get('cost_margin',0.02))

    n_violators = violator_mask.sum()

    if options.get('mode','batchtrain') == 'batchtrain':
        cost = [-((T.log(probMatch.diagonal())* (1+2.0*violator_mask)).sum())/probMatch.shape[0]]
    else:
        cost = [-(T.log(probMatch[0,posSamp]).sum())/posSamp.shape[0]]

    cost.append(n_violators)
    cost.append((probMatch.diagonal() - probMatch[batch_idces,opponents]))

    f_pred_sim_prob = theano.function(prior_inp_list + inp_list, [probMatchImg, probMatchAux, probMatch, opponents], name='f_pred_sim_prob')
    f_pred_sim_scr = theano.function(prior_inp_list + inp_list[:2], sim_score, name='f_pred_sim_scr')
    f_sent_emb = theano.function(inp_list[:1], cnn_out, name='f_sent_emb')

    if options.get('mode','batchtrain') != 'batchtrain':
        inp_list.append(posSamp)

    return use_noise, inp_list, [f_pred_sim_prob, f_pred_sim_scr, f_sent_emb], cost, sim_score, tparams
        def _stepP(*in_list):
            x_inp = []
            h_inp = []
            c_inp = []
            for i in xrange(nmodels):
                x_inp.append(in_list[i])
                h_inp.append(in_list[nmodels + i])
                c_inp.append(in_list[2 * nmodels + i])
            lP_ = in_list[3 * nmodels]
            dV_ = in_list[3 * nmodels + 1]

            p_comb = tensor.alloc(numpy_floatX(0.), options[0]['output_size'])
            cf = []
            h = []
            xW = []
            for i in xrange(nmodels):
                preact = tensor.dot(h_inp[i], tparams[i][_p(prefix, 'W_hid')])
                preact += (
                    tensor.dot(x_inp[i], tparams[i][_p(prefix, 'W_inp')]) +
                    tparams[i][_p(prefix, 'b')])
                if options[i].get('en_aux_inp', 0):
                    preact += tensor.dot(aux_input2[i],
                                         tparams[i][_p(prefix, 'W_aux')])

                inp = tensor.nnet.sigmoid(
                    sliceT(preact, 0, options[i]['hidden_size']))
                f = tensor.nnet.sigmoid(
                    sliceT(preact, 1, options[i]['hidden_size']))
                o = tensor.nnet.sigmoid(
                    sliceT(preact, 2, options[i]['hidden_size']))
                c = tensor.tanh(sliceT(preact, 3, options[i]['hidden_size']))

                cf.append(f * c_inp[i] + inp * c)

                h.append(o * tensor.tanh(cf[i]))
                p = tensor.dot(h[i], tparams[i]['Wd']) + tparams[i]['bd']
                if i == 0:
                    p_comb = tparams[i]['comb_weight'] * tensor.nnet.softmax(p)
                else:
                    p_comb += tparams[i]['comb_weight'] * tensor.nnet.softmax(
                        p)

            lProb = tensor.log(p_comb + 1e-20)

            def _FindB_best(lPLcl, lPprev, dVLcl):
                srtLcl = tensor.argsort(-lPLcl)
                srtLcl = srtLcl[:beam_size]
                deltaVec = tensor.fill(lPLcl[srtLcl], numpy_floatX(-10000.))
                deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
                lProbBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                                   lPLcl[srtLcl] + lPprev, deltaVec)
                xWIdxBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                                   srtLcl, tensor.zeros_like(srtLcl))
                return lProbBest, xWIdxBest

            rvalLcl, updatesLcl = theano.scan(_FindB_best,
                                              sequences=[lProb, lP_, dV_],
                                              name=_p(prefix, 'FindBest'),
                                              n_steps=x_inp[0].shape[0])
            xWIdxBest = rvalLcl[1]
            lProbBest = rvalLcl[0]

            xWIdxBest = xWIdxBest.flatten()
            lProb = lProbBest.flatten()

            # Now sort and find the best among these best extensions for the current beams
            srtIdx = tensor.argsort(-lProb)
            srtIdx = srtIdx[:beam_size]
            xWlogProb = lProb[srtIdx]

            xWIdx = xWIdxBest[srtIdx]
            xCandIdx = srtIdx // beam_size  # Floor division

            doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))

            x_out = []
            h_out = []
            c_out = []
            for i in xrange(nmodels):
                x_out.append(tparams[i]['Wemb'][xWIdx.flatten()])
                h_out.append(h[i].take(xCandIdx.flatten(), axis=0))
                c_out.append(cf[i].take(xCandIdx.flatten(), axis=0))

            out_list = []
            out_list.extend(x_out)
            out_list.extend(h_out)
            out_list.extend(c_out)
            out_list.extend([xWlogProb, doneVec, xWIdx, xCandIdx])

            return out_list, theano.scan_module.until(doneVec.all())
Example #34
def test_big_and_little_train_big(rng,
                                  batch_size,
                                  learning_rate,
                                  momentum_rate,
                                  n_epochs=1000,
                                  L1_reg=0.0,
                                  L2_reg=0.0001,
                                  restore_parameters=False,
                                  select_top_active=False,
                                  mult_small_net_params=False,
                                  zero_last_layer_params=False,
                                  train_little_net=False,
                                  train_big_net=True):
    def summarize_rates():
        print "Learning rate: ", learning_rate.rate, \
            "Momentum: ", momentum.get_value()

    assert (train_big_net or train_little_net)

    l_learning_rate = shared(np.array(learning_rate.rate, dtype=config.floatX),
                             name='learning_rate')
    b_learning_rate = shared(np.array(learning_rate.rate, dtype=config.floatX),
                             name='learning_rate')
    momentum = shared(np.array(momentum_rate.rate, dtype=config.floatX),
                      name='momentum')

    index = T.lscalar('index')
    l_x = T.matrix('l_x', dtype=config.floatX)
    b_x = T.tensor3('b_x', dtype=config.floatX)
    y = T.ivector('y')

    print "Loading Data"
    print "... MNIST"
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print "Building models"
    print "... Building layers"
    # Create network structure
    x_size = train_set_x.shape[1].eval()
    y_size = train_set_y.shape[0].eval()
    n_in = x_size
    n_units_per = 1
    n_out = 5000
    l_layers = []
    b_layers = []
    l_params = None

    # Shared variable used for always activating one block in a layer as in the
    # input and output layer
    one_block_idxs = shared(np.zeros((batch_size, 1), dtype='int64'),
                            name='one_block_idxs')

    l_layers.append(
        HiddenLayer(n_in,
                    n_out,
                    batch_size,
                    k=0.1,
                    activation=T.tanh,
                    name='l_layer_' + str(len(l_layers))))

    if mult_small_net_params:
        l_params = l_layers[-1].params

    b_layers.append(
        HiddenBlockLayer((1, x_size), (n_out, n_units_per),
                         one_block_idxs,
                         l_layers[-1].top_active,
                         batch_size,
                         activation=T.tanh,
                         name='b_layer_' + str(len(b_layers)),
                         l_params=l_params,
                         l_param_map=[('x', 1, 0, 'x'), (0, 'x')]))

    n_in = n_out
    l_layers.append(
        HiddenLayer(n_in,
                    n_out,
                    batch_size,
                    k=0.1,
                    activation=T.tanh,
                    name='l_layer_' + str(len(l_layers))))

    if mult_small_net_params:
        l_params = l_layers[-1].params

    b_layers.append(
        HiddenBlockLayer(
            (n_in, n_units_per),
            (n_out, n_units_per),
            l_layers[-2].top_active,
            l_layers[-1].top_active,
            #out_idxs_n,
            batch_size,
            activation=T.tanh,
            name='b_layer_' + str(len(b_layers)),
            l_params=l_params,
            l_param_map=[(0, 1, 'x', 'x'), (0, 'x')]))

    n_out = 10
    l_layers.append(
        HiddenLayer(n_in,
                    n_out,
                    batch_size,
                    k=1,
                    activation=T.nnet.softmax,
                    name='l_layer_' + str(len(l_layers))))
    if zero_last_layer_params:
        l_layers[-1].W.set_value(0 * l_layers[-1].W.get_value())
        l_layers[-1].b.set_value(0 * l_layers[-1].b.get_value())

    if mult_small_net_params:
        l_params = l_layers[-1].params

    b_layers.append(
        HiddenBlockLayer((n_in, n_units_per), (1, n_out),
                         l_layers[-2].top_active,
                         one_block_idxs,
                         batch_size,
                         None,
                         name='b_layer_' + str(len(b_layers)),
                         l_params=l_params,
                         l_param_map=[(0, 'x', 'x', 1), ('x', 0)]))
    if zero_last_layer_params:
        b_layers[-1].W.set_value(0 * b_layers[-1].W.get_value())
        b_layers[-1].b.set_value(0 * b_layers[-1].b.get_value())

    if train_little_net or select_top_active:
        for layer in l_layers:
            print "\t%s" % layer

    if train_big_net:
        for layer in b_layers:
            print layer

    if restore_parameters:
        print "... Restoring weights of little model"
        restore_parameters('parameters_20_20_l1_0.0001_l2_0.0001.pkl',
                           l_layers)

    #for l_layer in l_layers:
    #    for param in l_layer.params:
    #        param.set_value(np.ones_like(param.get_value()))

    print "... Building top active updates"
    top_active = []
    l_activation = l_x
    b_activation = b_x
    b_activations = [b_activation]
    for i in range(len(l_layers)):
        l_activation = l_layers[i].output(l_activation)
        b_activation = b_layers[i].output(b_activation)
        b_activations.append(b_activation)
        top_active.append((l_layers[i].top_active,
                           T.argsort(T.abs_(l_activation))[:, :l_layers[i].k]))

    print "... Building costs and errors"
    l_cost = add_regularization(l_layers, l_layers[-1].cost(l_activation, y),
                                L1_reg, L2_reg)
    l_error = l_layers[-1].error(l_activation, y)

    # T.nnet.softmax takes a matrix not a tensor so we only calculate the
    # linear component at the last layer and here we reshape and then
    # apply the softmax
    #b_activation = T.nnet.softmax(((b_activation*b_activation)**2).sum(axis=2))
    #b_activation = relu_softmax(((b_activation*b_activation)**2).sum(axis=2))
    #b_activation = T.nnet.softmax(T.mean(b_activation, axis=2))
    #b_activation = relu_softmax(T.mean(b_activation, axis=2))
    #b_activation = T.nnet.softmax(T.max(b_activation, axis=2))
    #b_activation = relu_softmax(T.max(b_activation, axis=2))
    b_shp = b_activation.shape
    #b_activation = relu_softmax(b_activation.reshape((b_shp[0], b_shp[2])))
    b_activation = T.nnet.softmax(b_activation.reshape((b_shp[0], b_shp[2])))
    b_activations.append(b_activation)
    b_cost = add_regularization(b_layers, b_layers[-1].cost(b_activation, y),
                                L1_reg, L2_reg)
    b_error = b_layers[-1].error(b_activation, y)

    print "... Building parameter updates"
    l_grads = []
    l_param_updates = []
    b_grads = []
    b_param_updates = []
    for i in range(len(l_layers)):
        for param in l_layers[i].params:
            gparam = T.grad(l_cost, param)
            l_grads.append(gparam)
            l_param_updates.append((param, param - l_learning_rate * gparam))

        for param in b_layers[i].params:
            b_gparam = T.grad(
                b_cost,
                param,
                #consider_constant=[b_layers[i].in_idxs, b_layers[i].out_idxs]
            )
            b_velocity = shared(
                np.zeros_like(param.get_value(), dtype=theano.config.floatX),
                param.name + '_velocity')
            b_param_updates.append(
                (b_velocity,
                 momentum * b_velocity - b_learning_rate * b_gparam))
            b_grads.append(b_gparam)
            b_param_updates.append((param, param + b_velocity))

        #if b_layers[i].l_params is not None:
        #for param in b_layers[i].l_params:
        #l_gparam = T.grad(
        #    b_cost,
        #    param
        #)
        #l_velocity = shared(
        #    np.zeros_like(param.get_value()),
        #    param.name + '_velocity'
        #)
        #b_param_updates.append((
        #    l_velocity, momentum*l_velocity - b_learning_rate*l_gparam
        #))
        #l_grads.append(l_gparam)
        #b_param_updates.append((param, param + l_velocity))
        #b_param_updates.append((
        #    param, param - 0.0001*l_gparam
        #))

    print "... Compiling little net train function"
    l_updates = []
    if select_top_active:
        l_updates = l_updates + top_active

    if train_little_net:
        l_updates = l_updates + l_param_updates

    l_train_model = function(
        [index], [l_cost, l_x, y],
        updates=l_updates,
        givens={
            l_x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling big net train function"
    temp = train_set_x.get_value(borrow=True, return_internal_type=True)
    train_set_x_b = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                           borrow=True,
                           name='train_set_x_b')

    b_updates = []
    if train_big_net:
        b_updates = b_updates + b_param_updates

    b_train_model = function(
        [index], [b_cost],
        updates=b_updates,
        givens={
            b_x: train_set_x_b[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    #theano.printing.debugprint(b_train_model)
    #ipdb.set_trace()

    #    verify_layers(batch_size, b_layers, train_set_x_b, train_set_y)
    #    temp = verify_cost(
    #        b_cost,
    #        b_layers,
    #        b_x,
    #        y,
    #        batch_size,
    #        train_set_x_b,
    #        train_set_y
    #    )
    #    T.verify_grad(
    #        temp,
    #        [b_layers[0].W.get_value(), b_layers[1].W.get_value()],
    #        rng=rng
    #    )

    print "... Compiling little net test function"
    l_test_model = function(
        [index],
        l_error,
        givens={
            l_x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling big net test function"
    temp = test_set_x.get_value(borrow=True, return_internal_type=True)
    test_set_x_b = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                          borrow=True,
                          name='test_set_x_b')
    b_test_model = function(
        [index],
        b_error,
        givens={
            b_x: test_set_x_b[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling little net validate function"
    l_validate_model = function(
        [index],
        l_error,
        givens={
            l_x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling big net validate function"
    temp = valid_set_x.get_value(borrow=True, return_internal_type=True)
    valid_set_x_b = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                           borrow=True,
                           name='valid_set_x_b')
    b_validate_model = function(
        [index],
        b_error,
        givens={
            b_x: valid_set_x_b[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "Training"

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 10  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_params = None
    this_validation_loss = 0
    this_validation_loss_l = 0
    this_validation_loss_b = 0
    best_validation_loss = np.inf
    best_validation_loss_l = best_validation_loss
    best_validation_loss_b = best_validation_loss
    best_iter = 0
    test_score = 0.
    test_score_l = 0.
    test_score_b = 0.
    accum_l = 0
    accum_b = 0
    epoch = 0
    train_time_accum_l = 0
    train_time_accum_b = 0
    done_looping = False

    timers = ['train', 'valid', 'test']
    ts = TS(['epoch', 'valid'])
    ts_l = TS(timers)
    ts_b = TS(timers)

    summarize_rates()

    ts.start()
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        ts.start('epoch')
        for minibatch_index in xrange(n_train_batches):
            if train_little_net or select_top_active:
                ts_l.start('train')
                minibatch_avg_cost_l = l_train_model(minibatch_index)
                ts_l.end('train')

                minibatch_avg_cost_l = minibatch_avg_cost_l[0]
                if np.isnan(minibatch_avg_cost_l):
                    print "minibatch_avg_cost_l: %f" % minibatch_avg_cost_l
                    ipdb.set_trace()
                accum_l = accum_l + minibatch_avg_cost_l

            if train_big_net:
                ts_b.start('train')
                minibatch_avg_cost_b = b_train_model(minibatch_index)
                ts_b.end('train')

                minibatch_avg_cost_b = minibatch_avg_cost_b[0]
                accum_b = accum_b + minibatch_avg_cost_b

            #print "minibatch_avg_cost: " + str(minibatch_avg_cost) + " minibatch_avg_cost_b: " + str(minibatch_avg_cost_b)
            #print l_layers[0].W.get_value().sum(), l_layers[1].W.get_value().sum(), b_layers[0].W.get_value().sum(), b_layers[1].W.get_value().sum()
            #print "A: ", np.max(np.abs(b_layers[0].W.get_value())), np.max(np.abs(b_layers[0].b.get_value())), np.max(np.abs(b_layers[1].W.get_value())), np.max(np.abs(b_layers[1].b.get_value()))
            #print "B: ", np.abs(b_layers[0].W.get_value()).sum(), np.abs(b_layers[0].b.get_value()).sum(), np.abs(b_layers[1].W.get_value()).sum(), np.abs(b_layers[1].b.get_value()).sum()
            #print "C: ", np.abs(np.array(minibatch_avg_cost_b[1])).sum(), np.abs(np.array(minibatch_avg_cost_b[2])).sum(), np.abs(np.array(minibatch_avg_cost_b[3])).sum(), np.abs(np.array(minibatch_avg_cost_b[4])).sum()

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                ts.end('epoch')
                ts.reset('epoch')

                l_summary = ""
                if train_little_net or select_top_active:
                    ts_l.reset('train')
                    accum_l = accum_l / validation_frequency
                    l_summary = ("minibatch_avg_cost_l: %f, time: %f" %
                                 (accum_l, ts_l.accumed['train'][-1][1]))
                    accum_l = 0
                    train_time_accum_l = 0

                b_summary = ""
                if train_big_net:
                    ts_b.reset('train')
                    accum_b = accum_b / validation_frequency
                    b_summary = ("minibatch_avg_cost_b: %f, time: %f" %
                                 (accum_b, ts_b.accumed['train'][-1][1]))
                    accum_b = 0

                print "%s %s" % (l_summary, b_summary)

                # compute zero-one loss on validation set
                summary = ('epoch %i, minibatch %i/%i' %
                           (epoch, minibatch_index + 1, n_train_batches))

                l_summary = ""
                if train_little_net or select_top_active:
                    validation_losses_l = [
                        l_validate_model(i) for i in xrange(n_valid_batches)
                    ]
                    this_validation_loss_l = np.mean(validation_losses_l)
                    l_summary = ('little validation error %f %% ' %
                                 (this_validation_loss_l * 100.))

                b_summary = ""
                if train_big_net:
                    validation_losses_b = [
                        b_validate_model(i) for i in xrange(n_valid_batches)
                    ]
                    this_validation_loss_b = np.mean(validation_losses_b)
                    #this_validation_loss_b = 0
                    b_summary = ('big validation error %f %% ' %
                                 (this_validation_loss_b * 100.))

                print("%s %s %s" % (summary, l_summary, b_summary))
                #ipdb.set_trace()

                # if we got the best validation score until now
                if train_big_net:
                    this_validation_loss = this_validation_loss_b
                elif train_little_net:
                    this_validation_loss = this_validation_loss_l

                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss_l = this_validation_loss_l
                    best_validation_loss_b = this_validation_loss_b

                    if train_big_net:
                        best_validation_loss = best_validation_loss_b
                    elif train_little_net:
                        best_validation_loss = best_validation_loss_l

                    best_iter = iter

                    # test it on the test set
                    l_summary = ""
                    if train_little_net:
                        test_losses_l = [
                            l_test_model(i) for i in xrange(n_test_batches)
                        ]
                        test_score_l = np.mean(test_losses_l)
                        l_summary = 'little: %f' % (test_score_l * 100.)

                    b_summary = ""
                    if train_big_net:
                        test_losses_b = [
                            b_test_model(i) for i in xrange(n_test_batches)
                        ]
                        test_score_b = np.mean(test_losses_b)
                        #test_score_b = 0
                        b_summary = 'big: %f' % (test_score_b * 100.)

                    print(
                        '     epoch %i, minibatch %i/%i,'
                        ' test error of best model %s %s' %
                        (epoch, minibatch_index + 1, n_train_batches,
                         l_summary, b_summary))

                learning_rate.update()

                if train_little_net:
                    l_learning_rate.set_value(learning_rate.rate)

                if train_big_net:
                    b_learning_rate.set_value(learning_rate.rate)

                momentum_rate.update()
                momentum.set_value(momentum_rate.rate)

                summarize_rates()

            if patience <= iter:
                done_looping = True
                break

    ts.end()
    print(
        'Optimization complete. Best validation score of %f %% (%f %%) '
        'obtained at iteration %i, with test performance %f %% (%f %%)' %
        (best_validation_loss_l * 100., best_validation_loss_b * 100.,
         best_iter + 1, test_score_l * 100., test_score_b * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %s' % ts)
Example #35
0
File: Model.py Project: qdtn/NewSemLM
def get_scores_all(mdl, fm1, fm2, X1, X2, X_new1, X_new2, num_select=10):
    X11 = []
    X21 = []

    # x = X[i], we add new values to the end of x

    for j in range(len(X_new1)):

        for k in range(len(X_new1[j])):
            xx = [xxx for xxx in X1[j]]
            xx.append(X_new1[j][k])
            X11.append(xx)

    for j in range(len(X_new2)):

        for k in range(len(X_new2[j])):
            xx = [xxx for xxx in X2[j]]
            xx.append(X_new2[j][k])
            X21.append(xx)

    print(X11)
    print(X21)

    X1 = [[fm1.f.map[fm1.f.getFeatureValue(x)] + 1 for x in XX] for XX in X11]

    X2 = [[fm2.f.map[fm2.f.getFeatureValue(x)] + 1 for x in XX] for XX in X21]

    x1, x_mask1 = preprare_seq_seq_data(X1)

    x1, _, mask_x1, _, _, _, _, _ = mdl.standardize_data(
        x1, None, x_mask1, None, None, None, None, None)

    x2, x_mask2 = preprare_seq_seq_data(X2)

    x2, _, mask_x2, _, _, _, _, _ = mdl.standardize_data(
        x2, None, x_mask2, None, None, None, None, None)

    score_pos = mdl.get_output_layer(-1, x1, x2, mask_x1)

    score_pos = score_pos.swapaxes(0, 1)
    score_pos = score_pos[:, -1]

    x = T.matrix("score")

    sort_f = th.function([x], T.argsort(x))

    sorted_values = sort_f(score_pos)
    print(sorted_values)
    rs1 = []
    rs2 = []
    rs_scores = []
    my_scores = []
    for i in range(sorted_values.shape[0]):
        #f.write(to_string(X1[i]) + " ")
        ss = []
        for j in range(1, sorted_values.shape[1]):
            val = sorted_values[i][sorted_values.shape[1] - j]

            #val_map = fm.fY.map_inversed[val-1]
            score = score_pos[i][val]
            #f.write(str(val) + ":" + str(score) + " ")
            ss.append((val, score))
        #f.write("\n")
        my_scores.append(("_", ss))

        vals = []
        c = 0
        for t in range(sorted_values.shape[1] - 1, -1, -1):
            if c == num_select:
                break
            v = sorted_values[i][t]

            if fm1.fY.map_inversed[v - 1] != "EOS":
                vals.append(v)
                c += 1
        #vals = sorted_values[i][sorted_values.shape[1]-num_select:sorted_values.shape[1]]

        vals1 = []
        vals2 = []

        #val_maps = [fm1.fY.map_inversed[v-1].split("_") for v in list(vals) ]#if  fm.fY.map_inversed[v-1]!="EOS" ]
        scores = [score_pos[i][v]
                  for v in list(vals)]  # if fm.fY.map_inversed[v-1]!="EOS"]

        for v in list(vals):
            tm = fm1.fY.map_inversed[v - 1].split("_")

            vals1.append(tm[0])
            vals2.append(tm[1])

        rs1.append(vals1)
        rs2.append(vals2)

        rs_scores.append(scores)

    return (rs1, rs2), rs_scores, X11, X21, my_scores
Example #36
0
"""
k-max pooling example.
"""

import numpy as np

import theano
from theano import tensor as T
from theano.sandbox import neighbours

k = 3
# instantiate 4D tensor for input
input = T.tensor4(name='input')

neighborsForPooling = neighbours.images2neibs(input, (1, 5), mode='valid')
neighborsArgSorted = T.argsort(neighborsForPooling, axis=1)
kNeighborsArg = neighborsArgSorted[:, -k:]
kNeighborsArgSorted = T.sort(kNeighborsArg, axis=1)
ii = T.repeat(T.arange(neighborsForPooling.shape[0]), k)
jj = kNeighborsArgSorted.flatten()
k_pooled_2D = neighborsForPooling[ii, jj].reshape((3, k))
k_pooled = neighbours.neibs2images(k_pooled_2D, (1, 3), (1, 3, 1, 3))

k_max = theano.function([input], k_pooled)

input = np.array([[2, 4, 1, 6, 8], [12, 3, 5, 7, 1], [-8, 6, -12, 4, 1]], dtype=np.float32)
input = input.reshape(1, 3, 1, 5)
print "input shape: ", input.shape
print "input: ", input
output = k_max(input)
print "output shape: ", output.shape
Example #37
0
    def __init__(self,
                 n_in,
                 layers,
                 hidden_dropout=0.5,
                 max_col_norm=1.7236,
                 rho=0.96,
                 rmsprop=False,
                 center_grads=False,
                 use_nesterov=False,
                 mean_pooling=False,
                 normalize_acts=False,
                 layer_dropout=True,
                 no_final_dropout=False,
                 loss_based_pooling=False,
                 topN_pooling=1,
                 adadelta=False,
                 response_normalize=True,
                 enable_standardization=False,
                 l2=None,
                 seed=1985,
                 **kwargs):

        x = T.matrix('x', dtype=theano.config.floatX)
        y = T.lvector('y')

        type_map = {
            'L': LogisticLayer,
            'R': RectifierLayer,
            'S': SoftmaxLayer,
            'Sp': SoftplusLayer,
            'T': TanhLayer,
            'Li': LinearLayer,
            'Sq': SquaredLayer,
        }
        EPS = 1e-18  # small constant for numerical stability, used below

        self.max_col_norm = max_col_norm
        self.rng = RandomStreams(seed)

        alpha = 0.02
        beta = 0.75
        k = 1.5

        self.layers = []

        constants = []
        n_layers = len(layers)
        # Create hidden layers
        for i, layer in enumerate(layers):
            layer_type = layer[0]
            layer_size = layer[1]

            if i == 0:
                layer_input = x
                layer_n_in = n_in
            #elif i == n_layers - 1:
            #    layer_input = self.layers[-1].output
            #    layer_n_in = self.layers[-1].n_in
            else:
                layer_input = self.layers[-1].output
                layer_n_in = self.layers[-1].n_out

            if i == n_layers - 1:
                if normalize_acts:
                    layer_input = layer_input / T.sqrt(
                        T.sum(layer_input**2, axis=1, keepdims=True) + EPS)
                elif response_normalize:
                    layer_input = (layer_input - T.min(
                        layer_input, axis=1, keepdims=True)) / T.maximum(
                            T.max(layer_input, axis=1, keepdims=True) -
                            T.min(layer_input, axis=1, keepdims=True), EPS)
                    """
                    layer_input = layer_input / (k + alpha * T.sum(layer_input**2, axis=1,
                        keepdims=True))**beta
                    """
                if enable_standardization:
                    from utils import stddev_bias
                    std_val = stddev_bias(layer_input, EPS)
                    mu = T.mean(layer_input, axis=0)
                    z_val = (layer_input - mu) / std_val
                    layer_input = z_val

                if loss_based_pooling:
                    pass
                elif topN_pooling == 1:
                    print "Using topN_pooling for training"
                    max1_indx = T.argmax(layer_input, axis=0)
                    layer_input1 = T.max(layer_input, axis=0)
                    t1 = T.arange(layer_input.shape[1])
                    masked_in = layer_input * T.neq(layer_input,
                                                    layer_input[max1_indx, t1])
                    layer_input2 = T.max(masked_in, axis=0)
                    layer_input = (1.4 * layer_input1 + 0.6 * layer_input2) / 2
                elif mean_pooling:
                    layer_input = T.mean(layer_input, axis=0)
                else:
                    layer_input = T.max(layer_input, axis=0)

                if layer_dropout and not no_final_dropout:
                    layer_input = layer_input * self.rng.binomial(
                        n=1, p=0.6, dtype=theano.config.floatX) / 0.6
                elif not no_final_dropout:
                    assert hidden_dropout != 1.
                    layer_input = layer_input * self.rng.binomial(
                        n=1,
                        p=1 - hidden_dropout,
                        dtype=theano.config.floatX,
                        size=layer_input.shape) / (1 - hidden_dropout)

            if hidden_dropout != 1. and i != n_layers - 1:
                layer_input = layer_input * self.rng.binomial(
                    n=1,
                    p=1 - hidden_dropout,
                    dtype=theano.config.floatX,
                    size=layer_input.shape) / (1 - hidden_dropout)

            xargs = {}

            if layer_type == 'R' and layer == layers[-1]:
                xargs['mask'] = False

            layer = type_map[layer_type](layer_input,
                                         layer_n_in,
                                         layer_size,
                                         seed=seed,
                                         rng=self.rng,
                                         **xargs)

            self.layers.append(layer)

        self.clean_layers = []

        for i, layer in enumerate(layers):
            layer_type = layer[0]
            layer_size = layer[1]

            if i == 0:
                layer_input = x
                layer_n_in = n_in
            else:
                layer_input = self.clean_layers[-1].output
                layer_n_in = self.clean_layers[-1].n_out

            if i == n_layers - 1:
                if normalize_acts:
                    layer_input = layer_input / T.sqrt(
                        T.sum(layer_input**2, axis=1, keepdims=True) + EPS)
                elif response_normalize:

                    layer_input = (layer_input - T.min(
                        layer_input, axis=1, keepdims=True)) / T.maximum(
                            T.max(layer_input, axis=1, keepdims=True) -
                            T.min(layer_input, axis=1, keepdims=True), EPS)

                    #layer_input = T.nnet.sigmoid(layer_input)
                    """
                    layer_input = layer_input / (k + alpha * T.sum(layer_input**2, axis=1,
                        keepdims=True))**beta
                    """
                if enable_standardization:
                    from utils import stddev_bias
                    std_val = stddev_bias(layer_input, EPS)
                    mu = T.mean(layer_input, axis=0)
                    z_val = (layer_input -
                             mu) / std_val  #T.maximum(std_val, EPS)
                    layer_input = z_val

                feature_out = layer_input
                #Perform the temporal max-pooling:
                if topN_pooling == 1:
                    print "Using topN_pooling for testing."
                    collapsed_val = T.sum(layer_input, axis=0)
                    top_ids = T.argsort(layer_input, axis=0)[-3:][::-1]
                    top_vals = layer_input[top_ids,
                                           T.arange(layer_input.shape[1])]
                    #top_mean = (1.2 * top_vals[0] + 1.0 * top_vals[1] + 0.8 * top_vals[2]) / 3
                    top_mean = (1.4 * top_vals[0] + 0.6 * top_vals[1]) / 2
                    layer_input = top_mean
                elif mean_pooling:
                    layer_input = T.mean(layer_input, axis=0)
                else:
                    layer_input = T.max(layer_input, axis=0)

            xargs = {}
            pooled_output_features = layer_input
            if layer_type == 'R' and layer == layers[-1]:
                xargs['mask'] = False

            layer = type_map[layer_type](layer_input,
                                         layer_n_in,
                                         layer_size,
                                         seed=seed,
                                         W=self.layers[i].W,
                                         b=self.layers[i].b,
                                         **xargs)

            self.clean_layers.append(layer)

        self._output = theano.function([x],
                                       T.argmax(self.clean_layers[-1].output,
                                                axis=1))
        self._feature_output = theano.function([x], feature_out)
        self._pooled_output_features = theano.function([x],
                                                       pooled_output_features)

        self.transform = theano.function([x],
                                         T.mean(self.clean_layers[-2].output,
                                                axis=0))

        loss = -T.mean(T.log(self.layers[-1].output)[T.arange(y.shape[0]), y])
        pooling_loss = -T.log(self.layers[-1].output)[T.arange(y.shape[0]), y]

        if l2 is not None:
            loss += l2 * sum([(l.W**2).sum(dtype=theano.config.floatX)
                              for l in self.layers])

        self.trainer = NeuralNetworkTrainer(
            [x, y],
            loss,
            self.layers,
            self.max_col_norm,
            rmsprop=rmsprop,
            rho=rho,
            center_grads=center_grads,
            use_nesterov=use_nesterov,
            loss_based_pooling=loss_based_pooling,
            adadelta=adadelta,
            pooling_loss=pooling_loss,
            constants=constants,
            rng=self.rng,
            **kwargs)
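For reference, the per-column top-k selection used in the temporal pooling above (argsort along axis 0, then fancy indexing) can be sketched in plain NumPy; the numbers below are arbitrary and the 1.4/0.6 weights are simply copied from the snippet:

import numpy as np

acts = np.random.randn(10, 5)                         # (time steps, features)
top_ids = np.argsort(acts, axis=0)[-3:][::-1]         # indices of the 3 largest values per column, largest first
top_vals = acts[top_ids, np.arange(acts.shape[1])]    # shape (3, n_features)
pooled = (1.4 * top_vals[0] + 0.6 * top_vals[1]) / 2  # weighted top-2 pooling, as in the model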
Example #38
0
def theano_compiler(model):
    """Take a triflow model and return optimized theano routines.

    Parameters
    ----------
    model : triflow.Model
        Model to compile.

    Returns
    -------
    (theano.function, theano.function):
        Optimized routines that compute the evolution equations and their
        Jacobian matrix.
    """
    from theano import tensor as T
    from theano.ifelse import ifelse
    import theano.sparse as ths
    from theano import function

    def th_Min(a, b):
        if isinstance(a, T.TensorVariable) or isinstance(b, T.TensorVariable):
            return T.where(a < b, a, b)
        return min(a, b)

    def th_Max(a, b):
        if isinstance(a, T.TensorVariable) or isinstance(b, T.TensorVariable):
            return T.where(a < b, b, a)
        return max(a, b)

    def th_Heaviside(a):
        if isinstance(a, T.TensorVariable):
            return T.where(a < 0, 0, 1)
        return 0 if a < 0 else 1

    mapargs = {
        arg: T.vector(arg)
        for arg, sarg in zip(model._args, model._symbolic_args)
    }

    to_feed = mapargs.copy()

    x_th = mapargs['x']
    N = x_th.size
    L = x_th[-1] - x_th[0]
    dx = L / (N - 1)
    to_feed['dx'] = dx

    periodic = T.scalar("periodic", dtype="int32")

    middle_point = int((model._window_range - 1) / 2)

    th_args = [
        mapargs[key] for key in [
            *model._indep_vars, *model._dep_vars, *model._help_funcs,
            *model._pars
        ]
    ] + [periodic]

    map_extended = {}

    for (varname, discretisation_tree) in \
            model._symb_vars_with_spatial_diff_order.items():
        pad_left, pad_right = model._bounds

        th_arg = mapargs[varname]

        per_extended_var = T.concatenate(
            [th_arg[pad_left:], th_arg, th_arg[:pad_right]])
        edge_extended_var = T.concatenate([[th_arg[0]] * middle_point, th_arg,
                                           [th_arg[-1]] * middle_point])

        extended_var = ifelse(periodic, per_extended_var, edge_extended_var)

        map_extended[varname] = extended_var
        for order in range(pad_left, pad_right + 1):
            if order != 0:
                var = ("{}_{}{}").format(varname, 'm' if order < 0 else 'p',
                                         np.abs(order))
            else:
                var = varname
            new_var = extended_var[order - pad_left:extended_var.size + order -
                                   pad_right]
            to_feed[var] = new_var

    F = lambdify(
        (model._symbolic_args),
        expr=model.F_array.tolist(),
        modules=[T, {
            "Max": th_Max,
            "Min": th_Min,
            "Heaviside": th_Heaviside
        }])(*[to_feed[key] for key in model._args])

    F = T.concatenate(F, axis=0).reshape((model._nvar, N)).T
    F = T.stack(F).flatten()

    J = lambdify(
        (model._symbolic_args),
        expr=model.J_array.tolist(),
        modules=[T, {
            "Max": th_Max,
            "Min": th_Min,
            "Heaviside": th_Heaviside
        }])(*[to_feed[key] for key in model._args])

    J = [j if j != 0 else T.constant(0.) for j in J]
    J = [j if not isinstance(j, (int, float)) else T.constant(j) for j in J]
    J = T.stack([T.repeat(j, N) if j.ndim == 0 else j for j in J])
    J = J[model._sparse_indices[0]].T.squeeze()

    i = T.arange(N).dimshuffle([0, 'x'])
    idx = T.arange(N * model._nvar).reshape((N, model._nvar)).T
    edge_extended_idx = T.concatenate([
        T.repeat(idx[:, :1], middle_point, axis=1), idx,
        T.repeat(idx[:, -1:], middle_point, axis=1)
    ],
                                      axis=1).T.flatten()
    per_extended_idx = T.concatenate(
        [idx[:, -middle_point:], idx, idx[:, :middle_point]],
        axis=1).T.flatten()
    extended_idx = ifelse(periodic, per_extended_idx, edge_extended_idx)

    rows = T.tile(T.arange(model._nvar),
                  model._window_range * model._nvar) + i * model._nvar
    cols = T.repeat(T.arange(model._window_range * model._nvar),
                    model._nvar) + i * model._nvar
    rows = rows[:, model._sparse_indices].reshape(J.shape).flatten()
    cols = extended_idx[cols][:, model._sparse_indices] \
        .reshape(J.shape).flatten()

    permutation = T.argsort(cols)

    J = J.flatten()[permutation]
    rows = rows[permutation]
    cols = cols[permutation]
    count = T.zeros((N * model._nvar + 1, ), dtype=int)
    uq, cnt = T.extra_ops.Unique(False, False, True)(cols)
    count = T.set_subtensor(count[uq + 1], cnt)

    indptr = T.cumsum(count)
    shape = T.stack([N * model._nvar, N * model._nvar])
    sparse_J = ths.CSC(J, rows, indptr, shape)
    F_theano_function = function(inputs=th_args,
                                 outputs=F,
                                 on_unused_input='ignore',
                                 allow_input_downcast=True)
    J_theano_function = function(inputs=th_args,
                                 outputs=sparse_J,
                                 on_unused_input='ignore',
                                 allow_input_downcast=True)

    return F_theano_function, J_theano_function
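As an aside, the column-sorted triplet-to-CSC assembly done above with Theano ops follows the standard CSC layout; a minimal NumPy/SciPy sketch of the same bookkeeping, using made-up triplets rather than the model's Jacobian:

import numpy as np
from scipy import sparse

data = np.array([1., 2., 3.])
rows = np.array([0, 2, 1])
cols = np.array([2, 0, 1])

perm = np.argsort(cols)                   # group the entries by column, as in the code above
data, rows, cols = data[perm], rows[perm], cols[perm]
count = np.zeros(3 + 1, dtype=int)
uq, cnt = np.unique(cols, return_counts=True)
count[uq + 1] = cnt
indptr = np.cumsum(count)                 # CSC column pointer
J = sparse.csc_matrix((data, rows, indptr), shape=(3, 3))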
Example #39
0
                               r_Wa_aht_st, r_ba_aht, \
                               r_Wa_atmu_aht, r_ba_atmu, \
                               r_Wa_atsig_aht, r_ba_atsig]
                )

FE_mean = FEt_th.mean()
KL_st_mean = KL_st_th.mean()
ot_mean = p_ot_th.mean()
oht_mean = p_oht_th.mean()
oat_mean = p_oat_th.mean()

FE_mean_perturbations = FEt_th.mean(axis=0).mean(axis=1)
FE_std_perturbations = FEt_th.mean(axis=0).std(axis=1)
FE_mean_perturbations_std = FE_mean_perturbations.std(axis=0)

FE_rank = n_perturbations - T.argsort(T.argsort(FE_mean_perturbations))
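
# (Comment added for clarity.) The double argsort above is the usual rank trick:
# argsort(argsort(v))[i] is the position of v[i] in ascending order, so
# n_perturbations - argsort(argsort(v)) assigns rank 1 to the largest value.
# A quick NumPy check of the idea, e.g.:
#     scores = numpy.array([0.3, 1.2, -0.5, 0.7])
#     len(scores) - numpy.argsort(numpy.argsort(scores))   # -> array([3, 1, 4, 2])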

FE_rank_score = T.clip(
    numpy.log(0.5 * n_perturbations + 1) - T.log(FE_rank), 0.0,
    10000.0).astype(dtype=theano.config.floatX)

FE_rank_score_normalized = FE_rank_score / FE_rank_score.sum(
) - 1.0 / n_perturbations

run_agent_scan = theano.function(inputs=[],
                                 outputs=[
                                     states_th, oat_th, ot_th, oht_th, FEt_th,
                                     KL_st_th, hst_th, hst2_th, stmu_th,
                                     stsig_th, force_th, pos_th
                                 ],
                                 allow_input_downcast=True,
Example #40
0
    def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
        self.optimizer = optimizers.get(optimizer)
        
        self.loss = objectives.get(loss)
        weighted_loss = weighted_objective(objectives.get(loss))

        self.X_train = self.get_input(train=True)
        self.X_test = self.get_input(train=False)

        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)


        self.y = T.zeros_like(self.y_train)

        self.weights = T.ones_like(self.y_train)

        if hasattr(self.layers[-1], "get_output_mask"):
            mask = self.layers[-1].get_output_mask()
        else:
            mask = None

        train_loss = weighted_loss(self.y, self.y_train, self.weights, mask)
        test_loss = weighted_loss(self.y, self.y_test, self.weights, mask)

        train_loss.name = 'train_loss'
        test_loss.name = 'test_loss'
        self.y.name = 'y'

        if class_mode == 'categorical':
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode
        self.theano_mode = theano_mode

        for r in self.regularizers:
            train_loss = r(train_loss)
        updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)
        updates += self.updates

        if type(self.X_train) == list:
            train_ins = self.X_train + [self.y, self.weights]
            test_ins = self.X_test + [self.y, self.weights]
            predict_ins = self.X_test

        else:
            train_ins = [self.X_train, self.y, self.weights]
            test_ins = [self.X_test, self.y, self.weights]
            predict_ins = self.X_test

        self._train = theano.function(train_ins, train_loss, updates=updates,
                                      allow_input_downcast=True, mode=theano_mode)
        self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], updates=updates,
                                               allow_input_downcast = True, mode= theano_mode)
        self._predict= theano.function(predict_ins, self.y_test,
                                       allow_input_downcast=True, mode=theano_mode)
        self._test = theano.function(test_ins, test_loss, updates=updates,
                                      allow_input_downcast=True, mode=theano_mode)
        self._test_with_acc = theano.function(test_ins, [test_loss, test_accuracy], updates=updates,
                                               allow_input_downcast = True, mode= theano_mode)
Example #41
0

def get_word_idxs(relevant_sentence_idxs, support_, mask_):
    rel_support = support_[relevant_sentence_idxs, :]
    rel_mask = mask_[relevant_sentence_idxs, :]
    return rel_support[rel_mask.nonzero()].ravel()


if attention:
    # estimate relevance of each sentence
    relevance_probs = attention_model.get_relevance_probs(
        support, mask, question_idxs)

    # By default, the attention model retrieves any sentence with prob > 0.5 under the model
    # If no sentence exists, it returns the top two sentences in chronological order
    max_idxs = T.sort(T.argsort(relevance_probs[:, 1])[-2:])
    prob_idxs = T.arange(
        relevance_probs.shape[0])[T.nonzero(relevance_probs[:, 1] > 0.5)]
    est_idxs = ifelse(T.lt(T.sum(relevance_probs[:, 1] > 0.5), 1), max_idxs,
                      prob_idxs)
else:
    est_idxs = T.arange(support.shape[0])

# joint training of question model + attention model
# if no attention, train on all of the sentences
est_rel_facts = get_word_idxs(est_idxs, support, mask)
answer_probs = qa_model.get_answer_probs(est_rel_facts, question_idxs)

# train the qa-model using the hints
# true_rel_facts = get_word_idxs(hints.nonzero(), support, mask)
# answer_probs = qa_model.get_answer_probs(true_rel_facts, question_idxs)
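
A minimal NumPy sketch of the selection rule described in the comments above (threshold of 0.5 with a top-two chronological fallback); the probabilities are made up for illustration:

import numpy as np

probs = np.array([0.1, 0.8, 0.3, 0.6])              # hypothetical per-sentence relevance
prob_idxs = np.nonzero(probs > 0.5)[0]              # sentences the model is confident about
max_idxs = np.sort(np.argsort(probs)[-2:])          # fallback: top two, in chronological order
est_idxs = max_idxs if prob_idxs.size < 1 else prob_idxs   # here: array([1, 3])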
Example #42
0
    def call(self, x, mask=None):
        output = x[T.arange(x.shape[0]).dimshuffle(0, "x", "x"),
                   T.sort(T.argsort(x, axis=1)[:, -self.ktop:, :], axis=1),
                   T.arange(x.shape[2]).dimshuffle("x", "x", 0)]
        return output
Example #43
0
    def neighbourhood(X):
        D = distance_tensor(X)
        N = T.argsort(D, axis=0)
        mask = T.cast(T.lt(N, nc), 'float32')
        return N[1:nc + 1], mask
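The same nearest-neighbour lookup, sketched in plain NumPy under the assumption that D is a square distance matrix and nc the neighbourhood size, as above:

import numpy as np

def knn_ids(D, nc):
    # argsort each column of the distance matrix; row 0 is each point's own index
    # (distance 0), so rows 1..nc are its nc nearest neighbours, mirroring N[1:nc + 1]
    order = np.argsort(D, axis=0)
    return order[1:nc + 1]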
Example #44
0
    def __init__(self,
                 numTruncate=20,
                 numHidden=500,
                 inputsSize=[576],
                 outputsSize=[1, 4]):
        ####################################
        #       Create model               #
        ####################################

        # Create tensor variables to store input / output data
        FeaturesXGt = T.matrix('FeaturesXGt', dtype='float32')
        FeaturesX = T.tensor3('FeaturesX')
        TargetY = T.tensor3('PredY')
        BboxY = T.tensor3('BboxY')
        C = T.vector('C', dtype='float32')
        S = T.vector('S', dtype='float32')
        BoxsVariances = T.matrix('BoxsVariances')
        RatioPosNeg = T.scalar('RatioPosNeg')

        # Create shared variable for input
        net = LSTMNet()
        net.NetName = 'LSTMTrackingNet'

        # Input
        # net.Layer['input']                 = InputLayer(net, X)
        net.LayerOpts['lstm_num_truncate'] = numTruncate
        # net.LayerOpts['reshape_new_shape'] = (net.LayerOpts['lstm_num_truncate'], 576)          # TODO: Need to set this size later
        # net.Layer['input_2d']              = ReshapeLayer(net, net.Layer['input'].Output)

        # Setting LSTM architecture
        net.LayerOpts['lstm_num_hidden'] = numHidden
        net.LayerOpts['lstm_inputs_size'] = inputsSize
        net.LayerOpts['lstm_outputs_size'] = outputsSize

        # Truncate lstm model
        currentC = C
        currentS = S
        preds = []
        bboxs = []
        predictLayers = []
        for truncId in range(net.LayerOpts['lstm_num_truncate']):
            # Create LSTM layer
            currentInput = FeaturesXGt[truncId]
            net.Layer['lstm_truncid_%d' % (truncId)] = LSTMLayer(
                net, currentInput, currentC, currentS)
            net.LayerOpts['lstm_params'] = net.Layer['lstm_truncid_%d' %
                                                     (truncId)].Params

            # Predict next position based on current state
            currentInput = FeaturesX[truncId]
            tempLayer = LSTMLayer(net, currentInput, currentC, currentS)
            predictLayers.append(tempLayer)
            pred = SigmoidLayer(tempLayer.Output[0]).Output
            bbox = tempLayer.Output[1]
            preds.append(pred)
            bboxs.append(bbox)

            # Update stateS and stateC
            currentC = net.Layer['lstm_truncid_%d' % (truncId)].C
            currentS = net.Layer['lstm_truncid_%d' % (truncId)].S
        lastS = currentS
        lastC = currentC
        self.Net = net

        # Calculate cost function
        # Confidence loss
        cost = 0
        costPos = 0
        costLoc = 0
        costNeg = 0

        k0 = None
        k1 = None
        k2 = None
        k3 = None
        k4 = None
        for truncId in range(net.LayerOpts['lstm_num_truncate']):
            pred = preds[truncId]
            bbox = bboxs[truncId]
            target = TargetY[truncId]
            bboxgt = BboxY[truncId]

            numFeaturesPerIm = pred.shape[0]
            numAnchorBoxPerLoc = pred.shape[1]

            pred = pred.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 1))
            target = target.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 1))
            bbox = bbox.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 4))
            bbox = bbox / BoxsVariances
            bboxgt = bboxgt.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 4))

            allLocCost = T.sum(T.abs_(bbox - bboxgt), axis=1,
                               keepdims=True) * target

            allConfPosCost = -target * T.log(pred)
            allConfNegCost = -(1 - target) * T.log(1 - pred)

            allPosCost = allConfPosCost + allLocCost * 0
            allNegCost = allConfNegCost

            allPosCostSum = T.sum(allPosCost, axis=1)
            allNegCostSum = T.sum(allNegCost, axis=1)

            sortedPosCostIdx = T.argsort(allPosCostSum, axis=0)
            sortedNegCostIdx = T.argsort(allNegCostSum, axis=0)

            sortedPosCost = allPosCostSum[sortedPosCostIdx]
            sortedNegCost = allNegCostSum[sortedNegCostIdx]

            if k0 is None:
                k0 = target
            if k1 is None:
                k1 = allLocCost
            if k2 is None:
                k2 = pred
            if k3 is None:
                k3 = sortedPosCostIdx
            if k4 is None:
                k4 = sortedNegCostIdx

            numMax = T.sum(T.neq(sortedPosCost, 0))
            # numNegMax = T.cast(T.floor(T.minimum(T.maximum(numMax * RatioPosNeg, 2), 300)), dtype = 'int32')
            numNegMax = T.cast(T.floor(numMax * RatioPosNeg), dtype='int32')

            top2PosCost = sortedPosCost[-numMax:]
            top6NegCost = sortedNegCost[-numNegMax:]

            layerCost = (T.sum(top2PosCost) + T.sum(top6NegCost)) / numMax
            cost = cost + layerCost

            costPos = costPos + pred[sortedPosCostIdx[-numMax:]].mean()
            costLoc = costLoc + allLocCost.sum() / numMax
            costNeg = costNeg + pred[sortedNegCostIdx[-numNegMax:]].mean()

        cost = cost / net.LayerOpts['lstm_num_truncate']
        costPos = costPos / net.LayerOpts['lstm_num_truncate']
        costLoc = costLoc / net.LayerOpts['lstm_num_truncate']
        costNeg = costNeg / net.LayerOpts['lstm_num_truncate']

        # Create update function
        params = self.Net.Layer['lstm_truncid_0'].Params
        grads = T.grad(cost, params)
        updates = AdamGDUpdate(net, params=params, grads=grads).Updates

        # Train function
        self.TrainFunc = theano.function(inputs=[
            FeaturesXGt, FeaturesX, TargetY, BboxY, S, C, BoxsVariances,
            RatioPosNeg
        ],
                                         updates=updates,
                                         outputs=[
                                             cost, lastS, lastC, costPos,
                                             costLoc, costNeg, k0, k1, k2, k3,
                                             k4
                                         ])

        self.PredFunc = theano.function(inputs=[FeaturesX, S, C],
                                        outputs=[preds[0], bboxs[0]])

        nextS = self.Net.Layer['lstm_truncid_0'].S
        nextC = self.Net.Layer['lstm_truncid_0'].C
        self.NextState = theano.function(inputs=[FeaturesXGt, S, C],
                                         outputs=[nextS, nextC])
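The positive/negative cost selection above is standard hard-example mining; a small NumPy sketch of the same idea, with illustrative numbers and the pos/neg ratio of 3 as an assumption:

import numpy as np

pos_cost = np.array([0.0, 1.2, 0.0, 0.4])           # zeros: anchors not matched to the target
neg_cost = np.array([0.9, 0.1, 0.7, 0.2, 0.5, 0.3])
num_pos = int(np.sum(pos_cost != 0))                 # matched anchors
num_neg = int(np.floor(num_pos * 3))                 # e.g. three negatives per positive
hardest_pos = np.sort(pos_cost)[-num_pos:]           # largest positive costs
hardest_neg = np.sort(neg_cost)[-num_neg:]           # largest negative costs
loss = (hardest_pos.sum() + hardest_neg.sum()) / num_pos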
Example #45
0
energy_T = network.compute_energy(disc_score_T, state)

# generated samples
samples = gen_model.forward(noise)

feat_F = enc_model.forward(samples)
disc_score_F = disc_model.forward(feat_F)
energy_F = network.compute_energy(disc_score_F, state)

# sample gradient
sample_sqr = T.sum(samples**2, axis=1)
dist_mat = T.sqrt(
    sample_sqr.reshape((-1, 1)) + sample_sqr.reshape((1, -1)) -
    2 * T.dot(samples, samples.T))

neighbor_ids = T.argsort(dist_mat, axis=1)[:, :21]
neighbor_mean = T.mean(samples[neighbor_ids[:, 1:]], axis=1)
neighbor_var = T.var(samples[neighbor_ids], axis=1)

indices = T.repeat(T.arange(dist_mat.shape[0]).reshape((-1, 1)), 20, axis=1)
neighbor_dist = T.mean(dist_mat[indices, neighbor_ids[:, 1:]], axis=1, keepdims=True)

sample_gradient = (neighbor_mean - samples)
sample_gradient /= neighbor_var
# sample_gradient /= T.sqrt(T.sum(sample_gradient ** 2, axis=1, keepdims=True))

sample_gradient = theano.gradient.disconnected_grad(sample_gradient *
                                                    state['knn_scale'])

# grid = theano.shared(data)
# grid_sqr = theano.shared(data_sqr, broadcastable=(False, True))
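
The dist_mat construction above relies on the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b; a small NumPy check of that identity on made-up data:

import numpy as np

samples = np.random.randn(8, 3)
sqr = (samples ** 2).sum(axis=1)
dist_sq = sqr[:, None] + sqr[None, :] - 2 * samples.dot(samples.T)
brute = ((samples[:, None, :] - samples[None, :, :]) ** 2).sum(axis=-1)
assert np.allclose(dist_sq, brute)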
Example #46
0
    def loop(l_left, l_right, l_matrix, r_left, r_right, r_matrix, mts_i,
             extra_i, norm_length_l_i, norm_length_r_i):
        l_input_tensor = debug_print(
            Matrix_Bit_Shift(l_matrix[:, l_left:-l_right]), 'l_input_tensor')
        r_input_tensor = debug_print(
            Matrix_Bit_Shift(r_matrix[:, r_left:-r_right]), 'r_input_tensor')

        addition_l = T.sum(l_matrix[:, l_left:-l_right], axis=1)
        addition_r = T.sum(r_matrix[:, r_left:-r_right], axis=1)
        cosine_addition = cosine(addition_l, addition_r)
        eucli_addition = 1.0 / (1.0 + EUCLID(addition_l, addition_r))  #25.2%

        layer0_A1 = GRU_Batch_Tensor_Input(X=l_input_tensor,
                                           hidden_dim=nkerns[0],
                                           U=U,
                                           W=W,
                                           b=b,
                                           bptt_truncate=-1)
        layer0_A2 = GRU_Batch_Tensor_Input(X=r_input_tensor,
                                           hidden_dim=nkerns[0],
                                           U=U,
                                           W=W,
                                           b=b,
                                           bptt_truncate=-1)

        cosine_sent = cosine(layer0_A1.output_sent_rep,
                             layer0_A2.output_sent_rep)
        eucli_sent = 1.0 / (1.0 + EUCLID(layer0_A1.output_sent_rep,
                                         layer0_A2.output_sent_rep))  #25.2%

        attention_matrix = compute_simi_feature_matrix_with_matrix(
            layer0_A1.output_matrix, layer0_A2.output_matrix, layer0_A1.dim,
            layer0_A2.dim,
            maxSentLength * (maxSentLength + 1) / 2)

        l_max_attention = T.max(attention_matrix, axis=1)
        neighborsArgSorted = T.argsort(l_max_attention)
        kNeighborsArg = neighborsArgSorted[:3]  # only average the min 3 vectors
        ll = T.sort(kNeighborsArg).flatten()  # put the indices in ascending order

        r_max_attention = T.max(attention_matrix, axis=0)
        neighborsArgSorted_r = T.argsort(r_max_attention)
        kNeighborsArg_r = neighborsArgSorted_r[:3]  # only average the min 3 vectors
        rr = T.sort(kNeighborsArg_r).flatten()  # put the indices in ascending order

        l_max_min_attention = debug_print(layer0_A1.output_matrix[:, ll],
                                          'l_max_min_attention')
        r_max_min_attention = debug_print(layer0_A2.output_matrix[:, rr],
                                          'r_max_min_attention')

        layer1_A1 = GRU_Matrix_Input(X=l_max_min_attention,
                                     word_dim=nkerns[0],
                                     hidden_dim=nkerns[1],
                                     U=U1,
                                     W=W1,
                                     b=b1,
                                     bptt_truncate=-1)
        layer1_A2 = GRU_Matrix_Input(X=r_max_min_attention,
                                     word_dim=nkerns[0],
                                     hidden_dim=nkerns[1],
                                     U=U1,
                                     W=W1,
                                     b=b1,
                                     bptt_truncate=-1)

        vec_l = debug_print(
            layer1_A1.output_vector_last.reshape((1, nkerns[1])), 'vec_l')
        vec_r = debug_print(
            layer1_A2.output_vector_last.reshape((1, nkerns[1])), 'vec_r')

        #     sum_uni_l=T.sum(layer0_l_input, axis=3).reshape((1, emb_size))
        #     aver_uni_l=sum_uni_l/layer0_l_input.shape[3]
        #     norm_uni_l=sum_uni_l/T.sqrt((sum_uni_l**2).sum())
        #     sum_uni_r=T.sum(layer0_r_input, axis=3).reshape((1, emb_size))
        #     aver_uni_r=sum_uni_r/layer0_r_input.shape[3]
        #     norm_uni_r=sum_uni_r/T.sqrt((sum_uni_r**2).sum())
        #
        uni_cosine = cosine(vec_l, vec_r)
        #     aver_uni_cosine=cosine(aver_uni_l, aver_uni_r)
        #     uni_sigmoid_simi=debug_print(T.nnet.sigmoid(T.dot(norm_uni_l, norm_uni_r.T)).reshape((1,1)),'uni_sigmoid_simi')
        #     '''
        #     linear=Linear(sum_uni_l, sum_uni_r)
        #     poly=Poly(sum_uni_l, sum_uni_r)
        #     sigmoid=Sigmoid(sum_uni_l, sum_uni_r)
        #     rbf=RBF(sum_uni_l, sum_uni_r)
        #     gesd=GESD(sum_uni_l, sum_uni_r)
        #     '''
        eucli_1 = 1.0 / (1.0 + EUCLID(vec_l, vec_r))  #25.2%
        #     #eucli_1_exp=1.0/T.exp(EUCLID(sum_uni_l, sum_uni_r))
        #
        len_l = norm_length_l_i.reshape((1, 1))
        len_r = norm_length_r_i.reshape((1, 1))
        #
        #     '''
        #     len_l=length_l.reshape((1,1))
        #     len_r=length_r.reshape((1,1))
        #     '''
        #length_gap=T.log(1+(T.sqrt((len_l-len_r)**2))).reshape((1,1))
        #length_gap=T.sqrt((len_l-len_r)**2)
        #layer3_input=mts
        #         layer3_input_nn=T.concatenate([vec_l, vec_r,
        #                                     cosine_addition, eucli_addition,
        #     #                                 cosine_sent, eucli_sent,
        #                                     uni_cosine,eucli_1], axis=1)#, layer2.output, layer1.output_cosine], axis=1)

        output_i = T.concatenate(
            [
                vec_l,
                vec_r,
                cosine_addition,
                eucli_addition,
                #                                 cosine_sent, eucli_sent,
                uni_cosine,
                eucli_1,
                mts_i.reshape((1, 14)),
                len_l,
                len_r,
                extra_i.reshape((1, 9))
            ],
            axis=1)  #, layer2.output, layer1.output_cosine], axis=1)
        return output_i
Example #47
0
    def __init__(self,
                 inputs,
                 cost,
                 layers,
                 max_col_norm=None,
                 loss_based_pooling=False,
                 pooling_loss=None,
                 learning_rate=0.01,
                 momentum=None,
                 rmsprop=True,
                 adadelta=False,
                 center_grads=False,
                 rho=0.96,
                 epsilon=1e-8,
                 use_nesterov=True,
                 seed=None,
                 rng=None,
                 constants=None,
                 **kw):

        self.loss_based_pooling = loss_based_pooling
        self.rng = rng
        params = [layer.W for layer in layers] + [layer.b for layer in layers]
        self.learning_rate = theano.shared(
            numpy.asarray(learning_rate, dtype=theano.config.floatX))
        self.layers = layers
        self.max_col_norm = max_col_norm
        #Initialize parameters for rmsprop:
        accumulators = OrderedDict({})
        accumulators_mgrad = OrderedDict({})
        exp_sqr_grads = OrderedDict({})
        exp_sqr_ups = OrderedDict({})
        e0s = OrderedDict({})
        learn_rates = []
        from utils import as_floatX

        self.max_col_norm = max_col_norm

        gparams = []
        for param in params:
            eps_p = numpy.zeros_like(param.get_value())

            accumulators[param] = theano.shared(value=as_floatX(eps_p),
                                                name="acc_%s" % param.name)
            accumulators_mgrad[param] = theano.shared(value=as_floatX(eps_p),
                                                      name="acc_mgrad%s" %
                                                      param.name)
            exp_sqr_grads[param] = theano.shared(value=as_floatX(eps_p),
                                                 name="exp_grad_%s" %
                                                 param.name)
            exp_sqr_ups[param] = theano.shared(value=as_floatX(eps_p),
                                               name="exp_ups_%s" % param.name)
            e0s[param] = as_floatX(learning_rate)
            gparam = T.grad(cost, param, consider_constant=constants)
            gparams.append(gparam)

        updates = OrderedDict({})

        i = 0

        for param, gparam in zip(params, gparams):
            if rmsprop:
                acc = accumulators[param]
                rms_grad = rho * acc + (1 - rho) * T.sqr(gparam)

                updates[acc] = rms_grad
                val = T.maximum(T.sqrt(T.sum(rms_grad, axis=0)), epsilon)

                learn_rates.append(e0s[param] / val)

                if center_grads:
                    acc_mg = accumulators_mgrad[param]
                    mean_grad = rho * acc_mg + (1 - rho) * gparam
                    gparam = gparam - mean_grad
                    updates[acc_mg] = mean_grad
                if momentum and not use_nesterov:
                    memory = theano.shared(param.get_value() * 0.)
                    updates[param] = param - memory
                    updates[
                        memory] = momentum * memory + learn_rates[i] * gparam
                elif use_nesterov:
                    memory = theano.shared(param.get_value() * 0.)
                    new_memo = momentum * memory - e0s[param] * gparam
                    #new_memo = momentum * memory - learn_rates[i] * gparam
                    updates[memory] = new_memo
                    updates[param] = param + (momentum * new_memo -
                                              e0s[param] * gparam) / val
                else:
                    updates[param] = param - learn_rates[i] * gparam
                i += 1
            elif adadelta:
                exp_sg = exp_sqr_grads[param]
                exp_su = exp_sqr_ups[param]
                up_exp_sg = rho * exp_sg + (1 - rho) * T.sqr(gparam)
                updates[exp_sg] = up_exp_sg
                step = -(T.sqrt(exp_su + epsilon) /
                         T.sqrt(up_exp_sg + epsilon)) * gparam
                updates[exp_su] = rho * exp_su + (1 - rho) * T.sqr(step)
                updates[param] = param + step
            else:
                if momentum and not use_nesterov:
                    memory = theano.shared(param.get_value() * 0.)
                    updates[param] = param - memory
                    updates[
                        memory] = momentum * memory + learning_rate * gparam
                elif use_nesterov:
                    memory = theano.shared(param.get_value() * 0.)
                    new_memo = momentum * memory - learning_rate * gparam
                    updates[memory] = new_memo
                    updates[
                        param] = param + momentum * new_memo - learning_rate * gparam
                else:
                    updates[param] = param - learning_rate * gparam

        if max_col_norm is not None:
            updates = self.constrain_weights(layers, updates, max_col_norm)

        self.updates = updates
        self._train = theano.function(inputs, outputs=cost, updates=updates)
        self._constrain_inputs = theano.function(inputs,
                                                 outputs=T.argsort(
                                                     pooling_loss, axis=0))
Example #48
0
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function([x, y],
                                     test_error,
                                     allow_input_downcast=True)
    test_predict = theano.function([x], test_y_pred, allow_input_downcast=True)
    #test_probs = theano.function([x], test_y_pred_p_reduce, allow_input_downcast=True)

    #gradient-based update
    dinput = T.grad(dropout_cost, layer0_input)
    din_onehot = dinput.dot(W.transpose())
    all_din1_indextemp = T.max(din_onehot, axis=3)
    all_din1_index = T.argsort(all_din1_indextemp, axis=2)
    Fall_din1_index = theano.function(
        [index],
        all_din1_index,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    all_din2_index = T.argsort(din_onehot, axis=3)
    Fall_din2_index = theano.function(
        [index],
        all_din2_index,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
Example #49
0
    def step(x_t, M_tm1, c_tm1, h_tm1, r_tm1, wr_tm1, wu_tm1):
        # Feed Forward controller
        # h_t = lasagne.nonlinearities.tanh(T.dot(x_t, W_h) + b_h)
        # LSTM controller
        # p.3: "This memory is used by the controller as the input to a classifier,
        #       such as a softmax output layer, and as an additional
        #       input for the next controller state." -> T.dot(r_tm1, W_rh)
        preactivations = T.dot(x_t, W_xh) + T.dot(r_tm1, W_rh) + T.dot(
            h_tm1, W_hh) + b_h
        gf_, gi_, go_, u_ = slice_equally(preactivations, controller_size, 4)
        gf = lasagne.nonlinearities.sigmoid(gf_)
        gi = lasagne.nonlinearities.sigmoid(gi_)
        go = lasagne.nonlinearities.sigmoid(go_)
        u = lasagne.nonlinearities.tanh(u_)

        c_t = gf * c_tm1 + gi * u
        h_t = go * lasagne.nonlinearities.tanh(c_t)  # (batch_size, num_units)

        k_t = lasagne.nonlinearities.tanh(
            T.dot(h_t, W_key) +
            b_key)  # (batch_size, nb_reads, memory_size[1])
        a_t = lasagne.nonlinearities.tanh(
            T.dot(h_t, W_add) +
            b_add)  # (batch_size, nb_reads, memory_size[1])
        sigma_t = lasagne.nonlinearities.sigmoid(
            T.dot(h_t, W_sigma) + b_sigma)  # (batch_size, nb_reads, 1)
        sigma_t = T.addbroadcast(sigma_t, 2)

        wlu_tm1 = T.argsort(wu_tm1,
                            axis=1)[:, :nb_reads]  # (batch_size, nb_reads)
        # ww_t = sigma_t * wr_tm1 + (1. - sigma_t) * wlu_tm1
        ww_t = (sigma_t * wr_tm1).reshape(
            (batch_size * nb_reads, memory_shape[0]))
        ww_t = T.inc_subtensor(
            ww_t[T.arange(batch_size * nb_reads),
                 wlu_tm1.flatten()],
            1. - sigma_t.flatten())  # (batch_size * nb_reads, memory_size[0])
        ww_t = ww_t.reshape(
            (batch_size, nb_reads,
             memory_shape[0]))  # (batch_size, nb_reads, memory_size[0])

        # p.4: "Prior to writing to memory, the least used memory location is
        #       computed from wu_tm1 and is set to zero"
        M_t = T.set_subtensor(M_tm1[T.arange(batch_size), wlu_tm1[:, 0]], 0.)
        M_t = M_t + T.batched_dot(ww_t.dimshuffle(
            0, 2, 1), a_t)  # (batch_size, memory_size[0], memory_size[1])
        K_t = cosine_similarity(k_t,
                                M_t)  # (batch_size, nb_reads, memory_size[0])

        wr_t = lasagne.nonlinearities.softmax(
            K_t.reshape((batch_size * nb_reads, memory_shape[0])))
        wr_t = wr_t.reshape(
            (batch_size, nb_reads,
             memory_shape[0]))  # (batch_size, nb_reads, memory_size[0])
        if batch_size == 1:
            wr_t = T.unbroadcast(wr_t, 0)
        wu_t = gamma * wu_tm1 + T.sum(wr_t, axis=1) + T.sum(
            ww_t, axis=1)  # (batch_size, memory_size[0])

        r_t = T.batched_dot(wr_t, M_t).flatten(
            ndim=2)  # (batch_size, nb_reads * memory_size[1])

        return (M_t, c_t, h_t, r_t, wr_t, wu_t)
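
The write weights above depend on T.argsort picking the nb_reads least-used memory slots from the previous usage vector. A minimal NumPy sketch of that interpolation, with made-up shapes rather than the original model's configuration:

import numpy as np

batch_size, nb_reads, mem_rows = 2, 3, 8
wu_tm1 = np.random.rand(batch_size, mem_rows)             # previous usage weights
wr_tm1 = np.random.rand(batch_size, nb_reads, mem_rows)   # previous read weights
sigma = np.random.rand(batch_size, nb_reads, 1)           # interpolation gate

# indices of the nb_reads least-used slots per batch element
wlu_tm1 = np.argsort(wu_tm1, axis=1)[:, :nb_reads]

# interpolate between the previous read location and the least-used location
ww = (sigma * wr_tm1).reshape(batch_size * nb_reads, mem_rows)
ww[np.arange(batch_size * nb_reads), wlu_tm1.flatten()] += (1. - sigma).flatten()
ww = ww.reshape(batch_size, nb_reads, mem_rows)
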
Example #50
0
    def merge_inc_func(self, learning_rate, batch_size, prealloc_x,
                       prealloc_y):
        ''' Return a function that can merge/increment the model '''
        # matrix for scoring merges
        m = T.matrix('m')
        m_dists, _ = theano.map(lambda v: T.sqrt(T.dot(v, v.T)), m)
        m_cosine = (T.dot(m, m.T) / m_dists) / m_dists.dimshuffle(0, 'x')
        m_ranks = T.argsort(
            (m_cosine - T.tri(m.shape[0]) * np.finfo(theano.config.floatX).max
             ).flatten())[(m.shape[0] * (m.shape[0] + 1)) // 2:]

        score_merges = theano.function([m], m_ranks)

        # greedy layerwise training
        layer_greedy = [
            ae.indexed_train_func(
                0,
                learning_rate,
                prealloc_x,
                batch_size,
                lambda x, j=i: chained_output(self.layers[:j], x))
            for i, ae in enumerate(self._layered_autoencoders)
        ]
        finetune = self._autoencoder.train_func(0, learning_rate, prealloc_x,
                                                prealloc_y, batch_size)
        combined_objective_tune = self._combined_objective.train_func(
            0, learning_rate, prealloc_x, prealloc_y, batch_size)

        # set up layered merge-increment - build a cost function
        mi_cost = self._softmax.cost + self.lam * self._autoencoder.cost
        mi_updates = []

        for i, nnlayer in enumerate(self._autoencoder.layers):
            if i == 0:
                mi_updates += [
                    (nnlayer.W,
                     T.inc_subtensor(
                         nnlayer.W[:, nnlayer.idx], -learning_rate *
                         T.grad(mi_cost, nnlayer.W)[:, nnlayer.idx].T))
                ]
                mi_updates += [(nnlayer.b,
                                T.inc_subtensor(
                                    nnlayer.b[nnlayer.idx], -learning_rate *
                                    T.grad(mi_cost, nnlayer.b)[nnlayer.idx]))]
            else:
                mi_updates += [
                    (nnlayer.W,
                     nnlayer.W - learning_rate * T.grad(mi_cost, nnlayer.W))
                ]
                mi_updates += [
                    (nnlayer.b,
                     nnlayer.b - learning_rate * T.grad(mi_cost, nnlayer.b))
                ]

            mi_updates += [(nnlayer.b_prime, nnlayer.b_prime -
                            learning_rate * T.grad(mi_cost, nnlayer.b_prime))]

        softmax_theta = [self.layers[-1].W, self.layers[-1].b]

        mi_updates += [(param, param - learning_rate * grad)
                       for param, grad in zip(softmax_theta,
                                              T.grad(mi_cost, softmax_theta))]

        idx = T.iscalar('idx')
        given = {
            self._x: prealloc_x[idx * batch_size:(idx + 1) * batch_size],
            self._y: prealloc_y[idx * batch_size:(idx + 1) * batch_size]
        }
        mi_train = theano.function([idx, self.layers[0].idx],
                                   None,
                                   updates=mi_updates,
                                   givens=given)

        def merge_model(pool_indexes, merge_percentage, inc_percentage):
            ''' Merge/increment the model using the given batch '''
            prev_map = {}
            prev_dimensions = self.layers[0].initial_size[0]

            # first layer
            used = set()
            empty_slots = []
            layer_weights = self.layers[0].W.get_value().T.copy()
            layer_bias = self.layers[0].b.get_value().copy()

            init = 4 * np.sqrt(6.0 / (sum(layer_weights.shape)))

            merge_count = int(merge_percentage * layer_weights.shape[0])
            inc_count = int(inc_percentage * layer_weights.shape[0])

            if merge_count == 0 and inc_count == 0:
                return

            for index in score_merges(layer_weights):
                if len(empty_slots) == merge_count:
                    break

                x_i, y_i = index % layer_weights.shape[
                    0], index // layer_weights.shape[0]

                if x_i not in used and y_i not in used:
                    # merge x_i with y_i
                    layer_weights[x_i] = (layer_weights[x_i] +
                                          layer_weights[y_i]) / 2
                    layer_bias[x_i] = (layer_bias[x_i] + layer_bias[y_i]) / 2

                    used.update([x_i, y_i])
                    empty_slots.append(y_i)

            new_size = layer_weights.shape[0] + inc_count - len(empty_slots)
            current_size = layer_weights.shape[0]

            # compact weights array if necessary
            if new_size < current_size:
                non_empty_slots = sorted(
                    list(set(range(0, current_size)) - set(empty_slots)),
                    reverse=True)[:len(empty_slots)]
                prev_map = dict(zip(empty_slots, non_empty_slots))

                # compact the layer weights by removing the empty slots
                for dest, src in prev_map.items():
                    layer_weights[dest] = layer_weights[src]
                    layer_weights[src] = np.asarray(self.rng.uniform(
                        low=-init, high=init, size=layer_weights.shape[1]),
                                                    dtype=theano.config.floatX)

                empty_slots = []
            else:
                prev_map = {}

            # will need to add more space for new features
            new_layer_weights = np.zeros((new_size, prev_dimensions),
                                         dtype=theano.config.floatX)
            new_layer_weights[:layer_weights.shape[0], :layer_weights.shape[
                1]] = layer_weights[:new_layer_weights.
                                    shape[0], :new_layer_weights.shape[1]]

            # randomly initialise new neurons
            empty_slots = [slot for slot in empty_slots if slot < new_size
                           ] + list(range(layer_weights.shape[0], new_size))
            new_layer_weights[empty_slots] = np.asarray(
                self.rng.uniform(low=-init,
                                 high=init,
                                 size=(len(empty_slots), prev_dimensions)),
                dtype=theano.config.floatX)

            layer_bias.resize(new_size)

            layer_bias_prime = self.layers[0].b_prime.get_value().copy()
            layer_bias_prime.resize(prev_dimensions)

            prev_dimensions = new_layer_weights.shape[0]

            # set the new data
            self.layers[0].W.set_value(new_layer_weights.T)
            self.layers[0].b.set_value(layer_bias)
            self.layers[0].b_prime.set_value(layer_bias_prime)

            # if empty_slots:
            #     # train this layer
            #     for _ in range(self.iterations):
            #         for i in pool_indexes:
            #             layer_greedy[0](i, empty_slots)

            # update the last layer's weight matrix size
            last_layer_weights = self.layers[1].W.get_value().copy()

            # apply mapping to last layer
            for dest, src in prev_map.items():
                last_layer_weights[dest] = last_layer_weights[src]
                last_layer_weights[src] = np.zeros(last_layer_weights.shape[1])

            # fix sizes
            last_layer_weights.resize(
                (prev_dimensions, self.layers[1].initial_size[1]))
            last_layer_prime = self.layers[1].b_prime.get_value().copy()
            last_layer_prime.resize(prev_dimensions)

            self.layers[1].W.set_value(last_layer_weights)
            self.layers[1].b_prime.set_value(last_layer_prime)

            # finetune with the deep autoencoder
            for _ in range(self.iterations):
                for i in pool_indexes:
                    finetune(i)

            # finetune with supervised
            if empty_slots:
                for _ in range(self.iterations):
                    for i in pool_indexes:
                        mi_train(i, empty_slots)
            else:
                for i in pool_indexes:
                    combined_objective_tune(i)

        return merge_model
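
The merge scoring in this example flattens a masked cosine-similarity matrix, argsorts it, and later decodes each flat index back into a (column, row) pair with % and //. A minimal NumPy sketch of that ranking and decoding, with illustrative sizes:

import numpy as np

m = np.random.rand(4, 5).astype('float32')            # one row per neuron
norms = np.sqrt((m * m).sum(axis=1))
cosine = np.dot(m, m.T) / norms / norms[:, None]       # pairwise cosine similarity
mask = np.tri(m.shape[0]) * np.finfo('float32').max    # push lower triangle and diagonal to the front
ranks = np.argsort((cosine - mask).flatten())[(m.shape[0] * (m.shape[0] + 1)) // 2:]
x_i, y_i = ranks[-1] % m.shape[0], ranks[-1] // m.shape[0]   # the last entry indexes the most similar pair
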
Example #51
0
    def preparePooling(self, conv_out):
        neighborsForPooling = TSN.images2neibs(
            ten4=conv_out, neib_shape=(1, conv_out.shape[3]), mode='ignore_borders')
        self.neighbors = neighborsForPooling
        neighborsArgSorted = T.argsort(neighborsForPooling, axis=1)
        return neighborsForPooling, neighborsArgSorted
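
A helper like the one above is usually followed by a k-max pooling step: the last k argsort positions per row are re-sorted so the original order is preserved, then used to gather values. A small NumPy sketch of that follow-up, not part of the original class:

import numpy as np

k = 3
neighbors = np.random.rand(5, 7)                       # one row per pooling region
arg_sorted = np.argsort(neighbors, axis=1)
top_k = np.sort(arg_sorted[:, -k:], axis=1)            # re-sort so the original order is kept
pooled = neighbors[np.arange(neighbors.shape[0])[:, None], top_k]   # shape (5, 3)
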
Example #52
0
def get_scores_all(mdl, fm, X, X_new, num_select=10):
    #f = open(output, "w")
    X1 = []

    # x = X[i], we add new values to the end of x

    for j in range(len(X_new)):

        for k in range(len(X_new[j])):
            xx = [xxx for xxx in X[j]]

            xx.append(X_new[j][k][0] + "_" + X_new[j][k][1])
            X1.append(xx)

    X = [[fm.f.map[fm.f.getFeatureValue(x)] + 1 for x in XX] for XX in X1]

    x, x_mask = preprare_seq_seq_data(X)

    x, _, mask_x, _, _, _, _, _ = mdl.standardize_data(x, None, x_mask, None,
                                                       None, None, None, None)

    score_pos = mdl.get_output_layer(-1, x, mask_x)
    score_pos = score_pos.swapaxes(0, 1)
    score_pos = score_pos[:, -1]
    print(score_pos)
    x = T.matrix("score")

    sort_f = th.function([x], T.argsort(x))

    sorted_values = sort_f(score_pos)
    print(sorted_values)
    rs = []
    rs_scores = []
    my_scores = []
    for i in range(sorted_values.shape[0]):
        #f.write(to_string(X1[i]) + " ")
        ss = []
        for j in range(1, sorted_values.shape[1] + 1):
            val = sorted_values[i][sorted_values.shape[1] - j]

            #val_map = fm.fY.map_inversed[val-1]
            score = score_pos[i][val]
            #f.write(str(val) + ":" + str(score) + " ")
            ss.append((val, score))
        #f.write("\n")
        my_scores.append((to_string(X1[i]), ss))

        vals = []
        c = 0
        for t in range(sorted_values.shape[1] - 1, -1, -1):
            if c == num_select:
                break
            v = sorted_values[i][t]

            if fm.fY.map_inversed[v - 1] != "EOS":
                vals.append(v)
                c += 1
        #vals = sorted_values[i][sorted_values.shape[1]-num_select:sorted_values.shape[1]]

        val_maps = [fm.fY.map_inversed[v - 1].split("_")
                    for v in list(vals)]  #if  fm.fY.map_inversed[v-1]!="EOS" ]
        scores = [score_pos[i][v]
                  for v in list(vals)]  # if fm.fY.map_inversed[v-1]!="EOS"]
        rs.append(val_maps)
        rs_scores.append(scores)

    return rs, rs_scores, X1, my_scores
Example #53
0
def all_same(idxs):
    first_row = idxs[0, :].reshape((1, idxs.shape[1]))
    first_row = T.argsort(first_row)
    return first_row.repeat(idxs.shape[0], axis=0)
Example #54
0
File: dnc.py  Project: xuqy1981/dnc-theano
    def dnc_step(
        s_x_,
        s_lstm_cell_,
        s_lstm_hid_,
        s_usage_,
        s_preced_,
        s_link_,
        s_mem_,
        s_read_val_,
        s_read_wgt_,
        s_write_wgt_):
        s_states_li_ = [
            s_lstm_cell_,
            s_lstm_hid_,
            s_usage_,
            s_preced_,
            s_link_,
            s_mem_,
            s_read_val_,
            s_read_wgt_,
            s_write_wgt_]
        s_inp = T.join(-1, s_x_, s_read_val_.flatten())

        s_lstm_cell_tp1, s_lstm_hid_tp1 = lyr.lyr_lstm(
            'ctrl',
            s_inp, s_lstm_cell_, s_lstm_hid_,
            ctrl_inp_size, ctrl_wm_size
        )
        s_out, s_itrface = T.split(
            lyr.lyr_linear(
                'ctrl_out', s_lstm_hid_tp1, ctrl_wm_size, ctrl_wm_size, bias_=None),
            [OUT_DIMS,itrface_size],2, axis=-1)
        splits_len = [
            N_READS*CELL_SIZE, N_READS, CELL_SIZE, 1,
            CELL_SIZE, CELL_SIZE, N_READS, 1, 1, 3*N_READS
        ]
        s_keyr, s_strr, s_keyw, s_strw, \
            s_ers, s_write, s_freeg, s_allocg, s_writeg, s_rmode = \
            T.split(s_itrface, splits_len, 10, axis=-1)

        s_keyr = T.reshape(s_keyr, (CELL_SIZE,N_READS))
        s_strr = 1.+T.nnet.softplus(s_strr)
        s_strw = 1.+T.nnet.softplus(s_strw[0])
        s_ers = T.nnet.sigmoid(s_ers)
        s_freeg = T.nnet.sigmoid(s_freeg)
        s_allocg = T.nnet.sigmoid(s_allocg[0])
        s_writeg = T.nnet.sigmoid(s_writeg[0])
        s_rmode = T.nnet.softmax(T.reshape(s_rmode,(N_READS,3))).dimshuffle(1,0,'x')

        s_mem_retention = T.prod(
            1.-s_freeg.dimshuffle(0,'x')*s_read_wgt_, axis=0)

        s_usage_tp1 = s_mem_retention*(
            s_usage_+s_write_wgt_-s_usage_*s_write_wgt_)
        s_usage_order = T.argsort(s_usage_tp1)
        s_usage_order_inv = T.inverse_permutation(s_usage_order)
        s_usage_tp1_sorted = s_usage_tp1[s_usage_order]

        s_alloc_wgt = ((1.-s_usage_tp1_sorted)*(
            T.join(
                0,np.array([1.],dtype=th.config.floatX),
                op_cumprod_hack(s_usage_tp1_sorted[:-1])
            )))[s_usage_order_inv]

        s_content_wgt_w = T.nnet.softmax(
            s_strw*T.dot(s_mem_, s_keyw)/(
                T.sqrt(
                    EPS+T.sum(T.sqr(s_mem_),axis=-1)*T.sum(T.sqr(s_keyw))))
        ).flatten()

        s_write_wgt_tp1 = s_writeg*(
            s_allocg*s_alloc_wgt+(1.-s_allocg)*s_content_wgt_w)

        s_mem_tp1 = s_mem_*(
            1.-T.outer(s_write_wgt_tp1,s_ers))+T.outer(s_write_wgt_tp1,s_write)
        s_preced_tp1 = (1.-T.sum(s_write_wgt_))*s_preced_ + s_write_wgt_tp1

        s_link_tp1 = (
            1.-s_write_wgt_tp1-s_write_wgt_tp1.dimshuffle(0,'x')
        )*s_link_ + T.outer(s_write_wgt_tp1,s_preced_)
        s_link_tp1 = s_link_tp1 * (1.-T.identity_like(s_link_tp1))#X
        s_fwd = T.dot(s_read_wgt_, s_link_tp1.transpose())#X
        s_bwd = T.dot(s_read_wgt_, s_link_tp1)#X

        s_content_wgt_r= T.nnet.softmax(T.dot(s_mem_tp1, s_keyr)/(T.sqrt(
            EPS+T.outer(
                T.sum(T.sqr(s_mem_tp1),axis=-1),T.sum(T.sqr(s_keyr),axis=0)
            )))).transpose()
        s_read_wgt_tp1 = s_bwd*s_rmode[0]+s_content_wgt_r*s_rmode[1]+s_fwd*s_rmode[2]
        s_read_val_tp1 = T.dot(s_read_wgt_tp1, s_mem_tp1)

        s_y = s_out + lyr.lyr_linear(
            'read_out',
            s_read_val_tp1.flatten(),
            CELL_SIZE*N_READS,OUT_DIMS,
            bias_=None)
        return [
            s_y,
            s_lstm_cell_tp1,
            s_lstm_hid_tp1,
            s_usage_tp1,
            s_preced_tp1,
            s_link_tp1,
            s_mem_tp1,
            s_read_val_tp1,
            s_read_wgt_tp1,
            s_write_wgt_tp1]
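
The allocation weighting in this step follows the DNC rule: after sorting slots by usage, each slot receives (1 - usage) times the product of the usages of all slots less used than it, and the result is mapped back to the original slot order with the inverse permutation. A minimal NumPy sketch of just that piece, with made-up usage values:

import numpy as np

usage = np.array([0.9, 0.1, 0.5, 0.7])       # usage of each memory slot
order = np.argsort(usage)                    # least-used slots first
inv_order = np.argsort(order)                # inverse permutation
u_sorted = usage[order]
alloc_sorted = (1. - u_sorted) * np.concatenate(([1.], np.cumprod(u_sorted[:-1])))
alloc = alloc_sorted[inv_order]              # allocation weights in the original slot order
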
Example #55
0
File: dibr.py  Project: pluohust/dibr-rvs
resultOrigAndWarp = T.set_subtensor(r2[(r2 < 0.0001).nonzero()], 999999)
resultOrigAndWarp = T.concatenate((c[:, 0:2], resultOrigAndWarp), axis=1)
_imageWarp2GPU = theano.function([pix, KRT, KRinv, KR2, KRT2, adjust],
                                 resultOrigAndWarp,
                                 on_unused_input='ignore')

#theano.printing.debugprint(_imageWarp2GPU)

r3 = r2
rd = r3[:, 2]
rxf = r3[:, 0] / rd
ryf = r3[:, 1] / rd
rx = T.cast(rxf, 'int32')
ry = T.cast(ryf, 'int32')
#rxy = T.transpose(T.as_tensor_variable([rx,ry]))
i = T.argsort(-rd)
rx = rx[i]
ry = ry[i]
rd = rd[i]
c = c[i]
p = p[i]
r2 = r2[i]
dest_img2 = T.set_subtensor(dest_img[rx, ry], p[:, 0])
dest_img = T.set_subtensor(dest_img[rx, ry], rd)
_imageWarp2GPUFilled2 = theano.function([pix, KRT, KRinv, KR2, KRT2, adjust],
                                        T.transpose(dest_img2))

_imageWarp2GPUFilled = theano.function([pix, KRT, KRinv, KR2, KRT2, adjust],
                                       T.transpose(dest_img))

# interpolate each pixel with the color values at (x,y),(x,y+1),(x+1,y),(x+1,y+1)
Example #56
0
def kmax(masked_data):
    result = masked_data[
        T.arange(masked_data.shape[0]).dimshuffle(0, "x", "x"),
        T.sort(T.argsort(masked_data, axis=1)[:, -pooling_size:, :], axis=1),
        T.arange(masked_data.shape[2]).dimshuffle("x", "x", 0)]
    return result
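
The same k-max selection can be checked against a direct NumPy equivalent; pooling_size comes from the surrounding model in the original, so it is set to an arbitrary value here:

import numpy as np

pooling_size = 2
data = np.random.rand(4, 6, 3)                         # (batch, sentence length, feature maps)
idx = np.sort(np.argsort(data, axis=1)[:, -pooling_size:, :], axis=1)
kmaxed = np.take_along_axis(data, idx, axis=1)         # shape (4, 2, 3)
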
Example #57
0
    def get_output(self, input_, label, mask=None):
        """
        This function overrides the parents' one.
        Computes the loss by model input_ion and real label.

        Parameters
        ----------
        input_: TensorVariable
            an array of (batch size, input_ion).
            for accuracy task, "input_" is 2D matrix.
        label: TensorVariable
            an array of (batch size, answer) or (batchsize,) if label is a list of class labels.
            for classification, highly recommend second one.
            should make label as integer.
        mask: TensorVariable
            an array of (batchsize,) only contains 0 and 1.
            loss are summed or averaged only through 1.

        Returns
        -------
        TensorVariable
            a symbolic tensor variable which is scalar.
        """
        # do
        if mask is None:
            if self.top_k == 1:
                if label.ndim == 1:
                    return T.mean(T.eq(T.argmax(input_, axis=-1), label))
                elif label.ndim == 2:
                    return T.mean(
                        T.eq(T.argmax(input_, axis=-1), T.argmax(label,
                                                                 axis=-1)))
                else:
                    raise ValueError()
            else:
                # TODO: not yet tested
                top_k_input_ = T.argsort(
                    input_
                )[:, -self.top_k:]  # sort by values and keep top k indices
                if label.ndim == 1:
                    return T.mean(T.any(T.eq(top_k_input_, label), axis=-1))
                elif label.ndim == 2:
                    return T.mean(
                        T.any(T.eq(top_k_input_, T.argmax(label, axis=-1)),
                              axis=-1))
                raise ValueError()
        else:
            if self.top_k == 1:
                if label.ndim == 1:
                    return T.sum(
                        T.eq(T.argmax(input_, axis=-1), label) *
                        mask) / T.sum(mask)
                elif label.ndim == 2:
                    return T.sum(
                        T.eq(T.argmax(input_, axis=-1), T.argmax(
                            label, axis=-1)) * mask) / T.sum(mask)
                else:
                    raise ValueError()
            else:
                # TODO: not yet tested
                top_k_input_ = T.argsort(
                    input_
                )[:, -self.top_k:]  # sort by values and keep top k indices
                if label.ndim == 1:
                    return T.sum(
                        T.any(T.eq(top_k_input_, label), axis=-1) *
                        mask) / T.sum(mask)
                elif label.ndim == 2:
                    return T.sum(
                        T.any(T.eq(top_k_input_, T.argmax(label, axis=-1)),
                              axis=-1) * mask) / T.sum(mask)
                raise ValueError()
Example #58
0
 def getKmaxIndices(self, weights, k):
     maxIndices = T.argsort(weights, axis=2)[:, :, -k:]
     maxIndicesSorted = T.sort(maxIndices, axis=2)
     ii = T.repeat(T.arange(self.batchsize), k)
     jj = maxIndicesSorted.flatten()
     return ii, jj
Example #59
0
        def _stepP(x_, h_, c_, lP_, dV_, xAux):
            preact = tensor.dot(sliceT(h_, 0, h_sz),
                                tparams[_p(prefix, 'W_hid')])
            preact += (tensor.dot(x_, tparams[_p(prefix, 'W_inp')]) +
                       tparams[_p(prefix, 'b')])
            if options.get('en_aux_inp', 0):
                preact += tensor.dot(xAux, tparams[_p(prefix, 'W_aux')])

            hL = [[]] * h_depth
            cL = [[]] * h_depth
            outp = [[]] * h_depth
            for di in xrange(h_depth):
                i = tensor.nnet.sigmoid(sliceT(preact, 0, h_sz))
                f = tensor.nnet.sigmoid(sliceT(preact, 1, h_sz))
                o = tensor.nnet.sigmoid(sliceT(preact, 2, h_sz))
                cL[di] = tensor.tanh(sliceT(preact, 3, h_sz))
                cL[di] = f * sliceT(c_, di, h_sz) + i * cL[di]
                hL[di] = o * tensor.tanh(cL[di])
                outp[di] = hL[di]
                if options.get('en_residual_conn', 1):
                    if (di > 0):
                        outp[di] += outp[di - 1]
                        print "Connecting residual at %d" % (di)
                if di < (h_depth - 1):
                    preact = tensor.dot(sliceT(h_, di+1, h_sz), tparams[_p(prefix, ('W_hid_' + str(di+1)))]) + \
                            tensor.dot(outp[di], tparams[_p(prefix, ('W_inp_' + str(di+1)))])

            c = tensor.concatenate(cL, axis=1)
            h = tensor.concatenate(hL, axis=1)

            if options.get('class_out_factoring', 0) == 1:
                if options.get('cls_diff_layer', 0) == 1:
                    pC = tensor.dot(hL[-2],
                                    tparams['WdCls']) + tparams['bdCls']
                else:
                    pC = tensor.dot(outp[-1],
                                    tparams['WdCls']) + tparams['bdCls']

                pCSft = tensor.nnet.softmax(pC)
                xCIdx = tensor.argmax(pCSft, axis=-1)
                #pW = tensor.dot(outp[-1],tparams['Wd'][:,xCIdx,:]) + tparams['bd'][:,xCIdx,:]
                #smooth_factor = tensor.as_tensor_variable(numpy_floatX(options.get('softmax_smooth_factor',1.0)), name='sm_f')
                #pWSft = tensor.nnet.softmax(pW*smooth_factor)
                #lProb = tensor.log(pWSft + 1e-20) + tensor.log(pCSft[0,xCIdx] + 1e-20)
                #########################################################
                # pW is now of size (beam_size, n_classes, class_size)
                if options.get('cls_zmean', 0):
                    pW = tensor.dot(
                        (outp[-1] - tparams['WdCls'][:, xCIdx].T),
                        tparams['Wd'].swapaxes(0, 1)) + tparams['bd'][0, :, :]
                else:
                    pW = tensor.dot((outp[-1]), tparams['Wd'].swapaxes(
                        0, 1)) + tparams['bd'][0, :, :]
                #smooth_factor = tensor.as_tensor_variable(numpy_floatX(options.get('softmax_smooth_factor',1.0)), name='sm_f')
                pWSft = tensor.nnet.softmax(
                    pW.reshape([pW.shape[0] * pW.shape[1],
                                pW.shape[2]])).reshape(
                                    [pW.shape[0], pW.shape[1] * pW.shape[2]])
                ixtoclsinfo_t = tensor.as_tensor_variable(self.clsinfo)
                lProb = tensor.log(pWSft[:,ixtoclsinfo_t[:,0]*tparams['Wd'].shape[2]+ixtoclsinfo_t[:,3]] + 1e-20) + \
                        tensor.log(pCSft[0,ixtoclsinfo_t[:,0]] + 1e-20)
            else:
                p = tensor.dot(outp[-1], tparams['Wd']) + tparams['bd']
                smooth_factor = tensor.as_tensor_variable(numpy_floatX(
                    options.get('softmax_smooth_factor', 1.0)),
                                                          name='sm_f')
                p = tensor.nnet.softmax(p * smooth_factor)
                lProb = tensor.log(p + 1e-20)
                if per_word_logweight is not None:
                    log_w = theano.shared(
                        per_word_logweight)  #, dtype= theano.config.floatX)
                    lProb = log_w + lProb

            if beam_size > 1:

                def _FindB_best(lPLcl, lPprev, dVLcl):
                    srtLcl = tensor.argsort(-lPLcl)
                    srtLcl = srtLcl[:beam_size]
                    deltaVec = tensor.fill(lPLcl[srtLcl],
                                           numpy_floatX(-10000.))
                    deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
                    lProbBest = ifelse(
                        tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                        lPLcl[srtLcl] + lPprev, deltaVec)
                    xWIdxBest = ifelse(
                        tensor.eq(dVLcl, tensor.zeros_like(dVLcl)), srtLcl,
                        tensor.zeros_like(srtLcl))
                    return lProbBest, xWIdxBest

                rvalLcl, updatesLcl = theano.scan(_FindB_best,
                                                  sequences=[lProb, lP_, dV_],
                                                  name=_p(prefix, 'FindBest'),
                                                  n_steps=x_.shape[0])
                xWIdxBest = rvalLcl[1]
                lProbBest = rvalLcl[0]

                xWIdxBest = xWIdxBest.flatten()
                lProb = lProbBest.flatten()
                # Now sort and find the best among these best extensions for the current beams
                srtIdx = tensor.argsort(-lProb)
                srtIdx = srtIdx[:beam_size]
                xCandIdx = srtIdx // beam_size  # Floor division
                h = h.take(xCandIdx.flatten(), axis=0)
                c = c.take(xCandIdx.flatten(), axis=0)
                xWlogProb = lProb[srtIdx]
                xWIdx = xWIdxBest[srtIdx]
                if options.get('class_out_factoring', 0) == 1:
                    clsoffset = tensor.as_tensor_variable(self.clsOffset)
            else:
                xCandIdx = tensor.as_tensor_variable([0])
                lProb = lProb.flatten()
                xWIdx = tensor.argmax(lProb, keepdims=True)
                xWlogProb = lProb[xWIdx] + lP_
                #if options.get('class_out_factoring',0) == 1:
                #    clsoffset = tensor.as_tensor_variable(self.clsOffset)
                #    xWIdx += clsoffset[xCIdx]
                h = h.take(xCandIdx.flatten(), axis=0)
                c = c.take(xCandIdx.flatten(), axis=0)

            if options.get('softmax_propogate', 0) == 0:
                xW = tparams['Wemb'][xWIdx.flatten()]
            else:
                xW = p.dot(tparams['Wemb'])
            doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))

            return [xW, h, c, xWlogProb, doneVec, xWIdx,
                    xCandIdx], theano.scan_module.until(doneVec.all())
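
The beam pruning near the end of this step argsorts the negated log-probabilities of all proposed extensions and keeps the first beam_size entries; integer division by beam_size then recovers which existing beam each winner extends. A minimal NumPy sketch of that selection, with made-up scores:

import numpy as np

beam_size = 3
# each live beam proposes its own beam_size best extensions, so the flattened
# score vector has beam_size * beam_size entries
lprob = np.random.rand(beam_size * beam_size)
best = np.argsort(-lprob)[:beam_size]        # overall beam_size best extensions
parent_beam = best // beam_size              # which existing beam each winner extends
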
Example #60
0
import theano
import theano.tensor as T
import numpy as np

x = T.tensor4()

yinds = T.argsort(x, axis=3)
func = theano.function([x], yinds)

X = np.random.random((2, 2, 3, 4))

print X
print func(X)
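
As a small follow-up to the script above, the last k columns of yinds along the final axis are the indices of the k largest entries at each position; the extension below is hypothetical and not part of the original script:

k = 2
topk_inds = theano.function([x], yinds[:, :, :, -k:])
print(topk_inds(X).shape)   # -> (2, 2, 3, 2)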