def keep_max(input, theta, k, sent_mask):
    sig_input = T.nnet.sigmoid(T.dot(input, theta))
    sent_mask = sent_mask.dimshuffle(0, 'x', 1, 'x')
    sig_input = sig_input * sent_mask
    #sig_input = T.dot(input, theta)
    if k == 0:
        result = input * T.addbroadcast(sig_input, 3)
        return result, sig_input

    # get the sorted idx
    sort_idx = T.argsort(sig_input, axis=2)
    k_max_ids = sort_idx[:,:,-k:,:]
    dim0, dim1, dim2, dim3 = k_max_ids.shape
    batchids = T.repeat(T.arange(dim0), dim1*dim2*dim3)
    mapids = T.repeat(T.arange(dim1), dim2*dim3).reshape((1, dim2*dim3))
    mapids = T.repeat(mapids, dim0, axis=0).flatten()
    rowids = k_max_ids.flatten()
    colids = T.arange(dim3).reshape((1, dim3))
    colids = T.repeat(colids, dim0*dim1*dim2, axis=0).flatten()
    sig_mask = T.zeros_like(sig_input)
    choosed = sig_input[batchids, mapids, rowids, colids]
    sig_mask = T.set_subtensor(sig_mask[batchids, mapids, rowids, colids], 1)
    input_mask = sig_mask * sig_input
    result = input * T.addbroadcast(input_mask, 3)
    return result, sig_input
    def fprop(self, x, mode='train'):
        if mode == 'test':
            # this is for use during test/validation time
            x_avg = self.params.getParameter('x_avg')
        elif mode == 'calculate':
            x_avg = x.mean(self.norm_axis, keepdims=True)
        elif mode == 'train':
            # otherwise calculate the batch mean and std
            x_avg = x.mean(self.norm_axis, keepdims=True)
            
            # the following trick is learned from the Lasagne implementation
            running_mean = theano.clone(self.params.getParameter('x_avg'), share_inputs=False)
            
            running_mean_update = ((1 - self.alpha) * running_mean
                                   + self.alpha * x_avg)

            # set a default update for it
            running_mean.default_update = running_mean_update
 
            x_avg += 0 * running_mean
        else:
            raise ValueError("mode must be one of ['train', 'test', 'calculate']")
        
        self.x_avg = x_avg
        x_avg = T.addbroadcast(x_avg, *self.norm_axis)
        beta = T.addbroadcast(self.params.getParameter('beta'), *self.norm_axis)
        
        bn_x = x / (x_avg + 1e-18) * beta
        return bn_x if self.actFunc is None else self.actFunc(bn_x)


# End BatchExpNormLayer
#-------------------------------------------------------------------------------
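
# A minimal standalone sketch (not part of the layer above) of the running-average
# trick used in fprop: theano.clone gives a memory-aliased copy of the stored shared
# statistic, and its default_update accumulates the batch mean whenever a graph that
# references the clone is evaluated. Names below are illustrative only.
import numpy as np
import theano
import theano.tensor as T

alpha = 0.1
x = T.matrix('x')
x_avg_shared = theano.shared(np.zeros((1, 3), dtype=theano.config.floatX), name='x_avg')
batch_avg = x.mean(axis=0, keepdims=True)
running_mean = theano.clone(x_avg_shared, share_inputs=False)
running_mean.default_update = (1 - alpha) * running_mean + alpha * batch_avg
batch_avg = batch_avg + 0 * running_mean   # pull the clone (and its update) into the graph
f = theano.function([x], batch_avg)
f(np.ones((4, 3), dtype=theano.config.floatX))
print(x_avg_shared.get_value())            # moved from 0 toward the batch mean (0.1)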
Example #3
def local_contrast_normalize(X, window, img_shape):
    """Return normalized X and the convolution transform
    """
    batchsize, channels, R, C = img_shape
    assert window.shape[0] == 1
    assert window.shape[1] == channels
    N = window.shape[2]
    assert window.shape[3] == N
    blur = tlinear.Conv2d(
            filters=sharedX(window, 'LCN_window'),
            img_shape=img_shape,
            border_mode='full')
    N2 = N//2
    # remove global mean
    X = X - X.mean(axis=[1, 2, 3]).dimshuffle(0, 'x', 'x', 'x')

    #remove local mean
    blurred_x = tensor.addbroadcast(blur.lmul(X), 1)
    x2c = X - blurred_x[:, :, N2:R + N2, N2:C + N2]

    # standardize contrast
    blurred_x2c_sqr = tensor.addbroadcast(blur.lmul(x2c ** 2), 1)
    x2c_lcn =  x2c / tensor.sqrt((10 + blurred_x2c_sqr[:, :, N2:R + N2, N2:C + N2]))

    return x2c_lcn, blur
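
# A hedged standalone sketch of the broadcasting step in local_contrast_normalize,
# using T.nnet.conv2d directly instead of the tlinear.Conv2d wrapper assumed above:
# a single-filter blur produces one output channel, which addbroadcast lets us
# subtract from (or divide into) every input channel.
import numpy as np
import theano
import theano.tensor as T

X = T.tensor4('X')              # (batch, channels, R, C)
window = T.tensor4('window')    # (1, channels, N, N) blur kernel
blurred = T.addbroadcast(T.nnet.conv2d(X, window, border_mode='full'), 1)
f = theano.function([X, window], X - blurred[:, :, 2:10, 2:10])
x = np.random.randn(2, 3, 8, 8).astype(theano.config.floatX)
w = (np.ones((1, 3, 5, 5)) / (3 * 25.0)).astype(theano.config.floatX)
print(f(x, w).shape)            # (2, 3, 8, 8): the single blurred channel broadcasts over all 3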
def keep_max(input, theta, k):
    """
    :type input: theano.tensor.tensor4
    :param input: the input data
                
    :type theta: theano.tensor.matrix
    :param theta: the parameter for sigmoid function
                            
    :type k: int
    :param k: the number of top-scoring sentences to keep (k == 0 keeps them all)
    """
    sig_input = T.nnet.sigmoid(T.dot(input, theta))
    if k == 0: # using all the sentences
        result = input * T.addbroadcast(sig_input, 3)
        return result, sig_input

    # get the sorted idx
    sort_idx = T.argsort(sig_input, axis=2)
    k_max_ids = sort_idx[:,:,-k:,:]
    dim0, dim1, dim2, dim3 = k_max_ids.shape
    batchids = T.repeat(T.arange(dim0), dim1*dim2*dim3)
    mapids = T.repeat(T.arange(dim1), dim2*dim3).reshape((1, dim2*dim3))
    mapids = T.repeat(mapids, dim0, axis=0).flatten()
    rowids = k_max_ids.flatten()
    colids = T.arange(dim3).reshape((1, dim3))
    colids = T.repeat(colids, dim0*dim1*dim2, axis=0).flatten()
    # construct masked data
    sig_mask = T.zeros_like(sig_input)
    choosed = sig_input[batchids, mapids, rowids, colids]
    sig_mask = T.set_subtensor(sig_mask[batchids, mapids, rowids, colids], 1)

    input_mask = sig_mask * sig_input
    result = input * T.addbroadcast(input_mask, 3)
    return result, sig_input
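
# A hedged usage sketch, assuming the keep_max helper above is in scope. The shapes
# are assumed to be (batch, n_maps, n_sentences, dim) for `input` and (dim, 1) for
# `theta`, so the sigmoid scores are (batch, n_maps, n_sentences, 1) and axis 3 can
# be re-marked broadcastable to gate every feature of each kept sentence.
import numpy as np
import theano
import theano.tensor as T

x = T.tensor4('x')
theta = T.matrix('theta')
gated, scores = keep_max(x, theta, k=2)
f = theano.function([x, theta], [gated, scores])

x_val = np.random.randn(3, 1, 5, 8).astype(theano.config.floatX)
theta_val = np.random.randn(8, 1).astype(theano.config.floatX)
gated_val, score_val = f(x_val, theta_val)
print(gated_val.shape, score_val.shape)   # (3, 1, 5, 8) (3, 1, 5, 1)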
 def get_output_for(self, input, deterministic=False, **kwargs):
     if deterministic:
         # use stored mean and std
         mean = self.mean
         std = self.std
     else:
         # use this batch's mean and std
         mean = input.mean(self.axes, keepdims=True)
         std = input.std(self.axes, keepdims=True)
         # and update the stored mean and std:
         # we create (memory-aliased) clones of the stored mean and std
         running_mean = theano.clone(self.mean, share_inputs=False)
         running_std = theano.clone(self.std, share_inputs=False)
         # set a default update for them
         running_mean.default_update = (1 - self.alpha) * running_mean + self.alpha * mean
         running_std.default_update = (1 - self.alpha) * running_std + self.alpha * std
         # and include them in the graph so their default updates will be
         # applied (although the expressions will be optimized away later)
         mean += 0 * running_mean
         std += 0 * running_std
     std += self.epsilon
     mean = T.addbroadcast(mean, *self.axes)
     std = T.addbroadcast(std, *self.axes)
     beta = T.addbroadcast(self.beta, *self.axes)
     gamma = T.addbroadcast(self.gamma, *self.axes)
     normalized = (input - mean) * (gamma / std) + beta
     return self.nonlinearity(normalized)
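
# A hedged standalone sketch of the normalization pattern above (illustrative names,
# not Lasagne's API): statistics computed with keepdims over `axes` are re-marked
# broadcastable with T.addbroadcast so they combine elementwise with the full-sized input.
import numpy as np
import theano
import theano.tensor as T

axes = (0, 2, 3)
x = T.tensor4('x')
mean = T.addbroadcast(x.mean(axes, keepdims=True), *axes)
std = T.addbroadcast(x.std(axes, keepdims=True) + 1e-4, *axes)
normalize = theano.function([x], (x - mean) / std)
print(normalize(np.random.randn(8, 3, 5, 5).astype(theano.config.floatX)).shape)  # (8, 3, 5, 5)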
Example #6
    def _ct(self, other):
        ''' Helper function to make tensors dimensions compatible'''
        if (other.var_set == self.var_set):
            return (self.pt_tensor, other.pt_tensor)
        union_var_set = other.scope.union(self.scope)
        vidx1 = frozenset(self.var_indices)
        vidx2 = frozenset(other.var_indices)
        union_indices = vidx1.union(vidx2)

        shape1 = []
        shape2 = []
        b1 = []
        b2 = []
        u1 = []
        u2 = []

        for i,vidx in enumerate(sorted(union_indices)):
            if (vidx in vidx1):
                shape1.append(self.discrete_pgm.cardinalities[vidx])
                u1.append(i)
            else:
                shape1.append(1)
                b1.append(i)
            if (vidx in vidx2):
                shape2.append(self.discrete_pgm.cardinalities[vidx])
                u2.append(i)
            else:
                shape2.append(1)
                b2.append(i)
        t1 = T.addbroadcast(T.unbroadcast(self.pt_tensor.reshape(shape1, len(shape1)), *u1), *b1)
        t2 = T.addbroadcast(T.unbroadcast(other.pt_tensor.reshape(shape2, len(shape2)), *u2), *b2)
        return (t1, t2)
Example #7
    def output(self, input):

        if self.unflatten_input is not None:
            input = T.reshape(input, self.unflatten_input)

        W_shuffled = self.W.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01

        conv_out = dnn.dnn_conv(img=input,
                                        kerns=W_shuffled,
                                        subsample=(self.convstride, self.convstride),
                                        border_mode=self.padsize)

        conv_out = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')

        if self.batch_norm:
            conv_out = (conv_out - T.mean(conv_out, axis = (0,2,3), keepdims = True)) / (1.0 + T.std(conv_out, axis=(0,2,3), keepdims = True))
            conv_out = conv_out * T.addbroadcast(self.bn_std,0,2,3) + T.addbroadcast(self.bn_mean, 0,2,3)

        self.out_store = conv_out

        if self.activation == "relu":
            self.out = T.maximum(0.0, conv_out)
        elif self.activation == "tanh":
            self.out = T.tanh(conv_out)
        elif self.activation is None:
            self.out = conv_out

        #if self.residual:
        #    print "USING RESIDUAL"
        #    self.out += input


        return self.out
Example #8
    def output(self, input_raw):

        if self.flatten_input:
            input = input_raw.flatten(2)
        else:
            input = input_raw

        lin_output = T.dot(input, self.W) + self.b

        if self.batch_norm:
            lin_output = (lin_output - T.mean(lin_output, axis = 0, keepdims = True)) / (1.0 + T.std(lin_output, axis = 0, keepdims = True))
            lin_output = (lin_output * T.addbroadcast(self.bn_std,0) + T.addbroadcast(self.bn_mean,0))

        self.out_store = lin_output

        if self.activation is None:
            activation = lambda x: x
        elif self.activation == "relu": 
            activation = lambda x: T.maximum(0.0, x)
        elif self.activation == "exp": 
            activation = lambda x: T.exp(x)
        elif self.activation == "tanh":
            activation = lambda x: T.tanh(x)
        elif self.activation == 'softplus':
            activation = lambda x: T.nnet.softplus(x)
        else: 
            raise Exception("Activation not found")

        out = activation(lin_output)

        #if self.residual:
        #    return out + input_raw
        #else:
        #    return out
        return out
def Softmax(x, temp = 1):
    """ 
    Softmax Units. 

    Applies row-wise softmax  to the input supplied.
            
    Args:
        x: could be a ``theano.tensor`` or a ``theano.shared`` or ``numpy`` arrays or 
            ``python lists``.
        temp: temperature of type ``float``. Mainly used during distillation, normal 
                softmax prefer ``T=1``. 
    Notes:
        Refer [3] for details.
    
        .. [#]  Hinton, Geoffrey, Oriol Vinyals, and Jeff Dean. "Distilling the knowledge in
                a neural network." arXiv preprint arXiv:1503.02531 (2015).       
                        
    Returns: 
        same as input: returns a row-wise softmax output of the same shape as the input.
    """
    if temp != 1:
        expo = T.exp(x / float(temp)) # at this moment this is mini_batch_size X num_classes.
        normalizer = T.sum(expo,axis=1,keepdims=True)  # at this moment this is mini_batch_size X 1.
        normalizer = T.addbroadcast(normalizer, 1)
        return expo / normalizer
    else:
        return T.nnet.softmax(x)               
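
# A hedged usage sketch, assuming the Softmax helper above is in scope: with
# temp != 1 the normalizer is a (rows, 1) column that addbroadcast lets us divide
# into every column of expo.
import numpy as np
import theano
import theano.tensor as T

logits = T.matrix('logits')
soften = theano.function([logits], Softmax(logits, temp=2.0))
x = np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]], dtype=theano.config.floatX)
print(soften(x).sum(axis=1))   # every row sums to 1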
Example #10
    def output(self, input):

        W_shuffled = self.W.dimshuffle(3, 0, 1, 2)  # c01b to bc01

        print("input ndim", input.ndim)

        conv_out = dnn.dnn_conv(img=input,
                                        kerns=W_shuffled,
                                        subsample=(self.stride, self.stride),
                                        border_mode=self.padsize)

        conv_out = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

        if self.batch_norm:
            conv_out = (conv_out - T.mean(conv_out, axis = (0,2,3), keepdims = True)) / (1.0 + T.std(conv_out, axis=(0,2,3), keepdims = True))
            conv_out = conv_out * T.addbroadcast(self.bn_std,0,2,3) + T.addbroadcast(self.bn_mean, 0,2,3)

        self.out_store = conv_out

        if self.activation == "relu":
            self.out = T.maximum(0.0, conv_out)
        elif self.activation == "tanh":
            self.out = T.tanh(conv_out)
        elif self.activation is None:
            self.out = conv_out


        return T.specify_shape(self.out, (self.batch_size, self.out_channels, self.in_length // self.stride, self.in_length // self.stride))
Example #11
    def output(self, input):

        if self.unflatten_input is not None:
            input = T.reshape(input, self.unflatten_input)

        conv_out = deconv(input, self.W, subsample=(2, 2), border_mode=(2,2))

        conv_out = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

        if self.batch_norm:
            conv_out = (conv_out - conv_out.mean(axis = (0,2,3), keepdims = True)) / (1.0 + conv_out.std(axis = (0,2,3), keepdims = True))
            conv_out = conv_out * T.addbroadcast(self.bn_std,0,2,3) + T.addbroadcast(self.bn_mean,0,2,3)

        if self.activation == "relu":
            out = T.maximum(0.0, conv_out)
        elif self.activation == "tanh":
            out = T.tanh(conv_out)
        elif self.activation is None:
            out = conv_out
        else:
            raise Exception("Unknown activation: %s" % self.activation)


        self.params = {'W' : self.W, 'b' : self.b}
        if self.batch_norm:
            self.params["mu"] = self.bn_mean
            self.params["sigma"] = self.bn_std

        return out
	def resample_step(self):
		
		idx=self.theano_rng.multinomial(pvals=T.reshape(self.weights_now,(1,self.npcl))).T
		s_samp=T.sum(self.s_now*T.addbroadcast(idx,1),axis=0)
		h_samp=T.sum(self.h_now*T.addbroadcast(idx,1),axis=0)
		
		return T.cast(s_samp,'float32'), T.cast(h_samp,'float32')
Example #13
    def get_state(self):
        st = super(LatentTypeWithTuningCurve, self).get_state()

        # The filters are non-identifiable as we can negate both the
        # temporal and the spatial filters and get the same net effect.
        # By convention, choose the sign that results in the most
        # positive temporal filter.
        sign = T.sgn(T.sum(self.stim_resp_t, axis=0))
        T.addbroadcast(sign, 0)

        # Similarly, we can trade a constant between the spatial and temporal
        # pieces. By convention, set the temporal filter to norm 1.
        Z = T.sqrt(T.sum(self.stim_resp_t**2, axis=0))
        T.addbroadcast(Z, 0)

        # Compute the normalized temporal response
        stim_resp_t = sign*(1.0/Z)*self.stim_resp_t

        # Finally, reshape the spatial component as necessary
        if self.spatial_ndim == 2:
            stim_resp_x = sign*Z*self.stim_resp_x
            stim_resp_x = T.reshape(stim_resp_x,
                                    self.spatial_shape + (self.R,))
        else:
            stim_resp_x = sign*Z*self.stim_resp_x

        st.update({'stim_response_x' : stim_resp_x,
                   'stim_response_t' : stim_resp_t})

        return st
Example #14
    def log_p(self, L):
        """ Compute log prob of the given value under this prior
            Input: L ~ NxD
        """
        assert L.ndim == 2, "L must be 2d!"
        # Compute pairwise L2 norm
        L1 = L.dimshuffle(0,'x',1)     # Nx1xD
        L2 = L.dimshuffle('x',0,1)     # 1xNxD
        L1 = T.addbroadcast(L1, 1)
        L2 = T.addbroadcast(L2, 0)

        # Compute pairwise distances
        D = ((L1-L2)**2).sum(axis=2)

        # Compute the kernel
        K = T.exp(-D / self.sigma**2)

        # Log prob is the log determinant of the pairwise distances
        lp_det = T.log(self.d(K))

        # Also multiply by a spherical Gaussian with standard deviation of 'bound'
        # to prevent points from diverging to infinity
        lp_gauss = self.gaussian.log_p(L)

        return lp_det + lp_gauss
Example #15
    def embed(self,x, y, kth):

        hidden = self.hidden_k(x,self.superw,self.dicw, kth)
        size = y.ndim

        y = T.addbroadcast(y,size - 1)
        embedding = T.sum(hidden*y,0)/T.addbroadcast(T.cast(T.sum(y,0), 'int16'), size - 2)
        return embedding
Example #16
    def __Theano_build__(self):
        Td = T.tensor3('Td')
        Ty = T.ivector('Ty')
        Tlr = T.scalar('Tlr')
        #Talpha = T.TensorType(dtype='float32', broadcastable=(0, 1, 1))('alpha')
        A = theano.shared(np.ones((self.D.shape[0]))\
                .astype('float32').reshape(-1, 1, 1), 'A')
        Ttriple   = [T.ivector('triple'+x) for x in ['i', 'j', 'l']]
        Tneighbor = [T.ivector('neighbor'+x) for x in ['i', 'j']]

        d = (Td * T.addbroadcast(A, 1, 2)).sum(0)
        pull_error, _ = theano.scan(
            fn = lambda i, j, d: d[i, j],
            sequences=[Tneighbor[0], Tneighbor[1]],
            outputs_info=None,
            non_sequences=[d])
        pull_error = pull_error.sum()

        push_error, _ = theano.scan(
            fn = lambda i, j, l, d: T.neq(Ty[i], Ty[l]) * T.maximum((d[i]-d[j]) - (d[i]-d[l]) +1, 0),
            sequences=[Ttriple[0], Ttriple[1], Ttriple[2]],
            outputs_info=None,
            non_sequences=[d])
#       zerocount = T.eq(linalg.diag(mask*T.maximum(lossij - lossil + 1, 0)), 0).sum()

        error = pull_error.sum() + push_error.sum()
        grad = T.grad(error, A)
        newA = A - Tlr*grad #T.maximum(A - Tlr*grad, 0)
        updates = [(A, newA/newA.sum())]

        self.Ttrain = theano.function(Ttriple+Tneighbor+[Tlr], 
                Tlr*grad,
                givens={Td: self.D, 
                        Ty: self.y},
                updates=updates,
                allow_input_downcast=True,
                on_unused_input='warn')

        self.Tloss = theano.function(Ttriple+Tneighbor, 
                error,
                givens={Td: self.D, Ty: self.y},
                allow_input_downcast=True)


#       eig, eigv = linalg.eig((d+d.T)/2.0)
#       self.Tmineig = theano.function([], 
#               T.min(eig),
#               givens={Td: self.D},
#               allow_input_downcast=True)

        self.Tmindist = theano.function([], 
                T.min(d),
                givens={Td: self.D},
                allow_input_downcast=True)


        self.Ttransform = theano.function([Td], (Td*T.addbroadcast(A, 1, 2)).sum(0), allow_input_downcast=True)
        self.TA = A
Example #17
    def __init__(self, model):
        self.model = model
        self.imp_model = model['impulse']

        # Number of presynaptic neurons
        self.N = model['N']

        # Get parameters of the prior
        self.alpha = self.imp_model['alpha']

        # Create a basis for the impulse responses
        self.basis = create_basis(self.imp_model['basis'])
        (_,self.B) = self.basis.shape
        # The basis is interpolated once the data is specified
        self.initialize_basis()

        # Initialize memory for the filtered spike train
        self.ir = theano.shared(name='ir',
                                value=np.zeros((1,self.N,self.B)))

        # Define Dirichlet distributed weights by normalizing gammas
        # The variables are log-gamma distributed
        self.lng = T.dvector('w_lng')
        self.g = T.exp(self.lng)
        self.g2 = T.reshape(self.g, [self.N,self.B])
        self.g_sum = T.reshape(T.sum(self.g2, axis=1), [self.N,1])
        
        # Normalize the gammas to get a Dirichlet draw
        self.g_sum = T.addbroadcast(self.g_sum, 1)
        self.w_ir2 = self.g2 / self.g_sum
        self.w_ir2.name = 'w_ir'

        # Repeat them (in a differentiable manner) to create a 3-tensor
        self.w_ir3 = T.reshape(self.w_ir2, [1,self.N,self.B])

        # Make w_ir3 broadcastable in the 1st dim
        self.w_ir3 = T.addbroadcast(self.w_ir3, 0)

        # Take the elementwise product of the filtered stimulus and
        # the repeated weights to get the weighted impulse current along each
        # impulse basis dimension. Then sum over bases to get the
        # total coupling current from each presynaptic neurons at
        # all time points
        self.I_imp = T.sum(self.ir*self.w_ir3, axis=2)

        # Log probability of a set of independent log-gamma r.v.'s
        # This is log p(log(g)) under the prior. Since we are taking the
        # log, we multiply by a factor of g to ensure normalization and
        # thus the \alpha-1 in the exponent becomes \alpha
        self.log_p = -self.B*self.N*scipy.special.gammaln(self.alpha) \
                     + T.sum(self.alpha*self.lng) \
                     - T.sum(self.g)

        # Define a helper variable for the impulse response
        # after projecting onto the basis
        self.impulse = T.dot(self.w_ir2, T.transpose(self.ibasis))
Example #18
    def __init__(self, model, latent):
        """ Initialize the stochastic block model for the adjacency matrix
        """
        self.model = model
        self.prms = model['network']['graph']
        self.N = model['N']
        self.N_dims = self.prms['N_dims']

        # Get the latent location
        self.location = latent[self.prms['locations']]
        self.Lm = self.location.Lm
        # self.location_prior = create_prior(self.prms['location_prior'])
        #
        # # Latent distance model has NxR matrix of locations L
        # self.L = T.dvector('L')
        # self.Lm = T.reshape(self.L, (self.N, self.N_dims))

        # Compute the distance between each pair of locations
        # Reshape L into a Nx1xD matrix and a 1xNxD matrix, then add the requisite
        # broadcasting in order to subtract the two matrices
        L1 = self.Lm.dimshuffle(0,'x',1)     # Nx1xD
        L2 = self.Lm.dimshuffle('x',0,1)     # 1xNxD
        L1 = T.addbroadcast(L1, 1)
        L2 = T.addbroadcast(L2, 0)
        #self.D = T.sqrt(T.sum((L1-L2)**2, axis=2))
        #self.D = T.sum((L1-L2)**2, axis=2)

        # It seems we need to use L1 norm for now because
        # Theano doesn't properly compute the gradients of the L2
        # norm. (It gives NaNs because it doesn't realize that some
        # terms will cancel out)
        # self.D = (L1-L2).norm(1, axis=2)
        self.D = T.pow(L1-L2,2).sum(axis=2)

        # There is a distance scale, \delta
        self.delta = T.dscalar(name='delta')

        # Define complete adjacency matrix
        self.A = T.bmatrix('A')

        # The probability of A is exponentially decreasing in delta
        # self.pA = T.exp(-1.0*self.D/self.delta)
        self.pA = T.exp(-0.5*self.D/self.delta**2)

        if 'rho_refractory' in self.prms:
            self.pA += T.eye(self.N) * (self.prms['rho_refractory']-self.pA)
            # self.pA[np.diag_indices(self.N)] = self.prms['rho_refractory']

        # Allow for scaling the log likelihood of the graph so that we can do
        # Annealed importance sampling
        self.lkhd_scale = theano.shared(value=1.0, name='lkhd_scale')

        # Define log probability
        self.lkhd = T.sum(self.A * T.log(self.pA) + (1 - self.A) * T.log(1 - self.pA))
        # self.log_p = self.lkhd_scale * self.lkhd + self.location_prior.log_p(self.Lm)
        self.log_p = self.lkhd_scale * self.lkhd
 def get_t_weights(self, t):
     """
     Generate vector of weights allowing selection of current timestep.
     (if t is not an integer, the weights will linearly interpolate)
     """
     n_seg = self.trajectory_length
     t_compare = T.arange(n_seg, dtype=theano.config.floatX).reshape((1,n_seg))
     diff = abs(T.addbroadcast(t,1) - T.addbroadcast(t_compare,0))
     t_weights = T.max(T.join(1, (-diff+1).reshape((n_seg,1)), T.zeros((n_seg,1))), axis=1)
     return t_weights.reshape((-1,1))
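
# A hedged standalone sketch of the same interpolation trick as get_t_weights above,
# with the trajectory length fixed to a constant instead of self.trajectory_length.
import numpy as np
import theano
import theano.tensor as T

n_seg = 5
t = T.matrix('t')   # a (1, 1) timestep, possibly fractional
t_compare = T.arange(n_seg, dtype=theano.config.floatX).reshape((1, n_seg))
diff = abs(T.addbroadcast(t, 1) - T.addbroadcast(t_compare, 0))
t_weights = T.max(T.join(1, (-diff + 1).reshape((n_seg, 1)), T.zeros((n_seg, 1))), axis=1)
f = theano.function([t], t_weights.reshape((-1, 1)))
print(f(np.array([[1.5]], dtype=theano.config.floatX)).ravel())  # [0., 0.5, 0.5, 0., 0.]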
    def __init__(self, input, n_filt, n_in, n_out, y, hist_len, y_len):
        """ Initialize the parameters of the poisson regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        #self.W = theano.shared(value=numpy.identity(n_in, dtype=theano.config.floatX), name='W', borrow=True)
        self.W = theano.shared(value=numpy.tile([-1, -1, -1, -1, 1, 1, 1, 1, 1]*numpy.ones((n_in,), dtype=theano.config.floatX)/n_filt, (1,n_filt,1) ).astype(theano.config.floatX), name='W', borrow=True)
        #self.W = theano.shared(value=numpy.concatenate((-1*numpy.ones((4,)),numpy.ones((5,)),-1*numpy.ones((4,)),numpy.ones((5,))))*numpy.ones((n_in,), dtype=theano.config.floatX), name='W', borrow=True)
        #self.W = theano.shared(value=.001*numpy.ones((n_in,), dtype=theano.config.floatX), name='W', borrow=True)

        # initialize the baises b as a vector of n_out 0s
        self.b = theano.shared(value=-1*numpy.ones((n_out,), dtype=theano.config.floatX), name='b', borrow=True)

        self.h = theano.shared(value=numpy.zeros((hist_len,n_out), dtype=theano.config.floatX), name='h', borrow=True)

        # helper variables for adagrad
        self.b_helper = theano.shared(value=numpy.zeros((n_out,), \
            dtype=theano.config.floatX), name='b_helper', borrow=True)
        self.W_helper = theano.shared(value=numpy.tile(numpy.zeros((n_in,), \
            dtype=theano.config.floatX), (1,n_filt,1) ), name='W_helper', borrow=True)
        self.h_helper = theano.shared(value=numpy.zeros((hist_len,n_out), dtype=theano.config.floatX), name='h_helper', borrow=True)

        # helper variables for L1
        self.b_helper2 = theano.shared(value=numpy.zeros((n_out,), \
            dtype=theano.config.floatX), name='b_helper2', borrow=True)
        self.W_helper2 = theano.shared(value=numpy.tile(numpy.zeros((n_in,), \
            dtype=theano.config.floatX), (1,n_filt,1) ), name='W_helper2', borrow=True)
        self.h_helper2 = theano.shared(value=numpy.zeros((hist_len,n_out), dtype=theano.config.floatX), name='h_helper', borrow=True)

        # parameters of the model
        self.params = [self.W, self.b, self.h]
        self.params_helper = [self.W_helper, self.b_helper, self.h_helper]
        self.params_helper2 = [self.W_helper2, self.b_helper2, self.h_helper2]

        #history dependent input
        self.h_in = theano.shared(value=numpy.zeros((y_len,n_out), dtype=theano.config.floatX), borrow=True) 
        for hi in range(hist_len):
            self.h_in = T.set_subtensor(self.h_in[(1+hi):y_len], self.h_in[(1+hi):y_len] + T.addbroadcast(T.shape_padleft(self.h[hi,:],n_ones=1),0)*y[0:(y_len-(hi+1))])

        # compute vector of expected values (for each output) in symbolic form
        self.E_y_given_x = T.log(1+T.exp(T.sum(input*T.addbroadcast(self.W,0), axis=1) + self.b + self.h_in)) #sums over multiple filters
        self.input_responses = T.sum(input*T.addbroadcast(self.W,0), axis=1) + self.b #sums over multiple filters
Example #21
 def mulclassloss(self,kth,x,y,label):
     # multiple-label classification loss, using Wikidata for pretraining
     hidden = self.hidden_k(x,self.w,self.dicw,kth)
     print("hidden type: " + str(hidden.type))
     size = y.ndim
     y = T.addbroadcast(y,size - 1)
     embedding = T.sum(hidden*y,0)/T.addbroadcast(T.cast(T.sum(y,0), 'int16'), size - 2)
     #embedding = T.sum(hidden*y,0)/ T.addbroadcast(T.sum(y,0), size-2)
     print("embedding type: " + str(embedding.type))
     logloss = (0. - T.sum(T.log(1. / (1. + T.exp(0. - (T.dot(embedding, self.w["mulw"])+self.w["mulb"])*label)))))/embedding.shape[0]
     return logloss
 def node_update(self, s_, h_, m_) :
     """
     Update params in Attention.
     """
     preact = tensor.dot(h_, self.params[self._p(self.prefix, 'U')])
     preact += tensor.addbroadcast(tensor.dot(s_, self.params[self._p(self.prefix, 'W')]).dimshuffle('x', 0, 1), 0)
     preact = tensor.dot(tensor.tanh(preact), self.params[self._p(self.prefix, 'va')]) * m_
     alpha = tensor.nnet.softmax(preact.dimshuffle(1, 0)).dimshuffle(1, 0, 'x')
     c = (h_ * tensor.addbroadcast(alpha, 2)).sum(axis=0) # c is (samples,2*hidden)
 
     return c, alpha
Example #23
def test_dnn_batchnorm_train():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out, x_mean, x_invstd = dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)
            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, dy],
                                [out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
                                grads + grads2, mode=mode_with_gpu)
            # run
            for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 3])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 3])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 3])  # invstd
                # compare gradients
                utt.assert_allclose(outputs[6], outputs[6 + 3])  # dx
                utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=3e-3)  # dscale
                utt.assert_allclose(outputs[8], outputs[8 + 3])  # dbias
Example #24
def multi_dim_softmax(X):
	"""
	compute a softmax for a filter_map at each point

	X : a 4d tensor (batch, feature_map, x, y)
	returns a 4d tensor (batch, softmax, x, y)
	"""
	maxs = X.max(axis=1, keepdims=True)
	maxs = tensor.addbroadcast(maxs, 1)
	exps = tensor.exp(X-maxs)
	sums = exps.sum(axis=1, keepdims=True)
	sums = tensor.addbroadcast(sums, 1)
	return exps/sums
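
# A hedged usage sketch, assuming multi_dim_softmax above is in scope: the keepdims
# max/sum are (batch, 1, x, y) tensors that addbroadcast expands back over the
# feature-map axis.
import numpy as np
import theano
from theano import tensor

X = tensor.tensor4('X')
f = theano.function([X], multi_dim_softmax(X))
x = np.random.randn(2, 3, 4, 4).astype(theano.config.floatX)
print(f(x).sum(axis=1))   # all ones: each pixel's channels sum to 1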
Example #25
 def objective(self, x):
     # first, reshape x into a set of parameters we need
     i, W, b = 0, [], []
     for shape in self.layer_shapes:
         l = np.prod(shape)
         W.append(x[:, i:i+l].reshape((n_batch,)+shape))
         i += l
         l = shape[1]
         b.append(x[:, i:i+l].reshape((n_batch, 1, l)))
     # calculate the cost
     z = T.tile(self.mini_batch.reshape((1, 50, 784)), (20, 1, 1))
     for wi, bi in zip(W, b):
         z = T.nnet.sigmoid(T.batched_dot(z, wi) + T.addbroadcast(bi,1))
     return T.mean((z-T.addbroadcast(T.extra_ops.to_one_hot(self.classes, 10).reshape((1, 50, 10)),0))**2, axis=2)
Example #26
    def get_output_for(self, input, deterministic=False, collect=False,
                       **kwargs):

        if collect:
            # use this batch's mean and var
            if self.stat_indices is None:
                mean = input.mean(self.axes, keepdims=True)
                var = input.var(self.axes, keepdims=True)
            else:
                mean = input[self.stat_indices].mean(self.axes, keepdims=True)
                var = input[self.stat_indices].var(self.axes, keepdims=True)
            # and update the stored mean and var:
            # we create (memory-aliased) clones of the stored mean and var
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_var = theano.clone(self.var, share_inputs=False)
            # set a default update for them

            if self.alpha != 'single_pass':
                running_mean.default_update = (
                    (1 - self.alpha) * running_mean + self.alpha * mean)
                running_var.default_update = (
                    (1 - self.alpha) * running_var + self.alpha * var)
            else:
                print("Collecting using single pass...")
                # this is ugly; figure out what can be safely removed...
                running_mean.default_update = (0 * running_mean + 1.0 * mean)
                running_var.default_update = (0 * running_var + 1.0 * var)

            # and include them in the graph so their default updates will be
            # applied (although the expressions will be optimized away later)
            mean += 0 * running_mean
            var += 0 * running_var

        elif deterministic:
            # use stored mean and var
            mean = self.mean
            var = self.var
        else:
            # use this batch's mean and var
            mean = input.mean(self.axes, keepdims=True)
            var = input.var(self.axes, keepdims=True)

        mean = T.addbroadcast(mean, *self.axes)
        var = T.addbroadcast(var, *self.axes)
        normalized = (input - mean) / T.sqrt(var + self.epsilon)

        if self.return_stats:
            return [normalized, mean, var]
        else:
            return normalized
Example #27
    def get_weights(self, h_t, w_tm1, M_t, **kwargs):
        batch_size = self.heads[0].input_shape[0] # QKFIX: Get the size of the batches from the 1st head
        num_heads = len(self.heads)
        k_t = self.nonlinearity_key(T.dot(h_t, self.W_hid_to_key) + self.b_hid_to_key)
        beta_t = self.nonlinearity_beta(T.dot(h_t, self.W_hid_to_beta) + self.b_hid_to_beta)
        g_t = self.nonlinearity_gate(T.dot(h_t, self.W_hid_to_gate) + self.b_hid_to_gate)
        # QKFIX: If the nonlinearity is softmax (which is usually the case), then the activations
        # need to be reshaped (T.nnet.softmax only accepts 2D inputs)
        try:
            s_t = self.nonlinearity_shift(T.dot(h_t, self.W_hid_to_shift) + self.b_hid_to_shift)
        except ValueError:
            shift_activation_t = T.dot(h_t, self.W_hid_to_shift) + self.b_hid_to_shift
            s_t = self.nonlinearity_shift(shift_activation_t.reshape((h_t.shape[0] * num_heads, self.num_shifts)))
            s_t = s_t.reshape(shift_activation_t.shape)
        gamma_t = self.nonlinearity_gamma(T.dot(h_t, self.W_hid_to_gamma) + self.b_hid_to_gamma)

        # Content Addressing (3.3.1)
        beta_t = T.addbroadcast(beta_t, 2)
        betaK = beta_t * similarities.cosine_similarity(k_t, M_t)
        w_c = lasagne.nonlinearities.softmax(betaK.flatten(ndim=2))
        w_c = w_c.reshape(betaK.shape)

        # Interpolation (3.3.2)
        g_t = T.addbroadcast(g_t, 2)
        w_g = g_t * w_c + (1. - g_t) * w_tm1

        # Convolutional Shift (3.3.2)
        # NOTE: This library is using a flat (zero-padded) convolution instead of the circular
        # convolution from the original paper. In practice, this change has a minimal impact.
        w_g_padded = w_g.reshape((h_t.shape[0] * num_heads, self.memory_shape[0])).dimshuffle(0, 'x', 'x', 1)
        conv_filter = s_t.reshape((h_t.shape[0] * num_heads, self.num_shifts)).dimshuffle(0, 'x', 'x', 1)
        pad = (self.num_shifts // 2, (self.num_shifts - 1) // 2)
        w_g_padded = padding.pad(w_g_padded, [pad], batch_ndim=3)
        convolution = T.nnet.conv2d(w_g_padded, conv_filter,
            input_shape=(None if batch_size is None else \
                batch_size * num_heads, 1, 1, self.memory_shape[0] + pad[0] + pad[1]),
            filter_shape=(None if batch_size is None else \
                batch_size * num_heads, 1, 1, self.num_shifts),
            subsample=(1, 1),
            border_mode='valid')
        w_tilde = convolution[T.arange(h_t.shape[0] * num_heads), T.arange(h_t.shape[0] * num_heads), 0, :]
        w_tilde = w_tilde.reshape((h_t.shape[0], num_heads, self.memory_shape[0]))

        # Sharpening (3.3.2)
        gamma_t = T.addbroadcast(gamma_t, 2)
        w = T.pow(w_tilde + 1e-6, gamma_t)
        w /= T.sum(w, axis=2).dimshuffle(0, 1, 'x')

        return w
Example #28
    def output(self, input):
        conv_out = conv.conv2d(
            input, self.conv_w,
            filter_shape=self.filter_shape, image_shape=self.image_shape
        )
        conv_b = T.addbroadcast(self.conv_b, 1, 2)
        conv_out = conv_out + conv_b  # add bias
        pool_out = downsample.max_pool_2d(
            conv_out, (self.poolsize, self.poolsize), ignore_border=True
        )
        pool_w = T.addbroadcast(self.pool_w, 1, 2)
        pool_b = T.addbroadcast(self.pool_b, 1, 2)
        pool_out = pool_out * pool_w + pool_b

        return 1.7159 * T.tanh(2.0 / 3.0 * pool_out)
	def sample_joint(self, sp):
		
		t2_samp=self.theano_rng.multinomial(pvals=T.reshape(self.weights_now,(1,self.npcl))).T
		s2_samp=T.cast(T.sum(self.s_now*T.addbroadcast(t2_samp,1),axis=0),'float32')
		
		diffs=(s2_samp-sp)
		abs_term=T.sum(T.abs_(diffs)/self.b,axis=1)
		alpha=T.exp(-abs_term)
		probs_unnorm=self.weights_past*alpha
		probs=probs_unnorm/T.sum(probs_unnorm)
		
		t1_samp=self.theano_rng.multinomial(pvals=T.reshape(probs,(1,self.npcl))).T
		s1_samp=T.cast(T.sum(self.s_past*T.addbroadcast(t1_samp,1),axis=0),'float32')
		
		return [s1_samp, s2_samp]
Example #30
def get_uhs_operator(uhs, depth, n_hidden, rhos):
    """

    :param uhs:
    :param depth:
    :param n_hidden:
    :param rhos: can be shared variable or constant of shape (depth, )!!
    :return:
    """
    # Will use a Fourier matrix (will be O(n^2)...)
    # Doesn't seem to slow things down much though!
    exp_phases = [T.cos(uhs), T.sin(uhs)]
    neg_exp_phases = [T.cos(uhs[:, ::-1]), -T.sin(uhs[:, ::-1])]
    ones_ = [T.ones((depth, 1), dtype=theano.config.floatX), T.zeros((depth, 1), dtype=theano.config.floatX)]

    rhos_reshaped = T.reshape(rhos, (depth, 1), ndim=2)
    rhos_reshaped = T.addbroadcast(rhos_reshaped, 1)

    eigvals_re = rhos_reshaped * T.concatenate((ones_[0], exp_phases[0], -ones_[0], neg_exp_phases[0]), axis=1)
    eigvals_im = rhos_reshaped * T.concatenate((ones_[1], exp_phases[1], -ones_[1], neg_exp_phases[1]), axis=1)
    phase_array = -2 * np.pi * np.outer(np.arange(n_hidden), np.arange(n_hidden)) / n_hidden
    f_array_re_val = np.cos(phase_array) / n_hidden
    f_array_im_val = np.sin(phase_array) / n_hidden
    f_array_re = theano.shared(f_array_re_val.astype(theano.config.floatX), name="f_arr_re")
    f_array_im = theano.shared(f_array_im_val.astype(theano.config.floatX), name="f_arr_im")

    a_k = T.dot(eigvals_re, f_array_re) + T.dot(eigvals_im, f_array_im)
    uhs_op = rep_vec(a_k, n_hidden, n_hidden)  # shape (depth, 2 * n_hidden - 1)

    return uhs_op
    def create_gradients(self, loss, deterministic=False):
        # load networks
        l_px_mu, l_px_logsigma, l_pa_mu, l_pa_logsigma, \
        l_qa_mu, l_qa_logsigma, l_qz_mu, l_qz_logsigma, l_qa, l_qz, l_cv, c, v = self.network

        # load params
        p_params  = lasagne.layers.get_all_params(
            [l_px_mu, l_pa_mu, l_pa_logsigma], trainable=True)
        qa_params  = lasagne.layers.get_all_params(l_qa_mu, trainable=True)
        qz_params  = lasagne.layers.get_all_params(l_qz, trainable=True)
        cv_params = lasagne.layers.get_all_params(l_cv, trainable=True)

        # load neural net outputs (probabilities have been precomputed)
        log_pxz, log_px_given_z, log_pz = self.log_pxz, self.log_px_given_z, self.log_pz
        log_qza_given_x = self.log_qza_given_x
        log_qz_given_x = self.log_qz_given_x
        log_qz_given_x_dgz = self.log_qz_given_x_dgz
        cv = T.addbroadcast(lasagne.layers.get_output(l_cv),1)

        # compute learning signals
        l0 = log_px_given_z + log_pz - log_qz_given_x - cv # NOTE: this doesn't have q(a)
        l_avg, l_var = l0.mean(), l0.var()
        c_new = 0.8*c + 0.2*l_avg
        v_new = 0.8*v + 0.2*l_var
        l = (l0 - c_new) / T.maximum(1, T.sqrt(v_new))
        l_target = (l0 - c_new) / T.maximum(1, T.sqrt(v_new))

        # compute grad wrt p
        p_grads = T.grad(-log_pxz.mean(), p_params)

        # compute grad wrt q_a
        elbo = T.mean(log_pxz - log_qza_given_x)
        qa_grads = T.grad(-elbo, qa_params)

        # compute grad wrt q_z
        qz_target = T.mean(dg(l_target) * log_qz_given_x_dgz)
        qz_grads = T.grad(-0.2*qz_target, qz_params) # 5x slower rate for q

        # compute grad of cv net
        cv_target = T.mean(l0**2)
        cv_grads = [0.2*g for g in T.grad(cv_target, cv_params)]

        # combine and clip gradients
        clip_grad = 1
        max_norm = 5
        grads = p_grads + qa_grads + qz_grads + cv_grads
        mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
        cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]

        return cgrads
    def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, w_hidden_hidden, b_act, ln_s1, ln_b1, ln_s2, ln_b2):

        pre_w_sig = T.dot(pre_out_sig, w_hidden_hidden)

        inner_act = self.activation

        pre_w_sig_ln = self.ln(pre_w_sig, ln_b1, ln_s1)
        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b2, ln_s2)

        out_sig = inner_act(T.add(cur_w_in_sig_ln, pre_w_sig_ln, b_act))

        mask = T.addbroadcast(mask, 1)
        out_sig_m = mask * out_sig + (1. - mask) * pre_out_sig
        return [out_sig_m]
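
# A hedged standalone sketch of the masked update in t_forward_step above: a
# (batch, 1) mask column is re-marked broadcastable so finished sequences keep
# their previous state while active ones take the new one.
import numpy as np
import theano
import theano.tensor as T

mask = T.matrix('mask')      # (batch, 1): 1 = still active, 0 = finished
new_h = T.matrix('new_h')    # (batch, hidden)
old_h = T.matrix('old_h')
m = T.addbroadcast(mask, 1)
step = theano.function([mask, new_h, old_h], m * new_h + (1. - m) * old_h)
print(step(np.array([[1.], [0.]], dtype=theano.config.floatX),
           np.ones((2, 3), dtype=theano.config.floatX),
           np.zeros((2, 3), dtype=theano.config.floatX)))
# row 0 takes the new state, row 1 keeps the old one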
Example #33
    def sample_joint(self, sp):

        t2_samp = self.theano_rng.multinomial(
            pvals=T.reshape(self.weights_now, (1, self.npcl))).T
        s2_samp = T.cast(
            T.sum(self.s_now * T.addbroadcast(t2_samp, 1), axis=0), 'float32')
        h2_samp = T.cast(
            T.sum(self.h_now * T.addbroadcast(t2_samp, 1), axis=0), 'float32')

        diffs = self.b * (s2_samp - sp)
        sqr_term = T.sum(diffs**2, axis=1)
        alpha = T.exp(-sqr_term)
        probs_unnorm = self.weights_past * alpha
        probs = probs_unnorm / T.sum(probs_unnorm)

        t1_samp = self.theano_rng.multinomial(
            pvals=T.reshape(probs, (1, self.npcl))).T
        s1_samp = T.cast(
            T.sum(self.s_past * T.addbroadcast(t1_samp, 1), axis=0), 'float32')
        h1_samp = T.cast(
            T.sum(self.h_past * T.addbroadcast(t1_samp, 1), axis=0), 'float32')

        return [s1_samp, h1_samp, s2_samp, h2_samp]
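
# A hedged standalone sketch of the resampling selection used above: a one-hot
# multinomial draw reshaped to an (npcl, 1) column is re-marked broadcastable so
# the sum over particles picks out a single particle's state.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

npcl = 4
rng = RandomStreams(seed=123)
s_now = T.matrix('s_now')       # (npcl, dim) particle states
weights = T.vector('weights')   # (npcl,) normalized particle weights
idx = rng.multinomial(pvals=T.reshape(weights, (1, npcl))).T   # (npcl, 1) one-hot column
s_samp = T.sum(s_now * T.addbroadcast(idx, 1), axis=0)
sample = theano.function([s_now, weights], s_samp)
S = np.arange(12).reshape(npcl, 3).astype(theano.config.floatX)
w = np.array([0.1, 0.2, 0.3, 0.4], dtype=theano.config.floatX)
print(sample(S, w))             # one row of S, chosen with probability proportional to w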
Example #34
 def setup(self, bottom, top):
     attention = T.tensor4("attention")
     input = T.tensor4("input")
     v = T.matrix("v")
     attention_bc = T.addbroadcast(attention, 1)
     attended = T.mul(input, attention_bc)
     result = T.sum(attended, axis=(2, 3))
     result_g_attention, result_g_input = T.Lop(result, [attention, input],
                                                v)
     self.f = theano.function([attention, input], result)
     self.b_attention = theano.function([attention, input, v],
                                        result_g_attention)
     self.b_input = theano.function([attention, input, v],
                                    result_g_input)
Example #35
def keep_max(input, theta, k):
    sig_input = T.nnet.sigmoid(T.dot(input, theta))
    #sig_input = T.dot(input, theta)
    if k == 0:
        result = input * T.addbroadcast(sig_input, 3)
        return result, sig_input

    # get the sorted idx
    sort_idx = T.argsort(sig_input, axis=2)
    k_max_ids = sort_idx[:,:,-k:,:]
    dim0, dim1, dim2, dim3 = k_max_ids.shape
    batchids = T.repeat(T.arange(dim0), dim1*dim2*dim3)
    mapids = T.repeat(T.arange(dim1), dim2*dim3).reshape((1, dim2*dim3))
    mapids = T.repeat(mapids, dim0, axis=0).flatten()
    rowids = k_max_ids.flatten()
    colids = T.arange(dim3).reshape((1, dim3))
    colids = T.repeat(colids, dim0*dim1*dim2, axis=0).flatten()
    sig_mask = T.zeros_like(sig_input)
    choosed = sig_input[batchids, mapids, rowids, colids]
    sig_mask = T.set_subtensor(sig_mask[batchids, mapids, rowids, colids], 1)
    input_mask = sig_mask * sig_input
    result = input * T.addbroadcast(input_mask, 3)
    return result, sig_input
Example #36
    def __call__(self, input, input_lm=None, h0=None, c0=None):
        batch_size = input.shape[0]  # use input, so this also works when input_lm is None
        if h0 is None:
            h0 = T.alloc(np.asarray(0., dtype=theano.config.floatX), batch_size, self.n_hidden)
        if c0 is None:
            c0 = T.alloc(np.asarray(0., dtype=theano.config.floatX), batch_size, self.n_hidden)
        if input_lm is None:
            def step(x_t, h_tm_prev, c_tm_prev):
                x_i = T.dot(x_t, self.W_i) + self.b_i
                x_f = T.dot(x_t, self.W_f) + self.b_f
                x_c = T.dot(x_t, self.W_c) + self.b_c
                x_o = T.dot(x_t, self.W_o) + self.b_o

                i_t = self.inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
                f_t = self.inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
                c_t = f_t * c_tm_prev + i_t * self.activation(x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
                o_t = self.inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
                h_t = o_t * self.activation(c_t)  # actual hidden state

                return [h_t, c_t]

            self.h_1, _ = theano.scan(step,
                                      sequences=input.dimshuffle(1, 0, 2),
                                      outputs_info=[h0, c0]
                                      )
        else:
            def step(x_t, mask, h_tm_prev, c_tm_prev):
                x_i = T.dot(x_t, self.W_i) + self.b_i
                x_f = T.dot(x_t, self.W_f) + self.b_f
                x_c = T.dot(x_t, self.W_c) + self.b_c
                x_o = T.dot(x_t, self.W_o) + self.b_o

                i_t = self.inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
                f_t = self.inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
                c_t = f_t * c_tm_prev + i_t * self.activation(x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
                o_t = self.inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
                h_t = o_t * self.activation(c_t)  # actual hidden state

                h_t = mask * h_t + (1 - mask) * h_tm_prev
                c_t = mask * c_t + (1 - mask) * c_tm_prev

                return [h_t, c_t]

            self.h_1, _ = theano.scan(step,
                                      sequences=[input.dimshuffle(1, 0, 2),
                                                 T.addbroadcast(input_lm.dimshuffle(1, 0, 'x'), -1)],
                                      outputs_info=[h0, c0])

        self.h_1 = self.h_1[0].dimshuffle(1, 0, 2)
        return self.h_1[:, -1, :]
Example #37
    def get_padded_shuffled_mask(self, mask, X, pad=0):
        # mask is (nb_samples, time)
        if mask is None:
            mask = T.ones((X.shape[0], X.shape[1]))

        mask = T.shape_padright(mask)  # (nb_samples, time, 1)
        mask = T.addbroadcast(mask, -1)  # the new length-1 dimension is made broadcastable
        mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

        if pad > 0:
            # left-pad in time with 0
            padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
            mask = T.concatenate([padding, mask], axis=0)
        return mask.astype('int8')
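
# A hedged standalone sketch of the mask layout produced above: after shape_padright
# and addbroadcast, the (time, nb_samples, 1) mask can gate a (time, nb_samples, dim)
# sequence elementwise.
import numpy as np
import theano
import theano.tensor as T

seq = T.tensor3('seq')     # (time, nb_samples, dim)
mask = T.matrix('mask')    # (nb_samples, time)
mask3 = T.addbroadcast(T.shape_padright(mask), 2).dimshuffle(1, 0, 2)
f = theano.function([seq, mask], seq * mask3)
s = np.ones((4, 2, 3), dtype=theano.config.floatX)
m = np.array([[1, 1, 0, 0], [1, 0, 0, 0]], dtype=theano.config.floatX)
print(f(s, m)[:, :, 0])    # each sample's timesteps are zeroed once its mask runs out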
Example #38
 def __init__(self, attended_layer, attended_layer_mask,
              condition_layer, gate_covariance=False, covariance_decay=None,
              name=None):
     MergeLayer.__init__(self, [attended_layer, attended_layer_mask,
                                condition_layer], name=name)
     self.gate_covariance = gate_covariance
     self.covariance_decay = covariance_decay
     if gate_covariance:
         n_units = attended_layer.output_shape[-1]
         self.w_gate = self.add_param(init.Constant(0.0),
                                      (n_units,), name="gate")
         self.b_gate = self.add_param(init.Constant(1.0),
                                      (1,), name="gate")
         self.b_gate = T.addbroadcast(self.b_gate, 0)
Example #39
    def sequence_iteration(self,
                           output,
                           mask,
                           use_dropout=0,
                           dropout_value=0.5):

        dot_product = T.dot(output, self.t_w_out)

        linear_o = T.add(dot_product, self.t_b_out)

        mask = T.addbroadcast(mask, 2)  # TODO: is this necessary?
        output = T.mul(mask, linear_o) + T.mul((1. - mask), 1e-6)

        return output  # result
Example #40
    def apply(self, char_seq, sample_matrix, char_aux):
        # Time as first dimension
        embeddings = self.lookup.apply(char_seq)
        gru_out = self.dgru.apply(**merge(
            self.gru_fork.apply(embeddings, as_dict=True), {'mask': char_aux}))
        wgru_out = tensor.exp(
            self.wl.apply(self.bidir_w.apply(embeddings, char_aux)))

        if self.dgru_depth > 1:
            gru_out = gru_out[-1]

        gru_out = tensor.addbroadcast(wgru_out, 2) * gru_out
        sampled_representation = tensor.tanh(
            tensor.batched_dot(sample_matrix, gru_out.dimshuffle([1, 0, 2])))
        return sampled_representation.dimshuffle([1, 0, 2]), wgru_out
Example #41
    def get_padded_shuffled_mask(self, train, X, pad=0):
        mask = self.get_input_mask(train)
        if mask is None:
            mask = T.ones_like(X.sum(axis=-1)) # is there a better way to do this without a sum?

        # mask is (nb_samples, time)
        mask = T.shape_padright(mask) # (nb_samples, time, 1)
        mask = T.addbroadcast(mask, -1) # the new length-1 dimension is made broadcastable
        mask = mask.dimshuffle(1, 0, 2) # (time, nb_samples, 1)

        if pad > 0:
            # left-pad in time with 0
            padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
            mask = T.concatenate([padding, mask], axis=0)
        return mask.astype('int8')
Example #42
 def output(self, input_scalars):
     """
     Computes the n_output output scalars
     @param input_scalars: the layer's input
     @return: n_output scalars
     """
     z = T.dot(input_scalars, self.W) + T.addbroadcast(self.b, 0)
     if self.activation == 'linear':
         return z
     elif self.activation == 'rectified':
         return T.maximum(z, 0)
     elif self.activation == 'tanh':
         return T.tanh(z)
     else:
          raise ValueError("Invalid activation %s" % self.activation)
Example #43
 def get_output_for(self, inputs, deterministic=False, **kwargs):
     event = inputs[0]  #(None, 1000, embed)
     feature_idx = inputs[1]  #(None, 1000, feature_num, embed)
     feature_b = inputs[2]  #(None, 1000, feature_num, 1)
     feature_trans = inputs[3]  #(None, 1000, feature_num, 1)
     feature_value = inputs[4]  #(None, 1000, feature_num)
     value_up = T.shape_padright(feature_value,
                                 1)  #(None, 1000, feature_num, 1)
     bias_value = feature_trans * (value_up + feature_b)
     bias_value_broad = T.addbroadcast(bias_value,
                                       3)  #make the last axis broadcastable
     v_idx = T.sum(feature_idx *
                   lasagne.nonlinearities.tanh(bias_value_broad),
                   axis=2)  #(None, 1000, embed)
     return v_idx + event
Example #44
def RBM_Free_Energy(x, y):
    # make input data binary
    data = makeBinary(x)

    # determine initial params
    (W_init, b_v_init, b_h_init) = initParams(x.shape[0], NUM_HID)
    W = theano.shared(W_init, name='W')
    b_v = theano.shared(b_v_init.reshape(b_v_init.shape[0], 1), name='b_v')
    b_h = theano.shared(b_h_init.reshape(b_h_init.shape[0], 1), name='b_h')

    # compute free energy
    v = T.matrix('v')
    F = -T.dot(T.flatten(b_v, 1), v)\
        - T.sum(T.log(1.0 + T.exp(T.addbroadcast(b_h, 1) + T.dot(W, v))), axis=0)
    free_energy = theano.function([v], F.sum())
    value = free_energy(data)
    print('             Total Free Energy =', value)

    # approximate expected free energy
    # using k=1 contrastive divergence
    rng = RandomStreams(RANDOM_SEED)
    h_0_mean = 1.0 / (1.0 + T.exp(-T.addbroadcast(b_h, 1) - T.dot(W, v)))
    h_0 = rng.binomial(size=h_0_mean.shape, n=1, p=h_0_mean)
    v_0_mean = 1.0 / (1.0 + T.exp(-T.addbroadcast(b_v, 1) - T.dot(W.T, h_0)))
    v_0 = rng.binomial(size=v_0_mean.shape, n=1, p=v_0_mean)
    F_exp = -T.dot(T.flatten(b_v, 1), v_0)\
            - T.sum(T.log(1.0 + T.exp(T.addbroadcast(b_h, 1) + T.dot(W, v_0))), axis=0)
    exp_free_energy = theano.function([v], F_exp.sum())
    value = exp_free_energy(data)
    print('Estimated Expected Free Energy =', value)

    # compute param deltas
    dParams = T.grad(F.sum() - F_exp.sum(), [W, b_v, b_h],
                     consider_constant=[v_0])
    dParams_func = theano.function([v], dParams)
    value = dParams_func(data)
Example #45
    def __init__(self,
                 incoming,
                 n_codewords=24,
                 V=lasagne.init.Normal(0.1),
                 gamma=lasagne.init.Constant(0.1),
                 eps=0.00001,
                 input_var=None,
                 initializers=None,
                 spatial_level=1,
                 **kwargs):
        """
        Creates a BoF layer

        :param incoming: 
        :param n_codewords: number of codewords
        :param V: initializer used for the codebook
        :param gamma: initializer used for the scaling factors
        :param eps: epsilon used to ensure numerical stability
        :param input_var: input_var of the model (used to compile a function that extract the features fed to layer)
        :param initializers: 
        :param spatial_level: 0 (no spatial segmentation), 1 (first spatial level)
        :param pooling_type: either 'mean' or 'max'
        :param kwargs: 
        """
        super(CBoF_Layer, self).__init__(incoming, **kwargs)

        self.n_codewords = n_codewords
        self.spatial_level = spatial_level
        n_filters = self.input_shape[1]
        self.eps = eps

        # Create parameters
        self.V = self.add_param(V, (n_codewords, n_filters, 1, 1), name='V')
        self.gamma = self.add_param(gamma, (1, n_codewords, 1, 1),
                                    name='gamma')

        # Make gammas broadcastable
        self.gamma = T.addbroadcast(self.gamma, 0, 2, 3)

        # Compile function used for feature extraction
        if input_var is not None:
            self.features_fn = theano.function([input_var],
                                               lasagne.layers.get_output(
                                                   incoming,
                                                   deterministic=True))

        if initializers is not None:
            initializers.append(self.initialize_layer)
Example #46
    def get_padded_shuffled_mask(self, train, X, pad=0):
        mask = self.get_input_mask(train)
        if mask is None:
            mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

        # mask is (nb_samples, time)
        mask = T.shape_padright(mask)  # (nb_samples, time, 1)
        mask = T.addbroadcast(mask, -1)  # the new dimension (the '1') is made broadcastable
        # see http://deeplearning.net/software/theano/library/tensor/basic.html#broadcasting-in-theano-vs-numpy
        mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

        if pad > 0:
            # left-pad in time with 0
            padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
            mask = T.concatenate([padding, mask], axis=0)
        return mask.astype('int8')
def permute_dimensions(x, pattern):
    '''Transpose dimensions.

    pattern should be a tuple or list of
    dimension indices, e.g. [0, 2, 1].
    '''
    if len(
            pattern
    ) < x.ndim:  # [DV] handle the case that one dimension is to be dropped
        bcaxis = []
        for i in range(x.ndim):
            if i not in pattern:
                bcaxis.append(i)
        x = T.addbroadcast(x, *bcaxis)
    pattern = tuple(pattern)
    return x.dimshuffle(pattern)
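
# A hedged usage sketch, assuming permute_dimensions above is in scope: when the
# pattern omits an axis, that axis is first made broadcastable so dimshuffle can
# drop it (it must therefore have length 1 at run time).
import numpy as np
import theano
import theano.tensor as T

x = T.tensor3('x')
f = theano.function([x], permute_dimensions(x, (2, 0)))   # drops axis 1
a = np.zeros((2, 1, 3), dtype=theano.config.floatX)
print(f(a).shape)   # (3, 2)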
Example #48
 def __call__(self, input,input_lm=None, return_list = False):
      # activation function
     if input_lm is None:
         self.h_l, _ = theano.scan(self.step2,
                     sequences=input.dimshuffle(1,0,2),
                     outputs_info=theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                               dtype=theano.config.floatX),borrow=True))
     else:
         self.h_l, _ = theano.scan(self.step,
                     sequences=[input.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                     outputs_info=theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                               dtype=theano.config.floatX),borrow=True))
     self.h_l = self.h_l.dimshuffle(1,0,2)
     if return_list:
         return self.h_l
     return self.h_l[:,-1,:]
Exemple #49
            def get_attention(Wg, bg, M, w):
                g_t = T.nnet.sigmoid(T.dot(x_t, Wg) + bg)  # [instances, mem]

                # eqn 11
                k = T.dot(h_tm1, self.Wk) + self.bk  # [instances, memory_size]

                # eqn 13
                beta = T.dot(h_tm1, self.Wb) + self.bb
                beta = T.log(1 + T.exp(beta))
                beta = T.addbroadcast(beta, 1)  # [instances, 1]

                # eqn 12
                w_hat = T.nnet.softmax(beta * cosine_dist(M, k))

                # eqn 14
                return (1 - g_t) * w + g_t * w_hat  # [instances, mem]
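# The addressing equations above, traced in NumPy with made-up shapes; all names here
# are illustrative only.
import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

M = np.random.randn(4, 16, 32)                      # [instances, mem, memory_size]
k = np.random.randn(4, 32)                          # key (eqn 11)
beta = np.log1p(np.exp(np.random.randn(4, 1)))      # softplus sharpening (eqn 13)
w = softmax(np.random.randn(4, 16))                 # previous weights
g = 1.0 / (1.0 + np.exp(-np.random.randn(4, 16)))   # interpolation gate

cos = np.einsum('imd,id->im', M, k)
cos /= np.linalg.norm(M, axis=2) * np.linalg.norm(k, axis=1, keepdims=True) + 1e-8
w_hat = softmax(beta * cos)                         # eqn 12
w_new = (1 - g) * w + g * w_hat                     # eqn 14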
Exemple #50
    def get_output_for(self, inputs, **kwargs):
        inputs = autocrop(inputs, self.cropping)
        # modify broadcasting pattern.
        if self.broadcastable is not None:
            for n, broadcasting_dim in enumerate(self.broadcastable):
                for dim, broadcasting in enumerate(broadcasting_dim):
                    if broadcasting:
                        inputs[n] = T.addbroadcast(inputs[n], dim)

        output = None
        for input in inputs:
            if output is not None:
                output = self.merge_function(output, input)
            else:
                output = input
        return output
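# The broadcasting trick in isolation (a standalone Theano sketch, not this layer's API):
# addbroadcast lets a (batch, 1) input merge elementwise with a (batch, n) input.
import numpy as np
import theano
import theano.tensor as T

a = T.matrix('a')                          # (batch, n)
b = T.matrix('b')                          # (batch, 1)
f = theano.function([a, b], a * T.addbroadcast(b, 1))
print(f(np.ones((2, 3), dtype=theano.config.floatX),
        2 * np.ones((2, 1), dtype=theano.config.floatX)))   # each row of a scaled by b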
Exemple #51
    def __call__(self, input, input_lm=None, h0=None):
        batch_size = input.shape[0]
        if h0 is None:
            h0 = T.alloc(np.asarray(0., dtype=theano.config.floatX),
                         batch_size, self.n_hidden)
        if input_lm is None:

            def step(x_t, h_tm_prev):
                x_z = T.dot(x_t, self.W_z) + self.b_z
                x_r = T.dot(x_t, self.W_r) + self.b_r
                x_h = T.dot(x_t, self.W_h) + self.b_h

                z_t = self.inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
                r_t = self.inner_activation(x_r + T.dot(h_tm_prev, self.U_r))
                hh_t = self.activation(x_h + T.dot(r_t * h_tm_prev, self.U_h))
                h_t = (1 - z_t) * hh_t + z_t * h_tm_prev
                h_t = T.cast(h_t, dtype=theano.config.floatX)
                return h_t

            self.output, _ = theano.scan(step,
                                         sequences=input.dimshuffle(1, 0, 2),
                                         outputs_info=h0)
        else:

            def step(x_t, mask, h_tm_prev):
                x_z = T.dot(x_t, self.W_z) + self.b_z
                x_r = T.dot(x_t, self.W_r) + self.b_r
                x_h = T.dot(x_t, self.W_h) + self.b_h
                z_t = self.inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
                r_t = self.inner_activation(x_r + T.dot(h_tm_prev, self.U_r))

                hh = self.activation(x_h + T.dot(r_t * h_tm_prev, self.U_h))
                h_t = z_t * h_tm_prev + (1 - z_t) * hh
                h_t = mask * h_t + (1 - mask) * h_tm_prev
                h_t = T.cast(h_t, dtype=theano.config.floatX)
                return h_t

            self.output, _ = theano.scan(
                step,
                sequences=[
                    input.dimshuffle(1, 0, 2),
                    T.addbroadcast(input_lm.dimshuffle(1, 0, 'x'), -1)
                ],
                outputs_info=h0)
        h = self.output  # [max_length, batch_size, hidden_size]
        h = h.dimshuffle(1, 0, 2)
        return h, h[:, -1, :]
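# The masking rule used in the second scan step, reduced to NumPy: timesteps whose mask
# is 0 (padding) simply carry the previous hidden state forward unchanged.
import numpy as np

mask = np.array([[1.], [0.]])                    # two sequences; the second has ended
h_prev = np.array([[0.2, 0.4], [0.5, 0.1]])
h_new = np.array([[0.9, 0.8], [0.7, 0.6]])
h_t = mask * h_new + (1 - mask) * h_prev
print(h_t)                                       # row 0 updates, row 1 keeps h_prev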
Exemple #52
 def output(self, input_vectors, input_scalars):
     """
     Calculate the n_output transformed vectors for this layer
     @param input_scalars: n_input x n_output scalar vector
     @param input_vectors: n_input vectors (actual shape should be (n_batch, n_input, n_dimension))
     """
     # infer the batch size symbolically from the input instead of relying on a global n_batch
     mat = input_scalars.reshape((input_vectors.shape[0], self.n_input, self.n_output))
     z = T.batched_tensordot(input_vectors, mat, [[1], [1]]).swapaxes(
         1, 2) + T.addbroadcast(self.b, 0, 2)
     if self.activation == 'linear':
         return z
     elif self.activation == 'rectified':
         return T.maximum(z, 0)
     elif self.activation == 'tanh':
         return T.tanh(z)
     else:
         raise "Unknown activation, %s" % self.activation
Exemple #53
def make_network_multiscale(
        network_type, loss_function, lr, n_scales, net_options,
        do_clip=True, make_pr_func=False):
    target_var = T.matrix('targets')
    lr_var = theano.shared(np.array(lr, dtype=floatX))

    print("Building model and compiling functions...")
    if n_scales >= 1:
        input_var_list = [T.tensor4('inputs{}'.format(i))
                          for i in range(n_scales)]
        network = getattr(network_design, network_type)(input_var_list,
                                                        **net_options)
    else:
        # if the network expects a single input_var rather than a list, pass n_scales < 1
        input_var_list = [T.addbroadcast(T.tensor4('inputs{}'.format(i)))
                          for i in range(1)]
        network = getattr(network_design, network_type)(input_var_list[0],
                                                        **net_options)

    # Compute loss
    prediction = lasagne.layers.get_output(network)
    if do_clip:
        prediction = T.clip(prediction, epsilon, one-epsilon)
    loss = loss_function(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adagrad(
        loss, params, learning_rate=lr_var)

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    if do_clip:
        test_prediction = T.clip(test_prediction, epsilon, one-epsilon)
    test_loss = loss_function(test_prediction, target_var)
    test_loss = test_loss.mean()

    train_func = theano.function(input_var_list+[target_var],
                                 loss, updates=updates)
    val_func = theano.function(input_var_list+[target_var],
                               [test_prediction, test_loss])

    if make_pr_func:
        pr_func = theano.function(input_var_list, test_prediction)
        return network, input_var_list, lr_var, train_func, val_func, pr_func
    else:
        return network, input_var_list, lr_var, train_func, val_func
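# How this factory might be invoked. The network_design entry point and the option
# dict below are placeholders, not names taken from the original project.
import lasagne

network, input_vars, lr_var, train_fn, val_fn = make_network_multiscale(
    network_type='my_multiscale_net',          # hypothetical attribute of network_design
    loss_function=lasagne.objectives.binary_crossentropy,
    lr=0.01,
    n_scales=3,
    net_options={},                            # placeholder architecture options
    do_clip=True)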
Exemple #54
    def do_apply(self, input_):
        X = input_
        naxes = self.naxes
        broadcast_n = T.addbroadcast(self.n, 0)
        if naxes == 4:  # CNN
            if self.use_population:
                u = self.u / broadcast_n
            else:
                u = T.mean(X, axis=[0, 2, 3])
            b_u = u.dimshuffle('x', 0, 'x', 'x')
            if self.use_population:
                s = self.s / broadcast_n
            else:
                s = T.mean(T.sqr(X - b_u), axis=[0, 2, 3])
            X = (X - b_u) / T.sqrt(s.dimshuffle('x', 0, 'x', 'x') + self.e)
            X = self.g.dimshuffle('x', 0, 'x', 'x')*X +\
                self.b.dimshuffle('x', 0, 'x', 'x')
        elif naxes == 3:  # RNN
            if self.use_population:
                u = self.u / broadcast_n
            else:
                u = T.mean(X, axis=[0, 1])
            b_u = u.dimshuffle('x', 'x', 0)
            if self.use_population:
                s = self.s / broadcast_n
            else:
                s = T.mean(T.sqr(X - b_u), axis=[0, 1])
            X = (X - b_u) / T.sqrt(s.dimshuffle('x', 'x', 0) + self.e)
            X = self.g.dimshuffle('x', 'x', 0)*X +\
                self.b.dimshuffle('x', 'x', 0)
        elif naxes == 2:  # FC
            if self.use_population:
                u = self.u / broadcast_n
            else:
                u = T.mean(X, axis=0)
            if self.use_population:
                s = self.s / broadcast_n
            else:
                s = T.mean(T.sqr(X - u), axis=0)
            X = (X - u) / T.sqrt(s + self.e)
            X = self.g * X + self.b
        else:
            raise NotImplementedError

        return X, u, s
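# The fully-connected branch (naxes == 2) reduced to NumPy as a sanity check of the
# normalization formula; shapes and values are made up.
import numpy as np

X = np.random.randn(8, 3)
g, b, e = np.ones(3), np.zeros(3), 1e-5
u = X.mean(axis=0)
s = ((X - u) ** 2).mean(axis=0)
Xn = g * (X - u) / np.sqrt(s + e) + b
print(Xn.mean(axis=0), Xn.std(axis=0))           # approximately zero mean, unit std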
Exemple #55
    def output(self, input_raw):

        input = input_raw

        lin_output = T.dot(input, self.W) + self.b

        if self.batch_norm:
            lin_output = (lin_output -
                          T.mean(lin_output, axis=0, keepdims=True)) / (
                              1.0 + T.std(lin_output, axis=0, keepdims=True))
            lin_output = (lin_output * T.addbroadcast(self.bn_std, 0) +
                          T.addbroadcast(self.bn_mean, 0))

        if self.layer_norm:
            lin_output = (lin_output -
                          T.mean(lin_output, axis=1, keepdims=True)) / (
                              1.0 + T.std(lin_output, axis=1, keepdims=True))
            lin_output = (lin_output * T.addbroadcast(self.bn_std, 0) +
                          T.addbroadcast(self.bn_mean, 0))

        if self.norm_prop:
            lin_output = lin_output / T.sqrt(T.mean(T.sqr(lin_output), axis=0))
            lin_output = (lin_output * T.addbroadcast(self.bn_std, 0) +
                          T.addbroadcast(self.bn_mean, 0))

        clip_preactive = True

        if clip_preactive:
            lin_output = theano.tensor.clip(lin_output, -10, 10)

        self.out_store = lin_output

        if self.activation is None:
            activation = lambda x: x
        elif self.activation == "relu":
            activation = lambda x: T.maximum(0.0, x)
        elif self.activation == "lrelu":
            activation = lambda x: T.nnet.relu(x, alpha=0.02)
        elif self.activation == "exp":
            activation = lambda x: T.exp(x)
        elif self.activation == "tanh":
            activation = lambda x: T.tanh(x)
        elif self.activation == 'softplus':
            activation = lambda x: T.nnet.softplus(x)
        elif self.activation == 'sigmoid':
            activation = lambda x: T.nnet.sigmoid(x)
        else:
            raise Exception("Activation not found")

        out = activation(lin_output)

        return out
Exemple #56
    def sequence_iteration(self,
                           output,
                           mask,
                           use_dropout=0,
                           dropout_value=0.5):

        dot_product = T.dot(output, self.t_w_out)

        net_o = T.add(dot_product, self.t_b_out)

        ex_net = T.exp(net_o)
        sum_net = T.sum(ex_net, axis=2, keepdims=True)
        softmax_o = ex_net / sum_net

        mask = T.addbroadcast(mask, 2)  # TODO: is this necessary?
        output = T.mul(mask, softmax_o) + T.mul((1. - mask), 1e-6)

        return output  #result
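# The masked softmax above, reduced to NumPy with illustrative shapes: masked positions
# end up holding the small constant 1e-6 instead of a probability.
import numpy as np

net_o = np.random.randn(5, 4, 3)                       # (time, batch, classes)
mask = (np.random.rand(5, 4, 1) > 0.3).astype('float64')
soft = np.exp(net_o) / np.exp(net_o).sum(axis=2, keepdims=True)
out = mask * soft + (1. - mask) * 1e-6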
Exemple #57
 def get_inverse_for(self, input, **kwargs):
     C_fft = T.addbroadcast(theano.tensor.fft.rfft(self.C_pad), 0)
     C_fft_norm2 = C_fft[:, :, 0]**2 + C_fft[:, :, 1]**2
     C_fft_inv = C_fft
     C_fft_inv = T.set_subtensor(C_fft_inv[:, :, 0],
                                 C_fft[:, :, 0] / C_fft_norm2)
     C_fft_inv = T.set_subtensor(C_fft_inv[:, :, 1],
                                 -C_fft[:, :, 1] / C_fft_norm2)
     z_fft = theano.tensor.fft.rfft(input)
     Cz_fft = z_fft
     Cz_fft = T.set_subtensor(
         Cz_fft[:, :, 0], z_fft[:, :, 0] * C_fft_inv[:, :, 0] -
         z_fft[:, :, 1] * C_fft_inv[:, :, 1])
     Cz_fft = T.set_subtensor(
         Cz_fft[:, :, 1], z_fft[:, :, 0] * C_fft_inv[:, :, 1] +
         z_fft[:, :, 1] * C_fft_inv[:, :, 0])
     rlt = theano.tensor.fft.irfft(Cz_fft)
     return rlt
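# Why the frequency-domain division above inverts the layer, in NumPy: circular
# convolution is pointwise multiplication under the FFT, so dividing undoes it.
import numpy as np

n = 8
c = np.random.randn(n)
x = np.random.randn(n)
y = np.fft.irfft(np.fft.rfft(c) * np.fft.rfft(x), n)        # circular convolution c * x
x_rec = np.fft.irfft(np.fft.rfft(y) / np.fft.rfft(c), n)    # inverse, as in the code
print(np.allclose(x, x_rec))                                # True when no FFT bin of c is zero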
Exemple #58
    def g_deconv(self, z_ver, in_dims, out_dims, weight_name, fspec):
        """ Inverse operation for each type of f used in convnets """
        f_type, f_dims = fspec
        assert z_ver is not None
        num_channels = in_dims[0] if in_dims is not None else None
        num_filters, width, height = out_dims[:3]

        if f_type in ['globalmeanpool']:
            u = T.addbroadcast(z_ver, 2, 3)
            assert in_dims[1] == 1 and in_dims[2] == 1, \
                "global pooling needs in_dims (1,1): %s" % str(in_dims)

        elif f_type in ['maxpool']:
            sh, str, size = z_ver.shape, f_dims[0], f_dims[1]
            assert str == size, "depooling requires stride == size"
            u = T.zeros((sh[0], sh[1], sh[2] * str, sh[3] * str),
                        dtype=z_ver.dtype)
            for x in xrange(str):
                for y in xrange(str):
                    u = T.set_subtensor(u[:, :, x::str, y::str], z_ver)
            u = u[:, :, :width, :height]

        elif f_type in ['convv', 'convf']:
            filter_size, str = (f_dims[1], f_dims[1]), f_dims[2]
            W_shape = (num_filters, num_channels) + filter_size
            W = self.weight(self.rand_init_conv(W_shape), weight_name)
            if str > 1:
                # upsample if strided version
                sh = z_ver.shape
                u = T.zeros((sh[0], sh[1], sh[2] * str, sh[3] * str),
                            dtype=z_ver.dtype)
                u = T.set_subtensor(u[:, :, ::str, ::str], z_ver)
            else:
                u = z_ver  # no strides, only deconv
            u = conv2d(u,
                       W,
                       filter_shape=W_shape,
                       border_mode='valid' if 'convf' in f_type else 'full')
            u = u[:, :, :width, :height]
        else:
            raise NotImplementedError('Layer %s has no convolutional decoder' %
                                      f_type)

        return u
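# The 'maxpool' branch reduced to NumPy: each pooled value is replicated into a
# stride x stride block, then cropped back to the target width and height.
import numpy as np

z = np.arange(4.).reshape(1, 1, 2, 2)
stride = 2
u = np.zeros((1, 1, 4, 4))
for x in range(stride):
    for y in range(stride):
        u[:, :, x::stride, y::stride] = z
print(u[0, 0])                                   # every value repeated in a 2x2 block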
 def outputs(self, groundtruth, groundtruth_mask, **inputs):
     # Copy-pasted from all_outputs, because Theano does not support ellipsis
     outputs = self.merge(**dict_subset(inputs, self.merge_names))
     if self.value_softmax:
         logger.debug('Applying value softmax')
         outputs = (tensor.addbroadcast(outputs[:, :1], 1) +
                    self.softmax.apply(outputs[:, 1:]))
     if self.same_value_for_wrong:
         logger.debug('Same value for apriori wrong actions')
         wrong_output = outputs[:, 0]
         outputs = outputs[:, 1:]
         indices = tensor.repeat(tensor.arange(groundtruth.shape[1]),
                                 groundtruth.shape[0])
         wrong_mask = tensor.ones_like(outputs)
         wrong_mask = tensor.set_subtensor(
             wrong_mask[indices, groundtruth.T.flatten()], 0)
         outputs = (outputs * (1 - wrong_mask) +
                    wrong_output[:, None] * wrong_mask)
     return outputs
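# The value-softmax combination above in NumPy: column 0 acts as a shared value that is
# added to a softmax taken over the remaining columns (illustrative shapes).
import numpy as np

outputs = np.random.randn(3, 5)
exp = np.exp(outputs[:, 1:] - outputs[:, 1:].max(axis=1, keepdims=True))
combined = outputs[:, :1] + exp / exp.sum(axis=1, keepdims=True)
print(combined.shape)                            # -> (3, 4)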
Exemple #60
 def __init__(self,
              incoming,
              kernel_shape=[5, 5],
              input_shape=[50, 50],
              C=lasagne.init.Normal(0.01),
              **kwargs):
     super(CircMatLayerSparse2D, self).__init__(incoming, **kwargs)
     num_inputs = self.input_shape[1]
     self.kernel_shape = kernel_shape
     self.input_shape = input_shape
     self.C = self.add_param(C, (1, kernel_shape[0] * kernel_shape[1]),
                             name='C')
     self.C = T.addbroadcast(self.C, 0)
     #self.C_pad = self.C.reshape(kernel_shape, ndim=2)
     self.C_pad = T.zeros(input_shape)
     self.C_pad = T.set_subtensor(
         self.C_pad[:kernel_shape[0], :kernel_shape[1]],
         self.C.reshape(kernel_shape, ndim=2))
     self.C_pad = self.C_pad.reshape([1, input_shape[0] * input_shape[1]])