def errors(self, y):
        """Return the mean disagreement between thresholded predictions and `y`.

        ``self.y_pred`` is binarised at 0.5: entries below 0.5 are set to 0,
        then entries at or above 0.5 are set to 1.  The result is compared
        element-wise with `y` and the mean of the mismatches is returned.

        Raises
        ------
        TypeError
            If `y` and the binarised prediction differ in number of
            dimensions.
        """
        below = (self.y_pred < 0.5).nonzero()
        binarised = T.set_subtensor(self.y_pred[below], 0)
        at_or_above = (binarised >= 0.5).nonzero()
        binarised = T.set_subtensor(binarised[at_or_above], 1)

        if y.ndim == binarised.ndim:
            return T.mean(T.neq(binarised, y))

        raise TypeError(
            'y should have the same shape as self.y_pred',
            ('y', y.type, 'y_pred', binarised.type)
        )
#         prec_idxs=(y_reg_pred>0).nonzero()
#         prec = T.mean(y[prec_idxs])
#         reca_idxs=(y>0).nonzero()
#         reca = T.mean(y_reg_pred[reca_idxs])
#         return 2*prec*reca/(prec+reca)
        


#   
# files='data/images/train'
# images,rotated=F.loadImage(files)
# images=numpy.array(images)
# print images.shape
# images=images.astype('float32')    
# inputs = T.ftensor4('input')
# rng = numpy.random.RandomState(1234)
# classifier = CDNN(
#         rng=rng,
#         batch_size=2,
#         input=inputs.dimshuffle((0, 3, 1, 2))
#     )
# f= theano.function([inputs],classifier.y_pred)
#                    
# print f(images[:2]).shape
Ejemplo n.º 2
0
    def get_output(self,y,y_mask,init_state,train=False):
        """Run ``self._step`` over a one-step-delayed copy of `y` with theano.scan.

        The layer input, its mask, `y` and `y_mask` all have their first two
        axes swapped before scanning (presumably batch-major -> time-major;
        confirm against the callers).  `y` and `y_mask` are then delayed by
        one step along the leading axis: position 0 becomes zero and
        position t receives the original position t-1.

        Returns the second scan output (``logit``) with its first two axes
        swapped back.
        """
        def _delay_one_step(seq):
            # Zero at index 0, seq[t-1] at index t.
            return T.set_subtensor(T.zeros_like(seq)[1:], seq[:-1])

        X = self.get_input(train).dimshuffle((1, 0, 2))
        X_mask = self.previous.x_mask.dimshuffle((1, 0))

        y = _delay_one_step(y.dimshuffle((1, 0, 2)))
        y_mask = _delay_one_step(y_mask.dimshuffle((1, 0)))

        # Input projections for the three weight/bias pairs, computed for
        # every step at once so the scan body does not repeat them.
        y_z = T.dot(y, self.W_z) + self.b_z
        y_r = T.dot(y, self.W_r) + self.b_r
        y_h = T.dot(y, self.W_h) + self.b_h

        (_h, logit), _updates = theano.scan(
            self._step,
            sequences=[y, y_z, y_r, y_h, y_mask],
            outputs_info=[init_state, None],
            non_sequences=[X, X_mask])

        return logit.dimshuffle((1, 0, 2))
Ejemplo n.º 3
0
Archivo: rnn.py Proyecto: dwf/pylearn2
    def fprop_step_mask(self, state_below, mask, state_before, U):
        """
        Scan function for case using masks

        Parameters
        ----------
        state_below : TheanoTensor
            Added to the recurrent projection; its columns are read in four
            consecutive groups of width ``self.dim`` (three sigmoid gates,
            then the tanh candidate).
        mask : TheanoTensor
            Per-row weight; rows with mask 1 take the new state, rows with
            mask 0 keep `state_before`.
        state_before : TheanoTensor
            Previous state.  The first ``self.dim`` columns are multiplied
            by `U`; the remaining columns are carried through the
            forget/input update.
        U : TheanoTensor
            Recurrent weight matrix.

        Returns
        -------
        TheanoTensor
            New state, laid out like `state_before`.
        """
        dim = self.dim
        prev_hidden = state_before[:, :dim]
        prev_cell = state_before[:, dim:]

        pre_act = state_below + tensor.dot(prev_hidden, U)
        in_gate = tensor.nnet.sigmoid(pre_act[:, :dim])
        forget_gate = tensor.nnet.sigmoid(pre_act[:, dim:2*dim])
        out_gate = tensor.nnet.sigmoid(pre_act[:, 2*dim:3*dim])
        candidate = tensor.tanh(pre_act[:, 3*dim:])

        # Write the new cell part first, then derive the hidden part from it.
        new_state = tensor.set_subtensor(
            state_before[:, dim:],
            forget_gate * prev_cell + in_gate * candidate)
        new_state = tensor.set_subtensor(
            new_state[:, :dim],
            out_gate * tensor.tanh(new_state[:, dim:]))

        # Only update the state for non-masked data, otherwise
        # just carry on the previous state until the end
        keep = mask[:, None]
        return keep * new_state + (1 - keep) * state_before
Ejemplo n.º 4
0
 def call(self, X):
     """Attenuate a frame outside a (scaled) box given by `position`.

     `X` must be a plain list ``[frame, position]``.  The last three axes
     of `frame` are read as (channels, height, width); columns 0..3 of
     `position` are read as box coordinates on a [-1, 1] grid (column
     order x0, y0, x1, y1 as implied by how they are compared against the
     width/height grids).  Pixels inside the grown box keep their value;
     pixels outside are multiplied by ``self.alpha``.

     Raises Exception when `X` is not a list of exactly two elements.
     """
     if not (type(X) is list and len(X) == 2):
         raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X))

     frame, position = X

     # Collapse every leading axis (e.g. time) into a single batch axis.
     frameShape = K.shape(frame)
     positionShape = K.shape(position)
     chans, height, width = frameShape[-3:]
     targetDim = positionShape[-1]
     frame = K.reshape(frame, (-1, chans, height, width))
     position = K.reshape(position, (-1, targetDim))

     # Half extents of the box, scaled by self.scale.
     hw = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0
     hh = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0

     # Grow each edge outwards; every clamp is applied to the whole
     # position tensor, exactly one edge at a time.
     grown = THT.set_subtensor(position[:, 0], position[:, 0] - hw)
     position = THT.maximum(grown, -1.0)
     grown = THT.set_subtensor(position[:, 2], position[:, 2] + hw)
     position = THT.minimum(grown, 1.0)
     grown = THT.set_subtensor(position[:, 1], position[:, 1] - hh)
     position = THT.maximum(grown, -1.0)
     grown = THT.set_subtensor(position[:, 3], position[:, 3] + hh)
     position = THT.minimum(grown, 1.0)

     # Per-axis indicator of "inside the box" on a [-1, 1] grid.
     rX = Data.linspace(-1.0, 1.0, width)
     rY = Data.linspace(-1.0, 1.0, height)
     left = position[:, 0].dimshuffle(0, 'x')
     right = position[:, 2].dimshuffle(0, 'x')
     top = position[:, 1].dimshuffle(0, 'x')
     bottom = position[:, 3].dimshuffle(0, 'x')
     FX = THT.gt(rX, left) * THT.le(rX, right)
     FY = THT.gt(rY, top) * THT.le(rY, bottom)

     # 1 inside the box, self.alpha outside.
     m = FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1)
     m = m + self.alpha - THT.gt(m, 0.) * self.alpha
     frame = frame * m.dimshuffle(0, 'x', 1, 2)

     # Restore the original leading axes.
     return K.reshape(frame, frameShape)
Ejemplo n.º 5
0
def pad(inp, padding):
    """Zero-pad a 4-D or 5-D tensor.

    `padding` is a sequence of ``[before, after]`` pairs.  For 4-D input
    the pairs apply to axes 2 and 3 (in that order).  For 5-D input three
    pairs are expected and apply to axes 3, 4 and 1 respectively.

    Returns `inp` itself when every padding value is zero.

    Raises NotImplementedError for any other number of dimensions.
    """
    def _bounds(padval):
        # Start offset, and a negative stop (None when nothing follows).
        before, after = padval[0], padval[1]
        return before, (-after if after != 0 else None)

    if all(padval == 0 for padval in pyk.flatten(padding)):
        return inp

    if inp.ndim == 4:
        canvas = T.zeros(shape=(inp.shape[0],
                                inp.shape[1],
                                inp.shape[2] + sum(padding[0]),
                                inp.shape[3] + sum(padding[1])))
        (ystart, ystop), (xstart, xstop) = [_bounds(padval) for padval in padding]
        return T.set_subtensor(canvas[:, :, ystart:ystop, xstart:xstop], inp)

    if inp.ndim == 5:
        canvas = T.zeros(shape=(inp.shape[0],
                                inp.shape[1] + sum(padding[2]),
                                inp.shape[2],
                                inp.shape[3] + sum(padding[0]),
                                inp.shape[4] + sum(padding[1])))
        (ystart, ystop), (xstart, xstop), (zstart, zstop) = [_bounds(padval) for padval in padding]
        return T.set_subtensor(canvas[:, zstart:zstop, :, ystart:ystop, xstart:xstop], inp)

    raise NotImplementedError("Padding is only implemented for 4 and 5 dimensional tensors.")
Ejemplo n.º 6
0
def mask_k_maxpooling(variable, variable_shape ,axis, k):
    """
    Build a 0/1 mask marking the k largest entries along `axis`.

    Params:
    variable:         2-D tensor
    variable_shape:   (rows, cols) of `variable`, indexable with [0] / [1]
    axis:             0 or 1, the dimension along which maxima are taken
    k:                number of maxima to mark per slice
    ------
    Return:
    mask : 2-D tensor of dtype theano.config.floatX
        1: at the position of one of the k maxima
        0: elsewhere
        example (axis=1, k=1):
            1 2 3              0 0 1
            2 7 1      --->    0 1 0
            1 2 1              0 1 0

    For any other `axis` value the mask is returned all zeros.
    """
    # Already-selected entries are overwritten with this value so that the
    # next argmax skips them.
    sentinel = -999999999

    remaining = variable
    mask = T.zeros(variable_shape, dtype=theano.config.floatX)
    for _ in range(k):
        winners = T.argmax(remaining, axis=axis)
        if axis == 0:
            cols = range(0, variable_shape[1])
            mask = T.set_subtensor(mask[winners, cols], 1)
            remaining = T.set_subtensor(remaining[winners, cols], sentinel)
        elif axis == 1:
            rows = range(0, variable_shape[0])
            mask = T.set_subtensor(mask[rows, winners], 1)
            remaining = T.set_subtensor(remaining[rows, winners], sentinel)
    return mask
Ejemplo n.º 7
0
        def pass_edges(input_idx_t, edge_t, edge_mask_t, counter_t, h_tm1, c_tm1, x):
            h_t = h_tm1
            c_t = c_tm1
            # select the input vector to use for this edge (source)
            x_t_i = x[input_idx_t, :]
            # zero out the input unless this is a leaf node
            x_t_0 = T.switch(T.eq(T.sum(edge_mask_t), 0), x_t_i, x_t_i*0)
            # concatenate with the input edge vector
            x_t_edge = T.concatenate([x_t_0, edge_t])

            # compute attention weights, using a manual softmax
            attention_scores = T.dot(self.v_a, T.tanh(T.dot(self.W_h_a, h_tm1))) # (1, n_edges)
            # find the max of the unmasked values
            max_score = T.max(attention_scores + edge_mask_t * 10000.0) - 10000.0
            # exponentiate the differences, masking first to avoid inf, and then to keep only relevant scores
            exp_scores = T.exp((attention_scores - max_score) * edge_mask_t) * edge_mask_t
            # take the sum, and add one if the mask is all zeros to avoid an inf
            exp_scores_sum = T.sum(exp_scores) + T.switch(T.eq(T.sum(edge_mask_t), 0), 1.0, 0.0)
            # normalize to compute the weights
            weighted_mask = exp_scores / exp_scores_sum

            i_t = T.nnet.sigmoid(T.dot(x_t_edge, self.W_x_i) + T.sum(T.dot(self.W_h_i.T, (weighted_mask * h_tm1)).T, axis=0) + self.b_h_i)
            f_t = T.nnet.sigmoid(T.dot(x_t_edge, self.W_x_f) + T.sum(T.dot(self.W_h_f.T, (weighted_mask * h_tm1)).T, axis=0) + self.b_h_f)
            o_t = T.nnet.sigmoid(T.dot(x_t_edge, self.W_x_o) + T.sum(T.dot(self.W_h_o.T, (weighted_mask * h_tm1)).T, axis=0) + self.b_h_o)
            u_t = T.tanh(T.dot(x_t_edge, self.W_x_u) + T.sum(T.dot(self.W_h_u.T, (weighted_mask * h_tm1)).T, axis=0) + self.b_h_u)

            c_temp = i_t * u_t + f_t * T.sum((weighted_mask * c_tm1).T, axis=0)
            h_temp = o_t * T.tanh(c_temp)

            h_t = T.set_subtensor(h_t[:, counter_t], h_temp)
            c_t = T.set_subtensor(c_t[:, counter_t], c_temp)
            return h_t, c_t
Ejemplo n.º 8
0
 def fprop(self, XH):
     """Compute the next combined [hidden; cell] state of this layer.

     `XH` is a pair ``[state_belows, state_befores]``.  Each state vector
     is laid out as [state; cell], so ``h[:, :n]`` is the part used for
     the recurrent term.  ``H[0]`` is this layer's own previous state.

     Raises AttributeError when the number of inputs does not match
     ``self.parent`` or the number of recurrent states does not match
     ``self.recurrent``.
     """
     X, H = XH
     if len(X) != len(self.parent):
         raise AttributeError("The number of inputs doesn't match "
                              "with the number of parents.")
     if len(H) != len(self.recurrent):
         raise AttributeError("The number of inputs doesn't match "
                              "with the number of recurrents.")

     nout = self.nout
     # The index of self recurrence is 0
     state = H[0]

     # Pre-activations for candidate + three gates, accumulated over all
     # parents and recurrent connections.
     pre = T.zeros((X[0].shape[0], 4 * nout))
     for x, (parname, parout) in izip(X, self.parent.items()):
         pre += T.dot(x[:, :parout],
                      self.params['W_' + parname + '__' + self.name])
     for h, (recname, recout) in izip(H, self.recurrent.items()):
         pre += T.dot(h[:, :recout],
                      self.params['U_' + recname + '__' + self.name])
     pre += self.params['b_' + self.name]

     # Gating units
     in_gate = T.nnet.sigmoid(pre[:, nout:2 * nout])
     forget_gate = T.nnet.sigmoid(pre[:, 2 * nout:3 * nout])
     out_gate = T.nnet.sigmoid(pre[:, 3 * nout:])

     # Cell part first, then the hidden part derived from the new cell.
     new_cell = forget_gate * state[:, nout:] + in_gate * self.nonlin(pre[:, :nout])
     state = T.set_subtensor(state[:, nout:], new_cell)
     state = T.set_subtensor(state[:, :nout],
                             out_gate * self.nonlin(state[:, nout:]))
     state.name = self.name
     return state
Ejemplo n.º 9
0
def crop_images(data, image_shape, border_width=8, mode=0):
    """ Function used to crop the images by a certain border width.

        data         : input data, theano 4D tensor
        image_shape  : 4-tuple, (batch_size, num_channels, image_rows, image_cols)
        border_width : border width to be cropped, default value 8
        mode         : 0 for random crop, anything else for centered crop.

        Returns a tensor of shape
        (batch_size, num_channels, image_rows - border_width,
         image_cols - border_width).

        NOTE: in random mode the offset is drawn with numpy while the
        expression is being built, so each image gets one fixed offset
        (shared by rows and columns) for the lifetime of the graph.
    """
    row_step = image_shape[2] - border_width
    col_step = image_shape[3] - border_width
    output = T.alloc(0., image_shape[0], image_shape[1], row_step, col_step)

    random_crop = (mode == 0)
    for i in range(image_shape[0]):
        if random_crop:
            # randint(0) raises; a zero border leaves nothing to randomise.
            begin_idx = numpy.random.randint(border_width) if border_width > 0 else 0
        else:
            # Floor division keeps the offset an integer (usable as a slice
            # bound) regardless of the interpreter's division semantics.
            begin_idx = border_width // 2
        output = T.set_subtensor(
            output[i, :, :, :],
            data[i, :, begin_idx:(begin_idx + row_step), begin_idx:(begin_idx + col_step)])
    return output
Ejemplo n.º 10
0
    def output(self, input=None, dropout_active=True, *args, **kwargs):
        """Return this layer's symbolic output.

        Steps: optional dropout on the input, padding along axis 1 so that
        the valid convolution behaves like a circular one, the convolution
        itself, bias addition and the nonlinearity.

        input          : input expression; when None, the output of
                         ``self.input_layer`` is used.
        dropout_active : apply dropout (if ``self.dropout > 0``) and forward
                         the flag to the input layer.
        """
        # Identity test: `== None` can be evaluated element-wise by
        # array-like inputs and is not a reliable "no argument" check.
        if input is None:
            input = self.input_layer.output(dropout_active=dropout_active, *args, **kwargs)

        if dropout_active and (self.dropout > 0.):
            retain_prob = 1 - self.dropout
            mask = layers.srng.binomial(input.shape, p=retain_prob, dtype='int32').astype('float32')
            # apply the input mask and rescale the input accordingly. By doing
            # this it's no longer necessary to rescale the weights at test time.
            input = input / retain_prob * mask

        # pad input so the valid convolution amounts to a circular one.
        # we need to copy (filter_size - stride) values from one side to the other
        input_padded = T.zeros((input.shape[0], input.shape[1] + self.filter_size - self.stride, input.shape[2], input.shape[3]))
        input_padded = T.set_subtensor(input_padded[:, :input.shape[1], :, :], input)
        input_padded = T.set_subtensor(input_padded[:, input.shape[1]:, :, :], input[:, :self.filter_size - self.stride, :, :])

        contiguous_input = gpu_contiguous(input_padded)
        contiguous_filters = gpu_contiguous(self.W)
        conved = self.filter_acts_op(contiguous_input, contiguous_filters)

        if self.untie_biases:
            conved += self.b.dimshuffle(0, 1, 2, 'x')
        else:
            conved += self.b.dimshuffle(0, 'x', 'x', 'x')

        return self.nonlinearity(conved)
Ejemplo n.º 11
0
def update_log_p(skip_idxs,zeros,active,log_p_curr,log_p_prev):
    """Advance a log-probability vector by one step with next/skip transitions.

    The first `active` entries of `log_p_prev` are exponentiated (after
    subtracting their maximum for numerical stability), propagated to
    themselves, to the following position and, for the skip indices below
    `active`, two positions ahead.  The result is added in log space to
    `log_p_curr`.

    `zeros` is used as a source of zero-filled buffers (it is only ever
    sliced and written through set/inc_subtensor).

    Returns ``(active_next, log_p_next)``: the new count of active
    positions (int32) and the new log-probabilities for those positions.
    """
    reachable_skips = skip_idxs[(skip_idxs < active).nonzero()]
    # -1 is appended so the max is defined when no skip is reachable.
    furthest_skip = T.max(T.concatenate([reachable_skips, [-1]]))
    active_next = T.cast(
        T.minimum(
            T.maximum(active + 1, furthest_skip + 2 + 1),
            log_p_curr.shape[0]
        ),
        'int32'
    )

    common_factor = T.max(log_p_prev[:active])
    prev_probs = T.exp(log_p_prev[:active] - common_factor)

    accum = zeros[:active_next]
    # stay in place
    accum = T.set_subtensor(accum[:active], prev_probs)
    # move to the next position
    accum = T.inc_subtensor(accum[1:], accum[:-1])
    # skip transitions: jump two positions ahead
    accum = T.inc_subtensor(accum[reachable_skips + 2], prev_probs[reachable_skips])
    propagated_log_p = T.log(accum) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + propagated_log_p
    )
    return active_next, log_p_next
Ejemplo n.º 12
0
 def __init__(self, input):
     # 3-in-1 max pooling with stride 2 along the first axis: output row i
     # is the element-wise maximum of input rows 2i-1, 2i and 2i+1 (where
     # they exist).
     #
     # Floor division is used for the halved sizes so the result stays an
     # integer independent of the interpreter's `/` semantics and of
     # Theano's integer-division configuration.
     half = input.output.shape[0] // 2
     self.output_shape = input.output_shape[0] // 2, input.output_shape[1]
     self.origlayer = input
     # Even rows form the base ...
     self.output = input.output[::2]
     # ... merged with the odd row that follows each of them ...
     self.output = T.set_subtensor(self.output[:half], T.maximum(self.output[:half], input.output[1::2]))
     # ... and with the odd row that precedes each of them.
     self.output = T.set_subtensor(self.output[1:], T.maximum(self.output[1:], input.output[1:-1:2]))
Ejemplo n.º 13
0
        def _step(c, c_m, hidden, c_matrix):
            """Compose two child states into one node state per sample.

            `c` holds, per sample, the columns (node index, left child index,
            right child index); `c_m` is a per-sample mask.  Samples with
            mask 0 keep the values already stored at their node index.

            Returns the updated ``(hidden, c_matrix)`` pair.
            """
            node_idx, left_idx, right_idx = c[:, 0], c[:, 1], c[:, 2]
            samples = T.arange(n_samples)
            gate = c_m[:, None]

            pre_act = (
                T.dot(hidden[left_idx, samples, :], self.W)
                + T.dot(hidden[right_idx, samples, :], self.U)
                + self.b
            )

            in_gate = T.nnet.sigmoid(_slice(pre_act, 0, self.dim_proj))
            forget_left = T.nnet.sigmoid(_slice(pre_act, 1, self.dim_proj))
            forget_right = T.nnet.sigmoid(_slice(pre_act, 2, self.dim_proj))
            out_gate = T.nnet.sigmoid(_slice(pre_act, 3, self.dim_proj))
            candidate = T.tanh(_slice(pre_act, 4, self.dim_proj))

            cell = (
                in_gate * candidate
                + forget_left * c_matrix[left_idx, samples, :]
                + forget_right * c_matrix[right_idx, samples, :]
            )
            cell = gate * cell + (1.0 - gate) * c_matrix[node_idx, samples, :]

            state = out_gate * T.tanh(cell)
            state = gate * state + (1.0 - gate) * hidden[node_idx, samples, :]

            new_hidden = T.set_subtensor(hidden[node_idx, samples], state)
            new_c_matrix = T.set_subtensor(c_matrix[node_idx, samples], cell)
            return new_hidden, new_c_matrix
Ejemplo n.º 14
0
def T_subspacel1_slow_shrinkage(a,L,lam_sparse,lam_slow,small_value=.001):
    """Apply slow shrinkage followed by subspace-l1 shrinkage to `a`.

    Rows of `a` are treated in (even, odd) pairs whose amplitude is
    ``sqrt(even**2 + odd**2 + small_value)``.  The slow step scales each
    pair by ``max(0, 1 - (lam_slow/L) * div/amp)``, where `div` is built
    from first/second differences of the amplitude along axis 1.  The
    subspace-l1 step then scales by ``max(0, 1 - (lam_sparse/L)/amp')``
    with ``amp'`` the amplitude after the slow step.

    Returns a tensor shaped like `a`.
    """
    even_rows = a[::2,:]
    odd_rows = a[1::2,:]
    amp = T.sqrt(even_rows**2 + odd_rows**2 + small_value)

    # --- slow shrinkage -------------------------------------------------
    first_diff = amp[:,1:] - amp[:,:-1]
    second_diff = first_diff[:,1:] - first_diff[:,:-1]
    div = T.zeros_like(amp)
    div = T.set_subtensor(div[:,1:-1], -second_diff)
    # Boundary columns only have a one-sided difference.
    div = T.set_subtensor(div[:,0], -first_diff[:,0])
    div = T.set_subtensor(div[:,-1], first_diff[:,-1])

    slow_scale = 1 - (lam_slow/L)*(div/amp)
    slow_scale = T.switch(T.gt(slow_scale,0), slow_scale, 0)
    slow_even = slow_scale*even_rows
    slow_odd = slow_scale*odd_rows

    # --- subspace l1 shrinkage -------------------------------------------
    slow_amp = T.sqrt(slow_even**2 + slow_odd**2)
    l1_scale = 1. - (lam_sparse/L)/slow_amp
    l1_scale = T.switch(T.gt(l1_scale,0.), l1_scale, 0.)

    prox = T.zeros_like(a)
    prox = T.set_subtensor(prox[ ::2,:], l1_scale*slow_even)
    prox = T.set_subtensor(prox[1::2,:], l1_scale*slow_odd)
    return prox
Ejemplo n.º 15
0
    def compile_dream(self, X_train, dream_state, initializer):
        """Compile ``self.dream_update``, a function that optimises a "dream" input.

        A shared variable ``self.X_dream`` is created from `dream_state`
        followed by values produced by `initializer`.  The network output
        for that input is built, and the optimizer's update for
        ``self.X_dream`` is restricted to the part after the first
        ``len(dream_state)`` entries along axis 1, so the dream state
        itself is never modified.

        X_train     : array whose shape is used as the template for the
                      dreamed input (axis 0 is forced to 1).
        dream_state : fixed leading part of the dreamed input.
        initializer : callable taking a shape tuple and returning initial
                      values for the free part.
        """
        self.dream_compiled = True
        X_dream_shape = list(X_train.shape)
        X_dream_shape[0] = 1
        X_dream_shape[1] -= len(dream_state)
        X_dream = initializer(tuple(X_dream_shape))
        self.X_dream = theano.shared(atleast_4d(np.append(dream_state, X_dream).astype('float32')))

        current_layer = self.X_dream
        # set_subtensor returns a new expression and leaves its argument
        # untouched; the result has to be kept for the softmax over the free
        # part to reach the layers below.
        current_layer = T.set_subtensor(
            current_layer[:, len(dream_state):, :],
            Activations.softmax(current_layer[:, len(dream_state):, :]))
        for layer, params in zip(self.layers, self.params_shared):
            current_layer = layer.get_output(
                current_layer, params, testing=True)
        y_hat_dream = current_layer.flatten(1)

        self.optimizer.build([[self.X_dream.get_value()]])

        dream_updates = list(self.optimizer.get_updates([self.X_dream], -y_hat_dream[0]))
        # Keep only the update of the free part; entry 1 is assumed to be the
        # (variable, new value) pair for X_dream -- TODO confirm against the
        # optimizer implementation.
        original_var = dream_updates[1][0][:, len(dream_state):, :]
        new_var = dream_updates[1][1][:, len(dream_state):, :]
        dream_updates[1] = (self.X_dream, T.set_subtensor(original_var, new_var))
        self.dream_update = theano.function(
            inputs=[],
            outputs=y_hat_dream,
            updates=dream_updates
        )
Ejemplo n.º 16
0
def update_hard_stack(stack_t, stack_pushed, stack_merged, push_value,
                      merge_value, mask):
    """Compute the new value of the given hard stack.

    Both possible outcomes are computed for every example -- a push of
    `push_value` and a merge yielding `merge_value` -- and `mask` selects
    between them per example.  This is parallel but somewhat wasteful.

    Args:
        stack_t: Current stack value
        stack_pushed: Helper stack structure, of same size as `stack_t`
        stack_merged: Helper stack structure, of same size as `stack_t`
        push_value: Batch of values to be pushed
        merge_value: Batch of merge results
        mask: Batch of booleans: 1 if merge, 0 if push

    Returns:
        The next stack: the merged copy where `mask` is 1, the pushed copy
        where it is 0.
    """
    # Push outcome: new value on top, everything else moves down one slot.
    after_push = T.set_subtensor(stack_pushed[:, 0], push_value)
    after_push = T.set_subtensor(after_push[:, 1:], stack_t[:, :-1])

    # Merge outcome: merged value on top, everything below the two
    # consumed elements moves up one slot.
    after_merge = T.set_subtensor(stack_merged[:, 0], merge_value)
    after_merge = T.set_subtensor(after_merge[:, 1:-1], stack_t[:, 2:])

    # Broadcast the per-example choice over all remaining dimensions.
    choose_merge = T.cast(mask.dimshuffle(0, "x", "x"),
                          dtype=theano.config.floatX)
    return choose_merge * after_merge + (1. - choose_merge) * after_push
Ejemplo n.º 17
0
def T_subspacel1_slow_shrinkage_conv(a, L, lam_sparse, lam_slow, imshp,kshp,featshp,stride=(1,1),small_value=.001):
    """Convolutional variant of slow + subspace-l1 shrinkage.

    `a` is transposed and reshaped to
    ``(imshp[0], kshp[0], featshp[2], featshp[3])`` (num images, features,
    szy, szx).  Features are treated in (even, odd) pairs along axis 1 and
    the slow term uses differences along axis 0.  The result is reshaped
    and transposed back to the layout of `a`.

    `stride` is accepted but not used by this function.
    """
    shape4d = (imshp[0], kshp[0], featshp[2], featshp[3])
    features = T.reshape(T.transpose(a), shape4d, ndim=4)

    even_feats = features[:, ::2, :, :]
    odd_feats = features[:, 1::2, :, :]
    amp = T.sqrt(even_feats**2 + odd_feats**2 + small_value)

    # --- slow shrinkage -------------------------------------------------
    first_diff = amp[1:, :, :, :] - amp[:-1, :, :, :]
    second_diff = first_diff[1:, :, :, :] - first_diff[:-1, :, :, :]
    div = T.zeros_like(amp)
    div = T.set_subtensor(div[1:-1, :, :, :], -second_diff)
    # Boundary entries only have a one-sided difference.
    div = T.set_subtensor(div[0, :, :, :], -first_diff[0, :, :, :])
    div = T.set_subtensor(div[-1, :, :, :], first_diff[-1, :, :, :])

    slow_scale = 1 - (lam_slow / L) * (div / amp)
    slow_scale = T.switch(T.gt(slow_scale, 0), slow_scale, 0)
    slow_even = slow_scale * even_feats
    slow_odd = slow_scale * odd_feats

    # --- subspace l1 shrinkage -------------------------------------------
    slow_amp = T.sqrt(slow_even ** 2 + slow_odd ** 2)
    l1_scale = 1. - (lam_sparse / L) / slow_amp
    l1_scale = T.switch(T.gt(l1_scale, 0.), l1_scale, 0.)

    prox = T.zeros_like(features)
    prox = T.set_subtensor(prox[:, ::2, :, :], l1_scale * slow_even)
    prox = T.set_subtensor(prox[:, 1::2, :, :], l1_scale * slow_odd)

    flat_shape = (shape4d[0], shape4d[1] * shape4d[2] * shape4d[3])
    return T.transpose(T.reshape(prox, flat_shape, ndim=2))
Ejemplo n.º 18
0
 def __setitem__(self, ind, a):
     # Assign `a` to the entries selected by `ind`.  `ind` is first
     # translated by self._data_index_; a psarray_base value must live on
     # the same grid and contributes its underlying data, any other value
     # is written as-is.
     ind = self._data_index_(ind)
     if isinstance(a, psarray_base):
         assert a.grid is self.grid
         value = a._data
     else:
         value = a
     self._data = T.set_subtensor(self._data[ind], value)
Ejemplo n.º 19
0
    def global_contrast_normalize(self, X, scale=1., subtract_mean=True, 
        use_std=False, sqrt_bias=0., min_divisor=1e-8):
        """Global contrast normalisation along the last axis of `X`.

        X             : 3-D or 4-D tensor.
        scale         : value the normalised vectors are scaled to.
        subtract_mean : subtract the mean over the last axis first.
        use_std       : normalise by the standard deviation of `X` instead
                        of the vector norm of the (mean-subtracted) data.
        sqrt_bias     : added under the square root of the normaliser.
        min_divisor   : normalisers smaller than this are replaced by 1 so
                        that near-constant vectors are not blown up.

        Returns the normalised tensor, same shape as `X`.

        Raises NotImplementedError when `X` is not 3-D or 4-D.
        """
        ndim = X.ndim
        if ndim not in [3,4]: raise NotImplementedError("X.dim>4 or X.ndim<3")

        scale = float(scale)
        mean = X.mean(axis=ndim-1)
        new_X = X.copy()

        if subtract_mean:
            if ndim==3:
                new_X = X - mean[:,:,None]
            else: new_X = X - mean[:,:,:,None]

        if use_std:
            normalizers = T.sqrt(sqrt_bias + X.var(axis=ndim-1)) / scale
        else:
            normalizers = T.sqrt(sqrt_bias + (new_X ** 2).sum(axis=ndim-1)) / scale

        # Don't normalize by anything too small.  set_subtensor returns a
        # new expression, so the result must be kept for the clamp to apply.
        normalizers = T.set_subtensor(
            normalizers[(normalizers < min_divisor).nonzero()], 1.)

        if ndim==3: new_X /= normalizers[:,:,None]
        else: new_X /= normalizers[:,:,:,None]

        return new_X
Ejemplo n.º 20
0
def get_odd_even_energy(X, P, H, W, V, U, b, b_0, b_L, d, Lambda, b_p,
                        marginalize_visible):
    """Average the energies obtained with odd and with even columns of `H` replaced.

    Two copies of `H` are built: one whose odd columns are replaced by
    ``update_odd_mu(...)`` and one whose even columns are replaced by
    ``update_even_mu(...)``.  ``get_energy`` is evaluated on each copy and
    the mean of the two energies is returned.

    When `marginalize_visible` is true, ``get_energy`` is asked to
    marginalise the "even" x/p units for the odd copy and the "odd" ones
    for the even copy; otherwise nothing is marginalised (None).
    """
    h_odd_replaced = T.set_subtensor(
        H[:, 1::2], update_odd_mu(X, P, H, W, V, U, b, b_L))
    h_even_replaced = T.set_subtensor(
        H[:, ::2], update_even_mu(X, P, H, W, V, U, b, b_0, b_L))

    if marginalize_visible:
        for_odd, for_even = "even", "odd"
    else:
        for_odd, for_even = None, None

    energy_odd = get_energy(X, P, h_odd_replaced, W, V,
                            U, b, b_0, b_L, d, Lambda, b_p,
                            x_marginalized=for_odd,
                            p_marginalized=for_odd)
    energy_even = get_energy(X, P, h_even_replaced, W,
                             V, U, b, b_0, b_L, d, Lambda, b_p,
                             x_marginalized=for_even,
                             p_marginalized=for_even)

    return 0.5*(energy_odd + energy_even)
Ejemplo n.º 21
0
def bbox_transform_inv(boxes, deltas):
    """Apply regression `deltas` to `boxes` and return the predicted boxes.

    `boxes` columns are read as (x1, y1, x2, y2).  `deltas` columns are
    read in interleaved groups of four (dx, dy, dw, dh).  Centres are
    shifted by ``d * size`` and sizes are scaled by ``exp(d)``; the result
    is written back as (x1, y1, x2, y2) in the same interleaved layout as
    `deltas`.
    """
    if boxes.shape[0] == 0:
        return T.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype)

    # Current box geometry (the +1.0 treats coordinates as inclusive).
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    widths_col = widths.dimshuffle(0, 'x')
    heights_col = heights.dimshuffle(0, 'x')

    pred_ctr_x = deltas[:, 0::4] * widths_col + ctr_x.dimshuffle(0, 'x')
    pred_ctr_y = deltas[:, 1::4] * heights_col + ctr_y.dimshuffle(0, 'x')
    pred_w = T.exp(deltas[:, 2::4]) * widths_col
    pred_h = T.exp(deltas[:, 3::4]) * heights_col

    # Column offset k within each group of four receives corners[k]:
    # x1, y1, x2, y2.
    corners = (
        pred_ctr_x - 0.5 * pred_w,
        pred_ctr_y - 0.5 * pred_h,
        pred_ctr_x + 0.5 * pred_w,
        pred_ctr_y + 0.5 * pred_h,
    )
    pred_boxes = T.zeros_like(deltas, dtype=deltas.dtype)
    for offset, corner in enumerate(corners):
        pred_boxes = T.set_subtensor(pred_boxes[:, offset::4], corner)

    return pred_boxes
Ejemplo n.º 22
0
	def get_output(self, train=False):
		"""Multiply the input by one of several block masks.

		Mask 0 is all ones.  For every entry i of ``self.input_shapes`` a
		further mask is built that is zero on the slice belonging to entry
		i (offsets are the running sum of the preceding entries) and one
		elsewhere.  When `train` is true one of the masks is picked at
		random through ``self.trng``; otherwise the all-ones mask is used.
		"""
		X = self.get_input(train)

		full = T.ones_like(X)
		masks = [full]

		for i in xrange(len(self.input_shapes)):
			mask = T.ones_like(X)
			idx = 0
			for j in xrange(len(self.input_shapes)):
				if i == j:
					# NOTE(review): when len() succeeds `ishape` is an int and
					# the len(ishape) calls below raise TypeError; only the
					# fallback (entries without a length, e.g. plain ints)
					# reaches the 1-D branch.  Confirm the intended layout.
					try:
						ishape = len(self.input_shapes[0])
					except TypeError:
						# Entry has no length: treat it as a 1-D size.
						ishape = [1]
					if len(ishape)  == 3:
						mask = T.set_subtensor(mask[:,:,idx : idx+ self.input_shapes[j]], 0)
					elif len(ishape) == 2:
						mask = T.set_subtensor(mask[:,idx : idx+ self.input_shapes[j]], 0)
					elif len(ishape) == 1:
						mask = T.set_subtensor(mask[idx : idx+ self.input_shapes[j]], 0)
					else:
						raise NotImplementedError()
				idx =  idx + self.input_shapes[j]
			masks += [mask]
		masked = T.stack(masks)

		if train:
			index  = self.trng.random_integers(size=(1,),low = 0, high = len(masks)-1)[0]
		else:
			index = 0
		masked_output = X * masked[index]
		return masked_output
def create_adam_updates(updates, params, gparams, gsums, xsums, lr, eps, beta1, beta2):
    """Fill `updates` with Adam update rules for `params`.

    `gsums` and `xsums` hold the first- and second-moment accumulators
    matching `params`.  A parameter that is itself a subtensor expression
    only has the indexed rows of its origin variable and of its
    accumulators updated.  A shared step counter is created and its
    increment is registered in `updates` as well.

    Nothing is returned; `updates` is modified in place.
    """
    step = theano.shared(np.float64(0.0).astype(theano.config.floatX))
    step_next = step + 1.0
    # Bias-corrected learning rate for this step.
    bias1 = 1.0 - beta1**step_next
    bias2 = 1.0 - beta2**step_next
    lr_t = lr * (T.sqrt(bias2) / bias1)

    for p, g, m, v in zip(params, gparams, gsums, xsums):
        sparse = is_subtensor_op(p)
        if sparse:
            origin, indexes = get_subtensor_op_inputs(p)
            m_prev, v_prev = m[indexes], v[indexes]
        else:
            m_prev, v_prev = m, v

        m_t = beta1*m_prev + (1.0-beta1)*g
        v_t = beta2*v_prev + (1.0-beta2)*T.sqr(g)
        g_t = m_t / (T.sqrt(v_t) + eps)

        if sparse:
            # Write back only the rows that were indexed.
            updates[m] = T.set_subtensor(m_prev, m_t)
            updates[v] = T.set_subtensor(v_prev, v_t)
            updates[origin] = T.inc_subtensor(p, -lr_t*g_t)
        else:
            updates[m] = m_t
            updates[v] = v_t
            updates[p] = p - lr_t*g_t

    updates[step] = step_next
Ejemplo n.º 24
0
def update_stack(stack_t, shift_value, reduce_value, mask, model_dim):
    """
    Compute the next value of a batch of stacks.

    Both possible outcomes are built for every example -- the stack after a
    shift and the stack after a reduce -- and the per-example `mask` then
    selects between them.  This is wasteful but keeps the computation fully
    batched.

    Args:
        stack_t: Current stack value; indexed as [example, position, feature]
        shift_value: Batch of values to place on top when shifting
        reduce_value: Batch of precomputed reduce results
        mask: Batch of booleans: 1 if reduce, 0 if shift
        model_dim: Number of leading features of the top slot that are
            overwritten by shift_value / reduce_value.

    Returns:
        The blended stack, `mask * reduced + (1 - mask) * shifted`.
    """

    # Give the mask two trailing broadcastable axes and make it a float so it
    # can weight whole stacks.
    reduce_weight = T.cast(mask.dimshuffle(0, "x", "x"),
                           dtype=theano.config.floatX)

    def _replace_top(new_top):
        # Overwrite the first `model_dim` features of slot 0 with `new_top`.
        return T.set_subtensor(stack_t[:, 0, :model_dim], new_top)

    # Shift outcome: new top, every old element moves one slot down.
    shifted = T.set_subtensor(_replace_top(shift_value)[:, 1:],
                              stack_t[:, :-1])

    # Reduce outcome: new top, elements from old slot 2 onwards move one slot up.
    reduced = T.set_subtensor(_replace_top(reduce_value)[:, 1:-1],
                              stack_t[:, 2:])

    return reduce_weight * reduced + (1. - reduce_weight) * shifted
Ejemplo n.º 25
0
	def sample_update(self, data):
		"""Build the update dictionary for one importance-weighting step.

		Proposal samples are scored with the transition and observation
		log-probabilities, the proposal log-probability is subtracted, the
		result is centred on its maximum before exponentiation, multiplied by
		the current weights and normalised to sum to one.  The returned
		OrderedDict writes the new weights and particles and increments the
		time counter.
		"""
		proposal_samples, log_proposal_probs=self.proposal_distrib

		# Flip to True to wrap every intermediate in a theano Print op.
		printing=False

		def _probe(label, expr):
			if printing:
				return theano.printing.Print(label)(expr)
			return expr

		log_transition_probs=_probe(
			'1 log transition probs update',
			self.true_log_transition_probs(self.current_state, proposal_samples))
		log_observation_probs=_probe(
			'2 log observation probs update',
			self.true_log_observation_probs(proposal_samples, data.dimshuffle('x',0)))
		log_unnorm_weights=_probe(
			'3 log unnorm weights update',
			log_transition_probs + log_observation_probs - log_proposal_probs)
		# Subtract the maximum before exponentiating.
		log_unnorm_weights_center=_probe(
			'4 log unnorm weights center update',
			log_unnorm_weights-T.max(log_unnorm_weights))
		unnorm_weights=_probe(
			'5 unnorm weights update',
			T.exp(log_unnorm_weights_center)*self.current_weights)
		normalizer=_probe('6 normalizer update', T.sum(unnorm_weights))

		weights=unnorm_weights/normalizer

		return OrderedDict([
			(self.weights, T.set_subtensor(self.next_weights, weights)),
			(self.particles, T.set_subtensor(self.next_state, proposal_samples)),
			(self.time_counter, self.time_counter+1),
		])
 def create_valid_error(self):
     """Set ``self.valid_error`` to a per-step mean absolute error.

     A running sum is built over the last ``self.steps`` columns of
     ``self.pm25target``, starting from ``self.pm25in[:, 1, 0]``.  The error
     is the mean over axis 0 of the absolute difference between that running
     sum and ``self.pm25in[:, -self.steps:, 0]``.
     """
     horizon = self.steps
     running = T.zeros_like(self.predictions)
     # Value the first increment is added to.
     previous = self.pm25in[:,1,0]
     for step in xrange(horizon):
         running = T.set_subtensor(running[:,step], previous + self.pm25target[:,-horizon+step])
         previous = running[:,step]
     reference = self.pm25in[:,-horizon:,0]
     self.valid_error = T.mean(T.abs_(running - reference), axis=0)
Ejemplo n.º 27
0
    def get_learn_func(self):
        """
        Compile and return a theano function ``f(action, reward)``.

        Calling it increments the observation count stored for ``action`` and
        moves the stored reward estimate for that action towards ``reward``
        by ``(reward - estimate) / new_count``, i.e. an incremental mean.
        The compiled function has no outputs; it only applies the updates.
        """

        action = T.iscalar()
        reward = T.scalar()

        estimates = self.estimated_rewards
        counts = self.observation_counts

        count_after = counts[action] + 1.
        estimate_before = estimates[action]
        estimate_after = estimate_before + (reward - estimate_before) / count_after

        updates = OrderedDict()
        updates[estimates] = T.set_subtensor(estimates[action], estimate_after)
        updates[counts] = T.set_subtensor(counts[action], count_after)

        return function([action, reward], updates=updates)
Ejemplo n.º 28
0
    def f_score(self,y,label):
        """Return ``(f_score, precision, recall)`` for the class ``label``.

        ``self.y_pred`` and ``y`` are compared against ``label``: an entry
        counts as positive when it equals ``label``.  Precision and recall
        are guarded against empty denominators with a small epsilon; the
        F-score is defined as 0 when precision + recall is 0 (no true
        positives) instead of evaluating to NaN.

        :param y: true labels (cast to int32 before use)
        :param label: the class value treated as the positive class
        """
        y=T.cast(y,'int32')
        # After subtracting `label`, positives are exactly the zero entries.
        new_y_pred=T.sub(self.y_pred,label)
        new_y=T.sub(y,label)

        # number of predicted positives = total - number of non-zero entries
        pre_pos_num=new_y_pred.shape[0]-new_y_pred.nonzero()[0].shape[0]

        # number of actual positives
        real_pos=new_y.shape[0]-new_y.nonzero()[0].shape[0]

        # Mark negatives with different codes (1 for predictions, 2 for truth)
        # so that the two vectors agree only where both are positive (0).
        new_y_pred=T.set_subtensor(new_y_pred[new_y_pred.nonzero()[0]],1)
        new_y=T.set_subtensor(new_y[new_y.nonzero()[0]],2)

        r=T.neq(new_y_pred,new_y)
        true_pos=self.y_pred.shape[0]-r.sum()
        precision=true_pos / (T.cast(pre_pos_num,'float32')+0.0000001)
        recall=true_pos / (T.cast(real_pos,'float32')+0.0000001)

        # With no true positives both precision and recall are 0 and the
        # harmonic mean would be 0/0; report 0 in that case.
        denominator=precision + recall
        raw_f_score=(2 * precision * recall) / denominator
        f_score=T.switch(T.eq(denominator,0),T.zeros_like(raw_f_score),raw_f_score)

        return f_score,precision,recall
        
        
Ejemplo n.º 29
0
        def pass_edges(input_idx_t, edge_t, edge_mask_t, counter_t, h_tm1, c_tm1, x):
            """Process one edge: a node LSTM cell followed by an edge LSTM cell.

            The row ``x[input_idx_t]`` is used as node input only when
            ``edge_mask_t`` sums to zero; otherwise it is replaced by zeros.
            Previous hidden / cell states are masked by ``edge_mask_t`` and
            summed to form the recurrent contribution.  The node output is
            then fed, together with ``edge_t``, through a second set of gates,
            and the result is written into column ``counter_t`` of the hidden
            and cell state matrices, which are returned.
            """
            # select the source vector for this edge; keep it only for leaves
            source = x[input_idx_t, :]
            source = T.switch(T.eq(T.sum(edge_mask_t), 0), source, source*0)

            masked_h = edge_mask_t * h_tm1

            def node_preact(W_x, W_h, b):
                # input projection + summed projection of the masked states + bias
                return T.dot(source, W_x) + T.sum(T.dot(W_h.T, masked_h).T, axis=0) + b

            i_t = T.nnet.sigmoid(node_preact(self.W_x_i, self.W_h_i, self.b_h_i))
            f_t = T.nnet.sigmoid(node_preact(self.W_x_f, self.W_h_f, self.b_h_f))
            o_t = T.nnet.sigmoid(node_preact(self.W_x_o, self.W_h_o, self.b_h_o))
            u_t = T.tanh(node_preact(self.W_x_u, self.W_h_u, self.b_h_u))

            c_node = i_t * u_t + f_t * T.sum((edge_mask_t * c_tm1).T, axis=0)
            h_node = o_t * T.tanh(c_node)

            def edge_preact(W_e, W_eh, b):
                # edge feature projection + projection of the node output + bias
                return T.dot(edge_t, W_e) + T.dot(h_node, W_eh) + b

            # second LSTM cell, driven by the edge features
            ie_t = T.nnet.sigmoid(edge_preact(self.W_e_i, self.W_eh_i, self.b_e_i))
            fe_t = T.nnet.sigmoid(edge_preact(self.W_e_f, self.W_eh_f, self.b_e_f))
            oe_t = T.nnet.sigmoid(edge_preact(self.W_e_o, self.W_eh_o, self.b_e_o))
            ue_t = T.tanh(edge_preact(self.W_e_u, self.W_eh_u, self.b_e_u))

            c_edge = ie_t * ue_t + fe_t * c_node
            h_edge = oe_t * T.tanh(c_edge)

            # store the edge output in the column reserved for this step
            h_next = T.set_subtensor(h_tm1[:, counter_t], h_edge)
            c_next = T.set_subtensor(c_tm1[:, counter_t], c_edge)
            return h_next, c_next
    def negative_log_likelihood(self, label_sym):
        """
        Return the regularised margin-based cost of this model for the given
        labels.

        Despite the method name, the value built here is not a
        log-likelihood.  It is ``self.l2norm + self.C * margin`` where
        ``margin`` is the mean, over the examples, of

            max_c (loss[n, c] + compatibility[n, c]) - compatibility[n, label[n]]

        with ``loss`` equal to 0 for the correct class and 1 for every other
        class.

        As a side effect ``self.score`` is set to the per-example maximum of
        ``loss + self.compatibility``.

        :type label_sym: theano.tensor.TensorType
        :param label_sym: corresponds to a vector that gives for each example the
                  correct label
        """
        # T.arange(label_sym.shape[0]) is the symbolic vector [0, 1, ..., n-1];
        # paired with label_sym it selects the entry of the correct class in
        # every row.
        rows = T.arange(label_sym.shape[0])

        # loss, matrix \in R[#data,#classes]: 1 everywhere ...
        loss = theano.shared(value=numpy.ones((self.n_data,self.n_classes), 
                        dtype=theano.config.floatX),
                name='cost', borrow=True)
        # ... except 0 for the correct class.  set_subtensor returns a new
        # variable and does not modify its argument, so the result must be
        # kept (it was previously discarded, leaving the loss all ones).
        loss = T.set_subtensor(loss[rows,label_sym], 0)

        # score, matrix \in R[#data,1]
        self.score = T.max(loss + self.compatibility, axis=1)
        margin = T.mean(self.score - self.compatibility[rows,label_sym])

        return self.l2norm + self.C * margin
# Both approaches are implemented here: one that uses GaussianRandomWalk and
# one that does not.
# For how to use subtensor, see:
# http://deeplearning.net/software/theano/library/tensor/basic.html

# Approach that does NOT use GaussianRandomWalk: the state vector is built
# element by element with set_subtensor.
with basic_model:
    # Priors
    s_mu = HalfNormal('s_mu', sd=100) # error between states at adjacent time steps
    s_Y =  HalfNormal('s_Y', sd=100) # error between state and observation at each time step
    mu_0 = Normal('mu_0',mu=0, sd=100) # initial state
    
    # Error terms (one per transition between consecutive time steps)
    e_mu = Normal('e_mu', mu=0, sd=s_mu, shape =n_times-1)
    
    # mu[0] is the initial state; each following entry is the previous entry
    # plus the corresponding error term.
    mu = tt.zeros((n_times))
    mu = tt.set_subtensor(mu[0], mu_0)
    for i in range(n_times-1):
        mu = tt.set_subtensor(mu[i+1], mu[i]+e_mu[i])

    #likelihood
    Y_obs = Normal('Y_obs', mu=mu, sd=s_Y, observed=Y)

    # Sampling
    trace = sample(1000)
    summary(trace)
    
# Approach that uses GaussianRandomWalk
# NOTE(review): this block appears to be cut off after the priors in this
# file, and it reuses variable names already registered on basic_model above
# -- presumably the model is re-created in between; confirm.
with basic_model:
    # Priors
    s_mu = HalfNormal('s_mu', sd=100) # error between states at adjacent time steps
    s_Y =  HalfNormal('s_Y', sd=100) # error between state and observation at each time step
Ejemplo n.º 32
0
def isoneutral_diffusion_pre(maskT, maskU, maskV, maskW, dxt, dxu, dyt, dyu,
                             dzt, dzw, cost, cosu, salt, temp, zt, K_iso, K_11,
                             K_22, K_33, Ai_ez, Ai_nz, Ai_bx, Ai_by):
    """
    Isopycnal diffusion for tracer
    following functional formulation by Griffies et al
    Code adopted from MOM2.1

    Builds expressions for the diffusion tensor entries K_11, K_22 and K_33
    and for the slope arrays Ai_ez, Ai_nz, Ai_bx and Ai_by from the
    temperature and salinity fields, and returns them in that order.  All
    array "assignments" are expressed with T.set_subtensor / T.inc_subtensor,
    which return new variables, so the names are rebound rather than modified
    in place.

    NOTE(review): the grid-metric arguments (dxt, dxu, dyt, dyu, dzt, dzw,
    cost, cosu) are combined directly with sliced 3-D fields; they are
    presumably already shaped so that they broadcast against those slices --
    confirm against the caller.
    """
    # epsln keeps the slope denominators strictly negative (never zero).
    epsln = 1e-20
    # Centre and width of the tanh taper applied to the slopes (see dm_taper).
    iso_slopec = 1e-3
    iso_dslope = 1e-3
    # Lower bound applied to the tapered diffusivity in the K_11 / K_22 sums.
    K_iso_steep = 50.
    # Index used on the last axis of salt and temp.
    tau = 0

    dTdx = T.zeros_like(K_11)
    dSdx = T.zeros_like(K_11)
    dTdy = T.zeros_like(K_11)
    dSdy = T.zeros_like(K_11)
    dTdz = T.zeros_like(K_11)
    dSdz = T.zeros_like(K_11)
    """
    drho_dt and drho_ds at centers of T cells
    """
    drdT = maskT * get_drhodT(salt[:, :, :, tau], temp[:, :, :, tau], abs(zt))
    drdS = maskT * get_drhodS(salt[:, :, :, tau], temp[:, :, :, tau], abs(zt))
    """
    gradients at top face of T cells
    """
    # Forward differences along the third axis; the last level stays zero.
    dTdz = T.set_subtensor(dTdz[:, :, :-1], maskW[:, :, :-1] *
        (temp[:, :, 1:, tau] - temp[:, :, :-1, tau]) / \
        dzw[:, :, :-1]
    )
    dSdz = T.set_subtensor(dSdz[:, :, :-1], maskW[:, :, :-1] *
        (salt[:, :, 1:, tau] - salt[:, :, :-1, tau]) / \
        dzw[:, :, :-1]
    )
    """
    gradients at eastern face of T cells
    """
    # Forward differences along the first axis; the last column stays zero.
    dTdx = T.set_subtensor(
        dTdx[:-1, :, :],
        maskU[:-1, :, :] * (temp[1:, :, :, tau] - temp[:-1, :, :, tau]) /
        (dxu[:-1, :, :] * cost[:, :, :]))
    dSdx = T.set_subtensor(
        dSdx[:-1, :, :],
        maskU[:-1, :, :] * (salt[1:, :, :, tau] - salt[:-1, :, :, tau]) /
        (dxu[:-1, :, :] * cost[:, :, :]))
    """
    gradients at northern face of T cells
    """
    # Forward differences along the second axis; the last row stays zero.
    dTdy = T.set_subtensor(dTdy[:, :-1, :], maskV[:, :-1, :] *
        (temp[:, 1:, :, tau] - temp[:, :-1, :, tau]) \
        / dyu[:, :-1, :]
    )
    dSdy = T.set_subtensor(dSdy[:, :-1, :], maskV[:, :-1, :] *
        (salt[:, 1:, :, tau] - salt[:, :-1, :, tau]) \
        / dyu[:, :-1, :]
    )

    def dm_taper(sx):
        """
        tapering function for isopycnal slopes
        """
        return 0.5 * (1. + T.tanh((-abs(sx) + iso_slopec) / iso_dslope))

    """
    Compute Ai_ez and K11 on center of east face of T cell.
    """
    # diffloc: K_iso averaged over the four neighbouring points (two in x,
    # two in z); level 0 has no level below it and uses a two-point average.
    diffloc = T.zeros_like(K_11)
    diffloc = T.set_subtensor(
        diffloc[1:-2, 2:-2, 1:],
        0.25 * (K_iso[1:-2, 2:-2, 1:] + K_iso[1:-2, 2:-2, :-1] +
                K_iso[2:-1, 2:-2, 1:] + K_iso[2:-1, 2:-2, :-1]))
    diffloc = T.set_subtensor(
        diffloc[1:-2, 2:-2, 0],
        0.5 * (K_iso[1:-2, 2:-2, 0] + K_iso[2:-1, 2:-2, 0]))

    sumz = T.zeros_like(K_11)[1:-2, 2:-2]
    # kr / ip enumerate the two vertical and two horizontal neighbours.
    # `:-1 + kr or None` is the slice `:-1` for kr == 0 and the full range
    # (`:None`) for kr == 1; ki is the matching start level (1 or 0).
    for kr in range(2):
        ki = 0 if kr == 1 else 1
        for ip in range(2):
            drodxe = drdT[1 + ip:-2 + ip, 2:-2, ki:] * dTdx[1:-2, 2:-2, ki:] \
                + drdS[1 + ip:-2 + ip, 2:-2, ki:] * dSdx[1:-2, 2:-2, ki:]
            drodze = drdT[1 + ip:-2 + ip, 2:-2, ki:] * dTdz[1 + ip:-2 + ip, 2:-2, :-1 + kr or None] \
                + drdS[1 + ip:-2 + ip, 2:-2, ki:] * \
                dSdz[1 + ip:-2 + ip, 2:-2, :-1 + kr or None]
            # Slope = -(horizontal density gradient) / (vertical density
            # gradient); the denominator is clipped to be <= -epsln.
            sxe = -drodxe / (T.minimum(0., drodze) - epsln)
            taper = dm_taper(sxe)
            sumz = T.inc_subtensor(
                sumz[:, :, ki:],
                dzw[:, :, :-1 + kr or None] * maskU[1:-2, 2:-2, ki:] *
                T.maximum(K_iso_steep, diffloc[1:-2, 2:-2, ki:] * taper))
            Ai_ez = T.set_subtensor(Ai_ez[1:-2, 2:-2, ki:, ip, kr],
                                    taper * sxe * maskU[1:-2, 2:-2, ki:])
    K_11 = T.set_subtensor(K_11[1:-2, 2:-2, :], sumz / (4. * dzt[:, :, :]))
    """
    Compute Ai_nz and K_22 on center of north face of T cell.
    """
    # Same construction as above with the roles of the first two axes swapped.
    diffloc = T.set_subtensor(diffloc[...], 0)
    diffloc = T.set_subtensor(
        diffloc[2:-2, 1:-2, 1:],
        0.25 * (K_iso[2:-2, 1:-2, 1:] + K_iso[2:-2, 1:-2, :-1] +
                K_iso[2:-2, 2:-1, 1:] + K_iso[2:-2, 2:-1, :-1]))
    diffloc = T.set_subtensor(
        diffloc[2:-2, 1:-2, 0],
        0.5 * (K_iso[2:-2, 1:-2, 0] + K_iso[2:-2, 2:-1, 0]))

    sumz = T.zeros_like(K_11)[2:-2, 1:-2]
    for kr in range(2):
        ki = 0 if kr == 1 else 1
        for jp in range(2):
            drodyn = drdT[2:-2, 1 + jp:-2 + jp, ki:] * dTdy[2:-2, 1:-2, ki:] + \
                drdS[2:-2, 1 + jp:-2 + jp, ki:] * dSdy[2:-2, 1:-2, ki:]
            drodzn = drdT[2:-2, 1 + jp:-2 + jp, ki:] * dTdz[2:-2, 1 + jp:-2 + jp, :-1 + kr or None] \
                + drdS[2:-2, 1 + jp:-2 + jp, ki:] * \
                dSdz[2:-2, 1 + jp:-2 + jp, :-1 + kr or None]
            syn = -drodyn / (T.minimum(0., drodzn) - epsln)
            taper = dm_taper(syn)
            sumz = T.inc_subtensor(
                sumz[:, :, ki:],
                dzw[:, :, :-1 + kr or None] * maskV[2:-2, 1:-2, ki:] *
                T.maximum(K_iso_steep, diffloc[2:-2, 1:-2, ki:] * taper))
            Ai_nz = T.set_subtensor(Ai_nz[2:-2, 1:-2, ki:, jp, kr],
                                    taper * syn * maskV[2:-2, 1:-2, ki:])
    K_22 = T.set_subtensor(K_22[2:-2, 1:-2, :], sumz / (4. * dzt[:, :, :]))
    """
    compute Ai_bx, Ai_by and K33 on top face of T cell.
    """
    sumx = T.zeros_like(K_11)[2:-2, 2:-2, :-1]
    sumy = T.zeros_like(K_11)[2:-2, 2:-2, :-1]

    for kr in range(2):
        # Vertical density gradient shared by the x and y slopes of this kr.
        drodzb = drdT[2:-2, 2:-2, kr:-1 + kr or None] * dTdz[2:-2, 2:-2, :-1] \
            + drdS[2:-2, 2:-2, kr:-1 + kr or None] * dSdz[2:-2, 2:-2, :-1]

        # eastward slopes at the top of T cells
        for ip in range(2):
            drodxb = drdT[2:-2, 2:-2, kr:-1 + kr or None] * dTdx[1 + ip:-3 + ip, 2:-2, kr:-1 + kr or None] \
                + drdS[2:-2, 2:-2, kr:-1 + kr or None] * \
                dSdx[1 + ip:-3 + ip, 2:-2, kr:-1 + kr or None]
            sxb = -drodxb / (T.minimum(0., drodzb) - epsln)
            taper = dm_taper(sxb)
            # Accumulate the squared-slope contribution to K_33 (x direction).
            sumx += dxu[1 + ip:-3 + ip, :, :] * \
                K_iso[2:-2, 2:-2, :-1] * taper * \
                sxb**2 * maskW[2:-2, 2:-2, :-1]
            Ai_bx = T.set_subtensor(Ai_bx[2:-2, 2:-2, :-1, ip, kr],
                                    taper * sxb * maskW[2:-2, 2:-2, :-1])

        # northward slopes at the top of T cells
        for jp in range(2):
            facty = cosu[:, 1 + jp:-3 + jp] * dyu[:, 1 + jp:-3 + jp]
            drodyb = drdT[2:-2, 2:-2, kr:-1 + kr or None] * dTdy[2:-2, 1 + jp:-3 + jp, kr:-1 + kr or None] \
                + drdS[2:-2, 2:-2, kr:-1 + kr or None] * \
                dSdy[2:-2, 1 + jp:-3 + jp, kr:-1 + kr or None]
            syb = -drodyb / (T.minimum(0., drodzb) - epsln)
            taper = dm_taper(syb)
            # Accumulate the squared-slope contribution to K_33 (y direction).
            sumy += facty * K_iso[2:-2, 2:-2, :-1] \
                * taper * syb**2 * maskW[2:-2, 2:-2, :-1]
            Ai_by = T.set_subtensor(Ai_by[2:-2, 2:-2, :-1, jp, kr],
                                    taper * syb * maskW[2:-2, 2:-2, :-1])

    K_33 = T.set_subtensor(
        K_33[2:-2, 2:-2, :-1], sumx / (4 * dxt[2:-2, :, :]) + sumy /
        (4 * dyt[:, 2:-2, :] * cost[:, 2:-2, :]))
    # The last level of K_33 is explicitly zeroed.
    K_33 = T.set_subtensor(K_33[2:-2, 2:-2, -1], 0.)

    return K_11, K_22, K_33, Ai_ez, Ai_nz, Ai_bx, Ai_by
Ejemplo n.º 33
0
def build_model(tparams, options):
    """Assemble the symbolic training graph of the translation model.

    A forward and a backward GRU encode the source sequence; their
    concatenated states form the context.  The masked mean of the context
    initialises the two decoder states through feed-forward layers.  Target
    embeddings are shifted by one step, passed to the decoder layer selected
    by ``options['decoder_type']``, and the decoder outputs are turned into
    word probabilities from which the masked per-sample cost is computed.

    Returns the tuple
    ``(trng, use_noise, x, x_mask, y, y_mask, opt_ret, cost)`` where
    ``opt_ret`` is an OrderedDict of auxiliary decoder outputs.
    """
    opt_ret = OrderedDict()
    decoder_type = options['decoder_type']

    stream_seed = numpy.random.RandomState(numpy.random.randint(1024)).randint(numpy.iinfo(numpy.int32).max)
    trng = RandomStreams(stream_seed)
    use_noise = theano.shared(numpy.float32(0.))

    # symbolic inputs, laid out as #words x #samples, each with a test value
    x = tensor.matrix('x', dtype='int64')
    x.tag.test_value = numpy.zeros((5, 63), dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')
    x_mask.tag.test_value = numpy.ones((5, 63), dtype='float32')
    y = tensor.matrix('y', dtype='int64')
    y.tag.test_value = numpy.zeros((7, 63), dtype='int64')
    y_mask = tensor.matrix('y_mask', dtype='float32')
    y_mask.tag.test_value = numpy.ones((7, 63), dtype='float32')

    # time-reversed source for the backward encoder
    x_rev = x[::-1]
    x_rev_mask = x_mask[::-1]

    src_steps = x.shape[0]
    n_samples = x.shape[1]
    trg_steps = y.shape[0]

    # source word embeddings for the forward and the backward RNN
    src_emb_shape = [src_steps, n_samples, options['dim_word_src']]
    emb_fwd = tparams['Wemb'][x.flatten()].reshape(src_emb_shape)
    emb_bwd = tparams['Wemb'][x_rev.flatten()].reshape(src_emb_shape)

    # recurrent encoders
    enc_fwd = get_layer('gru')[1](tparams, emb_fwd, options,
                                  prefix='encoder', mask=x_mask)
    enc_bwd = get_layer('gru')[1](tparams, emb_bwd, options,
                                  prefix='encoderr', mask=x_rev_mask)

    # context: forward states next to the re-reversed backward states
    ctx = concatenate([enc_fwd, enc_bwd[::-1]], axis=enc_fwd.ndim-1)

    # masked mean of the context over time
    ctx_mean = (ctx * x_mask[:, :, None]).sum(0) / x_mask.sum(0)[:, None]

    # initial decoder states and (all-zero) initial boundaries
    init_state_char = get_layer('ff')[1](tparams, ctx_mean, options,
                                         prefix='ff_init_state_char', activ='tanh')
    init_state_word = get_layer('ff')[1](tparams, ctx_mean, options,
                                         prefix='ff_init_state_word', activ='tanh')
    init_bound_char = tensor.zeros_like(init_state_char)
    init_bound_word = tensor.zeros_like(init_state_word)

    # target embeddings, shifted one step so that step t sees word t-1
    trg_emb = tparams['Wemb_dec'][y.flatten()].reshape(
        [trg_steps, n_samples, options['dim_word']])
    yemb = tensor.set_subtensor(tensor.zeros_like(trg_emb)[1:], trg_emb[:-1])

    # decoder (planning variant); it returns twelve sequences plus updates
    decoder_outputs, updates = get_layer(decoder_type)[1](
        tparams, yemb, options,
        prefix='decoder',
        mask=y_mask,
        context=ctx,
        context_mask=x_mask,
        one_step=False,
        init_state_char=init_state_char,
        init_state_word=init_state_word,
        init_bound_char=init_bound_char,
        init_bound_word=init_bound_word)
    (char_h, word_h, bound_c, bound_w, ctxs, alphas, _decoder_probs, samples,
     commit_origin, probs_origin, action_plans, temp) = decoder_outputs

    opt_ret['bound_c'] = bound_c
    opt_ret['bound_w'] = bound_w
    opt_ret['dec_alphas'] = alphas
    # the reported probabilities correspond to the non-shift version
    opt_ret['dec_probs'] = probs_origin
    opt_ret['dec_samples'] = commit_origin
    opt_ret['dec_commits'] = samples
    opt_ret['dec_updates'] = updates
    opt_ret['dec_action_plans'] = action_plans
    opt_ret['dec_temperature'] = temp.mean()

    # word probabilities from decoder states, previous word and context
    logit_rnn = get_layer('fff')[1](tparams, char_h, word_h, options,
                                    prefix='ff_logit_rnn', activ='linear')
    logit_prev = get_layer('ff')[1](tparams, yemb, options,
                                    prefix='ff_logit_prev', activ='linear')
    logit_ctx = get_layer('ff')[1](tparams, ctxs, options,
                                   prefix='ff_logit_ctx', activ='linear')
    logit = tensor.tanh(logit_rnn + logit_prev + logit_ctx)

    if options['use_dropout']:
        print('Using dropout')
        logit = dropout_layer(logit, use_noise, trng)

    logit = get_layer('ff')[1](tparams, logit, options,
                               prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(
        logit.reshape([logit_shp[0]*logit_shp[1], logit_shp[2]]))

    # cost: pick the probability of each target word out of the flattened
    # probability matrix, then sum the masked negative logs over time
    flat_targets = y.flatten()
    flat_positions = tensor.arange(flat_targets.shape[0]) * options['n_words'] + flat_targets
    cost = -tensor.log(probs.flatten()[flat_positions])
    cost = (cost.reshape([y.shape[0], y.shape[1]]) * y_mask).sum(0)

    return trng, use_noise, x, x_mask, y, y_mask, opt_ret, cost
Ejemplo n.º 34
0
def from_onehot_sym(x_var):
    """Symbolically convert one-hot rows back to index form.

    The result has one entry per row of ``x_var`` and the dtype of ``x_var``.
    For every non-zero element of ``x_var``, the entry at its first-axis
    index is set to its second-axis index (cast to ``uint8``); rows with no
    non-zero element keep the value 0.
    """
    row_idx, col_idx = TT.nonzero(x_var)[:2]
    decoded = TT.zeros((x_var.shape[0], ), x_var.dtype)
    return TT.set_subtensor(decoded[row_idx], col_idx.astype('uint8'))
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))

#define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
Ejemplo n.º 36
0
    def __init__(self,
                 data,
                 U,
                 img_h=160,
                 img_w=300,
                 hidden_size=100,
                 batch_size=50,
                 lr=0.001,
                 lr_decay=0.95,
                 sqr_norm_lim=9,
                 fine_tune_W=True,
                 fine_tune_M=False,
                 optimizer='adam',
                 filter_sizes=[3, 4, 5],
                 num_filters=100,
                 conv_attn=False,
                 encoder='rnn',
                 elemwise_sum=True,
                 corr_penalty=0.0,
                 xcov_penalty=0.0,
                 n_recurrent_layers=1,
                 is_bidirectional=False):
        self.data = data
        self.img_h = img_h
        self.batch_size = batch_size
        self.fine_tune_W = fine_tune_W
        self.fine_tune_M = fine_tune_M
        self.lr = lr
        self.lr_decay = lr_decay
        self.optimizer = optimizer
        self.sqr_norm_lim = sqr_norm_lim
        self.conv_attn = conv_attn

        index = T.iscalar()
        c = T.imatrix('c')
        r = T.imatrix('r')
        y = T.ivector('y')
        c_mask = T.fmatrix('c_mask')
        r_mask = T.fmatrix('r_mask')
        c_seqlen = T.ivector('c_seqlen')
        r_seqlen = T.ivector('r_seqlen')
        embeddings = theano.shared(U, name='embeddings', borrow=True)
        zero_vec_tensor = T.fvector()
        self.zero_vec = np.zeros(img_w, dtype=theano.config.floatX)
        self.set_zero = theano.function([zero_vec_tensor],
                                        updates=[(embeddings,
                                                  T.set_subtensor(
                                                      embeddings[0, :],
                                                      zero_vec_tensor))])
        if encoder.find('cnn') > -1 and (
                encoder.find('rnn') > -1
                or encoder.find('lstm') > -1) and not elemwise_sum:
            self.M = theano.shared(np.eye(2 * hidden_size).astype(
                theano.config.floatX),
                                   borrow=True)
        else:
            self.M = theano.shared(np.eye(hidden_size).astype(
                theano.config.floatX),
                                   borrow=True)

        c_input = embeddings[c.flatten()].reshape(
            (c.shape[0], c.shape[1], embeddings.shape[1]))
        r_input = embeddings[r.flatten()].reshape(
            (r.shape[0], r.shape[1], embeddings.shape[1]))

        l_in = lasagne.layers.InputLayer(shape=(batch_size, img_h, img_w))

        if encoder.find('cnn') > -1:
            l_conv_in = lasagne.layers.ReshapeLayer(l_in,
                                                    shape=(batch_size, 1,
                                                           img_h, img_w))
            conv_layers = []
            for filter_size in filter_sizes:
                conv_layer = lasagne.layers.Conv2DLayer(
                    l_conv_in,
                    num_filters=num_filters,
                    filter_size=(filter_size, img_w),
                    stride=(1, 1),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    border_mode='valid')
                pool_layer = lasagne.layers.MaxPool2DLayer(
                    conv_layer, pool_size=(img_h - filter_size + 1, 1))
                conv_layers.append(pool_layer)

            l_conv = lasagne.layers.ConcatLayer(conv_layers)
            l_conv = lasagne.layers.DenseLayer(
                l_conv,
                num_units=hidden_size,
                nonlinearity=lasagne.nonlinearities.tanh)

        if is_bidirectional:
            if encoder.find('lstm') > -1:
                prev_fwd, prev_bck = l_in, l_in
                for _ in xrange(n_recurrent_layers):
                    l_fwd = lasagne.layers.LSTMLayer(prev_fwd,
                                                     hidden_size,
                                                     backwards=False,
                                                     learn_init=True,
                                                     peepholes=True)

                    l_bck = lasagne.layers.LSTMLayer(prev_bck,
                                                     hidden_size,
                                                     backwards=True,
                                                     learn_init=True,
                                                     peepholes=True)
                    prev_fwd, prev_bck = l_fwd, l_bck
            else:
                prev_fwd, prev_bck = l_in, l_in
                for _ in xrange(n_recurrent_layers):
                    l_fwd = lasagne.layers.RecurrentLayer(
                        prev_fwd,
                        hidden_size,
                        nonlinearity=lasagne.nonlinearities.tanh,
                        W_hid_to_hid=lasagne.init.Orthogonal(),
                        W_in_to_hid=lasagne.init.Orthogonal(),
                        backwards=False,
                        learn_init=True)

                    l_bck = lasagne.layers.RecurrentLayer(
                        prev_bck,
                        hidden_size,
                        nonlinearity=lasagne.nonlinearities.tanh,
                        W_hid_to_hid=lasagne.init.Orthogonal(),
                        W_in_to_hid=lasagne.init.Orthogonal(),
                        backwards=True,
                        learn_init=True)
                    prev_fwd, prev_bck = l_fwd, l_bck

            l_recurrent = lasagne.layers.ConcatLayer([l_fwd, l_bck])
        else:
            prev_fwd = l_in
            if encoder.find('lstm') > -1:
                for _ in xrange(n_recurrent_layers):
                    l_recurrent = lasagne.layers.LSTMLayer(prev_fwd,
                                                           hidden_size,
                                                           backwards=False,
                                                           learn_init=True,
                                                           peepholes=True)
                    prev_fwd = l_recurrent
            else:
                for _ in xrange(n_recurrent_layers):
                    l_recurrent = lasagne.layers.RecurrentLayer(
                        prev_fwd,
                        hidden_size,
                        nonlinearity=lasagne.nonlinearities.tanh,
                        W_hid_to_hid=lasagne.init.Orthogonal(),
                        W_in_to_hid=lasagne.init.Orthogonal(),
                        backwards=False,
                        learn_init=True)
                    prev_fwd = l_recurrent

        recurrent_size = hidden_size * 2 if is_bidirectional else hidden_size
        if conv_attn:
            l_rconv_in = lasagne.layers.InputLayer(shape=(batch_size, img_h,
                                                          recurrent_size))
            l_rconv_in = lasagne.layers.ReshapeLayer(l_rconv_in,
                                                     shape=(batch_size, 1,
                                                            img_h,
                                                            recurrent_size))
            conv_layers = []
            for filter_size in filter_sizes:
                conv_layer = lasagne.layers.Conv2DLayer(
                    l_rconv_in,
                    num_filters=num_filters,
                    filter_size=(filter_size, recurrent_size),
                    stride=(1, 1),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    border_mode='valid')
                pool_layer = lasagne.layers.MaxPool2DLayer(
                    conv_layer, pool_size=(img_h - filter_size + 1, 1))
                conv_layers.append(pool_layer)

            l_hidden1 = lasagne.layers.ConcatLayer(conv_layers)
            l_hidden2 = lasagne.layers.DenseLayer(
                l_hidden1,
                num_units=hidden_size,
                nonlinearity=lasagne.nonlinearities.tanh)
            l_out = l_hidden2
        else:
            l_out = l_recurrent

        if conv_attn:
            e_context = l_recurrent.get_output(c_input,
                                               mask=c_mask,
                                               deterministic=False)
            e_response = l_recurrent.get_output(r_input,
                                                mask=r_mask,
                                                deterministic=False)

            def step_fn(row_t, mask_t):
                return row_t * mask_t.reshape((-1, 1))

            if is_bidirectional:
                e_context, _ = theano.scan(step_fn,
                                           outputs_info=None,
                                           sequences=[
                                               e_context,
                                               T.concatenate([c_mask, c_mask],
                                                             axis=1)
                                           ])
                e_response, _ = theano.scan(step_fn,
                                            outputs_info=None,
                                            sequences=[
                                                e_response,
                                                T.concatenate([r_mask, r_mask],
                                                              axis=1)
                                            ])
            else:
                e_context, _ = theano.scan(step_fn,
                                           outputs_info=None,
                                           sequences=[e_context, c_mask])
                e_response, _ = theano.scan(step_fn,
                                            outputs_info=None,
                                            sequences=[e_response, r_mask])

            e_context = l_out.get_output(e_context,
                                         mask=c_mask,
                                         deterministic=False)
            e_response = l_out.get_output(e_response,
                                          mask=r_mask,
                                          deterministic=False)
        else:
            e_context = l_out.get_output(
                c_input, mask=c_mask,
                deterministic=False)[T.arange(batch_size), c_seqlen].reshape(
                    (c.shape[0], hidden_size))
            e_response = l_out.get_output(
                r_input, mask=r_mask,
                deterministic=False)[T.arange(batch_size), r_seqlen].reshape(
                    (r.shape[0], hidden_size))

        if encoder.find('cnn') > -1:
            e_conv_context = l_conv.get_output(c_input, deterministic=False)
            e_conv_response = l_conv.get_output(r_input, deterministic=False)
            if encoder.find('rnn') > -1 or encoder.find('lstm') > -1:
                if elemwise_sum:
                    e_context = e_context + e_conv_context
                    e_response = e_response + e_conv_response
                else:
                    e_context = T.concatenate([e_context, e_conv_context],
                                              axis=1)
                    e_response = T.concatenate([e_response, e_conv_response],
                                               axis=1)

                # penalize correlation
                if abs(corr_penalty) > 0:
                    cor = []
                    for i in range(hidden_size if elemwise_sum else 2 *
                                   hidden_size):
                        y1, y2 = e_context, e_response
                        x1 = y1[:, i] - (np.ones(batch_size) *
                                         (T.sum(y1[:, i]) / batch_size))
                        x2 = y2[:, i] - (np.ones(batch_size) *
                                         (T.sum(y2[:, i]) / batch_size))
                        nr = T.sum(x1 * x2) / (T.sqrt(T.sum(x1 * x1)) *
                                               T.sqrt(T.sum(x2 * x2)))
                        cor.append(-nr)
                if abs(xcov_penalty) > 0:
                    e_context_mean = T.mean(e_context, axis=0, keepdims=True)
                    e_response_mean = T.mean(e_response, axis=0, keepdims=True)
                    e_context_centered = e_context - e_context_mean  # (n, i)
                    e_response_centered = e_response - e_response_mean  # (n, j)

                    outer_prod = (e_context_centered.dimshuffle(0, 1, 'x') *
                                  e_response_centered.dimshuffle(0, 'x', 1)
                                  )  # (n, i, j)
                    xcov = T.sum(T.sqr(T.mean(outer_prod, axis=0)))
            else:
                e_context = e_conv_context
                e_response = e_conv_response

        dp = T.batched_dot(e_context, T.dot(e_response, self.M.T))
        #dp = pp('dp')(dp)
        o = T.nnet.sigmoid(dp)
        o = T.clip(o, 1e-7, 1.0 - 1e-7)

        self.shared_data = {}
        for key in ['c', 'r']:
            self.shared_data[key] = theano.shared(
                np.zeros((batch_size, img_h), dtype=np.int32))
        for key in ['c_mask', 'r_mask']:
            self.shared_data[key] = theano.shared(
                np.zeros((batch_size, img_h), dtype=theano.config.floatX))
        for key in ['y', 'c_seqlen', 'r_seqlen']:
            self.shared_data[key] = theano.shared(
                np.zeros((batch_size, ), dtype=np.int32))

        self.probas = T.concatenate([(1 - o).reshape(
            (-1, 1)), o.reshape((-1, 1))],
                                    axis=1)
        self.pred = T.argmax(self.probas, axis=1)
        self.errors = T.sum(T.neq(self.pred, y))
        self.cost = T.nnet.binary_crossentropy(o, y).mean()
        if encoder.find('cnn') > -1 and (encoder.find('rnn') > -1
                                         or encoder.find('lstm') > -1):
            if abs(corr_penalty) > 0:
                self.cost += corr_penalty * T.sum(cor)
            if abs(xcov_penalty) > 0:
                self.cost += xcov_penalty * xcov
        self.l_out = l_out
        self.l_recurrent = l_recurrent
        self.embeddings = embeddings
        self.c = c
        self.r = r
        self.y = y
        self.c_seqlen = c_seqlen
        self.r_seqlen = r_seqlen
        self.c_mask = c_mask
        self.r_mask = r_mask

        self.update_params()
Ejemplo n.º 37
0
def conv3d(signals,
           filters,
           signals_shape=None,
           filters_shape=None,
           border_mode="valid"):
    """
    Spatio-temporal convolution of movies with a bank of 3D filters.

    The work is done by one 2D convolution over all (signal frame, filter
    frame) pairs, followed by a sum along the time diagonal of the result.
    The filters are flipped.

    Parameters
    ----------
    signals
        Timeseries of images whose pixels have color channels, laid out as
        (batch, time, in channel, row, column): [Ns, Ts, C, Hs, Ws].
    filters
        Spatio-temporal filters, laid out as
        (out channel, time, in channel, row, column): [Nf, Tf, C, Hf, Wf].
    signals_shape
        None, or a tuple/list with the shape of signals. When None the
        symbolic ``signals.shape`` is used and no shape hint is given to
        the 2D convolution.
    filters_shape
        None, or a tuple/list with the shape of filters. When None the
        symbolic ``filters.shape`` is used and no shape hint is given to
        the 2D convolution.
    border_mode
        Either one string used for all three axes, or a sequence
        (time, height, width). 'valid', 'full' and 'half' are handled;
        'same' raises NotImplementedError. Height and width modes must be
        equal.

    Returns
    -------
    The convolved tensor. When Tf == 1 it is reshaped to
    (Ns, Ts, Nf, Hout, Wout); otherwise it is the sum over axis 3 of
    ``diagonal_subtensor`` applied to the (optionally time-padded)
    intermediate result.

    Raises
    ------
    NotImplementedError
        If the height and width modes differ, or a mode is 'same'.
    ValueError
        If a mode is not one of the recognised strings.

    Notes
    -----
    For the GPU, use nnet.conv3d.

    See Also
    --------
    Someone made a script that shows how to swap the axes between
    both 3d convolution implementations in Theano. See the last
    `attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_

    """
    if isinstance(border_mode, str):
        border_mode = (border_mode, ) * 3

    # Fall back on the symbolic shapes when no static shape was supplied.
    sig_shape = signals.shape if signals_shape is None else signals_shape
    flt_shape = filters.shape if filters_shape is None else filters_shape

    # The channel count is deliberately rebound by the second unpacking, so
    # both 4D shapes below use the value taken from the filters.
    n_sig, t_sig, n_chan, h_sig, w_sig = sig_shape
    n_flt, t_flt, n_chan, h_flt, w_flt = flt_shape

    # Fold the time axis into the leading axis so conv2d can be used.
    sig_shape_4d = (n_sig * t_sig, n_chan, h_sig, w_sig)
    flt_shape_4d = (n_flt * t_flt, n_chan, h_flt, w_flt)

    spatial_mode = border_mode[1]
    if spatial_mode != border_mode[2]:
        raise NotImplementedError("height and width bordermodes must match")

    # Only the height mode is forwarded; it equals the width mode here.
    out_4d = tensor.nnet.conv2d(
        signals.reshape(sig_shape_4d),
        filters.reshape(flt_shape_4d),
        input_shape=None if signals_shape is None else sig_shape_4d,
        filter_shape=None if filters_shape is None else flt_shape_4d,
        border_mode=spatial_mode,
    )

    # Spatial size of the conv2d output for the selected mode.
    if spatial_mode == "valid":
        h_out, w_out = h_sig - h_flt + 1, w_sig - w_flt + 1
    elif spatial_mode == "full":
        h_out, w_out = h_sig + h_flt - 1, w_sig + w_flt - 1
    elif spatial_mode == "half":
        h_out, w_out = h_sig - (h_flt % 2) + 1, w_sig - (w_flt % 2) + 1
    elif spatial_mode == "same":
        raise NotImplementedError()
    else:
        raise ValueError("invalid border mode", spatial_mode)

    # Split the folded axes back out: one entry per (signal frame,
    # filter frame) pair.
    out_tmp = out_4d.reshape((n_sig, t_sig, n_flt, t_flt, h_out, w_out))

    if t_flt == 1:
        # A single filter frame: nothing to sum, the time axis is unchanged.
        return out_tmp.reshape((n_sig, t_sig, n_flt, h_out, w_out))

    # Amount of zero padding along time required by the temporal mode.
    time_mode = border_mode[0]
    if time_mode == "valid":
        t_pad = 0
    elif time_mode == "full":
        t_pad = t_flt - 1
    elif time_mode == "half":
        t_pad = t_flt // 2
    elif time_mode == "same":
        raise NotImplementedError()
    else:
        raise ValueError("invalid border mode", time_mode)

    if t_pad == 0:
        # Sum along the diagonal through the (Ts, Tf) sub-matrix.
        return diagonal_subtensor(out_tmp, 1, 3).sum(axis=3)

    # Embed out_tmp in a zero tensor that is longer along time, then sum
    # along the diagonal as above.
    padded = tensor.zeros(dtype=out_tmp.dtype,
                          shape=(n_sig, t_sig + 2 * t_pad, n_flt, t_flt,
                                 h_out, w_out))
    padded = tensor.set_subtensor(
        padded[:, t_pad:(t_sig + t_pad), :, :, :, :], out_tmp)
    return diagonal_subtensor(padded, 1, 3).sum(axis=3)
Ejemplo n.º 38
0
def inner_fn(t, stm1, postm1, vtm1):
    """Build the symbolic graph for one time step of the agent/environment loop.

    In order, the step: samples an action from the previous hidden state,
    updates velocity and position, produces three observation channels,
    infers the new hidden-state distribution and samples from it, evaluates
    the likelihood of the observations and the prior on the state, and sums
    these into the free energy of the step.

    All weight matrices (``W*``), biases (``b*``), sizes (``n_s``, ``n_o``,
    ``n_oh``, ``n_oa``, ``n_proc``), the ``sig_min_*`` offsets, ``theano_rng``
    and the helpers ``GaussianNLL`` / ``KLGaussianGaussian`` are read from the
    enclosing scope.

    NOTE(review): the signature looks like a ``theano.scan`` step function
    with ``t`` as the sequence and the other three as recurrent outputs --
    confirm at the call site.

    Parameters
    ----------
    t
        Step index. Only used to decide whether row 0 of the prior is
        overwritten (it is once ``t`` is no longer below 20).
    stm1
        Hidden state of the previous step; reshaped to ``(n_s, n_proc)``.
    postm1
        Position at the previous step.
    vtm1
        Velocity at the previous step (the per-step position increment).

    Returns
    -------
    tuple
        ``(st, post, vt, oat, ot, oht, FEt, KL_st, stmu, stsig, force,
        p_ot, p_oht, p_oat)``.
    """

    # Use hidden state to generate action state
    aht = T.dot(Wa_aht_st, T.reshape(stm1, (n_s, n_proc))) + ba_aht
    #aht2 = T.dot(Wa_aht2_aht, T.reshape(aht,(n_s,n_proc))) + ba_aht2
    #aht3 = T.dot(Wa_aht3_aht2, T.reshape(aht2,(n_s,n_proc))) + ba_aht3
    atm1_mu = T.dot(Wa_atmu_aht, T.reshape(aht, (n_s, n_proc))) + ba_atmu
    atm1_sig = T.nnet.softplus(
        T.dot(Wa_atsig_aht, T.reshape(aht, (n_s, n_proc))) +
        ba_atsig) + sig_min_action

    # Sample Action
    atm1 = atm1_mu + theano_rng.normal((n_oa, n_proc)) * atm1_sig

    # Update Environment
    action_force = T.tanh(atm1)
    # Position-dependent force (one expression for postm1 < 0, another
    # otherwise) minus a damping term proportional to the previous velocity.
    force = T.switch(
        T.lt(postm1, 0.0), -2 * postm1 - 1, -T.pow(1 + 5 * T.sqr(postm1), -0.5)
        - T.sqr(postm1) * T.pow(1 + 5 * T.sqr(postm1), -1.5) -
        T.pow(postm1, 4) / 16.0) - 0.25 * vtm1
    vt = vtm1 + 0.05 * force + 0.03 * action_force
    post = postm1 + vt

    # Generate Sensory Inputs:

    # 1.) Observation of Last Action
    oat = atm1

    # 2.) Noisy Observation of Current Position
    ot = post + theano_rng.normal((n_o, n_proc)) * 0.01

    # 3.) Nonlinear Transformed Sensory Channel
    oht = T.exp(-T.sqr(post - 1.0) / 2.0 / 0.3 / 0.3)

    # Infer hidden state from last hidden state and current observations, using variational density
    hst = T.nnet.relu(
        T.dot(Wq_hst_stm1, T.reshape(stm1, (n_s, n_proc))) +
        T.dot(Wq_hst_ot, T.reshape(ot, (n_o, n_proc))) +
        T.dot(Wq_hst_oht, T.reshape(oht, (n_oh, n_proc))) +
        T.dot(Wq_hst_oat, T.reshape(oat, (n_oa, n_proc))) + bq_hst)
    hst2 = T.nnet.relu(
        T.dot(Wq_hst2_hst, T.reshape(hst, (n_s, n_proc))) + bq_hst2)

    stmu = T.tanh(
        T.dot(Wq_stmu_hst2, T.reshape(hst2, (n_s, n_proc))) + bq_stmu)
    stsig = T.nnet.softplus(
        T.dot(Wq_stsig_hst2, T.reshape(hst2, (n_s, n_proc))) +
        bq_stsig) + sig_min_states

    # Explicitly encode position as homeostatic state variable
    # Rescale representation to fit within linear response of the tanh-nonlinearity
    stmu = T.set_subtensor(stmu[0, :], 0.1 * ot[0, :]).reshape((n_s, n_proc))
    stsig = T.set_subtensor(stsig[0, :], 0.005).reshape((n_s, n_proc))

    # Sample from variational density
    st = stmu + theano_rng.normal((n_s, n_proc)) * stsig

    # Calculate parameters of likelihood distributions from sampled state
    ost = T.nnet.relu(T.dot(Wl_ost_st, T.reshape(st, (n_s, n_proc))) + bl_ost)
    ost2 = T.nnet.relu(
        T.dot(Wl_ost2_ost, T.reshape(ost, (n_s, n_proc))) + bl_ost2)
    ost3 = T.nnet.relu(
        T.dot(Wl_ost3_ost2, T.reshape(ost2, (n_s, n_proc))) + bl_ost3)

    otmu = T.dot(Wl_otmu_st, T.reshape(ost3, (n_s, n_proc))) + bl_otmu
    otsig = T.nnet.softplus(
        T.dot(Wl_otsig_st, T.reshape(ost3, (n_s, n_proc))) +
        bl_otsig) + sig_min_obs

    ohtmu = T.dot(Wl_ohtmu_st, T.reshape(ost3, (n_s, n_proc))) + bl_ohtmu
    ohtsig = T.nnet.softplus(
        T.dot(Wl_ohtsig_st, T.reshape(ost3, (n_s, n_proc))) +
        bl_ohtsig) + sig_min_obs

    oatmu = T.dot(Wl_oatmu_st, T.reshape(ost3, (n_s, n_proc))) + bl_oatmu
    oatsig = T.nnet.softplus(
        T.dot(Wl_oatsig_st, T.reshape(ost3, (n_s, n_proc))) +
        bl_oatsig) + sig_min_obs

    # Calculate negative log-likelihood of observations
    p_ot = GaussianNLL(ot, otmu, otsig)
    p_oht = GaussianNLL(oht, ohtmu, ohtsig)
    p_oat = GaussianNLL(oat, oatmu, oatsig)

    # Calculate prior expectation on hidden state from previous state
    prior_stmu = T.tanh(
        T.dot(Wl_stmu_stm1, T.reshape(stm1, (n_s, n_proc))) + bl_stmu)
    prior_stsig = T.nnet.softplus(
        T.dot(Wl_stsig_stm1, T.reshape(stm1, (n_s, n_proc))) +
        bl_stsig) + sig_min_states

    # Explicitly encode expectations on homeostatic state variable
    # While t < 20 the prior computed above is used unchanged; afterwards
    # row 0 of its mean / std is replaced by the fixed values 0.1 / 0.005.
    prior_stmu = ifelse(T.lt(t, 20), prior_stmu,
                        T.set_subtensor(prior_stmu[0, :], 0.1))
    prior_stsig = ifelse(T.lt(t, 20), prior_stsig,
                         T.set_subtensor(prior_stsig[0, :], 0.005))

    # Calculate KL divergence between variational density and prior density
    # using explicit formula for diagonal gaussians
    KL_st = KLGaussianGaussian(stmu, stsig, prior_stmu, prior_stsig)

    # Put free energy functional together
    FEt = KL_st + p_ot + p_oht + p_oat

    return st, post, vt, oat, ot, oht, FEt, KL_st, stmu, stsig, force, p_ot, p_oht, p_oat
Ejemplo n.º 39
0
def interleave_blanks(Y):
    """Return a vector of length ``2 * Y.shape[0] + 1`` holding ``Y`` at the
    odd positions and -1 at every even position."""
    n_labels = Y.shape[0]
    # NOTE(review): the fill value is the Python int -1; confirm the dtype
    # T.alloc infers for it is wide enough for the values stored in Y.
    padded = T.alloc(-1, n_labels * 2 + 1)
    odd_slots = T.arange(n_labels) * 2 + 1
    return T.set_subtensor(padded[odd_slots], Y)
Ejemplo n.º 40
0
    def compute_landmarks_helper(self, moms, init_landmarks):
        """Deform the first nine landmarks with two kernel-weighted steps.

        The first 136 entries of each argument are viewed as a (68, 2) array
        of 2-D points. Only rows 0-8 are kept (the rest are masked to zero).
        For each of those nine control points a displacement is computed as
        the sum of the masked momenta weighted by
        ``exp(-squared_distance / self.sigmaV2)``, and the points are moved by
        ``self.tau`` times that displacement. This is done twice: first from
        the masked initial landmarks, then from the once-deformed shape.

        Parameters
        ----------
        moms
            Flat symbolic vector; its first 136 values are the momenta.
        init_landmarks
            Flat symbolic vector; its first 136 values are the landmarks.

        Returns
        -------
        Flattened (68 * 2) symbolic vector of the deformed landmarks; rows
        9-67 are zero.
        """
        n_points, n_dims, n_active = 68, 2, 9

        moms = T.reshape(moms[:n_points * n_dims], (n_points, n_dims))
        init_landmarks = T.reshape(init_landmarks[:n_points * n_dims],
                                   (n_points, n_dims))

        # 1 on the rows of the active control points, 0 elsewhere.
        mask = T.zeros((n_points, n_dims))
        mask = T.set_subtensor(mask[0:n_active, :],
                               np.ones((n_active, n_dims)))

        masked_landmarks = init_landmarks * mask
        masked_moms = moms * mask

        def displacement(points, mask_anchor):
            # Kernel-weighted sum of the masked momenta, one row per active
            # control point. `mask_anchor` selects whether the broadcast
            # anchor row is masked before the distance is taken.
            disp = T.zeros((n_points, n_dims))
            for k in range(n_active):
                # Row k repeated on every row, to subtract from all points.
                anchor = T.alloc(points[k, :], n_points, n_dims)
                if mask_anchor:
                    anchor = anchor * mask
                weight_val = T.exp(
                    -T.sum((anchor - points)**2, axis=1) / self.sigmaV2)
                # Same scalar weight for both coordinates of a point.
                weight = T.zeros((n_points, n_dims))
                weight = T.set_subtensor(weight[:, 0], weight_val)
                weight = T.set_subtensor(weight[:, 1], weight_val)
                disp = T.set_subtensor(
                    disp[k, :], T.sum(weight * masked_moms, axis=0))
            return disp

        # First step: anchors are masked, exactly as the points themselves.
        deformed = masked_landmarks + (
            displacement(masked_landmarks, True) * self.tau)

        # Second step: anchors are taken from the deformed shape unmasked;
        # the extra rows are harmless because the momenta there are zero.
        second_step = displacement(deformed, False)

        output = (deformed + second_step * self.tau).flatten()
        return output
Ejemplo n.º 41
0
 def test_setsubtensor2(self):
     # Check R-op / L-op on set_subtensor when the tensor being written
     # into is a shared variable of length 10 and the first four entries
     # are replaced by the first four entries of self.x.
     init_vals = self.rng.uniform(size=(10,))
     shared_vec = theano.shared(
         numpy.asarray(init_vals, theano.config.floatX))
     replaced = tensor.set_subtensor(shared_vec[:4], self.x[:4])
     self.check_rop_lop(replaced, (10,))
Ejemplo n.º 42
0
 def clip_around_zero(x, threshold=0.2):
     """Return ``x`` with every entry strictly between ``-threshold`` and
     ``threshold`` replaced by 0."""
     below_upper = x < threshold
     above_lower = x > -threshold
     near_zero = T.bitwise_and(below_upper, above_lower).nonzero()
     return T.set_subtensor(x[near_zero], 0)
Ejemplo n.º 43
0
def scan(fn,
         sequences=None,
         outputs_info=None,
         non_sequences=None,
         n_steps=None,
         truncate_gradient=-1,
         go_backwards=False,
         mode=None,
         name=None,
         options=None,
         profile=False):
    """
    This function constructs and applies a Scan op to the provided
    arguments.

    :param fn:
        ``fn`` is a function that describes the operations involved in one
        step of ``scan``. ``fn`` should construct variables describing the
        output of one iteration step. It should expect as input theano
        variables representing all the slices of the input sequences
        and previous values of the outputs, as well as all other arguments
        given to scan as ``non_sequences``. The order in which scan passes
        these variables to ``fn``  is the following :

        * all time slices of the first sequence
        * all time slices of the second sequence
        * ...
        * all time slices of the last sequence
        * all past slices of the first output
        * all past slices of the second otuput
        * ...
        * all past slices of the last output
        * all other arguments (the list given as `non_sequences` to
            scan)

        The order of the sequences is the same as the one in the list
        `sequences` given to scan. The order of the outputs is the same
        as the order of ``output_info``. For any sequence or output the
        order of the time slices is the same as the one in which they have
        been given as taps. For example if one writes the following :

        .. code-block:: python

            scan(fn, sequences = [ dict(input= Sequence1, taps = [-3,2,-1])
                                 , Sequence2
                                 , dict(input =  Sequence3, taps = 3) ]
                   , outputs_info = [ dict(initial =  Output1, taps = [-3,-5])
                                    , dict(initial = Output2, taps = None)
                                    , Output3 ]
                   , non_sequences = [ Argument1, Argument 2])

        ``fn`` should expect the following arguments in this given order:

        #. ``Sequence1[t-3]``
        #. ``Sequence1[t+2]``
        #. ``Sequence1[t-1]``
        #. ``Sequence2[t]``
        #. ``Sequence3[t+3]``
        #. ``Output1[t-3]``
        #. ``Output1[t-5]``
        #. ``Output3[t-1]``
        #. ``Argument1``
        #. ``Argument2``

        The list of ``non_sequences`` can also contain shared variables
        used in the function, though ``scan`` is able to figure those
        out on its own so they can be skipped. For the clarity of the
        code we recommand though to provide them to scan. To some extend
        ``scan`` can also figure out other ``non sequences`` (not shared)
        even if not passed to scan (but used by `fn`). A simple example of
        this would be :

        .. code-block:: python

            import theano.tensor as TT
            W   = TT.matrix()
            W_2 = W**2
            def f(x):
                return TT.dot(x,W_2)

        The function is expected to return two things. One is a list of
        outputs ordered in the same order as ``outputs_info``, with the
        difference that there should be only one output variable per
        output initial state (even if no tap value is used). Secondly
        `fn` should return an update dictionary (that tells how to
        update any shared variable after each iteration step). The
        dictionary can optionally be given as a list of tuples. There is
        no constraint on the order of these two list, ``fn`` can return
        either ``(outputs_list, update_dictionary)`` or
        ``(update_dictionary, outputs_list)`` or just one of the two (in
        case the other is empty).

        To use ``scan`` as a while loop, the user needs to change the
        function ``fn`` such that also a stopping condition is returned.
        To do so, he/she needs to wrap the condition in an ``until`` class.
        The condition should be returned as a third element, for example:

        .. code-block:: python

            ...
            return [y1_t, y2_t], {x:x+1}, theano.scan_module.until(x < 50)

        Note that a number of steps (considered in here as the maximum
        number of steps ) is still required even though a condition is
        passed (and it is used to allocate memory if needed). = {}):

    :param sequences:
        ``sequences`` is the list of Theano variables or dictionaries
        describing the sequences ``scan`` has to iterate over. If a
        sequence is given as wrapped in a dictionary, then a set of optional
        information can be provided about the sequence. The dictionary
        should have the following keys:

        * ``input`` (*mandatory*) -- Theano variable representing the
          sequence.

        * ``taps`` -- Temporal taps of the sequence required by ``fn``.
          They are provided as a list of integers, where a value ``k``
          impiles that at iteration step ``t`` scan will pass to ``fn``
          the slice ``t+k``. Default value is ``[0]``

        Any Theano variable in the list ``sequences`` is automatically
        wrapped into a dictionary where ``taps`` is set to ``[0]``


    :param outputs_info:
        ``outputs_info`` is the list of Theano variables or dictionaries
        describing the initial state of the outputs computed
        recurrently. When this initial states are given as dictionary
        optional information can be provided about the output corresponding
        to these initial states. The dictionary should have the following
        keys:

        * ``initial`` -- Theano variable that represents the initial
          state of a given output. In case the output is not computed
          recursively (think of a map) and does not require a initial
          state this field can be skiped. Given that only the previous
          time step of the output is used by ``fn`` the initial state
          should have the same shape as the output. If multiple time
          taps are used, the initial state should have one extra
          dimension that should cover all the possible taps. For example
          if we use ``-5``, ``-2`` and ``-1`` as past taps, at step 0,
          ``fn`` will require (by an abuse of notation) ``output[-5]``,
          ``output[-2]`` and ``output[-1]``. This will be given by
          the initial state, which in this case should have the shape
          (5,)+output.shape. If this variable containing the initial
          state is called ``init_y`` then ``init_y[0]`` *corresponds to*
          ``output[-5]``. ``init_y[1]`` *correponds to* ``output[-4]``,
          ``init_y[2]`` corresponds to ``output[-3]``, ``init_y[3]``
          coresponds to ``output[-2]``, ``init_y[4]`` corresponds to
          ``output[-1]``. While this order might seem strange, it comes
          natural from splitting an array at a given point. Assume that
          we have a array ``x``, and we choose ``k`` to be time step
          ``0``. Then our initial state would be ``x[:k]``, while the
          output will be ``x[k:]``. Looking at this split, elements in
          ``x[:k]`` are ordered exactly like those in ``init_y``.
        * ``taps`` -- Temporal taps of the output that will be pass to
          ``fn``. They are provided as a list of *negative* integers,
          where a value ``k`` implies that at iteration step ``t`` scan
          will pass to ``fn`` the slice ``t+k``.

        ``scan`` will follow this logic if partial information is given:

        * If an output is not wrapped in a dictionary, ``scan`` will wrap
          it in one assuming that you use only the last step of the output
          (i.e. it makes your tap value list equal to [-1]).
        * If you wrap an output in a dictionary and you do not provide any
          taps but you provide an initial state it will assume that you are
          using only a tap value of -1.
        * If you wrap an output in a dictionary but you do not provide any
          initial state, it assumes that you are not using any form of
          taps.
        * If you provide a ``None`` instead of a variable or a empty
          dictionary ``scan`` assumes that you will not use any taps for
          this output (like for example in case of a map)

        If ``outputs_info`` is an empty list or None, ``scan`` assumes
        that no tap is used for any of the outputs. If information is
        provided just for a subset of the outputs an exception is
        raised (because there is no convention on how scan should map
        the provided information to the outputs of ``fn``)


    :param non_sequences:
        ``non_sequences`` is the list of arguments that are passed to
        ``fn`` at each steps. One can opt to exclude variable
        used in ``fn`` from this list as long as they are part of the
        computational graph, though for clarity we encourage not to do so.


    :param n_steps:
        ``n_steps`` is the number of steps to iterate given as an int
        or Theano scalar. If any of the input sequences do not have
        enough elements, scan will raise an error. If the *value is 0* the
        outputs will have *0 rows*. If the value is negative, ``scan``
        will run backwards in time. If the ``go_backwards`` flag is already
        set and also ``n_steps`` is negative, ``scan`` will run forward
        in time. If n stpes is not provided, ``scan`` will figure
        out the amount of steps it should run given its input sequences.


    :param truncate_gradient:
        ``truncate_gradient`` is the number of steps to use in truncated
        BPTT.  If you compute gradients through a scan op, they are
        computed using backpropagation through time. By providing a
        different value then -1, you choose to use truncated BPTT instead
        of classical BPTT, where you go for only ``truncate_gradient``
        number of steps back in time.


    :param go_backwards:
        ``go_backwards`` is a flag indicating if ``scan`` should go
        backwards through the sequences. If you think of each sequence
        as indexed by time, making this flag True would mean that
        ``scan`` goes back in time, namely that for any sequence it
        starts from the end and goes towards 0.


    :param name:
        When profiling ``scan``, it is crucial to provide a name for any
        instance of ``scan``. The profiler will produce an overall
        profile of your code as well as profiles for the computation of
        one step of each instance of ``scan``. The ``name`` of the instance
        appears in those profiles and can greatly help to disambiguate
        information.

    :param mode:
        It is recommended to leave this argument to None, especially
        when profiling ``scan`` (otherwise the results are not going to
        be accurate). If you prefer the computations of one step of
        ``scan`` to be done differently then the entire function, you
        can use this parameter to describe how the computations in this
        loop are done (see ``theano.function`` for details about
        possible values and their meaning).

    :param profile:
        Flag or string. If true, or different from the empty string, a
        profile object will be created and attached to the inner graph of
        scan. In case ``profile`` is True, the profile object will have the
        name of the scan instance, otherwise it will have the passed string.
        Profile object collect (and print) information only when running the
        inner graph with the new cvm linker ( with default modes,
        other linkers this argument is useless)

    :rtype: tuple
    :return: tuple of the form (outputs, updates); ``outputs`` is either a
             Theano variable or a list of Theano variables representing the
             outputs of ``scan`` (in the same order as in
             ``outputs_info``). ``updates`` is a subclass of dictionary
             specifying the
             update rules for all shared variables used in scan
             This dictionary should be passed to ``theano.function`` when
             you compile your function. The change compared to a normal
             dictionary is that we validate that keys are SharedVariable
             and addition of those dictionary are validated to be consistent.
    """
    # Note : see the internal documentation of the scan op for naming
    # conventions and all other details
    if options is None:
        options = {}
    rvals = scan_utils.canonical_arguments(sequences,
                                           outputs_info,
                                           non_sequences,
                                           go_backwards,
                                           n_steps)
    inputs, states_and_outputs_info, parameters, T = rvals
    # If we provided a known number of steps ( before compilation)
    # and if that number is 1 or -1, then we can skip the Scan Op,
    # and just apply the inner function once
    # To do that we check here to see the nature of n_steps
    T_value = None
    if isinstance(n_steps, (float, int)):
        T_value = int(n_steps)
    else:
        try:
            T_value = opt.get_constant_value(n_steps)
        except (TypeError, AttributeError):
            T_value = None

    if T_value in (1, -1):
        return one_step_scan(fn,
                             inputs,
                             states_and_outputs_info,
                             parameters,
                             truncate_gradient)

    # 1. Variable representing the current time step
    t = scalar_shared(numpy.int64(0), name='t')

    # 2. Allocate memory for the states of scan.
    mintaps = []
    lengths = []
    for pos, arg_info in enumerate(states_and_outputs_info):
        if arg_info.get('taps', None) == [-1]:
            mintaps.append(1)
            lengths.append(scalar_shared(numpy.int64(0),
                                         name='l%d' % pos))
            arg_info['initial'] = scan_utils.expand(tensor.unbroadcast(
                    tensor.shape_padleft(arg_info['initial']), 0), T)
        elif arg_info.get('taps', None):
            if numpy.any(numpy.array(arg_info.get('taps', [])) > 0):
                # Make sure we do not have requests for future values of a
                # sequence we can not provide such values
                raise ValueError('Can not use future taps of outputs',
                                 arg_info)
            mintap = abs(numpy.min(arg_info['taps']))
            lengths.append(scalar_shared(numpy.int64(0),
                                         name='l%d' % pos))
            mintaps.append(mintap)
            arg_info['initial'] = scan_utils.expand(
                arg_info['initial'][:mintap], T)
        else:
            mintaps.append(0)
            lengths.append(scalar_shared(numpy.int64(0),
                                         name='l%d' % pos))

    # 3. Generate arguments for the function passed to scan. This will
    # function will return the outputs that need to be computed at every
    # timesteps
    inputs_slices = [input[t] for input in inputs]
    states_slices = []
    for n, state in enumerate(states_and_outputs_info):
        # Check if it is actually a state and not an output
        if mintaps[n] != 0:
            for k in state['taps']:
                states_slices.append(
                    state['initial'][(t + mintaps[n] + k) % lengths[n]])

    # 4. Construct outputs that are to be computed by the inner
    # function of scan
    args = inputs_slices + states_slices + parameters
    cond, states_and_outputs, updates = \
            scan_utils.get_updates_and_outputs(fn(*args))

    # User is allowed to provide no information if it only behaves like a
    # map
    if (len(states_and_outputs) != len(states_and_outputs_info) and
        len(states_and_outputs_info) == 0):
        mintaps = [0] * len(states_and_outputs)

    # 5. Construct the scan op
    # 5.1 Construct list of shared variables with updates (those that
    # can be treated as states (i.e. of TensorType) and those that can not
    # (like Random States)

    if cond is not None:
        _cond = [cond]
    else:
        _cond = []
    rvals = rebuild_collect_shared(
        states_and_outputs + _cond,
        updates=updates,
        rebuild_strict=True,
        copy_inputs_over=True,
        no_default_updates=False)

    # extracting the arguments
    input_variables, cloned_outputs, other_rval = rvals
    clone_d, update_d, update_expr, shared_inputs = other_rval
    additional_input_states = []
    additional_output_states = []
    additional_lengths = []
    additional_mintaps = []
    original_numeric_shared_variables = []

    non_numeric_input_states = []
    non_numeric_output_states = []
    original_non_numeric_shared_variables = []
    pos = len(lengths)
    for sv in shared_inputs:
        if sv in update_d:
            if isinstance(sv, TensorType):
                # We can treat it as a sit sot
                nw_state = scan_utils.expand(
                    tensor.unbroadcast(tensor.shape_padleft(sv, 0), T))
                additional_lengths.append(scalar_shared(numpy.int64(0),
                                                       name='l%d' % pos))
                pos = pos + 1
                additional_mintaps.append(1)
                additional_input_states.append(nw_state)
                additional_output_states.append(
                    scan_utils.clone(tensor.set_subtensor(
                        nw_state[(t + 1) % additional_lengths[-1]],
                        update_d[sv])))
                original_numeric_shared_variables.append(sv)
            else:
                non_numeric_input_states.append(sv)
                non_numeric_output_states.append(update_d[sv])
                original_non_numeric_shared_variables.append(sv)

    # 5.2 Collect inputs/outputs of the inner function
    inputs = []
    outputs = []
    for n, mintap in enumerate(mintaps):
        if mintap != 0:
            input_state = states_and_outputs_info[n]['initial']
            inputs.append(input_state)
            outputs.append(
                tensor.set_subtensor(
                    input_state[(t + mintap) % lengths[n]],
                    states_and_outputs[n]))
        else:
            mem_buffer = scan_utils.allocate_memory(
                T, states_and_outputs_info[n], states_and_outputs[n])
            inputs.append(output)
            outputs.append(
                tensor.set_subtensor(output[t % lengths[n]],
                                     states_and_outputs[n]))
    inputs.extend(additional_input_states)
    outputs.extend(additional_output_states)
    lengths.extend(additional_lengths)
    mintaps.extend(additional_mintaps)
    inputs.extend(non_numeric_input_states)
    outputs.extend(non_numeric_output_states)
    all_other_inputs = gof.graph.inputs(outputs)
    parameters = [x for x in all_other_inputs
                  if (x not in inputs and x not in lengths and x is not t
                      and isinstance(x, gof.Variable) and
                      not isinstance(x, gof.Constant))]
    inputs.extend(parameters)
    # 5.3 Construct the the options dictionary
    options['name'] = name
    options['profile'] = profile
    options['mode'] = mode
    options['inplace'] = False
    options['gpu'] = False
    options['truncate_gradient'] = truncate_gradient
    options['hash_inner_graph'] = 0
    # 5.4 Construct the ScanOp instance
    local_op = scan_op.ScanOp(inputs=inputs,
                              outputs=outputs,
                              lengths=lengths,
                              switches=[],
                              mintaps=mintaps,
                              index=t,
                              options=options,
                              as_repeatUntil=cond)
    # Note that we get here all the outputs followed by the update rules to
    # the shared variables we had in our scan
    # we know that we have (in this given order):
    #   * len(states_and_outputs) real outputs
    #   * len(additional_input_states) updates for numeric shared variable
    #   * len(non_numeric_input_states) updates for non numeric shared
    #   variables
    scan_inputs = [T] + inputs
    scan_outputs_update_rules = scan_utils.to_list(local_op(*scan_inputs))
    # 5.5 Collect outputs and add permutation object
    scan_outputs = []
    for pos in xrange(len(states_and_outputs)):
        out = scan_utils.ScanPermutation(mintaps[pos])(
            scan_outputs_update_rules[pos], t)
        scan_outputs.append(out[mintap:])
    # 5.6 Construct updates dictionary
    update_rules = scan_outputs_update_rules[len(states_and_outputs):]
    updates = {}
    for v, u in izip(original_numeric_shared_variables,
                     update_rules[:len(additional_input_states)]):
        updates[v] = u[-1]
    for v, u in izip(original_non_numeric_shared_variables,
                     update_rules[len(additional_input_states):]):
        updates[v] = u
    # Step 5.7 We are done and can return everything back to the user
    return scan_outputs, updates
Ejemplo n.º 44
0
 def encode(self, t, vecs):
     """Compose two entries of ``vecs`` and write the result back.

     ``vecs[t[0]]`` and ``vecs[t[1]]`` are passed to ``self.compose``;
     the first value it returns is stored at ``vecs[t[2]]``.

     Returns a pair: the ``vecs`` expression with that entry replaced,
     and the second value returned by ``self.compose``.
     """
     left_child = vecs[t[0]]
     right_child = vecs[t[1]]
     composed, loss_rec = self.compose(left_child, right_child)
     updated_vecs = T.set_subtensor(vecs[t[2]], composed)
     return updated_vecs, loss_rec
Ejemplo n.º 45
0
 def inner(rot_param, base_relative):
     """Right-multiply ``base_relative`` by a 4x4 matrix built from ``rot_param``.

     The 4x4 matrix is an identity (same dtype as ``base_relative``) whose
     upper-left 3x3 block is replaced by the matrix returned from
     ``euler_angles_to_rotation_matrix(rot_param)``.
     """
     homogeneous = T.set_subtensor(
         T.eye(4, dtype=base_relative.dtype)[:3, :3],
         euler_angles_to_rotation_matrix(rot_param))
     return T.dot(base_relative, homogeneous)
Ejemplo n.º 46
0
 def test_setsubtensor1(self):
     """Run ``check_rop_lop`` on ``set_subtensor`` over the first three entries of ``self.x``.

     The values written are held in a shared variable initialised with
     three uniform random numbers of dtype ``theano.config.floatX``.
     """
     random_values = self.rng.uniform(size=(3,))
     shared_values = theano.shared(
         numpy.asarray(random_values, dtype=theano.config.floatX))
     replaced = tensor.set_subtensor(self.x[:3], shared_values)
     self.check_rop_lop(replaced, self.in_shape)
Ejemplo n.º 47
0
 def _batchAlign(self, w_tb, mask_b):
     """Return ``mask_b`` with the entries indexed by ``self.tokmap[w_tb]`` set to 0."""
     positions = self.tokmap[w_tb]
     return T.set_subtensor(mask_b[positions], 0)
Ejemplo n.º 48
0
 def compute_absolute(i, parent, relative, absolutes):
     """Store ``dot(absolutes[parent], relative)`` at ``absolutes[i]`` and return the result."""
     # Hack carried over from the original author: a parent of -1 indexes the
     # last element (which is set to zero), because Theano did not accept an
     # ifelse here.
     composed = T.dot(absolutes[parent], relative)
     return T.set_subtensor(absolutes[i], composed)
Ejemplo n.º 49
0
    def __theano_trainx__(self, n_in, n_hidden):
        """
        Run one pass over the training sequences during the training phase.

        Builds the symbolic graph for a masked, mini-batched recurrent pass
        (update gate, reset gate and candidate state, see ``recurrence``),
        derives the cost and the parameter updates, and compiles the result
        into ``self.seq_trainx``.

        :param n_in: not referenced by the code in this method; it only
            appears in the shape comments.
        :param n_hidden: trailing dimension used when broadcasting
            ``self.h0x`` and ``self.bix`` over the batch.

        Nothing is returned. ``self.seq_trainx`` takes one int32 vector
        (``start_end``) used to index ``self.tra_buys_masks``,
        ``self.tra_buys_neg_masks`` and ``self.tra_masks``; it applies
        ``seq_updates`` and returns ``-upq``.
        """
        # self.alpha_lambda = ['alpha', 'lambda', 'fea_random_zero']
        uix, whx = self.uix, self.whx

        tra_mask = T.imatrix()                          # shape=(n, 157)
        actual_batch_size = tra_mask.shape[0]
        seq_length = T.max(T.sum(tra_mask, axis=1))     # use the longest sequence length in the mini-batch as seq_length
        mask = tra_mask.T                               # shape=(157, n)

        h0x = T.alloc(self.h0x, actual_batch_size, n_hidden)      # shape=(n, 40)
        bix = T.alloc(self.bix, actual_batch_size, 3, n_hidden)   # shape=(n, 3, 40), n_hidden goes last
        bix = bix.dimshuffle(1, 2, 0)                             # shape=(3, 40, n)

        # Input side: only the purchased items need to be fed in.
        pidxs, qidxs = T.imatrix(), T.imatrix()     # TensorType(int32, matrix)
        ixps = self.lt[pidxs]       # shape((actual_batch_size, seq_length, n_in))
        ixps = ixps.dimshuffle(1, 0, 2)               # shape=(seq_length, batch_size, n_in)

        uiq_ps = Unique(False, False, False)(pidxs)  # then de-duplicate
        uiq_ix = self.lt[uiq_ps]

        # Output side: h*w gives the score.
        yxps, yxqs = self.vyx[pidxs], self.vyx[qidxs]
        yxps, yxqs = yxps.dimshuffle(1, 0, 2), yxqs.dimshuffle(1, 0, 2)

        pqs = T.concatenate((pidxs, qidxs))         # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then de-duplicate
        uiq_yx = self.vyx[uiq_pqs]

        # English rendering of the author's note kept in the string below:
        #   Given the positive/negative samples at time t and the hidden state
        #   at time t-1, compute the current hidden state and the current
        #   loss. The time index t is omitted from the formulas.
        #   Property: T.dot((m, n), (n, )) has shape (m, ), i.e. every row of
        #   the matrix is multiplied with the (n, ) vector.
        #   Property: T.dot((n, ), (n, )) yields a scalar.
        #   ("r 点乘 h_pre1" means the element-wise product of r and h_pre1.)
        # NOTE(review): the note's formulas differ from the code in
        # ``recurrence``: the code computes h = (1 - z) * h_pre1 + z * c and
        # loss = log(sigmoid(upq)). Confirm which one is intended.
        """
        输入t时刻正负样本、t-1时刻隐层,计算当前隐层、当前损失. 公式里省略了时刻t
        # 根据性质:T.dot((m, n), (n, ))得到shape=(m, ),且是矩阵每行与(n, )相乘
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r 点乘 h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # 根据性质:T.dot((n, ), (n, ))得到scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(ixp_t, yxp_t, yxq_t, mask_t, hx_t_pre1):
            # One scan step: returns the new hidden state and the masked loss.
            # Features and hidden state are both arranged as shape=(batch_size, n_hidden)=(n, 20)
            z_rx = sigmoid(T.dot(uix[:2], ixp_t.T) +
                           T.dot(whx[:2], hx_t_pre1.T) + bix[:2])   # shape=(2, 20, n)
            zx, rx = z_rx[0].T, z_rx[1].T                           # shape=(n, 20)
            cx = tanh(T.dot(uix[2], ixp_t.T) +
                      T.dot(whx[2], (rx * hx_t_pre1).T) + bix[2])    # shape=(20, n)
            hx_t = (T.ones_like(zx) - zx) * hx_t_pre1 + zx * cx.T     # shape=(n, 20)
            # Preference error; note it is computed from the previous hidden state.
            upq_t = T.sum(hx_t_pre1 * (yxp_t - yxq_t), axis=1)     # shape=(n, )
            loss_t = T.log(sigmoid(upq_t))                      # shape=(n, )
            loss_t *= mask_t                                    # multiplying by the 0/1 mask only here, at the loss, is sufficient
            return [hx_t, loss_t]                         # shape=(n, 20), (n, )
        [hx, loss], _ = theano.scan(
            fn=recurrence,
            sequences=[ixps, yxps, yxqs, mask],
            outputs_info=[h0x, None],
            n_steps=seq_length)     # ensures the loop only runs up to the longest valid position

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        # Only the bias term of the L2 penalty is divided by the batch size.
        seq_l2_sq = (
            T.sum([T.sum(par ** 2) for par in [uix, whx, yxps, yxqs, ixps]]) +
            T.sum([T.sum(par ** 2) for par in [bix]]) / actual_batch_size)
        upq = T.sum(loss)
        seq_costs = (
            - upq / actual_batch_size +
            0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.paramsx)
        seq_updates = [(par, par - lr * gra) for par, gra in zip(self.paramsx, seq_grads)]
        # self.lt and self.vyx are updated only at the de-duplicated indices
        # (uiq_ps / uiq_pqs) rather than over the whole table.
        update_ix = T.set_subtensor(uiq_ix, uiq_ix - lr * T.grad(seq_costs, self.lt)[uiq_ps])
        update_yx = T.set_subtensor(uiq_yx, uiq_yx - lr * T.grad(seq_costs, self.vyx)[uiq_pqs])
        seq_updates.append((self.lt, update_ix))
        seq_updates.append((self.vyx, update_yx))   # appended directly into seq_updates
        # ----------------------------------------------------------------------------

        # Given the positive/negative sample sequences and the other parameters,
        # update the variables and return the loss.
        # The data is supplied through givens.
        start_end = T.ivector()
        self.seq_trainx = theano.function(
            inputs=[start_end],
            outputs=-upq,
            updates=seq_updates,
            givens={
                pidxs: self.tra_buys_masks[start_end],       # type is TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[start_end],   # T.ivector() has type TensorType(int32, vector)
                tra_mask: self.tra_masks[start_end]})
Ejemplo n.º 50
0
def euler_angles_to_rotation_matrix(xzy):
    """Build a 3x3 rotation matrix expression from three angles.

    ``xzy[0]`` is used as the rotation about x, ``xzy[1]`` about z and
    ``xzy[2]`` about y. The per-axis matrices are combined as
    ``Rz . Ry . Rx``. All three matrices take the dtype of ``xzy[0]``.
    """
    angle_x = xzy[0]
    angle_y = xzy[2]
    angle_z = xzy[1]
    matrix_dtype = angle_x.dtype

    def axis_rotation(angle, first, sin_row, sin_col, last):
        # Start from the identity and fill the 2x2 rotation block:
        # cos on the two diagonal cells, sin at (sin_row, sin_col) and
        # its negation at the mirrored cell.
        rot = T.eye(3, dtype=matrix_dtype)
        rot = T.set_subtensor(rot[first, first], T.cos(angle))
        rot = T.set_subtensor(rot[sin_row, sin_col], T.sin(angle))
        rot = T.set_subtensor(rot[sin_col, sin_row], -rot[sin_row, sin_col])
        rot = T.set_subtensor(rot[last, last], rot[first, first])
        return rot

    rot_x = axis_rotation(angle_x, 1, 2, 1, 2)
    rot_y = axis_rotation(angle_y, 0, 0, 2, 2)
    rot_z = axis_rotation(angle_z, 0, 1, 0, 1)

    rot_zy = T.dot(rot_z, rot_y)
    return T.dot(rot_zy, rot_x)
Ejemplo n.º 51
0
    def run_experiment(self, dataset, word_embedding, exp_name):

        # load parameters
        num_maps_word = self.options["num_maps_word"]
        drop_rate_word = self.options["drop_rate_word"]
        drop_rate_sentence = self.options["drop_rate_sentence"]
        word_window = self.options["word_window"]
        word_dim = self.options["word_dim"]
        k_max_word = self.options["k_max_word"]
        k_max_sentence = self.options["k_max_sentence"]
        batch_size = self.options["batch_size"]
        rho = self.options["rho"]
        epsilon = self.options["epsilon"]
        norm_lim = self.options["norm_lim"]
        max_iteration = self.options["max_iteration"]
        k_portion = self.options["k_portion"]
        num_maps_sentence = self.options["num_maps_sentence"]
        sentence_window = self.options["sentence_window"]

        sentence_len = len(dataset[0][0][0][0])
        sentence_num = len(dataset[0][0][0])

        # compute the sentence flags
        train_flags, test_flags = construct_sentence_flag(dataset)
        train_k_value = construct_dynamic_k(train_flags, k_portion)
        test_k_value = construct_dynamic_k(test_flags, k_portion)

        train_flags = theano.shared(value=np.asarray(
            train_flags, dtype=theano.config.floatX),
                                    borrow=True)
        test_flags = theano.shared(value=np.asarray(
            test_flags, dtype=theano.config.floatX),
                                   borrow=True)

        train_k = theano.shared(value=np.asarray(train_k_value,
                                                 dtype=theano.config.floatX),
                                borrow=True)
        test_k = theano.shared(value=np.asarray(test_k_value,
                                                dtype=theano.config.floatX),
                               borrow=True)

        # define the parameters
        x = T.tensor3("x")
        y = T.ivector("y")
        sen_flags = T.matrix("flag")
        sen_k = T.matrix("sen_k")
        rng = np.random.RandomState(1234)

        words = theano.shared(value=np.asarray(word_embedding,
                                               dtype=theano.config.floatX),
                              name="embedding",
                              borrow=True)
        zero_vector_tensor = T.vector()
        zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
        set_zero = theano.function(
            [zero_vector_tensor],
            updates=[(words, T.set_subtensor(words[0, :],
                                             zero_vector_tensor))])

        x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape(
            (x.shape[0] * x.shape[1], 1, x.shape[2], words.shape[1]))

        dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

        # compute convolution on words layer
        word_filter_shape = (num_maps_word, 1, word_window, word_dim)
        word_pool_size = (sentence_len - word_window + 1, 1)
        dropout_word_conv = nn.ConvPoolLayer(rng,
                                             input=dropout_x_emb,
                                             input_shape=None,
                                             filter_shape=word_filter_shape,
                                             pool_size=word_pool_size,
                                             activation=Tanh,
                                             k=k_max_word)
        sent_vec_dim = num_maps_word * k_max_word
        dropout_sent_vec = dropout_word_conv.output.reshape(
            (x.shape[0], 1, x.shape[1], sent_vec_dim))
        dropout_sent_vec = nn.dropout_from_layer(rng, dropout_sent_vec,
                                                 drop_rate_sentence)

        word_conv = nn.ConvPoolLayer(rng,
                                     input=dropout_x_emb *
                                     (1 - drop_rate_word),
                                     input_shape=None,
                                     filter_shape=word_filter_shape,
                                     pool_size=word_pool_size,
                                     activation=Tanh,
                                     k=k_max_word,
                                     W=dropout_word_conv.W,
                                     b=dropout_word_conv.b)
        sent_vec = word_conv.output.reshape(
            (x.shape[0], 1, x.shape[1], sent_vec_dim))
        sent_vec = sent_vec * (1 - drop_rate_sentence)

        # construct doc level context information
        sent_filter_shape = (num_maps_sentence, 1, sentence_window,
                             sent_vec_dim)
        sent_pool_size = (sentence_num - sentence_window + 1, 1)
        dropout_sent_conv = nn.ConvPoolLayer(rng,
                                             input=dropout_sent_vec,
                                             input_shape=None,
                                             filter_shape=sent_filter_shape,
                                             pool_size=sent_pool_size,
                                             activation=Tanh,
                                             k=k_max_sentence)

        sent_conv = nn.ConvPoolLayer(rng,
                                     input=sent_vec,
                                     input_shape=None,
                                     filter_shape=sent_filter_shape,
                                     pool_size=sent_pool_size,
                                     activation=Tanh,
                                     k=k_max_sentence,
                                     W=dropout_sent_conv.W,
                                     b=dropout_sent_conv.b)

        # reshape the sentence vec
        dropout_sent_vec = dropout_sent_vec.reshape(
            (x.shape[0], x.shape[1], sent_vec_dim))
        sent_vec = sent_vec.reshape((x.shape[0], x.shape[1], sent_vec_dim))

        dropout_doc_vec = dropout_sent_conv.output.flatten(2)
        doc_vec = sent_conv.output.flatten(2)
        doc_vec_dim = num_maps_sentence * k_max_sentence

        # concatenate the doc vec along with the sentence vector
        con_dropout_sent_vec = T.concatenate([
            dropout_sent_vec,
            T.tile(dropout_doc_vec, [1, x.shape[1]]).reshape(
                (x.shape[0], x.shape[1], doc_vec_dim))
        ],
                                             axis=2).reshape(
                                                 (x.shape[0] * x.shape[1],
                                                  sent_vec_dim + doc_vec_dim))

        con_sent_vec = T.concatenate([
            sent_vec,
            T.tile(doc_vec, [1, x.shape[1]]).reshape(
                (x.shape[0], x.shape[1], doc_vec_dim))
        ],
                                     axis=2).reshape(
                                         (x.shape[0] * x.shape[1],
                                          sent_vec_dim + doc_vec_dim))

        # construct sentence level classifier
        n_in = sent_vec_dim + doc_vec_dim
        n_out = 1
        sen_W_values = np.zeros((n_in, n_out), dtype=theano.config.floatX)
        sen_W = theano.shared(value=sen_W_values, borrow=True, name="logis_W")
        sen_b_value = nn.as_floatX(0.0)
        sen_b = theano.shared(value=sen_b_value, borrow=True, name="logis_b")

        drop_sent_prob = T.nnet.sigmoid(
            T.dot(con_dropout_sent_vec, sen_W) + sen_b)
        sent_prob = T.nnet.sigmoid(T.dot(con_sent_vec, sen_W) + sen_b)

        # reform the sent vec to doc level
        drop_sent_prob = drop_sent_prob.reshape((x.shape[0], x.shape[1]))
        sent_prob = sent_prob.reshape((x.shape[0], x.shape[1]))

        # using the dynamic top k max probability as bag level probability
        # compute the dynamic K for each documents
        drop_doc_prob = T.sum(T.sort(drop_sent_prob, axis=1) * sen_k,
                              axis=1) / T.sum(sen_k, axis=1)
        doc_prob = T.sum(T.sort(sent_prob, axis=1) * sen_k, axis=1) / T.sum(
            sen_k, axis=1)

        drop_doc_prob = T.clip(drop_doc_prob, nn.as_floatX(1e-7),
                               nn.as_floatX(1 - 1e-7))
        doc_prob = T.clip(doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))

        doc_preds = doc_prob > 0.5

        # instance level cost
        drop_sent_cost = T.sum(
            T.maximum(
                0.0,
                nn.as_floatX(.5) - T.sgn(
                    drop_sent_prob.reshape((x.shape[0] * x.shape[1], n_out)) -
                    nn.as_floatX(0.6)) * T.dot(con_dropout_sent_vec, sen_W)) *
            sen_flags.reshape(
                (x.shape[0] * x.shape[1], n_out))) / T.sum(sen_flags)

        # we need that the most positive instance at least 0.7 in pos bags
        # and at most 0.1 in neg bags
        # we want the number of positive instance should at least ...
        # and non of the positive instances in the negative bags

        # compute the number of positive instance
        positive_count = T.sum((drop_sent_prob * sen_flags) > 0.5, axis=1)
        pos_cost = T.maximum(nn.as_floatX(0.0),
                             positive_count - T.sum(sen_k, axis=1))
        neg_cost = T.maximum(nn.as_floatX(0.0), positive_count)
        penal_cost = T.mean(pos_cost * y + neg_cost * (nn.as_floatX(1.0) - y))

        # add the sentence similarity constrains
        sen_sen = T.dot(con_dropout_sent_vec, con_dropout_sent_vec.T)
        sen_sqr = T.sum(con_dropout_sent_vec**2, axis=1)
        sen_sqr_left = sen_sqr.dimshuffle(0, 'x')
        sen_sqr_right = sen_sqr.dimshuffle('x', 0)
        sen_sim_matrix = sen_sqr_left - 2 * sen_sen + sen_sqr_right
        sen_sim_matrix = T.exp(-1 * sen_sim_matrix)

        sen_sim_prob = drop_sent_prob.reshape(
            (x.shape[0] * x.shape[1], 1)) - drop_sent_prob.flatten()
        sen_sim_prob = sen_sim_prob**2

        pos_sen_flags = sen_flags * y.dimshuffle(0, 'x')
        sen_sim_flag = T.dot(
            pos_sen_flags.reshape((x.shape[0] * x.shape[1], 1)),
            pos_sen_flags.reshape((1, x.shape[0] * x.shape[1])))

        sen_sim_cost = T.sum(
            sen_sim_matrix * sen_sim_prob * sen_sim_flag) / T.sum(sen_sim_flag)

        # bag level cost
        drop_bag_cost = T.mean(-y * T.log(drop_doc_prob) * nn.as_floatX(0.6) -
                               (1 - y) * T.log(1 - drop_doc_prob) *
                               nn.as_floatX(0.4))
        drop_cost = drop_bag_cost * nn.as_floatX(0.6) + \
            drop_sent_cost * nn.as_floatX(0.1) + \
            penal_cost * nn.as_floatX(0.5) + \
            sen_sim_cost * nn.as_floatX(0.0001)

        # collect parameters
        self.params.append(words)
        self.params += dropout_word_conv.params
        self.params += dropout_sent_conv.params
        self.params.append(sen_W)
        self.params.append(sen_b)

        grad_updates = nn.sgd_updates_adadelta(self.params, drop_cost, rho,
                                               epsilon, norm_lim)

        # construct the dataset
        # random the
        train_x, train_y = nn.shared_dataset(dataset[0])
        test_x, test_y = nn.shared_dataset(dataset[1])
        test_cpu_y = dataset[1][1]

        n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
        n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

        # construt the model
        index = T.iscalar()
        train_func = theano.function(
            [index], [
                drop_cost, drop_bag_cost, drop_sent_cost, penal_cost,
                sen_sim_cost
            ],
            updates=grad_updates,
            givens={
                x: train_x[index * batch_size:(index + 1) * batch_size],
                y: train_y[index * batch_size:(index + 1) * batch_size],
                sen_flags:
                train_flags[index * batch_size:(index + 1) * batch_size],
                sen_k: train_k[index * batch_size:(index + 1) * batch_size]
            })

        test_func = theano.function(
            [index],
            doc_preds,
            givens={
                x: test_x[index * batch_size:(index + 1) * batch_size],
                sen_k: test_k[index * batch_size:(index + 1) * batch_size]
            })

        get_train_sent_prob = theano.function(
            [index],
            sent_prob,
            givens={x: train_x[index * batch_size:(index + 1) * batch_size]})

        get_test_sent_prob = theano.function(
            [index],
            sent_prob,
            givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

        epoch = 0
        best_score = 0

        log_file = open("./log/%s.log" % exp_name, 'w')

        while epoch <= max_iteration:
            start_time = timeit.default_timer()
            epoch += 1
            costs = []

            for mini_index in np.random.permutation(range(n_train_batches)):
                cost_epoch = train_func(mini_index)
                costs.append(cost_epoch)
                set_zero(zero_vec)

            total_train_cost, train_bag_cost, train_sent_cost, train_penal_cost, train_sim_cost = zip(
                *costs)
            print "Iteration %d, total_cost %f bag_cost %f sent_cost %f penal_cost %f sim cost %f\n" % (
                epoch, np.mean(total_train_cost), np.mean(train_bag_cost),
                np.mean(train_sent_cost), np.mean(train_penal_cost),
                np.mean(train_sim_cost))

            if epoch % 1 == 0:
                test_preds = []
                for i in xrange(n_test_batches):
                    test_y_pred = test_func(i)
                    test_preds.append(test_y_pred)
                test_preds = np.concatenate(test_preds)
                test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

                precision, recall, beta, support = precision_recall_fscore_support(
                    test_cpu_y, test_preds, pos_label=1)

                if beta[1] > best_score or epoch % 5 == 0:
                    best_score = beta[1]
                    # save the sentence vectors
                    train_sens = [
                        get_train_sent_prob(i) for i in range(n_train_batches)
                    ]
                    test_sens = [
                        get_test_sent_prob(i) for i in range(n_test_batches)
                    ]

                    train_sens = np.concatenate(train_sens, axis=0)
                    test_sens = np.concatenate(test_sens, axis=0)

                    out_train_sent_file = "./results/%s_train_sent_%d.vec" % (
                        exp_name, epoch)
                    out_test_sent_file = "./results/%s_test_sent_%d.vec" % (
                        exp_name, epoch)

                    with open(out_test_sent_file,
                              'w') as test_f, open(out_train_sent_file,
                                                   'w') as train_f:
                        cPickle.dump(train_sens, train_f)
                        cPickle.dump(test_sens, test_f)
                    print "Get best performace at %d iteration %f" % (
                        epoch, test_score)
                    log_file.write(
                        "Get best performance at %d iteration %f \n" %
                        (epoch, test_score))

                end_time = timeit.default_timer()
                print "Iteration %d , precision, recall, f1" % epoch, precision, recall, beta
                log_file.write(
                    "Iteration %d, neg precision %f, pos precision %f, neg recall %f pos recall %f , neg f1 %f, pos f1 %f, total_cost %f bag_cost %f sent_cost %f penal_cost %f\n"
                    % (epoch, precision[0], precision[1], recall[0], recall[1],
                       beta[0], beta[1], np.mean(total_train_cost),
                       np.mean(train_bag_cost), np.mean(train_sent_cost),
                       np.mean(train_penal_cost)))
                print "Using time %f m" % ((end_time - start_time) / 60.)
                log_file.write("Uing time %f m\n" %
                               ((end_time - start_time) / 60.))
            end_time = timeit.default_timer()
            print "Iteration %d Using time %f m" % (epoch,
                                                    (end_time - start_time) /
                                                    60.)
            log_file.write("Uing time %f m\n" %
                           ((end_time - start_time) / 60.))
            log_file.flush()

        log_file.close()
Ejemplo n.º 52
0
 def _form1hot(self, hot_x, idx_x, cutoff_x):
     """Return a copy of ``hot_x`` with 1.0 written at selected positions.

     The positions are the first ``cutoff_x`` entries of ``idx_x``; the
     update is expressed symbolically through ``T.set_subtensor``.
     """
     active = idx_x[:cutoff_x]
     return T.set_subtensor(hot_x[active], 1.0)
Ejemplo n.º 53
0
def set_subtensor(subtensor, newval):
    """Forward to ``T.set_subtensor`` and hand back its result unchanged.

    ``subtensor`` is the indexed expression to overwrite and ``newval`` is
    the value to place there.
    """
    updated = T.set_subtensor(subtensor, newval)
    return updated
Ejemplo n.º 54
0
    def __init__(self, data, hp):
        """Build the symbolic graph of a variational auto-encoder.

        Sets the layer sizes, creates (or loads) the weights, defines the
        encoder/decoder expressions and finally passes the resulting cost
        terms and the sampling output to ``self.compile``.

        data -- mapping that must at least provide ``'n_x'`` (input width),
                read through ``self.data`` after the base initialiser ran.
        hp   -- hyper-parameter object; ``init_scale`` and ``load_model``
                are read here.
        """
        super(Vae1, self).__init__(self.__class__.__name__, data, hp)

        # Layer sizes: hidden width, latent width, and the leading size of
        # the sampling output tensor ``a_pxz`` built at the end.
        self.n_h = 800
        self.n_z = 20
        self.n_t = 1

        # Selects the decoder output non-linearity and reconstruction loss
        # (tanh + squared error when True, sigmoid + cross-entropy when False).
        self.gaussian = False

        self.params = Parameters()
        n_x = self.data['n_x']
        n_h = self.n_h
        n_z = self.n_z
        n_t = self.n_t
        scale = hp.init_scale

        # Either restore previously saved weights or create fresh ones.
        if hp.load_model and os.path.isfile(self.filename):
            self.params.load(self.filename)
        else:
            # NOTE(review): the variables below are never referenced as locals;
            # presumably ``Parameters`` registers them under these names when
            # the with-block exits (they are read back as p.W1, p.b1, ...).
            # Do not rename them without checking ``Parameters``.
            with self.params:
                W1 = shared_normal((n_x, n_h), scale=scale)
                W11 = shared_normal((n_h, n_h), scale=scale)
                W111 = shared_normal((n_h, n_h), scale=scale)
                W2 = shared_normal((n_h, n_z), scale=scale)
                W3 = shared_normal((n_h, n_z), scale=scale)
                W4 = shared_normal((n_h, n_h), scale=scale)
                W44 = shared_normal((n_h, n_h), scale=scale)
                W444 = shared_normal((n_z, n_h), scale=scale)
                W5 = shared_normal((n_h, n_x), scale=scale)
                b1 = shared_zeros((n_h, ))
                b11 = shared_zeros((n_h, ))
                b111 = shared_zeros((n_h, ))
                b2 = shared_zeros((n_z, ))
                b3 = shared_zeros((n_z, ))
                b4 = shared_zeros((n_h, ))
                b44 = shared_zeros((n_h, ))
                b444 = shared_zeros((n_h, ))
                b5 = shared_zeros((n_x, ))

        def encoder(x, p):
            """Map ``x`` to a latent sample ``z`` and the KL term ``log_qpz``."""
            # Three tanh hidden layers.
            h_encoder = T.tanh(T.dot(x, p.W1) + p.b1)
            h_encoder2 = T.tanh(T.dot(h_encoder, p.W11) + p.b11)
            h_encoder3 = T.tanh(T.dot(h_encoder2, p.W111) + p.b111)

            # Mean and log standard deviation of the approximate posterior;
            # the affine output is halved, i.e. it is treated as a log variance.
            mu_encoder = T.dot(h_encoder3, p.W2) + p.b2
            log_sigma_encoder = 0.5 * (T.dot(h_encoder3, p.W3) + p.b3)
            # Closed-form KL divergence between N(mu, sigma^2) and N(0, I),
            # summed over every element of the batch.
            log_qpz = -0.5 * T.sum(1 + 2 * log_sigma_encoder - mu_encoder**2 -
                                   T.exp(2 * log_sigma_encoder))

            # Reparameterisation: z = mu + eps * sigma with eps drawn from srnd.
            eps = srnd.normal(mu_encoder.shape, dtype=theano.config.floatX)
            z = mu_encoder + eps * T.exp(log_sigma_encoder)
            return z, log_qpz

        def decoder(z, p, x=None):
            """Map ``z`` to a reconstruction ``pxz``.

            When ``x`` is given, also return the reconstruction loss
            ``log_pxz`` as the second element of a tuple.
            """
            # Three tanh hidden layers, mirroring the encoder.
            h_decoder3 = T.tanh(T.dot(z, p.W444) + p.b444)
            h_decoder2 = T.tanh(T.dot(h_decoder3, p.W44) + p.b44)
            h_decoder = T.tanh(T.dot(h_decoder2, p.W4) + p.b4)

            if self.gaussian:
                pxz = T.tanh(T.dot(h_decoder, p.W5) + p.b5)
            else:
                pxz = T.nnet.sigmoid(T.dot(h_decoder, p.W5) + p.b5)

            if not x is None:
                if self.gaussian:
                    # Fixed unit variance (log sigma = 0): constant term plus
                    # half the summed squared reconstruction error.
                    log_sigma_decoder = 0
                    log_pxz = 0.5 * np.log(
                        2 * np.pi) + log_sigma_decoder + 0.5 * T.sum(
                            T.sqr(x - pxz))
                else:
                    # Summed binary cross-entropy between reconstruction and x.
                    log_pxz = T.nnet.binary_crossentropy(pxz, x).sum()
                return pxz, log_pxz
            else:
                return pxz

        # Training graph. NOTE(review): ``binomial`` is defined elsewhere;
        # presumably it draws a binarised sample of self.X -- confirm.
        x = binomial(self.X)
        z, log_qpz = encoder(x, self.params)
        pxz, log_pxz = decoder(z, self.params, x)
        cost = log_pxz + log_qpz

        # Sampling graph: decode self.Z and store the result in slot 0 of a
        # zero tensor whose leading dimension is n_t.
        s_pxz = decoder(self.Z, self.params)
        a_pxz = T.zeros((self.n_t, s_pxz.shape[0], s_pxz.shape[1]))
        a_pxz = T.set_subtensor(a_pxz[0, :, :], s_pxz)

        self.compile(log_pxz, log_qpz, cost, a_pxz)
Ejemplo n.º 55
0
# Script: fit a state-space model with PyMC3 in which the latent level `mu`
# follows a second-order difference recursion (see the loop below).

# First two columns of the data frame as float32 arrays.
T = df.values[:, 0].astype(np.float32)
Y = df.values[:, 1].astype(np.float32)

# Number of distinct values in column "X"; used as the number of time steps.
n_times = len(df["X"].unique())

basic_model = Model()

# How to use subtensor (see the link below)
#http://deeplearning.net/software/theano/library/tensor/basic.html

with basic_model:
    # Prior distributions
    s_mu = HalfNormal('s_mu', sd=100)  # error between states at adjacent time steps
    s_Y = HalfNormal('s_Y', sd=100)  # error between state and observation at each time step
    mu_0 = Normal('mu_0', mu=0, sd=100)  # initial state at t=0
    mu_1 = Normal('mu_1', mu=0, sd=100)  # initial state at t=1

    # Error term (one innovation per time step after the two initial states)
    e_mu = Normal('e_mu', mu=0, sd=s_mu, shape=n_times - 2)

    # Build the state vector element by element:
    # mu[t] = 2 * mu[t-1] - mu[t-2] + e_mu[t-2] for t >= 2.
    mu = tt.zeros((n_times))
    mu = tt.set_subtensor(mu[0], mu_0)
    mu = tt.set_subtensor(mu[1], mu_1)
    for i in range(n_times - 2):
        mu = tt.set_subtensor(mu[i + 2], 2 * mu[i + 1] - mu[i] + e_mu[i])
    # Likelihood of the observations given the state
    Y_obs = Normal('Y_obs', mu=mu, sd=s_Y, observed=Y)

    # Sampling
    trace = sample(1000)
    summary(trace)
Ejemplo n.º 56
0
def get_elementwise_objective(Qvalues,
                              actions,
                              rewards,
                              is_alive="always",
                              gamma_or_gammas=0.95,
                              crop_last=True,
                              force_qvalues_after_end=True,
                              qvalues_after_end="zeros",
                              consider_reference_constant=True, ):
    """
    Returns squared error between predicted and reference Qvalues according to Q-learning algorithm

        Qreference(state,action) = reward(state,action) + gamma* Q(next_state,next_action)
        loss = mean over (Qvalues - Qreference)**2

    parameters:

        Qvalues [batch,tick,action_id] - predicted qvalues
        actions [batch,tick] - committed actions
        rewards [batch,tick] - immediate rewards for taking actions at given time ticks

        is_alive [batch,tick] - whether given session is still active at given tick. Defaults to always active.
                            Default value of is_alive implies a simplified computation algorithm for Qlearning loss

        gamma_or_gammas - a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts

        crop_last - if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end

        force_qvalues_after_end - if true, sets reference Qvalues at session end to rewards[end] + qvalues_after_end
                            (only applied when an explicit is_alive matrix is given)

        qvalues_after_end [batch,1,n_actions] - symbolic expression for "next state q-values" for last tick used for reference only.
                            Defaults at  T.zeros_like(Qvalues[:,0,None,:])
                            If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] )

        consider_reference_constant - whether or not zero-out gradient flow through reference_Qvalues
            (True is highly recommended)
    Returns:

        tensor [batch, tick] of squared errors over Qvalues (using formula above for loss)

    """
    # get reference Qvalues via Q-learning algorithm
    reference_Qvalues = get_reference_Qvalues(Qvalues, actions, rewards,
                                              gamma_or_gammas=gamma_or_gammas,
                                              qvalues_after_end=qvalues_after_end,
                                              )

    if consider_reference_constant:
        # do not pass gradient through reference Q-values (since they DO depend on Q-values by default)
        reference_Qvalues = consider_constant(reference_Qvalues)

    # get predicted qvalues for committed actions (to compare with reference Q-values)
    action_Qvalues = get_action_Qvalues(Qvalues, actions)

    # if agent is always alive, return the simplified loss
    if is_alive == "always":

        # tensor of element-wise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)

    else:
        # we are given an is_alive matrix : uint8[batch,tick]

        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless

        if force_qvalues_after_end:
            # set future rewards at session end to rewards + qvalues_after_end
            end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero()

            if qvalues_after_end == "zeros":
                # "set reference Q-values at end action ids to just the immediate rewards"
                reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids], rewards[end_ids])
            else:
                # Value of the state after the end under Q-learning: the best
                # Q-value over actions, shape [batch, 1].  The previous code
                # built a 1-D zero vector here and then indexed it with two
                # indices, which both fails and ignores qvalues_after_end.
                last_optimal_rewards = T.max(qvalues_after_end, axis=-1)

                # "set reference Q-values at end action ids to the immediate rewards + qvalues after end"
                reference_Qvalues = T.set_subtensor(
                    reference_Qvalues[end_ids],
                    rewards[end_ids] +
                    gamma_or_gammas * last_optimal_rewards[end_ids[0], 0])

        # tensor of element-wise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)

        # zero-out loss after session ended
        elwise_squared_error = elwise_squared_error * is_alive

    if crop_last:
        # the last tick has no successor inside the sequence, so drop its loss
        elwise_squared_error = T.set_subtensor(elwise_squared_error[:, -1], 0)

    return elwise_squared_error
    def __init__(self,
                 modality_names,
                 modality_sizes,
                 locallayer_sizes,
                 fusionlayer_sizes,
                 numpy_rng,
                 batchsize,
                 theano_rng=None):
        """Build a multi-modal fusion classifier graph.

        Each modality gets its own stack of affine + rectified-tanh layers
        fed from its slice of the concatenated input matrix.  The modality
        outputs are concatenated, passed through the fusion layers (affine,
        rectified tanh, dropout) and a final 7-way affine + softmax.

        modality_names    -- one name per modality (used for layer names).
        modality_sizes    -- number of input columns per modality, in the
                             order they appear in ``self.inputs``.
        locallayer_sizes  -- per modality, a sequence of layer widths.
        fusionlayer_sizes -- widths of the fusion layers.
        numpy_rng         -- random generator handed to every AffineLayer.
        batchsize         -- batch size used for the first layers' input shape.
        theano_rng        -- random stream for dropout; ``RandomStreams(1)``
                             is created when omitted.

        Raises AssertionError when the three per-modality sequences differ
        in length.
        """
        assert (len(modality_names) == len(modality_sizes)
                and len(modality_names) == len(locallayer_sizes))
        self.modality_names = modality_names
        self.modality_sizes = modality_sizes
        self.locallayer_sizes = locallayer_sizes
        self.fusionlayer_sizes = fusionlayer_sizes
        self.numpy_rng = numpy_rng
        self.batchsize = batchsize
        if theano_rng is None:
            theano_rng = RandomStreams(1)
        self.theano_rng = theano_rng
        # shared flag handed to the Dropout layers as ``mode_var``
        self.mode = theano.shared(np.int8(0), name='mode')

        # start with empty params list
        self.params = []
        self.l1params = []
        self.l2params = []
        self.l21params = []

        # inputs are the concatenated modalities
        self.inputs = T.fmatrix('inputs')

        # targets vector
        self.targets = T.ivector('targets')

        self.modality_inputs = OrderedDict()
        self.modality_models = OrderedDict()
        self.modality_preconcat_layer_sizes = []
        self.modality_concat_layer_sizes = []

        offset = 0
        # local modality networks
        for modality_name, modality_size, locallayer_size in zip(
                modality_names, modality_sizes, locallayer_sizes):

            # get inputs of modality: its column slice of the input matrix
            self.modality_inputs[modality_name] = self.inputs[:,
                                                              offset:offset +
                                                              modality_size]
            offset += modality_size

            # determine size of input to the last layer in the modalities subnetwork
            if len(locallayer_size) == 1:
                self.modality_preconcat_layer_sizes.append(modality_size)
            else:
                self.modality_preconcat_layer_sizes.append(locallayer_size[-2])

            # construct modality model
            layers = []
            #locallayer_sizes = ((100,), (100,200))
            #locallayer_size = (100,)
            for i, size in enumerate(locallayer_size):
                if i == 0:
                    layer_input = self.modality_inputs[modality_name]
                    layer_input_size = (self.batchsize, modality_size)
                else:
                    layer_input = layers[-1]
                    layer_input_size = layer_input.outputs_shape
                layers.append(
                    AffineLayer(rng=self.numpy_rng,
                                inputs=layer_input,
                                nouts=size,
                                name='{0}_affine_{1}'.format(modality_name, i),
                                inputs_shape=layer_input_size))
                # append params to global list
                self.params.extend(layers[-1].params)
                self.l2params.append(layers[-1].W)
                if i == len(locallayer_size) - 1:
                    # only the last local layer takes part in the L1/L21 penalties
                    self.l1params.append(layers[-1].W)
                    self.l21params.append(layers[-1].W)
                    # update total size of concat layer
                    self.modality_concat_layer_sizes.append(size)
                layers.append(
                    RectifiedTanh(inputs=layers[-1],
                                  name='{0}_rectifiedtanh_{1}'.format(
                                      modality_name, i)))
            # create the modality model object
            self.modality_models[modality_name] = Composite(
                layers=layers, name='{0}_composite'.format(modality_name))
        # concatenate modality model outputs
        self.concat_modalities = Concat(self.modality_models.values(),
                                        name='concat_layer',
                                        axis=1)
        self.fusion_layers = []
        for i, fusionlayer_size in enumerate(fusionlayer_sizes):
            if i == 0:
                layer_input = self.concat_modalities
            else:
                layer_input = self.fusion_layers[-1]
            self.fusion_layers.append(
                AffineLayer(rng=self.numpy_rng,
                            inputs=layer_input,
                            nouts=fusionlayer_size,
                            name='fusion_affine_{0}'.format(i)))
            # append params to global list
            self.params.extend(self.fusion_layers[-1].params)
            self.l2params.append(self.fusion_layers[-1].W)
            self.fusion_layers.append(
                RectifiedTanh(inputs=self.fusion_layers[-1],
                              name='fusion_rectifiedtanh_{0}'.format(i)))

            self.fusion_layers.append(
                Dropout(inputs=self.fusion_layers[-1],
                        dropout_rate=.3,
                        name='fusion_dropout_{0}'.format(i),
                        theano_rng=self.theano_rng,
                        mode_var=self.mode))
        # classification layer (7 output units)
        self.logits = AffineLayer(rng=self.numpy_rng,
                                  inputs=self.fusion_layers[-1],
                                  nouts=7,
                                  name='logit_affine')
        # append params to global list
        self.params.extend(self.logits.params)
        self.l2params.append(self.logits.W)
        self.softmax = Softmax(inputs=self.logits, name='softmax')

        # clip so the cross-entropy below never sees exactly 0 or 1
        self.probabilities = self.softmax.outputs
        self.probabilities = T.clip(self.probabilities, 1e-6, 1 - 1e-6)

        self.l2cost = L2_sqr(
            T.concatenate([x.flatten() for x in self.l2params], axis=0))

        self.concat_matrix = T.zeros(
            (np.sum(self.modality_preconcat_layer_sizes),
             np.sum(self.modality_concat_layer_sizes)))
        row_offset = 0
        col_offset = 0
        for inp_size, outp_size, p in zip(self.modality_preconcat_layer_sizes,
                                          self.modality_concat_layer_sizes,
                                          self.l1params):
            # embed weight matrices in large concatenated matrix
            self.concat_matrix = T.set_subtensor(
                self.concat_matrix[row_offset:row_offset + inp_size,
                                   col_offset:col_offset + outp_size], p)
            # advance to the next diagonal block; without this every matrix
            # would be written to the top-left corner and overwrite the
            # previous one
            row_offset += inp_size
            col_offset += outp_size
        self.l1cost = L11(self.concat_matrix)
        self.l21cost = L21(self.concat_matrix)

        # mean cross-entropy plus the weighted sum of the three penalties
        self._cost = (T.nnet.categorical_crossentropy(
            self.probabilities, self.targets).mean() + 3e-5 *
                      (self.l2cost + self.l1cost + self.l21cost))

        self.classification = T.argmax(self.probabilities, axis=1)

        self._grads = T.grad(self._cost, self.params)

        self._classify = theano.function([self.inputs], self.classification)
        self._get_probabilities = theano.function([self.inputs],
                                                  self.probabilities)
Ejemplo n.º 58
0
 def change_race_prob_div(_i, _change, _rep, _times, _item):
     """Fill rows ``_rep[_i]:_rep[_i + 1]`` of ``_change`` with ``_item[_i]``.

     The fill value is allocated ``_times[_i]`` times and reshaped into a
     single column before being written; the updated tensor is returned.
     """
     count = _times[_i]
     column = T.reshape(T.alloc(_item[_i], count), (count, 1))
     first, last = _rep[_i], _rep[_i + 1]
     return T.set_subtensor(_change[first:last], column)
Ejemplo n.º 59
0
    def feedForward(self, miniBatchSize):
        '''
        Convolve the output of 'fromLayer' with this layer's filters.

        Layer outputs are kept flattened, so the incoming data is first
        reshaped, optionally zero padded along its trailing axes, convolved,
        offset by the bias and flattened again for 'toLayer'.
        Only inputs of rank 3 (1-D convolution) and rank 4 (2-D convolution),
        counting the minibatch axis, are padded.
        :param miniBatchSize: size prepended to ``self.input_shape``
        :return: None (results are stored on ``self.input``,
                 ``self.input_shape`` and ``self.output``)
        '''
        # Restore the un-flattened shape of the previous layer's output.
        source = self.fromLayer
        self.input = source.output.reshape(source.shape_with_minibatch)

        # The complete shape of the incoming data includes the minibatch axis.
        self.input_shape = [miniBatchSize] + list(self.input_shape)

        # Zero padding: place the input in the centre of a larger zero tensor.
        pad = self.zero_padding
        rank = len(self.input_shape)
        if pad != 0 and rank in (3, 4):
            padded_shape = list(self.input_shape)
            for axis in range(2, rank):
                padded_shape[axis] = padded_shape[axis] + 2 * pad
            # all of the first two axes, the centred window of the others
            window = (slice(None), slice(None)) + tuple(
                slice(pad, extent + pad) for extent in self.input_shape[2:])
            canvas = T.zeros(tuple(padded_shape), dtype=theano.config.floatX)
            self.input = T.set_subtensor(canvas[window], self.input)
            self.input_shape = tuple(padded_shape)

        conv_out = conv.conv2d(input=self.input,
                               filters=self.w,
                               filter_shape=self.filter_shape,
                               image_shape=self.input_shape,
                               border_mode="valid",
                               subsample=self.stride_length)

        # Broadcast the bias over every axis except the second one.
        self.output = None
        if len(self.input_shape) == 4:
            bias_pattern = ('x', 0, 'x', 'x')
        else:
            bias_pattern = ('x', 0, 'x')
        self.output = conv_out + self.b.dimshuffle(*bias_pattern)

        # Hand the result on in the flattened layout.
        self.output = self.output.reshape(
            self.toLayer.shape_minibatch_flattened)
Ejemplo n.º 60
0
def train_conv_net(datasets,
                   U,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=10,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter

    datasets = (train, test) 2-D arrays; in each row the first img_h columns
        are word indices into U and the last column is the label
    U = word-vector matrix; row 0 is reset to zeros after every update
    non_static = if True the word vectors are trained as well

    Returns the test performance (1 - error on the whole test set) measured
    at the last epoch whose validation performance was at least as good as
    every earlier one.  Neither ``hidden_units`` nor the classifier's own
    parameter list is modified.
    """
    rng = np.random.RandomState(3435)
    # Work on a copy: the first entry is overwritten further down, which would
    # otherwise corrupt the shared default list (and the caller's list).
    hidden_units = list(hidden_units)
    img_h = len(datasets[0][0]) - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print(parameters)

    #define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    # helper that writes a given vector into row 0 of the embedding matrix
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)
    # look up the word vectors and arrange them as (batch, 1, words, dims)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for filter_shape, pool_size in zip(filter_shapes, pool_sizes):
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    # the classifier input is the concatenation of all filter-window outputs
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    #define parameters of the model and update functions using adadelta
    # copy so that extending the list does not alter classifier.params
    params = list(classifier.params)
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        #if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)

    #shuffle dataset and assign to mini batches. if dataset size is not a multiple of mini batches, replicate
    #extra data (at random)
    np.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = np.random.permutation(datasets[0])
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]
    new_data = np.random.permutation(new_data)
    # floor division keeps the batch count an integer on every interpreter
    n_batches = new_data.shape[0] // batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    #divide train set into train/val sets
    test_set_x = datasets[1][:, :img_h]
    test_set_y = np.asarray(datasets[1][:, -1], "int32")
    train_set = new_data[:n_train_batches * batch_size, :]
    val_set = new_data[n_train_batches * batch_size:, :]
    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :img_h], train_set[:, -1]))
    val_set_x, val_set_y = shared_dataset((val_set[:, :img_h], val_set[:, -1]))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    #compile theano functions to get train/val/test errors
    # note: despite its name, test_model reports the error on training batches
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=False)
    # separate prediction graph that processes the whole test set at once
    test_pred_layers = []
    test_size = test_set_x.shape[0]
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function([x, y],
                                     test_error,
                                     allow_input_downcast=True)

    #start training over mini-batches
    print('... training')
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    while (epoch < n_epochs):
        start_time = time.time()
        epoch = epoch + 1
        if shuffle_batch:
            batch_order = np.random.permutation(range(n_train_batches))
        else:
            batch_order = range(n_train_batches)
        for minibatch_index in batch_order:
            train_model(minibatch_index)
            # keep the padding word (row 0) at zero after every update
            set_zero(zero_vec)
        train_losses = [test_model(i) for i in range(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i) for i in range(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print(
            'epoch: %i, training time: %.2f secs, train perf: %.2f %%, val perf: %.2f %%'
            % (epoch, time.time() - start_time, train_perf * 100.,
               val_perf * 100.))
        # evaluate on the test set only when validation did not get worse
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss = test_model_all(test_set_x, test_set_y)
            test_perf = 1 - test_loss
    return test_perf