Code Example #1
File: memory.py Project: strategist922/MEM_DGM
    def get_output_for(self, input, **kwargs):
        assert input.ndim == 2
        activation = T.dot(input, self.C)
        if self.b is not None:
            activation = activation + self.b.dimshuffle('x', 0)
        return self.nonlinearity_final(
            nonlinearities.sigmoid(activation).dot(self.M))
Code Example #2
    def __init__(self, GenerativeParams, xDim, yDim):

        super(SigmoidGenerative, self).__init__(GenerativeParams, xDim, yDim)

        layer_size = GenerativeParams['layer_size']
        
        self.un_base_bias = theano.shared(value=np.ones([1,xDim]).astype(theano.config.floatX))
        self.base_bias = sigmoid(self.un_base_bias)
        
        sbn_nn = lasagne.layers.InputLayer((None, xDim))
        for ls in layer_size:
            sbn_nn = SigmoidBernoulli(sbn_nn, ls)
        self.sbn_nn = SigmoidBernoulli(sbn_nn, yDim)
Code Example #3
    def __init__(self, GenerativeParams, xDim, yDim):

        super(SigmoidGenerative, self).__init__(GenerativeParams, xDim, yDim)

        layer_size = GenerativeParams['layer_size']

        self.un_base_bias = theano.shared(
            value=np.ones([1, xDim]).astype(theano.config.floatX))
        self.base_bias = sigmoid(self.un_base_bias)

        sbn_nn = lasagne.layers.InputLayer((None, xDim))
        for ls in layer_size:
            sbn_nn = SigmoidBernoulli(sbn_nn, ls)
        self.sbn_nn = SigmoidBernoulli(sbn_nn, yDim)
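Both constructors above expect GenerativeParams to contain a 'layer_size' entry listing the widths of the hidden sigmoid-Bernoulli layers. A hypothetical instantiation might look as follows (the dimensions, and any other keys the base class may require, are assumptions for illustration only):

gen_params = {'layer_size': [200, 200]}          # two hidden layers of 200 units each
model = SigmoidGenerative(gen_params, xDim=50, yDim=784)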
Code Example #4
File: attention_layer.py Project: imoonkey/thin_nn
    def get_output_for(self, input, only_at_anchor=False, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        # ## calculate attention anchor position based on atw, atb and input x
        # at_anchor = nonlinearities.rectify(T.dot(input, self.atw) + self.atb[0])
        # at_anchor = T.minimum(at_anchor, 1)
        at_anchor = nonlinearities.sigmoid(T.dot(input, self.atw) + self.atb[0])
        at_anchor *= self.num_units

        self.at_anchor = at_anchor  # for printing
        # print_op = printing.Print('attention')
        # at_anchor = print_op(at_anchor)
        if only_at_anchor:
            return at_anchor

        # ## normal dense layer activation output
        activation = T.dot(input, self.W)

        if self.b is not None:
            activation = activation + self.b.dimshuffle('x', 0)

        out = self.nonlinearity(activation)

        ### multiply activation with attention weight
        attention = T.exp(
            self.at_decay * (
                T.arange(0, self.num_units).dimshuffle('x', 0) -
                at_anchor.dimshuffle(0, 'x')
            ) ** 2)

        ## Truncation
        if self.hard_threshold:
            attention = T.maximum(attention - self.hard_threshold, 0)
        out *= attention
        return out
Code Example #5
File: highway.py Project: XuezheMax/NeuroNLP
    def get_output_for(self, input, **kwargs):
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input_reshape = input.flatten(2) if input.ndim > 2 else input

        activation = T.dot(input_reshape, self.W_h)
        if self.b_h is not None:
            activation = activation + self.b_h.dimshuffle('x', 0)
            activation = self.nonlinearity(activation)

        transform = T.dot(input_reshape, self.W_t)
        if self.b_t is not None:
            transform = transform + self.b_t.dimshuffle('x', 0)
            transform = nonlinearities.sigmoid(transform)

        carry = 1.0 - transform

        output = activation * transform + input_reshape * carry
        # reshape output back to the original input shape
        if input.ndim > 2:
            output = T.reshape(output, input.shape)

        return output
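The fragment above (and the near-identical one in the next example) implements the highway blend output = H(x) * T(x) + x * (1 - T(x)). A minimal NumPy sketch of that blend, independent of Lasagne and with made-up weights, shows how the transform gate interpolates between the transformed activation and the raw input:

import numpy as np

def np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.RandomState(0)
x = rng.randn(4, 8)                                   # a batch of 4 feature vectors
W_h, b_h = 0.1 * rng.randn(8, 8), np.zeros(8)
W_t, b_t = 0.1 * rng.randn(8, 8), np.zeros(8) - 2.0   # negative bias keeps the gate mostly closed

H = np.tanh(x.dot(W_h) + b_h)                         # candidate activation H(x)
T_gate = np_sigmoid(x.dot(W_t) + b_t)                 # transform gate T(x)
y = H * T_gate + x * (1.0 - T_gate)                   # highway blend: here it mostly carries x through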
Code Example #6
File: highway.py Project: kurtespinosa/ner
    def get_output_for(self, input, **kwargs):
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input_reshape = input.flatten(2) if input.ndim > 2 else input

        activation = T.dot(input_reshape, self.W_h)
        if self.b_h is not None:
            activation = activation + self.b_h.dimshuffle('x', 0)
            activation = self.nonlinearity(activation)

        transform = T.dot(input_reshape, self.W_t)
        if self.b_t is not None:
            transform = transform + self.b_t.dimshuffle('x', 0)
            transform = nonlinearities.sigmoid(transform)

        carry = 1.0 - transform

        output = activation * transform + input_reshape * carry
        # reshape output back to the original input shape
        if input.ndim > 2:
            output = output.reshape(input.shape)

        return output
Code Example #7
    def get_output_for(self, input, **kwargs):
        activation = T.dot(input, self.C)
        if self.b is not None:
            activation = activation + self.b.dimshuffle('x', 0)
        return nonlinearities.sigmoid(activation)
Code Example #8
def main():
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, LENGTH, 1),
                                            input_var=input_var,
                                            name='input')

    layer_rnn = RecurrentLayer(layer_input,
                               NUM_UNITS,
                               nonlinearity=nonlinearities.tanh,
                               only_return_final=True,
                               W_in_to_hid=lasagne.init.Constant(1),
                               W_hid_to_hid=lasagne.init.Constant(2),
                               b=None,
                               name='RNN')
    W = layer_rnn.W_hid_to_hid
    U = layer_rnn.W_in_to_hid

    output = lasagne.layers.get_output(layer_rnn)
    output = output.mean(axis=1)
    prediction = T.switch(T.gt(output, 0), 1, -1)
    acc = T.eq(prediction, target_var)
    acc = acc.sum()
    # get the output before activation function tanh
    epsilon = 1e-6
    prob = 0.5 * T.log((1 + output + epsilon) / (1 - output + epsilon))
    prob = nonlinearities.sigmoid(prob)
    loss = -0.5 * ((1 + target_var) * T.log(prob) +
                   (1 - target_var) * T.log(1 - prob))
    loss = loss.sum()

    batch_size = 100
    learning_rate = 0.01
    steps_per_epoch = 1000
    params = lasagne.layers.get_all_params(layer_rnn, trainable=True)
    updates = lasagne.updates.sgd(loss,
                                  params=params,
                                  learning_rate=learning_rate)
    train_fn = theano.function([input_var, target_var],
                               [loss, acc, W, U, output],
                               updates=updates)

    for epoch in range(3):
        print 'Epoch %d (learning rate=%.4f)' % (epoch, learning_rate)
        loss = 0.0
        correct = 0.0
        num_back = 0
        for step in range(steps_per_epoch):
            x, y = get_batch(batch_size)
            err, corr, w, u, pred = train_fn(x, y)
            # print x
            # print y
            # print pred
            loss += err
            correct += corr
            num_inst = (step + 1) * batch_size
            # update log
            sys.stdout.write("\b" * num_back)
            log_info = 'inst: %d loss: %.4f, corr: %d, acc: %.2f%%, W: %.6f, U: %.6f' % (
                num_inst, loss / num_inst, correct, correct * 100 / num_inst,
                w.sum(), u.sum())
            sys.stdout.write(log_info)
            num_back = len(log_info)
            # raw_input()
        # update training log after each epoch
        sys.stdout.write("\b" * num_back)
        assert num_inst == batch_size * steps_per_epoch
        print 'inst: %d loss: %.4f, corr: %d, acc: %.2f%%' % (
            num_inst, loss / num_inst, correct, correct * 100 / num_inst)
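get_batch is not defined in this example. A minimal stand-in that matches only the shapes the code expects (inputs of shape (batch_size, LENGTH, 1) in theano.config.floatX, targets in {-1, +1} as int32) could look like this; the labelling rule is purely an assumption for illustration:

import numpy as np
import theano

LENGTH = 10  # must match the LENGTH used to build the input layer

def get_batch(batch_size):
    # Hypothetical generator: random sequences labelled by the sign of their sum.
    x = np.random.randn(batch_size, LENGTH, 1).astype(theano.config.floatX)
    y = np.where(x.sum(axis=(1, 2)) > 0, 1, -1).astype('int32')
    return x, y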
Code Example #9
        def step(input_n, cell_previous, hid_previous, avg_previous, *args):
            x = input_n
            if not self.precompute_input:
                input_n = T.dot(input_n, W_in_stacked) + b_stacked

            # Calculate gates pre-activations and slice
            gates = input_n + T.dot(hid_previous, W_hid_stacked)

            # Clip gradients
            if self.grad_clipping:
                gates = theano.gradient.grad_clip(gates, -self.grad_clipping,
                                                  self.grad_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous * self.W_cell_to_ingate
                forgetgate += cell_previous * self.W_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)

            # Compute new cell value
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                outgate += cell * self.W_cell_to_outgate
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new hidden unit activation
            hid = outgate * self.nonlinearity(cell)

            avg_input = T.dot(x, self.W_avg1) + T.dot(hid,
                                                      self.W_avg2) + self.b_avg
            if self.model_type == 1:
                avg = x * nonlinearities.sigmoid(avg_input)
            elif self.model_type == 6:
                avg = nonlinearities.tanh(avg_input)
            elif self.model_type == 7:
                avg_input = T.dot(x, self.W_avg1) * T.dot(
                    hid, self.W_avg2) + self.b_avg
                avg = x * nonlinearities.sigmoid(avg_input)
            elif self.model_type == 2:
                avg = hid * nonlinearities.sigmoid(avg_input)
            elif self.model_type == 3:
                avg_input2 = T.dot(x, self.W_avg12) + T.dot(
                    hid, self.W_avg22) + self.b_avg2
                g1 = nonlinearities.sigmoid(avg_input)
                g2 = nonlinearities.sigmoid(avg_input2)
                avg = avg_previous * g1 + x * g2
            elif self.model_type == 4:
                avg_input = T.dot(
                    x, self.W_avg1) + T.dot(hid, self.W_avg2) + T.dot(
                        avg_previous, self.W_avg3) + self.b_avg
                avg_input2 = T.dot(
                    x, self.W_avg12) + T.dot(hid, self.W_avg22) + T.dot(
                        avg_previous, self.W_avg32) + self.b_avg2
                g1 = nonlinearities.sigmoid(avg_input)
                g2 = nonlinearities.sigmoid(avg_input2)
                avg = avg_previous * g1 + x * g2
            elif self.model_type == 5:
                avg_input2 = T.dot(x, self.W_avg12) + T.dot(
                    hid, self.W_avg22) + self.b_avg2
                g1 = nonlinearities.sigmoid(avg_input)
                g2 = nonlinearities.sigmoid(avg_input2)
                avg = x * g1
                havg = hid * g2
                avg = avg + havg
            return [cell, hid, avg]
Code Example #10
File: nn_lung.py Project: ericsolo/python
    def get_output_for(self, input, **kwargs):
        if self.apply_nl:
            ps = nonlinearities.sigmoid(input)
        else:
            ps = input  # assumed to already contain probabilities when apply_nl is off
        prod = T.prod(ps, axis=(1, 2))
        output = 1 - prod
        return output
Code Example #11
File: analysis_memory.py Project: thu-ml/MEM_DGM
    def get_output_for(self, input, **kwargs):
        activation = T.dot(input, self.C)
        if self.b is not None:
            activation = activation + self.b.dimshuffle('x', 0)
        return nonlinearities.sigmoid(activation)
Code Example #12
File: DMNLayer.py Project: danstrawser/Nlp2Commands
        def step(input_n, hid_previous_total, *args):
            print("317 into step")
            print(" type input n: ", type(input_n))
            
            hid_previous_facts = hid_previous_total[0:self.num_hidden_units_h]
            hid_previous_brain = hid_previous_total[self.num_hidden_units_h:]
            
            self.cur_sequence_idx += 1  # Updates where we are at in the sequence
                                
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input_facts = T.dot(hid_previous_facts, W_hid_stacked)

            if self.grad_clipping:
                input_n = theano.gradient.grad_clip(
                    input_n, -self.grad_clipping, self.grad_clipping)
                hid_input_facts = theano.gradient.grad_clip(
                    hid_input_facts, -self.grad_clipping, self.grad_clipping)

            if not self.precompute_input:
                # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
                input_n = T.dot(input_n, W_in_stacked) + b_stacked  # DS Note:  accomplishes the multiplication AND adds bias

            # Reset and update gates
            resetgate = slice_w_h(hid_input_facts, 0) + slice_w_h(input_n, 0)
            updategate = slice_w_h(hid_input_facts, 1) + slice_w_h(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # DS Edit: DynamMemNet modifiers
            m_dmn = hid_previous_brain  # Note that this should have size 
            c_dmn = input_n  # This is a TensorType<float64, row>
            q_dmn = self.question_layer  # This is a lasagne recurrent GRU layer
                        
            print(" entering 344")
            # DS Note:  I believe this has size 9 x size(m_dmn)==size(cdmn)
#             z_dmn = [c_dmn, m_dmn, q_dmn, c_dmn * q_dmn, abs(c_dmn - q_dmn), abs(c_dmn - m_dmn), T.dot(c_dmn.T, T.dot(self.W_dmn_b, q_dmn)), 
#                         T.dot(c_dmn.T, T.dot(self.W_dmn_b, m_dmn))]
#             
            z_dmn = T.concatenate([c_dmn, m_dmn, q_dmn, c_dmn * q_dmn, abs(c_dmn - q_dmn), abs(c_dmn - m_dmn), T.dot(c_dmn.T, T.dot(self.W_dmn_b, q_dmn)),
                        T.dot(c_dmn.T, T.dot(self.W_dmn_b, m_dmn))], axis=1)
            G_dmn = nonlinearities.sigmoid(T.dot(self.W_dmn_2, nonlinearities.tanh(T.dot(self.W_dmn_1, z_dmn)) + self.b_dmn_1) + self.b_dmn_2)
            # Note, you also need W_b for the c and q elements.
            
            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w_h(input_n, 2)
            hidden_update_hid = slice_w_h(hid_input_facts, 2)
            hidden_update_facts = hidden_update_in + resetgate * hidden_update_hid
            if self.grad_clipping:
                hidden_update_facts = theano.gradient.grad_clip(
                    hidden_update_facts, -self.grad_clipping, self.grad_clipping)
            hidden_update_facts = self.nonlinearity_hid(hidden_update_facts)

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate) * hid_previous_facts + updategate * hidden_update_facts  # This is the GRU_fact output
            output_dmn = G_dmn * hid + (1 - G_dmn) * hid_previous_facts  # This is the output of the Dynamic Memory Net modified GRU, Eq. (5)
            
            # UPDATE THE BRAIN
            # We update the brain parameters if the current idx is equal to the sent len
            if self.cur_sequence_idx == self.max_seqlen:
                hid_input_brain = T.dot(hid_previous_brain, W_brain_hid_stacked)            
            
                if self.grad_clipping:
                    input_to_brain = theano.gradient.grad_clip(
                        output_dmn, -self.grad_clipping, self.grad_clipping)
                    hid_input_brain = theano.gradient.grad_clip(
                        hid_input_brain, -self.grad_clipping, self.grad_clipping)
                else:
                    input_to_brain = output_dmn

                if not self.precompute_input:
                    # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
                    input_to_brain = T.dot(input_to_brain, W_brain_in_stacked) + b_brain_stacked  # DS Note:  accomplishes the multiplication AND adds bias
    
                # Reset and update gates
                resetgate_brain = slice_w_m(hid_input_brain, 0) + slice_w_m(input_to_brain, 0)
                updategate_brain = slice_w_m(hid_input_brain, 1) + slice_w_m(input_to_brain, 1)
                resetgate_brain = self.nonlinearity_brain_resetgate(resetgate_brain)
                updategate_brain = self.nonlinearity_brain_updategate(updategate_brain)

                hidden_update_in_brain = slice_w_m(input_to_brain, 2)
                hidden_update_brain = slice_w_m(hid_input_brain, 2)
                
                hidden_update_brain = hidden_update_in_brain + resetgate_brain * hidden_update_brain
                
                if self.grad_clipping:
                    hidden_update_brain = theano.gradient.grad_clip(hidden_update_brain, -self.grad_clipping, self.grad_clipping)
                hidden_update_brain = self.nonlinearity_brain_hid_update(hidden_update_brain)
                
                hid_brain = (1 - updategate_brain) * hid_previous_brain + updategate_brain * hidden_update_brain                
            else:                
                hid_brain = hid_previous_brain
            
            # TODO: DS:  ERROR IS HERE
            output_dmn = T.concatenate([output_dmn, hid_brain], axis=1) 
           
            print(" 412 out of step") 
            return output_dmn
Code Example #13
def swish(x):
    """"""
    return x * nl.sigmoid(x)
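Assuming nl is an alias for lasagne.nonlinearities, the swish function above can be passed anywhere Lasagne expects a nonlinearity, for example:

import lasagne
from lasagne import nonlinearities as nl  # the alias assumed by swish above

l_in = lasagne.layers.InputLayer(shape=(None, 100))
l_hid = lasagne.layers.DenseLayer(l_in, num_units=50, nonlinearity=swish)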
Code Example #14
    def get_output_for(self, input, **kwargs):

        ps = nonlinearities.sigmoid(input)
        sum_p_r_benign = T.sum(ps,axis=1)
        sum_log = T.sum(T.log(1-ps+1.e-12),axis=1)
        return T.concatenate([sum_log, sum_p_r_benign])
Code Example #15
def safe_sigmoid(x, eps=1e-6):
    return T.clip(sigmoid(x), eps, 1 - eps)
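The clipping keeps the output strictly inside (0, 1), which avoids log(0) when the result feeds a cross-entropy loss. A minimal sketch of that use, with illustrative variable names and assuming sigmoid comes from lasagne.nonlinearities (or theano.tensor.nnet):

import theano.tensor as T

scores = T.matrix('scores')    # raw network outputs (logits)
targets = T.matrix('targets')  # binary labels in {0, 1}
probs = safe_sigmoid(scores)
loss = T.nnet.binary_crossentropy(probs, targets).mean()  # finite thanks to the clipping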
Code Example #16
    def get_output_for(self, input, **kwargs):
        ps = nonlinearities.sigmoid(input)
        powd = ps ** self.exp
        tmean = T.mean(powd, axis=(1, 2))
        return tmean
Code Example #17
    def get_output_for(self, input, **kwargs):
        if self.apply_nl:
            ps = nonlinearities.sigmoid(input)
        else:
            ps = input  # assumed to already contain probabilities when apply_nl is off
        prod = T.prod(ps, axis=(1, 2))
        output = 1 - prod
        return output
Code Example #18
        def step(input_n, hid_previous_total, *args):
            
            hid_previous_facts = hid_previous_total[0:self.num_hidden_units_h]
            hid_previous_brain = hid_previous_total[self.num_hidden_units_h:]
            
            self.cur_sequence_idx += 1  # Updates where we are at in the sequence
            
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input_facts = T.dot(hid_previous_facts, W_hid_stacked)


            if self.grad_clipping:
                input_n = theano.gradient.grad_clip(
                    input_n, -self.grad_clipping, self.grad_clipping)
                hid_input_facts = theano.gradient.grad_clip(
                    hid_input_facts, -self.grad_clipping, self.grad_clipping)

            if not self.precompute_input:
                # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
                input_n = T.dot(input_n, W_in_stacked) + b_stacked  # DS Note:  accomplishes the multiplication AND adds bias

            # Reset and update gates
            resetgate = slice_w_h(hid_input_facts, 0) + slice_w_h(input_n, 0)
            updategate = slice_w_h(hid_input_facts, 1) + slice_w_h(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)
            
            # DS Edit: DynamMemNet modifiers
            m_dmn = hid_previous_brain  # Note that this should have size 
            c_dmn = input_n  # This is a TensorType<float64, row>
            q_dmn = self.question_layer  # This is a lasagne recurrent GRU layer

            z_dmn = T.concatenate([c_dmn, m_dmn, q_dmn, c_dmn * q_dmn, abs(c_dmn - q_dmn), abs(c_dmn - m_dmn), T.dot(c_dmn.T, T.dot(self.W_dmn_b, q_dmn)),
                         T.dot(c_dmn.T, T.dot(self.W_dmn_b, m_dmn))], axis=1)
            G_dmn = nonlinearities.sigmoid(T.dot(self.W_dmn_2, nonlinearities.tanh(T.dot(self.W_dmn_1, z_dmn)) + self.b_dmn_1) + self.b_dmn_2)
            # Note, you also need W_b for the c and q elements.
            #something_else = T.dot(hid_previous_facts, W_hid_stacked)
            hidden_update_in = slice_w_h(input_n, 2)
            hidden_update_hid = slice_w_h(hid_input_facts, 2)
            hidden_update_facts = hidden_update_in + resetgate * hidden_update_hid
            if self.grad_clipping:
                hidden_update_facts = theano.gradient.grad_clip(
                    hidden_update_facts, -self.grad_clipping, self.grad_clipping)
            hidden_update_facts = self.nonlinearity_hid(hidden_update_facts)

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate) * hid_previous_facts + updategate * hidden_update_facts  # This is the GRU_fact output
            #output_dmn = G_dmn * hid + (1 - G_dmn) * hid_previous_facts  # This is the output of the Dynamic Memory Net modified GRU, Eq. (5)
            output_dmn = hid
                        
#             if self.cur_sequence_idx == self.max_seqlen:
#                 hid_input_brain = T.dot(hid_previous_brain, W_brain_hid_stacked)            
#             
#                 if self.grad_clipping:
#                     input_to_brain = theano.gradient.grad_clip(
#                         output_dmn, -self.grad_clipping, self.grad_clipping)
#                     hid_input_brain = theano.gradient.grad_clip(
#                         hid_input_brain, -self.grad_clipping, self.grad_clipping)
#                 else:
#                     input_to_brain = output_dmn
#                     
#                 if not self.precompute_input:
#                     # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
#                     input_to_brain = T.dot(input_to_brain, W_brain_in_stacked) + b_brain_stacked  # DS Note:  accomplishes the multiplication AND adds bias
#             
#                 # Reset and update gates
#                 resetgate_brain = slice_w_m(hid_input_brain, 0) + slice_w_m(input_to_brain, 0)
#                 updategate_brain = slice_w_m(hid_input_brain, 1) + slice_w_m(input_to_brain, 1)
#                 resetgate_brain = self.nonlinearity_brain_resetgate(resetgate_brain)
#                 updategate_brain = self.nonlinearity_brain_updategate(updategate_brain)
#             
#                 hidden_update_in_brain = slice_w_m(input_to_brain, 2)
#                 hidden_update_brain = slice_w_m(hid_input_brain, 2)
#                 hidden_update_brain = hidden_update_in_brain + resetgate_brain * hidden_update_brain
#                 
#                 if self.grad_clipping:
#                     hidden_update_brain = theano.gradient.grad_clip(hidden_update_brain, -self.grad_clipping, self.grad_clipping)
#                 hidden_update_brain = self.nonlinearity_brain_hid_update(hidden_update_brain)
#                 
#                 hid_brain = (1 - updategate_brain) * hid_previous_brain + updategate_brain * hidden_update_brain
#             
#             else:                
#             
            hid_brain = hid_previous_brain
                              
            return T.concatenate([output_dmn, hid_brain], axis=1)
Code Example #19
File: nn_lung.py Project: ericsolo/python
    def get_output_for(self, input, **kwargs):

        ps = nonlinearities.sigmoid(input)
        sum_p_r_benign = T.sum(ps,axis=1)
        sum_log = T.sum(T.log(1-ps+1.e-12),axis=1)
        return T.concatenate([sum_log, sum_p_r_benign])
Code Example #20
File: nn_lung.py Project: ericsolo/python
    def get_output_for(self, input, **kwargs):
        ps = nonlinearities.sigmoid(input)
        powd = ps ** self.exp
        tmean = T.mean(powd, axis=(1, 2))
        return tmean
Code Example #21
def output_layer_nonlinearity(x):
    return T.clip(sigmoid(x),1e-5,1.0-1e-4)
Code Example #22
File: qrnn.py Project: cedricdeboom/Quasi-RNN
 def get_output_for(self, inputs, **kwargs):
     num_batch, _, _ = inputs.shape
     
     #add padded zeros in front of sequence
     padded_input = T.concatenate([T.zeros((num_batch, self.filter_width - 1, self.original_features)), inputs], axis=1)
     
     #reshape input to include 1 filter dimension
     rs = padded_input.dimshuffle([0, 'x', 1, 2])
     
     #apply convolutions for all "gates" (output = (n_batch, n_filters, n_time_steps, 1))
     Z = nonlinearities.tanh(T.nnet.conv2d(rs, self.Z_W,
                                           input_shape=(None, 1, self.internal_seq_len, self.original_features),
                                           filter_shape=(self.num_units, 1, self.filter_width, self.original_features)))
     F = nonlinearities.sigmoid(T.nnet.conv2d(rs, self.F_W,
                                           input_shape=(None, 1, self.internal_seq_len, self.original_features),
                                           filter_shape=(self.num_units, 1, self.filter_width, self.original_features)))
     
     if self.pooling == 'fo' or self.pooling == 'ifo':
         O = nonlinearities.sigmoid(T.nnet.conv2d(rs, self.O_W,
                                           input_shape=(None, 1, self.internal_seq_len, self.original_features),
                                           filter_shape=(self.num_units, 1, self.filter_width, self.original_features)))
     if self.pooling == 'ifo':
         I = nonlinearities.sigmoid(T.nnet.conv2d(rs, self.I_W,
                                           input_shape=(None, 1, self.internal_seq_len, self.original_features),
                                           filter_shape=(self.num_units, 1, self.filter_width, self.original_features)))
     
     # Because scan iterates over the first dimension we dimshuffle to
     # (n_time_steps, n_batch, n_features)
     Z = Z.flatten(ndim=3)
     Z = Z.dimshuffle([2, 0, 1])
     F = F.flatten(ndim=3)
     F = F.dimshuffle([2, 0, 1])
     if self.pooling == 'fo' or self.pooling == 'ifo':
         O = O.flatten(ndim=3)
         O = O.dimshuffle([2, 0, 1])
     if self.pooling == 'ifo':
         I = I.flatten(ndim=3)
         I = I.dimshuffle([2, 0, 1])
     
     # Dot against a 1s vector to repeat to shape (num_batch, num_units)
     ones = T.ones((num_batch, 1))
     hid_init = T.dot(ones, self.hid_init)
     
     # Create single recurrent computation step function
     # input_n is the n'th vector of the input: (n_batch, n_features)
     def step_f(forget_n, z_n, hid_previous, *args):
         return forget_n * hid_previous + (1.0 - forget_n) * z_n
     def step_fo(forget_n, z_n, o_n, hid_previous, cell_previous, *args):
         cell_current = forget_n * cell_previous + (1.0 - forget_n) * z_n
         hid_current = o_n * cell_current
         return [hid_current, cell_current]
     def step_ifo(forget_n, z_n, o_n, i_n, hid_previous, cell_previous, *args):
         cell_current = forget_n * cell_previous + i_n * z_n
         hid_current = o_n * cell_current
         return [hid_current, cell_current]
     
     if self.pooling == 'f':
         step = step_f
         sequences = [F, Z]
         outputs_info = [hid_init]
     if self.pooling == 'fo':
         step = step_fo
         sequences = [F, Z, O]
         # Note that, below, we use hid_init as the initial /cell/ state!
         # That way we only need to declare one set of weights
         outputs_info = [T.zeros((num_batch, self.num_units)), hid_init]
     if self.pooling == 'ifo':
         step = step_ifo
         sequences = [F, Z, O, I]
         outputs_info = [T.zeros((num_batch, self.num_units)), hid_init]
     
     outputs = theano.scan(
             fn=step,
             sequences=sequences,
             outputs_info=outputs_info,
             strict=True)[0]
     
     hid_out = outputs
     if self.pooling == 'fo' or self.pooling == 'ifo':
         hid_out = outputs[0]
     
     # Shuffle back to (n_batch, n_time_steps, n_features)
     hid_out = hid_out.dimshuffle([1, 0, 2])
     return hid_out
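The step functions above implement the QRNN pooling recurrences; f-pooling, for instance, updates the hidden state as h_t = f_t * h_{t-1} + (1 - f_t) * z_t. A tiny NumPy sketch of that recurrence over a toy sequence (shapes and values are illustrative only):

import numpy as np

n_steps, batch, units = 5, 2, 3
rng = np.random.RandomState(0)
F = rng.uniform(0.0, 1.0, size=(n_steps, batch, units))  # forget gates, already in (0, 1)
Z = rng.randn(n_steps, batch, units)                     # candidate activations
h = np.zeros((batch, units))                             # hid_init
for f_t, z_t in zip(F, Z):
    h = f_t * h + (1.0 - f_t) * z_t                      # same update as step_f above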
Code Example #23
    def get_output_for(self, inputs, deterministic=False, **kwargs):
        input = inputs[0]
        time_input = inputs[self.time_incoming_idx]
        event_input = inputs[self.event_incoming_idx]

        mask = None
        hid_init = None
        cell_init = None
        if self.mask_incoming_index > 0:
            mask = inputs[self.mask_incoming_index]
        if self.hid_init_incoming_index > 0:
            hid_init = inputs[self.hid_init_incoming_index]
        if self.cell_init_incoming_index > 0:
            cell_init = inputs[self.cell_init_incoming_index]

        if self.bn:
            input = self.bn.get_output_for(input)

        input = input.dimshuffle(1, 0, 2)
        time_input = time_input.dimshuffle(1, 0)

        seq_len, num_batch, _ = input.shape

        # Stack input weight matrices into a (num_inputs, 4*num_units)
        # matrix, which speeds up computation
        W_in_stacked = T.concatenate([
            self.W_in_to_ingate, self.W_in_to_forgetgate, self.W_in_to_cell,
            self.W_in_to_outgate
        ],
                                     axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = T.concatenate([
            self.W_hid_to_ingate, self.W_hid_to_forgetgate, self.W_hid_to_cell,
            self.W_hid_to_outgate
        ],
                                      axis=1)

        # Stack biases into a (4*num_units) vector
        b_stacked = T.concatenate(
            [self.b_ingate, self.b_forgetgate, self.b_cell, self.b_outgate],
            axis=0)

        input = T.dot(input, W_in_stacked) + b_stacked

        # PHASED LSTM: If test time, off-phase means really shut.
        if deterministic:
            print('Using true off for testing.')
            off_slope = 0.0
        else:
            print('Using {} for off_slope.'.format(self.off_alpha))
            off_slope = self.off_alpha

        if self.model != 'LSTM':
            # PHASED LSTM: Pregenerate broadcast vars.
            #   Same neuron in different batches has same shift and period.  Also,
            #   precalculate the middle (on_mid) and end (on_end) of the open-phase
            #   ramp.
            shift_broadcast = self.shift_timegate.dimshuffle(['x', 0])
            period_broadcast = T.abs_(self.period_timegate.dimshuffle(['x',
                                                                       0]))
            on_mid_broadcast = T.abs_(self.on_end_timegate.dimshuffle(
                ['x', 0])) * 0.5 * period_broadcast
            on_end_broadcast = T.abs_(self.on_end_timegate.dimshuffle(
                ['x', 0])) * period_broadcast

        if self.model == 'HELSTM':
            event_W = self.event_w_timegate
            event_b = T.shape_padleft(self.event_b_timegate, 2)
            out_W = self.out_w_timegate
            out_b = T.shape_padleft(self.out_b_timegate, 2)
            hid_attention = nonlinearities.leaky_rectify(
                T.dot(event_input, event_W) + event_b)
            out_attention = nonlinearities.sigmoid(
                T.dot(hid_attention, out_W) + out_b)
            out_attention = out_attention.dimshuffle(1, 0, 2)

        def slice_w(x, n):
            return x[:, n * self.num_units:(n + 1) * self.num_units]

        def step(input_n, cell_previous, hid_previous, *args):
            gates = input_n + T.dot(hid_previous, W_hid_stacked)

            # Clip gradients
            if self.grad_clipping:
                gates = theano.gradient.grad_clip(gates, -self.grad_clipping,
                                                  self.grad_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous * self.W_cell_to_ingate
                forgetgate += cell_previous * self.W_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)

            # Mix in new stuff
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                outgate += cell * self.W_cell_to_outgate
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new hidden unit activation
            hid = outgate * self.nonlinearity(cell)
            return [cell, hid]

        # PHASED LSTM: The actual calculation of the time gate
        def calc_time_gate(time_input_n):
            # Broadcast the time across all units
            t_broadcast = time_input_n.dimshuffle([0, 'x'])
            # Get the time within the period
            in_cycle_time = T.mod(t_broadcast + shift_broadcast,
                                  period_broadcast)
            # Find the phase
            is_up_phase = T.le(in_cycle_time, on_mid_broadcast)
            is_down_phase = T.gt(in_cycle_time, on_mid_broadcast) * T.le(
                in_cycle_time, on_end_broadcast)

            # Set the mask
            sleep_wake_mask = T.switch(
                is_up_phase, in_cycle_time / on_mid_broadcast,
                T.switch(is_down_phase,
                         (on_end_broadcast - in_cycle_time) / on_mid_broadcast,
                         off_slope * (in_cycle_time / period_broadcast)))

            return sleep_wake_mask

        #HELSTM: Mask the updates based on the time phase and event attention
        def step_masked(input_n, time_input_n, event_input_n, mask_n,
                        cell_previous, hid_previous, *args):
            cell, hid = step(input_n, cell_previous, hid_previous, *args)

            if self.model != 'LSTM':
                # Get time gate openness
                sleep_wake_mask = calc_time_gate(time_input_n)

                if self.model == 'HELSTM':
                    sleep_wake_mask = event_input_n * sleep_wake_mask

                # Sleep if off, otherwise stay a bit on
                cell = sleep_wake_mask * cell + (
                    1. - sleep_wake_mask) * cell_previous
                hid = sleep_wake_mask * hid + (1. -
                                               sleep_wake_mask) * hid_previous

            #Skip over any input with mask 0 by copying the previous
            #hidden state; proceed normally for any input with mask 1.
            cell = T.switch(mask_n, cell, cell_previous)
            hid = T.switch(mask_n, hid, hid_previous)

            return [cell, hid]

        if mask is not None:
            # mask is given as (batch_size, seq_len). Because scan iterates
            # over first dimension, we dimshuffle to (seq_len, batch_size) and
            # add a broadcastable dimension
            mask = mask.dimshuffle(1, 0, 'x')
        else:
            mask = T.ones_like(time_input).dimshuffle(0, 1, 'x')

        if self.model != 'HELSTM':
            out_attention = event_input  #if not using HELSTM, out_attention is of no use but still need to assign a value to complete sequences
        sequences = [input, time_input, out_attention, mask]
        step_fun = step_masked

        ones = T.ones((num_batch, 1))
        if not isinstance(self.cell_init, Layer):
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            cell_init = T.dot(ones, self.cell_init)

        if not isinstance(self.hid_init, Layer):
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            hid_init = T.dot(ones, self.hid_init)

        # Scan op iterates over first dimension of input and repeatedly
        # applies the step function
        cell_out, hid_out = theano.scan(fn=step_fun,
                                        sequences=sequences,
                                        outputs_info=[cell_init, hid_init],
                                        go_backwards=self.backwards)[0]

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            hid_out = hid_out[-1]
        else:
            # dimshuffle back to (n_batch, n_time_steps, n_features))
            hid_out = hid_out.dimshuffle(1, 0, 2)

            # if scan is backward reverse the output
            if self.backwards:
                hid_out = hid_out[:, ::-1]

        return hid_out
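calc_time_gate above turns each timestamp into a per-unit openness value from its phase within a learned period: a rising ramp up to the middle of the open window, a falling ramp to its end, and a small off_slope leak elsewhere. A small NumPy sketch of the same piecewise function, with made-up parameter values:

import numpy as np

period, shift, on_end_frac, off_slope = 5.0, 1.0, 0.4, 1e-3
t = np.linspace(0.0, 10.0, 101)
in_cycle = np.mod(t + shift, period)
on_end = on_end_frac * period               # end of the open phase
on_mid = 0.5 * on_end                       # peak of the openness ramp
gate = np.where(in_cycle <= on_mid, in_cycle / on_mid,
                np.where(in_cycle <= on_end, (on_end - in_cycle) / on_mid,
                         off_slope * in_cycle / period))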
Code Example #24
def safe_sigmoid(x, eps=1e-6):
    return T.clip(sigmoid(x), eps, 1-eps)