Example #1
0
    def __init__(self, input, input_dim, hidden_dim, output_dim, params=None):
        """Wire a forward GRU and a reversed-input GRU into one softmax output.

        Args:
            input: symbolic input sequence. It is also sliced with ``[::-1]``
                to feed the backward GRU (presumably the leading axis is
                time -- confirm against the caller).
            input_dim: passed unchanged to both GRU constructors.
            hidden_dim: passed to both GRUs; also the row count of the
                ``V_f`` / ``V_b`` projection matrices.
            output_dim: passed to both GRUs; also the column count of
                ``V_f`` / ``V_b`` and the length of ``by``.
            params: must be None.

        Raises:
            NotImplementedError: when ``params`` is not None.
        """
        def make_shared(name, value):
            # All tensors created here are named, borrowed shared variables.
            return theano.shared(value=value, name=name, borrow=True)

        self.input_f = input
        self.input_b = input[::-1]

        if params is not None:
            # Restoring from persistent storage would require changing GRU
            # itself, so it is not supported. Implementing this class without
            # relying on GRU would be the cleaner way to add it.
            raise NotImplementedError

        gru_args = dict(input_dim=input_dim,
                        hidden_dim=hidden_dim,
                        output_dim=output_dim)
        self.fwd_gru = GRU(input=self.input_f, **gru_args)
        self.bwd_gru = GRU(input=self.input_b, **gru_args)

        projection_shape = (hidden_dim, output_dim)
        self.V_f = make_shared(
            'V_f', get(identifier='uniform', shape=projection_shape))
        self.V_b = make_shared(
            'V_b', get(identifier='uniform', shape=projection_shape))
        self.by = make_shared('by', get('zero', shape=(output_dim, )))

        # The classification is done by this class, so each GRU's own V and
        # b_y are not part of the computational graph and are left out of the
        # parameter list (a separate output layer would be tidier; this is
        # the workaround).
        recurrent_names = ('W_z', 'U_z', 'b_z', 'W_r', 'U_r', 'b_r',
                           'W', 'U', 'b_h')
        trainable = []
        for gru in (self.fwd_gru, self.bwd_gru):
            trainable.extend(getattr(gru, attr) for attr in recurrent_names)
        trainable.extend([self.V_f, self.V_b, self.by])
        self.params = trainable

        # Undo the input reversal so both hidden sequences line up step by
        # step. Note that this overwrites the backward GRU's own attribute.
        self.bwd_gru.h_t = self.bwd_gru.h_t[::-1]

        # Sum of the two projected hidden sequences.
        fwd_projection = T.dot(self.fwd_gru.h_t, self.V_f)
        bwd_projection = T.dot(self.bwd_gru.h_t, self.V_b)
        self.h_t = fwd_projection + bwd_projection

        self.y_t = T.nnet.softmax(self.h_t + self.by)
        self.y = T.argmax(self.y_t, axis=1)
Example #2
0
    def __init__(self,
                 input,
                 input_dim,
                 minibatch,
                 hidden_dim,
                 output_dim,
                 params=None):
        """Combine a forward and a reversed-input LSTM through four output heads.

        Args:
            input: symbolic input sequence. ``input[::-1]`` feeds the backward
                LSTM (presumably the leading axis is time -- confirm).
            input_dim: passed unchanged to both LSTM constructors.
            minibatch: passed unchanged to both LSTM constructors.
            hidden_dim: passed to both LSTMs; row count of every ``V_*`` head
                matrix.
            output_dim: passed to both LSTMs; column count of every ``V_*``
                head matrix and length of every ``by_*`` bias.
            params: None to create fresh parameters, otherwise a sequence of
                exactly 36 values in the same order as ``self.params``.
        """
        def make_shared(name, identifier, shape):
            return theano.shared(value=get(identifier=identifier, shape=shape),
                                 name=name,
                                 borrow=True)

        self.in_fwd = input
        self.in_bwd = input[::-1]

        lstm_args = dict(input_dim=input_dim,
                         minibatch=minibatch,
                         hidden_dim=hidden_dim,
                         output_dim=output_dim)

        if params is not None:
            # Reuse existing parameters: 12 per direction, then the heads.
            (fwd_Wi, fwd_Ui, fwd_bi, fwd_Wf, fwd_Uf, fwd_bf,
             fwd_Wo, fwd_Uo, fwd_bo, fwd_Wc, fwd_Uc, fwd_bc,
             bwd_Wi, bwd_Ui, bwd_bi, bwd_Wf, bwd_Uf, bwd_bf,
             bwd_Wo, bwd_Uo, bwd_bo, bwd_Wc, bwd_Uc, bwd_bc,
             head_Vf0, head_Vf1, head_Vf2, head_Vf3,
             head_Vb0, head_Vb1, head_Vb2, head_Vb3,
             head_by0, head_by1, head_by2, head_by3) = params

            # Each LSTM is handed 20 values; the trailing 8 are filled with a
            # dummy shared variable (presumably the LSTM's own output-layer
            # slots, which this class does not use -- confirm against LSTM).
            void_M = theano.shared(value=np.zeros(1))
            padding = [void_M] * 8

            fwd_param = [
                fwd_Wi, fwd_Ui, fwd_bi, fwd_Wf, fwd_Uf, fwd_bf,
                fwd_Wo, fwd_Uo, fwd_bo, fwd_Wc, fwd_Uc, fwd_bc
            ] + padding
            bwd_param = [
                bwd_Wi, bwd_Ui, bwd_bi, bwd_Wf, bwd_Uf, bwd_bf,
                bwd_Wo, bwd_Uo, bwd_bo, bwd_Wc, bwd_Uc, bwd_bc
            ] + padding

            self.fwd_lstm = LSTM(input=self.in_fwd,
                                 params=fwd_param,
                                 **lstm_args)
            self.bwd_lstm = LSTM(input=self.in_bwd,
                                 params=bwd_param,
                                 **lstm_args)

            self.V_f_0, self.V_f_1 = head_Vf0, head_Vf1
            self.V_f_2, self.V_f_3 = head_Vf2, head_Vf3

            self.V_b_0, self.V_b_1 = head_Vb0, head_Vb1
            self.V_b_2, self.V_b_3 = head_Vb2, head_Vb3

            self.by_0, self.by_1 = head_by0, head_by1
            self.by_2, self.by_3 = head_by2, head_by3
        else:
            # Fresh parameters.
            self.fwd_lstm = LSTM(input=self.in_fwd, **lstm_args)
            self.bwd_lstm = LSTM(input=self.in_bwd, **lstm_args)

            head_shape = (hidden_dim, output_dim)
            bias_shape = (output_dim, )

            # forward-direction head weights
            self.V_f_0 = make_shared('V_f_0', 'uniform', head_shape)
            self.V_f_1 = make_shared('V_f_1', 'uniform', head_shape)
            self.V_f_2 = make_shared('V_f_2', 'uniform', head_shape)
            self.V_f_3 = make_shared('V_f_3', 'uniform', head_shape)

            # backward-direction head weights
            self.V_b_0 = make_shared('V_b_0', 'uniform', head_shape)
            self.V_b_1 = make_shared('V_b_1', 'uniform', head_shape)
            self.V_b_2 = make_shared('V_b_2', 'uniform', head_shape)
            self.V_b_3 = make_shared('V_b_3', 'uniform', head_shape)

            # head biases
            self.by_0 = make_shared('by_0', 'zero', bias_shape)
            self.by_1 = make_shared('by_1', 'zero', bias_shape)
            self.by_2 = make_shared('by_2', 'zero', bias_shape)
            self.by_3 = make_shared('by_3', 'zero', bias_shape)

        # Parameter list: forward LSTM, backward LSTM, then the head tensors.
        lstm_names = ('W_i', 'U_i', 'b_i', 'W_f', 'U_f', 'b_f',
                      'W_o', 'U_o', 'b_o', 'W_c', 'U_c', 'b_c')
        trainable = []
        for lstm in (self.fwd_lstm, self.bwd_lstm):
            trainable.extend(getattr(lstm, attr) for attr in lstm_names)
        trainable.extend([
            self.V_f_0, self.V_f_1, self.V_f_2, self.V_f_3,
            self.V_b_0, self.V_b_1, self.V_b_2, self.V_b_3,
            self.by_0, self.by_1, self.by_2, self.by_3
        ])
        self.params = trainable

        # Undo the input reversal so both hidden sequences line up. Note that
        # this overwrites the backward LSTM's own attribute.
        self.bwd_lstm.h_t = self.bwd_lstm.h_t[::-1]
        fwd_hidden = self.fwd_lstm.h_t
        bwd_hidden = self.bwd_lstm.h_t

        def head_logits(V_f, V_b, by):
            # Sum of both projected hidden sequences plus bias; no
            # nonlinearity is applied per head.
            return T.dot(fwd_hidden, V_f) + T.dot(bwd_hidden, V_b) + by

        self.y_t_0 = head_logits(self.V_f_0, self.V_b_0, self.by_0)
        self.y_t_1 = head_logits(self.V_f_1, self.V_b_1, self.by_1)
        self.y_t_2 = head_logits(self.V_f_2, self.V_b_2, self.by_2)
        self.y_t_3 = head_logits(self.V_f_3, self.V_b_3, self.by_3)

        self.y_temp = T.stack(
            [self.y_t_0, self.y_t_1, self.y_t_2, self.y_t_3], axis=2)
        dim0 = self.y_temp.shape[0]
        dim1 = self.y_temp.shape[1]
        dim2 = self.y_temp.shape[2]

        # Collapse the first two axes so softmax receives a 2-D tensor, then
        # pick the most probable class per row.
        flat_probs = T.nnet.softmax(T.reshape(self.y_temp,
                                              [dim0 * dim1, dim2]))
        flat_labels = T.argmax(flat_probs, axis=1)

        # Restore the leading two axes on both results.
        self.y_t = T.reshape(flat_probs, [dim0, dim1, dim2])
        self.y_label = T.reshape(flat_labels, [dim0, dim1])
Example #3
0
    def __init__(self,
                 input,
                 input_dim,
                 minibatch,
                 hidden_dim,
                 output_dim,
                 init='uniform',
                 inner_init='orthonormal',
                 gate_act=T.nnet.sigmoid,
                 tanh_act=T.tanh,
                 params=None):
        """Build an LSTM with four output heads and scan it over ``input``.

        Args:
            input: symbolic sequence handed to ``theano.scan`` as
                ``sequences``.
            input_dim: row count of the input-to-hidden matrices ``W_*``.
            minibatch: when equal to 1 the initial states have shape
                ``(hidden_dim,)``; otherwise ``(minibatch, hidden_dim)``.
            hidden_dim: size of the hidden and cell state.
            output_dim: column count of each head matrix ``V_k`` and length
                of each head bias ``b_y_k``.
            init: initializer identifier for ``W_*`` and ``V_k``.
            inner_init: initializer identifier for the recurrent ``U_*``.
            gate_act: activation applied to the input/forget/output gates.
            tanh_act: activation applied to the cell candidate and to the
                cell state when producing the hidden state.
            params: None to create fresh parameters, otherwise a sequence of
                exactly 20 values in the same order as ``self.params``.
        """
        def make_shared(name, identifier, shape):
            return theano.shared(value=get(identifier=identifier, shape=shape),
                                 name=name,
                                 borrow=True)

        self.input = input
        self.gate_act = gate_act
        self.activation = tanh_act

        if params is not None:
            # Adopt the supplied parameters as-is.
            (self.W_i, self.U_i, self.b_i,
             self.W_f, self.U_f, self.b_f,
             self.W_o, self.U_o, self.b_o,
             self.W_c, self.U_c, self.b_c,
             self.V_0, self.b_y_0,
             self.V_1, self.b_y_1,
             self.V_2, self.b_y_2,
             self.V_3, self.b_y_3) = params
        else:
            in_shape = (input_dim, hidden_dim)
            rec_shape = (hidden_dim, hidden_dim)
            gate_bias_shape = (hidden_dim, )
            head_shape = (hidden_dim, output_dim)
            head_bias_shape = (output_dim, )

            # input gate
            self.W_i = make_shared('W_i', init, in_shape)
            self.U_i = make_shared('U_i', inner_init, rec_shape)
            self.b_i = make_shared('b_i', 'zero', gate_bias_shape)
            # forget gate (its bias uses the 'one' initializer)
            self.W_f = make_shared('W_f', init, in_shape)
            self.U_f = make_shared('U_f', inner_init, rec_shape)
            self.b_f = make_shared('b_f', 'one', gate_bias_shape)
            # output gate
            self.W_o = make_shared('W_o', init, in_shape)
            self.U_o = make_shared('U_o', inner_init, rec_shape)
            self.b_o = make_shared('b_o', 'zero', gate_bias_shape)
            # memory cell candidate
            self.W_c = make_shared('W_c', init, in_shape)
            self.U_c = make_shared('U_c', inner_init, rec_shape)
            self.b_c = make_shared('b_c', 'zero', gate_bias_shape)
            # four output heads
            self.V_0 = make_shared('V_0', init, head_shape)
            self.b_y_0 = make_shared('b_y_0', 'zero', head_bias_shape)
            self.V_1 = make_shared('V_1', init, head_shape)
            self.b_y_1 = make_shared('b_y_1', 'zero', head_bias_shape)
            self.V_2 = make_shared('V_2', init, head_shape)
            self.b_y_2 = make_shared('b_y_2', 'zero', head_bias_shape)
            self.V_3 = make_shared('V_3', init, head_shape)
            self.b_y_3 = make_shared('b_y_3', 'zero', head_bias_shape)

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,
            self.W_c, self.U_c, self.b_c,
            self.V_0, self.b_y_0,
            self.V_1, self.b_y_1,
            self.V_2, self.b_y_2,
            self.V_3, self.b_y_3,
        ]

        # Zero initial cell and hidden state; the shape depends on whether a
        # minibatch axis is requested.
        if minibatch == 1:
            state_shape = (hidden_dim, )
        else:
            state_shape = (minibatch, hidden_dim)
        self.c0 = make_shared('c0', 'zero', state_shape)
        self.h0 = make_shared('h0', 'zero', state_shape)

        def step(x_t, c_prev, h_prev):
            def gate(W, U, b):
                return gate_act(T.dot(x_t, W) + T.dot(h_prev, U) + b)

            i_t = gate(self.W_i, self.U_i, self.b_i)
            f_t = gate(self.W_f, self.U_f, self.b_f)
            o_t = gate(self.W_o, self.U_o, self.b_o)

            # cell state: retained old memory plus gated new candidate
            cell_input = T.dot(x_t, self.W_c) + self.b_c
            retained = f_t * c_prev
            written = i_t * tanh_act(cell_input + T.dot(h_prev, self.U_c))
            c_t = retained + written

            # hidden state
            h_t = o_t * tanh_act(c_t)

            # Per-head scores (no per-head nonlinearity), stacked on axis 1
            # and passed through softmax.
            heads = ((self.V_0, self.b_y_0), (self.V_1, self.b_y_1),
                     (self.V_2, self.b_y_2), (self.V_3, self.b_y_3))
            scores = [T.dot(h_t, V) + b for V, b in heads]
            y_t = T.nnet.softmax(T.stack(scores, axis=1))
            # index of the largest softmax entry along axis 1
            y_label = T.argmax(y_t, axis=1)

            return c_t, h_t, y_t, y_label

        # Only the cell and hidden state are fed back between steps.
        outputs, _ = theano.scan(step,
                                 sequences=self.input,
                                 outputs_info=[self.c0, self.h0, None, None])
        self.c_t, self.h_t, self.y_t, self.y_label = outputs
Example #4
0
    def __init__(self,
                 input,
                 input_dim,
                 hidden_dim,
                 output_dim,
                 init='uniform',
                 inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh,
                 params=None):
        """Build a GRU with a softmax output and scan it over ``input``.

        Args:
            input: symbolic sequence handed to ``theano.scan`` as
                ``sequences``.
            input_dim: row count of the input-to-hidden matrices.
            hidden_dim: size of the hidden state.
            output_dim: column count of ``V`` and length of ``b_y``.
            init: initializer identifier for ``W_z``, ``W_r``, ``W``, ``V``.
            inner_init: initializer identifier for ``U_z``, ``U_r``, ``U``.
            inner_activation: activation applied to the update/reset gates.
            activation: activation applied to the candidate hidden state.
            params: None to create fresh parameters, otherwise a sequence of
                exactly 11 values in the same order as ``self.params``.
        """
        def make_shared(name, identifier, shape):
            return theano.shared(value=get(identifier=identifier, shape=shape),
                                 name=name,
                                 borrow=True)

        self.input = input
        self.hidden_dim = hidden_dim
        self.activation = activation
        self.inner_activation = inner_activation

        if params is not None:
            (self.W_z, self.U_z, self.b_z,
             self.W_r, self.U_r, self.b_r,
             self.W, self.U, self.V,
             self.b_h, self.b_y) = params
        else:
            in_shape = (input_dim, hidden_dim)
            rec_shape = (hidden_dim, hidden_dim)
            hidden_bias_shape = (hidden_dim, )

            # update gate
            self.W_z = make_shared('W_z', init, in_shape)
            self.U_z = make_shared('U_z', inner_init, rec_shape)
            self.b_z = make_shared('b_z', 'zero', hidden_bias_shape)
            # reset gate
            self.W_r = make_shared('W_r', init, in_shape)
            self.U_r = make_shared('U_r', inner_init, rec_shape)
            self.b_r = make_shared('b_r', 'zero', hidden_bias_shape)
            # candidate state (W, U, b_h) and output layer (V, b_y)
            self.W = make_shared('W', init, in_shape)
            self.U = make_shared('U', inner_init, rec_shape)
            self.V = make_shared('V', init, (hidden_dim, output_dim))
            self.b_h = make_shared('b_h', 'zero', hidden_bias_shape)
            self.b_y = make_shared('b_y', 'zero', (output_dim, ))

        # Zero initial hidden state (no minibatch axis in this class).
        self.h0 = make_shared('h0', 'zero', (hidden_dim, ))

        self.params = [
            self.W_z, self.U_z, self.b_z,
            self.W_r, self.U_r, self.b_r,
            self.W, self.U, self.V,
            self.b_h, self.b_y,
        ]

        def step(x_t, h_prev):
            # input-side contributions
            in_z = T.dot(x_t, self.W_z) + self.b_z
            in_r = T.dot(x_t, self.W_r) + self.b_r
            in_h = T.dot(x_t, self.W) + self.b_h

            update = inner_activation(in_z + T.dot(h_prev, self.U_z))
            reset = inner_activation(in_r + T.dot(h_prev, self.U_r))
            candidate = activation(in_h + T.dot(reset * h_prev, self.U))

            # blend: (1 - update) * candidate + update * previous state
            complement = T.ones_like(update) - update
            h_t = complement * candidate + update * h_prev

            probs = T.nnet.softmax(T.dot(h_t, self.V) + self.b_y)

            # Only the first row of the softmax result is emitted per step.
            return h_t, probs[0]

        # Only the hidden state is fed back between steps.
        outputs, _ = theano.scan(step,
                                 sequences=self.input,
                                 outputs_info=[self.h0, None])
        self.h_t, self.y_t = outputs

        self.y = T.argmax(self.y_t, axis=1)
Example #5
0
    def __init__(self,
                 input,
                 input_dim,
                 minibatch,
                 hidden_dim,
                 output_dim,
                 params=None):
        """Combine a forward and a reversed-input RNN into one sigmoid output.

        Args:
            input: symbolic input sequence. ``input[::-1]`` feeds the backward
                RNN (presumably the leading axis is time -- confirm).
            input_dim: passed unchanged to both RNN constructors.
            minibatch: passed unchanged to both RNN constructors.
            hidden_dim: passed to both RNNs; row count of ``V_fwd``/``V_bwd``.
            output_dim: passed to both RNNs; column count of
                ``V_fwd``/``V_bwd`` and length of ``by``.
            params: None to create fresh parameters, otherwise a sequence of
                exactly 9 values in the same order as ``self.params``.
        """
        def make_shared(name, value):
            return theano.shared(value=value, name=name, borrow=True)

        self.in_fwd = input
        self.in_bwd = input[::-1]
        self.hidden_dim = hidden_dim

        rnn_args = dict(input_dim=input_dim,
                        minibatch=minibatch,
                        hidden_dim=hidden_dim,
                        output_dim=output_dim)

        if params is not None:
            # Reuse existing parameters: 3 per direction, then the output
            # layer.
            (fwd_W, fwd_U, fwd_bh,
             bwd_W, bwd_U, bwd_bh,
             out_V_fwd, out_V_bwd, out_by) = params

            # Each RNN is handed 5 values; the last two are filled with a
            # dummy shared variable (presumably the RNN's own output-layer
            # slots, which this class does not use -- confirm against RNN).
            void_M = theano.shared(value=np.zeros(1))
            fwd_param = [fwd_W, fwd_U, fwd_bh] + [void_M] * 2
            bwd_param = [bwd_W, bwd_U, bwd_bh] + [void_M] * 2

            self.fwd_rnn = RNN(input=self.in_fwd,
                               params=fwd_param,
                               **rnn_args)
            self.bwd_rnn = RNN(input=self.in_bwd,
                               params=bwd_param,
                               **rnn_args)
            self.V_fwd = out_V_fwd
            self.V_bwd = out_V_bwd
            self.by = out_by
        else:
            # Fresh parameters.
            self.fwd_rnn = RNN(input=self.in_fwd, **rnn_args)
            self.bwd_rnn = RNN(input=self.in_bwd, **rnn_args)

            projection_shape = (hidden_dim, output_dim)
            self.V_fwd = make_shared(
                'V_f', get(identifier='uniform', shape=projection_shape))
            self.V_bwd = make_shared(
                'V_b', get(identifier='uniform', shape=projection_shape))
            self.by = make_shared('by', get('zero', shape=(output_dim, )))

        # Parameter list: forward RNN, backward RNN, then the output layer.
        trainable = []
        for rnn in (self.fwd_rnn, self.bwd_rnn):
            trainable.extend([rnn.W, rnn.U, rnn.bh])
        trainable.extend([self.V_fwd, self.V_bwd, self.by])
        self.params = trainable

        # Undo the input reversal so both hidden sequences line up. Note that
        # this overwrites the backward RNN's own attribute.
        self.bwd_rnn.h_t = self.bwd_rnn.h_t[::-1]

        # Sigmoid of the summed projections of both hidden sequences plus
        # bias.
        fwd_projection = T.dot(self.fwd_rnn.h_t, self.V_fwd)
        bwd_projection = T.dot(self.bwd_rnn.h_t, self.V_bwd)
        self.y_t = T.nnet.sigmoid(fwd_projection + bwd_projection + self.by)
Example #6
0
    def __init__(self,
                 input,
                 input_dim,
                 minibatch,
                 hidden_dim,
                 output_dim,
                 init='uniform',
                 inner_init='orthonormal',
                 params=None):
        """Build a tanh RNN with a sigmoid output and scan it over ``input``.

        Args:
            input: symbolic sequence handed to ``theano.scan`` as
                ``sequences``.
            input_dim: row count of the input-to-hidden matrix ``W``.
            minibatch: when equal to 1 the initial hidden state has shape
                ``(hidden_dim,)``; otherwise ``(minibatch, hidden_dim)``.
            hidden_dim: size of the hidden state.
            output_dim: column count of ``V`` and length of ``by``.
            init: initializer identifier for ``W`` and ``V``.
            inner_init: initializer identifier for the recurrent ``U``.
            params: None to create fresh parameters, otherwise a sequence of
                exactly 5 values ordered ``W, U, bh, V, by``.
        """
        def make_shared(name, identifier, shape):
            return theano.shared(value=get(identifier=identifier, shape=shape),
                                 name=name,
                                 borrow=True)

        self.input = input
        self.hidden_dim = hidden_dim

        if params is not None:
            self.W, self.U, self.bh, self.V, self.by = params
        else:
            # recurrent layer
            self.W = make_shared('W', init, (input_dim, hidden_dim))
            self.U = make_shared('U', inner_init, (hidden_dim, hidden_dim))
            self.bh = make_shared('bh', 'zero', (hidden_dim, ))
            # output layer
            self.V = make_shared('V', init, (hidden_dim, output_dim))
            self.by = make_shared('by', 'zero', (output_dim, ))

        self.params = [self.W, self.U, self.bh, self.V, self.by]

        # Zero initial hidden state; the shape depends on whether a minibatch
        # axis is requested.
        if minibatch == 1:
            state_shape = (hidden_dim, )
        else:
            state_shape = (minibatch, hidden_dim)
        self.h0 = make_shared('h0', 'zero', state_shape)

        def step(x_t, h_prev):
            pre_activation = (T.dot(x_t, self.W) + T.dot(h_prev, self.U) +
                              self.bh)
            h_t = T.tanh(pre_activation)
            y_t = T.nnet.sigmoid(T.dot(h_t, self.V) + self.by)
            return h_t, y_t

        # Only the hidden state is fed back between steps.
        outputs, _ = theano.scan(step,
                                 sequences=input,
                                 outputs_info=[self.h0, None])
        self.h_t, self.y_t = outputs