Code example #1
    def __init__(self, input, emb_mat, emb_dim, hidden_dim, init='uniform', inner_init='orthonormal',
                 activation=T.tanh, params=None):
        input = input.dimshuffle(1, 0)
        if params is None:
            self.emb = theano.shared(value=np.asarray(emb_mat, dtype=theano.config.floatX),
                                     name='emb', borrow=True)
            self.W = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                   name='W', borrow=True)
            self.U = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                   name='U', borrow=True)
            self.bh = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                    name='bh', borrow=True)
        else:
            self.emb, self.W, self.U, self.bh = params

        self.h0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='h0', borrow=True)
        self.params = [self.emb,
                       self.W, self.U,
                       self.bh]

        def recurrence(x_t, h_tm_prev):
            h_t = activation(T.dot(self.emb[x_t], self.W) +
                             T.dot(h_tm_prev, self.U) + self.bh)
            return h_t

        h, _ = theano.scan(
            fn=recurrence,
            sequences=input,
            outputs_info=T.alloc(self.h0, input.shape[1], hidden_dim)
        )

        # The hidden state and prediction at the last time-step are passed to the decoder;
        # the prediction at the last time-step is always 'eos' and is therefore ignored.
        self.h = h[-1]
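
Every example in this listing calls an initializer helper get(identifier=..., shape=..., scale=...) that the snippets themselves do not show. Below is a minimal sketch of what such a helper plausibly looks like, inferred from the identifiers the examples pass in ('uniform', 'orthonormal', 'zero', 'one', 'emb'); the exact distributions and scale defaults in the original projects may differ.

import numpy as np
import theano


def get(identifier, shape, scale=0.08):
    # 'zero' / 'one': constant initializers, used for biases
    if identifier == 'zero':
        return np.zeros(shape, dtype=theano.config.floatX)
    if identifier == 'one':
        return np.ones(shape, dtype=theano.config.floatX)
    # 'uniform' / 'emb': small random uniform values (the examples pass
    # scale=np.sqrt(3) for 'emb')
    if identifier in ('uniform', 'emb'):
        return np.random.uniform(low=-scale, high=scale, size=shape).astype(theano.config.floatX)
    # 'orthonormal': orthonormal matrix via QR decomposition of a random
    # Gaussian matrix; used for the square hidden-to-hidden weights
    if identifier == 'orthonormal':
        q, _ = np.linalg.qr(np.random.normal(0.0, 1.0, shape))
        return q.astype(theano.config.floatX)
    raise NotImplementedError('unknown initializer: %s' % identifier)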
Code example #2
File: gru.py Project: hariom-yadaw/neural-converse
    def __init__(self, input, emb_mat, emb_dim, hidden_dim, init='uniform', inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid, activation=T.tanh,
                 params=None):
        input = input.dimshuffle(1, 0)
        if params is None:
            self.emb = theano.shared(value=np.asarray(emb_mat, dtype=theano.config.floatX),
                                     name='emb', borrow=True)
            # update gate
            self.W_z = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_z', borrow=True)
            self.U_z = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_z', borrow=True)
            self.b_z = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                     name='b_z', borrow=True)
            # reset gate
            self.W_r = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_r', borrow=True)
            self.U_r = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_r', borrow=True)
            self.b_r = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                     name='b_r', borrow=True)
            # hidden state
            self.W_h = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_h', borrow=True)
            self.U_h = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_h', borrow=True)
            self.b_h = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                     name='b_h', borrow=True)
        else:
            self.emb, self.W_z, self.U_z, self.b_z, self.W_r, self.U_r, self.b_r, \
                self.W_h, self.U_h, self.b_h = params

        self.h0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='h0', borrow=True)
        self.params = [self.emb,
                       self.W_z, self.U_z, self.b_z,
                       self.W_r, self.U_r, self.b_r,
                       self.W_h, self.U_h, self.b_h]

        def recurrence(x_t, h_tm_prev):
            x_z = T.dot(self.emb[x_t], self.W_z) + self.b_z
            x_r = T.dot(self.emb[x_t], self.W_r) + self.b_r
            x_h = T.dot(self.emb[x_t], self.W_h) + self.b_h

            z_t = inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
            r_t = inner_activation(x_r + T.dot(h_tm_prev, self.U_r))
            hh_t = activation(x_h + T.dot(r_t * h_tm_prev, self.U_h))
            h_t = (T.ones_like(z_t) - z_t) * hh_t + z_t * h_tm_prev

            return h_t

        h, _ = theano.scan(
            fn=recurrence,
            sequences=input,
            outputs_info=T.alloc(self.h0, input.shape[1], hidden_dim)
        )

        # The hidden state and prediction at the last time-step are passed to the decoder;
        # the prediction at the last time-step is always 'eos' and is therefore ignored.
        self.h = h[-1]
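
A hedged sketch of how such an encoder might be instantiated and compiled. The class name GruEncoder is hypothetical (the snippets show only __init__), the (batch, time) layout of the integer input is an assumption implied by the dimshuffle(1, 0) above, and all sizes are illustrative.

import numpy as np
import theano
import theano.tensor as T

x = T.imatrix('x')                                    # (batch, time) word indices -- assumed layout
emb_mat = np.random.uniform(-0.1, 0.1, (5000, 128))   # hypothetical 5000-word vocabulary
encoder = GruEncoder(input=x, emb_mat=emb_mat, emb_dim=128, hidden_dim=256)
encode = theano.function([x], encoder.h)

h_last = encode(np.random.randint(0, 5000, size=(4, 10)).astype('int32'))
print(h_last.shape)                                   # (4, 256): one final hidden state per sequence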
Code example #3
    def __init__(self, input, input_dim, hidden_dim, output_dim,
                 mini_batch=False, params=None):
        self.mini_batch = mini_batch
        input_f = input
        if mini_batch:
            input_b = input[:, ::-1]
        else:
            input_b = input[::-1]
        if params is None:
            self.fwd_rnn = Rnn(input=input_f, input_dim=input_dim, hidden_dim=hidden_dim,
                               output_dim=output_dim, mini_batch=mini_batch)
            self.bwd_rnn = Rnn(input=input_b, input_dim=input_dim, hidden_dim=hidden_dim,
                               output_dim=output_dim, mini_batch=mini_batch)
            self.V_f = theano.shared(
                value=get(identifier='uniform', shape=(hidden_dim, output_dim)),
                name='V_f',
                borrow=True
            )
            self.V_b = theano.shared(
                value=get(identifier='uniform', shape=(hidden_dim, output_dim)),
                name='V_b',
                borrow=True
            )
            self.by = theano.shared(
                value=get('zero', shape=(output_dim,)),
                name='by',
                borrow=True)

        else:
            # Loading parameters from persistent storage would require changes to the
            # current Rnn() implementation and is therefore not supported. A cleaner
            # approach would be to implement BiRnn() without reusing Rnn(), which is
            # trivial to do.
            raise NotImplementedError

        # Since BiRnn now performs the actual classification, Rnn().V and Rnn().by are
        # not needed; they are not part of the computational graph (a separate
        # logistic-regression layer would be a cleaner way to handle this). Here's the
        # ugly workaround -_-
        self.params = [self.fwd_rnn.W, self.fwd_rnn.U, self.fwd_rnn.bh,
                       self.bwd_rnn.W, self.bwd_rnn.U, self.bwd_rnn.bh,
                       self.V_f, self.V_b, self.by]

        # reverse the backward rnn's hidden states along the time axis
        if mini_batch:
            self.bwd_rnn.h_t = self.bwd_rnn.h_t[:, ::-1]
        else:
            self.bwd_rnn.h_t = self.bwd_rnn.h_t[::-1]
        # Take the weighted sum of forward & backward rnn's hidden representation
        self.h_t = T.dot(self.fwd_rnn.h_t, self.V_f) + T.dot(self.bwd_rnn.h_t, self.V_b)

        if mini_batch:
            # T.nnet.softmax cannot operate on a tensor3; a simple reshape trick makes it work.
            h_t = self.h_t + self.by
            h_t_t = T.reshape(h_t, (h_t.shape[0] * h_t.shape[1], -1))
            y_t = T.nnet.softmax(h_t_t)
            self.y_t = T.reshape(y_t, h_t.shape)
            self.y = T.argmax(self.y_t, axis=2)
        else:
            self.y_t = T.nnet.softmax(self.h_t + self.by)
            self.y = T.argmax(self.y_t, axis=1)
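
The softmax-over-tensor3 reshape trick above is worth isolating; here is a minimal standalone illustration with hypothetical shapes (batch=3, time=5, classes=4):

import numpy as np
import theano
import theano.tensor as T

h = T.tensor3('h')                                    # (batch, time, classes)
flat = T.reshape(h, (h.shape[0] * h.shape[1], -1))    # collapse leading axes into rows
soft = T.nnet.softmax(flat)                           # row-wise softmax on the matrix
y = T.reshape(soft, h.shape)                          # restore the original 3D shape
f = theano.function([h], y)

out = f(np.random.randn(3, 5, 4).astype(theano.config.floatX))
assert np.allclose(out.sum(axis=2), 1.0)              # each (batch, time) slice now sums to 1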
Code example #4
    def __init__(self, input, input_dim, hidden_dim, output_dim, params=None):
        self.input_f = input
        self.input_b = input[::-1]
        if params is None:
            self.fwd_lstm = Lstm(input=self.input_f,
                                 input_dim=input_dim,
                                 hidden_dim=hidden_dim,
                                 output_dim=output_dim)
            self.bwd_lstm = Lstm(input=self.input_b,
                                 input_dim=input_dim,
                                 hidden_dim=hidden_dim,
                                 output_dim=output_dim)
            self.V_f = theano.shared(value=get(identifier='uniform',
                                               shape=(hidden_dim, output_dim)),
                                     name='V_f',
                                     borrow=True)
            self.V_b = theano.shared(value=get(identifier='uniform',
                                               shape=(hidden_dim, output_dim)),
                                     name='V_b',
                                     borrow=True)
            self.by = theano.shared(value=get('zero', shape=(output_dim, )),
                                    name='by',
                                    borrow=True)

        else:
            # Loading parameters from persistent storage would require changes to the
            # current Lstm() implementation and is therefore not supported. A cleaner
            # approach would be to implement BiLstm() without reusing Lstm(), which is
            # trivial to do.
            raise NotImplementedError

        # Since BiLstm now performs the actual classification, Lstm().V and Lstm().by are
        # not needed; they are not part of the computational graph (a separate
        # logistic-regression layer would be a cleaner way to handle this). Here's the
        # ugly workaround -_-
        self.params = [
            self.fwd_lstm.W_i, self.fwd_lstm.U_i, self.fwd_lstm.b_i,
            self.fwd_lstm.W_f, self.fwd_lstm.U_f, self.fwd_lstm.b_f,
            self.fwd_lstm.W_c, self.fwd_lstm.U_c, self.fwd_lstm.b_c,
            self.fwd_lstm.W_o, self.fwd_lstm.U_o, self.fwd_lstm.b_o,
            self.bwd_lstm.W_i, self.bwd_lstm.U_i, self.bwd_lstm.b_i,
            self.bwd_lstm.W_f, self.bwd_lstm.U_f, self.bwd_lstm.b_f,
            self.bwd_lstm.W_c, self.bwd_lstm.U_c, self.bwd_lstm.b_c,
            self.bwd_lstm.W_o, self.bwd_lstm.U_o, self.bwd_lstm.b_o, self.V_f,
            self.V_b, self.by
        ]

        self.bwd_lstm.h_t = self.bwd_lstm.h_t[::-1]
        # Take the weighted sum of forward & backward lstm's hidden representation
        self.h_t = T.dot(self.fwd_lstm.h_t, self.V_f) + T.dot(
            self.bwd_lstm.h_t, self.V_b)

        self.y_t = T.nnet.softmax(self.h_t + self.by)
        self.y = T.argmax(self.y_t, axis=1)
Code example #5
File: gru.py Project: chentao1999/theano-recurrence
    def __init__(self, input, input_dim, hidden_dim, output_dim,
                 params=None):
        self.input_f = input
        self.input_b = input[::-1]
        if params is None:
            self.fwd_gru = Gru(input=self.input_f, input_dim=input_dim, hidden_dim=hidden_dim,
                               output_dim=output_dim)
            self.bwd_gru = Gru(input=self.input_b, input_dim=input_dim, hidden_dim=hidden_dim,
                               output_dim=output_dim)
            self.V_f = theano.shared(
                value=get(identifier='uniform', shape=(hidden_dim, output_dim)),
                name='V_f',
                borrow=True
            )
            self.V_b = theano.shared(
                value=get(identifier='uniform', shape=(hidden_dim, output_dim)),
                name='V_b',
                borrow=True
            )
            self.by = theano.shared(
                value=get('zero', shape=(output_dim,)),
                name='by',
                borrow=True)

        else:
            # Loading parameters from persistent storage would require changes to the
            # current Gru() implementation and is therefore not supported. A cleaner
            # approach would be to implement BiGru() without reusing Gru(), which is
            # trivial to do.
            raise NotImplementedError

        # Since BiGru now performs the actual classification, Gru().V and Gru().by are
        # not needed; they are not part of the computational graph (a separate
        # logistic-regression layer would be a cleaner way to handle this). Here's the
        # ugly workaround -_-
        self.params = [self.fwd_gru.W_z, self.fwd_gru.U_z, self.fwd_gru.b_z,
                       self.fwd_gru.W_r, self.fwd_gru.U_r, self.fwd_gru.b_r,
                       self.fwd_gru.W, self.fwd_gru.U, self.fwd_gru.b_h,

                       self.bwd_gru.W_z, self.bwd_gru.U_z, self.bwd_gru.b_z,
                       self.bwd_gru.W_r, self.bwd_gru.U_r, self.bwd_gru.b_r,
                       self.bwd_gru.W, self.bwd_gru.U, self.bwd_gru.b_h,

                       self.V_f, self.V_b, self.by]

        self.bwd_gru.h_t = self.bwd_gru.h_t[::-1]
        # Take the weighted sum of forward & backward gru's hidden representation
        self.h_t = T.dot(self.fwd_gru.h_t, self.V_f) + T.dot(self.bwd_gru.h_t, self.V_b)

        self.y_t = T.nnet.softmax(self.h_t + self.by)
        self.y = T.argmax(self.y_t, axis=1)
Code example #6
    def __init__(self, input, input_dim, output_dim, params=None):
        if params is None:
            self.W = theano.shared(value=get(identifier='uniform',
                                             shape=(input_dim, output_dim)),
                                   name='W',
                                   borrow=True)
            self.b = theano.shared(value=get(identifier='zero',
                                             shape=(output_dim, )),
                                   name='b',
                                   borrow=True)

        else:
            self.W, self.b = params

        self.params = [self.W, self.b]

        self.p_y_given_x = T.clip(
            T.nnet.softmax(T.dot(input, self.W) + self.b), 0.0001,
            0.9999)  # clipping is needed to avoid NaN in the negative log-likelihood

        self.pred = T.argmax(self.p_y_given_x, axis=1)
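
The clipping above exists to keep the negative log-likelihood finite; a hedged sketch of that loss as it usually accompanies such a layer (the gold-label vector y and the method itself are assumptions following the common Theano convention):

    def negative_log_likelihood(self, y):
        # mean cross-entropy over the batch: -log p(y_i | x_i) for each gold label y_i
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])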
Code example #7
    def __init__(self, input, input_dim, hidden_dim, output_dim,
                 activation=T.tanh, init='uniform', inner_init='orthonormal',
                 mini_batch=False, params=None):
        self.activation = activation
        self.mini_batch = mini_batch
        if mini_batch:
            input = input.dimshuffle(1, 0, 2)
        if params is None:
            self.W = theano.shared(value=get(identifier=init, shape=(input_dim, hidden_dim)),
                                   name='W',
                                   borrow=True
                                   )
            self.U = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                   name='U',
                                   borrow=True
                                   )
            self.V = theano.shared(value=get(identifier=init, shape=(hidden_dim, output_dim)),
                                   name='V',
                                   borrow=True
                                   )
            self.bh = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                    name='bh',
                                    borrow=True)
            self.by = theano.shared(value=get(identifier='zero', shape=(output_dim, )),
                                    name='by',
                                    borrow=True)
        else:
            self.W, self.U, self.V, self.bh, self.by = params

        self.h0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='h0', borrow=True)
        self.params = [self.W, self.U, self.V, self.bh, self.by]

        if mini_batch:
            def recurrence(x_t, h_tm_prev):
                h_t = activation(T.dot(x_t, self.W) +
                                 T.dot(h_tm_prev, self.U) + self.bh)
                y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.by)
                return h_t, y_t

            [self.h_t, self.y_t], _ = theano.scan(
                recurrence,
                sequences=input,
                outputs_info=[T.alloc(self.h0, input.shape[1], hidden_dim), None]
            )
            self.h_t = self.h_t.dimshuffle(1, 0, 2)
            self.y_t = self.y_t.dimshuffle(1, 0, 2)
            self.y = T.argmax(self.y_t, axis=2)
        else:
            def recurrence(x_t, h_tm_prev):
                h_t = activation(T.dot(x_t, self.W) +
                                 T.dot(h_tm_prev, self.U) + self.bh)
                y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.by)
                return h_t, y_t[0]

            [self.h_t, self.y_t], _ = theano.scan(
                recurrence,
                sequences=input,
                outputs_info=[self.h0, None]
            )
            self.y = T.argmax(self.y_t, axis=1)
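
A hedged usage sketch for the two input layouts this Rnn accepts; the (batch, time, input_dim) layout for the mini-batch path is an assumption implied by the dimshuffle(1, 0, 2) above, and all sizes are illustrative.

import theano
import theano.tensor as T

x_batch = T.tensor3('x_batch')                 # (batch, time, input_dim) -- assumed layout
rnn = Rnn(input=x_batch, input_dim=64, hidden_dim=128, output_dim=10, mini_batch=True)
predict = theano.function([x_batch], rnn.y)    # argmax predictions, shape (batch, time)

x_single = T.matrix('x_single')                # (time, input_dim) for a single sequence
rnn_single = Rnn(input=x_single, input_dim=64, hidden_dim=128, output_dim=10)
predict_single = theano.function([x_single], rnn_single.y)   # shape (time,)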
Code example #8
def pickle_w2vec(w2vec, dataset, emb_path, emb_dim=300):
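    # NOTE: in gensim >= 1.0 this loader lives at gensim.models.KeyedVectors.load_word2vec_format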
    model = gensim.models.Word2Vec.load_word2vec_format(w2vec, binary=True)
    voc, _ = load_pickled_data(path=dataset)
    vocab, words_to_ix, _ = voc
    emb = [0] * len(vocab)
    vocab.remove('EOS')
    vocab.remove('UNKNOWN_TOKEN')
    # initialize randomly for 'EOS' & 'UNKNOWN_TOKEN'
    emb[words_to_ix['EOS']] = get(identifier='emb', shape=(emb_dim,), scale=np.sqrt(3))
    emb[words_to_ix['UNKNOWN_TOKEN']] = get(identifier='emb', shape=(emb_dim,), scale=np.sqrt(3))
    unk_count = 2
    for word in vocab:
        if word in model.vocab:
            emb[words_to_ix[word]] = model[word]
        else:
            unk_count += 1
            emb[words_to_ix[word]] = get(identifier='emb', shape=(emb_dim,), scale=np.sqrt(3))
    print('... embeddings initialized randomly for %d words' % unk_count)
    # pickle our mini embeddings
    with open(emb_path, 'wb') as f:
        pkl.dump(emb, f, pkl.HIGHEST_PROTOCOL)
    print('... %s created' % emb_path)
Code example #9
    def __init__(self,
                 input,
                 input_dim,
                 hidden_dim,
                 output_dim,
                 init='uniform',
                 inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh,
                 params=None):
        self.input = input
        self.inner_activation = inner_activation
        self.activation = activation
        if params is None:
            # input gate
            self.W_i = theano.shared(value=get(identifier=init,
                                               shape=(input_dim, hidden_dim)),
                                     name='W_i',
                                     borrow=True)
            self.U_i = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_i',
                                     borrow=True)
            self.b_i = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_i',
                                     borrow=True)
            # forget gate
            self.W_f = theano.shared(value=get(identifier=init,
                                               shape=(input_dim, hidden_dim)),
                                     name='W_f',
                                     borrow=True)
            self.U_f = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_f',
                                     borrow=True)
            self.b_f = theano.shared(value=get(identifier='one',
                                               shape=(hidden_dim, )),
                                     name='b_f',
                                     borrow=True)
            # memory
            self.W_c = theano.shared(value=get(identifier=init,
                                               shape=(input_dim, hidden_dim)),
                                     name='W_c',
                                     borrow=True)
            self.U_c = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_c',
                                     borrow=True)
            self.b_c = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_c',
                                     borrow=True)
            # output gate
            self.W_o = theano.shared(value=get(identifier=init,
                                               shape=(input_dim, hidden_dim)),
                                     name='W_o',
                                     borrow=True)
            self.U_o = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_o',
                                     borrow=True)
            self.b_o = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_o',
                                     borrow=True)
            # weights pertaining to output neuron
            self.V = theano.shared(value=get(identifier=init,
                                             shape=(hidden_dim, output_dim)),
                                   name='V',
                                   borrow=True)
            self.b_y = theano.shared(value=get(identifier='zero',
                                               shape=(output_dim, )),
                                     name='b_y',
                                     borrow=True)

        else:
            self.W_i, self.U_i, self.b_i, self.W_f, self.U_f, self.b_f, \
                self.W_c, self.U_c, self.b_c, self.W_o, self.U_o, self.b_o, self.V, self.b_y = params

        self.c0 = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='c0',
                                borrow=True)
        self.h0 = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='h0',
                                borrow=True)
        self.params = [
            self.W_i, self.U_i, self.b_i, self.W_f, self.U_f, self.b_f,
            self.W_c, self.U_c, self.b_c, self.W_o, self.U_o, self.b_o, self.V,
            self.b_y
        ]

        def recurrence(x_t, c_tm_prev, h_tm_prev):
            x_i = T.dot(x_t, self.W_i) + self.b_i
            x_f = T.dot(x_t, self.W_f) + self.b_f
            x_c = T.dot(x_t, self.W_c) + self.b_c
            x_o = T.dot(x_t, self.W_o) + self.b_o

            i_t = inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
            f_t = inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
            c_t = f_t * c_tm_prev + i_t * activation(
                x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
            o_t = inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
            h_t = o_t * activation(c_t)  # actual hidden state

            y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.b_y)

            return c_t, h_t, y_t[0]

        [_, self.h_t,
         self.y_t], _ = theano.scan(recurrence,
                                    sequences=self.input,
                                    outputs_info=[self.c0, self.h0, None])

        self.y = T.argmax(self.y_t, axis=1)
Code example #10
    def __init__(self,
                 story,
                 question,
                 hidden_dim,
                 output_dim,
                 attn_dim,
                 params=None,
                 activation=T.tanh):
        story = story.dimshuffle(1, 0, 2)
        if params is None:
            self.W_att_story = theano.shared(value=get(identifier='uniform',
                                                       shape=(hidden_dim,
                                                              attn_dim)),
                                             name='W_att_story',
                                             borrow=True)
            self.W_att_question = theano.shared(value=get(identifier='uniform',
                                                          shape=(hidden_dim,
                                                                 attn_dim)),
                                                name='W_att_question',
                                                borrow=True)
            # weight matrix for 'm_t' (see original paper: page 5)
            self.W_m = theano.shared(value=get(identifier='uniform',
                                               shape=(attn_dim, )),
                                     name='W_m',
                                     borrow=True)
            self.W_rg = theano.shared(value=get(identifier='uniform',
                                                shape=(hidden_dim,
                                                       output_dim)),
                                      name='W_rg',
                                      borrow=True)
            self.W_ug = theano.shared(value=get(identifier='uniform',
                                                shape=(hidden_dim,
                                                       output_dim)),
                                      name='W_ug',
                                      borrow=True)
            self.b = theano.shared(value=get(identifier='zero',
                                             shape=(output_dim, )),
                                   name='b',
                                   borrow=True)
        else:
            self.W_att_story, self.W_att_question, self.W_m, self.W_rg, self.W_ug, self.b = params

        self.params = [
            self.W_att_story, self.W_att_question, self.W_m, self.W_rg,
            self.W_ug, self.b
        ]

        # apply attention, i.e. compute a question-conditioned weighted sum over the story
        def step(token_t):
            m_t = activation(
                T.dot(token_t, self.W_att_story) +
                T.dot(question, self.W_att_question))
            # attention score at time-step t (a scalar per batch element)
            s_t = T.dot(m_t, self.W_m)  # is 'W_m' even needed here?
            return s_t

        s, _ = theano.scan(step, sequences=story, outputs_info=None)
        s = s.dimshuffle(1, 0)
        # normalized attention
        s_norm = T.nnet.softmax(s)

        # attention-weighted representation of the 'story'

        def compute_batch_sum(story_, norm_):
            return story_.T * norm_

        r_t, _ = theano.scan(compute_batch_sum,
                             sequences=[story, s_norm.dimshuffle(1, 0)],
                             outputs_info=None)

        r = T.sum(r_t.dimshuffle(2, 0, 1), axis=1)
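        # Net effect: r[b] = sum_t s_norm[b, t] * story[t, b, :], i.e. a per-sample
        # attention-weighted sum of the story token encodings.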

        # given 'r' & 'u' ; compute the final 'g' (where 'u' = encoding of question)
        self.g = T.nnet.softmax(
            activation(
                T.dot(r, self.W_rg) + T.dot(question, self.W_ug) + self.b))

        self.pred = T.argmax(self.g, axis=1)
Code example #11
    def __init__(self, input, vocab_size, emb_dim, hidden_dim, init='uniform', inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid, activation=T.tanh,
                 params=None, merge_mode='concat'):
        input_f = input.dimshuffle(1, 0)
        input_b = input[::-1].dimshuffle(1, 0)
        if params is None:
            self.emb = theano.shared(value=get(identifier=init, shape=(vocab_size, emb_dim), scale=np.sqrt(3)),
                                     name='emb', borrow=True)
            # Forward LSTM
            # input gate
            self.Wf_i = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_i', borrow=True)
            self.Uf_i = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_i', borrow=True)
            self.bf_i = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bf_i', borrow=True)
            # forget gate
            self.Wf_f = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_f', borrow=True)
            self.Uf_f = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_f', borrow=True)
            self.bf_f = theano.shared(value=get(identifier='one', shape=(hidden_dim, )),
                                      name='bf_f', borrow=True)
            # memory
            self.Wf_c = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_c', borrow=True)
            self.Uf_c = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_c', borrow=True)
            self.bf_c = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bf_c', borrow=True)
            # output gate
            self.Wf_o = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_o', borrow=True)
            self.Uf_o = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_o', borrow=True)
            self.bf_o = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bf_o', borrow=True)

            # Backward LSTM
            # input gate
            self.Wb_i = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_i', borrow=True)
            self.Ub_i = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_i', borrow=True)
            self.bb_i = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bb_i', borrow=True)
            # forget gate
            self.Wb_f = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_f', borrow=True)
            self.Ub_f = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_f', borrow=True)
            self.bb_f = theano.shared(value=get(identifier='one', shape=(hidden_dim, )),
                                      name='bb_f', borrow=True)
            # memory
            self.Wb_c = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_c', borrow=True)
            self.Ub_c = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_c', borrow=True)
            self.bb_c = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bb_c', borrow=True)
            # output gate
            self.Wb_o = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_o', borrow=True)
            self.Ub_o = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_o', borrow=True)
            self.bb_o = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bb_o', borrow=True)

        else:
            self.emb, self.Wf_i, self.Uf_i, self.bf_i, self.Wf_f, self.Uf_f, self.bf_f, \
                self.Wf_c, self.Uf_c, self.bf_c, self.Wf_o, self.Uf_o, self.bf_o,\
                self.Wb_i, self.Ub_i, self.bb_i, self.Wb_f, self.Ub_f, self.bb_f, \
                self.Wb_c, self.Ub_c, self.bb_c, self.Wb_o, self.Ub_o, self.bb_o = params

        self.cf = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='cf', borrow=True)
        self.hf = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='hf', borrow=True)
        self.cb = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='cb', borrow=True)
        self.hb = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='hb', borrow=True)
        self.params = [self.emb,
                       self.Wf_i, self.Uf_i, self.bf_i,
                       self.Wf_f, self.Uf_f, self.bf_f,
                       self.Wf_c, self.Uf_c, self.bf_c,
                       self.Wf_o, self.Uf_o, self.bf_o,

                       self.Wb_i, self.Ub_i, self.bb_i,
                       self.Wb_f, self.Ub_f, self.bb_f,
                       self.Wb_c, self.Ub_c, self.bb_c,
                       self.Wb_o, self.Ub_o, self.bb_o]

        # forward lstm
        def recurrence_f(xf_t, cf_tm, hf_tm):
            xf_i = T.dot(self.emb[xf_t], self.Wf_i) + self.bf_i
            xf_f = T.dot(self.emb[xf_t], self.Wf_f) + self.bf_f
            xf_c = T.dot(self.emb[xf_t], self.Wf_c) + self.bf_c
            xf_o = T.dot(self.emb[xf_t], self.Wf_o) + self.bf_o

            if_t = inner_activation(xf_i + T.dot(hf_tm, self.Uf_i))
            ff_t = inner_activation(xf_f + T.dot(hf_tm, self.Uf_f))
            cf_t = ff_t * cf_tm + if_t * activation(xf_c + T.dot(hf_tm, self.Uf_c))  # internal memory
            of_t = inner_activation(xf_o + T.dot(hf_tm, self.Uf_o))
            hf_t = of_t * activation(cf_t)  # actual hidden state

            return cf_t, hf_t

        [_, self.h_f], _ = theano.scan(
            recurrence_f,
            sequences=input_f,
            outputs_info=[T.alloc(self.cf, input_f.shape[1], hidden_dim),
                          T.alloc(self.hf, input_f.shape[1], hidden_dim)]
        )

        # backward lstm
        def recurrence(xb_t, cb_tm, hb_tm):
            xb_i = T.dot(self.emb[xb_t], self.Wb_i) + self.bb_i
            xb_f = T.dot(self.emb[xb_t], self.Wb_f) + self.bb_f
            xb_c = T.dot(self.emb[xb_t], self.Wb_c) + self.bb_c
            xb_o = T.dot(self.emb[xb_t], self.Wb_o) + self.bb_o

            ib_t = inner_activation(xb_i + T.dot(hb_tm, self.Ub_i))
            fb_t = inner_activation(xb_f + T.dot(hb_tm, self.Ub_f))
            cb_t = fb_t * cb_tm + ib_t * activation(xb_c + T.dot(hb_tm, self.Ub_c))  # internal memory
            ob_t = inner_activation(xb_o + T.dot(hb_tm, self.Ub_o))
            hb_t = ob_t * activation(cb_t)  # actual hidden state

            return cb_t, hb_t

        [_, self.h_b], _ = theano.scan(
            recurrence,
            sequences=input_b,
            outputs_info=[T.alloc(self.cb, input_b.shape[1], hidden_dim),
                          T.alloc(self.hb, input_b.shape[1], hidden_dim)]
        )

        if merge_mode == 'sum':
            self.y = self.h_f[-1] + self.h_b[-1]
        elif merge_mode == 'multiply':
            self.y = self.h_f[-1] * self.h_b[-1]
        elif merge_mode == 'average':
            self.y = (self.h_f[-1] + self.h_b[-1]) / 2
        elif merge_mode == 'concat':
            self.y = T.concatenate([self.h_f[-1], self.h_b[-1]], axis=1)
        else:
            print('Supported "merge_mode" values for forward + backward lstm are: "sum", "multiply", "average" & "concat".')
            raise NotImplementedError
Code example #12
    def __init__(self, input, vocab_size, emb_dim, hidden_dim, n_layers=2, init='uniform',
                 inner_init='orthonormal', inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh, params=None):
        input = input.dimshuffle(1, 0)
        assert n_layers == 2  # this implementation supports exactly two stacked layers
        if params is None:
            self.emb = theano.shared(value=get(identifier=init, shape=(vocab_size, emb_dim), scale=np.sqrt(3)),
                                     name='emb', borrow=True)
            # *** Layer 1 ***
            # input gate
            self.W_i = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_i', borrow=True)
            self.U_i = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_i', borrow=True)
            self.b_i = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_i', borrow=True)
            # forget gate
            self.W_f = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_f', borrow=True)
            self.U_f = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_f', borrow=True)
            self.b_f = theano.shared(value=get(identifier='one', shape=(hidden_dim, )),
                                     name='b_f', borrow=True)
            # memory
            self.W_c = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_c', borrow=True)
            self.U_c = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_c', borrow=True)
            self.b_c = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_c', borrow=True)
            # output gate
            self.W_o = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_o', borrow=True)
            self.U_o = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_o', borrow=True)
            self.b_o = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_o', borrow=True)

            # *** Layer 2 ***
            # input gate
            self.W_i_1 = theano.shared(value=get(identifier=init, shape=(hidden_dim, hidden_dim)),
                                       name='W_i_1', borrow=True)
            self.U_i_1 = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                       name='U_i_1', borrow=True)
            self.b_i_1 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                       name='b_i_1', borrow=True)
            # forget gate
            self.W_f_1 = theano.shared(value=get(identifier=init, shape=(hidden_dim, hidden_dim)),
                                       name='W_f_1', borrow=True)
            self.U_f_1 = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                       name='U_f_1', borrow=True)
            self.b_f_1 = theano.shared(value=get(identifier='one', shape=(hidden_dim, )),
                                       name='b_f_1', borrow=True)
            # memory
            self.W_c_1 = theano.shared(value=get(identifier=init, shape=(hidden_dim, hidden_dim)),
                                       name='W_c_1', borrow=True)
            self.U_c_1 = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                       name='U_c_1', borrow=True)
            self.b_c_1 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                       name='b_c_1', borrow=True)
            # output gate
            self.W_o_1 = theano.shared(value=get(identifier=init, shape=(hidden_dim, hidden_dim)),
                                       name='W_o_1', borrow=True)
            self.U_o_1 = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                       name='U_o_1', borrow=True)
            self.b_o_1 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                       name='b_o_1', borrow=True)

        else:
            self.emb, self.W_i, self.U_i, self.b_i, self.W_f, self.U_f, self.b_f, \
                self.W_c, self.U_c, self.b_c, self.W_o, self.U_o, self.b_o, \
                self.W_i_1, self.U_i_1, self.b_i_1, self.W_f_1, self.U_f_1, self.b_f_1, \
                self.W_c_1, self.U_c_1, self.b_c_1, self.W_o_1, self.U_o_1, self.b_o_1 = params

        self.c0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='c0', borrow=True)
        self.h0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='h0', borrow=True)
        self.c1 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='c1', borrow=True)
        self.h1 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='h1', borrow=True)
        self.params = [self.emb,
                       self.W_i, self.U_i, self.b_i,
                       self.W_f, self.U_f, self.b_f,
                       self.W_c, self.U_c, self.b_c,
                       self.W_o, self.U_o, self.b_o,
                       self.W_i_1, self.U_i_1, self.b_i_1,
                       self.W_f_1, self.U_f_1, self.b_f_1,
                       self.W_c_1, self.U_c_1, self.b_c_1,
                       self.W_o_1, self.U_o_1, self.b_o_1]

        def recurrence(x_t, c_t1_prev, h_t1_prev, c_t2_prev, h_t2_prev):
            # Layer 1 computation
            x_i = T.dot(self.emb[x_t], self.W_i) + self.b_i
            x_f = T.dot(self.emb[x_t], self.W_f) + self.b_f
            x_c = T.dot(self.emb[x_t], self.W_c) + self.b_c
            x_o = T.dot(self.emb[x_t], self.W_o) + self.b_o

            i_t = inner_activation(x_i + T.dot(h_t1_prev, self.U_i))
            f_t = inner_activation(x_f + T.dot(h_t1_prev, self.U_f))
            c_t = f_t * c_t1_prev + i_t * activation(x_c + T.dot(h_t1_prev, self.U_c))  # internal memory
            o_t = inner_activation(x_o + T.dot(h_t1_prev, self.U_o))
            h_t = o_t * activation(c_t)  # actual hidden state

            # Layer 2 computation
            x_i_1 = T.dot(h_t, self.W_i_1) + self.b_i_1
            x_f_1 = T.dot(h_t, self.W_f_1) + self.b_f_1
            x_c_1 = T.dot(h_t, self.W_c_1) + self.b_c_1
            x_o_1 = T.dot(h_t, self.W_o_1) + self.b_o_1

            i_t_1 = inner_activation(x_i_1 + T.dot(h_t2_prev, self.U_i_1))
            f_t_1 = inner_activation(x_f_1 + T.dot(h_t2_prev, self.U_f_1))
            c_t_1 = f_t_1 * c_t2_prev + i_t_1 * activation(x_c_1 + T.dot(h_t2_prev, self.U_c_1))  # internal memory
            o_t_1 = inner_activation(x_o_1 + T.dot(h_t2_prev, self.U_o_1))
            h_t_1 = o_t_1 * activation(c_t_1)  # actual hidden state

            return c_t, h_t, c_t_1, h_t_1

        [_, h_1, _, h_2], _ = theano.scan(
            recurrence,
            sequences=input,
            outputs_info=[T.alloc(self.c0, input.shape[1], hidden_dim),
                          T.alloc(self.h0, input.shape[1], hidden_dim),
                          T.alloc(self.c1, input.shape[1], hidden_dim),
                          T.alloc(self.h1, input.shape[1], hidden_dim)]
        )

        # every hidden layer is connected to the output, so concatenate both layers' final states
        self.y = T.concatenate([h_1[-1], h_2[-1]], axis=1)
Code example #13
File: lstm.py Project: hariom-yadaw/neural-converse
    def __init__(self, input, emb_mat, emb_dim, hidden_dim, init='uniform', inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid, activation=T.tanh,
                 params=None, merge_mode='sum'):
        if params is None:
            self.emb = theano.shared(value=np.asarray(emb_mat, dtype=theano.config.floatX),
                                     name='emb', borrow=True)
            # Forward LSTM
            # input gate
            self.Wf_i = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_i', borrow=True)
            self.Uf_i = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_i', borrow=True)
            self.bf_i = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bf_i', borrow=True)
            # forget gate
            self.Wf_f = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_f', borrow=True)
            self.Uf_f = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_f', borrow=True)
            self.bf_f = theano.shared(value=get(identifier='one', shape=(hidden_dim, )),
                                      name='bf_f', borrow=True)
            # memory
            self.Wf_c = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_c', borrow=True)
            self.Uf_c = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_c', borrow=True)
            self.bf_c = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bf_c', borrow=True)
            # output gate
            self.Wf_o = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_o', borrow=True)
            self.Uf_o = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_o', borrow=True)
            self.bf_o = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bf_o', borrow=True)

            # Backward LSTM
            # input gate
            self.Wb_i = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_i', borrow=True)
            self.Ub_i = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_i', borrow=True)
            self.bb_i = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bb_i', borrow=True)
            # forget gate
            self.Wb_f = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_f', borrow=True)
            self.Ub_f = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_f', borrow=True)
            self.bb_f = theano.shared(value=get(identifier='one', shape=(hidden_dim, )),
                                      name='bb_f', borrow=True)
            # memory
            self.Wb_c = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_c', borrow=True)
            self.Ub_c = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_c', borrow=True)
            self.bb_c = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bb_c', borrow=True)
            # output gate
            self.Wb_o = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_o', borrow=True)
            self.Ub_o = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_o', borrow=True)
            self.bb_o = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                      name='bb_o', borrow=True)

        else:
            self.emb, self.Wf_i, self.Uf_i, self.bf_i, self.Wf_f, self.Uf_f, self.bf_f, \
                self.Wf_c, self.Uf_c, self.bf_c, self.Wf_o, self.Uf_o, self.bf_o,\
                self.Wb_i, self.Ub_i, self.bb_i, self.Wb_f, self.Ub_f, self.bb_f, \
                self.Wb_c, self.Ub_c, self.bb_c, self.Wb_o, self.Ub_o, self.bb_o = params

        self.cf = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='cf', borrow=True)
        self.hf = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='hf', borrow=True)
        self.cb = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='cb', borrow=True)
        self.hb = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='hb', borrow=True)
        self.params = [self.emb,
                       self.Wf_i, self.Uf_i, self.bf_i,
                       self.Wf_f, self.Uf_f, self.bf_f,
                       self.Wf_c, self.Uf_c, self.bf_c,
                       self.Wf_o, self.Uf_o, self.bf_o,

                       self.Wb_i, self.Ub_i, self.bb_i,
                       self.Wb_f, self.Ub_f, self.bb_f,
                       self.Wb_c, self.Ub_c, self.bb_c,
                       self.Wb_o, self.Ub_o, self.bb_o]

        input_f = input.dimshuffle(1, 0)
        input_b = input[::-1].dimshuffle(1, 0)

        # forward lstm
        def recurrence_f(xf_t, cf_tm, hf_tm):
            xf_i = T.dot(self.emb[xf_t], self.Wf_i) + self.bf_i
            xf_f = T.dot(self.emb[xf_t], self.Wf_f) + self.bf_f
            xf_c = T.dot(self.emb[xf_t], self.Wf_c) + self.bf_c
            xf_o = T.dot(self.emb[xf_t], self.Wf_o) + self.bf_o

            if_t = inner_activation(xf_i + T.dot(hf_tm, self.Uf_i))
            ff_t = inner_activation(xf_f + T.dot(hf_tm, self.Uf_f))
            cf_t = ff_t * cf_tm + if_t * activation(xf_c + T.dot(hf_tm, self.Uf_c))  # internal memory
            of_t = inner_activation(xf_o + T.dot(hf_tm, self.Uf_o))
            hf_t = of_t * activation(cf_t)  # actual hidden state

            return cf_t, hf_t

        [_, h_f], _ = theano.scan(
            fn=recurrence_f,
            sequences=input_f,
            outputs_info=[T.alloc(self.cf, input_f.shape[1], hidden_dim),
                          T.alloc(self.hf, input_f.shape[1], hidden_dim)]
        )

        # backward lstm
        def recurrence_b(xb_t, cb_tm, hb_tm):
            xb_i = T.dot(self.emb[xb_t], self.Wb_i) + self.bb_i
            xb_f = T.dot(self.emb[xb_t], self.Wb_f) + self.bb_f
            xb_c = T.dot(self.emb[xb_t], self.Wb_c) + self.bb_c
            xb_o = T.dot(self.emb[xb_t], self.Wb_o) + self.bb_o

            ib_t = inner_activation(xb_i + T.dot(hb_tm, self.Ub_i))
            fb_t = inner_activation(xb_f + T.dot(hb_tm, self.Ub_f))
            cb_t = fb_t * cb_tm + ib_t * activation(xb_c + T.dot(hb_tm, self.Ub_c))  # internal memory
            ob_t = inner_activation(xb_o + T.dot(hb_tm, self.Ub_o))
            hb_t = ob_t * activation(cb_t)  # actual hidden state

            return cb_t, hb_t

        [_, h_b], _ = theano.scan(
            fn=recurrence_b,
            sequences=input_b,
            outputs_info=[T.alloc(self.cb, input_b.shape[1], hidden_dim),
                          T.alloc(self.hb, input_b.shape[1], hidden_dim)]
        )

        if merge_mode == 'sum':
            h = h_f[-1] + h_b[-1]
        elif merge_mode == 'multiply':
            h = h_f[-1] * h_b[-1]
        elif merge_mode == 'average':
            h = (h_f[-1] + h_b[-1]) / 2
        elif merge_mode == 'concat':
            h = T.concatenate([h_f[-1], h_b[-1]], axis=1)  # concatenate the last hidden states, matching the other merge modes
        else:
            print('Supported "merge_mode" values for forward + backward lstm are: "sum", "multiply", "average" & "concat".')
            raise NotImplementedError

        # The hidden state and prediction at the last time-step are passed to the decoder;
        # the prediction at the last time-step is always 'eos' and is therefore ignored.
        self.h = h
Code example #14
File: lstm.py Project: hariom-yadaw/neural-converse
    def __init__(self, input, emb_mat, emb_dim, hidden_dim, init='uniform', inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid, activation=T.tanh,
                 params=None):
        input = input.dimshuffle(1, 0)
        if params is None:
            self.emb = theano.shared(value=np.asarray(emb_mat, dtype=theano.config.floatX),
                                     name='emb', borrow=True)
            # input gate
            self.W_i = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_i', borrow=True)
            self.U_i = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_i', borrow=True)
            self.b_i = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_i', borrow=True)
            # forget gate
            self.W_f = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_f', borrow=True)
            self.U_f = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_f', borrow=True)
            self.b_f = theano.shared(value=get(identifier='one', shape=(hidden_dim, )),
                                     name='b_f', borrow=True)
            # memory
            self.W_c = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_c', borrow=True)
            self.U_c = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_c', borrow=True)
            self.b_c = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_c', borrow=True)
            # output gate
            self.W_o = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_o', borrow=True)
            self.U_o = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_o', borrow=True)
            self.b_o = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_o', borrow=True)

        else:
            self.emb, self.W_i, self.U_i, self.b_i, self.W_f, self.U_f, self.b_f, \
                self.W_c, self.U_c, self.b_c, self.W_o, self.U_o, self.b_o = params

        self.c0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='c0', borrow=True)
        self.h0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='h0', borrow=True)
        self.params = [self.emb,
                       self.W_i, self.U_i, self.b_i,
                       self.W_f, self.U_f, self.b_f,
                       self.W_c, self.U_c, self.b_c,
                       self.W_o, self.U_o, self.b_o]

        def recurrence(x_t, c_tm_prev, h_tm_prev):
            x_i = T.dot(self.emb[x_t], self.W_i) + self.b_i
            x_f = T.dot(self.emb[x_t], self.W_f) + self.b_f
            x_c = T.dot(self.emb[x_t], self.W_c) + self.b_c
            x_o = T.dot(self.emb[x_t], self.W_o) + self.b_o

            i_t = inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
            f_t = inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
            c_t = f_t * c_tm_prev + i_t * activation(x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
            o_t = inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
            h_t = o_t * activation(c_t)  # actual hidden state

            return c_t, h_t

        [_, h], _ = theano.scan(
            fn=recurrence,
            sequences=input,
            outputs_info=[T.alloc(self.c0, input.shape[1], hidden_dim),
                          T.alloc(self.h0, input.shape[1], hidden_dim)]
        )

        # The hidden state and prediction at the last time-step are passed to the decoder;
        # the prediction at the last time-step is always 'eos' and is therefore ignored.
        self.h = h[-1]
Code example #15
File: gru.py Project: chentao1999/theano-recurrence
    def __init__(self, input, input_dim, hidden_dim, output_dim, init='uniform',
                 inner_init='orthonormal', inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh, params=None):
        self.input = input
        self.hidden_dim = hidden_dim
        self.activation = activation
        self.inner_activation = inner_activation
        if params is None:
            # update gate
            self.W_z = theano.shared(
                value=get(identifier=init, shape=(input_dim, hidden_dim)),
                name='W_z',
                borrow=True)
            self.U_z = theano.shared(
                value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                name='U_z',
                borrow=True)
            self.b_z = theano.shared(
                value=get(identifier='zero', shape=(hidden_dim,)),
                name='b_z',
                borrow=True)
            # reset gate
            self.W_r = theano.shared(value=get(identifier=init, shape=(input_dim, hidden_dim)),
                                     name='W_r',
                                     borrow=True)
            self.U_r = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_r',
                                     borrow=True)
            self.b_r = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                     name='b_r',
                                     borrow=True)
            # weights pertaining to input, hidden & output neurons (externally)
            self.W = theano.shared(value=get(identifier=init, shape=(input_dim, hidden_dim)),
                                   name='W',
                                   borrow=True)
            self.U = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                   name='U',
                                   borrow=True)
            self.V = theano.shared(value=get(identifier=init, shape=(hidden_dim, output_dim)),
                                   name='V',
                                   borrow=True)
            self.b_h = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                     name='b_h',
                                     borrow=True)
            self.b_y = theano.shared(value=get(identifier='zero', shape=(output_dim,)),
                                     name='b_y',
                                     borrow=True)
        else:
            self.W_z, self.U_z, self.b_z, self.W_r, self.U_r, self.b_r, \
                self.W, self.U, self.V, self.b_h, self.b_y = params

        self.h0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='h0', borrow=True)
        self.params = [self.W_z, self.U_z, self.b_z,
                       self.W_r, self.U_r, self.b_r,
                       self.W, self.U, self.V,
                       self.b_h, self.b_y]

        def recurrence(x_t, h_tm_prev):
            x_z = T.dot(x_t, self.W_z) + self.b_z
            x_r = T.dot(x_t, self.W_r) + self.b_r
            x_h = T.dot(x_t, self.W) + self.b_h

            z_t = inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
            r_t = inner_activation(x_r + T.dot(h_tm_prev, self.U_r))
            hh_t = activation(x_h + T.dot(r_t * h_tm_prev, self.U))
            h_t = (T.ones_like(z_t) - z_t) * hh_t + z_t * h_tm_prev

            y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.b_y)

            return h_t, y_t[0]

        [self.h_t, self.y_t], _ = theano.scan(
            recurrence,
            sequences=self.input,
            outputs_info=[self.h0, None]
        )

        self.y = T.argmax(self.y_t, axis=1)
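
For context, a snippet like #15 is typically compiled into a prediction function over a single sequence. A minimal usage sketch, assuming the __init__ above belongs to a class named GRU and that the get helper sketched earlier is available (both names are assumptions, not shown in the source):

import theano
import theano.tensor as T

x = T.matrix('x')  # (seq_len, input_dim), e.g. one-hot rows
gru = GRU(input=x, input_dim=65, hidden_dim=128, output_dim=65)

# maps an input sequence to a vector of predicted symbol ids
predict = theano.function(inputs=[x], outputs=gru.y)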
Code example #16
    def __init__(self,
                 input,
                 vocab_size,
                 emb_dim,
                 hidden_dim,
                 init='uniform',
                 inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh,
                 params=None,
                 merge_mode='concat'):
        input_f = input.dimshuffle(1, 0)
        input_b = input[:, ::-1].dimshuffle(1, 0)  # reverse the time axis (axis 1), not the batch axis
        if params is None:
            self.emb = theano.shared(value=get(identifier=init,
                                               shape=(vocab_size, emb_dim),
                                               scale=np.sqrt(3)),
                                     name='emb',
                                     borrow=True)
            # Forward GRU
            # update gate
            self.Wf_z = theano.shared(value=get(identifier=init,
                                                shape=(emb_dim, hidden_dim)),
                                      name='Wf_z',
                                      borrow=True)
            self.Uf_z = theano.shared(value=get(identifier=inner_init,
                                                shape=(hidden_dim,
                                                       hidden_dim)),
                                      name='Uf_z',
                                      borrow=True)
            self.bf_z = theano.shared(value=get(identifier='zero',
                                                shape=(hidden_dim, )),
                                      name='bf_z',
                                      borrow=True)
            # reset gate
            self.Wf_r = theano.shared(value=get(identifier=init,
                                                shape=(emb_dim, hidden_dim)),
                                      name='Wf_r',
                                      borrow=True)
            self.Uf_r = theano.shared(value=get(identifier=inner_init,
                                                shape=(hidden_dim,
                                                       hidden_dim)),
                                      name='Uf_r',
                                      borrow=True)
            self.bf_r = theano.shared(value=get(identifier='zero',
                                                shape=(hidden_dim, )),
                                      name='bf_r',
                                      borrow=True)
            # hidden state
            self.Wf_h = theano.shared(value=get(identifier=init,
                                                shape=(emb_dim, hidden_dim)),
                                      name='Wf_h',
                                      borrow=True)
            self.Uf_h = theano.shared(value=get(identifier=inner_init,
                                                shape=(hidden_dim,
                                                       hidden_dim)),
                                      name='Uf_h',
                                      borrow=True)
            self.bf_h = theano.shared(value=get(identifier='zero',
                                                shape=(hidden_dim, )),
                                      name='bf_h',
                                      borrow=True)

            # Backward GRU
            # update gate
            self.Wb_z = theano.shared(value=get(identifier=init,
                                                shape=(emb_dim, hidden_dim)),
                                      name='Wb_z',
                                      borrow=True)
            self.Ub_z = theano.shared(value=get(identifier=inner_init,
                                                shape=(hidden_dim,
                                                       hidden_dim)),
                                      name='Ub_z',
                                      borrow=True)
            self.bb_z = theano.shared(value=get(identifier='zero',
                                                shape=(hidden_dim, )),
                                      name='bb_z',
                                      borrow=True)
            # reset gate
            self.Wb_r = theano.shared(value=get(identifier=init,
                                                shape=(emb_dim, hidden_dim)),
                                      name='Wb_r',
                                      borrow=True)
            self.Ub_r = theano.shared(value=get(identifier=inner_init,
                                                shape=(hidden_dim,
                                                       hidden_dim)),
                                      name='Ub_r',
                                      borrow=True)
            self.bb_r = theano.shared(value=get(identifier='zero',
                                                shape=(hidden_dim, )),
                                      name='bb_r',
                                      borrow=True)
            # hidden state
            self.Wb_h = theano.shared(value=get(identifier=init,
                                                shape=(emb_dim, hidden_dim)),
                                      name='Wb_h',
                                      borrow=True)
            self.Ub_h = theano.shared(value=get(identifier=inner_init,
                                                shape=(hidden_dim,
                                                       hidden_dim)),
                                      name='Ub_h',
                                      borrow=True)
            self.bb_h = theano.shared(value=get(identifier='zero',
                                                shape=(hidden_dim, )),
                                      name='bb_h',
                                      borrow=True)

        else:
            self.emb, self.Wf_z, self.Uf_z, self.bf_z, self.Wf_r, self.Uf_r, self.bf_r, self.Wf_h, self.Uf_h, \
                self.bf_h, self.Wb_z, self.Ub_z, self.bb_z, self.Wb_r, self.Ub_r, self.bb_r, self.Wb_h, \
                self.Ub_h, self.bb_h = params

        self.hf = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='hf',
                                borrow=True)
        self.hb = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='hb',
                                borrow=True)
        self.params = [
            self.emb, self.Wf_z, self.Uf_z, self.bf_z, self.Wf_r, self.Uf_r,
            self.bf_r, self.Wf_h, self.Uf_h, self.bf_h, self.Wb_z, self.Ub_z,
            self.bb_z, self.Wb_r, self.Ub_r, self.bb_r, self.Wb_h, self.Ub_h,
            self.bb_h
        ]

        # forward gru
        def recurrence_f(xf_t, hf_tm):
            xf_z = T.dot(self.emb[xf_t], self.Wf_z) + self.bf_z
            xf_r = T.dot(self.emb[xf_t], self.Wf_r) + self.bf_r
            xf_h = T.dot(self.emb[xf_t], self.Wf_h) + self.bf_h

            zf_t = inner_activation(xf_z + T.dot(hf_tm, self.Uf_z))
            rf_t = inner_activation(xf_r + T.dot(hf_tm, self.Uf_r))
            hhf_t = activation(xf_h + T.dot(rf_t * hf_tm, self.Uf_h))
            hf_t = (T.ones_like(zf_t) - zf_t) * hhf_t + zf_t * hf_tm

            return hf_t

        self.h_f, _ = theano.scan(recurrence_f,
                                  sequences=input_f,
                                  outputs_info=T.alloc(self.hf,
                                                       input_f.shape[1],
                                                       hidden_dim))

        # backward gru
        def recurrence_b(xb_t, hb_tm):
            xb_z = T.dot(self.emb[xb_t], self.Wb_z) + self.bb_z
            xb_r = T.dot(self.emb[xb_t], self.Wb_r) + self.bb_r
            xb_h = T.dot(self.emb[xb_t], self.Wb_h) + self.bb_h

            zb_t = inner_activation(xb_z + T.dot(hb_tm, self.Ub_z))
            rb_t = inner_activation(xb_r + T.dot(hb_tm, self.Ub_r))
            hhb_t = activation(xb_h + T.dot(rb_t * hb_tm, self.Ub_h))
            hb_t = (T.ones_like(zb_t) - zb_t) * hhb_t + zb_t * hb_tm

            return hb_t

        self.h_b, _ = theano.scan(recurrence_b,
                                  sequences=input_b,
                                  outputs_info=T.alloc(self.hb,
                                                       input_b.shape[1],
                                                       hidden_dim))

        if merge_mode == 'sum':
            self.y = self.h_f[-1] + self.h_b[-1]
        elif merge_mode == 'multiply':
            self.y = self.h_f[-1] * self.h_b[-1]
        elif merge_mode == 'average':
            self.y = (self.h_f[-1] + self.h_b[-1]) / 2
        elif merge_mode == 'concat':
            self.y = T.concatenate([self.h_f[-1], self.h_b[-1]], axis=1)
        else:
            raise NotImplementedError(
                'Supported "merge_mode" values for the forward + backward gru are: '
                '"sum", "multiply", "average" & "concat".')
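
A note on the reversal at the top of this snippet: with input laid out as (batch, time), plain input[::-1] would flip the batch axis, so the backward GRU has to reverse the time axis instead, i.e. input[:, ::-1] before the dimshuffle. A quick numpy illustration of the difference:

import numpy as np

batch = np.array([[1, 2, 3],
                  [4, 5, 6]])   # (batch=2, time=3)

print(batch[::-1])     # flips the batch axis -> [[4 5 6], [1 2 3]]
print(batch[:, ::-1])  # flips the time axis  -> [[3 2 1], [6 5 4]]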
Code example #17
File: gru.py Project: hariom-yadaw/neural-converse
    def __init__(self, input, emb_mat, emb_dim, hidden_dim, init='uniform', inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid, activation=T.tanh,
                 params=None, merge_mode='sum'):
        if params is None:
            self.emb = theano.shared(value=np.asarray(emb_mat, dtype=theano.config.floatX),
                                     name='emb', borrow=True)
            # Forward GRU
            # update gate
            self.Wf_z = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_z', borrow=True)
            self.Uf_z = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_z', borrow=True)
            self.bf_z = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                      name='bf_z', borrow=True)
            # reset gate
            self.Wf_r = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_r', borrow=True)
            self.Uf_r = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_r', borrow=True)
            self.bf_r = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                      name='bf_r', borrow=True)
            # hidden state
            self.Wf_h = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wf_h', borrow=True)
            self.Uf_h = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Uf_h', borrow=True)
            self.bf_h = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                      name='bf_h', borrow=True)

            # Backward GRU
            # update gate
            self.Wb_z = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_z', borrow=True)
            self.Ub_z = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_z', borrow=True)
            self.bb_z = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                      name='bb_z', borrow=True)
            # reset gate
            self.Wb_r = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_r', borrow=True)
            self.Ub_r = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_r', borrow=True)
            self.bb_r = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                      name='bb_r', borrow=True)
            # hidden state
            self.Wb_h = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                      name='Wb_h', borrow=True)
            self.Ub_h = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                      name='Ub_h', borrow=True)
            self.bb_h = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                      name='bb_h', borrow=True)

        else:
            self.emb, self.Wf_z, self.Uf_z, self.bf_z, self.Wf_r, self.Uf_r, self.bf_r, \
                self.Wf_h, self.Uf_h, self.bf_h, self.Wb_z, self.Ub_z, self.bb_z, self.Wb_r, \
                self.Ub_r, self.bb_r, self.Wb_h, self.Ub_h, self.bb_h = params

        self.hf = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='hf', borrow=True)
        self.hb = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='hb', borrow=True)
        self.params = [self.emb,
                       self.Wf_z, self.Uf_z, self.bf_z,
                       self.Wf_r, self.Uf_r, self.bf_r,
                       self.Wf_h, self.Uf_h, self.bf_h,

                       self.Wb_z, self.Ub_z, self.bb_z,
                       self.Wb_r, self.Ub_r, self.bb_r,
                       self.Wb_h, self.Ub_h, self.bb_h]

        input_f = input.dimshuffle(1, 0)
        input_b = input[:, ::-1].dimshuffle(1, 0)  # reverse the time axis (axis 1), not the batch axis

        # forward gru
        def recurrence_f(xf_t, hf_tm):
            xf_z = T.dot(self.emb[xf_t], self.Wf_z) + self.bf_z
            xf_r = T.dot(self.emb[xf_t], self.Wf_r) + self.bf_r
            xf_h = T.dot(self.emb[xf_t], self.Wf_h) + self.bf_h

            zf_t = inner_activation(xf_z + T.dot(hf_tm, self.Uf_z))
            rf_t = inner_activation(xf_r + T.dot(hf_tm, self.Uf_r))
            hhf_t = activation(xf_h + T.dot(rf_t * hf_tm, self.Uf_h))
            hf_t = (T.ones_like(zf_t) - zf_t) * hhf_t + zf_t * hf_tm

            return hf_t

        h_f, _ = theano.scan(
            fn=recurrence_f,
            sequences=input_f,
            outputs_info=T.alloc(self.hf, input_f.shape[1], hidden_dim)
        )

        # backward gru
        def recurrence_b(xb_t, hb_tm):
            xb_z = T.dot(self.emb[xb_t], self.Wb_z) + self.bb_z
            xb_r = T.dot(self.emb[xb_t], self.Wb_r) + self.bb_r
            xb_h = T.dot(self.emb[xb_t], self.Wb_h) + self.bb_h

            zb_t = inner_activation(xb_z + T.dot(hb_tm, self.Ub_z))
            rb_t = inner_activation(xb_r + T.dot(hb_tm, self.Ub_r))
            hhb_t = activation(xb_h + T.dot(rb_t * hb_tm, self.Ub_h))
            hb_t = (T.ones_like(zb_t) - zb_t) * hhb_t + zb_t * hb_tm

            return hb_t

        h_b, _ = theano.scan(
            fn=recurrence_b,
            sequences=input_b,
            outputs_info=T.alloc(self.hb, input_b.shape[1], hidden_dim)
        )

        if merge_mode == 'sum':
            h = h_f[-1] + h_b[-1]
        elif merge_mode == 'multiply':
            h = h_f[-1] * h_b[-1]
        elif merge_mode == 'average':
            h = (h_f[-1] + h_b[-1]) / 2
        elif merge_mode == 'concat':
            h = T.concatenate([h_f[-1], h_b[-1]], axis=1)  # concatenate the last states, matching the other modes
        else:
            raise NotImplementedError(
                'Supported "merge_mode" values for the forward + backward gru are: '
                '"sum", "multiply", "average" & "concat".')

        # the hidden state at the last time-step is passed to the decoder;
        # the prediction at the last time-step will always be 'eos' and is therefore ignored
        self.h = h
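
One practical consequence of merge_mode is the width of the state handed to the decoder: 'sum', 'multiply' and 'average' keep it at hidden_dim, while 'concat' doubles it, so the layer consuming self.h must be sized to match. A small helper capturing that contract (a sketch, not part of the source):

def encoder_output_dim(hidden_dim, merge_mode='sum'):
    # width of the merged bidirectional state fed to the decoder
    if merge_mode in ('sum', 'multiply', 'average'):
        return hidden_dim
    if merge_mode == 'concat':
        return 2 * hidden_dim
    raise NotImplementedError('Unknown merge_mode: %s' % merge_mode)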
Code example #18
    def __init__(self,
                 input,
                 vocab_size,
                 emb_dim,
                 hidden_dim,
                 n_layers=2,
                 init='uniform',
                 inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh,
                 params=None):
        input = input.dimshuffle(1, 0)
        assert n_layers == 2  # only a two-layer stack is supported
        if params is None:
            self.emb = theano.shared(value=get(identifier=init,
                                               shape=(vocab_size, emb_dim),
                                               scale=np.sqrt(3)),
                                     name='emb',
                                     borrow=True)
            # Layer 1
            # update gate
            self.W_z = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_z',
                                     borrow=True)
            self.U_z = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_z',
                                     borrow=True)
            self.b_z = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_z',
                                     borrow=True)
            # reset gate
            self.W_r = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_r',
                                     borrow=True)
            self.U_r = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_r',
                                     borrow=True)
            self.b_r = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_r',
                                     borrow=True)
            # hidden state
            self.W_h = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_h',
                                     borrow=True)
            self.U_h = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_h',
                                     borrow=True)
            self.b_h = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_h',
                                     borrow=True)
            # Layer 2
            # update gate
            self.W_z_1 = theano.shared(value=get(identifier=init,
                                                 shape=(hidden_dim,
                                                        hidden_dim)),
                                       name='W_z_1',
                                       borrow=True)
            self.U_z_1 = theano.shared(value=get(identifier=inner_init,
                                                 shape=(hidden_dim,
                                                        hidden_dim)),
                                       name='U_z_1',
                                       borrow=True)
            self.b_z_1 = theano.shared(value=get(identifier='zero',
                                                 shape=(hidden_dim, )),
                                       name='b_z_1',
                                       borrow=True)
            # reset gate
            self.W_r_1 = theano.shared(value=get(identifier=init,
                                                 shape=(hidden_dim,
                                                        hidden_dim)),
                                       name='W_r_1',
                                       borrow=True)
            self.U_r_1 = theano.shared(value=get(identifier=inner_init,
                                                 shape=(hidden_dim,
                                                        hidden_dim)),
                                       name='U_r_1',
                                       borrow=True)
            self.b_r_1 = theano.shared(value=get(identifier='zero',
                                                 shape=(hidden_dim, )),
                                       name='b_r_1',
                                       borrow=True)
            # hidden state
            self.W_h_1 = theano.shared(value=get(identifier=init,
                                                 shape=(hidden_dim,
                                                        hidden_dim)),
                                       name='W_h_1',
                                       borrow=True)
            self.U_h_1 = theano.shared(value=get(identifier=inner_init,
                                                 shape=(hidden_dim,
                                                        hidden_dim)),
                                       name='U_h_1',
                                       borrow=True)
            self.b_h_1 = theano.shared(value=get(identifier='zero',
                                                 shape=(hidden_dim, )),
                                       name='b_h_1',
                                       borrow=True)
            # Skip-connections from input to layer 2
            self.s_z = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='s_z',
                                     borrow=True)
            self.s_r = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='s_r',
                                     borrow=True)
            self.s_h = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='s_h',
                                     borrow=True)
        else:
            self.emb, self.W_z, self.U_z, self.b_z, self.W_r, self.U_r, self.b_r, self.W_h, self.U_h, \
                self.b_h, self.W_z_1, self.U_z_1, self.b_z_1, self.W_r_1, self.U_r_1, self.b_r_1, \
                self.W_h_1, self.U_h_1, self.b_h_1, self.s_z, self.s_r, self.s_h = params

        self.h0 = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='h0',
                                borrow=True)
        self.h1 = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='h1',
                                borrow=True)
        self.params = [
            self.emb, self.W_z, self.U_z, self.b_z, self.W_r, self.U_r,
            self.b_r, self.W_h, self.U_h, self.b_h, self.W_z_1, self.U_z_1,
            self.b_z_1, self.W_r_1, self.U_r_1, self.b_r_1, self.W_h_1,
            self.U_h_1, self.b_h_1, self.s_z, self.s_r, self.s_h
        ]

        def recurrence(x_t, h_t1_prev, h_t2_prev):
            # Layer 1
            x_z_1 = T.dot(self.emb[x_t], self.W_z) + self.b_z
            x_r_1 = T.dot(self.emb[x_t], self.W_r) + self.b_r
            x_h_1 = T.dot(self.emb[x_t], self.W_h) + self.b_h

            z_t_1 = inner_activation(x_z_1 + T.dot(h_t1_prev, self.U_z))
            r_t_1 = inner_activation(x_r_1 + T.dot(h_t1_prev, self.U_r))
            hh_t_1 = activation(x_h_1 + T.dot(r_t_1 * h_t1_prev, self.U_h))
            h_t_1 = (T.ones_like(z_t_1) - z_t_1) * hh_t_1 + z_t_1 * h_t1_prev

            # Layer 2
            # 's_*' weights are the skip connections from the input embedding
            x_z_2 = T.dot(h_t_1, self.W_z_1) + T.dot(self.emb[x_t],
                                                     self.s_z) + self.b_z_1
            x_r_2 = T.dot(h_t_1, self.W_r_1) + T.dot(self.emb[x_t],
                                                     self.s_r) + self.b_r_1
            x_h_2 = T.dot(h_t_1, self.W_h_1) + T.dot(self.emb[x_t],
                                                     self.s_h) + self.b_h_1

            z_t_2 = inner_activation(x_z_2 + T.dot(h_t2_prev, self.U_z_1))
            r_t_2 = inner_activation(x_r_2 + T.dot(h_t2_prev, self.U_r_1))
            hh_t_2 = activation(x_h_2 + T.dot(r_t_2 * h_t2_prev, self.U_h_1))
            h_t_2 = (T.ones_like(z_t_2) - z_t_2) * hh_t_2 + z_t_2 * h_t2_prev

            return h_t_1, h_t_2

        [h_1, h_2], _ = theano.scan(recurrence,
                                    sequences=input,
                                    outputs_info=[
                                        T.alloc(self.h0, input.shape[1],
                                                hidden_dim),
                                        T.alloc(self.h1, input.shape[1],
                                                hidden_dim)
                                    ])

        # since every hidden layer is connected to output
        self.y = T.concatenate([h_1[-1], h_2[-1]], axis=1)
Code example #19
File: lstm.py Project: uyaseen/theano-recurrence
    def __init__(self, input, input_dim, hidden_dim, output_dim, init='uniform',
                 inner_init='orthonormal', inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh, mini_batch=False, params=None):
        self.inner_activation = inner_activation
        self.activation = activation
        self.mini_batch = mini_batch
        if mini_batch:
            input = input.dimshuffle(1, 0, 2)
        if params is None:
            # input gate
            self.W_i = theano.shared(value=get(identifier=init, shape=(input_dim, hidden_dim)),
                                     name='W_i',
                                     borrow=True)
            self.U_i = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_i',
                                     borrow=True)
            self.b_i = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_i',
                                     borrow=True)
            # forget gate
            self.W_f = theano.shared(value=get(identifier=init, shape=(input_dim, hidden_dim)),
                                     name='W_f',
                                     borrow=True)
            self.U_f = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_f',
                                     borrow=True)
            self.b_f = theano.shared(value=get(identifier='one', shape=(hidden_dim, )),
                                     name='b_f',
                                     borrow=True)
            # memory
            self.W_c = theano.shared(value=get(identifier=init, shape=(input_dim, hidden_dim)),
                                     name='W_c',
                                     borrow=True)
            self.U_c = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_c',
                                     borrow=True)
            self.b_c = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_c',
                                     borrow=True)
            # output gate
            self.W_o = theano.shared(value=get(identifier=init, shape=(input_dim, hidden_dim)),
                                     name='W_o',
                                     borrow=True)
            self.U_o = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_o',
                                     borrow=True)
            self.b_o = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )),
                                     name='b_o',
                                     borrow=True)
            # weights pertaining to output neuron
            self.V = theano.shared(value=get(identifier=init, shape=(hidden_dim, output_dim)),
                                   name='V',
                                   borrow=True)
            self.b_y = theano.shared(value=get(identifier='zero', shape=(output_dim,)),
                                     name='b_y',
                                     borrow=True)

        else:
            self.W_i, self.U_i, self.b_i, self.W_f, self.U_f, self.b_f, \
                self.W_c, self.U_c, self.b_c, self.W_o, self.U_o, self.b_o, self.V, self.b_y = params

        self.c0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='c0', borrow=True)
        self.h0 = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='h0', borrow=True)
        self.params = [self.W_i, self.U_i, self.b_i,
                       self.W_f, self.U_f, self.b_f,
                       self.W_c, self.U_c, self.b_c,
                       self.W_o, self.U_o, self.b_o,
                       self.V, self.b_y]

        if mini_batch:
            def recurrence(x_t, c_tm_prev, h_tm_prev):
                x_i = T.dot(x_t, self.W_i) + self.b_i
                x_f = T.dot(x_t, self.W_f) + self.b_f
                x_c = T.dot(x_t, self.W_c) + self.b_c
                x_o = T.dot(x_t, self.W_o) + self.b_o

                i_t = inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
                f_t = inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
                c_t = f_t * c_tm_prev + i_t * activation(x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
                o_t = inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
                h_t = o_t * activation(c_t)  # actual hidden state

                y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.b_y)

                return c_t, h_t, y_t

            [_, self.h_t, self.y_t], _ = theano.scan(
                recurrence,
                sequences=input,
                outputs_info=[T.alloc(self.c0, input.shape[1], hidden_dim),
                              T.alloc(self.h0, input.shape[1], hidden_dim),
                              None]
            )
            self.h_t = self.h_t.dimshuffle(1, 0, 2)
            self.y_t = self.y_t.dimshuffle(1, 0, 2)
            self.y = T.argmax(self.y_t, axis=2)
        else:
            def recurrence(x_t, c_tm_prev, h_tm_prev):
                x_i = T.dot(x_t, self.W_i) + self.b_i
                x_f = T.dot(x_t, self.W_f) + self.b_f
                x_c = T.dot(x_t, self.W_c) + self.b_c
                x_o = T.dot(x_t, self.W_o) + self.b_o

                i_t = inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
                f_t = inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
                c_t = f_t * c_tm_prev + i_t * activation(x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
                o_t = inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
                h_t = o_t * activation(c_t)  # actual hidden state

                y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.b_y)

                return c_t, h_t, y_t[0]

            [_, self.h_t, self.y_t], _ = theano.scan(
                recurrence,
                sequences=input,
                outputs_info=[self.c0, self.h0, None]
            )
            self.y = T.argmax(self.y_t, axis=1)
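
The mini_batch flag changes the expected input layout: without it the LSTM consumes a single (seq_len, input_dim) matrix; with it, a (batch, seq_len, input_dim) tensor that is dimshuffled to (seq_len, batch, input_dim) so scan iterates over time. A usage sketch for the batched case, assuming the __init__ above belongs to a class named LSTM (an assumed name):

import theano
import theano.tensor as T

x = T.tensor3('x')  # (batch, seq_len, input_dim)
lstm = LSTM(input=x, input_dim=65, hidden_dim=128, output_dim=65,
            mini_batch=True)

# per-sequence predictions of shape (batch, seq_len)
predict = theano.function(inputs=[x], outputs=lstm.y)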
Code example #20
    def __init__(self,
                 input,
                 input_dim,
                 hidden_dim,
                 output_dim,
                 init='uniform',
                 inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh,
                 mini_batch=False,
                 params=None):
        self.activation = activation
        self.inner_activation = inner_activation
        self.mini_batch = mini_batch
        if mini_batch:
            input = input.dimshuffle(1, 0, 2)
        if params is None:
            # update gate
            self.W_z = theano.shared(value=get(identifier=init,
                                               shape=(input_dim, hidden_dim)),
                                     name='W_z',
                                     borrow=True)
            self.U_z = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_z',
                                     borrow=True)
            self.b_z = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_z',
                                     borrow=True)
            # reset gate
            self.W_r = theano.shared(value=get(identifier=init,
                                               shape=(input_dim, hidden_dim)),
                                     name='W_r',
                                     borrow=True)
            self.U_r = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_r',
                                     borrow=True)
            self.b_r = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_r',
                                     borrow=True)
            # weights pertaining to input, hidden & output neurons (externally)
            self.W = theano.shared(value=get(identifier=init,
                                             shape=(input_dim, hidden_dim)),
                                   name='W',
                                   borrow=True)
            self.U = theano.shared(value=get(identifier=inner_init,
                                             shape=(hidden_dim, hidden_dim)),
                                   name='U',
                                   borrow=True)
            self.V = theano.shared(value=get(identifier=init,
                                             shape=(hidden_dim, output_dim)),
                                   name='V',
                                   borrow=True)
            self.b_h = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_h',
                                     borrow=True)
            self.b_y = theano.shared(value=get(identifier='zero',
                                               shape=(output_dim, )),
                                     name='b_y',
                                     borrow=True)
        else:
            self.W_z, self.U_z, self.b_z, self.W_r, self.U_r, self.b_r, \
                self.W, self.U, self.V, self.b_h, self.b_y = params

        self.h0 = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='h0',
                                borrow=True)
        self.params = [
            self.W_z, self.U_z, self.b_z, self.W_r, self.U_r, self.b_r, self.W,
            self.U, self.V, self.b_h, self.b_y
        ]

        if mini_batch:

            def recurrence(x_t, h_tm_prev):
                x_z = T.dot(x_t, self.W_z) + self.b_z
                x_r = T.dot(x_t, self.W_r) + self.b_r
                x_h = T.dot(x_t, self.W) + self.b_h

                z_t = inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
                r_t = inner_activation(x_r + T.dot(h_tm_prev, self.U_r))
                hh_t = activation(x_h + T.dot(r_t * h_tm_prev, self.U))
                h_t = (T.ones_like(z_t) - z_t) * hh_t + z_t * h_tm_prev

                y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.b_y)

                return h_t, y_t

            [self.h_t,
             self.y_t], _ = theano.scan(recurrence,
                                        sequences=input,
                                        outputs_info=[
                                            T.alloc(self.h0, input.shape[1],
                                                    hidden_dim), None
                                        ])
            self.h_t = self.h_t.dimshuffle(1, 0, 2)
            self.y_t = self.y_t.dimshuffle(1, 0, 2)
            self.y = T.argmax(self.y_t, axis=2)
        else:

            def recurrence(x_t, h_tm_prev):
                x_z = T.dot(x_t, self.W_z) + self.b_z
                x_r = T.dot(x_t, self.W_r) + self.b_r
                x_h = T.dot(x_t, self.W) + self.b_h

                z_t = inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
                r_t = inner_activation(x_r + T.dot(h_tm_prev, self.U_r))
                hh_t = activation(x_h + T.dot(r_t * h_tm_prev, self.U))
                h_t = (T.ones_like(z_t) - z_t) * hh_t + z_t * h_tm_prev

                y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.b_y)

                return h_t, y_t[0]

            [self.h_t, self.y_t], _ = theano.scan(recurrence,
                                                  sequences=input,
                                                  outputs_info=[self.h0, None])
            self.y = T.argmax(self.y_t, axis=1)
Code example #21
    def __init__(self,
                 enc_h,
                 mask,
                 emb_mat,
                 vocab_size,
                 emb_dim,
                 hidden_dim,
                 eos_token,
                 batch_size,
                 max_len,
                 init='uniform',
                 inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh,
                 params=None,
                 max_response=100):
        self.enc_h = enc_h
        self.mask = mask
        self.eos_token = eos_token
        self.batch_size = batch_size
        self.inner_activation = inner_activation
        self.activation = activation
        self.max_response = max_response
        if params is None:
            self.emb = theano.shared(value=np.asarray(
                emb_mat, dtype=theano.config.floatX),
                                     name='emb',
                                     borrow=True)
            # update gate
            self.W_z = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_z',
                                     borrow=True)
            self.U_z = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_z',
                                     borrow=True)
            self.b_z = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_z',
                                     borrow=True)
            # reset gate
            self.W_r = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_r',
                                     borrow=True)
            self.U_r = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_r',
                                     borrow=True)
            self.b_r = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_r',
                                     borrow=True)
            # weights pertaining to input, hidden & output neurons (externally)
            self.W = theano.shared(value=get(identifier=init,
                                             shape=(emb_dim, hidden_dim)),
                                   name='W',
                                   borrow=True)
            self.U = theano.shared(value=get(identifier=inner_init,
                                             shape=(hidden_dim, hidden_dim)),
                                   name='U',
                                   borrow=True)
            self.V = theano.shared(value=get(identifier=init,
                                             shape=(hidden_dim, vocab_size)),
                                   name='V',
                                   borrow=True)
            self.bh = theano.shared(value=get(identifier='zero',
                                              shape=(hidden_dim, )),
                                    name='bh',
                                    borrow=True)
            self.by = theano.shared(value=get(identifier='zero',
                                              shape=(vocab_size, )),
                                    name='by',
                                    borrow=True)
            # to weight 'context' from encoder
            self.c_h = theano.shared(value=get(identifier=init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='c_h',
                                     borrow=True)
            self.c_y = theano.shared(value=get(identifier=init,
                                               shape=(hidden_dim, vocab_size)),
                                     name='c_y',
                                     borrow=True)
            # to weight 'y_t-1' for decoder's 'y'
            self.y_t1 = theano.shared(value=get(identifier=init,
                                                shape=(emb_dim, vocab_size)),
                                      name='y_t1',
                                      borrow=True)
        else:
            self.emb, self.W_z, self.U_z, self.b_z, self.W_r, self.U_r, self.b_r, \
                self.W, self.U, self.V, self.bh, self.by, self.c_h, self.c_y, \
                self.y_t1 = params

        self.params = [
            self.emb, self.W_z, self.U_z, self.b_z, self.W_r, self.U_r,
            self.b_r, self.W, self.U, self.V, self.bh, self.by, self.c_h,
            self.c_y, self.y_t1
        ]

        self.h0 = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='h0',
                                borrow=True)
        # y_(t-1) coming out of the encoder will always be the 'eos' token
        self.y0 = theano.shared(value=np.asarray(np.full((batch_size, ),
                                                         self.eos_token),
                                                 dtype='int32'),
                                name='y0',
                                borrow=True)

        # note: in the decoder, both h_t and y_t are conditioned on 'enc_h' and 'y_(t-1)'
        def recurrence(msk, h_tm_prev, y_tm_prev):
            x_z = T.dot(self.emb[y_tm_prev], self.W_z) + self.b_z
            x_r = T.dot(self.emb[y_tm_prev], self.W_r) + self.b_r
            x_h = T.dot(self.emb[y_tm_prev], self.W) + T.dot(
                self.enc_h, self.c_h) + self.bh

            z_t = self.inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
            r_t = self.inner_activation(x_r + T.dot(h_tm_prev, self.U_r))
            hh_t = self.activation(x_h + T.dot(r_t * h_tm_prev, self.U))
            h_t = (T.ones_like(z_t) - z_t) * hh_t + z_t * h_tm_prev

            # needed to back-propagate errors
            y_d_t = T.dot(h_t, self.V) + T.dot(self.enc_h, self.c_y) + T.dot(
                self.emb[y_tm_prev], self.y_t1) + self.by
            # ignore padded tokens
            y_d_t = T.batched_dot(y_d_t, msk)
            y_d = T.clip(T.nnet.softmax(y_d_t), 0.0001, 0.9999)
            y_t = T.argmax(y_d, axis=1)
            return h_t, y_d, T.cast(y_t.flatten(), 'int32')

        [_, y_dist, y], _ = theano.scan(
            fn=recurrence,
            sequences=mask.dimshuffle(
                1, 0),  # ugly, but we have to go till the end
            outputs_info=[
                T.alloc(self.h0, self.enc_h.shape[0], hidden_dim), None,
                T.alloc(self.y0, self.enc_h.shape[0])
            ],
            n_steps=max_len)

        self.y = y.dimshuffle(1, 0)
        self.y_dist = y_dist.dimshuffle(1, 0, 2)
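
This decoder runs for a fixed max_len steps and feeds each greedy prediction back in as y_(t-1), so generating a response only needs the encoder state and a mask (all ones at inference time). A greedy-decoding sketch, assuming this __init__ belongs to a class named Decoder and using a stand-in embedding matrix (both are assumptions, not from the source):

import numpy as np
import theano
import theano.tensor as T

emb_mat = np.random.randn(5000, 100)  # stand-in embedding matrix
enc_h = T.matrix('enc_h')             # (batch, hidden_dim) from the encoder
mask = T.imatrix('mask')              # (batch, max_len), all ones at inference

dec = Decoder(enc_h=enc_h, mask=mask, emb_mat=emb_mat, vocab_size=5000,
              emb_dim=100, hidden_dim=128, eos_token=0,
              batch_size=32, max_len=20)

# token ids of shape (batch, max_len), decoded greedily
respond = theano.function(inputs=[enc_h, mask], outputs=dec.y)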
Code example #22
    def __init__(self, input, emb_mat, emb_dim, hidden_dim, init='uniform', inner_init='orthonormal',
                 activation=T.tanh, params=None, merge_mode='sum'):
        if params is None:
            self.emb = theano.shared(value=np.asarray(emb_mat, dtype=theano.config.floatX),
                                     name='emb', borrow=True)
            # weights for forward rnn
            self.W_f = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_f', borrow=True)
            self.U_f = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_f', borrow=True)
            self.b_f = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                     name='b_f', borrow=True)
            # weights for backward rnn
            self.W_b = theano.shared(value=get(identifier=init, shape=(emb_dim, hidden_dim)),
                                     name='W_b', borrow=True)
            self.U_b = theano.shared(value=get(identifier=inner_init, shape=(hidden_dim, hidden_dim)),
                                     name='U_b', borrow=True)
            self.b_b = theano.shared(value=get(identifier='zero', shape=(hidden_dim,)),
                                     name='b_b', borrow=True)

        else:
            self.emb, self.W_f, self.U_f, self.b_f, self.W_b, self.U_b, self.b_b = params

        self.hf = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='hf', borrow=True)
        self.hb = theano.shared(value=get(identifier='zero', shape=(hidden_dim, )), name='hb', borrow=True)
        self.params = [self.emb,
                       self.W_f, self.U_f, self.b_f,
                       self.W_b, self.U_b, self.b_b]

        input_f = input.dimshuffle(1, 0)
        input_b = input[:, ::-1].dimshuffle(1, 0)  # reverse the time axis (axis 1), not the batch axis

        # forward rnn
        def recurrence_f(xf_t, hf_tm):
            hf_t = activation(T.dot(self.emb[xf_t], self.W_f) +
                              T.dot(hf_tm, self.U_f) + self.b_f)
            return hf_t

        h_f, _ = theano.scan(
            fn=recurrence_f,
            sequences=input_f,
            outputs_info=T.alloc(self.hf, input_f.shape[1], hidden_dim)
        )

        # backward rnn
        def recurrence_b(xb_t, hb_tm):
            hb_t = activation(T.dot(self.emb[xb_t], self.W_b) +
                              T.dot(hb_tm, self.U_b) + self.b_b)
            return hb_t

        h_b, _ = theano.scan(
            fn=recurrence_b,
            sequences=input_b,
            outputs_info=T.alloc(self.hb, input_b.shape[1], hidden_dim)
        )

        if merge_mode == 'sum':
            h = h_f[-1] + h_b[-1]
        elif merge_mode == 'multiply':
            h = h_f[-1] * h_b[-1]
        elif merge_mode == 'average':
            h = (h_f[-1] + h_b[-1]) / 2
        elif merge_mode == 'concat':
            h = T.concatenate([h_f[-1], h_b[-1]], axis=1)  # concatenate the last states, matching the other modes
        else:
            raise NotImplementedError(
                'Supported "merge_mode" values for the forward + backward rnn are: '
                '"sum", "multiply", "average" & "concat".')

        # the hidden state at the last time-step is passed to the decoder;
        # the prediction at the last time-step will always be 'eos' and is therefore ignored
        self.h = h
Code example #23
    def __init__(self,
                 enc_h,
                 mask,
                 emb_mat,
                 vocab_size,
                 emb_dim,
                 hidden_dim,
                 eos_token,
                 batch_size,
                 max_len,
                 init='uniform',
                 inner_init='orthonormal',
                 inner_activation=T.nnet.hard_sigmoid,
                 activation=T.tanh,
                 params=None,
                 max_response=100):
        self.enc_h = enc_h
        self.mask = mask
        self.eos_token = eos_token
        self.batch_size = batch_size
        self.inner_activation = inner_activation
        self.activation = activation
        self.max_response = max_response
        if params is None:
            self.emb = theano.shared(value=np.asarray(
                emb_mat, dtype=theano.config.floatX),
                                     name='emb',
                                     borrow=True)
            # input gate
            self.W_i = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_i',
                                     borrow=True)
            self.U_i = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_i',
                                     borrow=True)
            self.b_i = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_i',
                                     borrow=True)
            # forget gate
            self.W_f = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_f',
                                     borrow=True)
            self.U_f = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_f',
                                     borrow=True)
            self.b_f = theano.shared(value=get(identifier='one',
                                               shape=(hidden_dim, )),
                                     name='b_f',
                                     borrow=True)
            # memory
            self.W_c = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_c',
                                     borrow=True)
            self.U_c = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_c',
                                     borrow=True)
            self.b_c = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_c',
                                     borrow=True)
            # output gate
            self.W_o = theano.shared(value=get(identifier=init,
                                               shape=(emb_dim, hidden_dim)),
                                     name='W_o',
                                     borrow=True)
            self.U_o = theano.shared(value=get(identifier=inner_init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='U_o',
                                     borrow=True)
            self.b_o = theano.shared(value=get(identifier='zero',
                                               shape=(hidden_dim, )),
                                     name='b_o',
                                     borrow=True)
            # weights pertaining to output neuron
            self.V = theano.shared(value=get(identifier=init,
                                             shape=(hidden_dim, vocab_size)),
                                   name='V',
                                   borrow=True)
            self.by = theano.shared(value=get(identifier='zero',
                                              shape=(vocab_size, )),
                                    name='by',
                                    borrow=True)
            # to weight 'context' from encoder
            self.c_h = theano.shared(value=get(identifier=init,
                                               shape=(hidden_dim, hidden_dim)),
                                     name='c_h',
                                     borrow=True)
            self.c_y = theano.shared(value=get(identifier=init,
                                               shape=(hidden_dim, vocab_size)),
                                     name='c_y',
                                     borrow=True)
            # to weight 'y_t-1' for decoder's 'y'
            self.y_t1 = theano.shared(value=get(identifier=init,
                                                shape=(emb_dim, vocab_size)),
                                      name='y_t1',
                                      borrow=True)
        else:
            self.emb, self.W_i, self.U_i, self.b_i, self.W_f, self.U_f, self.b_f, \
                self.W_c, self.U_c, self.b_c, self.W_o, self.U_o, self.b_o, \
                self.V, self.by, self.c_h, self.c_y, self.y_t1 = params

        self.params = [
            self.emb, self.W_i, self.U_i, self.b_i, self.W_f, self.U_f,
            self.b_f, self.W_c, self.U_c, self.b_c, self.W_o, self.U_o,
            self.b_o, self.V, self.by, self.c_h, self.c_y, self.y_t1
        ]

        self.c0 = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='c0',
                                borrow=True)
        self.h0 = theano.shared(value=get(identifier='zero',
                                          shape=(hidden_dim, )),
                                name='h0',
                                borrow=True)
        # y_(t-1) coming out of the encoder will always be the 'eos' token
        self.y0 = theano.shared(value=np.asarray(np.full((batch_size, ),
                                                         self.eos_token),
                                                 dtype='int32'),
                                name='y0',
                                borrow=True)

        # note: in the decoder, both h_t and y_t are conditioned on 'enc_h' and 'y_(t-1)'
        def recurrence(msk, c_tm_prev, h_tm_prev, y_tm_prev):
            x_i = T.dot(self.emb[y_tm_prev], self.W_i) + self.b_i
            x_f = T.dot(self.emb[y_tm_prev], self.W_f) + self.b_f
            x_c = T.dot(self.emb[y_tm_prev], self.W_c) + self.b_c
            x_o = T.dot(self.emb[y_tm_prev], self.W_o) + T.dot(
                self.enc_h, self.c_h) + self.b_o

            i_t = self.inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
            f_t = self.inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
            c_t = f_t * c_tm_prev + i_t * self.activation(
                x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
            o_t = self.inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
            h_t = o_t * self.activation(c_t)  # actual hidden state

            # needed to back-propagate errors
            y_d_t = T.dot(h_t, self.V) + T.dot(self.enc_h, self.c_y) + T.dot(
                self.emb[y_tm_prev], self.y_t1) + self.by
            # ignore padded tokens
            y_d_t = T.batched_dot(y_d_t, msk)
            y_d = T.clip(T.nnet.softmax(y_d_t), 0.0001, 0.9999)
            y_t = T.argmax(y_d, axis=1)
            return c_t, h_t, y_d, T.cast(y_t.flatten(), 'int32')

        [_, _, y_dist, y], _ = theano.scan(
            fn=recurrence,
            sequences=mask.dimshuffle(
                1, 0),  # ugly, but we have to go till the end
            outputs_info=[
                T.alloc(self.c0, self.enc_h.shape[0], hidden_dim),
                T.alloc(self.h0, self.enc_h.shape[0], hidden_dim), None,
                T.alloc(self.y0, self.enc_h.shape[0])
            ],
            n_steps=max_len)

        self.y = y.dimshuffle(1, 0)
        self.y_dist = y_dist.dimshuffle(1, 0, 2)
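
Taken together, these snippets form an encoder-decoder (seq2seq) conversational model: an encoder consumes the source tokens and its final hidden state becomes enc_h for a decoder such as the one above. A wiring sketch under assumed class names (Encoder and Decoder are not defined on this page):

import numpy as np
import theano
import theano.tensor as T

emb_mat = np.random.randn(5000, 100)  # stand-in shared embedding matrix
src = T.imatrix('src')                # (batch, src_len) token ids
mask = T.imatrix('mask')              # (batch, max_len) target-side mask

enc = Encoder(input=src, emb_mat=emb_mat, emb_dim=100, hidden_dim=128)
dec = Decoder(enc_h=enc.h, mask=mask, emb_mat=emb_mat, vocab_size=5000,
              emb_dim=100, hidden_dim=128, eos_token=0,
              batch_size=32, max_len=20)

# training would minimize the negative log-likelihood of dec.y_dist;
# dec.y holds the greedy token predictions
reply = theano.function(inputs=[src, mask], outputs=dec.y)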