Ejemplo n.º 1
0
    def apply_morph_only_rnn_gru(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """Build the morph-only language-model graph with a GRU morph encoder.

        sentence        : sentence * batch word ids
        sentence_morph  : sentence * batch * morph morpheme ids
        Pipeline: morph lookup -> dropout -> MorphStructRNN('gru')
                  -> LSTM -> dropout -> LSTM -> maxout -> dropout -> logistic.
        Sets self.cost, self.params, self.L1 and self.L2.
        """
        # Predict token t from tokens < t: inputs/targets shifted by one step.
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        # Morpheme embedding lookup table.
        morph_table = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
        src_morph_emb = morph_table.apply(src_morph, T.arange(self.n_emb_morph))
        self.layers.append(morph_table)

        if self.dropout < 1.0:
            src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

        # Compose per-word morpheme embeddings into word-level states (GRU cell).
        morph_rnn = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
        hiddens = morph_rnn.apply(src_morph_emb, src_morph_mask)
        self.layers.append(morph_rnn)

        # First word-level LSTM.
        lstm_a = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = lstm_a.apply(hiddens, src_mask)
        self.layers.append(lstm_a)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        # Second word-level LSTM.
        lstm_b = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = lstm_b.apply(hiddens, src_mask)
        self.layers.append(lstm_b)

        # Maxout over the concatenation of summed morph embeddings and the
        # top LSTM states.
        maxout = MaxoutLayer()
        # src_morph_emb : sentence * batch * morph * n_emb_morph -> sum morphs
        merged_morph_emb = src_morph_emb.sum(2)
        src_morph_mask = src_morph_mask.max(axis=2)
        states = T.concatenate([merged_morph_emb, hiddens], axis=2)
        hiddens = maxout.apply(states, self.n_emb_morph + self.n_hids,
                               self.n_hids, src_morph_mask, 2)
        self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        # Output softmax over the vocabulary.
        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)
        self.cost = logistic_layer.cost(tgt, tgt_mask)

        # Collect parameters and regularisation terms.
        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Ejemplo n.º 2
0
    def apply_normal(self, sentence, sentence_mask, use_noise=1, use_maxout=True):
        """Build the plain word-level language-model graph.

        sentence : sentence * batch word ids
        Pipeline: word lookup -> dropout -> LSTM -> dropout -> LSTM
                  -> (optional) maxout -> dropout -> logistic.
        Sets self.src_emb, self.cost, self.params, self.L1 and self.L2.
        """
        # Shift by one step: predict each word from its prefix.
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]

        # Word embedding lookup (dynamic mixed table).
        table = DynamicMixLookupTable(self.n_emb_lstm, **self.cfig)
        src_emb = table.apply(src, T.arange(self.n_emb_lstm))
        # Expose the pre-dropout embeddings to callers.
        self.src_emb = src_emb
        self.layers.append(table)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

        # First LSTM consumes the embeddings.
        lstm_a = LSTM(self.n_emb_lstm, self.n_hids)
        hiddens, cells = lstm_a.apply(src_emb, src_mask)
        self.layers.append(lstm_a)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        # Second, stacked LSTM.
        lstm_b = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = lstm_b.apply(hiddens, src_mask)
        self.layers.append(lstm_b)

        if use_maxout:
            # Maxout over [embeddings; top LSTM states].
            maxout = MaxoutLayer()
            states = T.concatenate([src_emb, hiddens], axis=2)
            hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids,
                                   self.n_hids, src_mask, 2)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        # Output softmax over the vocabulary.
        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)
        self.cost = logistic_layer.cost(tgt, tgt_mask)

        # Collect parameters and regularisation terms.
        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Ejemplo n.º 3
0
    def apply_model(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
            sentence : sentence * batch
            sentence_morph : sentence * batch * morph
            1. word lookup -> dropout
            2. lstm -> dropout
            3. lstm -> maxout -> dropout
            4. logistic
            Sets self.cost, self.params, self.L1 and self.L2.
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        src_emb = lookup_layer('word', src)
        #src_morph_emb : sentence * batch * morph * n_emb_morph
        #src_morph_emb = lookup_layer('morph', src)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

        # BUG FIX: the first LSTM previously read the name `hiddens` before
        # it was ever assigned (NameError at graph-build time); it must
        # consume the word embeddings instead.  Its input width is therefore
        # n_emb_lstm (consistent with the maxout width below and with the
        # first LSTM in apply_normal).
        rnn_layer_1st = LSTM(self.n_emb_lstm, self.n_hids)
        hiddens, cells = rnn_layer_1st.apply(src_emb, src_mask)
        self.layers.append(rnn_layer_1st)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn_layer_2rd.apply(hiddens, src_mask)
        self.layers.append(rnn_layer_2rd)

        if True:
            maxout = MaxoutLayer()
            #src_emb : sentence * batch * n_emb
            #hiddens : sentence * batch * hids
            states = T.concatenate([src_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        # Output softmax over the vocabulary.
        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)
        # Collect parameters and regularisation terms.
        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Ejemplo n.º 4
0
Archivo: lm.py Proyecto: gumaojie/rnnlm
    def apply(self, sentence, sentence_mask, use_noise=1):
        """Build the LSTM + FLSTM language-model graph.

        sentence : sentence * batch word ids
        Pipeline: lookup -> dropout -> LSTM -> FLSTM -> maxout
                  -> dropout -> logistic.
        Sets self.cost, self.params, self.L1 and self.L2.
        """
        # Shift by one step: predict each word from its prefix.
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]

        # Word embedding lookup.
        table = lookup_table(self.n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, T.arange(self.n_emb_lstm))
        self.layers.append(table)
        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        # First layer: plain LSTM over the embeddings.
        rnn = LSTM(self.n_emb_lstm, self.n_hids)
        hiddens, cells = rnn.apply(state_below, src_mask)
        self.layers.append(rnn)

        # Second layer: FLSTM fed the first layer's states on both inputs.
        rnn2 = FLSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn2.apply(hiddens, hiddens, src_mask)
        self.layers.append(rnn2)

        # Maxout over [embeddings; top states].
        maxout = maxout_layer()
        states = T.concatenate([state_below, hiddens], axis=2)
        hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids,
                               self.n_hids, src_mask, 2)
        self.layers.append(maxout)
        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        # Output softmax over the vocabulary.
        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)
        self.cost = logistic_layer.cost(tgt, tgt_mask)

        # Collect parameters and regularisation terms.
        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Ejemplo n.º 5
0
    def apply(self, sentence, sentence_mask, use_noise=1):
        """Build the language-model graph; the variant is hard-coded below.

        sentence : sentence * batch word ids.
        Only the ``elif True`` (rnn_pyramid) branch is live; the ``if False``
        (partially shared embedding / SLSTM) and ``else`` (shared embedding /
        stacked LSTM + chunk) branches are disabled experiments kept for
        reference.  Sets self.cost, self.params, self.L1 and self.L2, and
        (in the live branch) self.structs.
        """
        n_emb_lstm = self.n_emb_lstm
        n_emb_struct = self.n_emb_struct
        n_emb_share = self.n_emb_share

        # Shift by one step: predict each word from its prefix.
        src = sentence[:-1]
        src_mask = sentence_mask[:-1]
        tgt = sentence[1:]
        tgt_mask = sentence_mask[1:]

        if False: #(share only part of embedding)
            # DISABLED: one wide table; the lstm range and the struct range
            # overlap by n_emb_share columns so both views share parameters.
            n_emb_all = n_emb_lstm + n_emb_struct - n_emb_share
            emb_all_range = T.arange(n_emb_all)
            emb_lstm_range = T.arange(n_emb_lstm)
            emb_struct_range = T.arange(n_emb_lstm - n_emb_share, n_emb_all)

            table = lookup_table(n_emb_all, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_all_range)
            state_below_lstm = table.apply(src, emb_lstm_range)
            state_below_struct = table.apply(src, emb_struct_range)
            self.layers.append(table)

            rnn = SLSTM(n_emb_lstm, n_emb_struct, n_emb_share, self.n_hids, self.n_shids, self.n_structs)
            #rnn = LSTM(self.n_in, self.n_hids)
            hiddens = rnn.merge_out(state_below, state_below_lstm, state_below_struct, src_mask)
            self.layers.append(rnn)

        elif True: # use rnn_pyramid
            # LIVE branch: lookup -> dropout -> pyramid RNN.
            emb_lstm_range = T.arange(n_emb_lstm)
            table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_lstm_range)
            self.layers.append(table)

            if self.dropout < 1.0:
                state_below = dropout_layer(state_below, use_noise, self.dropout)

            rnn = rnn_pyramid_layer(n_emb_lstm, self.n_hids)
            hiddens, cells, structs = rnn.apply(state_below, src_mask)
            self.layers.append(rnn)
            # Expose the structure outputs for inspection by callers.
            self.structs = structs

        else: # share all embedding
            # DISABLED: single shared table, two stacked LSTMs, maxout, and a
            # chunk layer on [embeddings; hiddens].
            emb_lstm_range = T.arange(n_emb_lstm)
            table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_lstm_range)
            self.layers.append(table)

            if self.dropout < 1.0:
                state_below = dropout_layer(state_below, use_noise, self.dropout)

            rnn = LSTM(n_emb_lstm, self.n_hids)
            hiddens, cells = rnn.apply(state_below, src_mask)
            #hiddens = rnn.merge_out(state_below, src_mask)
            self.layers.append(rnn)

            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            rnn1 = LSTM(self.n_hids, self.n_hids)
            hiddens, cells = rnn1.apply(hiddens, src_mask)
            #hiddens = rnn.merge_out(state_below, src_mask)
            self.layers.append(rnn1)

            # NOTE(review): this maxout.apply call omits the n_fold argument
            # that other methods pass; behavior depends on maxout_layer's
            # default — confirm before re-enabling this branch.
            maxout = maxout_layer()
            states = T.concatenate([state_below, hiddens], axis=2)
            hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask)
            self.layers.append(maxout)

            #rnng = LSTM(n_emb_lstm, self.n_hids)
            #hiddens, cells = rnn.apply(state_below, src_mask)
            #hiddensg = rnng.merge_out(state_below, src_mask)
            #self.layers.append(rnng)

            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            #chunk = chunk_layer(n_lstm_in + n_lstm_out, n_lstm_out, n_chunk_out, 6)
            n_emb_hid = n_emb_lstm + self.n_hids
            emb_hid = T.concatenate([state_below, hiddens], axis=2)
            #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
            #hiddens = chunk.merge_out(hiddens, hiddens, src_mask, merge_how="for_struct",\
            #        state_below_other=state_below, n_other=n_emb_lstm)
            chunk = chunk_layer(n_emb_hid, self.n_hids, self.n_hids, self.n_structs)
            hiddens = chunk.merge_out(emb_hid, hiddens, src_mask, merge_how="for_struct",\
                    state_below_other=None, n_other=0)
            #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
            #hiddens = chunk.merge_out(hiddens, hiddensg, src_mask, merge_how="both",\
            #        state_below_other=state_below, n_other=n_emb_lstm)
            self.layers.append(chunk)

        # apply dropout
        if self.dropout < 1.0:
            # dropout is applied to the output of maxout in ghog
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        # Output softmax over the vocabulary.
        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        # Collect parameters and regularisation terms.
        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Ejemplo n.º 6
0
    def apply(self, sentence, sentence_mask, use_noise=1):
        """Build the stacked-LSTM + pyramid-RNN language-model graph.

        sentence : sentence * batch word ids
        Pipeline: lookup -> dropout -> LSTM -> dropout -> LSTM -> dropout
                  -> rnn_pyramid -> maxout -> dropout -> logistic.
        Also records self.rnn_len and self.sent_len, and sets self.cost,
        self.params, self.L1 and self.L2.
        """
        n_emb_lstm = self.n_emb_lstm
        n_emb_struct = self.n_emb_struct  # read but unused here, kept for parity
        n_emb_share = self.n_emb_share    # read but unused here, kept for parity

        # Shift by one step: predict each word from its prefix.
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]

        # Word embedding lookup.
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, T.arange(n_emb_lstm))
        self.layers.append(table)

        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        # Two stacked LSTMs with dropout between them.
        rnn = LSTM(n_emb_lstm, self.n_hids)
        hiddens, cells = rnn.apply(state_below, src_mask)
        self.layers.append(rnn)

        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        rnn1 = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn1.apply(hiddens, src_mask)
        self.layers.append(rnn1)

        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        # Pyramid RNN over the LSTM states, also fed the raw embeddings.
        rnnp = rnn_pyramid_layer(self.n_hids, n_emb_lstm, self.n_hids)
        hiddens, cells, structs, pyramid = rnnp.apply(hiddens, state_below, src_mask)
        self.layers.append(rnnp)
        self.rnn_len = rnnp.n_steps
        self.sent_len = sentence.shape[0]

        # Maxout over [embeddings; pyramid states].
        maxout = maxout_layer()
        states = T.concatenate([state_below, hiddens], axis=2)
        hiddens = maxout.apply(states, n_emb_lstm + self.n_hids,
                               self.n_hids, src_mask, 2)
        self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        # Output softmax over the vocabulary.
        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)
        self.cost = logistic_layer.cost(tgt, tgt_mask)

        # Collect parameters and regularisation terms.
        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Ejemplo n.º 7
0
    def apply_morph_attention(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """Build the word + morph-attention language-model graph.

        sentence       : sentence * batch word ids
        sentence_morph : sentence * batch * morph morpheme ids
        src_morph_emb  : sentence * batch * morph * n_emb_morph
        Pipeline: word & morph lookup -> dropout -> LstmMorphAttention
                  -> LSTM -> dropout -> LSTM -> maxout -> dropout -> logistic.
        Sets self.cost, self.params, self.L1 and self.L2.
        """
        # Shift by one step: predict each word from its prefix.
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        # Word embedding lookup.
        word_table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
        src_emb = word_table.apply(src, T.arange(self.n_emb_lstm))
        self.layers.append(word_table)

        # Morpheme embedding lookup.
        morph_table = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
        src_morph_emb = morph_table.apply(src_morph, T.arange(self.n_emb_morph))
        self.layers.append(morph_table)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)
            src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

        # First layer: LSTM attending over the morpheme embeddings.
        # NOTE(review): all three size args are n_hids although the input is
        # src_emb (width n_emb_lstm) — verify LstmMorphAttention's signature.
        att_lstm = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
        hiddens, cells = att_lstm.apply(src_emb, src_morph_emb, src_mask)
        self.layers.append(att_lstm)

        # Second, stacked LSTM.
        lstm_a = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = lstm_a.apply(hiddens, src_mask)
        self.layers.append(lstm_a)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        # Third, stacked LSTM.
        lstm_b = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = lstm_b.apply(hiddens, src_mask)
        self.layers.append(lstm_b)

        # Maxout over [word embeddings; top LSTM states].
        maxout = MaxoutLayer()
        states = T.concatenate([src_emb, hiddens], axis=2)
        hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids,
                               self.n_hids, src_mask, 2)
        self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        # Output softmax over the vocabulary.
        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)
        self.cost = logistic_layer.cost(tgt, tgt_mask)

        # Collect parameters and regularisation terms.
        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)