def apply_morph_only_rnn_gru(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
    """Build the morpheme-only GRU language-model graph; sets self.cost/L1/L2.

    sentence       : sentence * batch matrix of word ids
    sentence_morph : sentence * batch * morph tensor of morpheme ids
    use_noise      : 1 enables dropout noise (training), 0 disables it

    Pipeline:
        1. morph lookup -> dropout
        2. MorphStructRNN (gru)
        3. lstm -> dropout
        4. lstm -> maxout -> dropout
        5. logistic
    """
    # Shift by one step: predict token t+1 from tokens <= t.
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    # Morpheme lookup table.
    emb_morph_range = T.arange(self.n_emb_morph)
    table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
    src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph)
    if self.dropout < 1.0:
        src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

    # Compose per-word morpheme embeddings into word-level states with a GRU.
    morph_layer_1st = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
    hiddens = morph_layer_1st.apply(src_morph_emb, src_morph_mask)
    self.layers.append(morph_layer_1st)

    # Two stacked LSTMs with dropout in between (renamed from 2rd/3nd).
    rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2nd)
    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_3rd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_3rd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_3rd)

    # Maxout over [summed morph embeddings; top hiddens].
    # (The dead `if True:` guard from the original was removed.)
    maxout = MaxoutLayer()
    # src_morph_merge_emb : sentence * batch * n_emb_morph
    src_morph_merge_emb = src_morph_emb.sum(2)
    # Collapse the morph axis of the mask down to a word-level mask.
    src_morph_mask = src_morph_mask.max(axis=2)
    states = T.concatenate([src_morph_merge_emb, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, self.n_emb_morph + self.n_hids,
                           self.n_hids, src_morph_mask, maxout_n_fold)
    self.layers.append(maxout)
    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    # Collect every layer's parameters and build the regularization terms.
    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply_normal(self, sentence, sentence_mask, use_noise=1, use_maxout=True):
    """Word-level LM graph: lookup -> dropout -> LSTM x2 -> (maxout) -> logistic.

    sentence : sentence * batch matrix of word ids.
    Sets self.cost, self.src_emb, self.L1, self.L2 as side effects.
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]

    apply_dropout = self.dropout < 1.0

    # Word embeddings via the dynamically-mixed lookup table.
    table = DynamicMixLookupTable(self.n_emb_lstm, **self.cfig)
    src_emb = table.apply(src, T.arange(self.n_emb_lstm))
    self.src_emb = src_emb
    self.layers.append(table)
    if apply_dropout:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

    # Two stacked LSTMs with dropout between them.
    first_lstm = LSTM(self.n_emb_lstm, self.n_hids)
    hiddens, cells = first_lstm.apply(src_emb, src_mask)
    self.layers.append(first_lstm)
    if apply_dropout:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    second_lstm = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = second_lstm.apply(hiddens, src_mask)
    self.layers.append(second_lstm)

    if use_maxout:
        # Maxout over [embeddings; top hiddens], folding by 2.
        maxout = MaxoutLayer()
        merged = T.concatenate([src_emb, hiddens], axis=2)
        hiddens = maxout.apply(merged, self.n_emb_lstm + self.n_hids,
                               self.n_hids, src_mask, 2)
        self.layers.append(maxout)
    if apply_dropout:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    # Gather parameters and regularizers from every registered layer.
    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply_model(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
    """Word-level LM graph: lookup -> dropout -> LSTM x2 -> maxout -> logistic.

    sentence       : sentence * batch
    sentence_morph : sentence * batch * morph (sliced but currently unused)
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    # NOTE(review): lookup_layer presumably returns the embedding tensor;
    # confirm whether its underlying table must also be appended to
    # self.layers so its parameters are collected for training.
    src_emb = lookup_layer('word', src)
    #src_morph_emb : sentence * batch * morph * n_emb_morph
    #src_morph_emb = lookup_layer('morph', src)
    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

    # BUG FIX: the original fed the undefined name `hiddens` into the first
    # LSTM (NameError). It must consume the word embeddings, whose width is
    # n_emb_lstm — matching the first layer in apply_normal.
    rnn_layer_1st = LSTM(self.n_emb_lstm, self.n_hids)
    hiddens, cells = rnn_layer_1st.apply(src_emb, src_mask)
    self.layers.append(rnn_layer_1st)
    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2nd)

    # Maxout over [embeddings; top hiddens] (dead `if True:` guard removed).
    maxout = MaxoutLayer()
    #src_emb : sentence * batch * n_emb
    #hiddens : sentence * batch * hids
    states = T.concatenate([src_emb, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids,
                           self.n_hids, src_mask, maxout_n_fold)
    self.layers.append(maxout)
    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    # Collect parameters and regularization terms.
    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply(self, sentence, sentence_mask, use_noise=1):
    """LM graph: lookup -> dropout -> LSTM -> FLSTM -> maxout -> dropout -> logistic."""
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]

    n_emb = self.n_emb_lstm

    # Word embedding lookup.
    table = lookup_table(n_emb, self.vocab_size, name='Wemb')
    state_below = table.apply(src, T.arange(n_emb))
    self.layers.append(table)
    if self.dropout < 1.0:
        state_below = dropout_layer(state_below, use_noise, self.dropout)

    # First recurrent layer: plain LSTM over the embeddings.
    rnn = LSTM(n_emb, self.n_hids)
    hiddens, cells = rnn.apply(state_below, src_mask)
    self.layers.append(rnn)

    # Second recurrent layer: FLSTM, fed its own input stream twice.
    rnn2 = FLSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn2.apply(hiddens, hiddens, src_mask)
    self.layers.append(rnn2)

    # Maxout over [embeddings; top hiddens], folding by 2.
    maxout = maxout_layer()
    states = T.concatenate([state_below, hiddens], axis=2)
    hiddens = maxout.apply(states, n_emb + self.n_hids, self.n_hids, src_mask, 2)
    self.layers.append(maxout)
    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    # Collect parameters and build L1/L2 regularizers.
    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply(self, sentence, sentence_mask, use_noise=1):
    """Build one of three alternative LM graphs selected by hard-coded flags.

    NOTE(review): this block was recovered from collapsed single-line source;
    the branch boundaries below are a best-effort reconstruction of the
    original indentation -- verify against version control before relying
    on them.

    sentence : sentence * batch matrix of word ids.
    Active path (`elif True`): lookup -> dropout -> rnn_pyramid, then the
    shared chunk_layer -> logistic tail. Sets self.cost/L1/L2.
    """
    n_emb_lstm = self.n_emb_lstm
    n_emb_struct = self.n_emb_struct
    n_emb_share = self.n_emb_share
    # Shift by one step: predict token t+1 from tokens <= t.
    src = sentence[:-1]
    src_mask = sentence_mask[:-1]
    tgt = sentence[1:]
    tgt_mask = sentence_mask[1:]
    if False: #(share only part of embedding)
        # Dead branch: word and struct embeddings overlap in n_emb_share dims.
        n_emb_all = n_emb_lstm + n_emb_struct - n_emb_share
        emb_all_range = T.arange(n_emb_all)
        emb_lstm_range = T.arange(n_emb_lstm)
        emb_struct_range = T.arange(n_emb_lstm - n_emb_share, n_emb_all)
        table = lookup_table(n_emb_all, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_all_range)
        state_below_lstm = table.apply(src, emb_lstm_range)
        state_below_struct = table.apply(src, emb_struct_range)
        self.layers.append(table)
        rnn = SLSTM(n_emb_lstm, n_emb_struct, n_emb_share, self.n_hids, self.n_shids, self.n_structs)
        #rnn = LSTM(self.n_in, self.n_hids)
        hiddens = rnn.merge_out(state_below, state_below_lstm, state_below_struct, src_mask)
        self.layers.append(rnn)
    elif True: # use rnn_pyramid
        # Active branch: plain lookup then the pyramid RNN.
        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)
        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)
        rnn = rnn_pyramid_layer(n_emb_lstm, self.n_hids)
        hiddens, cells, structs = rnn.apply(state_below, src_mask)
        self.layers.append(rnn)
        # Expose the induced structures for inspection by the caller.
        self.structs = structs
    else: # share all embedding
        # Dead branch: standard two-layer LSTM plus maxout.
        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)
        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)
        rnn = LSTM(n_emb_lstm, self.n_hids)
        hiddens, cells = rnn.apply(state_below, src_mask)
        #hiddens = rnn.merge_out(state_below, src_mask)
        self.layers.append(rnn)
        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)
        rnn1 = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn1.apply(hiddens, src_mask)
        #hiddens = rnn.merge_out(state_below, src_mask)
        self.layers.append(rnn1)
        maxout = maxout_layer()
        states = T.concatenate([state_below, hiddens], axis=2)
        hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask)
        self.layers.append(maxout)
    #rnng = LSTM(n_emb_lstm, self.n_hids)
    #hiddens, cells = rnn.apply(state_below, src_mask)
    #hiddensg = rnng.merge_out(state_below, src_mask)
    #self.layers.append(rnng)
    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)
    #chunk = chunk_layer(n_lstm_in + n_lstm_out, n_lstm_out, n_chunk_out, 6)
    # Chunk layer consumes [embeddings; hiddens] concatenated on the
    # feature axis; both names are defined on every branch above.
    n_emb_hid = n_emb_lstm + self.n_hids
    emb_hid = T.concatenate([state_below, hiddens], axis=2)
    #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
    #hiddens = chunk.merge_out(hiddens, hiddens, src_mask, merge_how="for_struct",\
    #                          state_below_other=state_below, n_other=n_emb_lstm)
    chunk = chunk_layer(n_emb_hid, self.n_hids, self.n_hids, self.n_structs)
    hiddens = chunk.merge_out(emb_hid, hiddens, src_mask, merge_how="for_struct",\
                              state_below_other=None, n_other=0)
    #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
    #hiddens = chunk.merge_out(hiddens, hiddensg, src_mask, merge_how="both",\
    #                          state_below_other=state_below, n_other=n_emb_lstm)
    self.layers.append(chunk)
    # apply dropout
    if self.dropout < 1.0:
        # dropout is applied to the output of maxout in ghog
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)
    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)
    # Collect parameters and regularization terms from all layers.
    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply(self, sentence, sentence_mask, use_noise=1):
    """LM graph: lookup -> LSTM x2 -> rnn_pyramid -> maxout -> logistic.

    sentence : sentence * batch matrix of word ids.
    Sets self.cost/L1/L2 plus self.rnn_len / self.sent_len diagnostics.
    The original's three dead `if True:` guards have been removed; the
    statement order is unchanged.
    """
    n_emb_lstm = self.n_emb_lstm
    n_emb_struct = self.n_emb_struct
    n_emb_share = self.n_emb_share
    # Shift by one step: predict token t+1 from tokens <= t.
    src = sentence[:-1]
    src_mask = sentence_mask[:-1]
    tgt = sentence[1:]
    tgt_mask = sentence_mask[1:]

    # Word embedding lookup.
    emb_lstm_range = T.arange(n_emb_lstm)
    table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
    state_below = table.apply(src, emb_lstm_range)
    self.layers.append(table)
    if self.dropout < 1.0:
        state_below = dropout_layer(state_below, use_noise, self.dropout)

    rnn = LSTM(n_emb_lstm, self.n_hids)
    hiddens, cells = rnn.apply(state_below, src_mask)
    self.layers.append(rnn)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)
    rnn1 = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn1.apply(hiddens, src_mask)
    self.layers.append(rnn1)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)
    # Pyramid layer consumes both the LSTM hiddens and the raw embeddings.
    rnnp = rnn_pyramid_layer(self.n_hids, n_emb_lstm, self.n_hids)
    # structs/pyramid are currently unused outputs of the 4-tuple.
    hiddens, cells, structs, pyramid = rnnp.apply(hiddens, state_below, src_mask)
    self.layers.append(rnnp)
    #self.structs = structs
    # Diagnostics: scan length of the pyramid vs. input sentence length.
    self.rnn_len = rnnp.n_steps
    self.sent_len = sentence.shape[0]

    # Maxout over [embeddings; pyramid hiddens], folding by 2.
    maxout = maxout_layer()
    states = T.concatenate([state_below, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, n_emb_lstm + self.n_hids,
                           self.n_hids, src_mask, maxout_n_fold)
    self.layers.append(maxout)
    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    # Collect parameters and regularization terms.
    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply_morph_attention(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
    """LM graph attending over morpheme embeddings; sets self.cost/L1/L2.

    sentence       : sentence * batch matrix of word ids
    sentence_morph : sentence * batch * morph tensor of morpheme ids
    src_morph_emb  : sentence * batch * morph * n_emb_morph

    Pipeline:
        1. word + morph lookup -> dropout -> attention LSTM
        2. lstm -> dropout
        3. lstm -> maxout -> dropout
        4. logistic
    """
    # Shift by one step: predict token t+1 from tokens <= t.
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    # Word lookup table.
    emb_lstm_range = T.arange(self.n_emb_lstm)
    table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
    src_emb = table.apply(src, emb_lstm_range)
    self.layers.append(table)

    # Morpheme lookup table.
    emb_morph_range = T.arange(self.n_emb_morph)
    table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
    src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph)

    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)
        src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

    # NOTE(review): all three sizes are n_hids although the inputs are
    # n_emb_lstm / n_emb_morph wide — confirm LstmMorphAttention's
    # expected constructor arguments.
    lstm_att_1st = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
    hiddens, cells = lstm_att_1st.apply(src_emb, src_morph_emb, src_mask)
    self.layers.append(lstm_att_1st)

    # Two stacked LSTMs with dropout in between (renamed from 2rd/3nd).
    rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2nd)
    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_3rd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_3rd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_3rd)

    # Maxout over [word embeddings; top hiddens]
    # (dead `if True:` guard removed).
    maxout = MaxoutLayer()
    #src_emb : sentence * batch * n_emb
    #hiddens : sentence * batch * hids
    states = T.concatenate([src_emb, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids,
                           self.n_hids, src_mask, maxout_n_fold)
    self.layers.append(maxout)
    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    # Collect parameters and regularization terms.
    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)