def predict(sent, model, builders):
    """
    Predict a tag for every token of a sentence.

    :param sent: list of word indices for the sentence
    :param model: model object holding the parameters pH / pO (module globals)
    :param builders: RNN builders used by build_tagging_graph
    :return: list of predicted tag strings (looked up in vocab_tags)
    """
    if MLP:
        H = pycnn.parameter(pH)
        O = pycnn.parameter(pO)
    else:
        O = pycnn.parameter(pO)
    tags = []
    # BUG FIX: the original passed the undefined name `words` to
    # build_tagging_graph; the sentence parameter is called `sent`.
    for forward_state, backward_state in build_tagging_graph(sent, model, builders):
        f_b = pycnn.concatenate([forward_state, backward_state])
        if MLP:
            r_t = O * pycnn.tanh(H * f_b)
        else:
            r_t = O * f_b
        # greedy per-token decision: argmax over the softmax distribution
        out = pycnn.softmax(r_t)
        chosen = np.argmax(out.npvalue())
        tags.append(vocab_tags.i2w[chosen])
    return tags
def evaluate_recurrent(self, word_inds, tag_inds, test=False):
    """Run a two-layer BiLSTM over (word, tag) embedding pairs.

    Returns (fwd_out, back_out): per-position concatenations of the
    layer-1 and layer-2 outputs, both in left-to-right sentence order.
    Dropout is applied between the layers only when test is False.
    """
    fwd1 = self.fwd_lstm1.initial_state()
    back1 = self.back_lstm1.initial_state()
    fwd2 = self.fwd_lstm2.initial_state()
    back2 = self.back_lstm2.initial_state()

    # input sequence: concatenated word + tag embeddings
    sentence = []
    for (w, t) in zip(word_inds, tag_inds):
        wordvec = pycnn.lookup(self.model['word-embed'], w)
        tagvec = pycnn.lookup(self.model['tag-embed'], t)
        vec = pycnn.concatenate([wordvec, tagvec])
        sentence.append(vec)

    # layer 1, forward direction
    fwd1_out = []
    for vec in sentence:
        fwd1 = fwd1.add_input(vec)
        fwd_vec = fwd1.output()
        fwd1_out.append(fwd_vec)

    # layer 1, backward direction (outputs collected right-to-left)
    back1_out = []
    for vec in reversed(sentence):
        back1 = back1.add_input(vec)
        back_vec = back1.output()
        back1_out.append(back_vec)

    # layer-2 input: position-aligned [forward; backward] pairs
    # (reversed(back1_out) realigns the right-to-left list with fwd1_out)
    lstm2_input = []
    for (f, b) in zip(fwd1_out, reversed(back1_out)):
        lstm2_input.append(pycnn.concatenate([f, b]))

    # layer 2, forward direction, with train-time dropout on the inputs
    fwd2_out = []
    for vec in lstm2_input:
        if self.droprate > 0 and not test:
            vec = pycnn.dropout(vec, self.droprate)
        fwd2 = fwd2.add_input(vec)
        fwd_vec = fwd2.output()
        fwd2_out.append(fwd_vec)

    # layer 2, backward direction
    back2_out = []
    for vec in reversed(lstm2_input):
        if self.droprate > 0 and not test:
            vec = pycnn.dropout(vec, self.droprate)
        back2 = back2.add_input(vec)
        back_vec = back2.output()
        back2_out.append(back_vec)

    # residual-style concatenation of layer-1 and layer-2 outputs
    fwd_out = [
        pycnn.concatenate([f1, f2])
        for (f1, f2) in zip(fwd1_out, fwd2_out)
    ]
    back_out = [
        pycnn.concatenate([b1, b2])
        for (b1, b2) in zip(back1_out, back2_out)
    ]

    # back_out was built right-to-left; flip it into sentence order
    return fwd_out, back_out[::-1]
def predict(self, word_indices, char_indices, task_id, train=False):
    """
    Predict tags for a sentence represented as char + word embeddings.

    :param word_indices: word indices of the sentence
    :param char_indices: per-token lists of character indices
    :param task_id: task whose output layer should produce the prediction
    :param train: if True, gaussian noise is added to the features
    :return: output of the task's output predictor at the expected layer
    :raises Exception: if no layer matched output_expected_at_layer
    """
    pycnn.renew_cg()  # new graph

    # get representation for words: for each token, the last state of a
    # char RNN run forward and the last state of it run over the reversed
    # character sequence
    char_emb = []
    rev_char_emb = []
    for chars_of_token in char_indices:
        last_state = self.char_rnn.predict_sequence(
            [self.cembeds[c] for c in chars_of_token])[-1]
        rev_last_state = self.char_rnn.predict_sequence(
            [self.cembeds[c] for c in reversed(chars_of_token)])[-1]
        char_emb.append(last_state)
        rev_char_emb.append(rev_last_state)

    wfeatures = [self.wembeds[w] for w in word_indices]
    # NOTE(review): zipping with reversed(rev_char_emb) pairs token i with
    # the reversed-char embedding of token n-1-i — confirm this is intended.
    features = [pycnn.concatenate([w, c, rev_c])
                for w, c, rev_c in zip(wfeatures, char_emb, reversed(rev_char_emb))]
    if train:  # only do at training time
        features = [pycnn.noise(fe, self.noise_sigma) for fe in features]

    output_expected_at_layer = self.predictors["task_expected_at"][task_id]
    output_expected_at_layer -= 1  # convert 1-based layer id to 0-based index

    # go through layers; input is the combination of w + char emb
    prev = features
    num_layers = self.h_layers
    for i in range(0, num_layers):
        predictor = self.predictors["inner"][i]
        forward_sequence, backward_sequence = predictor.predict_sequence(prev)
        if i > 0 and self.activation:
            # activation between LSTM layers
            forward_sequence = [self.activation(s) for s in forward_sequence]
            backward_sequence = [self.activation(s) for s in backward_sequence]
        if i == output_expected_at_layer:
            output_predictor = self.predictors["output_layers_dict"][task_id]
            concat_layer = [pycnn.concatenate([f, b]) for f, b in
                            zip(forward_sequence, reversed(backward_sequence))]
            if train and self.noise_sigma > 0.0:
                concat_layer = [pycnn.noise(fe, self.noise_sigma) for fe in concat_layer]
            output = output_predictor.predict_sequence(concat_layer)
            return output
        prev = forward_sequence
        prev_rev = backward_sequence  # not used

    # BUG FIX: `raise "oops..."` raised a string, which is a TypeError since
    # Python 2.6; raise a real exception (the old `return None` after it was
    # unreachable and has been dropped).
    raise Exception("oops should not be here")
def attend(model, input_vectors, state):
    """Additive (Bahdanau-style) attention over the encoded input vectors.

    Scores each input against the current decoder state, normalizes the
    scores with a softmax, and returns the weighted sum of the inputs.
    """
    w1 = pc.parameter(model['attention_w1'])
    w2 = pc.parameter(model['attention_w2'])
    v = pc.parameter(model['attention_v'])
    # project the decoder state once — it is shared by every input position
    w2dt = w2 * pc.concatenate(list(state.s()))
    unnormalized = [v * pc.tanh(w1 * vec + w2dt) for vec in input_vectors]
    weights = pc.softmax(pc.concatenate(unnormalized))
    # context vector: attention-weighted sum of the inputs
    return pc.esum([vec * weight
                    for vec, weight in zip(input_vectors, weights)])
def attend(model, vectors, state):
    """Single-matrix attention: score each encoded vector against the
    decoder state, softmax-normalize, and return the weighted sum."""
    w = pc.parameter(model['attention_w'])
    # the decoder-state concatenation is loop-invariant; build it once
    state_vec = pc.concatenate(list(state.s()))
    # one unnormalized weight per encoded vector
    scores = [w * pc.concatenate([vec, state_vec]) for vec in vectors]
    # normalize the weights
    weights = pc.softmax(pc.concatenate(scores))
    # apply the weights
    return pc.esum([vec * weight for vec, weight in zip(vectors, weights)])
def _build_word_expression_list(self, sentence, is_train=False):
    """Return one output expression per word from a single-layer BiLSTM.

    Builds a fresh computation graph, embeds every word (with dropout
    when training), runs forward and backward LSTMs, and projects each
    position's [forward; backward] concatenation through param_out.
    """
    renew_cg()
    word_vectors = [self._get_word_vector(word, use_dropout=is_train)
                    for word in sentence]

    fwd_state = self.word_builders[0].initial_state()
    bwd_state = self.word_builders[1].initial_state()
    fwd_outputs = []
    bwd_outputs = []
    for fwd_vec, bwd_vec in zip(word_vectors, reversed(word_vectors)):
        fwd_state = fwd_state.add_input(fwd_vec)
        bwd_state = bwd_state.add_input(bwd_vec)
        fwd_outputs.append(fwd_state.output())
        bwd_outputs.append(bwd_state.output())

    O = parameter(self.param_out)
    # bwd_outputs is right-to-left; reversed() realigns it with fwd_outputs
    return [O * concatenate([f_emb, b_emb])
            for f_emb, b_emb in zip(fwd_outputs, reversed(bwd_outputs))]
def fit(words, tags, labels, model, builders):
    """
    Compute the joint tagging error of a sentence.

    :param words: list of word indices
    :param tags: list of gold tag indices
    :param labels: index (demographic label; unused here)
    :param model: current model to access parameters
    :param builders: builders used to create the tagging graph
    :return: summed negative log-likelihood over all positions
    """
    # retrieve model parameters
    if MLP:
        H = pycnn.parameter(pH)
        O = pycnn.parameter(pO)
    else:
        O = pycnn.parameter(pO)

    errs = []
    states = build_tagging_graph(words, model, builders)
    for (forward_state, backward_state), gold_tag in zip(states, tags):
        combined = pycnn.concatenate([forward_state, backward_state])
        # TODO: add bias terms
        r_t = O * pycnn.tanh(H * combined) if MLP else O * combined
        errs.append(pycnn.pickneglogsoftmax(r_t, gold_tag))
    return pycnn.esum(errs)
def _build_word_expression_list(self, sentence, is_train=False):
    """One projected BiLSTM output per word of the sentence.

    A new computation graph is created; word vectors use dropout only
    while training (is_train=True).
    """
    renew_cg()
    vectors = []
    for word in sentence:
        vectors.append(self._get_word_vector(word, use_dropout=is_train))

    forward = self.word_builders[0].initial_state()
    backward = self.word_builders[1].initial_state()
    forward_outs = []
    backward_outs = []
    for vec, rev_vec in zip(vectors, reversed(vectors)):
        forward = forward.add_input(vec)
        backward = backward.add_input(rev_vec)
        forward_outs.append(forward.output())
        backward_outs.append(backward.output())

    O = parameter(self.param_out)
    expressions = []
    # backward_outs was collected right-to-left, hence the reversed() here
    for f_out, b_out in zip(forward_outs, reversed(backward_outs)):
        expressions.append(O * concatenate([f_out, b_out]))
    return expressions
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index, inverse_alphabet_index, feat_index, feature_types): pc.renew_cg() # read the parameters char_lookup = model["char_lookup"] feat_lookup = model["feat_lookup"] R = pc.parameter(model["R"]) bias = pc.parameter(model["bias"]) W_c = pc.parameter(model["W_c"]) W__a = pc.parameter(model["W__a"]) U__a = pc.parameter(model["U__a"]) v__a = pc.parameter(model["v__a"]) # encode the lemma blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma) # convert features to matching embeddings, if UNK handle properly feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types) # initialize the decoder rnn s_0 = decoder_rnn.initial_state() s = s_0 # set prev_output_vec for first lstm step as BEGIN_WORD prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]] i = 0 predicted_sequence = [] # run the decoder through the sequence and predict characters while i < MAX_PREDICTION_LEN: # get current h of the decoder s = s.add_input(pc.concatenate([prev_output_vec, feats_input])) decoder_rnn_output = s.output() # perform attention step attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a) # compute output probabilities # print 'computing readout layer...' readout = R * attention_output_vector + bias # find best candidate output probs = pc.softmax(readout) next_char_index = common.argmax(probs.vec_value()) predicted_sequence.append(inverse_alphabet_index[next_char_index]) # check if reached end of word if predicted_sequence[-1] == END_WORD: break # prepare for the next iteration - "feedback" prev_output_vec = char_lookup[next_char_index] i += 1 # remove the end word symbol return predicted_sequence[0:-1]
def _get_word_vector(self, word, use_dropout=False):
    """Word representation: word embedding concatenated with the
    char-level representation, with dropout (p=0.5) when requested."""
    parts = [self._get_word_embedding(word),
             self._get_char_representation(word)]
    vector = concatenate(parts)
    return dropout(vector, 0.5) if use_dropout else vector
def encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, sentence):
    """BiLSTM-encode a sentence.

    :param model: model holding the LSTM parameters
    :param enc_fwd_lstm: forward-direction LSTM builder
    :param enc_bwd_lstm: backward-direction LSTM builder
    :param sentence: sequence of input vectors
    :return: per-position [forward; backward] concatenations
    """
    fwd_vectors = run_lstm(model, enc_fwd_lstm.initial_state(), sentence)
    # Idiom: slice-reversal replaces the manual index loops of the original.
    # Feed the sentence right-to-left, then flip the outputs back into
    # left-to-right order so they align with fwd_vectors.
    bwd_vectors = run_lstm(model, enc_bwd_lstm.initial_state(), sentence[::-1])[::-1]
    return [pc.concatenate([f, b]) for f, b in zip(fwd_vectors, bwd_vectors)]
def encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, sentence):
    """Encode a sentence with a bidirectional LSTM.

    :param model: model holding the LSTM parameters
    :param enc_fwd_lstm: forward-direction LSTM builder
    :param enc_bwd_lstm: backward-direction LSTM builder
    :param sentence: sequence of input vectors
    :return: list of concatenated forward/backward outputs, sentence order
    """
    # Idiom: list(reversed(...)) replaces the original's manual
    # range(len(...)-1, -1, -1) index loops; behavior is identical.
    sentence_rev = list(reversed(sentence))
    fwd_vectors = run_lstm(model, enc_fwd_lstm.initial_state(), sentence)
    bwd_vectors = run_lstm(model, enc_bwd_lstm.initial_state(), sentence_rev)
    # the backward outputs come out right-to-left; re-reverse to align
    bwd_vectors = list(reversed(bwd_vectors))
    return [pc.concatenate([f, b]) for f, b in zip(fwd_vectors, bwd_vectors)]
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index, feature_types): pc.renew_cg() # read the parameters char_lookup = model["char_lookup"] feat_lookup = model["feat_lookup"] R = pc.parameter(model["R"]) bias = pc.parameter(model["bias"]) W_c = pc.parameter(model["W_c"]) W__a = pc.parameter(model["W__a"]) U__a = pc.parameter(model["U__a"]) v__a = pc.parameter(model["v__a"]) blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma) # initialize the decoder rnn s_0 = decoder_rnn.initial_state() s = s_0 # convert features to matching embeddings, if UNK handle properly feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types) # set prev_output_vec for first lstm step as BEGIN_WORD prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]] loss = [] padded_word = word + END_WORD # run the decoder through the output sequence and aggregate loss for i, output_char in enumerate(padded_word): # get current h of the decoder s = s.add_input(pc.concatenate([prev_output_vec, feats_input])) decoder_rnn_output = s.output() attention_output_vector, alphas, W = task1_attention_implementation.attend( blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a) # compute output probabilities # print 'computing readout layer...' readout = R * attention_output_vector + bias current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char]) # print 'computed readout layer' loss.append(current_loss) # prepare for the next iteration - "feedback" prev_output_vec = char_lookup[alphabet_index[output_char]] total_sequence_loss = pc.esum(loss) # loss = average(loss) return total_sequence_loss
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index, feature_types): pc.renew_cg() # read the parameters char_lookup = model["char_lookup"] feat_lookup = model["feat_lookup"] R = pc.parameter(model["R"]) bias = pc.parameter(model["bias"]) W_c = pc.parameter(model["W_c"]) W__a = pc.parameter(model["W__a"]) U__a = pc.parameter(model["U__a"]) v__a = pc.parameter(model["v__a"]) blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma) # initialize the decoder rnn s_0 = decoder_rnn.initial_state() s = s_0 # convert features to matching embeddings, if UNK handle properly feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types) # set prev_output_vec for first lstm step as BEGIN_WORD prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]] loss = [] padded_word = word + END_WORD # run the decoder through the output sequence and aggregate loss for i, output_char in enumerate(padded_word): # get current h of the decoder s = s.add_input(pc.concatenate([prev_output_vec, feats_input])) decoder_rnn_output = s.output() attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a) # compute output probabilities # print 'computing readout layer...' readout = R * attention_output_vector + bias current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char]) # print 'computed readout layer' loss.append(current_loss) # prepare for the next iteration - "feedback" prev_output_vec = char_lookup[alphabet_index[output_char]] total_sequence_loss = pc.esum(loss) # loss = average(loss) return total_sequence_loss
def evaluate_label(self, fwd_out, back_out, lefts, rights, test=False):
    """Score labels for the spans given by (lefts, rights).

    Span features are LSTM-output differences at the span boundaries,
    fed through a one-hidden-layer MLP (W1/b1 -> activation -> W2/b2).
    Dropout on the hidden input only when not testing.
    """
    # forward span feature: fwd_out[right] - fwd_out[left - 1], i.e. the
    # change in the forward encoding across the span (left - 1 is the
    # position just before the span starts)
    fwd_span_out = []
    for left_index, right_index in zip(lefts, rights):
        fwd_span_out.append(fwd_out[right_index] - fwd_out[left_index - 1])
    fwd_span_vec = pycnn.concatenate(fwd_span_out)

    # backward span feature is symmetric: back_out[left] - back_out[right + 1]
    # (right + 1 is just past the span in the backward direction)
    back_span_out = []
    for left_index, right_index in zip(lefts, rights):
        back_span_out.append(back_out[left_index] - back_out[right_index + 1])
    back_span_vec = pycnn.concatenate(back_span_out)

    hidden_input = pycnn.concatenate([fwd_span_vec, back_span_vec])

    # dropout only at training time
    if self.droprate > 0 and not test:
        hidden_input = pycnn.dropout(hidden_input, self.droprate)

    hidden_output = self.activation(self.W1_label * hidden_input + self.b1_label)

    scores = (self.W2_label * hidden_output + self.b2_label)

    return scores
def encode_feats(feat_index, feat_lookup, feats, feature_types):
    """Embed every feature type (iterated in sorted order, so each slot
    always holds the same type) and concatenate the embeddings.

    Missing features and feature values unseen at training time both map
    to the UNK_FEAT embedding.
    """
    # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
    # yes since each location has its own weights
    feat_vecs = []
    for feat_type in sorted(feature_types):
        embedding_index = feat_index[UNK_FEAT]
        if feat_type in feats:
            try:
                embedding_index = feat_index[feat_type + ':' + feats[feat_type]]
            except KeyError:
                pass  # unseen value (or feature dropout) -> keep UNK
        feat_vecs.append(feat_lookup[embedding_index])
    return pc.concatenate(feat_vecs)
def _get_char_representation(self, word):
    """Char-level BiLSTM representation of a word: concatenation of the
    final forward and final backward LSTM outputs over its characters."""
    word_char_vectors = []
    for char in word.text:
        char_index = self.char_indexer.get_index(char)
        if char_index is None:
            # characters unknown to the indexer are skipped with a warning
            # (Python 2 print statement)
            print "Warning: Unexpected char '%s' (word='%s')" % (char, word.text)
            continue
        char_vector = lookup(self.model["char_lookup"], char_index)
        word_char_vectors.append(char_vector)

    lstm_forward = self.char_builders[0].initial_state()
    lstm_backward = self.char_builders[1].initial_state()

    # run the two directions in lockstep; only the final states are used
    for char_vector, reverse_char_vector in zip(word_char_vectors, reversed(word_char_vectors)):
        lstm_forward = lstm_forward.add_input(char_vector)
        lstm_backward = lstm_backward.add_input(reverse_char_vector)

    return concatenate([lstm_forward.output(), lstm_backward.output()])
def add_input(self, input_vec):
    """
    Note that this function updates the existing State object!

    One hand-rolled LSTM step: consumes input_vec together with the
    previous hidden state, updates self.c / self.h in place, records the
    new hidden state in self.outputs, and returns self for chaining.
    """
    x = pycnn.concatenate([input_vec, self.h])
    i = pycnn.logistic(self.W_i * x + self.b_i)  # input gate
    f = pycnn.logistic(self.W_f * x + self.b_f)  # forget gate
    g = pycnn.tanh(self.W_c * x + self.b_c)      # candidate cell state
    o = pycnn.logistic(self.W_o * x + self.b_o)  # output gate
    # new cell state: keep what the forget gate allows, add gated candidate
    c = pycnn.cwise_multiply(f, self.c) + pycnn.cwise_multiply(i, g)
    h = pycnn.cwise_multiply(o, pycnn.tanh(c))
    self.c = c
    self.h = h
    self.outputs.append(h)
    return self
def _get_char_representation(self, word, use_dropout):
    """Char-level BiLSTM encoding of a word (final forward state
    concatenated with final backward state).

    NOTE(review): use_dropout is accepted but never used in this body —
    confirm whether dropout was meant to be applied here.
    """
    word_char_vectors = []
    for char in word.text:
        char_index = self.char_indexer.get_index(char)
        if char_index is None:
            # unknown characters are skipped with a warning (Python 2 print)
            print "Warning: Unexpected char '%s' (word='%s')" % (char, word.text)
            continue
        char_vector = lookup(self.model["char_lookup"], char_index)
        word_char_vectors.append(char_vector)

    lstm_forward = self.char_builders[0].initial_state()
    lstm_backward = self.char_builders[1].initial_state()

    # feed the characters forward and backward; only final states are kept
    for char_vector, reverse_char_vector in zip(
            word_char_vectors, reversed(word_char_vectors)):
        lstm_forward = lstm_forward.add_input(char_vector)
        lstm_backward = lstm_backward.add_input(reverse_char_vector)

    return concatenate([lstm_forward.output(), lstm_backward.output()])
def _build_sentence_expressions(self, sentence):
    """BiLSTM over pre-computed word vectors followed by a one-hidden-layer
    MLP (param_hidden -> activation -> param_out), one expression per word."""
    fwd = self.word_builders[0].initial_state()
    bwd = self.word_builders[1].initial_state()
    fwd_states = []
    bwd_states = []
    for word, rev_word in zip(sentence, reversed(sentence)):
        fwd = fwd.add_input(word.vector)
        bwd = bwd.add_input(rev_word.vector)
        fwd_states.append(fwd.output())
        bwd_states.append(bwd.output())

    H = parameter(self.param_hidden)
    O = parameter(self.param_out)
    # bwd_states is right-to-left; reversed() aligns it with fwd_states
    return [O * self.activation(H * concatenate([f_state, b_state]))
            for f_state, b_state in zip(fwd_states, reversed(bwd_states))]
def _build_sentence_expressions(self, sentence):
    """Per-word scores: BiLSTM outputs pushed through hidden + output layers."""
    forward_state = self.word_builders[0].initial_state()
    backward_state = self.word_builders[1].initial_state()
    forward_outs = []
    backward_outs = []
    for token, rev_token in zip(sentence, reversed(sentence)):
        forward_state = forward_state.add_input(token.vector)
        backward_state = backward_state.add_input(rev_token.vector)
        forward_outs.append(forward_state.output())
        backward_outs.append(backward_state.output())

    H = parameter(self.param_hidden)
    O = parameter(self.param_out)
    expressions = []
    # re-align the right-to-left backward outputs with the forward ones
    for f_out, b_out in zip(forward_outs, reversed(backward_outs)):
        combined = concatenate([f_out, b_out])
        expressions.append(O * self.activation(H * combined))
    return expressions
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index, inverse_alphabet_index, feat_index, feature_types): pc.renew_cg() # read the parameters char_lookup = model["char_lookup"] feat_lookup = model["feat_lookup"] R = pc.parameter(model["R"]) bias = pc.parameter(model["bias"]) # convert characters to matching embeddings, if UNK handle properly padded_lemma = BEGIN_WORD + lemma + END_WORD lemma_char_vecs = [] for char in padded_lemma: try: lemma_char_vecs.append(char_lookup[alphabet_index[char]]) except KeyError: # handle UNK lemma_char_vecs.append(char_lookup[alphabet_index[UNK]]) # convert features to matching embeddings, if UNK handle properly feat_vecs = [] for feat in sorted(feature_types): # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well? # if this feature has a value, take it from the lookup. otherwise use UNK if feat in feats: feat_str = feat + ':' + feats[feat] try: feat_vecs.append(feat_lookup[feat_index[feat_str]]) except KeyError: # handle UNK or dropout feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]]) else: feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]]) feats_input = pc.concatenate(feat_vecs) # BiLSTM forward pass s_0 = encoder_frnn.initial_state() s = s_0 frnn_outputs = [] for c in lemma_char_vecs: s = s.add_input(c) frnn_outputs.append(s.output()) # BiLSTM backward pass s_0 = encoder_rrnn.initial_state() s = s_0 rrnn_outputs = [] for c in reversed(lemma_char_vecs): s = s.add_input(c) rrnn_outputs.append(s.output()) # BiLTSM outputs blstm_outputs = [] lemma_char_vecs_len = len(lemma_char_vecs) for i in xrange(lemma_char_vecs_len): blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]])) # initialize the decoder rnn s_0 = decoder_rnn.initial_state() s = s_0 # set prev_output_vec for first lstm step as BEGIN_WORD prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]] prev_char_vec = 
char_lookup[alphabet_index[BEGIN_WORD]] # i is input index, j is output index i = j = 0 num_outputs = 0 predicted_output_sequence = [] # run the decoder through the sequence and predict characters, twice max prediction as step outputs are added while num_outputs < MAX_PREDICTION_LEN * 3: # prepare input vector and perform LSTM step decoder_input = pc.concatenate([prev_output_vec, prev_char_vec, # char_lookup[alphabet_index[str(i)]], # char_lookup[alphabet_index[str(j)]], blstm_outputs[i], feats_input]) s = s.add_input(decoder_input) # compute softmax probs vector and predict with argmax decoder_rnn_output = s.output() probs = pc.softmax(R * decoder_rnn_output + bias) probs = probs.vec_value() predicted_output_index = common.argmax(probs) predicted_output = inverse_alphabet_index[predicted_output_index] predicted_output_sequence.append(predicted_output) # check if step or char output to promote i or j. if predicted_output == STEP: prev_char_vec = char_lookup[alphabet_index[EPSILON]] if i < len(padded_lemma) - 1: i += 1 else: if predicted_output.isdigit(): # handle copy # try: # prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]] # except KeyError: # prev_char_vec = char_lookup[alphabet_index[UNK]] try: # this way END_WORD cannot be copied (as it is in the training stage) if i < len(lemma) + 1: prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]] else: # if trying to copy from a non-existent index, pad with last lemma character prev_char_vec = char_lookup[alphabet_index[lemma[-1]]] except KeyError: prev_char_vec = char_lookup[alphabet_index[UNK]] else: # handle char prev_char_vec = char_lookup[predicted_output_index] j += 1 num_outputs += 1 # check if reached end of word if predicted_output_sequence[-1] == END_WORD: break # prepare for the next iteration - "feedback" prev_output_vec = char_lookup[predicted_output_index] # remove the end word symbol return predicted_output_sequence[0:-1]
def one_word_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, aligned_pair,
                  feat_index, feature_types):
    """Training loss for one aligned (lemma, word) pair in the
    hard-monotonic-attention model.

    Walks the character alignment, emitting STEP losses when the input
    pointer advances and output-character / copy-index losses when a new
    output symbol appears. Returns the average of all local losses.
    """
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    padded_lemma = BEGIN_WORD + lemma + END_WORD

    # convert characters to matching embeddings
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs: position-aligned [forward; backward] concatenation
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []

    # i is input index, j is output index
    i = 0
    j = 0

    # go through alignments, progress j when new output is introduced, progress i when new char is seen on lemma (no ~)
    # TODO: try sutskever flip trick?
    # TODO: attention on the lemma chars/feats could help here?
    aligned_lemma, aligned_word = aligned_pair
    aligned_lemma += END_WORD
    aligned_word += END_WORD

    # run through the alignments
    for index, (input_char, output_char) in enumerate(zip(aligned_lemma, aligned_word)):
        possible_outputs = []

        # decoder input: feedback, previous char, blstm[i], feats
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        blstm_outputs[i],
                                        feats_input])

        # if reached the end word symbol
        if output_char == END_WORD:
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[END_WORD])))
            continue

        # if there is no prefix, step
        if padded_lemma[i] == BEGIN_WORD and aligned_lemma[index] != ALIGN_SYMBOL:
            # perform rnn step
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss for the STEP action
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

        # if there is new output
        if aligned_word[index] != ALIGN_SYMBOL:
            # rebuild the decoder input: feedback may have changed above
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            blstm_outputs[i],
                                            feats_input])

            # copy i action - maybe model as a single action?
            if padded_lemma[i] == aligned_word[j]:
                possible_outputs.append(str(i))
                possible_outputs.append(padded_lemma[i])
            else:
                possible_outputs.append(aligned_word[index])

            # perform rnn step
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            local_loss = pc.scalarInput(0)
            max_output_loss = -pc.log(pc.pick(probs, alphabet_index[possible_outputs[0]]))
            max_likelihood_output = possible_outputs[0]

            # sum over all correct output possibilities and pick feedback output to be the one with the highest
            # probability
            # NOTE(review): the `<` below compares pycnn expressions, not
            # their scalar values — verify this selects by value as intended
            for output in possible_outputs:
                neg_log_likelihood = -pc.log(pc.pick(probs, alphabet_index[output]))
                if neg_log_likelihood < max_output_loss:
                    max_likelihood_output = output
                    max_output_loss = neg_log_likelihood
                local_loss += neg_log_likelihood
            loss.append(local_loss)

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[max_likelihood_output]]
            prev_char_vec = char_lookup[alphabet_index[aligned_word[index]]]
            j += 1

        # now check if it's time to progress on input
        if i < len(padded_lemma) - 1 and aligned_lemma[index + 1] != ALIGN_SYMBOL:
            # perform rnn step with the refreshed decoder input
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            blstm_outputs[i],
                                            feats_input])
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss for the STEP action
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

    # TODO: maybe here a "special" loss function is appropriate?
    # loss = esum(loss)
    loss = pc.average(loss)

    return loss
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types, alignment):
    """Training loss for the template-based attention model.

    An alignment-derived template supplies the allowed action at each
    output position; losses for all options are summed, and the
    minimum-loss option is fed back as the next decoder input.
    """
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = task1_attention_implementation.encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn,
                                                                          encoder_rrnn, feat_index, feat_lookup, feats,
                                                                          feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):
        # find all possible actions - copy from index, output specific character etc.
        # (only the template action here; the gold char alternative is disabled)
        possible_outputs = list(set([padded_template[i]]))  # + [output_char]))

        # get current h of the decoder (no feats in this variant's input)
        s = s.add_input(pc.concatenate([prev_output_vec, prev_char_vec]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        readout = R * attention_output_vector + bias

        # choose which feedback based on minimum neg. log likelihood:
        # initialize with the character loss
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        # NOTE(review): `<` compares pycnn expressions, not scalar values —
        # verify this picks the minimum-loss action as intended
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            if current_loss < min_neg_log_loss:
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    # sum (not average) of all option losses
    total_sequence_loss = pc.esum(loss)

    return total_sequence_loss
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index, inverse_alphabet_index, feat_index, feature_types): pc.renew_cg() # read the parameters char_lookup = model["char_lookup"] feat_lookup = model["feat_lookup"] R = pc.parameter(model["R"]) bias = pc.parameter(model["bias"]) W_c = pc.parameter(model["W_c"]) W__a = pc.parameter(model["W__a"]) U__a = pc.parameter(model["U__a"]) v__a = pc.parameter(model["v__a"]) # encode the lemma blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma) # convert features to matching embeddings, if UNK handle properly feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types) # initialize the decoder rnn s_0 = decoder_rnn.initial_state() s = s_0 # set prev_output_vec for first lstm step as BEGIN_WORD prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]] i = 0 predicted_sequence = [] # run the decoder through the sequence and predict characters while i < MAX_PREDICTION_LEN: # get current h of the decoder s = s.add_input(pc.concatenate([prev_output_vec, feats_input])) decoder_rnn_output = s.output() # perform attention step attention_output_vector, alphas, W = task1_attention_implementation.attend( blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a) # compute output probabilities # print 'computing readout layer...' readout = R * attention_output_vector + bias # find best candidate output probs = pc.softmax(readout) next_char_index = common.argmax(probs.vec_value()) predicted_sequence.append(inverse_alphabet_index[next_char_index]) # check if reached end of word if predicted_sequence[-1] == END_WORD: break # prepare for the next iteration - "feedback" prev_output_vec = char_lookup[next_char_index] i += 1 # remove the end word symbol return predicted_sequence[0:-1]
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types, alignment):
    """Training loss for the template-based attention model (variant that
    also feeds the feature embeddings into every decoder step).

    At each output position, both the template action and the gold
    character are allowed; all option losses are summed and the
    minimum-loss option is fed back to the decoder.
    """
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):
        # find all possible actions - copy from index, output specific character etc.
        possible_outputs = list(set([padded_template[i]] + [output_char]))

        # get current h of the decoder
        s = s.add_input(
            pc.concatenate([prev_output_vec, prev_char_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        readout = R * attention_output_vector + bias

        # choose which feedback based on minimum neg. log likelihood:
        # initialize with the character loss
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        # NOTE(review): `<` compares pycnn expressions, not scalar values —
        # verify this picks the minimum-loss action as intended
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            if current_loss < min_neg_log_loss:
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    # sum (not average) of all option losses
    total_sequence_loss = pc.esum(loss)

    return total_sequence_loss
def _get_word_vector(self, word, use_dropout=False):
    """Word representation: word embedding concatenated with the char-level
    representation; use_dropout is forwarded to both sub-representations."""
    embedding = self._get_word_embedding(word, use_dropout)
    char_repr = self._get_char_representation(word, use_dropout)
    return concatenate([embedding, char_repr])