def generate(model, input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0:
                break
        return i

    embedded = embedd_sentence(model, input)
    encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = pc.parameter(model["decoder_w"])
    b = pc.parameter(model["decoder_b"])

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))
    out = ''
    count_EOS = 0
    for i in range(len(input) * 2):
        if count_EOS == 2:
            break
        vector = attend(model, encoded, s)
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        probs = probs.vec_value()
        next_char = sample(probs)
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue
        out += int2char[next_char]
    return out
def predict(sent, model, builders):
    """
    predict tags for a sentence
    :param sent: list of word indices
    :param model: current model to access parameters
    :param builders: builders to create state combinations
    :return: tag predictions
    """
    if MLP:
        H = pycnn.parameter(pH)
        O = pycnn.parameter(pO)
    else:
        O = pycnn.parameter(pO)

    tags = []
    for forward_state, backward_state in build_tagging_graph(sent, model, builders):
        if MLP:
            r_t = O * (pycnn.tanh(H * pycnn.concatenate([forward_state, backward_state])))
        else:
            r_t = O * pycnn.concatenate([forward_state, backward_state])
        out = pycnn.softmax(r_t)
        chosen = np.argmax(out.npvalue())
        tags.append(vocab_tags.i2w[chosen])
    return tags
def fit(words, tags, labels, model, builders):
    """
    compute the joint error of the tag predictions for a sentence
    :param words: list of indices
    :param tags: list of indices
    :param labels: index
    :param model: current model to access parameters
    :param builders: builder to create state combinations
    :return: joint error
    """
    # retrieve model parameters
    if MLP:
        H = pycnn.parameter(pH)
        O = pycnn.parameter(pO)
    else:
        O = pycnn.parameter(pO)

    errs = []
    for (forward_state, backward_state), tag in zip(build_tagging_graph(words, model, builders), tags):
        f_b = pycnn.concatenate([forward_state, backward_state])
        if MLP:
            # TODO: add bias terms
            r_t = O * (pycnn.tanh(H * f_b))
        else:
            r_t = O * f_b
        err = pycnn.pickneglogsoftmax(r_t, tag)
        errs.append(err)
    return pycnn.esum(errs)
def attend(model, input_vectors, state):
    w1 = pc.parameter(model['attention_w1'])
    w2 = pc.parameter(model['attention_w2'])
    v = pc.parameter(model['attention_v'])
    attention_weights = []

    w2dt = w2 * pc.concatenate(list(state.s()))
    for input_vector in input_vectors:
        attention_weight = v * pc.tanh(w1 * input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = pc.softmax(pc.concatenate(attention_weights))

    output_vectors = pc.esum([vector * attention_weight for vector, attention_weight
                              in zip(input_vectors, attention_weights)])
    return output_vectors
def _build_word_expression_list(self, sentence, is_train=False):
    renew_cg()
    sentence_word_vectors = []
    for word in sentence:
        sentence_word_vectors.append(self._get_word_vector(word, use_dropout=is_train))

    lstm_forward = self.word_builders[0].initial_state()
    lstm_backward = self.word_builders[1].initial_state()
    embeddings_forward = []
    embeddings_backward = []
    for word_vector, reverse_word_vector in zip(sentence_word_vectors, reversed(sentence_word_vectors)):
        lstm_forward = lstm_forward.add_input(word_vector)
        lstm_backward = lstm_backward.add_input(reverse_word_vector)
        embeddings_forward.append(lstm_forward.output())
        embeddings_backward.append(lstm_backward.output())

    O = parameter(self.param_out)
    sentence_word_expressions = []
    for word_f_embedding, word_b_embedding in zip(embeddings_forward, reversed(embeddings_backward)):
        word_concat_embedding = concatenate([word_f_embedding, word_b_embedding])
        word_expression = O * word_concat_embedding
        sentence_word_expressions.append(word_expression)
    return sentence_word_expressions
def calc_sentence_error_semi_viterbi(self, sentence):
    word_expression_list = self._build_word_expression_list(sentence, is_train=True)
    transition_matrix = parameter(self.param_transition)
    gold_expr = self._get_gold_expression(sentence, word_expression_list, transition_matrix)
    output_expr, _ = self.decode_sentence_tags_by_viterbi(word_expression_list, transition_matrix)
    return exp(output_expr - gold_expr)
def calc_sentence_error_viterbi(self, sentence):
    word_expression_list = self._build_word_expression_list(sentence, is_train=True)
    transition_matrix = parameter(self.param_transition)
    gold_expr = self._get_gold_expression(sentence, word_expression_list, transition_matrix)
    all_sequence_expr = self._get_all_sequence_expr(word_expression_list, transition_matrix)
    return all_sequence_expr - gold_expr
def do_cpu():
    C.renew_cg()
    W = C.parameter(cpW)
    W = W * W * W * W * W * W * W
    z = C.squared_distance(W, W)
    z.value()
    z.backward()
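# NOTE (added, hypothetical): do_cpu() above reads a module-level parameter `cpW` created elsewhere.
# A minimal setup and timing driver might look like the sketch below; the import alias, model name,
# and parameter shape are assumptions, not taken from the source.
import time
import dynet as C  # assumption: C is the DyNet/pycnn binding used above

cm = C.Model()
cpW = cm.add_parameters((1000, 1000))  # assumed shape; any large square matrix exercises the repeated matmuls

start = time.time()
do_cpu()
print("CPU time: {:.3f}s".format(time.time() - start))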
def predict(word_indices, model, builder, target):
    """
    predict demographic label
    :param word_indices: list of indices
    :param model: current model to access parameters
    :param builder: builder to create state combinations
    :return: label prediction(s)
    """
    forward_states = build_tagging_graph(word_indices, model, builder)
    final_state = forward_states[-1]

    H = pycnn.parameter(pH)
    bias_H = pycnn.parameter(biasH)
    H2 = pycnn.parameter(pH2)
    bias_H2 = pycnn.parameter(biasH2)

    if target in ['age', 'both', 'joint']:
        O = pycnn.parameter(pOutAge)
        bias_O = pycnn.parameter(biasOutAge)
    elif target == 'gender':
        O = pycnn.parameter(pOutGender)
        bias_O = pycnn.parameter(biasOutGender)
    if target == 'both':
        O2 = pycnn.parameter(pOutGender)
        bias_O2 = pycnn.parameter(biasOutGender)

    if target == 'both':
        # hidden = bias_H2 + pycnn.tanh(H2 * (bias_H + pycnn.tanh(H * final_state)))
        hidden = bias_H + pycnn.tanh(H * final_state)
        r_age = bias_O + (O * hidden)
        r_gender = bias_O2 + (O2 * hidden)
        out_age = pycnn.softmax(r_age)
        out_gender = pycnn.softmax(r_gender)
        return [np.argmax(out_age.npvalue()), np.argmax(out_gender.npvalue())]
    else:
        # r_t = bias_O + (O * (bias_H2 + pycnn.tanh(H2 * (bias_H + pycnn.tanh(H * final_state)))))
        r_t = bias_O + (O * (bias_H + (H * final_state)))
        out = pycnn.softmax(r_t)
        chosen = np.argmax(out.npvalue())
        return chosen
def tag_sentence_viterbi(self, sentence):
    word_expression_list = self._build_word_expression_list(sentence, is_train=False)
    transition_matrix = parameter(self.param_transition)
    best_score_expression, sentence_tags = self.decode_sentence_tags_by_viterbi(word_expression_list,
                                                                                transition_matrix)
    for word, word_tag in zip(sentence, sentence_tags):
        word.tag = word_tag
def decode(model, dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = pc.parameter(model["decoder_w"])
    b = pc.parameter(model["decoder_b"])

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))
    loss = []
    for char in output:
        vector = attend(model, vectors, s)
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        loss.append(-pc.log(pc.pick(probs, char)))
    loss = pc.esum(loss)
    return loss
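# NOTE (added, hypothetical): the loss expression returned by decode() is presumably consumed by a
# standard training loop. The sketch below is one plausible driver, assuming embedd_sentence /
# encode_sentence as used in generate() above and a trainer such as pc.SimpleSGDTrainer(model);
# it is not code from the source.
def train_step(model, input_sentence, output_sentence, enc_fwd_lstm, enc_bwd_lstm, dec_lstm, trainer):
    pc.renew_cg()                                                            # fresh computation graph per example
    embedded = embedd_sentence(model, input_sentence)                        # characters -> embeddings
    encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded)   # BiLSTM encoder states
    loss = decode(model, dec_lstm, encoded, output_sentence)                 # attention decoder loss
    loss_value = loss.value()                                                # forward pass
    loss.backward()                                                          # backprop through the graph
    trainer.update()                                                         # parameter update
    return loss_value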
def prep_params(self):
    self.W1_struct = pycnn.parameter(self.model['struct-hidden-W'])
    self.b1_struct = pycnn.parameter(self.model['struct-hidden-b'])
    self.W2_struct = pycnn.parameter(self.model['struct-out-W'])
    self.b2_struct = pycnn.parameter(self.model['struct-out-b'])

    self.W1_label = pycnn.parameter(self.model['label-hidden-W'])
    self.b1_label = pycnn.parameter(self.model['label-hidden-b'])
    self.W2_label = pycnn.parameter(self.model['label-out-W'])
    self.b2_label = pycnn.parameter(self.model['label-out-b'])
def attend(model, vectors, state):
    w = pc.parameter(model['attention_w'])
    attention_weights = []
    for vector in vectors:
        # concatenate each encoded vector with the current decoder state
        attention_input = pc.concatenate([vector, pc.concatenate(list(state.s()))])
        # get the attention weight for the encoded vector
        attention_weights.append(w * attention_input)
    # normalize the weights
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    # apply the weights
    vectors = pc.esum([vector * attention_weight for vector, attention_weight
                       in zip(vectors, attention_weights)])
    return vectors
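# NOTE (added, hypothetical): the weighted sum in attend() above can also be written in matrix form by
# stacking the encoded vectors as columns. attend_matrix below is an equivalent sketch, assuming
# pc.concatenate_cols is available in this pycnn/DyNet version; it is not code from the source.
def attend_matrix(model, vectors, state):
    w = pc.parameter(model['attention_w'])
    state_vec = pc.concatenate(list(state.s()))
    # one scalar score per encoded vector, normalized with a softmax
    scores = pc.concatenate([w * pc.concatenate([vector, state_vec]) for vector in vectors])
    weights = pc.softmax(scores)
    # columns hold the encoded vectors, so the matrix-vector product is their weighted sum
    return pc.concatenate_cols(vectors) * weights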
def _build_sentence_expressions(self, sentence):
    lstm_forward = self.word_builders[0].initial_state()
    lstm_backward = self.word_builders[1].initial_state()
    embeddings_forward = []
    embeddings_backward = []
    for word, reverse_word in zip(sentence, reversed(sentence)):
        lstm_forward = lstm_forward.add_input(word.vector)
        lstm_backward = lstm_backward.add_input(reverse_word.vector)
        embeddings_forward.append(lstm_forward.output())
        embeddings_backward.append(lstm_backward.output())

    H = parameter(self.param_hidden)
    O = parameter(self.param_out)
    sentence_expressions = []
    for word_f_embedding, word_b_embedding in zip(embeddings_forward, reversed(embeddings_backward)):
        word_concat_embedding = concatenate([word_f_embedding, word_b_embedding])
        word_expression = O * self.activation(H * word_concat_embedding)
        sentence_expressions.append(word_expression)
    return sentence_expressions
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # encode the lemma
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs,
                                                                                   decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
def __init__(self, lstm):
    self.lstm = lstm
    self.outputs = []

    self.c = pycnn.parameter(self.lstm.c0)
    self.h = pycnn.tanh(self.c)

    self.W_i = pycnn.parameter(self.lstm.W_i)
    self.b_i = pycnn.parameter(self.lstm.b_i)
    self.W_f = pycnn.parameter(self.lstm.W_f)
    self.b_f = pycnn.parameter(self.lstm.b_f)
    self.W_c = pycnn.parameter(self.lstm.W_c)
    self.b_c = pycnn.parameter(self.lstm.b_c)
    self.W_o = pycnn.parameter(self.lstm.W_o)
    self.b_o = pycnn.parameter(self.lstm.b_o)
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index,
                 feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs,
                                                                                   decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        # print 'computed readout layer'
        loss.append(current_loss)

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)

    return total_sequence_loss
def fit(word_indices, label, model, builder, target):
    """
    compute the error of the demographic label prediction
    :param word_indices: list of indices
    :param label: index
    :param model: current model to access parameters
    :param builder: builder to create state combinations
    :return: joint error
    """
    forward_states = build_tagging_graph(word_indices, model, builder)

    # retrieve model parameters
    final_state = forward_states[-1]
    # final_state = pycnn.dropout(final_state, 0.1)
    # print("final state", final_state, file=sys.stderr)

    H = pycnn.parameter(pH)
    bias_H = pycnn.parameter(biasH)
    H2 = pycnn.parameter(pH2)
    bias_H2 = pycnn.parameter(biasH2)

    if target in ['age', 'joint']:
        O = pycnn.parameter(pOutAge)
        bias_O = pycnn.parameter(biasOutAge)
    elif target == 'gender':
        O = pycnn.parameter(pOutGender)
        bias_O = pycnn.parameter(biasOutGender)

    # print(pycnn.cg().PrintGraphviz())

    # if target == 'both':
    #     hidden = bias_H + pycnn.tanh(H * final_state)
    #     r_age = bias_O + (O * hidden)
    #     r_gender = bias_O2 + (O2 * hidden)
    #     return pycnn.esum([pycnn.pickneglogsoftmax(r_age, label[0]), pycnn.pickneglogsoftmax(r_gender, label[1])])

    # r_t = bias_O + (O * (bias_H2 + pycnn.tanh(H2 * (bias_H + pycnn.tanh(H * final_state)))))
    r_t = bias_O + (O * (bias_H + (H * final_state)))
    # return pycnn.pick(r_t, label)
    return pycnn.pickneglogsoftmax(r_t, label)
def one_word_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index,
                  aligned_pair, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    padded_lemma = BEGIN_WORD + lemma + END_WORD

    # convert characters to matching embeddings
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []

    # i is input index, j is output index
    i = 0
    j = 0

    # go through alignments, progress j when new output is introduced, progress i when new char is seen on lemma (no ~)
    # TODO: try sutskever flip trick?
    # TODO: attention on the lemma chars/feats could help here?
    aligned_lemma, aligned_word = aligned_pair
    aligned_lemma += END_WORD
    aligned_word += END_WORD

    # run through the alignments
    for index, (input_char, output_char) in enumerate(zip(aligned_lemma, aligned_word)):
        possible_outputs = []

        # feedback, i, j, blstm[i], feats
        decoder_input = pc.concatenate([prev_output_vec, prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i], feats_input])

        # if reached the end word symbol
        if output_char == END_WORD:
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[END_WORD])))
            continue

        # if there is no prefix, step
        if padded_lemma[i] == BEGIN_WORD and aligned_lemma[index] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

        # if there is new output
        if aligned_word[index] != ALIGN_SYMBOL:
            decoder_input = pc.concatenate([prev_output_vec, prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i], feats_input])

            # copy i action - maybe model as a single action?
            if padded_lemma[i] == aligned_word[j]:
                possible_outputs.append(str(i))
                possible_outputs.append(padded_lemma[i])
            else:
                possible_outputs.append(aligned_word[index])

            # perform rnn step
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            local_loss = pc.scalarInput(0)
            max_output_loss = -pc.log(pc.pick(probs, alphabet_index[possible_outputs[0]]))
            max_likelihood_output = possible_outputs[0]

            # sum over all correct output possibilities and pick feedback output to be the one with the highest
            # probability
            for output in possible_outputs:
                neg_log_likelihood = -pc.log(pc.pick(probs, alphabet_index[output]))
                if neg_log_likelihood < max_output_loss:
                    max_likelihood_output = output
                    max_output_loss = neg_log_likelihood

                local_loss += neg_log_likelihood
            loss.append(local_loss)

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[max_likelihood_output]]
            prev_char_vec = char_lookup[alphabet_index[aligned_word[index]]]
            j += 1

        # now check if it's time to progress on input
        if i < len(padded_lemma) - 1 and aligned_lemma[index + 1] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            decoder_input = pc.concatenate([prev_output_vec, prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i], feats_input])
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

    # TODO: maybe here a "special" loss function is appropriate?
    # loss = esum(loss)
    loss = pc.average(loss)
    return loss
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters, allowing up to three times the max
    # prediction length as step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec, prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i], feats_input])
        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i or j.
        if predicted_output == STEP:
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            if i < len(padded_lemma) - 1:
                i += 1
        else:
            if predicted_output.isdigit():
                # handle copy
                # try:
                #     prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                # except KeyError:
                #     prev_char_vec = char_lookup[alphabet_index[UNK]]
                try:
                    # this way END_WORD cannot be copied (as it is in the training stage)
                    if i < len(lemma) + 1:
                        prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                    else:
                        # if trying to copy from a non-existent index, pad with last lemma character
                        prev_char_vec = char_lookup[alphabet_index[lemma[-1]]]
                except KeyError:
                    prev_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                # handle char
                prev_char_vec = char_lookup[predicted_output_index]

            j += 1

        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index,
                 feat_index, feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = task1_attention_implementation.encode_feats_and_chars(alphabet_index, char_lookup,
                                                                          encoder_frnn, encoder_rrnn,
                                                                          feat_index, feat_lookup, feats,
                                                                          feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # find all possible actions - copy from index, output specific character etc.
        possible_outputs = list(set([padded_template[i]]))  # + [output_char]))

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, prev_char_vec]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs,
                                                                                   decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # choose which feedback based on minimum neg. log likelihood: initialize with the character loss
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            if current_loss < min_neg_log_loss:
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)

    return total_sequence_loss
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index,
                 feat_index, feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # find all possible actions - copy from index, output specific character etc.
        possible_outputs = list(set([padded_template[i]] + [output_char]))

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, prev_char_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs,
                                                                                   decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # choose which feedback based on minimum neg. log likelihood: initialize with the character loss
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            if current_loss < min_neg_log_loss:
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)

    return total_sequence_loss