def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # encode the lemma
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs,
                                                                                   decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
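# ---------------------------------------------------------------------------
# Editor's note: attend() / task1_attention_implementation.attend() is defined
# elsewhere in the codebase. The sketch below is a minimal, assumed
# implementation of the additive (Bahdanau-style) attention step implied by
# the parameter names: v__a, W__a and U__a score each encoder state against
# the decoder state, and W_c projects the concatenated [decoder state;
# context] into the attention output vector. The actual implementation may
# differ, e.g. in what it returns as the third value W.
def attend(blstm_outputs, h_t, W_c, v__a, W__a, U__a):
    # stack the encoder states into a matrix, one column per input position
    W = pc.concatenate_cols(blstm_outputs)

    # unnormalized attention score per input position (assuming v__a is a 1-by-d parameter)
    scores = pc.concatenate([v__a * pc.tanh(W__a * h_t + U__a * h_i) for h_i in blstm_outputs])
    alphas = pc.softmax(scores)

    # context vector: attention-weighted sum of the encoder states
    c = W * alphas

    # combine the decoder state with the context vector
    attention_output_vector = pc.tanh(W_c * pc.concatenate([h_t, c]))
    return attention_output_vector, alphas, W
# ---------------------------------------------------------------------------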
def predict_output_sequence(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn,
                            W_c, W__a, U__a, v__a, lemma, feats, alphabet_index, inverse_alphabet_index,
                            feat_index, feature_types):
    pc.renew_cg()

    R = pc.parameter(R)
    bias = pc.parameter(bias)
    W_c = pc.parameter(W_c)
    W__a = pc.parameter(W__a)
    U__a = pc.parameter(U__a)
    v__a = pc.parameter(v__a)

    blstm_outputs = encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index,
                                           feat_lookup, feats, feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(prev_output_vec)
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = attend(blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
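# ---------------------------------------------------------------------------
# Editor's note: encode_feats_and_chars() is defined elsewhere. Judging by its
# callers (and by the encoder input sequence returned by the soft-attention
# predictor further below: [BEGIN_WORD] + feature values + lemma characters +
# [END_WORD]), it presumably embeds BEGIN_WORD, then the sorted feature
# values, then the lemma characters and END_WORD, and runs that sequence
# through the BiLSTM encoder. A rough sketch, not the authoritative
# implementation:
def encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index, feat_lookup,
                           feats, feature_types, lemma):
    # BEGIN_WORD, then feature embeddings (falling back to UNK_FEAT), then the lemma characters
    vecs = [char_lookup[alphabet_index[BEGIN_WORD]]]
    for feat in sorted(feature_types):
        if feat in feats and feat + ':' + feats[feat] in feat_index:
            vecs.append(feat_lookup[feat_index[feat + ':' + feats[feat]]])
        else:
            vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    for char in lemma:
        vecs.append(char_lookup[alphabet_index.get(char, alphabet_index[UNK])])
    vecs.append(char_lookup[alphabet_index[END_WORD]])

    # BiLSTM over the combined sequence, as in the inlined encoders elsewhere in this file
    f_state, r_state = encoder_frnn.initial_state(), encoder_rrnn.initial_state()
    f_outputs, r_outputs = [], []
    for v in vecs:
        f_state = f_state.add_input(v)
        f_outputs.append(f_state.output())
    for v in reversed(vecs):
        r_state = r_state.add_input(v)
        r_outputs.append(r_state.output())
    return [pc.concatenate([f, r]) for f, r in zip(f_outputs, reversed(r_outputs))]
# ---------------------------------------------------------------------------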
def predict_nbest_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                           inverse_alphabet_index, feat_index, feature_types, nbest):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # beam search

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    beam_width = BEAM_WIDTH
    beam = {}
    beam[-1] = [([BEGIN_WORD], 1.0, s_0)]  # (sequence, probability, decoder_rnn)
    final_states = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN and len(beam[i - 1]) > 0:

        # at each stage, create all expansions from the previous beam:
        new_hypos = []
        for hypothesis in beam[i - 1]:
            seq, hyp_prob, prefix_decoder = hypothesis
            last_hypo_char = seq[-1]

            # can't expand finished sequences
            if last_hypo_char == END_WORD:
                continue

            # expand from the last character of the hypothesis
            try:
                prev_output_vec = char_lookup[alphabet_index[last_hypo_char]]
            except KeyError:
                # not a character
                # print 'impossible to expand, key error'  # + str(seq)
                continue

            # if the lemma is finished, pad with epsilon chars
            if i < len(lemma):
                blstm_output = blstm_outputs[i]
                try:
                    lemma_input_char_vec = char_lookup[alphabet_index[lemma[i]]]
                except KeyError:
                    # handle unseen characters
                    lemma_input_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                lemma_input_char_vec = char_lookup[alphabet_index[EPSILON]]
                blstm_output = blstm_outputs[lemma_char_vecs_len - 1]

            decoder_input = concatenate([blstm_output,
                                         prev_output_vec,
                                         lemma_input_char_vec,
                                         char_lookup[alphabet_index[str(i)]],
                                         feats_input])

            # prepare input vector and perform LSTM step
            s = prefix_decoder.add_input(decoder_input)

            # compute softmax probs
            decoder_rnn_output = s.output()
            probs = softmax(R * decoder_rnn_output + bias)
            probs = probs.vec_value()

            # expand - create new hypos
            for index, p in enumerate(probs):
                new_seq = list(seq)
                new_seq.append(inverse_alphabet_index[index])
                new_prob = hyp_prob * p
                if new_seq[-1] == END_WORD:
                    # if found a complete sequence - add to final states
                    final_states.append((new_seq[1:-1], new_prob))
                else:
                    new_hypos.append((new_seq, new_prob, s))

        # add the expansions with the largest probability to the beam, together with their score and prefix rnn state
        new_probs = [p for (s, p, r) in new_hypos]
        argmax_indices = common.argmax(new_probs, n=beam_width)
        beam[i] = [new_hypos[l] for l in argmax_indices]
        i += 1

    # get nbest results from the final states found in the search
    final_probs = [p for (s, p) in final_states]
    argmax_indices = common.argmax(final_probs, n=nbest)
    nbest_templates = [final_states[l] for l in argmax_indices]

    return nbest_templates
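# ---------------------------------------------------------------------------
# Editor's note: common.argmax is used throughout this file both as a plain
# argmax (common.argmax(probs)) and as an n-best selector
# (common.argmax(new_probs, n=beam_width)). A minimal implementation
# consistent with both call sites might look like the following; the actual
# common module may differ.
import numpy as np

def argmax(values, n=1):
    if n == 1:
        # single best index
        return int(np.argmax(values))
    # indices of the n largest values, in descending order of value
    return np.argsort(np.asarray(values))[::-1][:n].tolist()
# ---------------------------------------------------------------------------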
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                                inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_template = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the lemma is finished, pad with epsilon chars
        if i < len(lemma):
            blstm_output = blstm_outputs[i]
            try:
                lemma_input_char_vec = char_lookup[alphabet_index[lemma[i]]]
            except KeyError:
                # handle unseen characters
                lemma_input_char_vec = char_lookup[alphabet_index[UNK]]
        else:
            lemma_input_char_vec = char_lookup[alphabet_index[EPSILON]]
            blstm_output = blstm_outputs[lemma_char_vecs_len - 1]

        decoder_input = concatenate([blstm_output,
                                     prev_output_vec,
                                     lemma_input_char_vec,
                                     char_lookup[alphabet_index[str(i)]],
                                     feats_input])

        # prepare input vector and perform LSTM step
        # decoder_input = concatenate([encoded, prev_output_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted_template.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_template[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_template[0:-1]
def predict_inflection(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, alphabet_index,
                       inverse_alphabet_index):
    renew_cg()

    # read the parameters
    lookup = model["lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(lookup[alphabet_index[UNK]])

    # bilstm forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    for c in lemma_char_vecs:
        s = s.add_input(c)
    encoder_frnn_h = s.h()

    # bilstm backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
    encoder_rrnn_h = s.h()

    # concatenate BiLSTM final hidden states
    if len(encoder_rrnn_h) == 1 and len(encoder_frnn_h) == 1:
        encoded = concatenate([encoder_frnn_h[0], encoder_rrnn_h[0]])
    else:
        # if there's more than one layer, take the last one
        encoded = concatenate([encoder_frnn_h[-1], encoder_rrnn_h[-1]])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted = ''

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the lemma is finished or the character is unknown, pad with epsilon chars
        if i < len(lemma) and lemma[i] in alphabet_index:
            lemma_input_char_vec = lookup[alphabet_index[lemma[i]]]
        else:
            lemma_input_char_vec = lookup[alphabet_index[EPSILON]]

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([encoded, prev_output_vec, lemma_input_char_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted = predicted + inverse_alphabet_index[next_char_index]

        # check if reached end of word
        if predicted[-1] == END_WORD:
            break

        # prepare for the next iteration - feed back the decoder output rather than the char embedding
        # prev_output_vec = lookup[next_char_index]
        prev_output_vec = decoder_rnn_output
        i += 1

    # remove the begin and end word symbols
    return predicted[1:-1]
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, char_feedback_rnn,
                            action_feedback_rnn, lemma, feats, alphabet_index, inverse_alphabet_index,
                            feat_index, feature_types):
    renew_cg()

    # read the parameters
    input_char_lookup = model["input_char_lookup"]
    output_char_lookup = model["output_char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])
    # feedback_R = parameter(model["feedback_R"])
    # feedback_bias = parameter(model["feedback_bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(input_char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(input_char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s = decoder_rnn.initial_state()

    # set prev_output_vec for first lstm step as BEGIN_WORD for both feedback lstms
    # prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    begin_vec = output_char_lookup[alphabet_index[BEGIN_WORD]]
    # c_f_state = char_feedback_rnn.initial_state()
    # a_s_state = action_feedback_rnn.initial_state()
    # c_f_state = c_f_state.add_input(begin_vec)
    # a_s_state = a_s_state.add_input(begin_vec)
    # prev_output_vec = tanh(feedback_R * concatenate([c_f_state.output(), a_s_state.output()]) + feedback_bias)
    prev_action_vec = begin_vec
    prev_char_vec = begin_vec

    # i is the input index, j is the output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction
    # length, as step actions are interleaved with the character outputs
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([prev_action_vec,
                                     # prev_char_vec,
                                     # prev_output_vec,
                                     # input_char_lookup[alphabet_index[str(i)]],
                                     # input_char_lookup[alphabet_index[str(j)]],
                                     blstm_outputs[i],
                                     feats_input])
        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output, to promote i or j.
        if predicted_output == STEP:
            # prepare for the next iteration - "feedback"
            # prev_output_vec = char_lookup[alphabet_index[STEP]]
            # step_vec = output_char_lookup[alphabet_index[STEP]]
            # not changing c_f_state as no character was predicted, only a step action is done
            # stepping the actions feedback lstm with step
            # a_s_state = a_s_state.add_input(step_vec)
            # prev_output_vec = tanh(feedback_R * concatenate([c_f_state.output(), a_s_state.output()]) + feedback_bias)
            # prev_action_vec = step_vec
            # prev_char_vec = output_char_lookup[alphabet_index[EPSILON]]
            if i < len(lemma) - 1:
                i += 1
        else:
            # if predicted_output.isdigit():
            #     # copy action - the copied char
            #     action_feedback_vec = output_char_lookup[alphabet_index[predicted_output]]
            #     char_feedback_vec = output_char_lookup[alphabet_index[padded_lemma[i]]]
            # else:
            #     # char action - the predicted char embedding
            #     action_feedback_vec = output_char_lookup[alphabet_index[predicted_output]]
            #     char_feedback_vec = output_char_lookup[alphabet_index[predicted_output]]
            # stepping the char feedback lstm with the predicted char
            # c_f_state = c_f_state.add_input(char_feedback_vec)
            # stepping the actions feedback lstm with a char or copy action
            # a_s_state = a_s_state.add_input(action_feedback_vec)
            # combine lstm feedbacks through an MLP
            # prev_output_vec = tanh(feedback_R * concatenate([c_f_state.output(), a_s_state.output()]) + feedback_bias)
            # prev_action_vec = action_feedback_vec
            # prev_char_vec = char_feedback_vec
            # promote j as a new character was added to the output
            j += 1

        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        prev_action_vec = output_char_lookup[predicted_output_index]
        # prepare for the next iteration - "feedback" - already computed above using the two feedback lstms
        # prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                                inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # bilstm forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    for c in lemma_char_vecs:
        s = s.add_input(c)
    encoder_frnn_h = s.h()

    # bilstm backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
    encoder_rrnn_h = s.h()

    # concatenate BiLSTM final hidden states
    if len(encoder_rrnn_h) == 1 and len(encoder_frnn_h) == 1:
        encoded = concatenate([encoder_frnn_h[0], encoder_rrnn_h[0]])
    else:
        # if there's more than one layer, take the last one
        encoded = concatenate([encoder_frnn_h[-1], encoder_rrnn_h[-1]])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_template = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the lemma is finished, pad with epsilon chars
        if i < len(lemma):
            try:
                lemma_input_char_vec = char_lookup[alphabet_index[lemma[i]]]
            except KeyError:
                # handle unseen characters
                lemma_input_char_vec = char_lookup[alphabet_index[UNK]]
        else:
            lemma_input_char_vec = char_lookup[alphabet_index[EPSILON]]

        decoder_input = concatenate([encoded,
                                     prev_output_vec,
                                     lemma_input_char_vec,
                                     char_lookup[alphabet_index[str(i)]],
                                     feats_input])

        # prepare input vector and perform LSTM step
        # decoder_input = concatenate([encoded, prev_output_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted_template.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_template[-1] == END_WORD:
            break

        # prepare for the next iteration - feed back the decoder output rather than the char embedding
        # prev_output_vec = lookup[next_char_index]
        prev_output_vec = decoder_rnn_output
        i += 1

    # remove the end word symbol
    return predicted_template[0:-1]
def predict_beamsearch(self, encoder, input_seq):
    if len(input_seq) == 0:
        # empty input: return empty results (two values, matching the normal return)
        return [], []

    dn.renew_cg()

    self.readout = dn.parameter(self.params['readout'])
    self.bias = dn.parameter(self.params['bias'])
    self.w_c = dn.parameter(self.params['w_c'])
    self.u_a = dn.parameter(self.params['u_a'])
    self.v_a = dn.parameter(self.params['v_a'])
    self.w_a = dn.parameter(self.params['w_a'])

    alphas_mtx = []

    # encode the input sequence
    blstm_outputs, input_masks = encoder.encode_batch([input_seq])

    # complete sequences and their probabilities
    final_states = []

    # initialize the decoder rnn
    s_0 = self.decoder_rnn.initial_state()

    # holds the beam step index mapped to (sequence, probability, decoder state, attn_vector) tuples
    beam = {-1: [([common.BEGIN_SEQ], 1.0, s_0, self.init_lookup[0])]}
    i = 0

    # expand another step if we didn't reach the max length and there are still beams to expand
    while i < self.max_prediction_len and len(beam[i - 1]) > 0:

        # create all expansions from the previous beam:
        new_hypos = []
        for hypothesis in beam[i - 1]:
            prefix_seq, prefix_prob, prefix_decoder, prefix_attn = hypothesis
            last_hypo_symbol = prefix_seq[-1]

            # can't expand finished sequences
            if last_hypo_symbol == common.END_SEQ:
                continue

            # expand from the last symbol of the hypothesis
            try:
                prev_output_vec = self.output_lookup[self.y2int[last_hypo_symbol]]
            except KeyError:
                # not a known symbol
                print 'impossible to expand, key error: ' + str(last_hypo_symbol)
                continue

            decoder_input = dn.concatenate([prev_output_vec, prefix_attn])
            s = prefix_decoder.add_input(decoder_input)
            decoder_rnn_output = s.output()

            # perform attention step
            attention_output_vector, alphas = self.attend(blstm_outputs, decoder_rnn_output)

            # save attention weights for plotting
            # TODO: add attention weights properly to allow building the attention matrix for the best path
            if self.plot:
                val = alphas.vec_value()
                alphas_mtx.append(val)

            # compute output probabilities
            # h = readout * attention_output_vector + bias
            h = dn.affine_transform([self.bias, self.readout, attention_output_vector])

            # TODO: understand why diverse needs tanh before softmax
            if self.diverse:
                h = dn.tanh(h)
            probs = dn.softmax(h)
            probs_val = probs.npvalue()

            # TODO: maybe we should choose the nbest from all expansions and not only from the nbest of each hypothesis?
            # find best candidate outputs
            n_best_indices = common.argmax(probs_val, self.beam_size)
            for index in n_best_indices:
                p = probs_val[index]
                new_seq = prefix_seq + [self.int2y[index]]
                new_prob = prefix_prob * p
                if new_seq[-1] == common.END_SEQ or i == self.max_prediction_len - 1:
                    # TODO: add to final states only if it fits in the k best?
                    # if we found a complete sequence or hit the max length - add to final states
                    final_states.append((new_seq[1:-1], new_prob))
                else:
                    new_hypos.append((new_seq, new_prob, s, attention_output_vector))

        # add the most probable expansions from all hypotheses to the beam
        new_probs = np.array([p for (s, p, r, a) in new_hypos])
        argmax_indices = common.argmax(new_probs, self.beam_size)
        beam[i] = [new_hypos[l] for l in argmax_indices]
        i += 1

    # get nbest results from the final states found in the search
    final_probs = np.array([p for (s, p) in final_states])
    argmax_indices = common.argmax(final_probs, self.beam_size)
    nbest_seqs = [final_states[l] for l in argmax_indices]

    return nbest_seqs, alphas_mtx
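# ---------------------------------------------------------------------------
# Editor's note: a hypothetical invocation of predict_beamsearch; the model
# object, encoder and vocabularies are assumed to be constructed elsewhere in
# the codebase.
# nbest_seqs, alphas_mtx = seq2seq_model.predict_beamsearch(encoder, list(u'lemma'))
# for seq, prob in nbest_seqs:
#     print u''.join(seq), prob
# ---------------------------------------------------------------------------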
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])
    W_c = pc.parameter(model["W_c"])

    blstm_outputs = soft_attention.encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn,
                                                          feat_index, feat_lookup, feats, feature_types, lemma)

    feat_list = []
    for feat in sorted(feature_types):
        if feat in feats:
            feat_list.append(feats[feat])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []
    alphas_mtx = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(prev_output_vec)
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = soft_attention.attend(blstm_outputs, decoder_rnn_output,
                                                                   W_c, v__a, W__a, U__a)

        # save the attention weights for plotting
        val = alphas.vec_value()
        print 'alphas:'
        print val
        alphas_mtx.append(val)

        # compute output scores (argmax over the readout is equivalent to argmax over the softmax)
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias
        next_char_index = common.argmax(readout.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # return the predicted sequence together with the attention weights and the encoder input sequence
    return predicted_sequence, alphas_mtx, [BEGIN_WORD] + feat_list + list(lemma) + [END_WORD], W
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, source_word, source_feats,
                                target_feats, alphabet_index, inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    source_word = BEGIN_WORD + source_word + END_WORD
    source_word_char_vecs = []
    for char in source_word:
        try:
            source_word_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            source_word_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feats in [source_feats, target_feats]:
        for feat in sorted(feature_types):
            # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
            # if this feature has a value, take it from the lookup. otherwise use UNK
            if feat in feats:
                feat_str = feat + ':' + feats[feat]
                try:
                    feat_vecs.append(feat_lookup[feat_index[feat_str]])
                except KeyError:
                    # handle UNK or dropout
                    feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
            else:
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in source_word_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(source_word_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    source_word_char_vecs_len = len(source_word_char_vecs)
    for i in xrange(source_word_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[source_word_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is the input index, j is the output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction
    # length, as step actions are interleaved with the character outputs
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([prev_output_vec,
                                     char_lookup[alphabet_index[str(i)]],
                                     char_lookup[alphabet_index[str(j)]],
                                     blstm_outputs[i],
                                     feats_input])
        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output, to promote i or j.
        if predicted_output == STEP:
            if i < len(source_word) - 1:
                i += 1
        else:
            j += 1

        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is the input index, j is the output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction
    # length, as step actions are interleaved with the character outputs
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([prev_output_vec,
                                     char_lookup[alphabet_index[str(i)]],
                                     char_lookup[alphabet_index[str(j)]],
                                     blstm_outputs[i],
                                     feats_input])
        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output, to promote i or j.
        if predicted_output == STEP:
            if i < len(lemma) - 1:
                i += 1
        else:
            j += 1

        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def predict_output_sequence(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn,
                            lemma, feats, alphabet_index, inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    # char_lookup = model["char_lookup"]
    # feat_lookup = model["feat_lookup"]
    # R = pc.parameter(model["R"])
    # bias = pc.parameter(model["bias"])
    R = pc.parameter(R)
    bias = pc.parameter(bias)

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = encode_lemma(alphabet_index, char_lookup, padded_lemma)

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = encode_feats(feat_index, feat_lookup, feats, feature_types)
    # feats_input = pc.concatenate(feat_vecs)

    blstm_outputs = bilstm_transduce(encoder_frnn, encoder_rrnn, lemma_char_vecs)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is the input index
    i = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction
    # length, as step actions are interleaved with the character outputs
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec, blstm_outputs[i]])
        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step output, to promote i.
        if predicted_output == STEP:
            if i < len(padded_lemma) - 1:
                i += 1

        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return u''.join(predicted_output_sequence[0:-1])
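# ---------------------------------------------------------------------------
# Editor's note: encode_lemma(), encode_feats() and bilstm_transduce() are
# defined elsewhere in the codebase. The sketches below simply factor out the
# logic that appears inlined in the other prediction functions in this file;
# the real helpers may differ in minor details (e.g. exact signatures).
def encode_lemma(alphabet_index, char_lookup, padded_lemma):
    # convert characters to matching embeddings, falling back to UNK
    return [char_lookup[alphabet_index.get(char, alphabet_index[UNK])] for char in padded_lemma]

def encode_feats(feat_index, feat_lookup, feats, feature_types):
    # convert features to matching embeddings, falling back to UNK_FEAT
    feat_vecs = []
    for feat in sorted(feature_types):
        if feat in feats and feat + ':' + feats[feat] in feat_index:
            feat_vecs.append(feat_lookup[feat_index[feat + ':' + feats[feat]]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    return feat_vecs

def bilstm_transduce(encoder_frnn, encoder_rrnn, char_vecs):
    # forward and backward LSTM passes, then position-wise concatenation
    f_state, r_state = encoder_frnn.initial_state(), encoder_rrnn.initial_state()
    f_outputs, r_outputs = [], []
    for v in char_vecs:
        f_state = f_state.add_input(v)
        f_outputs.append(f_state.output())
    for v in reversed(char_vecs):
        r_state = r_state.add_input(v)
        r_outputs.append(r_state.output())
    return [pc.concatenate([f, r]) for f, r in zip(f_outputs, reversed(r_outputs))]
# ---------------------------------------------------------------------------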
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, source_word, source_feats,
                                target_feats, alphabet_index, inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    source_word = BEGIN_WORD + source_word + END_WORD
    source_word_char_vecs = []
    for char in source_word:
        try:
            source_word_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            source_word_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feats in [source_feats, target_feats]:
        for feat in sorted(feature_types):
            # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
            # if this feature has a value, take it from the lookup. otherwise use UNK
            if feat in feats:
                feat_str = feat + ':' + feats[feat]
                try:
                    feat_vecs.append(feat_lookup[feat_index[feat_str]])
                except KeyError:
                    # handle UNK or dropout
                    feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
            else:
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in source_word_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(source_word_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    source_word_char_vecs_len = len(source_word_char_vecs)
    for i in xrange(source_word_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[source_word_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_template = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the source word is finished, pad with epsilon chars
        if i < len(source_word):
            blstm_output = blstm_outputs[i]
            try:
                source_word_input_char_vec = char_lookup[alphabet_index[source_word[i]]]
            except KeyError:
                # handle unseen characters
                source_word_input_char_vec = char_lookup[alphabet_index[UNK]]
        else:
            source_word_input_char_vec = char_lookup[alphabet_index[EPSILON]]
            blstm_output = blstm_outputs[source_word_char_vecs_len - 1]

        decoder_input = concatenate([blstm_output,
                                     prev_output_vec,
                                     source_word_input_char_vec,
                                     char_lookup[alphabet_index[str(i)]],
                                     feats_input])

        # prepare input vector and perform LSTM step
        # decoder_input = concatenate([encoded, prev_output_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted_template.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_template[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_template[0:-1]
def predict_inflection(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict, alphabet_index,
                       inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK or dropout
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feat_dict:
            feat_str = feat + ':' + feat_dict[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # bilstm forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    for c in lemma_char_vecs:
        s = s.add_input(c)
    encoder_frnn_h = s.h()

    # bilstm backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
    encoder_rrnn_h = s.h()

    # concatenate BiLSTM final hidden states
    if len(encoder_rrnn_h) == 1 and len(encoder_frnn_h) == 1:
        encoded = concatenate([encoder_frnn_h[0], encoder_rrnn_h[0]])
    else:
        # if there's more than one hidden layer in the RNNs, take the last one
        encoded = concatenate([encoder_frnn_h[-1], encoder_rrnn_h[-1]])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted = ''

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the lemma is finished or the character is unknown, pad with epsilon chars
        if i < len(lemma) and lemma[i] in alphabet_index:
            lemma_input_char_vec = char_lookup[alphabet_index[lemma[i]]]
        else:
            lemma_input_char_vec = char_lookup[alphabet_index[EPSILON]]

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([encoded, prev_output_vec, lemma_input_char_vec, feats_input])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_predicted_char_index = common.argmax(probs)
        predicted = predicted + inverse_alphabet_index[next_predicted_char_index]

        # check if reached end of word
        if predicted[-1] == END_WORD:
            break

        # prepare for the next iteration
        prev_output_vec = char_lookup[next_predicted_char_index]
        i += 1

    # remove the begin and end word symbols
    return predicted[1:-1]
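# ---------------------------------------------------------------------------
# Editor's note: a hypothetical call to predict_inflection on one test sample;
# the trained model, encoder/decoder builders and index dictionaries are
# assumed to come from the training code, and the feature dictionary below is
# only an illustrative example.
# feat_dict = {'pos': 'V', 'tense': 'PST'}
# predicted = predict_inflection(model, encoder_frnn, encoder_rrnn, decoder_rnn, u'walk', feat_dict,
#                                alphabet_index, inverse_alphabet_index, feat_index, feature_types)
# print predicted
# ---------------------------------------------------------------------------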
def predict_nbest_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, source_word, source_feats,
                           target_feats, alphabet_index, inverse_alphabet_index, feat_index, feature_types,
                           nbest):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    source_word = BEGIN_WORD + source_word + END_WORD
    source_word_char_vecs = []
    for char in source_word:
        try:
            source_word_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            source_word_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feats in [source_feats, target_feats]:
        for feat in sorted(feature_types):
            # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
            # if this feature has a value, take it from the lookup. otherwise use UNK
            if feat in feats:
                feat_str = feat + ':' + feats[feat]
                try:
                    feat_vecs.append(feat_lookup[feat_index[feat_str]])
                except KeyError:
                    # handle UNK or dropout
                    feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
            else:
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in source_word_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(source_word_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    source_word_char_vecs_len = len(source_word_char_vecs)
    for i in xrange(source_word_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[source_word_char_vecs_len - i - 1]]))

    # beam search

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    beam_width = BEAM_WIDTH
    beam = {}
    beam[-1] = [([BEGIN_WORD], 1.0, s_0)]  # (sequence, probability, decoder_rnn)
    final_states = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN and len(beam[i - 1]) > 0:

        # at each stage, create all expansions from the previous beam:
        new_hypos = []
        for hypothesis in beam[i - 1]:
            seq, hyp_prob, prefix_decoder = hypothesis
            last_hypo_char = seq[-1]

            # can't expand finished sequences
            if last_hypo_char == END_WORD:
                continue

            # expand from the last character of the hypothesis
            try:
                prev_output_vec = char_lookup[alphabet_index[last_hypo_char]]
            except KeyError:
                # not a character
                # print 'impossible to expand, key error'  # + str(seq)
                continue

            # if the source word is finished, pad with epsilon chars
            if i < len(source_word):
                blstm_output = blstm_outputs[i]
                try:
                    source_word_input_char_vec = char_lookup[alphabet_index[source_word[i]]]
                except KeyError:
                    # handle unseen characters
                    source_word_input_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                source_word_input_char_vec = char_lookup[alphabet_index[EPSILON]]
                blstm_output = blstm_outputs[source_word_char_vecs_len - 1]

            decoder_input = concatenate([blstm_output,
                                         prev_output_vec,
                                         source_word_input_char_vec,
                                         char_lookup[alphabet_index[str(i)]],
                                         feats_input])

            # prepare input vector and perform LSTM step
            s = prefix_decoder.add_input(decoder_input)

            # compute softmax probs
            decoder_rnn_output = s.output()
            probs = softmax(R * decoder_rnn_output + bias)
            probs = probs.vec_value()

            # expand - create new hypos
            for index, p in enumerate(probs):
                new_seq = list(seq)
                new_seq.append(inverse_alphabet_index[index])
                new_prob = hyp_prob * p
                if new_seq[-1] == END_WORD:
                    # if found a complete sequence - add to final states
                    final_states.append((new_seq[1:-1], new_prob))
                else:
                    new_hypos.append((new_seq, new_prob, s))

        # add the expansions with the largest probability to the beam, together with their score and prefix rnn state
        new_probs = [p for (s, p, r) in new_hypos]
        argmax_indices = common.argmax(new_probs, n=beam_width)
        beam[i] = [new_hypos[l] for l in argmax_indices]
        i += 1

    # get nbest results from the final states found in the search
    final_probs = [p for (s, p) in final_states]
    argmax_indices = common.argmax(final_probs, n=nbest)
    nbest_templates = [final_states[l] for l in argmax_indices]

    return nbest_templates
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use the same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is the input index, j is the output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction
    # length, as step actions are interleaved with the character outputs
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])
        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output, to promote i or j.
        if predicted_output == STEP:
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            if i < len(padded_lemma) - 1:
                i += 1
        else:
            if predicted_output.isdigit():
                # handle copy
                # try:
                #     prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                # except KeyError:
                #     prev_char_vec = char_lookup[alphabet_index[UNK]]
                try:
                    # this way END_WORD cannot be copied (as in the training stage)
                    if i < len(lemma) + 1:
                        prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                    else:
                        # if trying to copy from a non-existent index, pad with the last lemma character
                        prev_char_vec = char_lookup[alphabet_index[lemma[-1]]]
                except KeyError:
                    prev_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                # handle char
                prev_char_vec = char_lookup[predicted_output_index]

            j += 1

        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, alphabet_index,
                                inverse_alphabet_index):
    renew_cg()

    # read the parameters
    lookup = model["lookup"]
    # noinspection PyPep8Naming
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(lookup[alphabet_index[UNK]])

    # bilstm forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    for c in lemma_char_vecs:
        s = s.add_input(c)
    encoder_frnn_h = s.h()

    # bilstm backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
    encoder_rrnn_h = s.h()

    # concatenate BiLSTM final hidden states
    if len(encoder_rrnn_h) == 1 and len(encoder_frnn_h) == 1:
        encoded = concatenate([encoder_frnn_h[0], encoder_rrnn_h[0]])
    else:
        # if there's more than one layer, take the last one
        encoded = concatenate([encoder_frnn_h[-1], encoder_rrnn_h[-1]])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_template = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the lemma is finished, pad with epsilon chars
        if i < len(lemma):
            lemma_input_char_vec = lookup[alphabet_index[lemma[i]]]
        else:
            lemma_input_char_vec = lookup[alphabet_index[EPSILON]]

        decoder_input = concatenate([encoded,
                                     prev_output_vec,
                                     lemma_input_char_vec,
                                     lookup[alphabet_index[str(i)]]])

        # prepare input vector and perform LSTM step
        # decoder_input = concatenate([encoded, prev_output_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted_template.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_template[-1] == END_WORD:
            break

        # prepare for the next iteration - feed back the decoder output rather than the char embedding
        # prev_output_vec = lookup[next_char_index]
        prev_output_vec = decoder_rnn_output
        i += 1

    # remove the end word symbol
    return predicted_template[0:-1]