def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # encode the lemma
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # convert features to matching embeddings; handle UNK properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
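# Note: the attend() helper called above is not shown in these snippets. A
# minimal additive (Bahdanau-style) attention sketch, reconstructed as an
# assumption from the parameter names used here (`pc` is assumed to be the
# dynet module, as in the snippets); the real
# task1_attention_implementation.attend may differ:
def attend_sketch(blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a):
    # score each encoder position against the current decoder state
    scores = [v__a * pc.tanh(W__a * decoder_rnn_output + U__a * h_j)
              for h_j in blstm_outputs]
    alphas = pc.softmax(pc.concatenate(scores))
    # context vector: attention-weighted sum of the encoder outputs
    context = pc.concatenate_cols(blstm_outputs) * alphas
    # combine context and decoder state into the attention output vector
    attention_output_vector = pc.tanh(
        W_c * pc.concatenate([decoder_rnn_output, context]))
    return attention_output_vector, alphas, W_c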
def predict_output_sequence(model, char_lookup, feat_lookup, R, bias,
                            encoder_frnn, encoder_rrnn, decoder_rnn, W_c, W__a,
                            U__a, v__a, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    R = pc.parameter(R)
    bias = pc.parameter(bias)
    W_c = pc.parameter(W_c)
    W__a = pc.parameter(W__a)
    U__a = pc.parameter(U__a)
    v__a = pc.parameter(v__a)

    blstm_outputs = encode_feats_and_chars(alphabet_index, char_lookup,
                                           encoder_frnn, encoder_rrnn,
                                           feat_index, feat_lookup, feats,
                                           feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(prev_output_vec)
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = attend(blstm_outputs,
                                                    decoder_rnn_output, W_c,
                                                    v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
def predict_nbest_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                                inverse_alphabet_index, feat_index, feature_types, nbest):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings; handle UNK properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # beam search

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    i = 0
    beam_width = BEAM_WIDTH
    beam = {}
    beam[-1] = [([BEGIN_WORD], 1.0, s_0)] # (sequence, probability, decoder_rnn)
    final_states = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN and len(beam[i-1]) > 0:

        # at each stage:
        # create all expansions from the previous beam:
        new_hypos = []
        for hypothesis in beam[i-1]:
            seq, hyp_prob, prefix_decoder = hypothesis
            last_hypo_char = seq[-1]

            # can't expand finished sequences
            if last_hypo_char == END_WORD:
                continue

            # expand from the last character of the hypothesis
            try:
                prev_output_vec = char_lookup[alphabet_index[last_hypo_char]]
            except KeyError:
                # not a character
                # print 'impossible to expand, key error'# + str(seq)
                continue

            # if the lemma is finished, pad with epsilon chars
            if i < len(lemma):
                blstm_output = blstm_outputs[i]
                try:
                    lemma_input_char_vec = char_lookup[alphabet_index[lemma[i]]]
                except KeyError:
                    # handle unseen characters
                    lemma_input_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                lemma_input_char_vec = char_lookup[alphabet_index[EPSILON]]
                blstm_output = blstm_outputs[lemma_char_vecs_len - 1]

            decoder_input = concatenate([blstm_output,
                                         prev_output_vec,
                                         lemma_input_char_vec,
                                         char_lookup[alphabet_index[str(i)]],
                                         feats_input])

            # prepare input vector and perform LSTM step
            s = prefix_decoder.add_input(decoder_input)

            # compute softmax probs
            decoder_rnn_output = s.output()
            probs = softmax(R * decoder_rnn_output + bias)
            probs = probs.vec_value()

            # expand - create new hypos
            for index, p in enumerate(probs):
                new_seq = list(seq)
                new_seq.append(inverse_alphabet_index[index])
                new_prob = hyp_prob * p
                if new_seq[-1] == END_WORD:
                    # if found a complete sequence - add to final states
                    final_states.append((new_seq[1:-1], new_prob))
                else:
                    new_hypos.append((new_seq, new_prob, s))

        # add the expansions with the largest probability to the beam together with their score and prefix rnn state
        new_probs = [p for (s, p, r) in new_hypos]
        argmax_indices = common.argmax(new_probs, n=beam_width)
        beam[i] = [new_hypos[l] for l in argmax_indices]
        i += 1

    # get nbest results from final states found in search
    final_probs = [p for (s, p) in final_states]
    argmax_indices = common.argmax(final_probs, n=nbest)
    nbest_templates = [final_states[l] for l in argmax_indices]

    return nbest_templates
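# Note: common.argmax is used above both as a plain argmax and, with n=...,
# as a top-n selector over the beam expansions. A plausible stand-in (an
# assumption; the real common.argmax may differ):
import numpy as np

def argmax_sketch(values, n=1):
    # indices of the n largest values, best first
    indices = np.argsort(values)[::-1][:n]
    # calling code expects a single index when n == 1
    return int(indices[0]) if n == 1 else [int(k) for k in indices]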
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                                inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings; handle UNK properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_template = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the lemma is finished, pad with epsilon chars
        if i < len(lemma):
            blstm_output = blstm_outputs[i]
            try:
                lemma_input_char_vec = char_lookup[alphabet_index[lemma[i]]]
            except KeyError:
                # handle unseen characters
                lemma_input_char_vec = char_lookup[alphabet_index[UNK]]
        else:
            lemma_input_char_vec = char_lookup[alphabet_index[EPSILON]]
            blstm_output = blstm_outputs[lemma_char_vecs_len - 1]

        decoder_input = concatenate([blstm_output,
                                     prev_output_vec,
                                     lemma_input_char_vec,
                                     char_lookup[alphabet_index[str(i)]],
                                     feats_input])

        # prepare input vector and perform LSTM step
        # decoder_input = concatenate([encoded, prev_output_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted_template.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_template[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_template[0:-1]
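# Note on the decoder input above: char_lookup[alphabet_index[str(i)]] reuses
# the character embedding table for position embeddings, looking up the step
# counter i as the string '0', '1', ...; the alphabet is therefore assumed to
# contain digit symbols up to MAX_PREDICTION_LEN.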
def predict_inflection(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                       alphabet_index, inverse_alphabet_index):
    renew_cg()

    # read the parameters
    lookup = model["lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings; handle UNK properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(lookup[alphabet_index[UNK]])

    # bilstm forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    for c in lemma_char_vecs:
        s = s.add_input(c)
    encoder_frnn_h = s.h()

    # bilstm backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
    encoder_rrnn_h = s.h()

    # concatenate BILSTM final hidden states
    if len(encoder_rrnn_h) == 1 and len(encoder_frnn_h) == 1:
        encoded = concatenate([encoder_frnn_h[0], encoder_rrnn_h[0]])
    else:
        # if there's more than one layer, take the last one
        encoded = concatenate([encoder_frnn_h[-1], encoder_rrnn_h[-1]])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted = ''

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the lemma is finished or the character is unknown, pad with epsilon chars
        if i < len(lemma) and lemma[i] in alphabet_index:
            lemma_input_char_vec = lookup[alphabet_index[lemma[i]]]
        else:
            lemma_input_char_vec = lookup[alphabet_index[EPSILON]]

        # prepare input vector and perform LSTM step
        decoder_input = concatenate(
            [encoded, prev_output_vec, lemma_input_char_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted = predicted + inverse_alphabet_index[next_char_index]

        # check if reached end of word
        if predicted[-1] == END_WORD:
            break

        # prepare for the next iteration
        # prev_output_vec = lookup[next_char_index]
        prev_output_vec = decoder_rnn_output
        i += 1

    # remove the begin and end word symbols
    return predicted[1:-1]
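# Note: unlike the other decoders in this file, the snippets that set
# prev_output_vec = decoder_rnn_output feed the decoder's own hidden output
# back as the next input rather than the embedding of the predicted character
# (the commented-out lookup line above shows the embedding variant).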
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, char_feedback_rnn, action_feedback_rnn,
                            lemma, feats, alphabet_index, inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    input_char_lookup = model["input_char_lookup"]
    output_char_lookup = model["output_char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # feedback_R = parameter(model["feedback_R"])
    # feedback_bias = parameter(model["feedback_bias"])

    # convert characters to matching embeddings; handle UNK properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(input_char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(input_char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s = decoder_rnn.initial_state()

    # set prev_output_vec for first lstm step as BEGIN_WORD for both feedback lstms
    # prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    begin_vec = output_char_lookup[alphabet_index[BEGIN_WORD]]
    # c_f_state = char_feedback_rnn.initial_state()
    # a_s_state = action_feedback_rnn.initial_state()
    # c_f_state = c_f_state.add_input(begin_vec)
    # a_s_state = a_s_state.add_input(begin_vec)
    # prev_output_vec = tanh(feedback_R * concatenate([c_f_state.output(), a_s_state.output()]) + feedback_bias)
    prev_action_vec = begin_vec
    prev_char_vec = begin_vec

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction length since step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([
                                     prev_action_vec,
                                     # prev_char_vec,
                                     # prev_output_vec,
                                     # input_char_lookup[alphabet_index[str(i)]],
                                     # input_char_lookup[alphabet_index[str(j)]],
                                     blstm_outputs[i],
                                     feats_input])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)


        # check if step or char output to promote i or j.
        if predicted_output == STEP:
            # prepare for the next iteration - "feedback"
            # prev_output_vec = char_lookup[alphabet_index[STEP]]
            # step_vec = output_char_lookup[alphabet_index[STEP]]
            # not changing c_f_state as no character was predicted, only step action is done
            # stepping the actions feedback lstm with step
            # a_s_state = a_s_state.add_input(step_vec)
            # prev_output_vec = tanh(feedback_R * concatenate([c_f_state.output(), a_s_state.output()]) + feedback_bias)
            # prev_action_vec = step_vec
            # prev_char_vec = output_char_lookup[alphabet_index[EPSILON]]
            if i < len(lemma) - 1:
                i += 1
        else:
            j += 1
            # if predicted_output.isdigit():
                # copy action
                # action_feedback_vec = output_char_lookup[alphabet_index[predicted_output]]

                # the copied char
                # char_feedback_vec = output_char_lookup[alphabet_index[padded_lemma[i]]]
            # else:
                # char action
                # action_feedback_vec = output_char_lookup[alphabet_index[predicted_output]]

                # the predicted char embedding
                # char_feedback_vec = output_char_lookup[alphabet_index[predicted_output]]


            # stepping the char feedback lstm with predicted char
            # c_f_state = c_f_state.add_input(char_feedback_vec)

            # stepping the actions feedback lstm with char or copy action
            # a_s_state = a_s_state.add_input(action_feedback_vec)

            # combine lstm feedbacks through an MLP
            # prev_output_vec = tanh(feedback_R * concatenate([c_f_state.output(), a_s_state.output()]) + feedback_bias)
            # prev_action_vec = action_feedback_vec
            # prev_char_vec = char_feedback_vec

            # promote j as a new character was added to the output


        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        prev_action_vec = output_char_lookup[predicted_output_index]

        # prepare for the next iteration - "feedback" - already computed above using the two feedback lstms
        # prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
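# A hypothetical trace of the hard-attention pointer logic above (the names
# and action sequence are illustrative only): for actions such as
# [STEP, 'a', STEP, 'b', 's', END_WORD], the input pointer i advances only on
# STEP while the output pointer j advances on every emitted character:
def trace_pointers_sketch(actions, padded_lemma_len, step_symbol):
    i = j = 0
    for action in actions:
        if action == step_symbol:
            if i < padded_lemma_len - 1:
                i += 1  # attend to the next input character
        else:
            j += 1  # a character was emitted
    return i, j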
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn,
                                lemma, feats, alphabet_index,
                                inverse_alphabet_index, feat_index,
                                feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings; handle UNK properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # bilstm forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    for c in lemma_char_vecs:
        s = s.add_input(c)
    encoder_frnn_h = s.h()

    # bilstm backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
    encoder_rrnn_h = s.h()

    # concatenate BILSTM final hidden states
    if len(encoder_rrnn_h) == 1 and len(encoder_frnn_h) == 1:
        encoded = concatenate([encoder_frnn_h[0], encoder_rrnn_h[0]])
    else:
        # if there's more than one layer, take the last one
        encoded = concatenate([encoder_frnn_h[-1], encoder_rrnn_h[-1]])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_template = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the lemma is finished, pad with epsilon chars
        if i < len(lemma):
            try:
                lemma_input_char_vec = char_lookup[alphabet_index[lemma[i]]]
            except KeyError:
                # handle unseen characters
                lemma_input_char_vec = char_lookup[alphabet_index[UNK]]
        else:
            lemma_input_char_vec = char_lookup[alphabet_index[EPSILON]]

        decoder_input = concatenate([
            encoded, prev_output_vec, lemma_input_char_vec,
            char_lookup[alphabet_index[str(i)]], feats_input
        ])

        # prepare input vector and perform LSTM step
        # decoder_input = concatenate([encoded, prev_output_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted_template.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_template[-1] == END_WORD:
            break

        # prepare for the next iteration
        # prev_output_vec = lookup[next_char_index]
        prev_output_vec = decoder_rnn_output
        i += 1

    # remove the begin and end word symbols
    return predicted_template[0:-1]
    def predict_beamsearch(self, encoder, input_seq):
        if len(input_seq) == 0:
            return []

        dn.renew_cg()

        self.readout = dn.parameter(self.params['readout'])
        self.bias = dn.parameter(self.params['bias'])
        self.w_c = dn.parameter(self.params['w_c'])
        self.u_a = dn.parameter(self.params['u_a'])
        self.v_a = dn.parameter(self.params['v_a'])
        self.w_a = dn.parameter(self.params['w_a'])

        alphas_mtx = []

        # encode input sequence
        blstm_outputs, input_masks = encoder.encode_batch([input_seq])

        # complete sequences and their probabilities
        final_states = []

        # initialize the decoder rnn
        s_0 = self.decoder_rnn.initial_state()

        # holds beam step index mapped to (sequence, probability, decoder state, attn_vector) tuples
        beam = {-1: [([common.BEGIN_SEQ], 1.0, s_0, self.init_lookup[0])]}
        i = 0

        # expand another step if we haven't reached max length and there are still beams to expand
        while i < self.max_prediction_len and len(beam[i - 1]) > 0:

            # create all expansions from the previous beam:
            new_hypos = []
            for hypothesis in beam[i - 1]:
                prefix_seq, prefix_prob, prefix_decoder, prefix_attn = hypothesis
                last_hypo_symbol = prefix_seq[-1]

                # can't expand finished sequences
                if last_hypo_symbol == common.END_SEQ:
                    continue

                # expand from the last symbol of the hypothesis
                try:
                    prev_output_vec = self.output_lookup[self.y2int[last_hypo_symbol]]
                except KeyError:
                    # not a known symbol
                    print 'impossible to expand, key error: ' + str(last_hypo_symbol)
                    continue

                decoder_input = dn.concatenate([prev_output_vec, prefix_attn])
                s = prefix_decoder.add_input(decoder_input)
                decoder_rnn_output = s.output()

                # perform attention step
                attention_output_vector, alphas = self.attend(blstm_outputs, decoder_rnn_output)

                # save attention weights for plotting
                # TODO: add attention weights properly to allow building the attention matrix for the best path
                if self.plot:
                    val = alphas.vec_value()
                    alphas_mtx.append(val)

                # compute output probabilities
                # h = readout * attention_output_vector + bias
                h = dn.affine_transform([self.bias, self.readout, attention_output_vector])

                # TODO: understand why diverse needs tanh before softmax
                if self.diverse:
                    h = dn.tanh(h)
                probs = dn.softmax(h)
                probs_val = probs.npvalue()

                # TODO: maybe should choose nbest from all expansions and not only from nbest of each hypothesis?
                # find best candidate outputs
                n_best_indices = common.argmax(probs_val, self.beam_size)
                for index in n_best_indices:
                    p = probs_val[index]
                    new_seq = prefix_seq + [self.int2y[index]]
                    new_prob = prefix_prob * p
                    if new_seq[-1] == common.END_SEQ or i == self.max_prediction_len - 1:
                        # TODO: add to final states only if fits in k best?
                        # if found a complete sequence or max length - add to final states
                        final_states.append((new_seq[1:-1], new_prob))
                    else:
                        new_hypos.append((new_seq, new_prob, s, attention_output_vector))

            # add the most probable expansions from all hypotheses to the beam
            new_probs = np.array([p for (s, p, r, a) in new_hypos])
            argmax_indices = common.argmax(new_probs, self.beam_size)
            beam[i] = [new_hypos[l] for l in argmax_indices]
            i += 1

        # get nbest results from final states found in search
        final_probs = np.array([p for (s, p) in final_states])
        argmax_indices = common.argmax(final_probs, self.beam_size)
        nbest_seqs = [final_states[l] for l in argmax_indices]

        return nbest_seqs, alphas_mtx
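# Hypothetical usage of predict_beamsearch, assuming a trained decoder
# instance `model` and a matching `encoder` (both names are illustrative):
#
#   nbest_seqs, alphas_mtx = model.predict_beamsearch(encoder, list(u'walk'))
#   for seq, prob in nbest_seqs:
#       candidate = u''.join(seq)  # each candidate is a list of output symbols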
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn,
                            lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])
    W_c = pc.parameter(model["W_c"])

    blstm_outputs = soft_attention.encode_feats_and_chars(
        alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index,
        feat_lookup, feats, feature_types, lemma)
    feat_list = []
    for feat in sorted(feature_types):
        if feat in feats:
            feat_list.append(feats[feat])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []
    alphas_mtx = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(prev_output_vec)
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = soft_attention.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)
        val = alphas.vec_value()
        print 'alphas:'
        print val
        alphas_mtx.append(val)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias
        next_char_index = common.argmax(readout.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence, alphas_mtx, [
        BEGIN_WORD
    ] + feat_list + list(lemma) + [END_WORD], W
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, source_word, source_feats, 
                                target_feats, alphabet_index,
                                inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings; handle UNK properly
    source_word = BEGIN_WORD + source_word + END_WORD
    source_word_char_vecs = []
    for char in source_word:
        try:
            source_word_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            source_word_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = []
    for feats in [source_feats, target_feats]:
        for feat in sorted(feature_types):
            # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
            # if this feature has a value, take it from the lookup. otherwise use UNK
            if feat in feats:
                feat_str = feat + ':' + feats[feat]
                try:
                    feat_vecs.append(feat_lookup[feat_index[feat_str]])
                except KeyError:
                    # handle UNK or dropout
                    feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
            else:
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in source_word_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(source_word_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    source_word_char_vecs_len = len(source_word_char_vecs)
    for i in xrange(source_word_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[source_word_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction length since step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([prev_output_vec,
                                     char_lookup[alphabet_index[str(i)]],
                                     char_lookup[alphabet_index[str(j)]],
                                     blstm_outputs[i],
                                     feats_input])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i or j.
        if predicted_output == STEP:
            if i < len(source_word) - 1:
                i += 1
        else:
            j += 1

        num_outputs += 1
        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings; handle UNK properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction length since step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([prev_output_vec,
                                     char_lookup[alphabet_index[str(i)]],
                                     char_lookup[alphabet_index[str(j)]],
                                     blstm_outputs[i],
                                     feats_input])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i or j.
        if predicted_output == STEP:
            if i < len(lemma) - 1:
                i += 1
        else:
            j += 1

        num_outputs += 1
        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def predict_output_sequence(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    # char_lookup = model["char_lookup"]
    # feat_lookup = model["feat_lookup"]
    # R = pc.parameter(model["R"])
    # bias = pc.parameter(model["bias"])
    R = pc.parameter(R)
    bias = pc.parameter(bias)

    # convert characters to matching embeddings; handle UNK properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = encode_lemma(alphabet_index, char_lookup, padded_lemma)

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = encode_feats(feat_index, feat_lookup, feats, feature_types)

    #~ feats_input = pc.concatenate(feat_vecs)

    blstm_outputs = bilstm_transduce(encoder_frnn, encoder_rrnn, lemma_char_vecs)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is the input index
    i = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max prediction length since step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec,
                                        blstm_outputs[i]])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i.
        if predicted_output == STEP:
            if i < len(padded_lemma) - 1:
                i += 1

        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol

    return u''.join(predicted_output_sequence[0:-1])
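# Note: encode_lemma and bilstm_transduce are called above but not shown.
# Plausible reconstructions from the inline encoder passes elsewhere in this
# file (assumptions, not the original helpers; `pc` is assumed to be dynet):
def encode_lemma_sketch(alphabet_index, char_lookup, padded_lemma):
    # map each character to its embedding, falling back to UNK
    return [char_lookup[alphabet_index.get(char, alphabet_index[UNK])]
            for char in padded_lemma]

def bilstm_transduce_sketch(encoder_frnn, encoder_rrnn, char_vecs):
    # forward pass
    s = encoder_frnn.initial_state()
    frnn_outputs = []
    for c in char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())
    # backward pass
    s = encoder_rrnn.initial_state()
    rrnn_outputs = []
    for c in reversed(char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())
    # pair the forward and backward outputs for each input position
    return [pc.concatenate([f, b])
            for f, b in zip(frnn_outputs, reversed(rrnn_outputs))]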
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, source_word, source_feats,
                                target_feats, alphabet_index,
                                inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings; handle UNK properly
    source_word = BEGIN_WORD + source_word + END_WORD
    source_word_char_vecs = []
    for char in source_word:
        try:
            source_word_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            source_word_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = []
    for feats in [source_feats, target_feats]:
        for feat in sorted(feature_types):
            # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
            # if this feature has a value, take it from the lookup. otherwise use UNK
            if feat in feats:
                feat_str = feat + ':' + feats[feat]
                try:
                    feat_vecs.append(feat_lookup[feat_index[feat_str]])
                except KeyError:
                    # handle UNK or dropout
                    feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
            else:
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in source_word_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(source_word_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    source_word_char_vecs_len = len(source_word_char_vecs)
    for i in xrange(source_word_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[source_word_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_template = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # if the source word is finished, pad with epsilon chars
        if i < len(source_word):
            blstm_output = blstm_outputs[i]
            try:
                source_word_input_char_vec = char_lookup[alphabet_index[source_word[i]]]
            except KeyError:
                # handle unseen characters
                source_word_input_char_vec = char_lookup[alphabet_index[UNK]]
        else:
            source_word_input_char_vec = char_lookup[alphabet_index[EPSILON]]
            blstm_output = blstm_outputs[source_word_char_vecs_len - 1]

        decoder_input = concatenate([blstm_output, prev_output_vec, source_word_input_char_vec,
                                     char_lookup[alphabet_index[str(i)]], feats_input])

        # prepare input vector and perform LSTM step
        # decoder_input = concatenate([encoded, prev_output_vec])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted_template.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_template[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_template[0:-1]
def predict_inflection(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict, alphabet_index,
                       inverse_alphabet_index, feat_index, feature_types):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings; handle UNK properly
    lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK or dropout
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings; handle UNK properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feat_dict:
            feat_str = feat + ':' + feat_dict[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # bilstm forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    for c in lemma_char_vecs:
        s = s.add_input(c)
    encoder_frnn_h = s.h()

    # bilstm backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
    encoder_rrnn_h = s.h()

    # concatenate the BiLSTM final hidden states (the last layer if the RNNs are multi-layered)
    encoded = concatenate([encoder_frnn_h[-1], encoder_rrnn_h[-1]])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted = ''

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # feed the current padded-lemma character; once the lemma is exhausted
        # (or the character is unseen), pad with epsilon
        if i < len(padded_lemma) and padded_lemma[i] in alphabet_index:
            lemma_input_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
        else:
            lemma_input_char_vec = char_lookup[alphabet_index[EPSILON]]

        # prepare input vector and perform LSTM step
        decoder_input = concatenate([encoded, prev_output_vec, lemma_input_char_vec, feats_input])
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_predicted_char_index = common.argmax(probs)
        predicted = predicted + inverse_alphabet_index[next_predicted_char_index]

        # check if reached end of word
        if predicted[-1] == END_WORD:
            break

        # prepare for the next iteration
        prev_output_vec = char_lookup[next_predicted_char_index]
        i += 1

    # remove the begin and end word symbols
    return predicted[1:-1]
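
# The feature encoding above keys feature embeddings by 'feat:value' strings,
# falling back to UNK_FEAT for missing or unseen features. A tiny standalone
# illustration of that convention (hypothetical helper and example values,
# not part of the original project):
def _sketch_feat_strings(feature_types, feat_dict, unk_feat='UNK_FEAT'):
    feat_strs = []
    for feat in sorted(feature_types):
        if feat in feat_dict:
            feat_strs.append(feat + ':' + feat_dict[feat])
        else:
            feat_strs.append(unk_feat)
    return feat_strs

# _sketch_feat_strings(['num', 'pos', 'tense'], {'pos': 'V', 'tense': 'PST'})
# -> ['UNK_FEAT', 'pos:V', 'tense:PST']
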
def predict_nbest_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, source_word, source_feats,
                           target_feats, alphabet_index,
                           inverse_alphabet_index, feat_index, feature_types, nbest):
    renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    source_word = BEGIN_WORD + source_word + END_WORD
    source_word_char_vecs = []
    for char in source_word:
        try:
            source_word_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            source_word_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feats in [source_feats, target_feats]:
        for feat in sorted(feature_types):
            # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
            # if this feature has a value, take it from the lookup. otherwise use UNK
            if feat in feats:
                feat_str = feat + ':' + feats[feat]
                try:
                    feat_vecs.append(feat_lookup[feat_index[feat_str]])
                except KeyError:
                    # handle UNK or dropout
                    feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
            else:
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in source_word_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(source_word_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    source_word_char_vecs_len = len(source_word_char_vecs)
    for i in xrange(source_word_char_vecs_len):
        blstm_outputs.append(concatenate([frnn_outputs[i], rrnn_outputs[source_word_char_vecs_len - i - 1]]))

    # beam search

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    i = 0
    beam_width = BEAM_WIDTH
    beam = {}
    beam[-1] = [([BEGIN_WORD], 1.0, s_0)]  # (sequence, probability, decoder state)
    final_states = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN and len(beam[i - 1]) > 0:

        # at each stage:
        # create all expansions from the previous beam:
        new_hypos = []
        for hypothesis in beam[i - 1]:
            seq, hyp_prob, prefix_decoder = hypothesis
            last_hypo_char = seq[-1]

            # can't expand finished sequences
            if last_hypo_char == END_WORD:
                continue

            # expand from the last character of the hypothesis
            try:
                prev_output_vec = char_lookup[alphabet_index[last_hypo_char]]
            except KeyError:
                # not a character in the alphabet - impossible to expand this hypothesis
                continue

            # feed the aligned source character while the source word lasts; afterwards pad with epsilon
            if i < len(source_word):
                blstm_output = blstm_outputs[i]
                try:
                    source_word_input_char_vec = char_lookup[alphabet_index[source_word[i]]]
                except KeyError:
                    # handle unseen characters
                    source_word_input_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                source_word_input_char_vec = char_lookup[alphabet_index[EPSILON]]
                blstm_output = blstm_outputs[source_word_char_vecs_len - 1]

            decoder_input = concatenate([blstm_output,
                                         prev_output_vec,
                                         source_word_input_char_vec,
                                         char_lookup[alphabet_index[str(i)]],
                                         feats_input])

            # prepare input vector and perform LSTM step
            s = prefix_decoder.add_input(decoder_input)

            # compute softmax probs
            decoder_rnn_output = s.output()
            probs = softmax(R * decoder_rnn_output + bias)
            probs = probs.vec_value()

            # expand - create new hypos
            for index, p in enumerate(probs):
                new_seq = list(seq)
                new_seq.append(inverse_alphabet_index[index])
                new_prob = hyp_prob * p
                if new_seq[-1] == END_WORD:
                    # if found a complete sequence - add to final states
                    final_states.append((new_seq[1:-1], new_prob))
                else:
                    new_hypos.append((new_seq, new_prob, s))

        # add the expansions with the largest probability to the beam together with their score and prefix rnn state
        new_probs = [prob for (seq, prob, state) in new_hypos]
        argmax_indices = common.argmax(new_probs, n=beam_width)
        beam[i] = [new_hypos[k] for k in argmax_indices]
        i += 1

    # get the nbest results from the final states found in the search
    final_probs = [prob for (seq, prob) in final_states]
    argmax_indices = common.argmax(final_probs, n=nbest)
    nbest_templates = [final_states[k] for k in argmax_indices]

    return nbest_templates
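
# common.argmax is not shown in this excerpt; consistent with its usage above,
# it returns the index of the single largest value by default, and the indices
# of the n largest values when n is given. A minimal pure-Python sketch of
# such a function (the real common.py implementation may differ):
def _sketch_argmax(values, n=None):
    if n is None:
        return max(range(len(values)), key=lambda k: values[k])
    return sorted(range(len(values)), key=lambda k: values[k], reverse=True)[:n]

# Design note: the beam search above multiplies raw probabilities; for long
# output sequences, summing log-probabilities is the standard way to avoid
# numerical underflow.
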
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict outputs; allow up to three times
    # the max prediction length, since STEP outputs are interleaved with characters
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        blstm_outputs[i],
                                        feats_input])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check whether a STEP or a character was output, to advance i or j accordingly
        if predicted_output == STEP:
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            if i < len(padded_lemma) - 1:
                i += 1
        else:
            if predicted_output.isdigit():
                # handle copy
                try:
                    # this way END_WORD cannot be copied (mirroring the training stage)
                    if i < len(lemma) + 1:
                        prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                    else:
                        # if trying to copy from a non-existent index, pad with last lemma character
                        prev_char_vec = char_lookup[alphabet_index[lemma[-1]]]
                except KeyError:
                    prev_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                # handle char
                prev_char_vec = char_lookup[predicted_output_index]

            j += 1

        num_outputs += 1
        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[:-1]
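
# The decoder above interleaves STEP actions (advance the input pointer),
# digit outputs (copy the aligned lemma character) and literal characters.
# A minimal standalone sketch of turning such an action sequence into a
# surface string, simplified to an unpadded lemma (hypothetical helper, not
# part of the original project):
def _sketch_actions_to_word(actions, lemma, step_symbol):
    i = 0
    output_chars = []
    for action in actions:
        if action == step_symbol:
            # advance the input pointer, staying within the lemma
            i = min(i + 1, len(lemma) - 1)
        elif action.isdigit():
            # copy the currently aligned lemma character
            output_chars.append(lemma[i])
        else:
            # emit a literal character
            output_chars.append(action)
    return ''.join(output_chars)
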
def predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, alphabet_index,
                                inverse_alphabet_index):
    renew_cg()

    # read the parameters
    lookup = model["lookup"]
    # noinspection PyPep8Naming
    R = parameter(model["R"])
    bias = parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(lookup[alphabet_index[UNK]])

    # bilstm forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    for c in lemma_char_vecs:
        s = s.add_input(c)
    encoder_frnn_h = s.h()

    # bilstm backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
    encoder_rrnn_h = s.h()

    # concatenate the BiLSTM final hidden states (the last layer if the RNNs are multi-layered)
    encoded = concatenate([encoder_frnn_h[-1], encoder_rrnn_h[-1]])

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_template = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # feed the current padded-lemma character; once the lemma is exhausted
        # (or the character is unseen, which would otherwise raise a KeyError),
        # pad with epsilon
        if i < len(padded_lemma) and padded_lemma[i] in alphabet_index:
            lemma_input_char_vec = lookup[alphabet_index[padded_lemma[i]]]
        else:
            lemma_input_char_vec = lookup[alphabet_index[EPSILON]]

        decoder_input = concatenate([encoded, prev_output_vec, lemma_input_char_vec, lookup[alphabet_index[str(i)]]])

        # perform the LSTM step
        s = s.add_input(decoder_input)

        # compute softmax probs and predict
        decoder_rnn_output = s.output()
        probs = softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        next_char_index = common.argmax(probs)
        predicted_template.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_template[-1] == END_WORD:
            break

        # prepare for the next iteration - here the decoder's own output vector
        # is fed back, rather than the predicted character's embedding
        prev_output_vec = decoder_rnn_output
        i += 1

    # remove the end word symbol (BEGIN_WORD is never appended to the template)
    return predicted_template[:-1]
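
# A predicted template mixes literal characters with digit tokens. Assuming the
# digit tokens denote lemma positions to copy from (an assumption about this
# project's template format), instantiating a template could look like this
# hypothetical sketch:
def _sketch_instantiate_template(template, lemma):
    output_chars = []
    for token in template:
        if token.isdigit():
            # copy the lemma character at the given position
            output_chars.append(lemma[int(token)])
        else:
            # emit the literal character
            output_chars.append(token)
    return ''.join(output_chars)

# _sketch_instantiate_template(['0', '1', '2', '3', 'i', 'n', 'g'], 'walk')
# -> 'walking'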