Ejemplo n.º 1
0
def generate(model, input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0: break
        return i

    embedded = embedd_sentence(model, input)
    encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = pc.parameter(model["decoder_w"])
    b = pc.parameter(model["decoder_b"])

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))
    out = ''
    count_EOS = 0
    for i in range(len(input)*2):
        if count_EOS == 2: break
        vector = attend(model, encoded, s)

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        probs = probs.vec_value()
        next_char = sample(probs)
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Ejemplo n.º 2
0
def predict(sent, model, builders):
    """
    predict tags and demographic labels
    :param sent:
    :param model:
    :param builders:
    :return: tag and label predictions
    """
    if MLP:
        H = pycnn.parameter(pH)
        O = pycnn.parameter(pO)
    else:
        O = pycnn.parameter(pO)

    tags = []
    for forward_state, backward_state in build_tagging_graph(words, model, builders):
        if MLP:
            r_t = O * (pycnn.tanh(H * pycnn.concatenate([forward_state, backward_state])))
        else:
            r_t = O * pycnn.concatenate([forward_state, backward_state])

        out = pycnn.softmax(r_t)
        chosen = np.argmax(out.npvalue())
        tags.append(vocab_tags.i2w[chosen])

    return tags
Ejemplo n.º 3
0
 def tag_sentence(self, sentence):
     word_expression_list = self._build_word_expression_list(sentence,
                                                             is_train=False)
     for word, word_expression in zip(sentence, word_expression_list):
         out = softmax(word_expression)
         tag_index = np.argmax(out.npvalue())
         word.tag = self.tag_indexer.get_object(tag_index)
Ejemplo n.º 4
0
def generate(model, input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0: break
        return i

    embedded = embedd_sentence(model, input)
    encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = pc.parameter(model["decoder_w"])
    b = pc.parameter(model["decoder_b"])

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))
    out = ''
    count_EOS = 0
    for i in range(len(input) * 2):
        if count_EOS == 2: break
        vector = attend(model, encoded, s)

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        probs = probs.vec_value()
        next_char = sample(probs)
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Ejemplo n.º 5
0
def predict(word_indices, model, builder, target):
    """
    predict demographic label
    :param word_indices:
    :param model:
    :param builder:
    :return: tag and label predictions
    """
    forward_states = build_tagging_graph(word_indices, model, builder)
    final_state = forward_states[-1]

    H = pycnn.parameter(pH)
    bias_H = pycnn.parameter(biasH)
    H2 = pycnn.parameter(pH2)
    bias_H2 = pycnn.parameter(biasH2)

    if target in ['age', 'both', 'joint']:
        O = pycnn.parameter(pOutAge)
        bias_O = pycnn.parameter(biasOutAge)
    elif target == 'gender':
        O = pycnn.parameter(pOutGender)
        bias_O = pycnn.parameter(biasOutGender)

    if target == 'both':
        O2 = pycnn.parameter(pOutGender)
        bias_O2 = pycnn.parameter(biasOutGender)


    if target == 'both':
        # hidden = bias_H2 + pycnn.tanh(H2 * (bias_H + pycnn.tanh(H * final_state)))
        hidden = bias_H + pycnn.tanh(H * final_state)
        r_age = bias_O + (O * hidden)
        r_gender = bias_O2 + (O2 * hidden)

        out_age = pycnn.softmax(r_age)
        out_gender = pycnn.softmax(r_gender)

        return [np.argmax(out_age.npvalue()), np.argmax(out_gender.npvalue())]

    else:
        # r_t = bias_O + (O * (bias_H2 + pycnn.tanh(H2 * (bias_H + pycnn.tanh(H * final_state)))))
        r_t = bias_O + (O * (bias_H + (H * final_state)))

        out = pycnn.softmax(r_t)
        chosen = np.argmax(out.npvalue())

        return chosen
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # encode the lemma
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
Ejemplo n.º 7
0
    def tag_sentence(self, sentence):
        renew_cg()

        for word in sentence:
            word.vector = self._get_word_vector(word, use_dropout=False)

        sentence_expressions = self._build_sentence_expressions(sentence)
        for word, word_expression in zip(sentence, sentence_expressions):
            out = softmax(word_expression)
            tag_index = np.argmax(out.npvalue())
            word.tag = self.tag_indexer.get_object(tag_index)
Ejemplo n.º 8
0
    def tag_sentence(self, sentence):
        renew_cg()

        for word in sentence:
            word.vector = self._get_word_vector(word, use_dropout=False)

        sentence_expressions = self._build_sentence_expressions(sentence)
        for word, word_expression in zip(sentence, sentence_expressions):
            out = softmax(word_expression)
            tag_index = np.argmax(out.npvalue())
            word.tag = self.tag_indexer.get_object(tag_index)
Ejemplo n.º 9
0
def attend(model, input_vectors, state):
    w1 = pc.parameter(model['attention_w1'])
    w2 = pc.parameter(model['attention_w2'])
    v = pc.parameter(model['attention_v'])
    attention_weights = []

    w2dt = w2*pc.concatenate(list(state.s()))
    for input_vector in input_vectors:
        attention_weight = v*pc.tanh(w1*input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    output_vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
    return output_vectors
Ejemplo n.º 10
0
def attend(model, input_vectors, state):
    w1 = pc.parameter(model['attention_w1'])
    w2 = pc.parameter(model['attention_w2'])
    v = pc.parameter(model['attention_v'])
    attention_weights = []

    w2dt = w2*pc.concatenate(list(state.s()))
    for input_vector in input_vectors:
        attention_weight = v*pc.tanh(w1*input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    output_vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
    return output_vectors
Ejemplo n.º 11
0
def attend(model, vectors, state):
    w = pc.parameter(model['attention_w'])
    attention_weights = []
    for vector in vectors:
        #concatenate each encoded vector with the current decoder state
        attention_input = pc.concatenate([vector, pc.concatenate(list(state.s()))])
        #get the attention wieght for the decoded vector
        attention_weights.append(w * attention_input)
    #normalize the weights
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    #apply the weights
    vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(vectors, attention_weights)])
    return vectors
Ejemplo n.º 12
0
def decode(model, dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = pc.parameter(model["decoder_w"])
    b = pc.parameter(model["decoder_b"])

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE*2))

    loss = []
    for char in output:
        vector = attend(model, vectors, s)

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        loss.append(-pc.log(pc.pick(probs, char)))
    loss = pc.esum(loss)
    return loss
Ejemplo n.º 13
0
def decode(model, dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = pc.parameter(model["decoder_w"])
    b = pc.parameter(model["decoder_b"])

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))

    loss = []
    for char in output:
        vector = attend(model, vectors, s)

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        loss.append(-pc.log(pc.pick(probs, char)))
    loss = pc.esum(loss)
    return loss
def one_word_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, aligned_pair,
                  feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    padded_lemma = BEGIN_WORD + lemma + END_WORD

    # convert characters to matching embeddings
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLTSM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []

    # i is input index, j is output index
    i = 0
    j = 0

    # go through alignments, progress j when new output is introduced, progress i when new char is seen on lemma (no ~)
    # TODO: try sutskever flip trick?
    # TODO: attention on the lemma chars/feats could help here?
    aligned_lemma, aligned_word = aligned_pair
    aligned_lemma += END_WORD
    aligned_word += END_WORD

    # run through the alignments
    for index, (input_char, output_char) in enumerate(zip(aligned_lemma, aligned_word)):
        possible_outputs = []

        # feedback, i, j, blstm[i], feats
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        # if reached the end word symbol
        if output_char == END_WORD:
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[END_WORD])))
            continue

        # if there is no prefix, step
        if padded_lemma[i] == BEGIN_WORD and aligned_lemma[index] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

        # if there is new output
        if aligned_word[index] != ALIGN_SYMBOL:
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            # copy i action - maybe model as a single action?
            if padded_lemma[i] == aligned_word[j]:
                possible_outputs.append(str(i))
                possible_outputs.append(padded_lemma[i])
            else:
                possible_outputs.append(aligned_word[index])

            # perform rnn step
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            local_loss = pc.scalarInput(0)
            max_output_loss = -pc.log(pc.pick(probs, alphabet_index[possible_outputs[0]]))
            max_likelihood_output = possible_outputs[0]

            # sum over all correct output possibilities and pick feedback output to be the one with the highest
            # probability
            for output in possible_outputs:
                neg_log_likelihood = -pc.log(pc.pick(probs, alphabet_index[output]))
                if neg_log_likelihood < max_output_loss:
                    max_likelihood_output = output
                    max_output_loss = neg_log_likelihood

                local_loss += neg_log_likelihood
            loss.append(local_loss)

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[max_likelihood_output]]
            prev_char_vec = char_lookup[alphabet_index[aligned_word[index]]]
            j += 1

        # now check if it's time to progress on input
        if i < len(padded_lemma) - 1 and aligned_lemma[index + 1] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

    # TODO: maybe here a "special" loss function is appropriate?
    # loss = esum(loss)
    loss = pc.average(loss)

    return loss
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLTSM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters, twice max prediction as step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i or j.
        if predicted_output == STEP:
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            if i < len(padded_lemma) - 1:
                i += 1
        else:
            if predicted_output.isdigit():
                # handle copy
                # try:
                #     prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                # except KeyError:
                #     prev_char_vec = char_lookup[alphabet_index[UNK]]
                try:
                    # this way END_WORD cannot be copied (as it is in the training stage)
                    if i < len(lemma) + 1:
                        prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                    else:
                        # if trying to copy from a non-existent index, pad with last lemma character
                        prev_char_vec = char_lookup[alphabet_index[lemma[-1]]]
                except KeyError:
                    prev_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                # handle char
                prev_char_vec = char_lookup[predicted_output_index]

            j += 1

        num_outputs += 1
        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def one_word_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, aligned_pair,
                  feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    padded_lemma = BEGIN_WORD + lemma + END_WORD

    # convert characters to matching embeddings
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLTSM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []

    # i is input index, j is output index
    i = 0
    j = 0

    # go through alignments, progress j when new output is introduced, progress i when new char is seen on lemma (no ~)
    # TODO: try sutskever flip trick?
    # TODO: attention on the lemma chars/feats could help here?
    aligned_lemma, aligned_word = aligned_pair
    aligned_lemma += END_WORD
    aligned_word += END_WORD

    # run through the alignments
    for index, (input_char, output_char) in enumerate(zip(aligned_lemma, aligned_word)):
        possible_outputs = []

        # feedback, i, j, blstm[i], feats
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        # if reached the end word symbol
        if output_char == END_WORD:
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[END_WORD])))
            continue

        # if there is no prefix, step
        if padded_lemma[i] == BEGIN_WORD and aligned_lemma[index] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

        # if there is new output
        if aligned_word[index] != ALIGN_SYMBOL:
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            # copy i action - maybe model as a single action?
            if padded_lemma[i] == aligned_word[j]:
                possible_outputs.append(str(i))
                possible_outputs.append(padded_lemma[i])
            else:
                possible_outputs.append(aligned_word[index])

            # perform rnn step
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            local_loss = pc.scalarInput(0)
            max_output_loss = -pc.log(pc.pick(probs, alphabet_index[possible_outputs[0]]))
            max_likelihood_output = possible_outputs[0]

            # sum over all correct output possibilities and pick feedback output to be the one with the highest
            # probability
            for output in possible_outputs:
                neg_log_likelihood = -pc.log(pc.pick(probs, alphabet_index[output]))
                if neg_log_likelihood < max_output_loss:
                    max_likelihood_output = output
                    max_output_loss = neg_log_likelihood

                local_loss += neg_log_likelihood
            loss.append(local_loss)

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[max_likelihood_output]]
            prev_char_vec = char_lookup[alphabet_index[aligned_word[index]]]
            j += 1

        # now check if it's time to progress on input
        if i < len(padded_lemma) - 1 and aligned_lemma[index + 1] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

    # TODO: maybe here a "special" loss function is appropriate?
    # loss = esum(loss)
    loss = pc.average(loss)

    return loss
Ejemplo n.º 17
0
    def train(
        feature_mapper,
        word_dims,
        tag_dims,
        lstm_units,
        hidden_units,
        epochs,
        batch_size,
        train_data_file,
        dev_data_file,
        model_save_file,
        droprate,
        unk_param,
        alpha=1.0,
        beta=0.0,
    ):

        start_time = time.time()

        fm = feature_mapper
        word_count = fm.total_words()
        tag_count = fm.total_tags()

        network = Network(
            word_count=word_count,
            tag_count=tag_count,
            word_dims=word_dims,
            tag_dims=tag_dims,
            lstm_units=lstm_units,
            hidden_units=hidden_units,
            struct_out=2,
            label_out=fm.total_label_actions(),
            droprate=droprate,
        )
        network.init_params()

        print('Hidden units: {},  per-LSTM units: {}'.format(
            hidden_units,
            lstm_units,
        ))
        print('Embeddings: word={}  tag={}'.format(
            (word_count, word_dims),
            (tag_count, tag_dims),
        ))
        print('Dropout rate: {}'.format(droprate))
        print('Parameters initialized in [-0.01, 0.01]')
        print('Random UNKing parameter z = {}'.format(unk_param))
        print('Exploration: alpha={} beta={}'.format(alpha, beta))

        training_data = fm.gold_data_from_file(train_data_file)
        num_batches = -(-len(training_data) // batch_size)
        print('Loaded {} training sentences ({} batches of size {})!'.format(
            len(training_data),
            num_batches,
            batch_size,
        ))
        parse_every = -(-num_batches // 4)

        dev_trees = PhraseTree.load_treefile(dev_data_file)
        print('Loaded {} validation trees!'.format(len(dev_trees)))

        best_acc = FScore()

        for epoch in xrange(1, epochs + 1):
            print('........... epoch {} ...........'.format(epoch))

            total_cost = 0.0
            total_states = 0
            training_acc = FScore()

            np.random.shuffle(training_data)

            for b in xrange(num_batches):
                batch = training_data[(b * batch_size):((b + 1) * batch_size)]

                explore = [
                    Parser.exploration(
                        example,
                        fm,
                        network,
                        alpha=alpha,
                        beta=beta,
                    ) for example in batch
                ]
                for (_, acc) in explore:
                    training_acc += acc

                batch = [example for (example, _) in explore]

                pycnn.renew_cg()
                network.prep_params()

                errors = []

                for example in batch:

                    ## random UNKing ##
                    for (i, w) in enumerate(example['w']):
                        if w <= 2:
                            continue

                        freq = fm.word_freq_list[w]
                        drop_prob = unk_param / (unk_param + freq)
                        r = np.random.random()
                        if r < drop_prob:
                            example['w'][i] = 0

                    fwd, back = network.evaluate_recurrent(
                        example['w'],
                        example['t'],
                    )

                    for (left,
                         right), correct in example['struct_data'].items():
                        scores = network.evaluate_struct(
                            fwd, back, left, right)

                        probs = pycnn.softmax(scores)
                        loss = -pycnn.log(pycnn.pick(probs, correct))
                        errors.append(loss)
                    total_states += len(example['struct_data'])

                    for (left,
                         right), correct in example['label_data'].items():
                        scores = network.evaluate_label(fwd, back, left, right)

                        probs = pycnn.softmax(scores)
                        loss = -pycnn.log(pycnn.pick(probs, correct))
                        errors.append(loss)
                    total_states += len(example['label_data'])

                batch_error = pycnn.esum(errors)
                total_cost += batch_error.scalar_value()
                batch_error.backward()
                network.trainer.update()

                mean_cost = total_cost / total_states

                print(
                    '\rBatch {}  Mean Cost {:.4f} [Train: {}]'.format(
                        b,
                        mean_cost,
                        training_acc,
                    ),
                    end='',
                )
                sys.stdout.flush()

                if ((b + 1) % parse_every) == 0 or b == (num_batches - 1):
                    dev_acc = Parser.evaluate_corpus(
                        dev_trees,
                        fm,
                        network,
                    )
                    print('  [Val: {}]'.format(dev_acc))

                    if dev_acc > best_acc:
                        best_acc = dev_acc
                        network.save(model_save_file)
                        print('    [saved model: {}]'.format(model_save_file))

            current_time = time.time()
            runmins = (current_time - start_time) / 60.
            print('  Elapsed time: {:.2f}m'.format(runmins))
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn,
                            lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # encode the lemma
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn,
                                 encoder_rrnn, lemma)

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLTSM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters, twice max prediction as step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i or j.
        if predicted_output == STEP:
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            if i < len(padded_lemma) - 1:
                i += 1
        else:
            if predicted_output.isdigit():
                # handle copy
                # try:
                #     prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                # except KeyError:
                #     prev_char_vec = char_lookup[alphabet_index[UNK]]
                try:
                    # this way END_WORD cannot be copied (as it is in the training stage)
                    if i < len(lemma) + 1:
                        prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                    else:
                        # if trying to copy from a non-existent index, pad with last lemma character
                        prev_char_vec = char_lookup[alphabet_index[lemma[-1]]]
                except KeyError:
                    prev_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                # handle char
                prev_char_vec = char_lookup[predicted_output_index]

            j += 1

        num_outputs += 1
        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
Ejemplo n.º 20
0
 def tag_sentence(self, sentence):
     word_expression_list = self._build_word_expression_list(sentence, is_train=False)
     for word, word_expression in zip(sentence, word_expression_list):
         out = softmax(word_expression)
         tag_index = np.argmax(out.npvalue())
         word.tag = self.tag_indexer.get_object(tag_index)