Esempio n. 1
0
def predict(sent, model, builders):
    """
    predict tags and demographic labels
    :param sent:
    :param model:
    :param builders:
    :return: tag and label predictions
    """
    if MLP:
        H = pycnn.parameter(pH)
        O = pycnn.parameter(pO)
    else:
        O = pycnn.parameter(pO)

    tags = []
    for forward_state, backward_state in build_tagging_graph(words, model, builders):
        if MLP:
            r_t = O * (pycnn.tanh(H * pycnn.concatenate([forward_state, backward_state])))
        else:
            r_t = O * pycnn.concatenate([forward_state, backward_state])

        out = pycnn.softmax(r_t)
        chosen = np.argmax(out.npvalue())
        tags.append(vocab_tags.i2w[chosen])

    return tags
Esempio n. 2
0
    def evaluate_recurrent(self, word_inds, tag_inds, test=False):

        fwd1 = self.fwd_lstm1.initial_state()
        back1 = self.back_lstm1.initial_state()

        fwd2 = self.fwd_lstm2.initial_state()
        back2 = self.back_lstm2.initial_state()

        sentence = []

        for (w, t) in zip(word_inds, tag_inds):
            wordvec = pycnn.lookup(self.model['word-embed'], w)
            tagvec = pycnn.lookup(self.model['tag-embed'], t)
            vec = pycnn.concatenate([wordvec, tagvec])
            sentence.append(vec)

        fwd1_out = []
        for vec in sentence:
            fwd1 = fwd1.add_input(vec)
            fwd_vec = fwd1.output()
            fwd1_out.append(fwd_vec)

        back1_out = []
        for vec in reversed(sentence):
            back1 = back1.add_input(vec)
            back_vec = back1.output()
            back1_out.append(back_vec)

        lstm2_input = []
        for (f, b) in zip(fwd1_out, reversed(back1_out)):
            lstm2_input.append(pycnn.concatenate([f, b]))

        fwd2_out = []
        for vec in lstm2_input:
            if self.droprate > 0 and not test:
                vec = pycnn.dropout(vec, self.droprate)
            fwd2 = fwd2.add_input(vec)
            fwd_vec = fwd2.output()
            fwd2_out.append(fwd_vec)

        back2_out = []
        for vec in reversed(lstm2_input):
            if self.droprate > 0 and not test:
                vec = pycnn.dropout(vec, self.droprate)
            back2 = back2.add_input(vec)
            back_vec = back2.output()
            back2_out.append(back_vec)

        fwd_out = [
            pycnn.concatenate([f1, f2])
            for (f1, f2) in zip(fwd1_out, fwd2_out)
        ]
        back_out = [
            pycnn.concatenate([b1, b2])
            for (b1, b2) in zip(back1_out, back2_out)
        ]

        return fwd_out, back_out[::-1]
Esempio n. 3
0
    def predict(self, word_indices, char_indices, task_id, train=False):
        """
        predict tags for a sentence represented as char+word embeddings
        """
        pycnn.renew_cg() # new graph

        char_emb = []
        rev_char_emb = []
        # get representation for words
        for chars_of_token in char_indices:
            # use last state as word representation
            last_state = self.char_rnn.predict_sequence([self.cembeds[c] for c in chars_of_token])[-1]
            rev_last_state = self.char_rnn.predict_sequence([self.cembeds[c] for c in reversed(chars_of_token)])[-1]
            char_emb.append(last_state)
            rev_char_emb.append(rev_last_state)
            
        wfeatures = [self.wembeds[w] for w in word_indices]
        features = [pycnn.concatenate([w,c,rev_c]) for w,c,rev_c in zip(wfeatures,char_emb,reversed(rev_char_emb))]
        
        if train: # only do at training time
            features = [pycnn.noise(fe,self.noise_sigma) for fe in features]

        output_expected_at_layer = self.predictors["task_expected_at"][task_id]
        output_expected_at_layer -=1

        # go through layers
        # input is now combination of w + char emb
        prev = features
        num_layers = self.h_layers
#        for i in range(0,num_layers-1):
        for i in range(0,num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(prev)        
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [self.activation(s) for s in forward_sequence]
                backward_sequence = [self.activation(s) for s in backward_sequence]

            if i == output_expected_at_layer:
                output_predictor = self.predictors["output_layers_dict"][task_id] 
                concat_layer = [pycnn.concatenate([f, b]) for f, b in zip(forward_sequence,reversed(backward_sequence))]

                if train and self.noise_sigma > 0.0:
                    concat_layer = [pycnn.noise(fe,self.noise_sigma) for fe in concat_layer]
                output = output_predictor.predict_sequence(concat_layer)
                return output

            prev = forward_sequence
            prev_rev = backward_sequence # not used

        raise "oops should not be here"
        return None
Esempio n. 4
0
def attend(model, input_vectors, state):
    w1 = pc.parameter(model['attention_w1'])
    w2 = pc.parameter(model['attention_w2'])
    v = pc.parameter(model['attention_v'])
    attention_weights = []

    w2dt = w2*pc.concatenate(list(state.s()))
    for input_vector in input_vectors:
        attention_weight = v*pc.tanh(w1*input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    output_vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
    return output_vectors
Esempio n. 5
0
def attend(model, input_vectors, state):
    w1 = pc.parameter(model['attention_w1'])
    w2 = pc.parameter(model['attention_w2'])
    v = pc.parameter(model['attention_v'])
    attention_weights = []

    w2dt = w2*pc.concatenate(list(state.s()))
    for input_vector in input_vectors:
        attention_weight = v*pc.tanh(w1*input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    output_vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
    return output_vectors
Esempio n. 6
0
def attend(model, vectors, state):
    w = pc.parameter(model['attention_w'])
    attention_weights = []
    for vector in vectors:
        #concatenate each encoded vector with the current decoder state
        attention_input = pc.concatenate([vector, pc.concatenate(list(state.s()))])
        #get the attention wieght for the decoded vector
        attention_weights.append(w * attention_input)
    #normalize the weights
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    #apply the weights
    vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(vectors, attention_weights)])
    return vectors
Esempio n. 7
0
    def _build_word_expression_list(self, sentence, is_train=False):
        renew_cg()

        sentence_word_vectors = []
        for word in sentence:
            sentence_word_vectors.append(
                self._get_word_vector(word, use_dropout=is_train))

        lstm_forward = self.word_builders[0].initial_state()
        lstm_backward = self.word_builders[1].initial_state()

        embeddings_forward = []
        embeddings_backward = []
        for word_vector, reverse_word_vector in zip(
                sentence_word_vectors, reversed(sentence_word_vectors)):
            lstm_forward = lstm_forward.add_input(word_vector)
            lstm_backward = lstm_backward.add_input(reverse_word_vector)

            embeddings_forward.append(lstm_forward.output())
            embeddings_backward.append(lstm_backward.output())

        O = parameter(self.param_out)

        sentence_word_expressions = []
        for word_f_embedding, word_b_embedding in zip(
                embeddings_forward, reversed(embeddings_backward)):
            word_concat_embedding = concatenate(
                [word_f_embedding, word_b_embedding])

            word_expression = O * word_concat_embedding
            sentence_word_expressions.append(word_expression)
        return sentence_word_expressions
Esempio n. 8
0
def fit(words, tags, labels, model, builders):
    """
    compute joint error of the
    :param words: list of indices
    :param tags: list of indices
    :param labels: index
    :param model: current model to access parameters
    :param builders: builder to create state combinations
    :return: joint error
    """
    # retrieve model parameters
    if MLP:
        H = pycnn.parameter(pH)
        O = pycnn.parameter(pO)
    else:
        O = pycnn.parameter(pO)

    errs = []
    for (forward_state, backward_state), tag in zip(build_tagging_graph(words, model, builders), tags):
        f_b = pycnn.concatenate([forward_state, backward_state])
        if MLP:
            # TODO: add bias terms
            r_t = O * (pycnn.tanh(H * f_b))
        else:
            r_t = O * f_b
        err = pycnn.pickneglogsoftmax(r_t, tag)
        errs.append(err)

    return pycnn.esum(errs)
Esempio n. 9
0
    def _build_word_expression_list(self, sentence, is_train=False):
        renew_cg()

        sentence_word_vectors = []
        for word in sentence:
            sentence_word_vectors.append(self._get_word_vector(word, use_dropout=is_train))

        lstm_forward = self.word_builders[0].initial_state()
        lstm_backward = self.word_builders[1].initial_state()

        embeddings_forward = []
        embeddings_backward = []
        for word_vector, reverse_word_vector in zip(sentence_word_vectors, reversed(sentence_word_vectors)):
            lstm_forward = lstm_forward.add_input(word_vector)
            lstm_backward = lstm_backward.add_input(reverse_word_vector)

            embeddings_forward.append(lstm_forward.output())
            embeddings_backward.append(lstm_backward.output())

        O = parameter(self.param_out)

        sentence_word_expressions = []
        for word_f_embedding, word_b_embedding in zip(embeddings_forward, reversed(embeddings_backward)):
            word_concat_embedding = concatenate([word_f_embedding, word_b_embedding])

            word_expression = O * word_concat_embedding
            sentence_word_expressions.append(word_expression)
        return sentence_word_expressions
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # encode the lemma
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
Esempio n. 11
0
    def _get_word_vector(self, word, use_dropout=False):
        word_embedding = self._get_word_embedding(word)
        char_representation = self._get_char_representation(word)

        word_vector = concatenate([word_embedding, char_representation])
        if use_dropout:
            word_vector = dropout(word_vector, 0.5)
        return word_vector
Esempio n. 12
0
    def _get_word_vector(self, word, use_dropout=False):
        word_embedding = self._get_word_embedding(word)
        char_representation = self._get_char_representation(word)

        word_vector = concatenate([word_embedding, char_representation])
        if use_dropout:
            word_vector = dropout(word_vector, 0.5)
        return word_vector
Esempio n. 13
0
def encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, sentence):
    sentence_rev = [sentence[i] for i in range(len(sentence)-1, -1, -1)]

    fwd_vectors = run_lstm(model, enc_fwd_lstm.initial_state(), sentence)
    bwd_vectors = run_lstm(model, enc_bwd_lstm.initial_state(), sentence_rev)
    bwd_vectors = [bwd_vectors[i] for i in range(len(bwd_vectors)-1, -1, -1)]
    vectors = [pc.concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)]

    return vectors
def encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, sentence):
    sentence_rev = [sentence[i] for i in range(len(sentence) - 1, -1, -1)]

    fwd_vectors = run_lstm(model, enc_fwd_lstm.initial_state(), sentence)
    bwd_vectors = run_lstm(model, enc_bwd_lstm.initial_state(), sentence_rev)
    bwd_vectors = [bwd_vectors[i] for i in range(len(bwd_vectors) - 1, -1, -1)]
    vectors = [pc.concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)]

    return vectors
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats,
                 word, alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn,
                                 encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        current_loss = pc.pickneglogsoftmax(readout,
                                            alphabet_index[output_char])
        # print 'computed readout layer'
        loss.append(current_loss)

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)

    return total_sequence_loss
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)


    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        # print 'computed readout layer'
        loss.append(current_loss)

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)

    return total_sequence_loss
Esempio n. 17
0
    def evaluate_label(self, fwd_out, back_out, lefts, rights, test=False):

        fwd_span_out = []
        for left_index, right_index in zip(lefts, rights):
            fwd_span_out.append(fwd_out[right_index] - fwd_out[left_index - 1])
        fwd_span_vec = pycnn.concatenate(fwd_span_out)

        back_span_out = []
        for left_index, right_index in zip(lefts, rights):
            back_span_out.append(back_out[left_index] -
                                 back_out[right_index + 1])
        back_span_vec = pycnn.concatenate(back_span_out)

        hidden_input = pycnn.concatenate([fwd_span_vec, back_span_vec])

        if self.droprate > 0 and not test:
            hidden_input = pycnn.dropout(hidden_input, self.droprate)

        hidden_output = self.activation(self.W1_label * hidden_input +
                                        self.b1_label)

        scores = (self.W2_label * hidden_output + self.b2_label)

        return scores
def encode_feats(feat_index, feat_lookup, feats, feature_types):
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # yes since each location has its own weights
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    return pc.concatenate(feat_vecs)
def encode_feats(feat_index, feat_lookup, feats, feature_types):
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # yes since each location has its own weights
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    return pc.concatenate(feat_vecs)
Esempio n. 20
0
    def _get_char_representation(self, word):
        word_char_vectors = []
        for char in word.text:
            char_index = self.char_indexer.get_index(char)
            if char_index is None:
                print "Warning: Unexpected char '%s' (word='%s')" % (char, word.text)
                continue
            char_vector = lookup(self.model["char_lookup"], char_index)
            word_char_vectors.append(char_vector)

        lstm_forward = self.char_builders[0].initial_state()
        lstm_backward = self.char_builders[1].initial_state()

        for char_vector, reverse_char_vector in zip(word_char_vectors, reversed(word_char_vectors)):
            lstm_forward = lstm_forward.add_input(char_vector)
            lstm_backward = lstm_backward.add_input(reverse_char_vector)
        return concatenate([lstm_forward.output(), lstm_backward.output()])
Esempio n. 21
0
        def add_input(self, input_vec):
            """
            Note that this function updates the existing State object!
            """
            x = pycnn.concatenate([input_vec, self.h])

            i = pycnn.logistic(self.W_i * x + self.b_i)
            f = pycnn.logistic(self.W_f * x + self.b_f)
            g = pycnn.tanh(self.W_c * x + self.b_c)
            o = pycnn.logistic(self.W_o * x + self.b_o)

            c = pycnn.cwise_multiply(f, self.c) + pycnn.cwise_multiply(i, g)
            h = pycnn.cwise_multiply(o, pycnn.tanh(c))

            self.c = c
            self.h = h
            self.outputs.append(h)

            return self
Esempio n. 22
0
    def _get_char_representation(self, word, use_dropout):
        word_char_vectors = []
        for char in word.text:
            char_index = self.char_indexer.get_index(char)
            if char_index is None:
                print "Warning: Unexpected char '%s' (word='%s')" % (char,
                                                                     word.text)
                continue
            char_vector = lookup(self.model["char_lookup"], char_index)
            word_char_vectors.append(char_vector)

        lstm_forward = self.char_builders[0].initial_state()
        lstm_backward = self.char_builders[1].initial_state()

        for char_vector, reverse_char_vector in zip(
                word_char_vectors, reversed(word_char_vectors)):
            lstm_forward = lstm_forward.add_input(char_vector)
            lstm_backward = lstm_backward.add_input(reverse_char_vector)
        return concatenate([lstm_forward.output(), lstm_backward.output()])
Esempio n. 23
0
    def _build_sentence_expressions(self, sentence):
        lstm_forward = self.word_builders[0].initial_state()
        lstm_backward = self.word_builders[1].initial_state()

        embeddings_forward = []
        embeddings_backward = []
        for word, reverse_word in zip(sentence, reversed(sentence)):
            lstm_forward = lstm_forward.add_input(word.vector)
            lstm_backward = lstm_backward.add_input(reverse_word.vector)

            embeddings_forward.append(lstm_forward.output())
            embeddings_backward.append(lstm_backward.output())

        H = parameter(self.param_hidden)
        O = parameter(self.param_out)

        sentence_expressions = []
        for word_f_embedding, word_b_embedding in zip(embeddings_forward, reversed(embeddings_backward)):
            word_concat_embedding = concatenate([word_f_embedding, word_b_embedding])
            word_expression = O * self.activation(H * word_concat_embedding)
            sentence_expressions.append(word_expression)
        return sentence_expressions
Esempio n. 24
0
    def _build_sentence_expressions(self, sentence):
        lstm_forward = self.word_builders[0].initial_state()
        lstm_backward = self.word_builders[1].initial_state()

        embeddings_forward = []
        embeddings_backward = []
        for word, reverse_word in zip(sentence, reversed(sentence)):
            lstm_forward = lstm_forward.add_input(word.vector)
            lstm_backward = lstm_backward.add_input(reverse_word.vector)

            embeddings_forward.append(lstm_forward.output())
            embeddings_backward.append(lstm_backward.output())

        H = parameter(self.param_hidden)
        O = parameter(self.param_out)

        sentence_expressions = []
        for word_f_embedding, word_b_embedding in zip(
                embeddings_forward, reversed(embeddings_backward)):
            word_concat_embedding = concatenate(
                [word_f_embedding, word_b_embedding])
            word_expression = O * self.activation(H * word_concat_embedding)
            sentence_expressions.append(word_expression)
        return sentence_expressions
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLTSM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters, twice max prediction as step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i or j.
        if predicted_output == STEP:
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            if i < len(padded_lemma) - 1:
                i += 1
        else:
            if predicted_output.isdigit():
                # handle copy
                # try:
                #     prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                # except KeyError:
                #     prev_char_vec = char_lookup[alphabet_index[UNK]]
                try:
                    # this way END_WORD cannot be copied (as it is in the training stage)
                    if i < len(lemma) + 1:
                        prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                    else:
                        # if trying to copy from a non-existent index, pad with last lemma character
                        prev_char_vec = char_lookup[alphabet_index[lemma[-1]]]
                except KeyError:
                    prev_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                # handle char
                prev_char_vec = char_lookup[predicted_output_index]

            j += 1

        num_outputs += 1
        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def one_word_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, aligned_pair,
                  feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    padded_lemma = BEGIN_WORD + lemma + END_WORD

    # convert characters to matching embeddings
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLTSM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []

    # i is input index, j is output index
    i = 0
    j = 0

    # go through alignments, progress j when new output is introduced, progress i when new char is seen on lemma (no ~)
    # TODO: try sutskever flip trick?
    # TODO: attention on the lemma chars/feats could help here?
    aligned_lemma, aligned_word = aligned_pair
    aligned_lemma += END_WORD
    aligned_word += END_WORD

    # run through the alignments
    for index, (input_char, output_char) in enumerate(zip(aligned_lemma, aligned_word)):
        possible_outputs = []

        # feedback, i, j, blstm[i], feats
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        # if reached the end word symbol
        if output_char == END_WORD:
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[END_WORD])))
            continue

        # if there is no prefix, step
        if padded_lemma[i] == BEGIN_WORD and aligned_lemma[index] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

        # if there is new output
        if aligned_word[index] != ALIGN_SYMBOL:
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            # copy i action - maybe model as a single action?
            if padded_lemma[i] == aligned_word[j]:
                possible_outputs.append(str(i))
                possible_outputs.append(padded_lemma[i])
            else:
                possible_outputs.append(aligned_word[index])

            # perform rnn step
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            local_loss = pc.scalarInput(0)
            max_output_loss = -pc.log(pc.pick(probs, alphabet_index[possible_outputs[0]]))
            max_likelihood_output = possible_outputs[0]

            # sum over all correct output possibilities and pick feedback output to be the one with the highest
            # probability
            for output in possible_outputs:
                neg_log_likelihood = -pc.log(pc.pick(probs, alphabet_index[output]))
                if neg_log_likelihood < max_output_loss:
                    max_likelihood_output = output
                    max_output_loss = neg_log_likelihood

                local_loss += neg_log_likelihood
            loss.append(local_loss)

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[max_likelihood_output]]
            prev_char_vec = char_lookup[alphabet_index[aligned_word[index]]]
            j += 1

        # now check if it's time to progress on input
        if i < len(padded_lemma) - 1 and aligned_lemma[index + 1] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

    # TODO: maybe here a "special" loss function is appropriate?
    # loss = esum(loss)
    loss = pc.average(loss)

    return loss
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLTSM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters, twice max prediction as step outputs are added
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i or j.
        if predicted_output == STEP:
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            if i < len(padded_lemma) - 1:
                i += 1
        else:
            if predicted_output.isdigit():
                # handle copy
                # try:
                #     prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                # except KeyError:
                #     prev_char_vec = char_lookup[alphabet_index[UNK]]
                try:
                    # this way END_WORD cannot be copied (as it is in the training stage)
                    if i < len(lemma) + 1:
                        prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                    else:
                        # if trying to copy from a non-existent index, pad with last lemma character
                        prev_char_vec = char_lookup[alphabet_index[lemma[-1]]]
                except KeyError:
                    prev_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                # handle char
                prev_char_vec = char_lookup[predicted_output_index]

            j += 1

        num_outputs += 1
        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = task1_attention_implementation.encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index,
                                           feat_lookup, feats, feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # find all possible actions - copy from index, output specific character etc.
        possible_outputs = list(set([padded_template[i]]))# + [output_char]))

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, prev_char_vec]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # choose which feedback based on minimum neg. log likelihood: initialize with the character loss
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            if current_loss < min_neg_log_loss:
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)

    return total_sequence_loss
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn,
                            lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # encode the lemma
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn,
                                 encoder_rrnn, lemma)

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
Esempio n. 30
0
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats,
                 word, alphabet_index, feat_index, feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn,
                                 encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # find all possible actions - copy from index, output specific character etc.
        possible_outputs = list(set([padded_template[i]] + [output_char]))

        # get current h of the decoder
        s = s.add_input(
            pc.concatenate([prev_output_vec, prev_char_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # choose which feedback based on minimum neg. log likelihood: initialize with the character loss
        min_neg_log_loss = pc.pickneglogsoftmax(readout,
                                                alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout,
                                                alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            if current_loss < min_neg_log_loss:
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)

    return total_sequence_loss
def one_word_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, aligned_pair,
                  feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    padded_lemma = BEGIN_WORD + lemma + END_WORD

    # convert characters to matching embeddings
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLTSM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []

    # i is input index, j is output index
    i = 0
    j = 0

    # go through alignments, progress j when new output is introduced, progress i when new char is seen on lemma (no ~)
    # TODO: try sutskever flip trick?
    # TODO: attention on the lemma chars/feats could help here?
    aligned_lemma, aligned_word = aligned_pair
    aligned_lemma += END_WORD
    aligned_word += END_WORD

    # run through the alignments
    for index, (input_char, output_char) in enumerate(zip(aligned_lemma, aligned_word)):
        possible_outputs = []

        # feedback, i, j, blstm[i], feats
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        # if reached the end word symbol
        if output_char == END_WORD:
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[END_WORD])))
            continue

        # if there is no prefix, step
        if padded_lemma[i] == BEGIN_WORD and aligned_lemma[index] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

        # if there is new output
        if aligned_word[index] != ALIGN_SYMBOL:
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            # copy i action - maybe model as a single action?
            if padded_lemma[i] == aligned_word[j]:
                possible_outputs.append(str(i))
                possible_outputs.append(padded_lemma[i])
            else:
                possible_outputs.append(aligned_word[index])

            # perform rnn step
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            local_loss = pc.scalarInput(0)
            max_output_loss = -pc.log(pc.pick(probs, alphabet_index[possible_outputs[0]]))
            max_likelihood_output = possible_outputs[0]

            # sum over all correct output possibilities and pick feedback output to be the one with the highest
            # probability
            for output in possible_outputs:
                neg_log_likelihood = -pc.log(pc.pick(probs, alphabet_index[output]))
                if neg_log_likelihood < max_output_loss:
                    max_likelihood_output = output
                    max_output_loss = neg_log_likelihood

                local_loss += neg_log_likelihood
            loss.append(local_loss)

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[max_likelihood_output]]
            prev_char_vec = char_lookup[alphabet_index[aligned_word[index]]]
            j += 1

        # now check if it's time to progress on input
        if i < len(padded_lemma) - 1 and aligned_lemma[index + 1] != ALIGN_SYMBOL:
            # perform rnn step
            # feedback, i, j, blstm[i], feats
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

    # TODO: maybe here a "special" loss function is appropriate?
    # loss = esum(loss)
    loss = pc.average(loss)

    return loss
Esempio n. 32
0
 def _get_word_vector(self, word, use_dropout=False):
     word_embedding = self._get_word_embedding(word, use_dropout)
     char_representation = self._get_char_representation(word, use_dropout)
     return concatenate([word_embedding, char_representation])
Esempio n. 33
0
 def _get_word_vector(self, word, use_dropout=False):
     word_embedding = self._get_word_embedding(word, use_dropout)
     char_representation = self._get_char_representation(word, use_dropout)
     return concatenate([word_embedding, char_representation])