import pycnn  # assumed import; module-level definitions (MLP, pH, pO, build_tagging_graph) are not shown


def fit(words, tags, labels, model, builders):
    """
    Compute the joint tagging error of a sentence.
    :param words: list of word indices
    :param tags: list of tag indices
    :param labels: label index (unused in this variant)
    :param model: current model, used to access parameters
    :param builders: builders used to create state combinations
    :return: joint error
    """
    # retrieve model parameters
    O = pycnn.parameter(pO)
    if MLP:
        H = pycnn.parameter(pH)

    errs = []
    for (forward_state, backward_state), tag in zip(build_tagging_graph(words, model, builders), tags):
        # concatenate the forward and backward LSTM states for this position
        f_b = pycnn.concatenate([forward_state, backward_state])
        if MLP:
            # TODO: add bias terms
            r_t = O * pycnn.tanh(H * f_b)
        else:
            r_t = O * f_b
        # negative log-likelihood of the gold tag under the predicted distribution
        err = pycnn.pickneglogsoftmax(r_t, tag)
        errs.append(err)
    return pycnn.esum(errs)
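# Usage sketch (not from the original source): how the loss expression
# returned by fit() is typically driven in a pycnn training loop.
# `train_data`, `trainer`, and the (words, tags, label) item shape are
# assumptions for illustration.
import pycnn

trainer = pycnn.SimpleSGDTrainer(model)
for words, tags, label in train_data:
    pycnn.renew_cg()  # fresh computation graph (if build_tagging_graph does not already renew it)
    sentence_loss = fit(words, tags, label, model, builders)
    loss_value = sentence_loss.value()  # forward pass: evaluates the scalar loss
    sentence_loss.backward()            # backward pass: accumulates gradients
    trainer.update()                    # apply the gradient update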
def calc_sentence_error(self, sentence):
    word_expression_list = self._build_word_expression_list(sentence, is_train=True)
    sentence_errors = []
    for word, word_expression in zip(sentence, word_expression_list):
        # negative log-likelihood of the gold label for each word
        gold_label_index = self.tag_indexer.get_index(word.gold_label)
        word_error = pickneglogsoftmax(word_expression, gold_label_index)
        sentence_errors.append(word_error)
    return esum(sentence_errors)
import pycnn as pc  # assumed alias; the original module-level imports are not shown


def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # bidirectional LSTM encoding of the lemma characters
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # initialize the decoder rnn
    s = decoder_rnn.initial_state()

    # convert features to matching embeddings; handle UNK features properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for the first lstm step to BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD

    # run the decoder through the output sequence and aggregate the loss
    for output_char in padded_word:
        # get the current hidden state of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()
        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities through the readout layer
        readout = R * attention_output_vector + bias
        current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        loss.append(current_loss)

        # prepare for the next iteration - feed back the gold character
        prev_output_vec = char_lookup[alphabet_index[output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)
    return total_sequence_loss
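# Design note (not from the original source): pc.esum sums the per-character
# losses, so longer target words contribute proportionally larger gradients.
# The commented-out average above would length-normalize instead; with pycnn
# expressions that could look like:
#
#     total_sequence_loss = pc.esum(loss) * (1.0 / len(loss))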
def calc_sentence_error(self, sentence):
    renew_cg()
    for word in sentence:
        # word.vector = noise(self._get_word_vector(word), 0.1)
        word.vector = self._get_word_vector(word, use_dropout=True)
    sentence_expressions = self._build_sentence_expressions(sentence)

    sentence_errors = []
    for word, word_expression in zip(sentence, sentence_expressions):
        gold_label_index = self.tag_indexer.get_index(word.gold_label)
        word_error = pickneglogsoftmax(word_expression, gold_label_index)
        sentence_errors.append(word_error)
    return esum(sentence_errors)
import pycnn  # assumed import; module-level definitions (pH, biasH, pOutAge, ...) are not shown


def fit(word_indices, label, model, builder, target):
    """
    Compute the prediction error for a single example.
    :param word_indices: list of word indices
    :param label: gold label index
    :param model: current model, used to access parameters
    :param builder: builder used to create state combinations
    :param target: prediction target ('age', 'gender', or 'joint')
    :return: prediction error
    """
    forward_states = build_tagging_graph(word_indices, model, builder)
    # only the final LSTM state is used for the prediction
    final_state = forward_states[-1]
    # final_state = pycnn.dropout(final_state, 0.1)

    # retrieve model parameters
    H = pycnn.parameter(pH)
    bias_H = pycnn.parameter(biasH)
    H2 = pycnn.parameter(pH2)      # second hidden layer, unused in the active variant below
    bias_H2 = pycnn.parameter(biasH2)
    if target in ['age', 'joint']:
        O = pycnn.parameter(pOutAge)
        bias_O = pycnn.parameter(biasOutAge)
    elif target == 'gender':
        O = pycnn.parameter(pOutGender)
        bias_O = pycnn.parameter(biasOutGender)
    else:
        raise ValueError('unknown target: %s' % target)

    # a deeper variant with tanh activations, kept for reference:
    # r_t = bias_O + (O * (bias_H2 + pycnn.tanh(H2 * (bias_H + pycnn.tanh(H * final_state)))))

    # active variant: a single linear hidden layer (no non-linearity)
    r_t = bias_O + (O * (bias_H + (H * final_state)))
    return pycnn.pickneglogsoftmax(r_t, label)
import pycnn as pc  # assumed alias; the original module-level imports are not shown


def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    # bidirectional LSTM encoding of the lemma characters
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # initialize the decoder rnn
    s = decoder_rnn.initial_state()

    # convert features to matching embeddings; handle UNK features properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for the first lstm step to BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate the loss
    for i, output_char in enumerate(padded_word):
        # find all possible actions - copy from the template index or output the gold character
        possible_outputs = list(set([padded_template[i]] + [output_char]))

        # get the current hidden state of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, prev_char_vec, feats_input]))
        decoder_rnn_output = s.output()
        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities through the readout layer
        readout = R * attention_output_vector + bias

        # choose the feedback action by minimum negative log likelihood, initialized
        # with the gold character's loss. pycnn expressions are symbolic graph nodes,
        # so the comparison is done on evaluated scalar values via .value().
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char]).value()
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])
            # accumulate the loss of every possible option
            loss.append(current_loss)
            current_loss_value = current_loss.value()
            if current_loss_value < min_neg_log_loss:
                min_neg_log_loss = current_loss_value
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)
    return total_sequence_loss
import pycnn as pc  # assumed alias; the original module-level imports are not shown


def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    # bidirectional LSTM encoding of the features and the lemma characters
    blstm_outputs = task1_attention_implementation.encode_feats_and_chars(
        alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index, feat_lookup, feats, feature_types,
        lemma)

    # initialize the decoder rnn
    s = decoder_rnn.initial_state()

    # set prev_output_vec for the first lstm step to BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate the loss
    for i, output_char in enumerate(padded_word):
        # find all possible actions - here only the template action is considered
        possible_outputs = list(set([padded_template[i]]))  # + [output_char] is disabled in this variant

        # get the current hidden state of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, prev_char_vec]))
        decoder_rnn_output = s.output()
        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities through the readout layer
        readout = R * attention_output_vector + bias

        # choose the feedback action by minimum negative log likelihood, initialized
        # with the gold character's loss; compare evaluated scalar values via .value()
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char]).value()
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])
            # accumulate the loss of every possible option
            loss.append(current_loss)
            current_loss_value = current_loss.value()
            if current_loss_value < min_neg_log_loss:
                min_neg_log_loss = current_loss_value
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)
    return total_sequence_loss
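# Note on the .value() comparisons above (sketch, not from the original
# source): pycnn expressions are symbolic graph nodes, so comparing two
# Expression objects with `<` would not compare the evaluated losses; the
# scalars must be pulled out of the graph first. A minimal illustration,
# assuming a standard pycnn setup:
import pycnn as pc

pc.renew_cg()
a = pc.scalarInput(0.5)
b = pc.scalarInput(1.5)
assert a.value() < b.value()  # correct: compares evaluated scalars
# `a < b` would not evaluate the expressions and is not a meaningful test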