def do_cpu():
    C.renew_cg()
    W = C.parameter(cpW)
    W = W * W * W * W * W * W * W
    z = C.squared_distance(W, W)
    z.value()
    z.backward()

def _build_word_expression_list(self, sentence, is_train=False):
    renew_cg()
    sentence_word_vectors = []
    for word in sentence:
        sentence_word_vectors.append(self._get_word_vector(word, use_dropout=is_train))

    lstm_forward = self.word_builders[0].initial_state()
    lstm_backward = self.word_builders[1].initial_state()

    embeddings_forward = []
    embeddings_backward = []
    for word_vector, reverse_word_vector in zip(sentence_word_vectors, reversed(sentence_word_vectors)):
        lstm_forward = lstm_forward.add_input(word_vector)
        lstm_backward = lstm_backward.add_input(reverse_word_vector)
        embeddings_forward.append(lstm_forward.output())
        embeddings_backward.append(lstm_backward.output())

    O = parameter(self.param_out)
    sentence_word_expressions = []
    for word_f_embedding, word_b_embedding in zip(embeddings_forward, reversed(embeddings_backward)):
        word_concat_embedding = concatenate([word_f_embedding, word_b_embedding])
        word_expression = O * word_concat_embedding
        sentence_word_expressions.append(word_expression)
    return sentence_word_expressions

def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    # encode the lemma
    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs,
                                                                                   decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]

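# The routine above returns the prediction as a list of characters. A minimal evaluation sketch
# (hypothetical helper, not part of the original code) that joins the prediction into a string and
# computes exact-match accuracy over (lemma, feats, word) triples, reusing the model objects and
# index dictionaries defined above:
def evaluate_exact_match(model, encoder_frnn, encoder_rrnn, decoder_rnn, test_data, alphabet_index,
                         inverse_alphabet_index, feat_index, feature_types):
    correct = 0
    for lemma, feats, gold_word in test_data:
        predicted_chars = predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats,
                                                  alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        if ''.join(predicted_chars) == gold_word:
            correct += 1
    return float(correct) / len(test_data)
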
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs,
                                                                                   decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        readout = R * attention_output_vector + bias
        current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        loss.append(current_loss)

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[output_char]]

    total_sequence_loss = pc.esum(loss)
    return total_sequence_loss

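# compute_loss builds a fresh graph per example and returns a single loss expression, so a training
# loop only needs to evaluate it, backpropagate, and update. A minimal sketch, not from the original
# code: train_data and num_epochs are placeholders, the encoders/decoder are assumed to be built on
# the same model, and any standard pyCNN trainer (e.g. SimpleSGDTrainer) could be used here.
trainer = pc.SimpleSGDTrainer(model)
for epoch in range(num_epochs):
    total_loss = 0.0
    for lemma, feats, word in train_data:
        loss = compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word,
                            alphabet_index, feat_index, feature_types)
        total_loss += loss.value()  # forward pass
        loss.backward()             # backward pass
        trainer.update()            # parameter update
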
def tag_sentence(self, sentence):
    renew_cg()
    for word in sentence:
        word.vector = self._get_word_vector(word, use_dropout=False)
    sentence_expressions = self._build_sentence_expressions(sentence)

    for word, word_expression in zip(sentence, sentence_expressions):
        out = softmax(word_expression)
        tag_index = np.argmax(out.npvalue())
        word.tag = self.tag_indexer.get_object(tag_index)

def predict(self, word_indices, char_indices, task_id, train=False):
    """
    predict tags for a sentence represented as char+word embeddings
    """
    pycnn.renew_cg()  # new graph

    char_emb = []
    rev_char_emb = []
    # get representation for words
    for chars_of_token in char_indices:
        # use last state as word representation
        last_state = self.char_rnn.predict_sequence([self.cembeds[c] for c in chars_of_token])[-1]
        rev_last_state = self.char_rnn.predict_sequence([self.cembeds[c] for c in reversed(chars_of_token)])[-1]
        char_emb.append(last_state)
        rev_char_emb.append(rev_last_state)

    wfeatures = [self.wembeds[w] for w in word_indices]
    features = [pycnn.concatenate([w, c, rev_c]) for w, c, rev_c in
                zip(wfeatures, char_emb, reversed(rev_char_emb))]

    if train:  # only do at training time
        features = [pycnn.noise(fe, self.noise_sigma) for fe in features]

    output_expected_at_layer = self.predictors["task_expected_at"][task_id]
    output_expected_at_layer -= 1

    # go through layers; input is now combination of word + char embeddings
    prev = features
    num_layers = self.h_layers
    for i in range(0, num_layers):
        predictor = self.predictors["inner"][i]
        forward_sequence, backward_sequence = predictor.predict_sequence(prev)
        if i > 0 and self.activation:
            # activation between LSTM layers
            forward_sequence = [self.activation(s) for s in forward_sequence]
            backward_sequence = [self.activation(s) for s in backward_sequence]

        if i == output_expected_at_layer:
            output_predictor = self.predictors["output_layers_dict"][task_id]
            concat_layer = [pycnn.concatenate([f, b]) for f, b in
                            zip(forward_sequence, reversed(backward_sequence))]
            if train and self.noise_sigma > 0.0:
                concat_layer = [pycnn.noise(fe, self.noise_sigma) for fe in concat_layer]
            output = output_predictor.predict_sequence(concat_layer)
            return output

        prev = forward_sequence
        prev_rev = backward_sequence  # not used

    raise Exception("oops, should not be here: no output layer was reached")

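# The expressions returned by predict still have to be mapped to discrete tags. A minimal sketch
# (hypothetical helper, not from the original code); it only assumes each returned expression is a
# per-token score vector, so the argmax picks the highest-scoring tag index whether or not a softmax
# was already applied by the output predictor.
import numpy as np

def pick_tag_indices(output_expressions):
    return [np.argmax(o.npvalue()) for o in output_expressions]

# usage sketch (tagger is a hypothetical instance of the class above):
# tag_indices = pick_tag_indices(tagger.predict(word_indices, char_indices, task_id))
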
def calc_sentence_error(self, sentence):
    renew_cg()
    for word in sentence:
        # word.vector = noise(self._get_word_vector(word), 0.1)
        word.vector = self._get_word_vector(word, use_dropout=True)
    sentence_expressions = self._build_sentence_expressions(sentence)

    sentence_errors = []
    for word, word_expression in zip(sentence, sentence_expressions):
        gold_label_index = self.tag_indexer.get_index(word.gold_label)
        word_error = pickneglogsoftmax(word_expression, gold_label_index)
        sentence_errors.append(word_error)
    return esum(sentence_errors)

def build_tagging_graph(word_indices, model, builder):
    """
    build the computational graph
    :param word_indices: list of indices
    :param model: current model to access parameters
    :param builder: builder to create state combinations
    :return: forward sequence
    """
    pycnn.renew_cg()
    f_init = builder.initial_state()

    # retrieve embeddings from the model and add noise
    word_embeddings = [pycnn.lookup(model["word_lookup"], w) for w in word_indices]
    word_embeddings = [pycnn.noise(we, args.noise) for we in word_embeddings]

    # compute the expressions for the forward pass
    forward_sequence = [x.output() for x in f_init.add_inputs(word_embeddings)]

    return forward_sequence

def build_tagging_graph(words, model, builders):
    """
    build the computational graph
    :param words: list of indices
    :param model: current model to access parameters
    :param builders: builders to create state combinations
    :return: forward and backward sequence
    """
    pycnn.renew_cg()
    f_init, b_init = [b.initial_state() for b in builders]

    # retrieve embeddings from the model and add noise
    word_embeddings = [pycnn.lookup(model["lookup"], w) for w in words]
    word_embeddings = [pycnn.noise(we, 0.1) for we in word_embeddings]

    # compute the expressions for the forward and backward pass
    forward_sequence = [x.output() for x in f_init.add_inputs(word_embeddings)]
    backward_sequence = [x.output() for x in b_init.add_inputs(reversed(word_embeddings))]

    return list(zip(forward_sequence, reversed(backward_sequence)))

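# The zipped forward/backward states returned above can be scored per token to obtain a tagging loss.
# A minimal sketch, assuming an output weight matrix was added to the model under the name "out";
# that name and this helper are not part of the original snippet.
def build_tagging_loss(words, tags, model, builders):
    bi_states = build_tagging_graph(words, model, builders)  # also renews the computation graph
    O = pycnn.parameter(model["out"])
    errs = []
    for (f, b), tag in zip(bi_states, tags):
        scores = O * pycnn.concatenate([f, b])
        errs.append(pycnn.pickneglogsoftmax(scores, tag))
    return pycnn.esum(errs)
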
def predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    # convert characters to matching embeddings, if UNK handle properly
    padded_lemma = BEGIN_WORD + lemma + END_WORD
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    # i is input index, j is output index
    i = j = 0
    num_outputs = 0
    predicted_output_sequence = []

    # run the decoder through the sequence and predict characters; allow up to three times the max
    # prediction length, since step outputs are added as well
    while num_outputs < MAX_PREDICTION_LEN * 3:

        # prepare input vector and perform LSTM step
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])
        s = s.add_input(decoder_input)

        # compute softmax probs vector and predict with argmax
        decoder_rnn_output = s.output()
        probs = pc.softmax(R * decoder_rnn_output + bias)
        probs = probs.vec_value()
        predicted_output_index = common.argmax(probs)
        predicted_output = inverse_alphabet_index[predicted_output_index]
        predicted_output_sequence.append(predicted_output)

        # check if step or char output to promote i or j
        if predicted_output == STEP:
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            if i < len(padded_lemma) - 1:
                i += 1
        else:
            if predicted_output.isdigit():
                # handle copy
                try:
                    # this way END_WORD cannot be copied (as it is in the training stage)
                    if i < len(lemma) + 1:
                        prev_char_vec = char_lookup[alphabet_index[padded_lemma[i]]]
                    else:
                        # if trying to copy from a non-existent index, pad with last lemma character
                        prev_char_vec = char_lookup[alphabet_index[lemma[-1]]]
                except KeyError:
                    prev_char_vec = char_lookup[alphabet_index[UNK]]
            else:
                # handle char
                prev_char_vec = char_lookup[predicted_output_index]
            j += 1

        num_outputs += 1

        # check if reached end of word
        if predicted_output_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[predicted_output_index]

    # remove the end word symbol
    return predicted_output_sequence[0:-1]

def one_word_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, aligned_pair,
                  feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])

    padded_lemma = BEGIN_WORD + lemma + END_WORD

    # convert characters to matching embeddings
    lemma_char_vecs = []
    for char in padded_lemma:
        try:
            lemma_char_vecs.append(char_lookup[alphabet_index[char]])
        except KeyError:
            # handle UNK
            lemma_char_vecs.append(char_lookup[alphabet_index[UNK]])

    # convert features to matching embeddings, if UNK handle properly
    feat_vecs = []
    for feat in sorted(feature_types):
        # TODO: is it OK to use same UNK for all feature types? and for unseen feats as well?
        # if this feature has a value, take it from the lookup. otherwise use UNK
        if feat in feats:
            feat_str = feat + ':' + feats[feat]
            try:
                feat_vecs.append(feat_lookup[feat_index[feat_str]])
            except KeyError:
                # handle UNK or dropout
                feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
        else:
            feat_vecs.append(feat_lookup[feat_index[UNK_FEAT]])
    feats_input = pc.concatenate(feat_vecs)

    # BiLSTM forward pass
    s_0 = encoder_frnn.initial_state()
    s = s_0
    frnn_outputs = []
    for c in lemma_char_vecs:
        s = s.add_input(c)
        frnn_outputs.append(s.output())

    # BiLSTM backward pass
    s_0 = encoder_rrnn.initial_state()
    s = s_0
    rrnn_outputs = []
    for c in reversed(lemma_char_vecs):
        s = s.add_input(c)
        rrnn_outputs.append(s.output())

    # BiLSTM outputs
    blstm_outputs = []
    lemma_char_vecs_len = len(lemma_char_vecs)
    for i in xrange(lemma_char_vecs_len):
        blstm_outputs.append(pc.concatenate([frnn_outputs[i], rrnn_outputs[lemma_char_vecs_len - i - 1]]))

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []

    # i is input index, j is output index
    i = 0
    j = 0

    # go through alignments, progress j when new output is introduced, progress i when new char is seen on lemma (no ~)
    # TODO: try sutskever flip trick?
    # TODO: attention on the lemma chars/feats could help here?
    aligned_lemma, aligned_word = aligned_pair
    aligned_lemma += END_WORD
    aligned_word += END_WORD

    # run through the alignments
    for index, (input_char, output_char) in enumerate(zip(aligned_lemma, aligned_word)):
        possible_outputs = []

        # feedback, i, j, blstm[i], feats
        decoder_input = pc.concatenate([prev_output_vec,
                                        prev_char_vec,
                                        # char_lookup[alphabet_index[str(i)]],
                                        # char_lookup[alphabet_index[str(j)]],
                                        blstm_outputs[i],
                                        feats_input])

        # if reached the end word symbol
        if output_char == END_WORD:
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[END_WORD])))
            continue

        # if there is no prefix, step
        if padded_lemma[i] == BEGIN_WORD and aligned_lemma[index] != ALIGN_SYMBOL:
            # perform rnn step: feedback, i, j, blstm[i], feats
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

        # if there is new output
        if aligned_word[index] != ALIGN_SYMBOL:
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])

            # copy i action - maybe model as a single action?
            if padded_lemma[i] == aligned_word[j]:
                possible_outputs.append(str(i))
                possible_outputs.append(padded_lemma[i])
            else:
                possible_outputs.append(aligned_word[index])

            # perform rnn step
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            local_loss = pc.scalarInput(0)
            max_output_loss = -pc.log(pc.pick(probs, alphabet_index[possible_outputs[0]]))
            max_likelihood_output = possible_outputs[0]

            # sum over all correct output possibilities and pick feedback output to be the one with the highest
            # probability
            for output in possible_outputs:
                neg_log_likelihood = -pc.log(pc.pick(probs, alphabet_index[output]))
                if neg_log_likelihood < max_output_loss:
                    max_likelihood_output = output
                    max_output_loss = neg_log_likelihood
                local_loss += neg_log_likelihood
            loss.append(local_loss)

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[max_likelihood_output]]
            prev_char_vec = char_lookup[alphabet_index[aligned_word[index]]]
            j += 1

        # now check if it's time to progress on input
        if i < len(padded_lemma) - 1 and aligned_lemma[index + 1] != ALIGN_SYMBOL:
            # perform rnn step: feedback, i, j, blstm[i], feats
            decoder_input = pc.concatenate([prev_output_vec,
                                            prev_char_vec,
                                            # char_lookup[alphabet_index[str(i)]],
                                            # char_lookup[alphabet_index[str(j)]],
                                            blstm_outputs[i],
                                            feats_input])
            s = s.add_input(decoder_input)
            decoder_rnn_output = s.output()
            probs = pc.softmax(R * decoder_rnn_output + bias)

            # compute local loss
            loss.append(-pc.log(pc.pick(probs, alphabet_index[STEP])))

            # prepare for the next iteration - "feedback"
            prev_output_vec = char_lookup[alphabet_index[STEP]]
            prev_char_vec = char_lookup[alphabet_index[EPSILON]]
            i += 1

    # TODO: maybe here a "special" loss function is appropriate?
    loss = pc.average(loss)
    return loss

if args.target in ['joint']:
    pOutAge = model.add_parameters("OUT_AGE", (num_labels, MLP_HIDDEN_LAYER_SIZE))
    biasOutAge = model.add_parameters("BIAS_OUT_AGE", (num_labels))
elif args.target in ['age', 'both']:
    pOutAge = model.add_parameters("OUT_AGE", (len(age_labels), MLP_HIDDEN_LAYER_SIZE))
    biasOutAge = model.add_parameters("BIAS_OUT_AGE", (len(age_labels)))

if args.target in ['gender', 'both']:
    pOutGender = model.add_parameters("OUT2", (len(gender_labels), MLP_HIDDEN_LAYER_SIZE))
    biasOutGender = model.add_parameters("BIAS_OUT2", (len(gender_labels)))

print("declared variables", file=sys.stderr)

pycnn.renew_cg()
new_word_embeddings = [pycnn.lookup(model["word_lookup"], w) for w in train[0][0][0]]
print(new_word_embeddings)
pycnn.conv1d_narrow(new_word_embeddings, pycnn.cg())
sys.exit()


def build_tagging_graph(word_indices, model, builder):
    """
    build the computational graph
    :param word_indices: list of indices
    :param model: current model to access parameters
    :param builder: builder to create state combinations
    :return: forward and backward sequence
    """

def get_loss(model, input_sentence, output_sentence, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    pc.renew_cg()
    embedded = embedd_sentence(model, input_sentence)
    encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded)
    return decode(model, dec_lstm, encoded, output_sentence)

def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = task1_attention_implementation.encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn,
                                                                          encoder_rrnn, feat_index, feat_lookup,
                                                                          feats, feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # find all possible actions - copy from index, output specific character etc.
        possible_outputs = list(set([padded_template[i]]))  # + [output_char]))

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, prev_char_vec]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs,
                                                                                   decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        readout = R * attention_output_vector + bias

        # choose which feedback to use based on minimum neg. log likelihood: initialize with the character loss
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            if current_loss < min_neg_log_loss:
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)
    return total_sequence_loss