def internal_eval(batches, transducer, vocab, previous_predicted_actions,
                  check_condition=True, name='train'):
    then = time.time()
    print('evaluating on {} data...'.format(name))
    number_correct = 0.
    total_loss = 0.
    predictions = []
    pred_acts = []
    i = 0  # counter of samples
    for j, batch in enumerate(batches):
        dy.renew_cg()
        batch_loss = []
        for sample in batch:
            feats = sample.pos, sample.feats
            loss, prediction, predicted_actions = transducer.transduce(
                sample.lemma, feats, external_cg=True)

            predictions.append(prediction)
            pred_acts.append(predicted_actions)
            batch_loss.extend(loss)

            # evaluation
            correct_prediction = False
            if (prediction in vocab.word and
                    vocab.word.w2i[prediction] == sample.word):
                correct_prediction = True
                number_correct += 1

            if check_condition:
                # display prediction for this sample if it differs from the
                # prediction of the previous epoch or if it is an error
                if predicted_actions != previous_predicted_actions[i] or not correct_prediction:
                    print('BEFORE:    ', datasets.action2string(previous_predicted_actions[i], vocab))
                    print('THIS TIME: ', datasets.action2string(predicted_actions, vocab))
                    print('TRUE:      ', sample.act_repr)
                    print('PRED:      ', prediction)
                    print('WORD:      ', sample.word_str)
                    print('X' if correct_prediction else 'V')

            # increment counter of samples
            i += 1
        batch_loss = -dy.average(batch_loss)
        total_loss += batch_loss.scalar_value()
        # report progress
        if j > 0 and j % 100 == 0:
            print('\t\t...{} batches'.format(j))

    accuracy = number_correct / i
    print('\t...finished in {:.3f} sec'.format(time.time() - then))
    return accuracy, total_loss, predictions, pred_acts
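
# Hypothetical usage sketch (not part of the original code): how `internal_eval`
# might be called once per epoch to track dev-set accuracy. The names
# `dev_batches`, `transducer`, and `prev_dev_actions` are assumptions for
# illustration; `dynet as dy` is assumed to be imported as in the surrounding module.
def evaluate_dev(dev_batches, transducer, vocab, prev_dev_actions):
    """Hypothetical helper: run greedy evaluation on the dev set."""
    accuracy, loss, predictions, actions = internal_eval(
        dev_batches, transducer, vocab, prev_dev_actions,
        check_condition=False, name='dev')
    print('dev accuracy {:.4f}, dev loss {:.4f}'.format(accuracy, loss))
    # keep the predicted action sequences around for next epoch's comparison
    return accuracy, actions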
def transduce(self, lemma, feats, oracle_actions=None, external_cg=True,
              sampling=False, unk_avg=True, debug_mode=False):

    def _valid_actions(encoder):
        valid_actions = list(self.INSERTS)
        if len(encoder) > 1:
            valid_actions += [STEP]
        else:
            valid_actions += [END_WORD]
        return valid_actions

    if not external_cg:
        dy.renew_cg()

    if oracle_actions:
        # reverse to enable simple popping
        oracle_actions = oracle_actions[::-1]
        oracle_actions.pop()  # deterministic insertion of BEGIN_WORD

    # vectorize lemma
    lemma_enc = self._build_lemma(lemma, unk_avg, is_training=bool(oracle_actions))

    # vectorize features
    features = self._build_features(*feats)

    # add encoder and decoder to computation graph
    encoder = Encoder(self.fbuffRNN, self.bbuffRNN)
    decoder = self.wordRNN.initial_state()

    # add classifier to computation graph
    if self.MLP_DIM:
        # decoder output to hidden
        W_s2h = dy.parameter(self.pW_s2h)
        b_s2h = dy.parameter(self.pb_s2h)
    # hidden to action
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)

    # encoder is a stack which pops lemma characters
    # and their representations from the top
    encoder.transduce(lemma_enc, lemma)

    action_history = [BEGIN_WORD]
    word = []
    losses = []
    count = 0

    if debug_mode:
        print()
        if oracle_actions:
            print(action2string(oracle_actions, self.vocab))
        print(lemma2string(lemma, self.vocab))

    while len(action_history) <= MAX_ACTION_SEQ_LEN:

        # what is at the top of encoder?
        encoder_embedding, char_enc = encoder.embedding(extra=True)

        if debug_mode:
            print('Action history: ', action_history, action2string(action_history, self.vocab))
            print('Encoder length: ', len(encoder))
            print('Current char: ', char_enc, lemma2string([char_enc], self.vocab))
            print('Word so far: ', ''.join(word))

        # decoder
        decoder_input = dy.concatenate([
            encoder_embedding,
            features,
            self.ACT_LOOKUP[action_history[-1]]
        ])
        decoder = decoder.add_input(decoder_input)
        decoder_output = decoder.output()

        # classifier
        if self.MLP_DIM:
            h = self.NONLIN(W_s2h * decoder_output + b_s2h)
        else:
            h = decoder_output

        valid_actions = _valid_actions(encoder)
        log_probs = dy.log_softmax(W_act * h + b_act, valid_actions)

        if oracle_actions is None:
            if sampling:
                dist = np.exp(log_probs.npvalue())
                # sample according to softmax
                rand = np.random.rand()
                for action, p in enumerate(dist):
                    rand -= p
                    if rand <= 0:
                        break
            else:
                action = np.argmax(log_probs.npvalue())
        else:
            action = oracle_actions.pop()

        losses.append(dy.pick(log_probs, action))
        action_history.append(action)

        if action == STEP:
            # Delete action
            encoder.pop()
        elif action == END_WORD:
            # Finish transduction
            break
        else:
            # Insert action
            char_ = self.vocab.act.i2w[action]
            assert action in self.INSERTS, (char_, action2string([action], self.vocab), self.INSERTS)
            word.append(char_)

    word = ''.join(word)

    return losses, word, action_history
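
# Hypothetical usage sketch (not part of the original code): the two ways
# `transduce` above is used, i.e. teacher forcing against a static oracle action
# sequence (training) and greedy prediction (evaluation). `transducer`, `trainer`,
# and `sample` are assumed names; the attribute holding the oracle action codes
# (`sample.actions`) is a guess, and the negated-average loss aggregation follows
# `internal_eval` above.
def train_and_predict_example(transducer, trainer, sample):
    """Hypothetical helper contrasting teacher-forced training with greedy prediction."""
    dy.renew_cg()
    feats = sample.pos, sample.feats

    # training: score the oracle actions and maximize their log-probability
    losses, _, _ = transducer.transduce(sample.lemma, feats,
                                        oracle_actions=list(sample.actions),
                                        external_cg=True)
    loss = -dy.average(losses)
    loss.backward()
    trainer.update()

    # prediction: no oracle, so actions are chosen greedily (argmax)
    dy.renew_cg()
    _, predicted_word, predicted_actions = transducer.transduce(
        sample.lemma, feats, external_cg=True)
    return predicted_word, predicted_actions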
def beam_search_decode(self, lemma, feats, external_cg=True, unk_avg=True, beam_width=4):
    # Returns a list of complete hypotheses
    # (log prob, log prob as a dynet expression, word, action sequence),
    # sorted by log probability, best hypothesis first.

    def _valid_actions(encoder):
        valid_actions = list(self.INSERTS)
        if len(encoder) > 1:
            valid_actions += [STEP]
        else:
            valid_actions += [END_WORD]
        return valid_actions

    if not external_cg:
        dy.renew_cg()

    # vectorize lemma
    lemma_enc = self._build_lemma(lemma, unk_avg, is_training=False)

    # vectorize features
    features = self._build_features(*feats)

    # add encoder and decoder to computation graph
    encoder = Encoder(self.fbuffRNN, self.bbuffRNN)
    decoder = self.wordRNN.initial_state()

    # encoder is a stack which pops lemma characters and their
    # representations from the top.
    encoder.transduce(lemma_enc, lemma)

    # add classifier to computation graph
    if self.MLP_DIM:
        # decoder output to hidden
        W_s2h = dy.parameter(self.pW_s2h)
        b_s2h = dy.parameter(self.pb_s2h)
    # hidden to action
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)

    # a list of tuples:
    #   (decoder state, encoder state, list of previous actions,
    #    log prob of previous actions, log prob of previous actions as dynet object,
    #    word generated so far)
    beam = [(decoder, encoder, [BEGIN_WORD], 0., 0., [])]

    beam_length = 0
    complete_hypotheses = []

    while beam_length <= MAX_ACTION_SEQ_LEN:

        if not beam or beam_width == 0:
            break

        # compute the probability of each action and expand every hypothesis
        # on the beam with its `beam_width` best actions
        expansion = []
        # print('Beam length: ', beam_length)
        for decoder, encoder, prev_actions, log_p, log_p_expr, word in beam:
            # print('Expansion: ', action2string(prev_actions, self.vocab), log_p, ''.join(word))
            encoder_embedding, char_enc = encoder.embedding(extra=True)

            # decoder
            decoder_input = dy.concatenate([
                encoder_embedding,
                features,
                self.ACT_LOOKUP[prev_actions[-1]]
            ])
            decoder = decoder.add_input(decoder_input)
            decoder_output = decoder.output()

            # generate
            if self.MLP_DIM:
                h = self.NONLIN(W_s2h * decoder_output + b_s2h)
            else:
                h = decoder_output

            logits = W_act * h + b_act
            valid_actions = _valid_actions(encoder)
            log_probs_expr = dy.log_softmax(logits, valid_actions)
            log_probs = log_probs_expr.npvalue()
            top_actions = np.argsort(log_probs)[-beam_width:]
            # print('top_actions: ', top_actions, action2string(top_actions, self.vocab))
            # print('log_probs: ', log_probs)

            expansion.extend(
                (decoder, encoder.copy(), list(prev_actions), a,
                 log_p + log_probs[a], log_p_expr + log_probs_expr[a],
                 list(word), char_enc)
                for a in top_actions)

        # print('Overall, {} expansions'.format(len(expansion)))
        beam = []
        expansion.sort(key=lambda e: e[4])
        for e in expansion[-beam_width:]:
            decoder, encoder, prev_actions, action, log_p, log_p_expr, word, char_enc = e
            prev_actions.append(action)

            # execute the action to update the transducer state
            if action == END_WORD:
                # 1. Finish transduction:
                #    * beam width should be decremented
                #    * expansion should be taken off the beam and
                #      stored to the final hypotheses set
                beam_width -= 1
                complete_hypotheses.append((log_p, log_p_expr, ''.join(word), prev_actions))
            else:
                if action == STEP:
                    encoder.pop()
                else:
                    # one of the INSERT actions
                    # 1. Append inserted character to the output word
                    char_ = self.vocab.act.i2w[action]
                    assert action in self.INSERTS, (char_, action2string([action], self.vocab), self.INSERTS)
                    word.append(char_)
                beam.append((decoder, encoder, prev_actions, log_p, log_p_expr, word))

        beam_length += 1

    if not complete_hypotheses:
        # no hypothesis emitted END_WORD within MAX_ACTION_SEQ_LEN;
        # fall back to the partial hypotheses currently on the beam
        complete_hypotheses = [(log_p, log_p_expr, ''.join(word), prev_actions)
                               for _, _, prev_actions, log_p, log_p_expr, word in beam]

    complete_hypotheses.sort(key=lambda h: h[0], reverse=True)
    # print('Complete hypotheses:')
    # for log_p, _, word, actions in complete_hypotheses:
    #     print('Actions {}, word {}, log p {:.3f}'.format(action2string(actions, self.vocab), word, log_p))
    return complete_hypotheses
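
# Hypothetical usage sketch (not part of the original code): decode one sample
# with beam search and take the best hypothesis. `transducer` and `sample` are
# assumed names; the hypothesis tuple layout (log_p, log_p_expr, word, actions)
# follows the return value of `beam_search_decode` above.
def beam_decode_example(transducer, sample, beam_width=4):
    """Hypothetical helper: return the best beam hypothesis for one sample."""
    dy.renew_cg()
    feats = sample.pos, sample.feats
    hypotheses = transducer.beam_search_decode(sample.lemma, feats,
                                               external_cg=True,
                                               beam_width=beam_width)
    log_p, _, predicted_word, predicted_actions = hypotheses[0]  # best hypothesis first
    return predicted_word, predicted_actions, log_p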
def transduce(self, lemma, feats, oracle_actions=None, external_cg=True,
              sampling=False, unk_avg=True, verbose=False):
    """Transduce an encoded lemma and features.

    Args:
        lemma: The input lemma, a list of integer character codes.
        feats: The features determining the morphological transformation. The most
            common format is a list of integer codes, one code per feature-value pair.
        oracle_actions: `None` means prediction. A list of action codes is a static
            oracle. A dictionary with the keys below is the config for a dynamic oracle.
                * "target_word": List of action codes for the target word form.
                * "loss": Which loss function to use (softmax-margin, NLL, MSE).
                * "rollout_mixin_beta": How to mix reference and learned roll-outs
                    (1 is only reference, 0 is only model).
                * "global_rollout": Whether to use one type of roll-out (expert or
                    model) at the sequence level.
                * "optimal": Whether to use an optimal or noisy (=buggy) expert.
                * "bias_inserts": Whether to use a buggy roll-out for inserts
                    (which makes them as cheap as copies).
        external_cg: Whether or not an external computation graph is defined.
        sampling: Whether or not sampling should be used for decoding (e.g. for MRT)
            or training (e.g. dynamic oracles with exploration / learned roll-ins).
        dynamic: Whether `oracle_actions` is a static oracle (list of actions)
            or a configuration for a dynamic oracle.
        unk_avg: Whether or not to average all char embeddings to produce the UNK
            embedding (see `self._build_lemma`).
        verbose: Whether or not to report on processing steps.
    """
    # Returns an expression of the loss for the sequence of actions
    # (that is, the oracle_actions if present or the predicted sequence otherwise).

    def _valid_actions(encoder):
        valid_actions = []
        if len(encoder) > 1:
            valid_actions += [COPY, DELETE]
        else:
            valid_actions += [END_WORD]
        valid_actions += self.INSERTS
        return valid_actions

    if not external_cg:
        dy.renew_cg()

    dynamic = None  # indicates prediction or static oracle
    if oracle_actions:
        # if not, then prediction
        if isinstance(oracle_actions, dict):
            # dynamic oracle:
            # @TODO NB target word is not wrapped in boundary tags
            target_word = oracle_actions['target_word']
            generation_errors = set()
            dynamic = oracle_actions
        else:
            # static oracle:
            # reverse to enable simple popping
            oracle_actions = oracle_actions[::-1]
            oracle_actions.pop()  # COPY of BEGIN_WORD_CHAR

    # vectorize lemma
    lemma_enc = self._build_lemma(lemma, unk_avg, is_training=bool(oracle_actions))

    # vectorize features
    features = self._build_features(*feats)

    # add encoder and decoder to computation graph
    encoder = Encoder(self.fbuffRNN, self.bbuffRNN)
    decoder = self.wordRNN.initial_state()

    # add classifier to computation graph
    if self.MLP_DIM:
        # decoder output to hidden
        W_s2h = dy.parameter(self.pW_s2h)
        b_s2h = dy.parameter(self.pb_s2h)
    # hidden to action
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)

    # encoder is a stack which pops lemma characters and their
    # representations from the top. Thus, to get lemma characters
    # in the right order, the lemma has to be reversed.
    encoder.transduce(lemma_enc, lemma)

    encoder.pop()  # BEGIN_WORD_CHAR
    action_history = [COPY]
    word = []
    losses = []

    if verbose and not dynamic:
        count = 0
        print()
        print(action2string(oracle_actions, self.vocab))
        print(lemma2string(lemma, self.vocab))

    if dynamic:
        # use model rollout for the whole of this sequence
        rollout_on = dynamic['global_rollout'] and np.random.rand() > dynamic['rollout_mixin_beta']

    while len(action_history) <= MAX_ACTION_SEQ_LEN:

        if verbose and not dynamic:
            print('Action: ', count, self.vocab.act.i2w[action_history[-1]])
            print('Encoder length, char: ', lemma, len(encoder),
                  self.vocab.char.i2w[encoder.s[-1][-1]])
            print('word: ', ''.join(word))
            print('Remaining actions: ', oracle_actions, action2string(oracle_actions, self.vocab))
            count += 1

        # compute the probability of each of the actions and choose an action
        # either from the oracle or, if there is no oracle, based on the model
        valid_actions = _valid_actions(encoder)
        encoder_embedding = encoder.embedding()

        # decoder
        decoder_input = dy.concatenate([
            encoder_embedding,
            features,
            self.ACT_LOOKUP[action_history[-1]]
        ])
        decoder = decoder.add_input(decoder_input)

        # classifier
        if self.double_feats:
            classifier_input = dy.concatenate([decoder.output(), features])
        else:
            classifier_input = decoder.output()
        if self.MLP_DIM:
            h = self.NONLIN(W_s2h * classifier_input + b_s2h)
        else:
            h = classifier_input
        logits = W_act * h + b_act

        # get action (argmax, sampling, or use oracle actions)
        if oracle_actions is None:
            # predicting by argmax or sampling
            log_probs = dy.log_softmax(logits, valid_actions)
            log_probs_np = log_probs.npvalue()
            if sampling:
                action = sample(log_probs_np)
            else:
                action = np.argmax(log_probs_np)
            losses.append(dy.pick(log_probs, action))
        elif dynamic:
            # training with dynamic oracle
            if rollout_on or (not dynamic['global_rollout'] and
                              np.random.rand() > dynamic['rollout_mixin_beta']):
                # the second disjunct allows for model roll-out applied locally
                rollout = lambda action: self.rollout(
                    action, dy.log_softmax(logits, valid_actions), action_history,
                    features, decoder, encoder, word, W_act, b_act)  # @TODO W_s2h ...
            else:
                rollout = None

            optim_actions, costs = oracle_with_rollout(
                word, target_word, encoder.get_extra(), valid_actions, rollout,
                self.vocab,
                optimal=dynamic['optimal'],
                bias_inserts=dynamic['bias_inserts'],
                errors=generation_errors,
                verbose=verbose)

            log_probs = dy.log_softmax(logits, valid_actions)
            log_probs_np = log_probs.npvalue()
            if sampling == 1. or np.random.rand() <= sampling:
                # action is picked by sampling
                action = sample(log_probs_np)
                # @TODO IL learned roll-ins are done with policy, i.e. greedy / beam search decoding
                if verbose:
                    print('Rolling in with model: ', action, self.vocab.act.i2w[action])
            else:
                # action is picked from optim_actions
                action = optim_actions[np.argmax([log_probs_np[a] for a in optim_actions])]
                # print([log_probs_np[a] for a in optim_actions])

            # loss is over all optimal actions.
            if dynamic['loss'] == 'softmax-margin':
                loss = log_sum_softmax_margin_loss(optim_actions, logits, self.NUM_ACTS,
                                                   costs=costs, valid_actions=None,
                                                   verbose=verbose)
            elif dynamic['loss'] == 'nll':
                loss = log_sum_softmax_loss(optim_actions, logits, self.NUM_ACTS,
                                            valid_actions=valid_actions, verbose=verbose)
            elif dynamic['loss'] == 'mse':
                # NB expects both costs and valid actions!
                loss = cost_sensitive_reg_loss(optim_actions, logits, self.NUM_ACTS,
                                               costs=costs, valid_actions=valid_actions,
                                               verbose=verbose)
            else:
                raise NotImplementedError
            losses.append(loss)
            # print('Action', action, self.vocab.act.i2w[action])
        else:
            # training with static oracle
            action = oracle_actions.pop()
            log_probs = dy.log_softmax(logits, valid_actions)
            losses.append(dy.pick(log_probs, action))

        action_history.append(action)
        # print('action, log_probs: ', action, self.vocab.act.i2w[action],
        #       losses[-1].scalar_value(), log_probs.npvalue())

        # execute the action to update the transducer state
        if action == COPY:
            # 1. Increment attention index
            try:
                char_ = encoder.pop()
            except IndexError as e:
                print(np.exp(log_probs.npvalue()))
                print('COPY: ', action)
            # 2. Append copied character to the output word
            word.append(self.vocab.char.i2w[char_])
        elif action == DELETE:
            # 1. Increment attention index
            try:
                encoder.pop()
            except IndexError as e:
                print(np.exp(log_probs.npvalue()))
                print('DELETE: ', action)
        elif action == END_WORD:
            # 1. Finish transduction
            # (END_WORD / INSERT handling and the return value below are assumed
            #  to mirror the simpler transducer's `transduce` above; the original
            #  listing breaks off after the DELETE branch.)
            break
        else:
            # one of the INSERT actions:
            # 1. Append inserted character to the output word
            word.append(self.vocab.act.i2w[action])

    word = ''.join(word)

    return losses, word, action_history
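
# Hypothetical usage sketch (not part of the original code): one imitation-learning
# update of the COPY/DELETE transducer with a dynamic oracle. The config keys follow
# the docstring above; `transducer`, `trainer`, and `sample` are assumed names, the
# attribute holding the target action codes (`sample.actions`) is a guess, and the
# negated-average loss aggregation mirrors `internal_eval`.
def dynamic_oracle_step_example(transducer, trainer, sample):
    """Hypothetical helper: a single training step with a dynamic oracle."""
    dy.renew_cg()
    feats = sample.pos, sample.feats
    oracle_config = {
        'target_word': sample.actions,   # target word form as action codes (unwrapped)
        'loss': 'nll',                   # 'softmax-margin', 'nll', or 'mse'
        'rollout_mixin_beta': 0.5,       # 1 = reference roll-outs only, 0 = model only
        'global_rollout': False,         # mix roll-outs per action, not per sequence
        'optimal': True,                 # use the optimal expert
        'bias_inserts': False,           # do not use the biased insert roll-out
    }
    losses, word, actions = transducer.transduce(sample.lemma, feats,
                                                 oracle_actions=oracle_config,
                                                 sampling=0.5,  # roll in with the model half the time
                                                 external_cg=True)
    loss = -dy.average(losses)  # aggregation assumed, following internal_eval above
    loss.backward()
    trainer.update()
    return word, actions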
def oracle_with_rollout(word, target_word, rest_of_input, valid_actions, rollout, vocab,
                        optimal=False, bias_inserts=False, errors=None, verbose=False,
                        del_cost=1, ins_cost=1, copy_cost=0, accuracy_error_cost=5.):
    """Given the word form constructed so far, the target word, the buffer, and the
    set of valid actions, what are the next optimal actions and the costs of all the
    actions? Under gold rollout, an action is optimal if the cost of taking it is the
    lowest, assuming that all future actions are optimal too. Biasing inserts in model
    roll-outs (due to a bug) gained performance."""
    bias_inserts_on = bias_inserts and np.random.rand() > 0.5
    if verbose:
        if rollout:
            print('Rolling out with model...')
        if bias_inserts_on:
            print('Will use bias inserts.')
    len_target_word = len(target_word)
    rest_of_input = rest_of_input[:-1]  # discount END WORD! @TODO undo choice of word wrapping
    len_word = len(word)
    if optimal:
        # errors indicate that we use the optimal reference policy
        if errors:
            # errors account for all possible errors except for the last character
            num_errors = len(errors)
            if verbose:
                print('Word contains at least {} errors: {}, {}, {}'.format(
                    num_errors,
                    ''.join(word[:-1]) + '(' + word[-1] + ')',
                    ''.join([c for i, c in enumerate(word[:-1]) if i not in errors]) + '(' + word[-1] + ')',
                    action2string(target_word, vocab)), errors)
            len_word -= num_errors
        try:
            if len_word and (len_word > len_target_word or  # overgenerated !
                             word[-1] != vocab.char.i2w[target_word[len_word - 1]]):  # generated a wrong char
                if verbose:
                    if len_word > len_target_word:
                        message = ''.join(word), action2string(target_word, vocab)
                    else:
                        message = word[-1], vocab.char.i2w[target_word[len_word - 1]]
                    print('Last action resulted in error: {}, {}'.format(*message))
                # there was an error, so in the following, ignore the last
                # generated char in accordance with the optimal policy, i.e.
                len_word -= 1
                errors.add(len(word) - 1)
        except Exception as e:
            print('len_word, word, target word: ', len_word, ''.join(word),
                  action2string(target_word, vocab))
            raise e

    # (i) incorporate action validity into costs:
    costs = -np.ones(vocab.act_train) * np.inf
    # valid but suboptimal actions get high costs, e.g. actions leading to
    # wrong accuracy. @TODO This should be dependent on e.g. levenshtein
    costs[valid_actions] = accuracy_error_cost

    if len_word >= len_target_word:
        if DELETE in valid_actions:
            # maybe the buffer is still not empty
            optimal_actions = [DELETE]
            costs[END_WORD] = 0.
        else:
            assert END_WORD in valid_actions
            optimal_actions = [END_WORD]
            costs[END_WORD] = 0.
    else:
        # assume no sampling, therefore we are in the edit distance cost matrix.
        # The lowest cost is in [len_word+1, len_target_word+1], and the action
        # history defines the position in the cost matrix. All costs are then read
        # off the cost matrix: INSERT(top_of_buffer), DELETE, COPY. All actions
        # leading to an accuracy error get a -np.inf cost (?). Optimal cost is
        # (min_edit_distance - current_cost). Return optimal cost actions and
        # costs for all actions.
        target_char_i = target_word[len_word]  # next target char, unicode => works because of param tying!!!
        target_char = vocab.char.i2w[target_char_i]
        top_of_buffer = rest_of_input[0] if len(rest_of_input) > 0 else END_WORD
        actions = []       # these actions are on a path to correct prediction
        action_costs = []  # their costs
        if DELETE in valid_actions:
            actions.append(DELETE)
            if rollout:
                _, prediction, predicted_actions = rollout(DELETE)
                # give cost to the entire prediction. Alternatives would include:
                #  * computing the cost from this DELETE action on (i.e. like the dynamic oracle does),
                #  * taking accuracy into account (the dynamic oracle does not).
                cost = cost_actions(predicted_actions)
                if verbose == 2:
                    # prediction, predicted actions, cost
                    print('DELETE COST (pred.): {}, {}, {}'.format(
                        prediction, action2string(predicted_actions, vocab), cost))
            else:
                cost = del_cost + edit_cost_matrix(rest_of_input[1:],  # delete one symbol
                                                   target_word[len_word:])[-1, -1]
                if verbose == 2:
                    # rest of lemma, rest of target, cost
                    print('DELETE COST (ref.): {}, {}, {}'.format(
                        action2string(rest_of_input[1:], vocab),
                        action2string(target_word[len_word:], vocab), cost))
            action_costs.append(cost)
        if COPY in valid_actions and target_char_i == top_of_buffer:
            # if valid, copy is on a path to target
            actions.append(COPY)
            if rollout:
                _, prediction, predicted_actions = rollout(COPY)
                cost = cost_actions(predicted_actions)
                if verbose == 2:
                    print('COPY COST (pred.): {}, {}, {}'.format(
                        prediction, action2string(predicted_actions, vocab), cost))
            else:
                cost = copy_cost + edit_cost_matrix(rest_of_input[1:],                # delete one symbol
                                                    target_word[len_word + 1:])[-1, -1]  # insert this symbol
                if verbose == 2:
                    print('COPY COST (ref.): {}, {}, {}'.format(
                        action2string(rest_of_input[1:], vocab),
                        action2string(target_word[len_word + 1:], vocab), cost))
            action_costs.append(cost)
        if target_char in vocab.act.w2i:
            # if such an action exists ...
            # if the target char can be inserted by a corresponding insert action, allow that
            # @TODO speed this up by not going from dictionaries
            insert_target_char = vocab.act.w2i[target_char]
            actions.append(insert_target_char)
            if rollout:
                # @TODO BUG: SCORED WITH ROLLOUT COPY !!!
                _, prediction, predicted_actions = rollout(COPY if bias_inserts_on else insert_target_char)
                cost = cost_actions(predicted_actions)
                if verbose == 2:
                    print('INSERT COST (pred.): {}, {}, {}'.format(
                        prediction, action2string(predicted_actions, vocab), cost))
            else:
                if bias_inserts_on:
                    # ENCOURAGE WITH ORACLE INSERTS
                    cost = copy_cost + edit_cost_matrix(rest_of_input[1:],                # delete one symbol
                                                        target_word[len_word + 1:])[-1, -1]  # insert this symbol
                else:
                    cost = ins_cost + edit_cost_matrix(rest_of_input,
                                                       target_word[len_word + 1:])[-1, -1]  # insert one symbol
                if verbose == 2:
                    print('INSERT COST (ref.): {}, {}, {}'.format(
                        action2string(rest_of_input, vocab),
                        action2string(target_word[len_word + 1:], vocab), cost))
            action_costs.append(cost)
        if verbose == 2:
            print('Target char:', target_char_i, target_char)
            print('Actions, action costs:', action2string(actions, vocab), action_costs)
            print('Top of the buffer:', top_of_buffer, action2string([top_of_buffer], vocab))
        # minimal cost according to gold oracle:
        optimal_cost = np.min(action_costs)
        optimal_actions = []
        for action, cost in zip(actions, action_costs):
            if cost == optimal_cost:
                optimal_actions.append(action)
            costs[action] = cost - optimal_cost

    if verbose == 2:
        print('Word:', ''.join(word))
        print('Target word:', action2string(target_word, vocab))
        print('Rest of input:', action2string(rest_of_input, vocab))
        print('Valid actions:', valid_actions, action2string(valid_actions, vocab))
        print('Optimal actions:', optimal_actions, action2string(optimal_actions, vocab))
        print('Costs:', costs)
    return optimal_actions, costs
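
# Reference sketch (not part of the original code): `edit_cost_matrix` is used above
# but not shown in this excerpt. A plausible, minimal stand-in is a standard
# Levenshtein dynamic-programming matrix over two sequences of character codes, so
# that `edit_cost_matrix(a, b)[-1, -1]` is the edit distance between the full
# sequences. Unit costs and the substitution operation are assumptions here; the
# real implementation may, e.g., restrict itself to insert/delete/copy.
def edit_cost_matrix_sketch(source, target, del_cost=1, ins_cost=1, sub_cost=1):
    """Hypothetical stand-in: a (len(source)+1) x (len(target)+1) edit-cost matrix."""
    m, n = len(source), len(target)
    d = np.zeros((m + 1, n + 1), dtype=np.int64)
    d[:, 0] = np.arange(m + 1) * del_cost   # delete all remaining source symbols
    d[0, :] = np.arange(n + 1) * ins_cost   # insert all remaining target symbols
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            match = 0 if source[i - 1] == target[j - 1] else sub_cost
            d[i, j] = min(d[i - 1, j] + del_cost,       # delete source[i-1]
                          d[i, j - 1] + ins_cost,       # insert target[j-1]
                          d[i - 1, j - 1] + match)      # copy or substitute
    return d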
def internal_eval_beam(batches, transducer, vocab, beam_width,
                       previous_predicted_actions, check_condition=True, name='train'):
    assert callable(getattr(transducer, "beam_search_decode", None)), \
        'transducer does not implement beam search.'
    then = time.time()
    print('evaluating on {} data with beam search (beam width {})...'.format(name, beam_width))
    number_correct = 0.
    total_loss = 0.
    predictions = []
    pred_acts = []
    i = 0  # counter of samples
    for j, batch in enumerate(batches):
        dy.renew_cg()
        batch_loss = []
        for sample in batch:
            feats = sample.pos, sample.feats
            hypotheses = transducer.beam_search_decode(sample.lemma, feats,
                                                       external_cg=True,
                                                       beam_width=beam_width)
            # take top hypothesis
            try:
                loss, loss_expr, prediction, predicted_actions = hypotheses[0]
            except Exception as e:
                print(hypotheses)
                raise e
            predictions.append(prediction)
            pred_acts.append(predicted_actions)
            batch_loss.append(loss)
            # sanity check: Basically, this often is wrong...
            # assert round(loss, 3) == round(loss_expr.scalar_value(), 3), (loss, loss_expr.scalar_value())

            # evaluation
            correct_prediction = False
            if (prediction in vocab.word and
                    vocab.word.w2i[prediction] == sample.word):
                correct_prediction = True
                number_correct += 1
                if check_condition:
                    # compare to greedy prediction:
                    _, greedy_prediction, _ = transducer.transduce(sample.lemma, feats, external_cg=True)
                    if greedy_prediction != prediction:
                        print('Beam! Target: ', sample.word_str)
                        print('Greedy prediction: ', greedy_prediction)
                        print('Complete hypotheses:')
                        for log_p, _, pred_word, pred_actions in hypotheses:
                            print('Actions {}, word {}, -log p {:.3f}'.format(
                                action2string(pred_actions, vocab), pred_word, -log_p))

            if check_condition:
                # display prediction for this sample if it differs from the
                # prediction of the previous epoch or if it is an error
                if predicted_actions != previous_predicted_actions[i] or not correct_prediction:
                    print('BEFORE:    ', datasets.action2string(previous_predicted_actions[i], vocab))
                    print('THIS TIME: ', datasets.action2string(predicted_actions, vocab))
                    print('TRUE:      ', sample.act_repr)
                    print('PRED:      ', prediction)
                    print('WORD:      ', sample.word_str)
                    print('X' if correct_prediction else 'V')

            # increment counter of samples
            i += 1
        batch_loss = -np.mean(batch_loss)
        total_loss += batch_loss
        # report progress
        if j > 0 and j % 100 == 0:
            print('\t\t...{} batches'.format(j))

    # (final accuracy computation and return value assumed to mirror
    #  `internal_eval` above; the original listing breaks off here)
    accuracy = number_correct / i
    print('\t...finished in {:.3f} sec'.format(time.time() - then))
    return accuracy, total_loss, predictions, pred_acts
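
# Hypothetical usage sketch (not part of the original code): run greedy and
# beam-search evaluation on the same dev data and compare accuracies.
# `dev_batches`, `transducer`, and `prev_actions` are assumed names, and
# `internal_eval_beam` is assumed to return the same 4-tuple as `internal_eval`
# (as reconstructed above).
def compare_greedy_and_beam_example(dev_batches, transducer, vocab, prev_actions, beam_width=4):
    """Hypothetical helper: report greedy vs. beam-search dev accuracy."""
    greedy_acc, _, _, greedy_actions = internal_eval(
        dev_batches, transducer, vocab, prev_actions, check_condition=False, name='dev')
    beam_acc, _, _, _ = internal_eval_beam(
        dev_batches, transducer, vocab, beam_width, prev_actions,
        check_condition=False, name='dev')
    print('greedy accuracy {:.4f}, beam (width {}) accuracy {:.4f}'.format(
        greedy_acc, beam_width, beam_acc))
    return greedy_actions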
def action2string(self, acts):
    return datasets.action2string(acts, self.vocab)