def find_word_beam(self, posterior):
    """Do an internal beam search over non-terminal functions to find
    the next best n terminal tokens, as ranked by normalized path
    score.

    Returns:
        posterior containing up to n terminal tokens and their
        normalized path score
    """
    top_tokens = utils.argmax_n(posterior, self.beam_size)
    hypos = [InternalHypo(posterior[tok], self.get_state(), tok)
             for tok in top_tokens if tok in self.nonterminals]
    best_hypo = InternalHypo(utils.NEG_INF, None, None)
    best_posterior = None
    while hypos and hypos[0].norm_score > best_hypo.norm_score:
        next_hypos = []
        for hypo in hypos:
            self.set_state(copy.deepcopy(hypo.predictor_state))
            self.consume(hypo.word_to_consume)
            new_post = self.predict_next(predicting_next_word=True)
            top_tokens = utils.argmax_n(new_post, self.beam_size)
            next_state = copy.deepcopy(self.get_state())
            new_norm_score = self.norm_score(
                new_post[top_tokens[0]] + hypo.score, hypo.beam_len + 1)
            if (self.are_best_terminal(new_post)
                    and new_norm_score > best_hypo.norm_score):
                best_hypo = copy.deepcopy(hypo)
                best_hypo.predictor_state = next_state
                best_hypo.norm_score = new_norm_score
                best_posterior = new_post
            else:
                if hypo.beam_len == self.max_internal_len:
                    logging.info("Cutting off internal hypo - too long")
                    continue
                for tok in top_tokens:
                    if tok in self.nonterminals:
                        new_hypo = copy.deepcopy(hypo)
                        new_hypo.extend(new_post[tok], next_state, tok)
                        next_hypos.append(new_hypo)
        for next_hypo in next_hypos:
            self.norm_hypo_score(next_hypo)
        next_hypos.sort(key=lambda h: -h.norm_score)
        hypos = next_hypos[:self.beam_size]
    self.set_state(best_hypo.predictor_state)
    for tok in best_posterior.keys():
        best_posterior[tok] = self.norm_score(
            best_hypo.score + best_posterior[tok], best_hypo.beam_len + 1)
        if tok in self.nonterminals:
            del best_posterior[tok]
    return best_posterior
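# `InternalHypo` is defined elsewhere in the package. The sketch below only
# records the interface that find_word_beam() above relies on (score,
# norm_score, predictor_state, word_to_consume, beam_len, extend); the name
# and field defaults are assumptions, and initialize_internal_hypos() further
# down uses a four-argument variant with an extra per-word score.
class _InternalHypoSketch(object):
    """Hypothetical partial hypothesis used during the internal search."""

    def __init__(self, score, predictor_state, word_to_consume):
        self.score = score                   # Accumulated path score
        self.norm_score = score              # Length-normalized path score
        self.predictor_state = predictor_state
        self.word_to_consume = word_to_consume
        self.beam_len = 1                    # Number of internal expansions

    def extend(self, score, predictor_state, word_to_consume):
        # Grow the hypothesis by one internal (non-terminal) token.
        self.score += score
        self.predictor_state = predictor_state
        self.word_to_consume = word_to_consume
        self.beam_len += 1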
def _expand_hypo(self, hypo): """Expands hypothesis by calling predict_next() only on one single predictor. """ if hypo.score <= self.min_score: return [] pred_idx = 0 for idx, s in enumerate(hypo.predictor_states): if not s is None: pred_idx = idx break self.apply_predictors_count += 1 predictor = self.predictors[pred_idx][0] predictor.set_state(copy.deepcopy(hypo.predictor_states[pred_idx])) if not hypo.word_to_consume is None: # Consume if cheap expand predictor.consume(hypo.word_to_consume) hypo.word_to_consume = None posterior = predictor.predict_next() hypo.predictor_states = list(hypo.predictor_states) hypo.predictor_states[pred_idx] = predictor.get_state() breakdown_dummy = [(0.0, 1.0)] * len(self.predictors) ret = [] for trgt_word in utils.argmax_n(posterior, self.beam_size): score_breakdown = list(breakdown_dummy) score_breakdown[pred_idx] = (posterior[trgt_word], 1.0) ret.append( hypo.cheap_expand(trgt_word, posterior[trgt_word], score_breakdown)) return ret
def _create_short_list(self, logits):
    """Creates a set of tokens which are likely translations."""
    words = set()
    filt_logits = logits[self.min_id:]
    for strat in self.shortlist_strategies:
        if strat[:3] == "top":
            n = int(strat[3:])
            words.update(utils.argmax_n(filt_logits, n))
        elif strat[:4] == "prob":
            p = float(strat[4:])
            unnorm_probs = np.exp(filt_logits)
            threshold = np.sum(unnorm_probs) * p
            acc = 0.0
            for word in np.argsort(filt_logits)[::-1]:
                acc += unnorm_probs[word]
                words.add(word)
                if acc >= threshold:
                    break
        else:
            raise AttributeError("Unknown shortlist strategy '%s'" % strat)
    if self.min_id:
        words = set(w + self.min_id for w in words)
    try:
        words.remove(utils.EOS_ID)
    except KeyError:
        pass
    return words
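# The strategy strings parsed by _create_short_list() are expected to look
# like "top<N>" or "prob<P>". A hypothetical configuration combining both
# (union of the 50 highest-scoring tokens and the smallest set covering 90%
# of the unnormalized probability mass) might be set up as below; `predictor`
# is an assumed instance of the class defining _create_short_list().
def _example_shortlist(predictor, logits):
    predictor.shortlist_strategies = ["top50", "prob0.9"]
    return predictor._create_short_list(logits)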
def _expand_hypo(self, hypo): """Expands hypothesis by calling predict_next() only on one single predictor. """ if hypo.score <= self.min_score: return [] pred_idx = 0 for idx, s in enumerate(hypo.predictor_states): if not s is None: pred_idx = idx break self.apply_predictors_count += 1 predictor = self.predictors[pred_idx][0] predictor.set_state(copy.deepcopy(hypo.predictor_states[pred_idx])) if not hypo.word_to_consume is None: # Consume if cheap expand predictor.consume(hypo.word_to_consume) hypo.word_to_consume = None posterior = predictor.predict_next() hypo.predictor_states = list(hypo.predictor_states) hypo.predictor_states[pred_idx] = predictor.get_state() breakdown_dummy = [(0.0, 1.0)] * len(self.predictors) ret = [] for trgt_word in utils.argmax_n(posterior, self.beam_size): score_breakdown = list(breakdown_dummy) score_breakdown[pred_idx] = (posterior[trgt_word], 1.0) ret.append(hypo.cheap_expand(trgt_word, posterior[trgt_word], score_breakdown)) return ret
def _get_next_hypos_maxent(self, hypos, scores):
    """Get hypotheses of the next time step.

    Args:
        hypos (list): List of hypotheses
        scores (list): hypo scores with heuristic estimates

    Return:
        list. List with hypotheses.
    """
    # Update self.maxent_ngram_mass
    for hypo_score, hypo in zip(scores, hypos):
        s = hypo.trgt_sentence
        h = s[:-1]
        l = len(s)
        if l <= self.maxent_processed_length:
            continue
        # TODO: Could be more efficient by checking is_sublist for
        # all orders in one pass
        for order in xrange(min(len(s), self.max_order),
                            self.min_order - 1, -1):
            ngram = s[-order:]
            # Do not use this ngram if it occurs before
            if is_sublist(ngram, h):
                break  # All lower order ngrams occur before, too
            prev_mass = self.maxent_ngram_mass.get(ngram)
            if prev_mass is None:
                updated_mass = hypo_score
            else:
                updated_mass = max(
                    prev_mass,
                    hypo_score,
                    np.log(np.exp(prev_mass) + np.exp(hypo_score)))
            self.maxent_ngram_mass.add(ngram, updated_mass)
    self.maxent_processed_length += 1
    exp_counts = []
    for hypo in hypos:
        s = hypo.trgt_sentence
        l = len(s)
        cnt = 0.0
        for order in xrange(self.min_order, self.max_order + 1):
            for start in xrange(l - order + 1):
                logprob = self.maxent_ngram_mass.get(s[start:start + order])
                # MaxEnt means that we estimate the probability of the
                # ngram as p + (1-p) * 0.5, i.e. 0.5 * (1 + p) - hence
                # the final multiplication of the count by 0.5.
                if logprob is not None:
                    cnt += 1.0 + np.exp(logprob)
                else:
                    cnt += 1.0
        exp_counts.append(cnt * 0.5)
    next_hypos = []
    for idx in utils.argmax_n(exp_counts, self.beam_size):
        hypos[idx].bleu = exp_counts[idx]
        next_hypos.append(hypos[idx])
        logging.debug("Selected (score=%f expected_counts=%f): %s"
                      % (scores[idx], hypos[idx].bleu,
                         hypos[idx].trgt_sentence))
    return next_hypos
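# is_sublist() is imported from elsewhere. For reference, a straightforward
# implementation consistent with its use above (contiguous sublist test on
# token id lists) might look like this; the real helper may differ.
def _is_sublist_sketch(needle, haystack):
    n = len(needle)
    return any(haystack[i:i + n] == needle
               for i in range(len(haystack) - n + 1))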
def are_best_terminal(self, posterior):
    """Return true if the most probable tokens in posterior are all
    terminals (including EOS).
    """
    best_rule_ids = utils.argmax_n(posterior, self.beam_size)
    for tok in best_rule_ids:
        if tok in self.nonterminals:
            return False
    return True
def _get_next_hypos_renorm(self, hypos, scores):
    """Get hypotheses of the next time step.

    Args:
        hypos (list): List of hypotheses
        scores (list): hypo scores with heuristic estimates

    Return:
        list. List with hypotheses.
    """
    probs = (1.0 - self.smooth_factor) * np.exp(
                scores - utils.log_sum(scores)) \
            + self.smooth_factor / float(len(scores))
    lengths = [len(hypo.trgt_sentence) for hypo in hypos]
    logging.debug("%d candidates min_length=%d max_length=%d"
                  % (len(lengths), min(lengths), max(lengths)))
    ngrams = []
    for hypo in hypos:
        ngram_list = []
        for order in xrange(self.min_order, self.max_order + 1):
            ngram_list.append(set([
                " ".join(map(str, hypo.trgt_sentence[start:start + order]))
                for start in xrange(len(hypo.trgt_sentence))]))
        ngrams.append(ngram_list)
    exp_bleus = []
    for hyp_ngrams, hyp_length in zip(ngrams, lengths):
        precisions = np.array([self._compute_bleu(
                hyp_ngrams, ref_ngrams, hyp_length, ref_length)
            for ref_ngrams, ref_length in zip(ngrams, lengths)])
        exp_bleus.append(precisions * probs)
    next_hypos = []
    if self.selection_strategy == 'oracle_bleu':
        for _ in xrange(min(self.beam_size, len(hypos))):
            idx = np.argmax(np.sum(exp_bleus, axis=1))
            bleu = np.sum(exp_bleus[idx])
            logging.debug("Selected (score=%f expected_bleu=%f): %s"
                          % (scores[idx], bleu, hypos[idx].trgt_sentence))
            hypos[idx].bleu = -bleu
            next_hypos.append(hypos[idx])
            gained_bleus = exp_bleus[idx]
            for update_idx in xrange(len(exp_bleus)):
                exp_bleus[update_idx] = np.maximum(exp_bleus[update_idx],
                                                   gained_bleus)
    else:  # Selection strategy 'bleu'
        total_exp_bleus = np.sum(exp_bleus, axis=1)
        for idx in utils.argmax_n(total_exp_bleus, self.beam_size):
            hypos[idx].bleu = total_exp_bleus[idx]
            next_hypos.append(hypos[idx])
            logging.debug("Selected (score=%f expected_bleu=%f): %s"
                          % (scores[idx], hypos[idx].bleu,
                             hypos[idx].trgt_sentence))
    return next_hypos
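# _compute_bleu() is defined elsewhere. Given that the ngrams above are stored
# as one set per order, a plausible reading is a smoothed BLEU between two
# candidates: per-order set-overlap precision, geometric mean, brevity
# penalty. The sketch below is an assumption about that helper, not its
# actual implementation.
def _compute_bleu_sketch(hyp_ngrams, ref_ngrams, hyp_length, ref_length,
                         smooth=1.0):
    log_prec = 0.0
    for hyp_set, ref_set in zip(hyp_ngrams, ref_ngrams):
        matches = len(hyp_set & ref_set)
        log_prec += np.log((matches + smooth) / (len(hyp_set) + smooth))
    log_prec /= len(hyp_ngrams)
    # Brevity penalty in log space: min(0, 1 - r/c)
    bp = min(0.0, 1.0 - float(ref_length) / hyp_length)
    return np.exp(bp + log_prec)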
def apply_predictors(self, top_n=0):
    """Get the distribution over the next word by combining the
    predictor scores.

    Args:
        top_n (int): If positive, return only the best n words.

    Returns:
        combined,score_breakdown: Two dicts. ``combined`` maps target
        word ids to the combined score, ``score_breakdown`` contains
        the scores for each predictor separately represented as tuples
        (unweighted_score, predictor_weight)
    """
    self.apply_predictors_count += 1
    bounded_predictors = [
        el for el in self.predictors
        if not isinstance(el[0], UnboundedVocabularyPredictor)]
    # Get bounded posteriors
    bounded_posteriors = [p.predict_next()
                          for (p, _) in bounded_predictors]
    non_zero_words = self._get_non_zero_words(bounded_predictors,
                                              bounded_posteriors)
    if not non_zero_words:  # Special case: no word is possible
        non_zero_words = set([utils.EOS_ID])
    # Add unbounded predictors and unk probabilities
    posteriors = []
    unk_probs = []
    pred_weights = []
    bounded_idx = 0
    for (p, w) in self.predictors:
        if isinstance(p, UnboundedVocabularyPredictor):
            posterior = p.predict_next(non_zero_words)
        else:  # Take it from the bounded_* variables
            posterior = bounded_posteriors[bounded_idx]
            bounded_idx += 1
        posteriors.append(posterior)
        unk_probs.append(p.get_unk_probability(posterior))
        pred_weights.append(w)
    pred_weights = self.apply_interpolation_strategy(
        pred_weights, non_zero_words, posteriors, unk_probs)
    ret = self.combine_posteriors(
        non_zero_words, posteriors, unk_probs, pred_weights, top_n)
    if not self.allow_unk_in_output and utils.UNK_ID in ret[0]:
        del ret[0][utils.UNK_ID]
        del ret[1][utils.UNK_ID]
    if top_n > 0 and len(ret[0]) > top_n:
        top = utils.argmax_n(ret[0], top_n)
        ret = ({w: ret[0][w] for w in top},
               {w: ret[1][w] for w in top})
    self.notify_observers(ret, message_type=MESSAGE_TYPE_POSTERIOR)
    return ret
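# combine_posteriors() is defined elsewhere and is configurable. The snippet
# below only illustrates the data shapes named in the docstring above with a
# minimal linear interpolation over the (unweighted_score, weight) tuples;
# the real combination method may differ.
def _combine_breakdown_sketch(score_breakdown):
    # score_breakdown: {word_id: [(unweighted_score, weight), ...]}
    return {word: sum(score * weight for score, weight in preds)
            for word, preds in score_breakdown.items()}

# Example (hypothetical numbers, two predictors weighted 0.6 / 0.4):
# _combine_breakdown_sketch({42: [(-1.5, 0.6), (-1.0, 0.4)]})
# -> {42: -1.3} (approximately)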
def initialize_internal_hypos(self, posterior):
    """Create initial internal hypotheses from the top tokens in
    ``posterior``. Terminal tokens are additionally recorded in
    ``self.tok_to_hypo`` and returned separately.
    """
    top_tokens = utils.argmax_n(posterior, self.beam_size)
    hypos = []
    top_terminals = []
    for tok in top_tokens:
        new_hypo = InternalHypo(posterior[tok],
                                posterior[tok],
                                copy.deepcopy(self.predictor.get_state()),
                                tok)
        if tok not in self.nonterminals:
            self.tok_to_hypo[tok] = new_hypo
            top_terminals.append(tok)
        hypos.append(new_hypo)
    return hypos, top_terminals
def _combine_posteriors_norm_none(self,
                                  non_zero_words,
                                  posteriors,
                                  unk_probs,
                                  top_n=0):
    """Combine predictor posteriors according to the normalization
    scheme ``CLOSED_VOCAB_SCORE_NORM_NONE``. For more information on
    closed vocabulary predictor score normalization see the
    documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.

    Args:
        non_zero_words (set): All words with positive probability
        posteriors: Predictor posterior distributions calculated with
                    ``predict_next()``
        unk_probs: UNK probabilities of the predictors, calculated
                   with ``get_unk_probability``
        top_n (int): If positive, return only top n words

    Returns:
        combined,score_breakdown: like in ``apply_predictors()``
    """
    if isinstance(non_zero_words, xrange) and top_n > 0:
        n_words = len(non_zero_words)
        scaled_posteriors = []
        for posterior, unk_prob, (_, weight) in zip(
                posteriors, unk_probs, self.predictors):
            if isinstance(posterior, dict):
                arr = np.full(n_words, unk_prob)
                for word, score in posterior.iteritems():
                    arr[word] = score
                scaled_posteriors.append(arr * weight)
            else:
                n_unks = n_words - len(posterior)
                if n_unks:
                    posterior = np.concatenate((
                        posterior, np.full(n_unks, unk_prob)))
                scaled_posteriors.append(posterior * weight)
        combined_scores = np.sum(scaled_posteriors, axis=0)
        non_zero_words = utils.argmax_n(combined_scores, top_n)
    combined = {}
    score_breakdown = {}
    for trgt_word in non_zero_words:
        preds = [(utils.common_get(posteriors[idx], trgt_word,
                                   unk_probs[idx]), w)
                 for idx, (_, w) in enumerate(self.predictors)]
        combined[trgt_word] = self.combi_predictor_method(preds)
        score_breakdown[trgt_word] = preds
    return combined, score_breakdown
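# utils.common_get() abstracts over dict and array posteriors. A sketch
# consistent with its use above (fall back to the UNK score when the word is
# not covered by the posterior) could look like this; the real utility may
# handle more cases.
def _common_get_sketch(posterior, key, default):
    if isinstance(posterior, dict):
        return posterior.get(key, default)
    return posterior[key] if key < len(posterior) else default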
def _expand_hypo(self, hypo):
    self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
    if hypo.word_to_consume is not None:  # Consume if cheap expand
        self.consume(hypo.word_to_consume)
        hypo.word_to_consume = None
    posterior, score_breakdown = self.apply_predictors()
    hypo.predictor_states = self.get_predictor_states()
    top = utils.argmax_n(posterior, self.beam_size)
    # EOS hypo
    eos_hypo = hypo.cheap_expand(utils.EOS_ID,
                                 posterior[utils.EOS_ID],
                                 score_breakdown[utils.EOS_ID])
    self.add_full_hypo(eos_hypo.generate_full_hypothesis())
    # All other hypos
    return [hypo.cheap_expand(trgt_word,
                              posterior[trgt_word],
                              score_breakdown[trgt_word])
            for trgt_word in top if trgt_word != utils.EOS_ID]
def _expand_hypo_nmt(self, hypo):
    """Get the best beam size expansions of ``hypo`` by one CHAR,
    based on nmt predictor scores only.

    Args:
        hypo (PartialHypothesis): Hypothesis to expand

    Returns:
        list. List of child hypotheses
    """
    if hypo.score <= self.min_score:
        return []
    self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
    if hypo.word_to_consume is not None:  # Consume if cheap expand
        self.consume(hypo.word_to_consume)
        hypo.word_to_consume = None
    posterior, score_breakdown = self.apply_predictors()
    hypo.predictor_states = self.get_predictor_states()
    # Rank candidates by the sum of the unweighted scores of all
    # predictors named "nmt" in the score breakdown.
    nmt_only_scores = {
        k: sum([v[i][0] for i, s in enumerate(v)
                if self.predictor_names[i] == "nmt"])
        for k, v in score_breakdown.items()}
    top = utils.argmax_n(nmt_only_scores, self.beam_size)
    # Alternative: rank by character-level predictors only
    # char_only_scores = {
    #     k: sum([v[i][0] for i, s in enumerate(v)
    #             if self.predictor_levels[i] == "c"])
    #     for k, v in score_breakdown.items()}
    # top = utils.argmax_n(char_only_scores, self.beam_size)
    return [hypo.cheap_expand(trgt_word,
                              posterior[trgt_word],
                              score_breakdown[trgt_word])
            for trgt_word in top]
def _expand_hypo(self, hypo): """Get the best beam size expansions of ``hypo``. Args: hypo (SimPartialHypothesis): Hypothesis to expand Returns: list. List of child hypotheses """ if hypo.score <= self.min_score: return [] self.set_predictor_states(copy.deepcopy(hypo.predictor_states)) if not hypo.word_to_consume is None: # Consume if cheap expand self.consume(hypo.word_to_consume) hypo.word_to_consume = None posterior, score_breakdown = self.apply_predictors(self.beam_size) hypo.predictor_states = self.get_predictor_states() top = utils.argmax_n(posterior, self.beam_size) return [ hypo.cheap_expand(trgt_word, posterior[trgt_word], score_breakdown[trgt_word]) for trgt_word in top ]
def decode(self, src_sentence):
    """Decodes a single source sentence using beam search."""
    self.initialize_predictors(src_sentence)
    hypos = [PartialHypothesis(self.get_predictor_states())]
    it = 0
    while self.stop_criterion(hypos):
        if it > 2 * len(src_sentence):  # Prevent infinite loops
            break
        it = it + 1
        next_hypos = []
        next_scores = []
        for hypo in hypos:
            if hypo.get_last_word() == utils.EOS_ID:
                next_hypos.append(hypo)
                next_scores.append(hypo.score)
                continue
            self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
            if hypo.word_to_consume is not None:  # Consume if cheap expand
                self.consume(hypo.word_to_consume)
            posterior, score_breakdown = self.apply_predictors()
            hypo.predictor_states = self.get_predictor_states()
            top = utils.argmax_n(posterior, self.beam_size)
            for trgt_word in top:
                next_hypo = hypo.cheap_expand(trgt_word,
                                              posterior[trgt_word],
                                              score_breakdown[trgt_word])
                next_hypos.append(next_hypo)
                next_scores.append(next_hypo.score)
        hypos = [next_hypos[idx]
                 for idx in np.argsort(next_scores)[-self.beam_size:]]
    ret = [hypos[-idx-1].generate_full_hypothesis()
           for idx in xrange(len(hypos))
           if hypos[-idx-1].get_last_word() == utils.EOS_ID]
    if not ret:
        logging.warn("No complete hypotheses found for %s" % src_sentence)
        return [hypos[-idx-1].generate_full_hypothesis()
                for idx in xrange(len(hypos))]
    return ret
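# A hedged usage sketch: decoder construction and predictor wiring happen
# elsewhere. `decoder` is assumed to be an instance of the class defining
# decode() above, the source sentence is a plain list of word ids, and the
# attribute names on the returned full hypotheses (total_score,
# trgt_sentence) are assumptions.
def _decode_example(decoder, src_ids):
    full_hypos = decoder.decode(src_ids)
    for hypo in full_hypos:
        logging.info("%f: %s" % (hypo.total_score, hypo.trgt_sentence))
    return full_hypos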
def _scale_combine_non_zero_scores(non_zero_word_count,
                                   posteriors,
                                   unk_probs,
                                   pred_weights,
                                   top_n=0):
    scaled_posteriors = []
    for posterior, unk_prob, weight in zip(
            posteriors, unk_probs, pred_weights):
        if isinstance(posterior, dict):
            arr = np.full(non_zero_word_count, unk_prob)
            for word, score in posterior.iteritems():
                arr[word] = score
            scaled_posteriors.append(arr * weight)
        else:
            n_unks = non_zero_word_count - len(posterior)
            if n_unks > 0:
                posterior = np.concatenate((
                    posterior, np.full(n_unks, unk_prob)))
            elif n_unks < 0:
                posterior = posterior[:n_unks]
            scaled_posteriors.append(posterior * weight)
    combined_scores = np.sum(scaled_posteriors, axis=0)
    return utils.argmax_n(combined_scores, top_n)
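# utils.argmax_n() is used throughout this file on both dicts and numpy
# arrays. A minimal sketch of the assumed behaviour (keys or indices of the
# n largest scores) is given below; the real utility may be more general,
# e.g. regarding the order of the returned elements.
def _argmax_n_sketch(scores, n):
    if isinstance(scores, dict):
        return sorted(scores, key=scores.get, reverse=True)[:n]
    return np.argsort(np.asarray(scores))[-n:][::-1]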
def maybe_add_new_top_tokens(self, top_terminals, hypo, next_hypos):
    """Expand ``hypo`` internally and update the set of best terminal
    tokens. Non-terminal continuations are collected in ``next_hypos``;
    a terminal continuation replaces an existing entry in
    ``top_terminals`` / ``self.tok_to_hypo`` if it scores higher.
    """
    new_post = self.predict_next(predicting_internally=True)
    top_tokens = utils.argmax_n(new_post, self.beam_size)
    next_state = copy.deepcopy(self.predictor.get_state())
    for tok in top_tokens:
        score = hypo.score + new_post[tok]
        new_hypo = InternalHypo(score, new_post[tok], next_state, tok)
        if tok not in self.nonterminals:
            add_hypo = False
            found = False
            for t in top_terminals:
                if t == tok:
                    found = True
                    if self.tok_to_hypo[tok].score < new_hypo.score:
                        add_hypo = True
                        top_terminals.remove(t)
                    break
            if not found:
                add_hypo = True
            if add_hypo:
                top_terminals.append(tok)
                self.tok_to_hypo[tok] = new_hypo
        else:
            next_hypos.append(new_hypo)
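# A heavily hedged sketch of how the two helpers above might be driven by an
# internal word-level search: expand non-terminal hypotheses breadth-first
# and let maybe_add_new_top_tokens() keep the best-scoring hypothesis per
# terminal token in self.tok_to_hypo. The loop structure, depth cap, method
# name and the InternalHypo attribute names are assumptions, not the actual
# driver code.
def _internal_search_sketch(self, posterior, max_depth=10):
    hypos, top_terminals = self.initialize_internal_hypos(posterior)
    hypos = [h for h in hypos if h.word_to_consume in self.nonterminals]
    for _ in range(max_depth):
        if not hypos:
            break
        next_hypos = []
        for hypo in hypos:
            self.predictor.set_state(copy.deepcopy(hypo.predictor_state))
            self.predictor.consume(hypo.word_to_consume)
            self.maybe_add_new_top_tokens(top_terminals, hypo, next_hypos)
        next_hypos.sort(key=lambda h: -h.score)
        hypos = next_hypos[:self.beam_size]
    return {tok: self.tok_to_hypo[tok].score for tok in top_terminals}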