Example #1
0
    def find_word_beam(self, posterior):
        """
        Do an internal beam search over non-terminal functions to find 
        the next best n terminal tokens, as ranked by normalized path score
        
        Returns: posterior containing up to n terminal tokens 
                 and their normalized path score
        """
        top_tokens = utils.argmax_n(posterior, self.beam_size)
        hypos = [
            InternalHypo(posterior[tok], self.get_state(), tok)
            for tok in top_tokens if tok in self.nonterminals
        ]
        best_hypo = InternalHypo(utils.NEG_INF, None, None)
        best_posterior = None
        while hypos and hypos[0].norm_score > best_hypo.norm_score:
            next_hypos = []
            for hypo in hypos:
                self.set_state(copy.deepcopy(hypo.predictor_state))
                self.consume(hypo.word_to_consume)
                new_post = self.predict_next(predicting_next_word=True)
                top_tokens = utils.argmax_n(new_post, self.beam_size)
                next_state = copy.deepcopy(self.get_state())
                new_norm_score = self.norm_score(
                    new_post[top_tokens[0]] + hypo.score, hypo.beam_len + 1)
                if (self.are_best_terminal(new_post)
                        and new_norm_score > best_hypo.norm_score):
                    best_hypo = copy.deepcopy(hypo)
                    best_hypo.predictor_state = next_state
                    best_hypo.norm_score = new_norm_score
                    best_posterior = new_post
                    self.norm_score(best_hypo)
                else:
                    if hypo.beam_len == self.max_internal_len:
                        logging.info('cutting off internal hypo - too long')
                        continue
                    for tok in top_tokens:
                        if tok in self.nonterminals:
                            new_hypo = copy.deepcopy(hypo)
                            new_hypo.extend(new_post[tok], next_state, tok)
                            next_hypos.append(new_hypo)

            # Python 2's map() is eager, so norm_hypo_score runs on every hypo here.
            map(self.norm_hypo_score, next_hypos)
            next_hypos.sort(key=lambda h: -h.norm_score)
            hypos = next_hypos[:self.beam_size]
        self.set_state(best_hypo.predictor_state)
        # Python 2: keys() returns a list, so entries can be deleted during
        # iteration; non-terminal tokens are dropped from the returned posterior.
        for tok in best_posterior.keys():
            best_posterior[tok] = self.norm_score(
                best_hypo.score + best_posterior[tok], best_hypo.beam_len + 1)
            if tok in self.nonterminals:
                del best_posterior[tok]
        return best_posterior
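
All of these examples rely on utils.argmax_n, which is not shown here. As a rough, hedged sketch of its assumed semantics (keys of a dict posterior, or indices of an array, for the n largest scores, best first), it could look like the following; the real helper in the codebase may differ in details such as ordering:

    import numpy as np

    def argmax_n(scores, n):
        # Dict posteriors: sort the keys by score, best first.
        if isinstance(scores, dict):
            return sorted(scores, key=scores.get, reverse=True)[:n]
        arr = np.asarray(scores)
        if n >= len(arr):
            return np.argsort(-arr)
        # argpartition finds the n largest without a full sort; the small
        # result is then sorted so that index 0 is the best token.
        idx = np.argpartition(arr, -n)[-n:]
        return idx[np.argsort(-arr[idx])]
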
Example #2
0
    def find_word_beam(self, posterior):
        """
        Do an internal beam search over non-terminal functions to find 
        the next best n terminal tokens, as ranked by normalized path score
        
        Returns: posterior containing up to n terminal tokens 
                 and their normalized path score
        """
        top_tokens = utils.argmax_n(posterior, self.beam_size)
        hypos = [InternalHypo(posterior[tok], self.get_state(), tok) 
                 for tok in top_tokens if tok in self.nonterminals]
        best_hypo = InternalHypo(utils.NEG_INF, None, None)
        best_posterior = None
        while hypos and hypos[0].norm_score > best_hypo.norm_score:
            next_hypos = []
            for hypo in hypos:
                self.set_state(copy.deepcopy(hypo.predictor_state))
                self.consume(hypo.word_to_consume)
                new_post = self.predict_next(predicting_next_word=True)
                top_tokens = utils.argmax_n(new_post, self.beam_size)
                next_state = copy.deepcopy(self.get_state())
                new_norm_score = self.norm_score(
                    new_post[top_tokens[0]] + hypo.score, hypo.beam_len + 1)
                if (self.are_best_terminal(new_post) and
                    new_norm_score > best_hypo.norm_score):
                    best_hypo = copy.deepcopy(hypo)
                    best_hypo.predictor_state = next_state
                    best_hypo.norm_score = new_norm_score
                    best_posterior = new_post
                    self.norm_score(best_hypo)
                else:
                    if hypo.beam_len == self.max_internal_len:
                        logging.info('cutting off internal hypo - too long')
                        continue
                    for tok in top_tokens:
                        if tok in self.nonterminals:
                            new_hypo = copy.deepcopy(hypo)
                            new_hypo.extend(new_post[tok], next_state, tok)
                            next_hypos.append(new_hypo)

            # Python 2's map() is eager, so norm_hypo_score runs on every hypo here.
            map(self.norm_hypo_score, next_hypos)
            next_hypos.sort(key=lambda h: -h.norm_score)
            hypos = next_hypos[:self.beam_size]
        self.set_state(best_hypo.predictor_state)
        # Python 2: keys() returns a list, so entries can be deleted during
        # iteration; non-terminal tokens are dropped from the returned posterior.
        for tok in best_posterior.keys():
            best_posterior[tok] = self.norm_score(
                best_hypo.score + best_posterior[tok], best_hypo.beam_len + 1)
            if tok in self.nonterminals:
                del best_posterior[tok]
        return best_posterior
Example #3
0
 def _expand_hypo(self, hypo):
     """Expands hypothesis by calling predict_next() only on one
     single predictor.
     """
     if hypo.score <= self.min_score:
         return []
     pred_idx = 0
     for idx, s in enumerate(hypo.predictor_states):
         if not s is None:
             pred_idx = idx
             break
     self.apply_predictors_count += 1
     predictor = self.predictors[pred_idx][0]
     predictor.set_state(copy.deepcopy(hypo.predictor_states[pred_idx]))
     if not hypo.word_to_consume is None:  # Consume if cheap expand
         predictor.consume(hypo.word_to_consume)
         hypo.word_to_consume = None
     posterior = predictor.predict_next()
     hypo.predictor_states = list(hypo.predictor_states)
     hypo.predictor_states[pred_idx] = predictor.get_state()
     breakdown_dummy = [(0.0, 1.0)] * len(self.predictors)
     ret = []
     for trgt_word in utils.argmax_n(posterior, self.beam_size):
         score_breakdown = list(breakdown_dummy)
         score_breakdown[pred_idx] = (posterior[trgt_word], 1.0)
         ret.append(
             hypo.cheap_expand(trgt_word, posterior[trgt_word],
                               score_breakdown))
     return ret
Example #4
0
 def _create_short_list(self, logits):
     """Creates a set of tokens which are likely translations."""
     words = set()
     filt_logits = logits[self.min_id:]
     for strat in self.shortlist_strategies:
         if strat[:3] == "top":
             n = int(strat[3:])
             words.update(utils.argmax_n(filt_logits, n))
         elif strat[:4] == "prob":
             p = float(strat[4:])
             unnorm_probs = np.exp(filt_logits)
             threshold = np.sum(unnorm_probs) * p
             acc = 0.0
             for word in np.argsort(filt_logits)[::-1]:
                 acc += unnorm_probs[word]
                 words.add(word)
                 if acc >= threshold:
                     break
         else:
             raise AttributeError("Unknown shortlist strategy '%s'" % strat)
     if self.min_id:
         words = set(w+self.min_id for w in words)
     try:
         words.remove(utils.EOS_ID)
     except KeyError:
         pass
     return words
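
The "prob" branch above ("prob" followed by a fraction p) keeps adding the highest-scoring words until their unnormalized probability mass reaches the fraction p of the total mass. A toy run of that rule with invented values (not from the codebase):

    import numpy as np

    logits = np.log([0.5, 0.3, 0.1, 0.06, 0.04])
    p = 0.85
    unnorm_probs = np.exp(logits)
    threshold = np.sum(unnorm_probs) * p
    acc, words = 0.0, []
    for word in np.argsort(logits)[::-1]:
        acc += unnorm_probs[word]
        words.append(int(word))
        if acc >= threshold:
            break
    print(words)  # [0, 1, 2]: the three best words cover >= 85% of the mass
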
Example #5
0
 def _expand_hypo(self, hypo):
     """Expands hypothesis by calling predict_next() only on one
     single predictor.
     """
     if hypo.score <= self.min_score:
         return []
     pred_idx = 0
     for idx, s in enumerate(hypo.predictor_states):
         if not s is None:
             pred_idx = idx
             break
     self.apply_predictors_count += 1
     predictor = self.predictors[pred_idx][0]
     predictor.set_state(copy.deepcopy(hypo.predictor_states[pred_idx]))
     if not hypo.word_to_consume is None: # Consume if cheap expand
         predictor.consume(hypo.word_to_consume)
         hypo.word_to_consume = None
     posterior = predictor.predict_next()
     hypo.predictor_states = list(hypo.predictor_states)
     hypo.predictor_states[pred_idx] = predictor.get_state()
     breakdown_dummy = [(0.0, 1.0)] * len(self.predictors)
     ret = []
     for trgt_word in utils.argmax_n(posterior, self.beam_size):
         score_breakdown = list(breakdown_dummy)
         score_breakdown[pred_idx] = (posterior[trgt_word], 1.0)
         ret.append(hypo.cheap_expand(trgt_word,
                                      posterior[trgt_word],
                                      score_breakdown))
     return ret
Example #6
0
 def _create_short_list(self, logits):
     """Creates a set of tokens which are likely translations."""
     words = set()
     filt_logits = logits[self.min_id:]
     for strat in self.shortlist_strategies:
         if strat[:3] == "top":
             n = int(strat[3:])
             words.update(utils.argmax_n(filt_logits, n))
         elif strat[:4] == "prob":
             p = float(strat[4:])
             unnorm_probs = np.exp(filt_logits)
             threshold = np.sum(unnorm_probs) * p
             acc = 0.0
             for word in np.argsort(filt_logits)[::-1]:
                 acc += unnorm_probs[word]
                 words.add(word)
                 if acc >= threshold:
                     break
         else:
             raise AttributeError("Unknown shortlist strategy '%s'" % strat)
     if self.min_id:
         words = set(w + self.min_id for w in words)
     try:
         words.remove(utils.EOS_ID)
     except KeyError:
         pass
     return words
Example #7
0
 def _get_next_hypos_maxent(self, hypos, scores):
     """Get hypotheses of the next time step.
     
     Args:
         hypos (list): List of hypotheses
         scores (list): hypo scores with heuristic estimates
     
     Return:
         list. List with hypotheses.
     """
     # Update self.maxent_ngram_mass
     for hypo_score, hypo in zip(scores, hypos):
         s = hypo.trgt_sentence
         h = s[:-1]
         l = len(s)
         if l <= self.maxent_processed_length:
             continue
         # TODO: Could be more efficient by checking is_sublist for
         # all orders in one pass
         for order in xrange(min(len(s), self.max_order),
                             self.min_order - 1, -1):
             ngram = s[-order:]
             # Do not use this ngram if it occurs before
             if is_sublist(ngram, h):
                 break  # All lower order ngrams are too
             prev_mass = self.maxent_ngram_mass.get(ngram)
             if prev_mass is None:
                 updated_mass = hypo_score
             else:
                 updated_mass = max(
                     prev_mass, hypo_score,
                     np.log(np.exp(prev_mass) + np.exp(hypo_score)))
             self.maxent_ngram_mass.add(ngram, updated_mass)
     self.maxent_processed_length += 1
     exp_counts = []
     for hypo in hypos:
         s = hypo.trgt_sentence
         l = len(s)
         cnt = 0.0
         for order in xrange(self.min_order, self.max_order + 1):
             for start in xrange(l - order + 1):
                 logprob = self.maxent_ngram_mass.get(s[start:start +
                                                        order])
                 # MaxEnt means that we estimate the probability of the
                 # ngram as p + (1-p) * 0.5, i.e. 0.5 + 0.5 * p.
                 if logprob:
                     cnt += 1.0 + np.exp(logprob)
                 else:
                     cnt += 1.0
         exp_counts.append(cnt * 0.5)
     next_hypos = []
     for idx in utils.argmax_n(exp_counts, self.beam_size):
         hypos[idx].bleu = exp_counts[idx]
         next_hypos.append(hypos[idx])
         logging.debug(
             "Selected (score=%f expected_counts=%f): %s" %
             (scores[idx], hypos[idx].bleu, hypos[idx].trgt_sentence))
     return next_hypos
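
The mass update above adds two log-probabilities via np.log(np.exp(a) + np.exp(b)). As a side note (not a claim about the surrounding max()), np.logaddexp computes the same sum without leaving log space, which keeps very negative scores finite:

    import numpy as np

    prev_mass, hypo_score = -1.2, -0.7
    direct = np.log(np.exp(prev_mass) + np.exp(hypo_score))
    stable = np.logaddexp(prev_mass, hypo_score)
    assert np.isclose(direct, stable)
    # For very negative scores the direct form underflows to log(0) = -inf,
    # which is presumably why the original guards it with max(); logaddexp
    # stays finite:
    print(np.logaddexp(-1000.0, -1001.0))  # roughly -999.69
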
Example #8
0
 def are_best_terminal(self, posterior):
     """Return true if most probable tokens in posterior are all terminals
     (including EOS)
     """
     best_rule_ids = utils.argmax_n(posterior, self.beam_size)
     for tok in best_rule_ids:
         if tok in self.nonterminals:
             return False
     return True
Example #9
0
 def _get_next_hypos_renorm(self, hypos, scores):
     """Get hypotheses of the next time step.
     
     Args:
         hypos (list): List of hypotheses
         scores (list): hypo scores with heuristic estimates
     
     Return:
         list. List with hypotheses.
     """
     probs = (1.0 - self.smooth_factor) * np.exp(
         scores - utils.log_sum(scores)) \
         + self.smooth_factor / float(len(scores))
     lengths = [len(hypo.trgt_sentence) for hypo in hypos]
     logging.debug("%d candidates min_length=%d max_length=%d" %
                   (len(lengths), min(lengths), max(lengths)))
     ngrams = []
     for hypo in hypos:
         ngram_list = []
         for order in xrange(self.min_order, self.max_order + 1):
             ngram_list.append(
                 set([
                     " ".join(
                         map(str, hypo.trgt_sentence[start:start + order]))
                     for start in xrange(len(hypo.trgt_sentence))
                 ]))
         ngrams.append(ngram_list)
     exp_bleus = []
     for hyp_ngrams, hyp_length in zip(ngrams, lengths):
         precisions = np.array([
             self._compute_bleu(hyp_ngrams, ref_ngrams, hyp_length,
                                ref_length)
             for ref_ngrams, ref_length in zip(ngrams, lengths)
         ])
         exp_bleus.append(precisions * probs)
     next_hypos = []
     if self.selection_strategy == 'oracle_bleu':
         for _ in xrange(min(self.beam_size, len(hypos))):
             idx = np.argmax(np.sum(exp_bleus, axis=1))
             bleu = np.sum(exp_bleus[idx])
             logging.debug("Selected (score=%f expected_bleu=%f): %s" %
                           (scores[idx], bleu, hypos[idx].trgt_sentence))
             hypos[idx].bleu = -bleu
             next_hypos.append(hypos[idx])
             gained_bleus = exp_bleus[idx]
             for update_idx in xrange(len(exp_bleus)):
                 exp_bleus[update_idx] = np.maximum(exp_bleus[update_idx],
                                                    gained_bleus)
     else:  # selection strategy 'bleu'
         total_exp_bleus = np.sum(exp_bleus, axis=1)
         for idx in utils.argmax_n(total_exp_bleus, self.beam_size):
             hypos[idx].bleu = total_exp_bleus[idx]
             next_hypos.append(hypos[idx])
             logging.debug(
                 "Selected (score=%f expected_bleu=%f): %s" %
                 (scores[idx], hypos[idx].bleu, hypos[idx].trgt_sentence))
     return next_hypos
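
The probs computation at the top of this method mixes a softmax over the hypothesis scores with a uniform distribution. A minimal numeric sketch, assuming utils.log_sum is a log-sum-exp:

    import numpy as np

    scores = np.array([-1.0, -2.0, -3.0])   # toy hypothesis scores
    smooth_factor = 0.1
    softmax = np.exp(scores - np.logaddexp.reduce(scores))
    probs = (1.0 - smooth_factor) * softmax + smooth_factor / len(scores)
    assert np.isclose(probs.sum(), 1.0)     # still a proper distribution
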
Example #10
0
 def are_best_terminal(self, posterior):
     """Return true if most probable tokens in posterior are all terminals
     (including EOS)
     """
     best_rule_ids = utils.argmax_n(posterior, self.beam_size)
     for tok in best_rule_ids:
         if tok in self.nonterminals:
             return False
     return True
Example #11
0
 def _get_next_hypos_maxent(self, hypos, scores):
     """Get hypotheses of the next time step.
     
     Args:
         hypos (list): List of hypotheses
         scores (list): hypo scores with heuristic estimates
     
     Return:
         list. List with hypotheses.
     """
     # Update self.maxent_ngram_mass
     for hypo_score, hypo in zip(scores, hypos):
         s = hypo.trgt_sentence
         h = s[:-1]
         l = len(s)
         if l <= self.maxent_processed_length:
             continue
         # TODO: Could be more efficient by checking is_sublist for
         # all orders in one pass
         for order in xrange(min(len(s), self.max_order), 
                             self.min_order-1,
                             -1):
             ngram = s[-order:]
             # Do not use this ngram if it occurs before
             if is_sublist(ngram, h):
                 break # All lower order ngrams are too
             prev_mass = self.maxent_ngram_mass.get(ngram)
             if prev_mass is None:
                 updated_mass = hypo_score
             else:
                 updated_mass = max(prev_mass, hypo_score, 
                         np.log(np.exp(prev_mass)+np.exp(hypo_score)))
             self.maxent_ngram_mass.add(ngram, updated_mass)
     self.maxent_processed_length += 1
     exp_counts = []
     for hypo in hypos:
         s = hypo.trgt_sentence
         l = len(s)
         cnt = 0.0
         for order in xrange(self.min_order, self.max_order+1):
             for start in xrange(l-order+1):
                 logprob = self.maxent_ngram_mass.get(s[start:start+order])
                 # MaxEnt means that we estimate the probability of the 
                 # ngram as p + (1-p) * 0.5, i.e. 0.5 + 0.5 * p.
                 if logprob:
                     cnt += 1.0 + np.exp(logprob)
                 else:
                     cnt += 1.0
         exp_counts.append(cnt * 0.5)
     next_hypos = []
     for idx in utils.argmax_n(exp_counts, self.beam_size):
         hypos[idx].bleu = exp_counts[idx]
         next_hypos.append(hypos[idx])
         logging.debug("Selected (score=%f expected_counts=%f): %s"
             % (scores[idx], hypos[idx].bleu, hypos[idx].trgt_sentence))
     return next_hypos
Example #12
0
    def apply_predictors(self, top_n=0):
        """Get the distribution over the next word by combining the
        predictor scores.

        Args:
            top_n (int): If positive, return only the best n words.
        
        Returns:
            combined,score_breakdown: Two dicts. ``combined`` maps 
            target word ids to the combined score, ``score_breakdown``
            contains the scores for each predictor separately 
            represented as tuples (unweighted_score, predictor_weight)
        """
        self.apply_predictors_count += 1
        bounded_predictors = [
            el for el in self.predictors
            if not isinstance(el[0], UnboundedVocabularyPredictor)
        ]
        # Get bounded posteriors
        bounded_posteriors = [
            p.predict_next() for (p, _) in bounded_predictors
        ]
        non_zero_words = self._get_non_zero_words(bounded_predictors,
                                                  bounded_posteriors)
        if not non_zero_words:  # Special case: no word is possible
            non_zero_words = set([utils.EOS_ID])
        # Add unbounded predictors and unk probabilities
        posteriors = []
        unk_probs = []
        pred_weights = []
        bounded_idx = 0
        for (p, w) in self.predictors:
            if isinstance(p, UnboundedVocabularyPredictor):
                posterior = p.predict_next(non_zero_words)
            else:  # Take it from the bounded_* variables
                posterior = bounded_posteriors[bounded_idx]
                bounded_idx += 1
            posteriors.append(posterior)
            unk_probs.append(p.get_unk_probability(posterior))
            pred_weights.append(w)
        pred_weights = self.apply_interpolation_strategy(
            pred_weights, non_zero_words, posteriors, unk_probs)
        ret = self.combine_posteriors(non_zero_words, posteriors, unk_probs,
                                      pred_weights, top_n)
        if not self.allow_unk_in_output and utils.UNK_ID in ret[0]:
            del ret[0][utils.UNK_ID]
            del ret[1][utils.UNK_ID]
        if top_n > 0 and len(ret[0]) > top_n:
            top = utils.argmax_n(ret[0], top_n)
            ret = ({w: ret[0][w] for w in top}, {w: ret[1][w] for w in top})
        self.notify_observers(ret, message_type=MESSAGE_TYPE_POSTERIOR)
        return ret
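
The returned score_breakdown stores (unweighted_score, predictor_weight) tuples per target word. A hedged sketch of how such a breakdown collapses into one combined score under a plain log-linear combination (the actual combination is whatever combine_posteriors and the configured scheme compute):

    # Hypothetical breakdown for one target word and two predictors.
    score_breakdown = [(-0.5, 0.6), (-1.2, 0.4)]
    combined = sum(score * weight for score, weight in score_breakdown)
    # combined is approximately -0.78 (= -0.5 * 0.6 + -1.2 * 0.4)
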
Example #13
0
 def initialize_internal_hypos(self, posterior):
     """Build initial internal hypotheses from the ``beam_size`` best
     tokens in ``posterior``. Terminal tokens are additionally recorded
     in ``self.tok_to_hypo`` and returned as ``top_terminals``.
     """
     top_tokens = utils.argmax_n(posterior, self.beam_size)
     hypos = []
     top_terminals = []
     for tok in top_tokens:
         new_hypo = InternalHypo(posterior[tok], posterior[tok],
                                 copy.deepcopy(self.predictor.get_state()),
                                 tok)
         if tok not in self.nonterminals:
             self.tok_to_hypo[tok] = new_hypo
             top_terminals.append(tok)
         hypos.append(new_hypo)
     return hypos, top_terminals
Example #14
0
 def _get_next_hypos_renorm(self, hypos, scores):
     """Get hypotheses of the next time step.
     
     Args:
         hypos (list): List of hypotheses
         scores (list): hypo scores with heuristic estimates
     
     Return:
         list. List with hypotheses.
     """
     probs = (1.0 - self.smooth_factor) * np.exp(
         scores - utils.log_sum(scores)) \
         + self.smooth_factor / float(len(scores))
     lengths = [len(hypo.trgt_sentence) for hypo in hypos]
     logging.debug("%d candidates min_length=%d max_length=%d" % 
         (len(lengths), min(lengths), max(lengths)))
     ngrams = []
     for hypo in hypos:
         ngram_list = []
         for order in xrange(self.min_order, self.max_order+1):
             ngram_list.append(set([
                 " ".join(map(str, hypo.trgt_sentence[start:start+order]))
                 for start in xrange(len(hypo.trgt_sentence))]))
         ngrams.append(ngram_list)
     exp_bleus = []
     for hyp_ngrams, hyp_length in zip(ngrams, lengths):
         precisions = np.array([self._compute_bleu(
                 hyp_ngrams, ref_ngrams, hyp_length, ref_length)
             for ref_ngrams, ref_length in zip(ngrams, lengths)])
         exp_bleus.append(precisions * probs)
     next_hypos = []
     if self.selection_strategy == 'oracle_bleu': 
         for _ in xrange(min(self.beam_size, len(hypos))):
             idx = np.argmax(np.sum(exp_bleus, axis=1))
             bleu = np.sum(exp_bleus[idx])
             logging.debug("Selected (score=%f expected_bleu=%f): %s"
                     % (scores[idx], bleu, hypos[idx].trgt_sentence))
             hypos[idx].bleu = -bleu
             next_hypos.append(hypos[idx])
             gained_bleus = exp_bleus[idx]
             for update_idx in xrange(len(exp_bleus)):
                 exp_bleus[update_idx] = np.maximum(exp_bleus[update_idx], 
                                                    gained_bleus)
     else: # selection strategy 'bleu'
         total_exp_bleus = np.sum(exp_bleus, axis=1)
         for idx in utils.argmax_n(total_exp_bleus, self.beam_size):
             hypos[idx].bleu = total_exp_bleus[idx]
             next_hypos.append(hypos[idx])
             logging.debug("Selected (score=%f expected_bleu=%f): %s"
                 % (scores[idx], hypos[idx].bleu, hypos[idx].trgt_sentence))
     return next_hypos
Example #15
0
 def initialize_internal_hypos(self, posterior):
     """Build initial internal hypotheses from the ``beam_size`` best
     tokens in ``posterior``. Terminal tokens are additionally recorded
     in ``self.tok_to_hypo`` and returned as ``top_terminals``.
     """
     top_tokens = utils.argmax_n(posterior, self.beam_size)
     hypos = []
     top_terminals = []
     for tok in top_tokens:
         new_hypo = InternalHypo(posterior[tok],
                                 posterior[tok],
                                 copy.deepcopy(self.predictor.get_state()),
                                 tok)
         if tok not in self.nonterminals:
             self.tok_to_hypo[tok] = new_hypo
             top_terminals.append(tok)
         hypos.append(new_hypo)
     return hypos, top_terminals
Example #16
0
    def apply_predictors(self, top_n=0):
        """Get the distribution over the next word by combining the
        predictor scores.

        Args:
            top_n (int): If positive, return only the best n words.
        
        Returns:
            combined,score_breakdown: Two dicts. ``combined`` maps 
            target word ids to the combined score, ``score_breakdown``
            contains the scores for each predictor separately 
            represented as tuples (unweighted_score, predictor_weight)
        """
        self.apply_predictors_count += 1
        bounded_predictors = [el for el in self.predictors 
                        if not isinstance(el[0], UnboundedVocabularyPredictor)]
        # Get bounded posteriors
        bounded_posteriors = [p.predict_next() for (p, _) in bounded_predictors]
        non_zero_words = self._get_non_zero_words(bounded_predictors,
                                                  bounded_posteriors)
        if not non_zero_words: # Special case: no word is possible
            non_zero_words = set([utils.EOS_ID])
        # Add unbounded predictors and unk probabilities
        posteriors = []
        unk_probs = []
        pred_weights = []
        bounded_idx = 0
        for (p, w) in self.predictors:
            if isinstance(p, UnboundedVocabularyPredictor):
                posterior = p.predict_next(non_zero_words)
            else: # Take it from the bounded_* variables
                posterior = bounded_posteriors[bounded_idx]
                bounded_idx += 1
            posteriors.append(posterior)
            unk_probs.append(p.get_unk_probability(posterior))
            pred_weights.append(w)
        pred_weights = self.apply_interpolation_strategy(
                pred_weights, non_zero_words, posteriors, unk_probs)
        ret = self.combine_posteriors(
            non_zero_words, posteriors, unk_probs, pred_weights, top_n)
        if not self.allow_unk_in_output and utils.UNK_ID in ret[0]:
            del ret[0][utils.UNK_ID]
            del ret[1][utils.UNK_ID]
        if top_n > 0 and len(ret[0]) > top_n:
            top = utils.argmax_n(ret[0], top_n)
            ret = ({w: ret[0][w] for w in top},
                   {w: ret[1][w] for w in top})
        self.notify_observers(ret, message_type=MESSAGE_TYPE_POSTERIOR)
        return ret
Example #17
0
 def _combine_posteriors_norm_none(self,
                                   non_zero_words,
                                   posteriors,
                                   unk_probs,
                                   top_n=0):
     """Combine predictor posteriors according the normalization
     scheme ``CLOSED_VOCAB_SCORE_NORM_NONE``. For more information
     on closed vocabulary predictor score normalization see the 
     documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.
     
     Args:
         non_zero_words (set): All words with positive probability
         posteriors: Predictor posterior distributions calculated
                     with ``predict_next()``
         unk_probs: UNK probabilities of the predictors, calculated
                    with ``get_unk_probability``
         top_n (int): If positive, return only top n words
     
     Returns:
         combined,score_breakdown: like in ``apply_predictors()``
     """
     if isinstance(non_zero_words, xrange) and top_n > 0:
         n_words = len(non_zero_words)
         scaled_posteriors = []
         for posterior, unk_prob, (_, weight) in zip(
                       posteriors, unk_probs, self.predictors):
             if isinstance(posterior, dict):
                 arr = np.full(n_words, unk_prob)
                 for word, score in posterior.iteritems():
                     arr[word] = score
                 scaled_posteriors.append(arr * weight)
             else:
                 n_unks = n_words - len(posterior)
                 if n_unks:
                     posterior = np.concatenate((
                            posterior, np.full(n_unks, unk_prob)))
                 scaled_posteriors.append(posterior * weight)
         combined_scores = np.sum(scaled_posteriors, axis=0)
         non_zero_words = utils.argmax_n(combined_scores, top_n)
     combined = {}
     score_breakdown = {}
     for trgt_word in non_zero_words:
         preds = [(utils.common_get(posteriors[idx],
                                    trgt_word, unk_probs[idx]), w)
                     for idx, (_,w) in enumerate(self.predictors)]
         combined[trgt_word] = self.combi_predictor_method(preds) 
         score_breakdown[trgt_word] = preds
     return combined, score_breakdown
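
The dict branch above densifies a sparse posterior by filling missing words with the predictor's UNK score before applying the weight. A toy version with made-up values:

    import numpy as np

    n_words, unk_prob, weight = 5, -10.0, 0.5
    posterior = {0: -0.1, 3: -2.0}          # sparse dict posterior
    arr = np.full(n_words, unk_prob)        # every word defaults to the UNK score
    for word, score in posterior.items():
        arr[word] = score
    scaled = arr * weight
    print(scaled.tolist())  # [-0.05, -5.0, -5.0, -1.0, -5.0]
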
Example #18
0
 def _expand_hypo(self, hypo):
     """Expands ``hypo``. The EOS continuation is added as a full
     hypothesis; all other top expansions are returned as children.
     """
     self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
     if not hypo.word_to_consume is None:  # Consume if cheap expand
         self.consume(hypo.word_to_consume)
         hypo.word_to_consume = None
     posterior, score_breakdown = self.apply_predictors()
     hypo.predictor_states = self.get_predictor_states()
     top = utils.argmax_n(posterior, self.beam_size)
     # EOS hypo
     eos_hypo = hypo.cheap_expand(utils.EOS_ID, posterior[utils.EOS_ID],
                                  score_breakdown[utils.EOS_ID])
     self.add_full_hypo(eos_hypo.generate_full_hypothesis())
     # All other hypos
     return [
         hypo.cheap_expand(trgt_word, posterior[trgt_word],
                           score_breakdown[trgt_word]) for trgt_word in top
         if trgt_word != utils.EOS_ID
     ]
Example #19
0
    def _expand_hypo_nmt(self, hypo):
        """Get the best beam size expansions of ``hypo`` by one CHAR based on nmt predictor scores only.
        
        Args:
        hypo (PartialHypothesis): Hypothesis to expans
        
        Returns:
        list. List of child hypotheses
        """
        if hypo.score <= self.min_score:
            return []
        self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
        #        self.set_predictor_states(hypo.predictor_states)
        if not hypo.word_to_consume is None:  # Consume if cheap expand
            self.consume(hypo.word_to_consume)
            hypo.word_to_consume = None
        posterior, score_breakdown = self.apply_predictors()
        #        self.apply_predictors()
        hypo.predictor_states = self.get_predictor_states()
        #        nmt_only_scores = {k: sum([v[i][0] for i,s in enumerate(v) if self.predictor_names[i]=="nmt"]) for k, v in score_breakdown.items()}
        #        nmt_only_scores = np.array([v for k,v in sorted(nmt_only_scores.items(),key=lambda v:v[0])])
        #
        nmt_only_scores = np.array([
            sum([
                v[i][0] for i, s in enumerate(v)
                if self.predictor_names[i] == "nmt"
            ]) for k, v in sorted(score_breakdown.items(), key=lambda t: t[0])
        ])

        #        nmt_only_scores = np.array([sum([v[i][0] for i,s in enumerate(v) if self.predictor_names[i]=="nmt"]) for k,v in sorted(self.score_breakdown.items(),key=lambda t:t[0])])

        #        logging.debug(u'score_breakdown.items(): {}'.format(score_breakdown.items()))
        #        logging.debug(u'next nmt scores: {}'.format(nmt_only_scores))
        top = utils.argmax_n(nmt_only_scores, self.beam_size)
        #        char_only_scores = {k: sum([v[i][0] for i,s in enumerate(v) if self.predictor_levels[i]=="c"]) for k, v in score_breakdown.items()}
        #        top = utils.argmax_n(char_only_scores, self.beam_size)
        return [
            hypo.cheap_expand(trgt_word, posterior[trgt_word],
                              score_breakdown[trgt_word]) for trgt_word in top
        ]
Example #20
0
    def _expand_hypo(self, hypo):
        """Get the best beam size expansions of ``hypo``.

        Args:
            hypo (SimPartialHypothesis): Hypothesis to expand

        Returns:
            list. List of child hypotheses
        """
        if hypo.score <= self.min_score:
            return []
        self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
        if not hypo.word_to_consume is None:  # Consume if cheap expand
            self.consume(hypo.word_to_consume)
            hypo.word_to_consume = None
        posterior, score_breakdown = self.apply_predictors(self.beam_size)
        hypo.predictor_states = self.get_predictor_states()
        top = utils.argmax_n(posterior, self.beam_size)
        return [
            hypo.cheap_expand(trgt_word, posterior[trgt_word],
                              score_breakdown[trgt_word]) for trgt_word in top
        ]
Example #21
0
 def decode(self, src_sentence):
     """Decodes a single source sentence using beam search. """
     self.initialize_predictors(src_sentence)
     hypos = [PartialHypothesis(self.get_predictor_states())]
     it = 0
     while self.stop_criterion(hypos):
         if it > 2*len(src_sentence): # prevent infinite loops
             break
         it = it + 1
         next_hypos = []
         next_scores = []
         for hypo in hypos:
             if hypo.get_last_word() == utils.EOS_ID:
                 next_hypos.append(hypo)
                 next_scores.append(hypo.score)
                 continue 
             self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
             if not hypo.word_to_consume is None: # Consume if cheap expand
                 self.consume(hypo.word_to_consume)
             posterior,score_breakdown = self.apply_predictors()
             hypo.predictor_states = self.get_predictor_states()
             top = utils.argmax_n(posterior, self.beam_size)
             for trgt_word in top:
                 next_hypo = hypo.cheap_expand(trgt_word,
                                               posterior[trgt_word],
                                               score_breakdown[trgt_word])
                 next_hypos.append(next_hypo)
                 next_scores.append(next_hypo.score)
         hypos = [next_hypos[idx] 
                     for idx in np.argsort(next_scores)[-self.beam_size:]]
     ret = [hypos[-idx-1].generate_full_hypothesis() 
             for idx in xrange(len(hypos)) 
                  if hypos[-idx-1].get_last_word() == utils.EOS_ID]
     if not ret:
         logging.warning("No complete hypotheses found for %s" % src_sentence)
         return [hypos[-idx-1].generate_full_hypothesis() 
                     for idx in xrange(len(hypos))]
     return ret
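
The pruning step near the end of decode() keeps the beam_size highest-scoring hypotheses via np.argsort, which sorts ascending, so the best entries sit at the end of the index array. A toy check:

    import numpy as np

    next_scores = [-3.2, -1.1, -2.5, -0.4]
    beam_size = 2
    keep = np.argsort(next_scores)[-beam_size:]
    print(keep.tolist())  # [1, 3] -> the hypotheses scoring -1.1 and -0.4
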
Example #22
0
 def _scale_combine_non_zero_scores(non_zero_word_count,
                                    posteriors,
                                    unk_probs,
                                    pred_weights,
                                    top_n=0):
     scaled_posteriors = []
     for posterior, unk_prob, weight in zip(posteriors, unk_probs,
                                            pred_weights):
         if isinstance(posterior, dict):
             arr = np.full(non_zero_word_count, unk_prob)
             for word, score in posterior.iteritems():
                 arr[word] = score
             scaled_posteriors.append(arr * weight)
         else:
             n_unks = non_zero_word_count - len(posterior)
             if n_unks > 0:
                 posterior = np.concatenate(
                     (posterior, np.full(n_unks, unk_prob)))
             elif n_unks < 0:
                 posterior = posterior[:n_unks]
             scaled_posteriors.append(posterior * weight)
     combined_scores = np.sum(scaled_posteriors, axis=0)
     return utils.argmax_n(combined_scores, top_n)
Example #23
0
 def _scale_combine_non_zero_scores(non_zero_word_count,
                                    posteriors,
                                    unk_probs,
                                    pred_weights,
                                    top_n=0):
   scaled_posteriors = []
   for posterior, unk_prob, weight in zip(
           posteriors, unk_probs, pred_weights):
       if isinstance(posterior, dict):
           arr = np.full(non_zero_word_count, unk_prob)
           for word, score in posterior.iteritems():
               arr[word] = score
           scaled_posteriors.append(arr * weight)
       else:
           n_unks = non_zero_word_count - len(posterior)
           if n_unks > 0:
               posterior = np.concatenate((
                   posterior, np.full(n_unks, unk_prob)))
           elif n_unks < 0:
               posterior = posterior[:n_unks]
           scaled_posteriors.append(posterior * weight)
   combined_scores = np.sum(scaled_posteriors, axis=0)
   return utils.argmax_n(combined_scores, top_n)
Example #24
0
 def maybe_add_new_top_tokens(self, top_terminals, hypo, next_hypos):
     """Expand ``hypo`` with the ``beam_size`` best next tokens.
     Terminal tokens are added to ``top_terminals`` and
     ``self.tok_to_hypo``, replacing an existing entry only if the new
     score is better; non-terminal tokens are appended to ``next_hypos``.
     """
     new_post = self.predict_next(predicting_internally=True)
     top_tokens = utils.argmax_n(new_post, self.beam_size)
     next_state = copy.deepcopy(self.predictor.get_state())
     for tok in top_tokens:
         score = hypo.score + new_post[tok]
         new_hypo = InternalHypo(score, new_post[tok], next_state, tok)
         if tok not in self.nonterminals:
             add_hypo = False
             found = False
             for t in top_terminals:
                 if t == tok:
                     found = True
                     if self.tok_to_hypo[tok].score < new_hypo.score:
                         add_hypo = True
                         top_terminals.remove(t)
                     break
             if not found:
                 add_hypo = True
             if add_hypo:
                 top_terminals.append(tok)
                 self.tok_to_hypo[tok] = new_hypo
         else:
             next_hypos.append(new_hypo)
Example #25
0
 def maybe_add_new_top_tokens(self, top_terminals, hypo, next_hypos):
     """Expand ``hypo`` with the ``beam_size`` best next tokens.
     Terminal tokens are added to ``top_terminals`` and
     ``self.tok_to_hypo``, replacing an existing entry only if the new
     score is better; non-terminal tokens are appended to ``next_hypos``.
     """
     new_post = self.predict_next(predicting_internally=True)
     top_tokens = utils.argmax_n(new_post, self.beam_size)
     next_state = copy.deepcopy(self.predictor.get_state())
     for tok in top_tokens:
         score = hypo.score + new_post[tok]
         new_hypo = InternalHypo(score, new_post[tok], next_state, tok)
         if tok not in self.nonterminals:
             add_hypo = False
             found = False
             for t in top_terminals:
                 if t == tok:
                     found = True
                     if self.tok_to_hypo[tok].score < new_hypo.score:
                         add_hypo = True
                         top_terminals.remove(t)
                     break
             if not found:
                 add_hypo = True
             if add_hypo:
                 top_terminals.append(tok)
                 self.tok_to_hypo[tok] = new_hypo
         else:
             next_hypos.append(new_hypo)