Ejemplo n.º 1
0
 def predict_next(self, trgt_words):
     """Delegate prediction to the slave predictor with ID translation.

     The given target words are mapped into the slave vocabulary via
     ``trgt_map`` before the call. The returned posterior is mapped
     back through ``trgt_map_inverse`` (unmappable IDs fall back to
     UNK) and each probability is scaled by ``slave_weight``.
     """
     mapped = [self.trgt_map[word] for word in trgt_words]
     slave_posterior = self.slave_predictor.predict_next(mapped)
     remapped = {}
     for slave_id, prob in utils.common_iterable(slave_posterior):
         master_id = self.trgt_map_inverse.get(slave_id, utils.UNK_ID)
         remapped[master_id] = self.slave_weight * prob
     return remapped
Ejemplo n.º 2
0
 def _expand_hypo(self, hypo):
     """Get the best beam size expansions of ``hypo``.

     Args:
         hypo (PartialHypothesis): Hypothesis to expand

     Returns:
         list. List of child hypotheses
     """
     self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
     if hypo.word_to_consume is not None:  # Consume if cheap expand
         self.consume(hypo.word_to_consume)
         hypo.word_to_consume = None
     posterior, score_breakdown = self.apply_predictors()
     hypo.predictor_states = self.get_predictor_states()
     children = []
     for word, word_score in utils.common_iterable(posterior):
         children.append(
             hypo.cheap_expand(word, word_score, score_breakdown[word]))
     for child in children:
         if 'prev_score' in self.breakdown2score_kwargs:
             self.breakdown2score_kwargs['prev_score'] = \
                 child.score_minus_last
         child.score = self.breakdown2score(child.score,
                                            child.score_breakdown,
                                            **self.breakdown2score_kwargs)
     children.sort(key=lambda child: child.score, reverse=True)
     return children[:self.beam_size]
Ejemplo n.º 3
0
 def predict_next(self):
     """Delegate prediction to the slave predictor.

     If no target map is configured, the slave posterior is returned
     unchanged. Otherwise slave IDs are mapped back through
     ``trgt_map_inverse`` (UNK on miss) with probabilities scaled by
     ``slave_weight``.
     """
     if not self.trgt_map:
         return self.slave_predictor.predict_next()
     raw_posterior = self.slave_predictor.predict_next()
     remapped = {}
     for slave_id, prob in utils.common_iterable(raw_posterior):
         master_id = self.trgt_map_inverse.get(slave_id, utils.UNK_ID)
         remapped[master_id] = self.slave_weight * prob
     return remapped
Ejemplo n.º 4
0
 def _expand_hypo(self, hypo):
     """Get the best beam size expansions of ``hypo``.

     Args:
         hypo (PartialHypothesis): Hypothesis to expand

     Returns:
         list. List of child hypotheses
     """
     self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
     pending_word = hypo.word_to_consume
     if pending_word is not None:  # cheap expand: consume deferred word
         self.consume(pending_word)
         hypo.word_to_consume = None
     posterior, score_breakdown = self.apply_predictors()
     hypo.predictor_states = self.get_predictor_states()
     expansions = [hypo.cheap_expand(w, s, score_breakdown[w])
                   for w, s in utils.common_iterable(posterior)]
     rescore_kwargs = self.breakdown2score_kwargs
     for expansion in expansions:
         if 'prev_score' in rescore_kwargs:
             rescore_kwargs['prev_score'] = expansion.score_minus_last
         expansion.score = self.breakdown2score(expansion.score,
                                                expansion.score_breakdown,
                                                **rescore_kwargs)
     expansions.sort(key=lambda e: e.score, reverse=True)
     return expansions[:self.beam_size]
Ejemplo n.º 5
0
 def score2rank(self, scores):
     """Convert scores to negative ranks.

     The highest score is assigned rank -1, the second highest -2,
     and so on (negative so that a higher rank is still "better").

     Args:
         scores: Either a dict-like posterior mapping keys to scores,
             or a list / numpy array of scores.

     Returns:
         A dict mapping each key to its negative rank if ``scores``
         is a dict; otherwise a numpy array of negative ranks aligned
         with ``scores``.
     """
     if isinstance(scores, dict):
         pairs = list(utils.common_iterable(scores))
         pairs.sort(key=operator.itemgetter(1), reverse=True)
         return {el[0]: -(r+1) for r, el in enumerate(pairs)}
     # scores is a list or numpy array. Fix: a plain Python list supports
     # neither unary negation nor ``.dtype``, so normalize to ndarray.
     scores = np.asarray(scores)
     # argsort of the negated scores lists indices from best to worst.
     indices = np.argsort(-scores)
     ranks = np.empty_like(scores)
     ranks[indices] = np.arange(-1, -len(scores)-1, -1, dtype=scores.dtype)
     return ranks
Ejemplo n.º 6
0
 def _dfs(self, partial_hypo):
     """Recursive function for doing dfs. Note that we do not keep
     track of the predictor states inside ``partial_hypo``, because 
     at each call of ``_dfs`` the current predictor states are equal
     to the hypo predictor states.

     This variant maintains a separate score lower bound and best
     hypothesis per target length (``len_lower_bounds`` /
     ``len_best_hypos``) and prunes any branch that cannot beat the
     bound for the next length.

     Args:
         partial_hypo (PartialHypothesis): Partial hypothesis 
                                           generated so far. 
     """
     partial_hypo_length = len(partial_hypo.trgt_sentence)
     self.apply_predictors_count += 1
     posterior = self.dfs_predictor.predict_next()
     logging.debug("Expand: exp: %d partial_score: "
                   "%f sentence: %s" %
                   (self.apply_predictors_count, partial_hypo.score,
                    partial_hypo.trgt_sentence))
     # Check EOS
     # Finishing the sentence at this length is recorded if this length
     # is enabled and the finished score beats the current bound.
     eos_score = posterior[utils.EOS_ID]
     if (self.len_enabled[partial_hypo_length]
             and partial_hypo.score + eos_score >
             self.len_lower_bounds[partial_hypo_length]):
         eos_hypo = partial_hypo.expand(
             utils.EOS_ID,
             None,  # Do not store states
             eos_score,
             [(eos_score, 1.0)])
         logging.info("New best: len: %d score: %f -> %f exp: %d" %
                      (partial_hypo_length,
                       self.len_lower_bounds[partial_hypo_length],
                       eos_hypo.score, self.apply_predictors_count))
         # Tighten the per-length bound and remember the best hypo.
         self.len_lower_bounds[partial_hypo_length] = eos_hypo.score
         self.len_best_hypos[partial_hypo_length] = eos_hypo
         self._update_min_lower_bounds()
     if partial_hypo_length >= self.max_len:
         return
     # Expand with all non-EOS words whose continuation can still beat
     # the bound for the next length. Predictor states are snapshotted
     # lazily: only once a second sibling expansion is actually taken.
     first_expansion = True
     for trgt_word, score in utils.common_iterable(posterior):
         if trgt_word != utils.EOS_ID:
             # NOTE(review): presumably the minimum bound over all
             # still-admissible lengths >= this one, maintained by
             # _update_min_lower_bounds() — confirm against that method.
             lower_bound = self.len_min_lower_bounds[partial_hypo_length +
                                                     1]
             if partial_hypo.score + score > lower_bound:
                 if first_expansion:
                     # First child reuses the live predictor states;
                     # snapshot them for later siblings.
                     pred_states = copy.deepcopy(
                         self.get_predictor_states())
                     first_expansion = False
                 else:
                     # Restore the snapshot before expanding a sibling.
                     self.set_predictor_states(copy.deepcopy(pred_states))
                 self.consume(trgt_word)
                 self._dfs(
                     partial_hypo.expand(
                         trgt_word,
                         None,  # Do not store states
                         score,
                         [(score, 1.0)]))
Ejemplo n.º 7
0
 def score2rank(self, scores):
     """Convert scores to negative ranks (-1 is the best score).

     Args:
         scores: Either a dict-like posterior mapping keys to scores,
             or a list / numpy array of scores.

     Returns:
         A dict mapping each key to its negative rank if ``scores``
         is a dict; otherwise a numpy array of negative ranks aligned
         with ``scores``.
     """
     if isinstance(scores, dict):
         pairs = list(utils.common_iterable(scores))
         pairs.sort(key=operator.itemgetter(1), reverse=True)
         return {el[0]: -(r + 1) for r, el in enumerate(pairs)}
     # scores is a list or numpy array. Fix: lists support neither unary
     # negation nor ``.dtype``, so normalize to an ndarray first.
     scores = np.asarray(scores)
     indices = np.argsort(-scores)
     ranks = np.empty_like(scores)
     ranks[indices] = np.arange(-1,
                                -len(scores) - 1,
                                -1,
                                dtype=scores.dtype)
     return ranks
Ejemplo n.º 8
0
 def _get_initial_stubs(self, predictor, start_posterior, min_score):
     """Build the initial predictor stubs for full word search with a
     single predictor, sorted by score (best first). Stubs scoring
     below ``min_score`` are dropped.
     """
     pred_state = predictor.get_state()
     kept = []
     for token, token_score in utils.common_iterable(start_posterior):
         candidate = PredictorStub([token], pred_state)
         candidate.score_next(token_score)
         if candidate.score >= min_score:
             kept.append(candidate)
     return sorted(kept, key=lambda stub: stub.score, reverse=True)
Ejemplo n.º 9
0
 def _get_initial_stubs(self, predictor, start_posterior, min_score):
     """Create single-token predictor stubs from the start posterior.

     Only stubs whose score reaches ``min_score`` are kept; the result
     is sorted best-first.
     """
     state = predictor.get_state()
     scored = []
     for tok_id, tok_score in utils.common_iterable(start_posterior):
         stub = PredictorStub([tok_id], state)
         stub.score_next(tok_score)
         scored.append(stub)
     survivors = [s for s in scored if s.score >= min_score]
     survivors.sort(key=lambda s: -s.score)
     return survivors
Ejemplo n.º 10
0
    def _dfs(self, partial_hypo):
        """Recursive function for doing dfs. Note that we do not keep
        track of the predictor states inside ``partial_hypo``, because 
        at each call of ``_dfs`` the current predictor states are equal
        to the hypo predictor states.

        Full hypotheses (ending in EOS and satisfying the minimum
        length) are registered via ``add_full_hypo``; branches whose
        optimistic score cannot beat ``best_score`` are pruned.

        Args:
            partial_hypo (PartialHypothesis): Partial hypothesis 
                                              generated so far. 
        """
        # Base case: hypothesis ends in EOS -> record it if long enough.
        if partial_hypo.get_last_word() == utils.EOS_ID:
            if len(partial_hypo.trgt_sentence) >= self._min_length:
                self.add_full_hypo(partial_hypo.generate_full_hypothesis())
                if partial_hypo.score > self.best_score:
                    self.best_score = partial_hypo.score
                    logging.info("New best: score: %f exp: %d sentence: %s" %
                                 (self.best_score, self.apply_predictors_count,
                                  partial_hypo.trgt_sentence))
            return

        self.apply_predictors_count += 1
        posterior = self.dfs_predictor.predict_next()
        logging.debug("Expand: best_score: %f exp: %d partial_score: "
                      "%f sentence: %s" %
                      (self.best_score, self.apply_predictors_count,
                       partial_hypo.score, partial_hypo.trgt_sentence))
        # Expand only words that can still beat the best complete score.
        # Predictor states are snapshotted lazily: the first child reuses
        # the live states, later siblings restore the snapshot.
        first_expansion = True
        for trgt_word, score in utils.common_iterable(posterior):
            if partial_hypo.score + score > self.best_score:
                if first_expansion:
                    # Snapshot states so siblings can restore them later.
                    pred_states = copy.deepcopy(self.get_predictor_states())
                    first_expansion = False
                else:
                    # Restore the snapshot before expanding a sibling.
                    self.set_predictor_states(copy.deepcopy(pred_states))
                self.consume(trgt_word)
                self._dfs(
                    partial_hypo.expand(
                        trgt_word,
                        None,  # Do not store states
                        score,
                        [(score, 1.0)]))
 def predict_next(self):
     """This method first performs beam search internally to update
     the slave predictor state to a point where the best stop_size 
     entries in the predict_next() return value are in-vocabulary
     (bounded by max_id). Then, it returns the slave posterior in 
     that state.
     """
     # Internal beam starts from the current slave predictor state with
     # nothing to consume.
     hypos = [
         SkipvocabInternalHypothesis(0.0, self.slave_predictor.get_state(),
                                     None)
     ]
     best_score = utils.NEG_INF
     best_predictor_state = None
     best_posterior = None
     # Continue while the best live hypothesis can still beat the best
     # stopping hypothesis found so far (hypos is kept sorted below).
     while hypos and hypos[0].score > best_score:
         next_hypos = []
         for hypo in hypos:
             self.slave_predictor.set_state(
                 copy.deepcopy(hypo.predictor_state))
             if hypo.word_to_consume is not None:
                 self.slave_predictor.consume(hypo.word_to_consume)
             posterior = self.slave_predictor.predict_next()
             # Snapshot the state after consumption/prediction; it is
             # shared by all expansions of this hypo.
             pred_state = copy.deepcopy(self.slave_predictor.get_state())
             if (self._is_stopping_posterior(posterior)
                     and hypo.score > best_score):
                 # This is the new best result of the internal beam search
                 best_score = hypo.score
                 best_predictor_state = pred_state
                 best_posterior = posterior
             else:
                 # Look for ways to expand this hypo with OOV words.
                 for word, score in utils.common_iterable(posterior):
                     if word > self.max_id:
                         next_hypos.append(
                             SkipvocabInternalHypothesis(
                                 hypo.score + score, pred_state, word))
         # Keep only the best ``beam`` expansions, sorted best-first.
         next_hypos.sort(key=lambda h: -h.score)
         hypos = next_hypos[:self.beam]
     # Leave the slave predictor in the best stopping state found.
     self.slave_predictor.set_state(copy.deepcopy(best_predictor_state))
     return best_posterior
Ejemplo n.º 12
0
 def predict_next(self):
     """This method first performs beam search internally to update
     the slave predictor state to a point where the best stop_size 
     entries in the predict_next() return value are in-vocabulary
     (bounded by max_id). Then, it returns the slave posterior in 
     that state.
     """
     beam = [SkipvocabInternalHypothesis(
         0.0, self.slave_predictor.get_state(), None)]
     best_score = utils.NEG_INF
     best_state = None
     best_posterior = None
     # Search until no live hypothesis can beat the best stopping one.
     while beam and beam[0].score > best_score:
         candidates = []
         for hyp in beam:
             self.slave_predictor.set_state(
                 copy.deepcopy(hyp.predictor_state))
             if hyp.word_to_consume is not None:
                 self.slave_predictor.consume(hyp.word_to_consume)
             posterior = self.slave_predictor.predict_next()
             state_after = copy.deepcopy(self.slave_predictor.get_state())
             if (self._is_stopping_posterior(posterior)
                     and hyp.score > best_score):
                 # New best result of the internal beam search.
                 best_score = hyp.score
                 best_state = state_after
                 best_posterior = posterior
             else:
                 # Expand this hypothesis with OOV words only.
                 for word, word_score in utils.common_iterable(posterior):
                     if word > self.max_id:
                         candidates.append(SkipvocabInternalHypothesis(
                             hyp.score + word_score, state_after, word))
         candidates.sort(key=lambda h: h.score, reverse=True)
         beam = candidates[:self.beam]
     self.slave_predictor.set_state(copy.deepcopy(best_state))
     return best_posterior
Ejemplo n.º 13
0
 def _search_full_words(self, predictor, start_posterior, tok, min_score):
     """Beam search over token sequences until the best keys are
     complete words. Stubs with empty or over-long keys are dropped;
     completed stubs are carried forward unchanged.
     """
     stubs = self._get_initial_stubs(predictor, start_posterior, min_score)
     while not self._best_keys_complete(stubs, tok):
         survivors = []
         for stub in stubs[:self.beam_size]:
             key = tok.tokens2key(stub.tokens)
             if not key or len(key) > self.max_word_len:
                 continue
             if is_key_complete(key):
                 survivors.append(stub)
                 continue
             predictor.set_state(copy.deepcopy(stub.pred_state))
             predictor.consume(stub.tokens[-1])
             posterior = predictor.predict_next()
             state_after = predictor.get_state()
             for token, token_score in utils.common_iterable(posterior):
                 if token == utils.UNK_ID or tok.is_word_begin_token(token):
                     continue
                 child = stub.expand(token, token_score, state_after)
                 if child.score >= min_score:
                     survivors.append(child)
         survivors.sort(key=lambda s: s.score, reverse=True)
         stubs = survivors
     return stubs
Ejemplo n.º 14
0
 def _search_full_words(self, predictor, start_posterior, tok, min_score):
     """Expand predictor stubs beam-style until the best keys form
     complete words, pruning by ``min_score`` and ``max_word_len``.
     """
     beam = self._get_initial_stubs(predictor, start_posterior, min_score)
     while not self._best_keys_complete(beam, tok):
         expanded = []
         for cur in beam[:self.beam_size]:
             cur_key = tok.tokens2key(cur.tokens)
             if (not cur_key) or len(cur_key) > self.max_word_len:
                 continue
             if is_key_complete(cur_key):
                 # Already a full word: keep as-is.
                 expanded.append(cur)
                 continue
             predictor.set_state(copy.deepcopy(cur.pred_state))
             predictor.consume(cur.tokens[-1])
             next_posterior = predictor.predict_next()
             next_state = predictor.get_state()
             for t, s in utils.common_iterable(next_posterior):
                 if t != utils.UNK_ID and not tok.is_word_begin_token(t):
                     candidate = cur.expand(t, s, next_state)
                     if candidate.score >= min_score:
                         expanded.append(candidate)
         expanded.sort(key=lambda stub: -stub.score)
         beam = expanded
     return beam
 def _is_stopping_posterior(self, posterior):
     """Return True iff none of the top ``stop_size`` entries of the
     posterior is an out-of-vocabulary word (ID above ``max_id``).
     """
     ranked = sorted(utils.common_iterable(posterior),
                     key=lambda pair: pair[1], reverse=True)
     return all(word <= self.max_id
                for word, _ in ranked[:self.stop_size])
Ejemplo n.º 16
0
 def _is_stopping_posterior(self, posterior):
     """Check whether the ``stop_size`` highest-scoring posterior
     entries are all in-vocabulary (IDs not exceeding ``max_id``).
     """
     top_entries = sorted(utils.common_iterable(posterior),
                          key=lambda item: -item[1])[:self.stop_size]
     return not any(word > self.max_id for word, _ in top_entries)
Ejemplo n.º 17
0
 def _register_bigram_scores(self, last_word, posterior):
     """Record, for each word in the posterior, the minimum score ever
     observed for the bigram ``(last_word, word)``.
     """
     scores_for_prev = self.bigram_scores[last_word]
     for word, word_score in utils.common_iterable(posterior):
         scores_for_prev[word] = min(scores_for_prev[word], word_score)
Ejemplo n.º 18
0
 def _is_stopping_posterior(self, posterior):
     """Return True iff none of the top ``stop_size`` posterior words
     is contained in ``vocab_spec``.
     """
     best = sorted(utils.common_iterable(posterior),
                   key=lambda entry: entry[1], reverse=True)[:self.stop_size]
     return all(not self.vocab_spec.contains(word) for word, _ in best)