Code example #1
File: vocabulary.py  Project: ucam-smt/sgnmt
 def consume(self, word):
     """Pass through to slave predictor """
     if not self.trgt_map:
         self.slave_predictor.consume(word)
     else:
         self.slave_predictor.consume(utils.common_get(
             self.trgt_map, word, utils.UNK_ID))
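
All of these examples revolve around ``utils.common_get``, a lookup helper that works with both dict-based and array-based posteriors and falls back to a default score when the key is missing. A minimal sketch of such a helper (the actual implementation in sgnmt's ``utils.py`` may differ in details):

 def common_get(obj, key, default):
     """Return obj[key] if key is available, else default."""
     if isinstance(obj, dict):
         return obj.get(key, default)
     # list/ndarray posterior: out-of-range ids fall back to default
     return obj[key] if key < len(obj) else default
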
Code example #2
File: length.py  Project: ucam-smt/sgnmt
 def predict_next(self):
     """Looks up ngram scores via self.scores. """
     cur_hist_length = len(self.history)
     this_scores = [[] for _ in xrange(cur_hist_length+1)]
     this_unk_scores = [[] for _ in xrange(cur_hist_length+1)]
     for pos in xrange(len(self.scores)):
         this_scores[0].append(self.scores[pos])
         this_unk_scores[0].append(self.unk_scores[pos])
         acc = 0.0
         for order, word in enumerate(self.history):
             if pos + order + 1 >= len(self.scores):
                 break
             acc += utils.common_get(
                 self.scores[pos + order], word, 
                 self.unk_scores[pos + order])
             this_scores[order+1].append(acc + self.scores[pos + order + 1])
             this_unk_scores[order+1].append(
                 acc + self.unk_scores[pos + order + 1])
     combined_scores = []
     combined_unk_scores = []
     for order, (scores, unk_scores) in enumerate(zip(this_scores, 
                                                      this_unk_scores)):
         if scores and order + 1 >= self.min_order:
             score_matrix = np.vstack(scores)
             combined_scores.append(logsumexp(score_matrix, axis=0))
             combined_unk_scores.append(utils.log_sum(unk_scores))
     if not combined_scores:
         self.cur_unk_score = 0.0
         return {}
     self.cur_unk_score = sum(combined_unk_scores)
     return sum(combined_scores)
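
The combination here happens entirely in log space: ``logsumexp`` over the stacked per-position scores marginalizes over n-gram starting positions, and the outer ``sum`` multiplies the per-order distributions. A toy illustration of the ``logsumexp`` step (``scipy.special.logsumexp`` in current SciPy; older code imports it from ``scipy.misc``):

 import numpy as np
 from scipy.special import logsumexp

 # Log-score vectors over a two-word vocabulary, one per position.
 scores = [np.log([0.2, 0.8]), np.log([0.5, 0.5])]
 combined = logsumexp(np.vstack(scores), axis=0)
 print(np.exp(combined))  # ~[0.7 1.3]: per-word sums across positions
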
Code example #3
File: core.py  Project: ml-lab/sgnmt
 def _combine_posteriors_norm_reduced(self, non_zero_words, posteriors,
                                      unk_probs):
     """Combine predictor posteriors according the normalization
     scheme ``CLOSED_VOCAB_SCORE_NORM_REDUCED``. For more information
     on closed vocabulary predictor score normalization see the 
     documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.
     
     Args:
         non_zero_words (set): All words with positive probability
         posteriors: Predictor posterior distributions calculated
                     with ``predict_next()``
         unk_probs: UNK probabilities of the predictors, calculated
                    with ``get_unk_probability``
     
     Returns:
         combined,score_breakdown: like in ``apply_predictors()``
     """
     n_predictors = len(self.predictors)
     score_breakdown_raw = {}
     for trgt_word in non_zero_words:
         score_breakdown_raw[trgt_word] = [
             (utils.common_get(posteriors[idx], trgt_word,
                               unk_probs[idx]), w)
             for idx, (_, w) in enumerate(self.predictors)
         ]
     sums = []
     for idx in xrange(n_predictors):
         sums.append(
             utils.log_sum([
                 preds[idx][0]
                 for preds in score_breakdown_raw.itervalues()
             ]))
     return self._combine_posteriors_with_renorm(score_breakdown_raw, sums)
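
``_combine_posteriors_with_renorm`` is not shown here; its role is to subtract each predictor's log partition ``sums[idx]`` so that every predictor contributes a renormalized distribution over ``non_zero_words``. A plausible sketch of that step under a linear combination scheme (this helper is an illustration, not the project's actual code):

 def combine_with_renorm(score_breakdown_raw, sums):
     """Renormalize per-predictor log scores, then combine linearly."""
     combined = {}
     for word, preds in score_breakdown_raw.items():
         combined[word] = sum((score - sums[idx]) * weight
                              for idx, (score, weight) in enumerate(preds))
     return combined
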
Code example #4
 def decode(self, src_sentence):
     self.initialize_predictors(src_sentence)
     trg_sentence = self.trg_sentences[self.current_sen_id] + [utils.EOS_ID]
     score_breakdown = []
     score = 0.0
     all_posteriors = []
     all_unk_scores = []
     for trg_word in trg_sentence:
         self.apply_predictors_count += 1
         breakdown = []
         posteriors = []
         unk_scores = []
         for (p, w) in self.predictors:
             if isinstance(p, UnboundedVocabularyPredictor):
                 posterior = p.predict_next([trg_word])
             else: 
                 posterior = p.predict_next()
             unk_prob = p.get_unk_probability(posterior)
             pred_score = utils.common_get(posterior, trg_word, unk_prob)
             breakdown.append((pred_score, w))
             score += pred_score * w
             posteriors.append(posterior)
             unk_scores.append(unk_prob)
         all_posteriors.append(posteriors)
         all_unk_scores.append(unk_scores)
         score_breakdown.append(breakdown)
         self.consume(trg_word)
     self.add_full_hypo(core.Hypothesis(trg_sentence, score, score_breakdown))
     self.last_meta_data = {
         "src_sentence": np.array(src_sentence + [utils.EOS_ID]),
         "trg_sentence": np.array(trg_sentence),
         "posteriors": all_posteriors,
         "unk_scores": all_unk_scores
     }
     return self.full_hypos
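
Note the dispatch on ``UnboundedVocabularyPredictor``: such predictors cannot enumerate a full posterior, so the decoder passes the candidate id explicitly via ``predict_next([trg_word])``. A minimal sketch of that interface with a toy predictor (illustrative only; other required predictor methods are omitted):

 class ToyUnboundedPredictor(UnboundedVocabularyPredictor):
     """Toy predictor: scores only the word ids it is asked about."""
     def predict_next(self, words):
         # Sparse posterior over just the requested ids.
         return {w: 0.0 for w in words}
     def get_unk_probability(self, posterior):
         return utils.NEG_INF
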
Code example #5
File: length.py  Project: strategist922/sgnmt
 def predict_next(self):
     """Looks up ngram scores via self.scores. """
     cur_hist_length = len(self.history)
     this_scores = [[] for _ in range(cur_hist_length + 1)]
     this_unk_scores = [[] for _ in range(cur_hist_length + 1)]
     for pos in range(len(self.scores)):
         this_scores[0].append(self.scores[pos])
         this_unk_scores[0].append(self.unk_scores[pos])
         acc = 0.0
         for order, word in enumerate(self.history):
             if pos + order + 1 >= len(self.scores):
                 break
             acc += utils.common_get(self.scores[pos + order], word,
                                     self.unk_scores[pos + order])
             this_scores[order + 1].append(
                 acc + self.scores[pos + order + 1])
             this_unk_scores[order + 1].append(
                 acc + self.unk_scores[pos + order + 1])
     combined_scores = []
     combined_unk_scores = []
     for order, (scores, unk_scores) in enumerate(
             zip(this_scores, this_unk_scores)):
         if scores and order + 1 >= self.min_order:
             score_matrix = np.vstack(scores)
             combined_scores.append(logsumexp(score_matrix, axis=0))
             combined_unk_scores.append(utils.log_sum(unk_scores))
     if not combined_scores:
         self.cur_unk_score = 0.0
         return {}
     self.cur_unk_score = sum(combined_unk_scores)
     return sum(combined_scores)
Code example #6
File: core.py  Project: ml-lab/sgnmt
 def _combine_posteriors_norm_none(self, non_zero_words, posteriors,
                                   unk_probs):
     """Combine predictor posteriors according the normalization
     scheme ``CLOSED_VOCAB_SCORE_NORM_NONE``. For more information
     on closed vocabulary predictor score normalization see the 
     documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.
     
     Args:
         non_zero_words (set): All words with positive probability
         posteriors: Predictor posterior distributions calculated
                     with ``predict_next()``
         unk_probs: UNK probabilities of the predictors, calculated
                    with ``get_unk_probability``
     
     Returns:
         combined,score_breakdown: like in ``apply_predictors()``
     """
     combined = {}
     score_breakdown = {}
     for trgt_word in non_zero_words:
         preds = [(utils.common_get(posteriors[idx], trgt_word,
                                    unk_probs[idx]), w)
                  for idx, (_, w) in enumerate(self.predictors)]
         combined[trgt_word] = self.combi_predictor_method(preds)
         score_breakdown[trgt_word] = preds
     return combined, score_breakdown
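
``self.combi_predictor_method`` is bound according to the configured combination scheme; for the default linear interpolation it is simply a weighted sum of per-predictor log scores. A sketch of that default, assuming the ``(score, weight)`` tuple layout used above (the function name is illustrative):

 def combi_arith(preds):
     """Weighted sum of per-predictor log scores (linear interpolation)."""
     return sum(score * weight for score, weight in preds)
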
Code example #7
File: core.py  Project: chagge/sgnmt
 def _combine_posteriors_norm_none(self,
                                   non_zero_words,
                                   posteriors,
                                   unk_probs):
     """Combine predictor posteriors according the normalization
     scheme ``CLOSED_VOCAB_SCORE_NORM_NONE``. For more information
     on closed vocabulary predictor score normalization see the 
     documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.
     
     Args:
         non_zero_words (set): All words with positive probability
         posteriors: Predictor posterior distributions calculated
                     with ``predict_next()``
         unk_probs: UNK probabilities of the predictors, calculated
                    with ``get_unk_probability``
     
     Returns:
         combined,score_breakdown: like in ``apply_predictors()``
     """
     combined = {}
     score_breakdown = {}
     for trgt_word in non_zero_words:
         preds = [(utils.common_get(posteriors[idx],
                                    trgt_word, unk_probs[idx]), w)
                  for idx, (_, w) in enumerate(self.predictors)]
         combined[trgt_word] = self.combi_predictor_method(preds)
         score_breakdown[trgt_word] = preds
     return combined, score_breakdown
Code example #8
File: core.py  Project: ucam-smt/sgnmt
 def _combine_posteriors_norm_reduced(self,
                                      non_zero_words,
                                      posteriors,
                                      unk_probs,
                                      pred_weights,
                                      top_n=0):
     """Combine predictor posteriors according the normalization
     scheme ``CLOSED_VOCAB_SCORE_NORM_REDUCED``. For more information
     on closed vocabulary predictor score normalization see the 
     documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.
     
     Args:
         non_zero_words (set): All words with positive probability
         posteriors: Predictor posterior distributions calculated
                     with ``predict_next()``
         unk_probs: UNK probabilities of the predictors, calculated
                    with ``get_unk_probability``
         pred_weights (list): Predictor weights
         top_n (int): Not implemented!
     
     Returns:
         combined,score_breakdown: like in ``apply_predictors()``
     """
     n_predictors = len(self.predictors)
     score_breakdown_raw = {}
     for trgt_word in non_zero_words:
         score_breakdown_raw[trgt_word] = [
             (utils.common_get(posteriors[idx], trgt_word,
                               unk_probs[idx]), w)
             for idx, w in enumerate(pred_weights)]
     sums = []
     for idx in xrange(n_predictors):
         sums.append(utils.log_sum([
             preds[idx][0]
             for preds in score_breakdown_raw.itervalues()]))
     return self._combine_posteriors_with_renorm(score_breakdown_raw, sums)
Code example #9
 def _get_stub_prob_bounded(self):
     """get_stub_prob implementation for bounded vocabulary slave
     predictors.
     """
     word = self.words.get(self.word_stub)
     return common_get(self.slave_posterior, word if word else utils.UNK_ID,
                       self.slave_unk)
Code example #10
File: tokenization.py  Project: ucam-smt/sgnmt
 def _get_stub_prob_bounded(self):
     """get_stub_prob implementation for bounded vocabulary slave
     predictors.
     """
     word = self.words.get(self.word_stub)
     return common_get(self.slave_posterior,
                       word if word else utils.UNK_ID,
                       self.slave_unk)
Code example #11
 def consume_single(self, predictor):
     if not self.unconsumed:
         return
     if self.posterior is not None:
         self.pending_score += utils.common_get(self.posterior,
                                                self.unconsumed[0],
                                                self.posterior[utils.UNK_ID])
         self.posterior = None
Code example #12
 def get_unk_probability(self, posterior):
     """Returns negative infinity if UNK is not in the lattice.
     Otherwise, return UNK score.
     
     Returns:
         float. Negative infinity if UNK is not in the lattice,
         otherwise the UNK score.
     """
     return utils.common_get(posterior, utils.UNK_ID, utils.NEG_INF)
Code example #13
File: automata.py  Project: ucam-smt/sgnmt
 def get_unk_probability(self, posterior):
     """Returns negative infinity if UNK is not in the lattice.
     Otherwise, return UNK score.
     
     Returns:
         float. Negative infinity if UNK is not in the lattice,
         otherwise the UNK score.
     """
     return utils.common_get(posterior, utils.UNK_ID, utils.NEG_INF)
Code example #14
 def _get_stub_prob_unbounded(self):
     """get_stub_prob implementation for unbounded vocabulary slave
     predictors.
     """
     word = self.words.get(self.word_stub)
     if word:
         posterior = self.slave_predictor.predict_next([word])
         return common_get(posterior, word, self.slave_unk)
     return self.slave_unk
Code example #15
File: tokenization.py  Project: ucam-smt/sgnmt
 def _get_stub_prob_unbounded(self):
     """get_stub_prob implementation for unbounded vocabulary slave
     predictors.
     """
     word = self.words.get(self.word_stub)
     if word:
         posterior = self.slave_predictor.predict_next([word])
         return common_get(posterior, word, self.slave_unk)
     return self.slave_unk
Code example #16
 def expand(self, decoder):
     for pidx, (p, _) in enumerate(decoder.predictors):
         stub = self.pred_stubs[pidx]
         if not stub.has_full_score():
             p.set_state(copy.deepcopy(stub.pred_state))
             p.consume(stub.tokens[stub.score_pos - 1])
             posterior = p.predict_next()
             stub.score_next(
                 utils.common_get(posterior, stub.tokens[stub.score_pos],
                                  p.get_unk_probability(posterior)))
             stub.pred_state = p.get_state()
Code example #17
File: multisegbeam.py  Project: ucam-smt/sgnmt
 def expand(self, decoder):
     for pidx, (p, _) in enumerate(decoder.predictors):
         stub = self.pred_stubs[pidx]
         if not stub.has_full_score():
             p.set_state(copy.deepcopy(stub.pred_state))
             p.consume(stub.tokens[stub.score_pos - 1])
             posterior = p.predict_next()
             stub.score_next(
                 utils.common_get(posterior, stub.tokens[stub.score_pos],
                                  p.get_unk_probability(posterior)))
             stub.pred_state = p.get_state()
Code example #18
    def _get_complete_continuations(self, hypo, min_hypo_score):
        """This is a generator which yields the complete continuations 
        of ``hypo`` in descending order of score
        """
        min_score = min_hypo_score - hypo.score
        if min_score > 0.0:
            return

        pred_weights = map(lambda el: el[1], self.predictors)
        # Get initial continuations by searching with predictors separately
        start_posteriors = self._get_word_initial_posteriors(hypo)
        pred_states = self.get_predictor_states()
        keys = {}
        for pidx, (p, w) in enumerate(self.predictors):
            stubs = self._search_full_words(p, start_posteriors[pidx],
                                            self.toks[pidx], min_score / w)
            n_added = 0
            for stub in stubs:
                key = self.toks[pidx].tokens2key(stub.tokens)
                if is_key_complete(key):
                    if key in keys:  # Add to existing continuation
                        prev_stub = keys[key].pred_stubs[pidx]
                        if prev_stub is None or prev_stub.score < stub.score:
                            keys[key].pred_stubs[pidx] = stub
                    elif n_added < self.beam_size:  # Create new continuation
                        n_added += 1
                        stubs = [None] * len(self.predictors)
                        stubs[pidx] = stub
                        keys[key] = Continuation(hypo, stubs, key)
        # Fill in stubs which are set to None
        for cont in keys.itervalues():
            for pidx in xrange(len(self.predictors)):
                if cont.pred_stubs[pidx] is None:
                    stub = PredictorStub(self.toks[pidx].key2tokens(cont.key),
                                         pred_states[pidx])
                    stub.score_next(
                        utils.common_get(start_posteriors[pidx],
                                         stub.tokens[0],
                                         start_posteriors[pidx][utils.UNK_ID]))
                    cont.pred_stubs[pidx] = stub
        conts = [(-c.calculate_score(pred_weights), c)
                 for c in keys.itervalues()]
        heapq.heapify(conts)
        # Iterate through conts, expand if necessary, yield if complete
        while conts:
            s, cont = heapq.heappop(conts)
            if cont.is_complete():
                yield -s, cont
            else:  # Need to rescore with sec predictors
                cont.expand(self)
                heapq.heappush(conts,
                               (-cont.calculate_score(pred_weights), cont))
Code example #19
File: multisegbeam.py  Project: ucam-smt/sgnmt
 def _get_complete_continuations(self, hypo, min_hypo_score):
     """This is a generator which yields the complete continuations 
     of ``hypo`` in descending order of score
     """
     min_score = min_hypo_score - hypo.score
     if min_score > 0.0:
         return
     
     pred_weights = map(lambda el: el[1], self.predictors)
     # Get initial continuations by searching with predictors separately
     start_posteriors = self._get_word_initial_posteriors(hypo)
     pred_states = self.get_predictor_states()
     keys = {}
     for pidx, (p, w) in enumerate(self.predictors):
         stubs = self._search_full_words(p,
                                         start_posteriors[pidx],
                                         self.toks[pidx],
                                         min_score / w)
         n_added = 0
         for stub in stubs:
             key = self.toks[pidx].tokens2key(stub.tokens)
             if is_key_complete(key):
                 if key in keys: # Add to existing continuation
                     prev_stub = keys[key].pred_stubs[pidx]
                     if prev_stub is None or prev_stub.score < stub.score:
                         keys[key].pred_stubs[pidx] = stub
                 elif n_added < self.beam_size: # Create new continuation
                     n_added += 1
                     stubs = [None] * len(self.predictors)
                     stubs[pidx] = stub
                     keys[key] = Continuation(hypo, stubs, key)
     # Fill in stubs which are set to None
     for cont in keys.itervalues():
         for pidx in xrange(len(self.predictors)):
             if cont.pred_stubs[pidx] is None:
                 stub = PredictorStub(self.toks[pidx].key2tokens(cont.key),
                                      pred_states[pidx])
                 stub.score_next(utils.common_get(
                     start_posteriors[pidx],
                     stub.tokens[0],
                     start_posteriors[pidx][utils.UNK_ID]))
                 cont.pred_stubs[pidx] = stub
     conts = [(-c.calculate_score(pred_weights), c)
              for c in keys.itervalues()]
     heapq.heapify(conts)
     # Iterate through conts, expand if necessary, yield if complete
     while conts:
         s, cont = heapq.heappop(conts)
         if cont.is_complete():
             yield -s, cont
         else:  # Need to rescore with sec predictors
             cont.expand(self)
             heapq.heappush(conts,
                            (-cont.calculate_score(pred_weights), cont))
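
Pushing negated scores onto the min-heap is what makes this generator yield continuations in descending order of score: the most negative entry, i.e. the highest score, is popped first. A standalone illustration of the idiom:

 import heapq

 conts = [(-0.5, "a"), (-1.2, "b"), (-0.1, "c")]  # (-score, payload)
 heapq.heapify(conts)
 while conts:
     neg_score, payload = heapq.heappop(conts)
     print(-neg_score, payload)  # 1.2 b, then 0.5 a, then 0.1 c
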
Code example #20
    def _get_stub_prob_unbounded(self, ch):
        """get_stub_prob implementation for unbounded vocabulary slave
            predictors. (LM is an unbouded vocabulary predictor)
            """
        word = self.words.get(self.word_stub)

        if word:
            if ch in [utils.EOS_ID]:  # end of word char
                posterior = self.slave_predictor.predict_next([word], 1)
            else:  # segmentation boundary  ch in [self.sync_symb]
                posterior = self.slave_predictor.predict_next([word])
            return utils.common_get(posterior, word, self.slave_unk)
        return self.slave_unk
Code example #21
 def score(self, token, predictor):
     """Returns a score which can be added if ``token`` is consumed
     next. This is not necessarily the full score but an upper bound
     on it: Continuations will have a score lower or equal than
     this. We only use the current posterior vector and do not
     consume tokens with the wrapped predictor.
     """
     if token and self.unconsumed:
         self.consume_all(predictor)
     s = self.pending_score
     if token:
         s += utils.common_get(self.posterior,
                               token, 
                               self.posterior[utils.UNK_ID])
     return s
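
The upper-bound property holds because the scores are log probabilities: consuming further tokens only adds non-positive terms, so the full score can never exceed the value returned here. A one-line sanity check of that reasoning:

 import math

 partial = math.log(0.9)         # bound after scoring one token
 full = partial + math.log(0.8)  # consuming more only lowers the score
 assert full <= partial
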
Code example #22
 def _combine_posteriors_norm_none(self,
                                   non_zero_words,
                                   posteriors,
                                   unk_probs,
                                   top_n=0):
     """Combine predictor posteriors according to the normalization
     scheme ``CLOSED_VOCAB_SCORE_NORM_NONE``. For more information
     on closed vocabulary predictor score normalization see the 
     documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.
     
     Args:
         non_zero_words (set): All words with positive probability
         posteriors: Predictor posterior distributions calculated
                     with ``predict_next()``
         unk_probs: UNK probabilities of the predictors, calculated
                    with ``get_unk_probability``
         top_n (int): If positive, return only top n words
     
     Returns:
         combined,score_breakdown: like in ``apply_predictors()``
     """
     if isinstance(non_zero_words, xrange) and top_n > 0:
         n_words = len(non_zero_words)
         scaled_posteriors = []
         for posterior, unk_prob, (_, weight) in zip(
                 posteriors, unk_probs, self.predictors):
             if isinstance(posterior, dict):
                 arr = np.full(n_words, unk_prob)
                 for word, score in posterior.iteritems():
                     arr[word] = score
                 scaled_posteriors.append(arr * weight)
             else:
                 n_unks = n_words - len(posterior)
                 if n_unks:
                     posterior = np.concatenate(
                         (posterior, np.full(n_unks, unk_prob)))
                 scaled_posteriors.append(posterior * weight)
         combined_scores = np.sum(scaled_posteriors, axis=0)
         non_zero_words = utils.argmax_n(combined_scores, top_n)
     combined = {}
     score_breakdown = {}
     for trgt_word in non_zero_words:
         preds = [(utils.common_get(posteriors[idx],
                                    trgt_word, unk_probs[idx]), w)
                  for idx, (_, w) in enumerate(self.predictors)]
         combined[trgt_word] = self.combi_predictor_method(preds)
         score_breakdown[trgt_word] = preds
     return combined, score_breakdown
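
``utils.argmax_n`` selects the indices of the ``top_n`` largest combined scores. With numpy this is typically done with ``argpartition``, which avoids a full sort; a sketch of such a helper (the exact behavior of the helper in ``utils.py`` is an assumption):

 import numpy as np

 def argmax_n(arr, n):
     """Indices of the n largest entries, in no particular order."""
     return np.argpartition(arr, -n)[-n:]
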
Code example #23
    def _combine_posteriors_norm_non_zero(self,
                                          non_zero_words,
                                          posteriors,
                                          unk_probs,
                                          pred_weights,
                                          top_n=0):
        """Combine predictor posteriors according the normalization
        scheme ``CLOSED_VOCAB_SCORE_NORM_NON_ZERO``. For more information
        on closed vocabulary predictor score normalization see the 
        documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.
        
        Args:
            non_zero_words (set): All words with positive probability
            posteriors: Predictor posterior distributions calculated
                        with ``predict_next()``
            unk_probs: UNK probabilities of the predictors, calculated
                       with ``get_unk_probability``
            pred_weights (list): Predictor weights
            top_n (int): If positive, return only top n words

        
        Returns:
            combined,score_breakdown: like in ``apply_predictors()``
        """
        if isinstance(non_zero_words, range) and top_n > 0:
            non_zero_words = Decoder._scale_combine_non_zero_scores(
                len(non_zero_words),
                posteriors,
                unk_probs,
                pred_weights,
                top_n)
        combined = {}
        score_breakdown = {}
        for trgt_word in non_zero_words:
            preds = [(utils.common_get(posteriors[idx],
                                       trgt_word, unk_probs[idx]), w)
                     for idx, w in enumerate(pred_weights)]
            combi_score = self.combi_predictor_method(preds)
            if abs(combi_score) <= EPS_P:
                continue
            combined[trgt_word] = combi_score
            score_breakdown[trgt_word] = preds
        return combined, score_breakdown
Code example #24
File: core.py  Project: ucam-smt/sgnmt
    def _combine_posteriors_norm_non_zero(self,
                                          non_zero_words,
                                          posteriors,
                                          unk_probs,
                                          pred_weights,
                                          top_n=0):
        """Combine predictor posteriors according the normalization
        scheme ``CLOSED_VOCAB_SCORE_NORM_NON_ZERO``. For more information
        on closed vocabulary predictor score normalization see the 
        documentation on the ``CLOSED_VOCAB_SCORE_NORM_*`` vars.
        
        Args:
            non_zero_words (set): All words with positive probability
            posteriors: Predictor posterior distributions calculated
                        with ``predict_next()``
            unk_probs: UNK probabilities of the predictors, calculated
                       with ``get_unk_probability``
            pred_weights (list): Predictor weights
            top_n (int): If positive, return only top n words

        
        Returns:
            combined,score_breakdown: like in ``apply_predictors()``
        """
        if isinstance(non_zero_words, xrange) and top_n > 0:
            non_zero_words = Decoder._scale_combine_non_zero_scores(
                len(non_zero_words),
                posteriors,
                unk_probs,
                pred_weights,
                top_n)
        combined = {}
        score_breakdown = {}
        for trgt_word in non_zero_words:
            preds = [(utils.common_get(posteriors[idx],
                                       trgt_word, unk_probs[idx]), w)
                     for idx, w in enumerate(pred_weights)]
            combi_score = self.combi_predictor_method(preds)
            if abs(combi_score) <= EPS_P:
                continue
            combined[trgt_word] = combi_score
            score_breakdown[trgt_word] = preds
        return combined, score_breakdown
Code example #25
 def consume_all(self, predictor):
     """Consume all unconsumed tokens and update pred_state, 
     pending_score, and posterior accordingly.
     
     Args:
         predictor (Predictor): Predictor instance
     """
     if not self.unconsumed:
         return
     if self.posterior is None:
         self.update_posterior(predictor)
     predictor.set_state(copy.deepcopy(self.pred_state))
     for token in self.unconsumed:
         self.pending_score += utils.common_get(self.posterior,
                                                token,
                                                self.posterior[utils.UNK_ID])
         #print("consume %d (consume all, %d)" % (token, predictor.config['src_vocab_size']))
         predictor.consume(token)
         self.posterior = predictor.predict_next()
     self.pred_state = copy.deepcopy(predictor.get_state())
     self.unconsumed = []
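
The ``copy.deepcopy`` calls around ``set_state``/``get_state`` implement a snapshot pattern: several stubs share one predictor instance, so each stub restores its own saved state before consuming and stores a fresh snapshot afterwards. A condensed illustration with a hypothetical stateful predictor:

 import copy

 class CountingPredictor(object):
     """Hypothetical predictor whose state is the token count."""
     def __init__(self):
         self.state = 0
     def consume(self, token):
         self.state += 1
     def get_state(self):
         return self.state
     def set_state(self, state):
         self.state = state

 p = CountingPredictor()
 snapshot = copy.deepcopy(p.get_state())  # stub A saves its view
 p.consume(42)                            # stub B advances the predictor
 p.set_state(copy.deepcopy(snapshot))     # stub A restores its own view
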
Code example #26
 def decode(self, src_sentence):
     self.initialize_predictors(src_sentence)
     trg_sentence = self.trg_sentences[self.current_sen_id] + [utils.EOS_ID]
     score_breakdown = []
     score = 0.0
     all_posteriors = []
     all_unk_scores = []
     for trg_word in trg_sentence:
         self.apply_predictors_count += 1
         breakdown = []
         posteriors = []
         unk_scores = []
         for (p, w) in self.predictors:
             if isinstance(p, UnboundedVocabularyPredictor):
                 posterior = p.predict_next([trg_word])
             else:
                 posterior = p.predict_next()
             unk_prob = p.get_unk_probability(posterior)
             pred_score = utils.common_get(posterior, trg_word, unk_prob)
             breakdown.append((pred_score, w))
             score += pred_score * w
             posteriors.append(posterior)
             unk_scores.append(unk_prob)
         all_posteriors.append(posteriors)
         all_unk_scores.append(unk_scores)
         score_breakdown.append(breakdown)
         self.consume(trg_word)
     self.add_full_hypo(
         core.Hypothesis(trg_sentence, score, score_breakdown))
     self.last_meta_data = {
         "src_sentence": np.array(src_sentence + [utils.EOS_ID]),
         "trg_sentence": np.array(trg_sentence),
         "posteriors": all_posteriors,
         "unk_scores": all_unk_scores
     }
     return self.full_hypos
Code example #27
File: tokenization.py  Project: ucam-smt/sgnmt
 def _get_token_score(self, token, predictor):
     """Look up ``token`` in ``self.posterior``. """
     return utils.common_get(self.posterior,
                             token,
                             predictor.get_unk_probability(self.posterior))
Code example #28
File: tf_t2t.py  Project: strategist922/sgnmt
 def get_unk_probability(self, posterior):
     """Fetch posterior[t2t_unk_id]"""
     if len(self.history_sentences) > self.max_sentences:
         return 0.0
     return utils.common_get(posterior, self._t2t_unk_id, utils.NEG_INF)
Code example #29
File: tf_t2t.py  Project: ucam-smt/sgnmt
 def get_unk_probability(self, posterior):
     """Returns self.other_scores[n_aligned_words]."""
     return utils.common_get(self.other_scores, self.n_aligned_words, 0.0)
Code example #30
File: ffnnlm.py  Project: ucam-smt/sgnmt
 def get_unk_probability(self, posterior):
     """Use NPLM UNK score if exists """
     return utils.common_get(posterior, utils.UNK_ID, utils.NEG_INF)
Code example #31
 def get_unk_probability(self, posterior):
     """Use NPLM UNK score if exists """
     return utils.common_get(posterior, utils.UNK_ID, NEG_INF)
Code example #32
 def _update_slave_vars(self, posterior):
     self.slave_unk = self.slave_predictor.get_unk_probability(posterior)
     self.slave_go = common_get(posterior, utils.GO_ID, self.slave_unk)
     self.slave_eos = common_get(posterior, utils.EOS_ID, self.slave_unk)
Code example #33
 def get_unk_probability(self, posterior):
     """Fetch posterior[utils.UNK_ID]"""
     return utils.common_get(posterior, utils.UNK_ID, utils.NEG_INF)
Code example #34
File: tokenization.py  Project: ml-lab/sgnmt
 def _get_token_score(self, token, predictor):
     """Look up ``token`` in ``self.posterior``. """
     return utils.common_get(self.posterior, token,
                             predictor.get_unk_probability(self.posterior))
Code example #35
 def predict_next(self):
     """Returns self.pop_scores[n_aligned_words] for POP and EOS."""
     score = utils.common_get(self.pop_scores, self.n_aligned_words, 0.0)
     return {self.pop_id: score, utils.EOS_ID: score, 6: 0.0, 7: 0.0}
Code example #36
File: tf_t2t.py  Project: ucam-smt/sgnmt
 def predict_next(self):
     """Returns self.pop_scores[n_aligned_words] for POP and EOS."""
     score = utils.common_get(self.pop_scores, self.n_aligned_words, 0.0)
     return {self.pop_id: score, utils.EOS_ID: score, 6: 0.0, 7: 0.0}
Code example #37
 def get_unk_probability(self, posterior):
     """Fetch posterior[t2t_unk_id]"""
     return utils.common_get(posterior, self._t2t_unk_id, utils.NEG_INF)
Code example #38
File: tokenization.py  Project: ucam-smt/sgnmt
 def _update_slave_vars(self, posterior):
     self.slave_unk = self.slave_predictor.get_unk_probability(posterior)
     self.slave_go = common_get(posterior, utils.GO_ID, self.slave_unk)
     self.slave_eos = common_get(posterior, utils.EOS_ID, self.slave_unk)
Code example #39
 def get_unk_probability(self, posterior):
     """Returns self.other_scores[n_aligned_words]."""
     return utils.common_get(self.other_scores, self.n_aligned_words, 0.0)
Code example #40
File: tf_t2t.py  Project: ucam-smt/sgnmt
 def get_unk_probability(self, posterior):
     """Fetch posterior[t2t_unk_id]"""
     return utils.common_get(posterior, self._t2t_unk_id, utils.NEG_INF)