Example #1
 def decode(self, src_sentence):
     """Decode a single source sentence in a greedy way: Always take
     the highest scoring word as next word and proceed to the next
     position. This makes it possible to decode without using the 
     predictors ``get_state()`` and ``set_state()`` methods as we
     do not have to keep track of predictor states.
     
     Args:
         src_sentence (list): List of source word ids without <S> or
                              </S> which make up the source sentence
     
     Returns:
         list. A list of a single best ``Hypothesis`` instance."""
     self.initialize_predictors(src_sentence)
     trgt_sentence = []
     score_breakdown = []
     trgt_word = None
     score = 0.0
     while trgt_word != utils.EOS_ID and len(trgt_sentence) <= self.max_len:
         posterior, breakdown = self.apply_predictors(1)
         trgt_word = utils.argmax(posterior)
         score += posterior[trgt_word]
         trgt_sentence.append(trgt_word)
         logging.debug("Partial hypothesis (%f): %s" %
                       (score, " ".join([str(i) for i in trgt_sentence])))
         score_breakdown.append(breakdown[trgt_word])
         self.consume(trgt_word)
     self.add_full_hypo(Hypothesis(trgt_sentence, score, score_breakdown))
     return self.full_hypos
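
The loop above relies on SGNMT's predictor machinery, but the greedy strategy itself is easy to isolate. Below is a minimal, self-contained sketch in plain NumPy; ``score_next`` is a hypothetical stand-in for ``apply_predictors``, and ``EOS_ID``/``MAX_LEN`` are assumed values. At each position it takes the argmax of the posterior, accumulates its log-probability, and stops at the end-of-sentence id or a length limit.

import numpy as np

EOS_ID = 2      # assumed end-of-sentence id
MAX_LEN = 10    # assumed length limit, mirrors self.max_len

def score_next(prefix, vocab_size=5):
    """Hypothetical stand-in for apply_predictors(): return a
    normalized log-probability vector over the target vocabulary."""
    rng = np.random.default_rng(len(prefix))
    logits = rng.normal(size=vocab_size)
    return logits - np.logaddexp.reduce(logits)

def greedy_decode():
    trgt_sentence, score, trgt_word = [], 0.0, None
    while trgt_word != EOS_ID and len(trgt_sentence) <= MAX_LEN:
        posterior = score_next(trgt_sentence)
        trgt_word = int(np.argmax(posterior))  # highest scoring word
        score += posterior[trgt_word]          # accumulate its log-prob
        trgt_sentence.append(trgt_word)
    return trgt_sentence, score

print(greedy_decode())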
Example #2
 def decode(self, src_sentence):
     """Decodes a single source sentence with the original blocks
     beam search decoder. Does not use predictors. Note that the
     score breakdowns in returned hypotheses are only on the 
     sentence level, not on the word level. For finer grained NMT
     scores you need to use the nmt predictor. ``src_sentence`` is a
     list of source word ids representing the source sentence without
     <S> or </S> symbols. As blocks expects to see </S>, this method
     adds it automatically.
     
     Args:
         src_sentence (list): List of source word ids without <S> or
                              </S> which make up the source sentence
     
     Returns:
         list. A list of ``Hypothesis`` instances ordered by their
         score.
     """
     seq = self.src_sparse_feat_map.words2dense(
         utils.oov_to_unk(src_sentence,
                          self.config['src_vocab_size'])) + [self.src_eos]
     if self.src_sparse_feat_map.dim > 1:  # sparse src feats
         input_ = np.transpose(
             np.tile(seq, (self.config['beam_size'], 1, 1)), (2, 0, 1))
     else:  # word ids on the source side
         input_ = np.tile(seq, (self.config['beam_size'], 1))
     trans, costs = self.beam_search.search(
                 input_values={self.nmt_model.sampling_input: input_},
                 max_length=3*len(src_sentence),
                 eol_symbol=utils.EOS_ID,
                 ignore_first_eol=True)
     hypos = []
     max_len = 0
     for idx in xrange(len(trans)):
         max_len = max(max_len, len(trans[idx]))
         hypo = Hypothesis(trans[idx], -costs[idx])
         hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]]
         hypo.score_breakdown[0] = [(-costs[idx], 1.0)]
         hypos.append(hypo)
     self.apply_predictors_count = max_len * self.config['beam_size']
     return hypos
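
Note how the sentence-level breakdown is padded: every position gets a neutral ``(0.0, 1.0)`` entry and the entire cost is attributed to the first word. A self-contained illustration of the trick (``trans`` and ``cost`` are made-up values):

trans = [4, 17, 9, 2]   # hypothetical target word ids
cost = 5.25             # hypothetical total negative log-prob

score_breakdown = len(trans) * [[(0.0, 1.0)]]  # neutral (score, weight) pairs
# Rebinding index 0 is safe even though the repeated inner lists are
# aliased: the shared lists are never mutated in place.
score_breakdown[0] = [(-cost, 1.0)]

assert sum(s for [(s, _)] in score_breakdown) == -cost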
Example #3
 def decode(self, src_sentence):
     """Decodes a single source sentence with the original blocks
     beam search decoder. Does not use predictors. Note that the
     score breakdowns in returned hypotheses are only on the 
     sentence level, not on the word level. For finer grained NMT
     scores you need to use the nmt predictor. ``src_sentence`` is a
     list of source word ids representing the source sentence without
     <S> or </S> symbols. As blocks expects to see </S>, this method
     adds it automatically.
     
     Args:
         src_sentence (list): List of source word ids without <S> or
                              </S> which make up the source sentence
     
     Returns:
         list. A list of ``Hypothesis`` instances ordered by their
         score.
     """
     seq = self.src_sparse_feat_map.words2dense(
         utils.oov_to_unk(src_sentence,
                          self.config['src_vocab_size'])) + [self.src_eos]
     if self.src_sparse_feat_map.dim > 1:  # sparse src feats
         input_ = np.transpose(
             np.tile(seq, (self.config['beam_size'], 1, 1)), (2, 0, 1))
     else:  # word ids on the source side
         input_ = np.tile(seq, (self.config['beam_size'], 1))
     trans, costs = self.beam_search.search(
         input_values={self.nmt_model.sampling_input: input_},
         max_length=3 * len(src_sentence),
         eol_symbol=utils.EOS_ID,
         ignore_first_eol=True)
     hypos = []
     max_len = 0
     for idx in xrange(len(trans)):
         max_len = max(max_len, len(trans[idx]))
         hypo = Hypothesis(trans[idx], -costs[idx])
         hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]]
         hypo.score_breakdown[0] = [(-costs[idx], 1.0)]
         hypos.append(hypo)
     self.apply_predictors_count = max_len * self.config['beam_size']
     return hypos
Example #4
    def decode(self, src_sentence):
        """This is a generalization to NMT ensembles of 
        ``BeamSearch.search``.
        
        Args:
            src_sentence (list): List of source word ids without <S> or
                                 </S> which make up the source sentence
        
        Returns:
            list. A list of ``Hypothesis`` instances ordered by their
            score.
        """
        for search in self.beam_searches:
            if not search.compiled:
                search.compile()
        seq = self.src_sparse_feat_map.words2dense(
            utils.oov_to_unk(src_sentence,
                             self.src_vocab_size)) + [self.src_eos]
        if self.src_sparse_feat_map.dim > 1:  # sparse src feats
            input_ = np.transpose(np.tile(seq, (self.beam_size, 1, 1)),
                                  (2, 0, 1))
        else:  # word ids on the source side
            input_ = np.tile(seq, (self.beam_size, 1))

        contexts_and_states = []
        for sys_idx in xrange(self.n_networks):
            contexts, states, _ = \
                self.beam_searches[sys_idx].compute_initial_states_and_contexts(
                            {self.nmt_models[sys_idx].sampling_input: input_})
            contexts_and_states.append(
                (contexts, states, self.beam_searches[sys_idx]))

        # This array will store all generated outputs, including those from
        # previous step and those from already finished sequences.
        all_outputs = states['outputs'][None, :]
        all_masks = np.ones_like(all_outputs, dtype=config.floatX)
        all_costs = np.zeros_like(all_outputs, dtype=config.floatX)

        for i in range(3 * len(src_sentence)):
            if all_masks[-1].sum() == 0:
                break
            logprobs_lst = []
            for contexts, states, search in contexts_and_states:
                logprobs_lst.append(search.compute_logprobs(contexts, states))

            logprobs = np.sum(logprobs_lst, axis=0)
            next_costs = (all_costs[-1, :, None] +
                          logprobs * all_masks[-1, :, None])
            (finished, ) = np.where(all_masks[-1] == 0)
            next_costs[finished, :utils.EOS_ID] = np.inf
            next_costs[finished, utils.EOS_ID + 1:] = np.inf

            # The `i == 0` is required because at the first step the beam
            # size is effectively only 1.
            (indexes, outputs), chosen_costs = BeamSearch._smallest(
                next_costs, self.beam_size, only_first_row=i == 0)

            all_outputs = all_outputs[:, indexes]
            all_masks = all_masks[:, indexes]
            all_costs = all_costs[:, indexes]

            # Rearrange everything
            for contexts, states, search in contexts_and_states:
                for name in states:
                    states[name] = states[name][indexes]
                states.update(
                    search.compute_next_states(contexts, states, outputs))

            all_outputs = np.vstack([all_outputs, outputs[None, :]])
            all_costs = np.vstack([all_costs, chosen_costs[None, :]])
            mask = outputs != utils.EOS_ID
            if i == 0:
                mask[:] = 1
            all_masks = np.vstack([all_masks, mask[None, :]])

        all_outputs = all_outputs[1:]
        all_masks = all_masks[:-1]
        all_costs = all_costs[1:] - all_costs[:-1]
        result = all_outputs, all_masks, all_costs
        trans, costs = BeamSearch.result_to_lists(result)
        hypos = []
        max_len = 0
        for idx in xrange(len(trans)):
            max_len = max(max_len, len(trans[idx]))
            hypo = Hypothesis(trans[idx], -costs[idx])
            hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]]
            hypo.score_breakdown[0] = [(-costs[idx], 1.0)]
            hypos.append(hypo)
        self.apply_predictors_count = max_len * self.beam_size
        return hypos
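
Two NumPy idioms drive the ensemble loop above: summing the per-model cost matrices (a uniform log-linear combination) and forcing finished hypotheses to continue only with </S> by setting every other column of their rows to infinity. A toy sketch with made-up numbers:

import numpy as np

EOS_ID = 2                  # assumed end-of-sentence id
beam_size, vocab_size = 3, 5

# Per-model cost matrices (negative log-probs), beam_size x vocab_size.
rng = np.random.default_rng(0)
logprobs_lst = [rng.random((beam_size, vocab_size)) for _ in range(2)]
logprobs = np.sum(logprobs_lst, axis=0)   # log-linear ensemble combination

mask = np.array([1.0, 0.0, 1.0])          # hypothesis 1 is already finished
last_costs = np.array([1.5, 2.0, 0.7])    # cumulative costs so far

# Unfinished hypotheses expand normally; finished ones add zero cost ...
next_costs = last_costs[:, None] + logprobs * mask[:, None]

# ... and may only be continued with the </S> symbol.
(finished,) = np.where(mask == 0)
next_costs[finished, :EOS_ID] = np.inf
next_costs[finished, EOS_ID + 1:] = np.inf

assert np.isfinite(next_costs[1]).sum() == 1  # only the </S> column survives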
Example #5
def _generate_dummy_hypo(predictors):
    return Hypothesis([utils.UNK_ID], 0.0, [[(0.0, w) for _, w in predictors]]) 
Example #6
    def decode(self, src_sentence):
        """This is a generalization to NMT ensembles of 
        ``BeamSearch.search``.
        
        Args:
            src_sentence (list): List of source word ids without <S> or
                                 </S> which make up the source sentence
        
        Returns:
            list. A list of ``Hypothesis`` instances ordered by their
            score.
        """
        for search in self.beam_searches:
            if not search.compiled:
                search.compile()
        seq = self.src_sparse_feat_map.words2dense(
            utils.oov_to_unk(src_sentence,
                             self.src_vocab_size)) + [self.src_eos]
        if self.src_sparse_feat_map.dim > 1:  # sparse src feats
            input_ = np.transpose(
                np.tile(seq, (self.beam_size, 1, 1)), (2, 0, 1))
        else:  # word ids on the source side
            input_ = np.tile(seq, (self.beam_size, 1))

        contexts_and_states = []
        for sys_idx in xrange(self.n_networks):
            contexts, states, _ = \
                self.beam_searches[sys_idx].compute_initial_states_and_contexts(
                            {self.nmt_models[sys_idx].sampling_input: input_})
            contexts_and_states.append((contexts, 
                                        states, 
                                        self.beam_searches[sys_idx]))

        # This array will store all generated outputs, including those from
        # previous step and those from already finished sequences.
        all_outputs = states['outputs'][None, :]
        all_masks = np.ones_like(all_outputs, dtype=config.floatX)
        all_costs = np.zeros_like(all_outputs, dtype=config.floatX)

        for i in range(3*len(src_sentence)):
            if all_masks[-1].sum() == 0:
                break
            logprobs_lst = []
            for contexts, states, search in contexts_and_states:
                logprobs_lst.append(search.compute_logprobs(contexts, states))
            
            logprobs = np.sum(logprobs_lst, axis=0)
            next_costs = (all_costs[-1, :, None] +
                          logprobs * all_masks[-1, :, None])
            (finished,) = np.where(all_masks[-1] == 0)
            next_costs[finished, :utils.EOS_ID] = np.inf
            next_costs[finished, utils.EOS_ID + 1:] = np.inf

            # The `i == 0` is required because at the first step the beam
            # size is effectively only 1.
            (indexes, outputs), chosen_costs = BeamSearch._smallest(
                next_costs, self.beam_size, only_first_row=i == 0)

            all_outputs = all_outputs[:, indexes]
            all_masks = all_masks[:, indexes]
            all_costs = all_costs[:, indexes]
            
            # Rearrange everything
            for contexts, states, search in contexts_and_states:
                for name in states:
                    states[name] = states[name][indexes]
                states.update(search.compute_next_states(contexts, 
                                                         states, 
                                                         outputs))
            
            all_outputs = np.vstack([all_outputs, outputs[None, :]])
            all_costs = np.vstack([all_costs, chosen_costs[None, :]])
            mask = outputs != utils.EOS_ID
            if i == 0:
                mask[:] = 1
            all_masks = np.vstack([all_masks, mask[None, :]])

        all_outputs = all_outputs[1:]
        all_masks = all_masks[:-1]
        all_costs = all_costs[1:] - all_costs[:-1]
        result = all_outputs, all_masks, all_costs
        trans, costs = BeamSearch.result_to_lists(result)
        hypos = []
        max_len = 0
        for idx in xrange(len(trans)):
            max_len = max(max_len, len(trans[idx]))
            hypo = Hypothesis(trans[idx], -costs[idx])
            hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]]
            hypo.score_breakdown[0] = [(-costs[idx], 1.0)]
            hypos.append(hypo)
        self.apply_predictors_count = max_len * self.beam_size
        return hypos
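
``BeamSearch._smallest`` selects the ``beam_size`` lowest-cost candidates across the whole beam x vocabulary matrix. The sketch below reproduces that selection with plain NumPy (an assumption about its behavior, not blocks' actual implementation): flatten, take the k smallest entries, then recover hypothesis and word indices with ``divmod``.

import numpy as np

def smallest_sketch(costs, k, only_first_row=False):
    """Assumed behavior of BeamSearch._smallest: return the beam
    indexes, word ids and costs of the k lowest-cost candidates."""
    if only_first_row:
        costs = costs[:1]   # step 0: the beam holds only one real hypo
    flat = costs.flatten()
    best = np.argpartition(flat, k - 1)[:k]  # k smallest, unordered
    best = best[np.argsort(flat[best])]      # order them by cost
    indexes, outputs = divmod(best, costs.shape[1])
    return (indexes, outputs), flat[best]

costs = np.array([[3.0, 0.5, 2.0],
                  [1.0, 4.0, 0.2]])
(idx, out), chosen = smallest_sketch(costs, 2)
print(idx, out, chosen)  # -> [1 0] [2 1] [0.2 0.5]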
Example #7
    def decode(self, src_sentence):
        """This is a generalization to NMT ensembles of ``DynetNMTVanillaDecoder``.
                    
        Args:
        src_sentence (list): List of source word ids without <S> or
        </S> which make up the source sentence
        
        Returns:
        list. A list of ``Hypothesis`` instances ordered by their
        score.
        """
        dy.renew_cg()
        logging.debug(u'src_sentence: {}'.format(src_sentence))
        MAX_PRED_SEQ_LEN = 30  # alternative: 3 * len(src_sentence)
        beam_size = self.beam_size
        nmt_models = self.nmt_models

        # All nmt_models are assumed to use the same vocabulary.
        BEGIN = utils.GO_ID
        STOP = utils.EOS_ID

        for m in nmt_models:
            m.initialize(src_sentence)
        states = [[m.s] * beam_size
                  for m in nmt_models]  # ensemble x beam matrix of states
        # This array will store all generated outputs, including those from
        # previous step and those from already finished sequences.
        all_outputs = np.full(shape=(1, beam_size),
                              fill_value=BEGIN,
                              dtype=int)
        all_masks = np.ones_like(
            all_outputs, dtype=float)  # whether predicted symbol is self.STOP
        all_costs = np.zeros_like(
            all_outputs, dtype=float)  # the cumulative cost of predictions

        for i in range(MAX_PRED_SEQ_LEN):
            if all_masks[-1].sum() == 0:
                logging.debug(u'check masks: {}'.format(all_masks[-1]))
                break

            # We carefully hack values of the `logprobs` array to ensure
            # that all finished sequences are continued with `eos_symbol`.
            logprobs_lst = []
            for j, m in enumerate(nmt_models):
                # beam_size x vocab_len matrix of costs for model j
                logprobs_m = -np.array([m.predict_next_(s) for s in states[j]])
                logprobs_lst.append(logprobs_m)
            logprobs = np.sum(logprobs_lst, axis=0)
            # Take the last row of cumulative costs as a beam_size x 1 column
            # and add the cost distributions of the unfinished hypotheses,
            # giving a beam_size x vocab_len matrix of next costs.
            next_costs = (all_costs[-1, :, None] +
                          logprobs * all_masks[-1, :, None])
            # Finished hypotheses carry all their cost on the STOP symbol.
            (finished,) = np.where(all_masks[-1] == 0)
            next_costs[finished, :STOP] = np.inf
            next_costs[finished, STOP + 1:] = np.inf

            # indexes: the beam entries from the previous step to keep;
            # outputs: the next-step predictions; chosen_costs: the cost
            # of each predicted symbol.
            (indexes, outputs), chosen_costs = \
                DynetNMTVanillaDecoder._smallest(
                    next_costs, beam_size, only_first_row=i == 0)

            # Rearrange everything
            new_states = []
            for j, m in enumerate(nmt_models):
                new_states.append([states[j][ind] for ind in indexes])

            all_outputs = all_outputs[:, indexes]
            all_masks = all_masks[:, indexes]
            all_costs = all_costs[:, indexes]

            # Record chosen output and compute new states
            states = [[
                m.consume_next_(s, pred_id)
                for s, pred_id in zip(m_new_states, outputs)
            ] for m, m_new_states in zip(nmt_models, new_states)]
            all_outputs = np.vstack([all_outputs, outputs[None, :]])
            logging.debug(u'all_outputs: {}'.format(all_outputs))
            logging.debug(u'outputs: {}'.format(
                [utils.apply_trg_wmap([c]) for c in outputs]))
            logging.debug(u'indexes: {}'.format(indexes))
            logging.debug(u'chosen_costs: {}'.format(chosen_costs))
            logging.debug(u'outputs != STOP: {}'.format(outputs != STOP))
            all_costs = np.vstack([all_costs, chosen_costs[None, :]])
            mask = outputs != STOP
            # if ignore_first_eol and i == 0:
            #     mask[:] = 1
            all_masks = np.vstack([all_masks, mask[None, :]])
            logging.debug(u'last masks: {}'.format(all_masks[-1]))

        all_outputs = all_outputs[1:]  # skip the first row of BEGIN symbols
        logging.debug(u'outputs: {}'.format(all_outputs))
        all_masks = all_masks[:-1]  # drop the mask row of the final step
        logging.debug(u'masks: {}'.format(all_masks))
        # Turn cumulative costs into the cost of each individual step.
        all_costs = all_costs[1:] - all_costs[:-1]
        result = all_outputs, all_masks, all_costs

        trans, costs = DynetNMTVanillaDecoder.result_to_lists(result)
        logging.debug(u'trans: {}'.format(trans))
        hypos = []
        max_len = 0
        for idx in xrange(len(trans)):
            max_len = max(max_len, len(trans[idx]))
            hypo = Hypothesis(trans[idx], -costs[idx])
            hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]]
            hypo.score_breakdown[0] = [(-costs[idx], 1.0)]
            hypos.append(hypo)
        self.apply_predictors_count = max_len * self.beam_size
        logging.debug(u'hypos: {}'.format(hypos))
        return hypos
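
After each selection, every model's list of recurrent states must be gathered with the same ``indexes`` so that state ``k`` still belongs to beam entry ``k``. A minimal illustration of that gather step (the labels stand in for dynet states):

states = [['m0_h0', 'm0_h1', 'm0_h2'],   # model 0, one state per beam entry
          ['m1_h0', 'm1_h1', 'm1_h2']]   # model 1
indexes = [2, 2, 0]                      # survivors chosen by _smallest

new_states = [[model_states[i] for i in indexes] for model_states in states]
print(new_states)  # [['m0_h2', 'm0_h2', 'm0_h0'], ['m1_h2', 'm1_h2', 'm1_h0']]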
Example #8
    def decode(self, src_sentence):
        """Decodes a single source sentence. Note that the
        score breakdowns in returned hypotheses are only on the
        sentence level, not on the word level. For finer grained NMT
        scores you need to use the nmt predictor. ``src_sentence`` is a
        list of source word ids representing the source sentence without
        <S> or </S> symbols. As blocks expects to see </S>, this method
        adds it automatically.
        
        Args:
            src_sentence (list): List of source word ids without <S> or
                                 </S> which make up the source sentence
        
        Returns:
            list. A list of ``Hypothesis`` instances ordered by their
            score.
        """
        dy.renew_cg()
        logging.debug(u'src_sentence: {}'.format(src_sentence))
        MAX_PRED_SEQ_LEN = 30  # alternative: 30 * len(src_sentence)
        logging.debug(u'MAX_PRED_SEQ_LEN: {}'.format(MAX_PRED_SEQ_LEN))
        BEGIN = utils.GO_ID
        STOP = utils.EOS_ID
        logging.debug(u'BEGIN: {}, STOP: {}'.format(BEGIN, STOP))
        beam_size = self.beam_size
        self.nmt_model.initialize(src_sentence)
        # ignore_first_eol = True
        states = [self.nmt_model.s] * beam_size
        # This array will store all generated outputs, including those from
        # previous step and those from already finished sequences.
        all_outputs = np.full(shape=(1, beam_size),
                              fill_value=BEGIN,
                              dtype=int)
        all_masks = np.ones_like(
            all_outputs, dtype=float)  # whether predicted symbol is self.STOP
        all_costs = np.zeros_like(
            all_outputs, dtype=float)  # the cumulative cost of predictions

        for i in range(MAX_PRED_SEQ_LEN):
            if all_masks[-1].sum() == 0:
                logging.debug(u'all_masks: {}'.format(all_masks))
                break

            # We carefully hack values of the `logprobs` array to ensure
            # that all finished sequences are continued with `eos_symbol`.
            logprobs = -np.array(
                [self.nmt_model.predict_next_(s) for s in states])
            # Take the last row of cumulative costs as a beam_size x 1 column
            # and add the cost distributions of the unfinished hypotheses,
            # giving a beam_size x vocab_len matrix of next costs.
            next_costs = (all_costs[-1, :, None] +
                          logprobs * all_masks[-1, :, None])
            # Finished hypotheses carry all their cost on the STOP symbol.
            (finished,) = np.where(all_masks[-1] == 0)
            next_costs[finished, :STOP] = np.inf
            next_costs[finished, STOP + 1:] = np.inf

            # indexes: the beam entries from the previous step to keep;
            # outputs: the next-step predictions; chosen_costs: the cost
            # of each predicted symbol.
            (indexes, outputs), chosen_costs = self._smallest(
                next_costs, beam_size, only_first_row=i == 0)
            # Rearrange everything
            new_states = (states[ind] for ind in indexes)
            all_outputs = all_outputs[:, indexes]
            all_masks = all_masks[:, indexes]
            all_costs = all_costs[:, indexes]

            # Record chosen output and compute new states
            states = [
                self.nmt_model.consume_next_(s, pred_id)
                for s, pred_id in zip(new_states, outputs)
            ]
            all_outputs = np.vstack([all_outputs, outputs[None, :]])
            logging.debug(u'all_outputs: {}'.format(all_outputs))
            logging.debug(u'outputs: {}'.format(
                [utils.apply_trg_wmap([c]) for c in outputs]))
            logging.debug(u'indexes: {}'.format(indexes))
            logging.debug(u'chosen_costs: {}'.format(chosen_costs))
            logging.debug(u'outputs != STOP: {}'.format(outputs != STOP))
            all_costs = np.vstack([all_costs, chosen_costs[None, :]])
            mask = outputs != STOP
            # if ignore_first_eol and i == 0:
            #     mask[:] = 1
            all_masks = np.vstack([all_masks, mask[None, :]])

        all_outputs = all_outputs[1:]  # skip the first row of BEGIN symbols
        logging.debug(u'outputs: {}'.format(all_outputs))
        all_masks = all_masks[:-1]  # drop the mask row of the final step
        logging.debug(u'masks: {}'.format(all_masks))
        # Turn cumulative costs into the cost of each individual step.
        all_costs = all_costs[1:] - all_costs[:-1]
        result = all_outputs, all_masks, all_costs

        trans, costs = self.result_to_lists(result)
        logging.debug(u'trans: {}'.format(trans))
        hypos = []
        max_len = 0
        for idx in xrange(len(trans)):
            max_len = max(max_len, len(trans[idx]))
            hypo = Hypothesis(trans[idx], -costs[idx])
            hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]]
            hypo.score_breakdown[0] = [(-costs[idx], 1.0)]
            hypos.append(hypo)

        logging.debug(u'hypos: {}'.format(hypos))
        return hypos
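
The post-processing step ``all_costs[1:] - all_costs[:-1]`` recovers per-step costs from the cumulative cost matrix; by construction the per-step costs sum back to the final cumulative row. A short check with toy numbers:

import numpy as np

# Cumulative costs per step (rows) for a beam of two hypotheses.
all_costs = np.array([[0.0, 0.0],
                      [1.2, 0.9],
                      [2.0, 2.1]])

step_costs = all_costs[1:] - all_costs[:-1]  # cost added at each step
assert np.allclose(step_costs.sum(axis=0), all_costs[-1])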