def decode(self, src_sentence): """Decode a single source sentence in a greedy way: Always take the highest scoring word as next word and proceed to the next position. This makes it possible to decode without using the predictors ``get_state()`` and ``set_state()`` methods as we do not have to keep track of predictor states. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of a single best ``Hypothesis`` instance.""" self.initialize_predictors(src_sentence) trgt_sentence = [] score_breakdown = [] trgt_word = None score = 0.0 while trgt_word != utils.EOS_ID and len(trgt_sentence) <= self.max_len: posterior, breakdown = self.apply_predictors(1) trgt_word = utils.argmax(posterior) score += posterior[trgt_word] trgt_sentence.append(trgt_word) logging.debug("Partial hypothesis (%f): %s" % (score, " ".join([str(i) for i in trgt_sentence]))) score_breakdown.append(breakdown[trgt_word]) self.consume(trgt_word) self.add_full_hypo(Hypothesis(trgt_sentence, score, score_breakdown)) return self.full_hypos
def decode(self, src_sentence): """Decodes a single source sentence with the original blocks beam search decoder. Does not use predictors. Note that the score breakdowns in returned hypotheses are only on the sentence level, not on the word level. For finer grained NMT scores you need to use the nmt predictor. ``src_sentence`` is a list of source word ids representing the source sentence without <S> or </S> symbols. As blocks expects to see </S>, this method adds it automatically. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. """ seq = self.src_sparse_feat_map.words2dense(utils.oov_to_unk( src_sentence, self.config['src_vocab_size'])) + [self.src_eos] if self.src_sparse_feat_map.dim > 1: # sparse src feats input_ = np.transpose( np.tile(seq, (self.config['beam_size'], 1, 1)), (2,0,1)) else: # word ids on the source side input_ = np.tile(seq, (self.config['beam_size'], 1)) trans, costs = self.beam_search.search( input_values={self.nmt_model.sampling_input: input_}, max_length=3*len(src_sentence), eol_symbol=utils.EOS_ID, ignore_first_eol=True) hypos = [] max_len = 0 for idx in xrange(len(trans)): max_len = max(max_len, len(trans[idx])) hypo = Hypothesis(trans[idx], -costs[idx]) hypo.score_breakdown = len(trans[idx]) * [[(0.0,1.0)]] hypo.score_breakdown[0] = [(-costs[idx],1.0)] hypos.append(hypo) self.apply_predictors_count = max_len * self.config['beam_size'] return hypos
def decode(self, src_sentence): """Decodes a single source sentence with the original blocks beam search decoder. Does not use predictors. Note that the score breakdowns in returned hypotheses are only on the sentence level, not on the word level. For finer grained NMT scores you need to use the nmt predictor. ``src_sentence`` is a list of source word ids representing the source sentence without <S> or </S> symbols. As blocks expects to see </S>, this method adds it automatically. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. """ seq = self.src_sparse_feat_map.words2dense( utils.oov_to_unk(src_sentence, self.config['src_vocab_size'])) + [self.src_eos] if self.src_sparse_feat_map.dim > 1: # sparse src feats input_ = np.transpose( np.tile(seq, (self.config['beam_size'], 1, 1)), (2, 0, 1)) else: # word ids on the source side input_ = np.tile(seq, (self.config['beam_size'], 1)) trans, costs = self.beam_search.search( input_values={self.nmt_model.sampling_input: input_}, max_length=3 * len(src_sentence), eol_symbol=utils.EOS_ID, ignore_first_eol=True) hypos = [] max_len = 0 for idx in xrange(len(trans)): max_len = max(max_len, len(trans[idx])) hypo = Hypothesis(trans[idx], -costs[idx]) hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]] hypo.score_breakdown[0] = [(-costs[idx], 1.0)] hypos.append(hypo) self.apply_predictors_count = max_len * self.config['beam_size'] return hypos
def decode(self, src_sentence): """This is a generalization to NMT ensembles of ``BeamSearch.search``. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. """ for search in self.beam_searches: if not search.compiled: search.compile() seq = self.src_sparse_feat_map.words2dense( utils.oov_to_unk(src_sentence, self.src_vocab_size)) + [self.src_eos] if self.src_sparse_feat_map.dim > 1: # sparse src feats input_ = np.transpose(np.tile(seq, (self.beam_size, 1, 1)), (2, 0, 1)) else: # word ids on the source side input_ = np.tile(seq, (self.beam_size, 1)) contexts_and_states = [] for sys_idx in xrange(self.n_networks): contexts, states, _ = \ self.beam_searches[sys_idx].compute_initial_states_and_contexts( {self.nmt_models[sys_idx].sampling_input: input_}) contexts_and_states.append( (contexts, states, self.beam_searches[sys_idx])) # This array will store all generated outputs, including those from # previous step and those from already finished sequences. all_outputs = states['outputs'][None, :] all_masks = np.ones_like(all_outputs, dtype=config.floatX) all_costs = np.zeros_like(all_outputs, dtype=config.floatX) for i in range(3 * len(src_sentence)): if all_masks[-1].sum() == 0: break logprobs_lst = [] for contexts, states, search in contexts_and_states: logprobs_lst.append(search.compute_logprobs(contexts, states)) logprobs = np.sum(logprobs_lst, axis=0) next_costs = (all_costs[-1, :, None] + logprobs * all_masks[-1, :, None]) (finished, ) = np.where(all_masks[-1] == 0) next_costs[finished, :utils.EOS_ID] = np.inf next_costs[finished, utils.EOS_ID + 1:] = np.inf # The `i == 0` is required because at the first step the beam # size is effectively only 1. (indexes, outputs), chosen_costs = BeamSearch._smallest( next_costs, self.beam_size, only_first_row=i == 0) all_outputs = all_outputs[:, indexes] all_masks = all_masks[:, indexes] all_costs = all_costs[:, indexes] # Rearrange everything for contexts, states, search in contexts_and_states: for name in states: states[name] = states[name][indexes] states.update( search.compute_next_states(contexts, states, outputs)) all_outputs = np.vstack([all_outputs, outputs[None, :]]) all_costs = np.vstack([all_costs, chosen_costs[None, :]]) mask = outputs != utils.EOS_ID if i == 0: mask[:] = 1 all_masks = np.vstack([all_masks, mask[None, :]]) all_outputs = all_outputs[1:] all_masks = all_masks[:-1] all_costs = all_costs[1:] - all_costs[:-1] result = all_outputs, all_masks, all_costs trans, costs = BeamSearch.result_to_lists(result) hypos = [] max_len = 0 for idx in xrange(len(trans)): max_len = max(max_len, len(trans[idx])) hypo = Hypothesis(trans[idx], -costs[idx]) hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]] hypo.score_breakdown[0] = [(-costs[idx], 1.0)] hypos.append(hypo) self.apply_predictors_count = max_len * self.beam_size return hypos
def _generate_dummy_hypo(predictors):
    """Creates a dummy hypothesis consisting of a single UNK token
    with a zero score contribution for each predictor weight ``w``
    in the (predictor, weight) pairs of ``predictors``."""
    return Hypothesis([utils.UNK_ID],
                      0.0,
                      [[(0.0, w) for _, w in predictors]])
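# Layout sketch (illustrative, not part of the original module): a score
# breakdown is a list with one entry per target token, and each entry is a
# list of (score, weight) pairs, one per predictor. The weights 0.5 below are
# hypothetical. For the dummy hypothesis above, with its single UNK token,
# two equally weighted predictors yield:
_example_predictors = [(None, 0.5), (None, 0.5)]  # (predictor, weight) pairs
_example_breakdown = [[(0.0, w) for _, w in _example_predictors]]
assert _example_breakdown == [[(0.0, 0.5), (0.0, 0.5)]]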
def decode(self, src_sentence): """This is a generalization to NMT ensembles of ``BeamSearch.search``. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. """ for search in self.beam_searches: if not search.compiled: search.compile() seq = self.src_sparse_feat_map.words2dense(utils.oov_to_unk( src_sentence, self.src_vocab_size)) + [self.src_eos] if self.src_sparse_feat_map.dim > 1: # sparse src feats input_ = np.transpose( np.tile(seq, (self.beam_size, 1, 1)), (2,0,1)) else: # word ids on the source side input_ = np.tile(seq, (self.beam_size, 1)) contexts_and_states = [] for sys_idx in xrange(self.n_networks): contexts, states, _ = \ self.beam_searches[sys_idx].compute_initial_states_and_contexts( {self.nmt_models[sys_idx].sampling_input: input_}) contexts_and_states.append((contexts, states, self.beam_searches[sys_idx])) # This array will store all generated outputs, including those from # previous step and those from already finished sequences. all_outputs = states['outputs'][None, :] all_masks = np.ones_like(all_outputs, dtype=config.floatX) all_costs = np.zeros_like(all_outputs, dtype=config.floatX) for i in range(3*len(src_sentence)): if all_masks[-1].sum() == 0: break logprobs_lst = [] for contexts, states, search in contexts_and_states: logprobs_lst.append(search.compute_logprobs(contexts, states)) logprobs = np.sum(logprobs_lst, axis=0) next_costs = (all_costs[-1, :, None] + logprobs * all_masks[-1, :, None]) (finished,) = np.where(all_masks[-1] == 0) next_costs[finished, :utils.EOS_ID] = np.inf next_costs[finished, utils.EOS_ID + 1:] = np.inf # The `i == 0` is required because at the first step the beam # size is effectively only 1. (indexes, outputs), chosen_costs = BeamSearch._smallest( next_costs, self.beam_size, only_first_row=i == 0) all_outputs = all_outputs[:, indexes] all_masks = all_masks[:, indexes] all_costs = all_costs[:, indexes] # Rearrange everything for contexts, states, search in contexts_and_states: for name in states: states[name] = states[name][indexes] states.update(search.compute_next_states(contexts, states, outputs)) all_outputs = np.vstack([all_outputs, outputs[None, :]]) all_costs = np.vstack([all_costs, chosen_costs[None, :]]) mask = outputs != utils.EOS_ID if i == 0: mask[:] = 1 all_masks = np.vstack([all_masks, mask[None, :]]) all_outputs = all_outputs[1:] all_masks = all_masks[:-1] all_costs = all_costs[1:] - all_costs[:-1] result = all_outputs, all_masks, all_costs trans, costs = BeamSearch.result_to_lists(result) hypos = [] max_len = 0 for idx in xrange(len(trans)): max_len = max(max_len, len(trans[idx])) hypo = Hypothesis(trans[idx], -costs[idx]) hypo.score_breakdown = len(trans[idx]) * [[(0.0,1.0)]] hypo.score_breakdown[0] = [(-costs[idx],1.0)] hypos.append(hypo) self.apply_predictors_count = max_len * self.beam_size return hypos
def decode(self, src_sentence): """This is a generalization to NMT ensembles of ``DynetNMTVanillaDecoder``. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. """ dy.renew_cg() logging.debug(u'src_sentence: {}'.format(src_sentence)) MAX_PRED_SEQ_LEN = 30 #3*len(src_sentence) beam_size = self.beam_size nmt_models = self.nmt_models # nmt_vocab = nmt_models[0].vocab # same vocab file for all nmt_models!! # BEGIN = nmt_vocab.w2i[BEGIN_CHAR] BEGIN = utils.GO_ID STOP = utils.EOS_ID # STOP = nmt_vocab.w2i[STOP_CHAR] for m in nmt_models: m.initialize(src_sentence) states = [[m.s] * beam_size for m in nmt_models] # ensemble x beam matrix of states # This array will store all generated outputs, including those from # previous step and those from already finished sequences. all_outputs = np.full(shape=(1, beam_size), fill_value=BEGIN, dtype=int) all_masks = np.ones_like( all_outputs, dtype=float) # whether predicted symbol is self.STOP all_costs = np.zeros_like( all_outputs, dtype=float) # the cumulative cost of predictions for i in range(MAX_PRED_SEQ_LEN): if all_masks[-1].sum() == 0: logging.debug(u'check masks: {}'.format(all_masks[-1])) break # We carefully hack values of the `logprobs` array to ensure # that all finished sequences are continued with `eos_symbol`. logprobs_lst = [] for j, m in enumerate(nmt_models): logprobs_m = -np.array([m.predict_next_(s) for s in states[j] ]) # beam_size x vocab_len logprobs_lst.append(logprobs_m) logprobs = np.sum(logprobs_lst, axis=0) next_costs = ( all_costs[-1, :, None] + logprobs * all_masks[-1, :, None] ) #take last row of cumul prev costs and turn into beam_size X 1 matrix, take logprobs distributions for unfinished hypos only and add it (elem-wise) with the array of prev costs; result: beam_size x vocab_len matrix of next costs (finished, ) = np.where( all_masks[-1] == 0 ) # finished hypos have all their cost on the self.STOP symbol next_costs[finished, :STOP] = np.inf next_costs[finished, STOP + 1:] = np.inf # indexes - the hypos from prev step to keep, outputs - the next step prediction, chosen cost - cost of predicted symbol (indexes, outputs), chosen_costs = DynetNMTVanillaDecoder._smallest( next_costs, beam_size, only_first_row=i == 0) # Rearrange everything new_states = [] for j, m in enumerate(nmt_models): new_states.append([states[j][ind] for ind in indexes]) # new_states = ((states_m[ind] for ind in indexes) for states_m in states) all_outputs = all_outputs[:, indexes] all_masks = all_masks[:, indexes] all_costs = all_costs[:, indexes] # Record chosen output and compute new states states = [[ m.consume_next_(s, pred_id) for s, pred_id in zip(m_new_states, outputs) ] for m, m_new_states in zip(nmt_models, new_states)] all_outputs = np.vstack([all_outputs, outputs[None, :]]) logging.debug(u'all_outputs: {}'.format(all_outputs)) logging.debug(u'outputs: {}'.format( [utils.apply_trg_wmap([c]) for c in outputs])) logging.debug(u'indexes: {}'.format(indexes)) logging.debug(u'chosen_costs: {}'.format(chosen_costs)) logging.debug(u'outputs != STOP: {}'.format(outputs != STOP)) all_costs = np.vstack([all_costs, chosen_costs[None, :]]) mask = outputs != STOP # if ignore_first_eol: # and i == 0: # mask[:] = 1 all_masks = np.vstack([all_masks, mask[None, :]]) logging.debug(u'last masks: {}'.format(all_masks[-1])) all_outputs = all_outputs[1:] # skipping first row of self.BEGIN logging.debug(u'outputs: {}'.format(all_outputs)) 
all_masks = all_masks[: -1] #? all_masks[:-1] # skipping first row of self.BEGIN and the last row of self.STOP logging.debug(u'masks: {}'.format(all_masks)) all_costs = all_costs[ 1:] - all_costs[: -1] #turn cumulative cost ito cost of each step #?actually the last row would suffice for us? result = all_outputs, all_masks, all_costs trans, costs = DynetNMTVanillaDecoder.result_to_lists( result) #(nmt_vocab,result) logging.debug(u'trans: {}'.format(trans)) hypos = [] max_len = 0 for idx in xrange(len(trans)): max_len = max(max_len, len(trans[idx])) hypo = Hypothesis(trans[idx], -costs[idx]) hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]] hypo.score_breakdown[0] = [(-costs[idx], 1.0)] hypos.append(hypo) self.apply_predictors_count = max_len * self.beam_size logging.debug(u'hypos: {}'.format(all_outputs)) return hypos
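# Minimal numpy sketch (not part of the original module) of the masking trick
# used above to force finished hypotheses to continue with the STOP symbol:
# in rows whose mask is 0, every column except the STOP column is set to
# infinity, so the k-smallest selection can only pick STOP there. The STOP id
# below is hypothetical.
import numpy as np

_STOP = 2
_mask = np.array([1.0, 0.0])              # second hypothesis is finished
_costs = np.array([[1.0, 2.0, 3.0, 4.0],
                   [5.0, 6.0, 7.0, 8.0]])
(_finished,) = np.where(_mask == 0)
_costs[_finished, :_STOP] = np.inf
_costs[_finished, _STOP + 1:] = np.inf
# _costs[1] is now [inf, inf, 7.0, inf]: only STOP can be selected, and the
# cumulative cost of the finished hypothesis stays unchanged.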
def decode(self, src_sentence): """Decodes a single source sentence. Note that the score breakdowns in returned hypotheses are only on the sentence level, not on the word level. For finer grained NMT scores you need to use the nmt predictor. ``src_sentence`` is a list of source word ids representing the source sentence without <S> or </S> symbols. As blocks expects to see </S>, this method adds it automatically. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. """ dy.renew_cg() logging.debug(u'src_sentence: {}'.format(src_sentence)) # MAX_PRED_SEQ_LEN = 30*len(src_sentence) MAX_PRED_SEQ_LEN = 30 logging.debug(u'MAX_PRED_SEQ_LEN: {}'.format(MAX_PRED_SEQ_LEN)) BEGIN = utils.GO_ID STOP = utils.EOS_ID logging.debug(u'BEGIN: {}, STOP: {}'.format(BEGIN, STOP)) beam_size = self.beam_size self.nmt_model.initialize(src_sentence) # ignore_first_eol=True states = [self.nmt_model.s] * beam_size # This array will store all generated outputs, including those from # previous step and those from already finished sequences. all_outputs = np.full(shape=(1, beam_size), fill_value=BEGIN, dtype=int) all_masks = np.ones_like( all_outputs, dtype=float) # whether predicted symbol is self.STOP all_costs = np.zeros_like( all_outputs, dtype=float) # the cumulative cost of predictions for i in range(MAX_PRED_SEQ_LEN): if all_masks[-1].sum() == 0: logging.debug(u'all_masks: {}'.format(all_masks)) break # We carefully hack values of the `logprobs` array to ensure # that all finished sequences are continued with `eos_symbol`. logprobs = -np.array( [self.nmt_model.predict_next_(s) for s in states]) # print logprobs # print all_masks[-1, :, None] next_costs = ( all_costs[-1, :, None] + logprobs * all_masks[-1, :, None] ) #take last row of cumul prev costs and turn into beam_size X 1 matrix, take logprobs distributions for unfinished hypos only and add it (elem-wise) with the array of prev costs; result: beam_size x vocab_len matrix of next costs (finished, ) = np.where( all_masks[-1] == 0 ) # finished hypos have all their cost on the self.STOP symbol next_costs[finished, :STOP] = np.inf next_costs[finished, STOP + 1:] = np.inf # indexes - the hypos from prev step to keep, outputs - the next step prediction, chosen cost - cost of predicted symbol (indexes, outputs), chosen_costs = self._smallest(next_costs, beam_size, only_first_row=i == 0) # print outputs # Rearrange everything new_states = (states[ind] for ind in indexes) all_outputs = all_outputs[:, indexes] all_masks = all_masks[:, indexes] all_costs = all_costs[:, indexes] # Record chosen output and compute new states states = [ self.nmt_model.consume_next_(s, pred_id) for s, pred_id in zip(new_states, outputs) ] all_outputs = np.vstack([all_outputs, outputs[None, :]]) logging.debug(u'all_outputs: {}'.format(all_outputs)) logging.debug(u'outputs: {}'.format( [utils.apply_trg_wmap([c]) for c in outputs])) logging.debug(u'indexes: {}'.format(indexes)) logging.debug(u'chosen_costs: {}'.format(chosen_costs)) logging.debug(u'outputs != STOP: {}'.format(outputs != STOP)) all_costs = np.vstack([all_costs, chosen_costs[None, :]]) mask = outputs != STOP # if ignore_first_eol: #and i == 0: # mask[:] = 1 all_masks = np.vstack([all_masks, mask[None, :]]) all_outputs = all_outputs[1:] # skipping first row of self.BEGIN logging.debug(u'outputs: {}'.format(all_outputs)) all_masks = all_masks[: -1] #? 
all_masks[:-1] # skipping first row of self.BEGIN and the last row of self.STOP logging.debug(u'masks: {}'.format(all_masks)) all_costs = all_costs[ 1:] - all_costs[: -1] #turn cumulative cost ito cost of each step #?actually the last row would suffice for us? result = all_outputs, all_masks, all_costs trans, costs = self.result_to_lists( result) #self.nmt_model.vocab, result) logging.debug(u'trans: {}'.format(trans)) hypos = [] max_len = 0 for idx in xrange(len(trans)): max_len = max(max_len, len(trans[idx])) hypo = Hypothesis(trans[idx], -costs[idx]) hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]] hypo.score_breakdown[0] = [(-costs[idx], 1.0)] hypos.append(hypo) logging.debug(u'hypos: {}'.format(all_outputs)) return hypos
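# Numpy sketch (not part of the original module) of the cumulative-to-per-step
# cost conversion performed above: each row of all_costs holds the cumulative
# cost after one step, so adjacent row differences recover the cost incurred
# at each individual step.
import numpy as np

_cumulative = np.array([[0.0], [1.5], [2.0], [2.0]])  # one hypo, three steps
_per_step = _cumulative[1:] - _cumulative[:-1]
# _per_step == [[1.5], [0.5], [0.0]]; the trailing 0.0 comes from a finished
# step where the mask zeroed out further cost additions.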