def decode(self, src_sentence):
    """This is a generalization to NMT ensembles of
    ``BeamSearch.search``.

    Args:
        src_sentence (list): List of source word ids without <S> or
                             </S> which make up the source sentence

    Returns:
        list. A list of ``Hypothesis`` instances ordered by their
        score.
    """
    # Make sure the Theano computation graph of every ensemble member
    # is compiled before decoding starts.
    for search in self.beam_searches:
        if not search.compiled:
            search.compile()
    # Map OOVs to UNK, convert to (possibly sparse) source features,
    # and append the source end-of-sentence symbol.
    seq = self.src_sparse_feat_map.words2dense(
        utils.oov_to_unk(src_sentence, self.src_vocab_size)) + [self.src_eos]
    if self.src_sparse_feat_map.dim > 1: # sparse src feats
        input_ = np.transpose(np.tile(seq, (self.beam_size, 1, 1)), (2, 0, 1))
    else: # word ids on the source side
        input_ = np.tile(seq, (self.beam_size, 1))
    # Compute attention contexts and initial decoder states separately
    # for each network in the ensemble.
    contexts_and_states = []
    for sys_idx in xrange(self.n_networks):
        contexts, states, _ = \
            self.beam_searches[sys_idx].compute_initial_states_and_contexts(
                {self.nmt_models[sys_idx].sampling_input: input_})
        contexts_and_states.append(
            (contexts, states, self.beam_searches[sys_idx]))
    # This array will store all generated outputs, including those from
    # previous step and those from already finished sequences.
    # NOTE(review): `states` is left over from the last loop iteration,
    # i.e. the last network's states -- presumably all members share the
    # same initial 'outputs' row; confirm.
    all_outputs = states['outputs'][None, :]
    all_masks = np.ones_like(all_outputs, dtype=config.floatX)
    all_costs = np.zeros_like(all_outputs, dtype=config.floatX)
    # Expand hypotheses for at most 3x the source sentence length.
    for i in range(3 * len(src_sentence)):
        if all_masks[-1].sum() == 0:
            # Every hypothesis in the beam has finished.
            break
        # Ensemble score: sum of the per-network log probabilities.
        logprobs_lst = []
        for contexts, states, search in contexts_and_states:
            logprobs_lst.append(search.compute_logprobs(contexts, states))
        logprobs = np.sum(logprobs_lst, axis=0)
        next_costs = (all_costs[-1, :, None] +
                      logprobs * all_masks[-1, :, None])
        # Finished hypotheses may only be extended by </S>: all other
        # words get infinite cost.
        (finished, ) = np.where(all_masks[-1] == 0)
        next_costs[finished, :utils.EOS_ID] = np.inf
        next_costs[finished, utils.EOS_ID + 1:] = np.inf
        # The `i == 0` is required because at the first step the beam
        # size is effectively only 1.
        (indexes, outputs), chosen_costs = BeamSearch._smallest(
            next_costs, self.beam_size, only_first_row=i == 0)
        all_outputs = all_outputs[:, indexes]
        all_masks = all_masks[:, indexes]
        all_costs = all_costs[:, indexes]
        # Rearrange everything
        for contexts, states, search in contexts_and_states:
            for name in states:
                states[name] = states[name][indexes]
            states.update(
                search.compute_next_states(contexts, states, outputs))
        all_outputs = np.vstack([all_outputs, outputs[None, :]])
        all_costs = np.vstack([all_costs, chosen_costs[None, :]])
        mask = outputs != utils.EOS_ID
        if i == 0:
            mask[:] = 1
        all_masks = np.vstack([all_masks, mask[None, :]])
    # Drop the dummy initial row, and turn accumulated costs into
    # per-step costs by differencing.
    all_outputs = all_outputs[1:]
    all_masks = all_masks[:-1]
    all_costs = all_costs[1:] - all_costs[:-1]
    result = all_outputs, all_masks, all_costs
    trans, costs = BeamSearch.result_to_lists(result)
    # Wrap translations in ``Hypothesis`` objects; the entire cost is
    # attributed to the first token in score_breakdown.
    hypos = []
    max_len = 0
    for idx in xrange(len(trans)):
        max_len = max(max_len, len(trans[idx]))
        hypo = Hypothesis(trans[idx], -costs[idx])
        hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]]
        hypo.score_breakdown[0] = [(-costs[idx], 1.0)]
        hypos.append(hypo)
    self.apply_predictors_count = max_len * self.beam_size
    return hypos
def test_beam_search_smallest():
    """``BeamSearch._smallest`` on a 2x3 cost matrix must select the
    two smallest entries (1 and 2, both in row 1) and report their
    row indices and values."""
    costs = numpy.array([[3, 6, 4], [1, 2, 7]])
    indices, smallest = BeamSearch._smallest(costs, 2)
    expected_indices = numpy.array([[1, 1], [0, 1]])
    assert numpy.all(numpy.array(indices) == expected_indices)
    assert numpy.all(smallest == [1, 2])
def decode(self, src_sentence):
    """This is a generalization to NMT ensembles of
    ``BeamSearch.search``.

    Args:
        src_sentence (list): List of source word ids without <S> or
                             </S> which make up the source sentence

    Returns:
        list. A list of ``Hypothesis`` instances ordered by their
        score.
    """
    # Compile the Theano graphs of all ensemble members lazily.
    for search in self.beam_searches:
        if not search.compiled:
            search.compile()
    # Replace OOVs with UNK, build the (dense) feature sequence, and
    # append the source end-of-sentence symbol.
    seq = self.src_sparse_feat_map.words2dense(utils.oov_to_unk(
        src_sentence, self.src_vocab_size)) + [self.src_eos]
    if self.src_sparse_feat_map.dim > 1: # sparse src feats
        input_ = np.transpose(
            np.tile(seq, (self.beam_size, 1, 1)), (2,0,1))
    else: # word ids on the source side
        input_ = np.tile(seq, (self.beam_size, 1))
    # Initial decoder states and attention contexts, one entry per
    # network in the ensemble.
    contexts_and_states = []
    for sys_idx in xrange(self.n_networks):
        contexts, states, _ = \
            self.beam_searches[sys_idx].compute_initial_states_and_contexts(
                {self.nmt_models[sys_idx].sampling_input: input_})
        contexts_and_states.append((contexts, states, self.beam_searches[sys_idx]))
    # This array will store all generated outputs, including those from
    # previous step and those from already finished sequences.
    # NOTE(review): `states` holds the last network's states after the
    # loop above -- presumably the initial 'outputs' row is identical
    # across members; confirm.
    all_outputs = states['outputs'][None, :]
    all_masks = np.ones_like(all_outputs, dtype=config.floatX)
    all_costs = np.zeros_like(all_outputs, dtype=config.floatX)
    # Generate at most 3x the source length tokens.
    for i in range(3*len(src_sentence)):
        if all_masks[-1].sum() == 0:
            # No active hypothesis left in the beam.
            break
        # Combine the ensemble by summing per-network log probabilities.
        logprobs_lst = []
        for contexts, states, search in contexts_and_states:
            logprobs_lst.append(search.compute_logprobs(contexts, states))
        logprobs = np.sum(logprobs_lst, axis=0)
        next_costs = (all_costs[-1, :, None] +
                      logprobs * all_masks[-1, :, None])
        # Force finished hypotheses to be continued only with </S> by
        # assigning infinite cost to every other word.
        (finished,) = np.where(all_masks[-1] == 0)
        next_costs[finished, :utils.EOS_ID] = np.inf
        next_costs[finished, utils.EOS_ID + 1:] = np.inf
        # The `i == 0` is required because at the first step the beam
        # size is effectively only 1.
        (indexes, outputs), chosen_costs = BeamSearch._smallest(
            next_costs, self.beam_size, only_first_row=i == 0)
        all_outputs = all_outputs[:, indexes]
        all_masks = all_masks[:, indexes]
        all_costs = all_costs[:, indexes]
        # Rearrange everything
        for contexts, states, search in contexts_and_states:
            for name in states:
                states[name] = states[name][indexes]
            states.update(search.compute_next_states(contexts, states, outputs))
        all_outputs = np.vstack([all_outputs, outputs[None, :]])
        all_costs = np.vstack([all_costs, chosen_costs[None, :]])
        mask = outputs != utils.EOS_ID
        if i == 0:
            mask[:] = 1
        all_masks = np.vstack([all_masks, mask[None, :]])
    # Remove the dummy initial row and convert cumulative costs into
    # per-step costs.
    all_outputs = all_outputs[1:]
    all_masks = all_masks[:-1]
    all_costs = all_costs[1:] - all_costs[:-1]
    result = all_outputs, all_masks, all_costs
    trans, costs = BeamSearch.result_to_lists(result)
    # Build ``Hypothesis`` objects; score_breakdown puts the whole
    # hypothesis cost on the first token.
    hypos = []
    max_len = 0
    for idx in xrange(len(trans)):
        max_len = max(max_len, len(trans[idx]))
        hypo = Hypothesis(trans[idx], -costs[idx])
        hypo.score_breakdown = len(trans[idx]) * [[(0.0,1.0)]]
        hypo.score_breakdown[0] = [(-costs[idx],1.0)]
        hypos.append(hypo)
    self.apply_predictors_count = max_len * self.beam_size
    return hypos