def _get_initial_hypos(self): """Get the list of initial ``PartialHypothesis``. """ self.cur_fst = load_fst( utils.get_path(self.fst_path, self.current_sen_id + 1)) init_hypo = PartialHypothesis(self.get_predictor_states()) init_hypo.fst_node = self._find_start_node() return [init_hypo]
def decode(self, src_sentence): """Decodes a single source sentence using beam search. """ self.initialize_predictors(src_sentence) hypos = [PartialHypothesis(self.get_predictor_states())] guard_hypo = PartialHypothesis(None) guard_hypo.score = utils.NEG_INF it = 0 while self.stop_criterion(hypos): if it > self.max_len: # prevent infinite loops break it = it + 1 next_hypos = [guard_hypo] #print("HYPOS") #for hypo in hypos: # print("it%d: %s (%f)" % (it, utils.apply_trg_wmap(hypo.trgt_sentence), hypo.score)) for hypo in hypos: #print("H: %s (%f: %f, %f, %f)" % (utils.apply_trg_wmap(hypo.trgt_sentence), hypo.score, sum([s[0][0] for s in hypo.score_breakdown]), sum([s[1][0] for s in hypo.score_breakdown]), sum([s[2][0] for s in hypo.score_breakdown]))) if hypo.get_last_word() == utils.EOS_ID: next_hypos = self._rebuild_hypo_list(next_hypos, hypo) for s, cont in self._get_complete_continuations( hypo, next_hypos[-1].score): if hypo.score + s < next_hypos[-1].score: break next_hypos = self._rebuild_hypo_list( next_hypos, cont.generate_expanded_hypo(self)) hypos = [h for h in next_hypos if h.score > utils.NEG_INF] for hypo in hypos: if hypo.get_last_word() == utils.EOS_ID: self.add_full_hypo(hypo.generate_full_hypothesis()) if not self.full_hypos: logging.warn("No complete hypotheses found for %s" % src_sentence) for hypo in hypos: self.add_full_hypo(hypo.generate_full_hypothesis()) return self.get_full_hypos_sorted()
def decode(self, src_sentence): """Decodes a single source sentence using beam search. """ self.initialize_predictors(src_sentence) hypos = [PartialHypothesis(self.get_predictor_states())] guard_hypo = PartialHypothesis(None) guard_hypo.score = utils.NEG_INF it = 0 while self.stop_criterion(hypos): if it > self.max_len: # prevent infinite loops break it = it + 1 next_hypos = [guard_hypo] for hypo in hypos: if hypo.get_last_word() == utils.EOS_ID: next_hypos = self._rebuild_hypo_list(next_hypos, hypo) for s, cont in self._get_complete_continuations( hypo, next_hypos[-1].score): if hypo.score + s < next_hypos[-1].score: break next_hypos = self._rebuild_hypo_list( next_hypos, cont.generate_expanded_hypo(self)) hypos = [h for h in next_hypos if h.score > utils.NEG_INF] for hypo in hypos: if hypo.get_last_word() == utils.EOS_ID: self.add_full_hypo(hypo.generate_full_hypothesis()) if not self.full_hypos: logging.warn("No complete hypotheses found for %s" % src_sentence) for hypo in hypos: self.add_full_hypo(hypo.generate_full_hypothesis()) return self.get_full_hypos_sorted()
def create_initial_node(self):
    """Create the root node for the search tree. """
    init_hypo = PartialHypothesis()
    posterior, score_breakdown = self.apply_predictors()
    best_word = utils.argmax(posterior)
    init_hypo.predictor_states = self.get_predictor_states()
    init_node = BOWNode(init_hypo, posterior, score_breakdown, [])
    self._add_to_heap(init_node, best_word, 0.0)  # expected score irrelevant
def create_initial_node(self):
    """Create the root node for the search tree. """
    init_hypo = PartialHypothesis()
    posterior, score_breakdown = self.apply_predictors()
    children = sorted([RestartingChild(w, posterior[w], score_breakdown[w])
                       for w in posterior],
                      key=lambda c: c.score, reverse=True)
    init_hypo.predictor_states = self.get_predictor_states()
    heappush(self.open_nodes, (0.0, RestartingNode(init_hypo, children)))
def decode(self, src_sentence): """Decodes a single source sentence exhaustively using depth first search under length constraints. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. """ if len(self.predictors) != 1: logging.fatal("SimpleDFS only works with a single predictor!") self.dfs_predictor = self.predictors[0][0] self.initialize_predictors(src_sentence) lower_bounds = self.all_lower_bounds[self.current_sen_id] self.max_len = max(int(el[0]) for el in lower_bounds) self.len_enabled = np.zeros((self.max_len + 1, ), np.bool) self.len_lower_bounds = -np.ones((self.max_len + 1, )) * np.inf self.len_min_lower_bounds = np.zeros((self.max_len + 1, )) self.len_best_hypos = [None] * (self.max_len + 1) for el in lower_bounds: l = int(el[0]) self.len_enabled[l] = True self.len_lower_bounds[l] = float(el[1]) self._update_min_lower_bounds() self._dfs(PartialHypothesis()) for hypo in self.len_best_hypos: if hypo is not None: self.add_full_hypo(hypo.generate_full_hypothesis()) return self.get_full_hypos_sorted()
def _initialize_order_ds(self):
    """Initializes the ordering data structures: one score-sorted
    hypothesis queue per timestep, an index from each queue's best
    score to its timestep, per-timestep best scores, and per-timestep
    beam capacities.
    """
    self.queues = [MinMaxHeap() for k in range(self.max_len + 1)]
    self.queues[0].insert(
        (0.0, PartialHypothesis(self.get_predictor_states())))
    self.queue_order = SortedDict({0.0: 0})
    self.score_by_t = [0.0]
    self.score_by_t.extend([None] * self.max_len)
    self.time_sync = defaultdict(
        lambda: self.beam if self.beam > 0 else utils.INF)
    self.time_sync[0] = 1
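# --- Illustration (not part of the decoder) --------------------------------
# queue_order maps a queue's best score to its timestep so the decoder can
# repeatedly jump to the most promising timestep. With
# sortedcontainers.SortedDict, keys stay sorted ascending, so the entry
# with the highest score is peekitem(-1). A small usage sketch with toy
# scores (not taken from the decoder):

from sortedcontainers import SortedDict

queue_order = SortedDict({0.0: 0})    # best score -> timestep
queue_order[-1.2] = 1                 # best score of the t=1 queue
queue_order[-0.4] = 2                 # best score of the t=2 queue

best_score, best_t = queue_order.peekitem(-1)   # (0.0, 0)
del queue_order[best_score]                     # done with that queue
next_score, next_t = queue_order.peekitem(-1)   # (-0.4, 2)
# ----------------------------------------------------------------------------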
def _greedy_decode(self):
    """Performs greedy decoding from the start node. Used to obtain
    initial bigram statistics.
    """
    hypo = PartialHypothesis()
    hypos = []
    posteriors = []
    score_breakdowns = []
    bag = dict(self.full_bag)
    while bag:
        posterior, score_breakdown = self.apply_predictors()
        hypo.predictor_states = copy.deepcopy(self.get_predictor_states())
        bag_posterior = {w: posterior[w] for w in self.full_bag_with_eos}
        bag_breakdown = {w: score_breakdown[w]
                         for w in self.full_bag_with_eos}
        posteriors.append(bag_posterior)
        score_breakdowns.append(bag_breakdown)
        hypos.append(hypo)
        best_word = utils.argmax({w: bag_posterior[w] for w in bag})
        bag[best_word] -= 1
        if bag[best_word] < 1:
            del bag[best_word]
        self.consume(best_word)
        hypo = hypo.expand(best_word, None, bag_posterior[best_word],
                           score_breakdown[best_word])
    posterior, score_breakdown = self.apply_predictors()
    hypo.predictor_states = copy.deepcopy(self.get_predictor_states())
    bag_posterior = {w: posterior[w] for w in self.full_bag_with_eos}
    bag_breakdown = {w: score_breakdown[w] for w in self.full_bag_with_eos}
    posteriors.append(bag_posterior)
    score_breakdowns.append(bag_breakdown)
    hypos.append(hypo)
    hypo = hypo.cheap_expand(utils.EOS_ID, bag_posterior[utils.EOS_ID],
                             score_breakdown[utils.EOS_ID])
    logging.debug("Greedy hypo (%f): %s" % (
        hypo.score, ' '.join([str(w) for w in hypo.trgt_sentence])))
    self._process_new_hypos(hypos, posteriors, score_breakdowns, hypo)
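# --- Illustration (not part of the decoder) --------------------------------
# full_bag is a word-id -> count multiset. The greedy loop above always
# consumes the highest scoring word still available in the bag and
# terminates exactly when the bag is empty. The same multiset bookkeeping
# with collections.Counter and a static stand-in scoring function (the
# real loop re-scores at every step via apply_predictors()):

from collections import Counter

def greedy_bag_order(bag_counts, score_fn):
    """Return the bag's words in greedy order: at each step, pick the
    remaining word with the highest score under ``score_fn``."""
    bag = Counter(bag_counts)
    order = []
    while bag:
        best = max(bag, key=score_fn)
        order.append(best)
        bag[best] -= 1
        if bag[best] < 1:
            del bag[best]      # keep the loop condition meaningful
    return order

# Toy example: word 7 occurs twice and scores highest.
print(greedy_bag_order({7: 2, 3: 1}, {7: -0.1, 3: -2.0}.get))  # [7, 7, 3]
# ----------------------------------------------------------------------------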
def decode(self, src_sentence): """Decodes a single source sentence using beam search. """ self.initialize_predictors(src_sentence) hypos = [PartialHypothesis(self.get_predictor_states())] for _ in range(self.max_len): next_hypos = [] for hypo in hypos: next_hypos.extend(self._expand_hypo(hypo)) next_hypos.sort(key=lambda h: -h.score) hypos = next_hypos[:self.beam_size] return self.get_full_hypos_sorted()
def _get_initial_hypos(self): """Get the list of initial ``PartialHypothesis``. This is not a single empty hypo but one empty hypo for each predictor. """ states = self.get_predictor_states() none_states = [None] * len(states) ret = [] for idx, state in enumerate(states): pred_states = list(none_states) pred_states[idx] = state ret.append(PartialHypothesis(pred_states)) return ret
def decode(self, src_sentence): """Decodes a single source sentence using A* search. """ self.initialize_predictors(src_sentence) open_set = [] best_score = self.get_lower_score_bound() print("Bound:", best_score) heappush(open_set, (0.0, PartialHypothesis(self.get_predictor_states()))) count = 0 while open_set: c, hypo = heappop(open_set) count += 1 if self.early_stopping and hypo.score < best_score: continue logging.debug( "Expand (est=%f score=%f exp=%d best=%f): sentence: %s" % (-c, hypo.score, self.apply_predictors_count, best_score, hypo.trgt_sentence)) if hypo.get_last_word() == utils.EOS_ID: # Found best hypothesis if hypo.score > best_score: logging.debug( "New best hypo (score=%f exp=%d): %s" % (hypo.score, self.apply_predictors_count, ' '.join( [str(w) for w in hypo.trgt_sentence]))) best_score = hypo.score self.add_full_hypo(hypo.generate_full_hypothesis()) if len(self.full_hypos ) >= self.nbest: # if we have enough hypos return self.get_full_hypos_sorted(), count continue self.set_predictor_states(copy.deepcopy(hypo.predictor_states)) if not hypo.word_to_consume is None: # Consume if cheap expand self.consume(hypo.word_to_consume) hypo.word_to_consume = None posterior, score_breakdown = self.apply_predictors() hypo.predictor_states = self.get_predictor_states() for trgt_word in posterior: # Estimate future cost, add to heap next_hypo = hypo.cheap_expand(trgt_word, posterior[trgt_word], score_breakdown[trgt_word]) score = next_hypo.score if score > best_score: # only push if hypothesis can beat lower bound. Saves memory... heappush(open_set, (-score, next_hypo)) # Limit heap capacity if self.capacity > 0 and len(open_set) > self.capacity: new_open_set = [] for _ in range(self.capacity): heappush(new_open_set, heappop(open_set)) open_set = new_open_set return self.get_full_hypos_sorted(), count
def _initialize_decoding(self, src_sentence):
    """Helper function for ``decode`` which initializes all the
    class attributes.
    """
    self.initialize_predictors(src_sentence)
    self.max_expansions = self.get_max_expansions(
        self.max_expansions_param, src_sentence)
    init_hypo = PartialHypothesis()
    init_hypo.predictor_states = self.get_predictor_states()
    init_hypo.scores = []
    init_hypo.parent_hypo_array_idx = 0  # point to guardian
    self.buckets = [[] for _ in range(self.max_len + 1)]
    self.expanded_hypos = [[] for _ in range(self.max_len + 1)]
    self.buckets[0].append((0.0, init_hypo))
    self.expand_counts = [0.0]  # with guardian
    self.expand_backpointers = [0]  # with guardian
    self.last_bucket = 0
    self.best_score = self.get_lower_score_bound()
    self.best_word_scores = [NEG_INF] * (self.max_len + 1)
    self.compressed = [True] * (self.max_len + 1)
    self.guaranteed_optimality = True
    self.cur_iter = 0
def _greedy_decode(self):
    """Performs greedy decoding from the start node. Used to obtain
    the initial hypothesis.
    """
    hypo = PartialHypothesis()
    hypos = []
    posteriors = []
    score_breakdowns = []
    scores = []
    bag = dict(self.full_bag)
    while bag:
        posterior, score_breakdown = self.apply_predictors()
        hypo.predictor_states = copy.deepcopy(self.get_predictor_states())
        hypos.append(hypo)
        posteriors.append(posterior)
        score_breakdowns.append(score_breakdown)
        best_word = utils.argmax({w: posterior[w] for w in bag})
        bag[best_word] -= 1
        if bag[best_word] < 1:
            del bag[best_word]
        self.consume(best_word)
        hypo = hypo.expand(best_word, None, posterior[best_word],
                           score_breakdown[best_word])
        scores.append(posterior[best_word])
    posterior, score_breakdown = self.apply_predictors()
    hypo.predictor_states = self.get_predictor_states()
    hypos.append(hypo)
    posteriors.append(posterior)
    score_breakdowns.append(score_breakdown)
    hypo = hypo.expand(utils.EOS_ID, None, posterior[utils.EOS_ID],
                       score_breakdown[utils.EOS_ID])
    logging.debug("Greedy hypo (%f): %s" % (
        hypo.score, ' '.join([str(w) for w in hypo.trgt_sentence])))
    scores.append(posterior[utils.EOS_ID])
    self.best_score = hypo.score
    self.add_full_hypo(hypo.generate_full_hypothesis())
    self._process_new_hypos(
        FlipCandidate(hypo.trgt_sentence, scores,
                      self._create_dummy_bigrams(), hypo.score),
        len(hypo.trgt_sentence), hypos, posteriors, score_breakdowns)
def decode(self, src_sentence): """Decodes a single source sentence using beam search. """ self.initialize_predictors(src_sentence) hypos = [PartialHypothesis(self.get_predictor_states())] it = 0 while self.stop_criterion(hypos): if it > self.max_len: # prevent infinite loops break it = it + 1 next_hypos = [] next_scores = [] self.min_score = utils.NEG_INF self.best_scores = [] print("HYPOS") for hypo in hypos: print( "it%d: %s (%f)" % (it, utils.apply_trg_wmap(hypo.trgt_sentence), hypo.score)) for hypo in hypos: print("H: %s (%f)" % (utils.apply_trg_wmap(hypo.trgt_sentence), hypo.score)) if hypo.get_last_word() == utils.EOS_ID: next_hypos.append(hypo) next_scores.append(self._get_combined_score(hypo)) continue for next_hypo in self._expand_hypo(hypo): next_score = self._get_combined_score(next_hypo) if next_score > self.min_score: next_hypos.append(next_hypo) next_scores.append(next_score) self._register_score(next_score) if self.hypo_recombination: hypos = self._filter_equal_hypos(next_hypos, next_scores) else: hypos = self._get_next_hypos(next_hypos, next_scores) for hypo in hypos: if hypo.get_last_word() == utils.EOS_ID: self.add_full_hypo(hypo.generate_full_hypothesis()) if not self.full_hypos: logging.warn("No complete hypotheses found for %s" % src_sentence) for hypo in hypos: self.add_full_hypo(hypo.generate_full_hypothesis()) return self.get_full_hypos_sorted()
def decode(self, src_sentence): """Decodes a single source sentence exhaustively using depth first search. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. """ if len(self.predictors) != 1: logging.fatal("SimpleDFS only works with a single predictor!") self.dfs_predictor = self.predictors[0][0] if self._min_length_ratio > 0.0: self._min_length = int( math.ceil(self._min_length_ratio * len(src_sentence))) + 1 self.initialize_predictors(src_sentence) self.best_score = self.get_lower_score_bound() self._dfs(PartialHypothesis()) return self.get_full_hypos_sorted()
def decode(self, src_sentence): """Decodes a single source sentence using depth first search. If ``max_expansions`` equals 0, this corresponds to exhaustive search for the globally best scoring hypothesis. Note that with ``early_stopping`` enabled, the returned set of hypothesis are not necessarily the global n-best hypotheses. To create an exact n-best list, disable both ``max_expansions`` and ``early_stopping`` in the constructor. Args: src_sentence (list): List of source word ids without <S> or </S> which make up the source sentence Returns: list. A list of ``Hypothesis`` instances ordered by their score. If ``max_expansions`` equals 0, the first element holds the global best scoring hypothesis """ self.initialize_predictors(src_sentence) self.max_expansions = self.get_max_expansions( self.max_expansions_param, src_sentence) self.best_score = self.get_lower_score_bound() self._dfs(PartialHypothesis()) return self.get_full_hypos_sorted()
def _get_initial_hypos(self):
    """Get the list of initial ``PartialHypothesis``. """
    return [PartialHypothesis(self.get_predictor_states())]