def main():
    # tm should translate unknown words as-is with probability 1
    for word in set(sum(french, ())):
        if (word,) not in tm:
            tm[(word,)] = [models.phrase(word, 0.0)]
    total_prob = 0
    sys.stderr.write("Decoding %s...\n" % (opts.input,))
    for idx, f in enumerate(french):
        initial_hypothesis = hypothesis(lm.begin(), 0.0, 0, 0, None, None)
        heaps = [{} for _ in f] + [{}]
        heaps[0][lm.begin(), 0, 0] = initial_hypothesis
        for i, heap in enumerate(heaps[:-1]):
            # maintain beam heap
            # front_item = sorted(heap.itervalues(), key=lambda h: -h.logprob)[0]
            # for k in heap.keys():
            #     if heap[k].logprob < front_item.logprob - opts.bwidth:
            #         del heap[k]
            for h in sorted(heap.itervalues(), key=lambda h: -h.logprob)[:opts.s]:  # prune
                fopen = prefix1bits(h.coverage)
                for j in xrange(fopen, min(fopen + 1 + opts.disord, len(f) + 1)):
                    for k in xrange(j + 1, len(f) + 1):
                        if f[j:k] in tm:
                            if (h.coverage & bitmap(range(j, k))) == 0:
                                for phrase in tm[f[j:k]]:
                                    lm_prob = 0
                                    lm_state = h.lm_state
                                    for word in phrase.english.split():
                                        (lm_state, prob) = lm.score(lm_state, word)
                                        lm_prob += prob
                                    lm_prob += lm.end(lm_state) if k == len(f) else 0.0
                                    coverage = h.coverage | bitmap(range(j, k))
                                    logprob = (h.logprob + opts.alpha * lm_prob +
                                               opts.beta * phrase.logprob +
                                               opts.eta * abs(h.end + 1 - j))
                                    new_hypothesis = hypothesis(lm_state, logprob, coverage, k, h, phrase)
                                    # add to heap
                                    num = onbits(coverage)
                                    if (lm_state, coverage, k) not in heaps[num] or \
                                       new_hypothesis.logprob > heaps[num][lm_state, coverage, k].logprob:
                                        heaps[num][lm_state, coverage, k] = new_hypothesis
        winner = max(heaps[-1].itervalues(), key=lambda h: h.logprob)

        def extract_english(h):
            return "" if h.predecessor is None else "%s%s " % (extract_english(h.predecessor), h.phrase.english)

        out = extract_english(winner)
        print out
        sys.stderr.write("#{0}:{2} - {1}\n".format(idx, out, winner.logprob))
        total_prob += winner.logprob
        # if opts.verbose:
        #     def extract_tm_logprob(h):
        #         return 0.0 if h.predecessor is None else h.phrase.logprob + extract_tm_logprob(h.predecessor)
        #     tm_logprob = extract_tm_logprob(winner)
        #     sys.stderr.write("LM = %f, TM = %f, Total = %f\n" %
        #                      (winner.logprob - tm_logprob, tm_logprob, winner.logprob))
    sys.stderr.write("Total score: {0}\n".format(total_prob))
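# --- Sketch: the coverage-bitmap helpers assumed above ---
# main() relies on bitmap(), onbits() and prefix1bits(), which live elsewhere
# in this codebase; these minimal reimplementations match how they are used
# here (coverage is an integer with one bit per source word).
def bitmap(sequence):
    # set one bit per covered word position
    return reduce(lambda x, y: x | y, (1 << i for i in sequence), 0)

def onbits(b):
    # number of covered positions (population count)
    return bin(b).count('1')

def prefix1bits(b):
    # index of the first uncovered word: length of the initial run of 1-bits
    n = 0
    while b & 1:
        b >>= 1
        n += 1
    return n

assert prefix1bits(bitmap([0, 1, 3])) == 2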
def decode(n):
    ret = []
    tm = models.TM(opts.tm, opts.k)
    lm = models.LM(opts.lm)
    french = [tuple(line.strip().split())
              for line in open(opts.input).readlines()[:min(n, opts.num_sents)]]
    # tm should translate unknown words as-is with probability 1
    for word in set(sum(french, ())):
        if (word,) not in tm:
            tm[(word,)] = [models.phrase(word, 0.0)]
    sys.stderr.write("Decoding %s...\n" % (opts.input,))
    for f in french:
        # The following code implements a monotone decoding
        # algorithm (one that doesn't permute the target phrases).
        # Hence all hypotheses in stacks[i] represent translations of
        # the first i words of the input sentence. You should generalize
        # this so that they can represent translations of *any* i words.
        hypothesis = namedtuple("hypothesis", "logprob, lm_state, predecessor, phrase, i, j, f")
        initial_hypothesis = hypothesis(0.0, lm.begin(), None, None, 0, 0, f[0])
        stacks = [{} for _ in f] + [{}]
        stacks[0][lm.begin()] = initial_hypothesis
        for i, stack in enumerate(stacks[:-1]):
            # print "Stack for " + str(french[i]) + ": " + str(stack) + "\n"
            for h in sorted(stack.itervalues(), key=lambda h: -h.logprob)[:opts.s]:  # prune
                for j in xrange(i + 1, len(f) + 1):
                    if f[i:j] in tm:
                        for phrase in tm[f[i:j]]:
                            logprob = h.logprob + phrase.logprob
                            lm_state = h.lm_state
                            for word in phrase.english.split():
                                (lm_state, word_logprob) = lm.score(lm_state, word)
                                logprob += word_logprob
                            logprob += lm.end(lm_state) if j == len(f) else 0.0
                            new_hypothesis = hypothesis(logprob, lm_state, h, phrase, i, j, f)
                            if lm_state not in stacks[j] or stacks[j][lm_state].logprob < logprob:  # second case is recombination
                                stacks[j][lm_state] = new_hypothesis
        winner = max(stacks[-1].itervalues(), key=lambda h: h.logprob)
        english_phrases = []
        tm_logprob_phrases = []

        def extract_english(h):
            if h.predecessor is not None:
                english_phrases.insert(0, (h.phrase.english, h.i, h.j, h.f))
                tm_logprob_phrases.insert(0, h.phrase.logprob)
            return "" if h.predecessor is None else "%s%s " % (extract_english(h.predecessor), h.phrase.english)

        ret.append((extract_english(winner), english_phrases, tm_logprob_phrases))
        if opts.verbose:
            def extract_tm_logprob(h):
                return 0.0 if h.predecessor is None else h.phrase.logprob + extract_tm_logprob(h.predecessor)
            tm_logprob = extract_tm_logprob(winner)
            sys.stderr.write("LM = %f, TM = %f, Total = %f\n" %
                             (winner.logprob - tm_logprob, tm_logprob, winner.logprob))
    return ret
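# --- Sketch: how the recombination test above behaves ---
# Hypotheses sharing an LM state are interchangeable for all future scoring,
# so each stack keeps only the best one. A toy illustration with two
# hypotheses landing on the same state:
from collections import namedtuple

_hyp = namedtuple("hypothesis", "logprob, lm_state, predecessor, phrase, i, j, f")
_stack = {}
for _h in (_hyp(-2.0, ("the",), None, None, 0, 1, None),
           _hyp(-1.5, ("the",), None, None, 0, 1, None)):
    if _h.lm_state not in _stack or _stack[_h.lm_state].logprob < _h.logprob:
        _stack[_h.lm_state] = _h
assert _stack[("the",)].logprob == -1.5  # the weaker hypothesis was recombined away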
def __init__(self, opts):
    self.opts = opts
    self.tm = models.TM(opts.tm, sys.maxint)
    self.lm = models.LM(opts.lm)
    self.french = [tuple(line.strip().split())
                   for line in open(opts.input).readlines()]
    # tm should translate unknown words as-is with probability 1
    for word in set(sum(self.french, ())):
        if (word,) not in self.tm:
            self.tm[(word,)] = [models.phrase(word, 0.0)]
def make_agtsp(f):
    # make AGTSP
    nodes = [Node(0.0, START_SYMBOL, -1, -1, 0, models.phrase('', 0.0))]
    groups = defaultdict(list)  # french word => [tsp_tuple, ...]
    groups[nodes[0].word_index] = [nodes[0]]  # add startword group
    for i in xrange(len(f)):
        for j in xrange(i + 1, len(f) + 1):
            if f[i:j] in tm:
                for phrase in tm[f[i:j]]:
                    phrase = Phrase(phrase.english, phrase.logprob, random())
                    for (i_w, word) in enumerate(f[i:j]):
                        word_index = i + i_w
                        n = Node(phrase.logprob, word, word_index, i, j, phrase)
                        nodes.append(n)
                        groups[word_index] = groups[word_index] + [n]
    return nodes, groups
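# --- Sketch: the containers make_agtsp() expects ---
# Node and Phrase are defined elsewhere; the fields below are inferred from
# the constructor calls above (one Node per covered word of each TM phrase,
# plus a start node). The third Phrase field holds the random() value, which
# presumably just distinguishes otherwise-equal phrases; the name "tiebreak"
# is an assumption.
from collections import namedtuple

Node = namedtuple("Node", "logprob, word, word_index, i, j, phrase")
Phrase = namedtuple("Phrase", "english, logprob, tiebreak")
START_SYMBOL = "<s>"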
def main(w0=None):
    # tm should translate unknown words as-is with probability 1
    w = w0
    if w is None:
        # lm_logprob, distortion penalty, direct translation logprob, direct lexicon logprob,
        # inverse translation logprob, inverse lexicon logprob
        if opts.weights == "no weights specify":
            w = [1.0 / 7] * 7
            # w = [1.76846735947, 0.352553835525, 1.00071564481, 1.49937872683, 0.562198294709, -0.701483985454, 1.80395218437]
        else:
            w = [float(line.strip()) for line in open(opts.weights)]
    sys.stderr.write(str(w) + '\n')
    tm = models.TM(opts.tm, opts.k, opts.mute)
    lm = models.LM(opts.lm, opts.mute)
    # ibm_t = {}
    ibm_t = init('./data/ibm.t.gz')
    french = [tuple(line.strip().split())
              for line in open(opts.input).readlines()[:opts.num_sents]]
    french = french[opts.start:opts.end]
    bound_width = float(opts.bwidth)
    for word in set(sum(french, ())):
        if (word,) not in tm:
            tm[(word,)] = [models.phrase(word, [0.0, 0.0, 0.0, 0.0])]
    nbest_output = []
    total_prob = 0
    if opts.mute == 0:
        sys.stderr.write("Start decoding %s ...\n" % (opts.input,))
    for idx, f in enumerate(french):
        if opts.mute == 0:
            sys.stderr.write("Decoding sentence #%s ...\n" % (str(idx)))
        initial_hypothesis = hypothesis(lm.begin(), 0.0, 0, 0, None, None, None)
        heaps = [{} for _ in f] + [{}]
        heaps[0][lm.begin(), 0, 0] = initial_hypothesis
        for i, heap in enumerate(heaps[:-1]):
            # maintain beam heap
            # front_item = sorted(heap.itervalues(), key=lambda h: -h.logprob)[0]
            for h in sorted(heap.itervalues(), key=lambda h: -h.logprob)[:opts.s]:  # prune
                # if h.logprob < front_item.logprob - float(opts.bwidth):
                #     continue
                fopen = prefix1bits(h.coverage)
                for j in xrange(fopen, min(fopen + 1 + opts.disord, len(f) + 1)):
                    for k in xrange(j + 1, len(f) + 1):
                        if f[j:k] in tm:
                            if (h.coverage & bitmap(range(j, k))) == 0:
                                for phrase in tm[f[j:k]]:
                                    lm_prob = 0
                                    lm_state = h.lm_state
                                    for word in phrase.english.split():
                                        (lm_state, prob) = lm.score(lm_state, word)
                                        lm_prob += prob
                                    lm_prob += lm.end(lm_state) if k == len(f) else 0.0
                                    coverage = h.coverage | bitmap(range(j, k))
                                    # logprob = h.logprob + lm_prob*w[0] + getDotProduct(phrase.several_logprob, w[2:6]) + abs(h.end+1-j)*w[1] + ibm_model_1_w_score(ibm_t, f, phrase.english)*w[6]
                                    logprob = h.logprob
                                    logprob += lm_prob * w[0]
                                    logprob += getDotProduct(phrase.several_logprob, w[1:5])
                                    # logprob += opts.diseta*abs(h.end+1-j)*w[1]
                                    logprob += ibm_model_1_w_score(ibm_t, f, phrase.english) * w[5]
                                    logprob += (len(phrase.english.split()) - (k - j)) * w[6]
                                    new_hypothesis = hypothesis(lm_state, logprob, coverage, k, h, phrase,
                                                                abs(h.end + 1 - j))
                                    # add to heap
                                    num = onbits(coverage)
                                    if (lm_state, coverage, k) not in heaps[num] or \
                                       new_hypothesis.logprob > heaps[num][lm_state, coverage, k].logprob:
                                        heaps[num][lm_state, coverage, k] = new_hypothesis
        winners = sorted(heaps[-1].itervalues(), key=lambda h: -h.logprob)[0:opts.nbest]

        def get_lm_logprob(test_list):
            stance = []
            for i in test_list:
                stance += (i.split())
            stance = tuple(stance)
            lm_state = ("<s>",)
            score = 0.0
            for word in stance:
                (lm_state, word_score) = lm.score(lm_state, word)
                score += word_score
            return score

        def get_list_and_features(h, idx_self):
            lst = []
            features = [0, 0, 0, 0, 0, 0, 0]
            current_h = h
            while current_h.phrase is not None:
                # print current_h
                lst.append(current_h.phrase.english)
                # features[1] += current_h.distortionPenalty
                features[1] += current_h.phrase.several_logprob[0]  # translation feature 1
                features[2] += current_h.phrase.several_logprob[1]  # translation feature 2
                features[3] += current_h.phrase.several_logprob[2]  # translation feature 3
                features[4] += current_h.phrase.several_logprob[3]  # translation feature 4
                current_h = current_h.predecessor
            lst.reverse()
            features[0] = get_lm_logprob(lst)  # language model score
            features[5] = ibm_model_1_score(ibm_t, f, lst)
            features[6] = len(lst) - len(french[idx_self])
            return (lst, features)

        for win in winners:
            # s = str(idx) + " ||| "
            (lst, features) = get_list_and_features(win, idx)
            print local_search.local_search(lst, lm)
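# --- Sketch: two helpers main() calls but does not define here ---
# getDotProduct is a plain dot product over feature weights. ibm_model_1_score
# below is one plausible reading (an assumption: ibm_t maps an English word to
# a dict of source-word probabilities, and the third argument is a list of
# English phrase strings), not necessarily the original; ibm_model_1_w_score
# is presumably its per-phrase variant and is not sketched.
import math

def getDotProduct(features, weights):
    return sum(f * w for f, w in zip(features, weights))

def ibm_model_1_score(ibm_t, f, english_phrases):
    # IBM Model 1 log-likelihood of the English output given source sentence f
    score = 0.0
    for e in " ".join(english_phrases).split():
        total = sum(ibm_t.get(e, {}).get(fw, 1e-12) for fw in f)
        score += math.log(total / len(f))
    return score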
    w = [0.2] * 5
else:
    w = []
    for line in open(opts.weights):
        w.extend([float(line)])

tm = models.TM(opts.tm, opts.k)
lm = models.LM(opts.lm)
french = [tuple(line.strip().split())
          for line in open(opts.input).readlines()[:opts.num_sents]]

# tm should translate unknown words as-is with probability 1
for word in set(sum(french, ())):
    if (word,) not in tm:
        tm[(word,)] = [models.phrase(word, 0.0, [0.0] * 4)]

def getrange(data):
    # group consecutive indices into contiguous xranges
    ranges = []
    for key, group in groupby(enumerate(data), lambda (index, item): index - item):
        group = map(itemgetter(1), group)
        ranges.append(xrange(group[0], group[-1] + 1))
    return ranges

def bitmap(sequence):
    # set one bit per covered word position
    return reduce(lambda x, y: x | y, map(lambda i: long('1' + '0' * i, 2), sequence), 0)

def bitmap2str(b, n, on='o', off='.'):
    # render an n-bit coverage bitmap, least-significant bit first
    return '' if n == 0 else (on if b & 1 == 1 else off) + bitmap2str(b >> 1, n - 1, on, off)

def cand_phrases(ranges, f):
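# --- Quick check of the helpers above (added illustration) ---
# bitmap() sets positions least-significant-bit first, bitmap2str() renders
# them in the same order, and getrange() groups contiguous indices:
b = bitmap([0, 2])
assert b == 0b101
print bitmap2str(b, 4)           # o.o.
print getrange([0, 1, 2, 5, 6])  # [xrange(0, 3), xrange(5, 7)]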
optparser.add_option("-s", "--stack-size", dest="s", default=15, type="int", help="Maximum stack size (default=15)") optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose mode (default=off)") opts = optparser.parse_args()[0] # store the stack length in a global stack_size = opts.s tm = models.TM(opts.tm, opts.k) lm = models.LM(opts.lm) french = [tuple(line.strip().split()) for line in open(opts.input).readlines()[:opts.num_sents]] for english_word in set(sum(french, ())): if (english_word,) not in tm: tm[(english_word,)] = [models.phrase(english_word, 0.0)] def determine_expense(scope): """ Given a translation scope evaluates the expense of decoding it. :param scope: the scope to evaluate :return: the potential expense associated with evaluating this scope """ potential_expense = 0 start = -1 for j, translated_flag in enumerate(scope): if not translated_flag: if start == -1: start = j else:
optparser.add_option("-s", "--stack-size", dest="s", default=1, type="int", help="Maximum stack size (default=1)") optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose mode (default=off)") opts = optparser.parse_args()[0] opts.k = 4 tm = models.TM(opts.tm, opts.k) lm = models.LM(opts.lm) french = [tuple(line.strip().split()) for line in open(opts.input).readlines()[:opts.num_sents]] # tm should translate unknown words as-is with probability 1 for word in set(sum(french,())): if (word,) not in tm: tm[(word,)] = [models.phrase(word, 0.0)] # values for the model parameter dd = 5 nn = -4 beta = 2 gooby = tm[("de", "ce")] #print(len(gooby)) #print(gooby) class state: def __init__(self, e1, e2, b, r, alpha): self.e1 = e1 self.e2 = e2 self.b = b self.r = r
optparser.add_option("-n", "--num_sentences", dest="num_sents", default=sys.maxint, type="int", help="Number of sentences to decode (default=no limit)") optparser.add_option("-k", "--translations-per-phrase", dest="k", default=1, type="int", help="Limit on number of translations to consider per phrase (default=1)") optparser.add_option("-s", "--stack-size", dest="s", default=1, type="int", help="Maximum stack size (default=1)") optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose mode (default=off)") optparser.add_option("-e", "--number-iterations", dest="e", default=10, type="int", help="number of iterations (default=10)") opts = optparser.parse_args()[0] tm = models.TM(opts.tm, opts.k) lm = models.LM(opts.lm) french = [tuple(line.strip().split()) for line in open(opts.input).readlines()[:opts.num_sents]] # tm should translate unknown words as-is with probability 1 for word in set(sum(french,())): if (word,) not in tm: tm[(word,)] = [models.phrase(word, 0.0)] converged = [0]*(len(french)+1) #starts at 0!!! sys.stderr.write("Decoding %s...\n" % (opts.input,)) i_sen = 0 print('Iters\tViolations') for f in french: i_sen += 1 print('SENTENCE ' + str(i_sen)) u = [0 for _ in f] #Lagrangian hypothesis = namedtuple("hypothesis", "logprob, lm_state, predecessor, phrase, start, end, num_trans, y_i") initial_hypothesis = hypothesis(0.0, lm.begin(), None, None, 0, 0, 0, [0 for _ in f]) stacks = [{} for _ in f] + [{}] stacks[0][(lm.begin(), 0,0)] = initial_hypothesis num_words = len(f)
optparser.add_option("-k", "--translations-per-phrase", dest="k", default=1, type="int", help="Limit on number of translations to consider per phrase (default=1)") optparser.add_option("-s", "--stack-size", dest="s", default=1, type="int", help="Maximum stack size (default=1)") optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose mode (default=off)") opts = optparser.parse_args()[0] tm = models.TM(opts.tm, opts.k) lm = models.LM(opts.lm) french = [tuple(line.strip().split()) for line in open(opts.input).readlines()[:opts.num_sents]] def extract_english(h): return "" if h.predecessor is None else "%s%s " % (extract_english(h.predecessor), h.phrase.english) # tm should translate unknown words as-is with probability 1 for word in set(sum(french,())): if (word,) not in tm: tm[(word,)] = [models.phrase(word, 0.0)] # adding empty phrase to the translation dictionary tm[()] = [models.phrase("", 0.0)] def update_lm_state(lm_state, logprob, phrase): for word in phrase.english.split(): (lm_state, word_logprob) = lm.score(lm_state, word) logprob += word_logprob return lm_state, logprob sys.stderr.write("Decoding %s...\n" % (opts.input,)) for f in french: # The following code implements a local-reordering decoding algorithm. # All hypotheses in stacks[i] represent translations of the first i # words of the input sentence.
########################################################################
## init for decoder part
lm = models.LM(opts.lm, opts.mute)
tm = models.TM(opts.tm, opts.k, opts.mute)
french = [tuple(line.strip().split())
          for line in open(opts.input).readlines()[:opts.num_sents]]
bound_width = float(opts.bwidth)

for word in set(sum(french, ())):
    if (word,) not in tm:
        tm[(word,)] = [models.phrase(word, [0.0, 0.0, 0.0, 0.0])]

# ibm_t = {}
ibm_t = library.init('./data/ibm.t.gz')

########################################################################
## init for reranker part
references = [[], [], [], []]
sys.stderr.write("Reading English Sentences ... \n")

def readReference(ref_fileName):
    ref = []
    for i, line in enumerate(open(ref_fileName)):
        # Initialize references to correct english sentences
def get_candidates(inputfile, tm, lm, weights, stack_size=10, nbest=None,
                   simpmode=True, separate_unknown_words=False, verbose=False):
    if nbest is None:
        nbest = stack_size
    print >> sys.stderr, "Decoding: " + inputfile
    print >> sys.stderr, "Reading input..."
    french = [line.strip().split() for line in open(inputfile).readlines()]  # list of lists
    if simpmode:
        from mafan import simplify
        for li, line in enumerate(french):
            for wi, word in enumerate(line):
                french[li][wi] = simplify(word.decode('utf-8')).encode('utf-8')
    # tm should translate unknown words as-is with a small probability
    # (i.e. only fall back to copying unknown words over as the last resort)
    for i in xrange(len(french)):
        j = 0
        while j < len(french[i]):
            word = french[i][j]
            if (word,) not in tm:
                flag = True
                if len(word) >= 2 and separate_unknown_words:
                    for separate in xrange(1, len(word)):
                        if (word[:separate],) in tm and (word[separate:],) in tm:
                            french[i][j] = word[:separate]
                            j += 1
                            french[i].insert(j, word[separate:])
                            flag = False
                            break
                if flag:
                    tm[(word,)] = [models.phrase(word, [unknown_word_logprob] * number_of_features_PT)]
            j += 1
    print >> sys.stderr, "Start decoding..."
    for n, f in enumerate(french):
        if verbose:
            print >> sys.stderr, "Input: " + ' '.join(f)
        # Generate cache for phrase segmentations.
        f_cache = generate_phrase_cache(f, tm)
        # Pre-calculate future cost table
        future_cost_table = precalcuate_future_cost(f, tm, weights[:number_of_features_PT])

        # score = dot(features, weights)
        # features = sums of each log feature
        # predecessor = previous hypothesis
        # lm_state = N-gram state (the last one or two words)
        # last_frange = (i, j) the range of last translated phrase in f
        # phrase = the last TM phrase object (correspondence to f[last_frange])
        # coverage = bit string representing the translation coverage on f
        # future_cost = a safe estimation to be added to total_score
        hypothesis = namedtuple(
            "hypothesis",
            "score, features, lm_state, predecessor, last_frange, phrase, coverage, future_cost")
        initial_hypothesis = hypothesis(0.0, [0.0] * number_of_features,
                                        lm.begin(), None, (0, 0), None, 0, 0)

        # stacks[# of covered words in f] (from 0 to |f|)
        stacks = [{} for _ in xrange(len(f) + 1)]
        # stacks[size][(lm_state, last_frange[1], coverage)]:
        # recombination based on (lm_state, last_frange[1], coverage).
        # For different hypotheses with the same tuple, keep the one with the higher score.
        # lm_state affects LM; last_frange affects distortion; coverage affects available choices.
        stacks[0][(lm.begin(), None, 0)] = initial_hypothesis
        for i, stack in enumerate(stacks[:-1]):
            if verbose:
                print >> sys.stderr, "Stack[%d]:" % i
            # Top-k pruning
            s_hypotheses = sorted(stack.values(),
                                  key=lambda h: h.score + h.future_cost, reverse=True)
            for h in s_hypotheses[:stack_size]:
                if verbose:
                    print >> sys.stderr, h.score, h.lm_state, bin(h.coverage), \
                        ' '.join(f[h.last_frange[0]:h.last_frange[1]]), h.future_cost
                for (f_range, delta_coverage, tm_phrases) in enumerate_phrases(f_cache, h.coverage):
                    # f_range = (i, j) of the enumerated next phrase to be translated
                    # delta_coverage = coverage of f_range
                    # tm_phrases = TM entries corresponding to fphrase f[f_range]
                    length = i + f_range[1] - f_range[0]
                    coverage = h.coverage | delta_coverage
                    distance = abs(f_range[0] - h.last_frange[1])
                    # if distance > max_distance and i < len(stacks) / 2:
                    #     continue
                    # TM might give us multiple candidates for a fphrase.
                    for phrase in tm_phrases:
                        features = h.features[:]  # copy!
                        # Features from phrase table
                        for fid in range(number_of_features_PT):
                            features[fid] += phrase.features[fid]
                        # log_lmprob (N-gram)
                        lm_state = h.lm_state
                        loglm = 0.0
                        for word in phrase.english.split():
                            (lm_state, word_logprob) = lm.score(lm_state, word)
                            loglm += word_logprob
                        # Don't forget the STOP N-gram if we just covered the whole sentence.
                        loglm += lm.end(lm_state) if length == len(f) else 0.0
                        features[4] += loglm
                        # log distortion (distance ** alpha)
                        features[5] += log(alpha) * distance
                        # length of the translation (-length)
                        features[6] += -len(phrase.english.split())
                        score = calculate_total_score(features, weights)
                        future_list = get_future_list(coverage, len(f))
                        future_cost = get_future_cost(future_list, future_cost_table)
                        new_state = (lm_state, f_range[1], coverage)
                        new_hypothesis = hypothesis(score, features, lm_state, h,
                                                    f_range, phrase, coverage, future_cost)
                        # Recombination
                        if new_state not in stacks[length] or \
                           score + future_cost > stacks[length][new_state].score + stacks[length][new_state].future_cost:
                            stacks[length][new_state] = new_hypothesis

        winners = sorted(stacks[len(f)].values(), key=lambda h: h.score, reverse=True)
        if nbest == 1:
            yield extract_english(winners[0])
        else:
            for s in winners[:nbest]:
                yield ("%d ||| %s |||" + " %f" * number_of_features) % \
                    ((n, extract_english(s)) + tuple(s.features))
    print >> sys.stderr, "Decoding completed"
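# --- Sketch: helpers referenced above but defined elsewhere in this file ---
# calculate_total_score is presumably the dot product of features and weights
# (an assumption, consistent with the "score = dot(features, weights)" comment
# above); extract_english is the usual backpointer walk, in the exact form the
# other decoders in this collection use.
def calculate_total_score(features, weights):
    return sum(f * w for f, w in zip(features, weights))

def extract_english(h):
    return "" if h.predecessor is None else "%s%s " % (extract_english(h.predecessor), h.phrase.english)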
def translate(input_sentence, n_iter, reordering_limit):
    def lm_score(phrases):
        score = 0
        lm_state = lm.begin()
        for phrase in phrases:
            for word in phrase.english:
                lm_state, word_logprob = lm.score(lm_state, word)
                score += word_logprob
        score += lm.end(lm_state)
        return score

    def replace_moves(i):
        iphrase = source[i]
        ophrase = target[alignment[i]]
        for alternative in tm[iphrase]:
            if alternative == ophrase:
                continue
            # modify
            replace_apply(i, alternative)
            # score
            tm_delta = alternative.logprob - ophrase.logprob
            yield (i, alternative), tm_delta
            # revert
            replace_apply(i, ophrase)

    def replace_apply(i, alternative):
        target[alignment[i]] = alternative

    def merge_moves(i):
        i1, i2 = source[i - 1], source[i]
        a1, a2 = alignment[i - 1], alignment[i]
        # |a1 - a2| = 1
        a_min = min(a1, a2)  # replace
        a_max = max(a1, a2)  # remove
        # a_max = a_min + 1
        o1, o2 = target[a_min], target[a_max]
        for alternative in tm.get(i1 + i2, []):
            # modify
            merge_apply(i, i1 + i2, alternative, a_min)
            # score
            tm_delta = alternative.logprob - o1.logprob - o2.logprob
            yield (i, i1 + i2, alternative, a_min), tm_delta
            # revert
            split_apply(i, i1, i2, o1, o2, a1, a2)

    def split_apply(i, i1, i2, o1, o2, a1, a2):
        source.insert(i, i2)
        source[i - 1] = i1
        al = min(a1, a2)
        target.insert(al + 1, o2)
        target[al] = o1
        for k, a in enumerate(alignment):
            if a >= al + 1:
                alignment[k] += 1
        alignment.insert(i, a2)
        alignment[i - 1] = a1

    def merge_apply(i, src, tgt, al):
        del source[i]
        source[i - 1] = src
        del target[al + 1]
        target[al] = tgt
        del alignment[i]
        alignment[i - 1] = al
        for k, a in enumerate(alignment):
            if a >= al + 1:
                alignment[k] -= 1

    def split_moves(i):
        src, tgt = source[i], target[alignment[i]]
        al = alignment[i]
        for k in range(1, len(src)):
            i1, i2 = src[:k], src[k:]
            for o1 in tm.get(i1, []):
                for o2 in tm.get(i2, []):
                    # modify
                    split_apply(i + 1, i1, i2, o1, o2, al, al + 1)
                    # score
                    tm_delta = o1.logprob + o2.logprob - tgt.logprob
                    yield (i + 1, i1, i2, o1, o2, al, al + 1), tm_delta
                    # revert
                    merge_apply(i + 1, src, tgt, al)

    def swap_moves(i, j):
        # modify
        swap_apply(i, j)
        # score
        yield (i, j), 0
        # revert
        swap_apply(i, j)

    def swap_apply(i, j):
        target[alignment[i]], target[alignment[j]] = target[alignment[j]], target[alignment[i]]
        alignment[i], alignment[j] = alignment[j], alignment[i]

    def violates_reordering(i, al):
        d_source_left = sum(len(phrase) for phrase in source[:i])
        d_target_left = sum(len(phrase.english) for phrase in target[:al])
        d_source_right = sum(len(phrase) for phrase in source[i + 1:])
        d_target_right = sum(len(phrase.english) for phrase in target[al + 1:])
        d = max(abs(d_source_left - d_target_left), abs(d_source_right - d_target_right))
        return d > reordering_limit

    def full_score(moves):
        for m, tm_delta in moves:
            yield m, tm_delta, lm_score(target) - score[1]

    def stochastic_strategy(moves, apply_move):
        choice = None
        for m, tm_delta, lm_delta in full_score(moves):
            if sigmoid(tm_delta + lm_delta, alpha) > random.random():
                choice = m, tm_delta, lm_delta
        if choice:
            m, tm_delta, lm_delta = choice
            apply_move(*m)
            score[0] += tm_delta
            score[1] += lm_delta

    # Make initial decoding easy
    for w in input_sentence:
        if not (w,) in tm:
            tm[(w,)] = [models.phrase((w,), -20)]
    source = [(w,) for w in input_sentence]
    target = [max(tm[(w,)], key=lambda phrase: phrase.logprob) for w in input_sentence]
    alignment = [i for i in range(len(input_sentence))]
    score = [tm_score(target), lm_score(target)]
    logging.info(source_output(source))
    logging.info(target_output(target))
    logging.info(" ".join(map(str, alignment)))
    logging.info("Initial score: %s -> %d", score, score[0] + score[1])

    strategy = stochastic_strategy
    history = [(score[:], source[:], target[:], alignment[:])]
    for it in xrange(n_iter):
        history.append((score[:], source[:], target[:], alignment[:]))
        alpha = 1 - math.exp(-it * 10.0 / n_iter)
        # replace
        for i in range(len(source)):
            strategy(replace_moves(i), replace_apply)
        # merge
        i = 1
        while True:
            if i >= len(source):
                break
            # adjacent target phrases only:
            if abs(alignment[i] - alignment[i - 1]) == 1:
                strategy(merge_moves(i), merge_apply)
            i += 1
        # swap
        for i in range(0, len(source)):
            for j in range(0, len(source)):
                if i == j:
                    continue
                if violates_reordering(i, alignment[j]) or violates_reordering(j, alignment[i]):
                    continue
                strategy(swap_moves(i, j), swap_apply)
        # split
        for i in range(0, len(source)):
            strategy(split_moves(i), split_apply)
        if it % (n_iter / 100) == 0:
            logging.info("%d | %.2f %s %.2f", it, alpha, target_output(target), score[0] + score[1])

    score, source, target, alignment = max(history, key=lambda t: sum(t[0]))
    logging.info(source_output(source))
    logging.info(target_output(target))
    logging.info(" ".join(map(str, alignment)))
    logging.info("Final score: %s -> %d", score, score[0] + score[1])
    return " ".join(" ".join(phrase.english) for phrase in target)
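# --- Sketch: helpers translate() assumes but does not define ---
# Shapes inferred from the calls above: target phrases keep english as a tuple
# of words, sigmoid() turns a score delta into an acceptance probability whose
# sharpness grows with the annealing parameter alpha, and the *_output helpers
# are for logging only. These are plausible stand-ins, not the originals.
import math

def sigmoid(x, alpha):
    return 1.0 / (1.0 + math.exp(-alpha * x))

def tm_score(target):
    return sum(phrase.logprob for phrase in target)

def source_output(source):
    return " ".join(" ".join(words) for words in source)

def target_output(target):
    return " ".join(" ".join(phrase.english) for phrase in target)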
action="store_true", default=False, help="Verbose mode (default=off)") opts = optparser.parse_args()[0] tm = models.TM(opts.tm, opts.k) lm = models.LM(opts.lm) french = [ tuple(line.strip().split()) for line in open(opts.input).readlines()[:opts.num_sents] ] # tm should translate unknown words as-is with probability 1 for word in set(sum(french, ())): if (word, ) not in tm: tm[(word, )] = [models.phrase(word, 0.0)] sys.stderr.write("Decoding %s...\n" % (opts.input, )) for f in french: # The following code implements a monotone decoding # algorithm (one that doesn't permute the target phrases). # Hence all hypotheses in stacks[i] represent translations of # the first i words of the input sentence. You should generalize # this so that they can represent translations of *any* i words. hypothesis = namedtuple("hypothesis", "logprob, lm_state, predecessor, phrase") initial_hypothesis = hypothesis(0.0, lm.begin(), None, None) stacks = [{} for _ in f] + [{}] stacks[0][lm.begin()] = initial_hypothesis for i, stack in enumerate(stacks[:-1]): for h in sorted(stack.itervalues(),
def handle_unk_words(french, tm):
    # tm should translate unknown words as-is with probability 1
    for word in set(sum(french, ())):
        if (word,) not in tm:
            tm[(word,)] = [models.phrase(word, [0.0, 0.0, 0.0, 0.0], 0.0)]
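# --- Usage sketch (assuming the usual optparser setup used elsewhere in this
# collection) ---
# Called once after loading the models, so that every source word has at least
# an identity translation, here with four zeroed TM features plus one extra
# score to match the models.phrase signature this variant uses:
tm = models.TM(opts.tm, opts.k)
french = [tuple(line.strip().split()) for line in open(opts.input)]
handle_unk_words(french, tm)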
def get_candidates(input, tm, lm, weights, s=1):
    alpha = 0.95  # reordering parameter
    french = [list(line.strip().split()) for line in open(input).readlines()]
    for li, line in enumerate(french):
        for wi, word in enumerate(line):
            french[li][wi] = simplify(word.decode('utf-8')).encode('utf-8')

    # tm should translate unknown words as-is with probability 1
    for word in set(sum(french, [])):
        if (word,) not in tm:
            tm[(word,)] = [models.phrase(word, [0.0, 0.0, 0.0, 0.0])]

    def generate_phrase_cache(f):
        cache = []
        for i in range(0, len(f)):
            entries = []
            bitstring = 0
            for j in range(i + 1, len(f) + 1):
                bitstring += 1 << (len(f) - j)
                if tuple(f[i:j]) in tm:
                    entries.append({'end': j, 'bitstring': bitstring, 'phrase': tm[tuple(f[i:j])]})
            cache.append(entries)
        return cache

    def enumerate_phrases(f_cache, coverage):
        for i in range(0, len(f_cache)):
            for entry in f_cache[i]:
                if (entry['bitstring'] & coverage) == 0:
                    yield ((i, entry['end']), entry['bitstring'], entry['phrase'])

    def precalcuate_future_cost(f):
        phraseCheapestTable = {}
        futureCostTable = {}
        for i in range(0, len(f)):
            for j in range(i + 1, len(f) + 1):
                if tuple(f[i:j]) in tm:
                    phraseCheapestTable[i, j] = -sys.maxint
                    for phrase in tm[tuple(f[i:j])]:
                        if phrase.logprob > phraseCheapestTable[i, j]:
                            phraseCheapestTable[i, j] = phrase.logprob
        for i in range(0, len(f)):
            futureCostTable[i, 1] = phraseCheapestTable[i, i + 1]
            for j in range(2, len(f) + 1 - i):
                if (i, i + j) in phraseCheapestTable:
                    futureCostTable[i, j] = phraseCheapestTable[i, i + j]
                else:
                    futureCostTable[i, j] = -sys.maxint
                for k in range(1, j):
                    if (((i + k, i + j) in phraseCheapestTable) and
                            (futureCostTable[i, j] < futureCostTable[i, k] + phraseCheapestTable[i + k, i + j])):
                        futureCostTable[i, j] = futureCostTable[i, k] + phraseCheapestTable[i + k, i + j]
        return futureCostTable

    def get_future_list(bitstring):
        bitList = bin(bitstring)[2:]
        futureList = []
        count = 0
        index = 0
        findZeroBit = False
        for i in range(len(bitList)):
            if bitList[i] == '0':
                if not findZeroBit:
                    index = i
                    findZeroBit = True
                count = count + 1
            else:
                if findZeroBit:
                    futureList.append((index, count))
                findZeroBit = False
                count = 0
        if findZeroBit:
            futureList.append((index, count))
        return futureList

    def get_future_cost(bitList, futureCostTable):
        cost = 0
        for item in bitList:
            cost = cost + futureCostTable[item]
        return cost

    def extract_english(h):
        return "" if h.predecessor is None else "%s%s " % (extract_english(h.predecessor), h.phrase.english)

    results = []
    sys.stderr.write("Decoding %s...\n" % (input,))
    for n, f in enumerate(french):
        # Generate cache for phrase segmentations.
        f_cache = generate_phrase_cache(f)
        # Pre-calculate future cost table
        # future_cost_table = precalcuate_future_cost(f)

        # logprob = log_lmprob + log_tmprob + distortion_penalty
        # predecessor = previous hypothesis
        # lm_state = N-gram state (the last one or two words)
        # last_frange = (i, j) the range of last translated phrase in f
        # phrase = the last TM phrase object (correspondence to f[last_frange])
        # coverage = bit string representing the translation coverage on f
        # future_cost
        hypothesis = namedtuple(
            "hypothesis",
            "logprob, features, lm_score, lm_state, predecessor, last_frange, phrase, coverage")
        initial_hypothesis = hypothesis(0.0, [0.0, 0.0, 0.0, 0.0], 0.0, lm.begin(), None, (0, 0), None, 0)

        # stacks[# of covered words in f] (from 0 to |f|)
        stacks = [{} for _ in range(len(f) + 1)]
        # stacks[size][(lm_state, last_frange, coverage)]:
        # recombination based on (lm_state, last_frange, coverage).
        # For different hypotheses with the same tuple, keep the one with the higher logprob.
        # lm_state affects LM; last_frange affects distortion; coverage affects available choices.
        stacks[0][(lm.begin(), None, 0)] = initial_hypothesis
        for i, stack in enumerate(stacks[:-1]):
            # Top-k pruning
            for h in sorted(stack.itervalues(), key=lambda h: -h.logprob)[:s]:
                for (f_range, delta_coverage, tm_phrases) in enumerate_phrases(f_cache, h.coverage):
                    # f_range = (i, j) of the enumerated next phrase to be translated
                    # delta_coverage = coverage of f_range
                    # tm_phrases = TM entries corresponding to fphrase f[f_range]
                    length = i + f_range[1] - f_range[0]
                    coverage = h.coverage | delta_coverage
                    distance = f_range[0] - h.last_frange[1]
                    # TM might give us multiple candidates for a fphrase.
                    for phrase in tm_phrases:
                        # log_tmprob and distortion
                        features = map(add, h.features, phrase.features)
                        # log_lmprob (N-gram)
                        lm_state = h.lm_state
                        lm_score = h.lm_score
                        for word in phrase.english.split():
                            (lm_state, word_logprob) = lm.score(lm_state, word)
                            lm_score += word_logprob
                        # Don't forget the STOP N-gram if we just covered the whole sentence.
                        lm_score += lm.end(lm_state) if length == len(f) else 0.0
                        # Future cost.
                        # future_list = get_future_list(delta_coverage)
                        # future_cost = get_future_cost(future_list, future_cost_table)
                        logprob = sum(p * q for p, q in zip((features + [lm_score]), weights))
                        new_state = (lm_state, f_range, coverage)
                        new_hypothesis = hypothesis(logprob, features, lm_score, lm_state, h,
                                                    f_range, phrase, coverage)
                        if new_state not in stacks[length] or logprob > stacks[length][new_state].logprob:  # recombination
                            stacks[length][new_state] = new_hypothesis

        winner = sorted(stacks[len(f)].itervalues(), key=lambda h: h.logprob, reverse=True)[0:100]
        for i in range(len(winner)):
            results += ["%d ||| %s ||| %f %f %f %f %f" %
                        (n, extract_english(winner[i]), winner[i].features[0], winner[i].features[1],
                         winner[i].features[2], winner[i].features[3], winner[i].lm_score)]
    return results
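# --- Quick check of get_future_list above (standalone equivalent, since the
# original is nested inside get_candidates) ---
# It scans bin(bitstring) left to right and returns one (start, length) pair
# per run of 0-bits, i.e. per contiguous uncovered span:
import re

def _get_future_list_demo(bitstring):
    return [(m.start(), len(m.group())) for m in re.finditer('0+', bin(bitstring)[2:])]

print _get_future_list_demo(0b110010)  # [(2, 2), (5, 1)]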
def main():
    # tm should translate unknown words as-is with probability 1
    for word in set(sum(french, ())):
        if (word,) not in tm:
            tm[(word,)] = [models.phrase(word, 0.0)]
    total_prob = 0
    if opts.mute == 0:
        sys.stderr.write("Decoding %s...\n" % (opts.input,))
    for idx, f in enumerate(french):
        initial_hypothesis = hypothesis(lm.begin(), 0.0, 0, 0, None, None)
        heaps = [{} for _ in f] + [{}]
        heaps[0][lm.begin(), 0, 0] = initial_hypothesis
        for i, heap in enumerate(heaps[:-1]):
            # maintain beam heap
            # front_item = sorted(heap.itervalues(), key=lambda h: -h.logprob)[0]
            # for k in heap.keys():
            #     if heap[k].logprob < front_item.logprob - bound_width:
            #         del heap[k]
            for h in sorted(heap.itervalues(), key=lambda h: -h.logprob)[:opts.s]:  # prune
                fopen = prefix1bits(h.coverage)
                for j in xrange(fopen, min(fopen + 1 + opts.disord, len(f) + 1)):
                    for k in xrange(j + 1, len(f) + 1):
                        if f[j:k] in tm:
                            if (h.coverage & bitmap(range(j, k))) == 0:
                                for phrase in tm[f[j:k]]:
                                    lm_prob = 0
                                    lm_state = h.lm_state
                                    for word in phrase.english.split():
                                        (lm_state, prob) = lm.score(lm_state, word)
                                        lm_prob += prob
                                    lm_prob += lm.end(lm_state) if k == len(f) else 0.0
                                    coverage = h.coverage | bitmap(range(j, k))
                                    logprob = h.logprob + opts.alpha * lm_prob + opts.beta * phrase.logprob  # + eta*abs(h.end + 1 - j)
                                    new_hypothesis = hypothesis(lm_state, logprob, coverage, k, h, phrase)
                                    # add to heap
                                    num = onbits(coverage)
                                    if (lm_state, coverage, k) not in heaps[num] or \
                                       new_hypothesis.logprob > heaps[num][lm_state, coverage, k].logprob:
                                        heaps[num][lm_state, coverage, k] = new_hypothesis
        winner = max(heaps[-1].itervalues(), key=lambda h: h.logprob)
        eng_list = ["<s>"]

        def get_list(h, output_list):
            if h.predecessor is not None:
                get_list(h.predecessor, output_list)
                output_list.append(h.phrase.english)

        def get_prob(test_list):
            stance = []
            for i in test_list:
                stance += (i.split())
            stance = tuple(stance)
            lm_state = (stance[0],)
            score = 0.0
            for word in stance[1:]:
                (lm_state, word_score) = lm.score(lm_state, word)
                score += word_score
            return score

        get_list(winner, eng_list)
        eng_list.append("</s>")
        if opts.mute == 0:
            sys.stderr.write("Start local search ...\n")
        while True:
            best_list = copy.deepcopy(eng_list)
            # insert
            for i in range(1, len(eng_list) - 1):
                for j in range(1, i):
                    now_list = copy.deepcopy(eng_list)
                    now_list.pop(i)
                    now_list.insert(j, eng_list[i])
                    if get_prob(now_list) > get_prob(best_list):
                        best_list = now_list
                for j in range(i + 2, len(eng_list) - 1):
                    now_list = copy.deepcopy(eng_list)
                    now_list.insert(j, eng_list[i])
                    now_list.pop(i)
                    if get_prob(now_list) > get_prob(best_list):
                        best_list = now_list
            # swap
            for i in range(1, len(eng_list) - 2):
                for j in range(i + 1, len(eng_list) - 1):
                    now_list = copy.deepcopy(eng_list)
                    now_list[i], now_list[j] = now_list[j], now_list[i]
                    if get_prob(now_list) > get_prob(best_list):
                        best_list = now_list
            if get_prob(best_list) == get_prob(eng_list):
                break
            else:
                eng_list = best_list
        for i in eng_list[1:-1]:
            print i,
        print
        if opts.mute == 0:
            sys.stderr.write("#{0}:{2} - {1}\n".format(idx, eng_list, get_prob(eng_list)))
def decode(tm, lm, source_sentence, stack_size=1, max_reordering=None):
    """Return the most probable decoding of *source_sentence* under the
    provided probabilistic translation and language models."""
    # Compute the future cost table.
    future_costs = {}
    for segment_length in xrange(1, len(source_sentence) + 1):
        for start in xrange(len(source_sentence) - segment_length + 1):
            end = start + segment_length
            future_costs[(start, end)] = float('-inf')
            candidates = tm.get(source_sentence[start:end], [])
            if candidates:
                logprob = candidates[0].logprob
                lm_state = tuple()
                for target_word in candidates[0].english.split():
                    lm_state, word_logprob = lm.score(lm_state, target_word)
                    logprob += word_logprob
                future_costs[(start, end)] = logprob
            for mid in xrange(start + 1, end):
                future_costs[(start, end)] = max(
                    future_costs[(start, mid)] + future_costs[(mid, end)],
                    future_costs[(start, end)])

    # Actually start decoding.
    initial = Hypothesis(0.0, future_costs[(0, len(source_sentence))],
                         (False,) * len(source_sentence), lm.begin(), None, None)
    # We add 1 here because we need to have stacks for both ends: 0 and
    # len(source_sentence).
    stacks = [{} for _ in xrange(len(source_sentence) + 1)]
    stacks[0][lm.begin()] = initial
    # Iterate over every stack but the last. It's not possible to add
    # anything to a hypothesis in the last stack anyway, so we skip it.
    for i, stack in enumerate(stacks[:-1]):
        # Take only the best *stack_size* hypotheses. Using the sum of
        # the log-probability and the future cost negatively impacts the
        # model score (??).
        hypotheses = sorted(stack.itervalues(), key=lambda h: -h.logprob)[:stack_size]
        for hypothesis in hypotheses:
            # Save ourselves a couple of levels of indentation later on.
            def untranslated_segments():
                if max_reordering is None:
                    starts = xrange(len(source_sentence))
                else:
                    starts = xrange(min(i + max_reordering, len(source_sentence)))
                for start in starts:
                    if hypothesis.coverage[start]:
                        continue
                    ends = xrange(start, len(source_sentence))
                    for end in ends:
                        if hypothesis.coverage[end]:
                            break
                        yield (start, end + 1)

            # Iterate over blocks of untranslated source words.
            for start, end in untranslated_segments():
                source_phrase = source_sentence[start:end]
                # Get all of the potential candidate translations.
                candidates = tm.get(source_phrase, [])
                # Translate unknown unigrams to themselves.
                if not candidates and len(source_phrase) == 1:
                    candidates.append(models.phrase(source_phrase[0], 0.0))
                for candidate in candidates:
                    logprob = hypothesis.logprob + candidate.logprob
                    # Make a new coverage vector with the appropriate
                    # elements set to True. This isn't pretty. Sorry.
                    coverage = (hypothesis.coverage[:start] +
                                (True,) * (end - start) +
                                hypothesis.coverage[end:])
                    # Find the future cost estimate for this hypothesis
                    # by summing over contiguous incomplete segments.
                    future_cost = 0.0
                    cost_start = None
                    for cost_i, covered in enumerate(coverage + (True,)):
                        if covered:
                            if cost_start is not None:
                                future_cost += future_costs[(cost_start, cost_i)]
                                cost_start = None
                        else:
                            if cost_start is None:
                                cost_start = cost_i
                    # Make a new LM state.
                    lm_state = hypothesis.lm_state
                    for target_word in candidate.english.split():
                        lm_state, word_logprob = lm.score(lm_state, target_word)
                        logprob += word_logprob
                    # Add the final transition probability if the end of
                    # this segment is also the end of the sentence.
                    if end == len(source_sentence):
                        logprob += lm.end(lm_state)
                    # If the new hypothesis is the best hypothesis for
                    # its state and number of completed words, push it
                    # onto the stack, replacing any that is present.
                    completed = sum(int(x) for x in coverage)
                    if (lm_state not in stacks[completed] or
                            (stacks[completed][lm_state].logprob +
                             stacks[completed][lm_state].future_cost) < logprob + future_cost):
                        stacks[completed][lm_state] = Hypothesis(
                            logprob, future_cost, coverage, lm_state, hypothesis, candidate)

    # We don't need to specify a key, since we're looking for the best
    # log-probability, and that's the first element of a hypothesis.
    best = max(stacks[-1].itervalues())
    current = best
    decoding = []
    while current.candidate:
        decoding.insert(0, current.candidate.english)
        current = current.predecessor
    return tuple(decoding)
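# --- Sketch: the Hypothesis container and a driver for decode() ---
# Hypothesis is defined elsewhere; the field order below is inferred from the
# constructor calls above (logprob first, so max() without a key picks the
# best-scoring hypothesis). The driver assumes the usual optparser setup seen
# in the other files of this collection.
from collections import namedtuple

Hypothesis = namedtuple(
    "Hypothesis",
    "logprob, future_cost, coverage, lm_state, predecessor, candidate")

if __name__ == '__main__':
    tm = models.TM(opts.tm, opts.k)
    lm = models.LM(opts.lm)
    for line in open(opts.input):
        print " ".join(decode(tm, lm, tuple(line.split()), stack_size=opts.s))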