# Rescore previously generated decoder outputs: every file matching
# ./old/out* holds one candidate translation per line. Each candidate is
# graded with gradeOneSent.gradeOneSentence and, for every input sentence,
# the candidate with the highest grade is printed.
tm = models.TM(opts.tm, sys.maxint)
lm = models.LM(opts.lm)

# Source sentences as token tuples, limited to the first opts.num_sents lines.
with open(opts.input) as source_file:
    french_sents = [tuple(line.strip().split())
                    for line in source_file.readlines()[:opts.num_sents]]

outputs = glob.glob("./old/out*")

# results[sent_num] -> list of (prob, token tuple), one entry per usable file.
results = collections.defaultdict(list)

count = 0  # number of candidate files visited, including skipped ones
for count, output in enumerate(outputs, 1):
    with open(output) as candidate_file:
        out = [tuple(line.strip().split()) for line in candidate_file]

    # A file whose line count differs from the input cannot be aligned
    # sentence by sentence, so it is ignored (now with a note on stderr).
    if len(french_sents) != len(out):
        sys.stderr.write("skipping %s: %d lines, expected %d\n"
                         % (output, len(out), len(french_sents)))
        continue

    for sent_num, e in enumerate(out):
        prob = gradeOneSent.gradeOneSentence(
            sent_num, tuple(e), french_sents[sent_num], lm, tm)
        results[sent_num].append((prob, e))


def printTuple(t):
    """Return the items of t joined into one string.

    Every item is followed by a single space, so a non-empty result ends
    with a trailing space and an empty sequence yields "".
    """
    return "".join(token + " " for token in t)


# One output line per input sentence. The sentence count comes from the
# input instead of a fixed 55, so shorter inputs no longer raise IndexError
# and longer inputs are no longer truncated.
for sent_num in range(len(french_sents)):
    hypos = results[sent_num]
    if not hypos:
        # No usable candidate: emit a blank line so that output line
        # numbers still correspond to input sentence numbers.
        print("")
        continue
    # max() returns the first maximal entry, the same candidate that a
    # stable descending sort would place at index 0.
    best = max(hypos, key=lambda tup: tup[0])
    # Single-argument print(...) behaves identically to the print statement
    # on Python 2, which this script targets (sys.maxint).
    print(printTuple(best[1]))
# on the last stack winner = max(stacks[-1].itervalues(), key=lambda h: h.logprob) def extract_english_recursive(h): return '' if h.predecessor is None else '%s%s ' % (extract_english_recursive(h.predecessor), h.phrase.english) def extract_tm_logprob(h): return 0.0 if h.predecessor is None else h.phrase.logprob + extract_tm_logprob(h.predecessor) sent_num = sent_num +1 # coding for Marginalization probMax = float("-inf") #for h0 in heapq.nlargest(1000, stacks[-1].itervalues(), key=lambda h: h.logprob): # TODO: only consider 1000 top hypothesis in the last stack for h0 in stacks[-1].itervalues(): # consider all the hypothesis in the last stack e = tuple(extract_english_recursive(h0).strip().split()) prob = gradeOneSent.gradeOneSentence(sent_num, e, f, lm, tm) if prob > probMax: eMax = e probMax = prob e = eMax # do all the reordering for i in range(len(e)-1): #k = min(i+20,len(e)-1) #TODO: k = len(e)-1 for j in range(i+1, k): etmp = list(e) etmp[i] = e[j] etmp[j] = e[i] etmp = tuple(etmp)