def parse_binary_search(sent_len, scorer, m):
    """Parse with a skip penalty tuned so that ``m`` words are kept.

    The chart is filled once through ``interface.Parser().parse_bigram``;
    afterwards every candidate penalty is evaluated by rebuilding a parse
    from ``c.regen(pen, counts)``. The penalty is bisected over
    ``[-10, 10]`` for at most 25 steps, looking for a value at which
    ``sent_len - parse.skipped_words()`` equals ``m``.

    Args:
        sent_len: Sentence length handed to the parser; the chart and the
            resulting parses are built with ``sent_len + 1``.
        scorer: Scoring object passed to ``parse_bigram``. Its
            ``skip_penalty`` attribute is overwritten on every probe and is
            left at the selected penalty when this function returns.
        m: Target number of non-skipped words.

    Returns:
        The parse built by ``interface.make_parse`` for the selected
        penalty. If no probed penalty yields exactly ``m`` kept words, the
        parse for the last probed penalty is returned.
    """
    # Search interval and iteration budget for the penalty bisection.
    lower_bound, upper_bound, max_steps = -10, 10, 25

    def binary_search(seq, t):
        """Bisect for an ``x`` such that ``seq(x) == t``.

        Returns the first midpoint whose value equals ``t``, or the last
        midpoint probed once the step budget is spent.

        NOTE(review): assumes ``seq`` is non-decreasing in ``x`` -- confirm.
        """
        lo, hi = lower_bound, upper_bound
        for _ in range(max_steps):
            mid = (lo + hi) / 2.0
            size = seq(mid)
            if size < t:
                # Too few kept words: move the lower bound up.
                lo = mid
            elif size > t:
                # Too many kept words: move the upper bound down.
                hi = mid
            else:
                return mid
        return mid

    c = Chart(sent_len + 1)
    interface.Parser().parse_bigram(sent_len, scorer, None, c)
    counts = c.counts()

    def kept_words(pen):
        """Return how many words are not skipped under penalty ``pen``."""
        scorer.skip_penalty = pen
        parse = interface.make_parse(sent_len + 1, c.regen(pen, counts))
        return sent_len - parse.skipped_words()

    pen = binary_search(kept_words, m)
    scorer.skip_penalty = pen
    return interface.make_parse(sent_len + 1, c.regen(pen, counts))
def f(pen):
    """Return the number of words left unskipped under penalty ``pen``.

    Stores ``pen`` on ``scorer.skip_penalty``, rebuilds a parse from
    ``c.regen(pen, counts)`` and subtracts its skipped-word count from
    ``sent_len``.

    NOTE(review): ``scorer``, ``sent_len``, ``c`` and ``counts`` are free
    names that must be supplied by the surrounding scope -- confirm where
    this definition is meant to live.
    """
    scorer.skip_penalty = pen
    regenerated = c.regen(pen, counts)
    candidate = interface.make_parse(sent_len + 1, regenerated)
    skipped = candidate.skipped_words()
    return sent_len - skipped