def random_derivation(s, depth=0, category='S', productions_above=None):
    """Return a randomly chosen derivation of ``s`` with the given category.

    If the pretty-printed form of ``s`` is a key of the module-level
    ``lexicon``, one lexicon entry whose category matches ``category`` is
    picked at random and ``[key, entry[0], entry[1]]`` is returned.
    Otherwise ``s`` is split into pairs via ``sexp.split`` over
    ``sexp.subexps(s)``; the splits are tried in random order and the first
    one whose two halves can both be derived yields
    ``[key, category, left_derivation, right_derivation]``.

    Parameters:
        s: expression to derive (whatever ``sexp`` operates on).
        depth: current recursion depth; splitting stops once it exceeds 3.
        category: category string the derivation must have.
        productions_above: list of splits already taken on the path from the
            root.  It is extended and passed down on every recursive call but
            not otherwise consulted here.

    Returns:
        A nested list describing the derivation, or ``False`` when no
        derivation is found.
    """
    # A None sentinel avoids sharing one default list between calls.
    if productions_above is None:
        productions_above = []

    key = sexp.pretty_lambda(s)

    if key in lexicon:
        # Lexicon categories may use '/' and '\'; both are normalised to '|'
        # before being compared with the requested category.
        options = [entry for entry in lexicon[key]
                   if entry[0].replace('/', '|').replace('\\', '|') == category]
        if not options:
            return False
        picked = random.choice(options)
        return [key, picked[0], picked[1]]

    if sexp.totally_vacuous(s):
        return False
    if depth > 3:
        return False

    splits = []
    for sub in sexp.subexps(s):
        splits.extend(sexp.split(s, sub))
    if not splits:
        return False

    random.shuffle(splits)
    for split in splits:
        fcat = catf(split[0])
        # Parenthesise a complex argument category so the combined category
        # string stays unambiguous.
        if '|' in fcat:
            fcat_arg = '(%s)' % fcat
        else:
            fcat_arg = fcat
        gcat = '%s|%s' % (category, fcat_arg)

        path = productions_above + [split]
        d1 = random_derivation(split[0], depth + 1, fcat, path)
        if not d1:
            # The right half cannot rescue this split; skip its (potentially
            # expensive) recursive search.
            continue
        d2 = random_derivation(split[1], depth + 1, gcat, path)
        if d2:
            return [key, category, d1, d2]

    return False
def decode(self, sent, category, chart, depth=0):
    """Decode ``sent`` as ``category`` in the context of ``chart``.

    When ``self.lex_entries`` yields entries for the pretty-printed ``sent``
    and ``category``, a new ``Chart`` is returned holding one cell per entry
    (each built with ``self.new_cell(entry, chart)``).  Otherwise every split
    from ``sexp.all_splits(sent)`` is decoded recursively: the left half
    against ``chart``, then the right half against the pruned left chart, and
    the resulting right charts are merged into one chart that is pruned to
    ``self.BEAM_WIDTH``.

    Returns ``False`` when ``chart`` has no cells, when ``sent`` is totally
    vacuous, or when ``depth`` equals ``self.MAX_DEPTH``; otherwise returns a
    ``Chart`` (which may have received no cells if no split succeeded).
    """
    # TODO what's going on here?
    if len(chart.cells) == 0:
        return False

    lexical_entries = self.lex_entries(sexp.pretty_lambda(sent), category)
    if lexical_entries:
        lexical_chart = Chart(sent, category)
        for entry in lexical_entries:
            cell = self.new_cell(entry, chart)
            cell.set_chart(lexical_chart)
            lexical_chart.add(cell)
        return lexical_chart

    if sexp.totally_vacuous(sent) or depth == self.MAX_DEPTH:
        return False

    candidate_splits = sexp.all_splits(sent)
    merged = Chart(sent, category)
    next_depth = depth + 1
    for split in candidate_splits:
        fcat, gcat = self.make_categories(split, category)

        left = self.decode(split[0], fcat, chart, next_depth)
        if left:
            # Prune the left chart before it becomes the context for the
            # right half.
            left.keep(self.BEAM_WIDTH)
            right = self.decode(split[1], gcat, left, next_depth)
            if right:
                merged.add_all(right)
                merged.keep(self.BEAM_WIDTH)

    return merged
def best_derivation(sent, category, cky=None, depth=0):
    """Return the highest-scoring derivation of ``sent`` as ``category``.

    Every call increments the module-level ``counter``.  Results are written
    to the module-level ``cache`` under ``"<pretty sent> <category>"``; the
    cache is never read in this function.

    Parameters:
        sent: expression to derive (whatever ``sexp`` operates on).
        category: category string the derivation must have.
        cky: list handed to ``lm_score``; a fresh empty list when omitted.
            Each split receives its own shallow copy.
        depth: current recursion depth; splitting stops when it equals 3.

    Returns:
        ``False`` when no derivation exists.  For a lexical item, a dict
        ``{'key', 'scored', 'score'}`` where ``scored`` is a list of
        ``(terminal, lm_score)`` pairs and ``score`` is the best of those
        scores.  For a composite, a dict ``{'key', 'score', 'left', 'right'}``
        whose ``score`` is the sum of both children's scores and
        ``split_potential(sent, split)``.
    """
    global counter

    if cky is None:
        cky = []

    lkey = sexp.pretty_lambda(sent)
    key = lkey + ' ' + category
    counter += 1

    if lkey in lexicon:
        terminals = all_lex_entries(lkey, category)
        if terminals:
            scored = [(terminal, lm_score(terminal, cky))
                      for terminal in terminals]
            # The composite case below reads child['score']; without this
            # key every derivation that reaches a lexical item raised
            # KeyError.  Assumes lm_score returns mutually comparable
            # numbers -- TODO confirm.
            r = {'key': key,
                 'scored': scored,
                 'score': max(score for _, score in scored)}
        else:
            r = False
        cache[key] = r
        return r

    if sexp.totally_vacuous(sent):
        cache[key] = False
        return False

    # Depth cut-off; deliberately not cached because the outcome depends on
    # the depth at which this expression was reached.
    if depth == 3:
        return False

    splits = []
    for sub in sexp.subexps(sent):
        splits.extend(sexp.split(sent, sub))

    scores = []
    for split in splits:
        ncky = list(cky)
        fcat, gcat = make_categories(split, category)
        left = best_derivation(split[0], fcat, ncky, depth + 1)
        if not left:
            continue
        right = best_derivation(split[1], gcat, ncky, depth + 1)
        if not right:
            continue
        sc = left['score'] + right['score'] + split_potential(sent, split)
        scores.append({'key': key,
                       'score': sc,
                       'left': left,
                       'right': right})

    if not scores:
        return False

    r = max(scores, key=lambda x: x['score'])
    cache[key] = r
    return r