def pattern_parse(sentence, trace=0):
    """Parse the first sentence of *sentence* against every known pattern.

    Only the text before the first ". " is used, with trailing "?!."
    punctuation stripped, then tokenized into a word list.  Every
    (prior-prob, relation, pattern) triple in the module-level ``patterns``
    list is matched against the full token span via match_pattern().

    Parameters:
        sentence: raw input text.
        trace: verbosity level; when > 0 each full-span match is printed,
            and the value is also forwarded to pattern_chart().

    Returns:
        (bestprob, bestframe, bestrel, bestmatches) for the
        highest-probability pattern that consumes every token, or
        (1e-60, None, None, None) when no pattern spans the sentence.
    """
    tokens = tokenize(sentence.split(". ")[0].strip("?!.")).split()
    chart = pattern_chart(tokens, thegrammar, theunigrams, trace)
    # Floor probability: a real match must beat this tiny epsilon.
    bestprob = 1e-60
    bestframe = None
    bestrel = None
    bestmatches = None
    for pprob, rel, pattern in patterns:
        ptok = pattern.split()
        for end, prob, frame, matchdict in match_pattern(ptok, 0, chart, tokens):
            # Weight the chart probability by the pattern's prior.
            prob *= pprob
            # Only matches consuming every token count as a parse.
            if end == len(tokens):
                if trace > 0:
                    print(prob, pattern)
                if prob > bestprob:
                    bestprob = prob
                    bestframe = untokenize(' '.join(frame))
                    bestrel = rel
                    bestmatches = matchdict
    return bestprob, bestframe, bestrel, bestmatches
def match_pattern(rhs, start, chart, tokens):
    """Recursively match the pattern symbols *rhs* against *tokens*,
    beginning at token index *start*.

    Each symbol is either plain ("NP") or braced with a capture group
    ("{NP:1}" or "{NP:name}").  A braced symbol records the matched token
    span in the yielded match dict; a numeric group additionally replaces
    the matched tokens in the frame with a "{n}" placeholder (group 0
    contributes nothing to the frame at all).

    Yields:
        (end, prob, frame, matchdict) tuples — *end* is the token index
        just past the match, *prob* the product of the chart
        probabilities along the way, *frame* the list of output tokens,
        and *matchdict* the captured groups (keyed by int for numeric
        groups, by str for named ones).
    """
    if not rhs:
        # Base case: the empty pattern matches the empty span with
        # probability 1 and captures nothing.
        yield start, 1.0, [], {}
        return
    symb = str(rhs[0])
    group = None
    if symb.startswith("{"):
        # Split "{SYMB:group}" into the grammar symbol and its
        # capture-group label (the label is optional).
        parts = symb[1:-1].split(':')
        symb = parts[0]
        if len(parts) > 1:
            group = parts[1]
    # chart[symb][start] maps each possible end position of this symbol
    # to the probability of that span.
    for stop, prob in chart[symb][start].items():
        for end, prob2, frame, matchdict in match_pattern(rhs[1:], stop, chart, tokens):
            if group is not None:
                # was: `group in string.digits` — a substring test against
                # "0123456789" that misclassified multi-digit labels and
                # crashed on an empty one; isdigit() is the intended check.
                if group.isdigit():
                    # Numeric group: emit a "{n}" placeholder in the frame
                    # (nothing for group 0) and key the capture by int.
                    groupn = int(group)
                    chunk = [] if groupn == 0 else ["{%s}" % group]
                else:
                    # Named group: keep the tokens in the frame and key
                    # the capture by the label string.
                    chunk = tokens[start:stop]
                    groupn = group
                matchdict[groupn] = untokenize(' '.join(tokens[start:stop]))
            else:
                # No capture: the matched tokens pass straight through.
                chunk = tokens[start:stop]
            yield end, prob * prob2, chunk + frame, matchdict