) (VP (VB gave) (NP (DT the) (NN lecture) ) ) )""" # uncomment to use the above simple trees for debugging: # trees = [TRANSFORM(Tree.from_string(t)) for t in (t0, t1)] # grammar = PCFG.from_trees(trees) # let's get some input to build a grammar: grammar = PCFG.from_trees(list(TRANSFORM(t) for t in Tree.from_stream(GzipFile('bigger_treebank_2.txt.gz')))) print "Read {} rules in grammar.".format(len(grammar)) trees = list(TRANSFORM(t) for t in Tree.from_stream(open('end_of_wsj.txt'))) print "Read {} trees.".format(len(trees)) # now try and parse our trees: results = [] for idx, tree in enumerate(trees): tokens = [(t,) for t in tree.terminals()] # print 'Sentence {}\tTokens: "{}"'.format(idx, ' '.join(tree.terminals())) chart = Chart(grammar, tokens) chart.pretty_print() has_parse = chart.extract_parse() if not has_parse: print 'Sentence {}\tTokens: "{}" has no parse!'.format(idx, ' '.join(tree.terminals()))