def demo():
    """
    A demonstration of the porter stemmer on a sample from the Penn
    Treebank corpus.

    Every word of the first four sentences yielded by C{treebank.raw()}
    is stemmed.  The original words and their stems are each joined into
    a single string, word-wrapped, and printed under a banner line.
    """
    from itertools import islice
    from nltk_lite.corpora import treebank

    stemmer = PorterStemmer()

    # A four-sentence sample is enough for the demonstration; islice
    # stops pulling from the corpus once the sample has been read.
    orig = []
    stemmed = []
    for sent in islice(treebank.raw(), 4):
        for word in sent:
            orig.append(word)
            stemmed.append(stemmer.stem(word))

    def wrap(words):
        # Join the words with single spaces and word-wrap the text.  The
        # appended space gives the final word the trailing whitespace the
        # pattern needs; rstrip() then drops the last line break.
        text = ' '.join(words)
        return re.sub(r"(.{,70})\s", r'\1\n', text + ' ').rstrip()

    def banner(title):
        # Centre the title in 70 columns, turn the padding into '*' and
        # the '-' markers around the title into spaces.
        return title.center(70).replace(' ', '*').replace('-', ' ')

    # Print the results.  Each print call takes exactly one argument, so
    # the output is the same whether print is a statement or a function.
    print(banner('-Original-'))
    print(wrap(orig))
    print(banner('-Results-'))
    print(wrap(stemmed))
    print('*' * 70)
def demo():
    """
    A demonstration showing how PCFG C{Grammar}s can be created and
    used.

    Three things are printed in turn: a hand-built list of probabilistic
    productions together with the grammar constructed from them; a
    grammar induced from the productions of the first three trees of
    C{treebank.parsed()}; and every parse that an C{InsideParse} parser
    built on the induced grammar returns for the first item of
    C{treebank.raw()}.
    """
    from nltk_lite.corpora import treebank, extract
    # treetransforms is only needed by the optional (commented-out)
    # transformations in the induction loop below.
    from nltk_lite.parse import cfg, pcfg, pchart, treetransforms
    from itertools import islice

    def show(label, value):
        # Print a label and the repr of a value separated by one space.
        # A single pre-built string keeps the output identical whether
        # print is a statement or a function.
        print(label + ' ' + repr(value))

    # Create some probabilistic CFG Productions
    S, A, B, C = cfg.nonterminals('S A B C')
    pcfg_prods = [
        pcfg.Production(A, [B, B], prob=0.3),
        pcfg.Production(A, [C, B, C], prob=0.7),
        pcfg.Production(B, [B, 'b'], prob=0.5),
        pcfg.Production(B, [C], prob=0.5),
        pcfg.Production(C, ['a'], prob=0.1),
        pcfg.Production(C, ['b'], prob=0.9),
    ]

    pcfg_prod = pcfg_prods[2]
    show('A PCFG production:', pcfg_prod)
    show(' pcfg_prod.lhs() =>', pcfg_prod.lhs())
    show(' pcfg_prod.rhs() =>', pcfg_prod.rhs())
    show(' pcfg_prod.prob() =>', pcfg_prod.prob())
    # print('') emits just a newline under both print forms; a bare
    # print() would output "()" when print is a statement.
    print('')

    # Create and print a PCFG
    grammar = pcfg.Grammar(S, pcfg_prods)
    show('A PCFG grammar:', grammar)
    show(' grammar.start() =>', grammar.start())
    # str.replace(...) is used to line-wrap the output: a line break and
    # 26 spaces of indentation are inserted after every comma.
    wrapped = repr(grammar.productions()).replace(',', ',\n' + ' ' * 26)
    print(' grammar.productions() => ' + wrapped)
    print('')

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    for tree in islice(treebank.parsed(), 3):
        # perform optional in-place tree transformations, e.g.:
        # treetransforms.collapseUnary(tree, collapsePOS = False)
        # treetransforms.chomskyNormalForm(tree, horzMarkov = 2)
        productions += tree.productions()

    grammar = pcfg.induce(S, productions)
    print(grammar)
    print('')

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideParse(grammar)
    parser.trace(3)

    sent = extract(0, treebank.raw())
    print(sent)
    for parse in parser.get_parse_list(sent):
        print(parse)