def demo_eval(chunkparser, text):
    """
    Demonstration code for evaluating a chunk parser, using a
    C{ChunkScore}.  This function assumes that C{text} contains one
    sentence per line, and that each sentence has the form expected by
    C{chunk.tagstr2tree}.  It runs the given chunk parser on each
    sentence in the text, and scores the result.  It prints the final
    score (precision, recall, and f-measure); and reports the set of
    chunks that were missed and the set of chunks that were incorrect.
    (At most 10 missing chunks and 10 incorrect chunks are reported).

    @param chunkparser: The chunkparser to be tested
    @type chunkparser: C{ChunkParserI}
    @param text: The chunked tagged text that should be used for
        evaluation.
    @type text: C{string}
    """
    from nltk import chunk, Tree

    # Every print below passes exactly one argument in parentheses, so
    # the output is the same under the Python 2 print statement and the
    # Python 3 print function.

    # Evaluate our chunk parser.
    chunkscore = chunk.ChunkScore()
    for sentence in text.split('\n'):
        # Echo the raw line (blank lines included) before parsing it.
        print(sentence)
        sentence = sentence.strip()
        if not sentence:
            continue
        gold = chunk.tagstr2tree(sentence)
        tokens = gold.leaves()
        # Re-parse the bare tokens and score the result against the gold tree.
        test = chunkparser.parse(Tree('S', tokens), trace=1)
        chunkscore.score(gold, test)
        print('')

    print('/' + ('=' * 75) + '\\')
    print('Scoring %s' % (chunkparser,))
    print('-' * 77)
    # One joined line reproduces the spacing of the original chain of
    # comma-terminated print statements.
    print(' '.join([
        'Precision: %5.1f%%' % (chunkscore.precision() * 100),
        ' ' * 4,
        'Recall: %5.1f%%' % (chunkscore.recall() * 100),
        ' ' * 6,
        'F-Measure: %5.1f%%' % (chunkscore.f_measure() * 100),
    ]))

    # Missed chunks, then incorrect chunks.  Each list is fetched once,
    # and the loop names do not rebind the imported ``chunk`` module.
    for heading, fetch in (('Missed:', chunkscore.missed),
                           ('Incorrect:', chunkscore.incorrect)):
        entries = fetch()
        if not entries:
            continue
        print(heading)
        for entry in entries[:10]:
            print('  ' + ' '.join(str(item) for item in entry))
        if len(entries) > 10:
            print(' ...')

    print('\\' + ('=' * 75) + '/')
    print('')
def demo():
    """
    Demonstrate the string/tree conversion functions of C{nltk.chunk}.

    A bracketed, tagged sentence is converted with C{chunk.tagstr2tree}
    and printed.  A CoNLL-style sample is then converted with
    C{chunk.conllstr2tree} (keeping only C{NP} and C{PP} chunks),
    printed, and converted back with C{chunk.tree2conllstr}.
    """
    from nltk import chunk

    # Every print below passes exactly one argument in parentheses, so
    # the output is the same under the Python 2 print statement and the
    # Python 3 print function.

    bracketed = ("[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, "
                 "will/MD join/VB [ the/DT board/NN ] ./.")
    t = chunk.tagstr2tree(bracketed, chunk_node='NP')
    print(t.pprint())
    print('')

    # CoNLL chunk format: one "word tag chunk-tag" triple per line.
    conll_text = """
These DT B-NP
research NN I-NP
protocols NNS I-NP
offer VBP B-VP
to TO B-PP
the DT B-NP
patient NN I-NP
not RB O
only RB O
the DT B-NP
very RB I-NP
best JJS I-NP
therapy NN I-NP
which WDT B-NP
we PRP B-NP
have VBP B-VP
established VBN I-VP
today NN B-NP
but CC B-NP
also RB I-NP
the DT B-NP
hope NN I-NP
of IN B-PP
something NN B-NP
still RB B-ADJP
better JJR I-ADJP
. . O
"""

    # Called through the imported module, like the other conversions here.
    conll_tree = chunk.conllstr2tree(conll_text, chunk_types=('NP', 'PP'))
    print(conll_tree.pprint())

    # Demonstrate CoNLL output
    print("CoNLL output:")
    print(chunk.tree2conllstr(conll_tree))
    print('')
def demo_eval(chunkparser, text):
    """
    Demonstration code for evaluating a chunk parser, using a
    C{ChunkScore}.  This function assumes that C{text} contains one
    sentence per line, and that each sentence has the form expected by
    C{chunk.tagstr2tree}.  It runs the given chunk parser on each
    sentence in the text, and scores the result.  It prints the final
    score (precision, recall, and f-measure); and reports the set of
    chunks that were missed and the set of chunks that were incorrect.
    (At most 10 missing chunks and 10 incorrect chunks are reported).

    @param chunkparser: The chunkparser to be tested
    @type chunkparser: C{ChunkParserI}
    @param text: The chunked tagged text that should be used for
        evaluation.
    @type text: C{string}
    """
    from nltk import chunk, Tree

    # Every print below passes exactly one argument in parentheses, so
    # the output is the same under the Python 2 print statement and the
    # Python 3 print function.

    # Evaluate our chunk parser.
    chunkscore = chunk.ChunkScore()
    for sentence in text.split('\n'):
        # Echo the raw line (blank lines included) before parsing it.
        print(sentence)
        sentence = sentence.strip()
        if not sentence:
            continue
        gold = chunk.tagstr2tree(sentence)
        tokens = gold.leaves()
        # Re-parse the bare tokens and score the result against the gold tree.
        test = chunkparser.parse(Tree('S', tokens), trace=1)
        chunkscore.score(gold, test)
        print('')

    print('/' + ('=' * 75) + '\\')
    print('Scoring %s' % (chunkparser,))
    print('-' * 77)
    # One joined line reproduces the spacing of the original chain of
    # comma-terminated print statements.
    print(' '.join([
        'Precision: %5.1f%%' % (chunkscore.precision() * 100),
        ' ' * 4,
        'Recall: %5.1f%%' % (chunkscore.recall() * 100),
        ' ' * 6,
        'F-Measure: %5.1f%%' % (chunkscore.f_measure() * 100),
    ]))

    # Missed chunks, then incorrect chunks.  Each list is fetched once,
    # and the loop names do not rebind the imported ``chunk`` module.
    for heading, fetch in (('Missed:', chunkscore.missed),
                           ('Incorrect:', chunkscore.incorrect)):
        entries = fetch()
        if not entries:
            continue
        print(heading)
        for entry in entries[:10]:
            print('  ' + ' '.join(str(item) for item in entry))
        if len(entries) > 10:
            print(' ...')

    print('\\' + ('=' * 75) + '/')
    print('')
def _read_block(self, stream):
    """
    Convert every item produced by C{read_blankline_block(stream)}
    with C{tagstr2tree} and return the results as a list.

    @param stream: The stream handed on to C{read_blankline_block}.
    @return: One C{tagstr2tree} result per item, in the order the
        items were produced.
    @rtype: C{list}
    """
    # NOTE(review): presumably each item is the text of one
    # blank-line-delimited block -- confirm against read_blankline_block.
    trees = []
    for block_text in read_blankline_block(stream):
        trees.append(tagstr2tree(block_text))
    return trees
from nltk import chunk

# A single tagged sentence in word/TAG form.  The literal is split only
# for readability; the resulting string is unchanged.
tagged_text = (
    " The/DT market/NN for/IN system-management/NN software/NN for/IN "
    "Digital/NNP 's/POS hardware/NN is/VBZ fragmented/JJ enough/RB "
    "that/IN a/DT giant/NN such/JJ as/IN Computer/NNP Associates/NNPS "
    "should/MD do/VB well/RB there/RB ./. "
)

# Named ``sentence_tree`` rather than ``input`` so the builtin ``input``
# is not shadowed for the rest of the module.
sentence_tree = chunk.tagstr2tree(tagged_text)
print(sentence_tree)

# Chunk every determiner followed by a noun as an NP.
cp = chunk.RegexpParser("NP: {<DT><NN>}")
print(cp.parse(sentence_tree))