Example #1
0
def demo_eval(chunkparser, text):
    """
    Demonstration code for evaluating a chunk parser, using a
    C{ChunkScore}.  This function assumes that C{text} contains one
    sentence per line, and that each sentence has the form expected by
    C{chunk.tagstr2tree}.  It runs the given chunk parser on each
    sentence in the text, and scores the result.  It prints the final
    score (precision, recall, and f-measure); and reports the set of
    chunks that were missed and the set of chunks that were incorrect.
    (At most 10 missing chunks and 10 incorrect chunks are reported).

    All output is written with C{print}; nothing is returned.

    @param chunkparser: The chunkparser to be tested
    @type chunkparser: C{ChunkParserI}
    @param text: The chunked tagged text that should be used for
        evaluation.
    @type text: C{string}
    """

    from nltk import chunk, Tree

    # Evaluate our chunk parser.
    chunkscore = chunk.ChunkScore()

    for sentence in text.split('\n'):
        # The raw line is echoed before the blank-line check, so empty
        # lines of the input show up in the output as well.
        print(sentence)
        sentence = sentence.strip()
        if not sentence:
            continue
        gold = chunk.tagstr2tree(sentence)
        # Re-parse the flat token list so the parser never sees the
        # gold-standard chunk structure.
        tokens = gold.leaves()
        test = chunkparser.parse(Tree('S', tokens), trace=1)
        chunkscore.score(gold, test)
        print('')

    print('/' + ('=' * 75) + '\\')
    print('Scoring %s' % (chunkparser,))
    print('-' * 77)
    # The three measures share one line; the all-space items are padding
    # between the columns.
    print(' '.join([
        'Precision: %5.1f%%' % (chunkscore.precision() * 100), ' ' * 4,
        'Recall: %5.1f%%' % (chunkscore.recall() * 100), ' ' * 6,
        'F-Measure: %5.1f%%' % (chunkscore.f_measure() * 100),
    ]))

    _print_chunk_sample('Missed:', chunkscore.missed())
    _print_chunk_sample('Incorrect:', chunkscore.incorrect())

    print('\\' + ('=' * 75) + '/')
    print('')


def _print_chunk_sample(heading, chunks, limit=10):
    """
    Print C{heading} followed by at most C{limit} of C{chunks}, one
    chunk per line; print an ellipsis line if some were left out.
    Nothing is printed when C{chunks} is empty.

    @param heading: The line printed above the chunks.
    @type heading: C{string}
    @param chunks: The chunks to report; each one is iterated and its
        items are joined with single spaces.
    @type chunks: C{list}
    @param limit: The maximum number of chunks to print.
    @type limit: C{int}
    """
    if not chunks:
        return
    print(heading)
    for members in chunks[:limit]:
        print('   ' + ' '.join(str(member) for member in members))
    if len(chunks) > limit:
        print('  ...')
Example #2
0
def demo():
    """
    Demonstrate the string/tree conversion functions of C{nltk.chunk}.

    A bracketed tagged sentence is converted with C{chunk.tagstr2tree}
    and printed.  A CoNLL-style (word, tag, IOB chunk tag) string is
    then converted with C{chunk.conllstr2tree}, passing
    C{chunk_types=('NP', 'PP')}; the resulting tree is printed, followed
    by the string that C{chunk.tree2conllstr} produces from it.

    Takes no arguments and returns nothing; all output is printed.
    """
    from nltk import chunk

    s = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./."
    t = chunk.tagstr2tree(s, chunk_node='NP')
    # NOTE(review): relies on pprint() returning the formatted string
    # rather than printing it itself -- confirm for the NLTK in use.
    print(t.pprint())
    print('')

    s = """
These DT B-NP
research NN I-NP
protocols NNS I-NP
offer VBP B-VP
to TO B-PP
the DT B-NP
patient NN I-NP
not RB O
only RB O
the DT B-NP
very RB I-NP
best JJS I-NP
therapy NN I-NP
which WDT B-NP
we PRP B-NP
have VBP B-VP
established VBN I-VP
today NN B-NP
but CC B-NP
also RB I-NP
the DT B-NP
hope NN I-NP
of IN B-PP
something NN B-NP
still RB B-ADJP
better JJR I-ADJP
. . O
"""

    # Go through the imported module, like every other call here, so the
    # function does not depend on a bare name from the enclosing module.
    conll_tree = chunk.conllstr2tree(s, chunk_types=('NP', 'PP'))
    print(conll_tree.pprint())

    # Demonstrate CoNLL output
    print("CoNLL output:")
    print(chunk.tree2conllstr(conll_tree))
    print('')
Example #3
0
def demo():
    """
    Show C{nltk.chunk}'s conversions between chunk strings and trees.

    First, a bracketed tagged sentence is turned into a tree by
    C{chunk.tagstr2tree} and printed.  Second, a CoNLL-style string
    (one "word tag IOB-tag" triple per line) is turned into a tree by
    C{chunk.conllstr2tree}, called with C{chunk_types=('NP', 'PP')};
    that tree is printed, and so is the output of
    C{chunk.tree2conllstr} for it.

    The function has no parameters and no return value.
    """
    from nltk import chunk

    bracketed = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./."
    bracketed_tree = chunk.tagstr2tree(bracketed, chunk_node='NP')
    # NOTE(review): assumes pprint() returns a string (it is printed
    # here) -- verify against the installed NLTK version.
    print(bracketed_tree.pprint())
    print('')

    conll_text = """
These DT B-NP
research NN I-NP
protocols NNS I-NP
offer VBP B-VP
to TO B-PP
the DT B-NP
patient NN I-NP
not RB O
only RB O
the DT B-NP
very RB I-NP
best JJS I-NP
therapy NN I-NP
which WDT B-NP
we PRP B-NP
have VBP B-VP
established VBN I-VP
today NN B-NP
but CC B-NP
also RB I-NP
the DT B-NP
hope NN I-NP
of IN B-PP
something NN B-NP
still RB B-ADJP
better JJR I-ADJP
. . O
"""

    # Called via the imported module, consistent with tagstr2tree and
    # tree2conllstr, instead of as an unqualified name.
    conll_tree = chunk.conllstr2tree(conll_text, chunk_types=('NP', 'PP'))
    print(conll_tree.pprint())

    # Demonstrate CoNLL output
    print("CoNLL output:")
    print(chunk.tree2conllstr(conll_tree))
    print('')
Example #4
0
def demo_eval(chunkparser, text):
    """
    Demonstration code for evaluating a chunk parser, using a
    C{ChunkScore}.  This function assumes that C{text} contains one
    sentence per line, and that each sentence has the form expected by
    C{chunk.tagstr2tree}.  It runs the given chunk parser on each
    sentence in the text, and scores the result.  It prints the final
    score (precision, recall, and f-measure); and reports the set of
    chunks that were missed and the set of chunks that were incorrect.
    (At most 10 missing chunks and 10 incorrect chunks are reported).

    The function prints its report and returns nothing.

    @param chunkparser: The chunkparser to be tested
    @type chunkparser: C{ChunkParserI}
    @param text: The chunked tagged text that should be used for
        evaluation.
    @type text: C{string}
    """

    from nltk import chunk, Tree

    max_reported = 10

    # Evaluate our chunk parser.
    chunkscore = chunk.ChunkScore()

    for raw_line in text.split('\n'):
        # Every input line is echoed, including blank ones that are then
        # skipped.
        print(raw_line)
        sentence = raw_line.strip()
        if not sentence:
            continue
        gold = chunk.tagstr2tree(sentence)
        # Only the leaves are handed to the parser, so it has to rebuild
        # the chunk structure on its own.
        guess = chunkparser.parse(Tree('S', gold.leaves()), trace=1)
        chunkscore.score(gold, guess)
        print('')

    print('/' + ('=' * 75) + '\\')
    print('Scoring %s' % (chunkparser,))
    print('-' * 77)
    # Build the summary line in one piece; the blank items pad the gaps
    # between the three measures.
    summary = [
        'Precision: %5.1f%%' % (chunkscore.precision() * 100), ' ' * 4,
        'Recall: %5.1f%%' % (chunkscore.recall() * 100), ' ' * 6,
        'F-Measure: %5.1f%%' % (chunkscore.f_measure() * 100),
    ]
    print(' '.join(summary))

    # Missed chunks, then incorrect chunks.  Each list is fetched once,
    # and the loop names do not rebind the imported ``chunk`` module.
    sections = (
        ('Missed:', chunkscore.missed()),
        ('Incorrect:', chunkscore.incorrect()),
    )
    for heading, found in sections:
        if not found:
            continue
        print(heading)
        for members in found[:max_reported]:
            print('   ' + ' '.join(str(member) for member in members))
        if len(found) > max_reported:
            print('  ...')

    print('\\' + ('=' * 75) + '/')
    print('')
Example #5
0
 def _read_block(self, stream):
     """
     Return a list holding the result of C{tagstr2tree} for each item
     that C{read_blankline_block(stream)} produces, in order.
     """
     trees = []
     for tagged in read_blankline_block(stream):
         trees.append(tagstr2tree(tagged))
     return trees
Example #6
0
 def _read_block(self, stream):
     """
     Apply C{tagstr2tree} to every item returned by
     C{read_blankline_block(stream)} and return the results as a list.
     """
     blocks = read_blankline_block(stream)
     return list(map(tagstr2tree, blocks))
Example #7
0
# Chunk a tagged sentence with a one-rule regular-expression grammar and
# print the tree before and after chunking.
from nltk import chunk

tagged_text = """
The/DT market/NN for/IN system-management/NN software/NN for/IN 
Digital/NNP 's/POS hardware/NN is/VBZ fragmented/JJ enough/RB 
that/IN a/DT giant/NN such/JJ as/IN Computer/NNP Associates/NNPS 
should/MD do/VB well/RB there/RB ./.
"""
# Named ``sentence_tree`` rather than ``input`` so the builtin of that
# name is not shadowed.
sentence_tree = chunk.tagstr2tree(tagged_text)
print(sentence_tree)

# Single rule: a <DT> tag immediately followed by a <NN> tag forms an NP.
cp = chunk.RegexpParser("NP: {<DT><NN>}")
print(cp.parse(sentence_tree))
Example #8
0
# Build a tree from a word/TAG string, print it, then print the result of
# running a regular-expression chunk parser over it.
from nltk import chunk

tagged_text = """
The/DT market/NN for/IN system-management/NN software/NN for/IN 
Digital/NNP 's/POS hardware/NN is/VBZ fragmented/JJ enough/RB 
that/IN a/DT giant/NN such/JJ as/IN Computer/NNP Associates/NNPS 
should/MD do/VB well/RB there/RB ./.
"""
# Avoid the name ``input`` here: it would hide the builtin function.
unchunked = chunk.tagstr2tree(tagged_text)
print(unchunked)

# The grammar has one rule, which groups <DT><NN> tag sequences as NP.
cp = chunk.RegexpParser("NP: {<DT><NN>}")
print(cp.parse(unchunked))