Ejemplo n.º 1
0
    print 'loading chunker %s' % args.chunker

# args.chunker == 'pattern' selects chunkers.PatternChunker; any other value
# is passed to load_model().
chunker = (
    chunkers.PatternChunker()
    if args.chunker == 'pattern'
    else load_model(args.chunker)
)

#######################
## coverage analysis ##
#######################

if args.score:
    if args.trace:
        print 'evaluating chunker score\n'

    chunked_sents = corpus.chunked_sents()

    if args.fraction != 1.0:
        cutoff = int(math.ceil(len(chunked_sents) * args.fraction))
        chunked_sents = chunked_sents[:cutoff]

    print chunker.evaluate(chunked_sents), '\n'

if args.trace:
    print 'analyzing chunker coverage of %s with %s\n' % (
        args.corpus, chunker.__class__.__name__)

iobs_found = FreqDist()
sents = corpus.sents()

if args.fraction != 1.0:
	print 'loading chunker %s' % args.chunker

# Any chunker name other than 'pattern' is passed to load_model(); 'pattern'
# itself selects chunkers.PatternChunker.
if args.chunker != 'pattern':
    chunker = load_model(args.chunker)
else:
    chunker = chunkers.PatternChunker()

#######################
## coverage analysis ##
#######################

if args.score:
	if args.trace:
		print 'evaluating chunker score\n'
	
	chunked_sents = corpus.chunked_sents()
	
	if args.fraction != 1.0:
		cutoff = int(math.ceil(len(chunked_sents) * args.fraction))
		chunked_sents = chunked_sents[:cutoff]
	
	print chunker.evaluate(chunked_sents), '\n'

if args.trace:
	print 'analyzing chunker coverage of %s with %s\n' % (args.corpus, chunker.__class__.__name__)

iobs_found = FreqDist()
sents = corpus.sents()

if args.fraction != 1.0:
	cutoff = int(math.ceil(len(sents) * args.fraction))
tagger = nltk.data.load(args.tagger)

if args.trace:
    print "loading chunker %s" % args.chunker

chunker = nltk.data.load(args.chunker)

#######################
## coverage analysis ##
#######################

if args.score:
    if args.trace:
        print "evaluating chunker score\n"

    print chunker.evaluate(corpus.chunked_sents()), "\n"

if args.trace:
    print "analyzing chunker coverage of %s with %s\n" % (args.corpus, chunker.__class__.__name__)

iobs_found = FreqDist()

for sent in corpus.sents():
    tree = chunker.parse(tagger.tag(sent))

    for child in tree.subtrees(lambda t: t.node != "S"):
        iobs_found.inc(child.node)

# All distinct labels recorded in iobs_found.
iobs = iobs_found.samples()
# Width value: at least 7, widened to the longest label (presumably used to
# justify a column of labels in later output -- confirm against the code
# that follows).
# max() receives one list instead of unpacked positional arguments so that an
# empty `iobs` still works: max(7, *[]) collapses to max(7), which raises
# TypeError because a lone int argument is treated as the iterable.
justify = max([7] + [len(iob) for iob in iobs])