print 'Unknown words: %d' % len(unknown_words)
	
	if args.trace and unknown_words:
		print ', '.join(sorted(unknown_words))
	
	print ''
	print '  '.join(['Tag'.center(taglen), 'Found'.center(9), 'Actual'.center(10),
					'Precision'.center(13), 'Recall'.center(13)])
	print '  '.join(['='*taglen, '='*9, '='*10, '='*13, '='*13])
	
	for tag in sorted(set(tags_found.keys()) | set(tags_actual.keys())):
		found = tags_found[tag]
		actual = tags_actual[tag]
		precision = nltk.metrics.precision(tag_word_refs[tag], tag_word_test[tag])
		recall = nltk.metrics.recall(tag_word_refs[tag], tag_word_test[tag])
		print '  '.join([tag.ljust(taglen), str(found).rjust(9), str(actual).rjust(10),
			str(precision).ljust(13)[:13], str(recall).ljust(13)[:13]])
	
	print '  '.join(['='*taglen, '='*9, '='*10, '='*13, '='*13])
else:
	sents = corpus.sents(**kwargs)
	taglen = 7
	
	if args.fraction != 1.0:
		cutoff = int(math.ceil(len(sents) * args.fraction))
		sents = sents[:cutoff]
	
	for sent in sents:
		for word, tag in tagger.tag(sent):
			tags_found.inc(tag)
			
	# Summary line: how many entries unknown_words holds.
	print('Unknown words: %d' % len(unknown_words))
	
	# With tracing enabled and at least one unknown word, list them sorted.
	if args.trace and unknown_words:
		print(', '.join(sorted(unknown_words)))
	
	print('')
	
	# Centred column headings for the five table columns.
	headings = ['Tag'.center(taglen), 'Found'.center(9), 'Actual'.center(10),
				'Precision'.center(13), 'Recall'.center(13)]
	print('  '.join(headings))
	
	# Rule of '=' characters, one run per column, printed above and below the rows.
	rule = '  '.join('=' * width for width in (taglen, 9, 10, 13, 13))
	print(rule)
	
	# One row per tag present in either count table, in sorted order.
	for tag in sorted(set(tags_found.keys()) | set(tags_actual.keys())):
		found_count = tags_found[tag]
		actual_count = tags_actual[tag]
		refs = tag_word_refs[tag]
		test = tag_word_test[tag]
		prec = nltk.metrics.precision(refs, test)
		rec = nltk.metrics.recall(refs, test)
		# Scores are padded or truncated to exactly 13 characters.
		cells = [
			tag.ljust(taglen),
			str(found_count).rjust(9),
			str(actual_count).rjust(10),
			str(prec).ljust(13)[:13],
			str(rec).ljust(13)[:13],
		]
		print('  '.join(cells))
	
	print(rule)
else:
	# Tag the corpus sentences and count how often each tag is produced.
	# NOTE(review): this clause appears to continue past the visible lines.
	sents = corpus.sents(**kwargs)
	# NOTE(review): presumably the width of the Tag column in a report printed
	# later in this branch (not visible here) -- confirm.
	taglen = 7
	
	if args.fraction != 1.0:
		# Keep only the leading fraction of the sentences; the count is rounded up.
		cutoff = int(math.ceil(len(sents) * args.fraction))
		sents = sents[:cutoff]
	
	for sent in sents:
		# tagger.tag(sent) yields (word, tag) pairs; only the tag is counted.
		for word, tag in tagger.tag(sent):
			tags_found[tag] += 1
			
Beispiel #3
0
    # Print a per-tag results table. print(...) with one parenthesised argument
    # behaves identically on Python 2 and Python 3, whereas the bare print
    # statement is a SyntaxError on Python 3.
    print('')
    print('  '.join([
        'Tag'.center(taglen), 'Found'.center(9), 'Actual'.center(10),
        'Precision'.center(13), 'Recall'.center(13)
    ]))
    # Rule of '=' characters whose widths match the five columns.
    print('  '.join(['=' * taglen, '=' * 9, '=' * 10, '=' * 13, '=' * 13]))

    # One row per tag that appears in either count table, in sorted order.
    for tag in sorted(set(tags_found.keys()) | set(tags_actual.keys())):
        found = tags_found[tag]
        actual = tags_actual[tag]
        precision = nltk.metrics.precision(tag_word_refs[tag],
                                           tag_word_test[tag])
        recall = nltk.metrics.recall(tag_word_refs[tag], tag_word_test[tag])
        # str(x).ljust(13)[:13] pads or truncates each score to 13 characters.
        print('  '.join([
            tag.ljust(taglen),
            str(found).rjust(9),
            str(actual).rjust(10),
            str(precision).ljust(13)[:13],
            str(recall).ljust(13)[:13]
        ]))

    # Closing rule, identical to the one under the headings.
    print('  '.join(['=' * taglen, '=' * 9, '=' * 10, '=' * 13, '=' * 13]))
else:
    # Load the corpus sentences for this branch.
    # NOTE(review): the rest of this clause is cut off in the visible source.
    sents = corpus.sents(**kwargs)
    # NOTE(review): presumably the width of the Tag column in a report printed
    # later in this branch (not visible here) -- confirm.
    taglen = 7

    if args.fraction != 1.0:
        # Keep only the leading fraction of the sentences; the count is rounded up.
        cutoff = int(math.ceil(len(sents) * args.fraction))
        sents = sents[:cutoff]
    # With tracing enabled, list the unknown words in sorted order, then print
    # a per-tag table with fixed column widths of 7, 9, 10, 13 and 10.
    # print(...) with one parenthesised argument behaves identically on
    # Python 2 and Python 3.
    if args.trace:
        print(", ".join(sorted(unknown_words)))

    print("")
    print("  Tag      Found      Actual      Precision      Recall  ")
    print("=======  =========  ==========  =============  ==========")

    # One row per tag that appears in either count table, in sorted order.
    for tag in sorted(set(tags_found.keys()) | set(tags_actual.keys())):
        found = tags_found[tag]
        actual = tags_actual[tag]
        precision = nltk.metrics.precision(tag_word_refs[tag], tag_word_test[tag])
        recall = nltk.metrics.recall(tag_word_refs[tag], tag_word_test[tag])
        print(
            "  ".join(
                [
                    tag.ljust(7),
                    str(found).rjust(9),
                    str(actual).rjust(10),
                    str(precision).ljust(13)[:13],
                    # The Recall heading and rule are 10 characters wide, so the
                    # value is padded AND truncated to 10 to keep rows aligned.
                    str(recall).ljust(10)[:10],
                ]
            )
        )

    print("=======  =========  ==========  =============  ==========")
else:
    # Load the corpus sentences for this branch.
    # NOTE(review): the rest of this clause is cut off in the visible source.
    sents = corpus.sents()

    if args.fraction != 1.0:
        # Keep only the leading fraction of the sentences; the count is rounded up.
        cutoff = int(math.ceil(len(sents) * args.fraction))
        sents = sents[:cutoff]