# Report section of a tagger-coverage script, written with Python 2 print
# statements. In order, it:
#   * prints the number of unknown words and, when args.trace is set and the
#     collection is non-empty, the sorted unknown words joined by ', ';
#   * prints a table with one row per tag present in tags_found or
#     tags_actual (sorted): found count, actual count, and the values of
#     nltk.metrics.precision / nltk.metrics.recall for tag_word_refs[tag]
#     versus tag_word_test[tag], each padded to and cut at 13 characters;
#   * in the trailing `else:` branch, reads sentences from
#     corpus.sents(**kwargs), keeps only the first ceil(len * args.fraction)
#     of them when args.fraction != 1.0, and counts every tag the tagger
#     emits via tags_found.inc(tag).
# NOTE(review): the `if` that owns this `else:` is outside the visible
# fragment, and the else-body may continue past it -- confirm against the
# full script.
# NOTE(review): the statements below sit on one physical line; the original
# line breaks and indentation appear to have been lost and must be restored
# before this can run. Spacing inside the string literals may have been
# collapsed as well -- verify against the original.
# NOTE(review): tags_found.inc(...) presumably relies on an older NLTK
# FreqDist API -- confirm the NLTK version in use.
print 'Unknown words: %d' % len(unknown_words) if args.trace and unknown_words: print ', '.join(sorted(unknown_words)) print '' print ' '.join(['Tag'.center(taglen), 'Found'.center(9), 'Actual'.center(10), 'Precision'.center(13), 'Recall'.center(13)]) print ' '.join(['='*taglen, '='*9, '='*10, '='*13, '='*13]) for tag in sorted(set(tags_found.keys()) | set(tags_actual.keys())): found = tags_found[tag] actual = tags_actual[tag] precision = nltk.metrics.precision(tag_word_refs[tag], tag_word_test[tag]) recall = nltk.metrics.recall(tag_word_refs[tag], tag_word_test[tag]) print ' '.join([tag.ljust(taglen), str(found).rjust(9), str(actual).rjust(10), str(precision).ljust(13)[:13], str(recall).ljust(13)[:13]]) print ' '.join(['='*taglen, '='*9, '='*10, '='*13, '='*13]) else: sents = corpus.sents(**kwargs) taglen = 7 if args.fraction != 1.0: cutoff = int(math.ceil(len(sents) * args.fraction)) sents = sents[:cutoff] for sent in sents: for word, tag in tagger.tag(sent): tags_found.inc(tag)
# Report section of a tagger-coverage script, written with print() calls.
# In order, it:
#   * prints the number of unknown words and, when args.trace is set and the
#     collection is non-empty, the sorted unknown words joined by ', ';
#   * prints a table with one row per tag present in tags_found or
#     tags_actual (sorted): found count, actual count, and the values of
#     nltk.metrics.precision / nltk.metrics.recall for tag_word_refs[tag]
#     versus tag_word_test[tag], each padded to and cut at 13 characters;
#   * in the trailing `else:` branch, reads sentences from
#     corpus.sents(**kwargs), keeps only the first ceil(len * args.fraction)
#     of them when args.fraction != 1.0, and increments tags_found[tag] for
#     every tag the tagger emits.
# NOTE(review): the `if` that owns this `else:` is outside the visible
# fragment, and the else-body may continue past it -- confirm against the
# full script.
# NOTE(review): the statements below sit on one physical line; the original
# line breaks and indentation appear to have been lost and must be restored
# before this can run. Spacing inside the string literals may have been
# collapsed as well -- verify against the original.
print('Unknown words: %d' % len(unknown_words)) if args.trace and unknown_words: print(', '.join(sorted(unknown_words))) print('') print(' '.join(['Tag'.center(taglen), 'Found'.center(9), 'Actual'.center(10), 'Precision'.center(13), 'Recall'.center(13)])) print(' '.join(['='*taglen, '='*9, '='*10, '='*13, '='*13])) for tag in sorted(set(tags_found.keys()) | set(tags_actual.keys())): found = tags_found[tag] actual = tags_actual[tag] precision = nltk.metrics.precision(tag_word_refs[tag], tag_word_test[tag]) recall = nltk.metrics.recall(tag_word_refs[tag], tag_word_test[tag]) print(' '.join([tag.ljust(taglen), str(found).rjust(9), str(actual).rjust(10), str(precision).ljust(13)[:13], str(recall).ljust(13)[:13]])) print(' '.join(['='*taglen, '='*9, '='*10, '='*13, '='*13])) else: sents = corpus.sents(**kwargs) taglen = 7 if args.fraction != 1.0: cutoff = int(math.ceil(len(sents) * args.fraction)) sents = sents[:cutoff] for sent in sents: for word, tag in tagger.tag(sent): tags_found[tag] += 1
# Table-printing section of a tagger-coverage script, written with Python 2
# print statements. In order, it:
#   * prints an empty line, a header row (column titles centred to taglen,
#     9, 10, 13 and 13 characters) and a row of '=' rules of the same widths;
#   * prints one row per tag present in tags_found or tags_actual (sorted):
#     found count, actual count, and the values of nltk.metrics.precision /
#     nltk.metrics.recall for tag_word_refs[tag] versus tag_word_test[tag],
#     each padded to and cut at 13 characters, followed by a closing rule;
#   * in the trailing `else:` branch, reads sentences from
#     corpus.sents(**kwargs), resets taglen to 7, and keeps only the first
#     ceil(len * args.fraction) sentences when args.fraction != 1.0.
# NOTE(review): the `if` that owns this `else:` is outside the visible
# fragment, and the else-body may continue past it -- confirm against the
# full script.
# NOTE(review): the statements below sit on one physical line; the original
# line breaks and indentation appear to have been lost and must be restored
# before this can run. Spacing inside the string literals may have been
# collapsed as well -- verify against the original.
print '' print ' '.join([ 'Tag'.center(taglen), 'Found'.center(9), 'Actual'.center(10), 'Precision'.center(13), 'Recall'.center(13) ]) print ' '.join(['=' * taglen, '=' * 9, '=' * 10, '=' * 13, '=' * 13]) for tag in sorted(set(tags_found.keys()) | set(tags_actual.keys())): found = tags_found[tag] actual = tags_actual[tag] precision = nltk.metrics.precision(tag_word_refs[tag], tag_word_test[tag]) recall = nltk.metrics.recall(tag_word_refs[tag], tag_word_test[tag]) print ' '.join([ tag.ljust(taglen), str(found).rjust(9), str(actual).rjust(10), str(precision).ljust(13)[:13], str(recall).ljust(13)[:13] ]) print ' '.join(['=' * taglen, '=' * 9, '=' * 10, '=' * 13, '=' * 13]) else: sents = corpus.sents(**kwargs) taglen = 7 if args.fraction != 1.0: cutoff = int(math.ceil(len(sents) * args.fraction)) sents = sents[:cutoff]
# Table-printing section of a tagger-coverage script, written with Python 2
# print statements and hard-coded column headers. In order, it:
#   * when args.trace is set, prints the sorted unknown words joined by ", "
#     (there is no emptiness check here, so an empty collection still
#     produces an output line);
#   * prints an empty line, a literal header row and a literal '=' rule;
#   * prints one row per tag present in tags_found or tags_actual (sorted):
#     tag padded to 7, found count right-aligned to 9, actual count
#     right-aligned to 10, then the values of nltk.metrics.precision /
#     nltk.metrics.recall for tag_word_refs[tag] versus tag_word_test[tag],
#     followed by a closing rule;
#   * in the trailing `else:` branch, reads sentences from corpus.sents()
#     and keeps only the first ceil(len * args.fraction) of them when
#     args.fraction != 1.0.
# NOTE(review): recall is padded with ljust(10) but truncated with [:13],
# while the last rule segment is 10 '=' characters wide; a recall string
# longer than 10 characters would overrun its column -- confirm whether
# [:10] was intended.
# NOTE(review): the `if` that owns this `else:` is outside the visible
# fragment, and the else-body may continue past it -- confirm against the
# full script.
# NOTE(review): the statements below sit on one physical line; the original
# line breaks and indentation appear to have been lost and must be restored
# before this can run. Spacing inside the string literals (header row, join
# separators) may have been collapsed as well -- verify against the original.
if args.trace: print ", ".join(sorted(unknown_words)) print "" print " Tag Found Actual Precision Recall " print "======= ========= ========== ============= ==========" for tag in sorted(set(tags_found.keys()) | set(tags_actual.keys())): found = tags_found[tag] actual = tags_actual[tag] precision = nltk.metrics.precision(tag_word_refs[tag], tag_word_test[tag]) recall = nltk.metrics.recall(tag_word_refs[tag], tag_word_test[tag]) print " ".join( [ tag.ljust(7), str(found).rjust(9), str(actual).rjust(10), str(precision).ljust(13)[:13], str(recall).ljust(10)[:13], ] ) print "======= ========= ========== ============= ==========" else: sents = corpus.sents() if args.fraction != 1.0: cutoff = int(math.ceil(len(sents) * args.fraction)) sents = sents[:cutoff]