################
## evaluation ##
################

# Report tagger accuracy on the held-out sentences unless suppressed.
if not args.no_eval:
    print('evaluating %s' % tagger.__class__.__name__)
    print('accuracy: %f' % tagger.evaluate(test_sents))

##############
## pickling ##
##############

# Save the trained tagger, either to an explicit path or to a name
# derived from the corpus and the training options used.
if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
    else:
        # use the last part of the corpus name/path as the prefix
        corpus_base = os.path.split(args.corpus.rstrip('/'))[-1]
        suffixes = []
        if args.brill:
            suffixes.append('brill')
        if args.classifier:
            suffixes.append('_'.join(args.classifier))
        if args.sequential:
            suffixes.append(args.sequential)
        pickle_name = '%s.pickle' % '_'.join([corpus_base] + suffixes)
        fname = os.path.join(os.path.expanduser('~/nltk_data/taggers'), pickle_name)

    dump_object(tagger, fname, trace=args.trace)
# NOTE(review): the first statements here look like the interior of a
# per-label loop whose header is outside this chunk — confirm against
# the full file before relying on the flat layout below.
ref = refsets[label]
test = testsets[label]

# Each metric can return None for an empty set; report 0 in that case.
if not args.no_precision:
    print('%s precision: %f' % (label, precision(ref, test) or 0))

if not args.no_recall:
    print('%s recall: %f' % (label, recall(ref, test) or 0))

if not args.no_fmeasure:
    print('%s f-measure: %f' % (label, f_measure(ref, test) or 0))

# Feature inspection is skipped for multi-binary classifiers and during
# cross-fold evaluation, and only when the classifier supports it.
show_features = (
    args.show_most_informative
    and hasattr(classifier, 'show_most_informative_features')
    and not (args.multi and args.binary)
    and not args.cross_fold)

if show_features:
    print('%d most informative features' % args.show_most_informative)
    classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############

# Persist the trained classifier unless pickling was disabled.
if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
    else:
        name = '%s_%s.pickle' % (args.corpus, '_'.join(args.classifier))
        fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name)

    dump_object(classifier, fname, trace=args.trace)
# Compute reference vs. test label sets for per-label scoring.
refsets, testsets = scoring.ref_test_sets(classifier, test_feats)

for label in labels:
    ref = refsets[label]
    test = testsets[label]

    # FIX: converted Python 2 print statements to print() calls — the
    # original statements are syntax errors under Python 3 and were
    # inconsistent with the print() usage elsewhere in this file.
    # Each metric can return None for an empty set; report 0 instead.
    if not args.no_precision:
        print('%s precision: %f' % (label, precision(ref, test) or 0))

    if not args.no_recall:
        print('%s recall: %f' % (label, recall(ref, test) or 0))

    if not args.no_fmeasure:
        print('%s f-measure: %f' % (label, f_measure(ref, test) or 0))

# Feature inspection is skipped for multi-binary classifiers and during
# cross-fold evaluation, and only when the classifier supports it.
if (args.show_most_informative
        and hasattr(classifier, 'show_most_informative_features')
        and not (args.multi and args.binary)
        and not args.cross_fold):
    print('%d most informative features' % args.show_most_informative)
    classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############

# Persist the trained classifier unless pickling was disabled.
if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
    else:
        name = '%s_%s.pickle' % (args.corpus, '_'.join(args.classifier))
        fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name)

    dump_object(classifier, fname, trace=args.trace)
################
## evaluation ##
################

# Score the chunker against the held-out chunked sentences.
if not args.no_eval:
    if args.trace:
        print('evaluating %s' % chunker.__class__.__name__)

    print(chunker.evaluate(test_chunks))

##############
## pickling ##
##############

# Save the trained chunker, either to an explicit path or to a name
# derived from the corpus and the training options used.
if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
    else:
        # use the last part of the corpus name/path as the prefix
        corpus_base = os.path.split(args.corpus.rstrip('/'))[-1]
        name_parts = [corpus_base]

        if args.classifier:
            name_parts.append('_'.join(args.classifier))
        elif args.sequential:
            name_parts.append(args.sequential)

        pickle_name = '%s.pickle' % '_'.join(name_parts)
        fname = os.path.join(os.path.expanduser('~/nltk_data/chunkers'), pickle_name)

    dump_object(chunker, fname, trace=args.trace)
##########################

# Map child classifiers onto the root classifier's labels, forming a
# hierarchical classifier, then dump the combined result.
labels = combined.labels()
label_classifiers = {}

for h in args.hierarchy:
    # Each hierarchy argument is 'label:path-to-pickled-classifier'.
    label, path = h.split(':')

    if label not in labels:
        raise ValueError('%s is not in root labels: %s' % (label, labels))

    label_classifiers[label] = nltk.data.load(path)

    # FIX: converted Python 2 print statement to print() — syntax error
    # under Python 3 and inconsistent with this file's other chunks.
    if args.trace:
        print('mapping %s to %s from %s' % (label, label_classifiers[label], path))

if label_classifiers:
    if args.trace:
        # FIX: this trace message was built as a bare expression and
        # never printed — it was a silent no-op in the original.
        print('combining %d label classifiers for root %s'
              % (len(label_classifiers), combined))

    combined = multi.HierarchicalClassifier(combined, label_classifiers)

##############################
## dump combined classifier ##
##############################

fname = os.path.expanduser(args.filename)
dump_object(combined, fname, trace=args.trace)
##########################
## Hierarchical combine ##
##########################

# Map child classifiers onto the root classifier's labels, forming a
# hierarchical classifier, then dump the combined result.
labels = combined.labels()
label_classifiers = {}

for h in args.hierarchy:
    # Each hierarchy argument is 'label:path-to-pickled-classifier'.
    label, path = h.split(':')

    if label not in labels:
        raise ValueError('%s is not in root labels: %s' % (label, labels))

    label_classifiers[label] = nltk.data.load(path)

    # FIX: converted Python 2 print statement to print() — syntax error
    # under Python 3 and inconsistent with this file's other chunks.
    if args.trace:
        print('mapping %s to %s from %s' % (label, label_classifiers[label], path))

if label_classifiers:
    if args.trace:
        # FIX: this trace message was built as a bare expression and
        # never printed — it was a silent no-op in the original.
        print('combining %d label classifiers for root %s'
              % (len(label_classifiers), combined))

    combined = multi.HierarchicalClassifier(combined, label_classifiers)

##############################
## dump combined classifier ##
##############################

fname = os.path.expanduser(args.filename)
dump_object(combined, fname, trace=args.trace)
################
## evaluation ##
################

# Score the chunker against the held-out chunked sentences.
# FIX: converted Python 2 print statements to print() calls — the
# originals are syntax errors under Python 3 and inconsistent with the
# print() usage in this file's other chunks.
if not args.no_eval:
    if args.trace:
        print('evaluating %s' % chunker.__class__.__name__)

    print(chunker.evaluate(test_chunks))

##############
## pickling ##
##############

# Save the trained chunker, either to an explicit path or to a name
# derived from the corpus and the training options used.
if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
    else:
        # use the last part of the corpus name/path as the prefix
        parts = [os.path.split(args.corpus.rstrip('/'))[-1]]

        # NOTE(review): this chunk appends args.classifier directly while
        # the parallel chunk elsewhere joins it with '_' — confirm whether
        # args.classifier is a string or a list here.
        if args.classifier:
            parts.append(args.classifier)
        elif args.sequential:
            parts.append(args.sequential)

        name = '%s.pickle' % '_'.join(parts)
        fname = os.path.join(os.path.expanduser('~/nltk_data/chunkers'), name)

    dump_object(chunker, fname, trace=args.trace)