コード例 #1
0
ファイル: train_tagger.py プロジェクト: Vakna/Quetzalcoatl
################

# Evaluate the trained tagger on test_sents unless --no-eval style
# suppression (args.no_eval) is set.
if not args.no_eval:
	# The announcement is printed before evaluate() is invoked.
	print('evaluating %s' % (tagger.__class__.__name__,))
	eval_accuracy = tagger.evaluate(test_sents)
	print('accuracy: %f' % eval_accuracy)

##############
## pickling ##
##############

# Persist the trained tagger unless pickling was disabled.
if not args.no_pickle:
	if not args.filename:
		# No explicit filename: derive one from the last component of the
		# corpus name/path, followed by whichever of the brill / classifier /
		# sequential options were given, joined with underscores.
		parts = [os.path.basename(args.corpus.rstrip('/'))]

		if args.brill:
			parts += ['brill']

		if args.classifier:
			parts += ['_'.join(args.classifier)]

		if args.sequential:
			parts += [args.sequential]

		name = '_'.join(parts) + '.pickle'
		fname = os.path.join(os.path.expanduser('~/nltk_data/taggers'), name)
	else:
		# An explicit filename wins; only '~' expansion is applied to it.
		fname = os.path.expanduser(args.filename)

	dump_object(tagger, fname, trace=args.trace)
コード例 #2
0
            ref = refsets[label]
            test = testsets[label]

            if not args.no_precision:
                print('%s precision: %f' % (label, precision(ref, test) or 0))

            if not args.no_recall:
                print('%s recall: %f' % (label, recall(ref, test) or 0))

            if not args.no_fmeasure:
                print('%s f-measure: %f' % (label, f_measure(ref, test) or 0))

# Print the feature report only when a count was requested and the classifier
# actually provides show_most_informative_features ...
if args.show_most_informative and hasattr(
        classifier, 'show_most_informative_features'):
    # ... and the run is neither multi+binary nor cross-fold.
    if not ((args.multi and args.binary) or args.cross_fold):
        print('%d most informative features' % args.show_most_informative)
        classifier.show_most_informative_features(
            args.show_most_informative)

##############
## pickling ##
##############
# Persist the trained classifier unless pickling was disabled.
if not args.no_pickle:
    if not args.filename:
        # No explicit filename: use <corpus>_<classifier names>.pickle under
        # ~/nltk_data/classifiers.
        # NOTE(review): args.corpus is used verbatim, so a corpus given as a
        # path carries its separators into the name -- confirm whether only
        # the last path component was intended.
        name = '%s_%s.pickle' % (args.corpus, '_'.join(args.classifier))
        fname = os.path.join(
            os.path.expanduser('~/nltk_data/classifiers'), name)
    else:
        # An explicit filename wins; only '~' expansion is applied to it.
        fname = os.path.expanduser(args.filename)

    dump_object(classifier, fname, trace=args.trace)
コード例 #3
0
			refsets, testsets = scoring.ref_test_sets(classifier, test_feats)
		
		for label in labels:
			ref = refsets[label]
			test = testsets[label]
			
			if not args.no_precision:
				print '%s precision: %f' % (label, precision(ref, test) or 0)
			
			if not args.no_recall:
				print '%s recall: %f' % (label, recall(ref, test) or 0)
			
			if not args.no_fmeasure:
				print '%s f-measure: %f' % (label, f_measure(ref, test) or 0)

# Print the feature report only when a count was requested and the classifier
# actually provides show_most_informative_features ...
if args.show_most_informative and hasattr(classifier, 'show_most_informative_features'):
	# ... and the run is neither multi+binary nor cross-fold.
	if not ((args.multi and args.binary) or args.cross_fold):
		# Single-argument parenthesised print: same output as the statement form.
		print('%d most informative features' % args.show_most_informative)
		classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############
# Persist the trained classifier unless pickling was disabled.
if not args.no_pickle:
	if not args.filename:
		# No explicit filename: use <corpus>_<classifier names>.pickle under
		# ~/nltk_data/classifiers.
		# NOTE(review): args.corpus is used verbatim, so a corpus given as a
		# path carries its separators into the name -- confirm whether only
		# the last path component was intended.
		name = '%s_%s.pickle' % (args.corpus, '_'.join(args.classifier))
		fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name)
	else:
		# An explicit filename wins; only '~' expansion is applied to it.
		fname = os.path.expanduser(args.filename)

	dump_object(classifier, fname, trace=args.trace)
コード例 #4
0
################
## evaluation ##
################

# Evaluate the trained chunker on test_chunks unless args.no_eval is set.
if not args.no_eval:
	# The class-name announcement is only shown when tracing is enabled.
	if args.trace:
		print('evaluating %s' % (chunker.__class__.__name__,))

	chunk_score = chunker.evaluate(test_chunks)
	print(chunk_score)

##############
## pickling ##
##############

# Persist the trained chunker unless pickling was disabled.
if not args.no_pickle:
	if not args.filename:
		# No explicit filename: start from the last component of the corpus
		# name/path, then add the classifier names if any were given,
		# otherwise the sequential option if present.
		parts = [os.path.basename(args.corpus.rstrip('/'))]

		if args.classifier:
			parts += ['_'.join(args.classifier)]
		elif args.sequential:
			parts += [args.sequential]

		name = '_'.join(parts) + '.pickle'
		fname = os.path.join(os.path.expanduser('~/nltk_data/chunkers'), name)
	else:
		# An explicit filename wins; only '~' expansion is applied to it.
		fname = os.path.expanduser(args.filename)

	dump_object(chunker, fname, trace=args.trace)
コード例 #5
0
##########################

# Optionally wrap the combined classifier in a hierarchy: every entry of
# args.hierarchy is "<label>:<path>", mapping one of the root classifier's
# labels to a sub-classifier loaded from <path>.
labels = combined.labels()
label_classifiers = {}

for h in args.hierarchy:
    # Split on the first ':' only, so a path that itself contains a colon
    # (for example a "file:..." resource URL) is kept intact instead of
    # breaking the two-value unpacking.
    label, path = h.split(':', 1)

    if label not in labels:
        raise ValueError('%s is not in root labels: %s' % (label, labels))

    label_classifiers[label] = nltk.data.load(path)

    if args.trace:
        # Single-argument parenthesised print: same output whether print is
        # a statement or a function.
        print('mapping %s to %s from %s' % (label, label_classifiers[label],
                                            path))

if label_classifiers:
    if args.trace:
        # This message used to be a bare string expression and was never
        # shown; it is now actually printed.
        print('combining %d label classifiers for root %s' % (
            len(label_classifiers), combined))

    combined = multi.HierarchicalClassifier(combined, label_classifiers)

##############################
## dump combined classifier ##
##############################

# args.filename is used as given apart from '~' expansion.
fname = os.path.expanduser(args.filename)
dump_object(combined, fname, trace=args.trace)
コード例 #6
0
##########################
## Hierarchical combine ##
##########################

# Optionally wrap the combined classifier in a hierarchy: every entry of
# args.hierarchy is "<label>:<path>", mapping one of the root classifier's
# labels to a sub-classifier loaded from <path>.
labels = combined.labels()
label_classifiers = {}

for h in args.hierarchy:
	# Split on the first ':' only, so a path that itself contains a colon
	# (for example a "file:..." resource URL) is kept intact instead of
	# breaking the two-value unpacking.
	label, path = h.split(':', 1)

	if label not in labels:
		raise ValueError('%s is not in root labels: %s' % (label, labels))

	label_classifiers[label] = nltk.data.load(path)

	if args.trace:
		# Single-argument parenthesised print: same output whether print is
		# a statement or a function.
		print('mapping %s to %s from %s' % (label, label_classifiers[label], path))

if label_classifiers:
	if args.trace:
		# This message used to be a bare string expression and was never
		# shown; it is now actually printed.
		print('combining %d label classifiers for root %s' % (len(label_classifiers), combined))

	combined = multi.HierarchicalClassifier(combined, label_classifiers)

##############################
## dump combined classifier ##
##############################

# args.filename is used as given apart from '~' expansion.
fname = os.path.expanduser(args.filename)
dump_object(combined, fname, trace=args.trace)
コード例 #7
0
################
## evaluation ##
################

# Evaluate the trained chunker on test_chunks unless args.no_eval is set.
if not args.no_eval:
	# The class-name announcement is only shown when tracing is enabled.
	if args.trace:
		print('evaluating %s' % (chunker.__class__.__name__,))

	# Single-argument parenthesised print: same output as the statement form.
	chunk_score = chunker.evaluate(test_chunks)
	print(chunk_score)

##############
## pickling ##
##############

# Persist the trained chunker unless pickling was disabled.
if not args.no_pickle:
	if not args.filename:
		# No explicit filename: start from the last component of the corpus
		# name/path, then add the classifier option if given, otherwise the
		# sequential option if present.
		parts = [os.path.basename(args.corpus.rstrip('/'))]

		if args.classifier:
			# NOTE(review): args.classifier is appended as-is, so the join
			# below assumes it is a single string -- confirm it is never a
			# list of names.
			parts += [args.classifier]
		elif args.sequential:
			parts += [args.sequential]

		name = '_'.join(parts) + '.pickle'
		fname = os.path.join(os.path.expanduser('~/nltk_data/chunkers'), name)
	else:
		# An explicit filename wins; only '~' expansion is applied to it.
		fname = os.path.expanduser(args.filename)

	dump_object(chunker, fname, trace=args.trace)