Example #1
0
def test_pipeline(pipeline):
    """
		Support function used to test a pipeline using the specified testSet
	"""
    if not isinstance(pipeline, Pipeline):
        raise ValueError("pipeline must be an instance of Pipeline")

    timer.start()
    if not useConllFile:
        labeled_featuresets = read_tweets_file(originalFile, pipeline).values()
    else:
        labeled_featuresets = read_conll_file(originalFile, conllFile,
                                              pipeline).values()

    validator = CrossValidator(labeled_featuresets)
    print "Elapsed time for data set processing: %.0fs\n" % (timer.stop() /
                                                             1000)

    # test the classifiers
    for classifierName in classifiers:
        timer.start()
        print "- %s " % classifierName,
        print "accuracy:	%f" % validator.validate(classifiers[classifierName],
                                                  numOfBins)[0]
        print "  Elapsed time: %.0fs\n" % (timer.stop() / 1000)
Example #2
0
def test_pipeline(pipeline):
	"""
		Support function used to test a pipeline using the specified testSet
	"""
	if not isinstance(pipeline, Pipeline):
		raise ValueError("pipeline must be an instance of Pipeline")

	timer.start()
	if not useConllFile:
		labeled_featuresets = read_tweets_file(originalFile, pipeline).values()
	else:
		labeled_featuresets = read_conll_file(originalFile, conllFile, pipeline).values()

	validator = CrossValidator(labeled_featuresets)
	print "Elapsed time for data set processing: %.0fs\n" % (timer.stop()/1000)

	# test the classifiers
	for classifierName in classifiers:
		timer.start()
		print "- %s " % classifierName,
		print "accuracy:	%f" % validator.validate(classifiers[classifierName], numOfBins)[0]
		print "  Elapsed time: %.0fs\n" % (timer.stop()/1000)
print ""
print ""

pipeline = Pipeline(tokenizer, tagger, prefilters, postfilters)

file = ["tweeti-b", "tweeti-b.dev"]
if not args.n:
    # Load standard tweet file
    trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file)
    labeled_featuresets = read_tweets_file(trainingfile, pipeline).values()
else:
    # If the not adapter filter has to be used, the program has to load the *.conll files instead
    # the conll files must be in the same dataset path specified by the user.
    trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file)
    conllfile = map(lambda path: args.datasetpath + path + ".conll", file)
    labeled_featuresets = read_conll_file(trainingfile, conllfile,
                                          pipeline).values()

# Runs only when args.predict is falsy: cross-validate `classifier` on the
# labeled feature sets and print the resulting metrics.
if not args.predict:
    ############ Cross Validation
    validator = CrossValidator(labeled_featuresets)
    timer.start()
    # validate() yields five values, unpacked here in order.
    # NOTE(review): args.v is presumably the number of folds/bins -- confirm.
    (acc, conf_matr, prec, recall,
     f_measure) = validator.validate(classifier, args.v)
    print "Accuracy:		%f" % acc
    print "Confusion Matrix:"
    # conf_matr is indexed first by predicted label, then by real label.
    for prec_label in conf_matr:
        for real_label in conf_matr[prec_label]:
            print "\tPredicted: " + prec_label + "\tReal: " + real_label + "\t" + str(
                conf_matr[prec_label][real_label])
    print "Precision:"
    # Iterates the keys of prec (one per label).
    for label in prec:
Example #4
0
# adjust all path
originalFile = map(lambda path: get_project_dir() + "resources/tweeti/" + path + ".tsv", file)
conllFile = map(lambda path: get_project_dir() + "resources/conll/" + path + ".conll", file)

# timer used for timing
timer = Timer()

# classifiers to test
classifiers = {"ShortTextClassifier": ShortTextClassifier(), "SVMClassifier": SVMClassifier(), "Bayes": BayesianClassifier()}
#classifiers = {"LinearClassifier": LinearClassifier()}

#classifiers = {"Bayes": BayesianClassifier()}
# loading and processing data set
timer.start()
labeled_featuresets = read_conll_file(originalFile, conllFile, Pipeline()).values()

validator = CrossValidator(labeled_featuresets)
print "Elapsed time for data set processing: %.0fs\n" % (timer.stop()/1000)

# test the classifiers
for classifierName in classifiers:
	timer.start()
	print "- %s " % classifierName
	(acc, conf_matr, prec, recall, f_measure) = validator.validate(classifiers[classifierName], numOfBins)
	print "Accuracy:		%f" % acc
	print "Confusion Matrix:"
	for prec_label in conf_matr:
		for real_label in conf_matr[prec_label]:
			print "\tPredicted: "+prec_label + "\tReal: "+ real_label +"\t"+ str(conf_matr[prec_label][real_label])
	print "Precision:"
print ""
print ""

pipeline = Pipeline( tokenizer, tagger, prefilters, postfilters )

file = ["tweeti-b", "tweeti-b.dev"]
if not args.n:
	# Load standard tweet file
	trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file)
	labeled_featuresets = read_tweets_file(trainingfile, pipeline).values()
else:
	# If the not adapter filter has to be used, the program has to load the *.conll files instead
	# the conll files must be in the same dataset path specified by the user.
	trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file)
	conllfile = map(lambda path: args.datasetpath + path + ".conll", file)
	labeled_featuresets = read_conll_file(trainingfile, conllfile,pipeline).values()

# Runs only when args.predict is falsy: cross-validate `classifier` on the
# labeled feature sets and print the resulting metrics.
if not args.predict:
############ Cross Validation
	validator = CrossValidator(labeled_featuresets)
	timer.start()
	# validate() yields five values, unpacked here in order.
	# NOTE(review): args.v is presumably the number of folds/bins -- confirm.
	(acc, conf_matr, prec, recall, f_measure) = validator.validate(classifier, args.v)
	print "Accuracy:		%f" % acc
	print "Confusion Matrix:"
	# conf_matr is indexed first by predicted label, then by real label.
	for prec_label in conf_matr:
		for real_label in conf_matr[prec_label]:
			print "\tPredicted: "+prec_label + "\tReal: "+ real_label +"\t"+ str(conf_matr[prec_label][real_label])
	print "Precision:"
	# One line per key of prec; % binds tighter than +, so the value is
	# formatted into ":<tab>%f" before the pieces are concatenated.
	for label in prec:
		print "\t"+ label + ":	%f" % prec[label]