def test_pipeline(pipeline):
    """Support function: cross-validates every configured classifier on the
    data set produced by the given pipeline."""
    if not isinstance(pipeline, Pipeline):
        raise ValueError("pipeline must be an instance of Pipeline")

    timer.start()
    if not useConllFile:
        labeled_featuresets = read_tweets_file(originalFile, pipeline).values()
    else:
        labeled_featuresets = read_conll_file(originalFile, conllFile, pipeline).values()
    validator = CrossValidator(labeled_featuresets)
    print "Elapsed time for data set processing: %.0fs\n" % (timer.stop() / 1000)

    # test the classifiers
    for classifierName in classifiers:
        timer.start()
        print "- %s " % classifierName,
        print "accuracy: %f" % validator.validate(classifiers[classifierName], numOfBins)[0]
        print " Elapsed time: %.0fs\n" % (timer.stop() / 1000)
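# The Timer helper is not shown in these snippets. Below is a minimal sketch of
# the interface the code above assumes (start(), then stop() returning the
# elapsed time in milliseconds). This is an assumption about the helper, not
# the project's actual implementation.
import time

class Timer(object):
    def start(self):
        """Record the current time as the starting point."""
        self._start = time.time()

    def stop(self):
        """Return the milliseconds elapsed since the last start()."""
        return (time.time() - self._start) * 1000.0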
print "" print "" pipeline = Pipeline(tokenizer, tagger, prefilters, postfilters) file = ["tweeti-b", "tweeti-b.dev"] if not args.n: # Load standard tweet file trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file) labeled_featuresets = read_tweets_file(trainingfile, pipeline).values() else: # If the not adapter filter has to be used, the program has to load the *.conll files instead # the conll files must be in the same dataset path specified by the user. trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file) conllfile = map(lambda path: args.datasetpath + path + ".conll", file) labeled_featuresets = read_conll_file(trainingfile, conllfile, pipeline).values() if not args.predict: ############ Cross Validation validator = CrossValidator(labeled_featuresets) timer.start() (acc, conf_matr, prec, recall, f_measure) = validator.validate(classifier, args.v) print "Accuracy: %f" % acc print "Confusion Matrix:" for prec_label in conf_matr: for real_label in conf_matr[prec_label]: print "\tPredicted: " + prec_label + "\tReal: " + real_label + "\t" + str( conf_matr[prec_label][real_label]) print "Precision:" for label in prec:
# adjust all paths
file = ["tweeti-b", "tweeti-b.dev"]  # dataset basenames, as in the training scripts above
originalFile = map(lambda path: get_project_dir() + "resources/tweeti/" + path + ".tsv", file)
conllFile = map(lambda path: get_project_dir() + "resources/conll/" + path + ".conll", file)

# timer used for timing
timer = Timer()

# classifiers to test (numOfBins, the number of cross-validation folds, is
# assumed to be defined elsewhere in the original script)
classifiers = {"ShortTextClassifier": ShortTextClassifier(),
               "SVMClassifier": SVMClassifier(),
               "Bayes": BayesianClassifier()}
#classifiers = {"LinearClassifier": LinearClassifier()}
#classifiers = {"Bayes": BayesianClassifier()}

# loading and processing the data set
timer.start()
labeled_featuresets = read_conll_file(originalFile, conllFile, Pipeline()).values()
validator = CrossValidator(labeled_featuresets)
print "Elapsed time for data set processing: %.0fs\n" % (timer.stop() / 1000)

# test the classifiers
for classifierName in classifiers:
    timer.start()
    print "- %s " % classifierName
    (acc, conf_matr, prec, recall, f_measure) = validator.validate(classifiers[classifierName], numOfBins)
    print "Accuracy: %f" % acc
    print "Confusion Matrix:"
    for prec_label in conf_matr:
        for real_label in conf_matr[prec_label]:
            print "\tPredicted: " + prec_label + "\tReal: " + real_label + "\t" + str(conf_matr[prec_label][real_label])
    print "Precision:"
    for label in prec:
        print "\t" + label + ": %f" % prec[label]
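# The prec / recall / f_measure values above are per-label dicts (label -> float).
# A small sketch of summarising them with a macro average; this is an assumed
# helper for illustration, not part of the project:
def macro_average(per_label_scores):
    scores = per_label_scores.values()
    return sum(scores) / len(scores) if scores else 0.0

# e.g. macro_average(prec), macro_average(f_measure)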
print "" print "" pipeline = Pipeline( tokenizer, tagger, prefilters, postfilters ) file = ["tweeti-b", "tweeti-b.dev"] if not args.n: # Load standard tweet file trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file) labeled_featuresets = read_tweets_file(trainingfile, pipeline).values() else: # If the not adapter filter has to be used, the program has to load the *.conll files instead # the conll files must be in the same dataset path specified by the user. trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file) conllfile = map(lambda path: args.datasetpath + path + ".conll", file) labeled_featuresets = read_conll_file(trainingfile, conllfile,pipeline).values() if not args.predict: ############ Cross Validation validator = CrossValidator(labeled_featuresets) timer.start() (acc, conf_matr, prec, recall, f_measure) = validator.validate(classifier, args.v) print "Accuracy: %f" % acc print "Confusion Matrix:" for prec_label in conf_matr: for real_label in conf_matr[prec_label]: print "\tPredicted: "+prec_label + "\tReal: "+ real_label +"\t"+ str(conf_matr[prec_label][real_label]) print "Precision:" for label in prec: print "\t"+ label + ": %f" % prec[label]