def test_pipeline(pipeline):
    """Support function that cross-validates every configured classifier
    using the given pipeline."""
    if not isinstance(pipeline, Pipeline):
        raise ValueError("pipeline must be an instance of Pipeline")

    # build the labeled feature sets, reading either the raw tweet files
    # or the pre-tagged *.conll files
    timer.start()
    if not useConllFile:
        labeled_featuresets = read_tweets_file(originalFile, pipeline).values()
    else:
        labeled_featuresets = read_conll_file(originalFile, conllFile, pipeline).values()
    validator = CrossValidator(labeled_featuresets)
    print "Elapsed time for data set processing: %.0fs\n" % (timer.stop() / 1000)

    # test the classifiers
    for classifierName in classifiers:
        timer.start()
        print "- %s " % classifierName,
        print "accuracy: %f" % validator.validate(classifiers[classifierName], numOfBins)[0]
        print " Elapsed time: %.0fs\n" % (timer.stop() / 1000)
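# --- Hypothetical usage sketch (not part of the original source) ---
# How test_pipeline might be invoked, assuming the module-level names it
# reads (timer, classifiers, numOfBins, originalFile, conllFile,
# useConllFile) have already been configured by the surrounding script:
tokenizer = POSTokenizer()
tagger = POSTagger()
test_pipeline(Pipeline(tokenizer, tagger, [], []))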
# classifier
from classifier.shortTextClassifier import ShortTextClassifier
classifier = ShortTextClassifier()

# file paths
originalFile = map(lambda path: get_project_dir() + "resources/tweeti/" + path + ".tsv",
                   ["tweeti-b", "tweeti-b.dev"])
testingFile = map(lambda path: get_project_dir() + "resources/tweeti/" + path + ".tsv",
                  ["twitter-test-input-B"])

# initialize the pipeline used to transform the tweets
tokenizer = POSTokenizer()
tagger = POSTagger()
pipeline = Pipeline(tokenizer, tagger, [], [])

# read the training file
labeled_featuresets = read_tweets_file(originalFile, pipeline).values()

# training
classifier.train(labeled_featuresets)

# read the test file, keeping only the featureset part of each entry
labeled_featuresets_test = read_tweets_file(testingFile, pipeline)
for key in labeled_featuresets_test:
    labeled_featuresets_test[key] = labeled_featuresets_test[key][0]

# classification
labeled_featuresets_test = classifier.classify_dict(labeled_featuresets_test)

# output generation
output = open(get_project_dir() + "resources/twitter-test-input-B.out", 'w')
for key, label in labeled_featuresets_test.iteritems():
    # NOTE: the loop body was missing in the original snippet; writing one
    # "<tweet id><TAB><predicted label>" line per tweet is an assumption.
    output.write("%s\t%s\n" % (key, label))
output.close()
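# --- Hypothetical sketch (not part of the original source) ---
# classify_dict is not shown above; presumably it replaces each tweet's
# featureset with the predicted label. A minimal version might look like
# this, assuming the classifier exposes a per-instance classify(featureset)
# method (hypothetical name):
def classify_dict(self, featuresets):
    # map every tweet id to the label predicted for its featureset
    return dict((key, self.classify(feats))
                for key, feats in featuresets.iteritems())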
from adapter.tagRemover import TagRemover
tagrm = TagRemover()
postfilters.append(tagrm)
print " TagRemover."
print "]"
print ""
print ""

pipeline = Pipeline(tokenizer, tagger, prefilters, postfilters)

file = ["tweeti-b", "tweeti-b.dev"]
if not args.n:
    # load the standard tweet files
    trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file)
    labeled_featuresets = read_tweets_file(trainingfile, pipeline).values()
else:
    # if the NOT-adapter filter has to be used, the program loads the *.conll
    # files instead; they must be in the same dataset path specified by the user
    trainingfile = map(lambda path: args.datasetpath + path + ".tsv", file)
    conllfile = map(lambda path: args.datasetpath + path + ".conll", file)
    labeled_featuresets = read_conll_file(trainingfile, conllfile, pipeline).values()

if not args.predict:
    ############ Cross Validation
    validator = CrossValidator(labeled_featuresets)
    timer.start()
    (acc, conf_matr, prec, recall, f_measure) = validator.validate(classifier, args.v)
    print "Accuracy: %f" % acc
    print "Confusion Matrix:"
    for prec_label in conf_matr:
        # NOTE: the loop body was missing in the original snippet; printing the
        # row of counts for each label is an assumption about conf_matr's layout.
        print "  %s: %s" % (prec_label, conf_matr[prec_label])
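# --- Hypothetical sketch (not part of the original source) ---
# For intuition, the k-fold scheme that CrossValidator.validate presumably
# implements, simplified to return accuracy only (the real method also
# returns a confusion matrix, precision, recall and F-measure). All names
# below are illustrative, and classifier.classify(featureset) is assumed.
def kfold_accuracy(labeled_featuresets, classifier, k):
    fold_size = len(labeled_featuresets) / k
    total = 0.0
    for i in range(k):
        # hold out the i-th bin for testing, train on the remaining k-1 bins
        test_set = labeled_featuresets[i * fold_size:(i + 1) * fold_size]
        train_set = labeled_featuresets[:i * fold_size] + \
                    labeled_featuresets[(i + 1) * fold_size:]
        classifier.train(train_set)
        hits = sum(1 for feats, label in test_set
                   if classifier.classify(feats) == label)
        total += float(hits) / len(test_set)
    return total / k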