Ejemplo n.º 1
0
def test_pipeline(pipeline):
    """
		Support function used to test a pipeline using the specified testSet
	"""
    if not isinstance(pipeline, Pipeline):
        raise ValueError("pipeline must be an instance of Pipeline")

    timer.start()
    if not useConllFile:
        labeled_featuresets = read_tweets_file(originalFile, pipeline).values()
    else:
        labeled_featuresets = read_conll_file(originalFile, conllFile,
                                              pipeline).values()

    validator = CrossValidator(labeled_featuresets)
    print "Elapsed time for data set processing: %.0fs\n" % (timer.stop() /
                                                             1000)

    # test the classifiers
    for classifierName in classifiers:
        timer.start()
        print "- %s " % classifierName,
        print "accuracy:	%f" % validator.validate(classifiers[classifierName],
                                                  numOfBins)[0]
        print "  Elapsed time: %.0fs\n" % (timer.stop() / 1000)
Ejemplo n.º 2
0
# Base names of the data set files, resolved against args.datasetpath below.
file = ["tweeti-b", "tweeti-b.dev"]

# The .tsv tweet files are needed by both loading modes.
trainingfile = map(lambda name: args.datasetpath + name + ".tsv", file)
if args.n:
    # With args.n set (the "not adapter filter" case), the matching *.conll
    # files are loaded as well; they must be in the same dataset path
    # specified by the user.
    conllfile = map(lambda name: args.datasetpath + name + ".conll", file)
    labeled_featuresets = read_conll_file(
        trainingfile, conllfile, pipeline).values()
else:
    # Standard case: only the tweet files are read.
    labeled_featuresets = read_tweets_file(trainingfile, pipeline).values()

if not args.predict:
    ############ Cross Validation
    validator = CrossValidator(labeled_featuresets)
    timer.start()
    (acc, conf_matr, prec, recall,
     f_measure) = validator.validate(classifier, args.v)
    print "Accuracy:		%f" % acc
    print "Confusion Matrix:"
    for prec_label in conf_matr:
        for real_label in conf_matr[prec_label]:
            print "\tPredicted: " + prec_label + "\tReal: " + real_label + "\t" + str(
                conf_matr[prec_label][real_label])
    print "Precision:"
    for label in prec:
        print "\t" + label + ":	%f" % prec[label]

    print "Recall:"
    for label in recall: