Example #1
def print_precision_recall(classifier, test_dict):
    refsets = defaultdict(set)
    testsets = defaultdict(set)
    for i, (feats, label) in enumerate(test_dict):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)
    print 'pos precision:', precision(refsets['positive'], testsets['positive'])
    print 'pos recall:', recall(refsets['positive'], testsets['positive'])
    print 'pos F-measure:', f_measure(refsets['positive'], testsets['positive'])
    print 'neg precision:', precision(refsets['negative'], testsets['negative'])
    print 'neg recall:', recall(refsets['negative'], testsets['negative'])
    print 'neg F-measure:', f_measure(refsets['negative'], testsets['negative'])
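Example #1 above uses Python 2 print statements and omits its imports. A minimal, self-contained Python 3 sketch of the same reference-set/test-set pattern (assuming classifier is any trained NLTK classifier and test_data is a list of (featureset, label) pairs) might look like this:

from collections import defaultdict
from nltk.metrics import precision, recall, f_measure

def print_precision_recall(classifier, test_data):
    refsets = defaultdict(set)   # gold label -> indices of instances with that label
    testsets = defaultdict(set)  # predicted label -> indices of instances assigned that label
    for i, (feats, label) in enumerate(test_data):
        refsets[label].add(i)
        testsets[classifier.classify(feats)].add(i)
    for label in sorted(refsets):
        print(label, 'precision:', precision(refsets[label], testsets[label]))
        print(label, 'recall:', recall(refsets[label], testsets[label]))
        print(label, 'F-measure:', f_measure(refsets[label], testsets[label]))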
Example #2
def evaluate_classifier(featx):
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')

    # print(movie_reviews.words(fileids=[negids[0]]))
    # exit()

    negfeats = [(featx(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
    posfeats = [(featx(movie_reviews.words(fileids=[f])), 'pos') for f in posids]

    negcutoff = int(len(negfeats) * 3 / 4)
    poscutoff = int(len(posfeats) * 3 / 4)

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

    classifier = NaiveBayesClassifier.train(trainfeats)
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print('accuracy:', nltk.classify.util.accuracy(classifier, testfeats))
    print('pos precision:', precision(refsets['pos'], testsets['pos']))
    print('pos recall:', recall(refsets['pos'], testsets['pos']))
    print('pos F-measure:', f_measure(refsets['pos'], testsets['pos']))
    print('neg precision:', precision(refsets['neg'], testsets['neg']))
    print('neg recall:', recall(refsets['neg'], testsets['neg']))
    print('neg F-measure:', f_measure(refsets['neg'], testsets['neg']))
    classifier.show_most_informative_features()
Example #3
    def findFMetric(self, classifier):
        refsets, self.testSets = self.findsets(classifier)
        return (f_measure(refsets['bullish'], self.testSets['bullish']),
                f_measure(refsets['bearish'], self.testSets['bearish']),
                f_measure(refsets['neutral'], self.testSets['neutral']))
Example #4
def main():
    negfeats = []
    posfeats = []
    for i, f in enumerate(reviews[0]):
        print(f)
        if reviews[1][i] == 0:
            negfeats.append((word_feats(f.split()), "neg"))
        else:
            posfeats.append((word_feats(f.split()), "pos"))

    testNegfeats = []
    testPosfeats = []
    for i, f in enumerate(test[0]):
        if test[1][i] == 0:
            testNegfeats.append((word_feats(f.split()), "neg"))
        else:
            testPosfeats.append((word_feats(f.split()), "pos"))

    trainfeats = negfeats + posfeats
    testfeats = testNegfeats + testPosfeats

    print('train on %d instances, test on %d instances - Maximum Entropy' %
          (len(trainfeats), len(testfeats)))

    classifier = MaxentClassifier.train(trainfeats,
                                        'GIS',
                                        trace=0,
                                        encoding=None,
                                        labels=None,
                                        gaussian_prior_sigma=0,
                                        max_iter=1)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    accuracy = nltk.classify.util.accuracy(classifier, testfeats)
    pos_precision = precision(refsets['pos'], testsets['pos'])
    pos_recall = recall(refsets['pos'], testsets['pos'])
    pos_fmeasure = f_measure(refsets['pos'], testsets['pos'])
    neg_precision = precision(refsets['neg'], testsets['neg'])
    neg_recall = recall(refsets['neg'], testsets['neg'])
    neg_fmeasure = f_measure(refsets['neg'], testsets['neg'])
    print(pos_recall)
    print(neg_recall)
    print()
    print('')
    print('---------------------------------------')
    print('          Maximum Entropy              ')
    print('---------------------------------------')
    print('accuracy:', accuracy)
    print('precision', (pos_precision + neg_precision) / 2)
    print('recall', (pos_recall + neg_recall) / 2)
    print('f-measure', (pos_fmeasure + neg_fmeasure) / 2)
Example #5
def assess_classifier(classifier, test_data, text, max_ent_help=False):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    numDataSets = len(test_data)
    onDataSet = 0
    # TN = 0
    # TP = 0
    # FN = 0
    # FP = 0

    # enumerate through the test data and classify them
    for i, (feats, label) in enumerate(test_data):
        refsets[label].add(i)
        if max_ent_help:
            if maxEntClf.classify(feats):
                observed = classifier.classify(feats)
            else:
                observed = False
        else:
            observed = classifier.classify(feats)
        testsets[observed].add(i)
        onDataSet += 1
        # if label == observed:
        #     if observed:
        #         TP += 1
        #     else:
        #         TN += 1
        # else:
        #     if observed:
        #         FP += 1
        #     else:
        #         FN += 1


        # printPercentage(onDataSet/numDataSets * 100, "Extracting Features: ")
    # precision = TP/(TP+FP)
    # recall = TP/(TP+FN)
    # f1Score = 2*((precision*recall)/(precision + recall))

    # calculate the precision, recall, f-measure
    laugh_precision = precision(refsets[True], testsets[True])
    laugh_recall = recall(refsets[True], testsets[True])
    laugh_f1 = f_measure(refsets[True], testsets[True])

    non_laugh_precision = precision(refsets[False], testsets[False])
    non_laugh_recall = recall(refsets[False], testsets[False])
    non_laugh_f1 = f_measure(refsets[False], testsets[False])

    acc = nltk.classify.accuracy(classifier, test_data)

    return [text, acc, laugh_precision, laugh_recall, laugh_f1, non_laugh_precision, non_laugh_recall, non_laugh_f1]
Example #6
def main():
    results = {'Topic': [], 'Precision': [], 'Recall': [], 'F-measure': []}
    print('\nPreparing data...')
    (train_set, test_set) = get_train_test_sets('data/content')
    print('\nNB classifier training...')
    classifier = NaiveBayesClassifier.train(train_set)
    print('NB classifier is trained with {}% accuracy'.format(
        round(accuracy(classifier, test_set) * 100, 1)))

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    for topic in topics:
        results['Topic'].append(topic)
        results['Precision'].append(
            round(precision(refsets[topic], testsets[topic]) * 100, 1))
        results['Recall'].append(
            round(recall(refsets[topic], testsets[topic]) * 100, 1))
        results['F-measure'].append(
            round(f_measure(refsets[topic], testsets[topic]) * 100, 1))

    del classifier, train_set, test_set, refsets, testsets
    gc.collect()

    print(results)
    def benchmarking(self,
                     classifier,
                     _test_set,
                     all_f_measure=[],
                     all_precision=[],
                     all_recall=[]):
        from nltk import classify
        accuracy = classify.accuracy(classifier, _test_set)

        print("accuracy:", accuracy)

        from nltk.metrics import precision
        from nltk.metrics import recall
        from nltk.metrics import f_measure

        import collections
        refsets = collections.defaultdict(set)
        testsets = collections.defaultdict(set)
        for i, (feats, label) in enumerate(_test_set):
            refsets[label].add(i)
            observed = classifier.classify(feats)
            testsets[observed].add(i)

        prec = precision(refsets['class'], testsets['class'])
        rec = recall(refsets['class'], testsets['class'])
        f1 = f_measure(refsets['class'], testsets['class'])
        print('precision:', prec)
        print('recall:', rec)
        print('F-measure:', f1)

        all_f_measure.append(f1)
        all_precision.append(prec)
        all_recall.append(rec)
        print('========Show top 10 most informative features========')
        classifier.show_most_informative_features(10)
def train_and_score(classifier, train, test):

    try:
        if classifier.__name__ == 'MaxentClassifier':
            clf = classifier.train(train, algorithm='MEGAM')
        elif classifier.__name__ == 'DecisionTreeClassifier':
            clf = classifier.train(train,
                                   binary=True,
                                   entropy_cutoff=0.8,
                                   depth_cutoff=5,
                                   support_cutoff=3)
        else:
            clf = classifier.train(train)
    except AttributeError:
        clf = classifier.train(train)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(test):
        refsets[label].add(i)
        observed = clf.classify(feats)
        testsets[observed].add(i)

    measures = []
    for key in refsets.keys():
        measures.append([
            precision(refsets[key], testsets[key]),
            recall(refsets[key], testsets[key]),
            f_measure(refsets[key], testsets[key])
        ])

    return measures
Example #10
def evaluate_classifier(featx):

    stanford_pos_list = list()
    stanford_neg_list = list()
    with open(stanford_pos, 'r') as stanford_p:
        for line in stanford_p:
            stanford_pos_list.append(line.strip())

    with open(stanford_neg, 'r') as stanford_n:
        for line in stanford_n:
            stanford_neg_list.append(line.strip())

    stanford_p = stanford_pos_list[:5000]
    stanford_n = stanford_neg_list[:5000]

    negfeats = [(featx(nltk.word_tokenize(line)), 'neg')
                for line in stanford_n]
    posfeats = [(featx(nltk.word_tokenize(line)), 'pos')
                for line in stanford_p]

    negcutoff = int(len(negfeats) * 3 / 4)
    poscutoff = int(len(posfeats) * 3 / 4)

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    print('train on %d instances, test on %d instances' %
          (len(trainfeats), len(testfeats)))

    classifier = NaiveBayesClassifier.train(trainfeats)
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print('accuracy:', nltk.classify.util.accuracy(classifier, testfeats))
    print('pos precision:', precision(refsets['pos'], testsets['pos']))
    print('pos recall:', recall(refsets['pos'], testsets['pos']))
    print('pos F-measure:', f_measure(refsets['pos'], testsets['pos']))
    print('neg precision:', precision(refsets['neg'], testsets['neg']))
    print('neg recall:', recall(refsets['neg'], testsets['neg']))
    print('neg F-measure:', f_measure(refsets['neg'], testsets['neg']))
    classifier.show_most_informative_features()
Example #11
    def results(classifier, testing_set, training_set):
        now = datetime.now()
        # Trains classifier
        classifier = classifier.train(training_set)
        refsets = collections.defaultdict(set)
        testsets = collections.defaultdict(set)

        tp = 0
        fp = 0
        tn = 0
        fn = 0

        # Gets true/false positives and negatives
        for i, (features, label) in enumerate(testing_set):
            refsets[label].add(i)
            observed = classifier.classify(features)
            testsets[observed].add(i)
            if label == 'exp' and observed == 'exp':
                tp += 1
            elif label == 'non' and observed == 'non':
                tn += 1
            elif label == 'exp' and observed == 'non':
                fn += 1
            else:
                fp += 1

        print "Time training: " + str(datetime.now() - now)
        print "True Positives: " + str(tp)
        print "False Positives: " + str(fp)
        print "True Negatives: " + str(tn)
        print "False Negatives: " + str(fn)
        print 'Explicit Precision: ', precision(refsets['exp'],
                                                testsets['exp'])
        print 'Explicit recall: ', recall(refsets['exp'], testsets['exp'])
        print 'Explicit F-Score: ', f_measure(refsets['exp'], testsets['exp'])
        print 'Non-Explicit Precision: ', precision(refsets['non'],
                                                    testsets['non'])
        print 'Non-Explicit Recall: ', recall(refsets['non'], testsets['non'])
        print 'Non-Explicit F-Score: ', f_measure(refsets['non'],
                                                  testsets['non'])

        print "Accuracy percent: ", (nltk.classify.accuracy(
            classifier, testing_set)) * 100
        return classifier
def eval_stats(results):
	'''
	Compute recall, precision, and f-measure from passed results.
	The expected format for results is a dictionary whose keys=<name of article>
	and values=tuple (<test category>, <reference category>, <scores>), where:
	test=category suggested by classifier, reference=pre-classified gold
	category, scores=can be None or dictionary whose keys=category names and
	values=matching score for this article.
	'''
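	# Illustrative only (hypothetical data): results might look like
	# {'article_1': ('sports', 'sports', {'sports': 0.9, 'politics': 0.1}),
	#  'article_2': ('economy', 'politics', None)}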
	# Calculate number of correct matches
	correct = 0
	missed = defaultdict(tuple)
	for article_name, (suggested, real, scores) in results.iteritems():
		if suggested==real:
			correct += 1
		else:
			missed[article_name] = (suggested, real)
	success_ratio = correct / float(len(results))
	print "Ratio: %0.3f" % success_ratio
	
	# Print wrong matches
	for name, (suggested, real) in missed.iteritems():
		print "%s\t%s\t%s" % (name, suggested, real)
	
	# Create sets of references / test classification for evaluation
	cat_ref = defaultdict(set)
	cat_test= defaultdict(set)
	for name, (test_category, ref_category, scores) in results.iteritems():
		cat_ref[ref_category].add(name) 		# gold-tagged categories
		cat_test[test_category].add(name) 	# suggested categories

	# Precision, recall, f-measure, support (num of reference articles in
	# each category) for each category
	print "\nCategory\tPrecision\tRecall\tF-measure\tSupport" 
	measures = defaultdict(tuple)
	for category in cat_ref.keys():
		cat_prec = metrics.precision(cat_ref[category], cat_test[category])
		cat_rec = metrics.recall(cat_ref[category], cat_test[category])
		cat_f = metrics.f_measure(cat_ref[category], cat_test[category])
		cat_support = len(cat_ref[category])
		measures[category] = (cat_prec, cat_rec, cat_f, cat_support)
		print "%s\t%0.3f\t%0.3f\t%0.3f\t%d" % \
		(category, cat_prec, cat_rec, cat_f, cat_support)
	
	# Calculate precision, recall, f-measure for entire corpus:
	# This is a weighted average of the values of separate categories
	# SUM(product of all precisions, product of all supports)/sum(total number of supports)
	avg_prec = weighted_average([(cat_measure[0], cat_measure[3]) for \
		cat_measure in measures.values()])
	avg_rec = weighted_average([(cat_measure[1], cat_measure[3]) for \
		cat_measure in measures.values()])
	avg_f = weighted_average([(cat_measure[2], cat_measure[3]) for \
		cat_measure in measures.values()])
	total_support = sum([cat_support[3] for cat_support in measures.values()])
	
	print "%s\t%0.3f\t%0.3f\t%0.3f\t%d" % ("Total", avg_prec, avg_rec, avg_f, total_support)
Example #13
def evaluate_precision_recall_fmeasure(corpus, category, tagger):

    # get a list of the gold standard tags, and the tags set by the tagger.
    gold = set(tag_list(corpus.tagged_sents(categories=category)))
    test = set(
        tag_list(apply_tagger(tagger,
                              corpus.tagged_sents(categories=category))))

    # return the precision and recall of the evaluated model.
    return [precision(gold, test), recall(gold, test), f_measure(gold, test)]
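The helpers tag_list and apply_tagger are not shown in this example; presumably they resemble the tagger-evaluation helpers from the NLTK book, along these lines (an assumption, not part of the original code):

import nltk

def tag_list(tagged_sents):
    return [tag for sent in tagged_sents for (word, tag) in sent]

def apply_tagger(tagger, corpus):
    return [tagger.tag(nltk.tag.untag(sent)) for sent in corpus]

Note that wrapping the resulting tag lists in set() means the metrics compare tag types only, not per-token tagging decisions.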
Example #14
def evaluate_features(feature_extractor, N, only_acc=False):
    from nltk.corpus import movie_reviews
    from nltk.classify import NaiveBayesClassifier as naive
    from nltk.classify.util import accuracy
    from nltk.metrics import precision, recall, f_measure
    from sys import stdout

    negative = movie_reviews.fileids('neg')
    positive = movie_reviews.fileids('pos')
    negfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])), 'neg')
                for f in negative]

    posfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])), 'pos')
                for f in positive]
    negtrain, negtest = stratifiedSamples(negfeats, N)
    postrain, postest = stratifiedSamples(posfeats, N)

    trainfeats = negtrain + postrain
    testfeats = negtest + postest
    classifier = naive.train(trainfeats)
    if only_acc: return accuracy(classifier, testfeats)
    print 'accuracy: {}'.format(accuracy(classifier, testfeats))

    # Precision, Recall, F-measure
    from collections import defaultdict
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'pos precision:', precision(refsets['pos'], testsets['pos'])
    print 'pos recall:', recall(refsets['pos'], testsets['pos'])
    print 'pos F-measure:', f_measure(refsets['pos'], testsets['pos'])
    print 'neg precision:', precision(refsets['neg'], testsets['neg'])
    print 'neg recall:', recall(refsets['neg'], testsets['neg'])
    print 'neg F-measure:', f_measure(refsets['neg'], testsets['neg'])
    stdout.flush()
    classifier.show_most_informative_features()
    return classifier
def calcAllClassesFMeasure(classSet, refsets, testsets):
    fSum = 0.0
    denominator = 0
    for category in classSet:
        num = f_measure(refsets[category], testsets[category])
        if num is None:
            continue
        fSum += num
        denominator += 1
    
    return fSum/denominator
Example #18
def evaluate(classifier, evalFeats, labels):
    # old eval, without cross-validation
    try:
        print('accuracy: %f' % nltk.classify.util.accuracy(classifier, evalFeats))
    except ZeroDivisionError:
        print('accuracy: 0')

    refsets, testsets = ref_test_sets(classifier, evalFeats)
    for label in labels:
        ref = refsets[label]
        test = testsets[label]
        print('%s precision: %f' % (label, precision(ref, test) or 0))
        print('%s recall: %f' % (label, recall(ref, test) or 0))
        print('%s f-measure: %f' % (label, f_measure(ref, test) or 0))
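The 'or 0' guards above matter because NLTK's precision returns None when no test instances were assigned a label, and recall returns None when the label never occurs in the reference data; f_measure is None in either case. For example, with hypothetical sets:

precision({'a', 'b'}, set())   # None: nothing was classified with this label
recall(set(), {'a', 'b'})      # None: the label never appears in the reference data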
Example #19
    def word_similarity_dict(self, word):
        """
        Return a dictionary mapping from words to 'similarity scores,'
        indicating how often these two words occur in the same
        context.
        """
        word = self._key(word)
        word_contexts = set(self._word_to_contexts[word])

        scores = {}
        for w, w_contexts in self._word_to_contexts.items():
            scores[w] = f_measure(word_contexts, set(w_contexts))

        return scores
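This method appears to belong to NLTK's ContextIndex (nltk.text). A hedged usage sketch, assuming the Gutenberg corpus has been downloaded via nltk.download('gutenberg'):

import nltk
from nltk.text import ContextIndex

words = nltk.corpus.gutenberg.words('austen-emma.txt')
ci = ContextIndex(words)
scores = ci.word_similarity_dict('happy')
# words sharing the most contexts with 'happy' score highest
print(sorted(scores, key=scores.get, reverse=True)[:10])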
    def evaluation(test_set, classifier):
        """Evaluate the classifier with the test set. Print the accuracy,
        precision, recall and f-measure."""
        refsets = collections.defaultdict(set)
        testsets = collections.defaultdict(set)

        for i, (feats, label) in enumerate(test_set):
            refsets[label].add(i)
            observed = classifier.classify(feats)
            testsets[observed].add(i)

        print('Accuracy:', accuracy(classifier, test_set))
        print('Precision:', precision(refsets['MALE'], testsets['MALE']))
        print('Recall:', recall(refsets['MALE'], testsets['MALE']))
        print('F Measure:', f_measure(refsets['MALE'], testsets['MALE']))
Example #22
def assess_classifier(classifier, test_set, text):
    accuracy = nltk.classify.accuracy(classifier, test_set)
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    pos_pre = precision(refsets['positive'], testsets['positive'])
    pos_rec = recall(refsets['positive'], testsets['positive'])
    pos_fme = f_measure(refsets['positive'], testsets['positive'])
    neg_pre = precision(refsets['negative'], testsets['negative'])
    neg_rec = recall(refsets['negative'], testsets['negative'])
    neg_fme = f_measure(refsets['negative'], testsets['negative'])
    neu_pre = precision(refsets['neutral'], testsets['neutral'])
    neu_rec = recall(refsets['neutral'], testsets['neutral'])
    neu_fme = f_measure(refsets['neutral'], testsets['neutral'])

    return [
        text, accuracy, pos_pre, pos_rec, pos_fme, neg_pre, neg_rec, neg_fme,
        neu_pre, neu_rec, neu_fme
    ]
Example #23
    def evaluation(test_set, classifier):
        """Evaluate the classifier with the test set. Print the accuracy,
        precision, recall and f-measure."""

        refsets = collections.defaultdict(set)
        testsets = collections.defaultdict(set)

        for i, (featureset, label) in enumerate(test_set):
            refsets[label].add(i)
            observed = classifier.classify(featureset)
            testsets[observed].add(i)

        print('Evaluation Results')
        print("\t\t\t{:<20}{:0.2f}".format('classifier accuracy', accuracy(classifier, test_set)))

        print("\t\t\t{:<20}{:0.2f}".format('precision male', precision(refsets['male'], testsets['male'])))
        print("\t\t\t{:<20}{:0.2f}".format('precision female', precision(refsets['female'], testsets['female'])))

        print("\t\t\t{:<20}{:0.2f}".format('recall male', recall(refsets['male'], testsets['male'])))
        print("\t\t\t{:<20}{:0.2f}".format('recall female', recall(refsets['female'], testsets['female'])))

        print("\t\t\t{:<20}{:0.2f}".format('f_measure male', f_measure(refsets['male'], testsets['male'])))
        print("\t\t\t{:<20}{:0.2f}".format('f_measure female', f_measure(refsets['female'], testsets['female'])))
        print()
def measure(classifier, testfeats, alpha=0.5):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)
    
    precisions = {}
    recalls = {}
    f_measures = {}
    for label in classifier.labels():
        precisions[label] = metrics.precision(refsets[label], testsets[label])
        recalls[label] = metrics.recall(refsets[label], testsets[label])
        f_measures[label] = metrics.f_measure(refsets[label], testsets[label], alpha)
	
    return precisions, recalls, f_measures
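A note on the alpha argument: nltk.metrics.f_measure computes the weighted harmonic mean 1 / (alpha/precision + (1 - alpha)/recall), so the default alpha=0.5 used here gives the balanced F1 score. A quick check with hypothetical sets:

from nltk.metrics import f_measure

# precision and recall are both 2/3 here, so the balanced F-measure is 2/3 as well
print(f_measure({0, 1, 2}, {1, 2, 3}))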
Example #26
    def evaluate_precision_recall_f_measure(self):
        '''Evaluate precision, recall and f1 measure'''

        scores = dict(prec_pos=[], rec_pos=[], fmeas_pos=[], prec_neg=[], rec_neg=[], fmeas_neg=[])
        lfeats = self.label_feats_from_corpus()
        for i in range(1, 10):
            train_feats, test_feats, nb_classifier = self\
                .__get_elements_for_classification(lfeats, train_number=i, classifying=False)
            refsets = collections.defaultdict(set)
            testsets = collections.defaultdict(set)
            for i, (feats, label) in enumerate(test_feats):
                refsets[label].add(i)
                observed = nb_classifier.classify(feats)
                testsets[observed].add(i)
            precisions = {}
            recalls = {}
            f_measure = {}
            for label in nb_classifier.labels():
                precisions[label] = metrics.precision(
                    refsets[label], testsets[label]
                )
                recalls[label] = metrics.recall(refsets[label], testsets[label])
                f_measure[label] = metrics.f_measure(
                    refsets[label], testsets[label]
                )
            #print nb_classifier.show_most_informative_features(n=20)
            scores["prec_pos"].append(precisions["pos"])
            scores["prec_neg"].append(precisions["neg"])

            scores["rec_pos"].append(recalls["pos"])
            scores["rec_neg"].append(recalls["neg"])

            scores["fmeas_pos"].append(f_measure["pos"])
            scores["fmeas_neg"].append(f_measure["neg"])

        scores["prec_pos"] = sum(scores["prec_pos"]) / len(scores["prec_pos"])
        scores["prec_neg"] = sum(scores["prec_neg"]) / len(scores["prec_neg"])

        scores["rec_pos"] = sum(scores["rec_pos"]) / len(scores["rec_pos"])
        scores["rec_neg"] = sum(scores["rec_neg"]) / len(scores["rec_neg"])

        scores["fmeas_pos"] = sum(scores["fmeas_pos"]) / len(scores["fmeas_pos"])
        scores["fmeas_neg"] = sum(scores["fmeas_neg"]) / len(scores["fmeas_neg"])
        return scores
Example #27
    def eval_classifier(self):
        '''
        Test the model and calculates the metrics of accuracy, precision,
        recall and f-measure
        '''
        test_set = apply_features(self.get_doc_features, self._test_docs, True)
        self._accuracy = accuracy(self._classifier, test_set)
        refsets = collections.defaultdict(set)
        testsets = collections.defaultdict(set)

        for i, (feats, label) in enumerate(test_set):
            refsets[label].add(i)
            observed = self._classifier.classify(feats)
            testsets[observed].add(i)
        self.count_categories(self._train_docs)
        for cat in self._categories:
            self._precision[cat] = precision(refsets[cat], testsets[cat])
            self._recall[cat] = recall(refsets[cat], testsets[cat])
            self._f_measure[cat] = f_measure(refsets[cat], testsets[cat])
Example #28
    def measuring_accuracy(self):
        """Testing the model *accuracy*"""
        print(
            "Accuracy:", nltk.classify.accuracy(self.classifier, self.test_set)
        )
        self.classifier.show_most_informative_features(20)
        """Measuring **Precision,Recall,F-Measure** of a classifier.
             Finding **Confusion matrix**"""
        actual_set = collections.defaultdict(set)
        predicted_set = collections.defaultdict(set)
        # cm here refers to confusion matrix
        actual_set_cm = []
        predicted_set_cm = []
        for i, (feature, label) in enumerate(self.test_set):
            actual_set[label].add(i)
            actual_set_cm.append(label)
            predicted_label = self.classifier.classify(feature)
            predicted_set[predicted_label].add(i)
            predicted_set_cm.append(predicted_label)

        for category in self.data.keys():
            print(
                category,
                "precision :",
                precision(actual_set[category], predicted_set[category]),
            )
            print(
                category,
                "recall :",
                recall(actual_set[category], predicted_set[category]),
            )
            print(
                category,
                "f-measure :",
                f_measure(actual_set[category], predicted_set[category]),
            )
        confusion_matrix = ConfusionMatrix(actual_set_cm, predicted_set_cm)
        print("Confusion Matrix")
        print(confusion_matrix)
def precision_recall_f_measure(classifier, test_feats):
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    for i, (feats, label) in enumerate(test_feats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    precisions = {}
    recalls = {}
    f_measures = {}

    for label in classifier.labels():
        precisions[label] = precision(refsets[label], testsets[label])
        recalls[label] = recall(refsets[label], testsets[label])
        f_measures[label] = f_measure(refsets[label], testsets[label])

    print("\nPrecision:")
    pprint(precisions, width=1)
    print("\nRecall")
    pprint(recalls, width=1)
    print("\nF Measure")
    pprint(f_measures, width=1)
Example #30
#script to validate coding
import cPickle as pickle
import sys
from nltk.metrics import accuracy, ConfusionMatrix, precision, recall, f_measure
from collections import defaultdict
import classifier

if __name__=='__main__':
	validation_pickle=sys.argv[1]
	classifier_pickle=sys.argv[2]
	validation_set=pickle.load(open(validation_pickle, 'rb'))
	c=pickle.load(open(classifier_pickle, 'rb'))
	
	reference=defaultdict(set)
	observed=defaultdict(set)
	# accuracy() expects parallel lists of labels, so collect them alongside the index sets
	reference_labels=[]
	observed_labels=[]
	for i, (tweet, label) in enumerate(validation_set):
		reference[label].add(i)
		reference_labels.append(label)
		observation=c.classify(tweet)
		observed[observation].add(i)
		observed_labels.append(observation)
	
	print "accuracy: %s" % accuracy(reference_labels, observed_labels)
	print "pos precision: %s" % precision(reference['positive'], observed['positive'])
	print "pos recall: %s" % recall(reference['positive'], observed['positive'])
	print "pos f-measure: %s" % f_measure(reference['positive'], observed['positive'])
	print "neg precision: %s" % precision(reference['negative'], observed['negative'])
	print "neg recall: %s" % recall(reference['negative'], observed['negative'])
	print "neg f-measure: %s" % f_measure(reference['negative'], observed['negative'])
	
Example #31
#!/usr/bin/python
import nltk
from nltk.metrics import precision, recall, f_measure

reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
reference_set = set(reference)
test_set = set(test)

print "Precision: "
print precision(reference_set, test_set)

print "\n"

print "Recall: "
print recall(reference_set, test_set)

print "\n"

print "F_Measure: "
print f_measure(reference_set, test_set)
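Because the tag sequences are converted to sets, this snippet measures the overlap of tag types rather than per-token tagging accuracy: positions and duplicates are discarded. With these inputs the reference set is {DET, NN, VB, JJ, IN} and the test set is {DET, VB, NN, IN}, so precision is 4/4 = 1.0, recall is 4/5 = 0.8, and the F-measure is 1 / (0.5/1.0 + 0.5/0.8), roughly 0.889.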

Example #32
def cross_fold(instances,
               trainf,
               testf,
               folds=10,
               trace=1,
               metrics=True,
               informative=0):
    if folds < 2:
        raise ValueError('must have at least 2 folds')
    # ensure it isn't an exhaustible iterable
    instances = list(instances)
    # randomize to get an even distribution, in case labeled instances are
    # ordered by label
    random.shuffle(instances)
    l = len(instances)
    step = l / folds

    if trace:
        print 'step %d over %d folds of %d instances' % (step, folds, l)

    accuracies = []
    precisions = collections.defaultdict(list)
    recalls = collections.defaultdict(list)
    f_measures = collections.defaultdict(list)

    for f in range(folds):
        if trace:
            print '\nfold %d' % (f + 1)
            print '-----%s' % ('-' * len('%s' % (f + 1)))

        start = f * step
        end = start + step
        train_instances = instances[:start] + instances[end:]
        test_instances = instances[start:end]

        if trace:
            print 'training on %d:%d + %d:%d' % (0, start, end, l)

        obj = trainf(train_instances)

        if trace:
            print 'testing on %d:%d' % (start, end)

        if metrics:
            refsets, testsets = ref_test_sets(obj, test_instances)

            for key in set(refsets.keys() + testsets.keys()):
                ref = refsets[key]
                test = testsets[key]
                p = precision(ref, test) or 0
                r = recall(ref, test) or 0
                f = f_measure(ref, test) or 0
                precisions[key].append(p)
                recalls[key].append(r)
                f_measures[key].append(f)

                if trace:
                    print '%s precision: %f' % (key, p)
                    print '%s recall: %f' % (key, r)
                    print '%s f-measure: %f' % (key, f)

        accuracy = testf(obj, test_instances)

        if trace:
            print 'accuracy: %f' % accuracy

        accuracies.append(accuracy)

        if trace and informative and hasattr(obj,
                                             'show_most_informative_features'):
            obj.show_most_informative_features(informative)

    if trace:
        print '\nmean and variance across folds'
        print '------------------------------'
        print 'accuracy mean: %f' % (sum(accuracies) / folds)
        print 'accuracy variance: %f' % array(accuracies).var()

        for key, ps in precisions.iteritems():
            print '%s precision mean: %f' % (key, sum(ps) / folds)
            print '%s precision variance: %f' % (key, array(ps).var())

        for key, rs in recalls.iteritems():
            print '%s recall mean: %f' % (key, sum(rs) / folds)
            print '%s recall variance: %f' % (key, array(rs).var())

        for key, fs in f_measures.iteritems():
            print '%s f_measure mean: %f' % (key, sum(fs) / folds)
            print '%s f_measure variance: %f' % (key, array(fs).var())

    return accuracies, precisions, recalls, f_measures
Example #33
          str(nltk.classify.accuracy(classifier, cv_test)))
    foldAccuracies.append(str(nltk.classify.accuracy(classifier, cv_test)))
    # most informative features
    # now get fold stats such as precision, recall, f-score
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(cv_test):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)
    foldPositivePrecisions.append(
        str(precision(refsets['spam'], testsets['spam'])))
    foldPositiveRecalls.append(str(recall(refsets['spam'], testsets['spam'])))
    foldPositiveFScores.append(
        str(f_measure(refsets['spam'], testsets['spam'])))
    foldNegativePrecisions.append(
        str(precision(refsets['ham'], testsets['ham'])))
    foldNegativeRecalls.append(str(recall(refsets['ham'], testsets['ham'])))
    foldNegativeFScores.append(str(f_measure(refsets['ham'], testsets['ham'])))

    print('Positive Precision:', precision(refsets['spam'], testsets['spam']))
    print('Positive Recall:', recall(refsets['spam'], testsets['spam']))
    print('Positive F1-Score:', f_measure(refsets['spam'], testsets['spam']))
    print('Negative Precision:', precision(refsets['ham'], testsets['ham']))
    print('Negative Recall:', recall(refsets['ham'], testsets['ham']))
    print('Negative F1-Score:', f_measure(refsets['ham'], testsets['ham']))
    classifier.show_most_informative_features(5)

total = 0
totalPrecPos = 0
def eval(test_alignments):
    f = open(test_alignments, "r")

    # initializing our "counters" used for the aggregate scores
    sentence_pairs = 0
    ibm1_precision_sum, ibm1_recall_sum, ibm1_aer_sum, ibm1_f1_sum = 0, 0, 0, 0
    ibm2_precision_sum, ibm2_recall_sum, ibm2_aer_sum, ibm2_f1_sum = 0, 0, 0, 0

    for line in f:
        sentence_pairs += 1

        strs = line.split("\t")

        print("-" * 47)
        print("Length of foreign sentence: ", len(strs[0].split()))
        print(strs[0])
        print(strs[1], "\n")

        ibm1_aligns = Alignment.fromstring(strs[2])
        ibm2_aligns = Alignment.fromstring(strs[3])
        hand_aligns = Alignment.fromstring(strs[4])
        '''
        Evaluate the sentence pair's precision and recall using the built-in
        nltk.metrics precision and recall functions. The functions' parameters
        are the following:
            1. Reference ("Gold Standard"): our hand alignments, which follow
            the same format as the system-produced alignments
            2. Test: the alignments produced by the model, which are compared
            against the hand alignments
        '''

        ibm1_precision, ibm1_recall, ibm1_aer, ibm1_f1 = precision(hand_aligns, ibm1_aligns), recall(hand_aligns, ibm1_aligns), \
                                                         alignment_error_rate(hand_aligns, ibm1_aligns), f_measure(hand_aligns, ibm1_aligns)

        ibm2_precision, ibm2_recall, ibm2_aer, ibm2_f1 = precision(hand_aligns, ibm2_aligns), recall(hand_aligns, ibm2_aligns), \
                                                         alignment_error_rate(hand_aligns, ibm2_aligns), f_measure(hand_aligns, ibm2_aligns)

        # Add it to our aggregate calculations
        ibm1_precision_sum += ibm1_precision
        ibm1_recall_sum += ibm1_recall
        ibm1_aer_sum += ibm1_aer
        ibm1_f1_sum += ibm1_f1

        ibm2_precision_sum += ibm2_precision
        ibm2_recall_sum += ibm2_recall
        ibm2_aer_sum += ibm2_aer
        ibm2_f1_sum += ibm2_f1

        print("IBM1 Precision: ", ibm1_precision, "\t", "IBM2 Precision: ",
              ibm2_precision)
        print("IBM1 Recall: ", ibm1_recall, "\t", "IBM2 Recall: ", ibm2_recall)
        print("IBM1 AER:", ibm1_aer, "\t", "IBM2 AER: ", ibm2_aer)
        print("IBM1 F1: ", ibm1_f1, "\t", "IBM2 F1: ", ibm2_f1)
        print("-" * 47, "\n")
    f.close()

    # Prints out the total statistics of the dataset
    print("-" * 23, "AVERAGE STATS", "-" * 23)
    print("Average IBM1 Precision: ", ibm1_precision_sum / sentence_pairs,
          "\t" * 2, "Average IBM2 Precision: ",
          ibm2_precision_sum / sentence_pairs)
    print("Average IBM1 Recall: ", ibm1_recall_sum / sentence_pairs, "\t" * 2,
          "Average IBM2 Recall: ", ibm2_recall_sum / sentence_pairs)
    print("Average IBM1 AER:", ibm1_aer_sum / sentence_pairs, "\t" * 2,
          "Average IBM2 AER: ", ibm2_aer_sum / sentence_pairs)
    print("Average IBM1 F1: ", ibm1_f1_sum / sentence_pairs, "\t" * 2,
          "Average IBM2 F1: ", ibm2_f1_sum / sentence_pairs)
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

str = 'SINGLE FOLD RESULT ' + '(' + 'linear-svc' + ')'
#training with LinearSVC
classifier = SklearnClassifier(LinearSVC())
classifier.train(trainfeats)
for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)
accuracy = nltk.classify.util.accuracy(classifier, testfeats) * 100
pos_precision = nltk.precision(refsets['pos'], testsets['pos'])

pos_recall = recall(refsets['pos'], testsets['pos'])
pos_fmeasure = f_measure(refsets['pos'], testsets['pos'])
neg_precision = precision(refsets['neg'], testsets['neg'])
neg_recall = recall(refsets['neg'], testsets['neg'])
neg_fmeasure = f_measure(refsets['neg'], testsets['neg'])
print('')
print('---------------------------------------')
print(str)
print('---------------------------------------')
print('accuracy: ', accuracy, '%')
print('precision', (pos_precision + neg_precision) / 2)
print('recall', (pos_recall + neg_recall) / 2)
print('f-measure', (pos_fmeasure + neg_fmeasure) / 2)

########################################################################################
########################################################################################
def evaluate_classifier(featx):

    negfeats = [(featx(f), 'neg') for f in word_split(negdata)]
    posfeats = [(featx(f), 'pos') for f in word_split(posdata)]

    negcutoff = int(len(negfeats) * 3 / 4)
    poscutoff = int(len(posfeats) * 3 / 4)

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    # using 3 classifiers
    classifier_list = ['nb', 'maxent', 'svm']

    for cl in classifier_list:
        if cl == 'maxent':
            classifierName = 'Maximum Entropy'
            classifier = MaxentClassifier.train(trainfeats,
                                                'GIS',
                                                trace=0,
                                                encoding=None,
                                                labels=None,
                                                gaussian_prior_sigma=0,
                                                max_iter=1)
        elif cl == 'svm':
            classifierName = 'SVM'
            classifier = SklearnClassifier(LinearSVC(), sparse=False)
            classifier.train(trainfeats)
        else:
            classifierName = 'Naive Bayes'
            classifier = NaiveBayesClassifier.train(trainfeats)

        refsets = collections.defaultdict(set)
        testsets = collections.defaultdict(set)

        for i, (feats, label) in enumerate(testfeats):
            refsets[label].add(i)
            observed = classifier.classify(feats)
            testsets[observed].add(i)

        accuracy = nltk.classify.util.accuracy(classifier, testfeats)
        pos_precision = precision(refsets['pos'], testsets['pos'])
        pos_recall = recall(refsets['pos'], testsets['pos'])
        pos_fmeasure = f_measure(refsets['pos'], testsets['pos'])
        neg_precision = precision(refsets['neg'], testsets['neg'])
        neg_recall = recall(refsets['neg'], testsets['neg'])
        neg_fmeasure = f_measure(refsets['neg'], testsets['neg'])

        print('')
        print('---------------------------------------')
        print('SINGLE FOLD RESULT ' + '(' + classifierName + ')')
        print('---------------------------------------')
        print('accuracy:', accuracy)
        print('precision', (pos_precision + neg_precision) / 2)
        print('recall', (pos_recall + neg_recall) / 2)
        print('f-measure', (pos_fmeasure + neg_fmeasure) / 2)

        #classifier.show_most_informative_features()

    print('')

    ## CROSS VALIDATION

    trainfeats = negfeats + posfeats

    # SHUFFLE TRAIN SET
    # As in cross validation, the test chunk might have only negative or only positive data
    random.shuffle(trainfeats)
    n = 5  # 5-fold cross-validation

    for cl in classifier_list:

        subset_size = int(len(trainfeats) / n)
        accuracy = []
        pos_precision = []
        pos_recall = []
        neg_precision = []
        neg_recall = []
        pos_fmeasure = []
        neg_fmeasure = []
        cv_count = 1
        for i in range(n):
            testing_this_round = trainfeats[i * subset_size:][:subset_size]
            training_this_round = trainfeats[:i * subset_size] + trainfeats[
                (i + 1) * subset_size:]

            if cl == 'maxent':
                classifierName = 'Maximum Entropy'
                classifier = MaxentClassifier.train(training_this_round,
                                                    'GIS',
                                                    trace=0,
                                                    encoding=None,
                                                    labels=None,
                                                    gaussian_prior_sigma=0,
                                                    max_iter=1)
            elif cl == 'svm':
                classifierName = 'SVM'
                classifier = SklearnClassifier(LinearSVC(), sparse=False)
                classifier.train(training_this_round)
            else:
                classifierName = 'Naive Bayes'
                classifier = NaiveBayesClassifier.train(training_this_round)

            refsets = collections.defaultdict(set)
            testsets = collections.defaultdict(set)
            for i, (feats, label) in enumerate(testing_this_round):
                refsets[label].add(i)
                observed = classifier.classify(feats)
                testsets[observed].add(i)

            cv_accuracy = nltk.classify.util.accuracy(classifier,
                                                      testing_this_round)
            cv_pos_precision = precision(refsets['pos'], testsets['pos'])
            cv_pos_recall = recall(refsets['pos'], testsets['pos'])
            cv_pos_fmeasure = f_measure(refsets['pos'], testsets['pos'])
            cv_neg_precision = precision(refsets['neg'], testsets['neg'])
            cv_neg_recall = recall(refsets['neg'], testsets['neg'])
            cv_neg_fmeasure = f_measure(refsets['neg'], testsets['neg'])

            accuracy.append(cv_accuracy)
            pos_precision.append(cv_pos_precision)
            pos_recall.append(cv_pos_recall)
            neg_precision.append(cv_neg_precision)
            neg_recall.append(cv_neg_recall)
            pos_fmeasure.append(cv_pos_fmeasure)
            neg_fmeasure.append(cv_neg_fmeasure)

            cv_count += 1

        print('---------------------------------------')
        print('N-FOLD CROSS VALIDATION RESULT ' + '(' + classifierName + ')')
        print('---------------------------------------')
        print('accuracy:', sum(accuracy) / n)
        print('precision',
              (sum(pos_precision) / n + sum(neg_precision) / n) / 2)
        print('recall', (sum(pos_recall) / n + sum(neg_recall) / n) / 2)
        print('f-measure', (sum(pos_fmeasure) / n + sum(neg_fmeasure) / n) / 2)
        print('')
Example #37
# Precision and recall calculation
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)

#35% false positives for the pos label.
print 'Positive precision:', precision(refsets['pos'], testsets['pos'])
#98% recall, so very few false negatives
print 'Positive recall:', recall(refsets['pos'], testsets['pos'])

print 'Positive F-measure:', f_measure(refsets['pos'], testsets['pos'])
print 'Negative precision:', precision(refsets['neg'], testsets['neg'])
print 'Negative recall:', recall(refsets['neg'], testsets['neg'])
print 'Negative F-measure:', f_measure(refsets['neg'], testsets['neg'])

#Accuracy
print '\nAccuracy:', nltk.classify.util.accuracy(classifier, testfeats)
classifier.show_most_informative_features()

#print classifier.classify(word_feats('bad'))


#Remove stopwords and present the paragraph words in order to make sense
def summary(words):
    return collections.OrderedDict([(word, True) for word in words
                                    if word not in stopwordset])
Example #38
    def f_measure(self):
        return f_measure(self._reference, self._test)
    print 'Dictionary : ', dictionary.get_name(), '\n'
    print ConfusionMatrix(gold_standard,results).pp()
    print 'Accuracy: ', accuracy(gold_standard,results)
    for c in [0,1,-1]:
        print 'Metrics for class ', c
        gold = set()
        test = set()
        for i,x in enumerate(gold_standard):
            if x == c:
                gold.add(i)
        for i,x in enumerate(results):
            if x == c:
                test.add(i)
        print 'Precision: ', precision(gold, test)
        print 'Recall   : ', recall(gold, test)
        print 'F_measure: ', f_measure(gold, test)
    print '\n\n'


#################### Sentences classification ##########################

# Not reported in the paper because LIWC doesn't have neutral class

positive_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='positive')]
negative_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='negative')]
neutral_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='neutral')]


print '#########################################################################'
print '###################### Sentences classification #########################'
print '#########################################################################'
                classifier, test_feats)
        else:
            refsets, testsets = scoring.ref_test_sets(classifier, test_feats)

        for label in labels:
            ref = refsets[label]
            test = testsets[label]

            if not args.no_precision:
                print('%s precision: %f' % (label, precision(ref, test) or 0))

            if not args.no_recall:
                print('%s recall: %f' % (label, recall(ref, test) or 0))

            if not args.no_fmeasure:
                print('%s f-measure: %f' % (label, f_measure(ref, test) or 0))

if args.show_most_informative and hasattr(
        classifier, 'show_most_informative_features') and not (
            args.multi and args.binary) and not args.cross_fold:
    print('%d most informative features' % args.show_most_informative)
    classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############
if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
    else:
        name = '%s_%s.pickle' % (args.corpus, '_'.join(args.classifier))
			refsets, testsets = scoring.multi_ref_test_sets(classifier, test_feats)
		else:
			refsets, testsets = scoring.ref_test_sets(classifier, test_feats)
		
		for label in labels:
			ref = refsets[label]
			test = testsets[label]
			
			if not args.no_precision:
				print '%s precision: %f' % (label, precision(ref, test) or 0)
			
			if not args.no_recall:
				print '%s recall: %f' % (label, recall(ref, test) or 0)
			
			if not args.no_fmeasure:
				print '%s f-measure: %f' % (label, f_measure(ref, test) or 0)

if args.show_most_informative and args.algorithm != 'DecisionTree' and not (args.multi and args.binary):
	print '%d most informative features' % args.show_most_informative
	classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############

if not args.no_pickle:
	if args.filename:
		fname = os.path.expanduser(args.filename)
	else:
		name = '%s_%s.pickle' % (args.corpus, args.algorithm)
		fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name)
    trainfeats = negfeats[:4000] + posfeats[:4000]
    testfeats = negfeats[4000:] + posfeats[4000:]
    print("train on %d instances, test on %d instances" % (len(trainfeats), len(testfeats)))
    classifier = NaiveBayesClassifier.train(trainfeats)
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    # cross validation  3-fold
    feats = negfeats + posfeats
    M = math.floor(len(feats) / 3)
    result = []
    for n in range(3):
        val_set = feats[n * M :][:M]
        train_set = feats[(n + 1) * M :] + feats[: n * M]
        classifier = nltk.NaiveBayesClassifier.train(train_set)
        result.append("{:.4f}".format(round(nltk.classify.accuracy(classifier, val_set) * 100, 4)))

    print("cross_validation:", result)

    print("pos precision:", precision(refsets["pos"], testsets["pos"]))
    print("pos recall:", recall(refsets["pos"], testsets["pos"]))
    print("pos F-measure:", f_measure(refsets["pos"], testsets["pos"]))
    print("neg precision:", precision(refsets["neg"], testsets["neg"]))
    print("neg recall:", recall(refsets["neg"], testsets["neg"]))
    print("neg F-measure:", f_measure(refsets["neg"], testsets["neg"]))
    classifier.show_most_informative_features()
Example #43
def cross_fold(instances, trainf, testf, folds=10, trace=1, metrics=True, informative=0):
	if folds < 2:
		raise ValueError('must have at least 2 folds')
	# ensure it isn't an exhaustible iterable
	instances = list(instances)
	# randomize to get an even distribution, in case labeled instances are
	# ordered by label
	random.shuffle(instances)
	l = len(instances)
	step = l // folds
	
	if trace:
		print('step %d over %d folds of %d instances' % (step, folds, l))
	
	accuracies = []
	precisions = collections.defaultdict(list)
	recalls = collections.defaultdict(list)
	f_measures = collections.defaultdict(list)
	
	for f in range(folds):
		if trace:
			print('\nfold %d' % (f+1))
			print('-----%s' % ('-'*len('%s' % (f+1))))
		
		start = f * step
		end = start + step
		train_instances = instances[:start] + instances[end:]
		test_instances = instances[start:end]
		
		if trace:
			print('training on %d:%d + %d:%d' % (0, start, end, l))
		
		obj = trainf(train_instances)
		
		if trace:
			print('testing on %d:%d' % (start, end))
		
		if metrics:
			refsets, testsets = ref_test_sets(obj, test_instances)
			
			for key in set(refsets.keys()) | set(testsets.keys()):
				ref = refsets[key]
				test = testsets[key]
				p = precision(ref, test) or 0
				r = recall(ref, test) or 0
				fm = f_measure(ref, test) or 0
				precisions[key].append(p)
				recalls[key].append(r)
				f_measures[key].append(fm)
				
				if trace:
					print('%s precision: %f' % (key, p))
					print('%s recall: %f' % (key, r))
					print('%s f-measure: %f' % (key, fm))
		
		accuracy = testf(obj, test_instances)
		
		if trace:
			print('accuracy: %f' % accuracy)
		
		accuracies.append(accuracy)
		
		if trace and informative and hasattr(obj, 'show_most_informative_features'):
			obj.show_most_informative_features(informative)
	
	if trace:
		print('\nmean and variance across folds')
		print('------------------------------')
		print('accuracy mean: %f' % (sum(accuracies) / folds))
		print('accuracy variance: %f' % array(accuracies).var())
		
		for key, ps in precisions.items():
			print('%s precision mean: %f' % (key, sum(ps) / folds))
			print('%s precision variance: %f' % (key, array(ps).var()))
		
		for key, rs in recalls.items():
			print('%s recall mean: %f' % (key, sum(rs) / folds))
			print('%s recall variance: %f' % (key, array(rs).var()))
		
		for key, fs in f_measures.items():
			print('%s f_measure mean: %f' % (key, sum(fs) / folds))
			print('%s f_measure variance: %f' % (key, array(fs).var()))
	
	return accuracies, precisions, recalls, f_measures
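A hedged usage sketch for cross_fold: the feature extractor, the movie_reviews corpus, and the choice of trainf/testf below are assumptions for illustration; metrics is disabled because ref_test_sets comes from a separate scoring helper that is not shown here. The function itself also expects random, collections, and numpy's array to be imported in its own module.

from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy

def word_feats(words):
    return dict((word, True) for word in words)

# one labeled bag-of-words featureset per review file
instances = [(word_feats(movie_reviews.words(fileids=[f])), label)
             for label in movie_reviews.categories()
             for f in movie_reviews.fileids(label)]

accuracies, precisions, recalls, f_measures = cross_fold(
    instances,
    trainf=NaiveBayesClassifier.train,   # trainf(train_instances) -> classifier
    testf=accuracy,                      # testf(classifier, test_instances) -> float
    folds=10,
    metrics=False,
)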
def avaliate_classifiers(featureSet):
	print("Vamos treinar o classificador agora!")
	print("\n")
	#random.shuffle(featureSet)

	# Compute recall and precision.
	# You need to build 2 sets for each classification label:
	# a reference set of correct values, and a test set of observed values.

	# The first 6686 + 500 (day 14) tweets are positive and the rest (6757 + 500 from day 14) are negative
	positive_tweets = featureSet[:7185]

	# Shuffle so we are not always testing on the same last tweets
	random.shuffle(positive_tweets)

	#print(featureSet[7185])
	# Take 7185 positive tweets and 7185 negative ones (the negative class actually has 7213)
	negative_tweets = featureSet[7185:14372]
	random.shuffle(negative_tweets)

	# Now split each class into a reference (training) portion and a test portion
	pos_cutoff = int(len(positive_tweets) * 3 / 4)
	neg_cutoff = int(len(negative_tweets) * 3 / 4)

	# 75% of the tweets go into the reference (training) set and the rest into the test set
	pos_references = positive_tweets[:pos_cutoff]
	pos_tests = positive_tweets[pos_cutoff:]

	neg_references = negative_tweets[:neg_cutoff]
	neg_tests = negative_tweets[neg_cutoff:]

	# Training and test sets used to compute accuracy
	training_set = pos_references + neg_references
	testing_set = pos_tests + neg_tests

	start_time = time.time()

	global classifier
	print("Comecou a treina-lo agora!")

	#training_set2 = [(t,l) for (t,l,twe) in training_set]

	classifier = nltk.NaiveBayesClassifier.train(training_set)
	#testing_set2 = [(t,l) for (t,l,twe) in testing_set]
	print("Naive Bayes Algo accuracy:", (nltk.classify.accuracy(classifier, testing_set)) * 100)
	classifier.show_most_informative_features(30)

	refsets = collections.defaultdict(set)
	testsets = collections.defaultdict(set)

	# Build the reference sets (true labels) and test sets (predicted labels).
	# (A commented-out variant of this loop also printed each tweet and its
	# predicted label and paused for manual inspection.)
	for i, (feats, label) in enumerate(testing_set):
		refsets[label].add(i)
		observed = classifier.classify(feats)
		testsets[observed].add(i)

	print('pos precision:', precision(refsets['pos'], testsets['pos']))
	print('pos recall:', recall(refsets['pos'], testsets['pos']))
	print('pos F-measure:', f_measure(refsets['pos'], testsets['pos']))
	print('neg precision:', precision(refsets['neg'], testsets['neg']))
	print('neg recall:', recall(refsets['neg'], testsets['neg']))
	print('neg F-measure:', f_measure(refsets['neg'], testsets['neg']))


	print("--- Classifier executed in %s seconds ---" % (time.time() - start_time))
def avaliate_new_classifier(featureSet):
	print("Vamos treinar o classificador agora!")
	print("\n")
	#random.shuffle(featureSet)

	# Each class has 197 tweets
	positive_tweets = featureSet[:196]

	# Shuffle so we are not always testing on the same last tweets
	random.shuffle(positive_tweets)

	#print(featureSet[7185])
	# Take 7185 positive tweets and 7185 negative ones (the negative class actually has 7213)
	negative_tweets = featureSet[196:293]
	random.shuffle(negative_tweets)

	neutral_tweets = featureSet[293:]
	random.shuffle(neutral_tweets)

	# Now split each class into a reference (training) portion and a test portion
	pos_cutoff = int(len(positive_tweets) * 3 / 4)
	neg_cutoff = int(len(negative_tweets) * 3 / 4)
	neu_cutoff = int(len(neutral_tweets) * 3 / 4)

	# 75% of the tweets go into the reference (training) set and the rest into the test set
	pos_references = positive_tweets[:pos_cutoff]
	pos_tests = positive_tweets[pos_cutoff:]

	neg_references = negative_tweets[:neg_cutoff]
	neg_tests = negative_tweets[neg_cutoff:]

	neu_references = neutral_tweets[:neu_cutoff]
	neu_tests = neutral_tweets[neu_cutoff:]

	# Training and test sets used to compute accuracy
	training_set = pos_references + neg_references + neu_references
	testing_set = pos_tests + neg_tests + neu_tests

	start_time = time.time()

	global classifier
	print("Comecou a treina-lo agora!")

	#training_set2 = [(t,l) for (t,l,twe) in training_set]

	classifier = nltk.NaiveBayesClassifier.train(training_set)
	#testing_set2 = [(t,l) for (t,l,twe) in testing_set]
	print("Naive Bayes Algo accuracy:", (nltk.classify.accuracy(classifier, testing_set)) * 100)
	classifier.show_most_informative_features(30)

	refsets = collections.defaultdict(set)
	testsets = collections.defaultdict(set)

	for i, (feats, label) in enumerate(testing_set):
	    refsets[label].add(i)
	    observed = classifier.classify(feats)
	    testsets[observed].add(i)
	 
	print('pos precision:', precision(refsets['pos'], testsets['pos']))
	print('pos recall:', recall(refsets['pos'], testsets['pos']))
	print('pos F-measure:', f_measure(refsets['pos'], testsets['pos']))

	print('neg precision:', precision(refsets['neg'], testsets['neg']))
	print('neg recall:', recall(refsets['neg'], testsets['neg']))
	print('neg F-measure:', f_measure(refsets['neg'], testsets['neg']))

	print('neutral precision:', precision(refsets['neutral'], testsets['neutral']))
	print('neutral recall:', recall(refsets['neutral'], testsets['neutral']))
	print('neutral F-measure:', f_measure(refsets['neutral'], testsets['neutral']))


	print("--- Classifier executed in %s seconds ---" % (time.time() - start_time))
        stop = int(len(texts) * args.fraction)

        for t in texts[:stop]:
            feat = bag_of_words(norm_words(t))
            feats.append(feat)
            test_feats.append((feat, label))

    print "accuracy:", accuracy(classifier, test_feats)
    refsets, testsets = scoring.ref_test_sets(classifier, test_feats)

    for label in labels:
        ref = refsets[label]
        test = testsets[label]
        print "%s precision: %f" % (label, precision(ref, test) or 0)
        print "%s recall: %f" % (label, recall(ref, test) or 0)
        print "%s f-measure: %f" % (label, f_measure(ref, test) or 0)
else:
    if args.instances == "sents":
        texts = categorized_corpus.sents()
        total = len(texts)
    elif args.instances == "paras":
        texts = (itertools.chain(*para) for para in categorized_corpus.paras())
        total = len(categorized_corpus.paras())
    elif args.instances == "files":
        texts = (categorized_corpus.words(fileids=[fid]) for fid in categorized_corpus.fileids())
        total = len(categorized_corpus.fileids())

    stop = int(total * args.fraction)
    feats = (bag_of_words(norm_words(i)) for i in itertools.islice(texts, stop))

label_counts = collections.defaultdict(int)
Beispiel #47
-1
	def validate(self, validation_set):
		if self.classifier is None:
			raise Exception("self.classifier is None")
		reference = defaultdict(set)
		observed = defaultdict(set)
		observed['neutral'] = set()

		for i, (tweet, label) in enumerate(validation_set):
			reference[label].add(i)
			observation = self.classify(tweet)
			observed[observation].add(i)
		# accuracy computed from the sets above: an instance is correct when its
		# index appears in both the reference and observed sets for the same label
		# (the original passed the observed dict to classify.accuracy, which is not a valid gold set)
		correct = sum(len(reference[label] & observed[label]) for label in reference)
		acc = correct / len(validation_set)
		posp = precision(reference['positive'], observed['positive'])
		posr = recall(reference['positive'], observed['positive'])
		posf = f_measure(reference['positive'], observed['positive'])
		negp = precision(reference['negative'], observed['negative'])
		negr = recall(reference['negative'], observed['negative'])
		negf = f_measure(reference['negative'], observed['negative'])
		
		print "accuracy: %s" % acc
		print "pos precision: %s" % posp
		print "pos recall: %s" % posr
		print "pos f-measure: %s" % posf
		print "neg precision: %s" % negp
		print "neg recall: %s" % negr
		print "neg f-measure: %s" % negf
		return (acc, posp, posr, posf, negp, negr, negf)
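Since validate() returns its metrics as a flat tuple, results from several validation splits can be averaged directly. A minimal sketch of that aggregation (the mean_metrics name, the model object, and the splits list are assumptions, not part of the class above):

# hypothetical aggregation over several validation splits; `model` stands in for
# whatever object defines the validate() method above, and `splits` is assumed
# to be a list of validation sets of (tweet, label) pairs
def mean_metrics(model, splits):
    names = ('accuracy', 'pos precision', 'pos recall', 'pos f-measure',
             'neg precision', 'neg recall', 'neg f-measure')
    totals = [0.0] * len(names)
    for split in splits:
        scores = model.validate(split)
        # precision/recall/f-measure can be None when a label never occurs; count those as 0
        totals = [t + (s or 0) for t, s in zip(totals, scores)]
    return {name: total / len(splits) for name, total in zip(names, totals)}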