def print_precision_recall(classifier, test_dict):
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    for i, (feats, label) in enumerate(test_dict):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'pos precision:', precision(refsets['positive'], testsets['positive'])
    print 'pos recall:', recall(refsets['positive'], testsets['positive'])
    print 'pos F-measure:', f_measure(refsets['positive'], testsets['positive'])
    print 'neg precision:', precision(refsets['negative'], testsets['negative'])
    print 'neg recall:', recall(refsets['negative'], testsets['negative'])
    print 'neg F-measure:', f_measure(refsets['negative'], testsets['negative'])
def evaluate_classifier(featx):
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')
    # print(movie_reviews.words(fileids=[negids[0]]))
    # exit()
    negfeats = [(featx(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
    posfeats = [(featx(movie_reviews.words(fileids=[f])), 'pos') for f in posids]

    negcutoff = int(len(negfeats) * 3 / 4)
    poscutoff = int(len(posfeats) * 3 / 4)

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

    classifier = NaiveBayesClassifier.train(trainfeats)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print('accuracy:', nltk.classify.util.accuracy(classifier, testfeats))
    print('pos precision:', precision(refsets['pos'], testsets['pos']))
    print('pos recall:', recall(refsets['pos'], testsets['pos']))
    print('pos F-measure:', f_measure(refsets['pos'], testsets['pos']))
    print('neg precision:', precision(refsets['neg'], testsets['neg']))
    print('neg recall:', recall(refsets['neg'], testsets['neg']))
    print('neg F-measure:', f_measure(refsets['neg'], testsets['neg']))
    classifier.show_most_informative_features()
def findFMetric(self, classifier):
    refsets, self.testSets = self.findsets(classifier)
    return (f_measure(refsets['bullish'], self.testSets['bullish']),
            f_measure(refsets['bearish'], self.testSets['bearish']),
            f_measure(refsets['neutral'], self.testSets['neutral']))
def main():
    negfeats = []
    posfeats = []
    for i, f in enumerate(reviews[0]):
        print(f)
        if reviews[1][i] == 0:
            negfeats.append((word_feats(f.split()), "neg"))
        else:
            posfeats.append((word_feats(f.split()), "pos"))

    testNegfeats = []
    testPosfeats = []
    for i, f in enumerate(test[0]):
        if test[1][i] == 0:
            testNegfeats.append((word_feats(f.split()), "neg"))
        else:
            testPosfeats.append((word_feats(f.split()), "pos"))

    trainfeats = negfeats + posfeats
    testfeats = testNegfeats + testPosfeats
    print('train on %d instances, test on %d instances - Maximum Entropy'
          % (len(trainfeats), len(testfeats)))

    classifier = MaxentClassifier.train(trainfeats, 'GIS', trace=0, encoding=None,
                                        labels=None, gaussian_prior_sigma=0, max_iter=1)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    accuracy = nltk.classify.util.accuracy(classifier, testfeats)
    pos_precision = precision(refsets['pos'], testsets['pos'])
    pos_recall = recall(refsets['pos'], testsets['pos'])
    pos_fmeasure = f_measure(refsets['pos'], testsets['pos'])
    neg_precision = precision(refsets['neg'], testsets['neg'])
    neg_recall = recall(refsets['neg'], testsets['neg'])
    neg_fmeasure = f_measure(refsets['neg'], testsets['neg'])

    print(pos_recall)
    print(neg_recall)
    print()
    print('')
    print('---------------------------------------')
    print('            Maximum Entropy            ')
    print('---------------------------------------')
    print('accuracy:', accuracy)
    print('precision', (pos_precision + neg_precision) / 2)
    print('recall', (pos_recall + neg_recall) / 2)
    print('f-measure', (pos_fmeasure + neg_fmeasure) / 2)
def assess_classifier(classifier, test_data, text, max_ent_help=False):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    numDataSets = len(test_data)
    onDataSet = 0
    # TN = 0
    # TP = 0
    # FN = 0
    # FP = 0

    # enumerate through the test data and classify them
    for i, (feats, label) in enumerate(test_data):
        refsets[label].add(i)
        if max_ent_help:
            if maxEntClf.classify(feats):
                observed = classifier.classify(feats)
            else:
                observed = False
        else:
            observed = classifier.classify(feats)
        testsets[observed].add(i)
        onDataSet += 1
        # if label == observed:
        #     if observed:
        #         TP += 1
        #     else:
        #         TN += 1
        # else:
        #     if observed:
        #         FP += 1
        #     else:
        #         FN += 1
        # printPercentage(onDataSet/numDataSets * 100, "Extracting Features: ")

    # precision = TP/(TP+FP)
    # recall = TP/(TP+FN)
    # f1Score = 2*((precision*recall)/(precision + recall))

    # calculate the precision, recall, f-measure
    laugh_precision = precision(refsets[True], testsets[True])
    laugh_recall = recall(refsets[True], testsets[True])
    laugh_f1 = f_measure(refsets[True], testsets[True])
    non_laugh_precision = precision(refsets[False], testsets[False])
    non_laugh_recall = recall(refsets[False], testsets[False])
    non_laugh_f1 = f_measure(refsets[False], testsets[False])

    acc = nltk.classify.accuracy(classifier, test_data)
    return [text, acc, laugh_precision, laugh_recall, laugh_f1,
            non_laugh_precision, non_laugh_recall, non_laugh_f1]
def main():
    results = {'Topic': [], 'Precision': [], 'Recall': [], 'F-measure': []}

    print('\nPreparing data...')
    (train_set, test_set) = get_train_test_sets('data/content')

    print('\nNB classifier training...')
    classifier = NaiveBayesClassifier.train(train_set)
    print('NB classifier is trained with {}% accuracy'.format(
        round(accuracy(classifier, test_set) * 100, 1)))

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    for topic in topics:
        results['Topic'].append(topic)
        results['Precision'].append(
            round(precision(refsets[topic], testsets[topic]) * 100, 1))
        results['Recall'].append(
            round(recall(refsets[topic], testsets[topic]) * 100, 1))
        results['F-measure'].append(
            round(f_measure(refsets[topic], testsets[topic]) * 100, 1))

    del classifier, train_set, test_set, refsets, testsets
    gc.collect()

    print(results)
def benchmarking(self, classifier, _test_set, all_f_measure=[], all_precision=[], all_recall=[]):
    from nltk import classify
    accuracy = classify.accuracy(classifier, _test_set)
    print("accuracy:", accuracy)

    from nltk.metrics import precision
    from nltk.metrics import recall
    from nltk.metrics import f_measure
    import collections

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(_test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    prec = precision(refsets['class'], testsets['class'])
    rec = recall(refsets['class'], testsets['class'])
    f1 = f_measure(refsets['class'], testsets['class'])
    print('precision:', prec)
    print('recall:', rec)
    print('F-measure:', f1)

    all_f_measure.append(f1)
    all_precision.append(prec)
    all_recall.append(rec)

    print('========Show top 10 most informative features========')
    classifier.show_most_informative_features(10)
def train_and_score(classifier, train, test):
    try:
        if classifier.__name__ == 'MaxentClassifier':
            clf = classifier.train(train, algorithm='MEGAM')
        elif classifier.__name__ == 'DecisionTreeClassifier':
            clf = classifier.train(train, binary=True, entropy_cutoff=0.8,
                                   depth_cutoff=5, support_cutoff=3)
        else:
            clf = classifier.train(train)
    except AttributeError:
        clf = classifier.train(train)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(test):
        refsets[label].add(i)
        observed = clf.classify(feats)
        testsets[observed].add(i)

    measures = []
    for key in refsets.keys():
        measures.append([
            precision(refsets[key], testsets[key]),
            recall(refsets[key], testsets[key]),
            f_measure(refsets[key], testsets[key])
        ])

    return measures
def evaluate_classifier(featx):
    stanford_pos_list = list()
    stanford_neg_list = list()

    with open(stanford_pos, 'r') as stanford_p:
        for line in stanford_p:
            stanford_pos_list.append(line.strip())

    with open(stanford_neg, 'r') as stanford_n:
        for line in stanford_n:
            stanford_neg_list.append(line.strip())

    stanford_p = stanford_pos_list[:5000]
    stanford_n = stanford_neg_list[:5000]

    negfeats = [(featx(nltk.word_tokenize(line)), 'neg') for line in stanford_n]
    posfeats = [(featx(nltk.word_tokenize(line)), 'pos') for line in stanford_p]

    negcutoff = int(len(negfeats) * 3 / 4)
    poscutoff = int(len(posfeats) * 3 / 4)

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

    classifier = NaiveBayesClassifier.train(trainfeats)
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print('accuracy:', nltk.classify.util.accuracy(classifier, testfeats))
    print('pos precision:', precision(refsets['pos'], testsets['pos']))
    print('pos recall:', recall(refsets['pos'], testsets['pos']))
    print('pos F-measure:', f_measure(refsets['pos'], testsets['pos']))
    print('neg precision:', precision(refsets['neg'], testsets['neg']))
    print('neg recall:', recall(refsets['neg'], testsets['neg']))
    print('neg F-measure:', f_measure(refsets['neg'], testsets['neg']))
    classifier.show_most_informative_features()
def results(classifier, testing_set, training_set):
    now = datetime.now()

    # Trains classifier
    classifier = classifier.train(training_set)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    tp = 0
    fp = 0
    tn = 0
    fn = 0

    # Gets true/false positives/negatives
    for i, (features, label) in enumerate(testing_set):
        refsets[label].add(i)
        observed = classifier.classify(features)
        testsets[observed].add(i)
        if label == 'exp' and observed == 'exp':
            tp += 1
        elif label == 'non' and observed == 'non':
            tn += 1
        elif label == 'exp' and observed == 'non':
            fn += 1
        else:
            fp += 1

    print "Time training: " + str(datetime.now() - now)
    print "True Positives: " + str(tp)
    print "False Positives: " + str(fp)
    print "True Negatives: " + str(tn)
    print "False Negatives: " + str(fn)
    print 'Explicit Precision: ', precision(refsets['exp'], testsets['exp'])
    print 'Explicit recall: ', recall(refsets['exp'], testsets['exp'])
    print 'Explicit F-Score: ', f_measure(refsets['exp'], testsets['exp'])
    print 'Non-Explicit Precision: ', precision(refsets['non'], testsets['non'])
    print 'Non-Explicit Recall: ', recall(refsets['non'], testsets['non'])
    print 'Non-Explicit F-Score: ', f_measure(refsets['non'], testsets['non'])
    print "Accuracy percent: ", (nltk.classify.accuracy(classifier, testing_set)) * 100

    return classifier
def eval_stats(results):
    '''
    Compute recall, precision, and f-measure from the passed results.
    The expected format for results is a dictionary whose keys are article names
    and whose values are tuples (<test category>, <reference category>, <scores>), where:
        test      = category suggested by the classifier,
        reference = pre-classified gold category,
        scores    = None, or a dictionary mapping category names to the matching
                    score for this article.
    '''
    # Calculate number of correct matches
    correct = 0
    missed = defaultdict(tuple)
    for article_name, (suggested, real, scores) in results.iteritems():
        if suggested == real:
            correct += 1
        else:
            missed[article_name] = (suggested, real)

    success_ratio = correct / float(len(results))
    print "Ratio: %0.3f" % success_ratio

    # Print wrong matches
    for name, (suggested, real) in missed.iteritems():
        print "%s\t%s\t%s" % (name, suggested, real)

    # Create sets of reference / test classifications for evaluation
    cat_ref = defaultdict(set)
    cat_test = defaultdict(set)
    for name, (test_category, ref_category, scores) in results.iteritems():
        cat_ref[ref_category].add(name)    # gold-tagged categories
        cat_test[test_category].add(name)  # suggested categories

    # Precision, recall, f-measure and support (number of reference articles
    # in each category) for each category
    print "\nCategory\tPrecision\tRecall\tF-measure\tSupport"
    measures = defaultdict(tuple)
    for category in cat_ref.keys():
        cat_prec = metrics.precision(cat_ref[category], cat_test[category])
        cat_rec = metrics.recall(cat_ref[category], cat_test[category])
        cat_f = metrics.f_measure(cat_ref[category], cat_test[category])
        cat_support = len(cat_ref[category])
        measures[category] = (cat_prec, cat_rec, cat_f, cat_support)
        print "%s\t%0.3f\t%0.3f\t%0.3f\t%d" % \
            (category, cat_prec, cat_rec, cat_f, cat_support)

    # Calculate precision, recall, f-measure for the entire corpus:
    # a support-weighted average of the per-category values, i.e.
    # SUM(metric * support) / SUM(support)
    avg_prec = weighted_average([(cat_measure[0], cat_measure[3]) for
                                 cat_measure in measures.values()])
    avg_rec = weighted_average([(cat_measure[1], cat_measure[3]) for
                                cat_measure in measures.values()])
    avg_f = weighted_average([(cat_measure[2], cat_measure[3]) for
                              cat_measure in measures.values()])
    total_support = sum([cat_support[3] for cat_support in measures.values()])
    print "%s\t%0.3f\t%0.3f\t%0.3f\t%d" % ("Total", avg_prec, avg_rec, avg_f, total_support)
def evaluate_precision_recall_fmeasure(corpus, category, tagger):
    # get the set of gold-standard tags, and the set of tags produced by the tagger
    gold = set(tag_list(corpus.tagged_sents(categories=category)))
    test = set(tag_list(apply_tagger(tagger, corpus.tagged_sents(categories=category))))

    # return the precision, recall and F-measure of the evaluated model
    return [precision(gold, test), recall(gold, test), f_measure(gold, test)]
def evaluate_features(feature_extractor, N, only_acc=False):
    from nltk.corpus import movie_reviews
    from nltk.classify import NaiveBayesClassifier as naive
    from nltk.classify.util import accuracy
    from nltk.metrics import precision, recall, f_measure
    from sys import stdout

    negative = movie_reviews.fileids('neg')
    positive = movie_reviews.fileids('pos')
    negfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])), 'neg') for f in negative]
    posfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])), 'pos') for f in positive]

    negtrain, negtest = stratifiedSamples(negfeats, N)
    postrain, postest = stratifiedSamples(posfeats, N)

    trainfeats = negtrain + postrain
    testfeats = negtest + postest
    classifier = naive.train(trainfeats)
    if only_acc:
        return accuracy(classifier, testfeats)
    print 'accuracy: {}'.format(accuracy(classifier, testfeats))

    # Precision, Recall, F-measure
    from collections import defaultdict
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'pos precision:', precision(refsets['pos'], testsets['pos'])
    print 'pos recall:', recall(refsets['pos'], testsets['pos'])
    print 'pos F-measure:', f_measure(refsets['pos'], testsets['pos'])
    print 'neg precision:', precision(refsets['neg'], testsets['neg'])
    print 'neg recall:', recall(refsets['neg'], testsets['neg'])
    print 'neg F-measure:', f_measure(refsets['neg'], testsets['neg'])
    stdout.flush()
    classifier.show_most_informative_features()
    return classifier
def calcAllClassesFMeasure(classSet, refsets, testsets):
    fSum = 0.0
    denominator = 0
    for category in classSet:
        num = f_measure(refsets[category], testsets[category])
        if num is None:
            continue
        fSum += num
        denominator += 1
    return fSum / denominator
def evaluate(classifier, evalFeats, labels):
    # old eval without cross validation
    try:
        print('accuracy: %f' % nltk.classify.util.accuracy(classifier, evalFeats))
    except ZeroDivisionError:
        print('accuracy: 0')

    refsets, testsets = ref_test_sets(classifier, evalFeats)

    for label in labels:
        ref = refsets[label]
        test = testsets[label]
        print('%s precision: %f' % (label, precision(ref, test) or 0))
        print('%s recall: %f' % (label, recall(ref, test) or 0))
        print('%s f-measure: %f' % (label, f_measure(ref, test) or 0))
def word_similarity_dict(self, word):
    """
    Return a dictionary mapping from words to 'similarity scores',
    indicating how often these two words occur in the same context.
    """
    word = self._key(word)
    word_contexts = set(self._word_to_contexts[word])

    scores = {}
    for w, w_contexts in self._word_to_contexts.items():
        scores[w] = f_measure(word_contexts, set(w_contexts))

    return scores
def evaluation(test_set, classifier):
    """Evaluate the classifier with the test set.
    Print the accuracy, precision, recall and f-measure."""
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print('Accuracy:', accuracy(classifier, test_set))
    print('Precision:', precision(refsets['MALE'], testsets['MALE']))
    print('Recall:', recall(refsets['MALE'], testsets['MALE']))
    print('F Measure:', f_measure(refsets['MALE'], testsets['MALE']))
def assess_classifier(classifier, test_set, text):
    accuracy = nltk.classify.accuracy(classifier, test_set)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    pos_pre = precision(refsets['positive'], testsets['positive'])
    pos_rec = recall(refsets['positive'], testsets['positive'])
    pos_fme = f_measure(refsets['positive'], testsets['positive'])
    neg_pre = precision(refsets['negative'], testsets['negative'])
    neg_rec = recall(refsets['negative'], testsets['negative'])
    neg_fme = f_measure(refsets['negative'], testsets['negative'])
    neu_pre = precision(refsets['neutral'], testsets['neutral'])
    neu_rec = recall(refsets['neutral'], testsets['neutral'])
    neu_fme = f_measure(refsets['neutral'], testsets['neutral'])

    return [text, accuracy, pos_pre, pos_rec, pos_fme,
            neg_pre, neg_rec, neg_fme, neu_pre, neu_rec, neu_fme]
def evaluation(test_set, classifier):
    """Evaluate the classifier with the test set.
    Print the accuracy, precision, recall and f-measure."""
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (featureset, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(featureset)
        testsets[observed].add(i)

    print('Evaluation Results')
    print("\t\t\t{:<20}{:0.2f}".format('classifier accuracy', accuracy(classifier, test_set)))
    print("\t\t\t{:<20}{:0.2f}".format('precision male', precision(refsets['male'], testsets['male'])))
    print("\t\t\t{:<20}{:0.2f}".format('precision female', precision(refsets['female'], testsets['female'])))
    print("\t\t\t{:<20}{:0.2f}".format('recall male', recall(refsets['male'], testsets['male'])))
    print("\t\t\t{:<20}{:0.2f}".format('recall female', recall(refsets['female'], testsets['female'])))
    print("\t\t\t{:<20}{:0.2f}".format('f_measure male', f_measure(refsets['male'], testsets['male'])))
    print("\t\t\t{:<20}{:0.2f}".format('f_measure female', f_measure(refsets['female'], testsets['female'])))
    print()
def measure(classifier, testfeats, alpha=0.5):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    precisions = {}
    recalls = {}
    f_measures = {}

    for label in classifier.labels():
        precisions[label] = metrics.precision(refsets[label], testsets[label])
        recalls[label] = metrics.recall(refsets[label], testsets[label])
        f_measures[label] = metrics.f_measure(refsets[label], testsets[label], alpha)

    return precisions, recalls, f_measures
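# A small follow-up sketch (not part of the original source): macro-averaging the
# per-label dictionaries returned by measure() above, skipping labels for which
# the nltk.metrics scorers returned None (which can happen when a label never
# occurs in the reference or observed sets).
def macro_average(per_label_scores):
    """Average a {label: score} dict, ignoring None entries."""
    valid = [s for s in per_label_scores.values() if s is not None]
    return sum(valid) / len(valid) if valid else None

# Hypothetical usage, assuming `classifier` and `testfeats` already exist:
#     precisions, recalls, f_measures = measure(classifier, testfeats)
#     print('macro precision:', macro_average(precisions))
#     print('macro F-measure:', macro_average(f_measures))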
def evaluate_precision_recall_f_measure(self):
    '''Evaluate precision, recall and f1 measure'''
    scores = dict(prec_pos=[], rec_pos=[], fmeas_pos=[],
                  prec_neg=[], rec_neg=[], fmeas_neg=[])
    lfeats = self.label_feats_from_corpus()

    for i in range(1, 10):
        train_feats, test_feats, nb_classifier = self\
            .__get_elements_for_classification(lfeats, train_number=i, classifying=False)

        refsets = collections.defaultdict(set)
        testsets = collections.defaultdict(set)

        for i, (feats, label) in enumerate(test_feats):
            refsets[label].add(i)
            observed = nb_classifier.classify(feats)
            testsets[observed].add(i)

        precisions = {}
        recalls = {}
        f_measure = {}
        for label in nb_classifier.labels():
            precisions[label] = metrics.precision(refsets[label], testsets[label])
            recalls[label] = metrics.recall(refsets[label], testsets[label])
            f_measure[label] = metrics.f_measure(refsets[label], testsets[label])

        # print nb_classifier.show_most_informative_features(n=20)
        scores["prec_pos"].append(precisions["pos"])
        scores["prec_neg"].append(precisions["neg"])
        scores["rec_pos"].append(recalls["pos"])
        scores["rec_neg"].append(recalls["neg"])
        scores["fmeas_pos"].append(f_measure["pos"])
        scores["fmeas_neg"].append(f_measure["neg"])

    scores["prec_pos"] = sum(scores["prec_pos"]) / len(scores["prec_pos"])
    scores["prec_neg"] = sum(scores["prec_neg"]) / len(scores["prec_neg"])
    scores["rec_pos"] = sum(scores["rec_pos"]) / len(scores["rec_pos"])
    scores["rec_neg"] = sum(scores["rec_neg"]) / len(scores["rec_neg"])
    scores["fmeas_pos"] = sum(scores["fmeas_pos"]) / len(scores["fmeas_pos"])
    scores["fmeas_neg"] = sum(scores["fmeas_neg"]) / len(scores["fmeas_neg"])

    return scores
def eval_classifier(self):
    '''
    Test the model and calculate the metrics of accuracy, precision,
    recall and f-measure
    '''
    test_set = apply_features(self.get_doc_features, self._test_docs, True)
    self._accuracy = accuracy(self._classifier, test_set)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = self._classifier.classify(feats)
        testsets[observed].add(i)

    self.count_categories(self._train_docs)
    for cat in self._categories:
        self._precision[cat] = precision(refsets[cat], testsets[cat])
        self._recall[cat] = recall(refsets[cat], testsets[cat])
        self._f_measure[cat] = f_measure(refsets[cat], testsets[cat])
def measuring_accuracy(self):
    """Testing the model *accuracy*"""
    print("Accuracy:", nltk.classify.accuracy(self.classifier, self.test_set))
    self.classifier.show_most_informative_features(20)

    """Measuring **Precision, Recall, F-Measure** of a classifier.
    Finding the **Confusion matrix**"""
    actual_set = collections.defaultdict(set)
    predicted_set = collections.defaultdict(set)

    # cm here refers to confusion matrix
    actual_set_cm = []
    predicted_set_cm = []

    for i, (feature, label) in enumerate(self.test_set):
        actual_set[label].add(i)
        actual_set_cm.append(label)
        predicted_label = self.classifier.classify(feature)
        predicted_set[predicted_label].add(i)
        predicted_set_cm.append(predicted_label)

    for category in self.data.keys():
        print(category, "precision :", precision(actual_set[category], predicted_set[category]))
        print(category, "recall :", recall(actual_set[category], predicted_set[category]))
        print(category, "f-measure :", f_measure(actual_set[category], predicted_set[category]))

    confusion_matrix = ConfusionMatrix(actual_set_cm, predicted_set_cm)
    print("Confusion Matrix")
    print(confusion_matrix)
def precision_recall_f_measure(classifier, test_feats):
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    for i, (feats, label) in enumerate(test_feats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    precisions = {}
    recalls = {}
    f_measures = {}

    for label in classifier.labels():
        precisions[label] = precision(refsets[label], testsets[label])
        recalls[label] = recall(refsets[label], testsets[label])
        f_measures[label] = f_measure(refsets[label], testsets[label])

    print("\nPrecision:")
    pprint(precisions, width=1)
    print("\nRecall")
    pprint(recalls, width=1)
    print("\nF Measure")
    pprint(f_measures, width=1)
# script to validate coding
import cPickle as pickle
import sys
from collections import defaultdict

from nltk.metrics import accuracy, ConfusionMatrix, precision, recall, f_measure

import classifier

if __name__ == '__main__':
    validation_pickle = sys.argv[1]
    classifier_pickle = sys.argv[2]

    validation_set = pickle.load(open(validation_pickle, 'rb'))
    c = pickle.load(open(classifier_pickle, 'rb'))

    reference = defaultdict(set)
    observed = defaultdict(set)
    reference_labels = []
    observed_labels = []

    for i, (tweet, label) in enumerate(validation_set):
        reference[label].add(i)
        reference_labels.append(label)
        observation = c.classify(tweet)
        observed[observation].add(i)
        observed_labels.append(observation)

    # nltk.metrics.accuracy compares two aligned label sequences
    print "accuracy: %s" % accuracy(reference_labels, observed_labels)
    print "pos precision: %s" % precision(reference['positive'], observed['positive'])
    print "pos recall: %s" % recall(reference['positive'], observed['positive'])
    print "pos f-measure: %s" % f_measure(reference['positive'], observed['positive'])
    print "neg precision: %s" % precision(reference['negative'], observed['negative'])
    print "neg recall: %s" % recall(reference['negative'], observed['negative'])
    print "neg f-measure: %s" % f_measure(reference['negative'], observed['negative'])
#!/usr/bin/python
import nltk
from nltk.metrics import precision, recall, f_measure

reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
test = 'DET VB VB DET NN NN NN IN DET NN'.split()

reference_set = set(reference)
test_set = set(test)

print "Precision: "
print precision(reference_set, test_set)
print "\n"
print "Recall: "
print recall(reference_set, test_set)
print "\n"
print "F_Measure: "
print f_measure(reference_set, test_set)
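# A short follow-up sketch on the same two tag sequences. Because the script above
# converts the sequences to sets, its scores are over unique tag types: precision
# comes out as 1.0 (all 4 predicted types appear in the reference) and recall as 0.8
# (4 of the 5 reference types are predicted). A per-token comparison uses
# nltk.metrics.accuracy on the aligned lists instead.
from nltk.metrics import accuracy

reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
test = 'DET VB VB DET NN NN NN IN DET NN'.split()

print "Token-level accuracy: "
print accuracy(reference, test)  # 8 of 10 positions match -> 0.8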
def cross_fold(instances, trainf, testf, folds=10, trace=1, metrics=True, informative=0):
    if folds < 2:
        raise ValueError('must have at least 2 folds')
    # ensure isn't an exhaustible iterable
    instances = list(instances)
    # randomize so we get an even distribution, in case labeled instances are
    # ordered by label
    random.shuffle(instances)
    l = len(instances)
    step = l / folds

    if trace:
        print 'step %d over %d folds of %d instances' % (step, folds, l)

    accuracies = []
    precisions = collections.defaultdict(list)
    recalls = collections.defaultdict(list)
    f_measures = collections.defaultdict(list)

    for f in range(folds):
        if trace:
            print '\nfold %d' % (f + 1)
            print '-----%s' % ('-' * len('%s' % (f + 1)))

        start = f * step
        end = start + step
        train_instances = instances[:start] + instances[end:]
        test_instances = instances[start:end]

        if trace:
            print 'training on %d:%d + %d:%d' % (0, start, end, l)

        obj = trainf(train_instances)

        if trace:
            print 'testing on %d:%d' % (start, end)

        if metrics:
            refsets, testsets = ref_test_sets(obj, test_instances)

            for key in set(refsets.keys() + testsets.keys()):
                ref = refsets[key]
                test = testsets[key]
                p = precision(ref, test) or 0
                r = recall(ref, test) or 0
                f = f_measure(ref, test) or 0
                precisions[key].append(p)
                recalls[key].append(r)
                f_measures[key].append(f)

                if trace:
                    print '%s precision: %f' % (key, p)
                    print '%s recall: %f' % (key, r)
                    print '%s f-measure: %f' % (key, f)

        accuracy = testf(obj, test_instances)

        if trace:
            print 'accuracy: %f' % accuracy

        accuracies.append(accuracy)

        if trace and informative and hasattr(obj, 'show_most_informative_features'):
            obj.show_most_informative_features(informative)

    if trace:
        print '\nmean and variance across folds'
        print '------------------------------'
        print 'accuracy mean: %f' % (sum(accuracies) / folds)
        print 'accuracy variance: %f' % array(accuracies).var()

        for key, ps in precisions.iteritems():
            print '%s precision mean: %f' % (key, sum(ps) / folds)
            print '%s precision variance: %f' % (key, array(ps).var())

        for key, rs in recalls.iteritems():
            print '%s recall mean: %f' % (key, sum(rs) / folds)
            print '%s recall variance: %f' % (key, array(rs).var())

        for key, fs in f_measures.iteritems():
            print '%s f_measure mean: %f' % (key, sum(fs) / folds)
            print '%s f_measure variance: %f' % (key, array(fs).var())

    return accuracies, precisions, recalls, f_measures
          str(nltk.classify.accuracy(classifier, cv_test)))
    foldAccuracies.append(str(nltk.classify.accuracy(classifier, cv_test)))
    # most informative features
    # now get fold stats such as precision, recall, f-score
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(cv_test):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    foldPositivePrecisions.append(str(precision(refsets['spam'], testsets['spam'])))
    foldPositiveRecalls.append(str(recall(refsets['spam'], testsets['spam'])))
    foldPositiveFScores.append(str(f_measure(refsets['spam'], testsets['spam'])))
    foldNegativePrecisions.append(str(precision(refsets['ham'], testsets['ham'])))
    foldNegativeRecalls.append(str(recall(refsets['ham'], testsets['ham'])))
    foldNegativeFScores.append(str(f_measure(refsets['ham'], testsets['ham'])))

    print('Positive Precision:', precision(refsets['spam'], testsets['spam']))
    print('Positive Recall:', recall(refsets['spam'], testsets['spam']))
    print('Positive F1-Score:', f_measure(refsets['spam'], testsets['spam']))
    print('Negative Precision:', precision(refsets['ham'], testsets['ham']))
    print('Negative Recall:', recall(refsets['ham'], testsets['ham']))
    print('Negative F1-Score:', f_measure(refsets['ham'], testsets['ham']))

    classifier.show_most_informative_features(5)

total = 0
totalPrecPos = 0
def eval(test_alignments):
    f = open(test_alignments, "r")

    # initializing our "counters" used for the aggregate scores
    sentence_pairs = 0
    ibm1_precision_sum, ibm1_recall_sum, ibm1_aer_sum, ibm1_f1_sum = 0, 0, 0, 0
    ibm2_precision_sum, ibm2_recall_sum, ibm2_aer_sum, ibm2_f1_sum = 0, 0, 0, 0

    for line in f:
        sentence_pairs += 1
        strs = line.split("\t")

        print("-" * 47)
        print("Length of foreign sentence: ", len(strs[0].split()))
        print(strs[0])
        print(strs[1], "\n")

        ibm1_aligns = Alignment.fromstring(strs[2])
        ibm2_aligns = Alignment.fromstring(strs[3])
        hand_aligns = Alignment.fromstring(strs[4])

        '''
        Evaluate the sentence pair's precision and recall using the built-in
        nltk.metrics precision and recall functions. Their parameters are:
        1. Reference ("gold standard"): our hand alignments, which follow the same
           format as the system-produced alignments
        2. Test: the alignments produced by the model, which are compared against
           the hand alignments
        '''
        ibm1_precision, ibm1_recall, ibm1_aer, ibm1_f1 = \
            precision(hand_aligns, ibm1_aligns), recall(hand_aligns, ibm1_aligns), \
            alignment_error_rate(hand_aligns, ibm1_aligns), f_measure(hand_aligns, ibm1_aligns)
        ibm2_precision, ibm2_recall, ibm2_aer, ibm2_f1 = \
            precision(hand_aligns, ibm2_aligns), recall(hand_aligns, ibm2_aligns), \
            alignment_error_rate(hand_aligns, ibm2_aligns), f_measure(hand_aligns, ibm2_aligns)

        # Add it to our aggregate calculations
        ibm1_precision_sum += ibm1_precision
        ibm1_recall_sum += ibm1_recall
        ibm1_aer_sum += ibm1_aer
        ibm1_f1_sum += ibm1_f1
        ibm2_precision_sum += ibm2_precision
        ibm2_recall_sum += ibm2_recall
        ibm2_aer_sum += ibm2_aer
        ibm2_f1_sum += ibm2_f1

        print("IBM1 Precision: ", ibm1_precision, "\t", "IBM2 Precision: ", ibm2_precision)
        print("IBM1 Recall: ", ibm1_recall, "\t", "IBM2 Recall: ", ibm2_recall)
        print("IBM1 AER:", ibm1_aer, "\t", "IBM2 AER: ", ibm2_aer)
        print("IBM1 F1: ", ibm1_f1, "\t", "IBM2 F1: ", ibm2_f1)
        print("-" * 47, "\n")

    f.close()

    # Prints out the total statistics of the dataset
    print("-" * 23, "AVERAGE STATS", "-" * 23)
    print("Average IBM1 Precision: ", ibm1_precision_sum / sentence_pairs, "\t" * 2,
          "Average IBM2 Precision: ", ibm2_precision_sum / sentence_pairs)
    print("Average IBM1 Recall: ", ibm1_recall_sum / sentence_pairs, "\t" * 2,
          "Average IBM2 Recall: ", ibm2_recall_sum / sentence_pairs)
    print("Average IBM1 AER:", ibm1_aer_sum / sentence_pairs, "\t" * 2,
          "Average IBM2 AER: ", ibm2_aer_sum / sentence_pairs)
    print("Average IBM1 F1: ", ibm1_f1_sum / sentence_pairs, "\t" * 2,
          "Average IBM2 F1: ", ibm2_f1_sum / sentence_pairs)
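# A minimal, self-contained sketch of the metric calls used above, on toy alignments
# (these index pairs are illustrative and not taken from the original data file).
# nltk's Alignment is a set of (source, target) index pairs, so the set-based
# scorers apply to it directly.
from nltk.translate import Alignment
from nltk.translate.metrics import alignment_error_rate
from nltk.metrics import precision, recall, f_measure

gold = Alignment.fromstring('0-0 1-1 2-2 3-3')
model = Alignment.fromstring('0-0 1-1 2-3 3-3')

print(precision(gold, model))             # 3 of 4 proposed links are correct -> 0.75
print(recall(gold, model))                # 3 of 4 gold links are recovered   -> 0.75
print(f_measure(gold, model))             # harmonic mean                     -> 0.75
print(alignment_error_rate(gold, model))  # 1 - 2*3/(4+4)                     -> 0.25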
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

str = 'SINGLE FOLD RESULT ' + '(' + 'linear-svc' + ')'

# training with LinearSVC
classifier = SklearnClassifier(LinearSVC())
classifier.train(trainfeats)

for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)

accuracy = nltk.classify.util.accuracy(classifier, testfeats) * 100
pos_precision = precision(refsets['pos'], testsets['pos'])
pos_recall = recall(refsets['pos'], testsets['pos'])
pos_fmeasure = f_measure(refsets['pos'], testsets['pos'])
neg_precision = precision(refsets['neg'], testsets['neg'])
neg_recall = recall(refsets['neg'], testsets['neg'])
neg_fmeasure = f_measure(refsets['neg'], testsets['neg'])

print('')
print('---------------------------------------')
print(str)
print('---------------------------------------')
print('accuracy: ', accuracy, '%')
print('precision', (pos_precision + neg_precision) / 2)
print('recall', (pos_recall + neg_recall) / 2)
print('f-measure', (pos_fmeasure + neg_fmeasure) / 2)

########################################################################################
########################################################################################
def evaluate_classifier(featx):
    negfeats = [(featx(f), 'neg') for f in word_split(negdata)]
    posfeats = [(featx(f), 'pos') for f in word_split(posdata)]

    negcutoff = int(len(negfeats) * 3 / 4)
    poscutoff = int(len(posfeats) * 3 / 4)

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    # using 3 classifiers
    classifier_list = ['nb', 'maxent', 'svm']

    for cl in classifier_list:
        if cl == 'maxent':
            classifierName = 'Maximum Entropy'
            classifier = MaxentClassifier.train(trainfeats, 'GIS', trace=0, encoding=None,
                                                labels=None, gaussian_prior_sigma=0, max_iter=1)
        elif cl == 'svm':
            classifierName = 'SVM'
            classifier = SklearnClassifier(LinearSVC(), sparse=False)
            classifier.train(trainfeats)
        else:
            classifierName = 'Naive Bayes'
            classifier = NaiveBayesClassifier.train(trainfeats)

        refsets = collections.defaultdict(set)
        testsets = collections.defaultdict(set)
        for i, (feats, label) in enumerate(testfeats):
            refsets[label].add(i)
            observed = classifier.classify(feats)
            testsets[observed].add(i)

        accuracy = nltk.classify.util.accuracy(classifier, testfeats)
        pos_precision = precision(refsets['pos'], testsets['pos'])
        pos_recall = recall(refsets['pos'], testsets['pos'])
        pos_fmeasure = f_measure(refsets['pos'], testsets['pos'])
        neg_precision = precision(refsets['neg'], testsets['neg'])
        neg_recall = recall(refsets['neg'], testsets['neg'])
        neg_fmeasure = f_measure(refsets['neg'], testsets['neg'])

        print('')
        print('---------------------------------------')
        print('SINGLE FOLD RESULT ' + '(' + classifierName + ')')
        print('---------------------------------------')
        print('accuracy:', accuracy)
        print('precision', (pos_precision + neg_precision) / 2)
        print('recall', (pos_recall + neg_recall) / 2)
        print('f-measure', (pos_fmeasure + neg_fmeasure) / 2)
        # classifier.show_most_informative_features()
        print('')

    ## CROSS VALIDATION

    trainfeats = negfeats + posfeats

    # SHUFFLE TRAIN SET
    # As in cross validation, the test chunk might have only negative or only positive data
    random.shuffle(trainfeats)
    n = 5  # 5-fold cross-validation

    for cl in classifier_list:
        subset_size = int(len(trainfeats) / n)
        accuracy = []
        pos_precision = []
        pos_recall = []
        neg_precision = []
        neg_recall = []
        pos_fmeasure = []
        neg_fmeasure = []
        cv_count = 1

        for i in range(n):
            testing_this_round = trainfeats[i * subset_size:][:subset_size]
            training_this_round = trainfeats[:i * subset_size] + trainfeats[(i + 1) * subset_size:]

            if cl == 'maxent':
                classifierName = 'Maximum Entropy'
                classifier = MaxentClassifier.train(training_this_round, 'GIS', trace=0,
                                                    encoding=None, labels=None,
                                                    gaussian_prior_sigma=0, max_iter=1)
            elif cl == 'svm':
                classifierName = 'SVM'
                classifier = SklearnClassifier(LinearSVC(), sparse=False)
                classifier.train(training_this_round)
            else:
                classifierName = 'Naive Bayes'
                classifier = NaiveBayesClassifier.train(training_this_round)

            refsets = collections.defaultdict(set)
            testsets = collections.defaultdict(set)
            for i, (feats, label) in enumerate(testing_this_round):
                refsets[label].add(i)
                observed = classifier.classify(feats)
                testsets[observed].add(i)

            cv_accuracy = nltk.classify.util.accuracy(classifier, testing_this_round)
            cv_pos_precision = precision(refsets['pos'], testsets['pos'])
            cv_pos_recall = recall(refsets['pos'], testsets['pos'])
            cv_pos_fmeasure = f_measure(refsets['pos'], testsets['pos'])
            cv_neg_precision = precision(refsets['neg'], testsets['neg'])
            cv_neg_recall = recall(refsets['neg'], testsets['neg'])
            cv_neg_fmeasure = f_measure(refsets['neg'], testsets['neg'])

            accuracy.append(cv_accuracy)
            pos_precision.append(cv_pos_precision)
            pos_recall.append(cv_pos_recall)
            neg_precision.append(cv_neg_precision)
            neg_recall.append(cv_neg_recall)
            pos_fmeasure.append(cv_pos_fmeasure)
            neg_fmeasure.append(cv_neg_fmeasure)
            cv_count += 1

        print('---------------------------------------')
        print('N-FOLD CROSS VALIDATION RESULT ' + '(' + classifierName + ')')
        print('---------------------------------------')
        print('accuracy:', sum(accuracy) / n)
        print('precision', (sum(pos_precision) / n + sum(neg_precision) / n) / 2)
        print('recall', (sum(pos_recall) / n + sum(neg_recall) / n) / 2)
        print('f-measure', (sum(pos_fmeasure) / n + sum(neg_fmeasure) / n) / 2)
        print('')
# Precision and recall calculation
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)

# 35% false positives for the pos label.
print 'Positive precision:', precision(refsets['pos'], testsets['pos'])
# 98% recall, so very few false negatives
print 'Positive recall:', recall(refsets['pos'], testsets['pos'])
print 'Positive F-measure:', f_measure(refsets['pos'], testsets['pos'])
print 'Negative precision:', precision(refsets['neg'], testsets['neg'])
print 'Negative recall:', recall(refsets['neg'], testsets['neg'])
print 'Negative F-measure:', f_measure(refsets['neg'], testsets['neg'])

# Accuracy
print '\nAccuracy:', nltk.classify.util.accuracy(classifier, testfeats)
classifier.show_most_informative_features()
# print classifier.classify(word_feats('bad'))


# Remove stopwords and present the paragraph words in order to make sense
def summary(words):
    return collections.OrderedDict([(word, True) for word in words if word not in stopwordset])
def f_measure(self): return f_measure(self._reference, self._test)
print 'Dictionary : ', dictionary.get_name(), '\n'
print ConfusionMatrix(gold_standard, results).pp()
print 'Accuracy: ', accuracy(gold_standard, results)

for c in [0, 1, -1]:
    print 'Metrics for class ', c
    gold = set()
    test = set()
    for i, x in enumerate(gold_standard):
        if x == c:
            gold.add(i)
    for i, x in enumerate(results):
        if x == c:
            test.add(i)
    print 'Precision: ', precision(gold, test)
    print 'Recall   : ', recall(gold, test)
    print 'F_measure: ', f_measure(gold, test)
    print '\n\n'

#################### Sentences classification ##########################
# Not reported in the paper because LIWC doesn't have a neutral class

positive_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='positive')]
negative_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='negative')]
neutral_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='neutral')]

print '#########################################################################'
print '###################### Sentences classification #########################'
print '#########################################################################'
                                                          classifier, test_feats)
else:
    refsets, testsets = scoring.ref_test_sets(classifier, test_feats)

for label in labels:
    ref = refsets[label]
    test = testsets[label]

    if not args.no_precision:
        print('%s precision: %f' % (label, precision(ref, test) or 0))

    if not args.no_recall:
        print('%s recall: %f' % (label, recall(ref, test) or 0))

    if not args.no_fmeasure:
        print('%s f-measure: %f' % (label, f_measure(ref, test) or 0))

if args.show_most_informative and hasattr(classifier, 'show_most_informative_features') \
        and not (args.multi and args.binary) and not args.cross_fold:
    print('%d most informative features' % args.show_most_informative)
    classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############

if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
    else:
        name = '%s_%s.pickle' % (args.corpus, '_'.join(args.classifier))
    refsets, testsets = scoring.multi_ref_test_sets(classifier, test_feats)
else:
    refsets, testsets = scoring.ref_test_sets(classifier, test_feats)

for label in labels:
    ref = refsets[label]
    test = testsets[label]

    if not args.no_precision:
        print '%s precision: %f' % (label, precision(ref, test) or 0)

    if not args.no_recall:
        print '%s recall: %f' % (label, recall(ref, test) or 0)

    if not args.no_fmeasure:
        print '%s f-measure: %f' % (label, f_measure(ref, test) or 0)

if args.show_most_informative and args.algorithm != 'DecisionTree' and not (args.multi and args.binary):
    print '%d most informative features' % args.show_most_informative
    classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############

if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
    else:
        name = '%s_%s.pickle' % (args.corpus, args.algorithm)
        fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name)
trainfeats = negfeats[:4000] + posfeats[:4000]
testfeats = negfeats[4000:] + posfeats[4000:]
print("train on %d instances, test on %d instances" % (len(trainfeats), len(testfeats)))

classifier = NaiveBayesClassifier.train(trainfeats)
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)

# cross validation 3-fold
feats = negfeats + posfeats
M = math.floor(len(feats) / 3)
result = []
for n in range(3):
    val_set = feats[n * M:][:M]
    train_set = feats[(n + 1) * M:] + feats[:n * M]
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    result.append("{:.4f}".format(round(nltk.classify.accuracy(classifier, val_set) * 100, 4)))
print("cross_validation:", result)

print("pos precision:", precision(refsets["pos"], testsets["pos"]))
print("pos recall:", recall(refsets["pos"], testsets["pos"]))
print("pos F-measure:", f_measure(refsets["pos"], testsets["pos"]))
print("neg precision:", precision(refsets["neg"], testsets["neg"]))
print("neg recall:", recall(refsets["neg"], testsets["neg"]))
print("neg F-measure:", f_measure(refsets["neg"], testsets["neg"]))
classifier.show_most_informative_features()
def cross_fold(instances, trainf, testf, folds=10, trace=1, metrics=True, informative=0):
    if folds < 2:
        raise ValueError('must have at least 2 folds')
    # ensure isn't an exhaustible iterable
    instances = list(instances)
    # randomize so we get an even distribution, in case labeled instances are
    # ordered by label
    random.shuffle(instances)
    l = len(instances)
    step = l // folds  # integer fold size so the slice indices below stay ints

    if trace:
        print('step %d over %d folds of %d instances' % (step, folds, l))

    accuracies = []
    precisions = collections.defaultdict(list)
    recalls = collections.defaultdict(list)
    f_measures = collections.defaultdict(list)

    for f in range(folds):
        if trace:
            print('\nfold %d' % (f + 1))
            print('-----%s' % ('-' * len('%s' % (f + 1))))

        start = f * step
        end = start + step
        train_instances = instances[:start] + instances[end:]
        test_instances = instances[start:end]

        if trace:
            print('training on %d:%d + %d:%d' % (0, start, end, l))

        obj = trainf(train_instances)

        if trace:
            print('testing on %d:%d' % (start, end))

        if metrics:
            refsets, testsets = ref_test_sets(obj, test_instances)

            for key in set(refsets.keys()) | set(testsets.keys()):
                ref = refsets[key]
                test = testsets[key]
                p = precision(ref, test) or 0
                r = recall(ref, test) or 0
                f = f_measure(ref, test) or 0
                precisions[key].append(p)
                recalls[key].append(r)
                f_measures[key].append(f)

                if trace:
                    print('%s precision: %f' % (key, p))
                    print('%s recall: %f' % (key, r))
                    print('%s f-measure: %f' % (key, f))

        accuracy = testf(obj, test_instances)

        if trace:
            print('accuracy: %f' % accuracy)

        accuracies.append(accuracy)

        if trace and informative and hasattr(obj, 'show_most_informative_features'):
            obj.show_most_informative_features(informative)

    if trace:
        print('\nmean and variance across folds')
        print('------------------------------')
        print('accuracy mean: %f' % (sum(accuracies) / folds))
        print('accuracy variance: %f' % array(accuracies).var())

        for key, ps in iteritems(precisions):
            print('%s precision mean: %f' % (key, sum(ps) / folds))
            print('%s precision variance: %f' % (key, array(ps).var()))

        for key, rs in iteritems(recalls):
            print('%s recall mean: %f' % (key, sum(rs) / folds))
            print('%s recall variance: %f' % (key, array(rs).var()))

        for key, fs in iteritems(f_measures):
            print('%s f_measure mean: %f' % (key, sum(fs) / folds))
            print('%s f_measure variance: %f' % (key, array(fs).var()))

    return accuracies, precisions, recalls, f_measures
def avaliate_classifiers(featureSet):
    print("Vamos treinar o classificador agora!")
    print("\n")
    # random.shuffle(featureSet)

    # Computes recall and precision.
    # You need to build 2 sets for each classification label:
    # a reference set of correct values, and a test set of observed values.

    # The first 6686 + 500 (day 14) tweets are positive and the rest (6757 + 500 (day 14)) are negative
    positive_tweets = featureSet[:7185]
    # Shuffle so we are not always testing on the same last tweets
    random.shuffle(positive_tweets)
    # print(featureSet[7185])
    # Take 7185 positive and 7185 negative (the negative set has 7213)
    negative_tweets = featureSet[7185:14372]
    random.shuffle(negative_tweets)

    # Now split each class into a reference set and a test set
    pos_cutoff = len(positive_tweets) * 3 / 4
    neg_cutoff = len(negative_tweets) * 3 / 4

    # 75% of the tweets are used as reference (training) and the rest for testing
    pos_references = positive_tweets[:pos_cutoff]
    pos_tests = positive_tweets[pos_cutoff:]
    neg_references = negative_tweets[:neg_cutoff]
    neg_tests = negative_tweets[neg_cutoff:]

    # Training and test sets used to compute the accuracy
    training_set = pos_references + neg_references
    testing_set = pos_tests + neg_tests

    start_time = time.time()
    global classifier
    print("Comecou a treina-lo agora!")
    # training_set2 = [(t, l) for (t, l, twe) in training_set]
    classifier = nltk.NaiveBayesClassifier.train(training_set)
    # testing_set2 = [(t, l) for (t, l, twe) in testing_set]
    print("Naive Bayes Algo accuracy:", (nltk.classify.accuracy(classifier, testing_set)) * 100)
    classifier.show_most_informative_features(30)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    # for i, (feats, label, l) in enumerate(testing_set):
    #     refsets[label].add(i)
    #     observed = classifier.classify(feats)
    #     testsets[observed].add(i)
    #     print("--" * 200)
    #     print()
    #     print("Classified as: ", observed)
    #     print()
    #     print(l)
    #     print()
    #     print("--" * 200)
    #     raw_input("Press any key to continue:")

    print 'pos precision:', precision(refsets['pos'], testsets['pos'])
    print 'pos recall:', recall(refsets['pos'], testsets['pos'])
    print 'pos F-measure:', f_measure(refsets['pos'], testsets['pos'])
    print 'neg precision:', precision(refsets['neg'], testsets['neg'])
    print 'neg recall:', recall(refsets['neg'], testsets['neg'])
    print 'neg F-measure:', f_measure(refsets['neg'], testsets['neg'])
    print("--- Classifier executed in %s seconds ---" % (time.time() - start_time))
def avaliate_new_classifier(featureSet):
    print("Vamos treinar o classificador agora!")
    print("\n")
    # random.shuffle(featureSet)

    # Each class has 197 tweets
    positive_tweets = featureSet[:196]
    # Shuffle so we are not always testing on the same last tweets
    random.shuffle(positive_tweets)
    # print(featureSet[7185])
    negative_tweets = featureSet[196:293]
    random.shuffle(negative_tweets)
    neutral_tweets = featureSet[293:]
    random.shuffle(neutral_tweets)

    # Now split each class into a reference set and a test set
    pos_cutoff = len(positive_tweets) * 3 / 4
    neg_cutoff = len(negative_tweets) * 3 / 4
    neu_cutoff = len(neutral_tweets) * 3 / 4

    # 75% of the tweets are used as reference (training) and the rest for testing
    pos_references = positive_tweets[:pos_cutoff]
    pos_tests = positive_tweets[pos_cutoff:]
    neg_references = negative_tweets[:neg_cutoff]
    neg_tests = negative_tweets[neg_cutoff:]
    neu_references = neutral_tweets[:neu_cutoff]
    neu_tests = neutral_tweets[neu_cutoff:]

    # Training and test sets used to compute the accuracy
    training_set = pos_references + neg_references + neu_references
    testing_set = pos_tests + neg_tests + neu_tests

    start_time = time.time()
    global classifier
    print("Comecou a treina-lo agora!")
    # training_set2 = [(t, l) for (t, l, twe) in training_set]
    classifier = nltk.NaiveBayesClassifier.train(training_set)
    # testing_set2 = [(t, l) for (t, l, twe) in testing_set]
    print("Naive Bayes Algo accuracy:", (nltk.classify.accuracy(classifier, testing_set)) * 100)
    classifier.show_most_informative_features(30)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testing_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'pos precision:', precision(refsets['pos'], testsets['pos'])
    print 'pos recall:', recall(refsets['pos'], testsets['pos'])
    print 'pos F-measure:', f_measure(refsets['pos'], testsets['pos'])
    print 'neg precision:', precision(refsets['neg'], testsets['neg'])
    print 'neg recall:', recall(refsets['neg'], testsets['neg'])
    print 'neg F-measure:', f_measure(refsets['neg'], testsets['neg'])
    print 'neutral precision:', precision(refsets['neutral'], testsets['neutral'])
    print 'neutral recall:', recall(refsets['neutral'], testsets['neutral'])
    print 'neutral F-measure:', f_measure(refsets['neutral'], testsets['neutral'])
    print("--- Classifier executed in %s seconds ---" % (time.time() - start_time))
        stop = int(len(texts) * args.fraction)

        for t in texts[:stop]:
            feat = bag_of_words(norm_words(t))
            feats.append(feat)
            test_feats.append((feat, label))

    print "accuracy:", accuracy(classifier, test_feats)
    refsets, testsets = scoring.ref_test_sets(classifier, test_feats)

    for label in labels:
        ref = refsets[label]
        test = testsets[label]
        print "%s precision: %f" % (label, precision(ref, test) or 0)
        print "%s recall: %f" % (label, recall(ref, test) or 0)
        print "%s f-measure: %f" % (label, f_measure(ref, test) or 0)
else:
    if args.instances == "sents":
        texts = categorized_corpus.sents()
        total = len(texts)
    elif args.instances == "paras":
        texts = (itertools.chain(*para) for para in categorized_corpus.paras())
        total = len(categorized_corpus.paras())
    elif args.instances == "files":
        texts = (categorized_corpus.words(fileids=[fid]) for fid in categorized_corpus.fileids())
        total = len(categorized_corpus.fileids())

    stop = int(total * args.fraction)
    feats = (bag_of_words(norm_words(i)) for i in itertools.islice(texts, stop))
    label_counts = collections.defaultdict(int)
def validate(self, validation_set):
    if self.classifier is None:
        raise Exception("self.classifier is None")

    reference = defaultdict(set)
    observed = defaultdict(set)
    observed['neutral'] = set()
    correct = 0

    for i, (tweet, label) in enumerate(validation_set):
        reference[label].add(i)
        observation = self.classify(tweet)
        observed[observation].add(i)
        if observation == label:
            correct += 1

    # accuracy over the validation set: fraction of predictions matching the reference label
    acc = correct / float(len(validation_set))
    posp = precision(reference['positive'], observed['positive'])
    posr = recall(reference['positive'], observed['positive'])
    posf = f_measure(reference['positive'], observed['positive'])
    negp = precision(reference['negative'], observed['negative'])
    negr = recall(reference['negative'], observed['negative'])
    negf = f_measure(reference['negative'], observed['negative'])

    print "accuracy: %s" % acc
    print "pos precision: %s" % posp
    print "pos recall: %s" % posr
    print "pos f-measure: %s" % posf
    print "neg precision: %s" % negp
    print "neg recall: %s" % negr
    print "neg f-measure: %s" % negf

    return (acc, posp, posr, posf, negp, negr, negf)