import sys
from collections import Counter

from nltk.metrics import ConfusionMatrix


def classification_metrics(gold, predict):
    '''
    Compute and print the classification metrics precision, recall and F1 score
    for each class, using the confusion matrix provided by NLTK.

    gold    - the gold standard / actual values
    predict - the predicted values
    '''
    cm = ConfusionMatrix(gold, predict)
    model_file = open('classification_metrics.txt', 'w')
    sys.stdout = model_file

    print("\t\t---------------------------------------")
    print("\t\t\t\tConfusion Matrix 1")
    print("\t\t---------------------------------------\n")
    print(cm)

    print("\t\t---------------------------------------")
    print("\t\t\t\tConfusion Matrix 2")
    print("\t\t---------------------------------------\n")
    print(cm.pretty_format(sort_by_count=True, show_percents=True, truncate=10))

    labels = tag_set  # `tag_set` is expected to be defined at module level (the tag inventory)
    true_positives = Counter()
    false_negatives = Counter()
    false_positives = Counter()
    for i in labels:
        for j in labels:
            if i == j:
                true_positives[i] += cm[i, j]
            else:
                false_negatives[i] += cm[i, j]
                false_positives[j] += cm[i, j]

    print("\t---------------------------------------")
    print("\tPrecision Recall F-score")
    print("\t---------------------------------------\n")
    for i in sorted(labels):
        if true_positives[i] == 0:
            # No true positives: precision, recall and F-score are all zero
            precision = recall = fscore = 0
        else:
            precision = true_positives[i] / float(true_positives[i] + false_positives[i])
            recall = true_positives[i] / float(true_positives[i] + false_negatives[i])
            fscore = 2 * (precision * recall) / float(precision + recall)
        print(i, "\t", "%.2f" % precision, "\t", "%.2f" % recall, "\t", "%.2f" % fscore)
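# A minimal usage sketch for classification_metrics (the tag inventory and the
# gold/predicted tag lists below are hypothetical; the function expects a
# module-level `tag_set` and writes its report to classification_metrics.txt):
if __name__ == '__main__':
    tag_set = {'DT', 'NN', 'VB'}                 # assumed tag inventory
    gold = ['DT', 'NN', 'VB', 'NN', 'DT']        # gold-standard tags
    predict = ['DT', 'NN', 'VB', 'DT', 'DT']     # tagger output
    classification_metrics(gold, predict)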
from nltk.metrics import ConfusionMatrix


def print_ConfusionMatrix(ref, predicted):
    '''Prints a confusion matrix with the reference labels as rows and the
    predicted labels as columns.

    :param ref: list of actual genres of the test items
    :param predicted: list of predicted genres of the test items
    :return: None; prints a confusion matrix
    '''
    cm = ConfusionMatrix(ref, predicted)
    conf_mat = cm.pretty_format(sort_by_count=False, truncate=15)
    print(conf_mat)
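# Hedged usage sketch for print_ConfusionMatrix; the genre labels below are
# made up purely for illustration:
ref = ['news', 'romance', 'news', 'science_fiction', 'romance']
predicted = ['news', 'news', 'news', 'science_fiction', 'romance']
print_ConfusionMatrix(ref, predicted)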
print "Loading test data..." testset = np.load(path) # Load model print "Loading model..." with open(model, 'rb') as fmodel: cls = pickle.load(fmodel) # Run test sys.stdout.write("Testing:") pred = [] idx = 0 for i in testset[:, 0]: idx += 1 if idx % 1000 == 0: sys.stdout.write(".") sys.stdout.flush() pred.append(str(cls.classify(i))) # Result # * Convert Ref Label to ASCII ref = [str(label) for label in testset[:, 1]] accuracy = scores.accuracy(ref, pred) print "\nAccuracy: %.4f" % accuracy cm = ConfusionMatrix(ref, pred) print "Confusion Matrix: " print (cm.pretty_format(sort_by_count=True, show_percents=True, truncate=9)) # Finished? print "DONE!!"
print('neg precision:', precision(actual_set['neg'], predicted_set['neg']))
print('neg recall:', recall(actual_set['neg'], predicted_set['neg']))
print('neg F-measure:', f_measure(actual_set['neg'], predicted_set['neg']))

# confusion matrix
'''
           |    Predicted NO     |    Predicted YES    |
-----------+---------------------+---------------------+
Actual NO  | True Negative (TN)  | False Positive (FP) |
Actual YES | False Negative (FN) | True Positive (TP)  |
-----------+---------------------+---------------------+
'''
cm = ConfusionMatrix(actual_set_cm, predicted_set_cm)
print(cm.pretty_format(sort_by_count=True, show_percents=True, truncate=9))

# ---------------------------------------------------------------------------------------------------------------
# OUTPUT
'''
------------------------------
 file ids
------------------------------
['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']

------------------------------
 size of dataset
------------------------------
positive: 5000
negative: 5000
another(pos+neg): 20000
'''
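# The nltk.metrics scores used above expect, per class, a *set of item indices*
# for the reference data and another for the predictions, while ConfusionMatrix
# takes two parallel label lists. A hedged sketch of how the inputs referenced
# above (actual_set, predicted_set, actual_set_cm, predicted_set_cm) could be
# built; the example labels are hypothetical:
from collections import defaultdict

from nltk.metrics import precision

actual_labels = ['pos', 'neg', 'neg', 'pos', 'neg']      # gold label per tweet
predicted_labels = ['pos', 'neg', 'pos', 'pos', 'neg']   # classifier output per tweet

actual_set = defaultdict(set)
predicted_set = defaultdict(set)
for index, (gold_label, pred_label) in enumerate(zip(actual_labels, predicted_labels)):
    actual_set[gold_label].add(index)
    predicted_set[pred_label].add(index)

actual_set_cm, predicted_set_cm = actual_labels, predicted_labels
print('neg precision:', precision(actual_set['neg'], predicted_set['neg']))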
    if tested_sentences[i][0] == tagged_sentences[i][0]:
        if tested_sentences[i][1] == tagged_sentences[i][1]:
            correct += 1
        else:
            wrong += 1

# Calculate accuracy
accuracy = correct / (correct + wrong)

key_tags = []
test_tags = []
# To generate the confusion matrix, all the tags of the key file and the test
# file are saved in the lists key_tags and test_tags respectively.
for i in range(len(tagged_sentences)):
    key_tags.append(tagged_sentences[i][1])
for i in range(len(tested_sentences)):
    test_tags.append(tested_sentences[i][1])

# Generate the confusion matrix with NLTK's ConfusionMatrix
cm = ConfusionMatrix(key_tags, test_tags)

# Finally, print the details below in the report file
print("Hi!!! Below is the accuracy report of your tagger.")
print("Accuracy is: " + str("%.2f" % (accuracy * 100)) + "%\n")
print("Confusion matrix:\n%s" % cm.pretty_format(sort_by_count=True))

end = time.clock()  # end of execution
# Print the execution time
print("\nExecution time for scorer is %.2f" % (end - start) + " seconds")
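# Hedged sketch of the data shapes the scorer fragment above assumes: the key
# (gold) file and the tagger output are parsed into parallel lists of
# (token, tag) pairs; the tokens and tags below are hypothetical:
tagged_sentences = [('The', 'DT'), ('dog', 'NN'), ('barks', 'VBZ')]   # gold key
tested_sentences = [('The', 'DT'), ('dog', 'NN'), ('barks', 'NN')]    # tagger output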
from collections import Counter

from nltk.metrics import ConfusionMatrix


def report(expected, predicted, labels, log):
    cm = ConfusionMatrix(expected, predicted)
    log.info("Confusion matrix:\n%s", cm)
    log.info("Confusion matrix, sorted by count:\n%s", cm.pretty_format(sort_by_count=True))

    true_positives = Counter()
    false_negatives = Counter()
    false_positives = Counter()

    # Merge expected & predicted and keep the unique label values
    tested_labels = set(expected + predicted)
    for i in tested_labels:
        for j in tested_labels:
            if i == j:
                true_positives[i] += cm[i, j]
            else:
                false_negatives[i] += cm[i, j]
                false_positives[j] += cm[i, j]

    sb = ''
    for value, count in true_positives.most_common():
        sb += '{0}={1}, '.format(value, count)
    log.info("True Positives (%d): %s\n", sum(true_positives.values()), sb)

    sb = ''
    for value, count in false_negatives.most_common():
        sb += '{0}={1}, '.format(value, count)
    log.info("False Negatives (%d): %s\n", sum(false_negatives.values()), sb)

    sb = ''
    for value, count in false_positives.most_common():
        sb += '{0}={1}, '.format(value, count)
    log.info("False Positives (%d): %s\n", sum(false_positives.values()), sb)

    sb = ''
    last = len(tested_labels) - 1
    for i, x in enumerate(sorted(tested_labels)):
        if true_positives[x] == 0:
            fscore = 0
        else:
            precision = true_positives[x] / float(true_positives[x] + false_positives[x])
            recall = true_positives[x] / float(true_positives[x] + false_negatives[x])
            fscore = 2 * (precision * recall) / float(precision + recall)
        if i != last:
            sb += '{0}={1}, '.format(x, fscore)
        else:
            sb += '{0}={1}'.format(x, fscore)
    log.info('F Scores: {0}\n'.format(sb))

    untested_labels = set(labels) - tested_labels
    if len(untested_labels):
        log.info('No F Scores for untested categories: {0}\n'.format(list(untested_labels)))
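# Hedged usage sketch for report(); the logger name, label inventory and data
# below are hypothetical:
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('confusion-report')

expected = ['spam', 'ham', 'ham', 'spam', 'ham']
predicted = ['spam', 'spam', 'ham', 'spam', 'ham']
report(expected, predicted, labels=['spam', 'ham', 'other'], log=logger)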