def test_plot(self):
    """Smoke-test ``ConfusionMatrix.plot`` with the available backends.

    The test is skipped when matplotlib cannot be imported.  The seaborn
    backend is exercised only when seaborn can be imported, and an unknown
    backend name is expected to raise ``ValueError``.
    """
    try:
        import matplotlib.pyplot  # noqa
    except ImportError:
        # Skip through unittest itself instead of importing the
        # unmaintained ``nose`` package just to raise its SkipTest.
        self.skipTest("matplotlib is not installed")

    y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
              'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    cm = ConfusionMatrix(y_true, y_pred)

    # check plot works
    cm.plot()

    # seaborn is optional as well: only exercise that backend when it is
    # importable, so a missing optional package is not reported as an error.
    try:
        import seaborn  # noqa
    except ImportError:
        has_seaborn = False
    else:
        has_seaborn = True
    if has_seaborn:
        cm.plot(backend='seaborn')

    with self.assertRaises(ValueError):
        cm.plot(backend='xxx')
predictions.tolist()) confusion_matrix # Few stats cms = confusion_matrix.stats() print("Overall Accuracy is ", round(cms['overall']['Accuracy'], 2), ", Kappa is ", round(cms['overall']['Kappa'], 2)) # Predict on test data predictions = classifier.predict(test[listAllPredictiveFeatures]) confusion_matrix = ConfusionMatrix(test[strResponse].tolist(), predictions.tolist()) confusion_matrix # normalized confusion matrix confusion_matrix.plot(normalized=True) plt.show() #Statistics are also available as follows confusion_matrix.print_stats() cms = confusion_matrix.stats() print("Overall Accuracy is ", round(cms['overall']['Accuracy'], 2), ", Kappa is ", round(cms['overall']['Kappa'], 2)) #5000: Overall Accuracy is 0.8 , Kappa is 0.24 df = cms['class'].reset_index() df[df['index'].str.contains('Precision')] df[df['index'].str.contains('Sensitivity')] df[df['index'].str.contains('Specificity')] # 101 line # How to predict probabilities of each class
def _set_var(var, text):
    """Call ``var.set(text)`` unless ``var`` is None."""
    if var is not None:
        var.set(text)


def evaluate(textFile, valueFile=None, varStatusBar=None, varCmOutput=None,
             varOutput=None):
    """Predict an emotion for each line of ``textFile`` and write reports.

    For every line the filtered words are scored with ``evaluateWord``, the
    per-word values are summed column-wise, the log10 priors read from
    ``./data/Priors.csv`` are added, and ``guessEmotion`` picks the label.
    One row per line is appended to a timestamped CSV under
    ``./evaluations/`` and a readable entry to a timestamped file under
    ``./reports/``.

    When ``valueFile`` is given, one line is read from it per input line to
    obtain the real values.  After the loop the mismatches are written to
    ``./data/RealPred.csv``, a ``ConfusionMatrix`` is built, its statistics
    are printed, the user is asked whether to plot it, and ``str(cm)`` is
    written to a timestamped file under ``./statistics/``.

    Args:
        textFile: iterable of text lines; the text before the first comma
            of each line is used as the line id.
        valueFile: optional object with ``readline()`` yielding
            comma-separated rows whose first field is skipped.
        varStatusBar: optional object with a ``set(str)`` method used for
            status messages (presumably a Tk variable -- confirm with the
            caller).  ``None`` is accepted and silently ignored.
        varCmOutput: optional ``set(str)`` target for the confusion matrix.
        varOutput: optional ``set(str)`` target for the accuracy line.

    Raises:
        IOError: re-raised from ``evaluateWord`` after the status bar has
            been updated.
    """
    timestamp = strftime("%Y-%m-%d:%H-%M-%S")
    reportFile = "./reports/" + timestamp + ".txt"
    outputFile = "./evaluations/" + timestamp + ".csv"
    statsFile = "./statistics/" + timestamp + ".txt"

    wf = WordFilter()
    totalReal = []
    totalPred = []

    with open("./data/Priors.csv", "r") as priorFile:
        priors = priorFile.readline().strip().split(',')[1:]
    priors = [log10(float(x)) for x in priors]

    # Header row followed by one row per misclassified line.
    lst = [("Real Emotion", "Predicted Emotion", "Tweet")]

    for line in tqdm(textFile):
        lineID = line.split(',')[0]
        words = wf.filterWords(line)
        predValues = []
        unfound = []
        for word in words:
            try:
                values = evaluateWord(word)
            except IOError:
                _set_var(varStatusBar,
                         "WordMap not found. Please train system first.")
                raise
            if values is not None:
                predValues.append(values)
            else:
                unfound.append(word)

        # Build real lists: a lazy ``map`` would already be exhausted by the
        # time the values are formatted below, leaving the output empty.
        predValues = [sum(column) for column in zip(*predValues)]
        predProb = [sum(pair) for pair in zip(priors, predValues)]
        predEmotion = guessEmotion(predProb)
        valueFormat = ",".join("%.2f" % n for n in predValues)

        if valueFile:
            realValues = [
                float(i)
                for i in valueFile.readline().strip().split(',')[1:]
            ]
            realEmotion = guessEmotion(realValues)
            # Lines without any known word are kept out of the statistics.
            if predEmotion != "No Words Found":
                totalReal.append(realEmotion)
                totalPred.append(predEmotion)
                if realEmotion != predEmotion:
                    lst.append((realEmotion, predEmotion, line))

        with open(outputFile, "a+") as output:
            output.write("{},{},{}\n".format(lineID, predEmotion,
                                             valueFormat))

        with open(reportFile, "a+") as report:
            report.write("{}\n".format(line))
            report.write("Filtered: {}\n".format(words))
            report.write("Words not found:{}\n".format(unfound))
            report.write("Emotion probabilities: {}\n".format(valueFormat))
            report.write("Predicted emotion: {}\n".format(predEmotion))
            if valueFile:
                report.write("Correct emotion: {}\n".format(realEmotion))
            report.write("-" * 70)
            report.write("\n")

    if valueFile:
        _set_var(varStatusBar, "Evaluation Complete.")
        with open('./data/RealPred.csv', 'w') as realpredFile:
            writer = csv.writer(realpredFile, delimiter=',')
            writer.writerows(lst)

        cm = ConfusionMatrix(totalReal, totalPred)
        viewPlot = tkMessageBox.askyesno("Confusion Matrix",
                                         "View confusion matrix plot?")
        # Only meaningful (and only asked) when a plot was requested.
        normaliseData = False
        if viewPlot:
            normaliseData = tkMessageBox.askyesno("Confusion Matrix",
                                                  "Normalise plot?")

        # Compute the statistics once and reuse them everywhere below.
        data = cm.stats()
        _set_var(varOutput,
                 "Accuracy: " + str(data['overall']['Accuracy']))
        _set_var(varCmOutput, "Confusion Matrix: \n" + str(data['cm']))
        for key, value in data.items():
            print(key, value)

        if viewPlot:
            cm.plot(normalized=normaliseData)
            plt.show()

        with open(statsFile, "w") as report:
            report.write(str(cm))
            report.write("\n")
from sklearn.naive_bayes import MultinomialNB
import matplotlib.pyplot as plt
from pandas_ml import ConfusionMatrix

# 20 newsgroups: roughly 18000 posts over 20 topics, shipped as separate
# 'train' and 'test' subsets.
newsgroups_train = datasets.fetch_20newsgroups(subset='train')
newsgroups_test = datasets.fetch_20newsgroups(subset='test')

# Fit the TF-IDF vocabulary on the training posts only, then reuse it to
# transform the test posts.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(newsgroups_train.data)
X_test = vectorizer.transform(newsgroups_test.data)
y_train, y_test = newsgroups_train.target, newsgroups_test.target

# ``fit`` returns the estimator, so construction and training can be chained.
model = MultinomialNB().fit(X_train, y_train)
predictions = model.predict(X_test)

# print model.score(X_test, y_test)
# print metrics.classification_report(y_test, predictions)

labels = list(newsgroups_train.target_names)
print(labels)

# Confusion matrix of true vs. predicted targets, with the topic names
# supplied as labels.
cm = ConfusionMatrix(y_test, predictions, labels)
cm.plot()
plt.show()
# Evaluation report for the fitted classifier ``clf`` (Python 2 syntax).
# In order, the statements below: print the score of ``clf`` on
# (X_test, y_test); print a Counter of the factorized training labels;
# build and plot a normalized ConfusionMatrix of ``y_test`` against
# ``y_predictions``; print the mean and the individual 5-fold
# cross-validation scores; print a classification report whose target
# names come from a LabelEncoder fitted on ``y``; and print a Counter of
# the factorized test labels.
# NOTE(review): ``as_matrix()`` is deprecated in pandas 0.23 and removed in
# 1.0 -- ``.values`` is the documented replacement; confirm the pandas
# version in use before switching.
# NOTE(review): the text ends inside a triple-quoted string that is not
# closed within this excerpt, so the code is left untouched here.
#for i in range(0,len(y_predictions)): #print y_predictions[i], y_test.as_matrix()[i] print 'Accuracy:', clf.score(X_test, y_test) #printing the training data size for each element print collections.Counter(y_train.factorize()[0]) #draw confusion matrix #get all labels #le = preprocessing.LabelEncoder() #le.fit(y.as_matrix()) #labels = le.classes_ cm = ConfusionMatrix(y_test.as_matrix(), y_predictions) cm.plot(normalized=True) #cross validation scores scores = cross_val_score(clf, X, y, cv=5) print scores.mean(), scores from sklearn.metrics import classification_report le = preprocessing.LabelEncoder() le.fit(y.as_matrix()) target_names = le.classes_ print classification_report(y_test, y_predictions, target_names=target_names) print collections.Counter(y_test.factorize()[0]) ''' This will plot the correlation between the attributes
# Restore the saved weights and fetch the initial state of the graph.
saver = tf.train.Saver()
saver.restore(session, savepoint)
state = session.run(graph.initial_state)

# Run every test batch through the network and collect one prediction row
# per sample.
for batch, labelx in enumerate(
        getBatches(testData, hyperparameters['batchSize']), 1):
    feedDict = {
        graph.input_data: labelx,
        # NOTE(review): dropout is normally switched off at inference time
        # (keep probability 1.0) -- confirm what 'dropoutProb' holds.
        graph.keep_prob: hyperparameters['dropoutProb'],
        graph.initial_state: state
    }
    getPredictions = session.run(graph.predictions, feed_dict=feedDict)
    # Iterating the batch output yields its rows, so extend() replaces the
    # explicit index loop.
    finalPredictions.extend(getPredictions)

finalPredictions = np.asarray(finalPredictions)
predictions = np.argmax(finalPredictions, axis=1)
# Class index per sample, trimmed to the number of predictions made.
actualPredictions = testRatings.argmax(axis=1)[:predictions.shape[0]]

cm = ConfusionMatrix(actualPredictions, predictions)
cm.plot(backend='seaborn', normalized=True)
plt.title('Confusion Matrix Stars prediction')
# NOTE(review): this opens a new figure after the matrix has been drawn; if
# it was meant to size the confusion-matrix plot it has to come before
# cm.plot() -- left in place because later code may draw into it.
plt.figure(figsize=(12, 10))

# Compare class indices with class indices: ``testRatings`` itself is the
# 2-D array that argmax(axis=1) was applied to above, so comparing it
# directly with the 1-D ``predictions`` does not measure accuracy.
test_correct_pred = np.equal(predictions, actualPredictions)
test_accuracy = np.mean(test_correct_pred.astype(float))
print("Test accuracy is: " + str(test_accuracy))
# Fit a Gaussian naive Bayes model and predict on the same samples it was
# fitted on.
model = GaussianNB()
model.fit(X, target)
yhat = model.predict(X)

print('Accuracy:')
print(metrics.accuracy_score(target, yhat))
print('Classification report:')
print(metrics.classification_report(target, yhat))

print('Confusion matrix:')
cm = ConfusionMatrix(target, yhat)
print(cm)
print('Stats:')
cm.print_stats()

# Annotated heatmap of the confusion matrix.
ax = cm.plot(backend='seaborn', annot=True, fmt='g')
ax.set_title('Confusion Matrix')
plt.show()
plt.clf()

# A ROC curve needs continuous scores: hard class predictions provide a
# single operating point and therefore a degenerate curve.  Use the
# probability of the second class in ``model.classes_`` instead.
# NOTE(review): assumes a binary target whose positive class sorts last.
yscore = model.predict_proba(X)[:, 1]
fpr, tpr, threshold = metrics.roc_curve(target, yscore)
roc_auc = metrics.auc(fpr, tpr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()