def test_pandas_confusion_normalized():
    """Check that the row sums of the normalized matrix add up to the row count.

    Builds a 3-class confusion matrix from integer labels, asks for the
    normalized DataFrame, and verifies that summing every row and then summing
    those row totals yields ``len(df)``.
    """
    # Function-scope import keeps this test self-contained.
    import math

    y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    cm = ConfusionMatrix(y_true, y_pred)
    df = cm.to_dataframe()
    df_norm = cm.to_dataframe(normalized=True)

    # The normalized cells are floats (thirds, sixths, ...), so their sum can
    # differ from the integer row count by a rounding error; compare with a
    # tolerance instead of exact equality.
    assert math.isclose(df_norm.sum(axis=1).sum(), len(df))
def test_pandas_confusion_normalized_issue1():
    """Regression test: normalization when one class never occurs in ``y_true``.

    Should ensure issue 1 is fixed, see
    http://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054#31720054
    """
    # Function-scope import keeps this test self-contained.
    import math

    y_true = [
        'business', 'business', 'business', 'business', 'business',
        'business', 'business', 'business', 'business', 'business',
        'business', 'business', 'business', 'business', 'business',
        'business', 'business', 'business', 'business', 'business',
    ]
    y_pred = [
        'health', 'business', 'business', 'business', 'business',
        'business', 'health', 'health', 'business', 'business',
        'business', 'business', 'business', 'business', 'business',
        'business', 'health', 'health', 'business', 'health',
    ]
    cm = ConfusionMatrix(y_true, y_pred)
    df = cm.to_dataframe()
    df_norm = cm.to_dataframe(normalized=True)

    # skipna=False lets a NaN row sum surface instead of being silently
    # treated as 0; fillna(1) then counts such a row as one unit so the total
    # can be compared with the number of rows.
    row_totals = df_norm.sum(axis=1, skipna=False).fillna(1)

    # Float sums are compared with a tolerance rather than exact equality.
    assert math.isclose(row_totals.sum(), len(df))
def train_test_and_evaluate(pipeline, X_train, y_train, X_test, y_test):
    """Fit ``pipeline``, predict on the test split, and print evaluation metrics.

    Args:
        pipeline: Object exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features passed to ``pipeline.fit``.
        y_train: Training targets passed to ``pipeline.fit``.
        X_test: Test features passed to ``pipeline.predict``.
        y_test: Ground-truth targets for ``X_test``.

    Returns:
        A tuple ``(pipeline, confusion_dataframe, y_pred_class)`` holding the
        fitted pipeline, the confusion matrix converted with
        ``to_dataframe()``, and the predictions for ``X_test``.
    """
    pipeline.fit(X_train, y_train)
    y_pred_class = pipeline.predict(X_test)

    # NOTE(review): ``labels`` receives two axis-style captions here; it looks
    # like axis names were intended rather than class labels -- confirm
    # against the ConfusionMatrix API. Kept as-is so the returned DataFrame is
    # unchanged for existing callers.
    matrix = ConfusionMatrix(
        y_test, y_pred_class, labels=['True Value', 'Predicted Value']
    )

    print('-' * 75 + '\nConfusion Matrix\n')
    print(matrix)

    # All three scores use macro averaging.
    print('f1_score', f1_score(y_test, y_pred_class, average="macro"))
    print('precision', precision_score(y_test, y_pred_class, average="macro"))
    print('recall', recall_score(y_test, y_pred_class, average="macro"))

    return pipeline, matrix.to_dataframe(), y_pred_class
def get_confusion_matrix(_results_file):
    """Plot, save, and summarize the confusion matrix for a results file.

    The file is read as header-less, tab-separated text; column 1 is taken as
    the true labels and column 2 as the predicted labels. The plot is saved
    next to the input with its extension replaced by ``_cm.jpg``, and
    per-class recall/precision plus overall accuracy are printed as
    percentages.

    Args:
        _results_file: Path of the tab-separated results file.
    """
    # Function-scope import so the block does not rely on a file-level import.
    import os

    # Read the file named by the argument (not a same-named global).
    df = pd.read_csv(_results_file, sep='\t', header=None)
    true_lbls = df[1]
    pred_lbls = df[2]

    confusion_matrix = ConfusionMatrix(true_lbls, pred_lbls)
    confusion_matrix.plot()

    # Swap only the final extension: a plain str.replace would also rewrite
    # '.txt' occurring elsewhere in the path, and would hand back the input
    # path itself when the file does not end in '.txt'.
    base, _ext = os.path.splitext(_results_file)
    cm_file = base + '_cm.jpg'
    plt.savefig(cm_file)

    print()
    print(confusion_matrix)
    print()

    cm = confusion_matrix.to_dataframe()

    # Assumes the pandas_ml layout: rows are actual classes, columns are
    # predicted classes. Column totals therefore count predictions per class
    # (precision denominator) and row totals count true samples per class
    # (recall denominator).
    predicted_totals = cm.sum(axis=0)
    actual_totals = cm.sum(axis=1)

    correct = 0
    for i in range(cm.shape[0]):
        # Purely positional access, so it works whatever the class labels are.
        hits = cm.iloc[i, i]
        correct += hits
        recall = hits * 100 / actual_totals.iloc[i]
        prec = hits * 100 / predicted_totals.iloc[i]
        print('Class %s recall = %.4f precision = %.4f' % (cm.columns[i], recall, prec))

    print('Overall accuracy = %.4f' % float(correct * 100 / predicted_totals.sum()))
def main(save, show):
    """Demonstrate ConfusionMatrix on small hard-coded label lists.

    Prints the matrix and its DataFrame form, draws the raw and the normalized
    plot, prints statistics next to the sklearn equivalents, and finally shows
    a binarized matrix.

    Args:
        save: When truthy, each figure is written under ``../screenshots``
            relative to this file.
        show: When truthy, each figure is displayed with ``plt.show()``.
    """
    basepath = os.path.dirname(__file__)

    def _emit_figure(filename):
        # Persist and/or display the figure that was just drawn.
        if save:
            plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
        if show:
            plt.show()

    # String class labels; integer labels or an explicit
    # labels=["ant", "bird", "cat"] argument are alternative inputs.
    y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
              'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    cm = ConfusionMatrix(y_true, y_pred)

    print("Confusion matrix:\n%s" % cm)
    df = cm.to_dataframe()
    print(df)
    print(df.dtypes)

    # Raw counts first, then the normalized view.
    cm.plot()
    _emit_figure('cm.png')

    cm.plot(normalized=True)
    _emit_figure('cm_norm.png')

    cm.print_stats()
    print(cm.classification_report)

    # Same data through sklearn, for comparison.
    print("sklearn confusion_matrix:\n%s" % confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))

    # Seaborn alternative:
    #   cm.plot(normalized=True, backend=Backend.Seaborn)

    print("Binarize a confusion matrix")
    y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
    y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
    cm = ConfusionMatrix(y_true, y_pred)
    print(cm)
    # A bird is not a "land_animal"
    binary_cm = cm.binarize(['ant', 'cat'])
    print(binary_cm)
def main(save, show):
    """Run the ConfusionMatrix walkthrough on hard-coded animal labels.

    Steps: print the matrix and its DataFrame, plot it raw and normalized
    (optionally saving/showing each figure), print statistics alongside the
    sklearn output, then binarize a second, smaller matrix.

    Args:
        save: If truthy, save each figure into ``../screenshots`` next to this
            file.
        show: If truthy, call ``plt.show()`` after each figure.
    """
    basepath = os.path.dirname(__file__)

    # Integer labels, or an explicit labels=["ant", "bird", "cat"] argument,
    # are alternative inputs.
    y_true = [
        'rabbit', 'cat', 'rabbit', 'rabbit',
        'cat', 'dog', 'dog', 'rabbit',
        'rabbit', 'cat', 'dog', 'rabbit',
    ]
    y_pred = [
        'cat', 'cat', 'rabbit', 'dog',
        'cat', 'rabbit', 'dog', 'cat',
        'rabbit', 'cat', 'rabbit', 'rabbit',
    ]
    cm = ConfusionMatrix(y_true, y_pred)

    print("Confusion matrix:\n%s" % cm)
    df = cm.to_dataframe()
    print(df)
    print(df.dtypes)

    # One pass per figure: plot options paired with the output file name.
    figures = (
        ({}, 'cm.png'),
        ({'normalized': True}, 'cm_norm.png'),
    )
    for plot_options, filename in figures:
        cm.plot(**plot_options)
        if save:
            plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
        if show:
            plt.show()

    cm.print_stats()
    print(cm.classification_report)

    # sklearn's view of the same labels, for comparison.
    print("sklearn confusion_matrix:\n%s" % confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))

    # Seaborn alternative:
    #   cm.plot(normalized=True, backend=Backend.Seaborn)

    print("Binarize a confusion matrix")
    y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
    y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
    cm = ConfusionMatrix(y_true, y_pred)
    print(cm)
    # A bird is not a "land_animal"
    binary_cm = cm.binarize(['ant', 'cat'])
    print(binary_cm)
# Plot cv_scores against c with circle markers, on a log-scaled x axis.
plt.plot(c, cv_scores, '-o')
plt.xscale('log')


# In[ ]:

# Class predictions on the test split, scored against the true labels.
expected = y_test
predicted = clf.predict(X_test)
print(accuracy_score(expected, predicted))


# In[ ]:

# Log loss is computed from the predicted class probabilities.
predicted_probs = clf.predict_proba(X_test)
print(log_loss(y_test, predicted_probs))


# In[ ]:

cm = ConfusionMatrix(expected, predicted)
cm_counts = cm.to_dataframe()
# Divide every row by its own total, then persist the result.
cm_stats = cm_counts.apply(lambda row: row / sum(row), axis=1)
cm_stats.to_csv('data/confusion_matrix_stats.csv')


# In[ ]:

# Widen the default figure before drawing the normalized matrix.
mpl.rcParams['figure.figsize'] = (10.0, 5.0)
cm.plot(normalized=True)