def make_plots(y_test, y_pred, y_prob, algorithm, timestamp):
    """Render and save the evaluation plots for one classifier run.

    A normalized confusion matrix is always written. When ``y_prob`` is not
    ``None``, a precision-recall curve and a ROC curve are written as well.
    Every image is saved as ``static/img/<kind>/<algorithm>-<timestamp>.png``
    and all open figures are closed after each save.

    Args:
        y_test: True labels.
        y_pred: Predicted labels.
        y_prob: Predicted probabilities, or ``None`` to skip the
            probability-based plots.
        algorithm: Lookup key for ``classifier_names`` (gives the plot
            title); also embedded in the file names.
        timestamp: Value embedded in the file names.
    """
    figure_size = (20, 20)
    plot_title = classifier_names[algorithm]

    def _flush(kind):
        # Write the current figure into its per-kind folder, then release
        # every open figure.
        target = 'static/img/{}/{}-{}.png'.format(kind, algorithm, timestamp)
        plt.savefig(target, dpi=200)
        plt.close('all')

    plot_confusion_matrix(y_test, y_pred, normalize=True,
                          figsize=figure_size, title_fontsize=40,
                          text_fontsize=30, title=plot_title)
    _flush('cm')

    if y_prob is None:
        return

    # Both curve plots share the same styling.
    curve_style = dict(figsize=figure_size, title_fontsize=40,
                       text_fontsize=25, title=plot_title)

    plot_precision_recall_curve(y_test, y_prob, **curve_style)
    _flush('precrec')

    plot_roc_curve(y_test, y_prob, **curve_style)
    _flush('roc')
def main():
    """Plot a normalized confusion matrix and log its per-class diagonal.

    Reads the true and predicted labels from ``args.testlog``, saves the
    normalized confusion matrix plot to
    ``<outputdir>/<configname>/confusion_matrix.pdf`` and appends one line
    ``<configname>|<d0> <d1> ... `` to ``<outputdir>/confusion_matrix_all``,
    where ``d_i`` is the i-th diagonal entry of the row-normalized matrix.
    """
    args = parse_args()
    real_labels, predicted_labels = read_file(args.testlog)

    skplt.plot_confusion_matrix(real_labels, predicted_labels,
                                normalize=True, title=' ',
                                text_fontsize="large")
    plt.savefig("{}/{}/confusion_matrix.pdf".format(args.outputdir,
                                                    args.configname),
                bbox_inches='tight')

    cm = confusion_matrix(real_labels, predicted_labels)
    np.set_printoptions(precision=2)
    # Row-normalize: each row is divided by its own total.
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    # ndarray.diagonal() always yields a 1-D array. The previous
    # np.matrix / np.squeeze round-trip collapsed a single-class matrix to
    # a 0-d array, which cannot be iterated.
    diagonal = cm.diagonal()

    with open("{}/{}".format(args.outputdir, "confusion_matrix_all"),
              "a+") as f:
        entries = "".join(str(x) + " " for x in diagonal)
        f.write(args.configname + "|" + entries + "\n")
def plotConfusionMatrix(self, models_results):
    """Save one 3x3 grid of normalized confusion matrices per model.

    For every key of ``models_results`` a 15x15 figure with nine axes is
    created. The i-th result of that key is drawn on the i-th axes in
    row-major order, and the figure is written to
    ``plots/confusion_matrix_<key>.pdf``.

    Args:
        models_results: Mapping from a model key to an iterable of result
            objects exposing ``y_test``, ``predictions``, ``weight_train``
            and ``weight_test``.
    """
    # Row-major coordinates of the nine cells of the grid.
    cells = [(row, col) for row in range(3) for col in range(3)]

    for model_key, runs in models_results.items():
        fig, grid = plt.subplots(3, 3, figsize=(15, 15))

        for position, run in enumerate(runs):
            target = grid[cells[position]]
            skplt.plot_confusion_matrix(
                y_true=run.y_test,
                y_pred=run.predictions,
                normalize=True,
                ax=target,
                title="Matrix de Confusão Normalizada")
            plt.sca(target)

            # The x label carries the train/test split percentages.
            train_pct = round(run.weight_train * 100, 0)
            test_pct = round(run.weight_test * 100, 0)
            target.set_xlabel(
                "Treinamento/Teste ({}/{})".format(train_pct, test_pct))

            # Hide the tick marks and tick labels on both axes.
            target.get_xaxis().set_ticks([])
            target.get_yaxis().set_ticks([])

        plt.tight_layout()
        fig.subplots_adjust(top=0.95)
        plt.savefig("plots/confusion_matrix_{}.pdf".format(model_key))
def evaluate_features(X, y, clf=None):
    """General helper function for evaluating effectiveness of passed features in ML model

    Prints out log loss and accuracy, and plots the confusion matrix, using
    3-fold stratified cross-validation.

    Args:
        X (array-like): Features array. Shape (n_samples, n_features)

        y (array-like): Labels array. Shape (n_samples,)

        clf: Classifier to use. If None, a default ``LogisticRegression()``
            is used. It must support ``predict_proba``.
    """
    if clf is None:
        clf = LogisticRegression()

    # The fold count is pinned to the documented 3 instead of relying on
    # the library default. ``random_state`` is not passed: without
    # ``shuffle=True`` it has no effect on the splits, and recent
    # scikit-learn releases reject that combination.
    folds = StratifiedKFold(n_splits=3)

    probas = cross_val_predict(clf, X, y, cv=folds, n_jobs=-1,
                               method='predict_proba', verbose=2)

    # Map each row's most probable column back to its class label.
    pred_indices = np.argmax(probas, axis=1)
    classes = np.unique(y)
    preds = classes[pred_indices]

    print('Log loss: {}'.format(log_loss(y, probas)))
    print('Accuracy: {}'.format(accuracy_score(y, preds)))
    skplt.plot_confusion_matrix(y, preds)
def generate_confusion_matrix(real_labels, predicted_labels):
    """Plot a normalized confusion matrix and save it as a PNG.

    The image is written to ``confusion_matrix.png`` in the current working
    directory with a tight bounding box.

    Args:
        real_labels: True labels.
        predicted_labels: Predicted labels.
    """
    plot_options = dict(
        normalize=True,
        title='Normalized Confusion Matrix',
        text_fontsize="large",
    )
    skplt.plot_confusion_matrix(real_labels, predicted_labels, **plot_options)
    plt.savefig('confusion_matrix.png', bbox_inches='tight')
def print_score(m, df, y):
    """Print the model score on ``(df, y)`` and plot its confusion matrix.

    Args:
        m: Fitted model exposing ``score`` and ``predict_proba``.
        df: Feature data passed to the model.
        y: True labels for ``df``.
    """
    print('Accuracy: [Train , Val]')
    print(m.score(df, y))

    print('Train Confusion Matrix')
    probabilities = m.predict_proba(df)
    best_columns = np.argmax(probabilities, axis=1)
    # Column i of the probabilities is taken to be the i-th sorted unique
    # label of y.
    label_values = np.unique(y)
    predicted = label_values[best_columns]
    skplt.plot_confusion_matrix(y, predicted)
def build_matrix(file_path, title, save_path, real_labels):
    """Plot a normalized confusion matrix from a predictions file.

    Args:
        file_path: Text file holding one numeric prediction per line.
        title: Title of the plot.
        save_path: Destination of the saved figure.
        real_labels: True labels, in the same order as the file's lines.
    """
    with open(file_path) as handle:
        predicted = [float(row.strip()) for row in handle]

    # Start from a fresh figure before plotting.
    plt.figure()
    skplt.plot_confusion_matrix(
        real_labels,
        predicted,
        text_fontsize="large",
        normalize=True,
        title=title,
    )
    plt.savefig(save_path, bbox_inches='tight')
def classify_all(self, filename):
    """Classify every row of a CSV file with the Naive Bayes model.

    The first three columns of the file are used as features and its
    ``Class`` column as the ground truth. The accuracy is printed and the
    confusion matrix is shown in a plot window.

    Side effects: stores the loaded frame in ``self.test_file`` and the
    predictions in ``self.output``.

    Args:
        filename: Path of the comma-separated file to classify.

    Returns:
        tuple: ``(predictions, accuracy_percent)``.
    """
    self.test_file = pd.read_csv(filename, sep=',', index_col=None)
    test = np.array(self.test_file.values[:, :3])
    test_data_class = self.test_file.Class

    self.output = self.NB.predict(test)
    # The class probabilities and the raw confusion matrix used to be
    # computed here but were never used, so they are no longer calculated.
    accuracy = accuracy_score(test_data_class, self.output)

    print("Accuracy for Naive Bayes")
    print(accuracy * 100)
    print("Confusion Matrix for Naive Bayes")
    skplt.plot_confusion_matrix(test_data_class, self.output)
    plt.show()

    return self.output, accuracy * 100
def evaluate_features(X, y, clf=None):
    """Cross-validate a classifier on ``(X, y)`` and report its quality.

    Prints the log loss and the accuracy of the out-of-fold predictions and
    plots their confusion matrix, using 3-fold stratified cross-validation.

    Args:
        X (array-like): Features array.
        y (array-like): Labels array.
        clf: Classifier to use. If None, a default ``LogisticRegression()``
            is used. It must support ``predict_proba``.
    """
    if clf is None:
        clf = LogisticRegression()

    # ``random_state`` is not passed: without ``shuffle=True`` it has no
    # effect on the splits, and recent scikit-learn releases reject that
    # combination. The fold count is pinned so it does not depend on the
    # installed library's default.
    folds = StratifiedKFold(n_splits=3)

    probas = cross_val_predict(clf, X, y, cv=folds, n_jobs=-1,
                               method='predict_proba', verbose=2)

    # Map each row's most probable column back to its class label.
    pred_indices = np.argmax(probas, axis=1)
    classes = np.unique(y)
    preds = classes[pred_indices]

    print('Log loss: {}'.format(log_loss(y, probas)))
    print('Accuracy: {}'.format(accuracy_score(y, preds)))
    skplt.plot_confusion_matrix(y, preds)
def plotConfusionMatrix(self, models_results):
    """Save one normalized confusion matrix PDF per model.

    Models are processed in sorted order. Each figure is written to
    ``<self.description>/confusion_matrix_<key>.pdf`` with an empty x label
    and no tick marks on either axis.

    Args:
        models_results: Mapping from a model key to a result object
            exposing ``y_test`` and ``predictions``.
    """
    ordered_results = sorted(models_results.items())
    for model_key, result in ordered_results:
        fig, axis = plt.subplots()
        skplt.plot_confusion_matrix(
            y_true=result.y_test,
            y_pred=result.predictions,
            normalize=True,
            ax=axis,
            title="Matrix de Confusão Normalizada",
        )
        plt.sca(axis)

        # Blank the x label and hide the ticks on both axes.
        axis.set_xlabel("")
        for hidden_axis in (axis.get_xaxis(), axis.get_yaxis()):
            hidden_axis.set_ticks([])

        plt.tight_layout()
        file_name = "/confusion_matrix_{}.pdf".format(model_key)
        plt.savefig(self.description + file_name)
def test_array_like(self):
    """Smoke test: plain Python lists are accepted as label inputs."""
    true_labels = [0, 1]
    predicted_labels = [1, 0]
    skplt.plot_confusion_matrix(true_labels, predicted_labels)
def plot_confusion_matrix_with_cv(clf, X, y, labels=None, true_labels=None,
                                  pred_labels=None, title=None,
                                  normalize=False, hide_zeros=False,
                                  x_tick_rotation=0, do_cv=True, cv=None,
                                  shuffle=True, random_state=None, ax=None,
                                  figsize=None, cmap='Blues',
                                  title_fontsize="large",
                                  text_fontsize="medium"):
    """Generates the confusion matrix for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and ``predict``
            methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features. When ``do_cv`` is True it
            must support indexing with an array of row indices (e.g. a
            NumPy array).

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        labels (array-like, shape (n_classes), optional): List of labels to
            index the matrix. This may be used to reorder or select a subset
            of labels. If none is given, those that appear at least once in
            ``y`` are used in sorted order.
            (new in v0.2.5)

        true_labels (array-like, optional): The true labels to display.
            If none is given, then all of the labels are used.

        pred_labels (array-like, optional): The predicted labels to display.
            If none is given, then all of the labels are used.

        title (string, optional): Title of the generated plot. Forwarded to
            ``plotters.plot_confusion_matrix``, which chooses the default
            when this is None (presumably "Normalized Confusion Matrix" if
            `normalize` is True and "Confusion Matrix" otherwise).

        normalize (bool, optional): If True, normalizes the confusion matrix
            before plotting. Defaults to False.

        hide_zeros (bool, optional): If True, does not plot cells containing
            a value of zero. Defaults to False.

        x_tick_rotation (int, optional): Rotates x-axis tick labels by the
            specified angle. This is useful in cases where there are
            numerous categories and the labels overlap each other.

        do_cv (bool, optional): If True, the classifier is cross-validated
            on the dataset using the cross-validation strategy in `cv` to
            generate the confusion matrix. If False, the confusion matrix is
            generated without training or cross-validating the classifier.
            This assumes that the classifier has already been called with
            its `fit` method beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use :class:`StratifiedKFold` with its default
                number of folds,
              - integer, to use :class:`StratifiedKFold` with that number
                of folds,
              - An object with a ``split(X, y)`` method, used as a
                cross-validation generator,
              - An iterable yielding ``(train_indices, test_indices)``
                pairs.

        shuffle (bool, optional): Used when do_cv is set to True and `cv`
            is None or an integer. Determines whether to shuffle the
            training data before splitting using cross-validation. Default
            set to True.

        random_state (int or :class:`RandomState`, optional): Pseudo-random
            number generator state used for the shuffling. Ignored when
            `shuffle` is False.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the confusion matrix. If None, the plot is drawn on a new
            set of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        cmap (string or :class:`matplotlib.colors.Colormap` instance,
            optional): Colormap used for plotting the matrix. View the
            Matplotlib Colormap documentation for available options.
            https://matplotlib.org/users/colormaps.html

        title_fontsize (string or int, optional): Matplotlib-style
            fontsizes. Use e.g. "small", "medium", "large" or
            integer-values. Defaults to "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults
            to "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Example:
        >>> rf = classifier_factory(RandomForestClassifier())
        >>> rf.plot_confusion_matrix(X, y, normalize=True)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_confusion_matrix.png
           :align: center
           :alt: Confusion matrix
    """
    y = np.array(y)

    if not do_cv:
        # The classifier is assumed to be fitted already.
        y_pred = clf.predict(X)
        y_true = y
    else:
        # A seed is only meaningful when shuffling; recent scikit-learn
        # releases reject random_state together with shuffle=False.
        seed = random_state if shuffle else None
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=seed)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle,
                                 random_state=seed)

        # Splitter objects expose ``split``; anything else is treated as an
        # iterable of (train_indices, test_indices) pairs, as documented.
        splits = cv.split(X, y) if hasattr(cv, 'split') else cv

        clf_clone = clone(clf)
        preds_list = []
        trues_list = []
        for train_index, test_index in splits:
            clf_clone.fit(X[train_index], y[train_index])
            preds_list.append(clf_clone.predict(X[test_index]))
            trues_list.append(y[test_index])
        # Out-of-fold predictions and their true labels, in fold order.
        y_pred = np.concatenate(preds_list)
        y_true = np.concatenate(trues_list)

    ax = plotters.plot_confusion_matrix(y_true=y_true, y_pred=y_pred,
                                        labels=labels,
                                        true_labels=true_labels,
                                        pred_labels=pred_labels,
                                        title=title, normalize=normalize,
                                        hide_zeros=hide_zeros,
                                        x_tick_rotation=x_tick_rotation,
                                        ax=ax, figsize=figsize, cmap=cmap,
                                        title_fontsize=title_fontsize,
                                        text_fontsize=text_fontsize)

    return ax
# Script fragment: cross-validated scores for a logistic regression,
# diagnostic plots, then an XGBoost cross-validation and training run.
# X_all, y_all, y_val, y_proba, y_pred, plt and the estimator classes are
# expected to be defined earlier in the script (not visible here).

# Shuffled 3-fold stratified splitter shared by both scoring runs below.
cv = StratifiedKFold(n_splits=3, shuffle=True)
from sklearn.model_selection import cross_val_score
# NOTE(review): this mean negative MSE is overwritten by the accuracy score
# on the next statement before it is ever read.
score = cross_val_score(LogisticRegression(), X_all, y_all, scoring='neg_mean_squared_error', cv=cv).mean()
score = cross_val_score(LogisticRegression(), X_all, y_all, scoring='accuracy', cv=cv).mean()

#### Learning Curve
from scikitplot import plotters as skplt
skplt.plot_learning_curve(LogisticRegression(), X_all, y_all)
plt.show()
# ROC and precision-recall curves use the predicted probabilities; the
# confusion matrix uses the predicted labels.
skplt.plot_roc_curve(y_true=y_val, y_probas=y_proba)
plt.show()
skplt.plot_precision_recall_curve(y_true=y_val, y_probas=y_proba)
plt.show()
skplt.plot_confusion_matrix(y_true=y_val, y_pred=y_pred, normalize=True)
plt.show()

#### XGBoost
# NOTE(review): XGBRegressor is imported but not used in the visible code.
from xgboost import XGBRegressor
import xgboost as xgb
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
}
dtrain = xgb.DMatrix(X_all, label=y_all)
# Cross-validated boosting with early stopping; the returned history is not
# used in the visible code.
history = xgb.cv(params, dtrain, num_boost_round=1024, early_stopping_rounds=5, verbose_eval=20)
# Final booster trained on all data; no num_boost_round is passed, so the
# library default applies.
booster = xgb.train(params, dtrain)
xgb.plot_importance(booster=booster)
def plot_confusion_matrix(clf, X, y, title=None, normalize=False,
                          do_cv=True, cv=None, shuffle=True,
                          random_state=None, ax=None):
    """Generates the confusion matrix for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and ``predict``
            methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features. When ``do_cv`` is True it
            must support indexing with an array of row indices (e.g. a
            NumPy array).

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Forwarded to
            ``plotters.plot_confusion_matrix``, which chooses the default
            when this is None (presumably "Normalized Confusion Matrix" if
            `normalize` is True and "Confusion Matrix" otherwise).

        normalize (bool, optional): If True, normalizes the confusion matrix
            before plotting. Defaults to False.

        do_cv (bool, optional): If True, the classifier is cross-validated
            on the dataset using the cross-validation strategy in `cv` to
            generate the confusion matrix. If False, the confusion matrix is
            generated without training or cross-validating the classifier.
            This assumes that the classifier has already been called with
            its `fit` method beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use :class:`StratifiedKFold` with its default
                number of folds,
              - integer, to use :class:`StratifiedKFold` with that number
                of folds,
              - An object with a ``split(X, y)`` method, used as a
                cross-validation generator,
              - An iterable yielding ``(train_indices, test_indices)``
                pairs.

        shuffle (bool, optional): Used when do_cv is set to True and `cv`
            is None or an integer. Determines whether to shuffle the
            training data before splitting using cross-validation. Default
            set to True.

        random_state (int or :class:`RandomState`, optional): Pseudo-random
            number generator state used for the shuffling. Ignored when
            `shuffle` is False.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the confusion matrix. If None, the plot is drawn on a new
            set of axes.

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Example:
        >>> rf = classifier_factory(RandomForestClassifier())
        >>> rf.plot_confusion_matrix(X, y, normalize=True)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_confusion_matrix.png
           :align: center
           :alt: Confusion matrix
    """
    y = np.array(y)

    if not do_cv:
        # The classifier is assumed to be fitted already.
        y_pred = clf.predict(X)
        y_true = y
    else:
        # A seed is only meaningful when shuffling; recent scikit-learn
        # releases reject random_state together with shuffle=False.
        seed = random_state if shuffle else None
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=seed)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle,
                                 random_state=seed)

        # Splitter objects expose ``split``; anything else is treated as an
        # iterable of (train_indices, test_indices) pairs, as documented.
        splits = cv.split(X, y) if hasattr(cv, 'split') else cv

        clf_clone = clone(clf)
        preds_list = []
        trues_list = []
        for train_index, test_index in splits:
            clf_clone.fit(X[train_index], y[train_index])
            preds_list.append(clf_clone.predict(X[test_index]))
            trues_list.append(y[test_index])
        # Out-of-fold predictions and their true labels, in fold order.
        y_pred = np.concatenate(preds_list)
        y_true = np.concatenate(trues_list)

    ax = plotters.plot_confusion_matrix(y_true=y_true, y_pred=y_pred,
                                        title=title, normalize=normalize,
                                        ax=ax)

    return ax
def plot_cmat(y_test, y_pred):
    """Draw the confusion matrix of ``y_pred`` against ``y_test`` and show it.

    Args:
        y_test: True labels.
        y_pred: Predicted labels.
    """
    label_pair = (y_test, y_pred)
    skplt.plot_confusion_matrix(*label_pair)
    plt.show()
def plot_cmat(yte, ypred):
    """Plot the confusion matrix for the given labels and display it.

    Args:
        yte: True labels.
        ypred: Predicted labels.
    """
    true_labels, predicted_labels = yte, ypred
    skplt.plot_confusion_matrix(true_labels, predicted_labels)
    plt.show()
def plot_cmat(yte, ypred, title):
    """Show a normalized confusion matrix under a custom title.

    Args:
        yte: True labels.
        ypred: Predicted labels.
        title: Title applied to the current axes after plotting.
    """
    plot_options = {'normalize': True}
    skplt.plot_confusion_matrix(yte, ypred, **plot_options)
    # The title is set afterwards through pyplot, replacing whatever the
    # plotting call put there.
    plt.title(title)
    plt.show()
def plot_confusion_matrix(clf, X, y, labels=None, title=None,
                          normalize=False, hide_zeros=False,
                          x_tick_rotation=0, do_cv=True, cv=None,
                          shuffle=True, random_state=None, ax=None,
                          figsize=None, title_fontsize="large",
                          text_fontsize="medium"):
    """Generates the confusion matrix for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and ``predict``
            methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features. When ``do_cv`` is True it
            must support indexing with an array of row indices (e.g. a
            NumPy array).

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        labels (array-like, shape (n_classes), optional): List of labels to
            index the matrix. This may be used to reorder or select a subset
            of labels. If none is given, those that appear at least once in
            ``y`` are used in sorted order.
            (new in v0.2.5)

        title (string, optional): Title of the generated plot. Forwarded to
            ``plotters.plot_confusion_matrix``, which chooses the default
            when this is None (presumably "Normalized Confusion Matrix" if
            `normalize` is True and "Confusion Matrix" otherwise).

        normalize (bool, optional): If True, normalizes the confusion matrix
            before plotting. Defaults to False.

        hide_zeros (bool, optional): If True, does not plot cells containing
            a value of zero. Defaults to False.

        x_tick_rotation (int, optional): Rotates x-axis tick labels by the
            specified angle. This is useful in cases where there are
            numerous categories and the labels overlap each other.

        do_cv (bool, optional): If True, the classifier is cross-validated
            on the dataset using the cross-validation strategy in `cv` to
            generate the confusion matrix. If False, the confusion matrix is
            generated without training or cross-validating the classifier.
            This assumes that the classifier has already been called with
            its `fit` method beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use :class:`StratifiedKFold` with its default
                number of folds,
              - integer, to use :class:`StratifiedKFold` with that number
                of folds,
              - An object with a ``split(X, y)`` method, used as a
                cross-validation generator,
              - An iterable yielding ``(train_indices, test_indices)``
                pairs.

        shuffle (bool, optional): Used when do_cv is set to True and `cv`
            is None or an integer. Determines whether to shuffle the
            training data before splitting using cross-validation. Default
            set to True.

        random_state (int or :class:`RandomState`, optional): Pseudo-random
            number generator state used for the shuffling. Ignored when
            `shuffle` is False.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the confusion matrix. If None, the plot is drawn on a new
            set of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        title_fontsize (string or int, optional): Matplotlib-style
            fontsizes. Use e.g. "small", "medium", "large" or
            integer-values. Defaults to "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults
            to "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Example:
        >>> rf = classifier_factory(RandomForestClassifier())
        >>> rf.plot_confusion_matrix(X, y, normalize=True)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_confusion_matrix.png
           :align: center
           :alt: Confusion matrix
    """
    y = np.array(y)

    if not do_cv:
        # The classifier is assumed to be fitted already.
        y_pred = clf.predict(X)
        y_true = y
    else:
        # A seed is only meaningful when shuffling; recent scikit-learn
        # releases reject random_state together with shuffle=False.
        seed = random_state if shuffle else None
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=seed)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle,
                                 random_state=seed)

        # Splitter objects expose ``split``; anything else is treated as an
        # iterable of (train_indices, test_indices) pairs, as documented.
        splits = cv.split(X, y) if hasattr(cv, 'split') else cv

        clf_clone = clone(clf)
        preds_list = []
        trues_list = []
        for train_index, test_index in splits:
            clf_clone.fit(X[train_index], y[train_index])
            preds_list.append(clf_clone.predict(X[test_index]))
            trues_list.append(y[test_index])
        # Out-of-fold predictions and their true labels, in fold order.
        y_pred = np.concatenate(preds_list)
        y_true = np.concatenate(trues_list)

    ax = plotters.plot_confusion_matrix(y_true=y_true, y_pred=y_pred,
                                        labels=labels, title=title,
                                        normalize=normalize,
                                        hide_zeros=hide_zeros,
                                        x_tick_rotation=x_tick_rotation,
                                        ax=ax, figsize=figsize,
                                        title_fontsize=title_fontsize,
                                        text_fontsize=text_fontsize)

    return ax
def plot_cmat(yte, ypred):
    """Display the confusion matrix of predictions against true labels.

    Args:
        yte: True labels.
        ypred: Predicted labels.
    """
    plot_inputs = [yte, ypred]
    skplt.plot_confusion_matrix(*plot_inputs)
    plt.show()
def test_array_like(self):
    """Check that the plotter runs on plain list inputs without raising."""
    expected, observed = [0, 1], [1, 0]
    skplt.plot_confusion_matrix(expected, observed)
def plot_confusion_matrix(self, normalize=True):
    """Show the confusion matrix of ``self.y_pred`` against ``self.y_test``.

    Args:
        normalize: Forwarded to the plotting call. Defaults to True.
    """
    # TODO: add thresholding
    true_labels = self.y_test
    predicted_labels = self.y_pred
    skplt.plot_confusion_matrix(true_labels, predicted_labels,
                                normalize=normalize)
    plt.show()
def plot_cmat(yte, ypred):
    """Plot the confusion matrix, save it to ``rnn.png`` and display it.

    Args:
        yte: True labels.
        ypred: Predicted labels.
    """
    skplt.plot_confusion_matrix(yte, ypred)
    # Save before showing: once a blocking ``plt.show()`` returns, the
    # figure has normally been released and ``savefig`` would write a blank
    # image.
    plt.savefig('rnn.png')
    plt.show()
from sklearn.datasets import load_svmlight_file
import matplotlib.pyplot as plt
import scikitplot.plotters as skplt
import numpy as np
import csv

# Script: plots a normalized confusion matrix from a predictions file, then
# starts building a probability-distribution plot (the rest of that part
# lies outside this view).
if __name__ == '__main__':
    # Only the labels of the svmlight test file are used in the visible code.
    X_test, y_test = load_svmlight_file('test.dat')
    # One numeric prediction per line.
    with open('test.dat.predict') as f:
        predictions = f.readlines()
    predictions = [float(x.strip()) for x in predictions]
    skplt.plot_confusion_matrix(y_true=y_test, y_pred=predictions, normalize=True, title="Matrix de Confusão Normalizada")
    plt.savefig('confusion-matrix_1.pdf', bbox_inches='tight')
    # Second file: each line is split on whitespace into its fields.
    with open('prob/test.dat.prob_temp.predict') as textFile:
        predictions2 = [line.split() for line in textFile]
    # New figure for the probability-distribution plot.
    plt.figure()
    plt.title("Distribuição de Probabilidade - SVM")
    positive = []
    negative = []
    for target, pr in zip(y_test, predictions2):
        # First whitespace-separated field of the line; presumably the
        # predicted label -- confirm against the file format.
        predict = pr[0]
def main(X_data, y_data, test_size):
    """Fit a decision tree on a random split and predict the held-out part.

    Args:
        X_data: Feature matrix.
        y_data: Labels for ``X_data``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        tuple: ``(y_test, y_pred)`` -- the held-out labels and the tree's
        predictions for them.
    """
    split = cross_validation.train_test_split(
        X_data, y_data, test_size=test_size)
    X_train, X_test, y_train, y_test = split

    # Build and train the decision tree.
    classifier = tree.DecisionTreeClassifier()
    classifier.fit(X_train, y_train)

    # Classifier predictions for the held-out samples.
    return y_test, classifier.predict(X_test)


if __name__ == "__main__":
    sizes = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    X_data, y_data = load_svmlight_file('./data')

    output_dir = './arvore/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # One normalized confusion matrix image per test-set fraction.
    for x in sizes:
        y_test, y_pred = main(X_data, y_data, x)
        plot_title = 'Normalized Confusion Matrix (test size: ' + str(x) + ')'
        skplt.plot_confusion_matrix(
            y_test,
            y_pred,
            normalize=True,
            title=plot_title,
            text_fontsize="large")
        # The file is named after the fraction in tenths (0.3 -> "3.png").
        image_path = output_dir + str(int(x * 10)) + '.png'
        plt.savefig(image_path, bbox_inches='tight')
"""An example showing the plot_confusion_matrix method used by a scikit-learn classifier"""
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits as load_data
import matplotlib.pyplot as plt
from scikitplot import classifier_factory

# Digits dataset as a (features, labels) pair.
X, y = load_data(return_X_y=True)

# Factory API: the wrapped classifier gains a plot_confusion_matrix method
# that is called directly on the data.
rf = classifier_factory(RandomForestClassifier())
rf.plot_confusion_matrix(X, y, normalize=True)
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt
rf = RandomForestClassifier()
rf = rf.fit(X, y)
# NOTE: predictions are made on the same data the forest was fitted on.
preds = rf.predict(X)
skplt.plot_confusion_matrix(y_true=y, y_pred=preds)
plt.show()
Y_train, epochs=10, batch_size=batch_size, validation_split=0.2, callbacks=[ckpt_callback])
# NOTE: the line above is the tail of a call whose beginning lies outside
# this view.

# Load the model saved under 'lstm_model_x' and score it on the test split.
model = load_model('lstm_model_x')
probas = model.predict(X_test)
# Most probable column per row, mapped onto class labels 1..9.
pred_indices = np.argmax(probas, axis=1)
classes = np.array(range(1, 10))
preds = classes[pred_indices]
# Y_test is reduced with argmax over axis 1, so it is presumably one-hot
# encoded -- confirm against where it is built.
print('Log loss: {}'.format(
    log_loss(classes[np.argmax(Y_test, axis=1)], probas)))
print('Accuracy: {}'.format(
    accuracy_score(classes[np.argmax(Y_test, axis=1)], preds)))
skplt.plot_confusion_matrix(classes[np.argmax(Y_test, axis=1)], preds)

# Tokenize and pad the submission texts, then predict class probabilities.
Xtest = tokenizer.texts_to_sequences(testx['Text'].values)
Xtest = pad_sequences(Xtest, maxlen=2000)
probas = model.predict(Xtest)
# One probability column per class, named class1..class9, plus the ID column.
submission_df = pd.DataFrame(probas, columns=['class' + str(c + 1) for c in range(9)])
submission_df['ID'] = df_test['ID']
submission_df.head()
#submission_df.to_csv('submissionX.csv', index=False)
def plot_cmat(yte, ypred):
    """Plot the confusion matrix for the given labels and display it.

    Side effect: installs a process-wide "ignore" warnings filter that
    stays in place after the call returns.

    Args:
        yte: True labels.
        ypred: Predicted labels.
    """
    # The description string used to sit after this statement, where it
    # was a no-op expression rather than the function's docstring.
    warnings.filterwarnings("ignore")
    skplt.plot_confusion_matrix(yte, ypred)
    plt.show()