def plot_relative_color_confusion_matrix(
    X,
    ax,
    display_labels=None,
    plt_kwargs=None,
    label_relative=False,
    fix_colorbar=True,
):
    """Plot a confusion matrix coloured by per-row (true-class) rates.

    Rows of ``X`` are the true classes, so each row is divided by its own
    sum before plotting. The colours therefore show rates, while the text
    in each cell shows the absolute count unless ``label_relative`` is set.

    Parameters
    ----------
    X : array-like of shape (n_classes, n_classes)
        Confusion matrix of counts; rows are the true class.
    ax : matplotlib Axes
        Axes to draw into.
    display_labels : sequence, optional
        Forwarded to ``metrics.ConfusionMatrixDisplay``.
    plt_kwargs : dict, optional
        Extra keyword arguments forwarded to ``ConfusionMatrixDisplay.plot``.
        ``None`` (the default) means no extra arguments.
    label_relative : bool, default False
        When False, the cell text is replaced by the absolute counts.
    fix_colorbar : bool, default True
        When True, the colour limits of the last image on ``ax`` are set
        to ``(0, 1)``.

    Returns
    -------
    None
    """
    counts = np.asarray(X)
    # A shared ``{}`` default would leak state between calls.
    plot_options = {} if plt_kwargs is None else plt_kwargs

    # Normalise each row by its total. Rows with no samples stay at 0
    # instead of turning into NaN/inf through a division by zero.
    row_totals = counts.sum(axis=1, keepdims=True)
    rates = np.divide(
        counts,
        row_totals,
        out=np.zeros(counts.shape, dtype=float),
        where=row_totals != 0,
    )

    cm = metrics.ConfusionMatrixDisplay(rates, display_labels=display_labels)
    cm.plot(ax=ax, **plot_options)

    # Change labels to the absolute counts. ``text_`` is None when the
    # caller disabled values via ``plt_kwargs``; nothing to relabel then.
    if not label_relative and cm.text_ is not None:
        # "d" only accepts integers; fall back to "g" for float counts.
        count_format = "d" if np.issubdtype(counts.dtype, np.integer) else "g"
        for index, text in np.ndenumerate(cm.text_):
            text.set_text(format(counts[index], count_format))

    # Fix the colorbar limits so the last tick draws
    if fix_colorbar:
        ax.images[-1].set_clim(0, 1)
def plot_results(blob, roc_ax, cm_ax):
    """Plot the mean ROC curve with a +/- 1 std band and a confusion matrix.

    Parameters
    ----------
    blob : mapping
        Must provide the keys ``"confusion_matrix"``, ``"roc_b"``,
        ``"roc_a_base"`` and ``"area_under_curve"``. ``roc_b`` is averaged
        over its first axis, so it presumably holds one curve per run
        sampled on the common ``roc_a_base`` grid -- confirm with the
        code that builds ``blob``.
    roc_ax : matplotlib Axes
        Axes receiving the ROC plot.
    cm_ax : matplotlib Axes
        Axes receiving the confusion matrix.

    Returns
    -------
    None
    """
    confusion_matrix = blob["confusion_matrix"]
    roc_b = blob["roc_b"]
    roc_a_base = blob["roc_a_base"]
    area_under_curve = blob["area_under_curve"]

    mean_roc_b = roc_b.mean(0)
    std_roc_b = roc_b.std(0)
    mean_area_under_curve = np.mean(area_under_curve)

    # Keep the shaded band inside [0, 1] on both sides; the lower edge
    # was previously left unclipped and could dip below zero.
    upper_bound = np.minimum(mean_roc_b + std_roc_b, 1)
    lower_bound = np.maximum(mean_roc_b - std_roc_b, 0)

    # plot mean curve
    roc_ax.plot(
        roc_a_base,
        mean_roc_b,
        label="mean auc=" + "{0:0.3%}".format(mean_area_under_curve),
    )
    roc_ax.fill_between(
        roc_a_base,
        lower_bound,
        upper_bound,
        color="grey",
        alpha=0.3,
        label=r'$\pm$ 1 std dev',
    )
    roc_ax.set_xlabel("mean false positive rate")
    roc_ax.set_ylabel("mean true positive rate")
    roc_ax.legend(loc=4)

    # ``labels`` is a callable defined outside this block.
    cmd = metrics.ConfusionMatrixDisplay(confusion_matrix,
                                         display_labels=labels())
    cmd.plot(ax=cm_ax)
def plot_confusion_matrix(labels, pred_labels):
    """Show the confusion matrix of ``pred_labels`` against ``labels``.

    Parameters
    ----------
    labels : array-like
        True labels.
    pred_labels : array-like
        Predicted labels.

    Notes
    -----
    The axes are labelled ``0..9``, so the data is expected to contain
    exactly ten classes.

    Returns
    -------
    None
    """
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1)
    counts = metrics.confusion_matrix(labels, pred_labels)
    # ``display_labels`` is keyword-only in current scikit-learn; passing
    # it positionally raises a TypeError there.
    display = metrics.ConfusionMatrixDisplay(counts, display_labels=range(10))
    display.plot(values_format='d', cmap='Blues', ax=ax)
    plt.show()
def confusion_matrix(
        prediction_report: pd.DataFrame,
        min_iou,
        min_score,
        idx_class_dict: Dict[int, str] = None,
        normalize=False) -> Tuple[np.ndarray, metrics.ConfusionMatrixDisplay]:
    """Build a confusion matrix from the rows passing the IOU/score cut.

    Parameters
    ----------
    prediction_report : pd.DataFrame
        Needs the columns ``IOU``, ``score``, ``true_class_id`` and
        ``pred_class_id``.
    min_iou, min_score
        Rows are kept when ``IOU >= min_iou`` and ``score >= min_score``.
    idx_class_dict : dict, optional
        Maps class id to display name. Id ``0`` is always shown as
        ``'no_danno'``. Without a mapping the raw ids are used as labels.
    normalize : bool
        When true, the matrix is divided by its grand total.

    Returns
    -------
    (matrix, display)
        The (optionally normalised) matrix and an unplotted
        ``ConfusionMatrixDisplay`` built from it.
    """
    passes_cut = ((prediction_report['IOU'] >= min_iou)
                  & (prediction_report['score'] >= min_score))
    kept = prediction_report[passes_cut]

    # Every class id seen on either side, in ascending order.
    seen_true = set(kept['true_class_id'].unique().tolist())
    seen_pred = set(kept['pred_class_id'].unique().tolist())
    class_idxs = sorted(seen_true | seen_pred)

    if idx_class_dict is None:
        labels = class_idxs
    else:
        labels = [
            'no_danno' if idx == 0 else idx_class_dict[idx]
            for idx in class_idxs
        ]

    cm = metrics.confusion_matrix(kept['true_class_id'],
                                  kept['pred_class_id'])
    if normalize:
        cm = cm / cm.sum()

    return cm, metrics.ConfusionMatrixDisplay(cm, display_labels=labels)
def display_metrics(classifier, X_test, Y_test):
    """Display results from running the classifier on testing data

    Prints the accuracy (``classifier.score``) and the F1 score, then draws
    the confusion matrix and the precision-recall curve.

    Parameters
    ----------
    classifier : sklearn classifier object
        Classifier to test
    X_test : array-like
        Test features, passed to ``classifier.score`` / ``predict``.
    Y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    none
    """
    # Test data accuracy score
    score = classifier.score(X_test, Y_test)
    print('Score:', score)
    print()

    # Test data f1 score
    Y_pred = classifier.predict(X_test)
    f1 = metrics.f1_score(Y_test, Y_pred)
    print('F1 score:', f1)
    print()

    # Test data confusion matrix
    conf_matrix = metrics.confusion_matrix(Y_test, Y_pred)
    metrics.ConfusionMatrixDisplay(conf_matrix).plot()

    # Precision-Recall curve. ``plot_precision_recall_curve`` has been
    # removed from scikit-learn; prefer its replacement when available and
    # fall back to the legacy helper on older installations.
    pr_display = getattr(metrics, 'PrecisionRecallDisplay', None)
    if pr_display is not None and hasattr(pr_display, 'from_estimator'):
        curve = pr_display.from_estimator(classifier, X_test, Y_test)
    else:
        curve = metrics.plot_precision_recall_curve(classifier, X_test,
                                                    Y_test)
    curve.ax_.set_title('Game winner prediction Precision-Recall curve')
def plot_confusion_matrix():
    """Draw the confusion matrix with a font size scaled to the class count.

    ``self``, ``confusion_matrix`` and ``sk_metrics`` are free names here;
    they are resolved from a scope outside this block.
    """
    import matplotlib

    # Shrink the font as the number of classes grows, capped at 10pt.
    rc_overrides = {
        "font.size": min(10, 50.0 / self.num_classes),
        "axes.labelsize": 10,
    }
    with matplotlib.rc_context(rc_overrides):
        display = sk_metrics.ConfusionMatrixDisplay(
            confusion_matrix=confusion_matrix,
            display_labels=self.label_list,
        )
        display.plot(cmap="Blues")
def SVM_func(tr_img, te_img, tr_lbl, te_lbl, te_img1, trans):
    """Train an SVM with an interactively chosen kernel and report results.

    Asks for the kernel on stdin, fits ``SVC(C=1)``, prints training and
    test accuracy, draws a confusion matrix for each, and saves a 3x3 grid
    of test images titled with actual/predicted labels to
    ``{trans}_{kernel}_results.png``.

    Parameters
    ----------
    tr_img, tr_lbl
        Training features and labels passed to ``model.fit``.
    te_img, te_lbl
        Test features and labels.
    te_img1
        Images shown with ``imshow``; presumably the un-flattened
        counterpart of ``te_img`` in the same order -- confirm at the
        call site.
    trans : str
        Prefix of the output image file name.

    Returns
    -------
    None
    """
    # Anything other than '1' or '2' selects the linear kernel.
    kernel_by_choice = {'1': 'rbf', '2': 'poly'}
    k = input("Choice of Kernel: type integers: rbf-1, poly-2, linear-3: \n ")
    kernel = kernel_by_choice.get(k, 'linear')
    print(f'{kernel} kernel chosen \n')

    model = SVC(C=1, kernel=kernel)

    # fitting labels and images for training data
    print("Fitting model")
    model.fit(tr_img, tr_lbl)

    # Training accuracy and creating confusion matrix:
    pred_tr_lbl = model.predict(tr_img)
    print("\nTraining Accuracy = ",
          metrics.accuracy_score(y_true=tr_lbl, y_pred=pred_tr_lbl))
    metrics.ConfusionMatrixDisplay(
        metrics.confusion_matrix(tr_lbl, pred_tr_lbl)).plot()

    # Testing accuracy and creating confusion matrix:
    pred_te_lbl = model.predict(te_img)
    print("\nTesting Accuracy = ",
          metrics.accuracy_score(y_true=te_lbl, y_pred=pred_te_lbl))
    metrics.ConfusionMatrixDisplay(
        metrics.confusion_matrix(te_lbl, pred_te_lbl)).plot()

    # Plotting predictions. Iterating in lockstep (instead of indexing with
    # 0..8) stops cleanly when fewer than nine test samples exist and pairs
    # labels with predictions by position even for pandas input.
    fig, axis = plt.subplots(3, 3, figsize=(10, 10))
    for a, image, actual, predicted in zip(axis.flat, te_img1, te_lbl,
                                           pred_te_lbl):
        a.imshow(image, cmap='binary')
        a.set(title=f'Act - {actual}Pred - {predicted}')
    fig.savefig(f'{trans}_{kernel}_results.png')
    plt.close(fig)
def plot_confusion_matrix():
    """Draw the confusion matrix on a fresh 6x4 inch, 175 dpi figure.

    ``self``, ``confusion_matrix``, ``sk_metrics`` and ``math`` are free
    names here; they are resolved from a scope outside this block.
    """
    import matplotlib
    import matplotlib.pyplot as plt

    # Font size shrinks with the class count and never exceeds 8pt.
    font_size = min(8, math.ceil(50.0 / self.num_classes))
    rc_overrides = {"font.size": font_size, "axes.labelsize": 8}

    with matplotlib.rc_context(rc_overrides):
        _, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), dpi=175)
        display = sk_metrics.ConfusionMatrixDisplay(
            confusion_matrix=confusion_matrix,
            display_labels=self.label_list,
        )
        display.plot(cmap="Blues", ax=ax)
def getConfusionMatrix(self,
                       y_pred: np.ndarray,
                       y_true: np.ndarray,
                       normalize: str = None,
                       plot: bool = True):
    """Compute, and optionally plot, the confusion matrix.

    Rows/columns follow ``self.classesIdx``; the plot is labelled with
    ``self.classes``. ``normalize`` is forwarded unchanged to
    ``metrics.confusion_matrix``.

    Note the argument order: predictions first, ground truth second.

    Returns the confusion matrix array.
    """
    cm = metrics.confusion_matrix(y_true,
                                  y_pred,
                                  labels=self.classesIdx,
                                  normalize=normalize)
    if not plot:
        return cm

    display = metrics.ConfusionMatrixDisplay(confusion_matrix=cm,
                                             display_labels=self.classes)
    print("plot takes too long....")
    display.plot()
    return cm
def conf_mat():
    """Plot the row-normalised confusion matrix of the CSV at ``args.data``.

    The CSV must contain ``true_label`` and ``pred_label`` columns whose
    values are members of ``CLASSES``. ``args`` and ``CLASSES`` are
    module-level names defined outside this block.

    Returns
    -------
    None
    """
    data = pd.read_csv(args.data)

    lab_enc = preprocessing.LabelEncoder()
    lab_enc.fit(CLASSES)
    true_labels = lab_enc.transform(data['true_label'])
    pred_labels = lab_enc.transform(data['pred_label'])

    # LabelEncoder stores its classes sorted, so code k corresponds to
    # ``lab_enc.classes_[k]`` -- not necessarily to ``CLASSES[k]``. Pin the
    # matrix to every encoded class (even ones absent from the data) and
    # label the axes in the encoder's order so ticks match rows/columns.
    encoded_classes = list(range(len(lab_enc.classes_)))
    matrix = metrics.confusion_matrix(y_true=true_labels,
                                      y_pred=pred_labels,
                                      labels=encoded_classes,
                                      normalize='true')
    disp = metrics.ConfusionMatrixDisplay(confusion_matrix=matrix,
                                          display_labels=lab_enc.classes_)
    disp.plot(cmap='Blues')
    plt.show()
def generateConfusionMatrix(classifier,
                            y_test,
                            predicted,
                            title=None,
                            labels=None,
                            plot=False):
    '''
    Compute the confusion matrix for a classifier's predictions and
    optionally plot it.

    Parameters
    ----------
    classifier
        Not used by this function; kept for interface compatibility.
    y_test : array-like
        True labels.
    predicted : array-like
        Predicted labels.
    title : str, optional
        Title of the plot; only used when ``plot`` is true.
    labels : sequence, optional
        Tick labels for the plot (display only; they do not affect the
        computed matrix).
    plot : bool
        Show the matrix with a blue colour map when true.

    Returns
    -------
    The confusion matrix returned by ``metrics.confusion_matrix``.
    '''
    conf_matrix = metrics.confusion_matrix(y_test, predicted)

    if plot:
        # ``display_labels`` must be passed by keyword on current
        # scikit-learn; the positional form raises a TypeError.
        conf_disp = metrics.ConfusionMatrixDisplay(conf_matrix,
                                                   display_labels=labels)
        conf_disp.plot(cmap=plt.cm.Blues)
        if title is not None:
            conf_disp.ax_.set_title(title)
        plt.show()

    return conf_matrix
def show_metrics(
    model_name: str,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    threshold: float = 0.5,
):
    """Write the classification report, ROC table and confusion-matrix plot.

    ``y_pred`` holds scores; they are turned into 0/1 predictions with
    ``y_pred > threshold``. Three artefacts are written into
    ``config.output_dir`` (``config`` is defined outside this block): the
    classification report CSV, the ROC details CSV and the confusion-matrix
    PNG. The report is also printed and the figure shown.
    """
    out_dir = config.output_dir
    report_path = os.path.join(
        out_dir,
        f"{model_name}_classification_report_{config.data_mode}.csv",
    )
    roc_path = os.path.join(
        out_dir,
        f"{model_name}_roc_details_{config.data_mode}.csv",
    )
    figure_path = os.path.join(
        out_dir,
        f"{model_name}_confusion_matrix_{config.data_mode}.png",
    )

    preds = (y_pred > threshold).astype(int)

    # creating a classification report
    cm = metrics.confusion_matrix(y_true, preds)
    report = metrics.classification_report(y_true, preds, output_dict=True)
    df = pd.DataFrame(report).transpose()
    df.to_csv(report_path, index=True)
    print(f"Classification report:\n{df}")

    # ROC details (computed on the raw scores, not the thresholded labels)
    fpr, tpr, thresh = metrics.roc_curve(y_true, y_pred)
    roc_details = pd.DataFrame({"fpr": fpr, "tpr": tpr, "threshold": thresh})
    roc_details.to_csv(roc_path, index=False)

    cm_disp = metrics.ConfusionMatrixDisplay(cm, display_labels=[0, 1])
    cm_disp.plot()
    cm_disp.figure_.savefig(figure_path, dpi=200)
    plt.show()
def confusion_matrix(Y, y_pred, model_name):
    """Plot one binary confusion matrix per class and download the figure.

    The five per-class matrices from ``multilabel_confusion_matrix`` are
    printed, then drawn into the first five cells of a 2x3 grid (the sixth
    cell is removed). The figure is saved as EPS and handed to
    ``files.download`` (``files`` is defined outside this block).
    """
    class_ids = [0, 1, 2, 3, 4]
    matrices = metrics.multilabel_confusion_matrix(Y, y_pred,
                                                   labels=class_ids)
    print(matrices)

    labels = [
        "Snow/Ice", "Mountains/Rocks", "Plants/Forrests", "Stars",
        "Sandy Desert"
    ]
    fig, axs = plt.subplots(2, 3, figsize=(10, 8), constrained_layout=True)

    # Row-major walk over the grid: three panels on top, two below.
    for panel_title, matrix, ax in zip(labels, matrices, axs.flat):
        disp = metrics.ConfusionMatrixDisplay(confusion_matrix=matrix,
                                              display_labels=[False, True])
        disp.plot(ax=ax, values_format='d')
        disp.ax_.set_title(panel_title)

    fig.delaxes(axs[1, 2])

    # NOTE(review): the file name carries only the last class label even
    # though the figure shows all five classes -- confirm this is intended.
    filename = (model_name + "_Confusion_matrix_for_" +
                labels[-1].replace("/", "or") + ".eps")
    plt.savefig(filename, format="eps")
    files.download(filename)
def multiConfusionPlot(X_train, X_test, y_train, y_test):
    """Fit several classifiers and plot their confusion matrices side by side.

    Each classifier is fitted on the training split and evaluated on the
    test split. Every panel is titled ``"<name>:<auc>"`` where the AUC is
    computed from the hard predictions, so this presumably targets a
    binary problem -- confirm with the caller. One colour bar is shared by
    all panels.

    Parameters
    ----------
    X_train, y_train
        Training features and labels.
    X_test, y_test
        Test features and labels.

    Returns
    -------
    None
    """
    classifiers = {
        "customLogistic": CustomlogisticRegression(),
        # max_iter must be an integer; scikit-learn's parameter validation
        # rejects the float 1e4.
        "LogisiticRegression": LogisticRegression(max_iter=10000),
        "KNearest": KNeighborsClassifier(),
        "Support Vector Classifier": SVC(),
        "MLPClassifier": MLPClassifier(),
    }

    # One panel per classifier, so the grid follows the dict above.
    f, axes = plt.subplots(1, len(classifiers), figsize=(20, 5),
                           sharey='row')

    for i, (key, classifier) in enumerate(classifiers.items()):
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)

        cf_matrix = metrics.confusion_matrix(y_test, y_pred)
        disp = metrics.ConfusionMatrixDisplay(cf_matrix)
        disp.plot(ax=axes[i], xticks_rotation=45)

        fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
        aucScore = metrics.auc(fpr, tpr)
        disp.ax_.set_title(key + ":" + "{:.2e}".format(aucScore))

        # Per-panel colour bars and x labels are replaced by shared ones.
        disp.im_.colorbar.remove()
        disp.ax_.set_xlabel('')
        if i != 0:
            disp.ax_.set_ylabel('')

    f.text(0.4, 0.1, 'Predicted label', ha='left')
    plt.subplots_adjust(wspace=0.40, hspace=0.1)
    f.suptitle("BalancedLabelMinMax")
    # The shared colour bar takes its scale from the last panel drawn.
    f.colorbar(disp.im_, ax=axes)
    plt.show()
def plot_confusion_matrix(predictor):
    """Plot a row-normalised confusion matrix per classifier target.

    Predictions are made on the predictor's own training data. Targets are
    laid out in a grid with two columns (a single column when there is
    only one target). The label set of each matrix is taken from the true
    values only.
    """
    targets = predictor.classifiers
    y_pred = predictor.predict(predictor.train_data)[targets]
    y_true = predictor.train_data[targets]

    ncol = 1 if len(targets) <= 1 else 2
    nrow = int(np.ceil(len(targets) / ncol))

    for position, target in enumerate(targets, start=1):
        ax = pl.subplot(nrow, ncol, position)
        labels = y_true[target].unique()
        cm = metrics.confusion_matrix(y_true[target],
                                      y_pred[target],
                                      normalize='true',
                                      labels=labels)
        display = metrics.ConfusionMatrixDisplay(confusion_matrix=cm,
                                                 display_labels=labels)
        display.plot(ax=ax)
        pl.title(target)
        pl.gca().grid(False)
def plot_confusion_matrix(y_true, y_pred_proba, threshold=0.5, ax=None):
    """Format and plot the confusion matrix of a binary classifier.

    Args:
        y_true (1-D array-like of shape [n_samples, ]):
            Binary target labels (each label is 0 or 1).
        y_pred_proba (1-D array-like of shape [n_samples, ]):
            Predicted probability of the positive class (label 1).
        threshold (float, default=0.5):
            Decision threshold. A sample is classified as positive (1)
            when its predicted probability is >= threshold.
        ax (matplotlib axes, default=None):
            Axes to draw into. A new figure and axes are created when None.

    Returns:
        None
    """
    # Convert probabilities to binary labels. ``np.asarray`` lets plain
    # Python lists through, as the "array-like" contract promises.
    y_pred_label = np.where(np.asarray(y_pred_proba) >= threshold, 1, 0)

    # Compute the matrix with the positive class first.
    confusion_matrix_ = skm.confusion_matrix(y_true,
                                             y_pred_label,
                                             labels=[1, 0])

    if ax is None:
        _, ax = plt.subplots(1, 1)

    disp = skm.ConfusionMatrixDisplay(confusion_matrix=confusion_matrix_,
                                      display_labels=[1, 0])
    disp.plot(
        include_values=True,
        cmap='Blues',
        ax=ax,
        xticks_rotation='horizontal',
        values_format='d',
    )
    ax.set_title(f'Confusion Matrix: Pos_decision_threshold={threshold}')
def generate_confusion_matrix(y_true, y_pred, output):
    """
    Plot the confusion matrix of a set of predictions and save it.

    Parameters
    ----------
    y_true : array-like
        the true labels.
    y_pred : array-like
        the predicted labels.
    output : str
        prefix of the output image path; the file written is
        ``output + '_classifier_confusion_matrix.png'``.

    Returns
    -------
    None
    """
    matrix = metrics.confusion_matrix(y_true, y_pred)
    display = metrics.ConfusionMatrixDisplay(confusion_matrix=matrix)
    cm_display = display.plot()
    cm_display.ax_.set_title('Classifier confusion matrix')
    image_path = output + '_classifier_confusion_matrix.png'
    plt.savefig(image_path)
    return
# Convert the collected samples to arrays before handing them to the model.
imagesTrain = np.array(imagesTrain)
imagesTest = np.array(imagesTest)
labelsTrain = np.array(labelsTrain)
labelsTest = np.array(labelsTest)

results = model.fit(imagesTrain,
                    labelsTrain,
                    epochs=epocs,
                    batch_size=batchSize,
                    validation_data=(imagesTest, labelsTest))

# Confusion matrix of the test predictions, saved under a timestamped name.
predictions = model.predict(imagesTest)
matrix = metrics.confusion_matrix(labelsTest, predictions.argmax(axis=1))
metrics.ConfusionMatrixDisplay(confusion_matrix=matrix,
                               display_labels=categories).plot()
plt.savefig("trainlog/imagepic_" + str(sampleSize) + "-resize-" +
            str(int(time.time())) + ".png",
            bbox_inches="tight")
plt.close()

# Append one tab-separated line: sample size, the constant 1, and the
# accuracy / validation accuracy of the last epoch. The ``with`` block
# closes the log even if the write fails.
with open(saveFileName, "a") as saveFile:
    saveFile.write(
        str(sampleSize) + "\t1\t" +
        str(results.history["accuracy"][-1]) + "\t" +
        str(results.history["val_accuracy"][-1]) + "\n")
print("")
# Training time in minutes.
elapse = (time.time() - start) / 60

# 3.2. Save the trained model if necessary
if SAVE_MODEL:
    torch.save(model.state_dict(), SAVE_MODEL)

# 4.1. Visualize the loss curves
# loss_array columns: 0 = epoch, 1 = training loss, 2 = validation loss.
plt.title(
    f'Training and Validation Losses (time: {elapse:.2f} [min] @ CUDA: {USE_CUDA})'
)
loss_array = np.array(loss_list)
for column, curve_label in ((1, 'Training Loss'), (2, 'Validation Loss')):
    plt.plot(loss_array[:, 0], loss_array[:, column], label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.xlim(loss_array[0, 0], loss_array[-1, 0])
plt.grid()
plt.legend()
plt.show()

# 4.2. Visualize the confusion matrix
predicts = [predict(datum, model) for datum in data_valid.data]
conf_mat = metrics.confusion_matrix(data_valid.targets, predicts)
conf_fig = metrics.ConfusionMatrixDisplay(conf_mat)
conf_fig.plot()

# 5. Test your image
print(predict(data_train.data[0], model))  # 5
with PIL.Image.open('data/cnn_mnist_test.png').convert('L') as image:
    print(predict(image, model))  # 3
# Upper bins of the prediction classes; later thresholds overwrite earlier
# ones, so each cell ends up in the highest bin it exceeds.
yhat_classes[yhat > 0.25] = 3
yhat_classes[yhat > 0.5] = 4
yhat_classes[yhat > 1] = 5
yhat_classes = np.reshape(yhat_classes, (361315, -1))

class_labels = [0, 1, 2, 3, 4, 5]

# confusion matrix for all values
matrix_con = metrics.confusion_matrix(y_sim_classes,
                                      yhat_classes,
                                      labels=class_labels)
# confusion matrix for values above 1 cm
matrix_con_without_0 = metrics.confusion_matrix(y_sim_classes,
                                                yhat_classes,
                                                labels=class_labels[1:])

# ``plot`` takes keyword arguments only on current scikit-learn; the stray
# positional ``2`` raised a TypeError there (on old releases it was read
# as a truthy ``include_values``, which is the default anyway).
cm_display = metrics.ConfusionMatrixDisplay(
    matrix_con, display_labels=class_labels).plot()

# 361315 cells == 569 x 635 grid.
y_sim_classes_map = np.reshape(y_sim_classes, (569, 635))
plt.figure(14)  # simulation
plt.imshow(y_sim_classes_map, cmap='Greys', vmin=0, vmax=5)
plt.colorbar()
plt.title('y_sim_classes_map')
plt.show()

yhat_classes_map = np.reshape(yhat_classes, (569, 635))
plt.figure(15)  # prediction
plt.imshow(yhat_classes_map, cmap='Greys', vmin=0, vmax=5)
plt.colorbar()
plt.title('yhat_classes_map')
plt.show()
# Using model to predict
yhat = LR.predict(X_test)
yhat_prob = LR.predict_proba(X_test)
print(yhat)
print(yhat_prob)

# Evaluation using Jaccard Index: per-class first, then the averaged forms.
print('average=None', metrics.jaccard_score(y_test, yhat, average=None))
for averaging in ('micro', 'macro', 'weighted'):
    print(averaging, metrics.jaccard_score(y_test, yhat, average=averaging))

species = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]

# Evaluation using confusion matrix (raw counts)
cm = metrics.confusion_matrix(y_test, yhat)
disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm,
                                      display_labels=species)
disp.plot()

# Evaluation using confusion matrix
# (normalize=true -> fractions over each true label, i.e. per row)
cm_true = metrics.confusion_matrix(y_test, yhat, normalize='true')
disp_true = metrics.ConfusionMatrixDisplay(confusion_matrix=cm_true,
                                           display_labels=species)
disp_true.plot()

# Evaluation using confusion matrix
# (normalize=pred -> fractions over each predicted label, i.e. per column)
# NOTE(review): this display is built but not plotted in the visible code.
cm_pred = metrics.confusion_matrix(y_test, yhat, normalize='pred')
disp_pred = metrics.ConfusionMatrixDisplay(confusion_matrix=cm_pred,
                                           display_labels=species)
def main():
    """Evaluate detections against ground truth and plot the usual metrics.

    Reads the actual and detected boxes from the ``example`` folder,
    matches them with ``calculate_metrics``, writes the matched table back
    to CSV, then prints the accuracy and classification report and shows
    the confusion matrix, per-class precision-recall curves and per-class
    ROC curves.

    ``preprocess_df``, ``remove_overlapping_objects``, ``calculate_metrics``
    and the two-argument ``plot_confusion_matrix`` helper are defined
    outside this block.
    """
    plt.rcParams['figure.dpi'] = 300
    plt.rcParams['font.size'] = 7

    # Classes
    classes = ["dog", "cat", "Null"]
    sorted_classes = sorted(classes)

    # DataFrames
    # NOTE(review): the backslash paths only resolve on Windows.
    actual_df = pd.read_csv("example\\actual.csv")
    actual_df = preprocess_df(actual_df)
    detected_df = pd.read_csv("example\\detected.csv")
    detected_df = preprocess_df(detected_df)
    detected_df = remove_overlapping_objects(detected_df)

    # Calculating
    df = calculate_metrics(actual_df,
                           detected_df,
                           prob_thresh=0,
                           iou_thresh=0.0)
    df.to_csv("example\\result_df.csv", index=False)

    # ============ Collect data for sklearn =============
    # Only rows that have an actual box take part; a missing detection
    # probability ("Null") counts as score 0.
    with_actual = df[df['a_xmin'] != 'Null']
    y_true = np.array(with_actual['a_label'].tolist())
    y_pred = np.array(with_actual['d_label'].tolist())
    y_score = np.array([
        0 if prob == "Null" else float(prob)
        for prob in with_actual['d_prob']
    ])

    print("Accuracy ", 100 * (y_true == y_pred).sum() / len(y_true))

    # ========= Confusion Matrix ===========
    cm = sm.confusion_matrix(y_true, y_pred, labels=sorted_classes)
    plot_confusion_matrix(cm, classes=sorted_classes)
    plt.show()
    sm.ConfusionMatrixDisplay(cm, display_labels=sorted_classes).plot()
    plt.show()

    # ========= Classification Report ===========
    cp = sm.classification_report(y_true,
                                  y_pred,
                                  labels=sorted_classes,
                                  output_dict=False)
    print(cp)

    # ========= PR Curve ===========
    precision = {}
    recall = {}
    thresh = {}
    for i in classes:
        precision[i], recall[i], thresh[i] = sm.precision_recall_curve(
            y_true, y_score, pos_label=i)
        plt.plot(recall[i], precision[i], lw=2, label=f'{i}')
    plt.xlabel("recall")
    plt.ylabel("precision")
    plt.legend(loc="best")
    plt.title("precision vs. recall curve")
    plt.show()
    print("PR Curve")

    # ========= ROC Curve ===========
    fpr = {}
    tpr = {}
    thresh = {}
    roc_auc = {}
    for i in classes:
        fpr[i], tpr[i], thresh[i] = sm.roc_curve(y_true,
                                                 y_score,
                                                 pos_label=i)
        roc_auc[i] = sm.auc(fpr[i], tpr[i])
        plt.plot(fpr[i],
                 tpr[i],
                 lw=2,
                 label=f'{i} (area = {roc_auc[i]:0.2f})')

    # The no-skill reference is the diagonal. It used to be derived from
    # roc_curve with pos_label="nolines", a label that is not one of the
    # classes here, which leaves the true-positive rate undefined.
    plt.plot([0, 1], [0, 1], linestyle='--', label='No Skill')
    plt.xlabel("false positive rate")
    plt.ylabel("true positive rate")
    plt.legend(loc="best")
    plt.title("ROC curve")
    plt.show()
    print("ROC Curve")
def plot_confusion_matrix(self, title, outname, **kwargs):
    """Plot a confusion-matrix display and save it under ``self.outpath``.

    ``kwargs`` are forwarded unchanged to ``metrics.ConfusionMatrixDisplay``.
    The figure is titled ``title`` and written to
    ``<self.outpath>/<outname>``.
    """
    display = metrics.ConfusionMatrixDisplay(**kwargs)
    display.plot()
    display.ax_.set_title(title)

    target = "/".join([self.outpath, outname])
    plt.savefig(target)
def train_eval(config, exp_path):
    """Train and evaluate one model per marker and write all reports.

    For every marker in the dataset this function optionally runs a grid
    search, fits a fresh clone of the model on each fold, pools the
    per-fold labels and predicted scores, evaluates them with
    ``eval_results`` and draws nine diagnostic panels (one figure column
    per marker).

    Files written into ``exp_path``:
      * ``dirty_data.txt`` -- only when ``dataset.data_clean`` is set
      * ``feature_selection_and_transformation.txt`` -- only when feature
        selection or transformation is configured
      * ``metrics.txt`` -- the metrics that are also printed
      * ``best_params.yaml`` -- parameters used per marker
      * ``conf_mat.png`` -- the diagnostic figure

    Parameters
    ----------
    config : mapping
        Read here for ``'threshold'`` (default ``'roc_optimal'``),
        ``'model_kwargs_search'`` (optional) and ``'model_kwargs'``; also
        passed to ``MarkerExpressionDataset`` and ``get_model``.
    exp_path : str
        Output directory; presumably it must already exist, since nothing
        here creates it -- TODO confirm.

    Returns
    -------
    None
    """
    dataset = MarkerExpressionDataset(config)

    # Report the samples flagged by data cleaning, grouped by marker and
    # then by class.
    if dataset.data_clean is not None:
        with open(os.path.join(exp_path, 'dirty_data.txt'), 'w') as f:
            f.write('---data clean method: %s---\n' % dataset.data_clean)
            for marker, item in dataset.outlier_samples.items():
                f.write('marker %s:\n' % marker)
                for class_id in dataset.classes:
                    f.write('class %s:\n' % class_id)
                    for sample_id in item.keys():
                        if item[sample_id]['class'] == class_id:
                            f.write('\t%s\n' % sample_id)

    # Report the feature selection / transformation setup per marker.
    if dataset.feature_selection is not None or dataset.feature_transformation is not None:
        with open(
                os.path.join(exp_path,
                             'feature_selection_and_transformation.txt'),
                'w') as f:
            if dataset.feature_selection is not None:
                f.write('---feature selection method: %s---\n' %
                        dataset.feature_selection['method'])
                if 'kwargs' in dataset.feature_selection:
                    f.write('---feature selection kwargs: %s---\n' %
                            str(dataset.feature_selection['kwargs']))
            if dataset.feature_transformation is not None:
                f.write('---feature transformation method: %s---\n' %
                        dataset.feature_transformation['method'])
                if 'kwargs' in dataset.feature_transformation:
                    f.write('---feature transformation kwargs: %s---\n' %
                            str(dataset.feature_transformation['kwargs']))
            for marker in dataset.markers:
                f.write('marker %s:\n' % marker)
                if dataset.fs_metric_params is not None:
                    f.write(
                        '---feature selection and transformation kwargs: %s---\n'
                        % str(dataset.fs_metric_params[marker]))
                if dataset.feature_selection is not None:
                    features = dataset.features
                    feature_index = 0
                    f.write('---selected features---\n')
                    if dataset.feature_selection['method'] == 'custom':
                        support_flags = dataset.feature_selection['selection'][
                            marker]
                    else:
                        support_flags = dataset.feature_selector[
                            marker].get_support()
                    # The feature name index wraps around, so the flag list
                    # may be longer than the feature list.
                    for flag in support_flags:
                        f.write('%s:\t%s\n' % (features[feature_index], flag))
                        feature_index = (feature_index + 1) % len(features)
                if dataset.feature_transformation is not None:
                    components = dataset.feature_transformer[
                        marker].components_
                    f.write('---feature transformation components---:\n%s' %
                            components.tolist())
                    # if 'feature_mean' in config:
                    #     feature_mean = config['feature_mean']
                    #     coefficients = np.abs(feature_mean*components.sum(axis=0)).\
                    #         reshape([len(dataset.features), -1]).sum(axis=0)
                    # else:
                    #     coefficients = np.abs(components.sum(axis=0)).reshape([len(dataset.features), -1]).sum(axis=0)
                    # coefficients = coefficients / coefficients.sum()
                    #
                    # f.write('---feature transformation coefficients---:\n%s' % coefficients.tolist())

    threshold = config.get('threshold', 'roc_optimal')
    metrics_names = ['sensitivity', 'specificity', 'roc_auc_score']
    metrics_avg_names = ['roc_auc_score_avg', 'roc_auc_score_avg_std']

    # Nine panel rows, one column per marker.
    fig, ax = plt.subplots(9,
                           len(dataset.markers),
                           squeeze=False,
                           figsize=(6 * len(dataset.markers), 40))
    # NOTE(review): opened without a context manager; it is closed only at
    # the end of the function, so an exception in between leaves it open.
    metrics_file = open(os.path.join(exp_path, 'metrics.txt'), 'w')
    metrics_fig_filename = os.path.join(exp_path, 'conf_mat.png')
    best_params = dict()
    all_marker_train_metrics = []
    all_marker_test_metrics = []

    for i, marker in enumerate(dataset.markers):
        model = get_model(config)
        if 'model_kwargs_search' in config:
            # parameter search
            print('parameter search for marker %s...' % marker)
            all_x, all_y, cv_index = dataset.get_all_data(marker)
            best_model = GridSearchCV(model,
                                      param_grid=config['model_kwargs_search'],
                                      cv=cv_index,
                                      scoring='roc_auc_ovr')
            best_model.fit(all_x, all_y)
            best_params[marker] = best_model.best_params_
            print('search done')
        else:
            # NOTE(review): best_model is the estimator returned by
            # get_model; this function never calls fit on it (each fold
            # below fits a clone), yet its predict is used for panel 1.
            best_model = model
            best_params[marker] = config['model_kwargs']

        # run train and test
        # Labels and predicted scores are pooled over all folds.
        train_xs = []
        train_ys = []
        train_ys_score = []
        test_xs = []
        test_ys = []
        test_ys_score = []
        for fold_i, (train_x, train_y, test_x,
                     test_y) in enumerate(dataset.get_split_data(marker)):
            model = base.clone(model)
            model.set_params(**best_params[marker])
            model.fit(train_x, train_y)
            # model.classes_ = dataset.classes
            # presumably the fold splits are plain lists, as `+=` extends
            # the pooled lists in place -- TODO confirm
            train_xs += train_x
            train_ys += train_y
            test_xs += test_x
            test_ys += test_y
            train_y_score = model.predict_proba(train_x).tolist()
            train_ys_score += train_y_score
            test_y_score = model.predict_proba(test_x).tolist()
            test_ys_score += test_y_score
            # model_filename = os.path.join(exp_path, 'model', '%s_%s_fold_%d.pkl'
            #                               % (config['model'], marker, fold_i))
            # maybe_create_path(os.path.dirname(model_filename))
            # with open(model_filename, 'wb') as f:
            #     pickle.dump(model, f)

        train_metrics = eval_results(train_ys,
                                     train_ys_score,
                                     labels=dataset.classes,
                                     average='macro',
                                     threshold=threshold,
                                     num_fold=dataset.num_fold)
        # The test set is evaluated with the thresholds used on the
        # training set.
        test_metrics = eval_results(test_ys,
                                    test_ys_score,
                                    labels=dataset.classes,
                                    average='macro',
                                    threshold=train_metrics['used_threshold'],
                                    num_fold=dataset.num_fold)
        all_marker_train_metrics.append(train_metrics)
        all_marker_test_metrics.append(test_metrics)

        # print metrics to console and file
        double_print('marker: %s' % marker, metrics_file)
        double_print('metrics on training set:', metrics_file)
        for j, class_j in enumerate(dataset.classes):
            log_str = '[class: %s. threshold: %1.1f] ' % (
                class_j, 100 * train_metrics['used_threshold'][j])
            for metrics_name in metrics_names:
                log_str += '%s: %1.1f. ' % (metrics_name,
                                            train_metrics[metrics_name][j])
            double_print(log_str, metrics_file)
        for metrics_name in metrics_avg_names:
            double_print(
                '%s: %1.1f' % (metrics_name, train_metrics[metrics_name]),
                metrics_file)
        double_print('metrics on test set:', metrics_file)
        for j, class_j in enumerate(dataset.classes):
            log_str = '[class: %s. threshold: %1.1f] ' % (
                class_j, 100 * test_metrics['used_threshold'][j])
            for metrics_name in metrics_names:
                log_str += '%s: %1.1f. ' % (metrics_name,
                                            test_metrics[metrics_name][j])
            double_print(log_str, metrics_file)
        for metrics_name in metrics_avg_names:
            double_print(
                '%s: %1.1f' % (metrics_name, test_metrics[metrics_name]),
                metrics_file)

        # generate figure
        # Panel 0: data-cleaning distribution.
        current_ax = ax[0, i]
        dataset.plot_data_clean_distribution(current_ax, marker)
        current_ax.set_title('data cleaning on marker %s' % marker)

        # Panel 1: feature distribution with best_model's predictions.
        current_ax = ax[1, i]
        # Contours are requested only for two-feature samples.
        contour_flag = len(train_xs[0]) == 2
        # dup_reduced = list(tuple(tuple([train_xs[j] + [train_ys[j]] for j in range(len(train_xs))])))
        # dup_reduced_train_xs = [item[:-1] for item in dup_reduced]
        # dup_reduced_train_ys = [item[-1] for item in dup_reduced]
        # dup_reduced_train_ys_str = [str(item) for item in dup_reduced_train_ys]
        # train_x/test_x are the loop variables left over from the LAST
        # fold, i.e. that fold's train and test parts combined.
        dup_reduced_train_xs = train_x + test_x
        dup_reduced_train_ys = train_y + test_y
        dup_reduced_train_ys_str = [str(item) for item in dup_reduced_train_ys]
        classes_str = [str(item) for item in dataset.classes]
        plot_feature_distribution(
            dup_reduced_train_xs,
            ax=current_ax,
            t_sne=True,
            hue=dup_reduced_train_ys_str,
            hue_order=classes_str,
            style=dup_reduced_train_ys_str,
            style_order=classes_str,
            # x_lim='box', y_lim='box',
            x_lim='min_max_extend',
            y_lim='min_max_extend',
            contour=contour_flag,
            z_generator=best_model.predict)
        current_ax.set_title('%s trained on whole set' % marker)

        # Panel 2: confusion matrix on the pooled training folds.
        current_ax = ax[2, i]
        metrics.ConfusionMatrixDisplay(
            train_metrics['conf_mat'],
            display_labels=dataset.classes).plot(ax=current_ax)
        current_ax.set_title('%s on train set of all folds' % marker)

        # Panel 3: per-class training ROC curves; the scatter point marks
        # (1 - specificity, sensitivity), both rescaled from percent.
        current_ax = ax[3, i]
        for j in range(len(dataset.classes)):
            roc_curve = train_metrics['roc_curve'][j]
            roc_auc_score = train_metrics['roc_auc_score'][j]
            class_id = dataset.classes[j]
            sen = train_metrics['sensitivity'][j] / 100
            spe = train_metrics['specificity'][j] / 100
            metrics.RocCurveDisplay(fpr=roc_curve[0],
                                    tpr=roc_curve[1],
                                    roc_auc=roc_auc_score,
                                    estimator_name='class %s' %
                                    class_id).plot(ax=current_ax)
            current_ax.scatter(1 - spe, sen)

        # Panel 4: table of training metrics plus the parameters used.
        current_ax = ax[4, i]
        table_val_list = [
            dataset.classes,
            [100 * item for item in train_metrics['used_threshold']]
        ]
        row_labels = ['cls', 'thr']
        for metrics_name in metrics_names:
            table_val_list.append(train_metrics[metrics_name])
            # Row label is the first three characters of the metric name.
            row_labels.append(metrics_name[:min(3, len(metrics_name))])
        additional_text = []
        for metrics_name in metrics_avg_names:
            additional_text.append('%s: %1.1f' %
                                   (metrics_name, train_metrics[metrics_name]))
        additional_text.append(best_params[marker])
        plot_table(table_val_list,
                   row_labels,
                   ax=current_ax,
                   additional_text=additional_text)

        # Panel 5: last fold's test split with the last fold's model.
        current_ax = ax[5, i]
        contour_flag = len(train_xs[0]) == 2
        test_y_str = [str(item) for item in test_y]
        classes_str = [str(item) for item in dataset.classes]
        plot_feature_distribution(
            test_x,
            ax=current_ax,
            t_sne=True,
            hue=test_y_str,
            hue_order=classes_str,
            style=test_y_str,
            style_order=classes_str,
            # x_lim='box', y_lim='box',
            x_lim='min_max_extend',
            y_lim='min_max_extend',
            contour=contour_flag,
            z_generator=model.predict)
        current_ax.set_title('%s on test set of the last fold' % marker)

        # Panel 6: confusion matrix on the pooled test folds.
        current_ax = ax[6, i]
        metrics.ConfusionMatrixDisplay(
            test_metrics['conf_mat'],
            display_labels=dataset.classes).plot(ax=current_ax)
        current_ax.set_title('%s on test set of all folds' % marker)

        # Panel 7: per-class test ROC curves.
        current_ax = ax[7, i]
        for j in range(len(dataset.classes)):
            roc_curve = test_metrics['roc_curve'][j]
            roc_auc_score = test_metrics['roc_auc_score'][j]
            class_id = dataset.classes[j]
            sen = test_metrics['sensitivity'][j] / 100
            spe = test_metrics['specificity'][j] / 100
            metrics.RocCurveDisplay(fpr=roc_curve[0],
                                    tpr=roc_curve[1],
                                    roc_auc=roc_auc_score,
                                    estimator_name='class %s' %
                                    class_id).plot(ax=current_ax)
            current_ax.scatter(1 - spe, sen)

        # Panel 8: table of test metrics.
        current_ax = ax[8, i]
        table_val_list = [
            dataset.classes,
            [100 * item for item in test_metrics['used_threshold']]
        ]
        row_labels = ['cls', 'thr']
        for metrics_name in metrics_names:
            table_val_list.append(test_metrics[metrics_name])
            row_labels.append(metrics_name[:min(3, len(metrics_name))])
        additional_text = []
        for metrics_name in metrics_avg_names:
            additional_text.append('%s: %1.1f' %
                                   (metrics_name, test_metrics[metrics_name]))
        plot_table(table_val_list,
                   row_labels,
                   ax=current_ax,
                   additional_text=additional_text)

    # Averages of the per-marker summary metrics across all markers.
    for metrics_name in metrics_avg_names:
        all_marker_values = [
            item[metrics_name] for item in all_marker_train_metrics
        ]
        double_print(
            'overall train %s: %1.1f' %
            (metrics_name, sum(all_marker_values) / len(all_marker_values)),
            metrics_file)
    for metrics_name in metrics_avg_names:
        all_marker_values = [
            item[metrics_name] for item in all_marker_test_metrics
        ]
        double_print(
            'overall test %s: %1.1f' %
            (metrics_name, sum(all_marker_values) / len(all_marker_values)),
            metrics_file)
    metrics_file.close()
    save_yaml(os.path.join(exp_path, 'best_params.yaml'), best_params)
    fig.savefig(metrics_fig_filename, bbox_inches='tight', pad_inches=1)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

# Load the Iris data; "Species" is the target and "Id" is a row
# identifier, so both are dropped from the features.
iris = pd.read_csv("Iris.csv")
y = iris.Species
x = iris.drop(['Species', 'Id'], axis=1)

# Hold out 20% of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

clf = DecisionTreeClassifier()
clf = clf.fit(x_train, y_train)
y_predict = clf.predict(x_test)
print("accuracy : ", metrics.accuracy_score(y_test, y_predict))

# Pin the matrix to the classifier's classes and label the axes with the
# class names. The axes were previously labelled with the matrix itself
# (display_labels=cm), which prints rows of counts as tick labels.
cm = metrics.confusion_matrix(y_test, y_predict, labels=clf.classes_)
cm_display = metrics.ConfusionMatrixDisplay(
    cm, display_labels=clf.classes_).plot()
plt.show()
def plot_con_matrix(con_matrix):
    """Plot ``con_matrix`` with Novel/Normal tick labels; return the figure.

    Cell values are formatted without decimals on a blue colour map.
    """
    class_names = ["Novel", "Normal"]
    display = metrics.ConfusionMatrixDisplay(con_matrix,
                                             display_labels=class_names)
    plotted = display.plot(cmap="Blues", values_format=".0f")
    return plotted.figure_
def evaluate_architecture(self, with_test = False):
    """Architecture evaluation utility.

    Prints the data shapes, accuracy and ROC AUC for the training and
    validation sets (and the test set when ``with_test`` is true), then
    plots the row-normalised confusion matrices side by side, saves them to
    ``confusion_matrix.pdf`` and shows the figure.

    ``analyse`` is defined outside this block. Its result is indexed as
    ``[0]`` accuracy, ``[1]`` predictions (``.numpy()`` is called on it)
    and ``[2]`` scores (``.detach().numpy()`` is called on it), so the
    last two are presumably torch tensors -- confirm against ``analyse``.

    Parameters
    ----------
    with_test : bool, default False
        Also evaluate ``self.test_data`` and add a third panel.
    """
    train_x, train_y = self.train_data
    print("Training Data Shape = ", train_x.shape, train_y.shape)
    val_x, val_y = self.val_data
    print("Validation Data Shape = ", val_x.shape, val_y.shape)

    # Calculate and print accuracies based on model predictions
    acc1 = analyse(self.fitted_model, train_x, train_y)
    acc2 = analyse(self.fitted_model, val_x, val_y)

    print("Train Accuracy = ", acc1[0])
    auc_score1 = metrics.roc_auc_score(train_y, acc1[2].detach().numpy())
    print("Train AUC Score = ", auc_score1)

    print("Validation Accuracy = ", acc2[0])
    auc_score2 = metrics.roc_auc_score(val_y, acc2[2].detach().numpy())
    print("Validation AUC Score = ", auc_score2)

    labels = ['No Claim', 'Claim']

    if with_test:
        test_x, test_y = self.test_data
        print("Test Data Shape = ", test_x.shape, test_y.shape)
        acc3 = analyse(self.fitted_model, test_x,
                       test_y.reshape((len(test_y),)))
        print("Test Accuracy = ", acc3[0])
        auc_score3 = metrics.roc_auc_score(test_y, acc3[2].detach().numpy())
        print("Test AUC Score = ", auc_score3)

        f, (ax1, ax2, ax3) = plt.subplots(1, 3)
        confusion_test = metrics.confusion_matrix(test_y,
                                                  acc3[1].numpy(),
                                                  normalize='true')
        # Plot confusion for test data. ``display_labels`` is keyword-only
        # on current scikit-learn; the positional form raises a TypeError.
        metrics.ConfusionMatrixDisplay(
            confusion_test, display_labels=labels).plot(ax=ax3)
        ax3.set_title("Test Set", fontsize=17)
        ax3.set_ylabel("")
        plot_width = 15
    else:
        f, (ax1, ax2) = plt.subplots(1, 2)
        plot_width = 10

    # Construct training and validation normalised confusion matrices
    confusion_train = metrics.confusion_matrix(train_y,
                                               acc1[1].numpy(),
                                               normalize='true')
    confusion_val = metrics.confusion_matrix(val_y,
                                             acc2[1].numpy(),
                                             normalize='true')

    # Plot training and validation set confusion matrices
    metrics.ConfusionMatrixDisplay(
        confusion_train, display_labels=labels).plot(ax=ax1)
    ax1.set_title("Training Set", fontsize=17)
    metrics.ConfusionMatrixDisplay(
        confusion_val, display_labels=labels).plot(ax=ax2)
    ax2.set_title("Validation Set", fontsize=17)
    ax2.set_ylabel("")

    plt.gcf().set_size_inches(plot_width+1, 5)
    plt.savefig("confusion_matrix.pdf", bbox_inches='tight')
    plt.show()
    return
target_size=(img_width,img_height), batch_size=32, class_mode='categorical', shuffle = True, seed=1234) #Test loss and accuracy on the shuffled test dataset history = source_model.evaluate(test_generator2) #Confusion Matrix pred_labels = source_model.predict(test_generator) pred_labels_num = np.argmax(pred_labels, axis = 1) cm = metrics.confusion_matrix(test_generator.classes, np.argmax(pred_labels, axis = 1)) #metrics.ConfusionMatrixDisplay(cm, display_labels = [0,1,2,3,4,5,6,7,8,9]).plot() #With label names: metrics.ConfusionMatrixDisplay(cm, display_labels = test_generator.class_indices).plot() plt.show() #Sample image predictions ROWS = 3 COLUMNS = 10 ix = 1 for i in range(ROWS): for j in range(COLUMNS): # specify subplot and turn of axis idx = np.random.choice(len(test_generator[4*j][0])) img = test_generator[4*j][0][idx] ax = plt.subplot(ROWS, COLUMNS, ix) ax.set_xticks([])
def _finite_rips_diagrams(points):
    """Return the finite 0- and 1-dimensional Rips persistence pairs of a point cloud.

    :param points: 2-D array with one point per row
    :return: tuple ``(zero_dim, one_dim)`` of arrays holding the persistence
        pairs of dimension 0 and 1 whose second coordinate is not infinite
    """
    n_coor = points.shape[0]
    # Expand the condensed pairwise distances into a full symmetric matrix.
    matrix = np.zeros((n_coor, n_coor))
    row, col = np.triu_indices(n_coor, 1)
    distancies = pdist(points)
    matrix[row, col] = distancies
    matrix[col, row] = distancies

    rips_complex = gd.RipsComplex(distance_matrix=matrix)
    simplex_tree = rips_complex.create_simplex_tree(max_dimension=2)
    persistence = simplex_tree.persistence()

    # Each entry is indexed as (dimension, pair); drop the infinite pairs.
    dim_list = np.array([entry[0] for entry in persistence])
    point_list = np.array([entry[1] for entry in persistence])
    finite = point_list[:, 1] != float('inf')
    zero_dim = point_list[np.logical_and(finite, dim_list == 0)]
    one_dim = point_list[np.logical_and(finite, dim_list == 1)]
    return zero_dim, one_dim


def tda_intensity_classifier(subj_dir, space, PC, labels, i_band):
    """
    Pipeline of a Topological Classifier

    The point cloud is balanced by sampling the same number of points from
    each of the three motivational states, split into train/test sets, and
    classified ``n_rep`` times with (a) a 1-nearest-neighbour classifier and
    (b) topological classifiers that measure how much the persistence
    diagram of each state's training cloud changes when a test point is
    added to it. Accuracies and confusion matrices are saved as ``.npy``
    files and ``.png`` plots under ``subj_dir + space``.

    :param subj_dir: Directory of the subject where we will save the accuracies and Confusion Matrix
    :param space: If electrode space or font space
    :param PC: Point Cloud we will classify
    :param labels: labels of the points (states are encoded as 0, 1 and 2)
    :param i_band: frequency band index (-1: noFilter, 0: alpha, 1: beta, 2: gamma)
    :return: mean test size, random selections matrix averaged over the
        repetitions, mean accuracy of the dimension 0 silhouettes. When one
        state has no points, ``(-1, zeros, -1)`` is returned instead.
    """
    # Dimensions, feature vectors, frequency band and number of repetitions
    dimensions = ["zero", "one"]
    n_dim = len(dimensions)
    feat_vect = [
        DimensionLandScape(),
        DimensionSilhouette(),
        TopologicalDescriptors()
    ]
    feat_vect_names = [
        'Landscapes', 'Silhouettes', 'Descriptors', 'Bottleneck'
    ]
    n_vectors = len(feat_vect)
    # The descriptors are the last feature vector and are min-max normalised.
    i_desc = n_vectors - 1
    n_rep = 10
    band_dic = {-1: 'noFilter', 0: 'alpha', 1: 'beta', 2: 'gamma'}
    band = band_dic[i_band]

    # Result containers; slot ``n_vectors`` of the vector axis holds the
    # bottleneck-distance classifier.
    rand_n = np.zeros((n_rep, n_vectors + 1, n_dim))
    test_size = np.zeros(n_rep)
    topo_perf = np.zeros([n_dim, n_vectors + 1, n_rep])
    knn_perf = np.zeros(n_rep)
    knn_conf_matrix = np.zeros((n_rep, 3, 3))
    topo_conf_matrix = np.zeros([n_dim, n_vectors + 1, n_rep, 3, 3])

    # 1 Nearest Neighbor classifier
    clf = sklnn.KNeighborsClassifier(n_neighbors=1,
                                     algorithm='brute',
                                     metric='correlation')

    knn_dir = subj_dir + space + '/1nn_clf'
    topo_dir = subj_dir + space + '/topological_clf'
    for out_dir in (knn_dir, topo_dir):
        if not os.path.exists(out_dir):
            print("create directory(plot):", out_dir)
            os.makedirs(out_dir)

    t_int = time.time()

    # Number of points of the least populated motivational state
    trials_per_m = min((labels == 0).sum(), (labels == 1).sum(),
                       (labels == 2).sum())
    if trials_per_m == 0:
        # A motivational state without points: save the empty results and
        # do not classify.
        np.save(topo_dir + '/' + band + 'perf_intensity.npy', topo_perf)
        np.save(knn_dir + '/' + band + 'perf_intensity.npy', knn_perf)
        return -1, np.zeros((n_vectors + 1, n_dim)), -1

    cv_schem = skms.StratifiedShuffleSplit(n_splits=1, test_size=0.2)
    for i_rep in range(n_rep):
        # Balance the dataset: the same number of points per state.
        # NOTE(review): np.random.choice samples WITH replacement here, so
        # a point can be drawn twice -- confirm this is intended.
        class_samples = [
            PC[labels == m][np.random.choice(len(PC[labels == m]),
                                             trials_per_m)]
            for m in range(3)
        ]
        PC_dwnsamp = np.concatenate(class_samples, axis=0)
        labels_dwnsamp = np.concatenate(
            (np.zeros(trials_per_m), np.ones(trials_per_m),
             np.ones(trials_per_m) * 2))

        # tda_vect[state][feature vector][dimension]
        tda_vect = {
            m: defaultdict(lambda: defaultdict(list))
            for m in range(3)
        }

        for ind_train, ind_test in cv_schem.split(PC_dwnsamp,
                                                  labels_dwnsamp):
            test_size[i_rep] = len(ind_test)
            X_train = PC_dwnsamp[ind_train]
            y_train = labels_dwnsamp[ind_train]
            y_test = labels_dwnsamp[ind_test]

            # 1nn
            clf.fit(X_train, y_train)
            knn_pred = clf.predict(PC_dwnsamp[ind_test])
            knn_perf[i_rep] = skm.accuracy_score(knn_pred, y_test)
            knn_conf_matrix[i_rep, :, :] += skm.confusion_matrix(
                y_true=y_test, y_pred=knn_pred, normalize='true')

            # topological classifier
            topo_pred_array = np.zeros(
                (len(ind_test), n_vectors + 1, n_dim, 3))

            # Training point cloud of each motivational state
            X_motiv = [X_train[y_train == m] for m in range(3)]

            # Reference persistence diagrams and feature vectors per state
            for i_motiv in range(3):
                persistence = _finite_rips_diagrams(X_motiv[i_motiv])
                for i_dim in range(n_dim):
                    dimensionscaler = DimensionDiagramScaler(
                        dimensions=dimensions[i_dim])
                    dimensionscaler.fit(persistence)
                    dim_persistence = np.array(
                        dimensionscaler.transform(persistence))
                    for i_vector in range(n_vectors):
                        tda_compt = feat_vect[i_vector]
                        tda_compt.fit([dim_persistence])
                        tda_vect[i_motiv][i_vector][
                            i_dim] = tda_compt.transform([dim_persistence])
                    # Keep the diagram itself for the bottleneck distance
                    tda_vect[i_motiv][n_vectors][i_dim] = dim_persistence

            # Min-max normalise the descriptor vectors across the states
            # and remember the bounds to normalise the test descriptors.
            # NOTE(review): a descriptor that is constant across the three
            # states gives a zero range here -- confirm how that should be
            # handled.
            mins = []
            maxs = []
            for i_dim in range(n_dim):
                descriptors = np.concatenate(
                    [tda_vect[m][i_desc][i_dim] for m in range(3)], axis=0)
                d_max = descriptors.max(axis=0)
                d_min = descriptors.min(axis=0)
                descriptors = (descriptors - d_min) / (d_max - d_min)
                maxs.append(d_max)
                mins.append(d_min)
                for m in range(3):
                    tda_vect[m][i_desc][i_dim] = descriptors[m]

            # Add each test point to all three training clouds and measure
            # how much the topology changes.
            for i, index in enumerate(ind_test):
                for i_motiv in range(3):
                    # ind_test indexes the down-sampled arrays, so the
                    # point must come from PC_dwnsamp (not PC) to match
                    # labels_dwnsamp[ind_test].
                    X_temp = np.concatenate(
                        (X_motiv[i_motiv],
                         PC_dwnsamp[index].reshape(1, -1)),
                        axis=0)
                    persistence = _finite_rips_diagrams(X_temp)

                    for i_dim in range(n_dim):
                        dimensionscaler = DimensionDiagramScaler(
                            dimensions=dimensions[i_dim])
                        dimensionscaler.fit(persistence)
                        dimensional_persistence = np.array(
                            dimensionscaler.transform(persistence))

                        # Euclidean norm of the feature-vector change
                        for i_vector in range(n_vectors - 1):
                            tda_compt = feat_vect[i_vector]
                            tda_compt.fit([dimensional_persistence])
                            topo_pred_array[
                                i, i_vector, i_dim,
                                i_motiv] = np.linalg.norm(
                                    tda_compt.transform(
                                        [dimensional_persistence]) -
                                    tda_vect[i_motiv][i_vector][i_dim])

                        # Descriptors: normalise with the training bounds
                        tda_compt = feat_vect[i_desc]
                        tda_compt.fit([dimensional_persistence])
                        topo_pred_array[
                            i, i_desc, i_dim, i_motiv] = np.linalg.norm(
                                ((tda_compt.transform(
                                    [dimensional_persistence]) -
                                  mins[i_dim]) /
                                 (maxs[i_dim] - mins[i_dim])) -
                                tda_vect[i_motiv][i_desc][i_dim])

                        # Bottleneck distance between the diagrams
                        topo_pred_array[i, n_vectors, i_dim,
                                        i_motiv] = gd.bottleneck_distance(
                                            dimensional_persistence,
                                            tda_vect[i_motiv][n_vectors]
                                            [i_dim], 0.01)

            # Predict and compute accuracy and confusion matrix
            for i_vector in range(n_vectors + 1):
                for i_dim in range(n_dim):
                    topo_pred, rand_n[i_rep, i_vector,
                                      i_dim] = topological_clf(
                                          topo_pred_array[:, i_vector,
                                                          i_dim, :])
                    topo_perf[i_dim, i_vector,
                              i_rep] = skm.accuracy_score(
                                  topo_pred, y_test)
                    topo_conf_matrix[i_dim, i_vector,
                                     i_rep, :, :] += skm.confusion_matrix(
                                         y_true=y_test,
                                         y_pred=topo_pred,
                                         normalize='true')

    print((time.time() - t_int) / 60, 'minuts for classification')

    fmt_grph = 'png'
    cmapcolours = ['Blues', 'Greens', 'Oranges', 'Reds']
    # i_band == -1 (noFilter) selects the last colour map.
    cmap = cmapcolours[i_band]
    class_names = ['M0', 'M1', 'M2']

    # Accuracies and confusion matrix of the 1nn classifier
    np.save(knn_dir + '/' + band + 'perf_intensity.npy', knn_perf)
    np.save(knn_dir + '/' + band + 'conf_matrix_intensity.npy',
            knn_conf_matrix)

    plt.rcParams['xtick.labelsize'] = 16
    plt.rcParams['ytick.labelsize'] = 8
    plt.figure(figsize=[16, 9])
    plt.violinplot(knn_perf)
    plt.ylabel('accuracy ' + band, fontsize=8)
    plt.title(band + ' 1nn classification')
    plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1])
    plt.savefig(knn_dir + '/1nn_accuracies_intensity_' + band + '.png',
                format=fmt_grph)
    plt.close()

    plt.rcParams['xtick.labelsize'] = 24
    plt.rcParams['ytick.labelsize'] = 24
    plt.rcParams.update({'font.size': 24})
    # Draw on explicit axes: without ``ax`` the display opens a figure of
    # its own, leaving the 16x9 one empty and never closed.
    fig_knn, ax_knn = plt.subplots(figsize=(16, 9))
    disp = skm.ConfusionMatrixDisplay(knn_conf_matrix[:, :, :].mean(0),
                                      display_labels=class_names)
    disp.plot(ax=ax_knn, include_values=True, cmap=cmap, colorbar=True)
    # Rows (y axis) of the confusion matrix are the true labels and the
    # columns (x axis) the predicted ones.
    ax_knn.set_xlabel('predicted label', fontsize=12)
    ax_knn.set_ylabel('true label', fontsize=12)
    ax_knn.set_title('Confusion Matix for band ' + band +
                     ' and a 1NN classifier',
                     fontsize=18)
    fig_knn.savefig(knn_dir + '/1nn_confusion_matrix_intensities_' + band +
                    '.png',
                    format=fmt_grph)
    plt.close(fig_knn)

    # Accuracies and confusion matrices of the topological classifiers
    np.save(topo_dir + '/' + band + 'perf_intensity.npy', topo_perf)
    np.save(topo_dir + '/' + band + 'conf_matrix_intensity.npy',
            topo_conf_matrix)

    plt.rcParams['xtick.labelsize'] = 24
    plt.rcParams['ytick.labelsize'] = 20
    fig, axes = plt.subplots(nrows=n_dim, ncols=1, figsize=(24, 12))
    # The chance level is defined as the trivial classifier that predicts
    # the label with more occurrences (taken from the last repetition).
    chance_level = np.max(
        np.unique(labels_dwnsamp,
                  return_counts=True)[1]) / labels_dwnsamp.size
    violin_positions = [-0.2, 0.2, 0.6, 1]
    for i_dim in range(n_dim):
        for i_vector, position in enumerate(violin_positions):
            axes[i_dim].violinplot(topo_perf[i_dim, i_vector, :],
                                   positions=[position],
                                   widths=[0.3])
        axes[i_dim].plot([-1, 2], [chance_level] * 2, '--k')
        axes[i_dim].axis(xmin=-0.6, xmax=1.4, ymin=0, ymax=1.05)
        axes[i_dim].set_ylabel('accuracy ' + band, fontsize=16)
        axes[i_dim].set_title('band ' + band + ' dimension ' +
                              dimensions[i_dim],
                              fontsize=24)
    fig.suptitle('Accuracies for different dimensions and metrics of band ' +
                 band,
                 fontsize=36)
    plt.setp(axes,
             xticks=violin_positions,
             xticklabels=feat_vect_names,
             yticks=[0, 0.2, 0.4, 0.6, 0.8, 1])
    fig.savefig(topo_dir + '/accuracies_intensity_' + band + '.png',
                format=fmt_grph)
    plt.close(fig)

    plt.rcParams['xtick.labelsize'] = 24
    plt.rcParams['ytick.labelsize'] = 24
    plt.rcParams.update({'font.size': 24})
    fig2, axes2 = plt.subplots(nrows=n_dim,
                               ncols=n_vectors + 1,
                               figsize=(60, 30))
    for i_vector in range(n_vectors + 1):
        for i_dim in range(n_dim):
            ax = axes2[i_dim][i_vector]
            disp = skm.ConfusionMatrixDisplay(
                topo_conf_matrix[i_dim, i_vector, :, :, :].mean(0),
                display_labels=class_names)
            disp.plot(ax=ax, include_values=True, cmap=cmap, colorbar=True)
            # Rows (y axis) are true labels, columns (x axis) predictions.
            ax.set_xlabel('predicted label', fontsize=24)
            ax.set_ylabel('true label', fontsize=24)
            ax.set_title('band ' + band + ' dimension ' +
                         dimensions[i_dim] + ' w/ ' +
                         feat_vect_names[i_vector],
                         fontsize=36)
    fig2.suptitle(
        'Confusion Matrices for different dimensions and feature vectors of band '
        + band,
        fontsize=48)
    fig2.subplots_adjust(top=0.65)
    # Apply the ticks to the confusion-matrix axes of this figure (the
    # violin-plot axes belong to a figure that is already closed).
    plt.setp(axes2, xticks=[0, 1, 2], yticks=[0, 1, 2])
    fig2.savefig(topo_dir + '/confusion_matrix_intensities_' + band + '.png',
                 format=fmt_grph)
    plt.close(fig2)

    return test_size.mean(), rand_n.mean(axis=0), topo_perf[0, 1, :].mean()
#list = [1, 3, 5, 6, 7, 8] #print([x.columns[i] for i in list]) #x = x.to_numpy(dtype=float) #test.evaluate_input3(x, y.to_numpy(dtype=float)) #train_data, test_data = test.separate_data(x, y) #test.fit(train_data[0], train_data[1], True) predictions_test = test.predict(pd.DataFrame(test_data[0])) confusion_test = metrics.confusion_matrix(test.test_data[1], predictions_test, normalize='true') labels = ['No Claim', 'Claim'] metrics.ConfusionMatrixDisplay(confusion_test, labels).plot() test.evaluate_architecture(True) #test.evaluate_architecture() """ #test.evaluate_input3(x, y) x_clean = test._preprocessor(x) #print(x_clean.shape) #test.fit(x, y) test.evaluate_input3(x, y) data_set = np.genfromtxt("part2_training_data.csv", dtype=float, delimiter=',', skip_header=1) num_att = len(data_set[0]) # number of parameters claims = np.array(data_set[:, (num_att - 1)], dtype=np.float32)