Example No. 1
def plot_relative_color_confusion_matrix(
    X,
    ax,
    display_labels=None,
    plt_kwargs={},
    label_relative=False,
    fix_colorbar=True,
):

    # Rows of X are true class, so sum of rows is number in the true class.
    # Normalize to these counts.
    X_n = np.einsum('ij,i->ij', X, 1. / X.sum(axis=1))

    cm = metrics.ConfusionMatrixDisplay(
        X_n,
        # display_labels if display_labels is not None else np.arange(X.shape[0], dtype=int)
        display_labels=display_labels,
    )
    cm.plot(ax=ax, **plt_kwargs)

    # Change labels to the absolute counts
    if not label_relative:
        for I, t in np.ndenumerate(cm.text_):
            t.set_text(f"{X[I]:d}")

    # Fix the colorbar limits so the last tick draws
    if fix_colorbar:
        ax.images[-1].set_clim(0, 1)
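A minimal usage sketch for the helper above (not part of the original example), assuming it is defined in a module that imports numpy as np and sklearn.metrics as metrics; the 3x3 count matrix and class names are made up for illustration.

import numpy as np
import matplotlib.pyplot as plt

# Synthetic counts: rows are true classes, columns are predicted classes.
counts = np.array([[50, 3, 2],
                   [4, 40, 6],
                   [1, 5, 44]])

fig, ax = plt.subplots(figsize=(4, 4))
plot_relative_color_confusion_matrix(counts, ax, display_labels=['a', 'b', 'c'])
plt.show()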
Example No. 2
def plot_results(blob, roc_ax, cm_ax):
    confusion_matrix = blob["confusion_matrix"]
    roc_b = blob["roc_b"]
    roc_a_base = blob["roc_a_base"]
    area_under_curve = blob["area_under_curve"]

    mean_roc_b = roc_b.mean(0)
    std_roc_b = roc_b.std(0)

    mean_area_under_curve = np.mean(area_under_curve)

    # avoid going out of bounds
    upper_bound = np.minimum(mean_roc_b + std_roc_b, 1)
    lower_bound = mean_roc_b - std_roc_b

    # plot mean curve
    roc_ax.plot(roc_a_base, mean_roc_b, label="mean auc="+"{0:0.3%}".format(mean_area_under_curve))
    roc_ax.fill_between(roc_a_base, lower_bound, upper_bound, color="grey", 
            alpha=0.3, label=r'$\pm$ 1 std dev')
    roc_ax.set_xlabel("mean false positive rate")
    roc_ax.set_ylabel("mean true positive rate")
    roc_ax.legend(loc=4)

    cmd = metrics.ConfusionMatrixDisplay(confusion_matrix, display_labels=labels())
    cmd.plot(ax=cm_ax)
Example No. 3
def plot_confusion_matrix(labels, pred_labels):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1)
    cm = metrics.confusion_matrix(labels, pred_labels)
    disp = metrics.ConfusionMatrixDisplay(cm, display_labels=range(10))
    disp.plot(values_format='d', cmap='Blues', ax=ax)
    plt.show()
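A quick way to exercise the function above, assuming the snippet's imports (matplotlib.pyplot as plt and sklearn metrics) are in scope; the random labels are purely illustrative and cover all ten classes with near certainty.

import numpy as np

rng = np.random.default_rng(0)
true_labels = rng.integers(0, 10, size=500)
pred_labels = rng.integers(0, 10, size=500)
plot_confusion_matrix(true_labels, pred_labels)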
Example No. 4
def confusion_matrix(
        prediction_report: pd.DataFrame,
        min_iou,
        min_score,
        idx_class_dict: Dict[int, str] = None,
        normalize=False) -> Tuple[np.ndarray, metrics.ConfusionMatrixDisplay]:
    df = prediction_report[prediction_report['IOU'] >= min_iou]
    df = df[df['score'] >= min_score]

    class_idxs = sorted(
        list(
            set(df['true_class_id'].unique().tolist() +
                df['pred_class_id'].unique().tolist())))

    labels = []
    if idx_class_dict is not None:
        for idx in class_idxs:
            if idx == 0:
                labels.append('no_danno')
            else:
                labels.append(idx_class_dict[idx])
    else:
        labels = class_idxs

    cm = metrics.confusion_matrix(df['true_class_id'], df['pred_class_id'])
    if normalize:
        cm = cm / cm.sum()
    cm_display = metrics.ConfusionMatrixDisplay(cm, display_labels=labels)
    return cm, cm_display
Example No. 5
def display_metrics(classifier, X_test, Y_test):
    """Display results from running the classifier on testing data

    Parameters
    ----------
    classifier : sklearn classifier object
        Classifier to test

    Returns
    -------
    none
    """
    # Test data accuracy score
    score = classifier.score(X_test, Y_test)
    print('Score:', score)
    print()

    # Test data f1 score
    Y_pred = classifier.predict(X_test)
    f1_score = metrics.f1_score(Y_test, Y_pred)
    print('F1 score:', f1_score)
    print()

    # Test data confusion matrix
    conf_matrix = metrics.confusion_matrix(Y_test, Y_pred)
    matrix_plot = metrics.ConfusionMatrixDisplay(conf_matrix)
    matrix_plot.plot()

    # Precision-Recall curve (plot_precision_recall_curve was removed in
    # scikit-learn 1.2; PrecisionRecallDisplay.from_estimator is its replacement)
    curve = metrics.PrecisionRecallDisplay.from_estimator(classifier, X_test, Y_test)
    curve.ax_.set_title('Game winner prediction Precision-Recall curve')
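A small end-to-end sketch for display_metrics, assuming the snippet's "from sklearn import metrics" import is in scope; the synthetic dataset and LogisticRegression choice are illustrative only.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.25, random_state=0)

clf = LogisticRegression(max_iter=1000).fit(X_train, Y_train)
display_metrics(clf, X_test, Y_test)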
Example No. 6
        def plot_confusion_matrix():
            import matplotlib

            with matplotlib.rc_context(
                {
                    "font.size": min(10, 50.0 / self.num_classes),
                    "axes.labelsize": 10,
                }
            ):
                sk_metrics.ConfusionMatrixDisplay(
                    confusion_matrix=confusion_matrix,
                    display_labels=self.label_list,
                ).plot(cmap="Blues")
Example No. 7
def SVM_func(tr_img, te_img, tr_lbl, te_lbl, te_img1, trans):
    k = input("Choice of Kernel: type integers: rbf-1, poly-2, linear-3: \n ")
    if k == '1':
        print('rbf kernel chosen \n')
        kernel = 'rbf'
    elif k == '2':
        print('poly kernel chosen \n')
        kernel = 'poly'
    else:
        print('linear kernel chosen \n')
        kernel = 'linear'
    model = SVC(C=1, kernel=kernel)

    # fitting labels and images for training data
    print("Fitting model")
    model.fit(tr_img, tr_lbl)

    # Training accuracy and creating confusion matrix:
    pred_tr_lbl = model.predict(tr_img)
    print("\nTraining Accuracy = ",
          metrics.accuracy_score(y_true=tr_lbl, y_pred=pred_tr_lbl))
    tr_CM = metrics.confusion_matrix(tr_lbl, pred_tr_lbl)
    tr_CM_disp = metrics.ConfusionMatrixDisplay(tr_CM).plot()

    # Testing accuracy and creating confusion matrix:
    pred_te_lbl = model.predict(te_img)
    print("\nTesting Accuracy = ",
          metrics.accuracy_score(y_true=te_lbl, y_pred=pred_te_lbl))
    te_CM = metrics.confusion_matrix(te_lbl, pred_te_lbl)
    te_CM_disp = metrics.ConfusionMatrixDisplay(te_CM).plot()

    # Plotting predictions:
    fig, axis = plt.subplots(3, 3, figsize=(10, 10))
    for i, a in enumerate(axis.flat):
        a.imshow(te_img1[i], cmap='binary')
        a.set(title=f'Act: {te_lbl[i]}  Pred: {pred_te_lbl[i]}')
    plt.savefig(f'{trans}_{kernel}_results.png')
    plt.close()
Example No. 8
        def plot_confusion_matrix():
            import matplotlib
            import matplotlib.pyplot as plt

            with matplotlib.rc_context({
                "font.size": min(8, math.ceil(50.0 / self.num_classes)),
                "axes.labelsize": 8,
            }):
                _, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), dpi=175)
                sk_metrics.ConfusionMatrixDisplay(
                    confusion_matrix=confusion_matrix,
                    display_labels=self.label_list,
                ).plot(cmap="Blues", ax=ax)
Example No. 9
 def getConfusionMatrix(self,
                        y_pred: np.ndarray,
                        y_true: np.ndarray,
                        normalize: str = None,
                        plot: bool = True):
     cm = metrics.confusion_matrix(y_true,
                                   y_pred,
                                   labels=self.classesIdx,
                                   normalize=normalize)
     if plot:
         disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm,
                                               display_labels=self.classes)
         print("plot takes too long....")
         disp.plot()
     return cm
Example No. 10
def conf_mat():
    data = pd.read_csv(args.data)
    lab_enc = preprocessing.LabelEncoder()
    lab_enc.fit(CLASSES)
    true_labels = lab_enc.transform(data['true_label'])
    pred_labels = lab_enc.transform(data['pred_label'])
    offset_theta = data['offset_theta']
    offset_phi = data['offset_phi']
    conf_mat = metrics.confusion_matrix(y_true=true_labels,
                                        y_pred=pred_labels,
                                        normalize='true')
    disp = metrics.ConfusionMatrixDisplay(confusion_matrix=conf_mat,
                                          display_labels=CLASSES)
    disp.plot(cmap='Blues')
    plt.show()
Example No. 11
def generateConfusionMatrix(classifier,
                            y_test,
                            predicted,
                            title=None,
                            labels=None,
                            plot=False):
    '''
    Plots confusion matrix for a classifier
    '''

    conf_matrix = metrics.confusion_matrix(y_test, predicted)
    conf_disp = metrics.ConfusionMatrixDisplay(conf_matrix, display_labels=labels)
    if plot:
        conf_disp.plot(cmap=plt.cm.Blues)
        plt.show()
    return conf_matrix
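A minimal call sketch for generateConfusionMatrix, assuming sklearn metrics and matplotlib.pyplot as plt are imported as in the snippet; the iris data and KNN model are illustrative.

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)
knn = KNeighborsClassifier().fit(X, y)
predicted = knn.predict(X)
cm = generateConfusionMatrix(knn, y, predicted,
                             labels=['setosa', 'versicolor', 'virginica'],
                             plot=True)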
Example No. 12
def show_metrics(
    model_name: str,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    threshold: float = 0.5,
):
    preds = (y_pred > threshold).astype(int)

    # creating a classification report
    cm = metrics.confusion_matrix(y_true, preds)
    cr = metrics.classification_report(y_true, preds, output_dict=True)
    df = pd.DataFrame(cr).transpose()
    df.to_csv(
        os.path.join(
            config.output_dir,
            f"{model_name}_classification_report_{config.data_mode}.csv",
        ),
        index=True,
    )
    print(f"Classification report:\n{df}")

    # ROC details
    fpr, tpr, thresh = metrics.roc_curve(y_true, y_pred)
    roc_details = pd.DataFrame()
    roc_details["fpr"] = fpr
    roc_details["tpr"] = tpr
    roc_details["threshold"] = thresh
    roc_details.to_csv(
        os.path.join(
            config.output_dir,
            f"{model_name}_roc_details_{config.data_mode}.csv",
        ),
        index=False,
    )

    cm_disp = metrics.ConfusionMatrixDisplay(cm, display_labels=[0, 1])
    cm_disp.plot()
    fig = cm_disp.figure_
    fig.savefig(
        os.path.join(
            config.output_dir,
            f"{model_name}_confusion_matrix_{config.data_mode}.png",
        ),
        dpi=200,
    )
    plt.show()
Example No. 13
def confusion_matrix(Y, y_pred, model_name):
    matrices = metrics.multilabel_confusion_matrix(Y,
                                                   y_pred,
                                                   labels=[0, 1, 2, 3, 4])
    print(matrices)
    labels = [
        "Snow/Ice", "Mountains/Rocks", "Plants/Forrests", "Stars",
        "Sandy Desert"
    ]
    fig, axs = plt.subplots(2, 3, figsize=(10, 8), constrained_layout=True)
    for i in range(len(labels)):
        ax = axs[0, i] if i <= 2 else axs[1, i - 3]
        disp = metrics.ConfusionMatrixDisplay(confusion_matrix=matrices[i],
                                              display_labels=[False, True])
        disp.plot(ax=ax, values_format='d')
        disp.ax_.set_title(labels[i])
    fig.delaxes(axs[1, 2])
    filename = model_name + "_Confusion_matrices.eps"
    plt.savefig(filename, format="eps")
    files.download(filename)
Example No. 14
def multiConfusionPlot(X_train, X_test, y_train, y_test):
    classifiers = {
        "customLogistic": CustomlogisticRegression(),
        "LogisiticRegression": LogisticRegression(max_iter=1e4),
        "KNearest": KNeighborsClassifier(),
        "Support Vector Classifier": SVC(),
        "MLPClassifier": MLPClassifier(),
    }

    f, axes = plt.subplots(1, 5, figsize=(20, 5), sharey='row')

    for i, (key, classifier) in enumerate(classifiers.items()):
        # if classifier == CustomlogisticRegression():
        #     classifier.fit(X_train,y_train)
        #     y_pred = classifier.predict(X_test)
        # else:
        #     y_pred = classifier.fit(X_train, y_train).predict(X_test)
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)
        cf_matrix = metrics.confusion_matrix(y_test, y_pred)
        disp = metrics.ConfusionMatrixDisplay(cf_matrix)
        disp.plot(ax=axes[i], xticks_rotation=45)

        fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred)
        aucScore = metrics.auc(fpr, tpr)

        disp.ax_.set_title(key + ":" + "{:.2e}".format(aucScore))
        disp.im_.colorbar.remove()
        disp.ax_.set_xlabel('')
        if i != 0:
            disp.ax_.set_ylabel('')

    f.text(0.4, 0.1, 'Predicted label', ha='left')
    plt.subplots_adjust(wspace=0.40, hspace=0.1)
    "imBalancedOneHotMinMax"
    "BalancedOneHotMinMax"
    "BalancedCategoricalMinMax"
    f.suptitle("BalancedLabelMinMax")
    f.colorbar(disp.im_, ax=axes)
    plt.show()
Example No. 15
def plot_confusion_matrix(predictor):

    classifiers = predictor.classifiers

    y_pred = predictor.predict(predictor.train_data)[classifiers]
    y_true = predictor.train_data[classifiers]

    ncol = 2 if len(classifiers) > 1 else 1
    nrow = int(np.ceil(len(classifiers) / ncol))

    for i, classifier in enumerate(classifiers):
        ax = pl.subplot(nrow, ncol, i + 1)
        labels = y_true[classifier].unique()
        cm = metrics.confusion_matrix(y_true[classifier],
                                      y_pred[classifier],
                                      normalize='true',
                                      labels=labels)
        disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm,
                                              display_labels=labels)
        disp.plot(ax=ax)
        pl.title(classifier)
        pl.gca().grid(False)
Example No. 16
def plot_confusion_matrix(y_true, y_pred_proba, threshold=0.5, ax=None):
    """混合行列を整形してプロットとして表示する関数

    Args:
        y_true(1-D array-like shape of [n_samples, ]): 2値の目的ラベルの配列(ラベルは0または1)
        y_pred_proba(1-D array-like shape of [n_samples, ]): 陽性(ラベルが1)である確率の予測値の配列
        threshold(float, default=0.5): 陽性と分類する確率の閾値. 陽性(ラベルが1)である確率の予測値がthreshold以上なら1に変換する
        ax (matplotlib axes, default=None): プロットするaxオブジェクト. Noneならば新しいfig, axを作成する

    Returns:
        None
    """
    # Convert predicted probabilities to binary labels
    y_pred_label = np.where(y_pred_proba >= threshold, 1, 0)

    # Compute the confusion matrix
    confusion_matrix_ = skm.confusion_matrix(y_true, y_pred_label, labels=[1, 0])

    # Create the figure and axes (or reuse the given ax)
    if ax is None:
        fig, ax = plt.subplots(1, 1)
    else:
        fig = ax.figure

    # Create a ConfusionMatrixDisplay instance to render the confusion matrix
    disp = skm.ConfusionMatrixDisplay(confusion_matrix=confusion_matrix_,
                                      display_labels=[1, 0]
                                      )

    # Draw the plot
    disp.plot(
        include_values=True,
        cmap='Blues',
        ax=ax,
        xticks_rotation='horizontal',
        values_format='d',
    )

    ax.set_title(f'Confusion Matrix: Pos_decision_threshold={threshold}')
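An illustrative call to the function above, assuming the snippet's imports (numpy as np, matplotlib.pyplot as plt, sklearn.metrics as skm); the probabilities are synthetic.

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(42)
y_true = rng.integers(0, 2, size=300)
# Noisy scores that loosely track the true label.
y_pred_proba = np.clip(0.6 * y_true + rng.normal(0.2, 0.25, size=300), 0, 1)

fig, ax = plt.subplots()
plot_confusion_matrix(y_true, y_pred_proba, threshold=0.5, ax=ax)
plt.show()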
Example No. 17
def generate_confusion_matrix(y_true, y_pred, output):
    """
    Generate a confusion matrix for a dataset.

    Parameters
    ----------
    x_set : pd.DataFrame
        the features set.
    y_set : pd.Series
        the target set.
    output : str
        the output image path

    Returns
    -------
        confusion matric plot

    """
    cm = metrics.confusion_matrix(y_true, y_pred)
    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=cm).plot()
    cm_display.ax_.set_title('Classifier confusion matrix')
    plt.savefig(output + '_classifier_confusion_matrix.png')
    return
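A short usage sketch for generate_confusion_matrix, assuming the snippet's sklearn metrics and matplotlib.pyplot imports; the toy labels and the 'demo' output prefix are made up.

y_true = [0, 1, 1, 0, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 1, 1, 1]
generate_confusion_matrix(y_true, y_pred, output='demo')  # writes demo_classifier_confusion_matrix.png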
Example No. 18
    imagesTrain = np.array(imagesTrain)
    imagesTest = np.array(imagesTest)
    labelsTrain = np.array(labelsTrain)
    labelsTest = np.array(labelsTest)

    results = model.fit(imagesTrain,
                        labelsTrain,
                        epochs=epocs,
                        batch_size=batchSize,
                        validation_data=(imagesTest, labelsTest))

    predictions = model.predict(imagesTest)
    matrix = metrics.confusion_matrix(labelsTest, predictions.argmax(axis=1))

    metrics.ConfusionMatrixDisplay(confusion_matrix=matrix,
                                   display_labels=categories).plot()
    plt.savefig("trainlog/imagepic_" + str(sampleSize) + "-resize-" +
                str(int(time.time())) + ".png",
                bbox_inches="tight")
    plt.close()

    saveFile = open(saveFileName, "a")
    saveFile.write(
        str(sampleSize) + "\t1\t" +
        str(results.history["accuracy"][-1]) + "\t" +
        str(results.history["val_accuracy"][-1]) + "\n")
    saveFile.close()

    print("")
Example No. 19
    elapse = (time.time() - start) / 60

    # 3.2. Save the trained model if necessary
    if SAVE_MODEL:
        torch.save(model.state_dict(), SAVE_MODEL)

    # 4.1. Visualize the loss curves
    plt.title(
        f'Training and Validation Losses (time: {elapse:.2f} [min] @ CUDA: {USE_CUDA})'
    )
    loss_array = np.array(loss_list)
    plt.plot(loss_array[:, 0], loss_array[:, 1], label='Training Loss')
    plt.plot(loss_array[:, 0], loss_array[:, 2], label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss values')
    plt.xlim(loss_array[0, 0], loss_array[-1, 0])
    plt.grid()
    plt.legend()
    plt.show()

    # 4.2. Visualize the confusion matrix
    predicts = [predict(datum, model) for datum in data_valid.data]
    conf_mat = metrics.confusion_matrix(data_valid.targets, predicts)
    conf_fig = metrics.ConfusionMatrixDisplay(conf_mat)
    conf_fig.plot()

    # 5. Test your image
    print(predict(data_train.data[0], model))  # 5
    with PIL.Image.open('data/cnn_mnist_test.png').convert('L') as image:
        print(predict(image, model))  # 3
Example No. 20
yhat_classes[yhat > 0.25] = 3
yhat_classes[yhat > 0.5] = 4
yhat_classes[yhat > 1] = 5
yhat_classes = np.reshape(yhat_classes, (361315, -1))

# confusion matrix for all values
matrix_con = metrics.confusion_matrix(y_sim_classes,
                                      yhat_classes,
                                      labels=[0, 1, 2, 3, 4, 5])  #.ravel()
# confusion matrix for values above 1 cm
matrix_con_without_0 = metrics.confusion_matrix(y_sim_classes,
                                                yhat_classes,
                                                labels=[1, 2, 3, 4, 5])

cm_display = metrics.ConfusionMatrixDisplay(
    matrix_con, display_labels=[0, 1, 2, 3, 4, 5]).plot()

y_sim_classes_map = np.reshape(y_sim_classes, (569, 635))
plt.figure(14)  #simulation
plt.imshow(y_sim_classes_map, cmap='Greys', vmin=0, vmax=5)
plt.colorbar()
plt.title('y_sim_classes_map')
plt.show()

yhat_classes_map = np.reshape(yhat_classes, (569, 635))
plt.figure(15)  #simulation
plt.imshow(yhat_classes_map, cmap='Greys', vmin=0, vmax=5)
plt.colorbar()
plt.title('yhat_classes_map')
plt.show()
Example No. 21
# Using model to predict
yhat = LR.predict(X_test)
yhat_prob = LR.predict_proba(X_test)
print(yhat)
print(yhat_prob)

# Evaluation using Jaccard Index
print('average=None', metrics.jaccard_score(y_test, yhat, average=None))
print('micro', metrics.jaccard_score(y_test, yhat, average='micro'))
print('macro', metrics.jaccard_score(y_test, yhat, average='macro'))
print('weighted', metrics.jaccard_score(y_test, yhat, average='weighted'))

# Evaluation using confusion matrix
cm = metrics.confusion_matrix(y_test, yhat)
disp = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Iris-setosa", "Iris-versicolor", "Iris-virginica"])
disp.plot()

# Evaluation using confusion matrix (normalize=true -> return probability over true label (row))
cm_true = metrics.confusion_matrix(y_test, yhat, normalize='true')
disp_true = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm_true,
    display_labels=["Iris-setosa", "Iris-versicolor", "Iris-virginica"])
disp_true.plot()

# Evaluation using confusion matrix (normalize=pred -> return probability over predicted(col))
cm_pred = metrics.confusion_matrix(y_test, yhat, normalize='pred')
disp_pred = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm_pred,
    display_labels=["Iris-setosa", "Iris-versicolor", "Iris-virginica"])
disp_pred.plot()
Example No. 22
def main():
    plt.rcParams['figure.dpi'] = 300
    plt.rcParams['font.size'] = 7

    # Classes
    classes = ["dog", "cat", "Null"]
    # classes = ["dog", "cat"]

    # DataFrames
    actual_df = pd.read_csv("example\\actual.csv")
    actual_df = preprocess_df(actual_df)

    detected_df = pd.read_csv("example\\detected.csv")
    detected_df = preprocess_df(detected_df)
    detected_df = remove_overlapping_objects(detected_df)

    # Calculating
    df = calculate_metrics(actual_df,
                           detected_df,
                           prob_thresh=0,
                           iou_thresh=0.0)

    df.to_csv("example\\result_df.csv", index=False)

    # ============ Collect data for sklearn =============
    y_true = []
    y_pred = []
    y_score = []
    for i, row in df[df['a_xmin'] != 'Null'].iterrows():

        true_class = row['a_label']
        y_true.append(true_class)
        pred_class = row['d_label']
        y_pred.append(pred_class)

        prob = row['d_prob']
        if prob == "Null":
            y_score.append(0)
        else:
            y_score.append(float(prob))

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_score = np.array(y_score)

    # for true, pred in zip(y_true, y_pred):
    #     print(true, pred)

    print("Accuracy ", 100 * (y_true == y_pred).sum() / len(y_true))

    # ========= Confusion Matrix ===========
    cm = sm.confusion_matrix(y_true, y_pred, labels=sorted(classes))
    plot_confusion_matrix(cm, classes=sorted(classes))
    plt.show()

    cm_display = sm.ConfusionMatrixDisplay(
        cm, display_labels=sorted(classes)).plot()
    plt.show()

    # ========= Classification Report ===========

    cp = sm.classification_report(y_true,
                                  y_pred,
                                  labels=sorted(classes),
                                  output_dict=False)
    print(cp)

    # ========= PR Curve ===========
    precision = {}
    recall = {}
    thresh = {}

    for i in classes:
        precision[i], recall[i], thresh[i] = sm.precision_recall_curve(
            y_true, y_score, pos_label=i)
        plt.plot(recall[i], precision[i], lw=2, label=f'{i}')

    plt.xlabel("recall")
    plt.ylabel("precision")
    plt.legend(loc="best")
    plt.title("precision vs. recall curve")
    plt.show()

    print("PR Curve")

    # for pr, rec, thresh_ in zip(precision["full_lined"], recall["full_lined"], thresh["full_lined"]):
    #     print(pr, rec, thresh_)

    # ========= ROC Curve ===========
    fpr = {}
    tpr = {}
    thresh = {}
    roc_auc = {}

    for i in classes:
        fpr[i], tpr[i], thresh[i] = sm.roc_curve(y_true, y_score, pos_label=i)
        roc_auc[i] = sm.auc(fpr[i], tpr[i])
        plt.plot(fpr[i], tpr[i], lw=2, label=f'{i} (area = {roc_auc[i]:0.2f})')

    ns_probs = [0 for _ in range(len(y_true))]
    # pos_label must be one of the classes actually present; "nolines" looked like a leftover from another dataset
    ns_fpr, ns_tpr, _ = sm.roc_curve(y_true, ns_probs, pos_label=sorted(classes)[0])
    plt.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')

    plt.xlabel("false positive rate")
    plt.ylabel("true positive rate")
    plt.legend(loc="best")
    plt.title("ROC curve")
    plt.show()

    print("ROC Curve")
Example No. 23
 def plot_confusion_matrix(self, title, outname, **kwargs):
     disp = metrics.ConfusionMatrixDisplay(**kwargs)
     disp.plot()
     disp.ax_.set_title(title)
     fname = "/".join((self.outpath, outname))
     plt.savefig(fname)
Example No. 24
def train_eval(config, exp_path):
    dataset = MarkerExpressionDataset(config)
    if dataset.data_clean is not None:
        with open(os.path.join(exp_path, 'dirty_data.txt'), 'w') as f:
            f.write('---data clean method: %s---\n' % dataset.data_clean)
            for marker, item in dataset.outlier_samples.items():
                f.write('marker %s:\n' % marker)
                for class_id in dataset.classes:
                    f.write('class %s:\n' % class_id)
                    for sample_id in item.keys():
                        if item[sample_id]['class'] == class_id:
                            f.write('\t%s\n' % sample_id)

    if dataset.feature_selection is not None or dataset.feature_transformation is not None:
        with open(
                os.path.join(exp_path,
                             'feature_selection_and_transformation.txt'),
                'w') as f:
            if dataset.feature_selection is not None:
                f.write('---feature selection method: %s---\n' %
                        dataset.feature_selection['method'])
                if 'kwargs' in dataset.feature_selection:
                    f.write('---feature selection kwargs: %s---\n' %
                            str(dataset.feature_selection['kwargs']))
            if dataset.feature_transformation is not None:
                f.write('---feature transformation method: %s---\n' %
                        dataset.feature_transformation['method'])
                if 'kwargs' in dataset.feature_transformation:
                    f.write('---feature transformation kwargs: %s---\n' %
                            str(dataset.feature_transformation['kwargs']))

            for marker in dataset.markers:
                f.write('marker %s:\n' % marker)
                if dataset.fs_metric_params is not None:
                    f.write(
                        '---feature selection and transformation kwargs: %s---\n'
                        % str(dataset.fs_metric_params[marker]))
                if dataset.feature_selection is not None:
                    features = dataset.features
                    feature_index = 0
                    f.write('---selected features---\n')
                    if dataset.feature_selection['method'] == 'custom':
                        support_flags = dataset.feature_selection['selection'][
                            marker]
                    else:
                        support_flags = dataset.feature_selector[
                            marker].get_support()
                    for flag in support_flags:
                        f.write('%s:\t%s\n' % (features[feature_index], flag))
                        feature_index = (feature_index + 1) % len(features)
                if dataset.feature_transformation is not None:
                    components = dataset.feature_transformer[
                        marker].components_
                    f.write('---feature transformation components---:\n%s' %
                            components.tolist())
                    # if 'feature_mean' in config:
                    #     feature_mean = config['feature_mean']
                    #     coefficients = np.abs(feature_mean*components.sum(axis=0)).\
                    #         reshape([len(dataset.features), -1]).sum(axis=0)
                    # else:
                    #     coefficients = np.abs(components.sum(axis=0)).reshape([len(dataset.features), -1]).sum(axis=0)
                    # coefficients = coefficients / coefficients.sum()
                    #
                    # f.write('---feature transformation coefficients---:\n%s' % coefficients.tolist())

    threshold = config.get('threshold', 'roc_optimal')
    metrics_names = ['sensitivity', 'specificity', 'roc_auc_score']
    metrics_avg_names = ['roc_auc_score_avg', 'roc_auc_score_avg_std']

    fig, ax = plt.subplots(9,
                           len(dataset.markers),
                           squeeze=False,
                           figsize=(6 * len(dataset.markers), 40))
    metrics_file = open(os.path.join(exp_path, 'metrics.txt'), 'w')
    metrics_fig_filename = os.path.join(exp_path, 'conf_mat.png')
    best_params = dict()
    all_marker_train_metrics = []
    all_marker_test_metrics = []
    for i, marker in enumerate(dataset.markers):
        model = get_model(config)
        if 'model_kwargs_search' in config:
            # parameter search
            print('parameter search for marker %s...' % marker)
            all_x, all_y, cv_index = dataset.get_all_data(marker)
            best_model = GridSearchCV(model,
                                      param_grid=config['model_kwargs_search'],
                                      cv=cv_index,
                                      scoring='roc_auc_ovr')
            best_model.fit(all_x, all_y)
            best_params[marker] = best_model.best_params_
            print('search done')
        else:
            best_model = model
            best_params[marker] = config['model_kwargs']

        # run train and test
        train_xs = []
        train_ys = []
        train_ys_score = []
        test_xs = []
        test_ys = []
        test_ys_score = []
        for fold_i, (train_x, train_y, test_x,
                     test_y) in enumerate(dataset.get_split_data(marker)):
            model = base.clone(model)
            model.set_params(**best_params[marker])
            model.fit(train_x, train_y)
            # model.classes_ = dataset.classes
            train_xs += train_x
            train_ys += train_y
            test_xs += test_x
            test_ys += test_y
            train_y_score = model.predict_proba(train_x).tolist()
            train_ys_score += train_y_score
            test_y_score = model.predict_proba(test_x).tolist()
            test_ys_score += test_y_score
            # model_filename = os.path.join(exp_path, 'model', '%s_%s_fold_%d.pkl'
            #                               % (config['model'], marker, fold_i))
            # maybe_create_path(os.path.dirname(model_filename))
            # with open(model_filename, 'wb') as f:
            #     pickle.dump(model, f)

        train_metrics = eval_results(train_ys,
                                     train_ys_score,
                                     labels=dataset.classes,
                                     average='macro',
                                     threshold=threshold,
                                     num_fold=dataset.num_fold)
        test_metrics = eval_results(test_ys,
                                    test_ys_score,
                                    labels=dataset.classes,
                                    average='macro',
                                    threshold=train_metrics['used_threshold'],
                                    num_fold=dataset.num_fold)
        all_marker_train_metrics.append(train_metrics)
        all_marker_test_metrics.append(test_metrics)

        # print metrics to console and file
        double_print('marker: %s' % marker, metrics_file)
        double_print('metrics on training set:', metrics_file)
        for j, class_j in enumerate(dataset.classes):
            log_str = '[class: %s. threshold: %1.1f] ' % (
                class_j, 100 * train_metrics['used_threshold'][j])
            for metrics_name in metrics_names:
                log_str += '%s: %1.1f. ' % (metrics_name,
                                            train_metrics[metrics_name][j])
            double_print(log_str, metrics_file)
        for metrics_name in metrics_avg_names:
            double_print(
                '%s: %1.1f' % (metrics_name, train_metrics[metrics_name]),
                metrics_file)
        double_print('metrics on test set:', metrics_file)
        for j, class_j in enumerate(dataset.classes):
            log_str = '[class: %s. threshold: %1.1f] ' % (
                class_j, 100 * test_metrics['used_threshold'][j])
            for metrics_name in metrics_names:
                log_str += '%s: %1.1f. ' % (metrics_name,
                                            test_metrics[metrics_name][j])
            double_print(log_str, metrics_file)
        for metrics_name in metrics_avg_names:
            double_print(
                '%s: %1.1f' % (metrics_name, test_metrics[metrics_name]),
                metrics_file)

        # generate figure
        current_ax = ax[0, i]
        dataset.plot_data_clean_distribution(current_ax, marker)
        current_ax.set_title('data cleaning on marker %s' % marker)

        current_ax = ax[1, i]
        contour_flag = len(train_xs[0]) == 2
        # dup_reduced = list(tuple(tuple([train_xs[j] + [train_ys[j]] for j in range(len(train_xs))])))
        # dup_reduced_train_xs = [item[:-1] for item in dup_reduced]
        # dup_reduced_train_ys = [item[-1] for item in dup_reduced]
        # dup_reduced_train_ys_str = [str(item) for item in dup_reduced_train_ys]
        dup_reduced_train_xs = train_x + test_x
        dup_reduced_train_ys = train_y + test_y
        dup_reduced_train_ys_str = [str(item) for item in dup_reduced_train_ys]
        classes_str = [str(item) for item in dataset.classes]
        plot_feature_distribution(
            dup_reduced_train_xs,
            ax=current_ax,
            t_sne=True,
            hue=dup_reduced_train_ys_str,
            hue_order=classes_str,
            style=dup_reduced_train_ys_str,
            style_order=classes_str,
            # x_lim='box', y_lim='box',
            x_lim='min_max_extend',
            y_lim='min_max_extend',
            contour=contour_flag,
            z_generator=best_model.predict)
        current_ax.set_title('%s trained on whole set' % marker)

        current_ax = ax[2, i]
        metrics.ConfusionMatrixDisplay(
            train_metrics['conf_mat'],
            display_labels=dataset.classes).plot(ax=current_ax)
        current_ax.set_title('%s on train set of all folds' % marker)

        current_ax = ax[3, i]
        for j in range(len(dataset.classes)):
            roc_curve = train_metrics['roc_curve'][j]
            roc_auc_score = train_metrics['roc_auc_score'][j]
            class_id = dataset.classes[j]
            sen = train_metrics['sensitivity'][j] / 100
            spe = train_metrics['specificity'][j] / 100
            metrics.RocCurveDisplay(fpr=roc_curve[0],
                                    tpr=roc_curve[1],
                                    roc_auc=roc_auc_score,
                                    estimator_name='class %s' %
                                    class_id).plot(ax=current_ax)
            current_ax.scatter(1 - spe, sen)

        current_ax = ax[4, i]
        table_val_list = [
            dataset.classes,
            [100 * item for item in train_metrics['used_threshold']]
        ]
        row_labels = ['cls', 'thr']
        for metrics_name in metrics_names:
            table_val_list.append(train_metrics[metrics_name])
            row_labels.append(metrics_name[:min(3, len(metrics_name))])
        additional_text = []
        for metrics_name in metrics_avg_names:
            additional_text.append('%s: %1.1f' %
                                   (metrics_name, train_metrics[metrics_name]))
        additional_text.append(best_params[marker])
        plot_table(table_val_list,
                   row_labels,
                   ax=current_ax,
                   additional_text=additional_text)

        current_ax = ax[5, i]
        contour_flag = len(train_xs[0]) == 2
        test_y_str = [str(item) for item in test_y]
        classes_str = [str(item) for item in dataset.classes]
        plot_feature_distribution(
            test_x,
            ax=current_ax,
            t_sne=True,
            hue=test_y_str,
            hue_order=classes_str,
            style=test_y_str,
            style_order=classes_str,
            # x_lim='box', y_lim='box',
            x_lim='min_max_extend',
            y_lim='min_max_extend',
            contour=contour_flag,
            z_generator=model.predict)
        current_ax.set_title('%s on test set of the last fold' % marker)

        current_ax = ax[6, i]
        metrics.ConfusionMatrixDisplay(
            test_metrics['conf_mat'],
            display_labels=dataset.classes).plot(ax=current_ax)
        current_ax.set_title('%s on test set of all folds' % marker)

        current_ax = ax[7, i]
        for j in range(len(dataset.classes)):
            roc_curve = test_metrics['roc_curve'][j]
            roc_auc_score = test_metrics['roc_auc_score'][j]
            class_id = dataset.classes[j]
            sen = test_metrics['sensitivity'][j] / 100
            spe = test_metrics['specificity'][j] / 100
            metrics.RocCurveDisplay(fpr=roc_curve[0],
                                    tpr=roc_curve[1],
                                    roc_auc=roc_auc_score,
                                    estimator_name='class %s' %
                                    class_id).plot(ax=current_ax)
            current_ax.scatter(1 - spe, sen)

        current_ax = ax[8, i]
        table_val_list = [
            dataset.classes,
            [100 * item for item in test_metrics['used_threshold']]
        ]
        row_labels = ['cls', 'thr']
        for metrics_name in metrics_names:
            table_val_list.append(test_metrics[metrics_name])
            row_labels.append(metrics_name[:min(3, len(metrics_name))])
        additional_text = []
        for metrics_name in metrics_avg_names:
            additional_text.append('%s: %1.1f' %
                                   (metrics_name, test_metrics[metrics_name]))
        plot_table(table_val_list,
                   row_labels,
                   ax=current_ax,
                   additional_text=additional_text)

    for metrics_name in metrics_avg_names:
        all_marker_values = [
            item[metrics_name] for item in all_marker_train_metrics
        ]
        double_print(
            'overall train %s: %1.1f' %
            (metrics_name, sum(all_marker_values) / len(all_marker_values)),
            metrics_file)
    for metrics_name in metrics_avg_names:
        all_marker_values = [
            item[metrics_name] for item in all_marker_test_metrics
        ]
        double_print(
            'overall test %s: %1.1f' %
            (metrics_name, sum(all_marker_values) / len(all_marker_values)),
            metrics_file)
    metrics_file.close()
    save_yaml(os.path.join(exp_path, 'best_params.yaml'), best_params)
    fig.savefig(metrics_fig_filename, bbox_inches='tight', pad_inches=1)
Example No. 25
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

iris = pd.read_csv("Iris.csv")

y = iris.Species
x = iris.drop(['Species','Id'], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2)

clf = DecisionTreeClassifier()
clf = clf.fit(x_train, y_train)


y_predict = clf.predict(x_test)
print("accuracy : ", metrics.accuracy_score(y_test, y_predict))

cm = metrics.confusion_matrix(y_test, y_predict)
cm_display = metrics.ConfusionMatrixDisplay(cm, display_labels=clf.classes_).plot()
plt.show()
Example No. 26
def plot_con_matrix(con_matrix):
    disp = metrics.ConfusionMatrixDisplay(con_matrix,
                                          display_labels=["Novel", "Normal"])
    disp = disp.plot(cmap="Blues", values_format=".0f")
    return disp.figure_
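A minimal call for plot_con_matrix, assuming sklearn metrics is imported as metrics as in the snippet; the 2x2 matrix is illustrative.

import numpy as np

con_matrix = np.array([[38., 4.],
                       [7., 51.]])
fig = plot_con_matrix(con_matrix)
fig.savefig('novelty_confusion_matrix.png', dpi=150)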
Example No. 27
    def evaluate_architecture(self, with_test=False):
        """Architecture evaluation utility.

        Populate this function with evaluation utilities for your
        neural network.

        You can use external libraries such as scikit-learn for this
        if necessary.
        """
        train_x, train_y = self.train_data
        print("Training Data Shape = ", train_x.shape, train_y.shape)
        val_x, val_y = self.val_data
        print("Validation Data Shape = ", val_x.shape, val_y.shape)

        # Calculate and print accuracies based on model predictions
        acc1 = analyse(self.fitted_model, train_x, train_y)
        acc2 = analyse(self.fitted_model, val_x, val_y)
        #print(train_x, train_y)
        print("Train Accuracy = ", acc1[0])
        auc_score1 = metrics.roc_auc_score(train_y, acc1[2].detach().numpy())
        print("Train AUC Score = ", auc_score1)
        print("Validation Accuracy = ", acc2[0])
        auc_score2 = metrics.roc_auc_score(val_y, acc2[2].detach().numpy())
        print("Validation AUC Score = ", auc_score2)

        labels = ['No Claim', 'Claim']

        if with_test:
            test_x, test_y = self.test_data
            print("Test Data Shape = ", test_x.shape, test_y.shape)
            acc3 = analyse(self.fitted_model, test_x, test_y.reshape((len(test_y),)))
            print("Test Accuracy = ", acc3[0])
            auc_score3 = metrics.roc_auc_score(test_y,
                                              acc3[2].detach().numpy())
            print("Test AUC Score = ", auc_score3)

            f, (ax1, ax2, ax3) = plt.subplots(1, 3)
            confusion_test = metrics.confusion_matrix(test_y, acc3[1].numpy(),
                                                  normalize='true')
            # Plot confusion for test data
            metrics.ConfusionMatrixDisplay(confusion_test, display_labels=labels).plot(ax=ax3)
            ax3.set_title("Test Set", fontsize=17)
            ax3.set_ylabel("")
            plot_width = 15
        else:
            f, (ax1, ax2) = plt.subplots(1, 2)
            plot_width = 10

        # Construct training and validation normalised confusion matrices
        confusion_train = metrics.confusion_matrix(train_y, acc1[1].numpy(),
                                              normalize='true')
        confusion_val = metrics.confusion_matrix(val_y, acc2[1].numpy(),
                                              normalize='true')

        # Plot training and validation set confusion matrices
        metrics.ConfusionMatrixDisplay(confusion_train, display_labels=labels).plot(ax=ax1)
        ax1.set_title("Training Set", fontsize=17)
        metrics.ConfusionMatrixDisplay(confusion_val, display_labels=labels).plot(ax=ax2)
        ax2.set_title("Validation Set", fontsize=17)
        ax2.set_ylabel("")

        plt.gcf().set_size_inches(plot_width+1, 5)
        plt.savefig("confusion_matrix.pdf", bbox_inches='tight')
        plt.show()

        return
Example No. 28
            target_size=(img_width,img_height),
            batch_size=32,
            class_mode='categorical',
            shuffle = True, seed=1234)
            
#Test loss and accuracy on the shuffled test dataset            
history = source_model.evaluate(test_generator2)


#Confusion Matrix
pred_labels = source_model.predict(test_generator)
pred_labels_num = np.argmax(pred_labels, axis = 1)
cm = metrics.confusion_matrix(test_generator.classes, np.argmax(pred_labels, axis = 1))
#metrics.ConfusionMatrixDisplay(cm, display_labels = [0,1,2,3,4,5,6,7,8,9]).plot() 
#With label names:
metrics.ConfusionMatrixDisplay(cm, display_labels = test_generator.class_indices).plot() 
plt.show()



#Sample image predictions
ROWS = 3 
COLUMNS = 10  
ix = 1 
for i in range(ROWS): 
    for j in range(COLUMNS): 
        # specify subplot and turn off axis
        idx = np.random.choice(len(test_generator[4*j][0])) 
        img = test_generator[4*j][0][idx] 
        ax = plt.subplot(ROWS, COLUMNS, ix) 
        ax.set_xticks([]) 
Example No. 29
def tda_intensity_classifier(subj_dir, space, PC, labels, i_band):
    """
    Pipeline of a Topological Classifier
    
    :param subj_dir: Directory of the subject where we will save the accuracies and Confusion Matrix
    :param space: If electrode space or font space
    :param PC: Point Cloud we will classify
    :param labels: labels of the points
    :param i_band: frequancy band
    :return: test size,random selections matrix, accuracy of dimension 0 silhouettes
    """

    #We define the dimensions and the feature vectors we will use, as well as the frequency band, and define the number of times we will repeat the classification
    dimensions = ["zero", "one"]
    n_dim = len(dimensions)
    feat_vect = [
        DimensionLandScape(),
        DimensionSilhouette(),
        TopologicalDescriptors()
    ]
    feat_vect_names = [
        'Landscapes', 'Silhouettes', 'Descriptors', 'Bottleneck'
    ]
    n_vectors = len(feat_vect)
    n_rep = 10
    band_dic = {-1: 'noFilter', 0: 'alpha', 1: 'beta', 2: 'gamma'}
    band = band_dic[i_band]
    # Initialize matrices where we will store results (accuracy distributions, confusion matrices, random-prediction counts)
    rand_n = np.zeros((n_rep, n_vectors + 1, n_dim))
    test_size = np.zeros(n_rep)
    topo_perf = np.zeros([n_dim, n_vectors + 1, n_rep])

    knn_perf = np.zeros(n_rep)
    knn_conf_matrix = np.zeros((n_rep, 3, 3))
    #Initialize 1 Nearest Neighbor classifier
    clf = sklnn.KNeighborsClassifier(n_neighbors=1,
                                     algorithm='brute',
                                     metric='correlation')

    if not os.path.exists(subj_dir + space + '/1nn_clf'):
        print("create directory(plot):", subj_dir + space + '/1nn_clf')
        os.makedirs(subj_dir + space + '/1nn_clf')
    #perf_shuf = np.zeros([n_dim,n_vectors+1,n_rep])
    topo_conf_matrix = np.zeros([n_dim, n_vectors + 1, n_rep, 3, 3])

    if not os.path.exists(subj_dir + space + '/topological_clf'):
        print("create directory(plot):", subj_dir + space + '/topological_clf')
        os.makedirs(subj_dir + space + '/topological_clf')

    t_int = time.time()
    # Find which motivational state has the fewest points
    trials_per_m = min((labels == 0).sum(), (labels == 1).sum(),
                       (labels == 2).sum())
    if trials_per_m == 0:  # If any motivational state has no points, we do not classify
        np.save(
            subj_dir + space + '/topological_clf/' + band +
            'perf_intensity.npy', topo_perf)
        np.save(subj_dir + space + '/1nn_clf/' + band + 'perf_intensity.npy',
                knn_perf)
        return -1, np.zeros((n_vectors + 1, n_dim)), -1

    # We balance the dataset by downsampling

    # We begin the classification
    cv_schem = skms.StratifiedShuffleSplit(n_splits=1, test_size=0.2)
    for i_rep in range(n_rep):
        X_m0_dwnsamp = PC[labels == 0][np.random.choice(
            len(PC[labels == 0]), trials_per_m)]
        X_m1_dwnsamp = PC[labels == 1][np.random.choice(
            len(PC[labels == 1]), trials_per_m)]
        X_m2_dwnsamp = PC[labels == 2][np.random.choice(
            len(PC[labels == 2]), trials_per_m)]
        PC_dwnsamp = np.concatenate((X_m0_dwnsamp, X_m1_dwnsamp, X_m2_dwnsamp),
                                    axis=0)
        labels_dwnsamp = np.concatenate(
            (np.zeros(trials_per_m), np.ones(trials_per_m),
             np.ones(trials_per_m) * 2))
        X_motiv = []
        tda_vect = {
            0: defaultdict(lambda: defaultdict(lambda: [])),
            1: defaultdict(lambda: defaultdict(lambda: [])),
            2: defaultdict(lambda: defaultdict(lambda: []))
        }
        for ind_train, ind_test in cv_schem.split(PC_dwnsamp, labels_dwnsamp):
            #Save test size, define X_train and y_train and initialize prediction matrix
            test_size[i_rep] = len(ind_test)
            X_train = PC_dwnsamp[ind_train]
            y_train = labels_dwnsamp[ind_train]
            #1nn
            knn_pred = np.zeros(len(ind_train))
            clf.fit(X_train, y_train)
            knn_pred = clf.predict(PC_dwnsamp[ind_test])
            knn_perf[i_rep] = skm.accuracy_score(knn_pred,
                                                 labels_dwnsamp[ind_test])
            knn_conf_matrix[i_rep, :, :] += skm.confusion_matrix(
                y_true=labels_dwnsamp[ind_test],
                y_pred=knn_pred,
                normalize='true')
            #topological classifier
            topo_pred = np.zeros(len(ind_train))
            topo_pred_array = np.zeros(
                (len(ind_test), n_vectors + 1, n_dim, 3))
            #For each motivational state we compute Persistence Diagrams
            for i_motiv in range(3):
                X_motiv.append(X_train[y_train == i_motiv])
                n_coor = X_motiv[i_motiv].shape[0]
                matrix = np.zeros((n_coor, n_coor))
                row, col = np.triu_indices(n_coor, 1)
                distancies = pdist(X_motiv[i_motiv])
                matrix[row, col] = distancies
                matrix[col, row] = distancies

                Rips_complex_sample = gd.RipsComplex(
                    distance_matrix=matrix)  #,max_edge_length=max_edge)
                #Rips_complex_sample = gd.AlphaComplex(distance_matrix=matrix)#,max_edge_length=max_edge)
                Rips_simplex_tree_sample = Rips_complex_sample.create_simplex_tree(
                    max_dimension=2)
                persistence = Rips_simplex_tree_sample.persistence()
                dim_list = np.array(list(map(lambda x: x[0], persistence)))
                point_list = np.array(list(map(lambda x: x[1], persistence)))
                zero_dim = point_list[np.logical_and(
                    point_list[:, 1] != float('inf'), dim_list == 0)]
                one_dim = point_list[np.logical_and(
                    point_list[:, 1] != float('inf'), dim_list == 1)]
                persistence = (zero_dim, one_dim)
                #For each dimension we compute different topological feature vectors.
                for i_dim in range(n_dim):
                    dimensionscaler = DimensionDiagramScaler(
                        dimensions=dimensions[i_dim])
                    dimensionscaler.fit(persistence)
                    dim_persistence = np.array(
                        dimensionscaler.transform(persistence))
                    for i_vector in range(n_vectors):
                        tda_compt = feat_vect[i_vector]
                        tda_compt.fit([dim_persistence])
                        tda_vect[i_motiv][i_vector][
                            i_dim] = tda_compt.transform([dim_persistence])
                    tda_vect[i_motiv][n_vectors][
                        i_dim] = dim_persistence  #Saving directly the persistence in one dimension (later we will compute Bottleneck distance)

            #We normalize the descriptor Vector
            descriptors0 = np.concatenate(
                (tda_vect[0][2][0], tda_vect[1][2][0], tda_vect[2][2][0]),
                axis=0)
            descriptors1 = np.concatenate(
                (tda_vect[0][2][1], tda_vect[1][2][1], tda_vect[2][2][1]),
                axis=0)
            max0 = descriptors0.max(axis=0)
            max1 = descriptors1.max(axis=0)
            min0 = descriptors0.min(axis=0)
            min1 = descriptors1.min(axis=0)
            descriptors0 = (descriptors0 - min0) / (max0 - min0)
            descriptors1 = (descriptors1 - min1) / (max1 - min1)
            maxs = [max0, max1]
            mins = [min0, min1]
            for m in range(3):
                tda_vect[m][2][0] = descriptors0[m]
                tda_vect[m][2][1] = descriptors1[m]
            #For each point of the test set we add this point to all three Point Clouds
            i = 0
            for index in ind_test:
                for i_motiv in range(3):
                    X_temp = np.concatenate(
                        (X_motiv[i_motiv], PC[index].reshape(1, -1)), axis=0)
                    n_coor = X_temp.shape[0]
                    matrix = np.zeros((n_coor, n_coor))
                    row, col = np.triu_indices(n_coor, 1)
                    distancies = pdist(X_temp)
                    matrix[row, col] = distancies
                    matrix[col, row] = distancies

                    Rips_complex_sample = gd.RipsComplex(
                        distance_matrix=matrix)  #,max_edge_length=max_edge)
                    #Rips_complex_sample = gd.AlphaComplex(distance_matrix=matrix)#,max_edge_length=max_edge)
                    Rips_simplex_tree_sample = Rips_complex_sample.create_simplex_tree(
                        max_dimension=2)
                    persistence = Rips_simplex_tree_sample.persistence()
                    dim_list = np.array(list(map(lambda x: x[0], persistence)))
                    point_list = np.array(
                        list(map(lambda x: x[1], persistence)))
                    zero_dim = point_list[np.logical_and(
                        point_list[:, 1] != float('inf'), dim_list == 0)]
                    one_dim = point_list[np.logical_and(
                        point_list[:, 1] != float('inf'), dim_list == 1)]
                    persistence = (zero_dim, one_dim)
                    #For each dimension and feature vector we compute the euclidean norm to assign a distance on how much the topology has changed
                    for i_dim in range(n_dim):
                        dimensionscaler = DimensionDiagramScaler(
                            dimensions=dimensions[i_dim])
                        dimensionscaler.fit(persistence)
                        dimensional_persistence = np.array(
                            dimensionscaler.transform(persistence))
                        for i_vector in range(n_vectors - 1):
                            tda_compt = feat_vect[i_vector]
                            tda_compt.fit([dimensional_persistence])

                            topo_pred_array[
                                i, i_vector, i_dim, i_motiv] = np.linalg.norm(
                                    tda_compt.transform(
                                        [dimensional_persistence]) -
                                    tda_vect[i_motiv][i_vector][i_dim])

                        tda_compt = feat_vect[n_vectors - 1]
                        tda_compt.fit([dimensional_persistence])

                        topo_pred_array[
                            i, n_vectors - 1, i_dim, i_motiv] = np.linalg.norm(
                                ((tda_compt.transform(
                                    [dimensional_persistence]) - mins[i_dim]) /
                                 (maxs[i_dim] - mins[i_dim])) -
                                tda_vect[i_motiv][n_vectors - 1][i_dim])

                        topo_pred_array[i, n_vectors, i_dim,
                                        i_motiv] = gd.bottleneck_distance(
                                            dimensional_persistence,
                                            tda_vect[i_motiv][n_vectors]
                                            [i_dim], 0.01)
                i = i + 1

            # We predict and compute accuracy and the confusion matrix
            for i_vector in range(n_vectors + 1):
                for i_dim in range(n_dim):
                    topo_pred, rand_n[i_rep, i_vector,
                                      i_dim] = topological_clf(
                                          topo_pred_array[:, i_vector,
                                                          i_dim, :])

                    topo_perf[i_dim, i_vector, i_rep] = skm.accuracy_score(
                        topo_pred, labels_dwnsamp[ind_test])
                    topo_conf_matrix[i_dim, i_vector,
                                     i_rep, :, :] += skm.confusion_matrix(
                                         y_true=labels_dwnsamp[ind_test],
                                         y_pred=topo_pred,
                                         normalize='true')
    print((time.time() - t_int) / 60, 'minutes for classification')

    #We plot accuracies and confusion matrices for 1nn
    np.save(subj_dir + space + '/1nn_clf/' + band + 'perf_intensity.npy',
            knn_perf)
    np.save(
        subj_dir + space + '/1nn_clf/' + band + 'conf_matrix_intensity.npy',
        knn_conf_matrix)
    fmt_grph = 'png'
    cmapcolours = ['Blues', 'Greens', 'Oranges', 'Reds']
    plt.rcParams['xtick.labelsize'] = 16
    plt.rcParams['ytick.labelsize'] = 8
    plt.figure(figsize=[16, 9])

    plt.violinplot(knn_perf)
    chance_level = np.max(np.unique(labels,
                                    return_counts=True)[1]) / labels.size
    #plt.plot([-1,2],[chance_level]*2,'--k')

    plt.ylabel('accuracy ' + band, fontsize=8)
    plt.title(band + ' 1nn classification')
    plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1])

    plt.savefig(subj_dir + space + '/1nn_clf/1nn_accuracies_intensity_' +
                band + '.png',
                format=fmt_grph)
    plt.close()

    plt.rcParams['xtick.labelsize'] = 24
    plt.rcParams['ytick.labelsize'] = 24
    plt.rcParams.update({'font.size': 24})

    plt.figure(figsize=[16, 9])

    disp = skm.ConfusionMatrixDisplay(knn_conf_matrix[:, :, :].mean(0),
                                      display_labels=['M0', 'M1', 'M2'])
    disp.plot(include_values=True, cmap=cmapcolours[i_band], colorbar=True)

    plt.xlabel('predicted label', fontsize=12)
    plt.ylabel('true label', fontsize=12)
    plt.title('Confusion Matrix for band ' + band + ' and a 1NN classifier',
              fontsize=18)

    plt.savefig(subj_dir + space +
                '/1nn_clf/1nn_confusion_matrix_intensities_' + band + '.png',
                format=fmt_grph)
    plt.close()
    #We plot accuracies and confusion matrices for topological classifiers

    np.save(
        subj_dir + space + '/topological_clf/' + band + 'perf_intensity.npy',
        topo_perf)
    np.save(
        subj_dir + space + '/topological_clf/' + band +
        'conf_matrix_intensity.npy', topo_conf_matrix)
    fmt_grph = 'png'
    cmapcolours = ['Blues', 'Greens', 'Oranges', 'Reds']
    plt.rcParams['xtick.labelsize'] = 24
    plt.rcParams['ytick.labelsize'] = 20
    fig, axes = plt.subplots(nrows=n_dim, ncols=1, figsize=(24, 12))

    for i_dim in range(n_dim):
        # the chance level is the accuracy of the trivial classifier that always predicts the most frequent label
        chance_level = np.max(
            np.unique(labels_dwnsamp,
                      return_counts=True)[1]) / labels_dwnsamp.size

        axes[i_dim].violinplot(topo_perf[i_dim, 0, :],
                               positions=[-0.2],
                               widths=[0.3])
        axes[i_dim].violinplot(topo_perf[i_dim, 1, :],
                               positions=[0.2],
                               widths=[0.3])
        axes[i_dim].violinplot(topo_perf[i_dim, 2, :],
                               positions=[0.6],
                               widths=[0.3])
        axes[i_dim].violinplot(topo_perf[i_dim, 3, :],
                               positions=[1],
                               widths=[0.3])

        axes[i_dim].plot([-1, 2], [chance_level] * 2, '--k')
        axes[i_dim].axis(xmin=-0.6, xmax=1.4, ymin=0, ymax=1.05)
        axes[i_dim].set_ylabel('accuracy ' + band, fontsize=16)
        axes[i_dim].set_title('band ' + band + ' dimension ' +
                              dimensions[i_dim],
                              fontsize=24)
        fig.suptitle(
            'Accuracies for different dimensions and metrics of band ' + band,
            fontsize=36)
        plt.setp(axes,
                 xticks=[-0.2, 0.2, 0.6, 1],
                 xticklabels=feat_vect_names,
                 yticks=[0, 0.2, 0.4, 0.6, 0.8, 1])

    plt.savefig(subj_dir + space + '/topological_clf/accuracies_intensity_' +
                band + '.png',
                format=fmt_grph)
    plt.close(fig)
    plt.rcParams['xtick.labelsize'] = 24
    plt.rcParams['ytick.labelsize'] = 24
    plt.rcParams.update({'font.size': 24})

    fig2, axes2 = plt.subplots(nrows=n_dim,
                               ncols=n_vectors + 1,
                               figsize=(60, 30))

    for i_vector in range(n_vectors + 1):
        for i_dim in range(n_dim):
            disp = skm.ConfusionMatrixDisplay(
                topo_conf_matrix[i_dim, i_vector, :, :, :].mean(0),
                display_labels=['M0', 'M1', 'M2'])
            disp.plot(ax=axes2[i_dim][i_vector],
                      include_values=True,
                      cmap=cmapcolours[i_band],
                      colorbar=True)

            axes2[i_dim][i_vector].set_xlabel('predicted label', fontsize=24)
            axes2[i_dim][i_vector].set_ylabel('true label', fontsize=24)
            axes2[i_dim][i_vector].set_title('band ' + band + ' dimension ' +
                                             dimensions[i_dim] + ' w/ ' +
                                             feat_vect_names[i_vector],
                                             fontsize=36)

            fig2.suptitle(
                'Confusion Matrices for different dimensions and feature vectors of band '
                + band,
                fontsize=48)

            plt.subplots_adjust(top=0.65)
            plt.setp(axes, xticks=[0, 1, 2], yticks=[0, 1, 2])

    #fig2.tight_layout(pad=0.5)
    plt.savefig(subj_dir + space +
                '/topological_clf/confusion_matrix_intensities_' + band +
                '.png',
                format=fmt_grph)
    plt.close(fig2)
    return test_size.mean(), rand_n.mean(axis=0), topo_perf[0, 1, :].mean()
Example No. 30
    #list = [1, 3, 5, 6, 7, 8]
    #print([x.columns[i] for i in list])
    #x = x.to_numpy(dtype=float)
    #test.evaluate_input3(x, y.to_numpy(dtype=float))


    #train_data, test_data = test.separate_data(x, y)
    #test.fit(train_data[0], train_data[1], True)
    predictions_test = test.predict(pd.DataFrame(test_data[0]))


    confusion_test = metrics.confusion_matrix(test.test_data[1], predictions_test,
                                             normalize='true')

    labels = ['No Claim', 'Claim']
    metrics.ConfusionMatrixDisplay(confusion_test, display_labels=labels).plot()

    test.evaluate_architecture(True)
    #test.evaluate_architecture()

    """
    #test.evaluate_input3(x, y)
    x_clean = test._preprocessor(x)
    #print(x_clean.shape)
    #test.fit(x, y)
    test.evaluate_input3(x, y)
    
    data_set = np.genfromtxt("part2_training_data.csv", dtype=float, delimiter=',', skip_header=1)
    num_att = len(data_set[0])  # number of parameters

    claims = np.array(data_set[:, (num_att - 1)], dtype=np.float32)