Esempio n. 1
0
def plot_bnn_results():
    """Plot confusion matrices and write classification reports for the BNN.

    Reads the saved BNN score tables for the three data sources (``mrna``,
    ``meth`` and ``micro mrna``) from ``../Data/outputs/``. For each table the
    predicted class of a row is the index of its largest column. A fourth
    "comparison" result is derived from the element-wise maximum of the three
    tables. Every result is compared against ``true-labels.csv``: a confusion
    matrix is plotted via ``plot_confusion_matrix`` and the text
    classification report is written to a ``.txt`` file in
    ``../Data/outputs/``.

    Returns:
        None. The function only produces plots and report files.
    """
    path1 = "../Data/outputs/pred-bnn-mrna.csv"
    path2 = "../Data/outputs/pred-bnn-meth.csv"
    path3 = "../Data/outputs/pred-bnn-micro mrna.csv"
    annotation_path = "../Data/data/preprocessed_annotation_global.csv"
    true_path = "../Data/outputs/true-labels.csv"
    model = "bnn"
    filename = ["mrna", "meth", "micro mrna"]

    # The CSVs were written with their index, which pandas reads back as an
    # "Unnamed: 0" column; drop it so only the score columns remain.
    data1 = pd.read_csv(path1).drop(columns=["Unnamed: 0"])
    data2 = pd.read_csv(path2).drop(columns=["Unnamed: 0"])
    data3 = pd.read_csv(path3).drop(columns=["Unnamed: 0"])

    # Ground truth and class names are identical for every result, so they
    # are loaded once instead of once per report.
    target = pd.read_csv(true_path).drop(columns=["Unnamed: 0"])
    # Row 0 of the label file is skipped for every comparison.
    # NOTE(review): presumably a placeholder row added when the file was
    # written -- confirm against the code that produces true-labels.csv.
    y_true = target.drop(0)
    names = pd.read_csv(annotation_path)["label"].astype(
        'category').cat.categories

    # (score table, plot title, report file) for each result to produce.
    jobs = [
        (data1, model + "-" + filename[0], '../Data/outputs/bnn mrna.txt'),
        (data2, model + "-" + filename[1], '../Data/outputs/bnn meth.txt'),
        (data3, model + "-" + filename[2],
         '../Data/outputs/bnn-micro-rna.txt'),
        # Combined result: keep, per cell, the highest score any of the
        # three sources assigned.
        (np.maximum(data1, np.maximum(data2, data3)), "comparisonbnn",
         '../Data/outputs/bnn comparison.txt'),
    ]

    np.set_printoptions(precision=2)
    for scores, title, report_path in jobs:
        y_pred = np.argmax(scores.values, axis=1)
        cnf_matrix = confusion_matrix(y_true.values, y_pred)
        # Plot the non-normalized confusion matrix.
        # NOTE(review): the Figure created here is discarded immediately, and
        # this attribute chain only resolves if `plt` is bound to the
        # `matplotlib` package rather than `matplotlib.pyplot` -- confirm.
        plt.figure.Figure(figsize=(10, 10))
        plot_confusion_matrix(cnf_matrix, title=title, classes=names)
        # Build the report before opening the file so a failure does not
        # leave a truncated, empty report behind.
        report = classification_report(y_true, y_pred)
        with open(report_path, 'w') as f:
            print(report, file=f)
        scores = np.append(scores, totalscore)
        components = np.append(components, n_components)
        ##components.append(n_components)
        # print("final score : %f" % totalscore)
        print("plot")
        cnf_matrix = confusion_matrix(
            y_test['label'].astype('category').cat.codes, y_pred)
        # plt.figure(figsize=(10, 10))
        # plot_roc(names.shape[0], y_pred, y_test_bal, names, title)
        print()
        np.set_printoptions(precision=2)
        # PlotDir non-normalized confusion matrix
        plt.figure.Figure(figsize=(10, 10))

        plot_confusion_matrix(cnf_matrix,
                              title="without-unknown-testset-" + modelname +
                              "-" + filename,
                              classes=names2)
        with open(
                "../Data/outputs/without-unknown-testset-" + modelname + "-" +
                filename + ".txt", 'w') as f:
            print(classification_report(
                y_test['label'].astype('category').cat.codes,
                y_pred,
            ),
                  file=f)

        if modelname == "bnn":
            # clf = bnn.BNN(n_components, 20, 5)
            # clf.train_step(x_train_transformed, y_train)
            tot, correct_predictions, predicted_for_images, new_prediction, probabilities = clf.test_batch(
                torch.from_numpy(X_transformed).float(), y2, names, plot=False)
Esempio n. 3
0
    max_constraint = torch.max(constraint, dim=1)
    y_pred[(max_prob.values / 3 < 0.9) | (max_constraint.values < 0.25)] = 5
    y_true = X['y_true']
    #y_true=y_true[max_prob.values.numpy()/3<0.9]

    print("plot")
    cnf_matrix = confusion_matrix(y_true, y_pred)
    # plt.figure(figsize=(10, 10))
    # plot_roc(names.shape[0], y_pred, y_test_bal, names, title)
    print()
    np.set_printoptions(precision=2)
    # PlotDir non-normalized confusion matrix
    plt.figure.Figure(figsize=(10, 10))

    plot_confusion_matrix(cnf_matrix,
                          title="with-unknown-testset-" + modelname +
                          "-comparison-new",
                          classes=names)
    with open(
            "../Data/outputs/with-unknown-testset-" + modelname +
            "-comparison-new.txt", 'w') as f:

        print(classification_report(
            y_true,
            y_pred,
        ), file=f)

path = "../Data/outputs/pred-stomaco-"
for modelname in modelnames:
    data = []
    for filename in filenames:
        X = pd.read_csv(path + modelname + "-" + filename + ".csv")
                              axis=1)) == labels).sum().item()
            print("accuracy: %d %%" % (100 * correct / total))
            import pandas as pd

            pd.DataFrame(probabilities).to_csv("../Data/outputs/pred-" +
                                               modelname + filename + ".csv")
            pd.DataFrame(true_labels).to_csv("../Data/outputs/true-labels.csv")
            cnf_matrix = confusion_matrix(true_labels[1:],
                                          np.argmax(probabilities, axis=1))
            print()
            np.set_printoptions(precision=2)
            # PlotDir non-normalized confusion matrix
            plt.figure.Figure(figsize=(10, 10))

            plot_confusion_matrix(cnf_matrix,
                                  title=modelname + filename,
                                  classes=names)

            X2 = pd.read_csv("../Data/data/anomalies_preprocessed_Matrix_" +
                             filename + ".csv",
                             index_col=False,
                             header=None)
            y2 = pd.read_csv(
                "../Data/data/anomalies_preprocessed_annotation_global.csv"
            )["label"]
            if filename == "mrna":
                X2 = pd.DataFrame(X2[X2.std().sort_values(
                    ascending=False).head(1200).index].values.tolist())
            X_transformed = pca.transform(X2)
            y_pred = outlier_detector.predict(X_transformed)
            plot_outliers(X_transformed, y_pred, X_train_transformed,
Esempio n. 5
0
        true_labels.append(y_test)
        print(filename)
        print("best parameters")
        # print(model.best_params_)
        print("Confusion matrix")
        #totalscore = accuracy_score(y_test, y_pred)
        #print("final score : %f" % totalscore)
        cnf_matrix = confusion_matrix(y_test, y_pred)
        # plt.figure(figsize=(10, 10))
        # plot_roc(names.shape[0], y_pred, y_test_bal, names, title)
        print()
        np.set_printoptions(precision=2)
        # PlotDir non-normalized confusion matrix
        plt.figure.Figure(figsize=(10, 10))
        plot_confusion_matrix(cnf_matrix,
                              title=modelname + "-" + filename,
                              classes=names)

        print(modelname + filename + " " + str(model.best_params_), file=f)
        print(classification_report(
            y_test,
            y_pred,
        ), file=f)
names = np.append(names, "unknown")
unknown_index = np.logical_not(
    np.logical_or(
        predictions[0] == predictions[1],
        np.logical_or(predictions[0] == predictions[2],
                      predictions[1] == predictions[2])))

y_pred[predictions[0] == predictions[1]] = predictions[0][predictions[0] ==
Esempio n. 6
0
                             filename + ".csv",
                             index_col=False,
                             header=None)
            y2 = pd.read_csv(
                "../Data/data/anomalies_preprocessed_annotation_global.csv"
            )["label"]
            if filename == "mrna":
                X2 = pd.DataFrame(X2[X2.std().sort_values(
                    ascending=False).head(1200).index].values.tolist())
            X_transformed = pca.transform(X2)
            print('Prediction when network is forced to predict')

            probabilities, true_labels = clf.test_forced(
                X_transformed,
                y2.astype('category').cat.codes)
            y_pred = np.argmax(probabilities, axis=1)
            y_pred[np.max(probabilities, axis=1) < 0.6] = 5
            cnf_matrix = confusion_matrix(true_labels[1:],
                                          np.argmax(probabilities, axis=1))
            # plt.figure(figsize=(10, 10))
            # plot_roc(names.shape[0], y_pred, y_test_bal, names, title)
            print()
            np.set_printoptions(precision=2)
            # PlotDir non-normalized confusion matrix
            plt.figure.Figure(figsize=(10, 10))

            plot_confusion_matrix(cnf_matrix,
                                  title=modelname + "-anomalies-" + filename,
                                  classes=names)
    plot_mlp_results()
                        matrix.sum(2),
                        matrix.sum(2).sum(1).view(y_pred.shape[0], 1))
                    max_prob = torch.max(matrix.sum(2), dim=1)
                    max_constraint = torch.max(constraint, dim=1)
                    y_pred[(max_prob.values / 3 < 0.9) |
                           (max_constraint.values < 0.25)] = 5
                else:
                    pathto = "/last_step-notconservative-" + dataset + "-" + modelname

                cnf_matrix = confusion_matrix(y_true, y_pred)
                print("plot")
                np.set_printoptions(precision=2)
                # PlotDir non-normalized confusion matrix
                plt.figure.Figure(figsize=(10, 10))
                plot_confusion_matrix(cnf_matrix,
                                      title=pathto + "-confusionmatrix",
                                      classes=names)
                with open(outputpath + pathto + "-comparison-new.txt",
                          'w') as f:
                    print("total predicted as unknown " + str(
                        (y_pred.numpy() == 5).sum()),
                          file=f)
                    y_true2 = y_true[y_pred.numpy() != 5]
                    y_pred2 = y_pred[y_pred.numpy() != 5]
                    if y_pred2.shape[0] != 0:
                        print(classification_report(
                            y_true2,
                            y_pred2,
                        ),
                              file=f)
    else:
        print("Total images: ", tot)
        print("Predicted for: ", predicted_for_images)
        print("Accuracy when predicted: ",
              correct_predictions / predicted_for_images)
        print("Confusion matrix")
        # totalscore = accuracy_score(y_test,new_prediction)
        # print("final score : %f" % totalscore)
        cnf_matrix = confusion_matrix(y_test, new_prediction)
        # plt.figure(figsize=(10, 10))
        # plot_roc(names.shape[0], y_pred, y_test_bal, names, title)
        print()
        np.set_printoptions(precision=2)
        # PlotDir non-normalized confusion matrix
        plt.figure.Figure(figsize=(10, 10))
        plot_confusion_matrix(cnf_matrix,
                              title=modelname + "-" + filename,
                              classes=names.append(pd.Index(["Unknown"])))

        # print(modelname + filename + " " + str(model.best_params_), file=f)
        print(classification_report(
            y_test,
            new_prediction,
        ), file=f)

        X2 = pd.read_csv("../Data/data/anomalies_preprocessed_Matrix_" +
                         filename + ".csv",
                         index_col=False,
                         header=None)
        y2 = pd.read_csv(
            "../Data/data/anomalies_preprocessed_annotation_global.csv"
        )["label"]
Esempio n. 9
0
def train_and_test(x_train_transformed, y_train, x_test_transformed, y_test,
                   n_components, names, outputname):
    """Fit each supported classifier, score it on the test split, save results.

    The models are handled in the fixed order ``rotationForest``, ``mlptree``,
    ``mlp``, ``bnn``. For every model the function trains it, predicts on the
    test data, relabels low-confidence predictions as class ``5``, and then
    writes a per-sample CSV, a histogram (via ``PlotInstograms``), the names
    of the samples predicted as class ``5``, a confusion-matrix plot and a
    text classification report under ``../Data/outputs3/``.

    Args:
        x_train_transformed: Training features handed to ``train_step``.
        y_train: Training targets handed to ``train_step``.
        x_test_transformed: Test features; converted with
            ``torch.from_numpy`` for the BNN, so a NumPy array is expected.
        y_test: Table indexed by ``'official_name'`` and ``'label'``.
        n_components: First constructor argument of the BNN, MLP and MlpTree
            models.
        names: Class names; ``.append(pd.Index(...))`` is called on it, so a
            pandas Index is expected.
        outputname: String inserted into every output file name and title.

    Returns:
        A ``(models, modelnames)`` tuple with the trained classifiers and
        their names in matching order. A bare ``return`` (``None``) happens
        only for an unrecognised model name, which the hard-coded name list
        never contains.
    """
    modelnames = ["rotationForest", "mlptree", "mlp", "bnn"]
    models2 = []
    for modelname in modelnames:
        start = time.time()

        # --- build the estimator that belongs to this name ---------------
        if modelname == "bnn":
            clf = BNN(n_components, 20, 5)
        elif modelname == "mlp":
            clf = MLP(n_components, 20, 5)
        elif modelname == "rotationForest":
            clf = RotationForest()
        elif modelname == "mlptree":
            clf = MlpTree(n_components, 20, 5)
        else:
            return

        # --- fit; the reported time covers construction and training only -
        clf.train_step(x_train_transformed, y_train)
        end = time.time()

        # --- predict -----------------------------------------------------
        if modelname == "bnn":
            # The BNN returns its final predictions directly, so no
            # confidence threshold is applied here.
            _, _, _, y_pred, probabilities = clf.test_batch(
                torch.from_numpy(x_test_transformed).float(),
                y_test,
                names,
                plot=False)
            maxprob = np.max(probabilities, axis=1)
        elif modelname == "mlptree":
            maxprob, y_pred, _, probabilities = clf.test_forced(
                x_test_transformed, y_test)
            y_pred[maxprob < 0.9] = 5
        else:
            # "mlp" and "rotationForest" share the same prediction path.
            probabilities, _ = clf.test_forced(x_test_transformed, y_test)
            y_pred = np.argmax(probabilities, axis=1)
            maxprob = np.max(probabilities, axis=1)
            # Anything predicted with less than 0.9 confidence becomes
            # class 5.
            y_pred[maxprob < 0.9] = 5

        # Integer codes of the true labels, reused for the CSV, the
        # confusion matrix and the report.
        true_codes = y_test["label"].astype('category').cat.codes

        # --- persist the per-sample results ------------------------------
        columns = {}
        columns['official_name'] = y_test['official_name'].tolist()
        columns['max_probability'] = maxprob.tolist()
        columns['probabilities'] = np.array(probabilities).tolist()
        columns['y_pred'] = y_pred.tolist()
        columns['y_true'] = true_codes.tolist()
        df = pd.DataFrame(columns)

        print("time computation for " + modelname + "is " + str(end - start))
        predictions_csv = ("../Data/outputs3/pred-" + outputname + "-" +
                           modelname + "" + ".csv")
        df.to_csv(predictions_csv)
        PlotInstograms(df, "istogramma" + outputname + "-" + modelname)

        # --- names of the samples labelled as class 5 --------------------
        outliers_csv = ("../Data/outputs3/outliers-" + outputname + "-" +
                        modelname + ".csv")
        outliers_names = y_test[y_pred == 5]['official_name']
        outliers_names.to_csv(outliers_csv, index=False)

        # --- confusion matrix and classification report ------------------
        print("plot")
        cnf_matrix = confusion_matrix(true_codes, y_pred)
        print()
        np.set_printoptions(precision=2)
        plt.figure.Figure(figsize=(10, 10))

        plot_title = "with-unknown-" + outputname + "-" + modelname + ""
        class_labels = names.append(pd.Index(["Unknown"]))
        plot_confusion_matrix(cnf_matrix,
                              title=plot_title,
                              classes=class_labels)

        report_path = ("../Data/outputs3/" + outputname + "-" + modelname +
                       "-" + ".txt")
        with open(report_path, 'w') as f:
            print(classification_report(true_codes, y_pred), file=f)

        models2.append(clf)
    return models2, modelnames
Esempio n. 10
0
def test(x_transformed, y2, models, modelnames, names, outputname):
    """Evaluate already-trained models as detectors of the "unknown" class.

    For each ``(model, name)`` pair the function predicts on
    ``x_transformed``, relabels low-confidence predictions as class ``5``,
    saves the per-sample results and the names of the samples predicted as
    class ``5``, and then reduces the task to a binary one (class ``5`` vs.
    everything else) for the confusion-matrix plot and the text
    classification report. All files go to ``../Data/outputs3/``.

    Args:
        x_transformed: Feature matrix; converted with ``torch.from_numpy``
            for the BNN, so a NumPy array is expected.
        y2: Table indexed by ``'official_name'`` and ``'label'``.
        models: Trained classifiers, in the same order as ``modelnames``.
        modelnames: Model names; each must be one of ``"bnn"``, ``"mlp"``,
            ``"rotationForest"`` or ``"mlptree"``.
        names: Class names forwarded to the BNN's ``test_batch``.
        outputname: String inserted into every output file name and title.

    Returns:
        None. Processing stops silently at the first unrecognised model
        name, so later models are not evaluated.
    """
    unknown = 5  # label assigned to "unknown" / rejected samples
    min_confidence = 0.9  # below this, a forced prediction is rejected

    for clf, modelname in zip(models, modelnames):
        if modelname == "bnn":
            # The BNN returns its final predictions directly, so no
            # confidence threshold is applied here.
            _, _, _, y_pred, probabilities = clf.test_batch(
                torch.from_numpy(x_transformed).float(), y2, names, plot=False)
            maxprob = np.max(probabilities, axis=1)
        elif modelname in ("mlp", "rotationForest"):
            # Both models expose the same forced-prediction interface.
            probabilities, _ = clf.test_forced(x_transformed, y2)
            y_pred = np.argmax(probabilities, axis=1)
            maxprob = np.max(probabilities, axis=1)
            y_pred[maxprob < min_confidence] = unknown
        elif modelname == "mlptree":
            maxprob, y_pred, _, probabilities = clf.test_forced(
                x_transformed, y2)
            y_pred[maxprob < min_confidence] = unknown
        else:
            # Unrecognised model name: keep the original behaviour of
            # stopping here without raising.
            return

        true_codes = y2["label"].astype('category').cat.codes

        # Save the per-sample results.
        df = pd.DataFrame({
            'official_name': y2['official_name'].tolist(),
            'max_probability': maxprob.tolist(),
            'probabilities': np.array(probabilities).tolist(),
            'y_pred': y_pred.tolist(),
            'y_true': true_codes.tolist(),
        })
        PlotInstograms(df, "istogramma" + outputname + "-" + modelname)
        df.to_csv("../Data/outputs3/pred-" + outputname + "-" + modelname +
                  "" + ".csv")

        # Save the names of the samples labelled as unknown.
        outliers_names = y2[y_pred == unknown]['official_name']
        print(outliers_names)
        outliers_names.to_csv("../Data/outputs3/outliers-name-" + outputname +
                              "-" + modelname + "" + ".csv",
                              index=False)

        # Reduce to a binary problem: 1 = unknown, 0 = any known class.
        # A direct comparison replaces the former `astype(np.float)` plus
        # in-place masking; `np.float` no longer exists in NumPy >= 1.24.
        y_true = (true_codes == unknown).astype(int)
        y_pred = (y_pred == unknown).astype(int)

        cnf_matrix = confusion_matrix(y_true, y_pred)
        print()
        np.set_printoptions(precision=2)
        # NOTE(review): the Figure created here is discarded immediately, and
        # this attribute chain only resolves if `plt` is bound to the
        # `matplotlib` package rather than `matplotlib.pyplot` -- confirm.
        plt.figure.Figure(figsize=(10, 10))

        plot_confusion_matrix(cnf_matrix,
                              title="" + outputname + "-" + modelname + "",
                              classes=["predicted", "unknown"])

        # The report must use the same binarised ground truth as the
        # confusion matrix; comparing the multi-class codes with binary
        # predictions would produce meaningless scores.
        report = classification_report(y_true, y_pred)
        with open(
                "../Data/outputs3/" + outputname + "-" + modelname + "-" +
                ".txt", 'w') as f:
            print(report, file=f)