def _write_bnn_report(report_path, probabilities, title, true_labels, names):
    """Plot the confusion matrix and save the classification report for one table.

    Args:
        report_path: Text file that receives the classification report; it is
            opened in write mode, so any existing content is replaced.
        probabilities: DataFrame of per-class scores, one row per sample. The
            predicted class is the column position of the row maximum.
        title: Title handed to ``plot_confusion_matrix``.
        true_labels: DataFrame of reference labels aligned with ``probabilities``.
        names: Class names handed to ``plot_confusion_matrix``.
    """
    with open(report_path, 'w') as f:
        y_pred = np.argmax(probabilities.values, axis=1)
        cnf_matrix = confusion_matrix(true_labels.values, y_pred)
        # NOTE(review): if `plt` is matplotlib.pyplot, `plt.figure` is a function
        # and has no `Figure` attribute; `plt.figure(figsize=(10, 10))` may have
        # been intended. Left untouched because the imports are not visible here.
        plt.figure.Figure(figsize=(10, 10))
        plot_confusion_matrix(cnf_matrix, title=title, classes=names)
        print(classification_report(true_labels, y_pred), file=f)


def plot_bnn_results():
    """Evaluate the saved BNN predictions of each data type and of their fusion.

    Reads the three prediction tables ``pred-bnn-<type>.csv`` and the reference
    labels ``true-labels.csv`` from ``../Data/outputs/``. For every table, and
    for the element-wise maximum of the three tables, it plots a confusion
    matrix and writes a classification report to a text file in that directory.

    Takes no arguments and returns nothing; the results are the plots and the
    four report files.
    """
    output_dir = "../Data/outputs/"
    annotation_path = "../Data/data/preprocessed_annotation_global.csv"
    model = "bnn"
    filename = ["mrna", "meth", "micro mrna"]
    # Report names are irregular (spaces vs. dashes, "micro-rna"); they are kept
    # exactly as they were so existing consumers of the files still find them.
    report_names = ["bnn mrna.txt", "bnn meth.txt", "bnn-micro-rna.txt"]

    # "Unnamed: 0" is presumably the index column stored when the tables were
    # written with DataFrame.to_csv; it is not a class score.
    tables = [
        pd.read_csv(output_dir + "pred-" + model + "-" + name + ".csv").drop(
            columns=["Unnamed: 0"])
        for name in filename
    ]

    # The reference labels and class names are identical for every report, so
    # they are loaded once instead of once per report.
    target = pd.read_csv(output_dir + "true-labels.csv").drop(
        columns=["Unnamed: 0"])
    # The row with index label 0 is discarded before comparing.
    # NOTE(review): presumably a placeholder row written by the training code -
    # confirm against the code that writes true-labels.csv.
    true_labels = target.drop(0)
    names = pd.read_csv(annotation_path)["label"].astype(
        'category').cat.categories

    np.set_printoptions(precision=2)

    for report_name, table, name in zip(report_names, tables, filename):
        _write_bnn_report(output_dir + report_name, table,
                          model + "-" + name, true_labels, names)

    # Fusion of the three data types: keep, for every sample and class, the
    # highest score any of the three tables assigned.
    data1, data2, data3 = tables
    fused = np.maximum(data1, np.maximum(data2, data3))
    _write_bnn_report(output_dir + "bnn comparison.txt", fused,
                      "comparisonbnn", true_labels, names)
scores = np.append(scores, totalscore) components = np.append(components, n_components) ##components.append(n_components) # print("final score : %f" % totalscore) print("plot") cnf_matrix = confusion_matrix( y_test['label'].astype('category').cat.codes, y_pred) # plt.figure(figsize=(10, 10)) # plot_roc(names.shape[0], y_pred, y_test_bal, names, title) print() np.set_printoptions(precision=2) # PlotDir non-normalized confusion matrix plt.figure.Figure(figsize=(10, 10)) plot_confusion_matrix(cnf_matrix, title="without-unknown-testset-" + modelname + "-" + filename, classes=names2) with open( "../Data/outputs/without-unknown-testset-" + modelname + "-" + filename + ".txt", 'w') as f: print(classification_report( y_test['label'].astype('category').cat.codes, y_pred, ), file=f) if modelname == "bnn": # clf = bnn.BNN(n_components, 20, 5) # clf.train_step(x_train_transformed, y_train) tot, correct_predictions, predicted_for_images, new_prediction, probabilities = clf.test_batch( torch.from_numpy(X_transformed).float(), y2, names, plot=False)
max_constraint = torch.max(constraint, dim=1) y_pred[(max_prob.values / 3 < 0.9) | (max_constraint.values < 0.25)] = 5 y_true = X['y_true'] #y_true=y_true[max_prob.values.numpy()/3<0.9] print("plot") cnf_matrix = confusion_matrix(y_true, y_pred) # plt.figure(figsize=(10, 10)) # plot_roc(names.shape[0], y_pred, y_test_bal, names, title) print() np.set_printoptions(precision=2) # PlotDir non-normalized confusion matrix plt.figure.Figure(figsize=(10, 10)) plot_confusion_matrix(cnf_matrix, title="with-unknown-testset-" + modelname + "-comparison-new", classes=names) with open( "../Data/outputs/with-unknown-testset-" + modelname + "-comparison-new.txt", 'w') as f: print(classification_report( y_true, y_pred, ), file=f) path = "../Data/outputs/pred-stomaco-" for modelname in modelnames: data = [] for filename in filenames: X = pd.read_csv(path + modelname + "-" + filename + ".csv")
axis=1)) == labels).sum().item() print("accuracy: %d %%" % (100 * correct / total)) import pandas as pd pd.DataFrame(probabilities).to_csv("../Data/outputs/pred-" + modelname + filename + ".csv") pd.DataFrame(true_labels).to_csv("../Data/outputs/true-labels.csv") cnf_matrix = confusion_matrix(true_labels[1:], np.argmax(probabilities, axis=1)) print() np.set_printoptions(precision=2) # PlotDir non-normalized confusion matrix plt.figure.Figure(figsize=(10, 10)) plot_confusion_matrix(cnf_matrix, title=modelname + filename, classes=names) X2 = pd.read_csv("../Data/data/anomalies_preprocessed_Matrix_" + filename + ".csv", index_col=False, header=None) y2 = pd.read_csv( "../Data/data/anomalies_preprocessed_annotation_global.csv" )["label"] if filename == "mrna": X2 = pd.DataFrame(X2[X2.std().sort_values( ascending=False).head(1200).index].values.tolist()) X_transformed = pca.transform(X2) y_pred = outlier_detector.predict(X_transformed) plot_outliers(X_transformed, y_pred, X_train_transformed,
true_labels.append(y_test) print(filename) print("best parameters") # print(model.best_params_) print("Confusion matrix") #totalscore = accuracy_score(y_test, y_pred) #print("final score : %f" % totalscore) cnf_matrix = confusion_matrix(y_test, y_pred) # plt.figure(figsize=(10, 10)) # plot_roc(names.shape[0], y_pred, y_test_bal, names, title) print() np.set_printoptions(precision=2) # PlotDir non-normalized confusion matrix plt.figure.Figure(figsize=(10, 10)) plot_confusion_matrix(cnf_matrix, title=modelname + "-" + filename, classes=names) print(modelname + filename + " " + str(model.best_params_), file=f) print(classification_report( y_test, y_pred, ), file=f) names = np.append(names, "unknown") unknown_index = np.logical_not( np.logical_or( predictions[0] == predictions[1], np.logical_or(predictions[0] == predictions[2], predictions[1] == predictions[2]))) y_pred[predictions[0] == predictions[1]] = predictions[0][predictions[0] ==
filename + ".csv", index_col=False, header=None) y2 = pd.read_csv( "../Data/data/anomalies_preprocessed_annotation_global.csv" )["label"] if filename == "mrna": X2 = pd.DataFrame(X2[X2.std().sort_values( ascending=False).head(1200).index].values.tolist()) X_transformed = pca.transform(X2) print('Prediction when network is forced to predict') probabilities, true_labels = clf.test_forced( X_transformed, y2.astype('category').cat.codes) y_pred = np.argmax(probabilities, axis=1) y_pred[np.max(probabilities, axis=1) < 0.6] = 5 cnf_matrix = confusion_matrix(true_labels[1:], np.argmax(probabilities, axis=1)) # plt.figure(figsize=(10, 10)) # plot_roc(names.shape[0], y_pred, y_test_bal, names, title) print() np.set_printoptions(precision=2) # PlotDir non-normalized confusion matrix plt.figure.Figure(figsize=(10, 10)) plot_confusion_matrix(cnf_matrix, title=modelname + "-anomalies-" + filename, classes=names) plot_mlp_results()
matrix.sum(2), matrix.sum(2).sum(1).view(y_pred.shape[0], 1)) max_prob = torch.max(matrix.sum(2), dim=1) max_constraint = torch.max(constraint, dim=1) y_pred[(max_prob.values / 3 < 0.9) | (max_constraint.values < 0.25)] = 5 else: pathto = "/last_step-notconservative-" + dataset + "-" + modelname cnf_matrix = confusion_matrix(y_true, y_pred) print("plot") np.set_printoptions(precision=2) # PlotDir non-normalized confusion matrix plt.figure.Figure(figsize=(10, 10)) plot_confusion_matrix(cnf_matrix, title=pathto + "-confusionmatrix", classes=names) with open(outputpath + pathto + "-comparison-new.txt", 'w') as f: print("total predicted as unknown " + str( (y_pred.numpy() == 5).sum()), file=f) y_true2 = y_true[y_pred.numpy() != 5] y_pred2 = y_pred[y_pred.numpy() != 5] if y_pred2.shape[0] != 0: print(classification_report( y_true2, y_pred2, ), file=f) else:
print("Total images: ", tot) print("Predicted for: ", predicted_for_images) print("Accuracy when predicted: ", correct_predictions / predicted_for_images) print("Confusion matrix") # totalscore = accuracy_score(y_test,new_prediction) # print("final score : %f" % totalscore) cnf_matrix = confusion_matrix(y_test, new_prediction) # plt.figure(figsize=(10, 10)) # plot_roc(names.shape[0], y_pred, y_test_bal, names, title) print() np.set_printoptions(precision=2) # PlotDir non-normalized confusion matrix plt.figure.Figure(figsize=(10, 10)) plot_confusion_matrix(cnf_matrix, title=modelname + "-" + filename, classes=names.append(pd.Index(["Unknown"]))) # print(modelname + filename + " " + str(model.best_params_), file=f) print(classification_report( y_test, new_prediction, ), file=f) X2 = pd.read_csv("../Data/data/anomalies_preprocessed_Matrix_" + filename + ".csv", index_col=False, header=None) y2 = pd.read_csv( "../Data/data/anomalies_preprocessed_annotation_global.csv" )["label"]
def train_and_test(x_train_transformed, y_train, x_test_transformed, y_test,
                   n_components, names, outputname):
    """Train every supported classifier, evaluate it on the test set and save results.

    The classifiers are trained in the order ``rotationForest``, ``mlptree``,
    ``mlp``, ``bnn``. For each one the function writes, under
    ``../Data/outputs3/``: a CSV with the per-sample predictions, a CSV with the
    names of the samples predicted as label 5, and a text classification
    report. It also plots a histogram and a confusion matrix through the
    project helpers.

    Args:
        x_train_transformed: Training features, passed to ``train_step``.
        y_train: Training labels, passed to ``train_step``.
        x_test_transformed: Test features as a NumPy array (it is converted
            with ``torch.from_numpy`` for the BNN).
        y_test: DataFrame with at least the columns ``label`` and
            ``official_name``.
        n_components: First constructor argument of the BNN, MLP and MlpTree.
        names: Class names; must support ``.append(pd.Index([...]))``.
        outputname: Tag inserted in every output file name and plot title.

    Returns:
        A tuple ``(models, modelnames)`` with the trained classifiers and their
        names, in matching order.
    """
    # Label written over a prediction whose confidence is too low.
    # NOTE(review): 5 presumably is the position of the extra "Unknown" class
    # appended to `names` below - confirm that `names` always has 5 entries.
    unknown_label = 5
    min_confidence = 0.9
    output_dir = "../Data/outputs3/"

    # Name -> factory. Factories are lazy so a classifier is only built (and
    # timed) when its turn comes.
    builders = [
        ("rotationForest", lambda: RotationForest()),
        ("mlptree", lambda: MlpTree(n_components, 20, 5)),
        ("mlp", lambda: MLP(n_components, 20, 5)),
        ("bnn", lambda: BNN(n_components, 20, 5)),
    ]
    modelnames = [name for name, _ in builders]
    models2 = []

    # Integer codes of the true labels; identical for every classifier.
    true_codes = y_test["label"].astype('category').cat.codes

    for modelname, build in builders:
        start = time.time()
        clf = build()
        clf.train_step(x_train_transformed, y_train)
        # Only construction and training are timed, not the evaluation.
        end = time.time()

        if modelname == "bnn":
            # The BNN returns its final predictions directly; no confidence
            # threshold is applied to them here.
            _, _, _, y_pred, probabilities = clf.test_batch(
                torch.from_numpy(x_test_transformed).float(),
                y_test,
                names,
                plot=False)
            maxprob = np.max(probabilities, axis=1)
        elif modelname == "mlptree":
            maxprob, y_pred, _, probabilities = clf.test_forced(
                x_test_transformed, y_test)
            y_pred[maxprob < min_confidence] = unknown_label
        else:
            # "mlp" and "rotationForest" share the same evaluation protocol.
            probabilities, _ = clf.test_forced(x_test_transformed, y_test)
            y_pred = np.argmax(probabilities, axis=1)
            maxprob = np.max(probabilities, axis=1)
            y_pred[maxprob < min_confidence] = unknown_label

        # save results
        df = pd.DataFrame({
            'official_name': y_test['official_name'].tolist(),
            'max_probability': maxprob.tolist(),
            'probabilities': np.array(probabilities).tolist(),
            'y_pred': y_pred.tolist(),
            'y_true': true_codes.tolist()
        })
        print("time computation for " + modelname + " is " + str(end - start))

        run_name = outputname + "-" + modelname
        df.to_csv(output_dir + "pred-" + run_name + ".csv")
        PlotInstograms(df, "istogramma" + run_name)

        # save the names of the samples predicted as the unknown label
        outliers_names = y_test[y_pred == unknown_label]['official_name']
        outliers_names.to_csv(output_dir + "outliers-" + run_name + ".csv",
                              index=False)

        print("plot")
        cnf_matrix = confusion_matrix(true_codes, y_pred)
        print()
        np.set_printoptions(precision=2)
        # NOTE(review): if `plt` is matplotlib.pyplot, `plt.figure` is a function
        # and has no `Figure` attribute; `plt.figure(figsize=(10, 10))` may have
        # been intended. Left untouched because the imports are not visible here.
        plt.figure.Figure(figsize=(10, 10))
        plot_confusion_matrix(cnf_matrix,
                              title="with-unknown-" + run_name,
                              classes=names.append(pd.Index(["Unknown"])))

        # The trailing "-" before ".txt" is part of the established file name.
        with open(output_dir + run_name + "-" + ".txt", 'w') as f:
            print(classification_report(true_codes, y_pred), file=f)

        models2.append(clf)

    return models2, modelnames
def test(x_transformed, y2, models, modelnames, names, outputname):
    """Evaluate already-trained classifiers as known/unknown detectors on a dataset.

    For every ``(clf, modelname)`` pair the function predicts on
    ``x_transformed`` and writes, under ``../Data/outputs3/``: a CSV with the
    per-sample predictions, a CSV with the names of the samples predicted as
    label 5, and a text classification report. It also plots a histogram and a
    2x2 confusion matrix ("predicted" vs "unknown") through the project helpers.

    Args:
        x_transformed: Features as a NumPy array (it is converted with
            ``torch.from_numpy`` for the BNN).
        y2: DataFrame with at least the columns ``label`` and
            ``official_name``.
        models: Trained classifiers, in the same order as ``modelnames``.
        modelnames: One of ``"bnn"``, ``"mlp"``, ``"rotationForest"``,
            ``"mlptree"`` per classifier. An unrecognised name stops the
            function immediately; later classifiers are not evaluated.
        names: Class names forwarded to the BNN's ``test_batch``.
        outputname: Tag inserted in every output file name and plot title.

    Returns:
        None.
    """
    # Label that marks a sample as unknown, both in predictions and label codes.
    unknown_label = 5
    min_confidence = 0.9
    output_dir = "../Data/outputs3/"

    for clf, modelname in zip(models, modelnames):
        if modelname == "bnn":
            # The BNN returns its final predictions directly; no confidence
            # threshold is applied to them here.
            _, _, _, y_pred, probabilities = clf.test_batch(
                torch.from_numpy(x_transformed).float(),
                y2,
                names,
                plot=False)
            maxprob = np.max(probabilities, axis=1)
        elif modelname in ("mlp", "rotationForest"):
            probabilities, _ = clf.test_forced(x_transformed, y2)
            y_pred = np.argmax(probabilities, axis=1)
            maxprob = np.max(probabilities, axis=1)
            y_pred[maxprob < min_confidence] = unknown_label
        elif modelname == "mlptree":
            maxprob, y_pred, _, probabilities = clf.test_forced(
                x_transformed, y2)
            y_pred[maxprob < min_confidence] = unknown_label
        else:
            # NOTE(review): an unrecognised name silently aborts the whole
            # evaluation; kept as-is because callers may rely on it.
            return

        label_codes = y2["label"].astype('category').cat.codes

        # save results
        df = pd.DataFrame({
            'official_name': y2['official_name'].tolist(),
            'max_probability': maxprob.tolist(),
            'probabilities': np.array(probabilities).tolist(),
            'y_pred': y_pred.tolist(),
            'y_true': label_codes.tolist()
        })
        run_name = outputname + "-" + modelname
        PlotInstograms(df, "istogramma" + run_name)
        df.to_csv(output_dir + "pred-" + run_name + ".csv")

        outliers_names = y2[y_pred == unknown_label]['official_name']
        print(outliers_names)
        outliers_names.to_csv(output_dir + "outliers-name-" + run_name + ".csv",
                              index=False)

        # Collapse both vectors to a binary problem: 1 = unknown, 0 = any
        # known class. The builtin `float` is used because the `np.float`
        # alias was removed in NumPy 1.24.
        y_true = (label_codes == unknown_label).astype(int)
        y_pred = (y_pred == unknown_label).astype(float)

        cnf_matrix = confusion_matrix(y_true, y_pred)
        print()
        np.set_printoptions(precision=2)
        # NOTE(review): if `plt` is matplotlib.pyplot, `plt.figure` is a function
        # and has no `Figure` attribute; `plt.figure(figsize=(10, 10))` may have
        # been intended. Left untouched because the imports are not visible here.
        plt.figure.Figure(figsize=(10, 10))
        plot_confusion_matrix(cnf_matrix,
                              title=run_name,
                              classes=["predicted", "unknown"])

        # The trailing "-" before ".txt" is part of the established file name.
        with open(output_dir + run_name + "-" + ".txt", 'w') as f:
            # Report on the same binary vectors as the confusion matrix. The
            # raw multi-class codes are not comparable with the binarised
            # predictions.
            print(classification_report(y_true, y_pred), file=f)