def make_plots(train, test, pipelines):
    extensions = ['svg', 'eps', 'png']
    X_train, y_train = load_X_y(train)
    X_test, y_test = load_X_y(test)
    pipelines.sort()
    clf = pipelines._results[0]  # best pipeline after sorting
    y_pred = clf.predict(X_test)
    classifiers_with_predict_proba = find_classifiers_with_predict_proba()
    plt.clf()
    if clf.classifier.__class__.__name__ in classifiers_with_predict_proba:
        y_probas = clf.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(y_test, y_probas)
        plt.plot([0, 1], [0, 1], 'k--')
        plt.plot(fpr, tpr)
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        fig = plt.gcf()
        fig.set_size_inches(4, 3)
        plt.tight_layout()
        for ext in extensions:
            plt.savefig("./figures/roc_curve." + ext)
    else:
        print(clf.classifier.__class__.__name__, "not in predict proba list")
    cm = confusion_matrix(y_target=y_test, y_predicted=y_pred, binary=True)
    plot_confusion_matrix(conf_mat=cm, colorbar=True)
    fig = plt.gcf()
    fig.set_size_inches(4, 3)
    plt.tight_layout()
    for ext in extensions:
        plt.savefig("./figures/confusion_matrix." + ext)
    # Commented out in the original (the call was truncated mid-argument-list
    # inside an unterminated triple-quoted string):
    # plot_learning_curves(X_train, y_train, X_test, y_test,
def reduce_cm(self, cms, save=False):
    if self.hparams.dataset_combo is not None:
        labels = self.train_set.dataset.datasets[0].dataset.cls_labels
    else:
        labels = self.train_set.dataset.cls_labels
    cms = torch.reshape(cms, (-1, self.num_cls, self.num_cls))
    cm = torch.sum(cms, dim=0, keepdim=False)
    iou_cls = iou_from_confmat(cm, num_classes=len(labels))
    logger.debug(f"CM - {cm}")
    logger.info(f"CM IoU - {100 * iou_cls}")
    # Stay in torch throughout: the original mixed np.diag with torch.mean,
    # which raises on a NumPy array.
    recall = torch.diag(cm) / cm.sum(dim=1)
    precision = torch.diag(cm) / cm.sum(dim=0)
    recall_overall = torch.mean(recall)
    precision_overall = torch.mean(precision)
    logger.info(f"precision {100 * precision} ({100 * precision_overall}) | "
                f"recall {100 * recall} ({100 * recall_overall})")
    cm1 = cm / cm.sum(dim=1, keepdim=True)  # row-normalized (per true class)
    cm2 = cm / cm.sum(dim=0, keepdim=True)  # column-normalized (per prediction)
    if save:
        if len(labels) > self.num_cls:
            labels.pop(0)
        confusionmatrix_file = f"{self.hparams.dataset}-{self.hparams.mode}-{self.test_checkpoint}"
        logger.info(f"Saving confusion matrix {confusionmatrix_file}")
        plot_confusion_matrix(cm1.numpy(), labels=labels,
                              filename=confusionmatrix_file + "-1",
                              folder=f"{self.result_folder}")
        plot_confusion_matrix(cm2.numpy(), labels=labels,
                              filename=confusionmatrix_file + "-2",
                              folder=f"{self.result_folder}")
    return 0
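# Note: iou_from_confmat is referenced above but not defined in this file.
# A minimal sketch of per-class IoU from a confusion matrix, assuming rows
# index the true class and columns the prediction (IoU = TP / (TP + FP + FN)):
import torch

def iou_from_confmat_sketch(cm, num_classes):
    tp = torch.diag(cm).float()
    fp = cm.sum(dim=0).float() - tp  # column sums minus diagonal
    fn = cm.sum(dim=1).float() - tp  # row sums minus diagonal
    return tp / (tp + fp + fn)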
def multiclass_report(predictions, col_true='CLASS'):
    class_names = np.unique(predictions[col_true])
    print('Multiclass classification results:')
    y_true = predictions[col_true]
    y_pred = predictions['CLASS_PHOTO']

    acc, acc_err = bootstrap_metric(accuracy_score, y_true, y_pred)
    print('Accuracy = {:.4f} ({:.4f})'.format(acc, acc_err))

    f1 = f1_score(y_true, y_pred, average=None)
    print('F1 per class = {}'.format(f1))

    logloss, logloss_err = bootstrap_metric(
        log_loss, y_true,
        predictions[['GALAXY_PHOTO', 'QSO_PHOTO', 'STAR_PHOTO']])
    print('Logloss = {:.4f} ({:.4f})'.format(logloss, logloss_err))

    # Confusion matrices
    cnf_matrix = confusion_matrix(y_true, y_pred)
    title = 'SDSS' if col_true == 'CLASS' else '2QZ/6QZ'
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, title=title)
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                          title=title)
    plt.show()

    plot_proba_histograms(predictions)
def display_confusion_matrix(normalized=False, predictions=None):
    if predictions is None:  # '== None' breaks on array-like predictions
        predictions = joblib.load("part_1_predictions.pkl")
    y_test = joblib.load("part_1_y_test.pkl")
    # print("Confusion matrix:")
    # print(confusion_matrix(y_test, predictions))
    cm = confusion_matrix(y_test, predictions)
    class_names = joblib.load("part_1_target_names.pkl")
    if normalized:
        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cm, classes=class_names, normalize=True,
                              title='Confusion matrix')
    else:
        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cm, classes=class_names,
                              title='Confusion matrix')
def main():
    # In ed.get_sets(), you can easily change the training set
    # from the 30 first to the 30 last samples per class.
    x_train, x_test, y_train, y_test, t_train, t_test = ed.get_sets()
    alpha = mf.find_optimal_alpha()
    iterations = 10000
    num_features = 4  # if no features have been removed
    num_classes = 3
    W = mf.calculate_W(iterations, alpha, t_train, x_train)
    pred_train = mf.predict(x_train, W)
    pred_test = mf.predict(x_test, W)
    # NOTE: 'plt' here is the project's own plotting module, not matplotlib.
    conf_matrix_test = plt.confusion_matrix(pred_test, y_test, num_classes)
    conf_matrix_train = plt.confusion_matrix(pred_train, y_train, num_classes)
    total_errors_train = mf.find_total_errors(conf_matrix_train)
    total_errors_test = mf.find_total_errors(conf_matrix_test)
    print("The optimal alpha is: ", alpha)
    print("Error rate training: ", total_errors_train / 90)
    print("Error rate test: ", total_errors_test / 60)
    plt.plot_confusion_matrix(
        conf_matrix_train,
        ["Iris-setosa", "Iris-versicolor", "Iris-virginica"])
    plt.plot_confusion_matrix(
        conf_matrix_test,
        ["Iris-setosa", "Iris-versicolor", "Iris-virginica"])
def cross_model_selection(X, Y, pars, _test_size=0.3, save=False):
    # Hold out an evaluation set
    # (sklearn.cross_validation was removed; use model_selection instead)
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, Y, test_size=_test_size, random_state=0)

    # Set the parameters by cross-validation (on X_train only!)
    tuned_parameters = pars
    scores = ['accuracy']
    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()
        scaler = preprocessing.StandardScaler()
        pca = PCA()
        clf = svm.SVC(C=1)
        pca_svm = Pipeline([('scaler', scaler), ('pca', pca), ('clf', clf)])
        best_pipe = GridSearchCV(pca_svm, tuned_parameters, cv=5,
                                 scoring=score, verbose=0)
        best_pipe.fit(X_train, y_train)
        print("Best parameters set found on development set:")
        print()
        print(best_pipe.best_estimator_)
        print()
        print("Grid scores on development set:")
        print()
        # grid_scores_ was removed from sklearn; cv_results_ holds the same data
        results = best_pipe.cv_results_
        for mean_score, std_score, params in zip(results['mean_test_score'],
                                                 results['std_test_score'],
                                                 results['params']):
            print("%0.3f (+/-%0.03f) for %r" % (mean_score, std_score / 2, params))
        print()
        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = y_test, best_pipe.predict(X_test)
        print("Confusion Matrix:\n")
        cm = confusion_matrix(y_true, y_pred)
        print(cm)
        print()
        print("Accuracy:\n")
        print(accuracy_score(y_true, y_pred))
        print()
        print(classification_report(y_true, y_pred))
        print()
        plt.plot_confusion_matrix(cm)
    # Retrain on all the data and save the model to file
    if save:
        best_pipe.best_estimator_.fit(X, Y)
        joblib.dump(best_pipe.best_estimator_, 'model.pkl')
        # scale_pipe = Pipeline(best_pipe.best_estimator_.steps[0:2])
        # X_scaled = scale_pipe.fit_transform(X)
        # plt.contour_plot(X_scaled, Y, best_pipe.best_estimator_)
    return
def show_confusion_matrix(test_y, pred_y, args):
    cm = confusion_matrix(test_y, pred_y, labels=sorted(list(set(test_y))))
    if args.norm:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        np.set_printoptions(precision=2)
    print('Confusion matrix')
    if args.cm:
        print(cm)
    if args.plot:
        from plotting import plot_confusion_matrix  # Import here due to potential matplotlib issues
        plot_confusion_matrix(cm, test_y)
    # labels must be passed by keyword in recent scikit-learn
    print(classification_report(test_y, pred_y,
                                labels=sorted(list(set(test_y)))))
def run_network(n_batch):
    x_train, x_test, y_train, y_test = get_data(argv[1], 5000)
    net = Network(120, n_batch)
    net.train_network(x_train, y_train)
    val = net.pred_network(x_test, y_test)
    # get predictions from network
    y_guesses = [el['classes'] for el in val]
    plot_f1_scores(y_test, y_guesses, n_batch)
    mat = confusion_matrix(y_test, y_guesses).T
    plot_confusion_matrix(mat, n_batch)
def show_confusion_matrix(test_y, pred_y, args):
    cm = confusion_matrix(test_y, pred_y, labels=sorted(list(set(test_y))))
    if args.norm:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        np.set_printoptions(precision=2)
    logging.debug('Showing Confusion Matrix')
    if args.cm:
        print(
            f'\n{pd.DataFrame(cm, index=sorted(list(set(test_y))), columns=sorted(list(set(test_y))))}\n'
        )
    if args.plot:
        from plotting import plot_confusion_matrix  # Import here due to potential matplotlib issues
        plot_confusion_matrix(cm, test_y)
    print(classification_report(test_y, pred_y,
                                labels=sorted(list(set(test_y)))))
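# Aside: recent scikit-learn (>= 0.22) can produce the row-normalized matrix
# directly, avoiding the manual division above. A minimal sketch:
from sklearn.metrics import confusion_matrix

def normalized_cm_sketch(test_y, pred_y):
    labels = sorted(set(test_y))
    # normalize='true' divides each row by the true-class count
    return confusion_matrix(test_y, pred_y, labels=labels, normalize='true')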
def evaluate(self, y_pred, y_test, fold_nr):
    num_classes = self.config.data['classes']
    pred = np.argmax(y_pred, axis=1)
    true = np.argmax(y_test, axis=1)
    metric_functions = {
        'ACC': metrics.accuracy,
        'PREC': metrics.precision,
        'SPEC': metrics.specificity,
        'REC': metrics.recall,
        'F1': metrics.f1,
        'MCC': metrics.mcc
    }
    # TODO: TEST IF MCC WORKS AFTER ALL (AND SPECIFICITY)
    cm = confusion_matrix(true, pred, labels=list(range(num_classes)))
    weighted = class_metric = None
    for metric, compute_metric in metric_functions.items():
        if metric == 'SPEC':
            weighted, class_metric = compute_metric(true, pred, cm, num_classes)
        else:
            weighted, class_metric = compute_metric(true, pred)
        if metric == 'MCC' or metric == 'ACC':
            class_metric = [weighted] * num_classes
        print(f'| {metric}: {weighted:.2f}')
        self.evaluations[metric].append(weighted)
        self.class_evaluations[metric].append(class_metric)
        # Override function with computed metric and return this object
        metric_functions[metric] = weighted
    ACC = metric_functions['ACC']
    MCC = metric_functions['MCC']
    cm_title = f'K: {fold_nr} | ACC: {ACC:.2f} | MCC: {MCC:.2f}'
    np.save(f'{self.config.log_dir}/cm/cm{fold_nr}.npy', cm)
    plotting.plot_confusion_matrix(cm, cm_title, self.config, k=fold_nr)
    return metric_functions
def RF_pipeline(x, name_of_features, y, name_of_classes, train_percent,
                n_jobs=-1, n_estimators=500, directory='RFres/'):
    # split data
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, train_size=train_percent, random_state=0)
    pipeline = Pipeline([('RF', RandomForestClassifier(
        n_jobs=n_jobs, n_estimators=n_estimators, random_state=0,
        class_weight='balanced'))])
    # do the fit and feature selection
    pipeline.fit(x_train, y_train)

    # check accuracy and other metrics:
    y_pred = pipeline.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    # Compute the F1 score
    f1 = f1_score(y_test, y_pred, labels=name_of_classes, average='weighted')

    # make plot of feature importances
    plotting.plot_feature_importance(name_of_features, pipeline,
                                     title='Feature Importance RF',
                                     directory=directory)
    # Compute and plot the confusion matrix
    cnf_matrix = confusion_matrix(y_test, y_pred)
    plotting.plot_confusion_matrix(cnf_matrix, classes=name_of_classes,
                                   directory=directory,
                                   title='Confusion matrix Random Forest')
    return pipeline, y_pred, accuracy, f1
def log_confusion_matrix(self, epoch, predictions, y_true, test_name):
    log_dir = self.tensorboard_callback.log_dir + '/images'
    file_writer = tf.summary.create_file_writer(log_dir)
    class_names = ['GALAXY', 'QSO', 'STAR']
    y_true_decoded = [class_names[i] for i in np.argmax(y_true, axis=1)]
    cm = confusion_matrix(y_true_decoded, predictions['CLASS_PHOTO'])
    cm_fig = plot_confusion_matrix(cm, classes=class_names, normalize=False,
                                   title=None, return_figure=True)
    cm_image = plot_to_image(cm_fig)
    with file_writer.as_default():
        tf.summary.image('confusion matrix - {}'.format(test_name),
                         cm_image, step=epoch)
def basicResults(clfObj, trgX, trgY, tstX, tstY, params, clf_type=None,
                 dataset=None, feature_names=None, scorer='accuracy',
                 complexity_curve=False, complexity_params=None, clf_name=""):
    np.random.seed(55)
    if clf_type is None or dataset is None:
        # a bare 'raise' outside an except block is a RuntimeError
        raise ValueError("clf_type and dataset must both be provided")
    OUTPUT_DIRECTORY = "output"
    print("Starting grid search--------")
    cv = ms.GridSearchCV(clfObj, n_jobs=1, param_grid=params, refit=True,
                         verbose=10, cv=5, scoring=scorer)
    cv.fit(trgX, trgY)
    # export_decision_tree(cv, feature_names, dataset)
    print("Ended grid search--------")
    regTable = pd.DataFrame(cv.cv_results_)
    regTable.to_csv('./output/{}_{}_reg.csv'.format(clf_type, dataset),
                    index=False)
    test_score = cv.score(tstX, tstY)
    test_y_predicted = cv.predict(tstX)

    # Plot confusion matrix
    cnf_matrix = confusion_matrix(tstY, test_y_predicted)
    plt = plot_confusion_matrix(
        cnf_matrix,
        title='Confusion Matrix: {} - {}'.format(clf_type, dataset))
    plt.savefig('{}/images/{}_{}_CM.png'.format(OUTPUT_DIRECTORY, clf_type,
                                                dataset),
                format='png', dpi=150, bbox_inches='tight')
    with open('./output/test results.csv', 'a') as f:
        f.write('{},{},{},{}\n'.format(clf_type, dataset, test_score,
                                       cv.best_params_))
    N = trgY.shape[0]

    # Plot learning curve
    # curve = ms.learning_curve(cv.best_estimator_, trgX, trgY, cv=3,
    #                           train_sizes=np.linspace(0.1, 1.0, 20),
    #                           verbose=10, scoring=scorer)
    curve = ms.learning_curve(cv.best_estimator_, trgX, trgY, cv=3,
                              train_sizes=np.linspace(0.2, 1.0, 10),
                              verbose=10, scoring=scorer)
    curve_train_scores = pd.DataFrame(index=curve[0], data=curve[1])
    curve_test_scores = pd.DataFrame(index=curve[0], data=curve[2])
    curve_train_scores.to_csv('./output/{}_{}_LC_train.csv'.format(clf_type,
                                                                   dataset))
    curve_test_scores.to_csv('./output/{}_{}_LC_test.csv'.format(clf_type,
                                                                 dataset))
    plt = plot_learning_curve('Learning Curve: {} - {}'.format(clf_type, dataset),
                              curve[0], curve[1], curve[2], y_label=scorer)
    plt.savefig('{}/images/{}_{}_LC.png'.format(OUTPUT_DIRECTORY, clf_type,
                                                dataset),
                format='png', dpi=150)
    if complexity_curve:
        make_complexity_curve(trgX, trgY, complexity_params['name'],
                              complexity_params['display_name'],
                              complexity_params['values'], clfObj,
                              clf_name=clf_name, dataset=dataset,
                              dataset_readable_name=dataset)
        print("Drew complexity curve")
    return cv
def confusion_matrix_report(y_test, y_pred_proba, thres=0.5):
    # Binarize the predicted probabilities at the given threshold
    y_pred_proba_customizado = y_pred_proba >= thres
    print(classification_report(y_test, y_pred_proba_customizado))
    plot_confusion_matrix(y_test, y_pred_proba_customizado)
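# A hedged usage sketch for confusion_matrix_report above: pass the
# positive-class probability column from predict_proba and sweep the
# threshold to trade precision against recall. The clf/X_test/y_test
# names here are placeholders, not part of the original.
def report_at_thresholds_sketch(clf, X_test, y_test,
                                thresholds=(0.3, 0.5, 0.7)):
    proba_pos = clf.predict_proba(X_test)[:, 1]  # P(class == 1)
    for t in thresholds:
        confusion_matrix_report(y_test, proba_pos, thres=t)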
training_data = np.vstack((X_positive, X_negative))
testing_data = np.hstack((Y_positive, Y_negative))  # labels for training_data
clf = SVM(C=10)
clf.fit(training_data, testing_data)
Y_evaluate = clf.evaluate(X_test)
evaluate_matrix.append(Y_evaluate)

# (45, 1000)
evaluate_matrix = np.array(evaluate_matrix)
evaluate_matrix = evaluate_matrix.T
prediction = DAG_decide(combination, evaluate_matrix)

# Creating confusion matrix
confusion_matrix = np.zeros((len(numbers), len(numbers)))
for i in range(len(prediction)):
    confusion_matrix[prediction[i]][Y_test[i]] += 1

# Plotting
plot_confusion_matrix(confusion_matrix, 'Confusion Matrix for DAGSVM',
                      'DAGSVM.png')
correct = np.trace(confusion_matrix)
accuracy = (correct / num_test) * 100
print("%d out of %d predictions correct" % (correct, num_test))
print("The accuracy in percentage is ")
print(accuracy)
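# Aside: the hand-rolled matrix above indexes rows by prediction and columns
# by the true label, the transpose of sklearn's convention. An equivalent
# construction (a sketch, assuming integer labels 0..n-1; fully qualified to
# avoid the local name 'confusion_matrix' shadowing the sklearn function):
import numpy as np
import sklearn.metrics

def cm_like_above_sketch(prediction, Y_test, n_classes):
    # sklearn returns rows=true, cols=predicted, so transpose to match
    return sklearn.metrics.confusion_matrix(
        Y_test, prediction, labels=np.arange(n_classes)).T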
    batch_size=batch_size,
    epochs=n_epochs,
    callbacks=[learning_rate_callback, model_checkpoint, tensorboard_callback],
)

model = create_model(
    model_dir=pretrained_model_dir,
    model_type=model_type,
    max_seq_len=max_seq_len,
    n_classes=n_classes,
    load_pretrained_weights=False,
)
model.load_weights(os.path.join(model_dir, "model.h5"))
predictions = model.predict(x_test, batch_size=batch_size)
y_pred = np.argmax(predictions, axis=-1)
logger.info("{} test accuracy: {}".format(model_type.upper(),
                                          accuracy_score(y_test, y_pred)))
_, counts = np.unique(y_test, return_counts=True)
matrix = plotting.plot_confusion_matrix(
    y_test,
    y_pred,
    ["{} ({:d})".format(s, c) for s, c in zip(label_encoder.classes_, counts)],
    figsize=(16, 14),
    normalize=True,
    save_path=os.path.join(model_dir, "confusion_matrix.png"),
)

model_config = {"model_dir": model_dir, "model_type": model_type,
                "max_seq_len": max_seq_len}
loader.export_model_config(
    model_config=model_config,
    pretrained_model_dir=pretrained_model_dir,
    datahelper=datahelper,
)
def make_plots(model, test_loader, outpath, target, device, epoch, which_data):
    print('Making plots on ' + which_data)
    t0 = time.time()

    # load the necessary predictions to make the plots
    gen_ids = torch.load(outpath + '/gen_ids.pt', map_location=device)
    gen_p4 = torch.load(outpath + '/gen_p4.pt', map_location=device)
    pred_ids = torch.load(outpath + '/pred_ids.pt', map_location=device)
    pred_p4 = torch.load(outpath + '/pred_p4.pt', map_location=device)
    cand_ids = torch.load(outpath + '/cand_ids.pt', map_location=device)
    cand_p4 = torch.load(outpath + '/cand_p4.pt', map_location=device)
    list_for_multiplicities = torch.load(outpath + '/list_for_multiplicities.pt',
                                         map_location=device)
    predictions = torch.load(outpath + '/predictions.pt', map_location=device)

    # reformat a bit
    ygen = predictions["ygen"].reshape(-1, 7)
    ypred = predictions["ypred"].reshape(-1, 7)
    ycand = predictions["ycand"].reshape(-1, 7)

    class_labels = ["none", "ch.had", "n.had", "g", "el", "mu"]

    # make confusion matrix for MLPF
    conf_matrix_mlpf = sklearn.metrics.confusion_matrix(
        gen_ids.cpu(), pred_ids.cpu(), labels=range(6), normalize="true")
    plotting.plot_confusion_matrix(
        conf_matrix_mlpf, class_labels,
        fname=outpath + '/conf_matrix_mlpf' + str(epoch), epoch=epoch)
    torch.save(conf_matrix_mlpf,
               outpath + '/conf_matrix_mlpf' + str(epoch) + '.pt')

    # make confusion matrix for rule-based PF
    conf_matrix_cand = sklearn.metrics.confusion_matrix(
        gen_ids.cpu(), cand_ids.cpu(), labels=range(6), normalize="true")
    plotting.plot_confusion_matrix(
        conf_matrix_cand, class_labels,
        fname=outpath + '/conf_matrix_cand' + str(epoch), epoch=epoch)
    torch.save(conf_matrix_cand,
               outpath + '/conf_matrix_cand' + str(epoch) + '.pt')

    # making all the other plots
    if 'test' in which_data:
        sample = "QCD, 14 TeV, PU200"
    else:
        sample = "$t\\bar{t}$, 14 TeV, PU200"

    # distribution plots per pid
    # (1=chhadron, 2=nhadron, 3=photon, 4=electron, 5=muon); the original
    # spelled out one call per pid, collapsed here into a loop
    for pid in [1, 2, 3, 4, 5]:
        plot_distributions_pid(pid, gen_ids, gen_p4, pred_ids, pred_p4,
                               cand_ids, cand_p4,
                               target, epoch, outpath,
                               legend_title=sample + "\n")
    # distribution plots for all classes together
    plot_distributions_all(gen_ids, gen_p4, pred_ids, pred_p4,
                           cand_ids, cand_p4,
                           target, epoch, outpath, legend_title=sample + "\n")

    # make pt, eta plots to visualize dataset
    ax, _ = plot_pt_eta(ygen)
    plt.savefig(outpath + "/gen_pt_eta.png", bbox_inches="tight")

    # particle multiplicity plots (one figure per pid name)
    for pid_name in ["null", "chhadron", "nhadron", "photon", "electron", "muon"]:
        fig, ax = plt.subplots(1, 1, figsize=(8, 2 * 8))
        plot_num_particles_pid(list_for_multiplicities, pid_name, ax)
        plt.savefig(outpath + "/multiplicity_plots/num_" + pid_name + ".png",
                    bbox_inches="tight")
        plt.close(fig)

    # efficiency and fake-rate plots for charged (pid=1)
    # and neutral (pid=2) hadrons
    for pid in [1, 2]:
        for var, bins in [("pt", np.linspace(0, 3, 61)),
                          ("eta", np.linspace(-3, 3, 61)),
                          ("energy", np.linspace(0, 50, 75))]:
            ax, _ = draw_efficiency_fakerate(
                ygen, ypred, ycand, pid, var, bins,
                outpath + f"/efficiency_plots/eff_fake_pid{pid}_{var}.png",
                both=True, legend_title=sample + "\n")

    # resolution plots for pids 1-5
    # (chhadron, nhadron, photon, electron, muon); same binnings as the
    # original per-pid blocks
    for pid in [1, 2, 3, 4, 5]:
        for var, binning in [("pt", 2), ("eta", 0.2), ("energy", 0.2)]:
            fig, ax = plt.subplots(1, 1, figsize=(8, 8))
            plot_reso(ygen, ypred, ycand, pid, var, binning, ax=ax,
                      legend_title=sample + "\n")
            plt.savefig(outpath + f"/resolution_plots/res_pid{pid}_{var}.png",
                        bbox_inches="tight")
            plt.tight_layout()
            plt.close(fig)

    t1 = time.time()
    print('Time taken to make plots is:', round(((t1 - t0) / 60), 2), 'min')
        baseline_score, values, graph_labels, output_file, args.nofigs)

    # Find best confusion matrix
    maxind = np.argmax(values)
    max_cm = cms[maxind]
    max_label = graph_labels[maxind]
    conf_file = '../Figures/' + c + 'CONF_MTX_' + \
        time.strftime("%Y%m%d-%H%M%S") + '.png'
    plot_confusion_matrix(max_cm, c, max_label, conf_file,
                          args.pipeline[0], args.nofigs)

# ##############################################
# SAVE TOP CLFS (SENT ONLY)
# ##############################################
if args.pipeline[0] == 'sent':
    verboseprint("Saving the top clf to pickle dump")
    # Pull best from evaluation from curr_best
    verboseprint("Best overall: %s (params: %s) with %s giving score %f"
                 % (best['clf'], best['params'], best['perm'][0], best['score']))
    # Retrain the model with the data
    if best['params']:
        print(best['params'])
        clfs[best['clf']][0].set_params(**best['params'])
clf.fit(training_data, testing_data)
Y_predict = label_prediction(clf.predict(X_test), pair)
predict_matrix.append(Y_predict)

predict_matrix = np.array(predict_matrix).astype(int)
predict_matrix = predict_matrix.T
# Majority vote across the pairwise classifiers
for row in range(predict_matrix.shape[0]):
    counts = np.bincount(predict_matrix[row])
    prediction.append(np.argmax(counts))
prediction = np.array(prediction)

# Creating confusion matrix
confusion_matrix = np.zeros((len(numbers), len(numbers)))
for i in range(len(prediction)):
    confusion_matrix[prediction[i]][Y_test[i]] += 1

# Plotting
plot_confusion_matrix(confusion_matrix,
                      'Confusion Matrix for One-Versus-One SVM',
                      'one-versus-one.png')
correct = np.trace(confusion_matrix)
accuracy = (correct / num_test) * 100
print("%d out of %d predictions correct" % (correct, num_test))
print("The accuracy in percentage is ")
print(accuracy)
    X_train_clustered, number)
training_data = np.vstack((X_positive, X_negative))
testing_data = np.hstack((Y_positive, Y_negative))  # labels for training_data
print("[SVM] Start Training SVM...")
clf = SVM(C=10)
clf.fit(training_data, testing_data)
Y_predict = clf.predict(X_test)
prediction_binary.append(Y_predict)

# Pick the class whose binary (one-vs-rest) classifier scored highest
prediction = np.argmax(np.array(prediction_binary), axis=0)

# Creating confusion matrix
confusion_matrix = np.zeros((len(numbers), len(numbers)))
for i in range(len(prediction)):
    confusion_matrix[prediction[i]][Y_test[i]] += 1

# Plotting
plot_confusion_matrix(confusion_matrix,
                      'Confusion Matrix for One-Versus-The-Rest SVM',
                      'one_versus_the_rest.png')
correct = np.trace(confusion_matrix)
accuracy = (correct / num_test) * 100
print("%d out of %d predictions correct" % (correct, num_test))
print("The accuracy in percentage is ")
print(accuracy)
# Confusion matrix and ROC curve for model accuracy.

# In[22]:

from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
from plotting import plot_confusion_matrix

cm_nn = confusion_matrix(y_test, y_pred_nn)
fpr, tpr, thresholds = roc_curve(y_test, y_pred_nn)
plot_confusion_matrix(cm_nn, ['Died', 'Survived'])

# In[23]:

plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve - Neural Network')
plt.show()
roc_auc_score(y_test, y_pred_nn)

# ___
        loss_save += loss.item()
    losses.append(loss_save)
    e_epoch = time.time()
    print('Epoch: ', epoch, ' Loss: ', losses[-1], " Took: ", e_epoch - s_epoch)

end = time.time()
print("Elapsed:", end - start)

# Evaluation
out = cnn(Variable(test_data.X.to(device)))
y_pred = (out.data.argmax(dim=1)).cpu().numpy()
y_test = test_data.Y.numpy()

# get classification rates
avg_class_rate = np.sum(np.equal(y_pred, y_test)) / len(y_test)
class_rate_per_class = [0.0] * num_classes
for c in range(num_classes):
    argC = np.argwhere(y_test == c)
    class_rate_per_class[c] = np.sum(np.equal(y_pred[argC], c)) / len(argC)
print("Accuracy:", avg_class_rate)
print("Rate Per Class:", class_rate_per_class)

import plotting
plotting.plot_confusion_matrix(y_test, y_pred)
plotting.plot_loss_vs_epoch(losses)
classes = ['C1', 'C2', 'C3', 'C4']

# KMeans
i = 1
if i in equivalences:
    adjust_cluster = adjust_clusters(kmeans_clusters, equivalences[i])
else:
    adjust_cluster = kmeans_clusters
cm_kmeans = build_cm(NC, true_clusters, adjust_cluster)
fig, ax = plt.subplots()
plot_confusion_matrix(cm_kmeans, classes, normalize=True,
                      title='Confusion matrix K-Means',
                      cmap=plt.cm.Blues, xlabel="Predicted", ylabel="True")
plt.savefig("../figures/confusion-matrix-kmeans-ds4")

# PCA
i = 2
if i in equivalences:
    adjust_cluster = adjust_clusters(pca_clusters, equivalences[i])
else:
    adjust_cluster = pca_clusters
cm_pca = build_cm(NC, true_clusters, adjust_cluster)
fig, ax = plt.subplots()
y_val = y_val.reshape(-1, 1)
x_test = x_test.reshape(-1, 1, 28 * 28).astype(np.float32)
y_test = y_test.reshape(-1, 1)

# NOTE: 'sparse' was renamed to 'sparse_output' in scikit-learn 1.2
# and removed in 1.4; this call targets older versions.
encoder = OneHotEncoder(categories='auto', sparse=False)
y_train_encoded = encoder.fit_transform(y_train)
y_val_encoded = encoder.transform(y_val)
assert (np.argmax(y_train_encoded, axis=1) == y_train.ravel()).all()
assert (np.argmax(y_val_encoded, axis=1) == y_val.ravel()).all()

network = NeuralNetwork([784, 16, 10],
                        activation_functions=['sigmoid', 'softmax'])
history = network.fit(x_train, y_train_encoded, x_val, y_val_encoded,
                      batch_size=32, epochs=20, lr=1.5, l2=1.0, verbose=1,
                      compute_loss=True, compute_accuracy=True)
plot_training_history(history['train_loss'], history['test_loss'],
                      history['train_accuracy'], history['test_accuracy'])

# evaluate model on test data
y_pred = np.array([network.predict(x) for x in x_test])
plot_confusion_matrix(y_test, y_pred, classes=np.arange(10), normalize=True)
plt.show()
f"pred {p_idx+1} | target {g_idx+1} -> loss {loss_sord}") logger.debug(f"SORD -> {loss_sord}") cm_sord[g_idx][p_idx] = loss_sord.item() cm_kl[g_idx][p_idx] = loss_kl.item() if cm_kl[g_idx][p_idx] < 0.0000001: cm_kl[g_idx][p_idx] = 0 logger.debug(cm_sord) rankings = "|" + "|".join([str(l) for l in level.values()]) + "|" from plotting import plot_confusion_matrix plot_confusion_matrix( cm_sord, labels=["impossible", "possible", "preferable"], filename=f"sordloss-{rankings}-dist{args.dist}-alpha{args.alpha}", folder="results/sordloss", vmax=None, cmap="gray_r", cbar=False, annot=True, vmin=0) plot_confusion_matrix(cm_kl, labels=["impossible", "possible", "preferable"], filename=f"klloss", folder="results/sordloss", vmax=None, cmap="gray_r", cbar=False, annot=True, vmin=0) # # # # level = {
def visualization(X, X_test, X_train, y_train, y_test, y_pred_train, y_pred,
                  df, y, label_names, pred_proba, score, filenames,
                  filenames_train, filenames_test):
    """
    Produces visualizations of
    - confusion matrices for the train and test sets (+ normalized versions)
    - scatterplot collage with classification results and useful information
    - image scatterplots of categories of interest

    :param X: image features of the full dataset
    :param X_test: image features from test
    :param X_train: image features from train
    :param y_train: y encoded into integers for train data
    :param y_test: y encoded into integers for test data
    :param y_pred_train: predicted encoded labels for training data
    :param y_pred: predicted encoded labels for testing data
    :param df: pandas dataframe of the metadata given in properties.csv
    :param y: y with original label names
    :param label_names: list of all available classes
    :param pred_proba: confidence of the classifier for the test samples
    :param score: test score
    :param filenames: filenames of the full dataset
    :param filenames_train: filenames of the train set
    :param filenames_test: filenames of the test set
    """
    print('>> Visualization')

    ### confusion matrices ###
    if len(np.unique(y_train)) == len(label_names):
        cm_train = metrics.confusion_matrix(y_train, y_pred_train)
        plotting.plot_confusion_matrix(cm_train, classes=label_names,
                                       img_name="absolute_cupsnbottles_train",
                                       cmap=plt.cm.Blues)
        plotting.plot_confusion_matrix(
            cm_train, classes=label_names,
            img_name="norm_cupsnbottles_train", normalize=True,
            title='Normalized confusion matrix, training data',
            cmap=plt.cm.Blues)
    if len(np.unique(y_test)) == len(label_names):
        cm = metrics.confusion_matrix(y_test, y_pred)
        plotting.plot_confusion_matrix(cm, classes=label_names,
                                       img_name="absolute_cupsnbottles",
                                       cmap=plt.cm.Greens)
        plotting.plot_confusion_matrix(cm, classes=label_names,
                                       img_name="norm_cupsnbottles",
                                       normalize=True,
                                       title='Normalized confusion matrix',
                                       cmap=plt.cm.Greens)

    ### t-SNE scatterplot ###
    if pred_proba is not None:
        title = (classifier + ', trained on ' + str(len(X_train))
                 + ' samples. Score: ' + str(score))
        X_embedded = plotting.t_sne_plot(X, X_test, y_test, y_pred,
                                         filenames_test, pred_proba,
                                         label_names, title,
                                         config.num_samples, classifier,
                                         "cupsnbottles", dims)

    ### image scatterplots ###
    X_all_embedded = tools.t_sne(X)
    indices_to_plot = None

    # image scatterplot of misclassifications, with frame depicting
    # classification confidence
    inds_misclassification = np.argwhere(y_pred != y_test).flatten()
    if len(inds_misclassification) > 0:
        imgs = tools.load_images(config.path_dataset,
                                 filenames_test[inds_misclassification],
                                 filenames)
        title_imgs = (str(len(imgs))
                      + ' test samples that were misclassified by '
                      + classifier)
        plotting.image_conf_scatter(X_all_embedded, imgs,
                                    filenames_test[inds_misclassification],
                                    filenames, title_imgs,
                                    pred_proba[inds_misclassification],
                                    'misclassifications')

    # image scatterplot of ambiguous test samples, with frame denoting
    # classification success
    if config.ambiguous_test_part > 0:
        indicesAmbiguous = np.array(df.loc[(df.ambiguous == 1)
                                           & (df.overlap == 0)]["index"])
        files_to_plot = np.intersect1d(indicesAmbiguous, filenames_test)
        imgs = tools.load_images(config.path_dataset, files_to_plot, filenames)
        title_imgs = (str(len(imgs))
                      + ' ambiguous samples as classified by ' + classifier)
        _, inds_in_test, _ = np.intersect1d(filenames_test, files_to_plot,
                                            return_indices=True)
        plotting.image_conf_scatter(X_all_embedded, imgs, files_to_plot,
                                    filenames, title_imgs,
                                    pred_proba[inds_in_test], 'ambiguous')

    # image scatterplot of overlap test samples, with frame denoting
    # classification success
    if config.overlap_test_part > 0:
        indicesOverlap = np.array(df.loc[(df.ambiguous == 0)
                                         & (df.overlap == 1)]["index"])
        files_to_plot = np.intersect1d(indicesOverlap, filenames_test)
        imgs = tools.load_images(config.path_dataset, files_to_plot, filenames)
        title_imgs = (str(len(imgs))
                      + ' overlap samples as classified by ' + classifier)
        _, inds_in_test, _ = np.intersect1d(filenames_test, files_to_plot,
                                            return_indices=True)
        plotting.image_conf_scatter(X_all_embedded, imgs, files_to_plot,
                                    filenames, title_imgs,
                                    pred_proba[inds_in_test], 'overlap')

    # image scatterplot of lowest-confidence samples (100 images by default)
    if pred_proba is not None:
        default_nb = 100
        if len(pred_proba) < default_nb:
            default_nb = len(pred_proba)
        pred_proba, filenames_test = (list(t) for t in zip(
            *sorted(zip(pred_proba, filenames_test))))
        imgs = tools.load_images(config.path_dataset, np.arange(default_nb),
                                 filenames_test)
        title_imgs = (str(default_nb)
                      + ' lowest confidence samples as classified by '
                      + classifier)
        plotting.image_conf_scatter(X_all_embedded, imgs,
                                    filenames_test[:default_nb], filenames,
                                    title_imgs, pred_proba[:default_nb],
                                    'lowest_confidence')

    print('>> DONE Visualization')
# instantiate classifiers...
hyp = {'n_estimators': [10, 300, 500], 'max_depth': [None, 1, 3, 10, 50, 100]}
# hyp = {'n_estimators': [1], 'max_depth': [1, 3]}
# hyp = {'max_iter': [50, 100, 200], 'hidden_layer_sizes': [(50,), (100,)]}
rf = RandomForestClassifier(n_jobs=-1, verbose=2)
mlp = MLPClassifier(verbose=True)
gridcv = GridSearchCV(rf, hyp, verbose=2)

# train and evaluate them
gridcv.fit(X_train, y_train)
bestest = gridcv.best_estimator_
score = bestest.score(X_test, y_test)
print("SCORE: ", score)
print("BEST ESTIMATOR: ", bestest)
preds = bestest.predict(X_test)
cnfmat = confusion_matrix(y_test, preds,
                          labels=np.arange(np.max(y_train) + 1))
conf_file = '../Figures/RF_EMB_CONF_MTX_' + \
    time.strftime("%Y%m%d-%H%M%S") + '.png'
plot_confusion_matrix(cnfmat, 'RF', 'EMB', conf_file, 'emoji', False)
Y_valid = test_data[:, -1]
X_test = test_data[:, 0:acc_featur_n]
Y_test = test_data[:, -1]
accuracy_mat = np.zeros([testing_round, 6])

# SVM classifier training
clf_svm = svm.SVC()
clf_svm.fit(X, Y)
pred = clf_svm.predict(X_test)
print('SVM ACC = %f' % accuracy_score(pred, Y_test))
c_mat = confusion_matrix(Y_test, pred,
                         labels=["stand", "back", "right", "left", "stomach"])
classes = np.array(['Standing', 'Supine', 'Right Lateral', 'Left Lateral',
                    'Prone'], dtype='S10')
if PLOT_ON:
    plt.figure()
    plotting.plot_confusion_matrix(
        c_mat, classes, title='Confusion matrix for SVM, without normalization')
    plt.figure()
    plotting.plot_confusion_matrix(
        c_mat, classes, normalize=True,
        title='Confusion matrix for SVM, with normalization')
# print('Confusion Matrix:')
# print("      sd    bk    rt    lf    sm")
# print(c_mat)
# print('please press ENTER')

# Random forest classifier training
for it in range(testing_round):
    print('Training data size: %d row, %d col' % (row_n, col_n))
    print('Training process for ACC data: %d X %d ...' % (len(X), len(X[0, :])))
    all_accuracies = np.zeros(50, dtype=float)
    clfs = list()