def save_confusion_matrix(y_test, y_pred, target_names, path, figsize=(15, 15), suffix=''):
    """Render a normalized confusion matrix and save it as a PNG under *path*.

    ``y_test`` and ``y_pred`` are reduced to class indices with
    ``np.argmax(..., axis=1)`` before the matrix is computed. The figure is
    written to ``Confusion_matrix<suffix>.png`` and closed afterwards.
    """
    # Imported locally, as in the original implementation.
    import matplotlib.pyplot as plt

    true_classes = np.argmax(y_test, axis=1)
    predicted_classes = np.argmax(y_pred, axis=1)
    cm = confusion_matrix(true_classes, predicted_classes)

    plt.figure(figsize=figsize)
    plot_confusion_matrix(cm, target_names, normalize=True, suffix=suffix)

    file_name = 'Confusion_matrix{}.png'.format(suffix)
    plt.savefig(os.path.join(path, file_name), bbox_inches='tight')
    plt.close()
def crossval_on_all(noise_session, gesture_data, n_splits=10):
    """Cross-validate on the full gesture dataset and return the mean accuracy.

    The gesture dataset is split ``n_splits`` times with a stratified shuffle
    split keyed on ``labels_gesture_type``. Each fold is scored with
    ``evaluate_fold``; per-fold and mean accuracy/F1 are printed, and the sum
    of the per-fold confusion matrices is plotted (normalized).
    """
    dataset = gesture_data.dataset()
    noise_dataset = noise_session.dataset()

    fold_accuracies = np.zeros((n_splits, ))
    fold_f1 = np.zeros((n_splits, ))
    confusion_matrices = np.zeros((n_splits, N_CLASSES, N_CLASSES))

    # Stratified splitting only needs the labels; the feature argument is a
    # placeholder of matching length.
    splitter = StratifiedShuffleSplit(n_splits=n_splits)
    placeholder = np.zeros(dataset.length())
    folds = splitter.split(placeholder, dataset.labels_gesture_type)

    for fold_no, (train_indexes, test_indexes) in enumerate(folds):
        dataset_train = dataset.select_indexes(train_indexes)
        dataset_test = dataset.select_indexes(test_indexes)

        # evaluate_fold returns six values; only the first three are used.
        accuracy, f1, fold_cm, _, _, _ = evaluate_fold(
            noise_dataset, dataset_train, dataset_test)
        fold_accuracies[fold_no] = accuracy
        fold_f1[fold_no] = f1
        confusion_matrices[fold_no] = fold_cm

        print("Fold " + str(fold_no) + " accuracy: " +
              str(fold_accuracies[fold_no]) + ", f1: " + str(fold_f1[fold_no]))

    mean_acc = np.mean(fold_accuracies)
    mean_f1 = np.mean(fold_f1)
    print("Mean accuracy: " + str(mean_acc))
    print("Mean f1 score: " + str(mean_f1))

    class_labels = [
        'Null', 'Snap left', 'Snap right', 'Knock left', 'Knock right',
        'Clap', 'Knock left 2x', 'Knock right 2x', 'Clap 2x'
    ]
    cm_sum = np.sum(confusion_matrices, axis=0)
    plot_confusion_matrix(cm_sum, class_labels, normalize=True)
    return mean_acc
def save_plots(y_test, predictions, categories, pipe_name, report):
    """Plot the confusion matrix for a pipeline and save its text report.

    Both outputs go to the module-level ``current_checkpoint_directory``; the
    report file is named ``<pipe_name>.<report_file_extension>``.
    """
    plot_confusion_matrix(y_test,
                          predictions,
                          categories,
                          pipe_name,
                          current_checkpoint_directory,
                          normalize=True)

    report_name = "{}.{}".format(pipe_name, report_file_extension)
    report_path = os.path.join(current_checkpoint_directory, report_name)
    save_text(report_path, report)
def run_classifier(data, target):
    """Grid-search an SVC on a 75/25 split and report its test performance.

    Steps:
      1. Split ``data``/``target`` (25% test, ``random_state=42``).
      2. Run a 5-fold ``GridSearchCV`` (``f1_micro`` scoring) over C, gamma,
         degree and kernel, then print the best score and parameters.
      3. Print a classification report and plot the confusion matrix.
      4. Build one-vs-rest ROC curves and AUCs per genre and plot them.

    Genre names are read from ``mg_fft.GENRES``. Nothing is returned.
    """
    data_train, data_test, target_train, target_test = train_test_split(
        data, target, test_size=0.25, random_state=42)
    svc = SVC(probability=True)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Shape of training set: %s" % (data_train.shape,))
    print("Shape of test set: %s" % (data_test.shape,))

    params = [
        {
            'C': [0.1, 0.3, 1, 3, 10, 30],
            'gamma': [0.01, 0.03, 0.1, 0.3, 1, 3, 5],
            'degree': [2, 3, 4],
            'kernel': ['rbf', 'poly', 'sigmoid'],
        },
    ]
    gsearch = GridSearchCV(svc, params, n_jobs=2, verbose=1,
                           scoring='f1_micro', cv=5)
    gsearch.fit(data_train, target_train)

    print('Best score: %0.3f' % gsearch.best_score_)
    print('Best parameters set:')
    best_params = gsearch.best_estimator_.get_params()
    for grid in params:
        for param_name in sorted(grid):
            print('\t%s: %r' % (param_name, best_params[param_name]))

    preds = gsearch.predict(data_test)
    print(classification_report(target_test, preds,
                                target_names=mg_fft.GENRES))

    # NOTE(review): normalize() is called with its defaults here; confirm the
    # default norm is the one intended for a confusion matrix.
    conf_matr = confusion_matrix(target_test, preds)
    conf_matr = normalize(conf_matr)
    plots.plot_confusion_matrix(conf_matr, mg_fft.GENRES,
                                "Confusion Matrix", "Music Genres")

    # Gather data for ROC curves. The probability matrix does not depend on
    # the label, so it is computed once instead of once per genre.
    proba = gsearch.predict_proba(data_test)
    fprs = []
    tprs = []
    AUCs = []
    for label in range(len(mg_fft.GENRES)):
        target_label_test = np.asarray(target_test == label, dtype=int)
        proba_label = proba[:, label]
        fpr, tpr, _ = roc_curve(target_label_test, proba_label)
        fprs.append(fpr)
        tprs.append(tpr)
        AUCs.append(roc_auc_score(target_label_test, proba_label))

    plots.plot_roc_curves(fprs, tprs, AUCs, mg_fft.GENRES)
def train(self, test_size=0.2, random_state=None):
    """Fit ``self.clf`` on a train split and report on the held-out split.

    Args:
        test_size: Passed to ``train_test_split`` for the held-out split.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        The confusion matrix computed on the held-out split.
    """
    # Split dataset into training and validation.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        self.dataset.matrix,
        self.dataset.labels,
        test_size=test_size,
        random_state=random_state)

    self.clf.fit(X_train, y_train)
    y_pred = self.clf.predict(X_test)

    # Single-argument print(...) keeps the output identical on Python 2 and
    # makes the block valid Python 3 as well.
    print('accuracy on testing set %s' % self.clf.score(X_test, y_test))

    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    print(classification_report(y_test, y_pred,
                                target_names=['verde', 'amarillo', 'rojo']))
    plot_confusion_matrix(cm)
    return cm
# save predictions to csv y_pred = predictions.argmax(axis=-1) y_pred = label_encoder.inverse_transform(y_pred) df_results = pd.read_csv(os.path.join('data', 'test.csv')) df_predictions = pd.DataFrame(y_pred) df_results['predictions'] = df_predictions df_results.to_csv(os.path.join(results_dir, 'predictions.csv'), index=False) # plot confusion matrix test_gen = custom_generator(df_test, label_encoder, bin_size, False, (224, 224), 1) plots.plot_confusion_matrix(figures_dir, 'confusion_' + checkpoint_filename[:-5], test_gen, df_test.shape[0], label_encoder, predictions, show_values=True) # plot ROC curves auc_micro, auc_macro = plots.plot_ROC(figures_dir, 'ROC_' + checkpoint_filename[:-5], test_gen, df_test.shape[0], label_encoder, predictions) # save results d = { 'learning_rate': [learning_rate], 'batch_size': [batch_size], 'num_frozen_layers': [num_frozen_layers],
print("Training Accuracy: {:.4f}".format(accuracy)) loss, accuracy = model.evaluate(valid_seq_x, valid_y, verbose=False) print("Testing Accuracy: {:.4f}".format(accuracy)) # predict the labels on validation dataset predictions = model.predict(valid_seq_x) predictions = predictions.argmax(axis=-1) print(metrics.accuracy_score(predictions, valid_y)) matrix = metrics.confusion_matrix(predictions, valid_y) print(matrix) np.set_printoptions(precision=2) # Plot non-normalized confusion matrix plot_confusion_matrix( valid_y, predictions, classes=class_names, title='Confusion matrix, without normalization testing accuracy: {:.4f}'. format(accuracy)) # Plot normalized confusion matrix #plot_confusion_matrix(valid_y, predictions, classes=class_names, normalize=True, # title='Normalized confusion matrix') #plt.show() plt.savefig('confusion_matrix.png') sleep(60) shuffle(tweets) for t in tweets[:20]: tweet = p.clean(t[3]) text = sequence.pad_sequences(tokenizer.texts_to_sequences([tweet]),
best_val_acc = 0.0
train_losses, train_accuracies = [], []
valid_losses, valid_accuracies = [], []

for epoch in range(NUM_EPOCHS):
    train_loss, train_accuracy = train(model, device, train_loader,
                                       criterion, optimizer, epoch)
    valid_loss, valid_accuracy, valid_results = evaluate(
        model, device, valid_loader, criterion)

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    train_accuracies.append(train_accuracy)
    valid_accuracies.append(valid_accuracy)

    # Keep the model that has the best validation accuracy (another metric
    # could be used instead).
    is_best = valid_accuracy > best_val_acc
    if is_best:
        # Record the new best. Without this update every epoch with an
        # accuracy above 0.0 would overwrite the checkpoint, so the saved
        # model would be the last one rather than the best one.
        best_val_acc = valid_accuracy
        torch.save(model, os.path.join(PATH_OUTPUT, save_file))

plot_learning_curves(train_losses, valid_losses, train_accuracies,
                     valid_accuracies)

# Evaluate the best checkpoint on the test set and plot its confusion matrix.
best_model = torch.load(os.path.join(PATH_OUTPUT, save_file))
test_loss, test_accuracy, test_results = evaluate(best_model, device,
                                                  test_loader, criterion)

class_names = ['Seizure', 'TumorArea', 'HealthyArea', 'EyesClosed',
               'EyesOpen']
plot_confusion_matrix(test_results, class_names)
folds = 100 cv = KFold(n_splits=folds) scores = model_selection.cross_val_score(lr, x, y, cv=cv) y_pred = model_selection.cross_val_predict(lr, x, y, cv=cv) print("%1d-fold cross validation average accuracy: %.3f" % (folds, scores.mean())) print("--- %s seconds ---" % (time.time() - start_time)) # classification report print(classification_report(y, y_pred)) # confusion matrix cm = normalize(confusion_matrix(y, y_pred), axis=1, norm='l1') plot_confusion_matrix(cm, genre_list, "Name", "Confusion Matrix") print("Creating roc curves") # ROC curve fprs = [] tprs = [] AUCs = [] for label in range(len(genre_list)): y_label = np.asarray(y == label, dtype=int) proba = model_selection.cross_val_predict(lr, x, y, cv=cv, method='predict_proba')
# inverse transform true labels y_true = np.array(y_true).argmax(axis=-1) y_true = label_encoder.inverse_transform(y_true) # save predictions to csv y_pred = predictions.argmax(axis=-1) y_pred = label_encoder.inverse_transform(y_pred) df_results = pd.read_csv(os.path.join('data', 'test.csv')) df_predictions = pd.DataFrame(y_pred) df_results['ytrue'] = pd.DataFrame(y_true) df_results['predictions'] = df_predictions df_results.to_csv(os.path.join(results_dir, 'predictions.csv'), index=False) # plot confusion matrix test_gen = custom_generator(df_test, label_encoder, bin_size, False, (224, 224), 1) plots.plot_confusion_matrix(results_dir, 'confusion_matrix', test_gen, df_test.shape[0], label_encoder, predictions, show_values=True) # plot ROC curves auc_micro, auc_macro = plots.plot_ROC(results_dir, 'roc_curves', test_gen, df_test.shape[0], label_encoder, predictions) # save results d = {'model': [model], 'learning_rate': [learning_rate], 'batch_size': [batch_size], 'num_frozen_layers': [num_frozen_layers], 'use_class_weights': [use_class_weights], 'bin_size': [bin_size], 'test_loss': [test_loss], 'test_accuracy': [test_accuracy], 'test_mean_absolute_bin': [test_mean_absolute_bin_error], 'micro_auc': [auc_micro], 'macro_auc': [auc_macro], 'folder': [start_time]} df_parameters = pd.DataFrame(d, columns=['model', 'learning_rate', 'batch_size', 'num_frozen_layers', 'use_class_weights', 'bin_size', 'test_loss', 'test_accuracy', 'test_mean_absolute_bin', 'micro_auc', 'macro_auc', 'folder']) df_parameters.to_csv(os.path.join(results_dir, 'results.csv'), index=False)
validation_data = (X_test_norm, y_test), show_accuracy = True, verbose=2) # Calculate metrics y_pred_train = model.predict_classes(X_train_norm) # Predicted y_train classification print "Train Acc:" metrics.calc_acc(y_pred_train, y_train) y_pred_test = model.predict_classes(X_test_norm) # Predicted y_test classification print "Train Acc:" metrics.calc_acc(y_pred_test, y_test) # Classification report y_act = get_labels.get_label_1D(y_test) print(classification_report(y_act, y_pred_test, target_names=['spiral', 'elliptical', 'uncertain'])) # Plot confusion matrix cm = confusion_matrix(y_act, y_pred_test) plots.plot_confusion_matrix(cm, normed=True) # Get probabilities for each classification probas = model.predict_proba(X_test_norm) # Calculate true positive and false positive rates treating each classifier as binary sp_fpr, sp_tpr, _ = roc_curve(y_test20[:,0], probas[:,0]) ell_fpr, ell_tpr, _ = roc_curve(y_test20[:,1], probas[:,1]) unc_fpr, unc_tpr, _ = roc_curve(y_test20[:,2], probas[:,2]) # Plot ROC curves plots.plot_ROC_curve(sp_tpr, sp_fpr, ell_tpr, ell_fpr, unc_tpr, unc_fpr) # Print AUC scores print "AUC for spirals:" roc_auc_score(y_test[:,0], probas[:,0]) print "AUC for ellipticals:" roc_auc_score(y_test[:,0], probas[:,1]) print "AUC for uncertain:" roc_auc_score(y_test[:,0], probas[:,2])
def main():
    """Train and evaluate a modulation-type classifier end to end.

    Extracts features for each modulation type from ``../data/<type>.txt``,
    plots feature statistics, trains a neural network on a 90/10 split, saves
    its parameters, reports the confusion matrix / accuracy / classification
    report, and finally pickles the classifier and shows the figures.
    """
    data_path = '../data/'
    model_path = '../model_6/model_6'

    mod_types = [
        'gfsk', 'gmsk', 'qam4', 'qam16', 'qam64', 'psk2', 'psk4', 'psk8'
    ]
    feature_types = ['cumulants', 'amplitude_stats', 'phase_stats']
    feature_names = [
        '|C20|', '|C21|', '|C40|', '|C41|', '|C42|', '|C60|', '|C61|',
        '|C62|', '|C63|', '∠C20', '∠C21', '∠C40', '∠C41', '∠C42', '∠C60',
        '∠C61', '∠C62', '∠C63', 'Magnitude mean', 'Magnitude std',
        'Phase mean', 'Phase std'
    ]

    num_frames = int(1e3)
    frame_len = 2048
    frame_step = 256
    snr_list = None

    # One input file per modulation type.
    files = {name: [data_path + name + '.txt'] for name in mod_types}

    print('Extracting features')
    features_dict = extract_features(files,
                                     mod_types,
                                     feature_types=feature_types,
                                     frame_len=frame_len,
                                     frame_step=frame_step,
                                     num_frames=num_frames,
                                     snr_list=snr_list,
                                     verbose=True)
    plot_feature_stats(features_dict, feature_names)
    n_features = features_dict[mod_types[0]].shape[1]
    print('Number of features: {0}'.format(n_features))

    print('Converting features')
    features, labels = convert_features(features_dict)
    split = train_test_split(features, labels, test_size=0.1)
    train_feats, test_feats, train_labels, test_labels = split
    print('Number of samples: {0}'.format(features.shape[0]))
    print('Number of training samples: {0}'.format(train_feats.shape[0]))
    print('Number of testing samples: {0}'.format(test_feats.shape[0]))

    print('Training model')
    classifier = train_neural_network(train_feats, train_labels)

    print('Saving parameters')
    save_parameters(model_path, classifier.coefs_, classifier.intercepts_)

    print('Testing model')
    pred_labels = classifier.predict(test_feats)
    plot_confusion_matrix(test_labels,
                          pred_labels,
                          np.array(mod_types),
                          title='',
                          normalize=True)

    accuracy = accuracy_score(test_labels, pred_labels)
    print('Accuracy: {:.2f}'.format(accuracy))
    report = classification_report(test_labels,
                                   pred_labels,
                                   target_names=mod_types)
    print(report)

    print('Saving model')
    import pickle
    with open(model_path + '.pkl', 'wb') as model_file:
        pickle.dump(classifier, model_file)

    plt.show()
def eval(self):
    """Evaluate the network on the test set and, if present, on the 'my' set.

    For each evaluated set the collected score info is written to
    ``self.score_file``, the detailed info is logged when logging is enabled,
    and the confusion matrix is printed and plotted into ``self.model_path``.
    A blank log line is emitted at the end in either case.
    """

    def _evaluate(eval_set, verbose, plot_name):
        # Score one data set and emit all of its reports.
        eval_score = self.net_handler.eval_nn(eval_set=eval_set,
                                              batch_archive=self.batch_archive,
                                              collect_things=True,
                                              verbose=verbose)

        # Collected score info goes to the score file, not to stdout.
        eval_score.info_collected(self.net_handler.nn_arch,
                                  self.audio_dataset.param_path,
                                  self.cfg_ml['train_params'],
                                  info_file=self.score_file,
                                  do_print=False)

        if self.cfg_ml['logging_enabled']:
            logging.info(
                eval_score.info_detail_log(self.net_handler.nn_arch,
                                           self.audio_dataset.param_path,
                                           self.cfg_ml['train_params']))

        print("confusion matrix:\n{}\n".format(eval_score.cm))
        plot_confusion_matrix(eval_score.cm,
                              self.batch_archive.classes,
                              plot_path=self.model_path,
                              name=plot_name)

    print("\n--Evaluation on Test Set:")
    _evaluate('test', False, 'confusion_test')

    # The 'my' set is optional.
    if self.batch_archive.x_my is not None:
        print("\n--Evaluation on My Set:")
        _evaluate('my', True, 'confusion_my')

    # New line for log.
    if self.cfg_ml['logging_enabled']:
        logging.info('\n')
def main():
    """Run subject-wise cross-validation of the 1-D sleep CNN.

    For each of 20 folds, one subject is held out for testing, 15 of the
    remaining subjects are drawn for training and the rest are used for
    validation. The per-fold best checkpoint (by validation accuracy) is
    evaluated on the test subject. Results from all folds are pooled, dumped
    with joblib, summarized into ``result_metrics.txt`` and plotted as raw
    and normalized confusion matrices.
    """
    num_epochs = 3
    batch_size = 50

    # Define working directory locations
    data_dir = '../../data/'
    graph_dir = '../../graphs'
    output_dir = '../output/model'
    os.makedirs(output_dir, exist_ok=True)

    # Set environment seeds
    # NOTE(review): torch.manual_seed is called twice (0, then 42); the second
    # call re-seeds the generator seeded by the first -- confirm which value
    # is intended.
    torch.manual_seed(0)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(0)
    torch.manual_seed(42)
    prng = np.random.RandomState(42)

    # Prepare values for model metric reporting
    all_test_loss = []
    all_test_accuracy = []
    all_test_results = []
    all_best_acc = 0.0
    best_file = 'SleepCNNBest.pth'

    # Extract raw EEG and annotation data from full raw dataset
    eeg_list_all, annotation_list_all = load_sleep_dataset(data_dir)

    start = time.time()
    for fold in range(20):
        # Divide subject list for 20-fold cross validation
        subjects = [x for x in range(20)]
        test = [fold]
        subjects.remove(fold)
        training = prng.choice(subjects, 15, replace=False)
        # Whatever is neither the test subject nor drawn for training.
        validate = list(set(subjects) - set(training))

        # Format train, validation, and test datasets for PyTorch
        train_dataset = process_data_for_1d(load_sleep_dataset_targets(
            training, eeg_list_all, annotation_list_all), not_test_set=True)
        validation_dataset = process_data_for_1d(load_sleep_dataset_targets(
            validate, eeg_list_all, annotation_list_all), not_test_set=True)
        test_dataset = process_data_for_1d(load_sleep_dataset_targets(
            test, eeg_list_all, annotation_list_all), not_test_set=False)

        # Load dataset splits into PyTorch
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
        test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

        # Load CNN model and parameters
        model = m.SleepCNN_1D()
        if torch.cuda.is_available():
            print('Unleashing CUDA, zoom zoom!')
            model = model.cuda()
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        criterion.to(device)

        # Per-fold checkpoint; overwritten whenever this fold improves.
        save_file = 'SleepCNN.pth'
        best_val_acc = 0.0
        train_losses, train_accuracies = [], []
        valid_losses, valid_accuracies = [], []

        for epoch in range(num_epochs):
            # Iteratively train and validate network
            train_loss, train_accuracy = train(model, device, train_loader, criterion, optimizer, epoch, fold)
            valid_loss, valid_accuracy, valid_results = evaluate(
                model, device, valid_loader, criterion)
            train_losses.append(train_loss)
            valid_losses.append(valid_loss)
            train_accuracies.append(train_accuracy)
            valid_accuracies.append(valid_accuracy)

            # Save model with best accuracy for final evaluation
            if valid_accuracy > best_val_acc:
                best_val_acc = valid_accuracy
                torch.save(model, os.path.join(output_dir, save_file))
            # Separately track the best model across all folds.
            if valid_accuracy > all_best_acc:
                all_best_acc = valid_accuracy
                torch.save(model, os.path.join(output_dir, best_file))

        # Run final evaluation with best seen performing model
        # NOTE(review): if no epoch of this fold exceeds 0.0 validation
        # accuracy, save_file still holds an earlier fold's checkpoint.
        best_model = torch.load(os.path.join(output_dir, save_file))
        test_loss, test_accuracy, test_results = evaluate(
            best_model, device, test_loader, criterion)

        # Update master list of results for use in cross-validation
        all_test_loss.append(test_loss)
        all_test_accuracy.append(test_accuracy)
        # NOTE(review): list comprehension used only for its side effect.
        [all_test_results.append(x) for x in test_results]

    # NOTE(review): `start` is taken once before the fold loop, so this is
    # the elapsed time over all folds; whether the original printed it per
    # fold cannot be determined from this view.
    end = time.time()
    print(f'Total fold time took {(end - start) / 60.} mins.')

    # Summarize results across all folds from cross-validation
    joblib.dump(all_test_results, graph_dir + '/all_test_results.gz')
    # Each pooled result is indexed as (true label, predicted label).
    y_true = [x[0] for x in all_test_results]
    y_pred = [x[1] for x in all_test_results]
    accuracy = metrics.accuracy_score(y_true, y_pred)
    recall = metrics.recall_score(y_true, y_pred, average=None)
    f1_score = metrics.f1_score(y_true, y_pred, average=None)
    precision = metrics.precision_score(y_true, y_pred, average=None)
    global_precision = metrics.precision_score(y_true, y_pred, average='micro')

    # Save model summary metrics to file
    with open(graph_dir + '/result_metrics.txt', 'w') as f:
        f.write(f'Class Precision: {precision}')
        f.write(f'\nGlobal precision: {global_precision}')
        f.write(f'\nClass Recall: {recall}')
        f.write(f'\nClass F1-score: {f1_score}')
        f.write(f'\nAccuracy: {accuracy}')

    # Plot final confusion matrices with info from all cv folds
    class_names = ['Non-REM 1', 'Non-REM 2', 'Non-REM 3', 'REM', 'Wake']
    plot_confusion_matrix(all_test_results, class_names, outdir=graph_dir, normalize=False)
    plot_confusion_matrix(all_test_results, class_names, outdir=graph_dir, normalize=True)
cc_classifier.fit(train_cc) # Evaluate classifier on train data cm = np.zeros((len(classes), len(classes))) for i, cc in enumerate(train_cc): print(f"Evaluation of [TRAIN DATA] {train_cc_files[i]}") cc.set_predicted_labels(cc_classifier.predict(cc)) cc.eval_classification_error(ground_truth_type="componentwise") cc.eval_classification_error(ground_truth_type="pointwise") this_cm = cc.eval_classification_error(ground_truth_type="pointwise", include_unassociated_points=True, classes=np.array( list(classes.keys()))) plot_confusion_matrix(this_cm, list(classes.values()), data_type='train_' + train_cc_files[i].split('.')[0], id=id_experimentation, folder=plot_backup_folder) cm += this_cm plot_confusion_matrix(cm, list(classes.values()), data_type='train', id=id_experimentation, folder=plot_backup_folder) # Evaluate classifier on test data cm = np.zeros((len(classes), len(classes))) for i, cc in enumerate(test_cc): print(f"Evaluation of [TEST DATA] {test_cc_files[i]}") cc.set_predicted_labels(cc_classifier.predict(cc)) cc.eval_classification_error(ground_truth_type="componentwise")
def main(experiment_path, plot_results=False):
    """Train one model per CV fold, report fold statistics, write submissions.

    For every fold a ResNet-18 is trained, its best weights are reloaded, and
    validation accuracy, ROC AUC, average precision and log loss are recorded.
    A per-fold prediction file is written, optional diagnostic plots are
    saved, and finally the predictions of all folds are passed to
    ``prepare_submission`` for ``submission.csv``.
    """

    def _summary(values):
        # (mean, std, min, max) in the order the report templates expect.
        return (np.mean(values), np.std(values), np.min(values),
                np.max(values))

    (kfold_data, X_test) = prepare_data_cv('../input')

    models_proba = []
    models_acc = []
    models_roc = []
    models_logloss = []
    models_map = []

    for idx, data in enumerate(kfold_data):
        X_train, y_train, X_valid, y_valid = data

        model = load_model(get_resnet_18, weights=None)
        fold_dir = os.path.join(experiment_path, 'fold_%02d' % idx)
        callbacks = get_model_callbacks(save_dir=fold_dir)
        data_generator = get_data_generator(X_train, y_train, batch_size=128)

        model.fit_generator(data_generator,
                            steps_per_epoch=10,
                            epochs=1000,
                            verbose=True,
                            validation_data=(X_valid, y_valid),
                            callbacks=callbacks,
                            shuffle=True)

        # Reload the weights stored under the fold directory during training.
        weights_path = os.path.join(
            experiment_path, ('fold_%02d/model/model_weights.hdf5' % idx))
        model.load_weights(filepath=weights_path)

        _, acc_val = model.evaluate(X_valid, y_valid, verbose=False)
        proba = model.predict(X_valid)
        proba_test = model.predict(X_test)[:, 1]

        # Column 1 of the prediction and the argmax of the labels are reused
        # by every metric and plot below.
        positive_proba = proba[:, 1]
        valid_labels = y_valid.argmax(axis=1)

        models_proba.append(proba_test)
        models_acc.append(acc_val)
        models_roc.append(roc_auc_score(valid_labels, positive_proba))
        models_map.append(
            average_precision_score(valid_labels, positive_proba))
        models_logloss.append(logloss_softmax(y_valid, proba))

        prepare_submission(
            [proba_test],
            os.path.join(experiment_path, 'fold_%02d/prediction.csv' % idx))

        if plot_results:
            plots_path = os.path.join(experiment_path,
                                      'fold_%02d/plots' % idx)
            if not os.path.exists(plots_path):
                os.makedirs(plots_path)
            plot_precision_recall(positive_proba,
                                  valid_labels,
                                  path=os.path.join(plots_path,
                                                    'recall_precision.jpg'))
            plot_roc(positive_proba,
                     valid_labels,
                     path=os.path.join(plots_path, 'roc.jpg'))
            plot_confusion_matrix(positive_proba,
                                  valid_labels,
                                  path=os.path.join(plots_path, 'conf.jpg'))

    print('Loss:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' %
          _summary(models_logloss))
    print('Acc:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' %
          _summary(models_acc))
    print('ROC AUC:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' %
          _summary(models_roc))
    print('mAP:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' %
          _summary(models_map))

    prepare_submission(models_proba,
                       os.path.join(experiment_path, 'submission.csv'))
if is_best: best_val_auc = valid_roc torch.save(model, os.path.join(PATH_OUTPUT, "MyCNN.pth")) best_model = torch.load(os.path.join(PATH_OUTPUT, "MyCNN.pth")) plot_learning_curves(train_losses, valid_losses, train_accuracies, valid_accuracies) plot_learning_curves_roc(train_rocs, valid_rocs, filename='learning_curves_roc.png') train_loss, train_accuracy, train_results = evaluate(model, device, train_loader, criterion) plot_confusion_matrix(train_results, ["Alive", "Dead"]) valid_loss, valid_accuracy, valid_results = evaluate(model, device, valid_loader, criterion) plot_confusion_matrix(valid_results, ["Alive", "Dead"]) valid_results = np.array(valid_results) actual = valid_results[:, :1] pred = valid_results[:, 1:] fpr, tpr, _ = roc_curve(actual, pred) roc_auc = auc(fpr, tpr) print("Roc_auc: " + str(roc_auc)) y_true = [x[0] for x in valid_results] y_pred = [x[1] for x in valid_results]
def main():
    """Test trained CNN models under random or fixed image rotations.

    Command-line options select the configuration file, the models to test
    and either a random rotation range (``-r``) or a fixed rotation sweep
    (``-f low high spacing``). For every model and rotation angle the model
    is evaluated, a row-normalized confusion matrix and a classification
    report are produced and written to log files. For fixed rotations the
    collected accuracies and losses are pickled and plotted.
    """
    global args
    parser = argparse.ArgumentParser(
        description="Convolutional NN Testing Script")
    parser.add_argument("-c", "--config", dest="configfile", default='config.yml', help="Path to yaml configuration file")
    parser.add_argument("-m", "--modelnames", dest="modelnames", nargs="*", default=None, required=False, help="Model name to test")
    # Random and fixed rotation modes cannot be combined.
    rot_parse = parser.add_mutually_exclusive_group()
    rot_parse.add_argument("-r", "--rand_rot_angle", dest="rand_rot_angle", default=0., type=float, help="Random image rotation angle range [deg]")
    rot_parse.add_argument(
        "-f", "--fixed_rot_angle", dest="fixed_rot_angle", nargs=3, type=float, help="(low, high, spacing) fixed image rotation angle [deg]")
    args = parser.parse_args()

    target_names = [
        'Planes', 'Cars', 'Birds', 'Cats', 'Deer', 'Dogs', 'Frogs', 'Horses', 'Boats', 'Trucks'
    ]

    # Determine which rotation to apply
    run_fixed_rotation = False
    i_results_prefix = 'random'
    rot_angle_list = [args.rand_rot_angle]
    # NOTE(review): rot_comment is assigned but not read within this function.
    rot_comment = "Random rotation range (deg): [-{}, {}]".format(
        rot_angle_list[0], rot_angle_list[0])
    if args.fixed_rot_angle is not None:
        i_results_prefix = 'fixed'
        run_fixed_rotation = True
        ang_range = args.fixed_rot_angle
        # np.arange excludes the upper bound `high`.
        rot_angle_list = np.arange(ang_range[0], ang_range[1], ang_range[2])
        rot_comment = "Fixed rotation angle(s) (deg): {}".format(
            rot_angle_list)

    # Get configuration file
    hconfig = ModelConfigurator(args.configfile)

    # Extract config parameters
    datapath = hconfig.datapath

    # Get requested models, if None, take config's list
    model_list = args.modelnames
    if model_list is None:
        model_list = hconfig.avail_models

    # Directory structures for data and model saving
    data_dir_struct = DataDirStruct(datapath)

    # Dictionary of test results
    out_dict = {}
    out_dict['theta'] = np.array(rot_angle_list, dtype='float32')

    # List of accuracies for each model
    acc_model_list = []
    loss_model_list = []

    # Loop over requested models
    for mod_i in model_list:
        mod_i = mod_i.strip()
        print('\nTesting {} over following rotations: {} ...\n'.format(
            mod_i, rot_angle_list))

        # Set model config parameters
        hconfig.model_config(mod_i)

        # Extract model path from config
        model_dir_struct = ModelDirStruct(main_dir=hconfig.model_outpath, test_model=True)

        ## Load model to test
        # Load pretrained model from file
        json_file = open(model_dir_struct.model_file, 'r')
        trained_model_json = json_file.read()
        json_file.close()
        trained_model = model_from_json(trained_model_json, custom_layer_dict)

        # Load weights into model
        trained_model.load_weights(model_dir_struct.weights_file)
        print("Loaded {} from disk".format(mod_i))

        # Compile trained model
        trained_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

        # Print test results to file
        # NOTE(review): this file and i_text_file below are opened without a
        # `with` block, so they stay open if an exception is raised before
        # close().
        results_file = os.path.join(model_dir_struct.main_dir, 'tests.log')
        glob_text_file = open(results_file, 'w')
        glob_text_file.write('#Index\tAngle\tAccuracies\n')

        # List of accuracies for each rotation
        acc_rot_list = []
        loss_rot_list = []

        # Run over rotation angles in list,
        # or just single value used for random range
        for i, rot_angle in enumerate(rot_angle_list):
            print('On {} angle {}'.format(i_results_prefix, rot_angle))
            test_prefix = 'test_%s_rot_%03i' % (i_results_prefix, i)

            # Print test results to file
            i_results_file = os.path.join(model_dir_struct.main_dir, test_prefix + '.log')
            i_text_file = open(i_results_file, 'w')

            # Testing generator
            test_gen = test_img_generator(dir_struct=data_dir_struct, config_struct=hconfig, fixed_rotation=run_fixed_rotation, rotation_angle=rot_angle)

            # Truth labels for sample
            y_truth = test_gen.classes

            # Evaluate loaded model on test data
            scores = trained_model.evaluate_generator(test_gen, steps=None, verbose=1)
            print("Test %s: %.2f%%" % (trained_model.metrics_names[1], scores[1] * 100))

            # Save each rotation loss & accuracy
            loss_rot_list.append(scores[0])
            acc_rot_list.append(scores[1])

            # Running prediction
            # NOTE(review): assumes the generator yields samples in the same
            # order as test_gen.classes on this second pass -- confirm.
            Y_pred = trained_model.predict_generator(test_gen, steps=None, verbose=1)
            y_predict = np.argmax(Y_pred, axis=1)

            # Confusion matrix
            print('Confusion Matrix')
            cm = confusion_matrix(y_truth, y_predict)
            # Row-normalize: each row is divided by its own total.
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print(cm)
            plot_confusion_matrix(cm=cm, classes=target_names, outname='cm_%s' % test_prefix, model_dir_struct=model_dir_struct)

            # Classification report
            print('Classification Report')
            class_report = classification_report(y_truth, y_predict, target_names=target_names)
            print(class_report)

            # Print test results to file
            i_text_file.write(
                '\n\nRotation Angle: {} deg\n\n'.format(rot_angle))
            i_text_file.write('\n\nConfusion Matrix:\n\n')
            i_text_file.write('{}'.format(cm))
            i_text_file.write('\n\n\nClassification Report:\n\n')
            i_text_file.write('{}'.format(class_report))
            i_text_file.close()
            print('Saved single rotation test results to {}'.format(
                i_results_file))

            # Saving accuracy diagonals to file
            # NOTE(review): no trailing newline is written, so consecutive
            # rotations are not separated by a line break -- confirm intent.
            glob_text_file.write('{}\t{}\t{}'.format(
                i, rot_angle, cm.diagonal()).replace('[', '').replace(']', ''))

        glob_text_file.close()
        print('Saved test results to {}'.format(results_file))

        # Model's accuracies
        acc_model_list.append(acc_rot_list)
        loss_model_list.append(loss_rot_list)
        out_dict[mod_i + '_accuracy'] = np.array(acc_rot_list, dtype='float32')
        out_dict[mod_i + '_loss'] = np.array(loss_rot_list, dtype='float32')
        print('Accuracies for {}: {}'.format(mod_i, acc_rot_list))

    print('\nRotations, accuracies and losses for all')
    print_dict(out_dict)

    if run_fixed_rotation:
        # Save test information to pickle file
        # NOTE(review): model_dir_struct is the value left by the last loop
        # iteration; it is unbound if model_list is empty.
        head_dir = os.path.split(model_dir_struct.main_dir)[0]
        model_names = '_'.join(model_list).replace(" ", "")
        rot_seq = rot_angle_list[0]
        rot_names = '%s' % rot_seq
        if len(rot_angle_list) > 1:
            # NOTE(review): the middle element is len(...) - 2; its meaning is
            # not evident from this code -- confirm.
            rot_seq = (rot_angle_list[0], len(rot_angle_list) - 2, rot_angle_list[-1])
            rot_names = '_'.join(map(str, rot_seq)).replace(" ", "")

        # Prefix
        pprefix = 'rot_' + i_results_prefix + '_test_' + model_names + "_" + rot_names

        # Pickle file
        pklname = pprefix + '.pkl'
        filename = os.path.join(head_dir, pklname)
        with open(filename, 'wb') as file_pi:
            pickle.dump(out_dict, file_pi)
        print("\nSaved rotation test to disk: {}\n".format(filename))

        # Plot rotation metrics
        plot_rotation_metrics(out_dict, ['Accuracy', 'Loss'], pprefix, head_dir)
def compute_confusion_matrix(gt, pred, classes, class_names, user_producer=True, normalize=False, axis=1, plot=True, title=None):
    """Compute (and optionally plot) the confusion matrix of patch predictions.

    Arguments
    ---------
    gt: numpy.ndarray
        One-hot labels of patches,
        shape = (n_patches, patch_size_padded, patch_size_padded, n_classes).
    pred: numpy.ndarray
        Probability maps of classes of patches, same shape as ``gt``.
    classes: sequence
        Label values passed to ``confusion_matrix`` as ``labels``.
    class_names: list of str
        Display names for the plot. The list is not modified.
    user_producer: bool
        If true, user, producer and total accuracy are added through
        ``compute_user_producer_acc`` (un-normalized matrices only).
    normalize: bool
        If true, divide the counts by their totals along ``axis``.
    axis: int
        0 divides by column totals (user's accuracy on the diagonal),
        1 divides by row totals (producer's accuracy on the diagonal).
    plot: bool
        If true, the confusion matrix is plotted.
    title: str or None
        Title of the plot.

    Returns
    -------
    cm: numpy.ndarray
        The confusion matrix, possibly normalized or extended with the
        accuracies added by ``compute_user_producer_acc``.
    """
    # Collapse the class axis to per-pixel class indices (same uint8 dtype as
    # before; the per-patch Python loop is replaced by one vectorized call).
    y_true = np.argmax(gt, axis=3).astype(np.uint8)
    y_pred = np.argmax(pred, axis=3).astype(np.uint8)

    # Compute confusion matrix
    cm = confusion_matrix(y_true.flatten(), y_pred.flatten(), labels=classes)

    if normalize:
        # keepdims makes the totals broadcast along the requested axis. The
        # former `[:, np.newaxis]` always broadcast per row, which divided by
        # the wrong totals when axis=0.
        cm = cm.astype('float') / cm.sum(axis=axis, keepdims=True)

    if user_producer:
        if normalize:
            print("user and producer accuracy can only be calculated for "
                  "un-normalized confusion matrices")
        else:
            cm = compute_user_producer_acc(cm)
            # Add ONE extra label on a copy: list.extend('accuracy') would add
            # eight single-character labels and mutate the caller's list.
            class_names = list(class_names) + ['accuracy']

    if plot:
        plot_confusion_matrix(cm, class_names, normalize, title)

    return cm
os.path.join( C.OUTPUT_DIR, "{}SleepRNN_{}.pth".format("full_", model.details))) # Save results and confusion matrix in case of time-out with open( C.OUTPUT_DIR + "{}results_{}.csv".format("full_", model.details), "w") as f: f.write("true,pred\n") for r in valid_results: f.write("{},{}\n".format(r[0], r[1])) f.close() class_names = ['0', '1', '2', '3', '4'] plot_confusion_matrix(valid_results, class_names, "full_", model.details) # plot learning curves plot_learning_curves(train_losses, valid_losses, train_accuracies, valid_accuracies, "full_", model.details) """ HYPERPARAMETER TUNING (COMMENT OUT IF NOT USING) """ # unit_range = [8, 16, 24, 32, 64] # layer_range = [2, 3, 4] # for fold in range(0, 3): # print("FOLD #{}".format(fold)) # # Data loading # print("Train set loading") # train_path = C.SPLITS_DIR + 'cv_train{}.txt'.format(fold) # train_loader = import_to_dataloader(train_path)
def main():
    """Test trained CNN models on (randomly or fixed) rotated test images.

    Parses the command line, loads every requested model from its JSON
    architecture file and weights file, evaluates it once per rotation angle
    and prints the confusion matrix and classification report.  For fixed
    rotations the collected accuracy / loss / probability arrays are pickled
    and passed to ``plot_rotation_metrics``.

    Side effects: sets the module-level ``args``, writes confusion-matrix
    plots via ``plot_confusion_matrix`` and, for fixed rotations, a ``.pkl``
    file next to the last model's main directory.
    """
    global args
    parser = argparse.ArgumentParser(
        description="Convolutional NN Testing Script")
    parser.add_argument("-c", "--config", dest="configfile",
                        default='config.yml',
                        help="Path to yaml configuration file")
    parser.add_argument("-m", "--modelnames", dest="modelnames", nargs="*",
                        default=None, required=False,
                        help="Model name to test")
    parser.add_argument("-n", "--num_samples", dest="num_samples",
                        default=10, type=int,
                        help="Number of test samples")
    parser.add_argument("-s", "--seed", dest="rngseed", default=123,
                        type=int,
                        help="RNG Seed to test different samples")
    rot_parse = parser.add_mutually_exclusive_group()
    rot_parse.add_argument("-r", "--rand_rot_angle", dest="rand_rot_angle",
                           default=0., type=float,
                           help="Random image rotation angle range [deg]")
    rot_parse.add_argument(
        "-f", "--fixed_rot_angle", dest="fixed_rot_angle", nargs=3,
        type=float,
        help="(low, high, spacing) fixed image rotation angle [deg]")
    args = parser.parse_args()

    # Get requested sample size
    num_samples = args.num_samples

    # Determine which rotation to apply
    run_fixed_rotation = False
    i_results_prefix = 'random'
    rot_angle_list = [args.rand_rot_angle]
    rot_comment = "Random rotation range (deg): [-{}, {}]".format(
        rot_angle_list[0], rot_angle_list[0])
    if args.fixed_rot_angle is not None:
        i_results_prefix = 'fixed'
        run_fixed_rotation = True
        ang_range = args.fixed_rot_angle
        rot_angle_list = np.arange(ang_range[0], ang_range[1], ang_range[2])
        rot_comment = "Fixed rotation angle(s) (deg): {}".format(
            rot_angle_list)

    # Get configuration file
    hconfig = ModelConfigurator(args.configfile)

    # Extract config parameters
    datapath = hconfig.datapath

    # Class names
    class_labels = hconfig.labels

    # Get requested models; if none were given (flag absent, or "-m" with no
    # names, which argparse turns into an empty list), take the config's list
    model_list = args.modelnames
    if not model_list:
        model_list = hconfig.avail_models

    # Directory structures for data and model saving
    data_dir_struct = DataDirStruct(datapath)

    # Dictionary of test results
    out_dict = {}
    out_dict['theta'] = np.array(rot_angle_list, dtype='float32')

    # List of accuracies, losses, probs for each model
    acc_model_list = []
    loss_model_list = []
    prob_model_list = []

    # Loop over requested models
    for mod_i in model_list:

        mod_i = mod_i.strip()
        print('\nTesting {} over following rotations: {} ...\n'.format(
            mod_i, rot_angle_list))

        # Set model config parameters
        hconfig.model_config(mod_i)

        # Extract model path from config
        model_dir_struct = ModelDirStruct(main_dir=hconfig.model_outpath,
                                          test_model=True)

        ## Load model to test
        # Load pretrained model from file; the context manager closes the
        # handle even if reading fails
        with open(model_dir_struct.model_file, 'r') as json_file:
            trained_model_json = json_file.read()
        trained_model = model_from_json(trained_model_json, custom_layer_dict)

        # Load weights into model
        trained_model.load_weights(model_dir_struct.weights_file)
        print("Loaded model from disk")

        # Compile trained model
        trained_model.compile(loss='categorical_crossentropy',
                              optimizer='rmsprop',
                              metrics=['accuracy'])

        # List of accuracies for each rotation
        prob_rot_list = []
        acc_rot_list = []
        loss_rot_list = []

        # Run over rotation angles in list,
        # or just single value used for random range
        for i, rot_angle in enumerate(rot_angle_list):

            print('On {} angle {}'.format(i_results_prefix, rot_angle))

            # Choose same batch: re-seed so every angle sees the same samples
            np.random.seed(args.rngseed)

            test_prefix = 'test_%s_rot_%.0f' % (i_results_prefix, rot_angle)

            # Testing generator
            test_gen = test_img_generator(dir_struct=data_dir_struct,
                                          config_struct=hconfig,
                                          fixed_rotation=run_fixed_rotation,
                                          rotation_angle=rot_angle)

            # Get Samples; builtin next() works on Python 2 and 3 iterators
            x_batch, y_truth = next(test_gen)

            # Evaluate scores
            scores = trained_model.evaluate(x_batch, y_truth, verbose=1)
            print("Test %s: %.2f%%" % (trained_model.metrics_names[1],
                                       scores[1] * 100))

            # Predict classification
            Y_pred = trained_model.predict(x_batch)
            y_predict = np.argmax(Y_pred, axis=1)

            # Confusion matrix, normalized by row (true class) totals
            print('Confusion Matrix')
            cm = confusion_matrix(np.argmax(y_truth, axis=1), y_predict)
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print(cm)

            plot_confusion_matrix(cm=cm,
                                  classes=class_labels,
                                  outname='cm_%s' % test_prefix,
                                  model_dir_struct=model_dir_struct)

            # Classification report
            print('Classification Report')
            class_report = classification_report(np.argmax(y_truth, axis=1),
                                                 y_predict,
                                                 target_names=class_labels)
            print(class_report)

            # Save each rotation loss & accuracy
            loss_rot_list.append(scores[0])
            acc_rot_list.append(scores[1])

            # Mean classification probability for truth class
            # NOTE(review): divides by the requested num_samples, not the
            # actual batch length -- confirm the generator yields exactly
            # num_samples items.
            mean_prob = np.sum(Y_pred * y_truth) / num_samples
            prob_rot_list.append(mean_prob)

        # Model's accuracies
        acc_model_list.append(acc_rot_list)
        loss_model_list.append(loss_rot_list)
        prob_model_list.append(prob_rot_list)

        out_dict[mod_i + '_accuracy'] = np.array(acc_rot_list,
                                                 dtype='float32')
        out_dict[mod_i + '_loss'] = np.array(loss_rot_list, dtype='float32')
        out_dict[mod_i + '_probability'] = np.array(prob_rot_list,
                                                    dtype='float32')

        print('Accuracies for {}: {}'.format(mod_i, acc_rot_list))
        print('Mean Accuracy for {}: {}'.format(
            mod_i, np.mean(np.array(acc_rot_list))))
        print('StdD Accuracy for {}: {}'.format(mod_i, np.std(
            np.array(acc_rot_list))))

    print('\nRotations and accuracies for all')
    print_dict(out_dict)

    print('Saved some figures in {}'.format(model_dir_struct.plots_dir))

    if run_fixed_rotation:
        # Save test information to pickle file
        head_dir = os.path.split(model_dir_struct.main_dir)[0]
        model_names = '_'.join(model_list).replace(" ", "")
        rot_seq = rot_angle_list[0]
        rot_names = '%s' % rot_seq
        if len(rot_angle_list) > 1:
            rot_seq = (rot_angle_list[0], len(rot_angle_list) - 2,
                       rot_angle_list[-1])
            rot_names = '_'.join(map(str, rot_seq)).replace(" ", "")

        # Prefix
        pprefix = 'rot_' + i_results_prefix + \
            '_batch_' + str(num_samples) + '_test_' + \
            model_names + "_" + rot_names

        # Pickle file
        pklname = pprefix + '.pkl'

        filename = os.path.join(head_dir, pklname)
        with open(filename, 'wb') as file_pi:
            pickle.dump(out_dict, file_pi)
        print("\nSaved rotation test to disk: {}\n".format(filename))

        # Plot rotation metrics
        plot_rotation_metrics(out_dict,
                              ['Accuracy', 'Loss', 'Probability'],
                              pprefix, head_dir)
def model_performance(Xtrain, Xtest, Ytrain, Ytest, k=5, randseed=545510477,
                      analysis_type='sepsis1', balanced_class_weight=True):
    '''
    Tune a PCA + logistic-regression pipeline with a grid search, persist the
    fitted search object and report its performance.

    :param Xtrain, Xtest, Ytrain, Ytest: Train and test data
    :param k: k for the K-fold evaluation done by get_acc_auc_kfold
              (the grid search itself always uses cv=5)
    :param randseed: seed for randomizer
    :param analysis_type: String for type of analysis to be used as filename
    :param balanced_class_weight: if True use class_weight='balanced',
                                  otherwise no class weighting
    :return: (fpr, tpr, roc_auc) of the ROC curve on the test set
    '''
    # Local import keeps this block self-contained; os.makedirs replaces
    # distutils.dir_util.mkpath, which is gone from the standard library
    # as of Python 3.12.
    import os

    print("Model performance start")

    # Grid-search hyperparameter optimization
    # Create regularization hyperparameter space
    C = np.power(2, np.arange(0, 20, 2)) * 0.1

    logr = LogisticRegression(
        random_state=randseed,
        # Raised in case the solver doesn't converge; increases processing time
        max_iter=1000
    )

    # Parameters to test for hyperparameter optimization
    class_weight_param = 'balanced' if balanced_class_weight else None

    param_grid = {
        # 'pca__n_components': [2, 4, 6, 9],  # enable to restrict the features
        'logr__C': C,
        'logr__penalty': ['l1', 'l2'],
        # Not every solver supports every penalty; unsupported combinations
        # are scored with error_score below instead of aborting the search.
        'logr__solver': ['newton-cg', 'lbfgs', 'liblinear', 'saga'],
        'logr__class_weight': [class_weight_param]
    }

    # Pipeline to optimize PCA and Logistic Regression parameters
    pca = PCA()
    pipe = Pipeline(steps=[('pca', pca), ('logr', logr)])
    clf = GridSearchCV(pipe, param_grid, cv=5, scoring='roc_auc',
                       error_score=0.0, verbose=0)

    print("    GridSearchCV hyperparameter optimization start")
    clf.fit(Xtrain, Ytrain)
    print("    GridSearchCV hyperparameter optimization end")

    modelFolder = '../../output/models'
    os.makedirs(modelFolder, exist_ok=True)
    filename = modelFolder + '/' + analysis_type + '_model.sav'

    # Save best model as file; the context manager guarantees the handle is
    # flushed and closed
    with open(filename, 'wb') as model_file:
        pickle.dump(clf, model_file)

    # Using best performing parameters for LR classifier
    acc, auc_ = get_acc_auc_kfold(clf.best_estimator_, Xtrain, Ytrain, k=k)
    print("______________________________________________")
    print(("Classifier: Logistic Regression"))
    print("Best parameter (CV score=%0.3f):" % clf.best_score_)
    print(clf.best_params_)
    print(("Average Accuracy in KFold CV: %0.4f" % acc))
    print(("Average AUC in KFold CV: %0.4f" % auc_))

    # Compute ROC curve and ROC area from positive-class probabilities
    fpr, tpr, _ = roc_curve(Ytest, clf.predict_proba(Xtest)[:, 1])
    roc_auc = auc(fpr, tpr)

    # Check model performance on test set
    Ytest_pred = clf.best_estimator_.predict(Xtest)
    acc = accuracy_score(Ytest, Ytest_pred)
    # NOTE(review): this AUC is computed from hard class predictions, so it
    # will generally differ from roc_auc above -- confirm this is intended.
    auc_ = roc_auc_score(Ytest, Ytest_pred)
    print(("Accuracy in Test set: %0.4f" % acc))
    print(("AUC in Test set: %0.4f" % auc_))
    print("")

    class_names = ['Control', 'Cases']

    # Compute confusion matrix
    cnf_matrix = confusion_matrix(Ytest, Ytest_pred)
    np.set_printoptions(precision=2)

    # Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names,
                          analysis_type=analysis_type, normalize=False,
                          title='Confusion matrix, without normalization')

    print("______________________________________________")

    return fpr, tpr, roc_auc
def evaluate(segmentation_module, loader, cfg, gpu, activations, num_class,
             patch_size, patch_size_padded, class_names, channels, index_test,
             visualize, results_dir, arch_encoder):
    """Evaluate a segmentation module on a test loader and store the results.

    For every sample the summed class scores over all resized inputs are
    turned into a prediction, which is scored on the full (padded) tile and on
    the inner patch ``[patch_size:2*patch_size, patch_size:2*patch_size]``.
    Accuracy, IoU, confusion matrices, per-sample activation statistics and a
    one-row CSV summary are printed and written to ``results_dir``.

    Parameters (as used by this function):
        segmentation_module: callable ``(feed_dict, segSize=...)`` returning
            class scores; switched to eval mode here.
        loader: iterable of batches; element 0 of each batch is a dict with
            'seg_label', 'img_data', 'img_ori' and 'info'.
        cfg: not used in this function.
        gpu: device argument forwarded to ``async_copy_to``.
        activations: object whose ``features`` attribute is read after each
            forward pass (presumably filled by a forward hook -- confirm).
        num_class: number of classes.
        patch_size: edge length of the inner patch.
        patch_size_padded: forwarded to ``find_constant_area``.
        class_names: labels for the confusion-matrix plots.
        channels, arch_encoder: only written to the summary CSV.
        index_test: its length sizes the per-sample activation arrays.
        visualize: if true, each prediction is saved as ``.npy``.
        results_dir: output directory for all saved files.
    """
    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()

    acc_meter_patch = AverageMeter()
    intersection_meter_patch = AverageMeter()
    union_meter_patch = AverageMeter()

    time_meter = AverageMeter()

    # initiate confusion matrix
    conf_matrix = np.zeros((num_class, num_class))
    conf_matrix_patch = np.zeros((num_class, num_class))

    # buffers for UMAP: one row per test sample, 8 * 8 = 64 values each
    n_samples = len(index_test)
    n_activ = (32 // 4) * (32 // 4)
    area_activations_mean = np.zeros((n_samples, n_activ))
    area_activations_max = np.zeros((n_samples, n_activ))
    # builtin int replaces the np.int alias removed in NumPy 1.24
    area_cl = np.zeros((n_samples, ), dtype=int)
    area_loc = np.zeros((n_samples, 3), dtype=int)
    j = 0

    # the inner patch of the padded tile, shared by all patch-level metrics
    inner = slice(patch_size, 2 * patch_size)

    segmentation_module.eval()

    pbar = tqdm(total=len(loader))
    for batch_data in loader:
        # process data
        batch_data = batch_data[0]
        seg_label = as_numpy(batch_data['seg_label'][0])
        img_resized_list = batch_data['img_data']

        torch.cuda.synchronize()
        tic = time.perf_counter()
        with torch.no_grad():
            segSize = (seg_label.shape[0], seg_label.shape[1])
            scores = torch.zeros(1, num_class, segSize[0], segSize[1])
            scores = async_copy_to(scores, gpu)

            for img in img_resized_list:
                feed_dict = batch_data.copy()
                feed_dict['img_data'] = img
                del feed_dict['img_ori']
                del feed_dict['info']
                feed_dict = async_copy_to(feed_dict, gpu)

                # forward pass
                scores_tmp = segmentation_module(feed_dict, segSize=segSize)
                scores = scores + scores_tmp

            _, pred = torch.max(scores, dim=1)
            pred = as_numpy(pred.squeeze(0).cpu())

        torch.cuda.synchronize()
        time_meter.update(time.perf_counter() - tic)

        pred_patch = pred[inner, inner]
        label_patch = seg_label[inner, inner]

        # calculate accuracy
        acc, pix = accuracy(pred, seg_label)
        acc_patch, pix_patch = accuracy(pred_patch, label_patch)

        intersection, union = intersectionAndUnion(pred, seg_label, num_class)
        intersection_patch, union_patch = intersectionAndUnion(
            pred_patch, label_patch, num_class)

        acc_meter.update(acc, pix)
        intersection_meter.update(intersection)
        union_meter.update(union)

        acc_meter_patch.update(acc_patch, pix_patch)
        intersection_meter_patch.update(intersection_patch)
        union_meter_patch.update(union_patch)

        conf_matrix = updateConfusionMatrix(conf_matrix, pred, seg_label)
        # update conf matrix patch
        conf_matrix_patch = updateConfusionMatrix(conf_matrix_patch,
                                                  pred_patch, label_patch)

        # visualization
        if visualize:
            info = batch_data['info']
            img_name = info.split('/')[-1]
            np.save(os.path.join(results_dir, img_name), pred)

        pbar.update(1)

        # UMAP bookkeeping
        # TODO patch_size_padded must be patch_size if only inner patch is
        # checked.
        row, col, cl = find_constant_area(seg_label, 32, patch_size_padded)

        # 999999 is the value this code treats as "no area found"
        if row != 999999:
            feats = as_numpy(activations.features.squeeze(0).cpu())
            r0, c0 = row // 4, col // 4
            area_activations_mean[j] = np.mean(
                feats, axis=0, keepdims=True)[:, r0:r0 + 8,
                                              c0:c0 + 8].reshape(1, 8 * 8)
            area_activations_max[j] = np.max(
                feats, axis=0, keepdims=True)[:, r0:r0 + 8,
                                              c0:c0 + 8].reshape(1, 8 * 8)
            area_cl[j] = cl
        else:
            area_activations_mean[j] = np.full((1, 64), np.nan,
                                               dtype=np.float32)
            area_activations_max[j] = np.full((1, 64), np.nan,
                                              dtype=np.float32)
            area_cl[j] = 999999

        # location bookkeeping is identical for hit and miss
        area_loc[j, 0] = row
        area_loc[j, 1] = col
        area_loc[j, 2] = int(batch_data['info'].split('.')[0])
        j += 1

    pbar.close()

    # summary
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [{}], IoU: {:.4f}'.format(i, _iou))
    iou_patch = intersection_meter_patch.sum / (union_meter_patch.sum + 1e-10)
    for i, _iou_patch in enumerate(iou_patch):
        print('class [{}], patch IoU: {:.4f}'.format(i, _iou_patch))

    print('[Eval Summary]:')
    print(
        'Mean IoU: {:.4f}, Accuracy: {:.2f}%, Inference Time: {:.4f}s'.format(
            iou.mean(), acc_meter.average() * 100, time_meter.average()))
    print(
        'Patch: Mean IoU: {:.4f}, Accuracy: {:.2f}%, Inference Time: {:.4f}s'.
        format(iou_patch.mean(), acc_meter_patch.average() * 100,
               time_meter.average()))

    print('Confusion matrix:')
    plot_confusion_matrix(conf_matrix, class_names, normalize=True,
                          title='confusion matrix patch+padding',
                          cmap=plt.cm.Blues)
    plot_confusion_matrix(conf_matrix_patch, class_names, normalize=True,
                          title='confusion matrix patch',
                          cmap=plt.cm.Blues)
    np.save(os.path.join(results_dir, 'confmatrix.npy'), conf_matrix)
    np.save(os.path.join(results_dir, 'confmatrix_patch.npy'),
            conf_matrix_patch)

    # UMAP inputs
    np.save(os.path.join(results_dir, 'activations_mean.npy'),
            area_activations_mean)
    np.save(os.path.join(results_dir, 'activations_max.npy'),
            area_activations_max)
    np.save(os.path.join(results_dir, 'activations_labels.npy'), area_cl)
    np.save(os.path.join(results_dir, 'activations_loc.npy'), area_loc)

    mcc = compute_mcc(conf_matrix)
    mcc_patch = compute_mcc(conf_matrix_patch)

    # save summary of results in csv
    summary = pd.DataFrame([[
        arch_encoder, patch_size, channels,
        acc_meter.average(), acc_meter_patch.average(),
        iou.mean(), iou_patch.mean(), mcc, mcc_patch
    ]], columns=[
        'model', 'patch_size', 'channels', 'test_accuracy',
        'test_accuracy_patch', 'meanIoU', 'meanIoU_patch', 'mcc', 'mcc_patch'
    ])
    summary.to_csv(os.path.join(results_dir, 'summary_results.csv'))
def main(args):
    """Train and validate the skin-lesion classifier described by ``args``.

    Builds augmented training and plain validation/test datasets, optionally
    exports the model to ONNX, replaces the final linear layer to output
    ``args.num_classes`` classes and runs the train/valid loop.  Per phase and
    epoch it logs loss, accuracy and a confusion-matrix figure to TensorBoard
    and saves a checkpoint whenever the validation accuracy improves.

    ``args`` attributes read here: exp_name, weights, size, in_ds, batch_size,
    workers, gpu, model, onnx_export, num_classes, learning_rate, ckpts,
    epochs.
    """
    writer = SummaryWriter(comment=args.exp_name)
    os.makedirs(args.weights, exist_ok=True)

    train_transform = iaa.Sequential([
        iaa.Resize((args.size, args.size)),
        iaa.Fliplr(p=0.5),
        iaa.Flipud(p=0.5),
        iaa.Rotate(rotate=(-180, 180)),
        iaa.AdditivePoissonNoise(lam=(0, 10.,)),
        iaa.GammaContrast(gamma=(.5, 1.5)),
        iaa.GaussianBlur(sigma=(.0, .8)),
        iaa.Sometimes(0.25, iaa.CoarseDropout(p=(0, 0.03),
                                              size_percent=(0, 0.05))),
    ])

    valid_transform = iaa.Sequential([
        iaa.Resize((args.size, args.size)),
    ])

    train_dataset = YAMLClassificationDataset(
        dataset=args.in_ds, transform=train_transform, split=['training'],
        normalization=normalization_isic)
    valid_dataset = YAMLClassificationDataset(
        dataset=args.in_ds, transform=valid_transform, split=['validation'],
        normalization=normalization_isic)
    test_dataset = YAMLClassificationDataset(
        dataset=args.in_ds, transform=valid_transform, split=['test'],
        normalization=normalization_isic)

    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.workers,
                                  drop_last=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size,
                                  shuffle=False, num_workers=args.workers,
                                  drop_last=False)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size,
                                 shuffle=False, num_workers=args.workers,
                                 drop_last=False)

    dataloaders = {"train": train_dataloader, "valid": valid_dataloader,
                   'test': test_dataloader}
    device = torch.device('cpu' if not args.gpu else 'cuda')

    # Model, loss, optimizer
    print('Loading model...')
    model = SkinLesionModel(args.model)

    if args.onnx_export:
        # export onnx
        dummy_input = torch.ones(4, 3, args.size, args.size, device='cpu')
        model.train()
        torch.onnx.export(model, dummy_input, f'{args.model}.onnx',
                          verbose=True, export_params=True,
                          training=torch.onnx.TrainingMode.TRAINING,
                          opset_version=12,
                          do_constant_folding=False,
                          input_names=['input'],
                          output_names=['output'],
                          dynamic_axes={'input': {0: 'batch_size'},  # variable length axes
                                        'output': {0: 'batch_size'}})

    # Change last linear layer
    model.fc = torch.nn.Linear(model.fc.in_features, args.num_classes)

    if torch.cuda.device_count() > 1 and args.gpu:
        model = torch.nn.DataParallel(
            model, device_ids=np.where(np.array(args.gpu) == 1)[0])
    print(f'Move model to {device}')
    model = model.to(device)

    # loss_fn = nn.modules.loss.CrossEntropyLoss(weight=torch.from_numpy(get_weights()).to(device))
    loss_fn = nn.modules.loss.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    if args.ckpts is None:
        best_valid_acc = 0.
        load_epoch = 0
    else:
        checkpoint = torch.load(args.ckpts)
        model.load_state_dict(checkpoint['state_dict'])
        load_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_valid_acc = checkpoint['best_metric']
        print("Loaded checkpoint epoch ", load_epoch, " with best metric ",
              best_valid_acc)

    train_acc = 0
    valid_acc = 0
    print('Starting training')
    for epoch in range(load_epoch, args.epochs):
        loss_train = []
        loss_valid = []

        for phase in ["train", "valid"]:
            if phase == "train":
                model.train()
            else:
                model.eval()

            correct = 0
            total = 0
            pred_list = []
            gt_list = []
            with tqdm(desc=f"{phase} {epoch}/{args.epochs}", unit="batch",
                      total=len(dataloaders[phase]), file=sys.stdout) as pbar:
                for i, (x, gt, names) in enumerate(dataloaders[phase]):
                    # torchvision.utils.save_image(x, f'batch_{i}.jpg')
                    x, gt = x.to(device), gt.to(device)
                    # gradients are only tracked while training
                    with torch.set_grad_enabled(phase == "train"):
                        pred = model(x)
                        loss = loss_fn(pred, gt)
                        loss_item = loss.item()

                        pred = torch.nn.functional.softmax(pred, dim=1)
                        pred_np = pred.detach().cpu().numpy()
                        pred_np = pred_np.argmax(axis=1)
                        pred_list.extend(pred_np)

                        gt_np = gt.detach().cpu().numpy()
                        gt_list.extend(gt_np)

                        correct += (pred_np == gt_np).sum()
                        total += pred_np.shape[0]

                        if phase == "train":
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()
                            loss_train.append(loss_item)
                        elif phase == "valid":
                            loss_valid.append(loss_item)

                    pbar.set_postfix(loss=loss_item, accuracy=correct / total)
                    pbar.update()

            accuracy = correct / total
            # Ground truth goes first so rows are true labels and columns are
            # predictions (the (y_true, y_pred) argument order).
            cm = confusion_matrix(np.array(gt_list).reshape(-1),
                                  np.array(pred_list).reshape(-1))
            print(f'{phase} {epoch}/{args.epochs}: accuracy={accuracy:.4f}')
            fig = plt.figure(figsize=(args.num_classes, args.num_classes))
            # NOTE(review): tick labels are fixed to 8 classes although the
            # model head uses args.num_classes -- confirm they always match.
            plot_confusion_matrix(cm, [0, 1, 2, 3, 4, 5, 6, 7])
            writer.add_figure(f'{phase}/confusion', fig, epoch)

            if phase == 'train':
                train_acc = accuracy
                writer.add_scalar(f'{phase}/loss', np.mean(loss_train), epoch)
                writer.add_scalar(f'{phase}/accuracy', train_acc, epoch)
            else:
                valid_acc = accuracy
                writer.add_scalar(f'{phase}/loss', np.mean(loss_valid), epoch)
                writer.add_scalar(f'{phase}/accuracy', valid_acc, epoch)

        # keep only the checkpoint with the best validation accuracy so far
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_metric': best_valid_acc
            }
            torch.save(state, os.path.join(args.weights, f'{args.model}.pth'))

    # flush pending events and release the event file
    writer.close()
def plot_confusion_matrix(cm, labels, split):
    """Draw the confusion matrix for ``split`` and save it as a PNG.

    Delegates the drawing to ``plots.plot_confusion_matrix`` and writes the
    returned figure to ``confusion_matrix_<split>.png`` under the module-level
    ``evaluation_dir``.
    """
    # the plotting helper returns (figure, axes); only the figure is needed
    figure, _axes = plots.plot_confusion_matrix(cm, labels, split)
    target_path = evaluation_dir + '/confusion_matrix_%s.png' % split
    figure.savefig(target_path)
valid_losses.append(valid_loss) train_accuracies.append(train_accuracy) valid_accuracies.append(valid_accuracy) is_best = valid_accuracy > best_val_acc # let's keep the model that has the best accuracy, but you can also use another metric. if is_best: best_val_acc = valid_accuracy torch.save(model, os.path.join(PATH_OUTPUT, "MyVariableRNN.pth")) best_model = torch.load(os.path.join(PATH_OUTPUT, "MyVariableRNN.pth")) plot_learning_curves(train_losses, valid_losses, train_accuracies, valid_accuracies) class_names = ['Alive', 'Dead'] plot_confusion_matrix(valid_results, class_names) # TODO: Complete predict_mortality def predict_mortality(model, device, data_loader): model.eval() # TODO: Evaluate the data (from data_loader) using model, # TODO: return a List of probabilities results = [] # https://piazza.com/class/jjjilbkqk8m1r4?cid=1103 with torch.no_grad(): for i, (input, target) in enumerate(data_loader): if isinstance(input, tuple): input = tuple([ e.to(device) if type(e) == torch.Tensor else e
# Evaluate the Active/Inactive Enhancer random-forest classifier on its
# test split: confusion matrix, accuracy, F1, ROC and precision-recall curves.

# Keep only column 1 of predict_proba as the score for the curves below
# (presumably the positive class -- verify against the encoder's class order).
AI_Enhancer_probs = A_I_Enhancer_classifier.predict_proba(A_I_Enhancer_X_test)
AI_Enhancer_probs = AI_Enhancer_probs[:, 1]

# Map encoded predictions / targets back to their original labels so the
# confusion matrix is computed on label values.
y_AI_Enhancer_pred_labels = A_I_Enhancer_encoder.inverse_transform(
    y_AI_Enhancer_pred)
y_AI_Enhancer_test_labels = A_I_Enhancer_encoder.inverse_transform(
    A_I_Enhancer_y_test)
cm = confusion_matrix(y_AI_Enhancer_test_labels, y_AI_Enhancer_pred_labels)
print('Confusion Matrix:\n')
print(cm)

# Accuracy and F1 are computed on the encoded targets / predictions.
print('Accuracy score: ' +
      str(accuracy_score(A_I_Enhancer_y_test, y_AI_Enhancer_pred)))
print('F1 score: ' + str(f1_score(A_I_Enhancer_y_test, y_AI_Enhancer_pred)) +
      '\n')
plot_confusion_matrix(cm,
                      filename='A_I_Enhancer_RF_cm.png',
                      target_names=['A-E', 'I-E'],
                      title='Active Inactive Enhancer Random Forest')

# ROC and precision-recall curves from the column-1 probabilities, with the
# area under each curve.
fpr, tpr, roc_threshold = roc_curve(A_I_Enhancer_y_test, AI_Enhancer_probs)
precision, recall, precision_thresholds = precision_recall_curve(
    A_I_Enhancer_y_test, AI_Enhancer_probs)
roc_auc = auc(fpr, tpr)
pr_auc = auc(recall, precision)
print('AUROC: ' + str(roc_auc))
print('AUPRC: ' + str(pr_auc))
plotRoc_curve(fpr, tpr, roc_auc, 'AI_Enhancer_RF_roc.png',
              'ROC curve Active Inactive Enhancer Random Forest')
plotPrecisionRecall_curve(precision, recall, pr_auc, 'AI_Enhancer_RF_pr.png',
                          'P-R curve Active Inactive Enhancer Random Forest')

# Training and testing Active Inactive Enhancer Neural Network
print('Training Active Inactive Enhancer Neural Network')
def A_Enh_prom_NeuralNetwork(X_train, y_train, balanced=False):
    """Grid-search, evaluate and plot the Active Enhancer / Active Promoter
    neural network.

    A ``KerasClassifier`` built by ``create_model`` is tuned with a 3-fold
    ``GridSearchCV`` optimising F1, then evaluated on the module-level test
    split (``A_Enh_Prom_X_test`` / ``A_Enh_Prom_y_test``).  The confusion
    matrix, ROC curve and precision-recall curve are written to PNG files
    whose names carry the ``balanced`` tag.

    :param X_train: training features; ``X_train.shape[1]`` sets the number
        of input units
    :param y_train: training targets
    :param balanced: only changes the tag used in log messages, plot titles
        and output file names ('balanced' or '')
    :return: None
    """
    name = 'balanced' if balanced else ''
    print('Training Active Enhancer Active Promoter Neural Network ' + name)

    # Placeholder zeros are overridden by the grid below.
    keras_classifier = KerasClassifier(build_fn=create_model,
                                       input_units=0,
                                       hidden_layers=0,
                                       hidden_units=0)
    param_grid = [{
        'input_units': [X_train.shape[1]],
        'hidden_layers': [1, 2, 3],
        'hidden_units': [10, 20, 50],
        'batch_size': [1000],
        'epochs': [100]
    }]
    AEP_neural_network = GridSearchCV(estimator=keras_classifier,
                                      param_grid=param_grid,
                                      scoring='f1',
                                      n_jobs=-1,
                                      cv=3)
    AEP_neural_network = AEP_neural_network.fit(X_train, y_train)
    print('best neural network parameters are: \n')
    print(AEP_neural_network.best_params_)
    # best_score_ is the mean cross-validated value of the scoring metric,
    # which is F1 here, so the message must not call it accuracy.
    print('best F1 score on 3-fold cross validation: ' +
          str(AEP_neural_network.best_score_))

    y_A_Enh_Prom_pred = AEP_neural_network.predict(A_Enh_Prom_X_test)
    # column 1 of predict_proba is used as the score for both curves
    A_Enh_Prom_probs = AEP_neural_network.predict_proba(A_Enh_Prom_X_test)
    A_Enh_Prom_probs = A_Enh_Prom_probs[:, 1]

    # The confusion matrix is computed on the decoded labels.
    y_A_Enh_Prom_pred_labels = A_Enh_Prom_encoder.inverse_transform(
        y_A_Enh_Prom_pred)
    y_A_Enh_Prom_test_labels = A_Enh_Prom_encoder.inverse_transform(
        A_Enh_Prom_y_test)
    cm = confusion_matrix(y_A_Enh_Prom_test_labels, y_A_Enh_Prom_pred_labels)
    print('Confusion Matrix:\n')
    print(cm)
    plot_confusion_matrix(
        cm,
        filename='A_Enh_Prom_NN_cm_' + name + '.png',
        target_names=['A-P', 'A-E'],
        title='Active Enhancer Active Promoter Neural Network ' + name)

    print('Accuracy score: ' +
          str(accuracy_score(A_Enh_Prom_y_test, y_A_Enh_Prom_pred)))
    print('F1 score: ' + str(f1_score(A_Enh_Prom_y_test, y_A_Enh_Prom_pred)) +
          '\n')

    fpr, tpr, roc_threshold = roc_curve(A_Enh_Prom_y_test, A_Enh_Prom_probs)
    precision, recall, precision_thresholds = precision_recall_curve(
        A_Enh_Prom_y_test, A_Enh_Prom_probs)
    roc_auc = auc(fpr, tpr)
    pr_auc = auc(recall, precision)
    print('AUROC: ' + str(roc_auc))
    print('AUPRC: ' + str(pr_auc))
    plotRoc_curve(
        fpr, tpr, roc_auc, 'A_Enh_Prom_NN_roc_' + name + '.png',
        'ROC curve Active Enhancer Active Promoter Neural Network ' + name)
    plotPrecisionRecall_curve(
        precision, recall, pr_auc, 'A_Enh_Prom_NN_pr_' + name + '.png',
        'P-R curve Active Enhancer Active Promoter Neural Network ' + name)
# print accuracy eval_log = eval_score.info_log(do_print=False) # -- # info output # log to file if cfg['ml']['logging_enabled']: logging.info(eval_log) # print confusion matrix print("confusion matrix:\n{}\n".format(eval_score.cm)) # plot confusion matrix plot_confusion_matrix(eval_score.cm, batch_archiv.classes, plot_path=path_coll.model_path, name='confusion_test') # -- # evaluation on my set if batch_archiv.x_my is not None: print("\n--Evaluation on My Set:") # evaluation of model eval_score = nn_handler.eval_nn(eval_set='my', batch_archiv=batch_archiv, calc_cm=True, verbose=True) print("confusion matrix:\n{}\n".format(eval_score.cm)) # plot confusion matrix plot_confusion_matrix(eval_score.cm, batch_archiv.classes, plot_path=path_coll.model_path, name='confusion_my')