def report_metrics(get_model, hyperparams, x_train, x_test, y_train, y_test):
    """Train a model on a fixed train/test split and report its metrics.

    The model is built by ``get_model`` and trained one sample at a time
    (each sample is wrapped into a batch of size one) for the requested
    number of epochs. After every epoch the mean loss and mean accuracy on
    both splits are recorded. Once training is done, the test loss and
    accuracy, a classification report, the confusion matrix and the training
    curves are printed / plotted.

    Args:
        get_model: Callable invoked with the keyword arguments ``layers``,
            ``lr``, ``lr_decay`` and ``input_shape``; returns the model.
        hyperparams: Mapping with the keys ``'neurons'``, ``'epochs'``,
            ``'lr'`` and ``'lr_decay'``. ``'neurons'`` and ``'epochs'`` are
            rounded to the nearest integer.
        x_train: Training samples; ``len(x_train[0][0])`` is used as the
            last dimension of the model's input shape.
        x_test: Test samples.
        y_train: Training targets, iterated in lockstep with ``x_train``.
        y_test: Test targets; ``np.argmax`` of each entry is taken as the
            true class index.

    Returns:
        None. Results are printed and plotted.
    """
    # The Gaussian Process' space is continuous, so the integer-valued
    # hyperparameters have to be rounded.
    n_neurons = int(round(hyperparams['neurons']))
    n_epochs = int(round(hyperparams['epochs']))

    # Create the model
    model = get_model(
        layers=[n_neurons],
        lr=hyperparams['lr'],
        lr_decay=hyperparams['lr_decay'],
        input_shape=(None, len(x_train[0][0])),
    )

    # Per-epoch histories, one slot per epoch
    train_losses = np.zeros(n_epochs)
    train_accs = np.zeros(n_epochs)
    test_losses = np.zeros(n_epochs)
    test_accs = np.zeros(n_epochs)

    # Fit the model
    for epoch in range(n_epochs):
        for sample, target in zip(x_train, y_train):
            model.train_on_batch(np.array([sample]), np.array([target]))

        # Each score entry is indexed as [0] -> loss, [1] -> accuracy
        train_scores = train_utils.get_scores(model, x_train, y_train)
        test_scores = train_utils.get_scores(model, x_test, y_test)
        train_losses[epoch] += np.mean([score[0] for score in train_scores])
        test_losses[epoch] += np.mean([score[0] for score in test_scores])
        train_accs[epoch] += np.mean([score[1] for score in train_scores])
        test_accs[epoch] += np.mean([score[1] for score in test_scores])

    # Final evaluation of the model
    final_scores = train_utils.get_scores(model, x_test, y_test)
    losses = [score[0] for score in final_scores]
    accuracies = [score[1] for score in final_scores]

    # Store predictions
    y_pred = train_utils.predict(model, x_test)
    y_true = [np.argmax(target) for target in y_test]
    cnf_matrix = metrics.confusion_matrix(y_true, y_pred)

    # Print stats (the "+/-" figure is the standard deviation of the scores)
    loss_mean, loss_std = np.mean(losses), np.std(losses)
    acc_mean, acc_std = np.mean(accuracies), np.std(accuracies)
    print("Test loss and Confidence Interval: %.2f (+/- %.2f)" %
          (loss_mean, loss_std))
    print("Test accuracy and Confidence Interval: %.2f%% (+/- %.2f%%)" %
          (acc_mean * 100, acc_std * 100))
    report = metrics.classification_report(
        y_true, y_pred, target_names=CLASS_NAMES)
    print(report)

    # Plot figures
    plot_confusion_matrix(cnf_matrix, classes=CLASS_NAMES)
    plot_model_training(train_losses, test_losses, train_accs, test_accs)
    plt.show()
# # Error analysis

# ### Get human-readable class names
# Re-key `decode` so that it maps the dataset's class index to a class name.
# NOTE(review): the new dict is built by looking keys up in the previous
# `decode`, so this statement is presumably meant to run only once - confirm.
decode = {
    class_idx: decode[int(class_key)]
    for class_key, class_idx in val_folder.class_to_idx.items()
}

# ### Get all predictions and all misclassified images
# shuffle=False keeps the validation samples in dataset order.
val_iterator_no_shuffle = DataLoader(val_folder, batch_size=64, shuffle=False)

# erroneous_samples: images that were misclassified
# erroneous_targets: their true labels
# erroneous_predictions: the predictions made for them
(
    val_predictions,
    val_true_targets,
    erroneous_samples,
    erroneous_targets,
    erroneous_predictions,
) = predict(model, val_iterator_no_shuffle, return_erroneous=True)

# ### Number of misclassified images
# (there are 5120 images overall in the val dataset)
n_errors = len(erroneous_targets)
n_errors

# ### Log loss and accuracy
# Bare expressions: their values are only shown when run interactively.
log_loss(val_true_targets, val_predictions)
accuracy_score(val_true_targets, val_predictions.argmax(1))
def kfold_report_metrics(get_model, hyperparams, features, labels,
                         n_splits=10):
    """Evaluate a model with stratified k-fold cross-validation and report.

    For every fold a fresh model is built by ``get_model`` and trained one
    sample at a time (each sample wrapped into a batch of size one) for the
    requested number of epochs. Per-epoch mean loss/accuracy on both splits
    are accumulated over the folds and averaged for the training-curve plot.
    The per-fold final test loss/accuracy, a classification report and the
    confusion matrix over all held-out predictions are printed / plotted.

    Args:
        get_model: Callable invoked with the keyword arguments ``layers``,
            ``lr``, ``lr_decay`` and ``input_shape``; returns the model.
        hyperparams: Mapping with the keys ``'neurons'``, ``'epochs'``,
            ``'lr'`` and ``'lr_decay'``. ``'neurons'`` and ``'epochs'`` are
            rounded to the nearest integer.
        features: Indexable collection of samples; ``len(sample[0])`` of the
            first training sample is used as the last input dimension.
        labels: Indexable collection of integer class labels, aligned with
            ``features``.
        n_splits: Number of cross-validation folds. Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        None. Results are printed and plotted.
    """
    # The Gaussian Process' space is continuous, so the integer-valued
    # hyperparameters have to be rounded.
    neurons = int(round(hyperparams['neurons']))
    epochs = int(round(hyperparams['epochs']))

    # Fix the one-hot width from the *whole* label set. Letting
    # to_categorical infer it per fold (max label in the fold + 1) yields a
    # narrower encoding whenever a fold lacks the highest class, which then
    # no longer matches the other split or the model output.
    num_classes = int(np.max(np.asarray(labels, dtype='int'))) + 1

    # K-fold stratified cross-validation
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True)

    # Per-epoch histories, summed over the folds and averaged when plotting
    train_losses, train_accs = np.zeros(epochs), np.zeros(epochs)
    test_losses, test_accs = np.zeros(epochs), np.zeros(epochs)

    scores, y_true, y_pred = [], [], []
    for train_index, test_index in skf.split(features, labels):
        x_train = [features[i] for i in train_index]
        x_test = [features[i] for i in test_index]
        y_train = to_categorical([labels[i] for i in train_index],
                                 num_classes=num_classes)
        y_test = to_categorical([labels[i] for i in test_index],
                                num_classes=num_classes)

        # Create and fit a fresh model for this fold
        model = get_model(layers=[neurons],
                          lr=hyperparams['lr'],
                          lr_decay=hyperparams['lr_decay'],
                          input_shape=(None, len(x_train[0][0])))

        for epoch in range(epochs):
            for X, Y in zip(x_train, y_train):
                model.train_on_batch(np.array([X]), np.array([Y]))

            # Each score entry is indexed as [0] -> loss, [1] -> accuracy
            train_evals = train_utils.get_scores(model, x_train, y_train)
            test_evals = train_utils.get_scores(model, x_test, y_test)
            train_losses[epoch] += np.mean([x[0] for x in train_evals])
            test_losses[epoch] += np.mean([x[0] for x in test_evals])
            train_accs[epoch] += np.mean([x[1] for x in train_evals])
            test_accs[epoch] += np.mean([x[1] for x in test_evals])

        # Final evaluation of the model on this fold
        evals = train_utils.get_scores(model, x_test, y_test)
        losses = [x[0] for x in evals]
        accuracies = [x[1] for x in evals]
        scores.append([np.mean(losses), np.mean(accuracies)])

        # Store predictions and ground truths
        y_pred.extend(train_utils.predict(model, x_test))
        y_true.extend([labels[i] for i in test_index])

    # From here on, losses / accuracies hold one mean value per fold
    losses = [x[0] for x in scores]
    accuracies = [x[1] for x in scores]
    cnf_matrix = metrics.confusion_matrix(y_true, y_pred)

    # Print stats (the "+/-" figure is the standard deviation across folds)
    print("Test loss and Confidence Interval: %.2f (+/- %.2f)" %
          (np.mean(losses), np.std(losses)))
    print("Test accuracy and Confidence Interval: %.2f%% (+/- %.2f%%)" %
          (np.mean(accuracies) * 100, np.std(accuracies) * 100))
    print(
        metrics.classification_report(y_true,
                                      y_pred,
                                      target_names=CLASS_NAMES))

    # Plot figures; the summed histories are divided by the fold count
    plot_confusion_matrix(cnf_matrix, classes=CLASS_NAMES)
    plot_model_training(train_losses / n_splits, test_losses / n_splits,
                        train_accs / n_splits, test_accs / n_splits)
    plt.show()