def main(FLAGS):
    # Hyperparameters
    batch_size = FLAGS.batch_size  # Default 32

    # Loading testing dataset
    test_steer_dataset = create_dataset(FLAGS.test_dir)
    test_loader = torch.utils.data.DataLoader(dataset=test_steer_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    # Cropped image dimensions
    crop_img_width, crop_img_height = FLAGS.crop_img_width, FLAGS.crop_img_height

    # Image mode
    if FLAGS.img_mode == 'rgb':
        img_channels = 3
    elif FLAGS.img_mode == 'grayscale':
        img_channels = 1
    else:
        raise IOError("Unidentified image mode: use 'grayscale' or 'rgb'")

    # Output dimension
    output_dim = 1

    # Load the model to test
    if FLAGS.model_to_test == 'resnet8_MCDO':
        model = resnet8_MCDO(img_channels, crop_img_height, crop_img_width,
                             output_dim).to(device)
        model_ckpt = os.path.join(FLAGS.experiment_rootdir, 'resnet8_MCDO.pt')
        model.load_state_dict(torch.load(model_ckpt))
    elif FLAGS.model_to_test == 'resnet8':
        model = resnet8(img_channels, crop_img_height, crop_img_width,
                        output_dim).to(device)
        model_ckpt = os.path.join(FLAGS.experiment_rootdir, 'resnet8.pt')
        model.load_state_dict(torch.load(model_ckpt))
    else:
        raise IOError("Model to test must be 'resnet8' or 'resnet8_MCDO'.")

    # Get predictions and ground truth
    _, pred_steerings, real_steerings, epistemic_variance = \
        utils.compute_predictions_and_gt(model, test_loader, device, FLAGS)

    # ************************* Steering evaluation ***************************

    # Compute random and constant baselines for steerings
    random_steerings = random_regression_baseline(real_steerings)
    constant_steerings = constant_baseline(real_steerings)

    # Create dictionary with filenames
    dict_fname = {
        'test_regression.json': pred_steerings,
        'random_regression.json': random_steerings,
        'constant_regression.json': constant_steerings
    }

    # Create the folder for the current experiment settings if not already there
    if FLAGS.is_MCDO:
        parsed_exp_path = os.path.join(FLAGS.experiment_rootdir,
                                       "MCDO_T{}".format(FLAGS.T))
    else:
        parsed_exp_path = os.path.join(FLAGS.experiment_rootdir, "standard")
    if not os.path.exists(parsed_exp_path):
        os.makedirs(parsed_exp_path)

    # Evaluate predictions: EVA, residuals, and highest errors
    for fname, pred in dict_fname.items():
        abs_fname = os.path.join(parsed_exp_path, fname)
        evaluate_regression(pred, real_steerings, abs_fname)

    # Write epistemic variances, if available
    if epistemic_variance is not None:
        dictionary = {"epistemic_variances": epistemic_variance.tolist()}
        utils.write_to_file(dictionary,
                            os.path.join(parsed_exp_path,
                                         'epistemic_variances.json'))

    # Write predicted and real steerings
    dict_test = {
        'pred_steerings': pred_steerings.tolist(),
        'real_steerings': real_steerings.tolist()
    }
    utils.write_to_file(dict_test,
                        os.path.join(parsed_exp_path,
                                     'predicted_and_real_steerings.json'))
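# --- Hedged sketch (not the repository's implementation) --------------------
# The steering evaluation above compares the model against a random and a
# constant baseline. The helpers below illustrate one plausible behaviour for
# random_regression_baseline and constant_baseline; the *_sketch names are
# hypothetical and only assume the baselines are derived from the ground-truth
# steerings.
import numpy as np


def random_regression_baseline_sketch(real_values):
    # Draw predictions from a Gaussian fitted to the ground-truth steerings.
    real_values = np.asarray(real_values, dtype=np.float64)
    mean, std = np.mean(real_values), np.std(real_values)
    return np.random.normal(loc=mean, scale=max(std, 1e-8), size=real_values.shape)


def constant_baseline_sketch(real_values):
    # Always predict the mean of the ground-truth steerings.
    real_values = np.asarray(real_values, dtype=np.float64)
    return np.full_like(real_values, np.mean(real_values))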
def _main():
    # Set testing mode (dropout/batch normalization)
    K.set_learning_phase(TEST_PHASE)

    # Output dimension (empty place probability)
    output_dim = 1

    # Generate testing data
    test_datagen = data_utils.DataGenerator(rescale=1. / 255)

    # Iterator object containing testing data to be generated batch by batch
    test_generator = test_datagen.flow_from_directory(
        FLAGS.test_dir,
        output_dim,
        shuffle=False,
        img_mode=FLAGS.img_mode,
        target_size=(FLAGS.img_height, FLAGS.img_width),
        batch_size=FLAGS.batch_size)

    # Load json and create model
    json_model_path = os.path.join(FLAGS.experiment_rootdir,
                                   FLAGS.json_model_fname)
    model = utils.jsonToModel(json_model_path)

    # Load weights
    weights_load_path = os.path.join(FLAGS.experiment_rootdir,
                                     FLAGS.weights_fname)
    try:
        model.load_weights(weights_load_path)
        print("Loaded model from {}".format(weights_load_path))
    except (IOError, OSError):
        print("Impossible to find weight path. Returning untrained model")

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    # Get predictions and ground truth
    n_samples = test_generator.samples
    nb_batches = int(np.ceil(n_samples / FLAGS.batch_size))
    probs_per_class, ground_truth = utils.compute_predictions_and_gt(
        model, test_generator, nb_batches, verbose=1)

    # Predicted probabilities
    pred_probs = np.max(probs_per_class, axis=-1)
    # Predicted labels
    pred_labels = np.argmax(probs_per_class, axis=-1)
    # Real labels (ground truth)
    real_labels = np.argmax(ground_truth, axis=-1)

    # Evaluate predictions: average accuracy and highest errors
    print("-----------------------------------------------")
    print("Evaluation:")
    evaluation = evaluate_classification(pred_probs, pred_labels, real_labels)
    print("-----------------------------------------------")

    # Save evaluation
    utils.write_to_file(evaluation,
                        os.path.join(FLAGS.experiment_rootdir,
                                     'test_results.json'))

    # Save predicted and real labels as a dictionary
    labels_dict = {
        'pred_labels': pred_labels.tolist(),
        'real_labels': real_labels.tolist()
    }
    utils.write_to_file(labels_dict,
                        os.path.join(FLAGS.experiment_rootdir,
                                     'predicted_and_real_labels.json'))

    # Visualize confusion matrix
    utils.plot_confusion_matrix(FLAGS.experiment_rootdir, real_labels,
                                pred_labels, CLASSES, normalize=True)
def main(FLAGS):
    if not os.path.exists(FLAGS.experiment_rootdir_comp_adf):
        os.makedirs(FLAGS.experiment_rootdir_comp_adf)

    # Test only if cuda is available
    if device.type == 'cuda':
        # Create the adf experiment rootdir if not already there
        if not os.path.exists(FLAGS.experiment_rootdir_adf):
            os.makedirs(FLAGS.experiment_rootdir_adf)

        # Hyperparameters
        batch_size = FLAGS.batch_size  # Default 32

        # Loading testing dataset
        test_steer_dataset = create_dataset(FLAGS.test_dir)
        test_loader = torch.utils.data.DataLoader(dataset=test_steer_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False)

        # Collect ground-truth targets
        targets = []
        for image, target in test_steer_dataset:
            targets.append(target.cpu().numpy().item())

        # Cropped image dimensions
        crop_img_width, crop_img_height = FLAGS.crop_img_width, FLAGS.crop_img_height

        # Image mode
        if FLAGS.img_mode == 'rgb':
            img_channels = 3
        elif FLAGS.img_mode == 'grayscale':
            img_channels = 1
        else:
            raise IOError("Unidentified image mode: use 'grayscale' or 'rgb'")

        # Output dimension
        output_dim = 1

        # Load standard model
        model = resnet8_MCDO(img_channels, crop_img_height, crop_img_width,
                             output_dim).to(device)
        model_ckpt = os.path.join(FLAGS.experiment_rootdir, 'resnet8_MCDO.pt')
        model.load_state_dict(torch.load(model_ckpt))

        # Load heteroscedastic model
        model_het = resnet8_MCDO_ale(img_channels, crop_img_height,
                                     crop_img_width, output_dim).to(device)
        model_het_ckpt = os.path.join(FLAGS.experiment_rootdir,
                                      'resnet8_MCDO_ale.pt')
        model_het.load_state_dict(torch.load(model_het_ckpt))

        # Load trained model weights on the ADF model
        model_adf = Resnet8_MCDO_adf(img_channels, output_dim,
                                     FLAGS.noise_var, FLAGS.min_var).to(device)
        model_adf.load_state_dict(torch.load(model_ckpt))

        # Compute epistemic variance
        FLAGS.is_MCDO = True
        print("Computing epistemic variances")
        # Get predictions and ground truth
        _, pred_steerings_mean_MCDO, real_steerings, epistemic_variances = \
            utils.compute_predictions_and_gt(model, test_loader, device, FLAGS)

        # Compute total variance with the heteroscedastic model
        print("Computing total variances with heteroscedastic")
        # Get predictions and ground truth
        _, pred_steerings_mean_het, aleatoric_variances, real_steerings, total_variances = \
            utils.compute_predictions_and_gt_het(model_het, test_loader, device, FLAGS)

        # Compute total variance with the ADF model
        print("Computing total variances with ADF")
        # Get predictions and ground truth
        _, pred_steerings_mean_adf_MCDO, aleatoric_variances_adf, real_steerings, total_variances_adf = \
            utils.compute_predictions_and_gt_adf(model_adf, test_loader, device, FLAGS)

        # Compute log-likelihoods
        ll_epi = utils.log_likelihood(pred_steerings_mean_MCDO, targets,
                                      np.sqrt(epistemic_variances))
        ll_ale_het = utils.log_likelihood(pred_steerings_mean_het, targets,
                                          np.sqrt(aleatoric_variances))
        ll_tot_het = utils.log_likelihood(pred_steerings_mean_het, targets,
                                          np.sqrt(total_variances))
        ll_ale_adf = utils.log_likelihood(pred_steerings_mean_adf_MCDO, targets,
                                          np.sqrt(aleatoric_variances_adf))
        ll_tot_adf = utils.log_likelihood(pred_steerings_mean_adf_MCDO, targets,
                                          np.sqrt(total_variances_adf))

        print("Log-likelihood considering EPISTEMIC uncertainty is: {}".format(ll_epi))
        print("Log-likelihood considering ALEATORIC_het uncertainty is: {}".format(ll_ale_het))
        print("Log-likelihood considering TOTAL_het uncertainty is: {}".format(ll_tot_het))
        print("Log-likelihood considering ALEATORIC_adf uncertainty is: {}\n".format(ll_ale_adf))
        print("Log-likelihood considering TOTAL_adf uncertainty is: {}\n".format(ll_tot_adf))
    else:
        raise IOError('Cuda is not available.')
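# --- Hedged sketch (not the repository's implementation) --------------------
# utils.log_likelihood above is called with predicted means, ground-truth
# targets, and predicted standard deviations. A common choice, assumed here, is
# the mean Gaussian log-likelihood of the targets under N(mean, sigma^2); the
# real helper may aggregate differently (e.g. sum instead of mean).
import numpy as np


def gaussian_log_likelihood_sketch(means, targets, sigmas):
    means = np.asarray(means, dtype=np.float64).ravel()
    targets = np.asarray(targets, dtype=np.float64).ravel()
    sigmas = np.maximum(np.asarray(sigmas, dtype=np.float64).ravel(), 1e-6)
    # Per-sample log N(target | mean, sigma^2), averaged over the test set.
    ll = -0.5 * np.log(2.0 * np.pi * sigmas ** 2) \
         - 0.5 * ((targets - means) ** 2) / (sigmas ** 2)
    return float(np.mean(ll))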
def _main():
    # Set testing mode (dropout/batch normalization)
    K.set_learning_phase(TEST_PHASE)

    # Generate testing data
    test_datagen = utils.DroneDataGenerator(rescale=1. / 255)
    test_generator = test_datagen.flow_from_directory(
        FLAGS.test_dir,
        shuffle=False,
        color_mode=FLAGS.img_mode,
        target_size=(FLAGS.img_width, FLAGS.img_height),
        crop_size=(FLAGS.crop_img_height, FLAGS.crop_img_width),
        batch_size=FLAGS.batch_size)

    # Load json and create model
    json_model_path = os.path.join(FLAGS.experiment_rootdir,
                                   FLAGS.json_model_fname)
    model = utils.jsonToModel(json_model_path)

    # Load weights
    weights_load_path = os.path.join(FLAGS.experiment_rootdir,
                                     FLAGS.weights_fname)
    try:
        model.load_weights(weights_load_path)
        print("Loaded model from {}".format(weights_load_path))
    except (IOError, OSError):
        print("Impossible to find weight path. Returning untrained model")

    # Compile model
    model.compile(loss='mse', optimizer='adam')

    # Get predictions and ground truth
    n_samples = test_generator.samples
    nb_batches = int(np.ceil(n_samples / FLAGS.batch_size))
    predictions, ground_truth, t = utils.compute_predictions_and_gt(
        model, test_generator, nb_batches, verbose=1)

    # Param t: t=1 steering, t=0 collision
    t_mask = t == 1

    # ************************* Steering evaluation ***************************

    # Predicted and real steerings
    pred_steerings = predictions[t_mask, 0]
    real_steerings = ground_truth[t_mask, 0]

    # Compute random and constant baselines for steerings
    random_steerings = random_regression_baseline(real_steerings)
    constant_steerings = constant_baseline(real_steerings)

    # Create dictionary with filenames
    dict_fname = {
        'test_regression.json': pred_steerings,
        'random_regression.json': random_steerings,
        'constant_regression.json': constant_steerings
    }

    # Evaluate predictions: EVA, residuals, and highest errors
    print('direction:')
    for fname, pred in dict_fname.items():
        abs_fname = os.path.join(FLAGS.experiment_rootdir, fname)
        evaluate_regression(pred, real_steerings, abs_fname)

    # Write predicted and real steerings
    dict_test = {
        'pred_steerings': pred_steerings.tolist(),
        'real_steerings': real_steerings.tolist()
    }
    utils.write_to_file(dict_test,
                        os.path.join(FLAGS.experiment_rootdir,
                                     'predicted_and_real_steerings.json'))

    # ******************* Collision (translation) evaluation ******************

    # Predicted and real labels
    pred_prob = predictions[~t_mask, 1]
    real_labels = ground_truth[~t_mask, 1]

    # Compute random and constant baselines for collision probabilities
    random_labels = random_regression_baseline(real_labels)
    constant_labels = constant_baseline(real_labels)

    # Create dictionary with filenames
    dict_fname = {
        'translation-test_regression.json': pred_prob,
        'translation-random_regression.json': random_labels,
        'translation-constant_regression.json': constant_labels
    }

    # Evaluate predictions: EVA, residuals, and highest errors
    print('translation:')
    for fname, pred in dict_fname.items():
        abs_fname = os.path.join(FLAGS.experiment_rootdir, fname)
        evaluate_regression(pred, real_labels, abs_fname)

    # Write predicted and real labels
    dict_test = {
        'pred_labels': pred_prob.tolist(),
        'real_probs': real_labels.tolist()
    }
    utils.write_to_file(dict_test,
                        os.path.join(FLAGS.experiment_rootdir,
                                     'predicted_and_real_labels.json'))
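# --- Hedged sketch (not the repository's implementation) --------------------
# evaluate_regression above writes an evaluation file per prediction set. The
# sketch below shows the two statistics these scripts report elsewhere (EVA and
# RMSE) and dumps them to JSON; the real helper also records residuals and the
# highest-error samples.
import json
import numpy as np


def evaluate_regression_sketch(predictions, real_values, out_fname):
    predictions = np.asarray(predictions, dtype=np.float64).ravel()
    real_values = np.asarray(real_values, dtype=np.float64).ravel()
    residuals = real_values - predictions
    # Explained variance (EVA) and root-mean-square error (RMSE).
    eva = float(1.0 - np.var(residuals) / np.var(real_values))
    rmse = float(np.sqrt(np.mean(residuals ** 2)))
    with open(out_fname, 'w') as f:
        json.dump({'evas': eva, 'rmse': rmse}, f)
    return eva, rmse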
def main(FLAGS):
    if not os.path.exists(FLAGS.experiment_rootdir_comp):
        os.makedirs(FLAGS.experiment_rootdir_comp)

    # Test only if cuda is available
    if device.type == 'cuda':
        # Create the adf experiment rootdir if not already there
        if not os.path.exists(FLAGS.experiment_rootdir_adf):
            os.makedirs(FLAGS.experiment_rootdir_adf)

        # Hyperparameters
        batch_size = FLAGS.batch_size  # Default 32

        # Loading testing dataset
        test_steer_dataset = create_dataset(FLAGS.test_dir)
        test_loader = torch.utils.data.DataLoader(dataset=test_steer_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False)

        # Cropped image dimensions
        crop_img_width, crop_img_height = FLAGS.crop_img_width, FLAGS.crop_img_height

        # Image mode
        if FLAGS.img_mode == 'rgb':
            img_channels = 3
        elif FLAGS.img_mode == 'grayscale':
            img_channels = 1
        else:
            raise IOError("Unidentified image mode: use 'grayscale' or 'rgb'")

        # Output dimension
        output_dim = 1

        # Load standard MCDO model
        model = resnet8_MCDO(img_channels, crop_img_height, crop_img_width,
                             output_dim).to(device)
        model_ckpt = os.path.join(FLAGS.experiment_rootdir, 'resnet8_MCDO.pt')
        model.load_state_dict(torch.load(model_ckpt))

        # Load trained model weights on the ADF model
        model_adf = Resnet8_MCDO_adf(img_channels, output_dim,
                                     FLAGS.noise_var, FLAGS.min_var).to(device)
        model_adf.load_state_dict(torch.load(model_ckpt))

        # Ensure that MCDO is NOT enabled
        FLAGS.is_MCDO = False
        T_FLAG = FLAGS.T

        # Compute stats without MCDO
        FLAGS.T = 0
        # Get predictions and ground truth
        print("Computing standard predictions\n...")
        MC_samples, pred_steerings_mean, real_steerings, _ = \
            utils.compute_predictions_and_gt(model, test_loader, device, FLAGS)

        # Evaluate predictions: EVA, residuals
        print("Evaluation of standard predictions")
        evas_std, rmse_std = evaluate_regression_stats(pred_steerings_mean,
                                                       real_steerings)

        # Compute stats with ADF
        FLAGS.is_MCDO = True
        FLAGS.T = T_FLAG
        # Get predictions and ground truth
        print("Computing adf predictions\n...")
        MC_samples, _, ale_variances, _, tot_variances = \
            utils.compute_predictions_and_gt_adf(model_adf, test_loader, device, FLAGS)

        MC_samples_means = MC_samples['mean']
        MC_samples_vars = MC_samples['var']

        evas_ls = []
        rmse_ls = []
        epistemic_var_ls = []
        total_var_ls = []

        # At the T-th iteration, take the mean of only the first T samples
        for T in range(1, T_FLAG + 1):
            pred_steerings_cur = np.mean(MC_samples_means[0:T, :], axis=0)

            # Evaluate predictions: EVA, residuals
            print("Evaluation of predictions for {} MC samples".format(T))
            evas, rmse = evaluate_regression_stats(pred_steerings_cur,
                                                   real_steerings)

            # Compute epistemic and total variances and average over the test set
            epistemic_var = np.mean(np.var(MC_samples_means[0:T, :], axis=0), axis=0)
            aleatoric_var = np.mean(np.mean(MC_samples_vars[0:T, :], axis=0), axis=0)
            total_var = epistemic_var + aleatoric_var

            evas_ls.append(evas)
            rmse_ls.append(rmse)
            epistemic_var_ls.append(epistemic_var)
            total_var_ls.append(total_var)

        plot_variances(epistemic_var_ls, total_var_ls)
        plot_stats(evas_std, evas_ls, 'EVA')
        plot_stats(rmse_std, rmse_ls, 'RMSE')
        print("Saved plots for EVA, RMSE and Variances comparison in folder " +
              FLAGS.experiment_rootdir_comp)

        # Compute indexes of the highest and lowest variances
        epi_variances = tot_variances - ale_variances
        max_epi_variances, min_epi_variances = compute_min_max_variances(epi_variances)
        max_ale_variances, min_ale_variances = compute_min_max_variances(ale_variances)
        max_tot_variances, min_tot_variances = compute_min_max_variances(tot_variances)

        print("\nSamples with highest epistemic uncertainty: ", max_epi_variances)
        print("\nSamples with lowest epistemic uncertainty: ", min_epi_variances)
        print("\nSamples with highest aleatoric uncertainty: ", max_ale_variances)
        print("\nSamples with lowest aleatoric uncertainty: ", min_ale_variances)
        print("\nSamples with highest total uncertainty: ", max_tot_variances)
        print("\nSamples with lowest total uncertainty: ", min_tot_variances)

        # Show qualitative results
        show_lowest_highest(test_steer_dataset, epi_variances,
                            min_epi_variances, max_epi_variances,
                            mode='Epistemic')
        show_lowest_highest(test_steer_dataset, ale_variances,
                            min_ale_variances, max_ale_variances,
                            mode='Aleatoric')
        show_lowest_highest(test_steer_dataset, tot_variances,
                            min_tot_variances, max_tot_variances,
                            mode='Total')
    else:
        raise IOError('Cuda is not available.')
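# --- Hedged sketch (not the repository's implementation) --------------------
# compute_min_max_variances above is assumed to return the indexes of the
# samples with the highest and lowest predicted variance; the number of
# returned samples (k) is a placeholder.
import numpy as np


def compute_min_max_variances_sketch(variances, k=5):
    order = np.argsort(np.asarray(variances).ravel())
    min_indexes = order[:k]           # k lowest-variance samples
    max_indexes = order[-k:][::-1]    # k highest-variance samples, highest first
    return max_indexes, min_indexes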
def main(FLAGS):
    if not os.path.exists(FLAGS.experiment_rootdir_comp):
        os.makedirs(FLAGS.experiment_rootdir_comp)

    # Test only if cuda is available
    if device.type == 'cuda':
        # Create the adf experiment rootdir if not already there
        if not os.path.exists(FLAGS.experiment_rootdir_adf):
            os.makedirs(FLAGS.experiment_rootdir_adf)

        # Hyperparameters
        batch_size = FLAGS.batch_size  # Default 32

        # Loading testing dataset
        test_steer_dataset = create_dataset(FLAGS.test_dir)
        test_loader = torch.utils.data.DataLoader(dataset=test_steer_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False)

        # Cropped image dimensions
        crop_img_width, crop_img_height = FLAGS.crop_img_width, FLAGS.crop_img_height

        # Image mode
        if FLAGS.img_mode == 'rgb':
            img_channels = 3
        elif FLAGS.img_mode == 'grayscale':
            img_channels = 1
        else:
            raise IOError("Unidentified image mode: use 'grayscale' or 'rgb'")

        # Output dimension
        output_dim = 1

        # Load standard MCDO model
        model = resnet8_MCDO(img_channels, crop_img_height, crop_img_width,
                             output_dim).to(device)
        model_ckpt = os.path.join(FLAGS.experiment_rootdir, 'resnet8_MCDO.pt')
        model.load_state_dict(torch.load(model_ckpt))

        # Load trained model weights on the ADF model
        model_adf = Resnet8_MCDO_adf(img_channels, output_dim,
                                     FLAGS.noise_var, FLAGS.min_var).to(device)
        model_adf.load_state_dict(torch.load(model_ckpt))

        # Ensure that MCDO is NOT enabled
        FLAGS.is_MCDO = False
        T_FLAG = FLAGS.T

        # Compute stats without MCDO
        FLAGS.T = 0
        # Get predictions and ground truth
        print("Computing standard predictions\n...")
        MC_samples, pred_steerings_mean, real_steerings, _ = \
            utils.compute_predictions_and_gt(model, test_loader, device, FLAGS)

        # Evaluate predictions: EVA, residuals
        print("Evaluation of standard predictions")
        evas_std, rmse_std = evaluate_regression_stats(pred_steerings_mean,
                                                       real_steerings)

        # Compute stats with ADF
        FLAGS.is_MCDO = True
        FLAGS.T = T_FLAG
        # Get predictions and ground truth
        print("Computing adf predictions\n...")
        _, _, ale_variances, _, tot_variances = \
            utils.compute_predictions_and_gt_adf(model_adf, test_loader, device, FLAGS)

        # Compute indexes of the highest and lowest variances
        epi_variances = tot_variances - ale_variances
        max_epi_variances, min_epi_variances = compute_min_max_variances(epi_variances)
        max_ale_variances, min_ale_variances = compute_min_max_variances(ale_variances)
        max_tot_variances, min_tot_variances = compute_min_max_variances(tot_variances)

        print("\nSamples with highest epistemic uncertainty: ", max_epi_variances)
        print("\nSamples with lowest epistemic uncertainty: ", min_epi_variances)
        print("\nSamples with highest aleatoric uncertainty: ", max_ale_variances)
        print("\nSamples with lowest aleatoric uncertainty: ", min_ale_variances)
        print("\nSamples with highest total uncertainty: ", max_tot_variances)
        print("\nSamples with lowest total uncertainty: ", min_tot_variances)

        # Qualitative evaluation of uncertainty with adversarial examples
        if FLAGS.gen_adv_key == 'high_var':
            indexes_epi = max_epi_variances
            indexes_ale = max_ale_variances
            indexes_tot = max_tot_variances
        elif FLAGS.gen_adv_key == 'low_var':
            indexes_epi = min_epi_variances
            indexes_ale = min_ale_variances
            indexes_tot = min_tot_variances
        else:
            raise IOError("gen_adv_key must be 'high_var' or 'low_var'.")

        # Attack the ADF model on the selected (epistemic) samples
        adv_inputs, adv_preds, epi_adv_var, ale_adv_var, tot_adv_var = \
            attack(model_adf, test_steer_dataset, indexes_epi)
        # Compare epistemic variances before and after attacks
        compare_adv_var(adv_inputs, adv_preds, epi_adv_var, test_steer_dataset,
                        pred_steerings_mean, epi_variances, indexes_epi,
                        "Epistemic")

        # Attack the ADF model on the selected (aleatoric) samples
        adv_inputs, adv_preds, epi_adv_var, ale_adv_var, tot_adv_var = \
            attack(model_adf, test_steer_dataset, indexes_ale)
        # Compare aleatoric variances before and after attacks
        compare_adv_var(adv_inputs, adv_preds, ale_adv_var, test_steer_dataset,
                        pred_steerings_mean, ale_variances, indexes_ale,
                        "Aleatoric")

        # Attack the ADF model on the selected (total) samples
        adv_inputs, adv_preds, epi_adv_var, ale_adv_var, tot_adv_var = \
            attack(model_adf, test_steer_dataset, indexes_tot)
        # Compare total variances before and after attacks
        compare_adv_var(adv_inputs, adv_preds, tot_adv_var, test_steer_dataset,
                        pred_steerings_mean, tot_variances, indexes_tot,
                        "Total")
    else:
        raise IOError('Cuda is not available.')
def _main():
    # Set testing mode (dropout/batch normalization)
    k.set_learning_phase(TEST_PHASE)

    # Split the data into training, validation and test sets
    if FLAGS.initial_epoch == 0:
        data_utils.cross_val_create(FLAGS.data_path)

    # Generate testing data
    test_data_gen = data_utils.DataGenerator()

    # Iterator object containing testing data to be generated batch by batch
    test_generator = test_data_gen.flow_from_directory(
        'test',
        shuffle=False,
        target_size=(FLAGS.img_height, FLAGS.img_width),
        batch_size=FLAGS.batch_size)

    # Load json and create model
    json_model_path = os.path.join(FLAGS.experiment_rootdir,
                                   FLAGS.json_model_fname)
    model = utils.json_to_model(json_model_path)

    # Load weights
    weights_load_path = os.path.abspath('./experiment_6/weights_039.h5')
    try:
        model.load_weights(weights_load_path)
        print("Loaded model from {}".format(weights_load_path))
    except (IOError, OSError):
        print("Impossible to find weight path. Returning untrained model")

    # Compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Get predictions and ground truth
    n_samples = test_generator.samples
    nb_batches = int(np.ceil(n_samples / FLAGS.batch_size))
    probs_per_class, ground_truth = utils.compute_predictions_and_gt(
        model, test_generator, nb_batches, verbose=FLAGS.verbose)

    # Predicted probabilities
    pred_probs = np.max(probs_per_class, axis=-1)
    # Predicted labels
    pred_labels = np.argmax(probs_per_class, axis=-1)
    # Real labels (ground truth)
    real_labels = np.argmax(ground_truth, axis=-1)

    # Evaluate predictions: average accuracy and highest errors
    print("-----------------------------------------------")
    print("Evaluation:")
    evaluation = evaluate_classification(pred_probs, pred_labels, real_labels)
    print("-----------------------------------------------")

    # Save evaluation
    utils.write_to_file(evaluation,
                        os.path.join(FLAGS.experiment_rootdir,
                                     'test_results.json'))

    # Save predicted and real labels as a dictionary
    labels_dict = {
        'pred_labels': pred_labels.tolist(),
        'real_labels': real_labels.tolist()
    }
    utils.write_to_file(labels_dict,
                        os.path.join(FLAGS.experiment_rootdir,
                                     'predicted_and_real_labels.json'))

    # Visualize confusion matrix
    utils.plot_confusion_matrix('test', FLAGS.experiment_rootdir, real_labels,
                                pred_labels, CLASSES, normalize=True)

    # Report standard classification metrics
    print('Accuracy:', accuracy_score(real_labels, pred_labels))
    print('F1 score:', f1_score(real_labels, pred_labels, average='micro'))
    print('Recall:', recall_score(real_labels, pred_labels, average='micro'))
    print('Precision:', precision_score(real_labels, pred_labels, average='micro'))
    print('\n classification report:\n',
          classification_report(real_labels, pred_labels))
    print('\n confusion matrix:\n',
          confusion_matrix(real_labels, pred_labels))
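# --- Hedged sketch (not the repository's flag definitions) ------------------
# Minimal argparse wiring for the FLAGS attributes read by _main() above, so
# the script can be exercised standalone. Flag names mirror the attributes the
# function accesses; defaults are placeholders, not the project's values.
import argparse


def _parse_flags_sketch():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='./data')
    parser.add_argument('--experiment_rootdir', type=str, default='./experiment_6')
    parser.add_argument('--json_model_fname', type=str, default='model_struct.json')
    parser.add_argument('--img_height', type=int, default=224)
    parser.add_argument('--img_width', type=int, default=224)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--initial_epoch', type=int, default=0)
    parser.add_argument('--verbose', type=int, default=1)
    return parser.parse_args()


if __name__ == "__main__":
    FLAGS = _parse_flags_sketch()
    _main()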