def compute_out_of_distribution_score(
        model,
        model_params,
        df,
        num_classes,
        parameters,
        temperature=2,
        magnitude=0.0001,
        delta=0.90385):
    """Flag images as in- or out-of-distribution from perturbed softmax scores.

    Each batch of images is shifted by ``magnitude`` against the sign of the
    perturbation returned by the helper built from ``model``; the maximum
    softmax probability of the shifted batch is recorded and thresholded
    at ``delta``.

    Args:
        model: Model handed to ``get_perturbation_helper_func``.
        model_params: Object providing ``input_size`` and
            ``preprocessing_func``.
        df: DataFrame with at least the columns ``'path'`` and ``'image'``.
        num_classes: Number of classes, forwarded to the perturbation helper.
        parameters: Object providing ``batch_size``.
        temperature: Temperature forwarded to the perturbation helper.
        magnitude: Step size applied to the signed perturbations.
        delta: Softmax-score threshold separating in- from out-of-distribution.

    Returns:
        A copy of ``df[['image']]`` extended with ``'softmax_score'`` and
        ``'out_dist_score'`` (0.0 where the score exceeds ``delta``, else 1.0).
    """
    # NOTE(review): a second function with this exact name (different
    # signature) appears later in this source; if both live in the same
    # module, the later definition replaces this one -- confirm intent.
    tf.compat.v1.disable_eager_execution()

    data_format = K.image_data_format()
    batch_iter = ImageIterator(
        image_paths=df['path'].tolist(),
        labels=None,
        augmentation_pipeline=LesionClassifier.create_aug_pipeline(
            0, model_params.input_size, True),
        preprocessing_function=model_params.preprocessing_func,
        batch_size=parameters.batch_size,
        shuffle=False,
        rescale=None,
        pregen_augmented_images=False,
        data_format=data_format)

    perturb_fn, scaled_logits_fn = get_perturbation_helper_func(
        model, temperature, num_classes)

    df_score = df[['image']].copy()
    test_phase = 0  # Keras learning phase: 0 = test, 1 = train

    def perturbed_confidences(batch):
        """Return the max softmax probability per image of a perturbed batch."""
        # Only the sign of the perturbation is used.
        signed = np.sign(perturb_fn([batch, test_phase])[0])
        # Original note: DenseNet201 needs the perturbations normalized.
        signed = norm_perturbations(signed, data_format)
        shifted_batch = batch - magnitude * signed
        scaled_logits = scaled_logits_fn([shifted_batch, test_phase])[0]
        return np.max(softmax(scaled_logits), axis=-1).tolist()

    confidences = []
    num_batches = math.ceil(len(df) / parameters.batch_size)
    for _ in trange(num_batches):
        confidences.extend(perturbed_confidences(next(batch_iter)))

    df_score['softmax_score'] = confidences
    # A hard threshold at `delta` is applied here (a smooth logistic mapping
    # was previously considered by the original author and left disabled).
    out_dist_flags = np.where(df_score['softmax_score'] > delta, 0.0, 1.0)
    df_score.insert(loc=2, column="out_dist_score", value=out_dist_flags)
    return df_score
def compute_out_of_distribution_score(model_folder,
                                      df,
                                      num_classes,
                                      batch_size=32,
                                      temperature=2,
                                      magnitude=0.0002,
                                      delta=0.90385):
    """Compute a soft out-of-distribution score per image with DenseNet201.

    Loads ``DenseNet201_best_balanced_acc.hdf5`` from ``model_folder``, shifts
    every batch by ``magnitude`` against the sign of the perturbation returned
    by the helper, records the maximum softmax probability of the shifted
    batch and maps it through ``1 - logistic(score, x0=delta, k=20)``.

    The loaded model and the Keras session are released even when scoring
    fails part-way through, so a failed call does not leave the model behind.

    NOTE(review): another function with this exact name (different signature)
    appears earlier in this source; if both live in the same module, this
    later definition is the one that stays bound -- confirm intent.

    Args:
        model_folder: Directory containing the DenseNet201 checkpoint.
        df: DataFrame with at least the columns ``'path'`` and ``'image'``.
        num_classes: Number of classes (used for the ``balanced_accuracy``
            custom object and the perturbation helper).
        batch_size: Number of images per batch.
        temperature: Temperature forwarded to the perturbation helper.
        magnitude: Step size applied to the signed perturbations.
        delta: Midpoint (``x0``) passed to ``logistic``.

    Returns:
        A copy of ``df[['image']]`` extended with ``'softmax_score'`` and
        ``'out_dist_score'``.
    """
    model_filepath = os.path.join(model_folder,
                                  'DenseNet201_best_balanced_acc.hdf5')
    print('Loading model: ', model_filepath)
    model = load_model(
        filepath=model_filepath,
        custom_objects={'balanced_accuracy': balanced_accuracy(num_classes)})

    try:
        image_data_format = K.image_data_format()
        model_param_map = get_transfer_model_param_map()
        generator = ImageIterator(
            image_paths=df['path'].tolist(),
            labels=None,
            augmentation_pipeline=LesionClassifier.create_aug_pipeline_val(
                model_param_map['DenseNet201'].input_size),
            preprocessing_function=model_param_map['DenseNet201'].
            preprocessing_func,
            batch_size=batch_size,
            shuffle=False,
            rescale=None,
            pregen_augmented_images=False,
            data_format=image_data_format)

        compute_perturbations, get_scaled_dense_pred_output = \
            get_perturbation_helper_func(model, temperature, num_classes)

        df_score = df[['image']].copy()
        softmax_scores = []
        learning_phase = 0  # 0 = test, 1 = train
        steps = math.ceil(df.shape[0] / batch_size)
        for _ in trange(steps):
            images = next(generator)
            perturbations = compute_perturbations([images, learning_phase])[0]
            # Get sign of perturbations
            perturbations = np.sign(perturbations)
            # DenseNet201 need normalization
            perturbations = norm_perturbations(perturbations,
                                               image_data_format)
            # Add perturbations to images
            perturbative_images = images - magnitude * perturbations
            # Calculate the confidence after adding perturbations
            dense_pred_outputs = get_scaled_dense_pred_output(
                [perturbative_images, learning_phase])[0]
            softmax_probs = softmax(dense_pred_outputs)
            softmax_scores.extend(np.max(softmax_probs, axis=-1).tolist())
    finally:
        # Always drop the model and reset the Keras session, also on errors.
        del model
        K.clear_session()

    df_score['softmax_score'] = softmax_scores
    df_score['out_dist_score'] = 1 - logistic(
        x=df_score['softmax_score'], x0=delta, k=20)
    return df_score
def predict_dataframe(model,
                      df,
                      x_col='path',
                      y_col='category',
                      id_col='image',
                      category_names=None,
                      augmentation_pipeline=None,
                      preprocessing_function=None,
                      batch_size=32,
                      workers=1):
    """Predict softmax probabilities for every image listed in ``df``.

    Args:
        model: Keras model containing a layer named ``'dense_pred'``.
        df: DataFrame holding image paths in ``x_col`` and ids in ``id_col``;
            ``y_col`` is copied to the result when present.
        x_col: Column with the image file paths.
        y_col: Column with the ground-truth category (optional in ``df``).
        id_col: Column with the image identifier.
        category_names: Column names for the probability columns.
        augmentation_pipeline: Forwarded to ``ImageIterator``.
        preprocessing_function: Forwarded to ``ImageIterator``.
        batch_size: Number of images per batch.
        workers: Forwarded to ``predict_generator``.

    Returns:
        DataFrame whose first column is ``id_col``, followed by one
        probability column per category, ``y_col`` (if available in ``df``)
        and ``'pred_' + y_col`` holding the arg-max class index.
    """
    image_batches = ImageIterator(
        image_paths=df[x_col].tolist(),
        labels=None,
        augmentation_pipeline=augmentation_pipeline,
        batch_size=batch_size,
        # Order must be kept, otherwise the predictions no longer line up
        # with the rows of `df` and the balanced accuracy comes out wrong.
        shuffle=False,
        preprocessing_function=preprocessing_function,
        # Only a single pass over the data is made.
        pregen_augmented_images=False,
        data_format=K.image_data_format())

    # Read the output of the 'dense_pred' layer instead of the model output:
    # https://keras.io/getting-started/faq/#how-can-i-obtain-the-output-of-an-intermediate-layer
    dense_pred_layer = model.get_layer('dense_pred')
    logits_model = Model(inputs=model.input, outputs=dense_pred_layer.output)
    raw_logits = logits_model.predict_generator(image_batches,
                                                verbose=1,
                                                workers=workers)

    # Original note: convert explicitly to floating point because 0 and 1 are
    # invalid, but 0.0 and 1.0 are valid.
    probabilities = softmax(raw_logits).astype(float)

    result = pd.DataFrame(probabilities, columns=category_names)
    has_ground_truth = y_col in df.columns
    if has_ground_truth:
        result[y_col] = df[y_col].to_numpy()
    predicted_col = 'pred_' + y_col
    result[predicted_col] = np.argmax(probabilities, axis=1)
    result.insert(0, id_col, df[id_col].to_numpy())
    return result
def compute_odin_softmax_scores(in_dist_pred_result_folder,
                                in_dist_image_folder,
                                out_dist_pred_result_folder,
                                out_dist_image_folder, model_folder,
                                softmax_score_folder, num_classes, batch_size):
    """Calculate softmax scores for different combinations of ODIN parameters.

    For every model, temperature, magnitude and distribution ('In' / 'Out')
    the images are perturbed and the maximum softmax probability per image is
    written, one value per line, to
    ``<softmax_score_folder>/<temperature>_<magnitude>/<model>_<postfix>_ODIN_<dist>.txt``.
    Finished combinations are appended to ``<softmax_score_folder>/Done.txt``
    and skipped on later runs, so an interrupted run can be resumed.

    Score files are written through a context manager and every loaded model
    is released in a ``finally`` clause, so a failure in the middle of a run
    leaks neither file handles nor Keras session state.

    Args:
        in_dist_pred_result_folder: Folder with ``<model>_<postfix>.csv``
            prediction files of the in-distribution data (``'image'`` column
            is read).
        in_dist_image_folder: Folder with the in-distribution ``.jpg`` images.
        out_dist_pred_result_folder: Same as above for out-of-distribution
            data.
        out_dist_image_folder: Folder with the out-of-distribution ``.jpg``
            images.
        model_folder: Folder with the ``<model>_<postfix>.hdf5`` checkpoints.
        softmax_score_folder: Output folder for score files and ``Done.txt``.
        num_classes: Number of classes (used for the ``balanced_accuracy``
            custom object and the perturbation helper).
        batch_size: Number of images per batch.
    """
    print('Begin to compute ODIN softmax scores')
    model_names = ['DenseNet201', 'Xception', 'ResNeXt50']
    # postfixes = ['best_balanced_acc', 'best_loss', 'latest']
    postfixes = ['best_balanced_acc']
    distributions = ['In', 'Out']

    # This file is used for recording what parameter combinations were
    # already computed.
    progress_file = os.path.join(softmax_score_folder, 'Done.txt')
    done_set = set()
    if os.path.exists(progress_file):
        with open(progress_file, 'r') as f:
            done_set = set(line.rstrip('\n') for line in f)

    # ODIN parameters (magnitudes: 0 to 0.004 in steps of 0.0002)
    temperatures = [1000, 500, 200, 100, 50, 20, 10, 5, 2, 1]
    magnitudes = np.round(np.arange(0, 0.0041, 0.0002), 4)

    model_param_map = get_transfer_model_param_map()
    image_data_format = K.image_data_format()
    learning_phase = 0  # 0 = test, 1 = train

    for modelattr in (ModelAttr(x, y) for x in model_names
                      for y in postfixes):
        # In-distribution data
        df = {}
        df['In'] = pd.read_csv(
            os.path.join(
                in_dist_pred_result_folder,
                "{}_{}.csv".format(modelattr.model_name, modelattr.postfix)))
        df['In']['path'] = df['In'].apply(lambda row: os.path.join(
            in_dist_image_folder, row['image'] + '.jpg'),
                                          axis=1)
        generator_in = ImageIterator(
            image_paths=df['In']['path'].tolist(),
            labels=None,
            augmentation_pipeline=LesionClassifier.create_aug_pipeline_val(
                model_param_map[modelattr.model_name].input_size),
            preprocessing_function=model_param_map[
                modelattr.model_name].preprocessing_func,
            batch_size=batch_size,
            shuffle=False,
            rescale=None,
            pregen_augmented_images=True,
            data_format=image_data_format)

        # Out-distribution data
        df['Out'] = pd.read_csv(
            os.path.join(
                out_dist_pred_result_folder,
                "{}_{}.csv".format(modelattr.model_name, modelattr.postfix)))
        df['Out']['path'] = df['Out'].apply(lambda row: os.path.join(
            out_dist_image_folder, row['image'] + '.jpg'),
                                            axis=1)
        generator_out = ImageIterator(
            image_paths=df['Out']['path'].tolist(),
            labels=None,
            augmentation_pipeline=LesionClassifier.create_aug_pipeline_val(
                model_param_map[modelattr.model_name].input_size),
            preprocessing_function=model_param_map[
                modelattr.model_name].preprocessing_func,
            batch_size=batch_size,
            shuffle=False,
            rescale=None,
            pregen_augmented_images=True,
            data_format=image_data_format)

        # Load model
        model_filepath = os.path.join(
            model_folder,
            "{}_{}.hdf5".format(modelattr.model_name, modelattr.postfix))
        print('Loading model: ', model_filepath)
        model = load_model(filepath=model_filepath,
                           custom_objects={
                               'balanced_accuracy':
                               balanced_accuracy(num_classes)
                           })

        try:
            need_norm_perturbations = (
                modelattr.model_name == 'DenseNet201'
                or modelattr.model_name == 'ResNeXt50')

            for temperature in temperatures:
                compute_perturbations, get_scaled_dense_pred_output = \
                    get_perturbation_helper_func(model, temperature,
                                                 num_classes)

                for magnitude in magnitudes:
                    for dist in distributions:
                        # Skip if the parameter combination has done
                        param_comb_id = "{}_{}, {}, {}, {}".format(
                            modelattr.model_name, modelattr.postfix, dist,
                            temperature, magnitude)
                        if param_comb_id in done_set:
                            print('Skip ', param_comb_id)
                            continue

                        generator = (generator_in
                                     if dist == 'In' else generator_out)

                        print(
                            "\n===== Temperature: {}, Magnitude: {}, {}-Distribution ====="
                            .format(temperature, magnitude, dist))
                        softmax_score_sub_folder = os.path.join(
                            softmax_score_folder,
                            "{}_{}".format(temperature, magnitude))
                        os.makedirs(softmax_score_sub_folder, exist_ok=True)

                        steps = math.ceil(df[dist].shape[0] / batch_size)
                        generator.reset()
                        score_filepath = os.path.join(
                            softmax_score_sub_folder,
                            "{}_{}_ODIN_{}.txt".format(modelattr.model_name,
                                                       modelattr.postfix,
                                                       dist))
                        # The context manager closes the file even if a
                        # batch fails; the combination is then not marked as
                        # done and the file is rewritten on the next run.
                        with open(score_filepath, 'w') as score_file:
                            for _ in trange(steps):
                                images = next(generator)
                                perturbations = compute_perturbations(
                                    [images, learning_phase])[0]
                                # Get sign of perturbations
                                perturbations = np.sign(perturbations)

                                # Normalize the perturbations to the same
                                # space of image
                                # https://github.com/facebookresearch/odin/issues/5
                                # Perturbations divided by ISIC Training Set
                                # STD
                                if need_norm_perturbations:
                                    perturbations = norm_perturbations(
                                        perturbations, image_data_format)

                                # Add perturbations to images
                                perturbative_images = (
                                    images - magnitude * perturbations)

                                # Calculate the confidence after adding
                                # perturbations
                                dense_pred_outputs = \
                                    get_scaled_dense_pred_output(
                                        [perturbative_images,
                                         learning_phase])[0]
                                softmax_probs = softmax(dense_pred_outputs)
                                softmax_scores = np.max(softmax_probs,
                                                        axis=-1)
                                for s in softmax_scores:
                                    score_file.write("{}\n".format(s))

                        # Record the combination only after its score file
                        # has been completely written and closed.
                        with open(progress_file, 'a') as f_done:
                            f_done.write("{}\n".format(param_comb_id))
        finally:
            # Release the current model before the next one is loaded, also
            # when an error interrupts the run.
            del model
            K.clear_session()