Example #1
File: odin.py Project: FabioTomaz/msc
import math

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K  # the project may import plain keras instead
from tqdm import trange

# ImageIterator, LesionClassifier, softmax, norm_perturbations, and
# get_perturbation_helper_func are project-local helpers from the same repo.


def compute_out_of_distribution_score(
    model, 
    model_params,
    df, 
    num_classes, 
    parameters, 
    temperature=2, 
    magnitude=0.0001, 
    delta=0.90385
):
    tf.compat.v1.disable_eager_execution()

    image_data_format = K.image_data_format()
    generator = ImageIterator(
        image_paths=df['path'].tolist(),
        labels=None,
        augmentation_pipeline=LesionClassifier.create_aug_pipeline(
            0,
            model_params.input_size,
            True
        ),
        preprocessing_function=model_params.preprocessing_func,
        batch_size=parameters.batch_size,
        shuffle=False,
        rescale=None,
        pregen_augmented_images=False,
        data_format=image_data_format
    )

    compute_perturbations, get_scaled_dense_pred_output = get_perturbation_helper_func(
        model, 
        temperature, 
        num_classes
    )

    df_score = df[['image']].copy()
    softmax_scores = []
    learning_phase = 0 # 0 = test, 1 = train
    steps = math.ceil(df.shape[0] / parameters.batch_size)
    for _ in trange(steps):
        images = next(generator)
        perturbations = compute_perturbations([images, learning_phase])[0]
        # Get sign of perturbations
        perturbations = np.sign(perturbations)
        # DenseNet201 needs normalization
        perturbations = norm_perturbations(perturbations, image_data_format)
        # Add perturbations to images
        perturbative_images = images - magnitude * perturbations
        # Calculate the confidence after adding perturbations
        dense_pred_outputs = get_scaled_dense_pred_output([perturbative_images, learning_phase])[0]
        softmax_probs = softmax(dense_pred_outputs)
        softmax_scores.extend(np.max(softmax_probs, axis=-1).tolist())

    df_score['softmax_score'] = softmax_scores
    # Hard-threshold the softmax score; a smooth alternative is the logistic
    # curve used in Example #2: 1 - logistic(x=df_score['softmax_score'], x0=delta, k=20)
    df_score.insert(
        loc=2,
        column="out_dist_score",
        value=np.where(df_score['softmax_score'] > delta, 0.0, 1.0))

    return df_score
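
The snippet above leans on the project-local helper get_perturbation_helper_func, which is not shown on this page. A minimal sketch of what it presumably does, assuming K is the Keras backend, the model exposes raw logits through a layer named 'dense_pred' (as in Example #3), and eager execution is disabled as in Example #1:

def get_perturbation_helper_func(model, temperature, num_classes):
    # Hypothetical reconstruction of the project-local helper, not the
    # verified original. Temperature-scaled logits from the final dense layer:
    scaled_logits = model.get_layer('dense_pred').output / temperature
    # ODIN loss: cross-entropy between the model's own hard prediction and
    # the temperature-scaled softmax
    y_pred = K.one_hot(K.argmax(scaled_logits, axis=-1), num_classes)
    loss = K.categorical_crossentropy(y_pred, K.softmax(scaled_logits))
    # Gradient of the loss w.r.t. the input; only its sign is used above
    grads = K.gradients(loss, model.input)[0]
    compute_perturbations = K.function(
        [model.input, K.learning_phase()], [grads])
    get_scaled_dense_pred_output = K.function(
        [model.input, K.learning_phase()], [scaled_logits])
    return compute_perturbations, get_scaled_dense_pred_output

Both returned backend functions match the call pattern used above: each takes [images, learning_phase] and returns a one-element list.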
Example #2
def compute_out_of_distribution_score(model_folder,
                                      df,
                                      num_classes,
                                      batch_size=32,
                                      temperature=2,
                                      magnitude=0.0002,
                                      delta=0.90385):
    model_filepath = os.path.join(model_folder,
                                  'DenseNet201_best_balanced_acc.hdf5')
    print('Loading model: ', model_filepath)
    model = load_model(
        filepath=model_filepath,
        custom_objects={'balanced_accuracy': balanced_accuracy(num_classes)})
    image_data_format = K.image_data_format()
    model_param_map = get_transfer_model_param_map()
    generator = ImageIterator(
        image_paths=df['path'].tolist(),
        labels=None,
        augmentation_pipeline=LesionClassifier.create_aug_pipeline_val(
            model_param_map['DenseNet201'].input_size),
        preprocessing_function=model_param_map['DenseNet201'].preprocessing_func,
        batch_size=batch_size,
        shuffle=False,
        rescale=None,
        pregen_augmented_images=False,
        data_format=image_data_format)

    compute_perturbations, get_scaled_dense_pred_output = get_perturbation_helper_func(
        model, temperature, num_classes)

    df_score = df[['image']].copy()
    softmax_scores = []
    learning_phase = 0  # 0 = test, 1 = train
    steps = math.ceil(df.shape[0] / batch_size)
    for _ in trange(steps):
        images = next(generator)
        perturbations = compute_perturbations([images, learning_phase])[0]
        # Get sign of perturbations
        perturbations = np.sign(perturbations)
        # DenseNet201 needs normalization
        perturbations = norm_perturbations(perturbations, image_data_format)
        # Add perturbations to images
        perturbative_images = images - magnitude * perturbations
        # Calculate the confidence after adding perturbations
        dense_pred_outputs = get_scaled_dense_pred_output(
            [perturbative_images, learning_phase])[0]
        softmax_probs = softmax(dense_pred_outputs)
        softmax_scores.extend(np.max(softmax_probs, axis=-1).tolist())

    del model
    K.clear_session()

    df_score['softmax_score'] = softmax_scores
    df_score['out_dist_score'] = 1 - logistic(
        x=df_score['softmax_score'], x0=delta, k=20)
    return df_score
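
Example #2 turns the softmax score into a smooth out-of-distribution score through a project-local logistic helper. Assuming it is the standard logistic curve centred at x0 with steepness k, a minimal sketch:

def logistic(x, x0=0.0, k=1.0):
    # Standard logistic curve: 0.5 at x == x0, approaching 1 for x >> x0
    return 1.0 / (1.0 + np.exp(-k * (x - x0)))

With x0=delta and k=20, 1 - logistic(...) approaches 1 for softmax scores well below delta and 0 for scores well above it, in contrast to the hard threshold of Example #1.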
Example #3
    def predict_dataframe(model,
                          df,
                          x_col='path',
                          y_col='category',
                          id_col='image',
                          category_names=None,
                          augmentation_pipeline=None,
                          preprocessing_function=None,
                          batch_size=32,
                          workers=1):

        generator = ImageIterator(
            image_paths=df[x_col].tolist(),
            labels=None,
            augmentation_pipeline=augmentation_pipeline,
            batch_size=batch_size,
            shuffle=False,  # must be False; otherwise the balanced accuracy is wrong
            preprocessing_function=preprocessing_function,
            pregen_augmented_images=False,  # Only 1 epoch.
            data_format=K.image_data_format())

        # Predict
        # https://keras.io/getting-started/faq/#how-can-i-obtain-the-output-of-an-intermediate-layer
        intermediate_layer_model = Model(
            inputs=model.input, outputs=model.get_layer('dense_pred').output)
        logits = intermediate_layer_model.predict_generator(generator,
                                                            verbose=1,
                                                            workers=workers)

        # Cast softmax values to Python float: plain 0 and 1 are invalid
        # downstream, but 0.0 and 1.0 are valid.
        softmax_probs = softmax(logits).astype(float)

        # softmax probabilities
        df_softmax = pd.DataFrame(softmax_probs, columns=category_names)
        if y_col in df.columns:
            df_softmax[y_col] = df[y_col].to_numpy()
        df_softmax['pred_' + y_col] = np.argmax(softmax_probs, axis=1)
        df_softmax.insert(0, id_col, df[id_col].to_numpy())

        return df_softmax
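
Every example feeds raw logits through a project-local softmax helper. A minimal sketch, assuming it is the usual numerically stable softmax over the last axis:

def softmax(logits):
    # Subtract the per-row max before exponentiating to avoid overflow
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=-1, keepdims=True)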
Example #4
def compute_odin_softmax_scores(in_dist_pred_result_folder,
                                in_dist_image_folder,
                                out_dist_pred_result_folder,
                                out_dist_image_folder, model_folder,
                                softmax_score_folder, num_classes, batch_size):
    """ Calculate softmax scores for different combinations of ODIN parameters. """
    print('Begin to compute ODIN softmax scores')
    model_names = ['DenseNet201', 'Xception', 'ResNeXt50']
    # postfixes = ['best_balanced_acc', 'best_loss', 'latest']
    postfixes = ['best_balanced_acc']
    distributions = ['In', 'Out']

    # This file is used for recording what parameter combinations were already computed.
    progress_file = os.path.join(softmax_score_folder, 'Done.txt')
    done_set = set()
    if os.path.exists(progress_file):
        with open(progress_file, 'r') as f:
            done_set = set(line.rstrip('\n') for line in f)

    # ODIN parameters
    temperatures = [1000, 500, 200, 100, 50, 20, 10, 5, 2, 1]
    magnitudes = np.round(np.arange(0, 0.0041, 0.0002), 4)

    model_param_map = get_transfer_model_param_map()
    image_data_format = K.image_data_format()
    learning_phase = 0  # 0 = test, 1 = train

    for modelattr in (ModelAttr(x, y) for x in model_names for y in postfixes):
        # In-distribution data
        df = {}
        df['In'] = pd.read_csv(
            os.path.join(
                in_dist_pred_result_folder,
                "{}_{}.csv".format(modelattr.model_name, modelattr.postfix)))
        df['In']['path'] = df['In'].apply(
            lambda row: os.path.join(in_dist_image_folder, row['image'] + '.jpg'),
            axis=1)
        generator_in = ImageIterator(
            image_paths=df['In']['path'].tolist(),
            labels=None,
            augmentation_pipeline=LesionClassifier.create_aug_pipeline_val(
                model_param_map[modelattr.model_name].input_size),
            preprocessing_function=model_param_map[
                modelattr.model_name].preprocessing_func,
            batch_size=batch_size,
            shuffle=False,
            rescale=None,
            pregen_augmented_images=True,
            data_format=image_data_format)

        # Out-distribution data
        df['Out'] = pd.read_csv(
            os.path.join(
                out_dist_pred_result_folder,
                "{}_{}.csv".format(modelattr.model_name, modelattr.postfix)))
        df['Out']['path'] = df['Out'].apply(
            lambda row: os.path.join(out_dist_image_folder, row['image'] + '.jpg'),
            axis=1)
        generator_out = ImageIterator(
            image_paths=df['Out']['path'].tolist(),
            labels=None,
            augmentation_pipeline=LesionClassifier.create_aug_pipeline_val(
                model_param_map[modelattr.model_name].input_size),
            preprocessing_function=model_param_map[
                modelattr.model_name].preprocessing_func,
            batch_size=batch_size,
            shuffle=False,
            rescale=None,
            pregen_augmented_images=True,
            data_format=image_data_format)

        # Load model
        model_filepath = os.path.join(
            model_folder, "{}_{}.hdf5".format(modelattr.model_name,
                                              modelattr.postfix))
        print('Loading model: ', model_filepath)
        model = load_model(
            filepath=model_filepath,
            custom_objects={'balanced_accuracy': balanced_accuracy(num_classes)})
        need_norm_perturbations = (modelattr.model_name == 'DenseNet201'
                                   or modelattr.model_name == 'ResNeXt50')

        for temperature in temperatures:
            compute_perturbations, get_scaled_dense_pred_output = get_perturbation_helper_func(
                model, temperature, num_classes)

            for magnitude in magnitudes:
                for dist in distributions:
                    # Skip if this parameter combination was already computed
                    param_comb_id = "{}_{}, {}, {}, {}".format(
                        modelattr.model_name, modelattr.postfix, dist,
                        temperature, magnitude)
                    if param_comb_id in done_set:
                        print('Skip ', param_comb_id)
                        continue

                    generator = generator_in if dist == 'In' else generator_out

                    print(
                        "\n===== Temperature: {}, Magnitude: {}, {}-Distribution ====="
                        .format(temperature, magnitude, dist))
                    softmax_score_sub_folder = os.path.join(
                        softmax_score_folder,
                        "{}_{}".format(temperature, magnitude))
                    os.makedirs(softmax_score_sub_folder, exist_ok=True)

                    steps = math.ceil(df[dist].shape[0] / batch_size)
                    generator.reset()
                    f = open(
                        os.path.join(
                            softmax_score_sub_folder,
                            "{}_{}_ODIN_{}.txt".format(modelattr.model_name,
                                                       modelattr.postfix,
                                                       dist)), 'w')
                    for _ in trange(steps):
                        images = next(generator)
                        perturbations = compute_perturbations(
                            [images, learning_phase])[0]
                        # Get sign of perturbations
                        perturbations = np.sign(perturbations)

                        # Normalize the perturbations to the same space of image
                        # https://github.com/facebookresearch/odin/issues/5
                        # Perturbations divided by ISIC Training Set STD
                        if need_norm_perturbations:
                            perturbations = norm_perturbations(
                                perturbations, image_data_format)

                        # Add perturbations to images
                        perturbative_images = images - magnitude * perturbations

                        # Calculate the confidence after adding perturbations
                        dense_pred_outputs = get_scaled_dense_pred_output(
                            [perturbative_images, learning_phase])[0]
                        softmax_probs = softmax(dense_pred_outputs)
                        softmax_scores = np.max(softmax_probs, axis=-1)
                        for s in softmax_scores:
                            f.write("{}\n".format(s))
                    f.close()

                    with open(progress_file, 'a') as f_done:
                        f_done.write("{}\n".format(param_comb_id))
        del model
        K.clear_session()
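
Examples #1, #2, and #4 call the project-local norm_perturbations to map the sign perturbations into the same space as the preprocessed images. Following the inline comment and https://github.com/facebookresearch/odin/issues/5, this divides by the training-set standard deviation; a minimal sketch, with placeholder statistics rather than the actual ISIC values:

ISIC_TRAIN_STD = np.array([0.229, 0.224, 0.225])  # placeholder, not the real ISIC std

def norm_perturbations(perturbations, data_format):
    # Divide each colour channel by its training-set standard deviation so the
    # perturbation matches the scale of the normalized input images
    if data_format == 'channels_first':
        return perturbations / ISIC_TRAIN_STD.reshape((1, 3, 1, 1))
    return perturbations / ISIC_TRAIN_STD.reshape((1, 1, 1, 3))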