def main(args):

    # get parsed arguments from user
    input_dir = args.input_dir
    architecture = args.architecture
    color_mode = args.color
    loss = args.loss
    batch_size = args.batch
    epochs = args.epochs
    lr_estimate = args.lr_estimate
    policy = args.policy

    # check arguments
    check_arguments(architecture, color_mode, loss)

    # get autoencoder
    autoencoder = AutoEncoder(input_dir, architecture, color_mode, loss,
                              batch_size)

    # load data as generators that yield batches of preprocessed images
    preprocessor = Preprocessor(
        input_directory=input_dir,
        rescale=autoencoder.rescale,
        shape=autoencoder.shape,
        color_mode=autoencoder.color_mode,
    )
    train_generator = preprocessor.get_train_generator(
        batch_size=autoencoder.batch_size, shuffle=True)
    validation_generator = preprocessor.get_val_generator(
        batch_size=autoencoder.batch_size,
        shuffle=False,
        purpose="val",
    )

    # find the optimal learning rate for training
    lr_opt = autoencoder.find_lr_opt(train_generator, validation_generator,
                                     lr_estimate)

    # train with optimal learning rate
    autoencoder.fit(lr_opt=lr_opt, epochs=epochs, policy=policy)

    # save model and configuration
    autoencoder.save()

    # inspect validation and test images for visual assessment
    if args.inspect:
        inspection.inspect_images(model_path=autoencoder.save_path)
    logger.info("done.")
    return
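
# ---------------------------------------------------------------------------
# Hedged sketch: one possible way to wire the training entry point above to
# argparse. Only the dest names (input_dir, architecture, color, loss, batch,
# epochs, lr_estimate, policy, inspect) come from the attribute reads in
# main(); the flag spellings, defaults and help texts are assumptions.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="train an autoencoder")
    parser.add_argument("-d", "--input-dir", dest="input_dir", required=True,
                        help="directory containing the image dataset")
    parser.add_argument("-a", "--architecture", dest="architecture",
                        required=True, help="autoencoder architecture")
    parser.add_argument("-c", "--color", dest="color", default="grayscale",
                        help="color mode: grayscale or rgb")
    parser.add_argument("-l", "--loss", dest="loss", required=True,
                        help="training loss")
    parser.add_argument("-b", "--batch", dest="batch", type=int, default=8,
                        help="batch size")
    parser.add_argument("-e", "--epochs", dest="epochs", type=int,
                        default=None, help="number of training epochs")
    parser.add_argument("--lr-estimate", dest="lr_estimate", type=float,
                        default=None, help="initial learning rate estimate")
    parser.add_argument("--policy", dest="policy", default=None,
                        help="learning rate policy")
    parser.add_argument("-i", "--inspect", dest="inspect",
                        action="store_true",
                        help="generate inspection plots after training")
    main(parser.parse_args())
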
Example #2
def main(args):
    # Get validation arguments
    model_path = args.path
    method = args.method
    dtype = args.dtype

    # ============= LOAD MODEL AND PREPROCESSING CONFIGURATION ================

    # load model and info
    model, info, _ = utils.load_model_HDF5(model_path)
    # set parameters
    input_directory = info["data"]["input_directory"]
    architecture = info["model"]["architecture"]
    loss = info["model"]["loss"]
    rescale = info["preprocessing"]["rescale"]
    shape = info["preprocessing"]["shape"]
    color_mode = info["preprocessing"]["color_mode"]
    vmin = info["preprocessing"]["vmin"]
    vmax = info["preprocessing"]["vmax"]
    nb_validation_images = info["data"]["nb_validation_images"]

    # get the correct preprocessing function
    preprocessing_function = get_preprocessing_function(architecture)

    # ========= LOAD AND PREPROCESS VALIDATION & FINETUNING IMAGES =============

    # initialize preprocessor
    preprocessor = Preprocessor(
        input_directory=input_directory,
        rescale=rescale,
        shape=shape,
        color_mode=color_mode,
        preprocessing_function=preprocessing_function,
    )

    # -------------------------------------------------------------------

    # get validation generator
    validation_generator = preprocessor.get_val_generator(
        batch_size=nb_validation_images, shuffle=False)

    # retrieve preprocessed validation images from generator
    imgs_val_input = validation_generator.next()[0]

    # retrieve validation image names
    filenames_val = validation_generator.filenames

    # reconstruct (i.e. predict) validation images
    imgs_val_pred = model.predict(imgs_val_input)

    # convert to grayscale if RGB
    if color_mode == "rgb":
        imgs_val_input = tf.image.rgb_to_grayscale(imgs_val_input).numpy()
        imgs_val_pred = tf.image.rgb_to_grayscale(imgs_val_pred).numpy()

    # remove last channel since images are grayscale
    imgs_val_input = imgs_val_input[:, :, :, 0]
    imgs_val_pred = imgs_val_pred[:, :, :, 0]

    # instantiate TensorImages object to compute validation resmaps
    tensor_val = resmaps.TensorImages(
        imgs_input=imgs_val_input,
        imgs_pred=imgs_val_pred,
        vmin=vmin,
        vmax=vmax,
        method=method,
        dtype=dtype,
        filenames=filenames_val,
    )

    # -------------------------------------------------------------------

    # get finetuning generator
    nb_test_images = preprocessor.get_total_number_test_images()

    finetuning_generator = preprocessor.get_finetuning_generator(
        batch_size=nb_test_images, shuffle=False)

    # retrieve preprocessed test images from generator
    imgs_test_input = finetuning_generator.next()[0]
    filenames_test = finetuning_generator.filenames

    # select a representative subset of test images for finetuning
    # using stratified sampling
    assert "good" in finetuning_generator.class_indices
    index_array = finetuning_generator.index_array
    classes = finetuning_generator.classes
    _, index_array_ft, _, classes_ft = train_test_split(
        index_array,
        classes,
        test_size=FINETUNE_SPLIT,
        random_state=42,
        stratify=classes,
    )
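    # with stratify=classes, train_test_split preserves the per-class
    # proportions of the full test set in the held-out FINETUNE_SPLIT
    # fraction, so "good" and defective images are represented in the same
    # ratio as in the original test data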

    # get correct classes corresponding to selected images
    good_class_i = finetuning_generator.class_indices["good"]
    y_ft_true = np.array(
        [0 if class_i == good_class_i else 1 for class_i in classes_ft])

    # select test images for finetuning
    imgs_ft_input = imgs_test_input[index_array_ft]
    filenames_ft = list(np.array(filenames_test)[index_array_ft])

    # reconstruct (i.e. predict) finetuning images
    imgs_ft_pred = model.predict(imgs_ft_input)

    # convert to grayscale if RGB
    if color_mode == "rgb":
        imgs_ft_input = tf.image.rgb_to_grayscale(imgs_ft_input).numpy()
        imgs_ft_pred = tf.image.rgb_to_grayscale(imgs_ft_pred).numpy()

    # remove last channel since images are grayscale
    imgs_ft_input = imgs_ft_input[:, :, :, 0]
    imgs_ft_pred = imgs_ft_pred[:, :, :, 0]

    # instantiate TensorImages object to compute finetuning resmaps
    tensor_ft = resmaps.TensorImages(
        imgs_input=imgs_ft_input,
        imgs_pred=imgs_ft_pred,
        vmin=vmin,
        vmax=vmax,
        method=method,
        dtype=dtype,
        filenames=filenames_ft,
    )

    # ======================== COMPUTE THRESHOLDS ===========================

    # initialize finetuning dictionary
    dict_finetune = {
        "min_area": [],
        "threshold": [],
        "TPR": [],
        "TNR": [],
        "FPR": [],
        "FNR": [],
        "score": [],
    }

    # create discrete min_area values
    min_areas = np.arange(start=5, stop=505, step=STEP_MIN_AREA)
    length = len(min_areas)

    for i, min_area in enumerate(min_areas):
        print("step {}/{} | current min_area = {}".format(
            i + 1, length, min_area))
        # compute threshold corresponding to current min_area
        threshold = determine_threshold(
            resmaps=tensor_val.resmaps,
            min_area=min_area,
            thresh_min=tensor_val.thresh_min,
            thresh_max=tensor_val.thresh_max,
            thresh_step=tensor_val.thresh_step,
        )

        # apply the min_area, threshold pair to finetuning images
        y_ft_pred = predict_classes(resmaps=tensor_ft.resmaps,
                                    min_area=min_area,
                                    threshold=threshold)

        # confusion matrix
        tnr, fpr, fnr, tpr = confusion_matrix(y_ft_true,
                                              y_ft_pred,
                                              normalize="true").ravel()
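        # with binary labels (0 = good, 1 = anomalous) and normalize="true",
        # confusion_matrix returns [[TNR, FPR], [FNR, TPR]], which .ravel()
        # flattens into the unpacking order used above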

        # record current results
        dict_finetune["min_area"].append(min_area)
        dict_finetune["threshold"].append(threshold)
        dict_finetune["TPR"].append(tpr)
        dict_finetune["TNR"].append(tnr)
        dict_finetune["FPR"].append(fpr)
        dict_finetune["FNR"].append(fnr)
        dict_finetune["score"].append((tpr + tnr) / 2)

    # get min_area, threshold pair corresponding to best score
    max_score_i = np.argmax(dict_finetune["score"])
    max_score = float(dict_finetune["score"][max_score_i])
    best_min_area = int(dict_finetune["min_area"][max_score_i])
    best_threshold = float(dict_finetune["threshold"][max_score_i])

    # ===================== SAVE FINETUNING RESULTS ========================

    # create the results directory if it does not already exist
    model_dir_name = os.path.basename(str(Path(model_path).parent))

    save_dir = os.path.join(
        os.getcwd(),
        "results",
        input_directory,
        architecture,
        loss,
        model_dir_name,
        "finetuning",
        "{}_{}".format(method, dtype),
    )
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    # save area and threshold pair
    finetuning_result = {
        "best_min_area": best_min_area,
        "best_threshold": best_threshold,
        "best_score": max_score,
        "method": method,
        "dtype": dtype,
        "split": FINETUNE_SPLIT,
    }
    print("finetuning results: {}".format(finetuning_result))

    # save finetuning result
    with open(os.path.join(save_dir, "finetuning_result.json"),
              "w") as json_file:
        json.dump(finetuning_result, json_file, indent=4, sort_keys=False)
    logger.info("finetuning results saved at {}".format(save_dir))

    # save finetuning plots
    plot_min_area_threshold(dict_finetune,
                            index_best=max_score_i,
                            save_dir=save_dir)
    plot_scores(dict_finetune, index_best=max_score_i, save_dir=save_dir)

    return
def main(args):

    # get parsed arguments from user
    input_dir = args.input_dir
    architecture = args.architecture
    color_mode = args.color
    loss = args.loss
    batch_size = args.batch

    # check arguments
    check_arguments(architecture, color_mode, loss)

    # get autoencoder
    autoencoder = AutoEncoder(input_dir, architecture, color_mode, loss,
                              batch_size)

    # load data as generators that yield batches of preprocessed images
    preprocessor = Preprocessor(
        input_directory=input_dir,
        rescale=autoencoder.rescale,
        shape=autoencoder.shape,
        color_mode=autoencoder.color_mode,
        preprocessing_function=autoencoder.preprocessing_function,
    )
    train_generator = preprocessor.get_train_generator(
        batch_size=autoencoder.batch_size, shuffle=True)
    validation_generator = preprocessor.get_val_generator(
        batch_size=autoencoder.batch_size, shuffle=True)

    # find the optimal learning rate for training
    autoencoder.find_opt_lr(train_generator, validation_generator)

    # train
    autoencoder.fit()

    # save model
    autoencoder.save()

    if args.inspect:
        # -------------- INSPECTING VALIDATION IMAGES --------------
        logger.info("generating inspection plots of validation images...")

        # create a directory to save inspection plots
        inspection_val_dir = os.path.join(autoencoder.save_dir,
                                          "inspection_val")
        if not os.path.isdir(inspection_val_dir):
            os.makedirs(inspection_val_dir)

        inspection_val_generator = preprocessor.get_val_generator(
            batch_size=autoencoder.learner.val_data.samples, shuffle=False)
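        # the generator batch size equals the total number of validation
        # samples, so a single call to next() yields the whole validation
        # set at once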

        imgs_val_input = inspection_val_generator.next()[0]
        filenames_val = inspection_val_generator.filenames

        # get reconstructed images (i.e. predictions) on validation dataset
        logger.info("reconstructing validation images...")
        imgs_val_pred = autoencoder.model.predict(imgs_val_input)

        # convert to grayscale if RGB
        if color_mode == "rgb":
            imgs_val_input = tf.image.rgb_to_grayscale(imgs_val_input).numpy()
            imgs_val_pred = tf.image.rgb_to_grayscale(imgs_val_pred).numpy()

        # remove last channel since images are grayscale
        imgs_val_input = imgs_val_input[:, :, :, 0]
        imgs_val_pred = imgs_val_pred[:, :, :, 0]

        # instantiate TensorImages object to compute validation resmaps
        tensor_val = postprocessing.TensorImages(
            imgs_input=imgs_val_input,
            imgs_pred=imgs_val_pred,
            vmin=autoencoder.vmin,
            vmax=autoencoder.vmax,
            method=autoencoder.loss,
            dtype="float64",
            filenames=filenames_val,
        )

        # generate and save inspection validation plots
        tensor_val.generate_inspection_plots(group="validation",
                                             save_dir=inspection_val_dir)

        # -------------- INSPECTING TEST IMAGES --------------
        logger.info("generating inspection plots of test images...")

        # create a directory to save inspection plots
        inspection_test_dir = os.path.join(autoencoder.save_dir,
                                           "inspection_test")
        if not os.path.isdir(inspection_test_dir):
            os.makedirs(inspection_test_dir)

        nb_test_images = preprocessor.get_total_number_test_images()

        inspection_test_generator = preprocessor.get_test_generator(
            batch_size=nb_test_images, shuffle=False)

        imgs_test_input = inspection_test_generator.next()[0]
        filenames_test = inspection_test_generator.filenames

        # get reconstructed images (i.e. predictions) on test dataset
        logger.info("reconstructing test images...")
        imgs_test_pred = autoencoder.model.predict(imgs_test_input)

        # convert to grayscale if RGB
        if color_mode == "rgb":
            imgs_test_input = tf.image.rgb_to_grayscale(
                imgs_test_input).numpy()
            imgs_test_pred = tf.image.rgb_to_grayscale(imgs_test_pred).numpy()

        # remove last channel since images are grayscale
        imgs_test_input = imgs_test_input[:, :, :, 0]
        imgs_test_pred = imgs_test_pred[:, :, :, 0]

        # instantiate TensorImages object to compute test resmaps
        tensor_test = postprocessing.TensorImages(
            imgs_input=imgs_test_input,
            imgs_pred=imgs_test_pred,
            vmin=autoencoder.vmin,
            vmax=autoencoder.vmax,
            method=autoencoder.loss,
            dtype="float64",
            filenames=filenames_test,
        )

        # generate and save inspection test plots
        tensor_test.generate_inspection_plots(group="test",
                                              save_dir=inspection_test_dir)

    logger.info("done.")
    return
def main(args):
    # parse arguments
    model_path = args.path
    save = args.save

    # ============= LOAD MODEL AND PREPROCESSING CONFIGURATION ================

    # load model and info
    model, info, _ = utils.load_model_HDF5(model_path)
    # set parameters
    input_directory = info["data"]["input_directory"]
    architecture = info["model"]["architecture"]
    loss = info["model"]["loss"]
    rescale = info["preprocessing"]["rescale"]
    shape = info["preprocessing"]["shape"]
    color_mode = info["preprocessing"]["color_mode"]
    vmin = info["preprocessing"]["vmin"]
    vmax = info["preprocessing"]["vmax"]
    nb_validation_images = info["data"]["nb_validation_images"]

    # =================== LOAD VALIDATION PARAMETERS =========================

    model_dir_name = os.path.basename(str(Path(model_path).parent))
    finetune_dir = os.path.join(
        os.getcwd(),
        "results",
        input_directory,
        architecture,
        loss,
        model_dir_name,
        "finetuning",
    )
    subdirs = os.listdir(finetune_dir)
    for subdir in subdirs:
        logger.info("testing with finetuning parameters from \n{}...".format(
            os.path.join(finetune_dir, subdir)))
        try:
            with open(
                    os.path.join(finetune_dir, subdir,
                                 "finetuning_result.json"), "r") as read_file:
                validation_result = json.load(read_file)
        except FileNotFoundError:
            logger.warning("run finetune.py before testing.\nexiting script.")
            sys.exit()
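        # note: a missing finetuning_result.json aborts the entire script via
        # sys.exit() above, so any remaining subdirectories are not tested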

        min_area = validation_result["best_min_area"]
        threshold = validation_result["best_threshold"]
        method = validation_result["method"]
        dtype = validation_result["dtype"]

        # ====================== PREPROCESS TEST IMAGES ==========================

        # get the correct preprocessing function
        preprocessing_function = get_preprocessing_function(architecture)

        # initialize preprocessor
        preprocessor = Preprocessor(
            input_directory=input_directory,
            rescale=rescale,
            shape=shape,
            color_mode=color_mode,
            preprocessing_function=preprocessing_function,
        )

        # get test generator
        nb_test_images = preprocessor.get_total_number_test_images()
        test_generator = preprocessor.get_test_generator(
            batch_size=nb_test_images, shuffle=False)

        # retrieve test images from generator
        imgs_test_input = test_generator.next()[0]

        # retrieve test image names
        filenames = test_generator.filenames

        # predict on test images
        imgs_test_pred = model.predict(imgs_test_input)

        # instantiate TensorImages object
        tensor_test = postprocessing.TensorImages(
            imgs_input=imgs_test_input,
            imgs_pred=imgs_test_pred,
            vmin=vmin,
            vmax=vmax,
            method=method,
            dtype=dtype,
            filenames=filenames,
        )

        # ====================== CLASSIFICATION ==========================

        # retrieve ground truth
        y_true = get_true_classes(filenames)

        # predict classes on test images
        y_pred = predict_classes(resmaps=tensor_test.resmaps,
                                 min_area=min_area,
                                 threshold=threshold)

        # confusion matrix
        tnr, fpr, fnr, tpr = confusion_matrix(y_true, y_pred,
                                              normalize="true").ravel()

        # initialize dictionary to store test results
        test_result = {
            "min_area": min_area,
            "threshold": threshold,
            "TPR": tpr,
            "TNR": tnr,
            "score": (tpr + tnr) / 2,
            "method": method,
            "dtype": dtype,
        }

        # ====================== SAVE TEST RESULTS =========================

        # create directory to save test results
        save_dir = os.path.join(
            os.getcwd(),
            "results",
            input_directory,
            architecture,
            loss,
            model_dir_name,
            "test",
            subdir,
        )

        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)

        # save test result
        with open(os.path.join(save_dir, "test_result.json"),
                  "w") as json_file:
            json.dump(test_result, json_file, indent=4, sort_keys=False)

        # save classification of image files in a .txt file
        classification = {
            "filenames": filenames,
            "predictions": y_pred,
            "truth": y_true,
            "accurate_predictions": np.array(y_true) == np.array(y_pred),
        }
        df_clf = pd.DataFrame.from_dict(classification)
        with open(os.path.join(save_dir, "classification.txt"), "w") as f:
            f.write(
                "min_area = {}, threshold = {}, method = {}, dtype = {}\n\n".
                format(min_area, threshold, method, dtype))
            f.write(df_clf.to_string(header=True, index=True))

        # print classification results to console
        with pd.option_context("display.max_rows", None, "display.max_columns",
                               None):
            print(df_clf)

        # save segmented resmaps
        if save:
            save_segmented_images(tensor_test.resmaps, threshold, filenames,
                                  save_dir)

        # print test_results to console
        print("test results: {}".format(test_result))
Example #5
    def load_and_preprocess_data(self,
                                 path_trainingfile,
                                 path_developfile,
                                 path_testfile,
                                 tweet_length=40):
        """
        Load and preprocess the three datasets (training, development, test) for classification

        :param path_trainingfile: path to the file containing the training dataset
        :param path_developfile: path to the file containing the development dataset
        :param path_testfile: path to the file containing the test dataset
        :param tweet_length: length to which all tweets will be padded or truncated
        :return: None

        """
        self.tweet_length = tweet_length

        # instantiate new Preprocessor for cleaning and tokenizing the tweets
        pp = Preprocessor()

        # Load, preprocess and split Training dataset
        df_train = pd.read_csv(path_trainingfile,
                               sep='\t',
                               header=None,
                               names=['id', 'sentiment', 'text'],
                               quoting=3)
        df_train['tokens'] = df_train['text'].apply(pp.tokenize_tweet)
        x_train_raw = df_train['tokens'].values
        y_train_raw = df_train['sentiment'].values

        print('Loaded %s Tweets as Training Data' % len(x_train_raw))

        # Load, preprocess and split Development/Evaluation dataset
        df_develop = pd.read_csv(path_developfile,
                                 sep='\t',
                                 header=None,
                                 names=['id', 'sentiment', 'text'],
                                 quoting=3)
        df_develop['tokens'] = df_develop['text'].apply(pp.tokenize_tweet)
        x_develop_raw = df_develop['tokens'].values
        y_develop_raw = df_develop['sentiment'].values

        print('Loaded %s Tweets as Development Data' % len(x_develop_raw))

        # Load, preprocess and split Test dataset
        df_test = pd.read_csv(path_testfile,
                              sep='\t',
                              header=None,
                              names=['id', 'sentiment', 'text'],
                              quoting=3)
        df_test['tokens'] = df_test['text'].apply(pp.tokenize_tweet)
        x_test_raw = df_test['tokens'].values
        y_test_raw = df_test['sentiment'].values

        print('Loaded %s Tweets as Test Data' % len(x_test_raw))

        # Build overall vocabulary by adding every token to a set (automatically avoids duplicates)
        unique_tokens = set()

        for tweet in x_train_raw:
            for token in tweet:
                unique_tokens.add(token)

        for tweet in x_develop_raw:
            for token in tweet:
                unique_tokens.add(token)

        for tweet in x_test_raw:
            for token in tweet:
                unique_tokens.add(token)

        self.vocabulary = {
            word: index
            for index, word in enumerate(sorted(unique_tokens))
        }
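        # indices start at 0 while pad_sequences pads with 0 by default, so
        # the padding value collides with the first vocabulary token; a
        # common alternative is to reserve index 0 for padding, e.g.
        # enumerate(sorted(unique_tokens), start=1)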

        print('Overall vocabulary size: %s' % len(self.vocabulary))

        # Translate Tokens to Indices in the Tweets and pad them to the same length
        x_train_indices = [[self.vocabulary[token] for token in tweet]
                           for tweet in x_train_raw]
        x_develop_indices = [[self.vocabulary[token] for token in tweet]
                             for tweet in x_develop_raw]
        x_test_indices = [[self.vocabulary[token] for token in tweet]
                          for tweet in x_test_raw]

        self.x_train = pad_sequences(x_train_indices, tweet_length)
        self.x_develop = pad_sequences(x_develop_indices, tweet_length)
        self.x_test = pad_sequences(x_test_indices, tweet_length)

        # Transform Sentiment Labels into one-hot (categorical) format
        encoder = LabelEncoder()
        encoder.fit(y_train_raw)

        self.y_train = to_categorical(encoder.transform(y_train_raw))
        self.y_develop = to_categorical(encoder.transform(y_develop_raw))
        self.y_test = to_categorical(encoder.transform(y_test_raw))
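        # the encoder is fitted on the training labels only, so this assumes
        # the development and test sets contain no sentiment class missing
        # from the training data (transform would raise a ValueError otherwise)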

        # Short Summary for Debugging
        print("Shapes of Input Tensors X :", self.x_train.shape,
              self.x_develop.shape, self.x_test.shape)
        print("Shapes of Output Tensors Y:", self.y_train.shape,
              self.y_develop.shape, self.y_test.shape)
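
    # -----------------------------------------------------------------------
    # Hedged usage sketch (the enclosing class name SentimentClassifier and
    # the .tsv paths are purely illustrative placeholders):
    #
    #     clf = SentimentClassifier()
    #     clf.load_and_preprocess_data("train.tsv", "dev.tsv", "test.tsv",
    #                                  tweet_length=40)
    #     # clf.x_train / clf.y_train now hold padded index sequences and
    #     # one-hot labels ready for training
    # -----------------------------------------------------------------------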
Example #6
def main(args):
    # Get validation arguments
    model_path = args.path
    color = args.color  # NOT YET TAKEN INTO ACCOUNT
    method = args.method
    dtype = args.dtype

    # ============= LOAD MODEL AND PREPROCESSING CONFIGURATION ================

    # load model and info
    model, info, _ = utils.load_model_HDF5(model_path)
    # set parameters
    input_directory = info["data"]["input_directory"]
    architecture = info["model"]["architecture"]
    loss = info["model"]["loss"]
    rescale = info["preprocessing"]["rescale"]
    shape = info["preprocessing"]["shape"]
    color_mode = info["preprocessing"]["color_mode"]
    vmin = info["preprocessing"]["vmin"]
    vmax = info["preprocessing"]["vmax"]
    nb_validation_images = info["data"]["nb_validation_images"]

    # ========= LOAD AND PREPROCESS VALIDATION & FINETUNING IMAGES =============

    # initialize preprocessor
    preprocessor = Preprocessor(
        input_directory=input_directory,
        rescale=rescale,
        shape=shape,
        color_mode=color_mode,
    )

    # -------------------------------------------------------------------

    # get validation generator
    validation_generator = preprocessor.get_val_generator(
        batch_size=nb_validation_images, shuffle=False)

    # retrieve preprocessed validation images from generator
    imgs_val_input = validation_generator.next()[0]

    # retrieve validation image names
    filenames_val = validation_generator.filenames

    # reconstruct (i.e. predict) validation images
    imgs_val_pred = model.predict(imgs_val_input)

    # instantiate TensorImages object to compute validation resmaps
    tensor_val = postprocessing.TensorImages(
        imgs_input=imgs_val_input,
        imgs_pred=imgs_val_pred,
        vmin=vmin,
        vmax=vmax,
        color="grayscale",
        method=method,
        dtype=dtype,
        filenames=filenames_val,
    )

    # -------------------------------------------------------------------

    # get finetuning generator
    nb_test_images = preprocessor.get_total_number_test_images()

    finetuning_generator = preprocessor.get_finetuning_generator(
        batch_size=nb_test_images, shuffle=False)

    # retrieve preprocessed test images from generator
    imgs_test_input = finetuning_generator.next()[0]
    filenames_test = finetuning_generator.filenames

    # select a representative subset of test images for finetuning
    # using stratified sampling
    assert "good" in finetuning_generator.class_indices
    index_array = finetuning_generator.index_array
    classes = finetuning_generator.classes
    _, index_array_ft, _, classes_ft = train_test_split(
        index_array,
        classes,
        test_size=FINETUNE_SPLIT,
        random_state=42,
        stratify=classes,
    )

    # get correct classes corresponding to selected images
    good_class_i = finetuning_generator.class_indices["good"]
    y_ft_true = np.array(
        [0 if class_i == good_class_i else 1 for class_i in classes_ft])

    # select test images for finetuning
    imgs_ft_input = imgs_test_input[index_array_ft]
    filenames_ft = list(np.array(filenames_test)[index_array_ft])

    # reconstruct (i.e. predict) finetuning images
    imgs_ft_pred = model.predict(imgs_ft_input)

    # instantiate TensorImages object to compute finetuning resmaps
    tensor_ft = postprocessing.TensorImages(
        imgs_input=imgs_ft_input,
        imgs_pred=imgs_ft_pred,
        vmin=vmin,
        vmax=vmax,
        color="grayscale",
        method=method,
        dtype=dtype,
        filenames=filenames_ft,
    )

    # ======================== COMPUTE THRESHOLDS ===========================

    # initialize finetuning dictionary
    dict_finetune = {
        "min_area": [],
        "threshold": [],
        "TPR": [],
        "TNR": [],
        "FPR": [],
        "FNR": [],
        "score": [],
    }

    # initialize discrete min_area values
    min_areas = np.arange(
        start=START_MIN_AREA,
        stop=STOP_MIN_AREA,
        step=STEP_MIN_AREA,
    )

    # initialize thresholds
    thresholds = np.arange(
        start=tensor_val.thresh_min,
        stop=tensor_val.thresh_max + tensor_val.thresh_step,
        step=tensor_val.thresh_step,
    )
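    # np.arange excludes its stop value, so adding thresh_step to thresh_max
    # ensures that thresh_max itself is included as a candidate threshold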

    # compute largest anomaly areas in resmaps for increasing thresholds
    logger.info(
        "step 1/2: computing largest anomaly areas for increasing thresholds..."
    )
    largest_areas = calculate_largest_areas(
        resmaps=tensor_val.resmaps,
        thresholds=thresholds,
    )

    # select best minimum area and threshold pair to use for testing
    logger.info(
        "step 2/2: selecting best minimum area and threshold pair for testing..."
    )
    printProgressBar(0,
                     len(min_areas),
                     prefix="Progress:",
                     suffix="Complete",
                     length=80)

    for i, min_area in enumerate(min_areas):
        # scan thresholds until the largest remaining anomaly area drops
        # below the current min_area
        for index, largest_area in enumerate(largest_areas):
            if min_area > largest_area:
                break

        # select threshold corresponding to current min_area
        threshold = thresholds[index]
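        # assuming thresholding keeps only resmap pixels above the threshold,
        # largest_areas is non-increasing, so this is the smallest threshold
        # at which no anomalous region of at least min_area pixels survives
        # on the validation resmaps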

        # apply the min_area, threshold pair to finetuning images
        y_ft_pred = predict_classes(resmaps=tensor_ft.resmaps,
                                    min_area=min_area,
                                    threshold=threshold)

        # confusion matrix
        tnr, fpr, fnr, tpr = confusion_matrix(y_ft_true,
                                              y_ft_pred,
                                              normalize="true").ravel()

        # record current results
        dict_finetune["min_area"].append(min_area)
        dict_finetune["threshold"].append(threshold)
        dict_finetune["TPR"].append(tpr)
        dict_finetune["TNR"].append(tnr)
        dict_finetune["FPR"].append(fpr)
        dict_finetune["FNR"].append(fnr)
        dict_finetune["score"].append((tpr + tnr) / 2)

        # print progress bar
        printProgressBar(i + 1,
                         len(min_areas),
                         prefix="Progress:",
                         suffix="Complete",
                         length=80)

    # get min_area, threshold pair corresponding to best score
    max_score_i = np.argmax(dict_finetune["score"])
    max_score = float(dict_finetune["score"][max_score_i])
    best_min_area = int(dict_finetune["min_area"][max_score_i])
    best_threshold = float(dict_finetune["threshold"][max_score_i])

    # ===================== SAVE FINETUNING RESULTS ========================

    # create the results directory if it does not already exist
    model_dir_name = os.path.basename(str(Path(model_path).parent))

    save_dir = os.path.join(
        os.getcwd(),
        "results",
        input_directory,
        architecture,
        loss,
        model_dir_name,
        "finetuning",
        "{}_{}".format(method, dtype),
    )
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    # save area and threshold pair
    finetuning_result = {
        "best_min_area": best_min_area,
        "best_threshold": best_threshold,
        "best_score": max_score,
        "method": method,
        "dtype": dtype,
        "split": FINETUNE_SPLIT,
    }
    logger.info("finetuning results: {}".format(finetuning_result))

    # save finetuning result
    with open(os.path.join(save_dir, "finetuning_result.json"),
              "w") as json_file:
        json.dump(finetuning_result, json_file, indent=4, sort_keys=False)
    logger.info("finetuning results saved at {}".format(save_dir))

    # save finetuning plots
    plot_min_area_threshold(dict_finetune,
                            index_best=max_score_i,
                            save_dir=save_dir)
    plot_scores(dict_finetune, index_best=max_score_i, save_dir=save_dir)

    return
def inspect_images(model_path):
    # load model for inspection
    logger.info("loading model for inspection...")
    model, info, _ = utils.load_model_HDF5(model_path)
    save_dir = os.path.dirname(model_path)

    input_dir = info["data"]["input_directory"]
    # architecture = info["model"]["architecture"]
    # loss = info["model"]["loss"]
    rescale = info["preprocessing"]["rescale"]
    shape = info["preprocessing"]["shape"]
    color_mode = info["preprocessing"]["color_mode"]
    vmin = info["preprocessing"]["vmin"]
    vmax = info["preprocessing"]["vmax"]
    nb_validation_images = info["data"]["nb_validation_images"]

    # instantiate preprocessor object to preprocess validation and test inspection images
    preprocessor = Preprocessor(
        input_directory=input_dir,
        rescale=rescale,
        shape=shape,
        color_mode=color_mode,
    )

    # -------------- INSPECTING VALIDATION IMAGES --------------
    logger.info("generating inspection plots for validation images...")

    inspection_val_generator = preprocessor.get_val_generator(
        batch_size=nb_validation_images, shuffle=False)

    imgs_val_input = inspection_val_generator.next()[0]
    filenames_val = inspection_val_generator.filenames

    # get indices of validation inspection images
    val_insp_i = [
        filenames_val.index(filename)
        for filename in config.FILENAMES_VAL_INSPECTION
    ]
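    # entries of config.FILENAMES_VAL_INSPECTION must match the generator's
    # filenames exactly (including any class subdirectory prefix), otherwise
    # .index() raises a ValueError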
    imgs_val_input = imgs_val_input[val_insp_i]

    # reconstruct validation inspection images (i.e. predict)
    imgs_val_pred = model.predict(imgs_val_input)

    # instantiate ResmapPlotter object to compute resmaps
    postproc_val = postprocessing.ResmapPlotter(
        imgs_input=imgs_val_input,
        imgs_pred=imgs_val_pred,
        filenames=config.FILENAMES_VAL_INSPECTION,
        color="grayscale",
        vmin=vmin,
        vmax=vmax,
    )

    # generate resmaps and save
    fig_res_val = postproc_val.generate_inspection_figure()
    fig_res_val.savefig(os.path.join(save_dir, "fig_insp_val.svg"))

    # -------------- INSPECTING TEST IMAGES --------------
    logger.info("generating inspection plots for test images...")

    nb_test_images = preprocessor.get_total_number_test_images()

    inspection_test_generator = preprocessor.get_test_generator(
        batch_size=nb_test_images, shuffle=False)
    # get preprocessed test images
    imgs_test_input = inspection_test_generator.next()[0]
    filenames_test = inspection_test_generator.filenames

    # get indices of test inspection images
    test_insp_i = [
        filenames_test.index(filename)
        for filename in config.FILENAMES_TEST_INSPECTION
    ]
    imgs_test_input = imgs_test_input[test_insp_i]

    # reconstruct inspection test images (i.e. predict)
    imgs_test_pred = model.predict(imgs_test_input)

    # instantiate ResmapPlotter object to compute resmaps
    postproc_test = postprocessing.ResmapPlotter(
        imgs_input=imgs_test_input,
        imgs_pred=imgs_test_pred,
        filenames=config.FILENAMES_TEST_INSPECTION,
        color="grayscale",
        vmin=vmin,
        vmax=vmax,
    )

    # generate resmaps and save
    fig_res_test = postproc_test.generate_inspection_figure()
    fig_res_test.savefig(os.path.join(save_dir, "fig_insp_test.svg"))

    # --------------------------------------------------

    # fig_score_insp = postproc_test.generate_score_scatter_plot(
    #     inspection_test_generator, model_path, filenames_test_insp
    # )
    # fig_score_insp.savefig(os.path.join(save_dir, "fig_score_insp.svg"))

    # fig_score_test = postproc_test.generate_score_scatter_plot(
    #     inspection_test_generator, model_path
    # )
    # fig_score_test.savefig(os.path.join(save_dir, "fig_score_test.svg"))
    return