Example 1
    def run_cropping(self,
                     list_of_files,
                     overwrite_existing=False,
                     output_folder=None):
        """
        Also copies the ground truth nifti segmentations into the preprocessed folder so that they can be used for
        evaluation on the cluster
        :param list_of_files: list of lists of files, one inner list per case, e.g.
               [[PATIENTID_TIMESTEP_0000.nii.gz, ...], ...]. The last entry of each inner list is the ground truth
               segmentation (or None if there is none)
        :param overwrite_existing: if True, already cropped cases are recomputed
        :param output_folder: if provided, overrides self.output_folder
        :return:
        """
        if output_folder is not None:
            self.output_folder = output_folder

        output_folder_gt = os.path.join(self.output_folder, "gt_segmentations")
        maybe_mkdir_p(output_folder_gt)
        for j, case in enumerate(list_of_files):
            if case[-1] is not None:
                shutil.copy(case[-1], output_folder_gt)

        list_of_args = []
        for j, case in enumerate(list_of_files):
            case_identifier = get_case_identifier(case)
            list_of_args.append((case, case_identifier, overwrite_existing))

        p = Pool(self.num_threads)
        p.map(self._load_crop_save_star, list_of_args)
        p.close()
        p.join()
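
A minimal usage sketch (all paths are placeholders; the constructor is the one shown in Example 6). The last entry of
each inner list is treated as the ground truth segmentation:

# Hypothetical usage of run_cropping; all paths are illustrative.
cropper = ImageCropper(num_threads=4, output_folder="/data/cropped")
cases = [
    ["/raw/Pat01_T0_0000.nii.gz", "/raw/Pat01_T0_seg.nii.gz"],  # image + GT seg
    ["/raw/Pat02_T0_0000.nii.gz", None],                        # no GT available
]
cropper.run_cropping(cases, overwrite_existing=False)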
Example 2
def apply_postprocessing_to_folder(input_folder: str,
                                   output_folder: str,
                                   for_which_classes: list,
                                   min_valid_object_size: dict = None,
                                   num_processes=8):
    """
    Removes all but the largest connected component from every nifti in a folder
    :param input_folder:
    :param output_folder:
    :param for_which_classes:
    :param min_valid_object_size:
    :param num_processes:
    :return:
    """
    maybe_mkdir_p(output_folder)
    p = Pool(num_processes)
    nii_files = subfiles(input_folder, suffix=".nii.gz", join=False)
    input_files = [join(input_folder, i) for i in nii_files]
    out_files = [join(output_folder, i) for i in nii_files]
    results = p.starmap_async(
        load_remove_save,
        zip(input_files, out_files, [for_which_classes] * len(input_files),
            [min_valid_object_size] * len(input_files)))
    res = results.get()
    p.close()
    p.join()
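
A hedged usage sketch (folder names and class ids are assumptions, not taken from the source):

# Keep only the largest connected component of classes 1 and 2 in every
# .nii.gz found in the input folder; paths and class ids are illustrative.
apply_postprocessing_to_folder("/preds/raw", "/preds/postprocessed",
                               for_which_classes=[1, 2],
                               min_valid_object_size=None, num_processes=8)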
Example 3
    def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier,
            num_threads=default_num_threads, force_separate_z=None):
        """

        :param target_spacings: list of lists [[1.25, 1.25, 5]]
        :param input_folder_with_cropped_npz: dim: c, x, y, z | npz_file['data'] np.savez_compressed(fname.npz, data=arr)
        :param output_folder:
        :param num_threads:
        :param force_separate_z: None
        :return:
        """
        print("Initializing to run preprocessing")
        print("npz folder:", input_folder_with_cropped_npz)
        print("output_folder:", output_folder)
        list_of_cropped_npz_files = subfiles(input_folder_with_cropped_npz, True, None, ".npz", True)
        maybe_mkdir_p(output_folder)
        num_stages = len(target_spacings)
        if not isinstance(num_threads, (list, tuple, np.ndarray)):
            num_threads = [num_threads] * num_stages

        assert len(num_threads) == num_stages

        for i in range(num_stages):
            all_args = []
            output_folder_stage = os.path.join(output_folder, data_identifier + "_stage%d" % i)
            maybe_mkdir_p(output_folder_stage)
            spacing = target_spacings[i]
            for j, case in enumerate(list_of_cropped_npz_files):
                case_identifier = get_case_identifier_from_npz(case)
                args = spacing, case_identifier, output_folder_stage, input_folder_with_cropped_npz, force_separate_z
                all_args.append(args)
            p = Pool(num_threads[i])
            p.map(self._run_star, all_args)
            p.close()
            p.join()
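
A sketch of how this might be invoked for a two-stage (lowres plus fullres) setup; all spacings, folders and thread
counts below are illustrative assumptions:

# preprocessor is assumed to be an instance of the class defining run().
# Each stage gets its own target spacing and its own worker count.
preprocessor.run(target_spacings=[[2.5, 2.5, 5.0], [1.25, 1.25, 5.0]],
                 input_folder_with_cropped_npz="/data/cropped/Task007_Pancreas",
                 output_folder="/data/preprocessed/Task007_Pancreas",
                 data_identifier="nnUNetData",
                 num_threads=(8, 4))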
Example 4
def split_4d(input_folder,
             num_processes=default_num_threads,
             overwrite_task_output_id=None):
    assert isdir(join(input_folder, "imagesTr")) and isdir(join(input_folder, "labelsTr")) and \
           isfile(join(input_folder, "dataset.json")), \
        "The input folder must be a valid Task folder from the Medical Segmentation Decathlon with at least the " \
        "imagesTr and labelsTr subfolders and the dataset.json file"

    while input_folder.endswith("/"):
        input_folder = input_folder[:-1]

    full_task_name = input_folder.split("/")[-1]

    assert full_task_name.startswith(
        "Task"
    ), "The input folder must point to a folder that starts with TaskXX_"

    first_underscore = full_task_name.find("_")
    assert first_underscore == 6, "The input folder name must start with TaskXX, where XX is a 2-digit id: 00, 01, 02, etc."

    input_task_id = int(full_task_name[4:6])
    if overwrite_task_output_id is None:
        overwrite_task_output_id = input_task_id

    task_name = full_task_name[7:]

    output_folder = join(nnUNet_raw_data,
                         "Task%03.0d_" % overwrite_task_output_id + task_name)

    if isdir(output_folder):
        shutil.rmtree(output_folder)

    files = []
    output_dirs = []

    maybe_mkdir_p(output_folder)
    for subdir in ["imagesTr", "imagesTs"]:
        curr_out_dir = join(output_folder, subdir)
        if not isdir(curr_out_dir):
            os.mkdir(curr_out_dir)
        curr_dir = join(input_folder, subdir)
        nii_files = [
            join(curr_dir, i) for i in os.listdir(curr_dir)
            if i.endswith(".nii.gz")
        ]
        nii_files.sort()
        for n in nii_files:
            files.append(n)
            output_dirs.append(curr_out_dir)

    shutil.copytree(join(input_folder, "labelsTr"),
                    join(output_folder, "labelsTr"))

    p = Pool(num_processes)
    p.starmap(split_4d_nifti, zip(files, output_dirs))
    p.close()
    p.join()
    shutil.copy(join(input_folder, "dataset.json"), output_folder)
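
For illustration, a hypothetical call on a Medical Segmentation Decathlon task folder (the task name is an assumption):

# "Task07_Pancreas" parses to input_task_id=7 and task_name="Pancreas";
# the split 3D files land in join(nnUNet_raw_data, "Task007_Pancreas").
split_4d("/downloads/Task07_Pancreas", num_processes=8,
         overwrite_task_output_id=7)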
Example 5
def plan_and_preprocess(task_string,
                        processes_lowres=default_num_threads,
                        processes_fullres=3,
                        no_preprocessing=False):
    from nnunet.experiment_planning.experiment_planner_baseline_2DUNet import ExperimentPlanner2D
    from nnunet.experiment_planning.experiment_planner_baseline_3DUNet import ExperimentPlanner

    preprocessing_output_dir_this_task_train = join(preprocessing_output_dir,
                                                    task_string)
    cropped_out_dir = join(nnUNet_cropped_data, task_string)
    maybe_mkdir_p(preprocessing_output_dir_this_task_train)

    shutil.copy(join(cropped_out_dir, "dataset_properties.pkl"),
                preprocessing_output_dir_this_task_train)
    shutil.copy(join(nnUNet_raw_data, task_string, "dataset.json"),
                preprocessing_output_dir_this_task_train)

    exp_planner = ExperimentPlanner(cropped_out_dir,
                                    preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing((processes_lowres, processes_fullres))

    exp_planner = ExperimentPlanner2D(
        cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing(processes_fullres)

    # write which class is in which slice to all training cases (required to speed up 2D Dataloader)
    # This is done for all data so that if we wanted to use them with 2D we could do so

    if not no_preprocessing:
        p = Pool(default_num_threads)

        # if there is more than one my_data_identifier (different branches) then this code will run for all of them
        # if they start with the same string. Not problematic, but not pretty
        stages = [
            i for i in subdirs(
                preprocessing_output_dir_this_task_train, join=True, sort=True)
            if i.split("/")[-1].find("stage") != -1
        ]
        for s in stages:
            print(s.split("/")[-1])
            list_of_npz_files = subfiles(s, True, None, ".npz", True)
            list_of_pkl_files = [i[:-4] + ".pkl" for i in list_of_npz_files]
            all_classes = []
            for pk in list_of_pkl_files:
                with open(pk, 'rb') as f:
                    props = pickle.load(f)
                all_classes_tmp = np.array(props['classes'])
                all_classes.append(all_classes_tmp[all_classes_tmp >= 0])
            p.map(add_classes_in_slice_info,
                  zip(list_of_npz_files, list_of_pkl_files, all_classes))
        p.close()
        p.join()
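
A usage sketch (the task string is illustrative). The defaults use fewer full-resolution workers, presumably because
full-resolution preprocessing needs more memory:

# Hypothetical: plan the 3D and 2D experiments for a task, then preprocess.
plan_and_preprocess("Task007_Pancreas", processes_lowres=8,
                    processes_fullres=3, no_preprocessing=False)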
Example 6
    def __init__(self, num_threads, output_folder=None):
        """
        This one finds a mask of nonzero elements (must be nonzero in all modalities) and crops the image to that mask.
        In the case of BraTS and ISLES data this results in a significant reduction in image size
        :param num_threads:
        :param output_folder: where to store the cropped data
        """
        self.output_folder = output_folder
        self.num_threads = num_threads

        if self.output_folder is not None:
            maybe_mkdir_p(self.output_folder)
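
To make the docstring concrete, a simplified sketch of the nonzero-mask crop it describes (an illustration, not the
class's actual implementation):

import numpy as np

def crop_to_nonzero_sketch(data):
    # data has shape (c, x, y, z); keep the bounding box of voxels that are
    # nonzero in all modalities, as the docstring describes. Sketch only.
    mask = np.all(data != 0, axis=0)
    coords = np.argwhere(mask)
    lo, hi = coords.min(axis=0), coords.max(axis=0) + 1
    return data[:, lo[0]:hi[0], lo[1]:hi[1], lo[2]:hi[2]]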
Example 7
def crop(task_string, override=False, num_threads=default_num_threads):
    cropped_out_dir = join(nnUNet_cropped_data, task_string)
    maybe_mkdir_p(cropped_out_dir)

    if override and isdir(cropped_out_dir):
        shutil.rmtree(cropped_out_dir)
        maybe_mkdir_p(cropped_out_dir)

    splitted_4d_output_dir_task = join(nnUNet_raw_data, task_string)
    lists, _ = create_lists_from_splitted_dataset(splitted_4d_output_dir_task)

    imgcrop = ImageCropper(num_threads, cropped_out_dir)
    imgcrop.run_cropping(lists, overwrite_existing=override)
    shutil.copy(join(nnUNet_raw_data, task_string, "dataset.json"),
                cropped_out_dir)
Example 8
    def initialize(self, training=True, force_load_plans=False):
        """
        For prediction of test cases just set training=False; this will prevent loading of training data and
        training batchgenerator initialization
        :param training:
        :param force_load_plans: if True, reload the plans file even if self.plans is already set
        :return:
        """

        maybe_mkdir_p(self.output_folder)

        if force_load_plans or (self.plans is None):
            self.load_plans_file()

        self.process_plans(self.plans)

        self.setup_DA_params()

        self.folder_with_preprocessed_data = join(
            self.dataset_directory,
            self.plans['data_identifier'] + "_stage%d" % self.stage)
        if training:
            self.dl_tr, self.dl_val = self.get_basic_generators()
            if self.unpack_data:
                self.print_to_log_file("unpacking dataset")
                unpack_dataset(self.folder_with_preprocessed_data)
                self.print_to_log_file("done")
            else:
                self.print_to_log_file(
                    "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                    "will wait all winter for your model to finish!")
            self.tr_gen, self.val_gen = get_default_augmentation(
                self.dl_tr, self.dl_val,
                self.data_aug_params['patch_size_for_spatialtransform'],
                self.data_aug_params)
            self.print_to_log_file("TRAINING KEYS:\n %s" %
                                   (str(self.dataset_tr.keys())),
                                   also_print_to_console=False)
            self.print_to_log_file("VALIDATION KEYS:\n %s" %
                                   (str(self.dataset_val.keys())),
                                   also_print_to_console=False)
        else:
            pass
        self.initialize_network()
        self.initialize_optimizer_and_scheduler()
        # assert isinstance(self.network, (SegmentationNetwork, nn.DataParallel))
        self.was_initialized = True
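
As the docstring notes, inference only needs the network, so a typical call for test-time prediction is simply
(a usage note, not new behavior):

# Dataloaders and dataset unpacking are skipped; the network and optimizer
# are still built and was_initialized is set.
trainer.initialize(training=False)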
Example 9
def predict_next_stage(trainer,
                       stage_to_be_predicted_folder,
                       force_separate_z=False,
                       interpolation_order=1,
                       interpolation_order_z=0):
    output_folder = join(pardir(trainer.output_folder), "pred_next_stage")
    maybe_mkdir_p(output_folder)

    export_pool = Pool(2)
    results = []

    for pat in trainer.dataset_val.keys():
        print(pat)
        data_file = trainer.dataset_val[pat]['data_file']
        data_preprocessed = np.load(data_file)['data'][:-1]
        predicted = trainer.predict_preprocessed_data_return_softmax(
            data_preprocessed, True, 1, False, 1,
            trainer.data_aug_params['mirror_axes'], True, True, 2,
            trainer.patch_size, True)
        data_file_nofolder = data_file.split("/")[-1]
        data_file_nextstage = join(stage_to_be_predicted_folder,
                                   data_file_nofolder)
        data_nextstage = np.load(data_file_nextstage)['data']
        target_shp = data_nextstage.shape[1:]
        output_file = join(
            output_folder,
            data_file_nextstage.split("/")[-1][:-4] + "_segFromPrevStage.npz")

        if np.prod(predicted.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
            # float32 needs 4 bytes per value; pickles above ~2 GB cannot be sent
            # through multiprocessing, so write to disk and pass the filename instead
            np.save(output_file[:-4] + ".npy", predicted)
            predicted = output_file[:-4] + ".npy"

        results.append(
            export_pool.starmap_async(
                resample_and_save,
                [(predicted, target_shp, output_file, force_separate_z,
                  interpolation_order, interpolation_order_z)]))

    _ = [i.get() for i in results]
    export_pool.close()
    export_pool.join()
Example 10
    def print_to_log_file(self,
                          *args,
                          also_print_to_console=True,
                          add_timestamp=True):

        timestamp = time()
        dt_object = datetime.fromtimestamp(timestamp)

        if add_timestamp:
            args = ("%s:" % dt_object, *args)

        if self.log_file is None:
            maybe_mkdir_p(self.output_folder)
            # use a separate name: overwriting 'timestamp' with a datetime would break
            # the datetime.fromtimestamp(timestamp) call in the except branch below
            now = datetime.now()
            self.log_file = join(
                self.output_folder,
                "training_log_%d_%d_%d_%02.0d_%02.0d_%02.0d.txt" %
                (now.year, now.month, now.day, now.hour, now.minute, now.second))
            with open(self.log_file, 'w') as f:
                f.write("Starting... \n")
        successful = False
        max_attempts = 5
        ctr = 0
        while not successful and ctr < max_attempts:
            try:
                with open(self.log_file, 'a+') as f:
                    for a in args:
                        f.write(str(a))
                        f.write(" ")
                    f.write("\n")
                successful = True
            except IOError:
                print(
                    "%s: failed to log: " % datetime.fromtimestamp(timestamp),
                    sys.exc_info())
                sleep(0.5)
                ctr += 1
        if also_print_to_console:
            print(*args)
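
A usage sketch (the arguments are illustrative): any number of positional arguments is written space-separated to the
log file, prefixed with a timestamp by default:

# Appends a line like "2020-01-01 12:00:00.000000: epoch 12 train loss 0.4213"
# to training_log_*.txt and also prints it to the console.
trainer.print_to_log_file("epoch", 12, "train loss", 0.4213)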
Example 11
    def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier,
            num_threads=default_num_threads, force_separate_z=None):
        print("Initializing to run preprocessing")
        print("npz folder:", input_folder_with_cropped_npz)
        print("output_folder:", output_folder)
        list_of_cropped_npz_files = subfiles(input_folder_with_cropped_npz, True, None, ".npz", True)
        assert len(list_of_cropped_npz_files) != 0, "set list of files first"
        maybe_mkdir_p(output_folder)
        all_args = []
        num_stages = len(target_spacings)
        for i in range(num_stages):
            output_folder_stage = os.path.join(output_folder, data_identifier + "_stage%d" % i)
            maybe_mkdir_p(output_folder_stage)
            spacing = target_spacings[i]
            for j, case in enumerate(list_of_cropped_npz_files):
                case_identifier = get_case_identifier_from_npz(case)
                args = spacing, case_identifier, output_folder_stage, input_folder_with_cropped_npz, force_separate_z
                all_args.append(args)
        p = Pool(num_threads)
        p.map(self._run_star, all_args)
        p.close()
        p.join()
Example 12
    def run_training(self):
        _ = self.tr_gen.next()
        _ = self.val_gen.next()

        torch.cuda.empty_cache()

        self._maybe_init_amp()

        self.plot_network_architecture()

        if cudnn.benchmark and cudnn.deterministic:
            warn(
                "torch.backends.cudnn.deterministic is True indicating a deterministic training is desired. "
                "But torch.backends.cudnn.benchmark is True as well and this will prevent deterministic training! "
                "If you want deterministic then set benchmark=False")

        maybe_mkdir_p(self.output_folder)

        if not self.was_initialized:
            self.initialize(True)

        while self.epoch < self.max_num_epochs:
            self.print_to_log_file("\nepoch: ", self.epoch)
            epoch_start_time = time()
            train_losses_epoch = []

            # train one epoch
            self.network.train()
            for b in range(self.num_batches_per_epoch):
                l = self.run_iteration(self.tr_gen, True)
                train_losses_epoch.append(l)

            self.all_tr_losses.append(np.mean(train_losses_epoch))
            self.print_to_log_file("train loss : %.4f" %
                                   self.all_tr_losses[-1])

            with torch.no_grad():
                # validation with train=False
                self.network.eval()
                val_losses = []
                for b in range(self.num_val_batches_per_epoch):
                    l = self.run_iteration(self.val_gen, False, True)
                    val_losses.append(l)
                self.all_val_losses.append(np.mean(val_losses))
                self.print_to_log_file("validation loss: %.4f" %
                                       self.all_val_losses[-1])

                if self.also_val_in_tr_mode:
                    self.network.train()
                    # validation with train=True
                    val_losses = []
                    for b in range(self.num_val_batches_per_epoch):
                        l = self.run_iteration(self.val_gen, False)
                        val_losses.append(l)
                    self.all_val_losses_tr_mode.append(np.mean(val_losses))
                    self.print_to_log_file(
                        "validation loss (train=True): %.4f" %
                        self.all_val_losses_tr_mode[-1])

            epoch_end_time = time()

            self.update_train_loss_MA()  # needed for lr scheduler and stopping of training

            continue_training = self.on_epoch_end()
            if not continue_training:
                # allows for early stopping
                break

            self.epoch += 1
            self.print_to_log_file("This epoch took %f s\n" %
                                   (epoch_end_time - epoch_start_time))

        self.epoch -= 1  # if we don't do this we can get a problem with loading model_final_checkpoint.

        self.save_checkpoint(
            join(self.output_folder, "model_final_checkpoint.model"))
        # now we can delete latest as it will be identical with final
        if isfile(join(self.output_folder, "model_latest.model")):
            os.remove(join(self.output_folder, "model_latest.model"))
        if isfile(join(self.output_folder, "model_latest.model.pkl")):
            os.remove(join(self.output_folder, "model_latest.model.pkl"))
Example 13
def determine_postprocessing(base,
                             gt_labels_folder,
                             raw_subfolder_name="validation_raw",
                             temp_folder="temp",
                             final_subf_name="validation_final",
                             processes=default_num_threads,
                             dice_threshold=0,
                             debug=False,
                             advanced_postprocessing=False,
                             pp_filename="postprocessing.json",
                             f_dict=None):
    """
    :param base:
    :param gt_labels_folder: subfolder of base with niftis of ground truth labels
    :param raw_subfolder_name: subfolder of base with niftis of predicted (non-postprocessed) segmentations
    :param temp_folder: used to store temporary data, will be deleted after we are done here unless debug=True
    :param final_subf_name: final results will be stored here (subfolder of base)
    :param processes:
    :param dice_threshold: only apply postprocessing if the result is better than old_result + dice_threshold (can be used as eps)
    :param debug: if True then the temporary files will not be deleted
    :return:
    """
    # let's see what classes are in the dataset
    classes = [
        int(i)
        for i in load_json(join(base, raw_subfolder_name, "summary.json"))
        ['results']['mean'].keys() if int(i) != 0
    ]

    folder_all_classes_as_fg = join(base, temp_folder + "_allClasses")
    folder_per_class = join(base, temp_folder + "_perClass")

    if isdir(folder_all_classes_as_fg):
        shutil.rmtree(folder_all_classes_as_fg)
    if isdir(folder_per_class):
        shutil.rmtree(folder_per_class)

    # multiprocessing rules
    p = Pool(processes)

    assert isfile(join(base, raw_subfolder_name, "summary.json")), "join(base, raw_subfolder_name) does not " \
                                                                   "contain a summary.json"

    # these are all the files we will be dealing with
    fnames = subfiles(join(base, raw_subfolder_name),
                      suffix=".nii.gz",
                      join=False)

    # make output and temp dir
    maybe_mkdir_p(folder_all_classes_as_fg)
    maybe_mkdir_p(folder_per_class)
    maybe_mkdir_p(join(base, final_subf_name))

    pp_results = {}
    pp_results['dc_per_class_raw'] = {}
    # dice scores after treating all foreground classes as one
    pp_results['dc_per_class_pp_all'] = {}
    # dice scores after removing everything except the largest cc,
    # independently for each class, after we already did dc_per_class_pp_all
    pp_results['dc_per_class_pp_per_class'] = {}
    pp_results['for_which_classes'] = []
    pp_results['min_valid_object_sizes'] = {}

    validation_result_raw = load_json(
        join(base, raw_subfolder_name, "summary.json"))['results']
    pp_results['num_samples'] = len(validation_result_raw['all'])
    validation_result_raw = validation_result_raw['mean']

    if advanced_postprocessing:
        # first treat all foreground classes as one and remove all but the largest foreground connected component
        results = []
        for f in fnames:
            predicted_segmentation = join(base, raw_subfolder_name, f)
            # now remove all but the largest connected component for each class
            output_file = join(folder_all_classes_as_fg, f)
            results.append(
                p.starmap_async(load_remove_save,
                                ((predicted_segmentation, output_file,
                                  (classes, )), )))

        results = [i.get() for i in results]

        # aggregate max_size_removed and min_size_kept
        max_size_removed = {}
        min_size_kept = {}
        for tmp in results:
            mx_rem, min_kept = tmp[0]
            for k in mx_rem:
                if mx_rem[k] is not None:
                    if max_size_removed.get(k) is None:
                        max_size_removed[k] = mx_rem[k]
                    else:
                        max_size_removed[k] = max(max_size_removed[k],
                                                  mx_rem[k])
            for k in min_kept:
                if min_kept[k] is not None:
                    if min_size_kept.get(k) is None:
                        min_size_kept[k] = min_kept[k]
                    else:
                        min_size_kept[k] = min(min_size_kept[k], min_kept[k])

        print("foreground vs background, smallest valid object size was",
              min_size_kept[tuple(classes)])
        print("removing only objects smaller than that...")

    else:
        min_size_kept = None

    # we need to rerun the step from above, now with the size constraint
    pred_gt_tuples = []
    results = []
    # first treat all foreground classes as one and remove all but the largest foreground connected component
    for f in fnames:
        predicted_segmentation = join(base, raw_subfolder_name, f)
        # now remove all but the largest connected component for each class
        output_file = join(folder_all_classes_as_fg, f)
        results.append(
            p.starmap_async(load_remove_save,
                            ((predicted_segmentation, output_file,
                              (classes, ), min_size_kept), )))
        if f_dict:
            f = f_dict[f.split('.')[0]]['properties']['seg_file'].split(
                os.sep)[-1]  # zhuc
        pred_gt_tuples.append([output_file, join(gt_labels_folder, f)])

    _ = [i.get() for i in results]

    # evaluate postprocessed predictions
    _ = aggregate_scores(pred_gt_tuples,
                         labels=classes,
                         json_output_file=join(folder_all_classes_as_fg,
                                               "summary.json"),
                         json_author="Fabian",
                         num_threads=processes)

    # now we need to figure out if doing this improved the dice scores. We implement this defensively: if a single
    # class got worse as a result, we won't do it. We can change this in the future but right now I prefer it this way
    validation_result_PP_test = load_json(
        join(folder_all_classes_as_fg, "summary.json"))['results']['mean']

    for c in classes:
        dc_raw = validation_result_raw[str(c)]['Dice']
        dc_pp = validation_result_PP_test[str(c)]['Dice']
        pp_results['dc_per_class_raw'][str(c)] = dc_raw
        pp_results['dc_per_class_pp_all'][str(c)] = dc_pp

    # true if new is better
    do_fg_cc = False
    comp = [
        pp_results['dc_per_class_pp_all'][str(cl)] >
        (pp_results['dc_per_class_raw'][str(cl)] + dice_threshold)
        for cl in classes
    ]
    before = np.mean(
        [pp_results['dc_per_class_raw'][str(cl)] for cl in classes])
    after = np.mean(
        [pp_results['dc_per_class_pp_all'][str(cl)] for cl in classes])
    print("Foreground vs background")
    print("before:", before)
    print("after: ", after)
    if any(comp):
        # at least one class improved - yay!
        # now check if another got worse
        # true if new is worse
        any_worse = any([
            pp_results['dc_per_class_pp_all'][str(cl)] <
            pp_results['dc_per_class_raw'][str(cl)] for cl in classes
        ])
        if not any_worse:
            pp_results['for_which_classes'].append(classes)
            if min_size_kept is not None:
                pp_results['min_valid_object_sizes'].update(
                    deepcopy(min_size_kept))
            do_fg_cc = True
            print(
                "Removing all but the largest foreground region improved results!"
            )
            print('for_which_classes', classes)
            print('min_valid_object_sizes', min_size_kept)
    else:
        # did not improve things - don't do it
        pass

    if len(classes) > 1:
        # now depending on whether we do remove all but the largest foreground connected component we define the source dir
        # for the next one to be the raw or the temp dir
        if do_fg_cc:
            source = folder_all_classes_as_fg
        else:
            source = join(base, raw_subfolder_name)

        if advanced_postprocessing:
            # now run this for each class separately
            results = []
            for f in fnames:
                predicted_segmentation = join(source, f)
                output_file = join(folder_per_class, f)
                results.append(
                    p.starmap_async(
                        load_remove_save,
                        ((predicted_segmentation, output_file, classes), )))

            results = [i.get() for i in results]

            # aggregate max_size_removed and min_size_kept
            max_size_removed = {}
            min_size_kept = {}
            for tmp in results:
                mx_rem, min_kept = tmp[0]
                for k in mx_rem:
                    if mx_rem[k] is not None:
                        if max_size_removed.get(k) is None:
                            max_size_removed[k] = mx_rem[k]
                        else:
                            max_size_removed[k] = max(max_size_removed[k],
                                                      mx_rem[k])
                for k in min_kept:
                    if min_kept[k] is not None:
                        if min_size_kept.get(k) is None:
                            min_size_kept[k] = min_kept[k]
                        else:
                            min_size_kept[k] = min(min_size_kept[k],
                                                   min_kept[k])

            print(
                "classes treated separately, smallest valid object sizes are")
            print(min_size_kept)
            print("removing only objects smaller than that")
        else:
            min_size_kept = None

        # rerun with the size thresholds from above
        pred_gt_tuples = []
        results = []
        for f in fnames:
            predicted_segmentation = join(source, f)
            output_file = join(folder_per_class, f)
            results.append(
                p.starmap_async(load_remove_save,
                                ((predicted_segmentation, output_file, classes,
                                  min_size_kept), )))
            if f_dict:  # zhuc
                gt_name = f_dict[f.split(
                    '.')[0]]['properties']['seg_file'].split(os.sep)[-1]
            else:
                gt_name = f
            pred_gt_tuples.append(
                [output_file, join(gt_labels_folder, gt_name)])

        _ = [i.get() for i in results]

        # evaluate postprocessed predictions
        _ = aggregate_scores(pred_gt_tuples,
                             labels=classes,
                             json_output_file=join(folder_per_class,
                                                   "summary.json"),
                             json_author="Fabian",
                             num_threads=processes)

        if do_fg_cc:
            old_res = deepcopy(validation_result_PP_test)
        else:
            old_res = validation_result_raw

        # these are the new dice scores
        validation_result_PP_test = load_json(
            join(folder_per_class, "summary.json"))['results']['mean']

        for c in classes:
            dc_raw = old_res[str(c)]['Dice']
            dc_pp = validation_result_PP_test[str(c)]['Dice']
            pp_results['dc_per_class_pp_per_class'][str(c)] = dc_pp
            print(c)
            print("before:", dc_raw)
            print("after: ", dc_pp)

            if dc_pp > (dc_raw + dice_threshold):
                pp_results['for_which_classes'].append(int(c))
                if min_size_kept is not None:
                    pp_results['min_valid_object_sizes'].update(
                        {c: min_size_kept[c]})
                print(
                    "Removing all but the largest region for class %d improved results!"
                    % c)
                print('min_valid_object_sizes', min_size_kept)
    else:
        print(
            "Only one class present, no need to do each class separately as this is covered in fg vs bg"
        )

    if not advanced_postprocessing:
        pp_results['min_valid_object_sizes'] = None

    print("done")
    print("for which classes:")
    print(pp_results['for_which_classes'])
    print("min_object_sizes")
    print(pp_results['min_valid_object_sizes'])

    pp_results['validation_raw'] = raw_subfolder_name
    pp_results['validation_final'] = final_subf_name

    # now that we have a proper for_which_classes, apply that
    pred_gt_tuples = []
    results = []
    for f in fnames:
        predicted_segmentation = join(base, raw_subfolder_name, f)

        # now remove all but the largest connected component for each class
        output_file = join(base, final_subf_name, f)
        results.append(
            p.starmap_async(load_remove_save,
                            ((predicted_segmentation, output_file,
                              pp_results['for_which_classes'],
                              pp_results['min_valid_object_sizes']), )))

        pred_gt_tuples.append([output_file, join(gt_labels_folder, f)])

    _ = [i.get() for i in results]
    # evaluate postprocessed predictions
    _ = aggregate_scores(pred_gt_tuples,
                         labels=classes,
                         json_output_file=join(base, final_subf_name,
                                               "summary.json"),
                         json_author="Fabian",
                         num_threads=processes)

    pp_results['min_valid_object_sizes'] = str(
        pp_results['min_valid_object_sizes'])

    save_json(pp_results, join(base, pp_filename))

    # delete temp
    if not debug:
        shutil.rmtree(folder_per_class)
        shutil.rmtree(folder_all_classes_as_fg)

    p.close()
    p.join()
    print("done")
Example 14
    trainer_class = recursive_find_python_class(  # assumed leading line; the snippet begins mid-call
        [join(nnunet.__path__[0], "training", "network_training")],
        trainerclass, "nnunet.training.network_training")

    if trainer_class is None:
        raise RuntimeError(
            "Could not find trainer class in nnunet.training.network_training")
    else:
        assert issubclass(
            trainer_class, nnUNetTrainer
        ), "network_trainer was found but is not derived from nnUNetTrainer"

    trainer = trainer_class(plans_file,
                            fold,
                            folder_with_preprocessed_data,
                            output_folder=output_folder_name,
                            dataset_directory=dataset_directory,
                            batch_dice=batch_dice,
                            stage=stage)

    trainer.initialize(False)
    trainer.load_dataset()
    trainer.do_split()
    trainer.load_best_checkpoint(train=False)

    stage_to_be_predicted_folder = join(
        dataset_directory, trainer.plans['data_identifier'] + "_stage%d" % 1)
    output_folder = join(pardir(trainer.output_folder), "pred_next_stage")
    maybe_mkdir_p(output_folder)

    predict_next_stage(trainer, stage_to_be_predicted_folder)
Example 15
    def validate(self,
                 do_mirroring: bool = True,
                 use_train_mode: bool = False,
                 tiled: bool = True,
                 step: int = 2,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 force_separate_z: bool = None,
                 interpolation_order: int = 3,
                 interpolation_order_z: int = 0):
        """
        if debug=True then the temporary files generated for postprocessing determination will be kept
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        # predictions as they come from the network go here
        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)
        # this is for debug purposes
        my_input_args = {
            'do_mirroring': do_mirroring,
            'use_train_mode': use_train_mode,
            'tiled': tiled,
            'step': step,
            'save_softmax': save_softmax,
            'use_gaussian': use_gaussian,
            'overwrite': overwrite,
            'validation_folder_name': validation_folder_name,
            'debug': debug,
            'all_in_gpu': all_in_gpu,
            'force_separate_z': force_separate_z,
            'interpolation_order': interpolation_order,
            'interpolation_order_z': interpolation_order_z,
        }
        save_json(my_input_args, join(output_folder, "validation_args.json"))

        if do_mirroring:
            if not self.data_aug_params['do_mirror']:
                raise RuntimeError(
                    "We did not train with mirroring so you cannot do inference with mirroring enabled"
                )
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        # zhuc: assigning to default_num_threads here would make the name local to the
        # whole function and break its later uses, so a separate variable is used
        num_pool_threads = 1 if config.DEBUG_MODE else default_num_threads  # zhuc
        export_pool = Pool(num_pool_threads)
        results = []

        cnt = 0  # zhuc
        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            cnt += 1
            if config.DEBUG_MODE and cnt > 2:
                break

            # fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            fname = k  # zhuc
            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                softmax_pred = self.predict_preprocessed_data_return_softmax(
                    data[:-1],
                    do_mirroring,
                    1,
                    use_train_mode,
                    1,
                    mirror_axes,
                    tiled,
                    True,
                    step,
                    self.patch_size,
                    use_gaussian=use_gaussian,
                    all_in_gpu=all_in_gpu)

                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None
                """There is a problem with python process communication that prevents us from communicating obejcts 
                larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
                communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
                enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
                patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
                then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
                filename or np.ndarray and will handle this automatically"""
                if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")

                if config.DEBUG_MODE:  # zhuc
                    results.append(
                        save_segmentation_nifti_from_softmax(
                            softmax_pred, join(output_folder,
                                               fname + ".nii.gz"), properties,
                            interpolation_order, None, None, None,
                            softmax_fname, None, force_separate_z,
                            interpolation_order_z))
                else:
                    results.append(
                        export_pool.starmap_async(
                            save_segmentation_nifti_from_softmax,
                            ((softmax_pred,
                              join(output_folder, fname + ".nii.gz"),
                              properties, interpolation_order, None, None,
                              None, softmax_fname, None, force_separate_z,
                              interpolation_order_z), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                # join(self.gt_niftis_folder, fname + ".nii.gz")])
                join(self.gt_niftis_folder,
                     properties['seg_file'].split(os.sep)[-1])
            ])  # zhuc

        if not config.DEBUG_MODE:  # zhuc
            _ = [i.get() for i in results]
        self.print_to_log_file("finished prediction")

        # evaluate raw predictions
        self.print_to_log_file("evaluation of raw predictions")
        task = self.dataset_directory.split(os.sep)[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(
            pred_gt_tuples,
            labels=list(range(self.num_classes)),
            json_output_file=join(output_folder, "summary.json"),
            json_name=job_name + " val tiled %s" % (str(tiled)),
            json_author="Fabian",
            json_task=task,
            num_threads=default_num_threads)

        # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
        # except the largest connected component for each class. To see if this improves results, we do this for all
        # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
        # have this applied during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder,
                                 self.gt_niftis_folder,
                                 validation_folder_name,
                                 final_subf_name=validation_folder_name +
                                 "_postprocessed",
                                 debug=debug,
                                 processes=default_num_threads,
                                 f_dict=self.dataset)
        # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
        # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
        # postprocessing to be better? In that case we need to consolidate. At the time the consolidation is going to be
        # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
        # be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            e = None
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError as err:
                    e = err  # keep a reference: Python 3 unbinds the except target after the block
                    attempts += 1
                    sleep(1)
            if not success:
                print("Could not copy gt nifti file %s into folder %s" %
                      (f, gt_nifti_folder))
                if e is not None:
                    raise e
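
A usage sketch for completeness (the flags shown are the defaults above):

# Run tiled, mirrored validation, then derive the postprocessing rules; the
# results end up in validation_raw and validation_raw_postprocessed.
trainer.validate(do_mirroring=True, tiled=True, save_softmax=True,
                 validation_folder_name='validation_raw')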
Example 16
"""

base = os.environ['nnUNet_raw_data_base'] if "nnUNet_raw_data_base" in os.environ.keys() else None
preprocessing_output_dir = os.environ['nnUNet_preprocessed'] if "nnUNet_preprocessed" in os.environ.keys() else None
network_training_output_dir_base = os.path.join(os.environ['RESULTS_FOLDER']) if "RESULTS_FOLDER" in os.environ.keys() else None

if base is not None:
    nnUNet_raw_data = join(base, "nnUNet_raw_data")
    nnUNet_cropped_data = join(base, "nnUNet_cropped_data")
    maybe_mkdir_p(nnUNet_raw_data)
    maybe_mkdir_p(nnUNet_cropped_data)
else:
    print(
        "nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files "
        "are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like "
        "this. If this is not intended, please read nnunet/paths.md for information on how to set this up properly."
    )
    nnUNet_cropped_data = nnUNet_raw_data = None

if preprocessing_output_dir is not None:
    maybe_mkdir_p(preprocessing_output_dir)
else:
    print(
        "nnUNet_preprocessed is not defined and nnU-Net can not be used for preprocessing "
        "or training. If this is not intended, please read nnunet/paths.md for information on how to set this up."
    )