def consolidate_folds(output_folder_base, validation_folder_name: str = 'validation_raw',
                      advanced_postprocessing: bool = False, folds: Tuple[int, ...] = (0, 1, 2, 3, 4)):
   
    output_folder_raw = join(output_folder_base, "cv_niftis_raw")
    output_folder_gt = join(output_folder_base, "gt_niftis")
    collect_cv_niftis(output_folder_base, output_folder_raw, validation_folder_name,
                      folds)

    num_niftis_gt = len(subfiles(output_folder_gt))
    # count niftis in there
    num_niftis = len(subfiles(output_folder_raw))
    if num_niftis != num_niftis_gt:
        shutil.rmtree(output_folder_raw)
        raise AssertionError("It does not seem like you trained all the folds! Train all folds first!")

    # load a summary file so that we can know what class labels to expect
    summary_fold0 = load_json(join(output_folder_base, "fold_0", validation_folder_name, "summary.json"))['results'][
        'mean']
    classes = [int(i) for i in summary_fold0.keys()]
    niftis = subfiles(output_folder_raw, join=False, suffix=".nii.gz")
    test_pred_pairs = [(join(output_folder_gt, i), join(output_folder_raw, i)) for i in niftis]

    # determine_postprocessing needs a summary.json file in the folder where the raw predictions are. We could compute
    # that from the summary files of the five folds but I am feeling lazy today
    aggregate_scores(test_pred_pairs, labels=classes, json_output_file=join(output_folder_raw, "summary.json"),
                     num_threads=default_num_threads)

    determine_postprocessing(output_folder_base, output_folder_gt, 'cv_niftis_raw',
                             final_subf_name="cv_niftis_postprocessed", processes=default_num_threads,
                             advanced_postprocessing=advanced_postprocessing)
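
A minimal usage sketch (the path below is hypothetical; the call only assumes the standard cross-validation layout with fold_0 ... fold_4 subfolders, each containing the chosen validation folder, plus a gt_niftis folder next to them):

# hypothetical output folder, shown for illustration only
trained_model_dir = "/path/to/trained_model_output"
consolidate_folds(trained_model_dir,
                  validation_folder_name='validation_raw',
                  advanced_postprocessing=False,
                  folds=(0, 1, 2, 3, 4))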
    

Example #2
    def test(self):
        from evaluation.evaluator import aggregate_scores, Evaluator
        from collections import defaultdict

        self.elog.print('=====TEST=====')
        self.model.eval()

        pred_dict = defaultdict(list)
        gt_dict = defaultdict(list)

        batch_counter = 0

        if self.config.visualize_segm:
            color_class_converter = LabelTensorToColor()

        with torch.no_grad():
            for data_batch in self.test_data_loader:
                print('testing...', batch_counter)
                batch_counter += 1

                # Get data_batches
                mr_data = data_batch['data'][0].float().to(self.device)
                mr_target = data_batch['seg'][0].float().to(self.device)

                pred = self.model(mr_data)
                pred_argmax = torch.argmax(pred.data.cpu(),
                                           dim=1,
                                           keepdim=True)

                fnames = data_batch['fnames']
                for i, fname in enumerate(fnames):
                    pred_dict[fname[0]].append(
                        pred_argmax[i].detach().cpu().numpy())
                    gt_dict[fname[0]].append(
                        mr_target[i].detach().cpu().numpy())

                if batch_counter == 35 and self.config.visualize_segm:
                    segm_visualization(mr_data, mr_target, pred_argmax,
                                       color_class_converter, self.config)

        test_ref_list = []
        for key in pred_dict.keys():
            test_ref_list.append(
                (np.stack(pred_dict[key]), np.stack(gt_dict[key])))

        scores = aggregate_scores(test_ref_list,
                                  evaluator=Evaluator,
                                  json_author=self.config.author,
                                  json_task=self.config.name,
                                  json_name=self.config.name,
                                  json_output_file=self.elog.work_dir +
                                  "/{}_".format(self.config.author) +
                                  self.config.name + '.json')

        self.scores = scores

        print("Scores:\n", scores)

Example #3

    def test(self):
        from evaluation.evaluator import aggregate_scores, Evaluator
        from collections import defaultdict

        self.elog.print('=====TEST=====')
        self.model.eval()

        pred_dict = defaultdict(list)
        gt_dict = defaultdict(list)

        batch_counter = 0
        with torch.no_grad():
            for data_batch in self.test_data_loader:
                print('testing...', batch_counter)
                batch_counter += 1

                # Get data_batches
                mr_data = data_batch['data'][0].float().to(self.device)
                mr_target = data_batch['seg'][0].float().to(self.device)

                pred = self.model(mr_data)
                pred_argmax = torch.argmax(pred.data.cpu(),
                                           dim=1,
                                           keepdim=True)

                fnames = data_batch['fnames']
                for i, fname in enumerate(fnames):
                    pred_dict[fname[0]].append(
                        pred_argmax[i].detach().cpu().numpy())
                    gt_dict[fname[0]].append(
                        mr_target[i].detach().cpu().numpy())

        test_ref_list = []
        for key in pred_dict.keys():
            test_ref_list.append(
                (np.stack(pred_dict[key]), np.stack(gt_dict[key])))
        save_segmentation(
            np.asarray(pred_dict[key]).squeeze(),
            self.config.data_root_dir + '/brains/imagesTr/',
            self.elog.work_dir + "/{}".format('segmentation'), key)
        scores = aggregate_scores(test_ref_list,
                                  evaluator=Evaluator,
                                  json_author=self.config.author,
                                  json_task=self.config.name,
                                  json_name=self.config.name,
                                  json_output_file=self.elog.work_dir +
                                  "/{}_".format(self.config.author) +
                                  self.config.name + '.json')

        print("Scores:\n", scores)

Example #4
def predict_val(pre_folder, gt_folder):
    import pickle
    pred_gt_tuples = []
    for fname in os.listdir(pre_folder):
        if fname.split('.')[-1] == "gz":
            pred_gt_tuples.append([join(pre_folder, fname), join(gt_folder, fname.replace("Image", "Label"))])
    task = pre_folder.split('/')[-2]

    with open(join(pre_folder, "plans.pkl"), 'rb') as f:
        info = pickle.load(f)
    num_classes = info['num_classes'] + 1  # background is not included in num_classes, hence the +1
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(num_classes)),
                         json_output_file=join(pre_folder, "summary.json"),
                         json_task=task, num_threads=default_num_threads)
        

Example #5
    def validate(self,
                 do_mirroring=True,
                 use_train_mode=False,
                 tiled=True,
                 step=2,
                 save_softmax=True,
                 use_gaussian=True,
                 compute_global_dice=True,
                 override=True,
                 validation_folder_name='validation'):
        """
        2018_12_05: I added global accumulation of TP, FP and FN for the validation in here. This is because I believe
        that selecting models is easier when computing the Dice globally instead of independently for each case and
        then averaging over cases. The Lung dataset in particular is very unstable because of the small size of the
        Lung Lesions. My theory is that even though the global Dice differs from the actual target metric, it is
        still a good enough substitute that allows us to get a lot more stable results when rerunning the same
        experiment twice. FYI: computer vision community uses the global jaccard for the evaluation of Cityscapes etc,
        not the per-image jaccard averaged over images.
        The reason I am accumulating TP/FP/FN here and not from the nifti files (which are used by our Evaluator) is
        that all predictions made here will have identical voxel spacing whereas voxel spacings in the nifti files
        will be different (which we could compensate for by using the volume per voxel but that would require the
        evaluator to understand spacings which it does not at this point)

        :param do_mirroring:
        :param use_train_mode:
        :param mirror_axes:
        :param tiled:
        :param tile_in_z:
        :param step:
        :param use_nifti:
        :param save_softmax:
        :param use_gaussian:
        :param use_temporal_models:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()
        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(4)
        results = []
        global_tp = OrderedDict()
        global_fp = OrderedDict()
        global_fn = OrderedDict()

        for k in self.dataset_val.keys():
            print(k)
            properties = self.dataset[k]['properties']
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            if override or (not isfile(join(output_folder,
                                            fname + ".nii.gz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                softmax_pred = self.predict_preprocessed_data_return_softmax(
                    data[:-1],
                    do_mirroring,
                    1,
                    use_train_mode,
                    1,
                    mirror_axes,
                    tiled,
                    True,
                    step,
                    self.patch_size,
                    use_gaussian=use_gaussian)

                if compute_global_dice:
                    predicted_segmentation = softmax_pred.argmax(0)
                    gt_segmentation = data[-1]
                    labels = properties['classes']
                    labels = [int(i) for i in labels if i > 0]
                    for l in labels:
                        if l not in global_fn.keys():
                            global_fn[l] = 0
                        if l not in global_fp.keys():
                            global_fp[l] = 0
                        if l not in global_tp.keys():
                            global_tp[l] = 0
                        conf = ConfusionMatrix(
                            (predicted_segmentation == l).astype(int),
                            (gt_segmentation == l).astype(int))
                        conf.compute()
                        global_fn[l] += conf.fn
                        global_fp[l] += conf.fp
                        global_tp[l] += conf.tp

                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None
                """There is a problem with python process communication that prevents us from communicating obejcts 
                larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
                communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
                enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
                patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
                then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
                filename or np.ndarray and will handle this automatically"""
                if np.prod(softmax_pred.shape) > (2e9 / 4 *
                                                  0.9):  # *0.9 just to be save
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")
                results.append(
                    export_pool.starmap_async(
                        save_segmentation_nifti_from_softmax,
                        ((softmax_pred, join(output_folder,
                                             fname + ".nii.gz"), properties, 3,
                          None, None, None, softmax_fname, None), )))
                # save_segmentation_nifti_from_softmax(softmax_pred, join(output_folder, fname + ".nii.gz"),
                #                                               properties, 3, None, None,
                #                                               None,
                #                                               softmax_fname,
                #                                               None)

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]
        print("finished prediction, now evaluating...")

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(
            pred_gt_tuples,
            labels=list(range(self.num_classes)),
            json_output_file=join(output_folder, "summary.json"),
            json_name=job_name + " val tiled %s" % (str(tiled)),
            json_author="Fabian",
            json_task=task,
            num_threads=3)
        if compute_global_dice:
            global_dice = OrderedDict()
            all_labels = list(global_fn.keys())
            for l in all_labels:
                global_dice[int(l)] = float(
                    2 * global_tp[l] /
                    (2 * global_tp[l] + global_fn[l] + global_fp[l]))
            write_json(global_dice, join(output_folder, "global_dice.json"))
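
The docstring of Example #5 argues for accumulating TP/FP/FN globally instead of averaging the per-case Dice. A tiny numeric sketch with made-up counts shows why the two can disagree:

# two hypothetical cases for one class: (tp, fp, fn)
cases = [(5, 0, 0),      # small lesion, segmented perfectly
         (100, 50, 50)]  # large lesion, segmented less well

per_case_dice = [2 * tp / (2 * tp + fp + fn) for tp, fp, fn in cases]
mean_over_cases = sum(per_case_dice) / len(per_case_dice)   # (1.0 + 0.667) / 2 ~ 0.83

tp, fp, fn = map(sum, zip(*cases))
global_dice = 2 * tp / (2 * tp + fp + fn)                    # 210 / 310 ~ 0.68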

Example #6
    def validate(self, do_mirroring=True, use_train_mode=False, tiled=True, step=2, save_softmax=True,
                 use_gaussian=True, validation_folder_name='validation'):
        """

        :param do_mirroring:
        :param use_train_mode:
        :param mirror_axes:
        :param tiled:
        :param tile_in_z:
        :param step:
        :param use_nifti:
        :param save_softmax:
        :param use_gaussian:
        :param use_temporal_models:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        process_manager = Pool(2)
        results = []

        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                               k + "_segFromPrevStage.npz"))['data'][None]

            transpose_forward = self.plans.get('transpose_forward')
            if transpose_forward is not None:
                data = data.transpose([0] + [i+1 for i in transpose_forward])
                seg_from_prev_stage = seg_from_prev_stage.transpose([0] + [i+1 for i in transpose_forward])

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0], range(1, self.num_classes))))
            softmax_pred = self.predict_preprocessing_return_softmax(data_for_net, do_mirroring, 1,
                                                                         use_train_mode, 1, mirror_axes, tiled,
                                                                         True, step, self.patch_size,
                                                                         use_gaussian=use_gaussian)

            if transpose_forward is not None:
                transpose_backward = self.plans.get('transpose_backward')
                softmax_pred = softmax_pred.transpose([0] + [i+1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.9): # *0.9 just to be safe
                np.save(fname + ".npy", softmax_pred)
                softmax_pred = fname + ".npy"
            results.append(process_manager.starmap_async(store_seg_from_softmax,
                                                         ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                                                           properties, 1, None, None, None, softmax_fname, None),
                                                          )
                                                         )
                           )

            pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                                   join(self.gt_niftis_folder, fname + ".nii.gz")])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder, "summary.json"), json_name=job_name,
                             json_author="Fabian", json_description="",
                             json_task=task)
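
Example #6 feeds the second stage of a cascade by concatenating the image channels with a one-hot encoding of the previous stage's segmentation, dropping the background channel (that is what to_one_hot(seg, range(1, num_classes)) does). A minimal sketch of such an encoding, assuming labels are consecutive integers starting at 0 for background:

import numpy as np

def one_hot_foreground(seg, num_classes):
    """Return one binary channel per foreground class 1..num_classes-1, shape (num_classes-1, *seg.shape)."""
    return np.stack([(seg == c).astype(np.float32) for c in range(1, num_classes)])

# data[:-1] holds the image channels, prev_seg the previous stage's label map:
# data_for_net = np.concatenate((data[:-1], one_hot_foreground(prev_seg, num_classes)))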

Example #7
            out_files.append(join(output_folder, p[:-4] + ".nii.gz"))
            gt_segmentations.append(
                join(folder_with_gt_segs, p[:-4] + ".nii.gz"))

    p = Pool(8)
    p.map(merge, zip(files1, files2, property_files, out_files))
    p.close()
    p.join()
    """for args in zip(files1, files2, property_files, out_files, [only_keep_largest_connected_component] * len(files1)):
        print(args[0], args[1])
        merge(args)"""

    if not isfile(join(output_folder,
                       "summary_allFolds.json")) and len(out_files) > 0:
        out_dir_all_json = join(network_training_output_dir, "summary_jsons")
        # now evaluate if all these gt files exist
        aggregate_scores(tuple(zip(out_files, gt_segmentations)),
                         labels=plans['all_classes'],
                         json_output_file=join(output_folder,
                                               "summary_allFolds.json"),
                         json_task=task,
                         json_name=task + "__" + output_folder.split("/")[-1],
                         num_threads=4)
        json_out = load_json(join(output_folder, "summary_allFolds.json"))
        json_out["experiment_name"] = output_folder.split("/")[-1]
        save_json(json_out, join(output_folder, "summary_allFolds.json"))
        shutil.copy(
            join(output_folder, "summary_allFolds.json"),
            join(out_dir_all_json,
                 "%s__%s.json" % (task, output_folder.split("/")[-1])))

Example #8

def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validation_raw",
                             temp_folder="temp",
                             final_subf_name="validation_final", processes=default_num_threads,
                             dice_threshold=0, debug=False,
                             advanced_postprocessing=False,
                             pp_filename="postprocessing.json"):
    """
    :param base:
    :param gt_labels_folder: subfolder of base with niftis of ground truth labels
    :param raw_subfolder_name: subfolder of base with niftis of predicted (non-postprocessed) segmentations
    :param temp_folder: used to store temporary data, will be deleted after we are done here unless debug=True
    :param final_subf_name: final results will be stored here (subfolder of base)
    :param processes:
    :param dice_threshold: only apply postprocessing if the result is better than old_result+dice_threshold (can be used as eps)
    :param debug: if True then the temporary files will not be deleted
    :return:
    """
    # lets see what classes are in the dataset
    classes = [int(i) for i in load_json(join(base, raw_subfolder_name, "summary.json"))['results']['mean'].keys() if int(i) != 0]

    folder_all_classes_as_fg = join(base, temp_folder + "_allClasses")
    folder_per_class = join(base, temp_folder + "_perClass")

    if isdir(folder_all_classes_as_fg):
        shutil.rmtree(folder_all_classes_as_fg)
    if isdir(folder_per_class):
        shutil.rmtree(folder_per_class)

    # multiprocessing rules
    p = Pool(processes)

    assert isfile(join(base, raw_subfolder_name, "summary.json")), "join(base, raw_subfolder_name) does not contain a summary.json"

    # these are all the files we will be dealing with
    fnames = subfiles(join(base, raw_subfolder_name), suffix=".nii.gz", join=False)

    # make output and temp dir
    maybe_mkdir_p(folder_all_classes_as_fg)
    maybe_mkdir_p(folder_per_class)
    maybe_mkdir_p(join(base, final_subf_name))

    pp_results = {}
    pp_results['dc_per_class_raw'] = {}
    pp_results['dc_per_class_pp_all'] = {}  # dice scores after treating all foreground classes as one
    pp_results['dc_per_class_pp_per_class'] = {}  # dice scores after removing everything except the largest cc
    # independently for each class after we already did dc_per_class_pp_all
    pp_results['for_which_classes'] = []
    pp_results['min_valid_object_sizes'] = {}


    validation_result_raw = load_json(join(base, raw_subfolder_name, "summary.json"))['results']
    pp_results['num_samples'] = len(validation_result_raw['all'])
    validation_result_raw = validation_result_raw['mean']

    if advanced_postprocessing:
        # first treat all foreground classes as one and remove all but the largest foreground connected component
        results = []
        for f in fnames:
            predicted_segmentation = join(base, raw_subfolder_name, f)
            # now remove all but the largest connected component for each class
            output_file = join(folder_all_classes_as_fg, f)
            results.append(p.starmap_async(load_remove_save, ((predicted_segmentation, output_file, (classes,)),)))

        results = [i.get() for i in results]

        # aggregate max_size_removed and min_size_kept
        max_size_removed = {}
        min_size_kept = {}
        for tmp in results:
            mx_rem, min_kept = tmp[0]
            for k in mx_rem:
                if mx_rem[k] is not None:
                    if max_size_removed.get(k) is None:
                        max_size_removed[k] = mx_rem[k]
                    else:
                        max_size_removed[k] = max(max_size_removed[k], mx_rem[k])
            for k in min_kept:
                if min_kept[k] is not None:
                    if min_size_kept.get(k) is None:
                        min_size_kept[k] = min_kept[k]
                    else:
                        min_size_kept[k] = min(min_size_kept[k], min_kept[k])

        print("foreground vs background, smallest valid object size was", min_size_kept[tuple(classes)])
        print("removing only objects smaller than that...")

    else:
        min_size_kept = None

    # we need to rerun the step from above, now with the size constraint
    pred_gt_tuples = []
    results = []
    # first treat all foreground classes as one and remove all but the largest foreground connected component
    for f in fnames:
        predicted_segmentation = join(base, raw_subfolder_name, f)
        # now remove all but the largest connected component for each class
        output_file = join(folder_all_classes_as_fg, f)
        results.append(
            p.starmap_async(load_remove_save, ((predicted_segmentation, output_file, (classes,), min_size_kept),)))
        pred_gt_tuples.append([output_file, join(gt_labels_folder, f)])

    _ = [i.get() for i in results]

    # evaluate postprocessed predictions
    _ = aggregate_scores(pred_gt_tuples, labels=classes,
                         json_output_file=join(folder_all_classes_as_fg, "summary.json"), num_threads=processes)

    # now we need to figure out if doing this improved the dice scores. We implement this defensively: if a single
    # class got worse as a result, we won't do it. We can change this in the future but right now I prefer to do it
    # this way
    validation_result_PP_test = load_json(join(folder_all_classes_as_fg, "summary.json"))['results']['mean']

    for c in classes:
        dc_raw = validation_result_raw[str(c)]['Dice']
        dc_pp = validation_result_PP_test[str(c)]['Dice']
        pp_results['dc_per_class_raw'][str(c)] = dc_raw
        pp_results['dc_per_class_pp_all'][str(c)] = dc_pp

    # true if new is better
    do_fg_cc = False
    comp = [pp_results['dc_per_class_pp_all'][str(cl)] > (pp_results['dc_per_class_raw'][str(cl)] + dice_threshold) for
            cl in classes]
    before = np.mean([pp_results['dc_per_class_raw'][str(cl)] for cl in classes])
    after = np.mean([pp_results['dc_per_class_pp_all'][str(cl)] for cl in classes])
    print("Foreground vs background")
    print("before:", before)
    print("after: ", after)
    if any(comp):
        # at least one class improved - yay!
        # now check if another got worse
        # true if new is worse
        any_worse = any(
            [pp_results['dc_per_class_pp_all'][str(cl)] < pp_results['dc_per_class_raw'][str(cl)] for cl in classes])
        if not any_worse:
            pp_results['for_which_classes'].append(classes)
            if min_size_kept is not None:
                pp_results['min_valid_object_sizes'].update(deepcopy(min_size_kept))
            do_fg_cc = True
            print("Removing all but the largest foreground region improved results!")
            print('for_which_classes', classes)
            print('min_valid_object_sizes', min_size_kept)
    else:
        # did not improve things - don't do it
        pass

    if len(classes) > 1:
        # now depending on whether we do remove all but the largest foreground connected component we define the source dir
        # for the next one to be the raw or the temp dir
        if do_fg_cc:
            source = folder_all_classes_as_fg
        else:
            source = join(base, raw_subfolder_name)

        if advanced_postprocessing:
            # now run this for each class separately
            results = []
            for f in fnames:
                predicted_segmentation = join(source, f)
                output_file = join(folder_per_class, f)
                results.append(p.starmap_async(load_remove_save, ((predicted_segmentation, output_file, classes),)))

            results = [i.get() for i in results]

            # aggregate max_size_removed and min_size_kept
            max_size_removed = {}
            min_size_kept = {}
            for tmp in results:
                mx_rem, min_kept = tmp[0]
                for k in mx_rem:
                    if mx_rem[k] is not None:
                        if max_size_removed.get(k) is None:
                            max_size_removed[k] = mx_rem[k]
                        else:
                            max_size_removed[k] = max(max_size_removed[k], mx_rem[k])
                for k in min_kept:
                    if min_kept[k] is not None:
                        if min_size_kept.get(k) is None:
                            min_size_kept[k] = min_kept[k]
                        else:
                            min_size_kept[k] = min(min_size_kept[k], min_kept[k])

            print("classes treated separately, smallest valid object sizes are")
            print(min_size_kept)
            print("removing only objects smaller than that")
        else:
            min_size_kept = None

        # rerun with the size thresholds from above
        pred_gt_tuples = []
        results = []
        for f in fnames:
            predicted_segmentation = join(source, f)
            output_file = join(folder_per_class, f)
            results.append(p.starmap_async(load_remove_save, ((predicted_segmentation, output_file, classes, min_size_kept),)))
            pred_gt_tuples.append([output_file, join(gt_labels_folder, f)])

        _ = [i.get() for i in results]

        # evaluate postprocessed predictions
        _ = aggregate_scores(pred_gt_tuples, labels=classes,
                             json_output_file=join(folder_per_class, "summary.json"), num_threads=processes)

        if do_fg_cc:
            old_res = deepcopy(validation_result_PP_test)
        else:
            old_res = validation_result_raw

        # these are the new dice scores
        validation_result_PP_test = load_json(join(folder_per_class, "summary.json"))['results']['mean']

        for c in classes:
            dc_raw = old_res[str(c)]['Dice']
            dc_pp = validation_result_PP_test[str(c)]['Dice']
            pp_results['dc_per_class_pp_per_class'][str(c)] = dc_pp
            print(c)
            print("before:", dc_raw)
            print("after: ", dc_pp)

            if dc_pp > (dc_raw + dice_threshold):
                pp_results['for_which_classes'].append(int(c))
                if min_size_kept is not None:
                    pp_results['min_valid_object_sizes'].update({c: min_size_kept[c]})
                print("Removing all but the largest region for class %d improved results!" % c)
                print('min_valid_object_sizes', min_size_kept)
    else:
        print("Only one class present, no need to do each class separately as this is covered in fg vs bg")

    if not advanced_postprocessing:
        pp_results['min_valid_object_sizes'] = None

    print("done")
    print("for which classes:")
    print(pp_results['for_which_classes'])
    print("min_object_sizes")
    print(pp_results['min_valid_object_sizes'])

    pp_results['validation_raw'] = raw_subfolder_name
    pp_results['validation_final'] = final_subf_name

    # now that we have a proper for_which_classes, apply that
    pred_gt_tuples = []
    results = []
    for f in fnames:
        predicted_segmentation = join(base, raw_subfolder_name, f)

        # now remove all but the largest connected component for each class
        output_file = join(base, final_subf_name, f)
        results.append(p.starmap_async(load_remove_save, (
            (predicted_segmentation, output_file, pp_results['for_which_classes'],
             pp_results['min_valid_object_sizes']),)))

        pred_gt_tuples.append([output_file, join(gt_labels_folder, f)])

    _ = [i.get() for i in results]
    # evaluate postprocessed predictions
    _ = aggregate_scores(pred_gt_tuples, labels=classes,
                         json_output_file=join(base, final_subf_name, "summary.json"), num_threads=processes)

    pp_results['min_valid_object_sizes'] = str(pp_results['min_valid_object_sizes'])

    save_json(pp_results, join(base, pp_filename))

    # delete temp
    if not debug:
        shutil.rmtree(folder_per_class)
        shutil.rmtree(folder_all_classes_as_fg)

    p.close()
    p.join()
    print("done")

Example #9
    def validate(self,
                 do_mirroring=True,
                 use_train_mode=False,
                 tiled=True,
                 step=2,
                 save_softmax=True,
                 use_gaussian=True,
                 compute_global_dice=True,
                 override=True,
                 validation_folder_name='validation'):
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(4)
        results = []
        global_tp = OrderedDict()
        global_fp = OrderedDict()
        global_fn = OrderedDict()

        for k in self.dataset_val.keys():
            print(k)
            properties = self.dataset[k]['properties']
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            if override or (not isfile(join(output_folder,
                                            fname + ".nii.gz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                softmax_pred = self.predict_preprocessed_data_return_softmax(
                    data[:-1],
                    do_mirroring,
                    1,
                    use_train_mode,
                    1,
                    mirror_axes,
                    tiled,
                    True,
                    step,
                    self.patch_size,
                    use_gaussian=use_gaussian)

                if compute_global_dice:
                    predicted_segmentation = softmax_pred.argmax(0)
                    gt_segmentation = data[-1]
                    labels = properties['classes']
                    labels = [int(i) for i in labels if i > 0]
                    for l in labels:
                        if l not in global_fn.keys():
                            global_fn[l] = 0
                        if l not in global_fp.keys():
                            global_fp[l] = 0
                        if l not in global_tp.keys():
                            global_tp[l] = 0
                        conf = ConfusionMatrix(
                            (predicted_segmentation == l).astype(int),
                            (gt_segmentation == l).astype(int))
                        conf.compute()
                        global_fn[l] += conf.fn
                        global_fp[l] += conf.fp
                        global_tp[l] += conf.tp

                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None

                if np.prod(softmax_pred.shape) > (2e9 / 4 *
                                                  0.9):  # *0.9 just to be safe
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")
                results.append(
                    export_pool.starmap_async(
                        save_segmentation_nifti_from_softmax,
                        ((softmax_pred, join(output_folder,
                                             fname + ".nii.gz"), properties, 3,
                          None, None, None, softmax_fname, None), )))
                # save_segmentation_nifti_from_softmax(softmax_pred, join(output_folder, fname + ".nii.gz"),
                #                                               properties, 3, None, None,
                #                                               None,
                #                                               softmax_fname,
                #                                               None)

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]
        print("finished prediction, now evaluating...")

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(
            pred_gt_tuples,
            labels=list(range(self.num_classes)),
            json_output_file=join(output_folder, "summary.json"),
            json_name=job_name + " val tiled %s" % (str(tiled)),
            json_author="Fabian",
            json_task=task,
            num_threads=3)
        if compute_global_dice:
            global_dice = OrderedDict()
            all_labels = list(global_fn.keys())
            for l in all_labels:
                global_dice[int(l)] = float(
                    2 * global_tp[l] /
                    (2 * global_tp[l] + global_fn[l] + global_fp[l]))
            write_json(global_dice, join(output_folder, "global_dice.json"))

Example #10
def ensemble(training_output_folder1, training_output_folder2, output_folder,
             task, validation_folder, folds):
    print("\nEnsembling folders\n", training_output_folder1, "\n",
          training_output_folder2)

    output_folder_base = output_folder
    output_folder = join(output_folder_base, "ensembled_raw")

    # only_keep_largest_connected_component is the same for all stages
    dataset_directory = join(preprocessing_output_dir, task)
    plans = load_pickle(join(training_output_folder1,
                             "plans.pkl"))  # we need this only for the labels

    files1 = []
    files2 = []
    property_files = []
    out_files = []
    gt_segmentations = []

    folder_with_gt_segs = join(dataset_directory, "gt_segmentations")

    for f in folds:
        validation_folder_net1 = join(training_output_folder1, "fold_%d" % f,
                                      validation_folder)
        validation_folder_net2 = join(training_output_folder2, "fold_%d" % f,
                                      validation_folder)
        patient_identifiers1 = subfiles(validation_folder_net1, False, None,
                                        'npz', True)
        patient_identifiers2 = subfiles(validation_folder_net2, False, None,
                                        'npz', True)
        # we don't do postprocessing anymore so there should not be any of those noPostProcess files left
        patient_identifiers1_nii = [
            i for i in subfiles(validation_folder_net1,
                                False,
                                None,
                                suffix='nii.gz',
                                sort=True)
            if not i.endswith("noPostProcess.nii.gz")
            and not i.endswith('_postprocessed.nii.gz')
        ]
        patient_identifiers2_nii = [
            i for i in subfiles(validation_folder_net2,
                                False,
                                None,
                                suffix='nii.gz',
                                sort=True)
            if not i.endswith("noPostProcess.nii.gz")
            and not i.endswith('_postprocessed.nii.gz')
        ]
        assert len(patient_identifiers1) == len(
            patient_identifiers1_nii
        ), "npz seem to be missing. run validation with --npz"
        assert len(patient_identifiers2) == len(
            patient_identifiers2_nii
        ), "npz seem to be missing. run validation with --npz"
        assert all([
            i[:-4] == j[:-7]
            for i, j in zip(patient_identifiers1, patient_identifiers1_nii)
        ]), "npz seem to be missing. run validation with --npz"
        assert all([
            i[:-4] == j[:-7]
            for i, j in zip(patient_identifiers2, patient_identifiers2_nii)
        ]), "npz seem to be missing. run validation with --npz"

        all_patient_identifiers = patient_identifiers1
        for p in patient_identifiers2:
            if p not in all_patient_identifiers:
                all_patient_identifiers.append(p)

        # assert these patients exist for both methods
        assert all([
            isfile(join(validation_folder_net1, i))
            for i in all_patient_identifiers
        ])
        assert all([
            isfile(join(validation_folder_net2, i))
            for i in all_patient_identifiers
        ])

        maybe_mkdir_p(output_folder)

        for p in all_patient_identifiers:
            files1.append(join(validation_folder_net1, p))
            files2.append(join(validation_folder_net2, p))
            property_files.append(join(validation_folder_net1, p)[:-3] + "pkl")
            out_files.append(join(output_folder, p[:-4] + ".nii.gz"))
            gt_segmentations.append(
                join(folder_with_gt_segs, p[:-4] + ".nii.gz"))

    p = Pool(default_num_threads)
    p.map(merge, zip(files1, files2, property_files, out_files))
    p.close()
    p.join()

    if not isfile(join(output_folder, "summary.json")) and len(out_files) > 0:
        aggregate_scores(tuple(zip(out_files, gt_segmentations)),
                         labels=plans['all_classes'],
                         json_output_file=join(output_folder, "summary.json"),
                         json_task=task,
                         json_name=task + "__" +
                         output_folder_base.split("/")[-1],
                         num_threads=default_num_threads)

    if not isfile(join(output_folder_base, "postprocessing.json")):
        determine_postprocessing(output_folder_base,
                                 folder_with_gt_segs,
                                 "ensembled_raw",
                                 "temp",
                                 "ensembled_postprocessed",
                                 default_num_threads,
                                 dice_threshold=0)

        out_dir_all_json = join(network_training_output_dir, "summary_jsons")
        json_out = load_json(
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"))

        json_out["experiment_name"] = output_folder_base.split("/")[-1]
        save_json(
            json_out,
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"))

        maybe_mkdir_p(out_dir_all_json)
        shutil.copy(
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"),
            join(out_dir_all_json,
                 "%s__%s.json" % (task, output_folder_base.split("/")[-1])))

Example #11
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None):

        current_mode = self.network.training
        self.network.eval()

        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params'][
                    'force_separate_z']
                interpolation_order = self.plans['segmentation_export_params'][
                    'interpolation_order']
                interpolation_order_z = self.plans[
                    'segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs[
                'interpolation_order']
            interpolation_order_z = segmentation_export_kwargs[
                'interpolation_order_z']

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(2)
        results = []

        transpose_backward = self.plans.get('transpose_backward')

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(
                join(self.folder_with_segs_from_prev_stage,
                     k + "_segFromPrevStage.npz"))['data'][None]

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate(
                (data[:-1],
                 to_one_hot(seg_from_prev_stage[0], range(1,
                                                          self.num_classes))))

            softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                data_for_net,
                do_mirroring=do_mirroring,
                mirror_axes=mirror_axes,
                use_sliding_window=use_sliding_window,
                step_size=step_size,
                use_gaussian=use_gaussian,
                all_in_gpu=all_in_gpu,
                mixed_precision=self.fp16)[1]

            if transpose_backward is not None:
                transpose_backward = self.plans.get('transpose_backward')
                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            if np.prod(softmax_pred.shape) > (2e9 / 4 *
                                              0.85):  # *0.85 just to be safe
                np.save(fname + ".npy", softmax_pred)
                softmax_pred = fname + ".npy"

            results.append(
                export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                      properties, interpolation_order,
                      self.regions_class_order, None, None, softmax_fname,
                      None, force_separate_z, interpolation_order_z), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples,
                             labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder,
                                                   "summary.json"),
                             json_name=job_name,
                             json_description="",
                             json_task=task)

        # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
        # except the largest connected component for each class. To see if this improves results, we do this for all
        # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
        # have this applied during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder,
                                 self.gt_niftis_folder,
                                 validation_folder_name,
                                 final_subf_name=validation_folder_name +
                                 "_postprocessed",
                                 debug=debug)
        # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
        # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
        # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
        # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
        # be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError:
                    attempts += 1
                    sleep(1)

        self.network.train(current_mode)
        export_pool.close()
        export_pool.join()
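
Several validate() methods above work around Python's ~2 GB pickling limit (see the long comment in Example #5) by writing large softmax arrays to an .npy file and handing only the path to the export pool; the worker reads and deletes the file. A generic sketch of that pattern with illustrative names:

import os
import numpy as np

def export_prediction(softmax_or_path, out_file):
    """Accept either an in-memory array or a path to a temporary .npy file."""
    if isinstance(softmax_or_path, str):
        softmax = np.load(softmax_or_path)
        os.remove(softmax_or_path)  # the worker cleans up the temp file after reading it
    else:
        softmax = softmax_or_path
    np.save(out_file, softmax.argmax(0).astype(np.uint8))

def submit_export(pool, softmax, out_file, tmp_file):
    # arrays close to the pickle limit are written to disk and passed by filename instead
    if softmax.nbytes > 2e9 * 0.9:
        np.save(tmp_file, softmax)
        softmax = tmp_file
    return pool.starmap_async(export_prediction, ((softmax, out_file),))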

Example #12
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None):
        if self.local_rank == 0:
            if isinstance(self.network, DDP):
                net = self.network.module
            else:
                net = self.network
            ds = net.do_ds
            net.do_ds = False

            current_mode = self.network.training
            self.network.eval()

            assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
            if self.dataset_val is None:
                self.load_dataset()
                self.do_split()

            if segmentation_export_kwargs is None:
                if 'segmentation_export_params' in self.plans.keys():
                    force_separate_z = self.plans[
                        'segmentation_export_params']['force_separate_z']
                    interpolation_order = self.plans[
                        'segmentation_export_params']['interpolation_order']
                    interpolation_order_z = self.plans[
                        'segmentation_export_params']['interpolation_order_z']
                else:
                    force_separate_z = None
                    interpolation_order = 1
                    interpolation_order_z = 0
            else:
                force_separate_z = segmentation_export_kwargs[
                    'force_separate_z']
                interpolation_order = segmentation_export_kwargs[
                    'interpolation_order']
                interpolation_order_z = segmentation_export_kwargs[
                    'interpolation_order_z']

            # predictions as they come from the network go here
            output_folder = join(self.output_folder, validation_folder_name)
            maybe_mkdir_p(output_folder)
            # this is for debug purposes
            my_input_args = {
                'do_mirroring': do_mirroring,
                'use_sliding_window': use_sliding_window,
                'step_size': step_size,
                'save_softmax': save_softmax,
                'use_gaussian': use_gaussian,
                'overwrite': overwrite,
                'validation_folder_name': validation_folder_name,
                'debug': debug,
                'all_in_gpu': all_in_gpu,
                'segmentation_export_kwargs': segmentation_export_kwargs,
            }
            save_json(my_input_args, join(output_folder,
                                          "validation_args.json"))

            if do_mirroring:
                if not self.data_aug_params['do_mirror']:
                    raise RuntimeError(
                        "We did not train with mirroring so you cannot do inference with mirroring enabled"
                    )
                mirror_axes = self.data_aug_params['mirror_axes']
            else:
                mirror_axes = ()

            pred_gt_tuples = []

            export_pool = Pool(default_num_threads)
            results = []

            all_keys = list(self.dataset_val.keys())
            my_keys = all_keys[self.local_rank::dist.get_world_size()]
            # we cannot simply iterate over all_keys because we need to know pred_gt_tuples and valid_labels of all cases
            # for evaluation (which is done by local rank 0)
            for k in all_keys:
                properties = load_pickle(self.dataset[k]['properties_file'])
                fname = properties['list_of_data_files'][0].split(
                    "/")[-1][:-12]
                pred_gt_tuples.append([
                    join(output_folder, fname + ".nii.gz"),
                    join(self.gt_niftis_folder, fname + ".nii.gz")
                ])
                if k in my_keys:
                    if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                            (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                        data = np.load(self.dataset[k]['data_file'])['data']

                        print(k, data.shape)
                        data[-1][data[-1] == -1] = 0

                        softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                            data[:-1],
                            do_mirroring=do_mirroring,
                            mirror_axes=mirror_axes,
                            use_sliding_window=use_sliding_window,
                            step_size=step_size,
                            use_gaussian=use_gaussian,
                            all_in_gpu=all_in_gpu,
                            mixed_precision=self.fp16)[1]

                        softmax_pred = softmax_pred.transpose(
                            [0] + [i + 1 for i in self.transpose_backward])

                        if save_softmax:
                            softmax_fname = join(output_folder, fname + ".npz")
                        else:
                            softmax_fname = None
                        """There is a problem with python process communication that prevents us from communicating obejcts
                        larger than 2 GB between processes (basically when the length of the pickle string that will be sent is
                        communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long
                        enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually
                        patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will
                        then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either
                        filename or np.ndarray and will handle this automatically"""
                        if np.prod(softmax_pred.shape) > (
                                2e9 / 4 * 0.85):  # *0.85 just to be safe
                            np.save(join(output_folder, fname + ".npy"),
                                    softmax_pred)
                            softmax_pred = join(output_folder, fname + ".npy")

                        results.append(
                            export_pool.starmap_async(
                                save_segmentation_nifti_from_softmax,
                                ((softmax_pred,
                                  join(output_folder, fname + ".nii.gz"),
                                  properties, interpolation_order,
                                  self.regions_class_order, None, None,
                                  softmax_fname, None, force_separate_z,
                                  interpolation_order_z), )))

            _ = [i.get() for i in results]
            self.print_to_log_file("finished prediction")

            distributed.barrier()
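            # all ranks wait here so that every prediction file exists on disk before
            # rank 0 starts the evaluation below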

            if self.local_rank == 0:
                # evaluate raw predictions
                self.print_to_log_file("evaluation of raw predictions")
                task = self.dataset_directory.split("/")[-1]
                job_name = self.experiment_name
                _ = aggregate_scores(pred_gt_tuples,
                                     labels=list(range(self.num_classes)),
                                     json_output_file=join(
                                         output_folder, "summary.json"),
                                     json_name=job_name + " val tiled %s" %
                                     (str(use_sliding_window)),
                                     json_task=task,
                                     num_threads=default_num_threads)

                # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
                # except the largest connected component for each class. To see if this improves results, we do this for all
                # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
                # have this applied during inference as well
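                # conceptually, for a class c this means: label the connected components of
                # (prediction == c), keep only the largest one and set the rest to background;
                # determine_postprocessing tries this per class and only keeps it where the
                # validation Dice improves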
                self.print_to_log_file("determining postprocessing")
                determine_postprocessing(
                    self.output_folder,
                    self.gt_niftis_folder,
                    validation_folder_name,
                    final_subf_name=validation_folder_name + "_postprocessed",
                    debug=debug)
                # after this, the final predictions for the validation set can be found in validation_folder_name + "_postprocessed"
                # They are always in that folder, even if no postprocessing was applied!

                # determining postprocessing on a per-fold basis may be OK for this fold, but what if another fold finds a different
                # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
                # done we won't know what self.gt_niftis_folder was, so we copy all the niftis into a separate folder now to
                # be used later
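                # the copies land in output_folder_base/gt_niftis so that a later cross-fold
                # consolidation step can find the ground truth without knowing what
                # self.gt_niftis_folder pointed to during training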
                gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
                maybe_mkdir_p(gt_nifti_folder)
                for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
                    success = False
                    attempts = 0
                    e = None
                    while not success and attempts < 10:
                        try:
                            shutil.copy(f, gt_nifti_folder)
                            success = True
                        except OSError as os_err:
                            # keep a reference: the name bound by "as" is cleared when the
                            # except block ends, so the re-raise below would otherwise fail
                            e = os_err
                            attempts += 1
                            sleep(1)
                    if not success:
                        print(
                            "Could not copy gt nifti file %s into folder %s" %
                            (f, gt_nifti_folder))
                        if e is not None:
                            raise e

            self.network.train(current_mode)
            net.do_ds = ds

    def validate(self,
                 do_mirroring=True,
                 use_train_mode=False,
                 tiled=True,
                 step=2,
                 save_softmax=True,
                 use_gaussian=True,
                 validation_folder_name='validation'):
        """

        :param do_mirroring:
        :param use_train_mode:
        :param mirror_axes:
        :param tiled:
        :param tile_in_z:
        :param step:
        :param use_nifti:
        :param save_softmax:
        :param use_gaussian:
        :param use_temporal_models:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        process_manager = Pool(2)
        results = []

        transpose_backward = self.plans.get('transpose_backward')

        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(
                join(self.folder_with_segs_from_prev_stage,
                     k + "_segFromPrevStage.npz"))['data'][None]

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate(
                (data[:-1],
                 to_one_hot(seg_from_prev_stage[0], range(1,
                                                          self.num_classes))))
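            # to_one_hot expands the previous-stage segmentation into one binary channel per
            # label in range(1, self.num_classes); these channels are appended to the image
            # data so this cascade stage can condition on the coarse segmentation from before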
            softmax_pred = self.predict_preprocessed_data_return_softmax(
                data_for_net,
                do_mirroring,
                1,
                use_train_mode,
                1,
                mirror_axes,
                tiled,
                True,
                step,
                self.patch_size,
                use_gaussian=use_gaussian)

            if transpose_backward is not None:
                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None
            """There is a problem with python process communication that prevents us from communicating obejcts 
            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
            filename or np.ndarray and will handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 *
                                              0.9):  # *0.9 just to be safe
                np.save(join(output_folder, fname + ".npy"), softmax_pred)
                softmax_pred = join(output_folder, fname + ".npy")
            results.append(
                process_manager.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(
                        output_folder, fname + ".nii.gz"), properties, 1, None,
                      None, None, softmax_fname, None), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples,
                             labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder,
                                                   "summary.json"),
                             json_name=job_name,
                             json_author="Fabian",
                             json_description="",
                             json_task=task)

Exemple #14
0
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None,
                 postprocessing: bool = False):
        current_mode = self.network.training
        self.network.eval()

        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        # if self.dataset_val is None:
        #     self.load_dataset()
        #     self.do_split()

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params'][
                    'force_separate_z']
                interpolation_order = self.plans['segmentation_export_params'][
                    'interpolation_order']
                interpolation_order_z = self.plans[
                    'segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs[
                'interpolation_order']
            interpolation_order_z = segmentation_export_kwargs[
                'interpolation_order_z']
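        # precedence for the export settings: explicit segmentation_export_kwargs win, otherwise
        # the values stored in the plans under 'segmentation_export_params' are used, and if
        # neither is available the defaults force_separate_z=None, interpolation_order=1,
        # interpolation_order_z=0 apply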

        # predictions as they come from the network go here
        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)
        # this is for debug purposes
        my_input_args = {
            'do_mirroring': do_mirroring,
            'use_sliding_window': use_sliding_window,
            'step_size': step_size,
            'save_softmax': save_softmax,
            'use_gaussian': use_gaussian,
            'overwrite': overwrite,
            'validation_folder_name': validation_folder_name,
            'debug': debug,
            'all_in_gpu': all_in_gpu,
            'segmentation_export_kwargs': segmentation_export_kwargs,
            'postprocessing': postprocessing,
        }
        save_json(my_input_args, join(output_folder, "validation_args.json"))

        if do_mirroring:
            if not self.data_aug_params['do_mirror']:
                raise RuntimeError(
                    "We did not train with mirroring so you cannot do inference with mirroring enabled"
                )
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(default_num_threads)
        results = []

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']
                print(k, data.shape)
                data[-1][data[-1] == -1] = 0
                softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                    data[:-1],
                    do_mirroring=do_mirroring,
                    mirror_axes=mirror_axes,
                    use_sliding_window=use_sliding_window,
                    step_size=step_size,
                    use_gaussian=use_gaussian,
                    all_in_gpu=all_in_gpu,
                    mixed_precision=self.fp16)[1]
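                # predict_preprocessed_data_return_seg_and_softmax returns (segmentation, softmax);
                # only the softmax output ([1]) is kept here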

                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in self.transpose_backward])
                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None

                if np.prod(softmax_pred.shape) > (
                        2e9 / 4 * 0.85):  # *0.85 just to be safe
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")

                results.append(
                    export_pool.starmap_async(
                        save_segmentation_nifti_from_softmax,
                        ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                          properties, interpolation_order,
                          self.regions_class_order, None, None, softmax_fname,
                          None, force_separate_z, interpolation_order_z), )))
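                # the resampling and nifti export run asynchronously in the worker pool so the GPU
                # can keep predicting; the .get() calls below block until all exports have finished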

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]
        self.print_to_log_file("finished prediction")

        # evaluate raw predictions
        self.print_to_log_file("evaluation of raw predictions")
        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(
            pred_gt_tuples,
            labels=list(range(self.num_classes)),
            json_output_file=join(output_folder, "summary.json"),
            json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
            json_task=task,
            num_threads=default_num_threads)

        if postprocessing:
            self.print_to_log_file("determining postprocessing")
            # the postprocessed predictions go to final_subf_name, i.e. validation_folder_name + "_postprocessed"
            # ("validation_raw_postprocessed" with the default folder name)
            determine_postprocessing(self.output_folder,
                                     self.gt_niftis_folder,
                                     validation_folder_name,
                                     final_subf_name=validation_folder_name +
                                     "_postprocessed",
                                     debug=debug)

            gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
            maybe_mkdir_p(gt_nifti_folder)
            for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
                success = False
                attempts = 0
                e = None
                while not success and attempts < 10:
                    try:
                        shutil.copy(f, gt_nifti_folder)
                        success = True
                    except OSError as os_err:
                        # keep a reference: the name bound by "as" is cleared when the
                        # except block ends, so the re-raise below would otherwise fail
                        e = os_err
                        attempts += 1
                        sleep(1)
                if not success:
                    print("Could not copy gt nifti file %s into folder %s" %
                          (f, gt_nifti_folder))
                    if e is not None:
                        raise e

        self.network.train(current_mode)
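        # minimal usage sketch (not part of the original snippet, `trainer` is an assumed name):
        # after training and loading a checkpoint, validation would typically be triggered with
        # something like
        #   trainer.validate(do_mirroring=True, use_sliding_window=True, step_size=0.5,
        #                    save_softmax=False, validation_folder_name='validation_raw',
        #                    postprocessing=True)
        # where `trainer` is an initialized instance of this trainer class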