Example #1
def merge(folders,
          output_folder,
          threads,
          override=True,
          postprocessing_file=None,
          store_npz=False):
    maybe_mkdir_p(output_folder)

    if postprocessing_file is not None:
        output_folder_orig = deepcopy(output_folder)
        output_folder = join(output_folder, 'not_postprocessed')
        maybe_mkdir_p(output_folder)
    else:
        output_folder_orig = None

    patient_ids = [subfiles(i, suffix=".npz", join=False) for i in folders]
    patient_ids = [i for j in patient_ids for i in j]
    patient_ids = [i[:-4] for i in patient_ids]
    patient_ids = np.unique(patient_ids)

    for f in folders:
        assert all([isfile(join(f, i + ".npz")) for i in patient_ids]), "Not all patient npz are available in " \
                                                                        "all folders"
        assert all([isfile(join(f, i + ".pkl")) for i in patient_ids]), "Not all patient pkl are available in " \
                                                                        "all folders"

    files = []
    property_files = []
    out_files = []
    for p in patient_ids:
        files.append([join(f, p + ".npz") for f in folders])
        property_files.append(join(folders[0], p + ".pkl"))
        out_files.append(join(output_folder, p + ".nii.gz"))

    plans = load_pickle(join(folders[0], "plans.pkl"))

    only_keep_largest_connected_component, min_region_size_per_class = plans['keep_only_largest_region'], \
                                                                       plans['min_region_size_per_class']
    p = Pool(threads)
    p.map(
        merge_files,
        zip(files, property_files, out_files,
            [only_keep_largest_connected_component] * len(out_files),
            [min_region_size_per_class] * len(out_files),
            [override] * len(out_files), [store_npz] * len(out_files)))
    p.close()
    p.join()

    if postprocessing_file is not None:
        for_which_classes, min_valid_obj_size = load_postprocessing(
            postprocessing_file)
        print('Postprocessing...')
        apply_postprocessing_to_folder(output_folder, output_folder_orig,
                                       for_which_classes, min_valid_obj_size,
                                       threads)
        shutil.copy(postprocessing_file, output_folder_orig)
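This snippet assumes the module-level imports of its source file (Pool from multiprocessing, numpy as np, shutil, deepcopy from copy, the file helpers join, isfile, subfiles, maybe_mkdir_p and load_pickle, and the sibling merge_files worker). A minimal, hypothetical call that ensembles two prediction folders could look like this:

# Hypothetical usage sketch: the folder paths and thread count are placeholders.
# Each input folder must contain <case>.npz and <case>.pkl for every case,
# and the first folder must also contain plans.pkl.
merge(["/data/predictions/config_a", "/data/predictions/config_b"],
      output_folder="/data/predictions/ensembled",
      threads=8,
      override=True,
      postprocessing_file=None,  # optionally a path to a postprocessing.json
      store_npz=False)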
Example #2
def merge(folders,
          output_folder,
          threads,
          override=True,
          postprocessing_file=None,
          store_npz=False):
    os.makedirs(output_folder, exist_ok=True)

    if postprocessing_file is not None:
        output_folder_orig = deepcopy(output_folder)
        output_folder = join(output_folder, 'not_postprocessed')
        os.makedirs(output_folder, exist_ok=True)
    else:
        output_folder_orig = None

    patient_ids = [subfiles(i, suffix=".npz", join=False) for i in folders]
    patient_ids = [i for j in patient_ids for i in j]
    patient_ids = [i[:-4] for i in patient_ids]
    patient_ids = np.unique(patient_ids)

    for f in folders:
        assert all([isfile(join(f, i + ".npz")) for i in patient_ids]), "Not all patient npz are available in " \
                                                                        "all folders"
        assert all([isfile(join(f, i + ".pkl")) for i in patient_ids]), "Not all patient pkl are available in " \
                                                                        "all folders"

    files = []
    property_files = []
    out_files = []
    for p in patient_ids:
        files.append([join(f, p + ".npz") for f in folders])
        property_files.append([join(f, p + ".pkl") for f in folders])
        out_files.append(join(output_folder, p + ".nii.gz"))

    p = Pool(threads)
    p.starmap(
        merge_files,
        zip(files, property_files, out_files, [override] * len(out_files),
            [store_npz] * len(out_files)))
    p.close()
    p.join()

    if postprocessing_file is not None:
        for_which_classes, min_valid_obj_size = load_postprocessing(
            postprocessing_file)
        print('Postprocessing...')
        apply_postprocessing_to_folder(output_folder, output_folder_orig,
                                       for_which_classes, min_valid_obj_size,
                                       threads)
        shutil.copy(postprocessing_file, output_folder_orig)
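This variant differs from Example #1 in that it collects a properties .pkl from every input folder rather than only the first one, does not read plans.pkl, and dispatches the work with Pool.starmap instead of Pool.map, so merge_files is expected to take separate positional arguments instead of one packed tuple. A small self-contained sketch with a toy worker (not the real merge_files) illustrates that difference:

from multiprocessing import Pool

def add_unpacked(a, b, c):
    # starmap unpacks each zipped tuple into separate positional arguments (Example #2)
    return a + b + c

def add_packed(args):
    # map passes each zipped tuple as one argument that the worker unpacks itself (Example #1)
    a, b, c = args
    return a + b + c

if __name__ == "__main__":
    xs, ys, zs = [1, 2], [10, 20], [100, 200]
    with Pool(2) as p:
        print(p.starmap(add_unpacked, zip(xs, ys, zs)))  # [111, 222]
        print(p.map(add_packed, zip(xs, ys, zs)))        # [111, 222]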
Example #3
def predict_cases_fastest(model,
                          list_of_lists,
                          output_filenames,
                          folds,
                          num_threads_preprocessing,
                          num_threads_nifti_save,
                          segs_from_prev_stage=None,
                          do_tta=True,
                          mixed_precision=True,
                          overwrite_existing=False,
                          all_in_gpu=True,
                          step_size=0.5,
                          checkpoint_name="model_final_checkpoint"):
    assert len(list_of_lists) == len(output_filenames)
    if segs_from_prev_stage is not None:
        assert len(segs_from_prev_stage) == len(output_filenames)

    pool = Pool(num_threads_nifti_save)
    results = []

    cleaned_output_files = []
    for o in output_filenames:
        dr, f = os.path.split(o)
        if len(dr) > 0:
            maybe_mkdir_p(dr)
        if not f.endswith(".nii.gz"):
            f, _ = os.path.splitext(f)
            f = f + ".nii.gz"
        cleaned_output_files.append(join(dr, f))

    if not overwrite_existing:
        print("number of cases:", len(list_of_lists))
        not_done_idx = [
            i for i, j in enumerate(cleaned_output_files) if not isfile(j)
        ]

        cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx]
        list_of_lists = [list_of_lists[i] for i in not_done_idx]
        if segs_from_prev_stage is not None:
            segs_from_prev_stage = [
                segs_from_prev_stage[i] for i in not_done_idx
            ]

        print("number of cases that still need to be predicted:",
              len(cleaned_output_files))

    print("emptying cuda cache")
    torch.cuda.empty_cache()

    print("loading parameters for folds,", folds)
    trainer, params = load_model_and_checkpoint_files(
        model,
        folds,
        mixed_precision=mixed_precision,
        checkpoint_name=checkpoint_name)

    print("starting preprocessing generator")
    preprocessing = preprocess_multithreaded(trainer, list_of_lists,
                                             cleaned_output_files,
                                             num_threads_preprocessing,
                                             segs_from_prev_stage)

    print("starting prediction...")
    for preprocessed in preprocessing:
        print("getting data from preprocessor")
        output_filename, (d, dct) = preprocessed
        print("got something")
        if isinstance(d, str):
            print("what I got is a string, so I need to load a file")
            data = np.load(d)
            os.remove(d)
            d = data

        # preallocate the output arrays
        # same dtype as the return value in predict_preprocessed_data_return_seg_and_softmax (saves time)
        all_softmax_outputs = np.zeros(
            (len(params), trainer.num_classes, *d.shape[1:]), dtype=np.float16)
        all_seg_outputs = np.zeros((len(params), *d.shape[1:]), dtype=int)
        print("predicting", output_filename)

        for i, p in enumerate(params):
            trainer.load_checkpoint_ram(p, False)
            res = trainer.predict_preprocessed_data_return_seg_and_softmax(
                d,
                do_mirroring=do_tta,
                mirror_axes=trainer.data_aug_params['mirror_axes'],
                use_sliding_window=True,
                step_size=step_size,
                use_gaussian=True,
                all_in_gpu=all_in_gpu,
                mixed_precision=mixed_precision)
            if len(params) > 1:
                # otherwise we don't need this and can save ourselves the time it takes to copy it
                all_softmax_outputs[i] = res[1]
            all_seg_outputs[i] = res[0]

        print("aggregating predictions")
        if len(params) > 1:
            softmax_mean = np.mean(all_softmax_outputs, 0)
            seg = softmax_mean.argmax(0)
        else:
            seg = all_seg_outputs[0]

        print("applying transpose_backward")
        transpose_forward = trainer.plans.get('transpose_forward')
        if transpose_forward is not None:
            transpose_backward = trainer.plans.get('transpose_backward')
            seg = seg.transpose([i for i in transpose_backward])

        print("initializing segmentation export")
        results.append(
            pool.starmap_async(save_segmentation_nifti,
                               ((seg, output_filename, dct, 0, None), )))
        print("done")

    print(
        "inference done. Now waiting for the segmentation export to finish...")
    _ = [i.get() for i in results]
    # now apply postprocessing
    # first load the postprocessing properties if they are present. Else raise a well visible warning
    results = []
    pp_file = join(model, "postprocessing.json")
    if isfile(pp_file):
        print("postprocessing...")
        shutil.copy(pp_file, os.path.dirname(output_filenames[0]))
        # for_which_classes stores for which of the classes everything but the largest connected component needs to be
        # removed
        for_which_classes, min_valid_obj_size = load_postprocessing(pp_file)
        results.append(
            pool.starmap_async(
                load_remove_save,
                zip(output_filenames, output_filenames,
                    [for_which_classes] * len(output_filenames),
                    [min_valid_obj_size] * len(output_filenames))))
        _ = [i.get() for i in results]
    else:
        print(
            "WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run "
            "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is "
            "%s" % model)

    pool.close()
    pool.join()
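A hedged invocation sketch with placeholder paths; the model folder is expected to contain the fold_X subfolders with the trained checkpoints (see the docstring of predict_cases below for the argument conventions):

# Hypothetical usage sketch; every path below is a placeholder.
model_folder = "/models/nnUNet/3d_fullres/TaskXXX_example/nnUNetTrainerV2__nnUNetPlansv2.1"
list_of_lists = [["/data/imagesTs/case0_0000.nii.gz"],
                 ["/data/imagesTs/case1_0000.nii.gz"]]
output_filenames = ["/data/predictionsTs/case0.nii.gz",
                    "/data/predictionsTs/case1.nii.gz"]

predict_cases_fastest(model_folder, list_of_lists, output_filenames,
                      folds=(0, 1, 2, 3, 4),
                      num_threads_preprocessing=6,
                      num_threads_nifti_save=2,
                      do_tta=True,
                      mixed_precision=True,
                      overwrite_existing=False)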
Example #4
def predict_cases(model,
                  list_of_lists,
                  output_filenames,
                  folds,
                  save_npz,
                  num_threads_preprocessing,
                  num_threads_nifti_save,
                  segs_from_prev_stage=None,
                  do_tta=True,
                  mixed_precision=True,
                  overwrite_existing=False,
                  all_in_gpu=False,
                  step_size=0.5,
                  checkpoint_name="model_final_checkpoint",
                  segmentation_export_kwargs: dict = None,
                  disable_sliding_window: bool = False):
    """
    :param segmentation_export_kwargs: optional dict with the keys 'force_separate_z', 'interpolation_order' and
    'interpolation_order_z'; if None, these are read from the plans file (or defaults are used)
    :param model: folder where the model is saved, must contain fold_x subfolders
    :param list_of_lists: [[case0_0000.nii.gz, case0_0001.nii.gz], [case1_0000.nii.gz, case1_0001.nii.gz], ...]
    :param output_filenames: [output_file_case0.nii.gz, output_file_case1.nii.gz, ...]
    :param folds: default: (0, 1, 2, 3, 4), but can also be 'all' or a subset of the five folds, for example use (0,)
    to use only fold_0
    :param save_npz: default: False
    :param num_threads_preprocessing:
    :param num_threads_nifti_save:
    :param segs_from_prev_stage:
    :param do_tta: default: True, can be set to False for an 8x speedup at the cost of reduced segmentation quality
    :param overwrite_existing: default: False (cases whose output file already exists are skipped)
    :param mixed_precision: if None then we take no action. If True/False we overwrite what the model has in its init
    :return:
    """
    assert len(list_of_lists) == len(output_filenames)
    if segs_from_prev_stage is not None:
        assert len(segs_from_prev_stage) == len(output_filenames)

    pool = Pool(num_threads_nifti_save)
    results = []

    cleaned_output_files = []
    for o in output_filenames:
        dr, f = os.path.split(o)
        if len(dr) > 0:
            maybe_mkdir_p(dr)
        if not f.endswith(".nii.gz"):
            f, _ = os.path.splitext(f)
            f = f + ".nii.gz"
        cleaned_output_files.append(join(dr, f))

    if not overwrite_existing:
        print("number of cases:", len(list_of_lists))
        # if save_npz=True then we should also check for missing npz files
        not_done_idx = [
            i for i, j in enumerate(cleaned_output_files)
            if (not isfile(j)) or (save_npz and not isfile(j[:-7] + '.npz'))
        ]

        cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx]
        list_of_lists = [list_of_lists[i] for i in not_done_idx]
        if segs_from_prev_stage is not None:
            segs_from_prev_stage = [
                segs_from_prev_stage[i] for i in not_done_idx
            ]

        print("number of cases that still need to be predicted:",
              len(cleaned_output_files))

    print("emptying cuda cache")
    torch.cuda.empty_cache()

    print("loading parameters for folds,", folds)
    trainer, params = load_model_and_checkpoint_files(
        model,
        folds,
        mixed_precision=mixed_precision,
        checkpoint_name=checkpoint_name)

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in trainer.plans.keys():
            force_separate_z = trainer.plans['segmentation_export_params'][
                'force_separate_z']
            interpolation_order = trainer.plans['segmentation_export_params'][
                'interpolation_order']
            interpolation_order_z = trainer.plans[
                'segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs[
            'interpolation_order_z']

    print("starting preprocessing generator")
    preprocessing = preprocess_multithreaded(trainer, list_of_lists,
                                             cleaned_output_files,
                                             num_threads_preprocessing,
                                             segs_from_prev_stage)
    print("starting prediction...")
    all_output_files = []
    for preprocessed in preprocessing:
        output_filename, (d, dct) = preprocessed
        all_output_files.append(output_filename)
        if isinstance(d, str):
            data = np.load(d)
            os.remove(d)
            d = data

        print("predicting", output_filename)
        softmax = []
        for p in params:
            trainer.load_checkpoint_ram(p, False)
            softmax.append(
                trainer.predict_preprocessed_data_return_seg_and_softmax(
                    d,
                    do_mirroring=do_tta,
                    mirror_axes=trainer.data_aug_params['mirror_axes'],
                    use_sliding_window=not disable_sliding_window,
                    step_size=step_size,
                    use_gaussian=True,
                    all_in_gpu=all_in_gpu,
                    mixed_precision=mixed_precision)[1][None])

        softmax = np.vstack(softmax)
        softmax_mean = np.mean(softmax, 0)

        transpose_forward = trainer.plans.get('transpose_forward')
        if transpose_forward is not None:
            transpose_backward = trainer.plans.get('transpose_backward')
            softmax_mean = softmax_mean.transpose(
                [0] + [i + 1 for i in transpose_backward])

        if save_npz:
            npz_file = output_filename[:-7] + ".npz"
        else:
            npz_file = None

        if hasattr(trainer, 'regions_class_order'):
            region_class_order = trainer.regions_class_order
        else:
            region_class_order = None
        """There is a problem with python process communication that prevents us from communicating obejcts 
        larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
        communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
        enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
        patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
        then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
        filename or np.ndarray and will handle this automatically"""
        bytes_per_voxel = 4
        if all_in_gpu:
            bytes_per_voxel = 2  # if all_in_gpu then the return value is half (float16)
        if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel *
                                          0.85):  # * 0.85 just to be safe
            print(
                "This output is too large for python process-process communication. Saving output temporarily to disk"
            )
            np.save(output_filename[:-7] + ".npy", softmax_mean)
            softmax_mean = output_filename[:-7] + ".npy"

        results.append(
            pool.starmap_async(
                save_segmentation_nifti_from_softmax,
                ((softmax_mean, output_filename, dct, interpolation_order,
                  region_class_order, None, None, npz_file, None,
                  force_separate_z, interpolation_order_z), )))

    print(
        "inference done. Now waiting for the segmentation export to finish...")
    _ = [i.get() for i in results]
    # now apply postprocessing
    # first load the postprocessing properties if they are present. Else raise a well visible warning
    results = []
    pp_file = join(model, "postprocessing.json")
    if isfile(pp_file):
        print("postprocessing...")
        shutil.copy(pp_file,
                    os.path.abspath(os.path.dirname(output_filenames[0])))
        # for_which_classes stores for which of the classes everything but the largest connected component needs to be
        # removed
        for_which_classes, min_valid_obj_size = load_postprocessing(pp_file)
        results.append(
            pool.starmap_async(
                load_remove_save,
                zip(output_filenames, output_filenames,
                    [for_which_classes] * len(output_filenames),
                    [min_valid_obj_size] * len(output_filenames))))
        _ = [i.get() for i in results]
    else:
        print(
            "WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run "
            "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is "
            "%s" % model)

    pool.close()
    pool.join()
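When segmentation_export_kwargs is provided, it must carry the same three keys that the function otherwise reads from trainer.plans['segmentation_export_params']. A hedged sketch of an explicit override (paths are placeholders, values simply mirror the fallback defaults in the function body above):

export_kwargs = {
    "force_separate_z": None,       # same fallbacks as in the function body
    "interpolation_order": 1,
    "interpolation_order_z": 0,
}
predict_cases("/models/nnUNet/3d_fullres/TaskXXX_example/nnUNetTrainerV2__nnUNetPlansv2.1",
              [["/data/imagesTs/case0_0000.nii.gz"]],
              ["/data/predictionsTs/case0.nii.gz"],
              folds=(0, 1, 2, 3, 4),
              save_npz=False,
              num_threads_preprocessing=6,
              num_threads_nifti_save=2,
              segmentation_export_kwargs=export_kwargs)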
Example #5
def convert_variant2_predicted_test_to_submission_format(
    folder_with_predictions,
    output_folder="/home/fabian/drives/datasets/results/nnUNet/test_sets/Task038_CHAOS_Task_3_5_Variant2/ready_to_submit",
    postprocessing_file="/home/fabian/drives/datasets/results/nnUNet/ensembles/Task038_CHAOS_Task_3_5_Variant2/ensemble_2d__nnUNetTrainerV2__nnUNetPlansv2.1--3d_fullres__nnUNetTrainerV2__nnUNetPlansv2.1/postprocessing.json"
):
    """
    output_folder is where the extracted CHAOS submission template is located
    :param folder_with_predictions:
    :param output_folder:
    :param postprocessing_file:
    :return:
    """
    postprocessing_file = "/media/fabian/Results/nnUNet/3d_fullres/Task039_CHAOS_Task_3_5_Variant2_highres/" \
                          "nnUNetTrainerV2__nnUNetPlansfixed/postprocessing.json"

    # variant 2 treats in and out phase as two training examples, so we need to ensemble these two again
    final_predictions_folder = join(output_folder, "final")
    maybe_mkdir_p(final_predictions_folder)
    t1_patient_names = [
        i.split("_")[-1][:-7] for i in subfiles(
            folder_with_predictions, prefix="T1", suffix=".nii.gz", join=False)
    ]
    folder_for_ensembing0 = join(output_folder, "ens0")
    folder_for_ensembing1 = join(output_folder, "ens1")
    maybe_mkdir_p(folder_for_ensembing0)
    maybe_mkdir_p(folder_for_ensembing1)
    # now copy all T1 out-phase files into ens0 and all in-phase files into ens1, naming them identically
    for t1 in t1_patient_names:
        shutil.copy(join(folder_with_predictions, "T1_in_%s.npz" % t1),
                    join(folder_for_ensembing1, "T1_%s.npz" % t1))
        shutil.copy(join(folder_with_predictions, "T1_in_%s.pkl" % t1),
                    join(folder_for_ensembing1, "T1_%s.pkl" % t1))
        shutil.copy(join(folder_with_predictions, "T1_out_%s.npz" % t1),
                    join(folder_for_ensembing0, "T1_%s.npz" % t1))
        shutil.copy(join(folder_with_predictions, "T1_out_%s.pkl" % t1),
                    join(folder_for_ensembing0, "T1_%s.pkl" % t1))
    shutil.copy(join(folder_with_predictions, "plans.pkl"),
                join(folder_for_ensembing0, "plans.pkl"))
    shutil.copy(join(folder_with_predictions, "plans.pkl"),
                join(folder_for_ensembing1, "plans.pkl"))

    # there is a problem with T1_35 that I need to correct manually (different crop size, will not negatively impact results)
    #ens0_softmax = np.load(join(folder_for_ensembing0, "T1_35.npz"))['softmax']
    ens1_softmax = np.load(join(folder_for_ensembing1, "T1_35.npz"))['softmax']
    #ens0_props = load_pickle(join(folder_for_ensembing0, "T1_35.pkl"))
    #ens1_props = load_pickle(join(folder_for_ensembing1, "T1_35.pkl"))
    ens1_softmax = ens1_softmax[:, :, :-1, :]
    np.savez_compressed(join(folder_for_ensembing1, "T1_35.npz"),
                        softmax=ens1_softmax)
    shutil.copy(join(folder_for_ensembing0, "T1_35.pkl"),
                join(folder_for_ensembing1, "T1_35.pkl"))

    # now call my ensemble function
    merge((folder_for_ensembing0, folder_for_ensembing1),
          final_predictions_folder,
          8,
          True,
          postprocessing_file=postprocessing_file)
    # copy t2 files to final_predictions_folder as well
    t2_files = subfiles(folder_with_predictions,
                        prefix="T2",
                        suffix=".nii.gz",
                        join=False)
    for t2 in t2_files:
        shutil.copy(join(folder_with_predictions, t2),
                    join(final_predictions_folder, t2))

    # apply postprocessing
    from nnunet.postprocessing.connected_components import apply_postprocessing_to_folder, load_postprocessing
    postprocessed_folder = join(output_folder, "final_postprocessed")
    for_which_classes, min_valid_obj_size = load_postprocessing(
        postprocessing_file)
    apply_postprocessing_to_folder(final_predictions_folder,
                                   postprocessed_folder, for_which_classes,
                                   min_valid_obj_size, 8)

    # now export the niftis in the weird png format
    # task 3
    output_dir = join(output_folder, "CHAOS_submission_template_new", "Task3",
                      "MR")
    for t1 in t1_patient_names:
        output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
        nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
        write_pngs_from_nifti(nifti_file,
                              output_folder_here,
                              converter=convert_seg_to_intensity_task3)
    for t2 in t2_files:
        patname = t2.split("_")[-1][:-7]
        output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
        nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
        write_pngs_from_nifti(nifti_file,
                              output_folder_here,
                              converter=convert_seg_to_intensity_task3)

    # task 5
    output_dir = join(output_folder, "CHAOS_submission_template_new", "Task5",
                      "MR")
    for t1 in t1_patient_names:
        output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
        nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
        write_pngs_from_nifti(nifti_file,
                              output_folder_here,
                              converter=convert_seg_to_intensity_task5)
    for t2 in t2_files:
        patname = t2.split("_")[-1][:-7]
        output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
        nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
        write_pngs_from_nifti(nifti_file,
                              output_folder_here,
                              converter=convert_seg_to_intensity_task5)
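A call sketch with hypothetical local paths; as noted in the comment inside the function, the postprocessing_file argument is overridden by a hardcoded path, so effectively only folder_with_predictions and output_folder influence the result:

# Hypothetical usage; both paths are placeholders for a local CHAOS Variant 2 setup.
convert_variant2_predicted_test_to_submission_format(
    "/data/CHAOS/variant2_test_predictions",
    output_folder="/data/CHAOS/ready_to_submit")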