def merge(folders, output_folder, threads, override=True, postprocessing_file=None, store_npz=False):
    os.makedirs(output_folder, exist_ok=True)

    if postprocessing_file is not None:
        output_folder_orig = deepcopy(output_folder)
        output_folder = join(output_folder, 'not_postprocessed')
        os.makedirs(output_folder, exist_ok=True)
    else:
        output_folder_orig = None

    patient_ids = [subfiles(i, suffix=".npz", join=False) for i in folders]
    patient_ids = [i for j in patient_ids for i in j]
    patient_ids = [i[:-4] for i in patient_ids]
    patient_ids = np.unique(patient_ids)

    for f in folders:
        assert all([isfile(join(f, i + ".npz")) for i in patient_ids]), \
            "Not all patient npz are available in all folders"
        assert all([isfile(join(f, i + ".pkl")) for i in patient_ids]), \
            "Not all patient pkl are available in all folders"

    files = []
    property_files = []
    out_files = []
    for p in patient_ids:
        files.append([join(f, p + ".npz") for f in folders])
        property_files.append([join(f, p + ".pkl") for f in folders])
        out_files.append(join(output_folder, p + ".nii.gz"))

    p = Pool(threads)
    p.starmap(merge_files, zip(files, property_files, out_files,
                               [override] * len(out_files),
                               [store_npz] * len(out_files)))
    p.close()
    p.join()

    if postprocessing_file is not None:
        for_which_classes, min_valid_obj_size = load_postprocessing(postprocessing_file)
        print('Postprocessing...')
        apply_postprocessing_to_folder(output_folder, output_folder_orig,
                                       for_which_classes, min_valid_obj_size, threads)
        shutil.copy(postprocessing_file, output_folder_orig)
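
# Usage sketch for merge (illustrative only; the folder names below are
# hypothetical). Each input folder must contain matching <case>.npz / <case>.pkl
# pairs, i.e. predictions saved with softmax export enabled:
#
#     merge(folders=("predictions_2d", "predictions_3d_fullres"),
#           output_folder="predictions_ensembled",
#           threads=8, override=True, postprocessing_file=None, store_npz=False)
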
def predict_cases_fastest(model, list_of_lists, output_filenames, folds, num_threads_preprocessing,
                          num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True,
                          overwrite_existing=False, all_in_gpu=True, step_size=0.5,
                          checkpoint_name="model_final_checkpoint"):
    assert len(list_of_lists) == len(output_filenames)
    if segs_from_prev_stage is not None:
        assert len(segs_from_prev_stage) == len(output_filenames)

    pool = Pool(num_threads_nifti_save)
    results = []

    cleaned_output_files = []
    for o in output_filenames:
        dr, f = os.path.split(o)
        if len(dr) > 0:
            maybe_mkdir_p(dr)
        if not f.endswith(".nii.gz"):
            f, _ = os.path.splitext(f)
            f = f + ".nii.gz"
        cleaned_output_files.append(join(dr, f))

    if not overwrite_existing:
        print("number of cases:", len(list_of_lists))
        not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)]

        cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx]
        list_of_lists = [list_of_lists[i] for i in not_done_idx]
        if segs_from_prev_stage is not None:
            segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx]

        print("number of cases that still need to be predicted:", len(cleaned_output_files))

    print("emptying cuda cache")
    torch.cuda.empty_cache()

    print("loading parameters for folds,", folds)
    trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision,
                                                      checkpoint_name=checkpoint_name)

    print("starting preprocessing generator")
    preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files,
                                             num_threads_preprocessing, segs_from_prev_stage)

    print("starting prediction...")
    for preprocessed in preprocessing:
        print("getting data from preprocessor")
        output_filename, (d, dct) = preprocessed
        print("got something")
        if isinstance(d, str):
            print("what I got is a string, so I need to load a file")
            data = np.load(d)
            os.remove(d)
            d = data

        # preallocate the output arrays
        # same dtype as the return value in predict_preprocessed_data_return_seg_and_softmax (saves time)
        all_softmax_outputs = np.zeros((len(params), trainer.num_classes, *d.shape[1:]), dtype=np.float16)
        all_seg_outputs = np.zeros((len(params), *d.shape[1:]), dtype=int)
        print("predicting", output_filename)

        for i, p in enumerate(params):
            trainer.load_checkpoint_ram(p, False)
            res = trainer.predict_preprocessed_data_return_seg_and_softmax(
                d, do_mirroring=do_tta, mirror_axes=trainer.data_aug_params['mirror_axes'],
                use_sliding_window=True, step_size=step_size, use_gaussian=True,
                all_in_gpu=all_in_gpu, mixed_precision=mixed_precision)
            if len(params) > 1:
                # otherwise we don't need this and we can save ourselves the time it takes to copy it
                all_softmax_outputs[i] = res[1]
            all_seg_outputs[i] = res[0]

        print("aggregating predictions")
        if len(params) > 1:
            softmax_mean = np.mean(all_softmax_outputs, 0)
            seg = softmax_mean.argmax(0)
        else:
            seg = all_seg_outputs[0]

        print("applying transpose_backward")
        transpose_forward = trainer.plans.get('transpose_forward')
        if transpose_forward is not None:
            transpose_backward = trainer.plans.get('transpose_backward')
            seg = seg.transpose([i for i in transpose_backward])

        print("initializing segmentation export")
        results.append(pool.starmap_async(save_segmentation_nifti,
                                          ((seg, output_filename, dct, 0, None),)))
        print("done")

    print("inference done. Now waiting for the segmentation export to finish...")
    _ = [i.get() for i in results]

    # now apply postprocessing
    # first load the postprocessing properties if they are present. Else raise a well visible warning
    results = []
    pp_file = join(model, "postprocessing.json")
    if isfile(pp_file):
        print("postprocessing...")
        shutil.copy(pp_file, os.path.dirname(output_filenames[0]))
        # for_which_classes stores for which of the classes everything but the largest connected component
        # needs to be removed
        for_which_classes, min_valid_obj_size = load_postprocessing(pp_file)
        results.append(pool.starmap_async(load_remove_save,
                                          zip(output_filenames, output_filenames,
                                              [for_which_classes] * len(output_filenames),
                                              [min_valid_obj_size] * len(output_filenames))))
        _ = [i.get() for i in results]
    else:
        print("WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run "
              "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is "
              "%s" % model)

    pool.close()
    pool.join()
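
# Usage sketch for predict_cases_fastest (illustrative; paths, task name and case
# files are hypothetical). Each inner list holds the modality files of one case,
# ordered the same way as during training:
#
#     predict_cases_fastest(
#         model="RESULTS_FOLDER/nnUNet/3d_fullres/TaskXXX/nnUNetTrainerV2__nnUNetPlansv2.1",
#         list_of_lists=[["case0_0000.nii.gz"], ["case1_0000.nii.gz"]],
#         output_filenames=["out/case0.nii.gz", "out/case1.nii.gz"],
#         folds=(0, 1, 2, 3, 4),
#         num_threads_preprocessing=6, num_threads_nifti_save=2)
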
def predict_cases(model, list_of_lists, output_filenames, folds, save_npz, num_threads_preprocessing,
                  num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True,
                  overwrite_existing=False, all_in_gpu=False, step_size=0.5,
                  checkpoint_name="model_final_checkpoint", segmentation_export_kwargs: dict = None,
                  disable_sliding_window: bool = False):
    """
    :param segmentation_export_kwargs:
    :param model: folder where the model is saved, must contain fold_x subfolders
    :param list_of_lists: [[case0_0000.nii.gz, case0_0001.nii.gz], [case1_0000.nii.gz, case1_0001.nii.gz], ...]
    :param output_filenames: [output_file_case0.nii.gz, output_file_case1.nii.gz, ...]
    :param folds: default: (0, 1, 2, 3, 4) (but can also be 'all' or a subset of the five folds, for example use
    (0, ) for using only fold_0)
    :param save_npz: default: False
    :param num_threads_preprocessing:
    :param num_threads_nifti_save:
    :param segs_from_prev_stage:
    :param do_tta: default: True, can be set to False for an 8x speedup at the cost of reduced segmentation quality
    :param overwrite_existing: default: False
    :param mixed_precision: if None then we take no action. If True/False we overwrite what the model has in its init
    :return:
    """
    assert len(list_of_lists) == len(output_filenames)
    if segs_from_prev_stage is not None:
        assert len(segs_from_prev_stage) == len(output_filenames)

    pool = Pool(num_threads_nifti_save)
    results = []

    cleaned_output_files = []
    for o in output_filenames:
        dr, f = os.path.split(o)
        if len(dr) > 0:
            maybe_mkdir_p(dr)
        if not f.endswith(".nii.gz"):
            f, _ = os.path.splitext(f)
            f = f + ".nii.gz"
        cleaned_output_files.append(join(dr, f))

    if not overwrite_existing:
        print("number of cases:", len(list_of_lists))
        # if save_npz=True then we should also check for missing npz files
        not_done_idx = [i for i, j in enumerate(cleaned_output_files)
                        if (not isfile(j)) or (save_npz and not isfile(j[:-7] + '.npz'))]

        cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx]
        list_of_lists = [list_of_lists[i] for i in not_done_idx]
        if segs_from_prev_stage is not None:
            segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx]

        print("number of cases that still need to be predicted:", len(cleaned_output_files))

    print("emptying cuda cache")
    torch.cuda.empty_cache()

    print("loading parameters for folds,", folds)
    trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision,
                                                      checkpoint_name=checkpoint_name)

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in trainer.plans.keys():
            force_separate_z = trainer.plans['segmentation_export_params']['force_separate_z']
            interpolation_order = trainer.plans['segmentation_export_params']['interpolation_order']
            interpolation_order_z = trainer.plans['segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

    print("starting preprocessing generator")
    preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files,
                                             num_threads_preprocessing, segs_from_prev_stage)
    print("starting prediction...")
    all_output_files = []
    for preprocessed in preprocessing:
        output_filename, (d, dct) = preprocessed
        # bugfix: this previously appended all_output_files to itself instead of collecting the filename
        all_output_files.append(output_filename)
        if isinstance(d, str):
            data = np.load(d)
            os.remove(d)
            d = data

        print("predicting", output_filename)
        softmax = []
        for p in params:
            trainer.load_checkpoint_ram(p, False)
            softmax.append(trainer.predict_preprocessed_data_return_seg_and_softmax(
                d, do_mirroring=do_tta, mirror_axes=trainer.data_aug_params['mirror_axes'],
                use_sliding_window=not disable_sliding_window, step_size=step_size, use_gaussian=True,
                all_in_gpu=all_in_gpu, mixed_precision=mixed_precision)[1][None])

        softmax = np.vstack(softmax)
        softmax_mean = np.mean(softmax, 0)

        transpose_forward = trainer.plans.get('transpose_forward')
        if transpose_forward is not None:
            transpose_backward = trainer.plans.get('transpose_backward')
            softmax_mean = softmax_mean.transpose([0] + [i + 1 for i in transpose_backward])

        if save_npz:
            npz_file = output_filename[:-7] + ".npz"
        else:
            npz_file = None

        if hasattr(trainer, 'regions_class_order'):
            region_class_order = trainer.regions_class_order
        else:
            region_class_order = None

        """There is a problem with python process communication that prevents us from communicating objects
        larger than 2 GB between processes (basically when the length of the pickle string that will be sent is
        communicated by the multiprocessing.Pipe object then the placeholder (%i I think) does not allow for long
        enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually
        patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will
        then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either
        filename or np.ndarray and will handle this automatically"""
        bytes_per_voxel = 4
        if all_in_gpu:
            bytes_per_voxel = 2  # if all_in_gpu then the return value is half (float16)
        if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel * 0.85):  # * 0.85 just to be safe
            print("This output is too large for python process-process communication. "
                  "Saving output temporarily to disk")
            np.save(output_filename[:-7] + ".npy", softmax_mean)
            softmax_mean = output_filename[:-7] + ".npy"

        results.append(pool.starmap_async(save_segmentation_nifti_from_softmax,
                                          ((softmax_mean, output_filename, dct, interpolation_order,
                                            region_class_order, None, None, npz_file, None, force_separate_z,
                                            interpolation_order_z),)))

    print("inference done. Now waiting for the segmentation export to finish...")
    _ = [i.get() for i in results]

    # now apply postprocessing
    # first load the postprocessing properties if they are present. Else raise a well visible warning
    results = []
    pp_file = join(model, "postprocessing.json")
    if isfile(pp_file):
        print("postprocessing...")
        shutil.copy(pp_file, os.path.abspath(os.path.dirname(output_filenames[0])))
        # for_which_classes stores for which of the classes everything but the largest connected component
        # needs to be removed
        for_which_classes, min_valid_obj_size = load_postprocessing(pp_file)
        results.append(pool.starmap_async(load_remove_save,
                                          zip(output_filenames, output_filenames,
                                              [for_which_classes] * len(output_filenames),
                                              [min_valid_obj_size] * len(output_filenames))))
        _ = [i.get() for i in results]
    else:
        print("WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run "
              "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is "
              "%s" % model)

    pool.close()
    pool.join()
def convert_variant2_predicted_test_to_submission_format(
        folder_with_predictions,
        output_folder="/home/fabian/drives/datasets/results/nnUNet/test_sets/Task038_CHAOS_Task_3_5_Variant2/ready_to_submit",
        postprocessing_file="/home/fabian/drives/datasets/results/nnUNet/ensembles/Task038_CHAOS_Task_3_5_Variant2/ensemble_2d__nnUNetTrainerV2__nnUNetPlansv2.1--3d_fullres__nnUNetTrainerV2__nnUNetPlansv2.1/postprocessing.json"):
    """
    output_folder is where the extracted template is
    :param folder_with_predictions:
    :param output_folder:
    :return:
    """
    # NOTE: this overrides the postprocessing_file argument passed in above
    postprocessing_file = "/media/fabian/Results/nnUNet/3d_fullres/Task039_CHAOS_Task_3_5_Variant2_highres/" \
                          "nnUNetTrainerV2__nnUNetPlansfixed/postprocessing.json"

    # variant 2 treats in and out phase as two training examples, so we need to ensemble these two again
    final_predictions_folder = join(output_folder, "final")
    maybe_mkdir_p(final_predictions_folder)
    t1_patient_names = [i.split("_")[-1][:-7] for i in
                        subfiles(folder_with_predictions, prefix="T1", suffix=".nii.gz", join=False)]
    folder_for_ensembing0 = join(output_folder, "ens0")
    folder_for_ensembing1 = join(output_folder, "ens1")
    maybe_mkdir_p(folder_for_ensembing0)
    maybe_mkdir_p(folder_for_ensembing1)
    # now copy all t1 out phases into ens0 and all in phases into ens1. Name them the same.
    for t1 in t1_patient_names:
        shutil.copy(join(folder_with_predictions, "T1_in_%s.npz" % t1),
                    join(folder_for_ensembing1, "T1_%s.npz" % t1))
        shutil.copy(join(folder_with_predictions, "T1_in_%s.pkl" % t1),
                    join(folder_for_ensembing1, "T1_%s.pkl" % t1))
        shutil.copy(join(folder_with_predictions, "T1_out_%s.npz" % t1),
                    join(folder_for_ensembing0, "T1_%s.npz" % t1))
        shutil.copy(join(folder_with_predictions, "T1_out_%s.pkl" % t1),
                    join(folder_for_ensembing0, "T1_%s.pkl" % t1))
    shutil.copy(join(folder_with_predictions, "plans.pkl"), join(folder_for_ensembing0, "plans.pkl"))
    shutil.copy(join(folder_with_predictions, "plans.pkl"), join(folder_for_ensembing1, "plans.pkl"))

    # there is a problem with T1_35 that I need to correct manually (different crop size, will not negatively
    # impact results)
    # ens0_softmax = np.load(join(folder_for_ensembing0, "T1_35.npz"))['softmax']
    ens1_softmax = np.load(join(folder_for_ensembing1, "T1_35.npz"))['softmax']
    # ens0_props = load_pickle(join(folder_for_ensembing0, "T1_35.pkl"))
    # ens1_props = load_pickle(join(folder_for_ensembing1, "T1_35.pkl"))
    ens1_softmax = ens1_softmax[:, :, :-1, :]
    np.savez_compressed(join(folder_for_ensembing1, "T1_35.npz"), softmax=ens1_softmax)
    shutil.copy(join(folder_for_ensembing0, "T1_35.pkl"), join(folder_for_ensembing1, "T1_35.pkl"))

    # now call my ensemble function
    merge((folder_for_ensembing0, folder_for_ensembing1), final_predictions_folder, 8, True,
          postprocessing_file=postprocessing_file)
    # copy t2 files to final_predictions_folder as well
    t2_files = subfiles(folder_with_predictions, prefix="T2", suffix=".nii.gz", join=False)
    for t2 in t2_files:
        shutil.copy(join(folder_with_predictions, t2), join(final_predictions_folder, t2))

    # apply postprocessing
    from nnunet.postprocessing.connected_components import apply_postprocessing_to_folder, load_postprocessing
    postprocessed_folder = join(output_folder, "final_postprocessed")
    for_which_classes, min_valid_obj_size = load_postprocessing(postprocessing_file)
    apply_postprocessing_to_folder(final_predictions_folder, postprocessed_folder,
                                   for_which_classes, min_valid_obj_size, 8)

    # now export the niftis in the weird png format
    # task 3
    output_dir = join(output_folder, "CHAOS_submission_template_new", "Task3", "MR")
    for t1 in t1_patient_names:
        output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
        nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
        write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task3)
    for t2 in t2_files:
        patname = t2.split("_")[-1][:-7]
        output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
        nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
        write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task3)

    # task 5
    output_dir = join(output_folder, "CHAOS_submission_template_new", "Task5", "MR")
    for t1 in t1_patient_names:
        output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
        nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
        write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task5)
    for t2 in t2_files:
        patname = t2.split("_")[-1][:-7]
        output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
        nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
        write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task5)
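
# Usage sketch (illustrative; the input path is hypothetical): converts the
# ensembled Variant 2 test predictions into the CHAOS challenge submission
# layout, using the hardcoded output_folder and postprocessing_file defaults:
#
#     convert_variant2_predicted_test_to_submission_format(
#         "/path/to/predicted_test_set_variant2")
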