def preprocess_predict_nifti(self, input_files, output_file=None, softmax_ouput_file=None):
    """
    Use this to predict new data
    :param input_files:
    :param output_file:
    :param softmax_ouput_file:
    :return:
    """
    print("preprocessing...")
    d, s, properties = self.preprocess_patient(input_files)
    print("predicting...")
    pred = self.predict_preprocessed_data_return_softmax(
        d, self.data_aug_params["mirror"], 1, False, 1, self.data_aug_params['mirror_axes'],
        True, True, 2, self.patch_size, True)
    pred = pred.transpose([0] + [i + 1 for i in self.transpose_backward])
    print("resampling to original spacing and nifti export...")
    save_segmentation_nifti_from_softmax(pred, output_file, properties, 3, None, None, None,
                                         softmax_ouput_file, None)
    print("done")

def preprocess_predict_nifti(self, input_files: List[str], output_file: str = None,
                             softmax_ouput_file: str = None, mixed_precision: bool = True) -> None:
    """
    Use this to predict new data
    :param input_files:
    :param output_file:
    :param softmax_ouput_file:
    :param mixed_precision:
    :return:
    """
    print("preprocessing...")
    d, s, properties = self.preprocess_patient(input_files)
    print("predicting...")
    pred = self.predict_preprocessed_data_return_seg_and_softmax(
        d, self.data_aug_params["do_mirror"], self.data_aug_params['mirror_axes'], True, 0.5,
        True, 'constant', {'constant_values': 0}, self.patch_size, True,
        mixed_precision=mixed_precision)[1]
    pred = pred.transpose([0] + [i + 1 for i in self.transpose_backward])

    if 'segmentation_export_params' in self.plans.keys():
        force_separate_z = self.plans['segmentation_export_params']['force_separate_z']
        interpolation_order = self.plans['segmentation_export_params']['interpolation_order']
        interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z']
    else:
        force_separate_z = None
        interpolation_order = 1
        interpolation_order_z = 0

    print("resampling to original spacing and nifti export...")
    save_segmentation_nifti_from_softmax(pred, output_file, properties, interpolation_order,
                                         self.regions_class_order, None, None,
                                         softmax_ouput_file, None,
                                         force_separate_z=force_separate_z,
                                         interpolation_order_z=interpolation_order_z)
    print("done")

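# Shape of the optional plans entry read by the variant above. The values here are
# illustrative assumptions only, mirroring the fallback defaults of the else branch:
# self.plans['segmentation_export_params'] = {
#     'force_separate_z': None,
#     'interpolation_order': 1,
#     'interpolation_order_z': 0,
# }
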
def preprocess_predict_nifti(self, input_files: List[str], output_file: str = None,
                             softmax_ouput_file: str = None) -> None:
    """
    Use this to predict new data
    :param input_files:
    :param output_file:
    :param softmax_ouput_file:
    :return:
    """
    print("preprocessing...")
    d, s, properties = self.preprocess_patient(input_files)
    print("predicting...")
    pred = self.predict_preprocessed_data_return_seg_and_softmax(
        d, self.data_aug_params["do_mirror"], self.data_aug_params['mirror_axes'], True, 0.5,
        True, 'constant', {'constant_values': 0}, self.patch_size, True)[1]
    pred = pred.transpose([0] + [i + 1 for i in self.transpose_backward])
    print("resampling to original spacing and nifti export...")
    save_segmentation_nifti_from_softmax(pred, output_file, properties, 3, None, None, None,
                                         softmax_ouput_file, None)
    print("done")

def preprocess_predict_nifti(self, input_files, output_file=None, softmax_ouput_file=None,
                             mixed_precision: bool = True):
    """
    Use this to predict new data
    :param input_files:
    :param output_file:
    :param softmax_ouput_file:
    :param mixed_precision:
    :return:
    """
    print("preprocessing...")
    d, s, properties = self.preprocess_patient(input_files)
    print("predicting...")
    pred = self.predict_preprocessed_data_return_seg_and_softmax(
        d, do_mirroring=self.data_aug_params["do_mirror"],
        mirror_axes=self.data_aug_params['mirror_axes'], use_sliding_window=True,
        step_size=0.5, use_gaussian=True, pad_border_mode='constant',
        pad_kwargs={'constant_values': 0}, all_in_gpu=True,
        mixed_precision=mixed_precision)[1]
    pred = pred.transpose([0] + [i + 1 for i in self.transpose_backward])
    print("resampling to original spacing and nifti export...")
    save_segmentation_nifti_from_softmax(pred, output_file, properties, 3, None, None, None,
                                         softmax_ouput_file, None, force_separate_z=False,
                                         interpolation_order_z=3)
    print("done")

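# Minimal usage sketch for the preprocess_predict_nifti variants above. Assumptions
# (not part of the snippets themselves): load_model_and_checkpoint_files restores the
# trainer as in predict_cases below, and the model folder / case file names are
# hypothetical placeholders.
def example_predict_single_case(model_folder: str) -> None:
    trainer, params = load_model_and_checkpoint_files(model_folder, folds=(0,))
    trainer.load_checkpoint_ram(params[0], False)  # load the weights of fold 0
    trainer.preprocess_predict_nifti(
        input_files=["case0_0000.nii.gz", "case0_0001.nii.gz"],  # one file per modality
        output_file="case0.nii.gz",
        softmax_ouput_file="case0.npz")
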
def merge(args):
    file1, file2, properties_file, out_file = args
    if not isfile(out_file):
        res1 = np.load(file1)['softmax']
        res2 = np.load(file2)['softmax']
        props = load_pickle(properties_file)
        mn = np.mean((res1, res2), 0)
        save_segmentation_nifti_from_softmax(mn, out_file, props, 1, None, None, None)

def merge_files(args):
    files, properties_file, out_file, only_keep_largest_connected_component, \
        min_region_size_per_class, override = args
    if override or not isfile(out_file):
        softmax = [np.load(f)['softmax'][None] for f in files]
        softmax = np.vstack(softmax)
        softmax = np.mean(softmax, 0)
        props = load_pickle(properties_file)
        save_segmentation_nifti_from_softmax(softmax, out_file, props, 1, None, None, None)

def merge(args):
    file1, file2, properties_file, out_file = args
    if not isfile(out_file):
        res1 = np.load(file1)['softmax']
        res2 = np.load(file2)['softmax']
        props = load_pickle(properties_file)
        mn = np.mean((res1, res2), 0)
        # Softmax probabilities are already at target spacing so this will not do any
        # resampling (resampling parameters don't matter here)
        save_segmentation_nifti_from_softmax(mn, out_file, props, 3, None, None, None,
                                             force_separate_z=None, interpolation_order_z=0)

def merge_files(args):
    files, properties_file, out_file, only_keep_largest_connected_component, \
        min_region_size_per_class, override, store_npz = args
    if override or not isfile(out_file):
        softmax = [np.load(f)['softmax'][None] for f in files]
        softmax = np.vstack(softmax)
        softmax = np.mean(softmax, 0)
        props = load_pickle(properties_file)
        save_segmentation_nifti_from_softmax(softmax, out_file, props, 3, None, None, None,
                                             force_separate_z=None)
        if store_npz:
            # out_file[:-7] strips the ".nii.gz" suffix (7 characters)
            np.savez_compressed(out_file[:-7] + ".npz", softmax=softmax)
            save_pickle(props, out_file[:-7] + ".pkl")

def merge(args):
    file1, file2, properties_file, out_file = args
    if not isfile(out_file):
        res1 = np.load(file1)['softmax']
        res2 = np.load(file2)['softmax']
        props = load_pickle(properties_file)
        mn = np.mean((res1, res2), 0)
        save_segmentation_nifti_from_softmax(mn, out_file, props, 3, None, None, None,
                                             force_separate_z=None, interpolation_order_z=0)

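# Ensembling sketch for the merge(args) helpers above: each work item pairs the .npz
# softmax outputs of two models for one case with that case's properties pickle. The
# folder layout and case ids are hypothetical placeholders.
def example_merge_two_models(case_ids, num_processes=4):
    from multiprocessing import Pool
    work = [("model_a/%s.npz" % c, "model_b/%s.npz" % c,
             "model_a/%s.pkl" % c, "ensemble/%s.nii.gz" % c) for c in case_ids]
    with Pool(num_processes) as p:
        p.map(merge, work)
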
def preprocess_predict_nifti(self, input_files, output_file=None, softmax_ouput_file=None):
    """
    Use this to predict new data
    :param input_files:
    :param output_file:
    :param softmax_ouput_file:
    :return:
    """
    print("preprocessing...")
    d, s, properties = self.preprocess_patient(input_files)
    print("predicting...")
    pred = self.predict_preprocessed_data_return_softmax(
        d, True, 1, False, 1, (0, 1, 2), True, True, 2, self.patch_size,
        True)  # TODO use da params for mirror
    print("resampling to original spacing and nifti export...")
    save_segmentation_nifti_from_softmax(pred, output_file, properties, 3, None, None, None,
                                         softmax_ouput_file, None)
    print("done")

def merge_files(files, properties_files, out_file, override, store_npz):
    if override or not isfile(out_file):
        softmax = [np.load(f)['softmax'][None] for f in files]
        softmax = np.vstack(softmax)
        softmax = np.mean(softmax, 0)
        props = [load_pickle(f) for f in properties_files]

        reg_class_orders = [p['regions_class_order'] if 'regions_class_order' in p.keys()
                            else None for p in props]

        if not all([i is None for i in reg_class_orders]):
            # if reg_class_orders are not None then they must be the same in all pkls
            tmp = reg_class_orders[0]
            for r in reg_class_orders[1:]:
                assert tmp == r, 'If merging files with regions_class_order, the regions_class_orders of all ' \
                                 'files must be the same. regions_class_order: %s, \n files: %s' % \
                                 (str(reg_class_orders), str(files))
            regions_class_order = tmp
        else:
            regions_class_order = None

        # Softmax probabilities are already at target spacing so this will not do any
        # resampling (resampling parameters don't matter here)
        save_segmentation_nifti_from_softmax(softmax, out_file, props[0], 3,
                                             regions_class_order, None, None,
                                             force_separate_z=None)
        if store_npz:
            np.savez_compressed(out_file[:-7] + ".npz", softmax=softmax)
            save_pickle(props, out_file[:-7] + ".pkl")

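# Sketch: averaging the softmax outputs of several folds for one case with the
# merge_files variant above (one properties pickle per .npz file; all paths are
# hypothetical placeholders).
def example_merge_folds() -> None:
    files = ["fold_0/case0.npz", "fold_1/case0.npz", "fold_2/case0.npz"]
    properties_files = [f.replace(".npz", ".pkl") for f in files]
    merge_files(files, properties_files, "ensembled/case0.nii.gz",
                override=False, store_npz=True)
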
def predict_cases(model, list_of_lists, output_filenames, folds, save_npz,
                  num_threads_preprocessing, num_threads_nifti_save, segs_from_prev_stage=None,
                  do_tta=True, overwrite_existing=False):
    assert len(list_of_lists) == len(output_filenames)
    if segs_from_prev_stage is not None:
        assert len(segs_from_prev_stage) == len(output_filenames)

    # prman = Pool(num_threads_nifti_save)
    # results = []

    cleaned_output_files = []
    for o in output_filenames:
        dr, f = os.path.split(o)
        if len(dr) > 0:
            maybe_mkdir_p(dr)
        if not f.endswith(".nii.gz"):
            f, _ = os.path.splitext(f)
            f = f + ".nii.gz"
        cleaned_output_files.append(join(dr, f))

    if not overwrite_existing:
        print("number of cases:", len(list_of_lists))
        not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)]

        cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx]
        list_of_lists = [list_of_lists[i] for i in not_done_idx]
        if segs_from_prev_stage is not None:
            segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx]

        print("number of cases that still need to be predicted:", len(cleaned_output_files))

    # print("emptying cuda cache")
    # torch.cuda.empty_cache()

    print("loading parameters for folds,", folds)
    trainer, params = load_model_and_checkpoint_files(model, folds)

    print("starting preprocessing generator")
    preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files,
                                             num_threads_preprocessing, segs_from_prev_stage)

    print("starting prediction...")
    for preprocessed in preprocessing:
        output_filename, (d, dct) = preprocessed
        if isinstance(d, str):
            data = np.load(d)
            os.remove(d)
            d = data

        print("predicting", output_filename)
        softmax = []
        for p in params:
            trainer.load_checkpoint_ram(p, False)
            softmax.append(trainer.predict_preprocessed_data_return_softmax(
                d, do_tta, 1, False, 1, trainer.data_aug_params['mirror_axes'], True, True, 2,
                trainer.patch_size, True)[None])

        softmax = np.vstack(softmax)
        softmax_mean = np.mean(softmax, 0)

        transpose_forward = trainer.plans.get('transpose_forward')
        if transpose_forward is not None:
            transpose_backward = trainer.plans.get('transpose_backward')
            softmax_mean = softmax_mean.transpose([0] + [i + 1 for i in transpose_backward])

        if save_npz:
            npz_file = output_filename[:-7] + ".npz"
        else:
            npz_file = None

        """There is a problem with python process communication that prevents us from communicating
        objects larger than 2 GB between processes (basically when the length of the pickle string
        that will be sent is communicated by the multiprocessing.Pipe object then the placeholder
        (%i I think) does not allow for long enough strings (lol). This could be fixed by changing
        i to l (for long) but that would require manually patching system python code. We circumvent
        that problem here by saving softmax_pred to a npy file that will then be read (and finally
        deleted) by the Process. save_segmentation_nifti_from_softmax can take either filename or
        np.ndarray and will handle this automatically"""
        # if np.prod(softmax_mean.shape) > (2e9 / 4 * 0.9):  # *0.9 just to be safe
        #     print("This output is too large for python process-process communication. "
        #           "Saving output temporarily to disk")
        #     np.save(output_filename[:-7] + ".npy", softmax_mean)
        #     softmax_mean = output_filename[:-7] + ".npy"
        # results.append(prman.starmap_async(save_segmentation_nifti_from_softmax,
        #                                    ((softmax_mean, output_filename, dct, 1, None, None,
        #                                      None, npz_file),)))
        save_segmentation_nifti_from_softmax(softmax_mean, output_filename, dct, 1, None, None,
                                             None, npz_file)

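# Sketch of the >2 GB pipe workaround described in the docstring above: spill the
# softmax array to an .npy file and pass the path instead, since
# save_segmentation_nifti_from_softmax accepts either a filename or an np.ndarray.
# The threshold mirrors the commented-out check (float32 = 4 bytes per voxel);
# maybe_spill_to_disk is a hypothetical helper name.
def maybe_spill_to_disk(softmax: np.ndarray, output_base: str):
    if np.prod(softmax.shape) > (2e9 / 4 * 0.9):  # *0.9 just to be safe
        np.save(output_base + ".npy", softmax)
        return output_base + ".npy"  # the export worker will load (and delete) this file
    return softmax
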
def validate(self, do_mirroring: bool = True, use_train_mode: bool = False, tiled: bool = True,
             step: int = 2, save_softmax: bool = True, use_gaussian: bool = True,
             overwrite: bool = True, validation_folder_name: str = 'validation_raw',
             debug: bool = False, all_in_gpu: bool = False, force_separate_z: bool = None,
             interpolation_order: int = 3, interpolation_order_z: int = 0):
    """
    if debug=True then the temporary files generated for postprocessing determination will be kept
    :return:
    """
    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    # predictions as they come from the network go here
    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)
    # this is for debug purposes
    my_input_args = {'do_mirroring': do_mirroring,
                     'use_train_mode': use_train_mode,
                     'tiled': tiled,
                     'step': step,
                     'save_softmax': save_softmax,
                     'use_gaussian': use_gaussian,
                     'overwrite': overwrite,
                     'validation_folder_name': validation_folder_name,
                     'debug': debug,
                     'all_in_gpu': all_in_gpu,
                     'force_separate_z': force_separate_z,
                     'interpolation_order': interpolation_order,
                     'interpolation_order_z': interpolation_order_z}
    save_json(my_input_args, join(output_folder, "validation_args.json"))

    if do_mirroring:
        if not self.data_aug_params['do_mirror']:
            raise RuntimeError("We did not train with mirroring so you cannot do inference "
                               "with mirroring enabled")
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    if config.DEBUG_MODE:  # zhuc
        default_num_threads = 1
    export_pool = Pool(default_num_threads)
    results = []

    cnt = 0  # zhuc
    for k in self.dataset_val.keys():
        properties = self.dataset[k]['properties']
        cnt += 1
        if config.DEBUG_MODE and cnt > 2:
            break
        # fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
        fname = k  # zhuc
        if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
            data = np.load(self.dataset[k]['data_file'])['data']

            print(k, data.shape)
            data[-1][data[-1] == -1] = 0

            softmax_pred = self.predict_preprocessed_data_return_softmax(
                data[:-1], do_mirroring, 1, use_train_mode, 1, mirror_axes, tiled, True, step,
                self.patch_size, use_gaussian=use_gaussian, all_in_gpu=all_in_gpu)

            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            """There is a problem with python process communication that prevents us from
            communicating objects larger than 2 GB between processes (basically when the length
            of the pickle string that will be sent is communicated by the multiprocessing.Pipe
            object then the placeholder (%i I think) does not allow for long enough strings (lol).
            This could be fixed by changing i to l (for long) but that would require manually
            patching system python code. We circumvent that problem here by saving softmax_pred
            to a npy file that will then be read (and finally deleted) by the Process.
            save_segmentation_nifti_from_softmax can take either filename or np.ndarray and will
            handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
                np.save(join(output_folder, fname + ".npy"), softmax_pred)
                softmax_pred = join(output_folder, fname + ".npy")

            if config.DEBUG_MODE:  # zhuc
                results.append(save_segmentation_nifti_from_softmax(
                    softmax_pred, join(output_folder, fname + ".nii.gz"), properties,
                    interpolation_order, None, None, None, softmax_fname, None,
                    force_separate_z, interpolation_order_z))
            else:
                results.append(export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(output_folder, fname + ".nii.gz"), properties,
                      interpolation_order, None, None, None, softmax_fname, None,
                      force_separate_z, interpolation_order_z),)))

        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               # join(self.gt_niftis_folder, fname + ".nii.gz")])
                               join(self.gt_niftis_folder,
                                    properties['seg_file'].split(os.sep)[-1])])  # zhuc

    if not config.DEBUG_MODE:  # zhuc
        _ = [i.get() for i in results]
    self.print_to_log_file("finished prediction")

    # evaluate raw predictions
    self.print_to_log_file("evaluation of raw predictions")
    task = self.dataset_directory.split(os.sep)[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name + " val tiled %s" % (str(tiled)),
                         json_author="Fabian", json_task=task,
                         num_threads=default_num_threads)

    # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can
    # remove everything except the largest connected component for each class. To see if this
    # improves results, we do this for all classes and then rerun the evaluation. Those classes
    # for which this resulted in an improved dice score will have this applied during inference
    # as well
    self.print_to_log_file("determining postprocessing")
    determine_postprocessing(self.output_folder, self.gt_niftis_folder, validation_folder_name,
                             final_subf_name=validation_folder_name + "_postprocessed",
                             debug=debug, processes=default_num_threads, f_dict=self.dataset)
    # after this the final predictions for the validation set can be found in
    # validation_folder_name_base + "_postprocessed". They are always in that folder, even if no
    # postprocessing was applied!

    # determining postprocessing on a per-fold basis may be OK for this fold but what if another
    # fold finds another postprocessing to be better? In this case we need to consolidate. At the
    # time the consolidation is going to be done we won't know what self.gt_niftis_folder was, so
    # now we copy all the niftis into a separate folder to be used later
    gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
    maybe_mkdir_p(gt_nifti_folder)
    for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
        success = False
        attempts = 0
        e = None
        while not success and attempts < 10:
            try:
                shutil.copy(f, gt_nifti_folder)
                success = True
            except OSError as err:
                # keep a reference: in Python 3 the exception variable is cleared when the
                # except block ends, so `except OSError as e` alone would leave `e` unbound
                e = err
                attempts += 1
                sleep(1)
        if not success:
            print("Could not copy gt nifti file %s into folder %s" % (f, gt_nifti_folder))
            if e is not None:
                raise e

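# Usage sketch for validate() above (assumption: `trainer` was restored from a
# checkpoint and initialized; the values shown are the signature's defaults):
# trainer.validate(do_mirroring=True, tiled=True, step=2, save_softmax=True,
#                  use_gaussian=True, overwrite=True,
#                  validation_folder_name='validation_raw', debug=False)
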
def predict_cases(model, list_of_lists, output_filenames, folds, save_npz,
                  num_threads_preprocessing, num_threads_nifti_save, segs_from_prev_stage=None,
                  do_tta=True, mixed_precision=True, overwrite_existing=False, all_in_gpu=False,
                  step_size=0.5, checkpoint_name="model_final_checkpoint",
                  segmentation_export_kwargs: dict = None, disable_postprocessing: bool = False,
                  use_alt_resampling: bool = False):
    # note: use_alt_resampling was referenced but never defined in this snippet; it is
    # exposed here as an assumed parameter so the synchronous-export branch is reachable
    """
    :param segmentation_export_kwargs:
    :param model: folder where the model is saved, must contain fold_x subfolders
    :param list_of_lists: [[case0_0000.nii.gz, case0_0001.nii.gz], [case1_0000.nii.gz, case1_0001.nii.gz], ...]
    :param output_filenames: [output_file_case0.nii.gz, output_file_case1.nii.gz, ...]
    :param folds: default: (0, 1, 2, 3, 4) (but can also be 'all' or a subset of the five folds,
    for example use (0, ) for using only fold_0
    :param save_npz: default: False
    :param num_threads_preprocessing:
    :param num_threads_nifti_save:
    :param segs_from_prev_stage:
    :param do_tta: default: True, can be set to False for an 8x speedup at the cost of a reduced
    segmentation quality
    :param overwrite_existing: default: False
    :param mixed_precision: if None then we take no action. If True/False we overwrite what the
    model has in its init
    :return:
    """
    assert len(list_of_lists) == len(output_filenames)
    if segs_from_prev_stage is not None:
        assert len(segs_from_prev_stage) == len(output_filenames)

    pool = Pool(num_threads_nifti_save)
    results = []

    cleaned_output_files = []
    for o in output_filenames:
        dr, f = os.path.split(o)
        if len(dr) > 0:
            maybe_mkdir_p(dr)
        if not f.endswith(".nii.gz"):
            f, _ = os.path.splitext(f)
            f = f + ".nii.gz"
        cleaned_output_files.append(join(dr, f))

    if not overwrite_existing:
        print("number of cases:", len(list_of_lists))
        # if save_npz=True then we should also check for missing npz files
        not_done_idx = [i for i, j in enumerate(cleaned_output_files)
                        if (not isfile(j)) or (save_npz and not isfile(j[:-7] + '.npz'))]

        cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx]
        list_of_lists = [list_of_lists[i] for i in not_done_idx]
        if segs_from_prev_stage is not None:
            segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx]

        print("number of cases that still need to be predicted:", len(cleaned_output_files))

    print("emptying cuda cache")
    torch.cuda.empty_cache()

    print("loading parameters for folds,", folds)
    trainer, params = load_model_and_checkpoint_files(model, folds,
                                                      mixed_precision=mixed_precision,
                                                      checkpoint_name=checkpoint_name)

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in trainer.plans.keys():
            force_separate_z = trainer.plans['segmentation_export_params']['force_separate_z']
            interpolation_order = trainer.plans['segmentation_export_params']['interpolation_order']
            interpolation_order_z = trainer.plans['segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

    print("starting preprocessing generator")
    preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files,
                                             num_threads_preprocessing, segs_from_prev_stage)
    print("starting prediction...")
    all_output_files = []
    for preprocessed in preprocessing:
        output_filename, (d, dct) = preprocessed
        all_output_files.append(output_filename)
        if isinstance(d, str):
            data = np.load(d)
            os.remove(d)
            d = data

        print("predicting", output_filename)
        trainer.load_checkpoint_ram(params[0], False)
        softmax = trainer.predict_preprocessed_data_return_seg_and_softmax(
            d, do_mirroring=do_tta, mirror_axes=trainer.data_aug_params['mirror_axes'],
            use_sliding_window=True, step_size=step_size, use_gaussian=True,
            all_in_gpu=all_in_gpu, mixed_precision=mixed_precision)[1]

        for p in params[1:]:
            trainer.load_checkpoint_ram(p, False)
            softmax += trainer.predict_preprocessed_data_return_seg_and_softmax(
                d, do_mirroring=do_tta, mirror_axes=trainer.data_aug_params['mirror_axes'],
                use_sliding_window=True, step_size=step_size, use_gaussian=True,
                all_in_gpu=all_in_gpu, mixed_precision=mixed_precision)[1]

        if len(params) > 1:
            softmax /= len(params)

        transpose_forward = trainer.plans.get('transpose_forward')
        if transpose_forward is not None:
            transpose_backward = trainer.plans.get('transpose_backward')
            softmax = softmax.transpose([0] + [i + 1 for i in transpose_backward])

        if save_npz:
            npz_file = output_filename[:-7] + ".npz"
        else:
            npz_file = None

        if hasattr(trainer, 'regions_class_order'):
            region_class_order = trainer.regions_class_order
        else:
            region_class_order = None

        """There is a problem with python process communication that prevents us from
        communicating objects larger than 2 GB between processes (basically when the length of
        the pickle string that will be sent is communicated by the multiprocessing.Pipe object
        then the placeholder (%i I think) does not allow for long enough strings (lol). This
        could be fixed by changing i to l (for long) but that would require manually patching
        system python code. We circumvent that problem here by saving softmax_pred to a npy file
        that will then be read (and finally deleted) by the Process.
        save_segmentation_nifti_from_softmax can take either filename or np.ndarray and will
        handle this automatically"""
        if use_alt_resampling:
            # export synchronously in this process instead of via the worker pool
            save_segmentation_nifti_from_softmax(softmax, output_filename, dct,
                                                 interpolation_order, region_class_order,
                                                 None, None, npz_file, None, force_separate_z,
                                                 interpolation_order_z)
        else:
            bytes_per_voxel = 4
            if all_in_gpu:
                bytes_per_voxel = 2  # if all_in_gpu then the return value is half (float16)
            if np.prod(softmax.shape) > (2e9 / bytes_per_voxel * 0.85):  # * 0.85 just to be safe
                print("This output is too large for python process-process communication. "
                      "Saving output temporarily to disk")
                np.save(output_filename[:-7] + ".npy", softmax)
                softmax = output_filename[:-7] + ".npy"

            results.append(pool.starmap_async(
                save_segmentation_nifti_from_softmax,
                ((softmax, output_filename, dct, interpolation_order, region_class_order, None,
                  None, npz_file, None, force_separate_z, interpolation_order_z),)))

    print("inference done. Now waiting for the segmentation export to finish...")
    _ = [i.get() for i in results]
    # now apply postprocessing
    # first load the postprocessing properties if they are present. Else raise a well visible warning
    if not disable_postprocessing:
        results = []
        pp_file = join(model, "postprocessing.json")
        if isfile(pp_file):
            print("postprocessing...")
            shutil_sol.copyfile(pp_file, os.path.abspath(os.path.dirname(output_filenames[0])))
            # for_which_classes stores for which of the classes everything but the largest
            # connected component needs to be removed
            for_which_classes, min_valid_obj_size = load_postprocessing(pp_file)
            results.append(pool.starmap_async(
                load_remove_save,
                zip(output_filenames, output_filenames,
                    [for_which_classes] * len(output_filenames),
                    [min_valid_obj_size] * len(output_filenames))))
            _ = [i.get() for i in results]
        else:
            print("WARNING! Cannot run postprocessing because the postprocessing file is missing. "
                  "Make sure to run consolidate_folds in the output folder of the model first!\n"
                  "The folder you need to run this in is %s" % model)

    pool.close()
    pool.join()

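# Usage sketch for predict_cases above; the folder layout and case ids are hypothetical
# placeholders (two input modalities per case, matching the docstring's naming scheme).
def example_predict_folder(model_folder: str) -> None:
    list_of_lists = [["imgs/case0_0000.nii.gz", "imgs/case0_0001.nii.gz"],
                     ["imgs/case1_0000.nii.gz", "imgs/case1_0001.nii.gz"]]
    output_filenames = ["preds/case0.nii.gz", "preds/case1.nii.gz"]
    predict_cases(model_folder, list_of_lists, output_filenames, folds=(0, 1, 2, 3, 4),
                  save_npz=False, num_threads_preprocessing=6, num_threads_nifti_save=2,
                  do_tta=True, mixed_precision=True, overwrite_existing=False)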