def run(ctx, sleep, processes, quiet, once, timeout, group, **kwargs):
    from birder.core.queue import read

    click.secho('Running %s processes.' % processes)
    p = Pool(processes=processes, initializer=init_worker)
    config = {'echo': not quiet, 'timestamp': tz_now(), 'timeout': timeout}
    while True:
        # message = read()
        # if message:
        #     pass
        client.set('timestamp', tz_now().strftime('%Y-%m-%d %H:%M:%S'))
        params = [(t, config) for t in registry if t.enabled]
        for param in params:
            param[1]['timestamp'] = tz_now()
        try:
            p.starmap_async(monit, params).get(9999999)
            if not once:
                if group:
                    click.secho('-' * 80)
                time.sleep(sleep)
        except (KeyboardInterrupt, SystemExit):
            break
        if once:
            break
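# Aside on the .get(9999999) idiom above — a minimal sketch, not part of the
# original source: on some Python versions a bare blocking wait on a pool
# result delays KeyboardInterrupt, whereas a wait with a (huge) timeout stays
# interruptible, so Ctrl-C actually reaches the except clause. `work` is a
# trivial stand-in worker.
from multiprocessing import Pool


def work(x, y):
    return x * y


if __name__ == "__main__":
    p = Pool(4)
    try:
        # the huge timeout keeps the blocking wait responsive to Ctrl-C
        print(p.starmap_async(work, [(1, 2), (3, 4)]).get(9999999))
        p.close()
    except (KeyboardInterrupt, SystemExit):
        p.terminate()
    p.join()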
def main(FLAGS):
    save_dir = FLAGS.save_dir
    new_WH = (FLAGS.target_width, FLAGS.target_height)
    new_labels_name = FLAGS.target_csv

    data = pd.read_csv(FLAGS.src_csv)
    data["Frame"] = data["Frame"].map(lambda x: "images/" + x)

    create_clean_dir(save_dir)
    logger.info("Cleaned {} directory".format(save_dir))

    logger.info("Resizing begins")
    start = time.time()
    pool = Pool()
    pool.starmap_async(read_image_and_resize,
                       [(image_path, new_WH, save_dir)
                        for image_path in data["Frame"].unique()])
    pool.close()
    pool.join()
    end = time.time()
    logger.info("Time elapsed: {}".format(end - start))
    logger.info("Resizing ends")

    logger.info("Adjusting dataframe")
    image_path = data["Frame"][0]
    image = read_image(image_path)
    H, W, _ = image.shape
    src_size = (W, H)
    labels = adjust_bbox(data, src_size, new_WH)
    labels["Frame"] = labels["Frame"].map(
        lambda x: os.path.join(save_dir, os.path.basename(x)))

    create_clean_dir("mask")
    logger.info("Cleaned {} directory".format("mask"))
    logger.info("Masking begins")
    start = time.time()
    pool = Pool()
    tasks = [(new_WH, image_path, labels, "mask")
             for image_path in labels["Frame"].unique()]
    pool.starmap_async(generate_mask_pipeline, tasks)
    pool.close()
    pool.join()
    end = time.time()
    logger.info("Masking ends. Time elapsed: {}".format(end - start))

    labels["Mask"] = labels["Frame"].map(lambda x: "mask/" + os.path.basename(x))
    labels.to_csv(new_labels_name, index=False)
    logger.info("Adjustment saved to {}".format(new_labels_name))
def determine_brats_postprocessing(folder_with_preds, folder_with_gt, postprocessed_output_dir,
                                   processes=8,
                                   thresholds=(0, 10, 50, 100, 200, 500, 750, 1000, 1500, 2500, 10000),
                                   replace_with=2):
    # find pairs
    nifti_gt = subfiles(folder_with_gt, suffix=".nii.gz", sort=True)
    nifti_pred = subfiles(folder_with_preds, suffix='.nii.gz', sort=True)

    p = Pool(processes)
    results = p.starmap_async(load_niftis_threshold_compute_dice,
                              zip(nifti_gt, nifti_pred, [thresholds] * len(nifti_pred)))
    results = results.get()

    all_dc_per_threshold = {}
    for t in thresholds:
        all_dc_per_threshold[t] = np.array([i[1][t] for i in results])
        print(t, np.mean(all_dc_per_threshold[t]))

    means = [np.mean(all_dc_per_threshold[t]) for t in thresholds]
    best_threshold = thresholds[np.argmax(means)]
    print('best', best_threshold, means[np.argmax(means)])

    maybe_mkdir_p(postprocessed_output_dir)
    p.starmap(apply_brats_threshold,
              zip(nifti_pred, [postprocessed_output_dir] * len(nifti_pred),
                  [best_threshold] * len(nifti_pred), [replace_with] * len(nifti_pred)))
    p.close()
    p.join()

    save_pickle((thresholds, means, best_threshold, all_dc_per_threshold),
                join(postprocessed_output_dir, "threshold.pkl"))
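# Aside (illustration only, toy numbers): the threshold selection above boils
# down to "mean Dice per threshold, take the argmax":
import numpy as np

all_dc_per_threshold = {0: [0.80, 0.90], 50: [0.85, 0.92], 200: [0.70, 0.75]}
thresholds = sorted(all_dc_per_threshold)
means = [np.mean(all_dc_per_threshold[t]) for t in thresholds]
best_threshold = thresholds[int(np.argmax(means))]
print('best', best_threshold, max(means))  # -> best 50 0.885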
def apply_postprocessing_to_folder(input_folder: str, output_folder: str,
                                   for_which_classes: list,
                                   min_valid_object_size: dict = None,
                                   num_processes=8):
    """
    Applies removal of all but the largest connected component to all niftis in a folder.

    :param input_folder:
    :param output_folder:
    :param for_which_classes:
    :param min_valid_object_size:
    :param num_processes:
    :return:
    """
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    p = Pool(num_processes)
    nii_files = subfiles(input_folder, suffix=".nii.gz", join=False)
    input_files = [input_folder + "/" + i for i in nii_files]
    out_files = [output_folder + "/" + i for i in nii_files]
    results = p.starmap_async(load_remove_save,
                              zip(input_files, out_files,
                                  [for_which_classes] * len(input_files),
                                  [min_valid_object_size] * len(input_files)))
    res = results.get()
    p.close()
    p.join()
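# Aside (sketch, with load_remove_save_stub as a hypothetical stand-in for the
# real worker): the [value] * len(files) broadcast used above can also be
# written with itertools.repeat, since zip truncates to its shortest input:
from itertools import repeat
from multiprocessing import Pool


def load_remove_save_stub(in_file, out_file, classes, min_size):
    return in_file, out_file, classes, min_size  # placeholder body


if __name__ == "__main__":
    input_files = ["in/a.nii.gz", "in/b.nii.gz"]
    out_files = ["out/a.nii.gz", "out/b.nii.gz"]
    with Pool(2) as p:
        r = p.starmap_async(load_remove_save_stub,
                            zip(input_files, out_files, repeat([1, 2]), repeat(None)))
        print(r.get())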
def main():
    t0 = time.time()
    name_link = read_and_download()
    pool = Pool(processes=cpu_count())
    # When passing multiple arguments, wrap them in tuples and use starmap / starmap_async.
    result = pool.starmap_async(download, name_link)  # asynchronous, non-blocking; one tuple is passed per call
    # print(result.get())  # inspect the results returned by the worker processes
    pool.close()  # close the pool so it accepts no new tasks
    pool.join()   # close() must be called before join(), otherwise join() errors; join() waits for all child processes to finish
    t1 = time.time() - t0
    print(t1)
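# Aside (sketch, with a hypothetical download(name, link)): the distinction the
# comments above draw — map_async passes one object per call, starmap_async
# unpacks each tuple into positional arguments:
from multiprocessing import Pool


def download(name, link):
    return "{} <- {}".format(name, link)


if __name__ == "__main__":
    name_link = [("a.jpg", "http://example.com/a"), ("b.jpg", "http://example.com/b")]
    with Pool(2) as pool:
        print(pool.starmap_async(download, name_link).get())
        # pool.map_async(download, name_link) would instead call
        # download(("a.jpg", "http://example.com/a")) and fail with a TypeError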
def multiprocessing_plot_overlay(list_of_image_files, list_of_seg_files,
                                 list_of_output_files, overlay_intensity,
                                 num_processes=8):
    p = Pool(num_processes)
    r = p.starmap_async(plot_overlay,
                        zip(list_of_image_files, list_of_seg_files, list_of_output_files,
                            [overlay_intensity] * len(list_of_output_files)))
    r.get()
    p.close()
    p.join()
def multiprocessing_plot_overlay_preprocessed(list_of_case_files, list_of_output_files,
                                              overlay_intensity, num_processes=8,
                                              modality_index=0):
    p = Pool(num_processes)
    r = p.starmap_async(plot_overlay_preprocessed,
                        zip(list_of_case_files, list_of_output_files,
                            [overlay_intensity] * len(list_of_output_files),
                            [modality_index] * len(list_of_output_files)))
    r.get()
    p.close()
    p.join()
def evaluate_verse_folder(folder_pred, folder_gt, out_json="/home/fabian/verse.json"):
    p = Pool(default_num_threads)
    files_gt_bare = subfiles(folder_gt, join=False)
    assert all([isfile(join(folder_pred, i)) for i in files_gt_bare]), \
        "some files are missing in the predicted folder"
    files_pred = [join(folder_pred, i) for i in files_gt_bare]
    files_gt = [join(folder_gt, i) for i in files_gt_bare]

    results = p.starmap_async(evaluate_verse_case, zip(files_gt, files_pred))
    results = results.get()

    dct = {i: j for i, j in zip(files_gt_bare, results)}

    results_stacked = np.vstack(results)
    results_mean = np.nanmean(results_stacked, 0)
    overall_mean = np.nanmean(results_mean)

    save_json((dct, list(results_mean), overall_mean), out_json)
def main(FLAGS): """Main Function Notes: 1. Read image and resize to Target Width, Height 2. Resize bounding box coordinates accordingly 3. Create masks with the bounding box background is 0 and vehicle is 255 """ new_WH = (FLAGS.target_width, FLAGS.target_height) data = pd.read_csv(FLAGS.src_csv) # Only consider car and truck images data = data[data["Label"].isin(["Car", "Truck"])].reset_index(drop=True) # 123.jpg -> object-detection-crowdai/123.jpg data["Frame"] = data["Frame"].map( lambda x: os.path.join(FLAGS.data_dir, x)) # IF dir exists, clean it create_clean_dir(FLAGS.save_dir) LOGGER.info("Cleaned {} directory".format(FLAGS.save_dir)) LOGGER.info("Resizing begins") start = time.time() pool = Pool() pool.starmap_async(read_image_and_resize, [(image_path, new_WH, FLAGS.save_dir) for image_path in data["Frame"].unique()]) pool.close() pool.join() end = time.time() LOGGER.info("Time elapsed: {}".format(end - start)) LOGGER.info("Resizing ends") LOGGER.info("Adjusting dataframe") # Read any image file to get the WIDTH and HEIGHT image_path = data["Frame"][0] image = read_image(image_path) H, W, _ = image.shape src_size = (W, H) labels = adjust_bbox(data, src_size, new_WH) # object-.../123.jpg -> data_resize/123.jpg labels["Frame"] = labels["Frame"].map( lambda x: os.path.join(FLAGS.save_dir, os.path.basename(x))) create_clean_dir("mask") LOGGER.info("Cleaned {} directory".format("mask")) LOGGER.info("Masking begin") start = time.time() pool = Pool() tasks = [(new_WH, image_path, labels, "mask") for image_path in labels["Frame"].unique()] pool.starmap_async(generate_mask_pipeline, tasks) pool.close() pool.join() end = time.time() LOGGER.info("Masking ends. Time elapsed: {}".format(end - start)) labels["Mask"] = labels["Frame"].map( lambda x: os.path.join("mask", os.path.basename(x))) labels.to_csv(FLAGS.target_csv, index=False) LOGGER.info("Adjustment saved to {}".format(FLAGS.target_csv))
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True,
             step_size: float = 0.5, save_softmax: bool = True,
             use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False,
             all_in_gpu: bool = False, force_separate_z: bool = None,
             interpolation_order: int = 3, interpolation_order_z=0):
    """
    Disable nnunet postprocessing: this would just waste computation time and does not benefit brats.

    !!! We run this with use_sliding_window=False per default (see on_epoch_end). This triggers
    fully convolutional inference. THIS ONLY MAKES SENSE WHEN TRAINING ON FULL IMAGES! Make sure
    use_sliding_window=True when running with the default patch size (128x128x128) !!!

    Per default this does not use test time data augmentation (mirroring). The reference
    implementation, however, does. It is disabled here because it eats up a lot of computation time.
    """
    validation_start = time()

    current_mode = self.network.training
    self.network.eval()

    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    # predictions as they come from the network go here
    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)

    # this is for debug purposes
    my_input_args = {
        'do_mirroring': do_mirroring,
        'use_sliding_window': use_sliding_window,
        'step_size': step_size,
        'save_softmax': save_softmax,
        'use_gaussian': use_gaussian,
        'overwrite': overwrite,
        'validation_folder_name': validation_folder_name,
        'debug': debug,
        'all_in_gpu': all_in_gpu,
        'force_separate_z': force_separate_z,
        'interpolation_order': interpolation_order,
        'interpolation_order_z': interpolation_order_z,
    }
    save_json(my_input_args, join(output_folder, "validation_args.json"))

    if do_mirroring:
        if not self.data_aug_params['do_mirror']:
            raise RuntimeError(
                "We did not train with mirroring so you cannot do inference with mirroring enabled")
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    export_pool = Pool(default_num_threads)
    results = []

    for k in self.dataset_val.keys():
        properties = load_pickle(self.dataset[k]['properties_file'])
        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
        if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
            data = np.load(self.dataset[k]['data_file'])['data']

            # print(k, data.shape)

            softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                data[:-1],
                do_mirroring=do_mirroring,
                mirror_axes=mirror_axes,
                use_sliding_window=use_sliding_window,
                step_size=step_size,
                use_gaussian=use_gaussian,
                all_in_gpu=all_in_gpu,
                verbose=False,
                mixed_precision=self.fp16)[1]

            # this does not do anything in brats -> remove this line
            # softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            results.append(
                export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                      properties, interpolation_order, None, None, None,
                      softmax_fname, None, force_separate_z,
                      interpolation_order_z, False),)))

    _ = [i.get() for i in results]

    self.print_to_log_file("finished prediction")

    # evaluate raw predictions
    self.print_to_log_file("evaluation of raw predictions")
    # this writes a csv file into output_folder
    evaluate_regions(output_folder, self.gt_niftis_folder, self.evaluation_regions)
    csv_file = np.loadtxt(join(output_folder, 'summary.csv'),
                          skiprows=1, dtype=str, delimiter=',')[:, 1:]

    # these are the values that are computed with np.nanmean aggregation
    whole, core, enhancing = csv_file[-4, :].astype(float)

    # do some cleanup
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    self.network.train(current_mode)
    validation_end = time()
    self.print_to_log_file('Running the validation took %f seconds' %
                           (validation_end - validation_start))
    self.print_to_log_file('(the time needed for validation is included in the total epoch time!)')

    return whole, core, enhancing
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy, _Resultable):

    _Processors_Pool: Pool = None
    _Processors_List: List[Union[ApplyResult, AsyncResult]] = None

    def __init__(self, pool_size: int):
        super().__init__(pool_size=pool_size)

    def initialization(self,
                       queue_tasks: Optional[Union[_BaseQueueTask, _BaseList]] = None,
                       features: Optional[Union[_BaseFeatureAdapterFactory, _BaseList]] = None,
                       *args, **kwargs) -> None:
        super(ProcessPoolStrategy, self).initialization(
            queue_tasks=queue_tasks, features=features, *args, **kwargs)

        # Activate multiprocessing.managers.BaseManager server
        activate_manager_server()

        # Initialize and build the Processes Pool.
        __pool_initializer: Callable = kwargs.get("pool_initializer", None)
        __pool_initargs: IterableType = kwargs.get("pool_initargs", None)
        self._Processors_Pool = Pool(processes=self.pool_size,
                                     initializer=__pool_initializer,
                                     initargs=__pool_initargs)

    def apply(self, tasks_size: int, function: Callable,
              args: Tuple = (), kwargs: Dict = {}) -> None:
        self.reset_result()
        __process_running_result = None

        try:
            __process_running_result = [
                self._Processors_Pool.apply(func=function, args=args, kwds=kwargs)
                for _ in range(tasks_size)
            ]
            __exception = None
            __process_run_successful = True
        except Exception as e:
            __exception = e
            __process_run_successful = False

        # Save running result state and running result value as dict
        # (pass the captured exception instead of dropping it)
        self._result_saving(successful=__process_run_successful,
                            result=__process_running_result,
                            exception=__exception)

    def async_apply(self, tasks_size: int, function: Callable, args: Tuple = (),
                    kwargs: Dict = {}, callback: Callable = None,
                    error_callback: Callable = None) -> None:
        self.reset_result()
        self._Processors_List = [
            self._Processors_Pool.apply_async(func=function,
                                              args=args,
                                              kwds=kwargs,
                                              callback=callback,
                                              error_callback=error_callback)
            for _ in range(tasks_size)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save running result state and running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def apply_with_iter(self, functions_iter: List[Callable],
                        args_iter: List[Tuple] = None,
                        kwargs_iter: List[Dict] = None) -> None:
        self.reset_result()
        __process_running_result = None

        if args_iter is None:
            args_iter = [() for _ in functions_iter]
        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        try:
            __process_running_result = [
                self._Processors_Pool.apply(func=_func, args=_args, kwds=_kwargs)
                for _func, _args, _kwargs in zip(functions_iter, args_iter, kwargs_iter)
            ]
            __exception = None
            __process_run_successful = True
        except Exception as e:
            __exception = e
            __process_run_successful = False

        # Save running result state and running result value as dict
        # (pass the captured exception instead of dropping it)
        self._result_saving(successful=__process_run_successful,
                            result=__process_running_result,
                            exception=__exception)

    def async_apply_with_iter(self, functions_iter: List[Callable],
                              args_iter: List[Tuple] = None,
                              kwargs_iter: List[Dict] = None,
                              callback_iter: List[Callable] = None,
                              error_callback_iter: List[Callable] = None) -> None:
        self.reset_result()

        if args_iter is None:
            args_iter = [() for _ in functions_iter]
        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]
        if callback_iter is None:
            callback_iter = [None for _ in functions_iter]
        if error_callback_iter is None:
            error_callback_iter = [None for _ in functions_iter]

        self._Processors_List = [
            self._Processors_Pool.apply_async(func=_func,
                                              args=_args,
                                              kwds=_kwargs,
                                              callback=_callback,
                                              error_callback=_error_callback)
            for _func, _args, _kwargs, _callback, _error_callback in zip(
                functions_iter, args_iter, kwargs_iter, callback_iter, error_callback_iter)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save running result state and running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def map(self, function: Callable, args_iter: IterableType = (),
            chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.map(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save running result state and running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map(self, function: Callable, args_iter: IterableType = (),
                  chunksize: int = None, callback: Callable = None,
                  error_callback: Callable = None) -> None:
        self.reset_result()
        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.map_async(func=function,
                                                      iterable=args_iter,
                                                      chunksize=chunksize,
                                                      callback=callback,
                                                      error_callback=error_callback)
        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save running result state and running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def map_by_args(self, function: Callable,
                    args_iter: IterableType[IterableType] = (),
                    chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.starmap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save running result state and running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map_by_args(self, function: Callable,
                          args_iter: IterableType[IterableType] = (),
                          chunksize: int = None, callback: Callable = None,
                          error_callback: Callable = None) -> None:
        self.reset_result()
        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.starmap_async(func=function,
                                                          iterable=args_iter,
                                                          chunksize=chunksize,
                                                          callback=callback,
                                                          error_callback=error_callback)
        # guard the blocking get() like the other async_* methods so a worker
        # failure is recorded instead of propagating out of this call
        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save running result state and running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def imap(self, function: Callable, args_iter: IterableType = (),
             chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [result for result in imap_running_result]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save running result state and running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def imap_unordered(self, function: Callable, args_iter: IterableType = (),
                       chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap_unordered(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [result for result in imap_running_result]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save running result state and running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def _result_saving(self, successful: bool, result: List,
                       exception: Exception) -> None:
        _process_result = {
            "successful": successful,
            "result": result,
            "exception": exception
        }
        self._Processors_Running_Result.append(_process_result)

    def close(self) -> None:
        self._Processors_Pool.close()
        self._Processors_Pool.join()

    def terminal(self) -> None:
        self._Processors_Pool.terminate()

    def get_result(self) -> List[_ProcessPoolResult]:
        return self.result()

    def _saving_process(self) -> List[_ProcessPoolResult]:
        _pool_results = []
        for __result in self._Processors_Running_Result:
            _pool_result = _ProcessPoolResult()
            _pool_result.is_successful = __result["successful"]
            _pool_result.data = __result["result"]
            _pool_results.append(_pool_result)
        return _pool_results
def starmap_async(self, func, iterable, *args, **kwargs):
    return Pool.starmap_async(self, ExceptionLogger(func), iterable, *args, **kwargs)
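# Aside: ExceptionLogger is not defined in this snippet. A plausible minimal
# wrapper (hypothetical, for illustration only) logs the traceback inside the
# worker and re-raises, so AsyncResult.get() still reports the failure:
import logging


class ExceptionLogger:
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        try:
            return self.func(*args, **kwargs)
        except Exception:
            logging.exception("worker raised in %r", self.func)
            raise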
def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validation_raw",
                             temp_folder="temp", final_subf_name="validation_final",
                             processes=default_num_threads, dice_threshold=0, debug=False,
                             advanced_postprocessing=False,
                             pp_filename="postprocessing.json"):
    """
    :param base:
    :param gt_labels_folder: subfolder of base with niftis of ground truth labels
    :param raw_subfolder_name: subfolder of base with niftis of predicted (non-postprocessed) segmentations
    :param temp_folder: used to store temporary data, will be deleted after we are done here unless debug=True
    :param final_subf_name: final results will be stored here (subfolder of base)
    :param processes:
    :param dice_threshold: only apply postprocessing if the result is better than old_result + dice_threshold
           (can be used as eps)
    :param debug: if True then the temporary files will not be deleted
    :return:
    """
    # lets see what classes are in the dataset
    classes = [
        int(i) for i in load_json(base + "/" + raw_subfolder_name + "/" + "summary.json")['results']['mean'].keys()
        if int(i) != 0
    ]

    folder_all_classes_as_fg = base + "/" + temp_folder + "_allClasses"
    folder_per_class = base + "/" + temp_folder + "_perClass"

    if isdir(folder_all_classes_as_fg):
        shutil.rmtree(folder_all_classes_as_fg)
    if isdir(folder_per_class):
        shutil.rmtree(folder_per_class)

    # multiprocessing rules
    p = Pool(processes)

    assert isfile(base + "/" + raw_subfolder_name + "/" + "summary.json"), \
        "join(base, raw_subfolder_name) does not contain a summary.json"

    # these are all the files we will be dealing with
    fnames = subfiles(base + "/" + raw_subfolder_name, suffix=".nii.gz", join=False)

    # make output and temp dirs
    if not os.path.isdir(folder_all_classes_as_fg):
        os.makedirs(folder_all_classes_as_fg)
    if not os.path.isdir(folder_per_class):
        os.makedirs(folder_per_class)
    if not os.path.isdir(base + "/" + final_subf_name):
        os.makedirs(base + "/" + final_subf_name)

    pp_results = {}
    pp_results['dc_per_class_raw'] = {}
    # dice scores after treating all foreground classes as one
    pp_results['dc_per_class_pp_all'] = {}
    # dice scores after removing everything except the largest cc,
    # independently for each class, after we already did dc_per_class_pp_all
    pp_results['dc_per_class_pp_per_class'] = {}
    pp_results['for_which_classes'] = []
    pp_results['min_valid_object_sizes'] = {}

    validation_result_raw = load_json(base + "/" + raw_subfolder_name + "/" + "summary.json")['results']
    pp_results['num_samples'] = len(validation_result_raw['all'])
    validation_result_raw = validation_result_raw['mean']

    if advanced_postprocessing:
        # first treat all foreground classes as one and remove all but the largest foreground
        # connected component
        results = []
        for f in fnames:
            predicted_segmentation = base + "/" + raw_subfolder_name + "/" + f
            # now remove all but the largest connected component for each class
            output_file = folder_all_classes_as_fg + "/" + f
            results.append(
                p.starmap_async(load_remove_save,
                                ((predicted_segmentation, output_file, (classes,)),)))

        results = [i.get() for i in results]

        # aggregate max_size_removed and min_size_kept
        max_size_removed = {}
        min_size_kept = {}
        for tmp in results:
            mx_rem, min_kept = tmp[0]
            for k in mx_rem:
                if mx_rem[k] is not None:
                    if max_size_removed.get(k) is None:
                        max_size_removed[k] = mx_rem[k]
                    else:
                        max_size_removed[k] = max(max_size_removed[k], mx_rem[k])
            for k in min_kept:
                if min_kept[k] is not None:
                    if min_size_kept.get(k) is None:
                        min_size_kept[k] = min_kept[k]
                    else:
                        min_size_kept[k] = min(min_size_kept[k], min_kept[k])

        print("foreground vs background, smallest valid object size was",
              min_size_kept[tuple(classes)])
        print("removing only objects smaller than that...")
    else:
        min_size_kept = None

    # we need to rerun the step from above, now with the size constraint
    pred_gt_tuples = []
    results = []
    # first treat all foreground classes as one and remove all but the largest foreground
    # connected component
    for f in fnames:
        predicted_segmentation = base + "/" + raw_subfolder_name + "/" + f
        # now remove all but the largest connected component for each class
        output_file = folder_all_classes_as_fg + "/" + f
        results.append(
            p.starmap_async(load_remove_save,
                            ((predicted_segmentation, output_file, (classes,), min_size_kept),)))
        pred_gt_tuples.append([output_file, gt_labels_folder + "/" + f])

    _ = [i.get() for i in results]

    # evaluate postprocessed predictions
    _ = aggregate_scores(pred_gt_tuples, labels=classes,
                         json_output_file=folder_all_classes_as_fg + "/" + "summary.json",
                         json_author="Fabian", num_threads=processes)

    # now we need to figure out if doing this improved the dice scores. We will implement that
    # defensively, in so far that if a single class got worse as a result we won't do this. We can
    # change this in the future but right now I prefer to do it this way
    validation_result_PP_test = load_json(folder_all_classes_as_fg + "/" + "summary.json")['results']['mean']

    for c in classes:
        dc_raw = validation_result_raw[str(c)]['Dice']
        dc_pp = validation_result_PP_test[str(c)]['Dice']
        pp_results['dc_per_class_raw'][str(c)] = dc_raw
        pp_results['dc_per_class_pp_all'][str(c)] = dc_pp

    # true if new is better
    do_fg_cc = False
    comp = [
        pp_results['dc_per_class_pp_all'][str(cl)] > (pp_results['dc_per_class_raw'][str(cl)] + dice_threshold)
        for cl in classes
    ]
    before = np.mean([pp_results['dc_per_class_raw'][str(cl)] for cl in classes])
    after = np.mean([pp_results['dc_per_class_pp_all'][str(cl)] for cl in classes])
    print("Foreground vs background")
    print("before:", before)
    print("after: ", after)

    if any(comp):
        # at least one class improved - yay!
        # now check if another got worse
        # true if new is worse
        any_worse = any([
            pp_results['dc_per_class_pp_all'][str(cl)] < pp_results['dc_per_class_raw'][str(cl)]
            for cl in classes
        ])
        if not any_worse:
            pp_results['for_which_classes'].append(classes)
            if min_size_kept is not None:
                pp_results['min_valid_object_sizes'].update(deepcopy(min_size_kept))
            do_fg_cc = True
            print("Removing all but the largest foreground region improved results!")
            print('for_which_classes', classes)
            print('min_valid_object_sizes', min_size_kept)
    else:
        # did not improve things - don't do it
        pass

    if len(classes) > 1:
        # now, depending on whether we do remove all but the largest foreground connected
        # component, we define the source dir for the next step to be the raw or the temp dir
        if do_fg_cc:
            source = folder_all_classes_as_fg
        else:
            source = base + "/" + raw_subfolder_name

        if advanced_postprocessing:
            # now run this for each class separately
            results = []
            for f in fnames:
                predicted_segmentation = source + "/" + f
                output_file = folder_per_class + "/" + f
                results.append(
                    p.starmap_async(load_remove_save,
                                    ((predicted_segmentation, output_file, classes),)))

            results = [i.get() for i in results]

            # aggregate max_size_removed and min_size_kept
            max_size_removed = {}
            min_size_kept = {}
            for tmp in results:
                mx_rem, min_kept = tmp[0]
                for k in mx_rem:
                    if mx_rem[k] is not None:
                        if max_size_removed.get(k) is None:
                            max_size_removed[k] = mx_rem[k]
                        else:
                            max_size_removed[k] = max(max_size_removed[k], mx_rem[k])
                for k in min_kept:
                    if min_kept[k] is not None:
                        if min_size_kept.get(k) is None:
                            min_size_kept[k] = min_kept[k]
                        else:
                            min_size_kept[k] = min(min_size_kept[k], min_kept[k])

            print("classes treated separately, smallest valid object sizes are")
            print(min_size_kept)
            print("removing only objects smaller than that")
        else:
            min_size_kept = None

        # rerun with the size thresholds from above
        pred_gt_tuples = []
        results = []
        for f in fnames:
            predicted_segmentation = source + "/" + f
            output_file = folder_per_class + "/" + f
            results.append(
                p.starmap_async(load_remove_save,
                                ((predicted_segmentation, output_file, classes, min_size_kept),)))
            pred_gt_tuples.append([output_file, gt_labels_folder + "/" + f])

        _ = [i.get() for i in results]

        # evaluate postprocessed predictions
        _ = aggregate_scores(pred_gt_tuples, labels=classes,
                             json_output_file=folder_per_class + "/" + "summary.json",
                             json_author="Fabian", num_threads=processes)

        if do_fg_cc:
            old_res = deepcopy(validation_result_PP_test)
        else:
            old_res = validation_result_raw

        # these are the new dice scores
        validation_result_PP_test = load_json(folder_per_class + "/" + "summary.json")['results']['mean']

        for c in classes:
            dc_raw = old_res[str(c)]['Dice']
            dc_pp = validation_result_PP_test[str(c)]['Dice']
            pp_results['dc_per_class_pp_per_class'][str(c)] = dc_pp
            print(c)
            print("before:", dc_raw)
            print("after: ", dc_pp)

            if dc_pp > (dc_raw + dice_threshold):
                pp_results['for_which_classes'].append(int(c))
                if min_size_kept is not None:
                    pp_results['min_valid_object_sizes'].update({c: min_size_kept[c]})
                print("Removing all but the largest region for class %d improved results!" % c)
                print('min_valid_object_sizes', min_size_kept)
    else:
        print("Only one class present, no need to do each class separately as this is covered in fg vs bg")

    if not advanced_postprocessing:
        pp_results['min_valid_object_sizes'] = None

    print("done")
    print("for which classes:")
    print(pp_results['for_which_classes'])
    print("min_object_sizes")
    print(pp_results['min_valid_object_sizes'])

    pp_results['validation_raw'] = raw_subfolder_name
    pp_results['validation_final'] = final_subf_name

    # now that we have a proper for_which_classes, apply that
    pred_gt_tuples = []
    results = []
    for f in fnames:
        predicted_segmentation = base + "/" + raw_subfolder_name + "/" + f
        # now remove all but the largest connected component for each class
        output_file = base + "/" + final_subf_name + "/" + f
        results.append(
            p.starmap_async(load_remove_save,
                            ((predicted_segmentation, output_file,
                              pp_results['for_which_classes'],
                              pp_results['min_valid_object_sizes']),)))
        pred_gt_tuples.append([output_file, gt_labels_folder + "/" + f])

    _ = [i.get() for i in results]

    # evaluate postprocessed predictions
    _ = aggregate_scores(pred_gt_tuples, labels=classes,
                         json_output_file=base + "/" + final_subf_name + "/" + "summary.json",
                         json_author="Fabian", num_threads=processes)

    pp_results['min_valid_object_sizes'] = str(pp_results['min_valid_object_sizes'])

    save_json(pp_results, base + "/" + pp_filename)

    # delete temp
    if not debug:
        shutil.rmtree(folder_per_class)
        shutil.rmtree(folder_all_classes_as_fg)

    p.close()
    p.join()
    print("done")
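# Aside (sketch, with load_remove_save_stub as a hypothetical stand-in): the
# per-file pattern used throughout determine_postprocessing — calling
# starmap_async(f, ((a, b, c),)) once per file and collecting the AsyncResults
# in a list — submits a single task per call; apply_async expresses the same
# thing more directly:
from multiprocessing import Pool


def load_remove_save_stub(pred, out, classes):
    return pred, out, classes  # placeholder body


if __name__ == "__main__":
    fnames = ["a.nii.gz", "b.nii.gz"]
    p = Pool(2)
    results = [p.apply_async(load_remove_save_stub, ("raw/" + f, "pp/" + f, (1, 2)))
               for f in fnames]
    _ = [r.get() for r in results]
    p.close()
    p.join()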
def validate(self, do_mirroring=True, use_train_mode=False, tiled=True, step=2,
             save_softmax=True, use_gaussian=True, validation_folder_name='validation'):
    """
    :param do_mirroring:
    :param use_train_mode:
    :param tiled:
    :param step:
    :param save_softmax:
    :param use_gaussian:
    :param validation_folder_name:
    :return:
    """
    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)

    if do_mirroring:
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    process_manager = Pool(2)
    results = []

    for k in self.dataset_val.keys():
        properties = self.dataset[k]['properties']
        data = np.load(self.dataset[k]['data_file'])['data']

        # concat segmentation of previous step
        seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                           k + "_segFromPrevStage.npz"))['data'][None]

        transpose_forward = self.plans.get('transpose_forward')
        if transpose_forward is not None:
            data = data.transpose([0] + [i + 1 for i in transpose_forward])
            seg_from_prev_stage = seg_from_prev_stage.transpose(
                [0] + [i + 1 for i in transpose_forward])

        print(data.shape)
        data[-1][data[-1] == -1] = 0
        data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0],
                                                             range(1, self.num_classes))))

        softmax_pred = self.predict_preprocessing_return_softmax(data_for_net, do_mirroring, 1,
                                                                 use_train_mode, 1, mirror_axes,
                                                                 tiled, True, step,
                                                                 self.patch_size,
                                                                 use_gaussian=use_gaussian)

        if transpose_forward is not None:
            transpose_backward = self.plans.get('transpose_backward')
            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in transpose_backward])

        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

        if save_softmax:
            softmax_fname = join(output_folder, fname + ".npz")
        else:
            softmax_fname = None

        if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.9):  # *0.9 just to be safe
            np.save(fname + ".npy", softmax_pred)
            softmax_pred = fname + ".npy"

        results.append(process_manager.starmap_async(
            store_seg_from_softmax,
            ((softmax_pred, join(output_folder, fname + ".nii.gz"),
              properties, 1, None, None, None, softmax_fname, None),)))

        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               join(self.gt_niftis_folder, fname + ".nii.gz")])

    _ = [i.get() for i in results]

    task = self.dataset_directory.split("/")[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name, json_author="Fabian",
                         json_description="", json_task=task)
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True,
             step_size: float = 0.5, save_softmax: bool = True,
             use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False,
             all_in_gpu: bool = False, segmentation_export_kwargs: dict = None):
    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"

    current_mode = self.network.training
    self.network.eval()

    # save whether network is in deep supervision mode or not
    ds = self.network.do_ds
    # disable deep supervision
    self.network.do_ds = False

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in self.plans.keys():
            force_separate_z = self.plans['segmentation_export_params']['force_separate_z']
            interpolation_order = self.plans['segmentation_export_params']['interpolation_order']
            interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)

    # this is for debug purposes
    my_input_args = {
        'do_mirroring': do_mirroring,
        'use_sliding_window': use_sliding_window,
        'step': step_size,
        'save_softmax': save_softmax,
        'use_gaussian': use_gaussian,
        'overwrite': overwrite,
        'validation_folder_name': validation_folder_name,
        'debug': debug,
        'all_in_gpu': all_in_gpu,
        'segmentation_export_kwargs': segmentation_export_kwargs,
    }
    save_json(my_input_args, join(output_folder, "validation_args.json"))

    if do_mirroring:
        if not self.data_aug_params['do_mirror']:
            raise RuntimeError(
                "We did not train with mirroring so you cannot do inference with mirroring enabled")
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    export_pool = Pool(default_num_threads)
    results = []

    for k in self.dataset_val.keys():
        properties = load_pickle(self.dataset[k]['properties_file'])
        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
        if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                               k + "_segFromPrevStage.npz"))['data'][None]

            print(k, data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0],
                                                                 range(1, self.num_classes))))

            softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                data_for_net,
                do_mirroring=do_mirroring,
                mirror_axes=mirror_axes,
                use_sliding_window=use_sliding_window,
                step_size=step_size,
                use_gaussian=use_gaussian,
                all_in_gpu=all_in_gpu,
                mixed_precision=self.fp16)[1]

            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
                np.save(join(output_folder, fname + ".npy"), softmax_pred)
                softmax_pred = join(output_folder, fname + ".npy")

            results.append(
                export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                      properties, interpolation_order, None, None, None,
                      softmax_fname, None, force_separate_z,
                      interpolation_order_z),)))

        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               join(self.gt_niftis_folder, fname + ".nii.gz")])

    _ = [i.get() for i in results]
    self.print_to_log_file("finished prediction")

    # evaluate raw predictions
    self.print_to_log_file("evaluation of raw predictions")
    task = self.dataset_directory.split("/")[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
                         json_task=task, num_threads=default_num_threads)

    self.print_to_log_file("determining postprocessing")
    determine_postprocessing(self.output_folder, self.gt_niftis_folder,
                             validation_folder_name,
                             final_subf_name=validation_folder_name + "_postprocessed",
                             debug=debug)

    gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
    maybe_mkdir_p(gt_nifti_folder)
    for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
        success = False
        attempts = 0
        e = None
        while not success and attempts < 10:
            try:
                shutil.copy(f, gt_nifti_folder)
                success = True
            except OSError as exc:
                e = exc  # keep a reference; "except ... as e" unbinds e when the block exits
                attempts += 1
                sleep(1)
        if not success:
            print("Could not copy gt nifti file %s into folder %s" % (f, gt_nifti_folder))
            if e is not None:
                raise e

    # restore network deep supervision mode
    self.network.train(current_mode)
    self.network.do_ds = ds
def display(users, top=15, save=False, refresh=False):
    DiskSpaceUsage.OWNERS = users
    DiskSpaceUsage.TOP_COUNT = top if top else 15
    DiskSpaceUsage.FORCE_REFRESH = refresh

    # Create/open the cache
    current_folder = os.path.dirname(os.path.realpath(__file__))
    cache_folder = os.path.join(current_folder, "cache")
    cache = FanoutCache(shards=6, directory=cache_folder)

    # All experiments
    all_experiments = list(
        itertools.chain(*(Experiment.get(query_criteria=QueryCriteria().where(["owner={}".format(owner)]))
                          for owner in DiskSpaceUsage.OWNERS)))
    all_experiments_len = len(all_experiments)

    # Create the pool of workers
    p = Pool(6)
    r = p.starmap_async(DiskSpaceUsage.get_experiment_info,
                        itertools.product(all_experiments, (cache,)))
    p.close()

    print("Analyzing disk space for:")
    print(" | {} experiments".format(all_experiments_len))
    print(" | Users: {}".format(", ".join(DiskSpaceUsage.OWNERS)))

    # Wait for completion and display progress
    sys.stdout.write(" | Experiment analyzed: 0/{}".format(all_experiments_len))
    sys.stdout.flush()

    # While we are analyzing, display the status
    while not r.ready():
        # Estimate how many are remaining. This is just an estimate and needs to be bounded
        remaining = max(0, min(all_experiments_len, r._number_left * r._chunksize))
        sys.stdout.write("\r {} Experiment analyzed: {}/{}".format(
            next(animation), all_experiments_len - remaining, all_experiments_len))
        sys.stdout.flush()
        time.sleep(.5)

    sys.stdout.write("\r | Experiment analyzed: {}/{}".format(all_experiments_len,
                                                              all_experiments_len))
    sys.stdout.flush()

    # Get all the results
    experiments_info = [cache.get(e.id) for e in all_experiments if cache.get(e.id)]
    cache.close()

    # Display
    print("\n\n---------------------------")
    DiskSpaceUsage.top_count_experiments(experiments_info)
    print("\n---------------------------")
    DiskSpaceUsage.total_size_per_user(experiments_info)
    print("\n---------------------------")
    DiskSpaceUsage.top_count_experiments_per_user(experiments_info)

    # Save to a csv file
    if save:
        DiskSpaceUsage.save_to_file(experiments_info)
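# Aside (sketch): r._number_left and r._chunksize above are private attributes
# of multiprocessing's MapResult and may change between Python versions. A
# public-API alternative counts completions with apply_async callbacks
# (analyze_one is a hypothetical stand-in for the real per-experiment task):
import time
from multiprocessing import Pool


def analyze_one(item):
    return item


if __name__ == "__main__":
    items = list(range(100))
    done = []
    with Pool(6) as p:
        for i in items:
            # in real code also supply error_callback so failures don't stall the loop
            p.apply_async(analyze_one, (i,), callback=done.append)
        while len(done) < len(items):
            print("\r{}/{} analyzed".format(len(done), len(items)), end="", flush=True)
            time.sleep(0.5)
    print("\r{}/{} analyzed".format(len(done), len(items)))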
def validate(self, do_mirroring=True, use_train_mode=False, tiled=True, step=2,
             save_softmax=True, use_gaussian=True, validation_folder_name='validation'):
    """
    :param do_mirroring:
    :param use_train_mode:
    :param tiled:
    :param step:
    :param save_softmax:
    :param use_gaussian:
    :param validation_folder_name:
    :return:
    """
    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)

    if do_mirroring:
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    process_manager = Pool(2)
    results = []

    transpose_backward = self.plans.get('transpose_backward')

    for k in self.dataset_val.keys():
        properties = self.dataset[k]['properties']
        data = np.load(self.dataset[k]['data_file'])['data']

        # concat segmentation of previous step
        seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                           k + "_segFromPrevStage.npz"))['data'][None]

        print(data.shape)
        data[-1][data[-1] == -1] = 0
        data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0],
                                                             range(1, self.num_classes))))

        softmax_pred = self.predict_preprocessed_data_return_softmax(
            data_for_net, do_mirroring, 1, use_train_mode, 1, mirror_axes, tiled,
            True, step, self.patch_size, use_gaussian=use_gaussian)

        if transpose_backward is not None:
            transpose_backward = self.plans.get('transpose_backward')
            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in transpose_backward])

        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

        if save_softmax:
            softmax_fname = join(output_folder, fname + ".npz")
        else:
            softmax_fname = None

        """There is a problem with python process communication that prevents us from
        communicating objects larger than 2 GB between processes (basically when the length of
        the pickle string that will be sent is communicated by the multiprocessing.Pipe object
        then the placeholder (%i I think) does not allow for long enough strings (lol). This
        could be fixed by changing i to l (for long) but that would require manually patching
        system python code. We circumvent that problem here by saving softmax_pred to a npy file
        that will then be read (and finally deleted) by the Process.
        save_segmentation_nifti_from_softmax can take either filename or np.ndarray and will
        handle this automatically"""
        if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.9):  # *0.9 just to be safe
            np.save(fname + ".npy", softmax_pred)
            softmax_pred = fname + ".npy"

        results.append(process_manager.starmap_async(
            save_segmentation_nifti_from_softmax,
            ((softmax_pred, join(output_folder, fname + ".nii.gz"),
              properties, 1, None, None, None, softmax_fname, None),)))

        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               join(self.gt_niftis_folder, fname + ".nii.gz")])

    _ = [i.get() for i in results]

    task = self.dataset_directory.split("/")[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name, json_author="Fabian",
                         json_description="", json_task=task)
def analyze(self):
    # Clear the cache
    self.cache.clear()

    # Start the timer
    start_time = time.time()

    # If no analyzers -> quit
    if not all((self.analyzers, self.simulations)):
        print("No analyzers or experiments selected, exiting...")
        return

    # If any of the analyzers needs the dir map, create it
    if any(a.need_dir_map for a in self.analyzers if hasattr(a, 'need_dir_map')):
        # preload the global dir map
        from simtools.Utilities.SimulationDirectoryMap import SimulationDirectoryMap
        for experiment in self.experiments:
            SimulationDirectoryMap.preload_experiment(experiment)

    # Run the per_experiment hook of the analyzers
    for exp in self.experiments:
        for a in self.analyzers:
            a.per_experiment(exp)

    scount = len(self.simulations)
    max_threads = min(self.max_threads, scount if scount != 0 else 1)

    # Display some info
    if self.verbose:
        print("Analyze Manager")
        print(" | {} simulation{} - {} experiment{}".format(
            scount, pluralize(scount), len(self.experiments), pluralize(self.experiments)))
        print(" | force_analyze is {} and {} simulation{} ignored".format(
            on_off(self.force_analyze), len(self.ignored_simulations),
            pluralize(self.ignored_simulations)))
        print(" | Analyzer{}: ".format(pluralize(self.analyzers)))
        for a in self.analyzers:
            print(" | - {} (Directory map: {} / File parsing: {} / Use cache: {})".format(
                a.uid, on_off(a.need_dir_map), on_off(a.parse), on_off(hasattr(a, "cache"))))
        print(" | Pool of {} analyzing processes".format(max_threads))

    pool = Pool(max_threads)

    if scount == 0 and self.verbose:
        print("No experiments/simulations for analysis.")
    else:
        results = pool.starmap_async(retrieve_data,
                                     itertools.product(self.simulations.values(),
                                                       (self.analyzers,), (self.cache,)))

        while not results.ready():
            self._check_exception()

            time_elapsed = time.time() - start_time
            if self.verbose:
                sys.stdout.write("\r {} Analyzing {}/{}... {} elapsed".format(
                    next(animation), len(self.cache), scount, verbose_timedelta(time_elapsed)))
                sys.stdout.flush()

            if time_elapsed > ANALYZE_TIMEOUT:
                raise Exception("Timeout while waiting for the analysis to complete...")

            time.sleep(WAIT_TIME)

        results.get()

    # At this point we have all our results.
    # Hand them to the analyzers.
    finalize_results = {}
    for a in self.analyzers:
        analyzer_data = {}
        for key in self.cache:
            if key == EXCEPTION_KEY:
                continue
            # Retrieve the cache content and the simulation object
            sim_cache = self.cache.get(key)
            simulation_obj = self.simulations[key]
            # Give it to the analyzer
            analyzer_data[simulation_obj] = sim_cache[a.uid] if sim_cache and a.uid in sim_cache else None
        finalize_results[a.uid] = pool.apply_async(a.finalize, (analyzer_data,))

    pool.close()
    pool.join()

    for a in self.analyzers:
        a.results = finalize_results[a.uid].get()

    if self.verbose:
        total_time = time.time() - start_time
        print("\r | Analysis done. Took {} (~ {:.3f} per simulation)".format(
            verbose_timedelta(total_time), total_time / scount if scount != 0 else 0))
def validate(self, do_mirroring=True, use_train_mode=False, tiled=True, step=2,
             save_softmax=True, use_gaussian=True, overwrite=True,
             validation_folder_name="validation_raw", debug=False):
    """
    :param do_mirroring:
    :param use_train_mode:
    :param tiled:
    :param step:
    :param save_softmax:
    :param use_gaussian:
    :param overwrite:
    :param validation_folder_name:
    :param debug:
    :return:
    """
    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)

    if do_mirroring:
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    process_manager = Pool(2)
    results = []

    transpose_backward = self.plans.get('transpose_backward')

    for k in self.dataset_val.keys():
        properties = self.dataset[k]['properties']
        data = np.load(self.dataset[k]['data_file'])['data']

        # concat segmentation of previous step
        seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                           k + "_segFromPrevStage.npz"))['data'][None]

        print(data.shape)
        data[-1][data[-1] == -1] = 0
        data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0],
                                                             range(1, self.num_classes))))

        softmax_pred = self.predict_preprocessed_data_return_softmax(
            data_for_net, do_mirroring, 1, use_train_mode, 1, mirror_axes, tiled,
            True, step, self.patch_size, use_gaussian=use_gaussian)

        if transpose_backward is not None:
            transpose_backward = self.plans.get('transpose_backward')
            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in transpose_backward])

        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

        if save_softmax:
            softmax_fname = join(output_folder, fname + ".npz")
        else:
            softmax_fname = None

        """There is a problem with python process communication that prevents us from
        communicating objects larger than 2 GB between processes (basically when the length of
        the pickle string that will be sent is communicated by the multiprocessing.Pipe object
        then the placeholder (%i I think) does not allow for long enough strings (lol). This
        could be fixed by changing i to l (for long) but that would require manually patching
        system python code. We circumvent that problem here by saving softmax_pred to a npy file
        that will then be read (and finally deleted) by the Process.
        save_segmentation_nifti_from_softmax can take either filename or np.ndarray and will
        handle this automatically"""
        if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
            np.save(fname + ".npy", softmax_pred)
            softmax_pred = fname + ".npy"

        results.append(
            process_manager.starmap_async(
                save_segmentation_nifti_from_softmax,
                ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                  properties, 3, None, None, None, softmax_fname, None),)))

        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               join(self.gt_niftis_folder, fname + ".nii.gz")])

    _ = [i.get() for i in results]

    task = self.dataset_directory.split("/")[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name, json_author="Fabian",
                         json_description="", json_task=task)

    # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can
    # remove everything except the largest connected component for each class. To see if this
    # improves results, we do this for all classes and then rerun the evaluation. Those classes
    # for which this resulted in an improved dice score will have this applied during inference
    # as well
    self.print_to_log_file("determining postprocessing")
    determine_postprocessing(self.output_folder, self.gt_niftis_folder,
                             validation_folder_name,
                             final_subf_name=validation_folder_name + "_postprocessed",
                             debug=debug)
    # after this the final predictions for the validation set can be found in
    # validation_folder_name_base + "_postprocessed". They are always in that folder, even if no
    # postprocessing was applied!

    # determining postprocessing on a per-fold basis may be OK for this fold, but what if another
    # fold finds another postprocessing to be better? In this case we need to consolidate. At the
    # time the consolidation is going to be done we won't know what self.gt_niftis_folder was, so
    # now we copy all the niftis into a separate folder to be used later
    gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
    maybe_mkdir_p(gt_nifti_folder)
    for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
        success = False
        attempts = 0
        while not success and attempts < 10:
            try:
                shutil.copy(f, gt_nifti_folder)
                success = True
            except OSError:
                attempts += 1
                sleep(1)
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True,
             step_size: float = 0.5, save_softmax: bool = True,
             use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False,
             all_in_gpu: bool = False, segmentation_export_kwargs: dict = None):
    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"

    current_mode = self.network.training
    self.network.eval()

    # save whether network is in deep supervision mode or not
    ds = self.network.do_ds
    # disable deep supervision
    self.network.do_ds = False

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in self.plans.keys():
            force_separate_z = self.plans['segmentation_export_params']['force_separate_z']
            interpolation_order = self.plans['segmentation_export_params']['interpolation_order']
            interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)

    # this is for debug purposes
    my_input_args = {
        'do_mirroring': do_mirroring,
        'use_sliding_window': use_sliding_window,
        'step': step_size,
        'save_softmax': save_softmax,
        'use_gaussian': use_gaussian,
        'overwrite': overwrite,
        'validation_folder_name': validation_folder_name,
        'debug': debug,
        'all_in_gpu': all_in_gpu,
        'segmentation_export_kwargs': segmentation_export_kwargs,
    }
    save_json(my_input_args, join(output_folder, "validation_args.json"))

    if do_mirroring:
        if not self.data_aug_params['do_mirror']:
            raise RuntimeError(
                "We did not train with mirroring so you cannot do inference with mirroring enabled")
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    export_pool = Pool(default_num_threads)
    results = []

    for k in self.dataset_val.keys():
        properties = load_pickle(self.dataset[k]['properties_file'])
        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
        if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                               k + "_segFromPrevStage.npz"))['data'][None]

            print(k, data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0],
                                                                 range(1, self.num_classes))))

            softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                data_for_net, do_mirroring, mirror_axes, use_sliding_window, step_size,
                use_gaussian, all_in_gpu=all_in_gpu, mixed_precision=self.fp16)[1]

            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            """There is a problem with python process communication that prevents us from
            communicating objects larger than 2 GB between processes (basically when the length
            of the pickle string that will be sent is communicated by the multiprocessing.Pipe
            object then the placeholder (%i I think) does not allow for long enough strings
            (lol). This could be fixed by changing i to l (for long) but that would require
            manually patching system python code. We circumvent that problem here by saving
            softmax_pred to a npy file that will then be read (and finally deleted) by the
            Process. save_segmentation_nifti_from_softmax can take either filename or np.ndarray
            and will handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
                np.save(join(output_folder, fname + ".npy"), softmax_pred)
                softmax_pred = join(output_folder, fname + ".npy")

            results.append(
                export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                      properties, interpolation_order, None, None, None,
                      softmax_fname, None, force_separate_z,
                      interpolation_order_z),)))

        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               join(self.gt_niftis_folder, fname + ".nii.gz")])

    _ = [i.get() for i in results]
    self.print_to_log_file("finished prediction")

    # evaluate raw predictions
    self.print_to_log_file("evaluation of raw predictions")
    task = self.dataset_directory.split("/")[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
                         json_author="Fabian", json_task=task,
                         num_threads=default_num_threads)

    # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can
    # remove everything except the largest connected component for each class. To see if this
    # improves results, we do this for all classes and then rerun the evaluation. Those classes
    # for which this resulted in an improved dice score will have this applied during inference
    # as well
    self.print_to_log_file("determining postprocessing")
    determine_postprocessing(self.output_folder, self.gt_niftis_folder,
                             validation_folder_name,
                             final_subf_name=validation_folder_name + "_postprocessed",
                             debug=debug)
    # after this the final predictions for the validation set can be found in
    # validation_folder_name_base + "_postprocessed". They are always in that folder, even if no
    # postprocessing was applied!

    # determining postprocessing on a per-fold basis may be OK for this fold, but what if another
    # fold finds another postprocessing to be better? In this case we need to consolidate. At the
    # time the consolidation is going to be done we won't know what self.gt_niftis_folder was, so
    # now we copy all the niftis into a separate folder to be used later
    gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
    maybe_mkdir_p(gt_nifti_folder)
    for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
        success = False
        attempts = 0
        e = None
        while not success and attempts < 10:
            try:
                shutil.copy(f, gt_nifti_folder)
                success = True
            except OSError as exc:
                e = exc  # keep a reference; "except ... as e" unbinds e when the block exits
                attempts += 1
                sleep(1)
        if not success:
            print("Could not copy gt nifti file %s into folder %s" % (f, gt_nifti_folder))
            if e is not None:
                raise e

    # restore network deep supervision mode
    self.network.train(current_mode)
    self.network.do_ds = ds
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, step_size: float = 0.5,
             save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False,
             segmentation_export_kwargs: dict = None, run_postprocessing_on_folds: bool = True):
    current_mode = self.network.training
    self.network.eval()

    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in self.plans.keys():
            force_separate_z = self.plans['segmentation_export_params']['force_separate_z']
            interpolation_order = self.plans['segmentation_export_params']['interpolation_order']
            interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

    output_folder = self.output_folder + "/" + validation_folder_name
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    if do_mirroring:
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    export_pool = Pool(2)
    results = []

    transpose_backward = self.plans.get('transpose_backward')

    for k in self.dataset_val.keys():
        properties = load_pickle(self.dataset[k]['properties_file'])
        data = np.load(self.dataset[k]['data_file'])['data']

        # concat segmentation of previous step
        seg_from_prev_stage = np.load(self.folder_with_segs_from_prev_stage + "/" + k +
                                      "_segFromPrevStage.npz")['data'][None]

        print(data.shape)
        data[-1][data[-1] == -1] = 0
        data_for_net = np.concatenate((data[:-1],
                                       to_one_hot(seg_from_prev_stage[0], range(1, self.num_classes))))

        softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
            data_for_net, do_mirroring=do_mirroring, mirror_axes=mirror_axes,
            use_sliding_window=use_sliding_window, step_size=step_size, use_gaussian=use_gaussian,
            all_in_gpu=all_in_gpu, mixed_precision=self.fp16)[1]

        if transpose_backward is not None:
            # transpose_backward was already read from self.plans above; no need to fetch it again
            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in transpose_backward])

        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

        if save_softmax:
            softmax_fname = output_folder + "/" + fname + ".npz"
        else:
            softmax_fname = None

        """There is a problem with python process communication that prevents us from communicating
        objects larger than 2 GB between processes (basically when the length of the pickle string
        that will be sent is communicated by the multiprocessing.Pipe object, the placeholder (%i,
        I think) does not allow for strings that long (lol). This could be fixed by changing i to l
        (for long), but that would require manually patching system python code. We circumvent the
        problem here by saving softmax_pred to a npy file that will then be read (and finally
        deleted) by the background process. save_segmentation_nifti_from_softmax can take either a
        filename or a np.ndarray and will handle this automatically"""
        if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
            # save next to the other outputs; np.save(fname + ".npy", ...) would write into the
            # current working directory instead
            np.save(output_folder + "/" + fname + ".npy", softmax_pred)
            softmax_pred = output_folder + "/" + fname + ".npy"

        results.append(export_pool.starmap_async(
            save_segmentation_nifti_from_softmax,
            ((softmax_pred, output_folder + "/" + fname + ".nii.gz", properties, interpolation_order,
              self.regions_class_order, None, None, softmax_fname, None, force_separate_z,
              interpolation_order_z),)))

        pred_gt_tuples.append([output_folder + "/" + fname + ".nii.gz",
                               self.gt_niftis_folder + "/" + fname + ".nii.gz"])

    _ = [i.get() for i in results]

    task = self.dataset_directory.split("/")[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=output_folder + "/" + "summary.json",
                         json_name=job_name, json_author="Fabian",
                         json_description="", json_task=task)

    if run_postprocessing_on_folds:
        # in the old tuframework we would stop here. Now we add a postprocessing step. This
        # postprocessing can remove everything except the largest connected component for each class.
        # To see if this improves results, we do this for all classes and then rerun the evaluation.
        # Those classes for which this resulted in an improved dice score will have this applied
        # during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder, self.gt_niftis_folder, validation_folder_name,
                                 final_subf_name=validation_folder_name + "_postprocessed", debug=debug)
        # after this the final predictions for the validation set can be found in
        # validation_folder_name_base + "_postprocessed". They are always in that folder, even if no
        # postprocessing was applied!

    # determining postprocessing on a per-fold basis may be OK for this fold, but what if another fold
    # finds a different postprocessing to be better? In that case we need to consolidate. At the time
    # the consolidation is done we won't know what self.gt_niftis_folder was, so we copy all the niftis
    # into a separate folder now to be used later
    gt_nifti_folder = self.output_folder_base + "/" + "gt_niftis"
    if not os.path.isdir(gt_nifti_folder):
        os.makedirs(gt_nifti_folder)
    for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
        success = False
        attempts = 0
        while not success and attempts < 10:
            try:
                shutil.copy(f, gt_nifti_folder)
                success = True
            except OSError:
                attempts += 1
                sleep(1)

    self.network.train(current_mode)

    export_pool.close()
    export_pool.join()
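# A small sketch factoring the retry-copy loop above into a reusable helper. The helper name
# (copy_with_retries) and the fixed one-second backoff are assumptions for illustration only. The
# first validate() re-raises the last OSError after exhausting its attempts while the second one
# merely gives up, so a raise_on_failure flag covers both behaviours.
import shutil
from time import sleep


def copy_with_retries(src, dst_folder, max_attempts=10, wait_s=1.0, raise_on_failure=False):
    # copies on network filesystems can fail transiently with OSError; retry before giving up
    last_err = None
    for _ in range(max_attempts):
        try:
            shutil.copy(src, dst_folder)
            return True
        except OSError as ex:
            last_err = ex  # keep a reference; the 'as ex' name is unbound after the except block
            sleep(wait_s)
    print("Could not copy %s into folder %s" % (src, dst_folder))
    if raise_on_failure and last_err is not None:
        raise last_err
    return False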