def run_cropping(self, list_of_files, overwrite_existing=False, output_folder=None):
    """
    Crop all cases in parallel and copy their ground truth segmentations.

    The ground truth nifti segmentations are also copied into ``<output_folder>/gt_segmentations`` so
    that they can be used for evaluation on the cluster.

    :param list_of_files: list of list of files
        [[PATIENTID_TIMESTEP_0000.nii.gz], [PATIENTID_TIMESTEP_0000.nii.gz]]. The last entry of each case is
        copied as its segmentation unless it is None.
    :param overwrite_existing: passed on to ``self._load_crop_save_star`` for every case
    :param output_folder: if not None, replaces ``self.output_folder`` before anything is written
    :return: None
    """
    if output_folder is not None:
        self.output_folder = output_folder

    output_folder_gt = os.path.join(self.output_folder, "gt_segmentations")
    maybe_mkdir_p(output_folder_gt)
    for case in list_of_files:
        if case[-1] is not None:
            shutil.copy(case[-1], output_folder_gt)

    list_of_args = [(case, get_case_identifier(case), overwrite_existing) for case in list_of_files]

    p = Pool(self.num_threads)
    try:
        p.map(self._load_crop_save_star, list_of_args)
    finally:
        # release the worker processes even if one of the cases raised
        p.close()
        p.join()
def apply_postprocessing_to_folder(input_folder: str, output_folder: str, for_which_classes: list,
                                   min_valid_object_size: dict = None, num_processes=8):
    """
    applies removing of all but the largest connected component to all niftis in a folder

    Every ``.nii.gz`` file found in input_folder is handed to ``load_remove_save`` and written to
    output_folder under the same file name.

    :param input_folder: folder that is searched for ``.nii.gz`` files
    :param output_folder: folder the processed files are written to (created if necessary)
    :param for_which_classes: passed unchanged to ``load_remove_save`` for every file
    :param min_valid_object_size: passed unchanged to ``load_remove_save`` for every file
    :param num_processes: number of worker processes
    :return: None
    """
    maybe_mkdir_p(output_folder)
    nii_files = subfiles(input_folder, suffix=".nii.gz", join=False)
    input_files = [join(input_folder, i) for i in nii_files]
    out_files = [join(output_folder, i) for i in nii_files]
    num_files = len(input_files)

    p = Pool(num_processes)
    try:
        results = p.starmap_async(
            load_remove_save,
            zip(input_files, out_files, [for_which_classes] * num_files, [min_valid_object_size] * num_files))
        # wait for all files; this also re-raises exceptions from the workers
        results.get()
    finally:
        p.close()
        p.join()
def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier,
        num_threads=default_num_threads, force_separate_z=None):
    """
    Run the preprocessing of every cropped case, one stage (target spacing) after the other.

    :param target_spacings: list of lists [[1.25, 1.25, 5]]
    :param input_folder_with_cropped_npz: dim: c, x, y, z | npz_file['data'] np.savez_compressed(fname.npz, data=arr)
    :param output_folder: base output folder; stage i is written to ``<data_identifier>_stage<i>`` inside it
    :param data_identifier: prefix of the per-stage output folder names
    :param num_threads: a single value used for every stage, or a list/tuple/np.ndarray with one entry per stage
    :param force_separate_z: None
    :return: None
    """
    print("Initializing to run preprocessing")
    print("npz folder:", input_folder_with_cropped_npz)
    print("output_folder:", output_folder)
    list_of_cropped_npz_files = subfiles(input_folder_with_cropped_npz, True, None, ".npz", True)
    maybe_mkdir_p(output_folder)

    num_stages = len(target_spacings)
    if not isinstance(num_threads, (list, tuple, np.ndarray)):
        num_threads = [num_threads] * num_stages
    assert len(num_threads) == num_stages

    for i in range(num_stages):
        output_folder_stage = os.path.join(output_folder, data_identifier + "_stage%d" % i)
        maybe_mkdir_p(output_folder_stage)
        spacing = target_spacings[i]
        all_args = [
            (spacing, get_case_identifier_from_npz(case), output_folder_stage, input_folder_with_cropped_npz,
             force_separate_z)
            for case in list_of_cropped_npz_files
        ]

        # every stage gets its own pool because the number of workers may differ between stages
        p = Pool(num_threads[i])
        try:
            p.map(self._run_star, all_args)
        finally:
            p.close()
            p.join()
def split_4d(input_folder, num_processes=default_num_threads, overwrite_task_output_id=None):
    """
    Run ``split_4d_nifti`` on all images of a task folder and write the result below ``nnUNet_raw_data``.

    The output folder is ``Task<3-digit id>_<name>``; an already existing output folder is deleted first.
    ``labelsTr`` and ``dataset.json`` are copied unchanged.

    :param input_folder: task folder named ``TaskXX_<name>`` (XX being a 2-digit id) that contains imagesTr,
        labelsTr and dataset.json. imagesTs is optional.
    :param num_processes: number of worker processes
    :param overwrite_task_output_id: id used for the output folder name; defaults to the id of the input folder
    :return: None
    """
    assert isdir(join(input_folder, "imagesTr")) and isdir(join(input_folder, "labelsTr")) and \
        isfile(join(input_folder, "dataset.json")), \
        "The input folder must be a valid Task folder from the Medical Segmentation Decathlon with at least the " \
        "imagesTr and labelsTr subfolders and the dataset.json file"

    while input_folder.endswith("/"):
        input_folder = input_folder[:-1]

    full_task_name = input_folder.split("/")[-1]

    assert full_task_name.startswith("Task"), "The input folder must point to a folder that starts with TaskXX_"

    # "Task" + two digits puts the first underscore at index 6
    first_underscore = full_task_name.find("_")
    assert first_underscore == 6, "Input folder start with TaskXX with XX being a 2-digit id: 00, 01, 02 etc"

    input_task_id = int(full_task_name[4:6])
    if overwrite_task_output_id is None:
        overwrite_task_output_id = input_task_id

    task_name = full_task_name[7:]

    output_folder = join(nnUNet_raw_data, "Task%03.0d_" % overwrite_task_output_id + task_name)

    if isdir(output_folder):
        shutil.rmtree(output_folder)

    files = []
    output_dirs = []

    maybe_mkdir_p(output_folder)
    for subdir in ["imagesTr", "imagesTs"]:
        curr_out_dir = join(output_folder, subdir)
        if not isdir(curr_out_dir):
            os.mkdir(curr_out_dir)
        curr_dir = join(input_folder, subdir)
        if not isdir(curr_dir):
            # only imagesTr is guaranteed by the check above; a task without imagesTs has nothing to split here
            continue
        nii_files = sorted(join(curr_dir, i) for i in os.listdir(curr_dir) if i.endswith(".nii.gz"))
        files.extend(nii_files)
        output_dirs.extend([curr_out_dir] * len(nii_files))

    shutil.copytree(join(input_folder, "labelsTr"), join(output_folder, "labelsTr"))

    p = Pool(num_processes)
    try:
        p.starmap(split_4d_nifti, zip(files, output_dirs))
    finally:
        p.close()
        p.join()
    shutil.copy(join(input_folder, "dataset.json"), output_folder)
def plan_and_preprocess(task_string, processes_lowres=default_num_threads, processes_fullres=3,
                        no_preprocessing=False):
    """
    Plan the 3D and 2D experiments of a task and, unless disabled, run the corresponding preprocessing.

    dataset_properties.pkl (from the cropped data folder) and dataset.json (from the raw data folder) are
    copied into the preprocessing output folder of the task first.

    :param task_string: name of the task folder inside the raw, cropped and preprocessed data folders
    :param processes_lowres: first entry of the tuple given to ``run_preprocessing`` of the 3D planner
    :param processes_fullres: second entry of that tuple; also given to ``run_preprocessing`` of the 2D planner
    :param no_preprocessing: if True, only ``plan_experiment`` is run for both planners
    :return: None
    """
    from nnunet.experiment_planning.experiment_planner_baseline_2DUNet import ExperimentPlanner2D
    from nnunet.experiment_planning.experiment_planner_baseline_3DUNet import ExperimentPlanner

    preprocessing_output_dir_this_task_train = join(preprocessing_output_dir, task_string)
    cropped_out_dir = join(nnUNet_cropped_data, task_string)
    maybe_mkdir_p(preprocessing_output_dir_this_task_train)

    shutil.copy(join(cropped_out_dir, "dataset_properties.pkl"), preprocessing_output_dir_this_task_train)
    shutil.copy(join(nnUNet_raw_data, task_string, "dataset.json"), preprocessing_output_dir_this_task_train)

    exp_planner = ExperimentPlanner(cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing((processes_lowres, processes_fullres))

    exp_planner = ExperimentPlanner2D(cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing(processes_fullres)

    # write which class is in which slice to all training cases (required to speed up 2D Dataloader)
    # This is done for all data so that if we wanted to use them with 2D we could do so
    if not no_preprocessing:
        # if there is more than one my_data_identifier (different branches) then this code will run for all of
        # them if they start with the same string. not problematic, but not pretty
        stages = [i for i in subdirs(preprocessing_output_dir_this_task_train, join=True, sort=True)
                  if i.split("/")[-1].find("stage") != -1]

        p = Pool(default_num_threads)
        try:
            for s in stages:
                print(s.split("/")[-1])
                list_of_npz_files = subfiles(s, True, None, ".npz", True)
                list_of_pkl_files = [i[:-4] + ".pkl" for i in list_of_npz_files]
                all_classes = []
                for pk in list_of_pkl_files:
                    # NOTE: pickle must only be used on trusted files
                    with open(pk, 'rb') as f:
                        props = pickle.load(f)
                    all_classes_tmp = np.array(props['classes'])
                    # negative class values are not passed on
                    all_classes.append(all_classes_tmp[all_classes_tmp >= 0])
                p.map(add_classes_in_slice_info, zip(list_of_npz_files, list_of_pkl_files, all_classes))
        finally:
            # release the workers even if reading a pkl file or a worker failed
            p.close()
            p.join()
def __init__(self, num_threads, output_folder=None):
    """
    This one finds a mask of nonzero elements (must be nonzero in all modalities) and crops the image to that
    mask. In the case of BRaTS and ISLES data this results in a significant reduction in image size

    :param num_threads: stored as ``self.num_threads``
    :param output_folder: where to store the cropped data; the folder is created right away if it is not None
    """
    self.output_folder = output_folder
    self.num_threads = num_threads

    if self.output_folder is None:
        # no folder given: nothing to create
        return
    maybe_mkdir_p(self.output_folder)
def crop(task_string, override=False, num_threads=default_num_threads):
    """
    Crop all cases of a task and store the result in the cropped data folder of that task.

    :param task_string: name of the task folder inside the raw and the cropped data folder
    :param override: if True, the cropped data folder of the task is deleted and recreated first; the flag is
        also passed to ``run_cropping`` as overwrite_existing
    :param num_threads: number of threads handed to ``ImageCropper``
    :return: None
    """
    target_dir = join(nnUNet_cropped_data, task_string)
    maybe_mkdir_p(target_dir)

    if override and isdir(target_dir):
        # start over with an empty folder
        shutil.rmtree(target_dir)
        maybe_mkdir_p(target_dir)

    raw_task_dir = join(nnUNet_raw_data, task_string)
    case_lists, _ = create_lists_from_splitted_dataset(raw_task_dir)

    cropper = ImageCropper(num_threads, target_dir)
    cropper.run_cropping(case_lists, overwrite_existing=override)

    dataset_json = join(nnUNet_raw_data, task_string, "dataset.json")
    shutil.copy(dataset_json, target_dir)
def initialize(self, training=True, force_load_plans=False):
    """
    For prediction of test cases just set training=False, this will prevent loading of training data and
    training batchgenerator initialization

    :param training: if True, the data loaders and augmentation generators are created as well
    :param force_load_plans: if True, the plans file is loaded even if ``self.plans`` is already set
    :return:
    """
    maybe_mkdir_p(self.output_folder)

    plans_missing = self.plans is None
    if force_load_plans or plans_missing:
        self.load_plans_file()

    self.process_plans(self.plans)
    self.setup_DA_params()

    stage_folder_name = self.plans['data_identifier'] + "_stage%d" % self.stage
    self.folder_with_preprocessed_data = join(self.dataset_directory, stage_folder_name)

    if training:
        self.dl_tr, self.dl_val = self.get_basic_generators()

        if not self.unpack_data:
            self.print_to_log_file(
                "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                "will wait all winter for your model to finish!")
        else:
            self.print_to_log_file("unpacking dataset")
            unpack_dataset(self.folder_with_preprocessed_data)
            self.print_to_log_file("done")

        patch_size = self.data_aug_params['patch_size_for_spatialtransform']
        self.tr_gen, self.val_gen = get_default_augmentation(self.dl_tr, self.dl_val, patch_size,
                                                             self.data_aug_params)

        training_keys = str(self.dataset_tr.keys())
        self.print_to_log_file("TRAINING KEYS:\n %s" % training_keys, also_print_to_console=False)
        validation_keys = str(self.dataset_val.keys())
        self.print_to_log_file("VALIDATION KEYS:\n %s" % validation_keys, also_print_to_console=False)

    self.initialize_network()
    self.initialize_optimizer_and_scheduler()
    # assert isinstance(self.network, (SegmentationNetwork, nn.DataParallel))
    self.was_initialized = True
def predict_next_stage(trainer, stage_to_be_predicted_folder, force_separate_z=False, interpolation_order=1,
                       interpolation_order_z=0):
    """
    Predict all validation cases of trainer and hand each prediction to ``resample_and_save``.

    The results are written to ``pred_next_stage`` next to ``trainer.output_folder`` as
    ``<case>_segFromPrevStage.npz``. The target shape of every case is read from the file with the same name
    in stage_to_be_predicted_folder.

    :param trainer: initialized trainer providing dataset_val, data_aug_params, patch_size and
        predict_preprocessed_data_return_softmax
    :param stage_to_be_predicted_folder: folder with the preprocessed npz files of the stage to predict for
    :param force_separate_z: passed on to ``resample_and_save``
    :param interpolation_order: passed on to ``resample_and_save``
    :param interpolation_order_z: passed on to ``resample_and_save``
    :return: None
    """
    output_folder = join(pardir(trainer.output_folder), "pred_next_stage")
    maybe_mkdir_p(output_folder)

    export_pool = Pool(2)
    try:
        results = []
        for pat in trainer.dataset_val.keys():
            print(pat)
            data_file = trainer.dataset_val[pat]['data_file']
            # the last entry along the first axis is dropped before prediction
            data_preprocessed = np.load(data_file)['data'][:-1]
            predicted = trainer.predict_preprocessed_data_return_softmax(
                data_preprocessed, True, 1, False, 1, trainer.data_aug_params['mirror_axes'], True, True, 2,
                trainer.patch_size, True)

            data_file_nofolder = data_file.split("/")[-1]
            data_file_nextstage = join(stage_to_be_predicted_folder, data_file_nofolder)
            data_nextstage = np.load(data_file_nextstage)['data']
            target_shp = data_nextstage.shape[1:]
            output_file = join(output_folder, data_file_nextstage.split("/")[-1][:-4] + "_segFromPrevStage.npz")

            if np.prod(predicted.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be save
                # large arrays are handed to the worker as a file name instead of the array itself
                np.save(output_file[:-4] + ".npy", predicted)
                predicted = output_file[:-4] + ".npy"

            results.append(export_pool.starmap_async(
                resample_and_save,
                [(predicted, target_shp, output_file, force_separate_z, interpolation_order,
                  interpolation_order_z)]))

        _ = [i.get() for i in results]
    finally:
        # release the export workers even if prediction or resampling failed
        export_pool.close()
        export_pool.join()
def print_to_log_file(self, *args, also_print_to_console=True, add_timestamp=True):
    """
    Append one line made of args to the log file and optionally print it.

    If ``self.log_file`` is None, a new ``training_log_<date and time>.txt`` is created in
    ``self.output_folder`` first. Writing is attempted up to five times (0.5 s apart); failed attempts are
    reported on the console and the message is dropped from the file if all of them fail.

    :param args: objects to log; each is converted with str() and followed by a space
    :param also_print_to_console: if True, args are printed as well
    :param add_timestamp: if True, the current date and time are put in front of args
    :return: None
    """
    # time of this call; kept in its own name so that creating the log file below cannot overwrite it
    dt_object = datetime.fromtimestamp(time())

    if add_timestamp:
        args = ("%s:" % dt_object, *args)

    if self.log_file is None:
        maybe_mkdir_p(self.output_folder)
        now = datetime.now()
        self.log_file = join(self.output_folder, "training_log_%d_%d_%d_%02.0d_%02.0d_%02.0d.txt" %
                             (now.year, now.month, now.day, now.hour, now.minute, now.second))
        with open(self.log_file, 'w') as f:
            f.write("Starting... \n")

    max_attempts = 5
    for _ in range(max_attempts):
        try:
            with open(self.log_file, 'a+') as f:
                for a in args:
                    f.write(str(a))
                    f.write(" ")
                f.write("\n")
            break
        except IOError:
            print("%s: failed to log: " % dt_object, sys.exc_info())
            sleep(0.5)

    if also_print_to_console:
        print(*args)
def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier,
        num_threads=default_num_threads, force_separate_z=None):
    """
    Run the preprocessing of every cropped case for every stage, using one shared pool of workers.

    :param target_spacings: one target spacing per stage
    :param input_folder_with_cropped_npz: folder that is searched for the cropped ``.npz`` files; must contain
        at least one
    :param output_folder: base output folder; stage i is written to ``<data_identifier>_stage<i>`` inside it
    :param data_identifier: prefix of the per-stage output folder names
    :param num_threads: number of worker processes
    :param force_separate_z: passed on to ``self._run_star`` with every case
    :return: None
    """
    print("Initializing to run preprocessing")
    print("npz folder:", input_folder_with_cropped_npz)
    print("output_folder:", output_folder)
    list_of_cropped_npz_files = subfiles(input_folder_with_cropped_npz, True, None, ".npz", True)
    assert len(list_of_cropped_npz_files) != 0, "set list of files first"
    maybe_mkdir_p(output_folder)

    # the jobs of all stages are collected first and then processed by a single pool
    all_args = []
    num_stages = len(target_spacings)
    for i in range(num_stages):
        output_folder_stage = os.path.join(output_folder, data_identifier + "_stage%d" % i)
        maybe_mkdir_p(output_folder_stage)
        spacing = target_spacings[i]
        for case in list_of_cropped_npz_files:
            case_identifier = get_case_identifier_from_npz(case)
            all_args.append((spacing, case_identifier, output_folder_stage, input_folder_with_cropped_npz,
                             force_separate_z))

    p = Pool(num_threads)
    try:
        p.map(self._run_star, all_args)
    finally:
        p.close()
        p.join()
def run_training(self):
    """
    Train until ``self.max_num_epochs`` is reached or ``self.on_epoch_end()`` asks to stop.

    Every epoch consists of ``self.num_batches_per_epoch`` training iterations followed by
    ``self.num_val_batches_per_epoch`` validation iterations without gradients (and a second validation pass
    with the network in train mode if ``self.also_val_in_tr_mode`` is set). The mean losses are appended to
    ``self.all_tr_losses``, ``self.all_val_losses`` and ``self.all_val_losses_tr_mode``. At the end
    model_final_checkpoint.model is saved and the model_latest files are removed.

    :return: None
    """
    # must come first: the generators and the network used below do not exist before initialization
    if not self.was_initialized:
        self.initialize(True)

    # created before anything below may want to write into it
    maybe_mkdir_p(self.output_folder)

    # fetch one batch from each generator up front
    _ = self.tr_gen.next()
    _ = self.val_gen.next()

    torch.cuda.empty_cache()

    self._maybe_init_amp()

    self.plot_network_architecture()

    if cudnn.benchmark and cudnn.deterministic:
        warn("torch.backends.cudnn.deterministic is True indicating a deterministic training is desired. "
             "But torch.backends.cudnn.benchmark is True as well and this will prevent deterministic training! "
             "If you want deterministic then set benchmark=False")

    while self.epoch < self.max_num_epochs:
        self.print_to_log_file("\nepoch: ", self.epoch)
        epoch_start_time = time()

        # train one epoch
        self.network.train()
        train_losses_epoch = [self.run_iteration(self.tr_gen, True) for _ in range(self.num_batches_per_epoch)]
        self.all_tr_losses.append(np.mean(train_losses_epoch))
        self.print_to_log_file("train loss : %.4f" % self.all_tr_losses[-1])

        with torch.no_grad():
            # validation with train=False
            self.network.eval()
            val_losses = [self.run_iteration(self.val_gen, False, True)
                          for _ in range(self.num_val_batches_per_epoch)]
            self.all_val_losses.append(np.mean(val_losses))
            self.print_to_log_file("validation loss: %.4f" % self.all_val_losses[-1])

            if self.also_val_in_tr_mode:
                # validation with train=True
                self.network.train()
                val_losses = [self.run_iteration(self.val_gen, False)
                              for _ in range(self.num_val_batches_per_epoch)]
                self.all_val_losses_tr_mode.append(np.mean(val_losses))
                self.print_to_log_file("validation loss (train=True): %.4f" % self.all_val_losses_tr_mode[-1])

        epoch_end_time = time()

        self.update_train_loss_MA()  # needed for lr scheduler and stopping of training

        continue_training = self.on_epoch_end()
        if not continue_training:
            # allows for early stopping
            break

        self.epoch += 1
        self.print_to_log_file("This epoch took %f s\n" % (epoch_end_time - epoch_start_time))

    self.epoch -= 1  # if we don't do this we can get a problem with loading model_final_checkpoint.

    self.save_checkpoint(join(self.output_folder, "model_final_checkpoint.model"))
    # now we can delete latest as it will be identical with final
    if isfile(join(self.output_folder, "model_latest.model")):
        os.remove(join(self.output_folder, "model_latest.model"))
    if isfile(join(self.output_folder, "model_latest.model.pkl")):
        os.remove(join(self.output_folder, "model_latest.model.pkl"))
def _gt_file_name(pred_file_name, f_dict):
    """Return the name of the ground truth file that belongs to a predicted file."""
    if f_dict:
        # f_dict is keyed by the file name up to the first '.'
        return f_dict[pred_file_name.split('.')[0]]['properties']['seg_file'].split(os.sep)[-1]
    return pred_file_name


def _run_load_remove_save(pool, arg_tuples):
    """Submit one ``load_remove_save`` job per argument tuple, wait for all and return their results."""
    pending = [pool.starmap_async(load_remove_save, (args,)) for args in arg_tuples]
    return [job.get()[0] for job in pending]


def _aggregate_min_size_kept(results):
    """Return, per key, the smallest non-None ``min_kept`` value over all ``load_remove_save`` results."""
    min_size_kept = {}
    for _, min_kept in results:
        for k, size in min_kept.items():
            if size is None:
                continue
            previous = min_size_kept.get(k)
            min_size_kept[k] = size if previous is None else min(previous, size)
    return min_size_kept


def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validation_raw", temp_folder="temp",
                             final_subf_name="validation_final", processes=default_num_threads, dice_threshold=0,
                             debug=False, advanced_postprocessing=False, pp_filename="postprocessing.json",
                             f_dict=None):
    """
    Find out for which classes ``load_remove_save`` improves the Dice score and apply that to all predictions.

    First all foreground classes are treated as one region, then (if there is more than one class) every class
    is treated separately. A step is kept only if the Dice scores read from the resulting summary.json improve.
    The decision is saved to ``join(base, pp_filename)`` and the final predictions plus their summary.json are
    written to ``join(base, final_subf_name)``.

    :param base: folder that contains raw_subfolder_name; temp and final folders are created inside it
    :param gt_labels_folder: folder with niftis of ground truth labels
    :param raw_subfolder_name: subfolder of base with niftis of predicted (non-postprocessed) segmentations;
        must contain a summary.json
    :param temp_folder: used to store temporary data, will be deleted after we are done here unless debug=True
    :param final_subf_name: final results will be stored here (subfolder of base)
    :param processes: number of worker processes
    :param dice_threshold: only apply postprocessing if results is better than old_result+dice_threshold (can
        be used as eps)
    :param debug: if True then the temporary files will not be deleted
    :param advanced_postprocessing: if True, an additional pass collects the smallest kept object sizes and
        passes them to ``load_remove_save`` as size constraint
    :param pp_filename: name of the json file (inside base) the result is saved to
    :param f_dict: optional dict, keyed by the predicted file name up to the first '.', whose
        ``['properties']['seg_file']`` entry gives the ground truth file of that prediction. If not given, the
        ground truth file is expected to have the same name as the prediction.
    :return: None
    """
    raw_folder = join(base, raw_subfolder_name)
    summary_file = join(raw_folder, "summary.json")
    # checked before the file is read and before any folder is touched
    assert isfile(summary_file), "join(base, raw_subfolder_name) does not " \
                                 "contain a summary.json"

    validation_result_raw = load_json(summary_file)['results']

    # lets see what classes are in the dataset
    classes = [int(i) for i in validation_result_raw['mean'].keys() if int(i) != 0]

    folder_all_classes_as_fg = join(base, temp_folder + "_allClasses")
    folder_per_class = join(base, temp_folder + "_perClass")
    final_folder = join(base, final_subf_name)

    if isdir(folder_all_classes_as_fg):
        shutil.rmtree(folder_all_classes_as_fg)
    if isdir(folder_per_class):
        shutil.rmtree(folder_per_class)

    # these are all the files we will be dealing with
    fnames = subfiles(raw_folder, suffix=".nii.gz", join=False)
    raw_files = [join(raw_folder, f) for f in fnames]
    # the same ground truth file is used in every evaluation below
    gt_files = [join(gt_labels_folder, _gt_file_name(f, f_dict)) for f in fnames]

    # make output and temp dir
    maybe_mkdir_p(folder_all_classes_as_fg)
    maybe_mkdir_p(folder_per_class)
    maybe_mkdir_p(final_folder)

    pp_results = {}
    pp_results['dc_per_class_raw'] = {}
    pp_results['dc_per_class_pp_all'] = {}  # dice scores after treating all foreground classes as one
    pp_results['dc_per_class_pp_per_class'] = {}  # dice scores after removing everything except largest cc
    # independently for each class after we already did dc_per_class_pp_all
    pp_results['for_which_classes'] = []
    pp_results['min_valid_object_sizes'] = {}

    pp_results['num_samples'] = len(validation_result_raw['all'])
    validation_result_raw = validation_result_raw['mean']

    # multiprocessing rules
    p = Pool(processes)
    try:
        # first treat all foreground classes as one and remove all but the largest foreground connected
        # component
        fg_files = [join(folder_all_classes_as_fg, f) for f in fnames]
        if advanced_postprocessing:
            results = _run_load_remove_save(p, [(src, dst, (classes,)) for src, dst in zip(raw_files, fg_files)])
            min_size_kept = _aggregate_min_size_kept(results)
            print("foreground vs background, smallest valid object size was", min_size_kept[tuple(classes)])
            print("removing only objects smaller than that...")
        else:
            min_size_kept = None

        # we need to rerun the step from above, now with the size constraint
        _run_load_remove_save(p, [(src, dst, (classes,), min_size_kept)
                                  for src, dst in zip(raw_files, fg_files)])
        pred_gt_tuples = [[dst, gt] for dst, gt in zip(fg_files, gt_files)]

        # evaluate postprocessed predictions
        _ = aggregate_scores(pred_gt_tuples, labels=classes,
                             json_output_file=join(folder_all_classes_as_fg, "summary.json"),
                             json_author="Fabian", num_threads=processes)

        # now we need to figure out if doing this improved the dice scores. We will implement that defensively
        # in so far that if a single class got worse as a result we won't do this. We can change this in the
        # future but right now I prefer to do it this way
        validation_result_PP_test = load_json(join(folder_all_classes_as_fg, "summary.json"))['results']['mean']

        for c in classes:
            pp_results['dc_per_class_raw'][str(c)] = validation_result_raw[str(c)]['Dice']
            pp_results['dc_per_class_pp_all'][str(c)] = validation_result_PP_test[str(c)]['Dice']

        dc_raw_all = [pp_results['dc_per_class_raw'][str(cl)] for cl in classes]
        dc_pp_all = [pp_results['dc_per_class_pp_all'][str(cl)] for cl in classes]

        do_fg_cc = False
        print("Foreground vs background")
        print("before:", np.mean(dc_raw_all))
        print("after: ", np.mean(dc_pp_all))

        any_better = any(pp > (raw + dice_threshold) for raw, pp in zip(dc_raw_all, dc_pp_all))
        any_worse = any(pp < raw for raw, pp in zip(dc_raw_all, dc_pp_all))
        # at least one class must improve and no class may get worse
        if any_better and not any_worse:
            pp_results['for_which_classes'].append(classes)
            if min_size_kept is not None:
                pp_results['min_valid_object_sizes'].update(deepcopy(min_size_kept))
            do_fg_cc = True
            print("Removing all but the largest foreground region improved results!")
            print('for_which_classes', classes)
            print('min_valid_object_sizes', min_size_kept)

        if len(classes) > 1:
            # now depending on whether we do remove all but the largest foreground connected component we
            # define the source dir for the next one to be the raw or the temp dir
            source = folder_all_classes_as_fg if do_fg_cc else raw_folder
            source_files = [join(source, f) for f in fnames]
            per_class_files = [join(folder_per_class, f) for f in fnames]

            if advanced_postprocessing:
                # now run this for each class separately
                results = _run_load_remove_save(p, [(src, dst, classes)
                                                    for src, dst in zip(source_files, per_class_files)])
                min_size_kept = _aggregate_min_size_kept(results)
                print("classes treated separately, smallest valid object sizes are")
                print(min_size_kept)
                print("removing only objects smaller than that")
            else:
                min_size_kept = None

            # rerun with the size thresholds from above
            _run_load_remove_save(p, [(src, dst, classes, min_size_kept)
                                      for src, dst in zip(source_files, per_class_files)])
            pred_gt_tuples = [[dst, gt] for dst, gt in zip(per_class_files, gt_files)]

            # evaluate postprocessed predictions
            _ = aggregate_scores(pred_gt_tuples, labels=classes,
                                 json_output_file=join(folder_per_class, "summary.json"),
                                 json_author="Fabian", num_threads=processes)

            # the per-class step is compared against the result of the step it was started from
            if do_fg_cc:
                old_res = deepcopy(validation_result_PP_test)
            else:
                old_res = validation_result_raw

            # these are the new dice scores
            validation_result_PP_test = load_json(join(folder_per_class, "summary.json"))['results']['mean']

            for c in classes:
                dc_raw = old_res[str(c)]['Dice']
                dc_pp = validation_result_PP_test[str(c)]['Dice']
                pp_results['dc_per_class_pp_per_class'][str(c)] = dc_pp
                print(c)
                print("before:", dc_raw)
                print("after: ", dc_pp)

                if dc_pp > (dc_raw + dice_threshold):
                    pp_results['for_which_classes'].append(int(c))
                    if min_size_kept is not None:
                        pp_results['min_valid_object_sizes'].update({c: min_size_kept[c]})
                    print("Removing all but the largest region for class %d improved results!" % c)
                    print('min_valid_object_sizes', min_size_kept)
        else:
            print("Only one class present, no need to do each class separately as this is covered in fg vs bg")

        if not advanced_postprocessing:
            pp_results['min_valid_object_sizes'] = None

        print("done")
        print("for which classes:")
        print(pp_results['for_which_classes'])
        print("min_object_sizes")
        print(pp_results['min_valid_object_sizes'])

        pp_results['validation_raw'] = raw_subfolder_name
        pp_results['validation_final'] = final_subf_name

        # now that we have a proper for_which_classes, apply that
        final_files = [join(final_folder, f) for f in fnames]
        _run_load_remove_save(p, [(src, dst, pp_results['for_which_classes'],
                                   pp_results['min_valid_object_sizes'])
                                  for src, dst in zip(raw_files, final_files)])
        pred_gt_tuples = [[dst, gt] for dst, gt in zip(final_files, gt_files)]

        # evaluate postprocessed predictions
        _ = aggregate_scores(pred_gt_tuples, labels=classes,
                             json_output_file=join(final_folder, "summary.json"),
                             json_author="Fabian", num_threads=processes)

        # stored as string in the json file
        pp_results['min_valid_object_sizes'] = str(pp_results['min_valid_object_sizes'])

        save_json(pp_results, join(base, pp_filename))

        # delete temp
        if not debug:
            shutil.rmtree(folder_per_class)
            shutil.rmtree(folder_all_classes_as_fg)
    finally:
        # release the workers even if one of the steps above failed
        p.close()
        p.join()
    print("done")
[join(nnunet.__path__[0], "training", "network_training")], trainerclass, "nnunet.training.network_training") if trainer_class is None: raise RuntimeError( "Could not find trainer class in nnunet.training.network_training") else: assert issubclass( trainer_class, nnUNetTrainer ), "network_trainer was found but is not derived from nnUNetTrainer" trainer = trainer_class(plans_file, fold, folder_with_preprocessed_data, output_folder=output_folder_name, dataset_directory=dataset_directory, batch_dice=batch_dice, stage=stage) trainer.initialize(False) trainer.load_dataset() trainer.do_split() trainer.load_best_checkpoint(train=False) stage_to_be_predicted_folder = join( dataset_directory, trainer.plans['data_identifier'] + "_stage%d" % 1) output_folder = join(pardir(trainer.output_folder), "pred_next_stage") maybe_mkdir_p(output_folder) predict_next_stage(trainer, stage_to_be_predicted_folder)
def validate(self, do_mirroring: bool = True, use_train_mode: bool = False, tiled: bool = True, step: int = 2,
             save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False,
             force_separate_z: bool = None, interpolation_order: int = 3, interpolation_order_z: int = 0):
    """
    Predict the validation cases, evaluate them and determine the postprocessing.

    Predictions are exported to ``join(self.output_folder, validation_folder_name)`` together with
    validation_args.json and summary.json. Afterwards ``determine_postprocessing`` is run and the ground truth
    niftis are copied to ``join(self.output_folder_base, "gt_niftis")``.
    If ``config.DEBUG_MODE`` is set, only the first two cases are processed and they are exported in this
    process instead of the pool.

    :param do_mirroring: predict with mirroring; raises RuntimeError if training was done without mirroring
    :param use_train_mode: passed on to the prediction
    :param tiled: passed on to the prediction
    :param step: passed on to the prediction
    :param save_softmax: if True, a ``.npz`` file name is given to the export for every case
    :param use_gaussian: passed on to the prediction
    :param overwrite: if False, cases whose output files already exist are not predicted again
    :param validation_folder_name: subfolder of ``self.output_folder`` for the raw predictions
    :param debug: if debug=True then the temporary files generated for postprocessing determination will be
        kept
    :param all_in_gpu: passed on to the prediction
    :param force_separate_z: passed on to the export
    :param interpolation_order: passed on to the export
    :param interpolation_order_z: passed on to the export
    :return: None
    """
    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    # predictions as they come from the network go here
    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)
    # this is for debug purposes
    my_input_args = {'do_mirroring': do_mirroring,
                     'use_train_mode': use_train_mode,
                     'tiled': tiled,
                     'step': step,
                     'save_softmax': save_softmax,
                     'use_gaussian': use_gaussian,
                     'overwrite': overwrite,
                     'validation_folder_name': validation_folder_name,
                     'debug': debug,
                     'all_in_gpu': all_in_gpu,
                     'force_separate_z': force_separate_z,
                     'interpolation_order': interpolation_order,
                     'interpolation_order_z': interpolation_order_z,
                     }
    save_json(my_input_args, join(output_folder, "validation_args.json"))

    if do_mirroring:
        if not self.data_aug_params['do_mirror']:
            raise RuntimeError("We did not train with mirroring so you cannot do inference with mirroring enabled")
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    # A separate local name is required here: assigning to default_num_threads inside this function would make
    # it a local variable and reading it with DEBUG_MODE switched off would raise UnboundLocalError.
    num_threads = 1 if config.DEBUG_MODE else default_num_threads

    export_pool = Pool(num_threads)
    try:
        results = []
        cnt = 0
        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            cnt += 1
            if config.DEBUG_MODE and cnt > 2:
                break
            # the case identifier is used as output file name
            fname = k
            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                softmax_pred = self.predict_preprocessed_data_return_softmax(
                    data[:-1], do_mirroring, 1, use_train_mode, 1, mirror_axes, tiled, True, step,
                    self.patch_size, use_gaussian=use_gaussian, all_in_gpu=all_in_gpu)

                softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None

                # There is a problem with python process communication that prevents us from communicating
                # objects larger than 2 GB between processes (basically when the length of the pickle string
                # that will be sent is communicated by the multiprocessing.Pipe object then the placeholder
                # (%i I think) does not allow for long enough strings (lol). This could be fixed by changing i
                # to l (for long) but that would require manually patching system python code. We circumvent
                # that problem here by saving softmax_pred to a npy file that will then be read (and finally
                # deleted) by the Process. save_segmentation_nifti_from_softmax can take either filename or
                # np.ndarray and will handle this automatically
                if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be save
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")

                export_args = (softmax_pred, join(output_folder, fname + ".nii.gz"), properties,
                               interpolation_order, None, None, None, softmax_fname, None, force_separate_z,
                               interpolation_order_z)
                if config.DEBUG_MODE:
                    # export in this process
                    results.append(save_segmentation_nifti_from_softmax(*export_args))
                else:
                    results.append(export_pool.starmap_async(save_segmentation_nifti_from_softmax,
                                                             (export_args,)))

            pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                                   join(self.gt_niftis_folder, properties['seg_file'].split(os.sep)[-1])])

        if not config.DEBUG_MODE:
            _ = [i.get() for i in results]
    finally:
        # the export workers are not needed any more, also not if something above failed
        export_pool.close()
        export_pool.join()
    self.print_to_log_file("finished prediction")

    # evaluate raw predictions
    self.print_to_log_file("evaluation of raw predictions")
    task = self.dataset_directory.split(os.sep)[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name + " val tiled %s" % (str(tiled)),
                         json_author="Fabian",
                         json_task=task, num_threads=num_threads)

    # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove
    # everything except the largest connected component for each class. To see if this improves results, we do
    # this for all classes and then rerun the evaluation. Those classes for which this resulted in an improved
    # dice score will have this applied during inference as well
    self.print_to_log_file("determining postprocessing")
    determine_postprocessing(self.output_folder, self.gt_niftis_folder, validation_folder_name,
                             final_subf_name=validation_folder_name + "_postprocessed", debug=debug,
                             processes=num_threads, f_dict=self.dataset)
    # after this the final predictions for the validation set can be found in
    # validation_folder_name_base + "_postprocessed"
    # They are always in that folder, even if no postprocessing as applied!

    # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds
    # another postprocessing to be better? In this case we need to consolidate. At the time the consolidation
    # is going to be done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a
    # separate folder to be used later
    gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
    maybe_mkdir_p(gt_nifti_folder)
    for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
        # The error is kept in its own variable because the name bound by "except ... as" is deleted when
        # the except block ends.
        last_error = None
        for _ in range(10):
            try:
                shutil.copy(f, gt_nifti_folder)
                last_error = None
                break
            except OSError as err:
                last_error = err
                sleep(1)
        if last_error is not None:
            print("Could not copy gt nifti file %s into folder %s" % (f, gt_nifti_folder))
            raise last_error
""" base = os.environ[ 'nnUNet_raw_data_base'] if "nnUNet_raw_data_base" in os.environ.keys( ) else None preprocessing_output_dir = os.environ[ 'nnUNet_preprocessed'] if "nnUNet_preprocessed" in os.environ.keys( ) else None network_training_output_dir_base = os.path.join( os.environ['RESULTS_FOLDER']) if "RESULTS_FOLDER" in os.environ.keys( ) else None if base is not None: nnUNet_raw_data = join(base, "nnUNet_raw_data") nnUNet_cropped_data = join(base, "nnUNet_cropped_data") maybe_mkdir_p(nnUNet_raw_data) maybe_mkdir_p(nnUNet_cropped_data) else: print( "nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files " "are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like " "this. If this is not intended, please read nnunet/paths.md for information on how to set this up properly." ) nnUNet_cropped_data = nnUNet_raw_data = None if preprocessing_output_dir is not None: maybe_mkdir_p(preprocessing_output_dir) else: print( "nnUNet_preprocessed is not defined and nnU-Net can not be used for preprocessing " "or training. If this is not intended, please read nnunet/pathy.md for information on how to set this up."