def evaluate_folder(folder_with_gts: str, folder_with_predictions: str, labels: tuple):
    """
    Score every predicted segmentation against its ground truth and write the
    aggregated scores to summary.json inside folder_with_predictions.

    :param folder_with_gts: folder where the ground truth segmentations are saved. Must be nifti files.
    :param folder_with_predictions: folder where the predicted segmentations are saved. Must be nifti files.
    :param labels: tuple of int with the labels in the dataset. For example (0, 1, 2, 3) for Task01_BrainTumour.
    :return: the value returned by aggregate_scores
    """
    gt_names = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    pred_names = subfiles(folder_with_predictions, suffix=".nii.gz", join=False)

    # both folders must contain exactly the same file names
    for name in gt_names:
        assert name in pred_names, "files missing in folder_with_predictions"
    for name in pred_names:
        assert name in gt_names, "files missing in folder_with_gts"

    # (prediction, reference) pairs, in the order of the prediction listing
    pairs = []
    for name in pred_names:
        pairs.append((join(folder_with_predictions, name), join(folder_with_gts, name)))

    summary_file = join(folder_with_predictions, "summary.json")
    return aggregate_scores(pairs, json_output_file=summary_file, num_threads=8, labels=labels)
def __data_list(self, join):
    '''
    List the image files and, when a label root is configured, the label files.

    :param join: whether join the root path, if False, only return filename.
                 Passed to subfiles as its second positional argument; note
                 that inside this method the name shadows any module-level
                 ``join`` helper.
    :return: img_file_list, lbl_file_list (lbl_file_list is None when
             self.lbl_data_root is None); ordering is whatever subfiles
             yields by default -- presumably sorted by name, verify
    '''
    img_file_list = subfiles(self.img_data_root, join)
    if self.lbl_data_root is None:
        lbl_file_list = None
    else:
        lbl_file_list = subfiles(self.lbl_data_root, join)
    return img_file_list, lbl_file_list
def get_caseIDs_from_splitted_dataset_folder(folder):
    """
    Return the unique case identifiers found in a folder of .nii.gz files.

    :param folder: folder that is listed for files ending in ".nii.gz"
    :return: numpy array (np.unique output) of file names with their last
             12 characters removed
    """
    nifti_names = subfiles(folder, suffix=".nii.gz", join=False)
    # all files must be .nii.gz and have 4 digit modality index, i.e. the
    # trailing 12 characters ("_XXXX.nii.gz") are cut off
    case_ids = []
    for name in nifti_names:
        case_ids.append(name[:-12])
    # only unique patient ids
    return np.unique(case_ids)
def main():
    """
    Command line entry point: run prediction for every .nii.gz file found in
    an input folder.

    Parses the command line, calls maybe_download_weights(), builds one
    single-file input list and one output path per .nii.gz file name in the
    input folder and hands everything to predict_cases.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input_folder", type=str, required=True,
                        help="folder with input files. All .nii.gz files in this folder will be processed.")
    parser.add_argument("-o", "--output_folder", type=str, required=True,
                        help="output folder. This is there the resulting segmentations will be saved. Cannot be the "
                             "same folder as the input folder. If output_folder does not exist "
                             "it will be created")
    # type=int (was type=str): the value is passed on as a process count. With
    # type=str the integer default survived, but any user supplied "-p N"
    # reached predict_cases as a string.
    parser.add_argument("-p", "--processes", default=4, type=int, required=False,
                        help="number of processes for data preprocessing and nifti export. You should not have to "
                             "touch this. So don't unless there is a clear indication that it is required. Default: 4")
    # NOTE(review): with default=True and action='store_false', keep_existing is
    # True unless the flag is given, so `not keep_existing` below is True only
    # when --keep_existing is passed. The help text describes the opposite;
    # confirm which behaviour is intended before changing either.
    parser.add_argument('--keep_existing', default=True, required=False, action='store_false',
                        help="set to False to keep segmentations in output_folder and continue where you left off "
                             "(useful if something crashes). If this flag is not set, all segmentations that may "
                             "already be present in output_folder will be overwritten.")

    args = parser.parse_args()

    input_folder = args.input_folder
    output_folder = args.output_folder
    processes = args.processes
    keep_existing = args.keep_existing

    maybe_download_weights()

    # we must generate a list of input filenames
    nii_files = subfiles(input_folder, suffix='.nii.gz', join=False)
    # one single-element list per case; outputs reuse the input file names
    input_list_of_lists = [[join(input_folder, i)] for i in nii_files]
    output_filenames = [join(output_folder, i) for i in nii_files]

    predict_cases(folder_with_parameter_files, input_list_of_lists, output_filenames, (0, 1, 2, 3, 4), False,
                  processes, processes, None, True, None, not keep_existing, False, 2, None, 3, 0)
def add_classes_in_slice_info(self):
    """
    this speeds up oversampling foreground during training

    For every stage folder, loads the 'classes' entry of each case's .pkl
    file and maps the name ``add_classes_in_slice_info`` (resolved at module
    level, i.e. not this method -- presumably a worker function of the same
    name) over (npz file, pkl file, classes) triples in a process pool.

    :return:
    """
    p = Pool(default_num_threads)

    # if there is more than one my_data_identifier (different branches) then this code will run for all of them if
    # they start with the same string. not problematic, but not pretty
    stage_folders = []
    for stage_idx in range(len(self.plans_per_stage)):
        stage_name = self.data_identifier + "_stage%d" % stage_idx
        stage_folders.append(join(self.preprocessed_output_folder, stage_name))

    for stage_folder in stage_folders:
        print(stage_folder.split("/")[-1])
        npz_paths = subfiles(stage_folder, True, None, ".npz", True)
        # the properties pickle sits next to each npz file
        pkl_paths = [npz_path[:-4] + ".pkl" for npz_path in npz_paths]

        classes_per_case = []
        for pkl_path in pkl_paths:
            case_classes = np.array(load_pickle(pkl_path)['classes'])
            # negative entries are dropped
            classes_per_case.append(case_classes[case_classes >= 0])

        p.map(add_classes_in_slice_info, zip(npz_paths, pkl_paths, classes_per_case))

    p.close()
    p.join()
def crawl_and_copy(current_folder, out_folder, prefix="fabian_", suffix="ummary.json"):
    """
    This script will run recursively through all subfolders of current_folder and copy all files that end with
    suffix with some automatically generated prefix into out_folder

    Files are only copied from folders whose path contains "fold0"; the
    recursion itself visits every subfolder.

    :param current_folder: folder to scan (and descend from)
    :param out_folder: flat destination folder for the copies
    :param prefix: string prepended to the copied file's name; grows by the
                   name of each subfolder that is entered ("__" separated,
                   unless the prefix is still empty)
    :param suffix: only files whose name ends with this string are copied
    :return:
    """
    s = subdirs(current_folder, join=False)
    f = [i for i in subfiles(current_folder, join=False) if i.endswith(suffix)]

    if current_folder.find("fold0") != -1:
        for fl in f:
            shutil.copy(os.path.join(current_folder, fl), os.path.join(out_folder, prefix + fl))

    for su in s:
        add = su if prefix == "" else "__" + su
        # forward suffix explicitly: previously the recursive call fell back to
        # the default suffix, so a custom suffix only applied to the top folder
        crawl_and_copy(os.path.join(current_folder, su), out_folder, prefix=prefix + add, suffix=suffix)
def run_in_folder(folder):
    """
    Call foreground_mean on every .json file listed for ``folder``, skipping
    hidden files and files ending in "_globalMean.json".

    :param folder: folder passed to subfiles for the .json listing
    """
    # local import keeps this block self-contained
    import os

    json_files = subfiles(folder, True, None, ".json", True)
    # os.path.basename instead of split("/") so the hidden-file check also
    # works when the path separator is not "/"
    json_files = [
        i for i in json_files
        if not os.path.basename(i).startswith(".") and not i.endswith("_globalMean.json")
    ]  # stupid mac
    for j in json_files:
        foreground_mean(j)
def run_in_folder(folder):
    """
    Call foreground_mean on every .json file listed for ``folder``, skipping
    hidden files and files ending in "_globalMean.json".

    :param folder: folder passed to subfiles for the .json listing
    """
    to_process = []
    for candidate in subfiles(folder, True, None, ".json", True):
        # stupid mac: hidden files show up in the listing
        if os.path.basename(candidate).startswith("."):
            continue
        if candidate.endswith("_globalMean.json"):
            continue
        to_process.append(candidate)

    for json_file in to_process:
        foreground_mean(json_file)
def plan_and_preprocess(task_string, processes_lowres=default_num_threads, processes_fullres=3, no_preprocessing=False):
    """
    Plan (and optionally run) the 3D and 2D preprocessing for one task, then
    record per-slice class information for every preprocessed case.

    :param task_string: name of the task folder below the raw, cropped and
                        preprocessing output directories
    :param processes_lowres: first entry of the process tuple given to the 3D
                             planner's run_preprocessing
    :param processes_fullres: second entry of that tuple; also passed on its
                              own to the 2D planner's run_preprocessing
    :param no_preprocessing: if True only plan_experiment() is called for both
                             planners and the class-in-slice step is skipped
    """
    from tuframework.experiment_planning.experiment_planner_baseline_2DUNet import ExperimentPlanner2D
    from tuframework.experiment_planning.experiment_planner_baseline_3DUNet import ExperimentPlanner

    preprocessing_output_dir_this_task_train = preprocessing_output_dir + "/" + task_string
    cropped_out_dir = tuFramework_cropped_data + "/" + task_string
    if not os.path.isdir(preprocessing_output_dir_this_task_train):
        os.makedirs(preprocessing_output_dir_this_task_train)

    shutil.copy(cropped_out_dir + "/" + "dataset_properties.pkl", preprocessing_output_dir_this_task_train)
    shutil.copy(tuFramework_raw_data + "/" + task_string + "/" + "dataset.json",
                preprocessing_output_dir_this_task_train)

    exp_planner = ExperimentPlanner(cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing((processes_lowres, processes_fullres))

    exp_planner = ExperimentPlanner2D(cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing(processes_fullres)

    # write which class is in which slice to all training cases (required to speed up 2D Dataloader)
    # This is done for all data so that if we wanted to use them with 2D we could do so
    if not no_preprocessing:
        p = Pool(default_num_threads)

        # if there is more than one my_data_identifier (different branches) then this code will run for all of them if
        # they start with the same string. not problematic, but not pretty
        stages = [
            i for i in subdirs(preprocessing_output_dir_this_task_train, join=True, sort=True)
            if i.split("/")[-1].find("stage") != -1
        ]
        for s in stages:
            print(s.split("/")[-1])
            # join must be True here: the listed names are opened below (and
            # handed to the pool workers), so they need to carry the stage
            # folder. With join=False the bare file names were resolved
            # against the current working directory.
            # NOTE(review): assumes subfiles(folder, join, prefix, suffix, sort) -- confirm
            list_of_npz_files = subfiles(s, True, None, ".npz", True)
            list_of_pkl_files = [i[:-4] + ".pkl" for i in list_of_npz_files]
            all_classes = []
            for pk in list_of_pkl_files:
                with open(pk, 'rb') as f:
                    props = pickle.load(f)
                all_classes_tmp = np.array(props['classes'])
                # negative entries are dropped
                all_classes.append(all_classes_tmp[all_classes_tmp >= 0])
            p.map(add_classes_in_slice_info, zip(list_of_npz_files, list_of_pkl_files, all_classes))
        p.close()
        p.join()
def main():
    """
    Preprocess the validation cases named in args.validation_fold_file and
    store the list returned by preprocess_MLPerf as preprocessed_files.pkl in
    args.preprocessed_data_dir.

    All settings come from get_args(): validation_fold_file, model_dir,
    num_threads_preprocessing, raw_data_dir and preprocessed_data_dir.
    """
    args = get_args()

    print("Preparing for preprocessing data...")

    # Validation set is fold 1
    fold = 1
    fold_file = args.validation_fold_file

    # Make sure the model exists
    model_dir = args.model_dir
    plans_path = os.path.join(model_dir, "plans.pkl")
    assert os.path.isfile(plans_path), "Cannot find the model file {:}!".format(plans_path)
    checkpoint_name = "model_final_checkpoint"

    # Other settings
    fp16 = False
    n_threads = args.num_threads_preprocessing
    raw_dir = args.raw_data_dir
    out_dir = args.preprocessed_data_dir

    # Open list containing validation images from specific fold (e.g. 1)
    with open(fold_file) as f:
        validation_cases = [line.rstrip() for line in f]

    # Create output and preprocessed directory
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Create list of images locations (i.e. 4 images per case => 4 modalities)
    raw_names = subfiles(raw_dir, suffix=".nii.gz", join=False, sort=True)
    list_of_lists = []
    for case in validation_cases:
        # a file belongs to the case when it starts with the case name and is
        # exactly 12 characters longer than it
        wanted_len = len(case) + 12
        list_of_lists.append([
            os.path.join(raw_dir, name)
            for name in raw_names
            if name.startswith(case) and len(name) == wanted_len
        ])

    # Preprocess images, returns filenames list
    # This runs in multiprocess
    print("Acually preprocessing data...")
    preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold, fp16, list_of_lists,
                                           validation_cases, out_dir, n_threads)

    print("Saving metadata of the preprocessed data...")
    metadata_path = os.path.join(out_dir, "preprocessed_files.pkl")
    with open(metadata_path, "wb") as f:
        pickle.dump(preprocessed_files, f)

    print("Preprocessed data saved to {:}".format(out_dir))
    print("Done!")
def create_lists_from_splitted_dataset_folder(folder):
    """
    does not rely on dataset.json

    Builds, for every case id found in ``folder``, the list of that case's
    .nii.gz files (full paths as produced by subfiles with join=True).

    :param folder: folder that holds the .nii.gz files
    :return: list with one list of file paths per case id, in the order
             returned by get_caseIDs_from_splitted_dataset_folder
    """
    # local import keeps this block self-contained
    import os

    caseIDs = get_caseIDs_from_splitted_dataset_folder(folder)
    list_of_lists = []
    for case_id in caseIDs:
        candidates = subfiles(folder, prefix=case_id, suffix=".nii.gz", join=True, sort=True)
        # Case ids are file names minus their last 12 characters, so a file of
        # this case is exactly 12 characters longer than the id. Without this
        # check a case id that is a prefix of another one (e.g. "a_1" and
        # "a_10") would also collect the other case's files.
        expected_len = len(case_id) + 12
        list_of_lists.append([p for p in candidates if len(os.path.basename(p)) == expected_len])
    return list_of_lists
def crawl_and_remove_hidden_from_decathlon(folder):
    """
    Delete files whose name starts with "." from a decathlon task folder and
    from its imagesTr, labelsTr and imagesTs subfolders.

    :param folder: task folder; its last path component must start with
                   "Task" and it must contain the three subfolders above,
                   otherwise an AssertionError is raised
    """
    folder = remove_trailing_slash(folder)

    not_decathlon_msg = "This does not seem to be a decathlon folder. Please give me a " \
                        "folder that starts with TaskXX and has the subfolders imagesTr, " \
                        "labelsTr and imagesTs"

    assert folder.split('/')[-1].startswith("Task"), not_decathlon_msg
    subf = subfolders(folder, join=False)
    for required in ('imagesTr', 'imagesTs', 'labelsTr'):
        assert required in subf, not_decathlon_msg

    # the task folder itself first, then the three data folders
    for sub_name in (None, 'imagesTr', 'labelsTr', 'imagesTs'):
        target = folder if sub_name is None else join(folder, sub_name)
        for hidden_file in subfiles(target, prefix="."):
            os.remove(hidden_file)
def preprocess_setup(preprocessed_data_dir):
    """
    Preprocess the fold-1 validation cases with hard-coded model, fold-file
    and raw-data locations and store the list returned by preprocess_MLPerf as
    preprocessed_files.pkl.

    :param preprocessed_data_dir: output folder; created when it is missing
    """
    print("Preparing for preprocessing data...")

    # Validation set is fold 1
    fold = 1
    fold_list_path = '../models/image_segmentation/tensorflow/3d_unet_mlperf/inference/nnUNet/folds/fold1_validation.txt'

    # Make sure the model exists
    model_dir = 'build/result/nnUNet/3d_fullres/Task043_BraTS2019/nnUNetTrainerV2__nnUNetPlansv2.mlperf.1'
    plans_file = os.path.join(model_dir, "plans.pkl")
    assert os.path.isfile(plans_file), "Cannot find the model file {:}!".format(plans_file)
    checkpoint_name = "model_final_checkpoint"

    # Other settings
    fp16 = False
    num_threads_preprocessing = 12
    raw_data_dir = 'build/raw_data/nnUNet_raw_data/Task043_BraTS2019/imagesTr'

    # Open list containing validation images from specific fold (e.g. 1)
    validation_files = []
    with open(fold_list_path) as fold_list:
        validation_files.extend(entry.rstrip() for entry in fold_list)

    # Create output and preprocessed directory
    if not os.path.isdir(preprocessed_data_dir):
        os.makedirs(preprocessed_data_dir)

    # Create list of images locations (i.e. 4 images per case => 4 modalities)
    all_files = subfiles(raw_data_dir, suffix=".nii.gz", join=False, sort=True)

    def _files_of(case):
        # files that start with the case name and are exactly 12 characters longer
        return [
            os.path.join(raw_data_dir, candidate)
            for candidate in all_files
            if len(candidate) == len(case) + 12 and candidate.startswith(case)
        ]

    list_of_lists = [_files_of(case) for case in validation_files]

    # Preprocess images, returns filenames list
    # This runs in multiprocess
    print("Acually preprocessing data...")
    preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold, fp16, list_of_lists,
                                           validation_files, preprocessed_data_dir,
                                           num_threads_preprocessing)

    print("Saving metadata of the preprocessed data...")
    with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"), "wb") as out:
        pickle.dump(preprocessed_files, out)

    print("Preprocessed data saved to {:}".format(preprocessed_data_dir))
    print("Done!")
def unpack_dataset(folder, threads=8, key="data"):
    """
    unpacks all npz files in a folder to npy (whatever you want to have unpacked must be saved under key)

    :param folder: folder that is listed for files ending in ".npz"
    :param threads: size of the process pool
    :param key: passed to convert_to_npy together with every file
    :return:
    """
    pool = Pool(threads)
    npz_files = subfiles(folder, True, None, ".npz", True)
    # one (file, key) task per npz file
    keys = [key] * len(npz_files)
    tasks = zip(npz_files, keys)
    pool.map(convert_to_npy, tasks)
    pool.close()
    pool.join()
def evaluate_folder(folder_with_gts, folder_with_predictions, labels):
    """
    writes a summary.json to folder_with_predictions

    :param folder_with_gts: folder listed for reference .nii.gz files
    :param folder_with_predictions: folder listed for predicted .nii.gz files;
                                    also receives summary.json
    :param labels: forwarded unchanged to aggregate_scores
    :return: the value returned by aggregate_scores
    """
    files_gt = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    files_pred = subfiles(folder_with_predictions, suffix=".nii.gz", join=False)

    # the two listings must contain the same file names
    without_prediction = [name for name in files_gt if name not in files_pred]
    assert not without_prediction, "files missing in folder_with_predictions"
    without_gt = [name for name in files_pred if name not in files_gt]
    assert not without_gt, "files missing in folder_with_gts"

    test_ref_pairs = [
        (join(folder_with_predictions, name), join(folder_with_gts, name))
        for name in files_pred
    ]
    return aggregate_scores(
        test_ref_pairs,
        json_output_file=join(folder_with_predictions, "summary.json"),
        num_threads=8,
        labels=labels,
    )
def preprocess_3dunet_ref(model_dir_base, preprocessed_data_dir_base):
    """
    Preprocess raw image data to pickle file.

    :param model_dir_base: base folder below which the 3d-unet model folder
                           (containing plans.pkl) is looked up
    :param preprocessed_data_dir_base: base folder below which the raw BraTS
                                       images are read and the preprocessed
                                       output is written
    """
    print("Preparing for preprocessing data...")

    # Validation set is fold 1
    fold = 1
    val_map_path = os.path.join("data_maps", "brats", "val_map.txt")

    # Make sure the model exists
    model_dir = os.path.join(model_dir_base, "3d-unet", "nnUNet", "3d_fullres", "Task043_BraTS2019",
                             "nnUNetTrainerV2__nnUNetPlansv2.mlperf.1")
    plans_path = os.path.join(model_dir, "plans.pkl")
    assert os.path.isfile(plans_path), "Cannot find the model file {:}!".format(plans_path)
    checkpoint_name = "model_final_checkpoint"

    # Other settings
    fp16 = False
    num_threads_preprocessing = 12
    brats_dir = os.path.join(preprocessed_data_dir_base, "brats")
    raw_data_dir = os.path.join(brats_dir, "brats_reference_raw", "Task043_BraTS2019", "imagesTr")
    preprocessed_data_dir = os.path.join(brats_dir, "brats_reference_preprocessed")

    # Open list containing validation images from specific fold (e.g. 1)
    with open(val_map_path) as val_map:
        validation_files = [row.rstrip() for row in val_map]

    # Create output and preprocessed directory
    if not os.path.isdir(preprocessed_data_dir):
        os.makedirs(preprocessed_data_dir)

    # Create list of images locations (i.e. 4 images per case => 4 modalities)
    all_files = subfiles(raw_data_dir, suffix=".nii.gz", join=False, sort=True)
    list_of_lists = []
    for case_name in validation_files:
        case_files = []
        for file_name in all_files:
            # keep files that start with the case name and are exactly 12
            # characters longer than it
            if file_name.startswith(case_name) and len(file_name) == len(case_name) + 12:
                case_files.append(os.path.join(raw_data_dir, file_name))
        list_of_lists.append(case_files)

    # Preprocess images, returns filenames list
    # This runs in multiprocess
    print("Actually preprocessing data...")
    preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold, fp16, list_of_lists,
                                           validation_files, preprocessed_data_dir,
                                           num_threads_preprocessing)

    # Save list of pkl file paths to pkl file.
    print("Saving metadata of the preprocessed data...")
    metadata_path = os.path.join(preprocessed_data_dir, "preprocessed_files.pkl")
    with open(metadata_path, "wb") as f:
        pickle.dump(preprocessed_files, f)
def check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities):
    """
    Derive the case ids from the .nii.gz files in input_folder and verify that
    every case has one file per expected modality.

    :param input_folder: folder that is listed for files ending in ".nii.gz"
    :param expected_num_modalities: number of files (index 0 .. n-1, written
                                    as a 4 digit suffix) expected per case
    :return: numpy array of unique case ids (file names minus their last 12
             characters)
    :raises AssertionError: if no .nii.gz file is found
    :raises RuntimeError: if an expected modality file does not exist
    """
    print("This model expects %d input modalities for each image" % expected_num_modalities)
    found_files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True)

    case_ids = np.unique([name[:-12] for name in found_files])

    # every expected file is crossed off this copy; what is left is unexpected
    unexpected = deepcopy(found_files)
    not_found = []

    assert len(found_files) > 0, "input folder did not contain any images (expected to find .nii.gz file endings)"

    # now check if all required files are present and that no unexpected files are remaining
    for case_id in case_ids:
        for modality in range(expected_num_modalities):
            expected_name = case_id + "_%04.0d.nii.gz" % modality
            if isfile(join(input_folder, expected_name)):
                unexpected.remove(expected_name)
            else:
                not_found.append(expected_name)

    num_cases = len(case_ids)
    case_examples = np.random.choice(case_ids, min(num_cases, 10))
    print("Found %d unique case ids, here are some examples:" % num_cases, case_examples)
    print("If they don't look right, make sure to double check your filenames. They must end with _0000.nii.gz etc")

    if unexpected:
        num_unexpected = len(unexpected)
        print("found %d unexpected remaining files in the folder. Here are some examples:" % num_unexpected,
              np.random.choice(unexpected, min(num_unexpected, 10)))

    if not_found:
        print("Some files are missing:")
        print(not_found)
        raise RuntimeError("missing files in input_folder")

    return case_ids
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, step_size: float = 0.5,
             save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False,
             segmentation_export_kwargs: dict = None, run_postprocessing_on_folds: bool = True):
    """
    Predict this rank's share of the validation cases, export them as nifti
    and, on local rank 0, evaluate them, optionally determine postprocessing
    and copy the ground truth niftis into output_folder_base/gt_niftis.

    The network is switched to eval mode with do_ds disabled for the duration
    of the call; both are restored at the end.

    :param do_mirroring: forwarded to the prediction call; raises RuntimeError
                         if True while data_aug_params['do_mirror'] is falsy
    :param use_sliding_window: forwarded to the prediction call
    :param step_size: forwarded to the prediction call
    :param save_softmax: if True a <case>.npz file name is handed to the export
                         and its absence triggers re-prediction
    :param use_gaussian: forwarded to the prediction call
    :param overwrite: if False, cases whose output files already exist are skipped
    :param validation_folder_name: subfolder of self.output_folder for the predictions
    :param debug: forwarded to determine_postprocessing
    :param all_in_gpu: forwarded to the prediction call
    :param segmentation_export_kwargs: optional dict with 'force_separate_z',
                                       'interpolation_order' and 'interpolation_order_z';
                                       if None the values are taken from the plans
                                       (or fall back to None / 1 / 0)
    :param run_postprocessing_on_folds: if True determine_postprocessing is run on rank 0
    :raises OSError: if a ground truth nifti cannot be copied after 10 attempts
    """
    if isinstance(self.network, DDP):
        net = self.network.module
    else:
        net = self.network
    ds = net.do_ds
    net.do_ds = False

    current_mode = self.network.training
    self.network.eval()

    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in self.plans.keys():
            force_separate_z = self.plans['segmentation_export_params']['force_separate_z']
            interpolation_order = self.plans['segmentation_export_params']['interpolation_order']
            interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

    # predictions as they come from the network go here
    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)
    # this is for debug purposes
    my_input_args = {
        'do_mirroring': do_mirroring,
        'use_sliding_window': use_sliding_window,
        'step_size': step_size,
        'save_softmax': save_softmax,
        'use_gaussian': use_gaussian,
        'overwrite': overwrite,
        'validation_folder_name': validation_folder_name,
        'debug': debug,
        'all_in_gpu': all_in_gpu,
        'segmentation_export_kwargs': segmentation_export_kwargs,
    }
    save_json(my_input_args, join(output_folder, "validation_args.json"))

    if do_mirroring:
        if not self.data_aug_params['do_mirror']:
            raise RuntimeError("We did not train with mirroring so you cannot do inference with mirroring enabled")
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    export_pool = Pool(default_num_threads)
    results = []

    all_keys = list(self.dataset_val.keys())
    my_keys = all_keys[self.local_rank::dist.get_world_size()]
    # we cannot simply iterate over all_keys because we need to know pred_gt_tuples and valid_labels of all cases
    # for evaluation (which is done by local rank 0)
    # NOTE(review): the loop runs over my_keys, so pred_gt_tuples only holds
    # this rank's cases and the `k in my_keys` test below is always true. The
    # comment above suggests all cases are needed on rank 0 -- confirm whether
    # all_keys was intended here.
    for k in my_keys:
        properties = load_pickle(self.dataset[k]['properties_file'])
        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               join(self.gt_niftis_folder, fname + ".nii.gz")])
        if k in my_keys:
            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                print(k, data.shape)
                # -1 entries of the last channel are set to 0
                data[-1][data[-1] == -1] = 0

                # the last channel is not fed to the network
                softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                    data[:-1],
                    do_mirroring=do_mirroring,
                    mirror_axes=mirror_axes,
                    use_sliding_window=use_sliding_window,
                    step_size=step_size,
                    use_gaussian=use_gaussian,
                    all_in_gpu=all_in_gpu,
                    mixed_precision=self.fp16)[1]

                softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None

                # There is a problem with python process communication that prevents us from communicating
                # objects larger than 2 GB between processes (basically when the length of the pickle string
                # that will be sent is communicated by the multiprocessing.Pipe object then the placeholder
                # (%i I think) does not allow for long enough strings (lol). This could be fixed by changing
                # i to l (for long) but that would require manually patching system python code. We
                # circumvent that problem here by saving softmax_pred to a npy file that will then be read
                # (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either
                # filename or np.ndarray and will handle this automatically
                if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be save
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")

                results.append(export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                      properties, interpolation_order, self.regions_class_order,
                      None, None,
                      softmax_fname, None, force_separate_z,
                      interpolation_order_z),)))

    # wait for every export to finish, then release the pool's worker processes
    _ = [i.get() for i in results]
    export_pool.close()
    export_pool.join()
    self.print_to_log_file("finished prediction")

    distributed.barrier()

    if self.local_rank == 0:
        # evaluate raw predictions
        self.print_to_log_file("evaluation of raw predictions")
        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder, "summary.json"),
                             json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
                             json_author="Fabian",
                             json_task=task, num_threads=default_num_threads)

        if run_postprocessing_on_folds:
            # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove
            # everything except the largest connected component for each class. To see if this improves results,
            # we do this for all classes and then rerun the evaluation. Those classes for which this resulted in
            # an improved dice score will have this applied during inference as well
            self.print_to_log_file("determining postprocessing")
            determine_postprocessing(self.output_folder, self.gt_niftis_folder, validation_folder_name,
                                     final_subf_name=validation_folder_name + "_postprocessed", debug=debug)
            # after this the final predictions for the validation set can be found in
            # validation_folder_name_base + "_postprocessed"
            # They are always in that folder, even if no postprocessing as applied!

        # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds
        # another postprocessing to be better? In this case we need to consolidate. At the time the consolidation
        # is going to be done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a
        # separate folder to be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            # Keep the exception under a separate name: the name bound by
            # `except ... as` is deleted when the except clause ends, so it
            # cannot be inspected after the retry loop.
            last_error = None
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError as err:
                    last_error = err
                    attempts += 1
                    sleep(1)
            if not success:
                print("Could not copy gt nifti file %s into folder %s" % (f, gt_nifti_folder))
                if last_error is not None:
                    raise last_error

    self.network.train(current_mode)
    net.do_ds = ds
def initialize(self, training=True, force_load_plans=False):
    """
    For prediction of test cases just set training=False, this will prevent loading of training data and
    training batchgenerator initialization

    Does nothing but log a message when the trainer was already initialized.
    On return self.was_initialized is True.

    :param training: if True, data loaders, deep supervision loss weights and
                     the augmentation generators are set up as well
    :param force_load_plans: if True the plans file is (re)loaded even when
                             self.plans is already set
    :return:
    """
    if not self.was_initialized:
        maybe_mkdir_p(self.output_folder)

        if force_load_plans or (self.plans is None):
            self.load_plans_file()

        self.process_plans(self.plans)

        self.setup_DA_params()

        self.folder_with_preprocessed_data = join(self.dataset_directory,
                                                  self.plans['data_identifier'] + "_stage%d" % self.stage)
        if training:
            self.dl_tr, self.dl_val = self.get_basic_generators()
            if self.unpack_data:
                if self.local_rank == 0:
                    print("unpacking dataset")
                    unpack_dataset(self.folder_with_preprocessed_data)
                    print("done")
                else:
                    # we need to wait until worker 0 has finished unpacking
                    npz_files = subfiles(self.folder_with_preprocessed_data, suffix=".npz", join=False)
                    case_ids = [i[:-4] for i in npz_files]
                    # poll until a .npy file exists for every .npz file
                    all_present = all(
                        [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids])
                    while not all_present:
                        print("worker", self.local_rank, "is waiting for unpacking")
                        sleep(3)
                        all_present = all(
                            [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids])
                    # there is some slight chance that there may arise some error because dataloader are loading
                    # a file that is still being written by worker 0. We ignore this for now an address it only
                    # if it becomes relevant
                    # (this can occur because while worker 0 writes the file is technically present so the other
                    # workers will proceed and eventually try to read it)
            else:
                print("INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                      "will wait all winter for your model to finish!")

            # setting weights for deep supervision losses
            net_numpool = len(self.net_num_pool_op_kernel_sizes)

            # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases
            # this gives higher resolution outputs more weight in the loss
            weights = np.array([1 / (2**i) for i in range(net_numpool)])

            # The mask is False only for the last index, so that output's
            # weight is zeroed. Normalize weights so that they sum to 1
            mask = np.array([True if i < net_numpool - 1 else False for i in range(net_numpool)])
            weights[~mask] = 0
            weights = weights / weights.sum()
            self.ds_loss_weights = weights

            # np.random.random_integers is deprecated; randint with an
            # exclusive upper bound of 100000 draws from the same inclusive
            # range 0..99999
            num_threads = self.data_aug_params.get('num_threads')
            seeds_train = np.random.randint(0, 100000, num_threads)
            seeds_val = np.random.randint(0, 100000, max(num_threads // 2, 1))
            print("seeds train", seeds_train)
            print("seeds_val", seeds_val)
            self.tr_gen, self.val_gen = get_moreDA_augmentation(
                self.dl_tr, self.dl_val,
                self.data_aug_params['patch_size_for_spatialtransform'],
                self.data_aug_params,
                deep_supervision_scales=self.deep_supervision_scales,
                seeds_train=seeds_train,
                seeds_val=seeds_val)
            self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())),
                                   also_print_to_console=False)
            self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())),
                                   also_print_to_console=False)
        else:
            pass

        self.initialize_network()
        self.initialize_optimizer_and_scheduler()
        self._maybe_init_amp()
        self.network = DDP(self.network)

    else:
        self.print_to_log_file('self.was_initialized is True, not running self.initialize again')
    self.was_initialized = True
# Collect, per task descriptor, the mean score of every task from the summary
# json files found in the leaderboard folder.
folder = "/home/fabian/drives/E132-Projekte/Projects/2018_MedicalDecathlon/Leaderboard"
task_descriptors = [
    '2D final 2',
    '2D final, less pool, dc and topK, fold0',
    '2D final pseudo3d 7, fold0',
    '2D final, less pool, dc and ce, fold0',
    '3D stage0 final 2, fold0',
    '3D fullres final 2, fold0',
]
task_ids_with_no_stage0 = ["Task001_BrainTumour", "Task004_Hippocampus", "Task005_Prostate"]

# descriptor -> (task -> mean score), both in insertion order
mean_scores = OrderedDict()
for t in task_descriptors:
    mean_scores[t] = OrderedDict()

json_files = [
    i for i in subfiles(folder, True, None, ".json", True)
    if not os.path.basename(i).startswith(".")
]  # stupid mac

for j in json_files:
    with open(j, 'r') as f:
        res = json.load(f)
    task = res['task']
    # the aggregate entry is not a real task
    if task == "Task999_ALL":
        continue
    name = res['name']
    if name not in task_descriptors:
        continue
    # every (descriptor, task) combination may only appear once
    if task in mean_scores[name]:
        raise RuntimeError("duplicate task %s for description %s" % (task, name))
    mean_scores[name][task] = res['results']['mean']['mean']
def pack_dataset(folder, threads=8, key="data"):
    """
    Map save_as_npz over every .npy file listed for ``folder`` using a process pool.

    :param folder: folder that is listed for files ending in ".npy"
    :param threads: size of the process pool
    :param key: passed to save_as_npz together with every file
    """
    pool = Pool(threads)
    npy_files = subfiles(folder, True, None, '.npy', True)
    # one (file, key) task per npy file
    tasks = zip(npy_files, [key] * len(npy_files))
    pool.map(save_as_npz, tasks)
    pool.close()
    pool.join()