Example #1
def evaluate_folder(folder_with_gts: str, folder_with_predictions: str,
                    labels: tuple):
    """
    writes a summary.json to folder_with_predictions
    :param folder_with_gts: folder where the ground truth segmentations are saved. Must be nifti files.
    :param folder_with_predictions: folder where the predicted segmentations are saved. Must be nifti files.
    :param labels: tuple of int with the labels in the dataset. For example (0, 1, 2, 3) for Task01_BrainTumour.
    :return:
    """
    files_gt = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    files_pred = subfiles(folder_with_predictions,
                          suffix=".nii.gz",
                          join=False)
    assert all([i in files_pred
                for i in files_gt]), "files missing in folder_with_predictions"
    assert all([i in files_gt
                for i in files_pred]), "files missing in folder_with_gts"
    test_ref_pairs = [(join(folder_with_predictions,
                            i), join(folder_with_gts, i)) for i in files_pred]
    res = aggregate_scores(test_ref_pairs,
                           json_output_file=join(folder_with_predictions,
                                                 "summary.json"),
                           num_threads=8,
                           labels=labels)
    return res
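A minimal usage sketch for the function above (paths and label tuple are made-up placeholders; evaluate_folder and its helpers are assumed importable as in the snippet):

# Hypothetical paths; the label tuple follows the (0, 1, 2, 3) convention from the docstring.
res = evaluate_folder("/data/Task01_BrainTumour/labelsTs",
                      "/data/Task01_BrainTumour/predictionsTs",
                      labels=(0, 1, 2, 3))
# summary.json is written into the predictions folder; res holds the same aggregated scores.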
Example #2
    def __data_list(self, join):
        '''
        :param join: whether to join the root path; if False, only return filenames
        :return: img_file_list, lbl_file_list, sorted by name
        '''
        if self.lbl_data_root is None:
            return subfiles(self.img_data_root, join), None
        return subfiles(self.img_data_root,
                        join), subfiles(self.lbl_data_root, join)
Example #3
def get_caseIDs_from_splitted_dataset_folder(folder):
    files = subfiles(folder, suffix=".nii.gz", join=False)
    # all files must be .nii.gz and have 4 digit modality index
    files = [i[:-12] for i in files]
    # only unique patient ids
    files = np.unique(files)
    return files
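The i[:-12] slice removes the trailing _XXXX.nii.gz (4-digit modality index plus extension, 12 characters), so all modalities of a case collapse into one ID. A small illustration with made-up file names:

import numpy as np

files = ["BraTS_001_0000.nii.gz", "BraTS_001_0001.nii.gz", "BraTS_002_0000.nii.gz"]
case_ids = np.unique([f[:-12] for f in files])
# -> array(['BraTS_001', 'BraTS_002'], dtype='<U9')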
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input_folder", type=str, required=True,
                        help="folder with input files. All .nii.gz files in this folder will be processed.")
    parser.add_argument("-o", "--output_folder", type=str, required=True,
                        help="output folder. This is there the resulting segmentations will be saved. Cannot be the "
                             "same folder as the input folder. If output_folder does not exist "
                             "it will be created")
    parser.add_argument("-p", "--processes", default=4, type=str, required=False,
                        help="number of processes for data preprocessing and nifti export. You should not have to "
                             "touch this. So don't unless there is a clear indication that it is required. Default: 4")
    parser.add_argument('--keep_existing', default=True, required=False, action='store_false',
                        help="if this flag is set, segmentations that may already be present in output_folder will "
                             "be overwritten. By default existing segmentations are kept and prediction continues "
                             "where it left off (useful if something crashed).")

    args = parser.parse_args()
    input_folder = args.input_folder
    output_folder = args.output_folder
    processes = args.processes
    keep_existing = args.keep_existing

    maybe_download_weights()

    # we must generate a list of input filenames
    nii_files = subfiles(input_folder, suffix='.nii.gz', join=False)
    input_list_of_lists = [[join(input_folder, i)] for i in nii_files]

    output_filenames = [join(output_folder, i) for i in nii_files]

    predict_cases(folder_with_parameter_files, input_list_of_lists, output_filenames, (0, 1, 2, 3, 4), False, processes,
                  processes, None, True, None, not keep_existing, False, 2, None, 3, 0)
Example #5
    def add_classes_in_slice_info(self):
        """
        this speeds up oversampling foreground during training
        :return:
        """
        p = Pool(default_num_threads)

        # if there is more than one my_data_identifier (different branches) then this code will run for all of them if
        # they start with the same string. Not problematic, but not pretty
        stages = [
            join(self.preprocessed_output_folder,
                 self.data_identifier + "_stage%d" % i)
            for i in range(len(self.plans_per_stage))
        ]

        for s in stages:
            print(s.split("/")[-1])
            list_of_npz_files = subfiles(s, True, None, ".npz", True)
            list_of_pkl_files = [i[:-4] + ".pkl" for i in list_of_npz_files]
            all_classes = []
            for pk in list_of_pkl_files:
                props = load_pickle(pk)
                all_classes_tmp = np.array(props['classes'])
                all_classes.append(all_classes_tmp[all_classes_tmp >= 0])
            p.map(add_classes_in_slice_info,
                  zip(list_of_npz_files, list_of_pkl_files, all_classes))
        p.close()
        p.join()
Example #6
def crawl_and_copy(current_folder,
                   out_folder,
                   prefix="fabian_",
                   suffix="ummary.json"):
    """
	This script will run recursively through all subfolders of current_folder and copy all files that end with
	suffix with some automatically generated prefix into out_folder
	:param current_folder:
	:param out_folder:
	:param prefix:
	:return:
	"""
    s = subdirs(current_folder, join=False)
    f = subfiles(current_folder, join=False)
    f = [i for i in f if i.endswith(suffix)]
    if current_folder.find("fold0") != -1:
        for fl in f:
            shutil.copy(os.path.join(current_folder, fl),
                        os.path.join(out_folder, prefix + fl))
    for su in s:
        if prefix == "":
            add = su
        else:
            add = "__" + su
        crawl_and_copy(os.path.join(current_folder, su),
                       out_folder,
                       prefix=prefix + add)
Example #7
def run_in_folder(folder):
    json_files = subfiles(folder, True, None, ".json", True)
    json_files = [
        i for i in json_files if not i.split("/")[-1].startswith(".")
        and not i.endswith("_globalMean.json")
    ]  # stupid mac
    for j in json_files:
        foreground_mean(j)
Example #8
def run_in_folder(folder):
    json_files = subfiles(folder, True, None, ".json", True)
    json_files = [
        i for i in json_files if not os.path.basename(i).startswith(".")
        and not i.endswith("_globalMean.json")
    ]  # stupid mac
    for j in json_files:
        foreground_mean(j)
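Several examples in this listing call subfiles positionally. Assuming the usual batchgenerators signature subfiles(folder, join=True, prefix=None, suffix=None, sort=True), the call above is equivalent to the more readable keyword form:

# Equivalent keyword form of subfiles(folder, True, None, ".json", True) under that assumed signature.
json_files = subfiles(folder, join=True, prefix=None, suffix=".json", sort=True)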
Example #9
def plan_and_preprocess(task_string,
                        processes_lowres=default_num_threads,
                        processes_fullres=3,
                        no_preprocessing=False):
    from tuframework.experiment_planning.experiment_planner_baseline_2DUNet import ExperimentPlanner2D
    from tuframework.experiment_planning.experiment_planner_baseline_3DUNet import ExperimentPlanner

    preprocessing_output_dir_this_task_train = preprocessing_output_dir + "/" + task_string
    cropped_out_dir = tuFramework_cropped_data + "/" + task_string
    if not os.path.isdir(preprocessing_output_dir_this_task_train):
        os.makedirs(preprocessing_output_dir_this_task_train)

    shutil.copy(cropped_out_dir + "/" + "dataset_properties.pkl",
                preprocessing_output_dir_this_task_train)
    shutil.copy(
        tuFramework_raw_data + "/" + task_string + "/" + "dataset.json",
        preprocessing_output_dir_this_task_train)

    exp_planner = ExperimentPlanner(cropped_out_dir,
                                    preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing((processes_lowres, processes_fullres))

    exp_planner = ExperimentPlanner2D(
        cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing(processes_fullres)

    # write which class is in which slice to all training cases (required to speed up 2D Dataloader)
    # This is done for all data so that if we wanted to use them with 2D we could do so

    if not no_preprocessing:
        p = Pool(default_num_threads)

        # if there is more than one my_data_identifier (different branches) then this code will run for all of them if
        # they start with the same string. Not problematic, but not pretty
        stages = [
            i for i in subdirs(
                preprocessing_output_dir_this_task_train, join=True, sort=True)
            if i.split("/")[-1].find("stage") != -1
        ]
        for s in stages:
            print(s.split("/")[-1])
            list_of_npz_files = subfiles(s, True, None, ".npz", True)  # join=True so the .pkl paths below resolve
            list_of_pkl_files = [i[:-4] + ".pkl" for i in list_of_npz_files]
            all_classes = []
            for pk in list_of_pkl_files:
                with open(pk, 'rb') as f:
                    props = pickle.load(f)
                all_classes_tmp = np.array(props['classes'])
                all_classes.append(all_classes_tmp[all_classes_tmp >= 0])
            p.map(add_classes_in_slice_info,
                  zip(list_of_npz_files, list_of_pkl_files, all_classes))
        p.close()
        p.join()
Example #10
def main():
    args = get_args()

    print("Preparing for preprocessing data...")

    # Validation set is fold 1
    fold = 1
    validation_fold_file = args.validation_fold_file

    # Make sure the model exists
    model_dir = args.model_dir
    model_path = os.path.join(model_dir, "plans.pkl")
    assert os.path.isfile(
        model_path), "Cannot find the model file {:}!".format(model_path)
    checkpoint_name = "model_final_checkpoint"

    # Other settings
    fp16 = False
    num_threads_preprocessing = args.num_threads_preprocessing
    raw_data_dir = args.raw_data_dir
    preprocessed_data_dir = args.preprocessed_data_dir

    # Open list containing validation images from specific fold (e.g. 1)
    validation_files = []
    with open(validation_fold_file) as f:
        for line in f:
            validation_files.append(line.rstrip())

    # Create output and preprocessed directory
    if not os.path.isdir(preprocessed_data_dir):
        os.makedirs(preprocessed_data_dir)

    # Create list of image locations (i.e. 4 images per case => 4 modalities)
    all_files = subfiles(raw_data_dir, suffix=".nii.gz", join=False, sort=True)
    list_of_lists = [[
        os.path.join(raw_data_dir, i) for i in all_files
        if i[:len(j)].startswith(j) and len(i) == (len(j) + 12)
    ] for j in validation_files]

    # Preprocess images, returns filenames list
    # This runs in multiprocess
    print("Acually preprocessing data...")
    preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold,
                                           fp16, list_of_lists,
                                           validation_files,
                                           preprocessed_data_dir,
                                           num_threads_preprocessing)

    print("Saving metadata of the preprocessed data...")
    with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"),
              "wb") as f:
        pickle.dump(preprocessed_files, f)

    print("Preprocessed data saved to {:}".format(preprocessed_data_dir))
    print("Done!")
Example #11
def create_lists_from_splitted_dataset_folder(folder):
    """
    does not rely on dataset.json
    :param folder:
    :return:
    """
    caseIDs = get_caseIDs_from_splitted_dataset_folder(folder)
    list_of_lists = []
    for f in caseIDs:
        list_of_lists.append(subfiles(folder, prefix=f, suffix=".nii.gz", join=True, sort=True))
    return list_of_lists
Example #12
def crawl_and_remove_hidden_from_decathlon(folder):
    folder = remove_trailing_slash(folder)
    assert folder.split('/')[-1].startswith("Task"), "This does not seem to be a decathlon folder. Please give me a " \
                                                     "folder that starts with TaskXX and has the subfolders imagesTr, " \
                                                     "labelsTr and imagesTs"
    subf = subfolders(folder, join=False)
    assert 'imagesTr' in subf, "This does not seem to be a decathlon folder. Please give me a " \
                                                     "folder that starts with TaskXX and has the subfolders imagesTr, " \
                                                     "labelsTr and imagesTs"
    assert 'imagesTs' in subf, "This does not seem to be a decathlon folder. Please give me a " \
                                                     "folder that starts with TaskXX and has the subfolders imagesTr, " \
                                                     "labelsTr and imagesTs"
    assert 'labelsTr' in subf, "This does not seem to be a decathlon folder. Please give me a " \
                                                     "folder that starts with TaskXX and has the subfolders imagesTr, " \
                                                     "labelsTr and imagesTs"

    _ = [os.remove(i) for i in subfiles(folder, prefix=".")]
    _ = [os.remove(i) for i in subfiles(join(folder, 'imagesTr'), prefix=".")]
    _ = [os.remove(i) for i in subfiles(join(folder, 'labelsTr'), prefix=".")]
    _ = [os.remove(i) for i in subfiles(join(folder, 'imagesTs'), prefix=".")]
Example #13
def preprocess_setup(preprocessed_data_dir):
    print("Preparing for preprocessing data...")

    # Validation set is fold 1
    fold = 1
    validation_fold_file = '../models/image_segmentation/tensorflow/3d_unet_mlperf/inference/nnUNet/folds/fold1_validation.txt'

    # Make sure the model exists
    model_dir = 'build/result/nnUNet/3d_fullres/Task043_BraTS2019/nnUNetTrainerV2__nnUNetPlansv2.mlperf.1'
    model_path = os.path.join(model_dir, "plans.pkl")
    assert os.path.isfile(
        model_path), "Cannot find the model file {:}!".format(model_path)
    checkpoint_name = "model_final_checkpoint"

    # Other settings
    fp16 = False
    num_threads_preprocessing = 12
    raw_data_dir = 'build/raw_data/nnUNet_raw_data/Task043_BraTS2019/imagesTr'

    # Open list containing validation images from specific fold (e.g. 1)
    validation_files = []
    with open(validation_fold_file) as f:
        for line in f:
            validation_files.append(line.rstrip())

    # Create output and preprocessed directory
    if not os.path.isdir(preprocessed_data_dir):
        os.makedirs(preprocessed_data_dir)

    # Create list of image locations (i.e. 4 images per case => 4 modalities)
    all_files = subfiles(raw_data_dir, suffix=".nii.gz", join=False, sort=True)
    list_of_lists = [[
        os.path.join(raw_data_dir, i) for i in all_files
        if i[:len(j)].startswith(j) and len(i) == (len(j) + 12)
    ] for j in validation_files]

    # Preprocess images, returns filenames list
    # This runs in multiprocess
    print("Acually preprocessing data...")

    preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold,
                                           fp16, list_of_lists,
                                           validation_files,
                                           preprocessed_data_dir,
                                           num_threads_preprocessing)

    print("Saving metadata of the preprocessed data...")
    with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"),
              "wb") as f:
        pickle.dump(preprocessed_files, f)

    print("Preprocessed data saved to {:}".format(preprocessed_data_dir))
    print("Done!")
Example #14
def unpack_dataset(folder, threads=8, key="data"):
    """
    unpacks all npz files in a folder to npy (whatever you want to have unpacked must be saved under key)
    :param folder:
    :param threads:
    :param key:
    :return:
    """
    p = Pool(threads)
    npz_files = subfiles(folder, True, None, ".npz", True)
    p.map(convert_to_npy, zip(npz_files, [key] * len(npz_files)))
    p.close()
    p.join()
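A hedged usage sketch (the folder path is hypothetical): the point of unpacking is that training dataloaders can then read plain .npy files instead of decompressing .npz archives on every access.

# Hypothetical preprocessed-data folder; every case.npz gains a case.npy next to it.
unpack_dataset("/data/nnUNet_preprocessed/Task043_BraTS2019/nnUNetData_plans_v2.1_stage0",
               threads=8, key="data")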
Example #15
def evaluate_folder(folder_with_gts, folder_with_predictions, labels):
    """
    writes a summary.json to folder_with_predictions
    :param folder_with_gts:
    :param folder_with_predictions:
    :param labels:
    :return:
    """
    files_gt = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    files_pred = subfiles(folder_with_predictions,
                          suffix=".nii.gz",
                          join=False)
    assert all([i in files_pred
                for i in files_gt]), "files missing in folder_with_predictions"
    assert all([i in files_gt
                for i in files_pred]), "files missing in folder_with_gts"
    test_ref_pairs = [(join(folder_with_predictions,
                            i), join(folder_with_gts, i)) for i in files_pred]
    res = aggregate_scores(test_ref_pairs,
                           json_output_file=join(folder_with_predictions,
                                                 "summary.json"),
                           num_threads=8,
                           labels=labels)
    return res
Example #16
def preprocess_3dunet_ref(model_dir_base, preprocessed_data_dir_base):
    """
    Preprocess raw image data to pickle file.
    """

    print("Preparing for preprocessing data...")

    # Validation set is fold 1
    fold = 1
    validation_fold_file = os.path.join("data_maps", "brats", "val_map.txt")

    # Make sure the model exists
    model_dir = os.path.join(model_dir_base, "3d-unet", "nnUNet", "3d_fullres", "Task043_BraTS2019", "nnUNetTrainerV2__nnUNetPlansv2.mlperf.1")
    model_path = os.path.join(model_dir, "plans.pkl")
    assert os.path.isfile(model_path), "Cannot find the model file {:}!".format(model_path)
    checkpoint_name = "model_final_checkpoint"

    # Other settings
    fp16 = False
    num_threads_preprocessing = 12
    raw_data_dir = os.path.join(preprocessed_data_dir_base, "brats", "brats_reference_raw", "Task043_BraTS2019", "imagesTr")
    preprocessed_data_dir = os.path.join(preprocessed_data_dir_base, "brats", "brats_reference_preprocessed")

    # Open list containing validation images from specific fold (e.g. 1)
    validation_files = []
    with open(validation_fold_file) as f:
        for line in f:
            validation_files.append(line.rstrip())

    # Create output and preprocessed directory
    if not os.path.isdir(preprocessed_data_dir):
        os.makedirs(preprocessed_data_dir)

    # Create list of image locations (i.e. 4 images per case => 4 modalities)
    all_files = subfiles(raw_data_dir, suffix=".nii.gz", join=False, sort=True)
    list_of_lists = [[os.path.join(raw_data_dir, i) for i in all_files if i[:len(j)].startswith(j) and
                      len(i) == (len(j) + 12)] for j in validation_files]

    # Preprocess images, returns filenames list
    # This runs in multiprocess
    print("Actually preprocessing data...")
    preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold, fp16, list_of_lists,
                                           validation_files, preprocessed_data_dir, num_threads_preprocessing)

    # Save list of pkl file paths to pkl file.
    print("Saving metadata of the preprocessed data...")
    with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"), "wb") as f:
        pickle.dump(preprocessed_files, f)
Example #17
def check_input_folder_and_return_caseIDs(input_folder,
                                          expected_num_modalities):
    print("This model expects %d input modalities for each image" %
          expected_num_modalities)
    files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True)

    maybe_case_ids = np.unique([i[:-12] for i in files])

    remaining = deepcopy(files)
    missing = []

    assert len(
        files
    ) > 0, "input folder did not contain any images (expected to find .nii.gz file endings)"

    # now check if all required files are present and that no unexpected files are remaining
    for c in maybe_case_ids:
        for n in range(expected_num_modalities):
            expected_output_file = c + "_%04.0d.nii.gz" % n
            if not isfile(join(input_folder, expected_output_file)):
                missing.append(expected_output_file)
            else:
                remaining.remove(expected_output_file)

    print(
        "Found %d unique case ids, here are some examples:" %
        len(maybe_case_ids),
        np.random.choice(maybe_case_ids, min(len(maybe_case_ids), 10)))
    print(
        "If they don't look right, make sure to double check your filenames. They must end with _0000.nii.gz etc"
    )

    if len(remaining) > 0:
        print(
            "found %d unexpected remaining files in the folder. Here are some examples:"
            % len(remaining),
            np.random.choice(remaining, min(len(remaining), 10)))

    if len(missing) > 0:
        print("Some files are missing:")
        print(missing)
        raise RuntimeError("missing files in input_folder")

    return maybe_case_ids
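As an illustration of the naming scheme the check enforces (case ID and modality count are made up; %04d mirrors the %04.0d format used above):

case_id = "prostate_04"          # hypothetical case with expected_num_modalities = 2
expected_files = [case_id + "_%04d.nii.gz" % n for n in range(2)]
# -> ['prostate_04_0000.nii.gz', 'prostate_04_0001.nii.gz']; any other .nii.gz file in the folder
# is reported as unexpected, and a missing modality raises RuntimeError.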
Example #18
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None,
                 run_postprocessing_on_folds: bool = True):
        if isinstance(self.network, DDP):
            net = self.network.module
        else:
            net = self.network
        ds = net.do_ds
        net.do_ds = False

        current_mode = self.network.training
        self.network.eval()

        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params'][
                    'force_separate_z']
                interpolation_order = self.plans['segmentation_export_params'][
                    'interpolation_order']
                interpolation_order_z = self.plans[
                    'segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs[
                'interpolation_order']
            interpolation_order_z = segmentation_export_kwargs[
                'interpolation_order_z']

        # predictions as they come from the network go here
        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)
        # this is for debug purposes
        my_input_args = {
            'do_mirroring': do_mirroring,
            'use_sliding_window': use_sliding_window,
            'step_size': step_size,
            'save_softmax': save_softmax,
            'use_gaussian': use_gaussian,
            'overwrite': overwrite,
            'validation_folder_name': validation_folder_name,
            'debug': debug,
            'all_in_gpu': all_in_gpu,
            'segmentation_export_kwargs': segmentation_export_kwargs,
        }
        save_json(my_input_args, join(output_folder, "validation_args.json"))

        if do_mirroring:
            if not self.data_aug_params['do_mirror']:
                raise RuntimeError(
                    "We did not train with mirroring so you cannot do inference with mirroring enabled"
                )
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(default_num_threads)
        results = []

        all_keys = list(self.dataset_val.keys())
        my_keys = all_keys[self.local_rank::dist.get_world_size()]
        # we cannot simply iterate over my_keys because we need to know pred_gt_tuples and valid_labels of all cases
        # for evaluation (which is done by local rank 0)
        for k in all_keys:
            properties = load_pickle(self.dataset[k]['properties_file'])
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])
            if k in my_keys:
                if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                        (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                    data = np.load(self.dataset[k]['data_file'])['data']

                    print(k, data.shape)
                    data[-1][data[-1] == -1] = 0

                    softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                        data[:-1],
                        do_mirroring=do_mirroring,
                        mirror_axes=mirror_axes,
                        use_sliding_window=use_sliding_window,
                        step_size=step_size,
                        use_gaussian=use_gaussian,
                        all_in_gpu=all_in_gpu,
                        mixed_precision=self.fp16)[1]

                    softmax_pred = softmax_pred.transpose(
                        [0] + [i + 1 for i in self.transpose_backward])

                    if save_softmax:
                        softmax_fname = join(output_folder, fname + ".npz")
                    else:
                        softmax_fname = None
                    """There is a problem with python process communication that prevents us from communicating obejcts
                    larger than 2 GB between processes (basically when the length of the pickle string that will be sent is
                    communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long
                    enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually
                    patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will
                    then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either
                    filename or np.ndarray and will handle this automatically"""
                    if np.prod(softmax_pred.shape) > (
                            2e9 / 4 * 0.85):  # *0.85 just to be safe
                        np.save(join(output_folder, fname + ".npy"),
                                softmax_pred)
                        softmax_pred = join(output_folder, fname + ".npy")

                    results.append(
                        export_pool.starmap_async(
                            save_segmentation_nifti_from_softmax,
                            ((softmax_pred,
                              join(output_folder,
                                   fname + ".nii.gz"), properties,
                              interpolation_order, self.regions_class_order,
                              None, None, softmax_fname, None,
                              force_separate_z, interpolation_order_z), )))

        _ = [i.get() for i in results]
        self.print_to_log_file("finished prediction")

        distributed.barrier()

        if self.local_rank == 0:
            # evaluate raw predictions
            self.print_to_log_file("evaluation of raw predictions")
            task = self.dataset_directory.split("/")[-1]
            job_name = self.experiment_name
            _ = aggregate_scores(pred_gt_tuples,
                                 labels=list(range(self.num_classes)),
                                 json_output_file=join(output_folder,
                                                       "summary.json"),
                                 json_name=job_name + " val tiled %s" %
                                 (str(use_sliding_window)),
                                 json_author="Fabian",
                                 json_task=task,
                                 num_threads=default_num_threads)

            if run_postprocessing_on_folds:
                # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
                # except the largest connected component for each class. To see if this improves results, we do this for all
                # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
                # have this applied during inference as well
                self.print_to_log_file("determining postprocessing")
                determine_postprocessing(
                    self.output_folder,
                    self.gt_niftis_folder,
                    validation_folder_name,
                    final_subf_name=validation_folder_name + "_postprocessed",
                    debug=debug)
                # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
                # They are always in that folder, even if no postprocessing was applied!

            # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
            # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
            # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
            # be used later
            gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
            maybe_mkdir_p(gt_nifti_folder)
            for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
                success = False
                attempts = 0
                e = None
                while not success and attempts < 10:
                    try:
                        shutil.copy(f, gt_nifti_folder)
                        success = True
                    except OSError as copy_error:
                        # bind to a separate name: "except ... as e" unbinds e when the block exits,
                        # which would otherwise break the "if e is not None" check below
                        e = copy_error
                        attempts += 1
                        sleep(1)
                if not success:
                    print("Could not copy gt nifti file %s into folder %s" %
                          (f, gt_nifti_folder))
                    if e is not None:
                        raise e

        self.network.train(current_mode)
        net.do_ds = ds
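The 2e9 / 4 * 0.85 threshold in validate() can be read as follows (assuming float32 softmax, 4 bytes per element):

# ~2e9 bytes is the practical pickle limit, / 4 converts bytes to float32 elements, * 0.85 leaves headroom:
max_elements = 2e9 / 4 * 0.85      # = 4.25e8 elements, i.e. roughly 1.7 GB of float32 data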
Example #19
0
    def initialize(self, training=True, force_load_plans=False):
        """
        For prediction of test cases just set training=False, this will prevent loading of training data and
        training batchgenerator initialization
        :param training:
        :return:
        """
        if not self.was_initialized:
            maybe_mkdir_p(self.output_folder)

            if force_load_plans or (self.plans is None):
                self.load_plans_file()

            self.process_plans(self.plans)

            self.setup_DA_params()

            self.folder_with_preprocessed_data = join(
                self.dataset_directory,
                self.plans['data_identifier'] + "_stage%d" % self.stage)
            if training:
                self.dl_tr, self.dl_val = self.get_basic_generators()
                if self.unpack_data:
                    if self.local_rank == 0:
                        print("unpacking dataset")
                        unpack_dataset(self.folder_with_preprocessed_data)
                        print("done")
                    else:
                        # we need to wait until worker 0 has finished unpacking
                        npz_files = subfiles(
                            self.folder_with_preprocessed_data,
                            suffix=".npz",
                            join=False)
                        case_ids = [i[:-4] for i in npz_files]
                        all_present = all([
                            isfile(
                                join(self.folder_with_preprocessed_data,
                                     i + ".npy")) for i in case_ids
                        ])
                        while not all_present:
                            print("worker", self.local_rank,
                                  "is waiting for unpacking")
                            sleep(3)
                            all_present = all([
                                isfile(
                                    join(self.folder_with_preprocessed_data,
                                         i + ".npy")) for i in case_ids
                            ])
                        # there is some slight chance that there may arise some error because dataloaders are loading a file
                        # that is still being written by worker 0. We ignore this for now and address it only if it becomes
                        # relevant
                        # (this can occur because while worker 0 writes the file is technically present so the other workers
                        # will proceed and eventually try to read it)
                else:
                    print(
                        "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                        "will wait all winter for your model to finish!")

                # setting weights for deep supervision losses
                net_numpool = len(self.net_num_pool_op_kernel_sizes)

                # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases
                # this gives higher resolution outputs more weight in the loss
                weights = np.array([1 / (2**i) for i in range(net_numpool)])

                # we don't use the lowest resolution output. Normalize weights so that they sum to 1
                mask = np.array([
                    True if i < net_numpool - 1 else False
                    for i in range(net_numpool)
                ])
                weights[~mask] = 0
                weights = weights / weights.sum()
                self.ds_loss_weights = weights

                seeds_train = np.random.random_integers(
                    0, 99999, self.data_aug_params.get('num_threads'))
                seeds_val = np.random.random_integers(
                    0, 99999,
                    max(self.data_aug_params.get('num_threads') // 2, 1))
                print("seeds train", seeds_train)
                print("seeds_val", seeds_val)
                self.tr_gen, self.val_gen = get_moreDA_augmentation(
                    self.dl_tr,
                    self.dl_val,
                    self.data_aug_params['patch_size_for_spatialtransform'],
                    self.data_aug_params,
                    deep_supervision_scales=self.deep_supervision_scales,
                    seeds_train=seeds_train,
                    seeds_val=seeds_val)
                self.print_to_log_file("TRAINING KEYS:\n %s" %
                                       (str(self.dataset_tr.keys())),
                                       also_print_to_console=False)
                self.print_to_log_file("VALIDATION KEYS:\n %s" %
                                       (str(self.dataset_val.keys())),
                                       also_print_to_console=False)
            else:
                pass

            self.initialize_network()
            self.initialize_optimizer_and_scheduler()
            self._maybe_init_amp()
            self.network = DDP(self.network)

        else:
            self.print_to_log_file(
                'self.was_initialized is True, not running self.initialize again'
            )
        self.was_initialized = True
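A worked example of the deep-supervision weights computed in initialize() above, for a hypothetical net_numpool of 5:

import numpy as np

net_numpool = 5
weights = np.array([1 / (2 ** i) for i in range(net_numpool)])   # [1., 0.5, 0.25, 0.125, 0.0625]
mask = np.array([i < net_numpool - 1 for i in range(net_numpool)])
weights[~mask] = 0                                               # zero out the lowest-resolution output
weights = weights / weights.sum()                                # [0.533, 0.267, 0.133, 0.067, 0.]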
Example #20
folder = "/home/fabian/drives/E132-Projekte/Projects/2018_MedicalDecathlon/Leaderboard"
task_descriptors = [
    '2D final 2', '2D final, less pool, dc and topK, fold0',
    '2D final pseudo3d 7, fold0', '2D final, less pool, dc and ce, fold0',
    '3D stage0 final 2, fold0', '3D fullres final 2, fold0'
]
task_ids_with_no_stage0 = [
    "Task001_BrainTumour", "Task004_Hippocampus", "Task005_Prostate"
]

mean_scores = OrderedDict()
for t in task_descriptors:
    mean_scores[t] = OrderedDict()

json_files = subfiles(folder, True, None, ".json", True)
json_files = [
    i for i in json_files if not os.path.basename(i).startswith(".")
]  # stupid mac
for j in json_files:
    with open(j, 'r') as f:
        res = json.load(f)
    task = res['task']
    if task != "Task999_ALL":
        name = res['name']
        if name in task_descriptors:
            if task not in list(mean_scores[name].keys()):
                mean_scores[name][task] = res['results']['mean']['mean']
            else:
                raise RuntimeError("duplicate task %s for description %s" %
                                   (task, name))
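The loop above fills a nested OrderedDict with one entry per (task descriptor, task) pair; a hypothetical way to inspect it:

# Prints one mean-of-means score per task descriptor and task (values come from the summary json files).
for name, per_task in mean_scores.items():
    for task, score in per_task.items():
        print(name, task, score)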
Example #21
def pack_dataset(folder, threads=8, key="data"):
    p = Pool(threads)
    npy_files = subfiles(folder, True, None, '.npy', True)
    p.map(save_as_npz, zip(npy_files, [key] * len(npy_files)))
    p.close()
    p.join()
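pack_dataset mirrors unpack_dataset from Example #14; a hedged usage sketch with a made-up folder path (save_as_npz is assumed to re-compress each .npy under the same key):

# Hypothetical folder that was previously unpacked; .npy files are written back into .npz archives.
pack_dataset("/data/nnUNet_preprocessed/Task043_BraTS2019/nnUNetData_plans_v2.1_stage0",
             threads=8, key="data")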