Example #1
    def run_training(self):
        dct = OrderedDict()
        for k in self.__dir__():
            if not k.startswith("__"):
                if not callable(getattr(self, k)):
                    dct[k] = str(getattr(self, k))
        del dct['plans']
        del dct['intensity_properties']
        del dct['dataset']
        del dct['dataset_tr']
        del dct['dataset_val']
        save_json(dct, join(self.output_folder, "debug.json"))

        import shutil

        shutil.copy(self.plans_file, join(self.output_folder_base,
                                          "plans.pkl"))

        super(nnUNetTrainer, self).run_training()
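
The method above dumps every non-dunder, non-callable trainer attribute to debug.json before delegating to the parent class. A minimal standalone sketch of that attribute-dump pattern (class and attribute names are hypothetical):

from collections import OrderedDict

class DummyTrainer:
    initial_lr = 1e-2
    batch_size = 2

t = DummyTrainer()
dct = OrderedDict((k, str(getattr(t, k))) for k in dir(t)
                  if not k.startswith("__") and not callable(getattr(t, k)))
print(dct)  # OrderedDict([('batch_size', '2'), ('initial_lr', '0.01')])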
Example #2
 def update_fold(self, fold):
     """
     used to swap between folds for inference (ensemble of models from cross-validation)
     DO NOT USE DURING TRAINING AS THIS WILL NOT UPDATE THE DATASET SPLIT AND THE DATA AUGMENTATION GENERATORS
     :param fold:
     :return:
     """
     if fold is not None:
         if isinstance(fold, str):
             assert fold == "all", "if self.fold is a string then it must be \'all\'"
             if self.output_folder.endswith("%s" % str(self.fold)):
                 self.output_folder = self.output_folder_base
             self.output_folder = join(self.output_folder, "%s" % str(fold))
         else:
             if self.output_folder.endswith("fold_%s" % str(self.fold)):
                 self.output_folder = self.output_folder_base
             self.output_folder = join(self.output_folder,
                                       "fold_%s" % str(fold))
         self.fold = fold
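
A hedged sketch of how update_fold is meant to be used when ensembling the models of a cross-validation at inference time (the checkpoint-loading and prediction calls are assumptions based on the nnU-Net trainer API):

predictions = []
for fold in range(5):
    trainer.update_fold(fold)  # points output_folder at fold_0 ... fold_4
    trainer.load_best_checkpoint(train=False)  # assumed API
    predictions.append(
        trainer.predict_preprocessed_data_return_seg_and_softmax(data)[1])
mean_softmax = sum(predictions) / len(predictions)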
Example #3
def crawl_and_remove_hidden_from_decathlon(folder):
    folder = remove_trailing_slash(folder)
    msg = "This does not seem to be a decathlon folder. Please give me a " \
          "folder that starts with TaskXX and has the subfolders imagesTr, " \
          "labelsTr and imagesTs"
    assert folder.split('/')[-1].startswith("Task"), msg
    subf = subfolders(folder, join=False)
    assert 'imagesTr' in subf, msg
    assert 'imagesTs' in subf, msg
    assert 'labelsTr' in subf, msg

    _ = [os.remove(i) for i in subfiles(folder, prefix=".")]
    _ = [os.remove(i) for i in subfiles(join(folder, 'imagesTr'), prefix=".")]
    _ = [os.remove(i) for i in subfiles(join(folder, 'labelsTr'), prefix=".")]
    _ = [os.remove(i) for i in subfiles(join(folder, 'imagesTs'), prefix=".")]
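
Typical usage, assuming a Medical Segmentation Decathlon layout (path hypothetical): this strips macOS/hidden dotfiles such as ._case.nii.gz that would otherwise confuse the conversion scripts.

crawl_and_remove_hidden_from_decathlon("/data/Task04_Hippocampus")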
Example #4
def create_lists_from_splitted_dataset(base_folder_splitted):
    lists = []

    json_file = join(base_folder_splitted, "dataset.json")
    with open(json_file) as jsn:
        d = json.load(jsn)
        training_files = d['training']
    num_modalities = len(d['modality'].keys())
    for tr in training_files:
        cur_pat = []
        for mod in range(num_modalities):
            cur_pat.append(
                join(base_folder_splitted, "imagesTr",
                     Path(tr['image']).parts[-1][:-7] +
                     "_%04.0d.nii.gz" % mod))
        cur_pat.append(
            join(base_folder_splitted, "labelsTr",
                 Path(tr['label']).parts[-1]))
        lists.append(cur_pat)
    return lists, {int(i): d['modality'][str(i)] for i in d['modality'].keys()}
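
The "_%04.0d" suffix encodes the modality index as four zero-padded digits. For a dataset.json entry {"image": "./imagesTr/la_007.nii.gz", "label": "./labelsTr/la_007.nii.gz"} with two modalities, the resulting per-case list would be (base folder hypothetical):

['/data/Task99/imagesTr/la_007_0000.nii.gz',
 '/data/Task99/imagesTr/la_007_0001.nii.gz',
 '/data/Task99/labelsTr/la_007.nii.gz']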
Example #5
 def plot_network_architecture(self):
     try:
         from batchgenerators.utilities.file_and_folder_operations import join
         import hiddenlayer as hl
         g = hl.build_graph(self.network, torch.rand((1, self.num_input_channels, *self.patch_size)).cuda(),
                            transforms=None)
         g.save(join(self.output_folder, "network_architecture.pdf"))
         del g
     except Exception as e:
         self.print_to_log_file("Unable to plot network architecture:")
         self.print_to_log_file(e)
     finally:
         torch.cuda.empty_cache()
Example #6
 def run_preprocessing(self, num_threads):
     if os.path.isdir(
             join(self.preprocessed_output_folder, "gt_segmentations")):
         shutil.rmtree(
             join(self.preprocessed_output_folder, "gt_segmentations"))
     shutil.copytree(
         join(self.folder_with_cropped_data, "gt_segmentations"),
         join(self.preprocessed_output_folder, "gt_segmentations"))
     normalization_schemes = self.plans['normalization_schemes']
     use_nonzero_mask_for_normalization = self.plans['use_mask_for_norm']
     intensityproperties = self.plans['dataset_properties'][
         'intensityproperties']
     preprocessor = PreprocessorFor2D(normalization_schemes,
                                      use_nonzero_mask_for_normalization,
                                      self.transpose_forward,
                                      intensityproperties)
     target_spacings = [
         i["current_spacing"] for i in self.plans_per_stage.values()
     ]
     preprocessor.run(target_spacings, self.folder_with_cropped_data,
                      self.preprocessed_output_folder,
                      self.plans['data_identifier'], num_threads)
Example #7
def evaluate_folder(folder_with_gts, folder_with_predictions, labels):
    """
    writes a summary.json to folder_with_predictions
    :param folder_with_gts:
    :param folder_with_predictions:
    :return:
    """
    files_gt = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    files_pred = subfiles(folder_with_predictions,
                          suffix=".nii.gz",
                          join=False)
    assert all([i in files_pred
                for i in files_gt]), "files missing in folder_with_predictions"
    assert all([i in files_gt
                for i in files_pred]), "files missing in folder_with_gts"
    test_ref_pairs = [(join(folder_with_predictions,
                            i), join(folder_with_gts, i)) for i in files_pred]
    res = aggregate_scores(test_ref_pairs,
                           json_output_file=join(folder_with_predictions,
                                                 "summary.json"),
                           num_threads=8,
                           labels=labels)
    return res
Example #8
def evaluate_folder(folder_with_gts: str, folder_with_predictions: str,
                    labels: tuple, **metric_kwargs):
    """
    writes a summary.json to folder_with_predictions
    :param folder_with_gts: folder where the ground truth segmentations are saved. Must be nifti files.
    :param folder_with_predictions: folder where the predicted segmentations are saved. Must be nifti files.
    :param labels: tuple of int with the labels in the dataset. For example (0, 1, 2, 3) for Task001_BrainTumour.
    :return:
    """
    files_gt = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    files_pred = subfiles(folder_with_predictions,
                          suffix=".nii.gz",
                          join=False)
    # assert all([i in files_pred for i in files_gt]), "files missing in folder_with_predictions"
    # assert all([i in files_gt for i in files_pred]), "files missing in folder_with_gts"
    test_ref_pairs = [(join(folder_with_predictions,
                            i), join(folder_with_gts, i)) for i in files_pred]
    res = aggregate_scores(test_ref_pairs,
                           json_output_file=join(folder_with_predictions,
                                                 "summary.json"),
                           num_threads=8,
                           labels=labels,
                           **metric_kwargs)
    return res
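
A hedged usage sketch (paths hypothetical; labels should match the dataset, including the background label 0):

res = evaluate_folder("/data/Task001/labelsTs",
                      "/output/Task001_predictions",
                      labels=(0, 1, 2, 3))
# per-case and aggregated metrics are also written to
# /output/Task001_predictions/summary.json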
Example #9
    def initialize(self, training=True, force_load_plans=False):
        """
        For prediction of test cases just set training=False, this will prevent loading of training data and
        training batchgenerator initialization
        :param training:
        :return:
        """

        maybe_mkdir_p(self.output_folder)

        if force_load_plans or (self.plans is None):
            self.load_plans_file()

        self.process_plans(self.plans)

        self.setup_DA_params()
        self.AutoAugment = AutoAugment(
            self.data_aug_params['patch_size_for_spatialtransform'],
            self.data_aug_params)
        self.folder_with_preprocessed_data = join(
            self.dataset_directory,
            self.plans['data_identifier'] + "_stage%d" % self.stage)
        if training:
            self.dl_tr, self.dl_val = self.get_basic_generators()
            if self.unpack_data:
                self.print_to_log_file("unpacking dataset")
                unpack_dataset(self.folder_with_preprocessed_data)
                self.print_to_log_file("done")
            else:
                self.print_to_log_file(
                    "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                    "will wait all winter for your model to finish!")
            self.tr_gen, self.val_gen = get_default_augmentation(
                self.dl_tr, self.dl_val,
                self.data_aug_params['patch_size_for_spatialtransform'],
                self.data_aug_params)
            self.print_to_log_file("TRAINING KEYS:\n %s" %
                                   (str(self.dataset_tr.keys())),
                                   also_print_to_console=False)
            self.print_to_log_file("VALIDATION KEYS:\n %s" %
                                   (str(self.dataset_val.keys())),
                                   also_print_to_console=False)
        else:
            pass
        self.initialize_network_optimizer_and_scheduler()
        # assert isinstance(self.network, (SegmentationNetwork, nn.DataParallel))
        self.was_initialized = True
Example #10
def check_input_folder_and_return_caseIDs(input_folder,
                                          expected_num_modalities):
    print("This model expects %d input modalities for each image" %
          expected_num_modalities)
    files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True)

    maybe_case_ids = np.unique([i[:-12] for i in files])

    remaining = deepcopy(files)
    missing = []

    assert len(
        files
    ) > 0, "input folder did not contain any images (expected to find .nii.gz file endings)"

    # now check if all required files are present and that no unexpected files are remaining
    for c in maybe_case_ids:
        for n in range(expected_num_modalities):
            expected_output_file = c + "_%04.0d.nii.gz" % n
            if not isfile(join(input_folder, expected_output_file)):
                missing.append(expected_output_file)
            else:
                remaining.remove(expected_output_file)

    print(
        "Found %d unique case ids, here are some examples:" %
        len(maybe_case_ids),
        np.random.choice(maybe_case_ids, min(len(maybe_case_ids), 10)))
    print(
        "If they don't look right, make sure to double check your filenames. They must end with _0000.nii.gz etc"
    )

    if len(remaining) > 0:
        print(
            "found %d unexpected remaining files in the folder. Here are some examples:"
            % len(remaining),
            np.random.choice(remaining, min(len(remaining), 10)))

    if len(missing) > 0:
        print("Some files are missing:")
        print(missing)
        raise RuntimeError("missing files in input_folder")

    return maybe_case_ids
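
For example, with expected_num_modalities=2 the input folder must contain, for every case ID, files following this naming scheme (case name hypothetical):

case_007_0000.nii.gz  # modality 0
case_007_0001.nii.gz  # modality 1

The case ID is recovered by stripping the last 12 characters ("_0000.nii.gz") from each filename, which is what i[:-12] above does.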
Example #11
def download_and_install_from_url(url):
    assert network_training_output_dir is not None, "Cannot install model because network_training_output_dir is not " \
                                                    "set (RESULTS_FOLDER missing as environment variable, see " \
                                                    "Installation instructions)"
    print('Downloading pretrained model from url:', url)
    import http.client
    http.client.HTTPConnection._http_vsn = 10
    http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0'
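    # forcing HTTP/1.0 disables chunked transfer encoding; presumably a workaround
    # for http.client.IncompleteRead errors seen with some download servers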

    import os
    home = os.path.expanduser('~')
    random_number = int(time() * 1e7)
    tempfile = join(home, '.nnunetdownload_%s' % str(random_number))

    try:
        download_file(url=url, local_filename=tempfile, chunk_size=8192 * 16)
        print("Download finished. Extracting...")
        install_model_from_zip_file(tempfile)
        print("Done")
    finally:
        if isfile(tempfile):
            os.remove(tempfile)
Example #12
# network_output_dir_base = "/media/zhx/My Passport/lung_lobe_seg/galaNet_trained_models"  # where trained networks are stored

raw_data_base_dir = "/data/fox_cloud/data/hexiang/lung_lobe_seg/galaNet_raw_data"  # folder where the raw data is stored
preprocessed_output_dir = "/data/fox_cloud/data/hexiang/lung_lobe_seg/galaNet_preprocessed"  # where the preprocessed data is stored
network_output_dir_base = "/data/fox_cloud/data/hexiang/lung_lobe_seg/galaNet_trained_models"  # where trained networks are stored

# raw_data_base_dir = "/data0/mzs/zhx/lung_lobe_seg/galaNet_raw_data"  # folder where the raw data is stored
# preprocessed_output_dir = "/data0/mzs/zhx/lung_lobe_seg/galaNet_preprocessed"  # where the preprocessed data is stored
# network_output_dir_base = "/data0/mzs/zhx/lung_lobe_seg/galaNet_trained_models"  # where trained networks are stored

# raw_data_base_dir = "/home/zenghexiang/data/zenghexiang/lung_lobe_seg/galaNet_raw_data"  # folder where the raw data is stored
# preprocessed_output_dir = "/home/zenghexiang/data/zenghexiang/lung_lobe_seg/galaNet_preprocessed"  # where the preprocessed data is stored
# network_output_dir_base = "/home/zenghexiang/data/zenghexiang/lung_lobe_seg/galaNet_trained_models"  # where trained networks are stored

if raw_data_base_dir is not None:
    raw_dicom_data_dir = join(raw_data_base_dir,
                              "dicom_data")  # folder for the raw DICOM data
    raw_cropped_data_dir = join(raw_data_base_dir,
                                "cropped_data")  # folder for the cropped raw data
    raw_splited_dir = join(raw_data_base_dir, "splited_data")
    maybe_mkdir_p(raw_data_base_dir)
    maybe_mkdir_p(raw_cropped_data_dir)
else:
    raise AssertionError(
        "Attention! raw_data_base_dir is not defined! Please set raw_data_base_dir in paths.py."
    )

if preprocessed_output_dir is not None:
    maybe_mkdir_p(preprocessed_output_dir)
    maybe_mkdir_p(join(preprocessed_output_dir, preprocessed_data_identifer))
    maybe_mkdir_p(join(preprocessed_output_dir, preprocessed_net_inputs))
else:
    raise AssertionError(
        "Attention! preprocessed_output_dir is not defined! Please set preprocessed_output_dir in paths.py."
    )
Example #13
    def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, step_size: float = 0.5,
                 save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None, run_postprocessing_on_folds: bool = True):
        """
        if debug=True then the temporary files generated for postprocessing determination will be kept
        """

        current_mode = self.network.training
        self.network.eval()

        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params']['force_separate_z']
                interpolation_order = self.plans['segmentation_export_params']['interpolation_order']
                interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs['interpolation_order']
            interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

        # predictions as they come from the network go here
        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)
        # this is for debug purposes
        my_input_args = {'do_mirroring': do_mirroring,
                         'use_sliding_window': use_sliding_window,
                         'step_size': step_size,
                         'save_softmax': save_softmax,
                         'use_gaussian': use_gaussian,
                         'overwrite': overwrite,
                         'validation_folder_name': validation_folder_name,
                         'debug': debug,
                         'all_in_gpu': all_in_gpu,
                         'segmentation_export_kwargs': segmentation_export_kwargs,
                         }
        save_json(my_input_args, join(output_folder, "validation_args.json"))

        if do_mirroring:
            if not self.data_aug_params['do_mirror']:
                raise RuntimeError("We did not train with mirroring so you cannot do inference with mirroring enabled")
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(default_num_threads)
        results = []

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(data[:-1],
                                                                                     do_mirroring=do_mirroring,
                                                                                     mirror_axes=mirror_axes,
                                                                                     use_sliding_window=use_sliding_window,
                                                                                     step_size=step_size,
                                                                                     use_gaussian=use_gaussian,
                                                                                     all_in_gpu=all_in_gpu,
                                                                                     mixed_precision=self.fp16)[1]

                softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None

                """There is a problem with python process communication that prevents us from communicating obejcts
                larger than 2 GB between processes (basically when the length of the pickle string that will be sent is
                communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long
                enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually
                patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will
                then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either
                filename or np.ndarray and will handle this automatically"""
                if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be save
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")

                results.append(export_pool.starmap_async(save_segmentation_nifti_from_softmax,
                                                         ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                                                           properties, interpolation_order, self.regions_class_order,
                                                           None, None,
                                                           softmax_fname, None, force_separate_z,
                                                           interpolation_order_z),
                                                          )
                                                         )
                               )

            pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                                   join(self.gt_niftis_folder, fname + ".nii.gz")])

        _ = [i.get() for i in results]
        self.print_to_log_file("finished prediction")

        # evaluate raw predictions
        self.print_to_log_file("evaluation of raw predictions")
        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder, "summary.json"),
                             json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
                             json_author="Fabian",
                             json_task=task, num_threads=default_num_threads)

        if run_postprocessing_on_folds:
            # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
            # except the largest connected component for each class. To see if this improves results, we do this for all
            # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
            # have this applied during inference as well
            self.print_to_log_file("determining postprocessing")
            determine_postprocessing(self.output_folder, self.gt_niftis_folder, validation_folder_name,
                                     final_subf_name=validation_folder_name + "_postprocessed", debug=debug)
            # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
            # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
        # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
        # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
        # be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            e = None
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError as os_err:
                    # keep a reference: the 'as' target is unbound when the except
                    # block exits, which would break the 'raise e' fallback below
                    e = os_err
                    attempts += 1
                    sleep(1)
            if not success:
                print("Could not copy gt nifti file %s into folder %s" % (f, gt_nifti_folder))
                if e is not None:
                    raise e

        self.network.train(current_mode)
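
The docstring inside validate explains why large softmax arrays are written to .npy files instead of being sent through the multiprocessing pipe. A minimal standalone sketch of that pattern (helper and file names hypothetical):

import os
from multiprocessing import Pool

import numpy as np

def export_worker(softmax_or_file):
    # workers accept either an ndarray or a path to a temporary .npy file
    if isinstance(softmax_or_file, str):
        arr = np.load(softmax_or_file)
        os.remove(softmax_or_file)  # clean up the temporary file after reading
    else:
        arr = softmax_or_file
    return arr.shape

if __name__ == '__main__':
    softmax = np.zeros((4, 64, 64, 64), dtype=np.float32)
    payload = softmax
    if softmax.nbytes > 2e9 * 0.85:  # too large to pickle through a Pipe
        np.save("tmp_softmax.npy", softmax)
        payload = "tmp_softmax.npy"
    with Pool(1) as p:
        print(p.apply_async(export_worker, (payload,)).get())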
Example #14
    def run_training(self):
        """
        if we run with -c then we need to set the correct lr for the first epoch, otherwise it will run the first
        continued epoch with self.initial_lr

        we also need to make sure deep supervision in the network is enabled for training, thus the wrapper
        :return:
        """
        self.maybe_update_lr(
            self.epoch
        )  # if we don't overwrite epoch then self.epoch+1 is used, which is not what we
        # want at the start of the training
        if isinstance(self.network, DDP):
            net = self.network.module
        else:
            net = self.network
        ds = net.do_ds
        net.do_ds = True

        _ = self.tr_gen.next()
        _ = self.val_gen.next()

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        self._maybe_init_amp()

        maybe_mkdir_p(self.output_folder)
        self.plot_network_architecture()

        if cudnn.benchmark and cudnn.deterministic:
            warn(
                "torch.backends.cudnn.deterministic is True indicating a deterministic training is desired. "
                "But torch.backends.cudnn.benchmark is True as well and this will prevent deterministic training! "
                "If you want deterministic then set benchmark=False")

        if not self.was_initialized:
            self.initialize(True)

        while self.epoch < self.max_num_epochs:
            self.print_to_log_file("\nepoch: ", self.epoch)
            epoch_start_time = time()
            train_losses_epoch = []

            # train one epoch
            self.network.train()

            if self.use_progress_bar:
                with trange(self.num_batches_per_epoch) as tbar:
                    for b in tbar:
                        tbar.set_description("Epoch {}/{}".format(
                            self.epoch + 1, self.max_num_epochs))

                        l = self.run_iteration(self.tr_gen, True)

                        tbar.set_postfix(loss=l)
                        train_losses_epoch.append(l)
            else:
                for _ in range(self.num_batches_per_epoch):
                    l = self.run_iteration(self.tr_gen, True)
                    train_losses_epoch.append(l)

            self.all_tr_losses.append(np.mean(train_losses_epoch))
            self.print_to_log_file("train loss : %.4f" %
                                   self.all_tr_losses[-1])

            with torch.no_grad():
                # validation with train=False
                self.network.eval()
                val_losses = []
                for b in range(self.num_val_batches_per_epoch):
                    l = self.run_iteration(self.val_gen, False, True)
                    val_losses.append(l)
                self.all_val_losses.append(np.mean(val_losses))
                self.print_to_log_file("validation loss: %.4f" %
                                       self.all_val_losses[-1])

                if self.also_val_in_tr_mode:
                    self.network.train()
                    # validation with train=True
                    val_losses = []
                    for b in range(self.num_val_batches_per_epoch):
                        l = self.run_iteration(self.val_gen, False)
                        val_losses.append(l)
                    self.all_val_losses_tr_mode.append(np.mean(val_losses))
                    self.print_to_log_file(
                        "validation loss (train=True): %.4f" %
                        self.all_val_losses_tr_mode[-1])

            self.update_train_loss_MA(
            )  # needed for lr scheduler and stopping of training

            continue_training = self.on_epoch_end()

            epoch_end_time = time()

            if not continue_training:
                # allows for early stopping
                break

            self.epoch += 1
            self.print_to_log_file("This epoch took %f s\n" %
                                   (epoch_end_time - epoch_start_time))

        self.epoch -= 1  # if we don't do this we can get a problem with loading model_final_checkpoint.

        if self.save_final_checkpoint:
            self.save_checkpoint(
                join(self.output_folder, "model_final_checkpoint.model"))

        if self.local_rank == 0:
            # now we can delete latest as it will be identical with final
            if isfile(join(self.output_folder, "model_latest.model")):
                os.remove(join(self.output_folder, "model_latest.model"))
            if isfile(join(self.output_folder, "model_latest.model.pkl")):
                os.remove(join(self.output_folder, "model_latest.model.pkl"))

        net.do_ds = ds
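
maybe_update_lr(self.epoch) is called explicitly at the top of run_training so that a continued run does not start with self.initial_lr. For reference, a sketch of the polynomial decay schedule nnU-Net-style trainers typically use for this (the 0.9 exponent is an assumption based on nnUNetTrainerV2):

def poly_lr(epoch, max_epochs, initial_lr, exponent=0.9):
    # learning rate decays polynomially from initial_lr towards 0
    return initial_lr * (1 - epoch / max_epochs) ** exponent

# poly_lr(0, 1000, 0.01) == 0.01; poly_lr(500, 1000, 0.01) ≈ 0.0054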
Example #15
    def initialize(self, training=True, force_load_plans=False):
        """
        :param training:
        :return:
        """
        if not self.was_initialized:
            maybe_mkdir_p(self.output_folder)

            if force_load_plans or (self.plans is None):
                self.load_plans_file()

            self.process_plans(self.plans)

            self.setup_DA_params()

            self.folder_with_preprocessed_data = join(
                self.dataset_directory,
                self.plans['data_identifier'] + "_stage%d" % self.stage)
            if training:
                self.dl_tr, self.dl_val = self.get_basic_generators()
                if self.unpack_data:
                    if self.local_rank == 0:
                        print("unpacking dataset")
                        unpack_dataset(self.folder_with_preprocessed_data)
                        print("done")
                    distributed.barrier()
                else:
                    print(
                        "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                        "will wait all winter for your model to finish!")

                # setting weights for deep supervision losses
                net_numpool = len(self.net_num_pool_op_kernel_sizes)

                # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases
                # this gives higher resolution outputs more weight in the loss
                weights = np.array([1 / (2**i) for i in range(net_numpool)])

                # we don't use the lowest resolution output. Normalize weights so that they sum to 1
                mask = np.array([
                    True if i < net_numpool - 1 else False
                    for i in range(net_numpool)
                ])
                weights[~mask] = 0
                weights = weights / weights.sum()
                self.ds_loss_weights = weights
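                # e.g. for net_numpool = 5: raw weights are [1, 0.5, 0.25, 0.125, 0.0625];
                # zeroing the lowest output and renormalizing gives roughly
                # [0.533, 0.267, 0.133, 0.067, 0.0]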

                # np.random.random_integers is deprecated (removed in recent NumPy);
                # randint's upper bound is exclusive, so 100000 keeps the same range
                seeds_train = np.random.randint(
                    0, 100000, self.data_aug_params.get('num_threads'))
                seeds_val = np.random.randint(
                    0, 100000,
                    max(self.data_aug_params.get('num_threads') // 2, 1))
                print("seeds train", seeds_train)
                print("seeds_val", seeds_val)
                self.tr_gen, self.val_gen = get_moreDA_augmentation(
                    self.dl_tr,
                    self.dl_val,
                    self.data_aug_params['patch_size_for_spatialtransform'],
                    self.data_aug_params,
                    deep_supervision_scales=self.deep_supervision_scales,
                    seeds_train=seeds_train,
                    seeds_val=seeds_val,
                    pin_memory=self.pin_memory)
                self.print_to_log_file("TRAINING KEYS:\n %s" %
                                       (str(self.dataset_tr.keys())),
                                       also_print_to_console=False)
                self.print_to_log_file("VALIDATION KEYS:\n %s" %
                                       (str(self.dataset_val.keys())),
                                       also_print_to_console=False)
            else:
                pass

            self.initialize_network()
            self.initialize_optimizer_and_scheduler()
            self.network = DDP(self.network, device_ids=[self.local_rank])

        else:
            self.print_to_log_file(
                'self.was_initialized is True, not running self.initialize again'
            )
        self.was_initialized = True
Example #16
    def initialize(self, training=True, force_load_plans=False):
        """
        For prediction of test cases just set training=False, this will prevent loading of training data and
        training batchgenerator initialization
        :param training:
        :return:
        """
        if not self.was_initialized:
            maybe_mkdir_p(self.output_folder)

            if force_load_plans or (self.plans is None):
                self.load_plans_file()

            self.process_plans(self.plans)

            self.setup_DA_params()

            self.folder_with_preprocessed_data = join(
                self.dataset_directory,
                self.plans['data_identifier'] + "_stage%d" % self.stage)
            if training:
                self.dl_tr, self.dl_val = self.get_basic_generators()
                if self.unpack_data:
                    if self.local_rank == 0:
                        print("unpacking dataset")
                        unpack_dataset(self.folder_with_preprocessed_data)
                        print("done")
                    else:
                        # we need to wait until worker 0 has finished unpacking
                        npz_files = subfiles(
                            self.folder_with_preprocessed_data,
                            suffix=".npz",
                            join=False)
                        case_ids = [i[:-4] for i in npz_files]
                        all_present = all([
                            isfile(
                                join(self.folder_with_preprocessed_data,
                                     i + ".npy")) for i in case_ids
                        ])
                        while not all_present:
                            print("worker", self.local_rank,
                                  "is waiting for unpacking")
                            sleep(3)
                            all_present = all([
                                isfile(
                                    join(self.folder_with_preprocessed_data,
                                         i + ".npy")) for i in case_ids
                            ])
                        # there is some slight chance that an error may arise because the dataloaders are loading a file
                        # that is still being written by worker 0. We ignore this for now and address it only if it becomes
                        # relevant
                        # (this can occur because while worker 0 writes, the file is technically present so the other workers
                        # will proceed and eventually try to read it)
                else:
                    print(
                        "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                        "will wait all winter for your model to finish!")

                # setting weights for deep supervision losses
                net_numpool = len(self.net_num_pool_op_kernel_sizes)

                # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases
                # this gives higher resolution outputs more weight in the loss
                weights = np.array([1 / (2**i) for i in range(net_numpool)])

                # we don't use the lowest resolution output. Normalize weights so that they sum to 1
                mask = np.array([
                    True if i < net_numpool - 1 else False
                    for i in range(net_numpool)
                ])
                weights[~mask] = 0
                weights = weights / weights.sum()
                self.ds_loss_weights = weights

                # np.random.random_integers is deprecated (removed in recent NumPy);
                # randint's upper bound is exclusive, so 100000 keeps the same range
                seeds_train = np.random.randint(
                    0, 100000, self.data_aug_params.get('num_threads'))
                seeds_val = np.random.randint(
                    0, 100000,
                    max(self.data_aug_params.get('num_threads') // 2, 1))
                print("seeds train", seeds_train)
                print("seeds_val", seeds_val)
                self.tr_gen, self.val_gen = get_moreDA_augmentation(
                    self.dl_tr,
                    self.dl_val,
                    self.data_aug_params['patch_size_for_spatialtransform'],
                    self.data_aug_params,
                    deep_supervision_scales=self.deep_supervision_scales,
                    seeds_train=seeds_train,
                    seeds_val=seeds_val)
                self.print_to_log_file("TRAINING KEYS:\n %s" %
                                       (str(self.dataset_tr.keys())),
                                       also_print_to_console=False)
                self.print_to_log_file("VALIDATION KEYS:\n %s" %
                                       (str(self.dataset_val.keys())),
                                       also_print_to_console=False)
            else:
                pass

            self.initialize_network()
            self.initialize_optimizer_and_scheduler()
            self._maybe_init_amp()
            self.network = DDP(self.network)

        else:
            self.print_to_log_file(
                'self.was_initialized is True, not running self.initialize again'
            )
        self.was_initialized = True
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        '--input_folder',
        help="Must contain all modalities for each patient in the correct"
        " order (same as training). Files must be named "
        "CASENAME_XXXX.nii.gz where XXXX is the modality "
        "identifier (0000, 0001, etc)",
        required=True)
    parser.add_argument('-o',
                        "--output_folder",
                        required=True,
                        help="folder for saving predictions")
    parser.add_argument('-t',
                        '--task_name',
                        help='task name or task ID, required.',
                        default=default_plans_identifier,
                        required=True)

    parser.add_argument(
        '-tr',
        '--trainer_class_name',
        help=
        'Name of the nnUNetTrainer used for 2D U-Net, full resolution 3D U-Net and low resolution '
        'U-Net. The default is %s. If you are running inference with the cascade and the folder '
        'pointed to by --lowres_segmentations does not contain the segmentation maps generated by '
        'the low resolution U-Net then the low resolution segmentation maps will be automatically '
        'generated. For this case, make sure to set the trainer class here that matches your '
        '--cascade_trainer_class_name (this part can be ignored if defaults are used).'
        % default_trainer,
        required=False,
        default=default_trainer)
    parser.add_argument(
        '-ctr',
        '--cascade_trainer_class_name',
        help=
        "Trainer class name used for predicting the 3D full resolution U-Net part of the cascade."
        "Default is %s" % default_cascade_trainer,
        required=False,
        default=default_cascade_trainer)

    parser.add_argument(
        '-m',
        '--model',
        help=
        "2d, 3d_lowres, 3d_fullres or 3d_cascade_fullres. Default: 3d_fullres",
        default="3d_fullres",
        required=False)

    parser.add_argument(
        '-p',
        '--plans_identifier',
        help='do not touch this unless you know what you are doing',
        default=default_plans_identifier,
        required=False)

    parser.add_argument(
        '-f',
        '--folds',
        nargs='+',
        default='None',
        help=
        "folds to use for prediction. Default is None which means that folds will be detected "
        "automatically in the model output folder")

    parser.add_argument(
        '-z',
        '--save_npz',
        required=False,
        action='store_true',
        help=
        "use this if you want to ensemble these predictions with those of other models. Softmax "
        "probabilities will be saved as compressed numpy arrays in output_folder and can be "
        "merged between output_folders with nnUNet_ensemble_predictions")

    parser.add_argument(
        '-l',
        '--lowres_segmentations',
        required=False,
        default='None',
        help=
        "if model is the highres stage of the cascade then you can use this folder to provide "
        "predictions from the low resolution 3D U-Net. If this is left at default, the "
        "predictions will be generated automatically (provided that the 3D low resolution U-Net "
        "network weights are present")

    parser.add_argument("--part_id",
                        type=int,
                        required=False,
                        default=0,
                        help="Used to parallelize the prediction of "
                        "the folder over several GPUs. If you "
                        "want to use n GPUs to predict this "
                        "folder you need to run this command "
                        "n times with --part_id=0, ... n-1 and "
                        "--num_parts=n (each with a different "
                        "GPU (for example via "
                        "CUDA_VISIBLE_DEVICES=X)")

    parser.add_argument("--num_parts",
                        type=int,
                        required=False,
                        default=1,
                        help="Used to parallelize the prediction of "
                        "the folder over several GPUs. If you "
                        "want to use n GPUs to predict this "
                        "folder you need to run this command "
                        "n times with --part_id=0, ... n-1 and "
                        "--num_parts=n (each with a different "
                        "GPU (via "
                        "CUDA_VISIBLE_DEVICES=X)")

    parser.add_argument(
        "--num_threads_preprocessing",
        required=False,
        default=6,
        type=int,
        help=
        "Determines many background processes will be used for data preprocessing. Reduce this if you "
        "run into out of memory (RAM) problems. Default: 6")

    parser.add_argument(
        "--num_threads_nifti_save",
        required=False,
        default=2,
        type=int,
        help=
        "Determines many background processes will be used for segmentation export. Reduce this if you "
        "run into out of memory (RAM) problems. Default: 2")

    parser.add_argument(
        "--disable_tta",
        required=False,
        default=False,
        action="store_true",
        help=
        "set this flag to disable test time data augmentation via mirroring. Speeds up inference "
        "by roughly factor 4 (2D) or 8 (3D)")

    parser.add_argument(
        "--overwrite_existing",
        required=False,
        default=False,
        action="store_true",
        help=
        "Set this flag if the target folder contains predictions that you would like to overwrite"
    )

    parser.add_argument("--mode",
                        type=str,
                        default="normal",
                        required=False,
                        help="Hands off!")
    parser.add_argument("--all_in_gpu",
                        type=str,
                        default="None",
                        required=False,
                        help="can be None, False or True. "
                        "Do not touch.")
    parser.add_argument("--step_size",
                        type=float,
                        default=0.5,
                        required=False,
                        help="don't touch")
    # parser.add_argument("--interp_order", required=False, default=3, type=int,
    #                     help="order of interpolation for segmentations, has no effect if mode=fastest. Do not touch this.")
    # parser.add_argument("--interp_order_z", required=False, default=0, type=int,
    #                     help="order of interpolation along z is z is done differently. Do not touch this.")
    # parser.add_argument("--force_separate_z", required=False, default="None", type=str,
    #                     help="force_separate_z resampling. Can be None, True or False, has no effect if mode=fastest. "
    #                          "Do not touch this.")
    parser.add_argument(
        '-chk',
        help='checkpoint name, default: model_final_checkpoint',
        required=False,
        default='model_final_checkpoint')
    parser.add_argument(
        '--disable_mixed_precision',
        default=False,
        action='store_true',
        required=False,
        help=
        'Predictions are done with mixed precision by default. This improves speed and reduces '
        'the required vram. If you want to disable mixed precision you can set this flag. Note '
        'that this is not recommended (mixed precision is ~2x faster!)')
    ### ----------- added by Camila
    parser.add_argument(
        '--disable_sliding_window',
        default=False,
        action='store_true',
        required=False,
        help='Disable sliding window to predict the whole image')
    ### ----------- end added by Camila

    args = parser.parse_args()
    input_folder = args.input_folder
    output_folder = args.output_folder
    part_id = args.part_id
    num_parts = args.num_parts
    folds = args.folds
    save_npz = args.save_npz
    lowres_segmentations = args.lowres_segmentations
    num_threads_preprocessing = args.num_threads_preprocessing
    num_threads_nifti_save = args.num_threads_nifti_save
    disable_tta = args.disable_tta
    step_size = args.step_size
    # interp_order = args.interp_order
    # interp_order_z = args.interp_order_z
    # force_separate_z = args.force_separate_z
    overwrite_existing = args.overwrite_existing
    mode = args.mode
    all_in_gpu = args.all_in_gpu
    model = args.model
    trainer_class_name = args.trainer_class_name
    cascade_trainer_class_name = args.cascade_trainer_class_name
    ### ----------- added by Camila
    disable_sliding_window = args.disable_sliding_window
    ### ----------- end added by Camila

    task_name = args.task_name

    if not task_name.startswith("Task"):
        task_id = int(task_name)
        task_name = convert_id_to_task_name(task_id)

    assert model in ["2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"], "-m must be 2d, 3d_lowres, 3d_fullres or " \
                                                                             "3d_cascade_fullres"

    # if force_separate_z == "None":
    #     force_separate_z = None
    # elif force_separate_z == "False":
    #     force_separate_z = False
    # elif force_separate_z == "True":
    #     force_separate_z = True
    # else:
    #     raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z)

    if lowres_segmentations == "None":
        lowres_segmentations = None

    if isinstance(folds, list):
        if folds[0] == 'all' and len(folds) == 1:
            pass
        else:
            folds = [int(i) for i in folds]
    elif folds == "None":
        folds = None
    else:
        raise ValueError("Unexpected value for argument folds")

    assert all_in_gpu in ['None', 'False', 'True']
    if all_in_gpu == "None":
        all_in_gpu = None
    elif all_in_gpu == "True":
        all_in_gpu = True
    elif all_in_gpu == "False":
        all_in_gpu = False

    # we need to catch the case where model is 3d cascade fullres and the low resolution folder has not been set.
    # In that case we need to try and predict with 3d low res first
    if model == "3d_cascade_fullres" and lowres_segmentations is None:
        print(
            "lowres_segmentations is None. Attempting to predict 3d_lowres first..."
        )
        assert part_id == 0 and num_parts == 1, "if you don't specify a --lowres_segmentations folder for the " \
                                                "inference of the cascade, custom values for part_id and num_parts " \
                                                "are not supported. If you wish to have multiple parts, please " \
                                                "run the 3d_lowres inference first (separately)"
        model_folder_name = join(
            network_training_output_dir, "3d_lowres", task_name,
            trainer_class_name + "__" + args.plans_identifier)
        assert isdir(
            model_folder_name
        ), "model output folder not found. Expected: %s" % model_folder_name
        lowres_output_folder = join(output_folder, "3d_lowres_predictions")
        predict_from_folder(model_folder_name,
                            input_folder,
                            lowres_output_folder,
                            folds,
                            False,
                            num_threads_preprocessing,
                            num_threads_nifti_save,
                            None,
                            part_id,
                            num_parts,
                            not disable_tta,
                            overwrite_existing=overwrite_existing,
                            mode=mode,
                            overwrite_all_in_gpu=all_in_gpu,
                            mixed_precision=not args.disable_mixed_precision,
                            step_size=step_size,
                            disable_sliding_window=disable_sliding_window)
        lowres_segmentations = lowres_output_folder
        torch.cuda.empty_cache()
        print("3d_lowres done")

    if model == "3d_cascade_fullres":
        trainer = cascade_trainer_class_name
    else:
        trainer = trainer_class_name

    model_folder_name = join(network_training_output_dir, model, task_name,
                             trainer + "__" + args.plans_identifier)
    print("using model stored in ", model_folder_name)
    assert isdir(
        model_folder_name
    ), "model output folder not found. Expected: %s" % model_folder_name

    predict_from_folder(model_folder_name,
                        input_folder,
                        output_folder,
                        folds,
                        save_npz,
                        num_threads_preprocessing,
                        num_threads_nifti_save,
                        lowres_segmentations,
                        part_id,
                        num_parts,
                        not disable_tta,
                        overwrite_existing=overwrite_existing,
                        mode=mode,
                        overwrite_all_in_gpu=all_in_gpu,
                        mixed_precision=not args.disable_mixed_precision,
                        step_size=step_size,
                        checkpoint_name=args.chk,
                        disable_sliding_window=disable_sliding_window)
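
The --part_id/--num_parts flags split one prediction job over several GPUs. A hedged sketch of the intended invocation pattern, assuming this script is exposed as an nnUNet_predict entry point (paths hypothetical):

import os
import subprocess

def predict_on_n_gpus(n, input_dir, output_dir, task):
    # launch n processes, each predicting one part of the folder on its own GPU
    procs = [subprocess.Popen(
        ["nnUNet_predict", "-i", input_dir, "-o", output_dir, "-t", task,
         "--part_id", str(part), "--num_parts", str(n)],
        env={**os.environ, "CUDA_VISIBLE_DEVICES": str(part)})
        for part in range(n)]
    for p in procs:
        p.wait()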
Example #18
    def validate(self,
                 do_mirroring=True,
                 use_train_mode=False,
                 tiled=True,
                 step=2,
                 save_softmax=True,
                 use_gaussian=True,
                 compute_global_dice=True,
                 override=True,
                 validation_folder_name='validation'):
        """
        2018_12_05: I added global accumulation of TP, FP and FN for the validation in here. This is because I believe
        that selecting models is easier when computing the Dice globally instead of independently for each case and
        then averaging over cases. The Lung dataset in particular is very unstable because of the small size of the
        Lung Lesions. My theory is that even though the global Dice is different from the actual target metric it is
        still a good enough substitute that allows us to get a lot more stable results when rerunning the same
        experiment twice. FYI: computer vision community uses the global jaccard for the evaluation of Cityscapes etc,
        not the per-image jaccard averaged over images.
        The reason I am accumulating TP/FP/FN here and not from the nifti files (which are used by our Evaluator) is
        that all predictions made here will have identical voxel spacing whereas voxel spacings in the nifti files
        will be different (which we could compensate for by using the volume per voxel but that would require the
        evaluator to understand spacings, which it does not at this point)

        :param do_mirroring:
        :param use_train_mode:
        :param tiled:
        :param step:
        :param save_softmax:
        :param use_gaussian:
        :param compute_global_dice:
        :param override:
        :param validation_folder_name:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(4)
        results = []
        global_tp = OrderedDict()
        global_fp = OrderedDict()
        global_fn = OrderedDict()

        for k in self.dataset_val.keys():
            print(k)
            properties = self.dataset[k]['properties']
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            if override or (not isfile(join(output_folder,
                                            fname + ".nii.gz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                softmax_pred = self.predict_preprocessed_data_return_softmax(
                    data[:-1],
                    do_mirroring,
                    1,
                    use_train_mode,
                    1,
                    mirror_axes,
                    tiled,
                    True,
                    step,
                    self.patch_size,
                    use_gaussian=use_gaussian)

                if compute_global_dice:
                    predicted_segmentation = softmax_pred.argmax(0)
                    gt_segmentation = data[-1]
                    labels = properties['classes']
                    labels = [int(i) for i in labels if i > 0]
                    for l in labels:
                        if l not in global_fn.keys():
                            global_fn[l] = 0
                        if l not in global_fp.keys():
                            global_fp[l] = 0
                        if l not in global_tp.keys():
                            global_tp[l] = 0
                        conf = ConfusionMatrix(
                            (predicted_segmentation == l).astype(int),
                            (gt_segmentation == l).astype(int))
                        conf.compute()
                        global_fn[l] += conf.fn
                        global_fp[l] += conf.fp
                        global_tp[l] += conf.tp

                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None
                """There is a problem with python process communication that prevents us from communicating obejcts 
                larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
                communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
                enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
                patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
                then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
                filename or np.ndarray and will handle this automatically"""
                if np.prod(softmax_pred.shape) > (2e9 / 4 *
                                                  0.9):  # *0.9 just to be save
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")
                results.append(
                    export_pool.starmap_async(
                        save_segmentation_nifti_from_softmax,
                        ((softmax_pred, join(output_folder,
                                             fname + ".nii.gz"), properties, 3,
                          None, None, None, softmax_fname, None), )))
                # save_segmentation_nifti_from_softmax(softmax_pred, join(output_folder, fname + ".nii.gz"),
                #                                               properties, 3, None, None,
                #                                               None,
                #                                               softmax_fname,
                #                                               None)

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]
        print("finished prediction, now evaluating...")

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(
            pred_gt_tuples,
            labels=list(range(self.num_classes)),
            json_output_file=join(output_folder, "summary.json"),
            json_name=job_name + " val tiled %s" % (str(tiled)),
            json_author="Fabian",
            json_task=task,
            num_threads=3)
        if compute_global_dice:
            global_dice = OrderedDict()
            all_labels = list(global_fn.keys())
            for l in all_labels:
                global_dice[int(l)] = float(
                    2 * global_tp[l] /
                    (2 * global_tp[l] + global_fn[l] + global_fp[l]))
            write_json(global_dice, join(output_folder, "global_dice.json"))
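The docstring above describes working around multiprocessing's 2 GB pickle limit by spilling large arrays to disk and passing a filename instead. A minimal standalone sketch of that pattern (function and file names are hypothetical, not part of nnU-Net):

import os
from multiprocessing import Pool

import numpy as np


def export_worker(softmax_or_file, out_file):
    # the worker accepts either an in-memory array or a path to a temporary .npy file
    if isinstance(softmax_or_file, str):
        arr = np.load(softmax_or_file)
        os.remove(softmax_or_file)  # delete the temporary file once it has been read
    else:
        arr = softmax_or_file
    np.save(out_file, arr.argmax(0))  # stand-in for the real nifti export


if __name__ == '__main__':
    softmax = np.random.rand(4, 96, 160, 160).astype(np.float32)
    # payloads near 2 GB break multiprocessing.Pipe, so spill to disk first
    if softmax.nbytes > 2e9 * 0.9:
        np.save("tmp_softmax.npy", softmax)
        softmax = "tmp_softmax.npy"
    with Pool(2) as pool:
        pool.starmap(export_worker, ((softmax, "pred.npy"),))
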
Example #19
0
# You need to set the following folders: base, preprocessing_output_dir and network_training_output_dir. See below for details.

# do not modify these unless you know what you are doing
my_output_identifier = "nnUNet"
default_plans_identifier = "nnUNetPlans"
default_data_identifier = 'nnUNet'

try:
    # base is the folder where the raw data is stored. You just need to set base only, the others will be created
    # automatically (they are subfolders of base).
    # Here I use environment variables to set the base folder. Environment variables allow me to use the same code on
    # different systems (and our compute cluster)
    base = '/vol/medic02/users/zl9518/KiTS19/kits19/nnunet_output'
    # raw_dataset_dir = join(base, "nnUNet_raw")
    raw_dataset_dir = base
    splitted_4d_output_dir = join(base, "nnUNet_raw_splitted")
    cropped_output_dir = join(base, "nnUNet_raw_cropped")
    maybe_mkdir_p(splitted_4d_output_dir)
    maybe_mkdir_p(raw_dataset_dir)
    maybe_mkdir_p(cropped_output_dir)
except KeyError:
    cropped_output_dir = splitted_4d_output_dir = raw_dataset_dir = base = None

# preprocessing_output_dir is where the preprocessed data is stored. If you run a training I very strongly recommend
# this is a SSD!
try:
    # preprocessing_output_dir = os.environ ['nnUNet_preprocessed']
    preprocessing_output_dir = join(base, "nnUNet_preprocessed")
except KeyError:
    preprocessing_output_dir = None
Example #20
0
 def __init__(self, folder_with_cropped_data, preprocessed_output_folder):
     super(ExperimentPlanner2D, self).__init__(folder_with_cropped_data,
                                               preprocessed_output_folder)
     self.data_identifier = default_data_identifier + "_2D"
     self.plans_fname = join(self.preprocessed_output_folder,
                             default_plans_identifier + "_plans_2D.pkl")
Example #21
0
def analyze_dataset(task_string, override=False, collect_intensityproperties=True, num_processes=default_num_threads):
    cropped_out_dir = join(nnUNet_cropped_data, task_string)
    dataset_analyzer = DatasetAnalyzer(cropped_out_dir, overwrite=override, num_processes=num_processes)
    _ = dataset_analyzer.analyze_dataset(collect_intensityproperties)
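A hedged usage sketch for the helper above (the task name is a placeholder):

    analyze_dataset("Task05_Prostate", override=False, collect_intensityproperties=True)
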
Example #22
0
Description: change to local
'''

import yaml
from batchgenerators.utilities.file_and_folder_operations import maybe_mkdir_p, join

config = yaml.load(open('./configs/default.yaml', 'r'), Loader=yaml.FullLoader)
default_plans_identifier = config['default_plans_identifier']
default_data_identifier = config['default_data_identifier']
default_trainer = config['default_trainer']
default_cascade_trainer = config['default_cascade_trainer']

DATASET_DIR = config['DATASET_DIR']
my_output_identifier = config['output_identifier']
pretrain_identifier = config['pretrain_identifier']
base = join(DATASET_DIR, "nnUNet_raw") if DATASET_DIR else None
preprocessing_output_dir = join(DATASET_DIR,
                                "nnUNet_preprocessed") if DATASET_DIR else None
network_training_output_dir_base = join(
    DATASET_DIR, "nnUNet_trained_models") if DATASET_DIR else None

if base is not None:
    maybe_mkdir_p(base)
    nnUNet_raw_data = join(base, "nnUNet_raw_data")
    nnUNet_cropped_data = join(base, "nnUNet_cropped_data")
    maybe_mkdir_p(nnUNet_raw_data)
    maybe_mkdir_p(nnUNet_cropped_data)
else:
    print(
        "the path of nnUNet_raw_data_base is not defined, please check configs.yaml."
    )
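A hedged sketch of the configs/default.yaml this snippet expects; the keys are taken from the code above, the values are placeholders:

default_plans_identifier: nnUNetPlansv2.1
default_data_identifier: nnUNet
default_trainer: nnUNetTrainerV2
default_cascade_trainer: nnUNetTrainerV2CascadeFullRes
DATASET_DIR: /path/to/nnunet_dataset
output_identifier: nnUNet
pretrain_identifier: none
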
Example #23
0
    args = parser.parse_args()
    input_folder = args.input_folder
    output_folder = args.output_folder
    part_id = args.part_id
    num_parts = args.num_parts
    folds = args.folds
    save_npz = args.save_npz
    lowres_segmentations = args.lowres_segmentations
    num_threads_preprocessing = args.num_threads_preprocessing
    num_threads_nifti_save = args.num_threads_nifti_save
    tta = args.tta
    overwrite = args.overwrite_existing

    output_folder_name = join(
        network_training_output_dir, args.model, args.task_name,
        args.nnunet_trainer + "__" + args.plans_identifier)
    print("using model stored in ", output_folder_name)
    assert isdir(output_folder_name
                 ), "model output folder not found: %s" % output_folder_name

    if lowres_segmentations == "None":
        lowres_segmentations = None

    if isinstance(folds, list):
        if folds[0] == 'all' and len(folds) == 1:
            pass
        else:
            folds = [int(i) for i in folds]
    elif folds == "None":
        folds = None
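For clarity, the folds argument above ends up in one of three shapes (a hedged illustration):

    # --folds 0 1 2  ->  [0, 1, 2]   (list of ints)
    # --folds all    ->  ['all']     (left untouched)
    # --folds None   ->  None        (string default mapped to None)
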
Example #24
0
    def initialize(self, training=True, force_load_plans=False):
        if not self.was_initialized:
            maybe_mkdir_p(self.output_folder)

            if force_load_plans or (self.plans is None):
                self.load_plans_file()

            self.process_plans(self.plans)

            self.setup_DA_params()

            ################# Here we wrap the loss for deep supervision ############
            # we need to know the number of outputs of the network
            net_numpool = len(self.net_num_pool_op_kernel_sizes)

            # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases
            # this gives higher resolution outputs more weight in the loss
            weights = np.array([1 / (2 ** i) for i in range(net_numpool)])

            # we don't use the lowest resolution output (the mask zeroes it). Normalize weights so that they sum to 1
            mask = np.array([True if i < net_numpool - 1 else False for i in range(net_numpool)])
            weights[~mask] = 0
            weights = weights / weights.sum()
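            # illustration (not from the source): for net_numpool = 5 the raw weights are
            # [1, 0.5, 0.25, 0.125, 0.0625]; the mask zeroes the lowest resolution and
            # normalizing gives approximately [0.533, 0.267, 0.133, 0.067, 0.0]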

            # now wrap the loss
            # self.loss = MultipleOutputLoss2(self.loss, weights)
            ################# END ###################

            self.folder_with_preprocessed_data = join(self.dataset_directory, self.plans['data_identifier'] +
                                                      "_stage%d" % self.stage)
            if training:
                self.dl_tr, self.dl_val = self.get_basic_generators()
                if self.unpack_data:
                    print("unpacking dataset")
                    unpack_dataset(self.folder_with_preprocessed_data)
                    print("done")
                else:
                    print(
                        "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                        "will wait all winter for your model to finish!")

                self.tr_gen, self.val_gen = get_insaneDA_augmentation(self.dl_tr, self.dl_val,
                                                                      self.data_aug_params[
                                                                          'patch_size_for_spatialtransform'],
                                                                      self.data_aug_params,
                                                                      deep_supervision_scales=self.deep_supervision_scales,
                                                                      pin_memory=self.pin_memory)
                self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())),
                                       also_print_to_console=False)
                self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())),
                                       also_print_to_console=False)
            else:
                pass

            self.initialize_network()
            self.initialize_optimizer_and_scheduler()

            assert isinstance(self.network, (SegmentationNetwork, nn.DataParallel))
        else:
            self.print_to_log_file('self.was_initialized is True, not running self.initialize again')
        self.was_initialized = True
Example #25
0
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 10 16:11:54 2021

@author: linhai
"""

import sys
import inspect
import os
from pathlib import Path
from batchgenerators.utilities.file_and_folder_operations import join, isdir, maybe_mkdir_p, subfiles, subdirs, isfile

#print (sys.path)
curDir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parDir = os.path.dirname(curDir)
sys.path.insert(0, parDir)
#sys.path.insert(0, )
p1 = 'C:\\Research\\IMA_on_segmentation\\nnUnet\\nnUNet\\rawData\\nnUNet_raw_data\\Task05_Prostate\\imagesTr'
p2 = 'C:/Research/IMA_on_segmentation/nnUnet/nnUNet/rawData/nnUNet_raw_data\\Task05_Prostate'
p3 = 'C:\\Research\\IMA_on_segmentation\\aaa'
p4 = 'C:/Research/IMA_on_segmentation/333/aaab'
print (os.path.join(p1, "aaa")+"\\")
print (isdir(join(p1, "aaa")+"\\"))
print(p1)
print (isdir(p2))
#os.mkdir(p4)
maybe_mkdir_p(p4)
#os.makedirs(p4, exist_ok=True)
default_trainer = "nnUNetTrainerV2"
default_cascade_trainer = "nnUNetTrainerV2CascadeFullRes"
"""
PLEASE READ paths.md FOR INFORMATION TO HOW TO SET THIS UP
"""

# base = os.environ['nnUNet_raw_data_base'] if "nnUNet_raw_data_base" in os.environ.keys() else None
# preprocessing_output_dir = os.environ['nnUNet_preprocessed'] if "nnUNet_preprocessed" in os.environ.keys() else None
# network_training_output_dir_base = os.path.join(os.environ['RESULTS_FOLDER']) if "RESULTS_FOLDER" in os.environ.keys() else None
base = "/home1/mksun/nnunet_dataset/nnUNet_raw/"  #raw data path
preprocessing_output_dir = "/home1/mksun/nnunet_dataset/nnUNet_preprocessed/"
network_training_output_dir_base = "/home1/mksun/experiment/ckpt/TMI/checkpoint"
image_validation_output_dir = "/home1/mksun/experiment/image/"

if base is not None:
    nnUNet_raw_data = join(base, "nnUNet_raw_splitted")
    nnUNet_cropped_data = join(base, "nnUNet_cropped_data")
    maybe_mkdir_p(nnUNet_raw_data)
    maybe_mkdir_p(nnUNet_cropped_data)
else:
    print(
        "nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files "
        "are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like "
        "this. If this is not intended, please read nnunet/paths.md for information on how to set this up properly."
    )
    nnUNet_cropped_data = nnUNet_raw_data = None

if preprocessing_output_dir is not None:
    maybe_mkdir_p(preprocessing_output_dir)
else:
    print(
Example #27
0
    def __init__(self,
                 plans_file,
                 fold,
                 output_folder=None,
                 dataset_directory=None,
                 batch_dice=True,
                 stage=None,
                 unpack_data=True,
                 deterministic=True,
                 fp16=False,
                 lam=2,
                 gpu="0",
                 save_dir=None):
        """
        :param deterministic:
        :param fold: can be either [0 ... 5) for cross-validation, 'all' to train on all available training data or
        None if you wish to load some checkpoint and do inference only
        :param plans_file: the pkl file generated by preprocessing. This file will determine all design choices
        :param subfolder_with_preprocessed_data: must be a subfolder of dataset_directory (just the name of the folder,
        not the entire path). This is where the preprocessed data lies that will be used for network training. We made
        this explicitly available so that differently preprocessed data can coexist and the user can choose what to use.
        Can be None if you are doing inference only.
        :param output_folder: where to store parameters, plot training progress and write validation output
        :param dataset_directory: the parent directory in which the preprocessed Task data is stored. This is required
        because the split information is stored in this directory. For running prediction only this input is not
        required and may be set to None
        :param batch_dice: compute dice loss for each sample and average over all samples in the batch or pretend the
        batch is a pseudo volume?
        :param stage: The plans file may contain several stages (used for lowres / highres / pyramid). Stage must be
        specified for training:
        if stage 1 exists then stage 1 is the high resolution stage, otherwise it's 0
        :param unpack_data: if False, npz preprocessed data will not be unpacked to npy. This consumes less space but
        is considerably slower! Running unpack_data=False with 2d should never be done!

        IMPORTANT: If you inherit from nnUNetTrainer and the init args change then you need to redefine self.init_args
        in your init accordingly. Otherwise checkpoints won't load properly!
        """
        super(nnUNetTrainer, self).__init__(deterministic, fp16, lam, gpu,
                                            save_dir)
        self.unpack_data = unpack_data
        self.init_args = (plans_file, fold, output_folder, dataset_directory,
                          batch_dice, stage, unpack_data, deterministic, fp16,
                          lam, gpu, save_dir)
        # set through arguments from init
        self.stage = stage
        self.experiment_name = self.__class__.__name__
        self.plans_file = plans_file
        self.output_folder = output_folder
        self.dataset_directory = dataset_directory
        self.output_folder_base = self.output_folder
        self.fold = fold

        self.plans = None

        # if we are running inference only then self.dataset_directory is set (due to checkpoint loading) but it is
        # irrelevant
        if self.dataset_directory is not None and isdir(
                self.dataset_directory):
            self.gt_niftis_folder = join(self.dataset_directory,
                                         "gt_segmentations")
        else:
            self.gt_niftis_folder = None

        self.folder_with_preprocessed_data = None

        # set in self.initialize()

        self.dl_tr = self.dl_val = None
        self.num_input_channels = self.num_classes = self.net_pool_per_axis = self.patch_size = self.batch_size = \
            self.threeD = self.base_num_features = self.intensity_properties = self.normalization_schemes = \
            self.net_num_pool_op_kernel_sizes = self.net_conv_kernel_sizes = None  # loaded automatically from plans_file
        self.basic_generator_patch_size = self.data_aug_params = self.transpose_forward = self.transpose_backward = None

        self.batch_dice = batch_dice
        self.loss = DC_and_CE_loss(
            {
                'batch_dice': self.batch_dice,
                'smooth': 1e-5,
                'do_bg': False,
                'square': False
            }, {})

        self.online_eval_foreground_dc = []
        self.online_eval_tp = []
        self.online_eval_fp = []
        self.online_eval_fn = []

        self.classes = self.do_dummy_2D_aug = self.use_mask_for_norm = self.only_keep_largest_connected_component = \
            self.min_region_size_per_class = self.min_size_per_class = None

        self.inference_pad_border_mode = "constant"
        self.inference_pad_kwargs = {'constant_values': 0}

        self.update_fold(fold)
        self.pad_all_sides = None

        self.lr_scheduler_eps = 1e-3
        self.lr_scheduler_patience = 30
        self.initial_lr = 3e-4
        self.weight_decay = 3e-5

        self.oversample_foreground_percent = 0.33
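A hedged instantiation sketch for this trainer, assuming the surrounding nnU-Net code is importable; all paths, the fold and the stage are placeholders:

    trainer = nnUNetTrainer(
        plans_file="/data/nnUNet_preprocessed/Task05_Prostate/nnUNetPlans_plans_3D.pkl",
        fold=0,
        output_folder="/results/Task05_Prostate",
        dataset_directory="/data/nnUNet_preprocessed/Task05_Prostate",
        batch_dice=True,
        stage=0)
    trainer.initialize(training=True)  # see the initialize method in Example #24
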
Example #28
0
my_output_identifier = "nnUNet"
default_plans_identifier = "nnUNetPlansv2.1"
default_data_identifier = 'nnUNet'
default_trainer = "nnUNetTrainerV2"
default_cascade_trainer = "nnUNetTrainerV2CascadeFullRes"

"""
PLEASE READ paths.md FOR INFORMATION TO HOW TO SET THIS UP
"""

base = os.environ['nnUNet_raw_data_base'] if "nnUNet_raw_data_base" in os.environ.keys() else None
preprocessing_output_dir = os.environ['nnUNet_preprocessed'] if "nnUNet_preprocessed" in os.environ.keys() else None
network_training_output_dir_base = os.path.join(os.environ['RESULTS_FOLDER']) if "RESULTS_FOLDER" in os.environ.keys() else None

if base is not None:
    nnUNet_raw_data = join(base, "nnUNet_raw_data")
    nnUNet_cropped_data = join(base, "nnUNet_cropped_data")
    maybe_mkdir_p(nnUNet_raw_data)
    maybe_mkdir_p(nnUNet_cropped_data)
else:
    print("nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files "
          "are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like "
          "this. If this is not intended, please read nnunet/paths.md for information on how to set this up properly.")
    nnUNet_cropped_data = nnUNet_raw_data = None

if preprocessing_output_dir is not None:
    maybe_mkdir_p(preprocessing_output_dir)
else:
    print("nnUNet_preprocessed is not defined and nnU-Net can not be used for preprocessing "
          "or training. If this is not intended, please read nnunet/pathy.md for information on how to set this up.")
    preprocessing_output_dir = None
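A hedged sketch of setting the environment variables this snippet reads, from Python before the module is imported (paths are placeholders):

    import os
    os.environ['nnUNet_raw_data_base'] = '/data/nnUNet_raw_data_base'
    os.environ['nnUNet_preprocessed'] = '/data/nnUNet_preprocessed'
    os.environ['RESULTS_FOLDER'] = '/data/nnUNet_trained_models'
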
Example #29
0
# You need to set the following folders: base, preprocessing_output_dir and network_training_output_dir. See below for details.


# do not modify these unless you know what you are doing
my_output_identifier = "nnUNet"
default_plans_identifier = "nnUNetPlans"
default_data_identifier = 'nnUNet'

try:
    # base is the folder where the raw data is stored. You just need to set base only, the others will be created
    # automatically (they are subfolders of base).
    # Here I use environment variables to set the base folder. Environment variables allow me to use the same code on
    # different systems (and our compute cluster). You can replace this line with something like:
    # base = "/path/to/my/folder"
    base = os.environ['nnUNet_base']
    raw_dataset_dir = join(base, "nnUNet_raw")
    splitted_4d_output_dir = join(base, "nnUNet_raw_splitted")
    cropped_output_dir = join(base, "nnUNet_raw_cropped")
    maybe_mkdir_p(splitted_4d_output_dir)
    maybe_mkdir_p(raw_dataset_dir)
    maybe_mkdir_p(cropped_output_dir)
except KeyError:
    cropped_output_dir = splitted_4d_output_dir = raw_dataset_dir = base = None

# preprocessing_output_dir is where the preprocessed data is stored. If you run a training I very strongly recommend
# this is a SSD!
try:
    # Here I use environment variables to set the folder. Environment variables allow me to use the same code on
    # different systems (and our compute cluster). You can replace this line with something like:
    # preprocessing_output_dir = "/path/to/my/folder_with_preprocessed_data"
    preprocessing_output_dir = os.environ['nnUNet_preprocessed']
Example #30
0
# do not modify these unless you know what you are doing
my_output_identifier = "nnUNet"
default_plans_identifier = "nnUNetPlansv2.1"
default_data_identifier = 'nnUNet'
default_trainer = "nnUNetTrainerV2"
default_cascade_trainer = "nnUNetTrainerV2CascadeFullRes"

"""
PLEASE READ paths.md FOR INFORMATION TO HOW TO SET THIS UP
"""

base = '.../COVID-19-CT-Seg/nnunet2_COVID19_FAB'
#preprocessing_output_dir = os.environ['nnUNet_preprocessed'] if "nnUNet_preprocessed" in os.environ.keys() else None
#network_training_output_dir_base = os.path.join(os.environ['RESULTS_FOLDER']) if "RESULTS_FOLDER" in os.environ.keys() else None
preprocessing_output_dir = join(base, 'nnUNet_preprocessed')
network_training_output_dir_base = join(base, 'RESULTS_FOLDER')
if base is not None:
    nnUNet_raw_data = join(base, "nnUNet_raw_data")
    nnUNet_cropped_data = join(base, "nnUNet_cropped_data")
    maybe_mkdir_p(nnUNet_raw_data)
    maybe_mkdir_p(nnUNet_cropped_data)
else:
    print("nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files "
          "are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like "
          "this. If this is not intended, please read nnunet/paths.md for information on how to set this up properly.")
    nnUNet_cropped_data = nnUNet_raw_data = None

if preprocessing_output_dir is not None:
    maybe_mkdir_p(preprocessing_output_dir)
else: