Example #1
def run(ctx, sleep, processes, quiet, once, timeout, group, **kwargs):
    from birder.core.queue import read

    click.secho('Running %s processes.' % processes)
    p = Pool(processes=processes, initializer=init_worker)
    config = {'echo': not quiet, 'timestamp': tz_now(), 'timeout': timeout}

    while True:
        # message = read()
        # if message:
        #     pass
        client.set('timestamp', tz_now().strftime('%Y-%m-%d  %H:%M:%S'))
        params = [(t, config) for t in registry if t.enabled]
        for param in params:
            param[1]['timestamp'] = tz_now()
        try:
            p.starmap_async(monit, params).get(9999999)
            if not once:
                if group:
                    click.secho('-' * 80)
                time.sleep(sleep)
        except (KeyboardInterrupt, SystemExit):
            break
        if once:
            break
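A minimal, self-contained sketch of the pattern above (a Pool with an initializer, starmap_async followed by a bounded get). The worker, init_worker and the argument tuples here are hypothetical stand-ins, not part of the birder project:

import signal
from multiprocessing import Pool

def init_worker():
    # Let workers ignore SIGINT so only the parent handles KeyboardInterrupt.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

def monit(target, config):
    # Hypothetical check; a real monitor would probe the target here.
    return target, config.get('echo', True)

if __name__ == '__main__':
    params = [('host-%d' % i, {'echo': True}) for i in range(8)]
    p = Pool(processes=4, initializer=init_worker)
    try:
        # get() with a (very large) timeout keeps the main process interruptible,
        # which is presumably why the original passes 9999999 instead of blocking forever.
        results = p.starmap_async(monit, params).get(9999999)
        print(results)
    except (KeyboardInterrupt, SystemExit):
        pass
    finally:
        p.close()
        p.join()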
Example #2
def main(FLAGS):
    save_dir = FLAGS.save_dir
    new_WH = (FLAGS.target_width, FLAGS.target_height)
    new_labels_name = FLAGS.target_csv
    data = pd.read_csv(FLAGS.src_csv)
    data["Frame"] = data["Frame"].map(lambda x: "images/" + x)

    create_clean_dir(save_dir)
    logger.info("Cleaned {} directory".format(save_dir))

    logger.info("Resizing begins")
    start = time.time()
    pool = Pool()
    pool.starmap_async(read_image_and_resize, [(image_path, new_WH, save_dir) for image_path in data["Frame"].unique()])

    pool.close()
    pool.join()
    end = time.time()

    logger.info("Time elapsed: {}".format(end - start))
    logger.info("Resizing ends")

    logger.info("Adjusting dataframe")
    image_path = data["Frame"][0]
    image = read_image(image_path)

    H, W, _ = image.shape
    src_size = (W, H)
    labels = adjust_bbox(data, src_size, new_WH)
    labels["Frame"] = labels["Frame"].map(lambda x: os.path.join(save_dir, os.path.basename(x)))

    create_clean_dir("mask")
    logger.info("Cleaned {} directory".format("mask"))
    logger.info("Masking begin")
    start = time.time()

    pool = Pool()
    tasks = [(new_WH, image_path, labels, "mask") for image_path in labels["Frame"].unique()]
    pool.starmap_async(generate_mask_pipeline, tasks)
    pool.close()
    pool.join()
    end = time.time()
    logger.info("Masking ends. Time elapsed: {}".format(end - start))

    labels["Mask"] = labels["Frame"].map(lambda x: "mask/" + os.path.basename(x))
    labels.to_csv(new_labels_name, index=False)

    logger.info("Adjustment saved to {}".format(new_labels_name))
Example #3
def determine_brats_postprocessing(folder_with_preds, folder_with_gt, postprocessed_output_dir, processes=8,
        thresholds=(0, 10, 50, 100, 200, 500, 750, 1000, 1500, 2500, 10000), replace_with=2):
    # find pairs
    nifti_gt = subfiles(folder_with_gt, suffix=".nii.gz", sort=True)

    p = Pool(processes)

    nifti_pred = subfiles(folder_with_preds, suffix='.nii.gz', sort=True)

    results = p.starmap_async(load_niftis_threshold_compute_dice, zip(nifti_gt, nifti_pred, [thresholds] * len(nifti_pred)))
    results = results.get()

    all_dc_per_threshold = {}
    for t in thresholds:
        all_dc_per_threshold[t] = np.array([i[1][t] for i in results])
        print(t, np.mean(all_dc_per_threshold[t]))

    means = [np.mean(all_dc_per_threshold[t]) for t in thresholds]
    best_threshold = thresholds[np.argmax(means)]
    print('best', best_threshold, means[np.argmax(means)])

    maybe_mkdir_p(postprocessed_output_dir)

    p.starmap(apply_brats_threshold, zip(nifti_pred, [postprocessed_output_dir]*len(nifti_pred), [best_threshold]*len(nifti_pred), [replace_with] * len(nifti_pred)))

    p.close()
    p.join()

    save_pickle((thresholds, means, best_threshold, all_dc_per_threshold), join(postprocessed_output_dir, "threshold.pkl"))
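Examples #3, #4, #6 and #7 all broadcast constant arguments across the file list with zip(files, [const] * len(files)). itertools.repeat expresses the same thing without building the throwaway lists; a small sketch with a hypothetical per-file worker:

from itertools import repeat
from multiprocessing import Pool

def apply_threshold(pred_file, out_dir, threshold, replace_with):
    # Hypothetical per-file worker.
    return pred_file, out_dir, threshold, replace_with

if __name__ == '__main__':
    files = ['a.nii.gz', 'b.nii.gz']
    p = Pool(2)
    # zip() stops at the shortest iterable, so the unbounded repeat() is safe here.
    res = p.starmap_async(apply_threshold,
                          zip(files, repeat('out'), repeat(200), repeat(2))).get()
    p.close()
    p.join()
    print(res)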
Example #4
def apply_postprocessing_to_folder(input_folder: str,
                                   output_folder: str,
                                   for_which_classes: list,
                                   min_valid_object_size: dict = None,
                                   num_processes=8):
    """
    applies removing of all but the largest connected component to all niftis in a folder
    :param min_valid_object_size:
    :param input_folder:
    :param output_folder:
    :param for_which_classes:
    :param num_processes:
    :return:
    """
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    p = Pool(num_processes)
    nii_files = subfiles(input_folder, suffix=".nii.gz", join=False)
    input_files = [input_folder + "/" + i for i in nii_files]
    out_files = [output_folder + "/" + i for i in nii_files]
    results = p.starmap_async(
        load_remove_save,
        zip(input_files, out_files, [for_which_classes] * len(input_files),
            [min_valid_object_size] * len(input_files)))
    res = results.get()
    p.close()
    p.join()
Example #5
def main():
    t0 = time.time()
    name_link = read_and_download()
    pool = Pool(processes=cpu_count())
    # when passing multiple arguments, pack them into tuples and use starmap or starmap_async
    result = pool.starmap_async(download, name_link)  # asynchronous and non-blocking; one argument tuple is passed per call
    # print(result.get())  # inspect the results returned by the worker processes
    pool.close()  # close the pool so it accepts no new tasks
    pool.join()  # call close() before join(), otherwise it raises an error; join() waits for all child processes to finish
    t1 = time.time() - t0
    print(t1)
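For reference, starmap_async differs from map_async only in how each element of the iterable is applied: starmap unpacks each tuple into positional arguments, as the comments above note. A minimal sketch with a hypothetical download function:

from multiprocessing import Pool

def download(name, link):
    # Hypothetical worker: pretend to fetch link and store it under name.
    return '%s <- %s' % (name, link)

if __name__ == '__main__':
    name_link = [('a.jpg', 'http://example.com/a'),
                 ('b.jpg', 'http://example.com/b')]
    with Pool() as pool:
        # map_async would call download(('a.jpg', 'http://example.com/a')),
        # passing the whole tuple as one argument; starmap_async unpacks it
        # into download('a.jpg', 'http://example.com/a').
        result = pool.starmap_async(download, name_link)
        print(result.get())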
Example #6
def multiprocessing_plot_overlay(list_of_image_files,
                                 list_of_seg_files,
                                 list_of_output_files,
                                 overlay_intensity,
                                 num_processes=8):
    p = Pool(num_processes)
    r = p.starmap_async(
        plot_overlay,
        zip(list_of_image_files, list_of_seg_files, list_of_output_files,
            [overlay_intensity] * len(list_of_output_files)))
    r.get()
    p.close()
    p.join()
Example #7
def multiprocessing_plot_overlay_preprocessed(list_of_case_files,
                                              list_of_output_files,
                                              overlay_intensity,
                                              num_processes=8,
                                              modality_index=0):
    p = Pool(num_processes)
    r = p.starmap_async(
        plot_overlay_preprocessed,
        zip(list_of_case_files, list_of_output_files,
            [overlay_intensity] * len(list_of_output_files),
            [modality_index] * len(list_of_output_files)))
    r.get()
    p.close()
    p.join()
Example #8
def evaluate_verse_folder(folder_pred,
                          folder_gt,
                          out_json="/home/fabian/verse.json"):
    p = Pool(default_num_threads)
    files_gt_bare = subfiles(folder_gt, join=False)
    assert all([isfile(join(folder_pred, i)) for i in files_gt_bare
                ]), "some files are missing in the predicted folder"
    files_pred = [join(folder_pred, i) for i in files_gt_bare]
    files_gt = [join(folder_gt, i) for i in files_gt_bare]

    results = p.starmap_async(evaluate_verse_case, zip(files_gt, files_pred))

    results = results.get()

    dct = {i: j for i, j in zip(files_gt_bare, results)}

    results_stacked = np.vstack(results)
    results_mean = np.nanmean(results_stacked, 0)
    overall_mean = np.nanmean(results_mean)

    save_json((dct, list(results_mean), overall_mean), out_json)
Example #9
def main(FLAGS):
    """Main Function

    Notes:
        1. Read image and resize to Target Width, Height
        2. Resize bounding box coordinates accordingly
        3. Create masks with the bounding box
             background is 0 and vehicle is 255

    """
    new_WH = (FLAGS.target_width, FLAGS.target_height)
    data = pd.read_csv(FLAGS.src_csv)
    # Only consider car and truck images
    data = data[data["Label"].isin(["Car", "Truck"])].reset_index(drop=True)

    # 123.jpg -> object-detection-crowdai/123.jpg
    data["Frame"] = data["Frame"].map(
        lambda x: os.path.join(FLAGS.data_dir, x))

    # IF dir exists, clean it
    create_clean_dir(FLAGS.save_dir)
    LOGGER.info("Cleaned {} directory".format(FLAGS.save_dir))

    LOGGER.info("Resizing begins")
    start = time.time()
    pool = Pool()
    pool.starmap_async(read_image_and_resize,
                       [(image_path, new_WH, FLAGS.save_dir)
                        for image_path in data["Frame"].unique()])

    pool.close()
    pool.join()
    end = time.time()

    LOGGER.info("Time elapsed: {}".format(end - start))
    LOGGER.info("Resizing ends")

    LOGGER.info("Adjusting dataframe")

    # Read any image file to get the WIDTH and HEIGHT
    image_path = data["Frame"][0]
    image = read_image(image_path)

    H, W, _ = image.shape
    src_size = (W, H)

    labels = adjust_bbox(data, src_size, new_WH)

    # object-.../123.jpg -> data_resize/123.jpg
    labels["Frame"] = labels["Frame"].map(
        lambda x: os.path.join(FLAGS.save_dir, os.path.basename(x)))

    create_clean_dir("mask")
    LOGGER.info("Cleaned {} directory".format("mask"))
    LOGGER.info("Masking begin")
    start = time.time()

    pool = Pool()
    tasks = [(new_WH, image_path, labels, "mask")
             for image_path in labels["Frame"].unique()]
    pool.starmap_async(generate_mask_pipeline, tasks)
    pool.close()
    pool.join()
    end = time.time()
    LOGGER.info("Masking ends. Time elapsed: {}".format(end - start))

    labels["Mask"] = labels["Frame"].map(
        lambda x: os.path.join("mask", os.path.basename(x)))
    labels.to_csv(FLAGS.target_csv, index=False)

    LOGGER.info("Adjustment saved to {}".format(FLAGS.target_csv))
Example #10
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 force_separate_z: bool = None,
                 interpolation_order: int = 3,
                 interpolation_order_z=0):
        """
        Disables nnU-Net postprocessing, which would just waste computation time and does not benefit BraTS.

        !!!We run this with use_sliding_window=False by default (see on_epoch_end). This triggers fully convolutional
        inference. THIS ONLY MAKES SENSE WHEN TRAINING ON FULL IMAGES! Make sure use_sliding_window=True when running
        with the default patch size (128x128x128)!!!

        By default this does not use test time data augmentation (mirroring). The reference implementation, however,
        does. It is disabled here because it eats up a lot of computation time.
        """
        validation_start = time()

        current_mode = self.network.training
        self.network.eval()

        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        # predictions as they come from the network go here
        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        # this is for debug purposes
        my_input_args = {
            'do_mirroring': do_mirroring,
            'use_sliding_window': use_sliding_window,
            'step_size': step_size,
            'save_softmax': save_softmax,
            'use_gaussian': use_gaussian,
            'overwrite': overwrite,
            'validation_folder_name': validation_folder_name,
            'debug': debug,
            'all_in_gpu': all_in_gpu,
            'force_separate_z': force_separate_z,
            'interpolation_order': interpolation_order,
            'interpolation_order_z': interpolation_order_z,
        }
        save_json(my_input_args, join(output_folder, "validation_args.json"))

        if do_mirroring:
            if not self.data_aug_params['do_mirror']:
                raise RuntimeError(
                    "We did not train with mirroring so you cannot do inference with mirroring enabled"
                )
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        export_pool = Pool(default_num_threads)
        results = []

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                #print(k, data.shape)

                softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                    data[:-1],
                    do_mirroring=do_mirroring,
                    mirror_axes=mirror_axes,
                    use_sliding_window=use_sliding_window,
                    step_size=step_size,
                    use_gaussian=use_gaussian,
                    all_in_gpu=all_in_gpu,
                    verbose=False,
                    mixed_precision=self.fp16)[1]

                # this does not do anything in brats -> remove this line
                # softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None

                results.append(
                    export_pool.starmap_async(
                        save_segmentation_nifti_from_softmax,
                        ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                          properties, interpolation_order, None, None, None,
                          softmax_fname, None, force_separate_z,
                          interpolation_order_z, False), )))

        _ = [i.get() for i in results]
        self.print_to_log_file("finished prediction")

        # evaluate raw predictions
        self.print_to_log_file("evaluation of raw predictions")

        # this writes a csv file into output_folder
        evaluate_regions(output_folder, self.gt_niftis_folder,
                         self.evaluation_regions)
        csv_file = np.loadtxt(join(output_folder, 'summary.csv'),
                              skiprows=1,
                              dtype=str,
                              delimiter=',')[:, 1:]

        # these are the values that are computed with np.nanmean aggregation
        whole, core, enhancing = csv_file[-4, :].astype(float)

        # do some cleanup
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        self.network.train(current_mode)
        validation_end = time()
        self.print_to_log_file('Running the validation took %f seconds' %
                               (validation_end - validation_start))
        self.print_to_log_file(
            '(the time needed for validation is included in the total epoch time!)'
        )

        return whole, core, enhancing
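Example #10 above, and Examples #13 and #14 below, call starmap_async with a one-element argument iterable per case and collect the handles in a list; each call submits exactly one task and is thus equivalent to apply_async with the same arguments. A condensed sketch of the idiom with a hypothetical export worker:

from multiprocessing import Pool

def export_case(case_id, out_path):
    # Hypothetical per-case export.
    return case_id, out_path

if __name__ == '__main__':
    cases = ['case_0', 'case_1']
    pool = Pool(2)
    results = []
    for c in cases:
        # ((args,),) wraps the argument tuple in a 1-item iterable,
        # so this submits a single task per call.
        results.append(pool.starmap_async(export_case, ((c, c + '.nii.gz'),)))
    _ = [r.get() for r in results]  # block until every export has finished
    pool.close()
    pool.join()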
Example #11
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy,
                          _Resultable):

    _Processors_Pool: Pool = None
    _Processors_List: List[Union[ApplyResult, AsyncResult]] = None

    def __init__(self, pool_size: int):
        super().__init__(pool_size=pool_size)

    def initialization(self,
                       queue_tasks: Optional[Union[_BaseQueueTask,
                                                   _BaseList]] = None,
                       features: Optional[Union[_BaseFeatureAdapterFactory,
                                                _BaseList]] = None,
                       *args,
                       **kwargs) -> None:
        super(ProcessPoolStrategy,
              self).initialization(queue_tasks=queue_tasks,
                                   features=features,
                                   *args,
                                   **kwargs)

        # Activate multiprocessing.managers.BaseManager server
        activate_manager_server()

        # Initialize and build the Processes Pool.
        __pool_initializer: Callable = kwargs.get("pool_initializer", None)
        __pool_initargs: IterableType = kwargs.get("pool_initargs", None)
        self._Processors_Pool = Pool(processes=self.pool_size,
                                     initializer=__pool_initializer,
                                     initargs=__pool_initargs)

    def apply(self,
              tasks_size: int,
              function: Callable,
              args: Tuple = (),
              kwargs: Dict = {}) -> None:
        self.reset_result()
        __process_running_result = None

        try:
            __process_running_result = [
                self._Processors_Pool.apply(func=function,
                                            args=args,
                                            kwds=kwargs)
                for _ in range(tasks_size)
            ]
            __exception = None
            __process_run_successful = True
        except Exception as e:
            __exception = e
            __process_run_successful = False

        # Save Running result state and Running result value as dict
        self._result_saving(successful=__process_run_successful,
                            result=__process_running_result,
                            exception=__exception)

    def async_apply(self,
                    tasks_size: int,
                    function: Callable,
                    args: Tuple = (),
                    kwargs: Dict = {},
                    callback: Callable = None,
                    error_callback: Callable = None) -> None:
        self.reset_result()
        self._Processors_List = [
            self._Processors_Pool.apply_async(func=function,
                                              args=args,
                                              kwds=kwargs,
                                              callback=callback,
                                              error_callback=error_callback)
            for _ in range(tasks_size)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def apply_with_iter(self,
                        functions_iter: List[Callable],
                        args_iter: List[Tuple] = None,
                        kwargs_iter: List[Dict] = None) -> None:
        self.reset_result()
        __process_running_result = None

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        try:
            __process_running_result = [
                self._Processors_Pool.apply(func=_func,
                                            args=_args,
                                            kwds=_kwargs) for _func, _args,
                _kwargs in zip(functions_iter, args_iter, kwargs_iter)
            ]
            __exception = None
            __process_run_successful = True
        except Exception as e:
            __exception = e
            __process_run_successful = False

        # Save Running result state and Running result value as dict
        self._result_saving(successful=__process_run_successful,
                            result=__process_running_result,
                            exception=__exception)

    def async_apply_with_iter(
            self,
            functions_iter: List[Callable],
            args_iter: List[Tuple] = None,
            kwargs_iter: List[Dict] = None,
            callback_iter: List[Callable] = None,
            error_callback_iter: List[Callable] = None) -> None:
        self.reset_result()

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        if callback_iter is None:
            callback_iter = [None for _ in functions_iter]

        if error_callback_iter is None:
            error_callback_iter = [None for _ in functions_iter]

        self._Processors_List = [
            self._Processors_Pool.apply_async(func=_func,
                                              args=_args,
                                              kwds=_kwargs,
                                              callback=_callback,
                                              error_callback=_error_callback)
            for _func, _args, _kwargs, _callback, _error_callback in zip(
                functions_iter, args_iter, kwargs_iter, callback_iter,
                error_callback_iter)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def map(self,
            function: Callable,
            args_iter: IterableType = (),
            chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.map(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map(self,
                  function: Callable,
                  args_iter: IterableType = (),
                  chunksize: int = None,
                  callback: Callable = None,
                  error_callback: Callable = None) -> None:
        self.reset_result()

        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.map_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)

        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def map_by_args(self,
                    function: Callable,
                    args_iter: IterableType[IterableType] = (),
                    chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.starmap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map_by_args(self,
                          function: Callable,
                          args_iter: IterableType[IterableType] = (),
                          chunksize: int = None,
                          callback: Callable = None,
                          error_callback: Callable = None) -> None:
        self.reset_result()
        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.starmap_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)

        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def imap(self,
             function: Callable,
             args_iter: IterableType = (),
             chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [
                result for result in imap_running_result
            ]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def imap_unordered(self,
                       function: Callable,
                       args_iter: IterableType = (),
                       chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap_unordered(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [
                result for result in imap_running_result
            ]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def _result_saving(self, successful: bool, result: List,
                       exception: Exception) -> None:
        _process_result = {
            "successful": successful,
            "result": result,
            "exception": exception
        }
        self._Processors_Running_Result.append(_process_result)

    def close(self) -> None:
        self._Processors_Pool.close()
        self._Processors_Pool.join()

    def terminal(self) -> None:
        self._Processors_Pool.terminate()

    def get_result(self) -> List[_ProcessPoolResult]:
        return self.result()

    def _saving_process(self) -> List[_ProcessPoolResult]:
        _pool_results = []
        for __result in self._Processors_Running_Result:
            _pool_result = _ProcessPoolResult()
            _pool_result.is_successful = __result["successful"]
            _pool_result.data = __result["result"]
            _pool_results.append(_pool_result)
        return _pool_results
Example #12
    def starmap_async(self, func, iterable, *args, **kwargs):
        return Pool.starmap_async(self, ExceptionLogger(func), iterable, *args,
                                  **kwargs)
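Example #12 subclasses Pool and wraps the target callable in an ExceptionLogger before dispatch, so tracebacks raised in workers get logged instead of only surfacing later from AsyncResult.get(). The wrapper itself is not shown; a plausible sketch (a top-level class rather than a closure, so it stays picklable):

import logging
import traceback

class ExceptionLogger:
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        try:
            return self.func(*args, **kwargs)
        except Exception:
            # Log inside the worker process, then re-raise so get() still fails.
            logging.error(traceback.format_exc())
            raise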
Example #13
def determine_postprocessing(base,
                             gt_labels_folder,
                             raw_subfolder_name="validation_raw",
                             temp_folder="temp",
                             final_subf_name="validation_final",
                             processes=default_num_threads,
                             dice_threshold=0,
                             debug=False,
                             advanced_postprocessing=False,
                             pp_filename="postprocessing.json"):
    """
    :param base:
    :param gt_labels_folder: subfolder of base with niftis of ground truth labels
    :param raw_subfolder_name: subfolder of base with niftis of predicted (non-postprocessed) segmentations
    :param temp_folder: used to store temporary data, will be deleted after we are done here unless debug=True
    :param final_subf_name: final results will be stored here (subfolder of base)
    :param processes:
    :param dice_threshold: only apply postprocessing if the result is better than old_result + dice_threshold (can be used as eps)
    :param debug: if True then the temporary files will not be deleted
    :return:
    """
    # lets see what classes are in the dataset
    classes = [
        int(i) for i in load_json(base + "/" + raw_subfolder_name + "/" +
                                  "summary.json")['results']['mean'].keys()
        if int(i) != 0
    ]

    folder_all_classes_as_fg = base + "/" + temp_folder + "_allClasses"
    folder_per_class = base + "/" + temp_folder + "_perClass"

    if isdir(folder_all_classes_as_fg):
        shutil.rmtree(folder_all_classes_as_fg)
    if isdir(folder_per_class):
        shutil.rmtree(folder_per_class)

    # multiprocessing rules
    p = Pool(processes)

    assert isfile(base + "/" + raw_subfolder_name + "/" + "summary.json"), \
        "join(base, raw_subfolder_name) does not contain a summary.json"

    # these are all the files we will be dealing with
    fnames = subfiles(base + "/" + raw_subfolder_name,
                      suffix=".nii.gz",
                      join=False)

    # make output and temp dir
    if not os.path.isdir(folder_all_classes_as_fg):
        os.makedirs(folder_all_classes_as_fg)
    if not os.path.isdir(folder_per_class):
        os.makedirs(folder_per_class)
    if not os.path.isdir(base + "/" + final_subf_name):
        os.makedirs(base + "/" + final_subf_name)

    pp_results = {}
    pp_results['dc_per_class_raw'] = {}
    # dice scores after treating all foreground classes as one
    pp_results['dc_per_class_pp_all'] = {}
    # dice scores after removing everything except the largest cc,
    # independently for each class, after we already did dc_per_class_pp_all
    pp_results['dc_per_class_pp_per_class'] = {}
    pp_results['for_which_classes'] = []
    pp_results['min_valid_object_sizes'] = {}

    validation_result_raw = load_json(base + "/" + raw_subfolder_name + "/" +
                                      "summary.json")['results']
    pp_results['num_samples'] = len(validation_result_raw['all'])
    validation_result_raw = validation_result_raw['mean']

    if advanced_postprocessing:
        # first treat all foreground classes as one and remove all but the largest foreground connected component
        results = []
        for f in fnames:
            predicted_segmentation = base + "/" + raw_subfolder_name + "/" + f
            # now remove all but the largest connected component for each class
            output_file = folder_all_classes_as_fg + "/" + f
            results.append(
                p.starmap_async(load_remove_save,
                                ((predicted_segmentation, output_file,
                                  (classes, )), )))

        results = [i.get() for i in results]

        # aggregate max_size_removed and min_size_kept
        max_size_removed = {}
        min_size_kept = {}
        for tmp in results:
            mx_rem, min_kept = tmp[0]
            for k in mx_rem:
                if mx_rem[k] is not None:
                    if max_size_removed.get(k) is None:
                        max_size_removed[k] = mx_rem[k]
                    else:
                        max_size_removed[k] = max(max_size_removed[k],
                                                  mx_rem[k])
            for k in min_kept:
                if min_kept[k] is not None:
                    if min_size_kept.get(k) is None:
                        min_size_kept[k] = min_kept[k]
                    else:
                        min_size_kept[k] = min(min_size_kept[k], min_kept[k])

        print("foreground vs background, smallest valid object size was",
              min_size_kept[tuple(classes)])
        print("removing only objects smaller than that...")

    else:
        min_size_kept = None

    # we need to rerun the step from above, now with the size constraint
    pred_gt_tuples = []
    results = []
    # first treat all foreground classes as one and remove all but the largest foreground connected component
    for f in fnames:
        predicted_segmentation = base + "/" + raw_subfolder_name + "/" + f
        # now remove all but the largest connected component for each class
        output_file = folder_all_classes_as_fg + "/" + f
        results.append(
            p.starmap_async(load_remove_save,
                            ((predicted_segmentation, output_file,
                              (classes, ), min_size_kept), )))
        pred_gt_tuples.append([output_file, gt_labels_folder + "/" + f])

    _ = [i.get() for i in results]

    # evaluate postprocessed predictions
    _ = aggregate_scores(pred_gt_tuples,
                         labels=classes,
                         json_output_file=folder_all_classes_as_fg + "/" +
                         "summary.json",
                         json_author="Fabian",
                         num_threads=processes)

    # now we need to figure out if doing this improved the dice scores. We implement this defensively: if a single
    # class got worse as a result, we won't do it. We can change this in the future but right now I prefer to do it
    # this way
    validation_result_PP_test = load_json(folder_all_classes_as_fg + "/" +
                                          "summary.json")['results']['mean']

    for c in classes:
        dc_raw = validation_result_raw[str(c)]['Dice']
        dc_pp = validation_result_PP_test[str(c)]['Dice']
        pp_results['dc_per_class_raw'][str(c)] = dc_raw
        pp_results['dc_per_class_pp_all'][str(c)] = dc_pp

    # true if new is better
    do_fg_cc = False
    comp = [
        pp_results['dc_per_class_pp_all'][str(cl)] >
        (pp_results['dc_per_class_raw'][str(cl)] + dice_threshold)
        for cl in classes
    ]
    before = np.mean(
        [pp_results['dc_per_class_raw'][str(cl)] for cl in classes])
    after = np.mean(
        [pp_results['dc_per_class_pp_all'][str(cl)] for cl in classes])
    print("Foreground vs background")
    print("before:", before)
    print("after: ", after)
    if any(comp):
        # at least one class improved - yay!
        # now check if another got worse
        # true if new is worse
        any_worse = any([
            pp_results['dc_per_class_pp_all'][str(cl)] <
            pp_results['dc_per_class_raw'][str(cl)] for cl in classes
        ])
        if not any_worse:
            pp_results['for_which_classes'].append(classes)
            if min_size_kept is not None:
                pp_results['min_valid_object_sizes'].update(
                    deepcopy(min_size_kept))
            do_fg_cc = True
            print(
                "Removing all but the largest foreground region improved results!"
            )
            print('for_which_classes', classes)
            print('min_valid_object_sizes', min_size_kept)
    else:
        # did not improve things - don't do it
        pass

    if len(classes) > 1:
        # now depending on whether we do remove all but the largest foreground connected component we define the source dir
        # for the next one to be the raw or the temp dir
        if do_fg_cc:
            source = folder_all_classes_as_fg
        else:
            source = base + "/" + raw_subfolder_name

        if advanced_postprocessing:
            # now run this for each class separately
            results = []
            for f in fnames:
                predicted_segmentation = source + "/" + f
                output_file = folder_per_class + "/" + f
                results.append(
                    p.starmap_async(
                        load_remove_save,
                        ((predicted_segmentation, output_file, classes), )))

            results = [i.get() for i in results]

            # aggregate max_size_removed and min_size_kept
            max_size_removed = {}
            min_size_kept = {}
            for tmp in results:
                mx_rem, min_kept = tmp[0]
                for k in mx_rem:
                    if mx_rem[k] is not None:
                        if max_size_removed.get(k) is None:
                            max_size_removed[k] = mx_rem[k]
                        else:
                            max_size_removed[k] = max(max_size_removed[k],
                                                      mx_rem[k])
                for k in min_kept:
                    if min_kept[k] is not None:
                        if min_size_kept.get(k) is None:
                            min_size_kept[k] = min_kept[k]
                        else:
                            min_size_kept[k] = min(min_size_kept[k],
                                                   min_kept[k])

            print(
                "classes treated separately, smallest valid object sizes are")
            print(min_size_kept)
            print("removing only objects smaller than that")
        else:
            min_size_kept = None

        # rerun with the size thresholds from above
        pred_gt_tuples = []
        results = []
        for f in fnames:
            predicted_segmentation = source + "/" + f
            output_file = folder_per_class + "/" + f
            results.append(
                p.starmap_async(load_remove_save,
                                ((predicted_segmentation, output_file, classes,
                                  min_size_kept), )))
            pred_gt_tuples.append([output_file, gt_labels_folder + "/" + f])

        _ = [i.get() for i in results]

        # evaluate postprocessed predictions
        _ = aggregate_scores(pred_gt_tuples,
                             labels=classes,
                             json_output_file=folder_per_class + "/" +
                             "summary.json",
                             json_author="Fabian",
                             num_threads=processes)

        if do_fg_cc:
            old_res = deepcopy(validation_result_PP_test)
        else:
            old_res = validation_result_raw

        # these are the new dice scores
        validation_result_PP_test = load_json(
            folder_per_class + "/" + "summary.json")['results']['mean']

        for c in classes:
            dc_raw = old_res[str(c)]['Dice']
            dc_pp = validation_result_PP_test[str(c)]['Dice']
            pp_results['dc_per_class_pp_per_class'][str(c)] = dc_pp
            print(c)
            print("before:", dc_raw)
            print("after: ", dc_pp)

            if dc_pp > (dc_raw + dice_threshold):
                pp_results['for_which_classes'].append(int(c))
                if min_size_kept is not None:
                    pp_results['min_valid_object_sizes'].update(
                        {c: min_size_kept[c]})
                print(
                    "Removing all but the largest region for class %d improved results!"
                    % c)
                print('min_valid_object_sizes', min_size_kept)
    else:
        print(
            "Only one class present, no need to do each class separately as this is covered in fg vs bg"
        )

    if not advanced_postprocessing:
        pp_results['min_valid_object_sizes'] = None

    print("done")
    print("for which classes:")
    print(pp_results['for_which_classes'])
    print("min_object_sizes")
    print(pp_results['min_valid_object_sizes'])

    pp_results['validation_raw'] = raw_subfolder_name
    pp_results['validation_final'] = final_subf_name

    # now that we have a proper for_which_classes, apply that
    pred_gt_tuples = []
    results = []
    for f in fnames:
        predicted_segmentation = base + "/" + raw_subfolder_name + "/" + f

        # now remove all but the largest connected component for each class
        output_file = base + "/" + final_subf_name + "/" + f
        results.append(
            p.starmap_async(load_remove_save,
                            ((predicted_segmentation, output_file,
                              pp_results['for_which_classes'],
                              pp_results['min_valid_object_sizes']), )))

        pred_gt_tuples.append([output_file, gt_labels_folder + "/" + f])

    _ = [i.get() for i in results]
    # evaluate postprocessed predictions
    _ = aggregate_scores(pred_gt_tuples,
                         labels=classes,
                         json_output_file=base + "/" + final_subf_name + "/" +
                         "summary.json",
                         json_author="Fabian",
                         num_threads=processes)

    pp_results['min_valid_object_sizes'] = str(
        pp_results['min_valid_object_sizes'])

    save_json(pp_results, base + "/" + pp_filename)

    # delete temp
    if not debug:
        shutil.rmtree(folder_per_class)
        shutil.rmtree(folder_all_classes_as_fg)

    p.close()
    p.join()
    print("done")
Example #14
    def validate(self, do_mirroring=True, use_train_mode=False, tiled=True, step=2, save_softmax=True,
                 use_gaussian=True, validation_folder_name='validation'):
        """

        :param do_mirroring:
        :param use_train_mode:
        :param mirror_axes:
        :param tiled:
        :param tile_in_z:
        :param step:
        :param use_nifti:
        :param save_softmax:
        :param use_gaussian:
        :param use_temporal_models:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        process_manager = Pool(2)
        results = []

        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                               k + "_segFromPrevStage.npz"))['data'][None]

            transpose_forward = self.plans.get('transpose_forward')
            if transpose_forward is not None:
                data = data.transpose([0] + [i+1 for i in transpose_forward])
                seg_from_prev_stage = seg_from_prev_stage.transpose([0] + [i+1 for i in transpose_forward])

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0], range(1, self.num_classes))))
            softmax_pred = self.predict_preprocessed_data_return_softmax(data_for_net, do_mirroring, 1,
                                                                         use_train_mode, 1, mirror_axes, tiled,
                                                                         True, step, self.patch_size,
                                                                         use_gaussian=use_gaussian)

            if transpose_forward is not None:
                transpose_backward = self.plans.get('transpose_backward')
                softmax_pred = softmax_pred.transpose([0] + [i+1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.9):  # *0.9 just to be safe
                np.save(join(output_folder, fname + ".npy"), softmax_pred)
                softmax_pred = join(output_folder, fname + ".npy")
            results.append(process_manager.starmap_async(store_seg_from_softmax,
                                                         ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                                                           properties, 1, None, None, None, softmax_fname, None),
                                                          )
                                                         )
                           )

            pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                                   join(self.gt_niftis_folder, fname + ".nii.gz")])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder, "summary.json"), json_name=job_name,
                             json_author="Fabian", json_description="",
                             json_task=task)
Example #15
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None):
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"

        current_mode = self.network.training
        self.network.eval()
        # save whether network is in deep supervision mode or not
        ds = self.network.do_ds
        # disable deep supervision
        self.network.do_ds = False

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params'][
                    'force_separate_z']
                interpolation_order = self.plans['segmentation_export_params'][
                    'interpolation_order']
                interpolation_order_z = self.plans[
                    'segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs[
                'interpolation_order']
            interpolation_order_z = segmentation_export_kwargs[
                'interpolation_order_z']

        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)
        # this is for debug purposes
        my_input_args = {
            'do_mirroring': do_mirroring,
            'use_sliding_window': use_sliding_window,
            'step': step_size,
            'save_softmax': save_softmax,
            'use_gaussian': use_gaussian,
            'overwrite': overwrite,
            'validation_folder_name': validation_folder_name,
            'debug': debug,
            'all_in_gpu': all_in_gpu,
            'segmentation_export_kwargs': segmentation_export_kwargs,
        }
        save_json(my_input_args, join(output_folder, "validation_args.json"))

        if do_mirroring:
            if not self.data_aug_params['do_mirror']:
                raise RuntimeError(
                    "We did not train with mirroring so you cannot do inference with mirroring enabled"
                )
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(default_num_threads)
        results = []

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                # concat segmentation of previous step
                seg_from_prev_stage = np.load(
                    join(self.folder_with_segs_from_prev_stage,
                         k + "_segFromPrevStage.npz"))['data'][None]

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                data_for_net = np.concatenate(
                    (data[:-1],
                     to_one_hot(seg_from_prev_stage[0],
                                range(1, self.num_classes))))

                softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                    data_for_net,
                    do_mirroring=do_mirroring,
                    mirror_axes=mirror_axes,
                    use_sliding_window=use_sliding_window,
                    step_size=step_size,
                    use_gaussian=use_gaussian,
                    all_in_gpu=all_in_gpu,
                    mixed_precision=self.fp16)[1]

                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None

                if np.prod(softmax_pred.shape) > (
                        2e9 / 4 * 0.85):  # *0.85 just to be safe
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")

                results.append(
                    export_pool.starmap_async(
                        save_segmentation_nifti_from_softmax,
                        ((softmax_pred, join(output_folder,
                                             fname + ".nii.gz"), properties,
                          interpolation_order, None, None, None, softmax_fname,
                          None, force_separate_z, interpolation_order_z), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]
        self.print_to_log_file("finished prediction")

        # evaluate raw predictions
        self.print_to_log_file("evaluation of raw predictions")
        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(
            pred_gt_tuples,
            labels=list(range(self.num_classes)),
            json_output_file=join(output_folder, "summary.json"),
            json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
            json_task=task,
            num_threads=default_num_threads)

        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder,
                                 self.gt_niftis_folder,
                                 validation_folder_name,
                                 final_subf_name=validation_folder_name +
                                 "_postprocessed",
                                 debug=debug)

        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            e = None
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError as err:
                    e = err  # keep a reference: 'as' bindings are deleted when the except block ends
                    attempts += 1
                    sleep(1)
            if not success:
                print("Could not copy gt nifti file %s into folder %s" %
                      (f, gt_nifti_folder))
                if e is not None:
                    raise e

        # restore network deep supervision mode
        self.network.train(current_mode)
        self.network.do_ds = ds
Example #16
    def display(users, top=15, save=False, refresh=False):
        DiskSpaceUsage.OWNERS = users
        DiskSpaceUsage.TOP_COUNT = top if top else 15
        DiskSpaceUsage.FORCE_REFRESH = refresh

        # Create/open the cache
        current_folder = os.path.dirname(os.path.realpath(__file__))
        cache_folder = os.path.join(current_folder, "cache")
        cache = FanoutCache(shards=6, directory=cache_folder)

        # All experiments
        all_experiments = list(
            itertools.chain(*(Experiment.get(query_criteria=QueryCriteria().
                                             where(["owner={}".format(owner)]))
                              for owner in DiskSpaceUsage.OWNERS)))

        all_experiments_len = len(all_experiments)

        # Create the pool of worker
        p = Pool(6)
        r = p.starmap_async(DiskSpaceUsage.get_experiment_info,
                            itertools.product(all_experiments, (cache, )))
        p.close()

        print("Analyzing disk space for:")
        print(" | {} experiments".format(all_experiments_len))
        print(" | Users: {}".format(", ".join(DiskSpaceUsage.OWNERS)))

        # Wait for completion and display progress
        sys.stdout.write(
            " | Experiment analyzed: 0/{}".format(all_experiments_len))
        sys.stdout.flush()

        # While we are analyzing, display the status
        while not r.ready():
            # Estimate how many tasks remain. This is just an estimate and needs to be bounded
            remaining = max(
                0, min(all_experiments_len, r._number_left * r._chunksize))
            sys.stdout.write("\r {} Experiment analyzed: {}/{}".format(
                next(animation), all_experiments_len - remaining,
                all_experiments_len))
            sys.stdout.flush()

            time.sleep(.5)

        sys.stdout.write("\r | Experiment analyzed: {}/{}".format(
            all_experiments_len, all_experiments_len))
        sys.stdout.flush()

        # Get all the results
        experiments_info = [
            cache.get(e.id) for e in all_experiments if cache.get(e.id)
        ]
        cache.close()

        # Display
        print("\n\n---------------------------")
        DiskSpaceUsage.top_count_experiments(experiments_info)
        print("\n---------------------------")
        DiskSpaceUsage.total_size_per_user(experiments_info)
        print("\n---------------------------")
        DiskSpaceUsage.top_count_experiments_per_user(experiments_info)

        # save to a csv file
        if save:
            DiskSpaceUsage.save_to_file(experiments_info)
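
The progress loop above peeks at the private AsyncResult attributes _number_left and _chunksize, which can change between Python versions. A minimal, self-contained sketch of the same progress display using imap_unordered, which yields results as they complete and therefore needs no private state (the worker analyze_one is a hypothetical stand-in for DiskSpaceUsage.get_experiment_info):

import sys
import time
from multiprocessing import Pool


def analyze_one(item):
    # hypothetical stand-in for the real per-experiment analysis
    time.sleep(0.1)
    return item * item


if __name__ == "__main__":
    items = list(range(40))
    done = 0
    with Pool(6) as pool:
        # results arrive in completion order, so completed work can be counted directly
        for _ in pool.imap_unordered(analyze_one, items):
            done += 1
            sys.stdout.write("\r | Experiments analyzed: {}/{}".format(done, len(items)))
            sys.stdout.flush()
    print()
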
Example #17
    def validate(self, do_mirroring=True, use_train_mode=False, tiled=True, step=2, save_softmax=True,
                 use_gaussian=True, validation_folder_name='validation'):
        """

        :param do_mirroring:
        :param use_train_mode:
        :param mirror_axes:
        :param tiled:
        :param tile_in_z:
        :param step:
        :param use_nifti:
        :param save_softmax:
        :param use_gaussian:
        :param use_temporal_models:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        process_manager = Pool(2)
        results = []

        transpose_backward = self.plans.get('transpose_backward')

        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                               k + "_segFromPrevStage.npz"))['data'][None]

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0], range(1, self.num_classes))))
            softmax_pred = self.predict_preprocessed_data_return_softmax(data_for_net, do_mirroring, 1,
                                                                         use_train_mode, 1, mirror_axes, tiled,
                                                                         True, step, self.patch_size,
                                                                         use_gaussian=use_gaussian)

            if transpose_backward is not None:
                softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            """There is a problem with python process communication that prevents us from communicating obejcts 
            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
            filename or np.ndarray and will handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.9): # *0.9 just to be save
                np.save(fname + ".npy", softmax_pred)
                softmax_pred = fname + ".npy"
            results.append(process_manager.starmap_async(save_segmentation_nifti_from_softmax,
                                                         ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                                                           properties, 1, None, None, None, softmax_fname, None),
                                                          )
                                                         )
                           )

            pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                                   join(self.gt_niftis_folder, fname + ".nii.gz")])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder, "summary.json"), json_name=job_name,
                             json_author="Fabian", json_description="",
                             json_task=task)
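
The long comment in the example above describes the workaround for multiprocessing's 2 GB pickle limit: when the softmax array is too big to send through a Pipe, save it to disk and send the (small) filename instead. A minimal sketch of that pattern under assumed names (consume stands in for save_segmentation_nifti_from_softmax):

import os
import tempfile
from multiprocessing import Pool

import numpy as np

# mirrors the check in the example: number of float32 elements whose pickle
# would approach 2e9 bytes, with a safety margin
SAFE_ELEMENTS = 2e9 / 4 * 0.85


def consume(softmax_or_path):
    # accept either an in-memory array or a path to an npy file,
    # deleting the temporary file after reading it
    if isinstance(softmax_or_path, str):
        arr = np.load(softmax_or_path)
        os.remove(softmax_or_path)
    else:
        arr = softmax_or_path
    return float(arr.sum())


if __name__ == "__main__":
    softmax = np.random.rand(4, 32, 32, 32).astype(np.float32)
    if np.prod(softmax.shape) > SAFE_ELEMENTS:
        path = os.path.join(tempfile.gettempdir(), "softmax.npy")
        np.save(path, softmax)
        softmax = path  # ship the filename through the Pipe, not the array
    with Pool(2) as pool:
        result = pool.starmap_async(consume, ((softmax,),))
        print(result.get())
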
Example #18
    def analyze(self):
        # Clear the cache
        self.cache.clear()

        # Start the timer
        start_time = time.time()

        # If no analyzers -> quit
        if not all((self.analyzers, self.simulations)):
            print("No analyzers or simulations selected, exiting...")
            return

        # If any of the analyzer needs the dir map, create it
        if any(a.need_dir_map for a in self.analyzers
               if hasattr(a, 'need_dir_map')):
            # preload the global dir map
            from simtools.Utilities.SimulationDirectoryMap import SimulationDirectoryMap
            for experiment in self.experiments:
                SimulationDirectoryMap.preload_experiment(experiment)

        # Run the per experiment on the analyzers
        for exp in self.experiments:
            for a in self.analyzers:
                a.per_experiment(exp)

        scount = len(self.simulations)
        max_threads = min(self.max_threads, scount if scount != 0 else 1)

        # Display some info
        if self.verbose:
            print("Analyze Manager")
            print(" | {} simulation{} - {} experiment{}".format(
                scount, pluralize(scount), len(self.experiments),
                pluralize(self.experiments)))
            print(" | force_analyze is {} and {} simulation{} ignored".format(
                on_off(self.force_analyze), len(self.ignored_simulations),
                pluralize(self.ignored_simulations)))
            print(" | Analyzer{}: ".format(pluralize(self.analyzers)))
            for a in self.analyzers:
                print(
                    " |  - {} (Directory map: {} / File parsing: {} / Use cache: {})"
                    .format(a.uid, on_off(a.need_dir_map), on_off(a.parse),
                            on_off(hasattr(a, "cache"))))
            print(" | Pool of {} analyzing processes".format(max_threads))

        pool = Pool(max_threads)
        if scount == 0 and self.verbose:
            print("No experiments/simulations for analysis.")
        else:
            results = pool.starmap_async(
                retrieve_data,
                itertools.product(self.simulations.values(),
                                  (self.analyzers, ), (self.cache, )))

            while not results.ready():
                self._check_exception()

                time_elapsed = time.time() - start_time
                if self.verbose:
                    sys.stdout.write(
                        "\r {} Analyzing {}/{}... {} elapsed".format(
                            next(animation), len(self.cache), scount,
                            verbose_timedelta(time_elapsed)))
                    sys.stdout.flush()

                if time_elapsed > ANALYZE_TIMEOUT:
                    raise Exception(
                        "Timeout while waiting for the analysis to complete...")

                time.sleep(WAIT_TIME)
            results.get()

        # At this point we have all our results
        # Give to the analyzer
        finalize_results = {}
        for a in self.analyzers:
            analyzer_data = {}
            for key in self.cache:
                if key == EXCEPTION_KEY: continue
                # Retrieve the cache content and the simulation object
                sim_cache = self.cache.get(key)
                simulation_obj = self.simulations[key]
                # Give to the analyzer
                analyzer_data[simulation_obj] = sim_cache[
                    a.uid] if sim_cache and a.uid in sim_cache else None
            finalize_results[a.uid] = pool.apply_async(a.finalize,
                                                       (analyzer_data, ))

        pool.close()
        pool.join()

        for a in self.analyzers:
            a.results = finalize_results[a.uid].get()

        if self.verbose:
            total_time = time.time() - start_time
            print(
                "\r | Analysis done. Took {} (~ {:.3f} per simulation)".format(
                    verbose_timedelta(total_time),
                    total_time / scount if scount != 0 else 0))
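
The polling loop above enforces ANALYZE_TIMEOUT by hand so it can also display progress. When no progress display is needed, AsyncResult.get(timeout=...) expresses the same bound more directly; a minimal sketch (retrieve is a hypothetical worker):

from multiprocessing import Pool, TimeoutError


def retrieve(x):
    return x + 1


if __name__ == "__main__":
    with Pool(4) as pool:
        r = pool.starmap_async(retrieve, [(i,) for i in range(8)])
        try:
            # raises TimeoutError instead of requiring a manual polling loop
            results = r.get(timeout=60)
        except TimeoutError:
            pool.terminate()
            raise
    print(results)
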
Example #19
    def validate(self,
                 do_mirroring=True,
                 use_train_mode=False,
                 tiled=True,
                 step=2,
                 save_softmax=True,
                 use_gaussian=True,
                 overwrite=True,
                 validation_folder_name="validation_raw",
                 debug=False):
        """

        :param do_mirroring:
        :param use_train_mode:
        :param mirror_axes:
        :param tiled:
        :param tile_in_z:
        :param step:
        :param use_nifti:
        :param save_softmax:
        :param use_gaussian:
        :param use_temporal_models:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        process_manager = Pool(2)
        results = []

        transpose_backward = self.plans.get('transpose_backward')

        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(
                join(self.folder_with_segs_from_prev_stage,
                     k + "_segFromPrevStage.npz"))['data'][None]

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate(
                (data[:-1],
                 to_one_hot(seg_from_prev_stage[0], range(1,
                                                          self.num_classes))))
            softmax_pred = self.predict_preprocessed_data_return_softmax(
                data_for_net,
                do_mirroring,
                1,
                use_train_mode,
                1,
                mirror_axes,
                tiled,
                True,
                step,
                self.patch_size,
                use_gaussian=use_gaussian)

            if transpose_backward is not None:
                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None
            """There is a problem with python process communication that prevents us from communicating obejcts 
            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
            filename or np.ndarray and will handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 *
                                              0.85):  # *0.85 just to be save
                np.save(fname + ".npy", softmax_pred)
                softmax_pred = fname + ".npy"
            results.append(
                process_manager.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(
                        output_folder, fname + ".nii.gz"), properties, 3, None,
                      None, None, softmax_fname, None), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples,
                             labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder,
                                                   "summary.json"),
                             json_name=job_name,
                             json_author="Fabian",
                             json_description="",
                             json_task=task)

        # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
        # except the largest connected component for each class. To see if this improves results, we do this for all
        # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
        # have this applied during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder,
                                 self.gt_niftis_folder,
                                 validation_folder_name,
                                 final_subf_name=validation_folder_name +
                                 "_postprocessed",
                                 debug=debug)
        # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
        # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
        # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
        # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
        # be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError:
                    attempts += 1
                    sleep(1)
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None):
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"

        current_mode = self.network.training
        self.network.eval()
        # save whether network is in deep supervision mode or not
        ds = self.network.do_ds
        # disable deep supervision
        self.network.do_ds = False

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params'][
                    'force_separate_z']
                interpolation_order = self.plans['segmentation_export_params'][
                    'interpolation_order']
                interpolation_order_z = self.plans[
                    'segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs[
                'interpolation_order']
            interpolation_order_z = segmentation_export_kwargs[
                'interpolation_order_z']

        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)
        # this is for debug purposes
        my_input_args = {
            'do_mirroring': do_mirroring,
            'use_sliding_window': use_sliding_window,
            'step_size': step_size,
            'save_softmax': save_softmax,
            'use_gaussian': use_gaussian,
            'overwrite': overwrite,
            'validation_folder_name': validation_folder_name,
            'debug': debug,
            'all_in_gpu': all_in_gpu,
            'segmentation_export_kwargs': segmentation_export_kwargs,
        }
        save_json(my_input_args, join(output_folder, "validation_args.json"))

        if do_mirroring:
            if not self.data_aug_params['do_mirror']:
                raise RuntimeError(
                    "We did not train with mirroring so you cannot do inference with mirroring enabled"
                )
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(default_num_threads)
        results = []

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                # concat segmentation of previous step
                seg_from_prev_stage = np.load(
                    join(self.folder_with_segs_from_prev_stage,
                         k + "_segFromPrevStage.npz"))['data'][None]

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                data_for_net = np.concatenate(
                    (data[:-1],
                     to_one_hot(seg_from_prev_stage[0],
                                range(1, self.num_classes))))

                softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                    data_for_net,
                    do_mirroring,
                    mirror_axes,
                    use_sliding_window,
                    step_size,
                    use_gaussian,
                    all_in_gpu=all_in_gpu,
                    mixed_precision=self.fp16)[1]

                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None
                """There is a problem with python process communication that prevents us from communicating obejcts 
                larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
                communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
                enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
                patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
                then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
                filename or np.ndarray and will handle this automatically"""
                if np.prod(softmax_pred.shape) > (
                        2e9 / 4 * 0.85):  # *0.85 just to be save
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")

                results.append(
                    export_pool.starmap_async(
                        save_segmentation_nifti_from_softmax,
                        ((softmax_pred, join(output_folder,
                                             fname + ".nii.gz"), properties,
                          interpolation_order, None, None, None, softmax_fname,
                          None, force_separate_z, interpolation_order_z), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]
        self.print_to_log_file("finished prediction")

        # evaluate raw predictions
        self.print_to_log_file("evaluation of raw predictions")
        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(
            pred_gt_tuples,
            labels=list(range(self.num_classes)),
            json_output_file=join(output_folder, "summary.json"),
            json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
            json_author="Fabian",
            json_task=task,
            num_threads=default_num_threads)

        # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
        # except the largest connected component for each class. To see if this improves results, we do this for all
        # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
        # have this applied during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder,
                                 self.gt_niftis_folder,
                                 validation_folder_name,
                                 final_subf_name=validation_folder_name +
                                 "_postprocessed",
                                 debug=debug)
        # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
        # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
        # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
        # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
        # be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            e = None
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError as exc:
                    # keep a reference: the "as" name is unbound as soon as
                    # the except block ends in Python 3, so the re-raise
                    # below would otherwise hit a NameError
                    e = exc
                    attempts += 1
                    sleep(1)
            if not success:
                print("Could not copy gt nifti file %s into folder %s" %
                      (f, gt_nifti_folder))
                if e is not None:
                    raise e

        # restore network deep supervision mode
        self.network.train(current_mode)
        self.network.do_ds = ds
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None,
                 run_postprocessing_on_folds: bool = True):

        current_mode = self.network.training
        self.network.eval()

        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params'][
                    'force_separate_z']
                interpolation_order = self.plans['segmentation_export_params'][
                    'interpolation_order']
                interpolation_order_z = self.plans[
                    'segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs[
                'interpolation_order']
            interpolation_order_z = segmentation_export_kwargs[
                'interpolation_order_z']

        output_folder = os.path.join(self.output_folder, validation_folder_name)
        os.makedirs(output_folder, exist_ok=True)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(2)
        results = []

        transpose_backward = self.plans.get('transpose_backward')

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(
                self.folder_with_segs_from_prev_stage + "/" + k +
                "_segFromPrevStage.npz")['data'][None]

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate(
                (data[:-1],
                 to_one_hot(seg_from_prev_stage[0], range(1,
                                                          self.num_classes))))

            softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                data_for_net,
                do_mirroring=do_mirroring,
                mirror_axes=mirror_axes,
                use_sliding_window=use_sliding_window,
                step_size=step_size,
                use_gaussian=use_gaussian,
                all_in_gpu=all_in_gpu,
                mixed_precision=self.fp16)[1]

            if transpose_backward is not None:
                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = output_folder + "/" + fname + ".npz"
            else:
                softmax_fname = None
            """There is a problem with python process communication that prevents us from communicating obejcts 
            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
            filename or np.ndarray and will handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 *
                                              0.85):  # *0.85 just to be save
                np.save(fname + ".npy", softmax_pred)
                softmax_pred = fname + ".npy"

            results.append(
                export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, output_folder + "/" + fname + ".nii.gz",
                      properties, interpolation_order,
                      self.regions_class_order, None, None, softmax_fname,
                      None, force_separate_z, interpolation_order_z), )))

            pred_gt_tuples.append([
                output_folder + "/" + fname + ".nii.gz",
                self.gt_niftis_folder + "/" + fname + ".nii.gz"
            ])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples,
                             labels=list(range(self.num_classes)),
                             json_output_file=output_folder + "/" +
                             "summary.json",
                             json_name=job_name,
                             json_author="Fabian",
                             json_description="",
                             json_task=task)

        if run_postprocessing_on_folds:
            # in the old tuframework we would stop here. Now we add a postprocessing. This postprocessing can remove everything
            # except the largest connected component for each class. To see if this improves results, we do this for all
            # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
            # have this applied during inference as well
            self.print_to_log_file("determining postprocessing")
            determine_postprocessing(self.output_folder,
                                     self.gt_niftis_folder,
                                     validation_folder_name,
                                     final_subf_name=validation_folder_name +
                                     "_postprocessed",
                                     debug=debug)
            # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
            # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
        # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
        # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
        # be used later
        gt_nifti_folder = os.path.join(self.output_folder_base, "gt_niftis")
        os.makedirs(gt_nifti_folder, exist_ok=True)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError:
                    attempts += 1
                    sleep(1)

        self.network.train(current_mode)
        export_pool.close()
        export_pool.join()
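
A design note on the export calls used throughout these examples: each case is submitted via starmap_async with a one-element iterable of argument tuples, i.e. one task per call. apply_async expresses the same "one async call per case" pattern more directly; a minimal sketch with a hypothetical worker standing in for save_segmentation_nifti_from_softmax:

from multiprocessing import Pool


def export_case(pred, out_file):
    # hypothetical stand-in for the real export function
    return out_file, float(sum(pred))


if __name__ == "__main__":
    pool = Pool(2)
    results = [
        pool.apply_async(export_case, ([0.1, 0.9], "case_%02d.nii.gz" % i))
        for i in range(4)
    ]
    pool.close()
    pool.join()
    print([r.get() for r in results])
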