def crawl_and_copy(current_folder,
                   out_folder,
                   prefix="fabian_",
                   suffix="ummary.json"):
    """
    Recursively walk all subfolders of current_folder and copy every file whose
    name ends with suffix into out_folder, prepending an automatically generated
    prefix that encodes the folder path (subfolder names joined with "__").
    Files are only copied from folders whose path contains "fold0".
    :param current_folder: root folder to crawl
    :param out_folder: destination folder for the copied (renamed) files
    :param prefix: prefix prepended to each copied file's name; grows with each
        recursion level
    :param suffix: only files ending with this suffix are copied
    :return: None
    """
    s = subdirs(current_folder, join=False)
    f = subfiles(current_folder, join=False)
    f = [i for i in f if i.endswith(suffix)]
    if current_folder.find("fold0") != -1:
        for fl in f:
            shutil.copy(os.path.join(current_folder, fl),
                        os.path.join(out_folder, prefix + fl))
    for su in s:
        if prefix == "":
            add = su
        else:
            add = "__" + su
        # Bug fix: propagate suffix into the recursion. Previously subfolders
        # always fell back to the default suffix, ignoring the caller's value.
        crawl_and_copy(os.path.join(current_folder, su),
                       out_folder,
                       prefix=prefix + add,
                       suffix=suffix)
# Beispiel #2 (0)  -- example-separator text from the original code listing; kept as a comment so the module remains valid Python
def plan_and_preprocess(task_string,
                        processes_lowres=default_num_threads,
                        processes_fullres=3,
                        no_preprocessing=False):
    """
    Run experiment planning (3D then 2D) for one task and optionally execute
    the actual preprocessing.

    :param task_string: task identifier; used to locate the cropped data and
        the preprocessing output directories
    :param processes_lowres: number of processes for low-resolution preprocessing
    :param processes_fullres: number of processes for full-resolution preprocessing
    :param no_preprocessing: if True, only plan experiments; skip preprocessing
        and the per-slice class annotation step
    :return: None
    """
    from tuframework.experiment_planning.experiment_planner_baseline_2DUNet import ExperimentPlanner2D
    from tuframework.experiment_planning.experiment_planner_baseline_3DUNet import ExperimentPlanner

    preprocessing_output_dir_this_task_train = preprocessing_output_dir + "/" + task_string
    cropped_out_dir = tuFramework_cropped_data + "/" + task_string
    # exist_ok avoids the isdir/makedirs race when several tasks run concurrently
    os.makedirs(preprocessing_output_dir_this_task_train, exist_ok=True)

    shutil.copy(cropped_out_dir + "/" + "dataset_properties.pkl",
                preprocessing_output_dir_this_task_train)
    shutil.copy(
        tuFramework_raw_data + "/" + task_string + "/" + "dataset.json",
        preprocessing_output_dir_this_task_train)

    exp_planner = ExperimentPlanner(cropped_out_dir,
                                    preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing((processes_lowres, processes_fullres))

    exp_planner = ExperimentPlanner2D(
        cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing(processes_fullres)

    # write which class is in which slice to all training cases (required to speed up 2D Dataloader)
    # This is done for all data so that if we wanted to use them with 2D we could do so

    if not no_preprocessing:
        p = Pool(default_num_threads)
        try:
            # if there is more than one my_data_identifier (different branches) then this code will
            # run for all of them if they start with the same string. not problematic, but not pretty
            stages = [
                i for i in subdirs(
                    preprocessing_output_dir_this_task_train, join=True, sort=True)
                if "stage" in os.path.basename(i)
            ]
            for s in stages:
                print(os.path.basename(s))
                list_of_npz_files = subfiles(s, False, None, ".npz", True)
                list_of_pkl_files = [i[:-4] + ".pkl" for i in list_of_npz_files]
                all_classes = []
                for pk in list_of_pkl_files:
                    with open(pk, 'rb') as f:
                        props = pickle.load(f)
                    all_classes_tmp = np.array(props['classes'])
                    # negative labels (e.g. the -1 "outside" marker) are excluded
                    all_classes.append(all_classes_tmp[all_classes_tmp >= 0])
                p.map(add_classes_in_slice_info,
                      zip(list_of_npz_files, list_of_pkl_files, all_classes))
            p.close()
            p.join()
        finally:
            # terminate() is a no-op after a clean close()/join(); on an exception
            # it ensures worker processes are not leaked
            p.terminate()