Example #1
0
def run(args):
    files = glob(args.file_regex)
    out_dir = os.path.abspath(args.out_dir)
    logger = Logger(out_dir,
                    active_file='extraction_log',
                    overwrite_existing=args.overwrite,
                    print_calling_method=False)
    logger("Args dump: {}".format(vars(args)))
    logger("Found {} files matching glob statement".format(len(files)))
    if len(files) == 0:
        return
    channels = ChannelMontageTuple(args.channels, relax=True)
    renamed_channels = args.rename_channels
    if renamed_channels and (len(renamed_channels) != len(channels)):
        raise ValueError("--rename_channels argument must have the same number"
                         " of elements as --channels. Got {} and {}.".format(
            len(channels), len(renamed_channels)
        ))

    logger("Extracting channels {}".format(channels.names))
    if renamed_channels:
        logger("Saving channels under names {}".format(renamed_channels))
    logger("Saving .h5 files to '{}'".format(out_dir))
    logger("Re-sampling: {}".format(args.resample))
    logger("-*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*-")
    extract(
        files=files,
        out_dir=out_dir,
        channels=channels,
        renamed_channels=renamed_channels,
        logger=logger,
        args=args
    )
Example #2
0
def get_logger(out_dir, overwrite, name="evaluation_log"):
    """
    Returns a Logger object for the given out_dir.
    The logger will throw an OSError if the dir exists and overwrite=False, in
    which case the script will terminate with a print message.
    """
    from mpunet.logging import Logger
    try:
        logger = Logger(out_dir,
                        active_file=name,
                        overwrite_existing=overwrite,
                        no_sub_folder=True)
    except OSError:
        from sys import exit
        print("[*] A logging file 'logs/{}' already exists. "
              "If you wish to overwrite this logfile, set the --overwrite "
              "flag.".format(name))
        exit(0)
    return logger
Example #3
0
def get_logger(project_dir, overwrite_existing):
    """
    Initialises and returns a Logger object for a given project directory.
    If a logfile already exists at the specified location, it will be
    overwritten if continue_training == True, otherwise raises RuntimeError

    Args:
        project_dir: Path to a mpunet project folder
        overwrite_existing: Whether to overwrite existing logfile in project_dir

    Returns:
        A mpunet Logger object initialized in project_dir
    """
    # Define Logger object
    from mpunet.logging import Logger
    try:
        logger = Logger(base_path=project_dir,
                        print_to_screen=True,
                        overwrite_existing=overwrite_existing)
    except OSError as e:
        raise RuntimeError("[*] A training session at '%s' already exists."
                           "\n    Use the --overwrite flag to "
                           "overwrite." % project_dir) from e
    return logger
Example #4
0
def entry_func(args=None):
    # Get parser
    parser = vars(get_parser().parse_args(args))

    # Get parser arguments
    cv_dir = os.path.abspath(parser["CV_dir"])
    out_dir = os.path.abspath(parser["out_dir"])
    create_folders(out_dir)
    await_PID = parser["wait_for"]
    run_split = parser["run_on_split"]
    start_from = parser["start_from"] or 0
    num_jobs = parser["num_jobs"] or 1

    # GPU settings
    num_GPUs = parser["num_GPUs"]
    force_GPU = parser["force_GPU"]
    ignore_GPU = parser["ignore_GPU"]
    monitor_GPUs_every = parser["monitor_GPUs_every"]

    # User input assertions
    _assert_force_and_ignore_gpus(force_GPU, ignore_GPU)
    if run_split:
        _assert_run_split(start_from, monitor_GPUs_every, num_jobs)

    # Wait for PID?
    if await_PID:
        from mpunet.utils import await_PIDs
        await_PIDs(await_PID)

    # Get file paths
    script = os.path.abspath(parser["script_prototype"])
    hparams = os.path.abspath(parser["hparams_prototype"])
    no_hparams = parser["no_hparams"]

    # Get list of folders of CV data to run on
    cv_folders = get_CV_folders(cv_dir)
    if run_split is not None:
        if run_split < 0 or run_split >= len(cv_folders):
            raise ValueError("--run_on_split should be in range [0-{}], "
                             "got {}".format(len(cv_folders) - 1, run_split))
        cv_folders = [cv_folders[run_split]]
        log_appendix = "_split{}".format(run_split)
    else:
        log_appendix = ""

    # Get a logger object
    logger = Logger(base_path="./",
                    active_file="output" + log_appendix,
                    print_calling_method=False,
                    overwrite_existing=True)

    if force_GPU:
        # Only these GPUs fill be chosen from
        from mpunet.utils import set_gpu
        set_gpu(force_GPU)
    if num_GPUs:
        # Get GPU sets (up to the number of splits)
        gpu_sets = get_free_GPU_sets(num_GPUs, ignore_GPU)[:len(cv_folders)]
    elif not num_jobs or num_jobs < 0:
        raise ValueError("Should specify a number of jobs to run in parallel "
                         "with the --num_jobs flag when using 0 GPUs pr. "
                         "process (--num_GPUs=0 was set).")
    else:
        gpu_sets = ["''"] * parser["num_jobs"]

    # Get process pool, lock and GPU queue objects
    lock = Lock()
    gpu_queue = Queue()
    for gpu in gpu_sets:
        gpu_queue.put(gpu)

    procs = []
    if monitor_GPUs_every is not None and monitor_GPUs_every:
        logger("\nOBS: Monitoring GPU pool every %i seconds\n" %
               monitor_GPUs_every)
        # Start a process monitoring new GPU availability over time
        stop_event = Event()
        t = Process(target=monitor_GPUs,
                    args=(monitor_GPUs_every, gpu_queue, num_GPUs, ignore_GPU,
                          gpu_sets, stop_event))
        t.start()
        procs.append(t)
    else:
        stop_event = None
    try:
        for cv_folder in cv_folders[start_from:]:
            gpus = gpu_queue.get()
            t = Process(target=run_sub_experiment,
                        args=(cv_folder, out_dir, script, hparams, no_hparams,
                              gpus, gpu_queue, lock, logger))
            t.start()
            procs.append(t)
            for t in procs:
                if not t.is_alive():
                    t.join()
    except KeyboardInterrupt:
        for t in procs:
            t.terminate()
    if stop_event is not None:
        stop_event.set()
    for t in procs:
        t.join()
Example #5
0
def entry_func(args=None):

    # Project base path
    args = vars(get_argparser().parse_args(args))
    basedir = os.path.abspath(args["project_dir"])
    overwrite = args["overwrite"]
    continue_training = args["continue_training"]
    eval_prob = args["eval_prob"]
    await_PID = args["wait_for"]
    dice_weight = args["dice_weight"]
    print("Fitting fusion model for project-folder: %s" % basedir)

    # Minimum images in validation set before also using training images
    min_val_images = 15

    # Fusion model training params
    epochs = args['epochs']
    fm_batch_size = args["batch_size"]

    # Early stopping params
    early_stopping = args["early_stopping"]

    # Wait for PID?
    if await_PID:
        from mpunet.utils import await_PIDs
        await_PIDs(await_PID)

    # Fetch GPU(s)
    num_GPUs = args["num_GPUs"]
    force_gpu = args["force_GPU"]
    # Wait for free GPU
    if not force_gpu:
        await_and_set_free_gpu(N=num_GPUs, sleep_seconds=120)
    else:
        set_gpu(force_gpu)

    # Get logger
    logger = Logger(base_path=basedir,
                    active_file="train_fusion",
                    overwrite_existing=overwrite)

    # Get YAML hyperparameters
    hparams = YAMLHParams(os.path.join(basedir, "train_hparams.yaml"))

    # Get some key settings
    n_classes = hparams["build"]["n_classes"]

    if hparams["build"]["out_activation"] == "linear":
        # Trained with logit targets?
        hparams["build"][
            "out_activation"] = "softmax" if n_classes > 1 else "sigmoid"

    # Get views
    views = np.load("%s/views.npz" % basedir)["arr_0"]
    del hparams["fit"]["views"]

    # Get weights and set fusion (output) path
    weights = get_best_model("%s/model" % basedir)
    weights_name = os.path.splitext(os.path.split(weights)[-1])[0]
    fusion_weights = "%s/model/fusion_weights/" \
                     "%s_fusion_weights.h5" % (basedir, weights_name)
    create_folders(os.path.split(fusion_weights)[0])

    # Log a few things
    log(logger, hparams, views, weights, fusion_weights)

    # Check if exists already...
    if not overwrite and os.path.exists(fusion_weights):
        from sys import exit
        print("\n[*] A fusion weights file already exists at '%s'."
              "\n    Use the --overwrite flag to overwrite." % fusion_weights)
        exit(0)

    # Load validation data
    images = ImagePairLoader(**hparams["val_data"], logger=logger)
    is_validation = {m.identifier: True for m in images}

    # Define random sets of images to train on simul. (cant be all due
    # to memory constraints)
    image_IDs = [m.identifier for m in images]

    if len(images) < min_val_images:
        # Pick N random training images
        diff = min_val_images - len(images)
        logger("Adding %i training images to set" % diff)

        # Load the training data and pick diff images
        train = ImagePairLoader(**hparams["train_data"], logger=logger)
        indx = np.random.choice(np.arange(len(train)),
                                diff,
                                replace=diff > len(train))

        # Add the images to the image set set
        train_add = [train[i] for i in indx]
        for m in train_add:
            is_validation[m.identifier] = False
            image_IDs.append(m.identifier)
        images.add_images(train_add)

    # Append to length % sub_size == 0
    sub_size = args["images_per_round"]
    rest = int(sub_size * np.ceil(len(image_IDs) / sub_size)) - len(image_IDs)
    if rest:
        image_IDs += list(np.random.choice(image_IDs, rest, replace=False))

    # Shuffle and split
    random.shuffle(image_IDs)
    sets = [
        set(s) for s in np.array_split(image_IDs,
                                       len(image_IDs) / sub_size)
    ]
    assert (contains_all_images(sets, image_IDs))

    # Define fusion model (named 'org' to store reference to orgiginal model if
    # multi gpu model is created below)
    fusion_model = FusionModel(n_inputs=len(views),
                               n_classes=n_classes,
                               weight=dice_weight,
                               logger=logger,
                               verbose=False)

    if continue_training:
        fusion_model.load_weights(fusion_weights)
        print("\n[OBS] CONTINUED TRAINING FROM:\n", fusion_weights)

    import tensorflow as tf
    with tf.distribute.MirroredStrategy().scope():
        # Define model
        unet = init_model(hparams["build"], logger)
        print("\n[*] Loading weights: %s\n" % weights)
        unet.load_weights(weights, by_name=True)

    # Compile the model
    logger("Compiling...")
    metrics = [
        "sparse_categorical_accuracy", sparse_fg_precision, sparse_fg_recall
    ]
    fusion_model.compile(optimizer=Adam(lr=1e-3),
                         loss=fusion_model.loss,
                         metrics=metrics)
    fusion_model._log()

    try:
        _run_fusion_training(sets, logger, hparams, min_val_images,
                             is_validation, views, n_classes, unet,
                             fusion_model, early_stopping, fm_batch_size,
                             epochs, eval_prob, fusion_weights)
    except KeyboardInterrupt:
        pass
    finally:
        if not os.path.exists(os.path.split(fusion_weights)[0]):
            os.mkdir(os.path.split(fusion_weights)[0])
        # Save fusion model weights
        # OBS: Must be original model if multi-gpu is performed!
        fusion_model.save_weights(fusion_weights)
Example #6
0
def run(args, gpu_mon):
    """
    Run the script according to args - Please refer to the argparser.

    args:
        args:    (Namespace)  command-line arguments
        gpu_mon: (GPUMonitor) Initialized mpunet GPUMonitor object
    """
    assert_args(args)
    from mpunet.logging import Logger
    from utime.train import Trainer
    from utime.hyperparameters import YAMLHParams
    from utime.utils.scriptutils import (assert_project_folder,
                                         make_multi_gpu_model)
    from utime.utils.scriptutils.train import (get_train_and_val_datasets,
                                               get_h5_train_and_val_datasets,
                                               get_data_queues, get_generators,
                                               find_and_set_gpus,
                                               get_samples_per_epoch,
                                               save_final_weights)

    project_dir = os.path.abspath("./")
    assert_project_folder(project_dir)
    if args.overwrite and not args.continue_training:
        from mpunet.bin.train import remove_previous_session
        remove_previous_session(project_dir)

    # Get logger object
    logger = Logger(project_dir,
                    overwrite_existing=args.overwrite,
                    append_existing=args.continue_training,
                    log_prefix=args.log_file_prefix)
    logger("Args dump: {}".format(vars(args)))

    # Settings depending on --preprocessed flag.
    if args.preprocessed:
        yaml_path = utime.Defaults.get_pre_processed_hparams_path(project_dir)
        dataset_func = get_h5_train_and_val_datasets
        train_queue_type = 'eager'
        val_queue_type = 'eager'
    else:
        yaml_path = utime.Defaults.get_hparams_path(project_dir)
        dataset_func = get_train_and_val_datasets
        train_queue_type = args.train_queue_type
        val_queue_type = args.val_queue_type

    # Load hparams
    hparams = YAMLHParams(yaml_path, logger=logger)
    update_hparams_with_command_line_arguments(hparams, args)

    # Initialize and load (potentially multiple) datasets
    train_datasets, val_datasets = dataset_func(hparams, args.no_val,
                                                args.train_on_val, logger)

    if args.just:
        keep_n_random(*train_datasets,
                      *val_datasets,
                      keep=args.just,
                      logger=logger)

    # Get a data loader queue object for each dataset
    train_datasets_queues = get_data_queues(
        datasets=train_datasets,
        queue_type=train_queue_type,
        max_loaded_per_dataset=args.max_loaded_per_dataset,
        num_access_before_reload=args.num_access_before_reload,
        logger=logger)
    if val_datasets:
        val_dataset_queues = get_data_queues(
            datasets=val_datasets,
            queue_type=val_queue_type,
            max_loaded_per_dataset=args.max_loaded_per_dataset,
            num_access_before_reload=args.num_access_before_reload,
            study_loader=getattr(train_datasets_queues[0], 'study_loader',
                                 None),
            logger=logger)
    else:
        val_dataset_queues = None

    # Get sequence generators for all datasets
    train_seq, val_seq = get_generators(train_datasets_queues,
                                        val_dataset_queues=val_dataset_queues,
                                        hparams=hparams)

    # Add additional (inferred) parameters to parameter file
    hparams.set_value("build",
                      "n_classes",
                      train_seq.n_classes,
                      overwrite=True)
    hparams.set_value("build",
                      "batch_shape",
                      train_seq.batch_shape,
                      overwrite=True)
    hparams.save_current()

    if args.continue_training:
        # Prepare the project directory for continued training.
        # Please refer to the function docstring for details
        from utime.models.model_init import prepare_for_continued_training
        parameter_file = prepare_for_continued_training(
            hparams=hparams, project_dir=project_dir, logger=logger)
    else:
        parameter_file = args.initialize_from  # most often is None

    # Set the GPU visibility
    num_GPUs = find_and_set_gpus(gpu_mon, args.force_GPU, args.num_GPUs)
    # Initialize and potential load parameters into the model
    from utime.models.model_init import init_model, load_from_file
    org_model = init_model(hparams["build"], logger)
    if parameter_file:
        load_from_file(org_model, parameter_file, logger, by_name=True)
    model, org_model = make_multi_gpu_model(org_model, num_GPUs)

    # Prepare a trainer object. Takes care of compiling and training.
    trainer = Trainer(model, org_model=org_model, logger=logger)

    import tensorflow as tf
    trainer.compile_model(n_classes=hparams["build"].get("n_classes"),
                          reduction=tf.keras.losses.Reduction.NONE,
                          **hparams["fit"])

    # Fit the model on a number of samples as specified in args
    samples_pr_epoch = get_samples_per_epoch(train_seq,
                                             args.max_train_samples_per_epoch)

    _ = trainer.fit(train=train_seq,
                    val=val_seq,
                    train_samples_per_epoch=samples_pr_epoch,
                    **hparams["fit"])

    # Save weights to project_dir/model/{final_weights_file_name}.h5
    # Note: these weights are rarely used, as a checkpoint callback also saves
    # weights to this directory through training
    save_final_weights(project_dir,
                       model=model,
                       file_name=args.final_weights_file_name,
                       logger=logger)
Example #7
0
def run(args, gpu_mon):
    """
    Run the script according to args - Please refer to the argparser.

    args:
        args:    (Namespace)  command-line arguments
        gpu_mon: (GPUMonitor) Initialized MultiPlanarUNet GPUMonitor object
    """
    assert_args(args)
    from mpunet.logging import Logger
    from utime.train import Trainer
    from utime.hyperparameters import YAMLHParams
    from utime.utils.scriptutils import (assert_project_folder,
                                         make_multi_gpu_model)
    from utime.utils.scriptutils.train import (get_train_and_val_datasets,
                                               get_generators,
                                               find_and_set_gpus,
                                               get_samples_per_epoch,
                                               save_final_weights)

    project_dir = os.path.abspath("./")
    assert_project_folder(project_dir)
    if args.overwrite and not args.continue_training:
        from mpunet.bin.train import remove_previous_session
        remove_previous_session(project_dir)

    # Get logger object
    logger = Logger(project_dir,
                    overwrite_existing=args.overwrite
                    or args.continue_training,
                    log_prefix=args.log_file_prefix)
    logger("Args dump: {}".format(vars(args)))

    # Load hparams
    hparams = YAMLHParams(os.path.join(project_dir, "hparams.yaml"),
                          logger=logger)
    update_hparams_with_command_line_arguments(hparams, args)

    # Initialize and load (potentially multiple) datasets
    datasets, no_val = get_train_and_val_datasets(hparams, args.no_val,
                                                  args.train_on_val, logger)

    # Load data in all datasets
    for data in datasets:
        for d in data:
            d.load(1 if args.just_one else None)
            d.pairs = d.loaded_pairs  # remove the other pairs

    # Get sequence generators for all datasets
    train_seq, val_seq = get_generators(datasets, hparams, no_val)

    # Add additional (inferred) parameters to parameter file
    hparams.set_value("build",
                      "n_classes",
                      train_seq.n_classes,
                      overwrite=True)
    hparams.set_value("build",
                      "batch_shape",
                      train_seq.batch_shape,
                      overwrite=True)
    hparams.save_current()

    if args.continue_training:
        # Prepare the project directory for continued training.
        # Please refer to the function docstring for details
        from utime.models.model_init import prepare_for_continued_training
        parameter_file = prepare_for_continued_training(
            hparams=hparams, project_dir=project_dir, logger=logger)
    else:
        parameter_file = args.initialize_from  # most often is None

    # Set the GPU visibility
    num_GPUs = find_and_set_gpus(gpu_mon, args.force_GPU, args.num_GPUs)
    # Initialize and potential load parameters into the model
    from utime.models.model_init import init_model, load_from_file
    org_model = init_model(hparams["build"], logger)
    if parameter_file:
        load_from_file(org_model, parameter_file, logger, by_name=True)
    model, org_model = make_multi_gpu_model(org_model, num_GPUs)

    # Prepare a trainer object. Takes care of compiling and training.
    trainer = Trainer(model, org_model=org_model, logger=logger)
    trainer.compile_model(n_classes=hparams["build"].get("n_classes"),
                          **hparams["fit"])

    # Fit the model on a number of samples as specified in args
    samples_pr_epoch = get_samples_per_epoch(train_seq,
                                             args.max_train_samples_per_epoch,
                                             args.val_samples_per_epoch)
    _ = trainer.fit(train=train_seq,
                    val=val_seq,
                    train_samples_per_epoch=samples_pr_epoch[0],
                    val_samples_per_epoch=samples_pr_epoch[1],
                    **hparams["fit"])

    # Save weights to project_dir/model/{final_weights_file_name}.h5
    # Note: these weights are rarely used, as a checkpoint callback also saves
    # weights to this directory through training
    save_final_weights(project_dir,
                       model=model,
                       file_name=args.final_weights_file_name,
                       logger=logger)
Example #8
0
def run(args):
    cv_dir = os.path.abspath(args.CV_dir)
    # Get list of folders of CV data to run on
    cv_folders = get_CV_folders(cv_dir)
    assert_args(args, n_splits=len(cv_folders))
    out_dir = os.path.abspath(args.out_dir)
    hparams_dir = os.path.abspath(args.hparams_prototype_dir)
    prepare_hparams_dir(hparams_dir)
    create_folders(out_dir)

    # Wait for PID?
    if args.wait_for:
        from mpunet.utils import await_PIDs
        await_PIDs(args.wait_for)

    if args.run_on_split is not None:
        cv_folders = [cv_folders[args.run_on_split]]
        log_appendix = "_split{}".format(args.run_on_split)
    else:
        log_appendix = ""

    # Get a logger object
    logger = Logger(base_path="./",
                    active_file="output" + log_appendix,
                    print_calling_method=False,
                    overwrite_existing=True)

    if args.force_GPU:
        # Only these GPUs fill be chosen from
        from mpunet.utils import set_gpu
        set_gpu(args.force_GPU)
    if args.num_GPUs:
        # Get GPU sets (up to the number of splits)
        gpu_sets = get_free_GPU_sets(args.num_GPUs,
                                     args.ignore_GPU)[:len(cv_folders)]
    elif not args.num_jobs or args.num_jobs < 0:
        raise ValueError("Should specify a number of jobs to run in parallel "
                         "with the --num_jobs flag when using 0 GPUs pr. "
                         "process (--num_GPUs=0 was set).")
    else:
        gpu_sets = ["''"] * args.num_jobs

    # Get process pool, lock and GPU queue objects
    lock = Lock()
    gpu_queue = Queue()
    for gpu in gpu_sets:
        gpu_queue.put(gpu)

    # Get file paths
    script = os.path.abspath(args.script_prototype)

    # Get GPU monitor process
    running_processes, stop_event = start_gpu_monitor_process(
        args, gpu_queue, gpu_sets, logger)

    try:
        for cv_folder in cv_folders[args.start_from:]:
            gpus = gpu_queue.get()
            t = Process(target=run_sub_experiment,
                        args=(cv_folder, out_dir, script, hparams_dir,
                              args.no_hparams, gpus, gpu_queue, lock, logger))
            t.start()
            running_processes.append(t)
            for t in running_processes:
                if not t.is_alive():
                    t.join()
    except KeyboardInterrupt:
        for t in running_processes:
            t.terminate()
    if stop_event is not None:
        stop_event.set()
    for t in running_processes:
        t.join()
Example #9
0
def run(args):
    """
    Run the script according to args - Please refer to the argparser.

    args:
        args:    (Namespace)  command-line arguments
    """
    from mpunet.logging import Logger
    from utime.hyperparameters import YAMLHParams
    from utime.utils.scriptutils import assert_project_folder
    from utime.utils.scriptutils import get_splits_from_all_datasets

    project_dir = os.path.abspath("./")
    assert_project_folder(project_dir)

    # Get logger object
    logger = Logger(project_dir + "/preprocessing_logs",
                    active_file='preprocessing',
                    overwrite_existing=args.overwrite,
                    no_sub_folder=True)
    logger("Args dump: {}".format(vars(args)))

    # Load hparams
    hparams = YAMLHParams(Defaults.get_hparams_path(project_dir),
                          logger=logger,
                          no_version_control=True)

    # Initialize and load (potentially multiple) datasets
    datasets = get_splits_from_all_datasets(hparams,
                                            splits_to_load=args.dataset_splits,
                                            logger=logger,
                                            return_data_hparams=True)

    # Check if file exists, and overwrite if specified
    if os.path.exists(args.out_path):
        if args.overwrite:
            os.remove(args.out_path)
        else:
            from sys import exit
            logger("Out file at {} exists, and --overwrite was not set."
                   "".format(args.out_path))
            exit(0)

    # Create dataset hparams output directory
    out_dir = Defaults.get_pre_processed_data_configurations_dir(project_dir)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    with ThreadPoolExecutor(args.num_threads) as pool:
        with h5py.File(args.out_path, "w") as h5_file:
            for dataset, dataset_hparams in datasets:
                # Create a new version of the dataset-specific hyperparameters
                # that contain only the fields needed for pre-processed data
                name = dataset[0].identifier.split("/")[0]
                hparams_out_path = os.path.join(out_dir, name + ".yaml")
                copy_dataset_hparams(dataset_hparams, hparams_out_path)

                # Update paths to dataset hparams in main hparams file
                hparams.set_value(subdir='datasets',
                                  name=name,
                                  value=hparams_out_path,
                                  overwrite=True)
                # Save the hyperparameters to the pre-processed main hparams
                hparams.save_current(
                    Defaults.get_pre_processed_hparams_path(project_dir))

                # Process each dataset
                for split in dataset:
                    # Add this split to the dataset-specific hparams
                    add_dataset_entry(hparams_out_path, args.out_path,
                                      split.identifier.split("/")[-1].lower(),
                                      split.period_length_sec)
                    # Overwrite potential load time channel sampler to None
                    split.set_load_time_channel_sampling_groups(None)

                    # Create dataset group
                    split_group = h5_file.create_group(split.identifier)

                    # Run the preprocessing
                    process_func = partial(preprocess_study, split_group)

                    logger.print_to_screen = True
                    logger("Preprocessing dataset:", split)
                    logger.print_to_screen = False
                    n_pairs = len(split.pairs)
                    for i, _ in enumerate(pool.map(process_func, split.pairs)):
                        print("  {}/{}".format(i + 1, n_pairs),
                              end='\r',
                              flush=True)
                    print("")