Example #1
0
def setup(options):
    """
    Prepare everything the prediction pipeline needs before it runs.

    The steps are performed in this order: the output directory is set up
    (and created), logging is configured to use it, the random seed is
    configured and the device is configured. The options are then logged
    and, when ``options.save_config`` is set, written out with
    ``save_config_file``.

    Args:
        options(Namespace): Pipeline specific options. The attributes read
            here are ``output_dir``, ``run_uuid``, ``debug``, ``quiet``,
            ``seed``, ``gpu_id`` and ``save_config``. Note that the
            ``output_dir`` attribute is deleted from ``options`` before
            this function returns.

    Returns:
        The output directory as returned by ``setup_output_directory``.
    """
    run_dir = setup_output_directory(
        options.output_dir,
        options.run_uuid,
        experiment_id=None,
        create=True,
    )
    configure_logging(
        output_dir=run_dir, debug=options.debug, quiet=options.quiet
    )
    configure_seed(options.seed)
    configure_device(options.gpu_id)

    # Record the full set of options and the resolved directory in the log.
    logger.info(pformat(vars(options)))
    logger.info("Local output directory is: {}".format(run_dir))

    config_target = options.save_config
    if config_target:
        save_config_file(options, config_target)

    # FIXME: remove this deletion after making sure no other place uses
    # `options.output_dir`! # noqa
    del options.output_dir
    return run_dir
Example #2
0
def setup_run(directory: Path, seed: int, debug=False, quiet=False) -> Path:
    """Create a fresh numbered folder for the Optuna search outputs.

    When ``directory`` does not exist yet, the new folder is
    ``directory / '0'``. Otherwise the folder is named after the number of
    entries currently matched by ``directory.glob('*')``; if a folder with
    that name is already there, it is first renamed to
    ``<name>_backup_<ISO timestamp>`` so the new folder starts out empty.

    Logging and the random seed are configured once the folder exists.

    Arguments:
        directory: root under which the numbered folder is created.
        seed: value passed on to ``configure_seed``.
        debug: passed to ``configure_logging`` as ``verbose``.
        quiet: passed to ``configure_logging`` as ``quiet``.

    Return:
        the path of the newly created folder.
    """
    # TODO: replace all of this with the MLFlow integration: optuna.integration.mlflow
    #   In particular: let the output directory be created by MLflow entirely
    if directory.exists():
        # Name the new folder after the current number of entries
        entry_count = sum(1 for _ in directory.glob('*'))
        output_dir = directory / str(entry_count)
        if output_dir.exists():
            # Keep the old contents around under a timestamped name
            backup_directory = output_dir.with_name(
                f'{output_dir.name}_backup_{datetime.now().isoformat()}')
            logger.warning(
                f'Folder {output_dir} already exists; moving it to {backup_directory}'
            )
            output_dir.rename(backup_directory)
    else:
        # Brand new root: start counting at '0'
        output_dir = directory / '0'
    output_dir.mkdir(parents=True)

    logger.info(f'Initializing new search folder at: {output_dir}')

    configure_logging(output_dir=output_dir, verbose=debug, quiet=quiet)
    configure_seed(seed)

    return output_dir
Example #3
0
def setup_run(
    config: RunConfig, quiet=False, debug=False, anchor_dir: Path = None
) -> Path:
    """Get the environment ready for the prediction pipeline.

    The output directory is resolved and created, then logging and the
    random seed are configured.

    When ``config.output_dir`` is empty, the directory defaults to
    ``runs/0/<run_id>``, where ``<run_id>`` is ``config.run_id`` or a newly
    generated UUID hex string; ``anchor_dir``, if given, is prepended to
    that default. A directory set explicitly in ``config.output_dir`` is
    used as is.

    Arguments:
        config: configuration options.
        quiet: whether to suppress info log messages.
        debug: whether to additionally log debug messages
               (:param:`quiet` has precedence)
        anchor_dir: directory to use as root for the default output path.

    Return:
        the resolved path to the output directory.
    """
    output_dir = config.output_dir
    if not output_dir:
        # Nothing configured: fall back to runs/<experiment_id>/<run_id>
        default_experiment = 0
        # Create a hash when the config does not name the run
        run_name = config.run_id or uuid.uuid4().hex
        relative_dir = Path('runs') / str(default_experiment) / run_name
        output_dir = anchor_dir / relative_dir if anchor_dir else relative_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    configure_logging(output_dir=output_dir, verbose=debug, quiet=quiet)
    configure_seed(config.seed)

    logger.info(f'Local output directory is: {output_dir}')

    return output_dir
Example #4
0
def setup_run(config: RunConfig,
              debug=False,
              quiet=False,
              anchor_dir: Path = None
              ) -> Tuple[Path, Optional[MLFlowTrackingLogger]]:
    """Get the environment ready for the training pipeline.

    An MLflow tracking logger is created when ``config.use_mlflow`` is set;
    the output directory is then resolved and created, and logging and the
    random seed are configured.

    With MLflow enabled, the experiment and run IDs come from the tracking
    logger. Otherwise the experiment ID is ``0`` and the run ID is
    ``config.run_id`` or a newly generated UUID hex string. When
    ``config.output_dir`` is empty, the directory defaults to
    ``runs/<experiment_id>/<run_id>``, prefixed with ``anchor_dir`` if given.

    Arguments:
        config: configuration options.
        quiet: whether to suppress info log messages.
        debug: whether to additionally log debug messages
               (:param:`quiet` has precedence)
        anchor_dir: directory to use as root for the default output path.

    Return:
         a tuple with the resolved path to the output directory and the
         tracking logger (``None`` when MLflow is not in use).
    """
    # Start from the values used without MLflow; override them below
    tracking_logger = None
    experiment_id = 0
    if config.use_mlflow:
        tracking_logger = MLFlowTrackingLogger(
            experiment_name=config.experiment_name,
            run_id=config.run_id,
            tracking_uri=config.mlflow_tracking_uri,
            always_log_artifacts=config.mlflow_always_log_artifacts,
        )
        experiment_id = tracking_logger.experiment_id
        run_id = tracking_logger.run_id
    else:
        # Create a hash when the config does not name the run
        run_id = config.run_id or uuid.uuid4().hex

    # Resolve and create the output directory
    output_dir = config.output_dir
    if not output_dir:
        relative_dir = Path('runs') / str(experiment_id) / run_id
        output_dir = anchor_dir / relative_dir if anchor_dir else relative_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    configure_logging(output_dir=output_dir, verbose=debug, quiet=quiet)
    configure_seed(config.seed)

    logging.info(f'This is run ID: {run_id}')
    logging.info(f'Inside experiment ID: '
                 f'{experiment_id} ({config.experiment_name})')
    logging.info(f'Local output directory is: {output_dir}')

    if tracking_logger:
        # Tell the user where the tracked run and its artifacts end up
        logging.info(
            f'Logging execution to MLFlow at: {tracking_logger.tracking_uri}')
        logging.info(
            f'Artifacts location: {tracking_logger.get_artifact_uri()}')

    return output_dir, tracking_logger
Example #5
0
def setup(output_dir, seed=42, gpu_id=None, debug=False, quiet=False):
    """
    Prepare everything the training pipeline needs before it runs.

    The output directory is set up (and created) from the run and
    experiment IDs of ``tracking_logger``, logging is configured to use it
    and the random seed is configured. Run details are then logged and the
    CUDA device is selected when a GPU is requested.

    ``tracking_logger`` is not a parameter: it is looked up in the enclosing
    scope when the function runs.

    Args:
        output_dir: Path to directory to use or None, in which case one is
            created automatically.
        seed (int): Random seed passed to ``configure_seed``.
        gpu_id (int): GPU number to use; `None` or a negative number means
            the CPU is used.
        debug (bool): Whether to increase the verbosity of output messages.
        quiet (bool): Whether to decrease the verbosity of output messages.
            Takes precedence over `debug`.

    Returns:
        The output directory as returned by ``setup_output_directory``.
    """
    run_dir = setup_output_directory(
        output_dir,
        tracking_logger.run_uuid,
        tracking_logger.experiment_id,
        create=True,
    )
    configure_logging(output_dir=run_dir, debug=debug, quiet=quiet)
    configure_seed(seed)

    logging.info("This is run ID: {}".format(tracking_logger.run_uuid))
    logging.info(
        "Inside experiment ID: {} ({})".format(
            tracking_logger.experiment_id,
            tracking_logger.experiment_name,
        )
    )
    logging.info("Local output directory is: {}".format(run_dir))
    logging.info(
        "Logging execution to MLflow at: {}".format(
            tracking_logger.get_tracking_uri()
        )
    )

    # Only a non-negative GPU number selects a CUDA device
    use_gpu = gpu_id is not None and gpu_id >= 0
    if use_gpu:
        torch.cuda.set_device(gpu_id)
        logging.info("Using GPU: {}".format(gpu_id))
    else:
        logging.info("Using CPU")

    logging.info(
        "Artifacts location: {}".format(tracking_logger.get_artifact_uri())
    )

    return run_dir