def setup(options):
    """Analyze pipeline options and set up requirements for running the
    prediction pipeline.

    This includes setting up the output directory, random seeds and the
    device where predictions are run.

    Args:
        options(Namespace): Pipeline specific options

    Returns:
        output_dir(str): Path to output directory
    """
    run_dir = setup_output_directory(
        options.output_dir, options.run_uuid, experiment_id=None, create=True
    )
    configure_logging(output_dir=run_dir, debug=options.debug, quiet=options.quiet)
    configure_seed(options.seed)
    configure_device(options.gpu_id)

    # Dump the full option set first so the run is reproducible from the log.
    logger.info(pformat(vars(options)))
    logger.info("Local output directory is: {}".format(run_dir))

    if options.save_config:
        save_config_file(options, options.save_config)

    # FIXME: remove this after making sure no other place uses it!  # noqa
    del options.output_dir
    return run_dir
def setup_run(directory: Path, seed: int, debug=False, quiet=False) -> Path: """Set up the output directory structure for the Optuna search outputs.""" # TODO: replace all of this with the MLFlow integration: optuna.integration.mlflow # In particular: let the output directory be created by MLflow entirely if not directory.exists(): # Initialize a new folder with name '0' output_dir = directory / '0' output_dir.mkdir(parents=True) else: # Initialize a new folder incrementing folder count output_dir = directory / str(len(list(directory.glob('*')))) if output_dir.exists(): backup_directory = output_dir.with_name( f'{output_dir.name}_backup_{datetime.now().isoformat()}') logger.warning( f'Folder {output_dir} already exists; moving it to {backup_directory}' ) output_dir.rename(backup_directory) output_dir.mkdir(parents=True) logger.info(f'Initializing new search folder at: {output_dir}') configure_logging(output_dir=output_dir, verbose=debug, quiet=quiet) configure_seed(seed) return output_dir
def setup_run(
    config: RunConfig, quiet=False, debug=False, anchor_dir: Path = None
) -> Path:
    """Prepare for running the prediction pipeline.

    This includes setting up the output directory, random seeds, and loggers.

    Arguments:
        config: configuration options.
        quiet: whether to suppress info log messages.
        debug: whether to additionally log debug messages (:param:`quiet`
               has precedence)
        anchor_dir: directory to use as root for paths.

    Return:
        the resolved path to the output directory.
    """
    # Setup output directory
    target_dir = config.output_dir
    if not target_dir:
        # No explicit directory configured: derive runs/<experiment>/<run-id>.
        exp_id = 0
        run_hash = config.run_id or uuid.uuid4().hex  # Create hash if needed
        target_dir = Path('runs') / str(exp_id) / run_hash
        if anchor_dir:
            target_dir = anchor_dir / target_dir
    target_dir.mkdir(parents=True, exist_ok=True)

    configure_logging(output_dir=target_dir, verbose=debug, quiet=quiet)
    configure_seed(config.seed)

    logger.info(f'Local output directory is: {target_dir}')
    return target_dir
def setup_run(config: RunConfig,
              debug=False,
              quiet=False,
              anchor_dir: Path = None
              ) -> Tuple[Path, Optional[MLFlowTrackingLogger]]:
    """Prepare for running the training pipeline.

    This includes setting up the output directory, random seeds, and loggers.

    Arguments:
        config: configuration options.
        quiet: whether to suppress info log messages.
        debug: whether to additionally log debug messages (:param:`quiet`
               has precedence)
        anchor_dir: directory to use as root for paths.

    Return:
        a tuple with the resolved path to the output directory and
        the experiment logger (``None`` if not configured).
    """
    # Setup tracking logger
    if config.use_mlflow:
        tracking_logger = MLFlowTrackingLogger(
            experiment_name=config.experiment_name,
            run_id=config.run_id,
            tracking_uri=config.mlflow_tracking_uri,
            always_log_artifacts=config.mlflow_always_log_artifacts,
        )
        experiment_id = tracking_logger.experiment_id
        run_id = tracking_logger.run_id
    else:
        tracking_logger = None
        experiment_id = 0
        run_id = config.run_id or uuid.uuid4().hex  # Create hash if needed

    # Setup output directory
    output_dir = config.output_dir
    if not output_dir:
        output_dir = Path('runs') / str(experiment_id) / run_id
        if anchor_dir:
            output_dir = anchor_dir / output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    configure_logging(output_dir=output_dir, verbose=debug, quiet=quiet)
    configure_seed(config.seed)

    # Use the module-level ``logger`` (the one configure_logging sets up and
    # that the sibling setup functions use) instead of ``logging.info``,
    # which would log through the root logger and implicitly basicConfig it.
    logger.info(f'This is run ID: {run_id}')
    logger.info(f'Inside experiment ID: '
                f'{experiment_id} ({config.experiment_name})')
    logger.info(f'Local output directory is: {output_dir}')
    if tracking_logger:
        logger.info(
            f'Logging execution to MLFlow at: {tracking_logger.tracking_uri}')
        logger.info(
            f'Artifacts location: {tracking_logger.get_artifact_uri()}')

    return output_dir, tracking_logger
def setup(output_dir, seed=42, gpu_id=None, debug=False, quiet=False):
    """
    Analyzes pipeline options and sets up requirements for running the
    training pipeline.

    This includes setting up the output directory, random seeds and the
    device(s) where training is run.

    Args:
        output_dir: Path to directory to use or None, in which case one is
            created automatically.
        seed (int): Random seed for all random engines (Python, PyTorch,
            NumPy).
        gpu_id (int): GPU number to use or `None` to use the CPU.
        debug (bool): Whether to increase the verbosity of output messages.
        quiet (bool): Whether to decrease the verbosity of output messages.
            Takes precedence over `debug`.

    Returns:
        output_dir(str): Path to output directory
    """
    # NOTE(review): this function reads a module-level ``tracking_logger``
    # global that is not passed in — confirm it is initialized before
    # ``setup`` is called.
    output_dir = setup_output_directory(
        output_dir,
        tracking_logger.run_uuid,
        tracking_logger.experiment_id,
        create=True,
    )
    configure_logging(output_dir=output_dir, debug=debug, quiet=quiet)
    configure_seed(seed)

    # Use the module-level ``logger`` for consistency with the other setup
    # functions; ``logging.info`` would go through the root logger and
    # implicitly basicConfig it, bypassing configure_logging's handlers.
    logger.info("This is run ID: {}".format(tracking_logger.run_uuid))
    logger.info("Inside experiment ID: {} ({})".format(
        tracking_logger.experiment_id, tracking_logger.experiment_name))
    logger.info("Local output directory is: {}".format(output_dir))
    logger.info("Logging execution to MLflow at: {}".format(
        tracking_logger.get_tracking_uri()))

    # Pin the CUDA device early so everything created afterwards lands on it.
    if gpu_id is not None and gpu_id >= 0:
        torch.cuda.set_device(gpu_id)
        logger.info("Using GPU: {}".format(gpu_id))
    else:
        logger.info("Using CPU")

    logger.info("Artifacts location: {}".format(
        tracking_logger.get_artifact_uri()))
    return output_dir