def build(
    env: det.EnvContext,
    checkpoint_config: Dict[str, Any],
    container_path: Optional[str] = None,
) -> base.TensorboardManager:
    """
    Build the tensorboard manager selected by the `type` key of the storage
    configuration dictionary.

    Recognized `type` values are "shared_fs", "gcs", "s3" and "hdfs". For
    "shared_fs", container_path (if set) will replace the host_path when the
    storage path for the SharedFSTensorboardManager is determined.

    Raises a `TypeError` when `type` is missing, is not a string, or does not
    name a known tensorboard manager. Required backend keys (such as `bucket`
    or `host_path`) are read by subscript, so a missing one raises `KeyError`.
    """
    backend = checkpoint_config.get("type")
    if not backend:
        raise TypeError("Missing 'type' parameter of storage configuration")
    if not isinstance(backend, str):
        raise TypeError("`type` parameter of storage configuration must be a string")

    # Both paths are resolved before dispatching on the backend, so they are
    # computed even when the backend later turns out to be unknown.
    tb_base_path = get_base_path(checkpoint_config, manager=True)
    tb_sync_path = get_sync_path(env)

    if backend == "shared_fs":
        full_path = _full_storage_path(
            checkpoint_config["host_path"],
            checkpoint_config.get("storage_path"),
            container_path,
        )
        return shared.SharedFSTensorboardManager(full_path, tb_base_path, tb_sync_path)

    if backend == "gcs":
        return gcs.GCSTensorboardManager(
            checkpoint_config["bucket"],
            tb_base_path,
            tb_sync_path,
        )

    if backend == "s3":
        # Credentials and endpoint are optional; absent keys are passed as None.
        return s3.S3TensorboardManager(
            checkpoint_config["bucket"],
            checkpoint_config.get("access_key", None),
            checkpoint_config.get("secret_key", None),
            checkpoint_config.get("endpoint_url", None),
            tb_base_path,
            tb_sync_path,
        )

    if backend == "hdfs":
        # `user` is optional; the URL and path are required.
        return hdfs.HDFSTensorboardManager(
            checkpoint_config["hdfs_url"],
            checkpoint_config["hdfs_path"],
            checkpoint_config.get("user"),
            tb_base_path,
            tb_sync_path,
        )

    raise TypeError(f"Unknown storage type: {backend}")
def build(
    cluster_id: str,
    experiment_id: str,
    trial_id: Optional[str],
    checkpoint_config: Dict[str, Any],
    container_path: Optional[str] = None,
) -> base.TensorboardManager:
    """
    Return a tensorboard manager defined by the value of the `type` key in
    the configuration dictionary. Throws a `TypeError` if no tensorboard manager
    with `type` is defined.

    Recognized `type` values are "shared_fs", "gcs", "s3", "azure" and "hdfs".

    When trial_id is truthy the sync path comes from `get_sync_path`;
    otherwise it comes from `get_experiment_sync_path`.

    container_path, if set, will replace the host_path when determining
    the storage_path for the SharedFSTensorboardManager.

    Raises:
        TypeError: `type` is missing, is not a string, or is not recognized.
        ValueError: `type` is "azure" and neither `connection_string` nor
            `access_url` is set in the configuration.
        KeyError: a key read by subscript for the selected backend (for
            example `bucket`, `container` or `host_path`) is absent.
    """
    type_name = checkpoint_config.get("type")

    if not type_name:
        raise TypeError("Missing 'type' parameter of storage configuration")

    if not isinstance(type_name, str):
        raise TypeError("`type` parameter of storage configuration must be a string")

    base_path = get_base_path(checkpoint_config, manager=True)

    if trial_id:
        sync_path = get_sync_path(cluster_id, experiment_id, trial_id)
    else:
        sync_path = get_experiment_sync_path(cluster_id, experiment_id)

    if type_name == "shared_fs":
        host_path = checkpoint_config["host_path"]
        storage_path = checkpoint_config.get("storage_path")
        return shared.SharedFSTensorboardManager(
            _full_storage_path(host_path, storage_path, container_path),
            base_path,
            sync_path,
        )

    elif type_name == "gcs":
        return gcs.GCSTensorboardManager(checkpoint_config["bucket"], base_path, sync_path)

    elif type_name == "s3":
        return s3.S3TensorboardManager(
            checkpoint_config["bucket"],
            checkpoint_config.get("access_key", None),
            checkpoint_config.get("secret_key", None),
            checkpoint_config.get("endpoint_url", None),
            checkpoint_config.get("prefix", None),
            base_path,
            sync_path,
        )

    elif type_name == "azure":
        connection_string = checkpoint_config.get("connection_string", None)
        access_url = checkpoint_config.get("access_url", None)

        # Reject the configuration only when BOTH ways of reaching the storage
        # account are absent. The previous check lacked the second `not`, so it
        # rejected configs that supplied only `access_url` and accepted configs
        # that supplied neither.
        # NOTE(review): the message names `account_url` while the key read here
        # is `access_url` -- confirm which spelling the config schema uses.
        if not connection_string and not access_url:
            raise ValueError(
                "At least one of [connection_string, account_url] must be specified "
                "for Azure Tensorboard Manager, but none were."
            )

        return azure.AzureTensorboardManager(
            checkpoint_config["container"],
            connection_string,
            access_url,
            checkpoint_config.get("credential", None),
            base_path,
            sync_path,
        )

    elif type_name == "hdfs":
        # `user` is optional; the URL and path are required.
        return hdfs.HDFSTensorboardManager(
            checkpoint_config["hdfs_url"],
            checkpoint_config["hdfs_path"],
            checkpoint_config.get("user"),
            base_path,
            sync_path,
        )

    else:
        raise TypeError(f"Unknown storage type: {type_name}")