def test_library_usages(shutdown_only, reset_lib_usage):
    """Check that every library touched in this test is reported as used.

    Usages are recorded both before and after ``ray.init()``; workflow,
    data and serve are then exercised. The resulting set of library names
    must match both what ``get_library_usages_to_report`` returns and what
    ``LibUsageRecorder`` reads back from the Ray temp directory.

    Args:
        shutdown_only: Fixture defined elsewhere; presumably shuts Ray down
            after the test -- confirm against the fixture definition.
        reset_lib_usage: Fixture defined elsewhere; presumably clears
            previously recorded library usages -- confirm.
    """
    if os.environ.get("RAY_MINIMAL") == "1":
        # Doesn't work with minimal installation
        # since we import serve.
        return

    ray_usage_lib.record_library_usage("pre_init")
    ray.init()
    ray_usage_lib.record_library_usage("post_init")
    ray.workflow.init()
    ray.data.range(10)
    from ray import serve

    serve.start()
    # Shut Serve down even when an assertion below fails, so a failing run
    # does not leave a live Serve instance behind.
    try:
        library_usages = ray_usage_lib.get_library_usages_to_report(
            ray.experimental.internal_kv.internal_kv_get_gcs_client(),
            num_retries=20,
        )
        tmp_path = ray._private.utils.get_ray_temp_dir()
        lib_usages_from_temp_dir = ray_usage_lib.LibUsageRecorder(
            tmp_path
        ).read_lib_usages()
        expected = {
            "pre_init",
            "post_init",
            "dataset",
            "workflow",
            "serve",
        }
        assert set(library_usages) == expected
        assert set(lib_usages_from_temp_dir) == expected
    finally:
        serve.shutdown()
def test_library_usages():
    """Check that every library touched in this test is reported as used.

    Usages are recorded both before and after ``ray.init()``; workflow,
    data and serve are then exercised, and the set returned by
    ``get_library_usages_to_report`` must contain exactly those libraries.

    This test starts Ray and Serve itself, so both are torn down in
    ``finally`` blocks: a failing assertion must not leave a running
    cluster behind for the tests that follow.
    """
    if os.environ.get("RAY_MINIMAL") == "1":
        # Doesn't work with minimal installation
        # since we import serve.
        return

    # Start from an empty record so usages from other tests in this
    # process do not show up in the reported set.
    ray_usage_lib._recorded_library_usages.clear()
    ray_usage_lib.record_library_usage("pre_init")
    ray.init()
    try:
        ray_usage_lib.record_library_usage("post_init")
        ray.workflow.init()
        ray.data.range(10)
        from ray import serve

        serve.start()
        try:
            library_usages = ray_usage_lib.get_library_usages_to_report(
                ray.experimental.internal_kv.internal_kv_get_gcs_client(),
                num_retries=20,
            )
            assert set(library_usages) == {
                "pre_init",
                "post_init",
                "dataset",
                "workflow",
                "serve",
            }
        finally:
            serve.shutdown()
    finally:
        ray.shutdown()
def init(
    *,
    max_running_workflows: Optional[int] = None,
    max_pending_workflows: Optional[int] = None,
) -> None:
    """Set up the workflow subsystem, starting Ray first when necessary.

    When Ray has not been initialized yet, it is started with
    ``/tmp/ray/workflow_data`` as the default storage.

    Args:
        max_running_workflows: Upper bound on the number of workflows that
            run concurrently. Use -1 for infinity. ``None`` preserves the
            previous setting, or initializes the setting with infinity.
        max_pending_workflows: Upper bound on the number of queued
            workflows. Use -1 for infinity. ``None`` preserves the previous
            setting, or initializes the setting with infinity.

    Raises:
        TypeError: If a limit is neither ``None`` nor an integer.
        ValueError: If ``max_running_workflows`` is 0 or less than -1, or
            if ``max_pending_workflows`` is less than -1.
    """
    usage_lib.record_library_usage("workflow")

    # The running limit is validated first, then the pending limit.
    running_limit = max_running_workflows
    if running_limit is not None:
        if not isinstance(running_limit, int):
            raise TypeError(
                "'max_running_workflows' must be None or an integer.")
        if running_limit == 0 or running_limit < -1:
            raise ValueError(
                "'max_running_workflows' must be a positive integer "
                "or use -1 as infinity.")

    pending_limit = max_pending_workflows
    if pending_limit is not None:
        if not isinstance(pending_limit, int):
            raise TypeError(
                "'max_pending_workflows' must be None or an integer.")
        if pending_limit < -1:
            raise ValueError(
                "'max_pending_workflows' must be a non-negative integer "
                "or use -1 as infinity.")

    if not ray.is_initialized():
        # We should use get_temp_dir_path, but for ray client, we don't
        # have this one. We need a flag to tell whether it's a client
        # or a driver to use the right dir.
        # For now, just use /tmp/ray/workflow_data
        default_storage = "file:///tmp/ray/workflow_data"
        ray.init(storage=default_storage)

    workflow_access.init_management_actor(running_limit, pending_limit)
    serialization.init_manager()
def init() -> None:
    """Set up the workflow subsystem, starting Ray first when necessary.

    When Ray has not been initialized yet, it is started with
    ``/tmp/ray/workflow_data`` as the default storage. On completion the
    module-level ``_is_workflow_initialized`` flag is set to ``True``.
    """
    global _is_workflow_initialized

    usage_lib.record_library_usage("workflow")

    if not ray.is_initialized():
        # We should use get_temp_dir_path, but for ray client, we don't
        # have this one. We need a flag to tell whether it's a client
        # or a driver to use the right dir.
        # For now, just use /tmp/ray/workflow_data
        default_storage = "file:///tmp/ray/workflow_data"
        ray.init(storage=default_storage)

    workflow_access.init_management_actor()
    serialization.init_manager()

    # Only flipped once both initialization calls above have returned.
    _is_workflow_initialized = True
def start(
    detached: bool = False,
    http_options: Optional[Union[dict, HTTPOptions]] = None,
    dedicated_cpu: bool = False,
    _checkpoint_path: str = DEFAULT_CHECKPOINT_PATH,
    **kwargs,
) -> ServeControllerClient:
    """Start a Serve instance, or connect to the one already running.

    By default the instance is scoped to the lifetime of the returned
    client object (or to the script exiting). With ``detached=True`` the
    instance persists until ``serve.shutdown()`` is called. This only
    matters when connecting to a long-running Ray cluster (e.g., with
    ``ray.init(address="auto")`` or ``ray.init("ray://<remote_addr>")``).

    Args:
        detached: Whether or not the instance should be detached from this
            script. If set, the instance lives on the Ray cluster until it
            is explicitly stopped with ``serve.shutdown()``.
        http_options (Optional[Dict, serve.HTTPOptions]): Configuration
            options for the HTTP proxy, given either as a dictionary or as
            an ``HTTPOptions`` object with fields:

            - host(str, None): Host for HTTP servers to listen on. Defaults
              to "127.0.0.1". To expose Serve publicly, you probably want
              to set this to "0.0.0.0".
            - port(int): Port for HTTP server. Defaults to 8000.
            - root_path(str): Root path to mount the serve application
              (for example, "/serve"). All deployment routes will be
              prefixed with this path. Defaults to "".
            - middlewares(list): A list of Starlette middlewares that will
              be applied to the HTTP servers in the cluster. Defaults to [].
            - location(str, serve.config.DeploymentMode): The deployment
              location of HTTP servers:

                - "HeadOnly": start one HTTP server on the head node. Serve
                  assumes the head node is the node you executed
                  serve.start on. This is the default.
                - "EveryNode": start one HTTP server per node.
                - "NoServer" or None: disable HTTP server.
            - num_cpus (int): The number of CPU cores to reserve for each
              internal Serve HTTP proxy actor. Defaults to 0.
        dedicated_cpu: Whether to reserve a CPU core for the internal
            Serve controller actor. Defaults to False.
        _checkpoint_path: Forwarded to the controller and checked against
            an already-running instance.
        **kwargs: Only inspected for the deprecated ``http_host``,
            ``http_port`` and ``http_middlewares`` keys.

    Returns:
        The client of the existing Serve instance when one is found,
        otherwise a client for the newly started instance.

    Raises:
        ValueError: If a deprecated HTTP keyword argument is passed.
        TimeoutError: If the HTTP proxies are not ready within
            ``HTTP_PROXY_TIMEOUT`` seconds.
    """
    usage_lib.record_library_usage("serve")

    for key in ("http_host", "http_port", "http_middlewares"):
        if key not in kwargs:
            continue
        raise ValueError(
            f"{key} is deprecated, please use serve.start(http_options="
            f'{{"{key}": {kwargs[key]}}}) instead.')

    # Initialize ray if needed.
    ray._private.worker.global_worker.filter_logs_by_job = False
    if not ray.is_initialized():
        ray.init(namespace=SERVE_NAMESPACE)

    try:
        existing_client = get_global_client(_health_check_controller=True)
        logger.info(
            f'Connecting to existing Serve app in namespace "{SERVE_NAMESPACE}".'
        )
        _check_http_and_checkpoint_options(existing_client, http_options,
                                           _checkpoint_path)
        return existing_client
    except RayServeException:
        # A RayServeException here means we fall through and start a new
        # instance below.
        pass

    # Detached instances use the fixed controller name; otherwise the name
    # is built from random letters.
    controller_name = (SERVE_CONTROLLER_NAME if detached else
                       format_actor_name(get_random_letters(),
                                         SERVE_CONTROLLER_NAME))

    if isinstance(http_options, dict):
        http_options = HTTPOptions.parse_obj(http_options)
    if http_options is None:
        http_options = HTTPOptions()

    controller_cls = ServeController.options(
        num_cpus=1 if dedicated_cpu else 0,
        name=controller_name,
        lifetime="detached" if detached else None,
        max_restarts=-1,
        max_task_retries=-1,
        # Pin Serve controller on the head node.
        resources={
            get_current_node_resource_key(): 0.01
        },
        namespace=SERVE_NAMESPACE,
        max_concurrency=CONTROLLER_MAX_CONCURRENCY,
    )
    controller = controller_cls.remote(
        controller_name,
        http_options,
        _checkpoint_path,
        detached=detached,
    )

    proxy_handles = ray.get(controller.get_http_proxies.remote())
    if proxy_handles:
        # Wait until every proxy reports ready, bounded by the timeout.
        ready_refs = [
            handle.ready.remote() for handle in proxy_handles.values()
        ]
        try:
            ray.get(ready_refs, timeout=HTTP_PROXY_TIMEOUT)
        except ray.exceptions.GetTimeoutError:
            raise TimeoutError(
                f"HTTP proxies not available after {HTTP_PROXY_TIMEOUT}s.")

    new_client = ServeControllerClient(
        controller,
        controller_name,
        detached=detached,
    )
    set_global_client(new_client)
    logger.info(f"Started{' detached ' if detached else ' '}Serve instance in "
                f'namespace "{SERVE_NAMESPACE}".')
    return new_client
from ray.train.callbacks import TrainingCallback
from ray.train.checkpoint import CheckpointStrategy
from ray.train.session import (
    get_dataset_shard,
    local_rank,
    load_checkpoint,
    report,
    save_checkpoint,
    world_rank,
    world_size,
)
from ray.train.trainer import Trainer, TrainingIterator
from ray._private.usage import usage_lib

# Module-level call: runs when this package is imported and records
# "train" with the usage library.
usage_lib.record_library_usage("train")

# Names exported by this package.
# NOTE(review): "BackendConfig" is not imported in the lines visible here --
# presumably it is imported earlier in this module; confirm.
__all__ = [
    "BackendConfig",
    "CheckpointStrategy",
    "get_dataset_shard",
    "load_checkpoint",
    "local_rank",
    "report",
    "save_checkpoint",
    "TrainingIterator",
    "TrainingCallback",
    "Trainer",
    "world_rank",
    "world_size",
]
class _SeeContrib(Trainer): def setup(self, config): raise NameError("Please run `contrib/{}` instead.".format(name)) return _SeeContrib # Also register the aliases minus contrib/ to give a good error message. for key in list(CONTRIBUTED_ALGORITHMS.keys()): assert key.startswith("contrib/") alias = key.split("/", 1)[1] if alias not in ALGORITHMS: register_trainable(alias, _see_contrib(alias)) _setup_logger() usage_lib.record_library_usage("rllib") __all__ = [ "Policy", "TFPolicy", "TorchPolicy", "RolloutWorker", "SampleBatch", "BaseEnv", "MultiAgentEnv", "VectorEnv", "ExternalEnv", ]
lograndint, qrandint, qlograndint, randn, qrandn, loguniform, qloguniform, ) from ray.tune.suggest import create_searcher from ray.tune.schedulers import create_scheduler from ray.tune.execution.placement_groups import PlacementGroupFactory from ray.tune.trainable.util import with_parameters from ray._private.usage import usage_lib usage_lib.record_library_usage("tune") __all__ = [ "Trainable", "DurableTrainable", "durable", "Callback", "TuneError", "grid_search", "register_env", "register_trainable", "run", "run_experiments", "with_parameters", "Stopper", "Experiment",