def test_rllib_integration(ray_start_regular_shared):
    """Run the RLlib rock-paper-scissors example with the client hook on.

    The hook is switched on with the ``enable_client_mode()`` context
    manager rather than ``_explicitly_enable_client_mode()``: the latter was
    never undone by this test, so the process-wide flag stayed set after the
    test returned.
    """
    with ray_start_client_server():
        # Confirming the behavior of this context manager.
        # (Client mode hook not yet enabled.)
        assert not client_mode_should_convert()
        # Need to enable this for client APIs to be used. Scoped so the
        # flag is only on while the example runs.
        with enable_client_mode():
            # Confirming mode hook is enabled.
            assert client_mode_should_convert()

            rock_paper_scissors_multiagent.main()
def test_client_context_manager(ray_start_regular_shared, connect_to_client):
    """Check hook state and client connection for both connection modes.

    When ``connect_to_client`` is truthy, both the client-mode hook and the
    Ray client connection must report active inside the context manager;
    otherwise both must report inactive.
    """
    import ray

    with connect_to_client_or_not(connect_to_client):
        if not connect_to_client:
            # Plain (non-client) mode: hook off, no client connection.
            assert not client_mode_should_convert(auto_init=True)
            assert not ray.util.client.ray.is_connected()
        else:
            # Client mode is on.
            assert client_mode_should_convert(auto_init=True)
            # We're connected to Ray client.
            assert ray.util.client.ray.is_connected()
def test_rllib_integration_tune(ray_start_regular_shared):
    """Run a two-iteration DQN Tune job with the client-mode hook enabled."""
    trainer_config = {"env": "CartPole-v1"}
    stop_criteria = {"training_iteration": 2}

    with ray_start_client_server():
        # The server context alone must not turn the client-mode hook on.
        hook_before = client_mode_should_convert(auto_init=True)
        assert not hook_before

        # Client APIs are only used once the hook is explicitly enabled.
        with enable_client_mode():
            hook_inside = client_mode_should_convert(auto_init=True)
            assert hook_inside

            tune.run("DQN", config=trainer_config, stop=stop_criteria)
def test_client_mode_hook_thread_safe(ray_start_regular_shared):
    """Check that ``disable_client_hook`` in one thread does not affect another.

    A worker thread enters ``disable_client_hook()``, records the hook state,
    and then parks on a lock held by the main thread. While it is parked the
    main thread asserts that its own view of the hook is still enabled. After
    the lock is released the worker leaves the context manager and records
    the hook state again.

    The lock release and thread join run in a ``finally`` block so that a
    failing main-thread assertion cannot leave the worker blocked forever on
    ``lock.acquire()``, and the results are read with ``get_nowait()`` so a
    worker that died before publishing makes the test fail instead of hang.
    """
    with ray_start_client_server():
        with enable_client_mode():
            assert client_mode_should_convert(auto_init=True)
            lock = threading.Lock()
            # Held by the main thread so the worker parks inside the
            # disable_client_hook() context until we release it.
            lock.acquire()
            q = queue.Queue()

            def disable():
                with disable_client_hook():
                    q.put(client_mode_should_convert(auto_init=True))
                    lock.acquire()
                q.put(client_mode_should_convert(auto_init=True))

            t = threading.Thread(target=disable)
            t.start()
            try:
                assert client_mode_should_convert(auto_init=True)
            finally:
                # Always unblock and reap the worker, even on failure.
                lock.release()
                t.join()

            # The worker has finished, so both results are already queued.
            assert q.get_nowait() is False, (
                "Threaded disable_client_hook failed  to disable")
            assert q.get_nowait() is True, (
                "Threaded disable_client_hook failed to re-enable")
def test_rllib_integration(ray_start_regular_shared):
    """Sample and train a SimpleQ trainer with the client-mode hook enabled.

    Each of the two iterations draws one batch from the local rollout worker,
    checks that its size equals the configured ``rollout_fragment_length``,
    and then runs one training step.
    """
    with ray_start_client_server():
        import ray.rllib.agents.dqn as dqn

        # The server context alone must not turn the client-mode hook on.
        assert not client_mode_should_convert()

        # Client APIs are only used once the hook is explicitly enabled.
        with enable_client_mode():
            assert client_mode_should_convert()

            dqn_config = dqn.SIMPLE_Q_DEFAULT_CONFIG.copy()
            dqn_config.update({
                # Run locally.
                "num_workers": 0,
                # Test with compression.
                "compress_observations": True,
            })
            num_iterations = 2
            trainer = dqn.SimpleQTrainer(config=dqn_config, env="CartPole-v1")
            local_worker = trainer.workers.local_worker()
            for _ in range(num_iterations):
                batch = local_worker.sample()
                assert batch.count == dqn_config["rollout_fragment_length"]
                trainer.train()
def get_current_placement_group() -> Optional[PlacementGroup]:
    """Return the placement group the current task or actor is running in.

    The result is None when the worker has no current placement group. For
    example, calling this from a driver returns None (because drivers never
    belong to any placement group). In client mode the caller is treated as
    a driver, so None is returned without consulting the worker.

    Examples:
        >>> import ray
        >>> from ray.util.placement_group import PlacementGroup
        >>> from ray.util.placement_group import get_current_placement_group
        >>> @ray.remote # doctest: +SKIP
        ... def f(): # doctest: +SKIP
        ...     # This will return the placement group the task f belongs to.
        ...     # It means this pg will be identical to the pg created below.
        ...     pg = get_current_placement_group() # doctest: +SKIP
        >>> pg = PlacementGroup([{"CPU": 2}]) # doctest: +SKIP
        >>> f.options(placement_group=pg).remote() # doctest: +SKIP

        >>> # New script.
        >>> ray.init() # doctest: +SKIP
        >>> # New script doesn't belong to any placement group,
        >>> # so it returns None.
        >>> assert get_current_placement_group() is None # doctest: +SKIP

    Returns:
        PlacementGroup: Placement group object.
            None if the current task or actor wasn't
            created with any placement group.
    """
    if client_mode_should_convert(auto_init=True):
        # Client mode is only a driver.
        return None

    current_worker = ray._private.worker.global_worker
    current_worker.check_connected()

    group_id = current_worker.placement_group_id
    # A nil id means the worker is not running inside any placement group.
    return None if group_id.is_nil() else PlacementGroup(group_id)
def _remote(self,
            args=None,
            kwargs=None,
            num_returns=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            accelerator_type=None,
            resources=None,
            max_retries=None,
            placement_group=None,
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            override_environment_variables=None,
            name=""):
    """Submit the remote function for execution.

    In client mode the call is forwarded unchanged to
    ``client_mode_convert_function``. Otherwise the function is (re)exported
    if it has not been exported for the worker's current session and job,
    per-call options left as None fall back to the values stored on this
    remote function, and the task is submitted through the core worker.

    Returns:
        The single object ref when the task produced exactly one, the list
        of object refs when it produced several, and None when it produced
        none.
    """
    if client_mode_should_convert():
        return client_mode_convert_function(
            self,
            args,
            kwargs,
            num_returns=num_returns,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            accelerator_type=accelerator_type,
            resources=resources,
            max_retries=max_retries,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            override_environment_variables=override_environment_variables,
            name=name)

    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    stale_export = (
        not self._is_cross_language
        and self._last_export_session_and_job !=
        worker.current_session_and_job)
    if stale_export:
        # The function is deliberately re-pickled here: if a later driver in
        # the same script reuses this remote function, its behavior should
        # not depend on whether an earlier driver already invoked it (e.g.
        # variables captured in the closure may have changed since then).
        self._pickled_function = pickle.dumps(self._function)
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._pickled_function)
        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    if kwargs is None:
        kwargs = {}
    if args is None:
        args = []

    # Per-call options that were not given fall back to the stored defaults.
    if num_returns is None:
        num_returns = self._num_returns
    if max_retries is None:
        max_retries = self._max_retries

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    # Inherit the caller's placement group only when none was given and
    # child-task capture is on.
    if placement_group is None and placement_group_capture_child_tasks:
        placement_group = get_current_placement_group()
    if not placement_group:
        placement_group = PlacementGroup.empty()

    check_placement_group_index(placement_group,
                                placement_group_bundle_index)

    resources = ray._private.utils.resources_from_resource_arguments(
        self._num_cpus, self._num_gpus, self._memory,
        self._object_store_memory, self._resources, self._accelerator_type,
        num_cpus, num_gpus, memory, object_store_memory, resources,
        accelerator_type)

    if not runtime_env:
        parsed_runtime_env = runtime_support.RuntimeEnvDict({})
    else:
        parsed_runtime_env = runtime_support.RuntimeEnvDict(runtime_env)
        override_environment_variables = (
            parsed_runtime_env.to_worker_env_vars(
                override_environment_variables))

    def invocation(args, kwargs):
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not (args or kwargs or self._function_signature):
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert not self._is_cross_language, \
                "Cross language remote function " \
                "cannot be executed locally."

        env_overrides = override_environment_variables or {}
        object_refs = worker.core_worker.submit_task(
            self._language,
            self._function_descriptor,
            list_args,
            name,
            num_returns,
            resources,
            max_retries,
            placement_group.id,
            placement_group_bundle_index,
            placement_group_capture_child_tasks,
            worker.debugger_breakpoint,
            parsed_runtime_env,
            override_environment_variables=env_overrides)
        # Reset worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""

        ref_count = len(object_refs)
        if ref_count == 1:
            return object_refs[0]
        if ref_count > 1:
            return object_refs
        return None

    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
def _remote(self, args=None, kwargs=None, **task_options):
    """Submit the remote function for execution.

    ``task_options`` carries the per-call task options. In client mode they
    are forwarded to ``client_mode_convert_function``; otherwise every option
    listed in ``ray_option_utils.task_options`` that is missing is filled
    with that option's default value before the task is submitted through
    the core worker.

    Returns:
        The single object ref when the task produced exactly one, the list
        of object refs when it produced several, and None when it produced
        none.

    Raises:
        TypeError: If pickling the function raises TypeError during export.
    """
    # We pop the "max_calls" coming from "@ray.remote" here. We no longer need
    # it in "_remote()".
    task_options.pop("max_calls", None)
    if client_mode_should_convert(auto_init=True):
        return client_mode_convert_function(self, args, kwargs,
                                            **task_options)

    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    stale_export = (
        not self._is_cross_language
        and self._last_export_session_and_job !=
        worker.current_session_and_job)
    if stale_export:
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._uuid)
        # The function is deliberately re-pickled here: if a later driver in
        # the same script reuses this remote function, its behavior should
        # not depend on whether an earlier driver already invoked it (e.g.
        # variables captured in the closure may have changed since then).
        try:
            self._pickled_function = pickle.dumps(self._function)
        except TypeError as e:
            msg = (
                "Could not serialize the function "
                f"{self._function_descriptor.repr}. Check "
                "https://docs.ray.io/en/master/ray-core/objects/serialization.html#troubleshooting "  # noqa
                "for more information.")
            raise TypeError(msg) from e

        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    if kwargs is None:
        kwargs = {}
    if args is None:
        args = []

    # fill task required options
    for option_name, option in ray_option_utils.task_options.items():
        task_options.setdefault(option_name, option.default_value)
    # "max_calls" already takes effects and should not apply again.
    # Remove the default value here.
    task_options.pop("max_calls", None)

    # TODO(suquark): cleanup these fields
    name = task_options["name"]
    runtime_env = parse_runtime_env(task_options["runtime_env"])
    placement_group = task_options["placement_group"]
    placement_group_bundle_index = task_options[
        "placement_group_bundle_index"]
    placement_group_capture_child_tasks = task_options[
        "placement_group_capture_child_tasks"]
    scheduling_strategy = task_options["scheduling_strategy"]
    num_returns = task_options["num_returns"]
    max_retries = task_options["max_retries"]
    retry_exceptions = task_options["retry_exceptions"]

    resources = ray._private.utils.resources_from_ray_options(task_options)

    pg_strategy_given = isinstance(scheduling_strategy,
                                   PlacementGroupSchedulingStrategy)
    if scheduling_strategy is None or pg_strategy_given:
        if pg_strategy_given:
            # An explicit placement-group strategy overrides the individual
            # placement_group* options.
            placement_group = scheduling_strategy.placement_group
            placement_group_bundle_index = (
                scheduling_strategy.placement_group_bundle_index)
            placement_group_capture_child_tasks = (
                scheduling_strategy.placement_group_capture_child_tasks)

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)
        placement_group = configure_placement_group_based_on_context(
            placement_group_capture_child_tasks,
            placement_group_bundle_index,
            resources,
            {},  # no placement_resources for tasks
            self._function_descriptor.function_name,
            placement_group=placement_group,
        )
        if placement_group.is_empty:
            scheduling_strategy = "DEFAULT"
        else:
            scheduling_strategy = PlacementGroupSchedulingStrategy(
                placement_group,
                placement_group_bundle_index,
                placement_group_capture_child_tasks,
            )

    if runtime_env is None:
        serialized_runtime_env_info = None
    else:
        serialized_runtime_env_info = get_runtime_env_info(
            runtime_env,
            is_job_runtime_env=False,
            serialize=True,
        )

    def invocation(args, kwargs):
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not (args or kwargs or self._function_signature):
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert (
                not self._is_cross_language
            ), "Cross language remote function cannot be executed locally."

        task_name = "" if name is None else name
        object_refs = worker.core_worker.submit_task(
            self._language,
            self._function_descriptor,
            list_args,
            task_name,
            num_returns,
            resources,
            max_retries,
            retry_exceptions,
            scheduling_strategy,
            worker.debugger_breakpoint,
            serialized_runtime_env_info or "{}",
        )
        # Reset worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""

        ref_count = len(object_refs)
        if ref_count == 1:
            return object_refs[0]
        if ref_count > 1:
            return object_refs
        return None

    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
def _remote(self,
            args=None,
            kwargs=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            accelerator_type=None,
            max_concurrency=None,
            max_restarts=None,
            max_task_retries=None,
            name=None,
            namespace=None,
            lifetime=None,
            placement_group="default",
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None):
    """Create an actor.

    This method allows more flexibility than the remote method because
    resource requirements can be specified and override the defaults in the
    decorator.

    Args:
        args: The arguments to forward to the actor constructor.
        kwargs: The keyword arguments to forward to the actor constructor.
        num_cpus: The number of CPUs required by the actor creation task.
        num_gpus: The number of GPUs required by the actor creation task.
        memory: Restrict the heap memory usage of this actor.
        object_store_memory: Restrict the object store memory used by
            this actor when creating objects.
        resources: The custom resources required by the actor creation
            task.
        accelerator_type: Accelerator type requirement, passed through to
            the resource computation together with the other resource
            arguments.
        max_concurrency: The max number of concurrent calls to allow for
            this actor. This only works with direct actor calls. The max
            concurrency defaults to 1 for threaded execution, and 1000 for
            asyncio execution. Note that the execution order is not
            guaranteed when max_concurrency > 1.
        max_restarts: Passed to the core worker when creating the actor.
            When None, the value stored in the actor class metadata is
            used. An explicit 0 is honored.
        max_task_retries: Passed to the core worker when creating the
            actor. When None, the value stored in the actor class metadata
            is used. An explicit 0 is honored.
        name: The globally unique name for the actor, which can be used
            to retrieve the actor via ray.get_actor(name) as long as the
            actor is still alive.
        namespace: Override the namespace to use for the actor. By default,
            actors are created in an anonymous namespace. The actor can
            be retrieved via ray.get_actor(name=name, namespace=namespace).
        lifetime: Either `None`, which defaults to the actor will fate
            share with its creator and will be deleted once its refcount
            drops to zero, or "detached", which means the actor will live
            as a global object independent of the creator.
        placement_group: the placement group this actor belongs to,
            or None if it doesn't belong to any group. Setting to "default"
            autodetects the placement group based on the current setting of
            placement_group_capture_child_tasks.
        placement_group_bundle_index: the index of the bundle
            if the actor belongs to a placement group, which may be -1 to
            specify any available bundle.
        placement_group_capture_child_tasks: Whether or not children tasks
            of this actor should implicitly use the same placement group
            as its parent. When None, the worker's
            should_capture_child_tasks_in_placement_group setting is used.
        runtime_env (Dict[str, Any]): Specifies the runtime environment for
            this actor or task and its children (see
            :ref:`runtime-environments` for details).  This API is in beta
            and may change before becoming stable.

    Returns:
        A handle to the newly created actor.

    Raises:
        ValueError: If max_concurrency is below 1, the name is an empty
            string or already taken, or lifetime is neither None nor
            "detached".
        TypeError: If name is not None or a string, or runtime_env is
            truthy but neither a string nor a ParsedRuntimeEnv.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    meta = self.__ray_metadata__
    actor_has_async_methods = len(
        inspect.getmembers(
            meta.modified_class,
            predicate=inspect.iscoroutinefunction)) > 0
    is_asyncio = actor_has_async_methods

    if max_concurrency is None:
        if is_asyncio:
            max_concurrency = 1000
        else:
            max_concurrency = 1

    if max_concurrency < 1:
        raise ValueError("max_concurrency must be >= 1")

    if client_mode_should_convert(auto_init=True):
        return client_mode_convert_actor(
            self,
            args,
            kwargs,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            resources=resources,
            accelerator_type=accelerator_type,
            max_concurrency=max_concurrency,
            max_restarts=max_restarts,
            max_task_retries=max_task_retries,
            name=name,
            namespace=namespace,
            lifetime=lifetime,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env)

    worker = ray.worker.global_worker
    worker.check_connected()

    if name is not None:
        if not isinstance(name, str):
            raise TypeError(
                f"name must be None or a string, got: '{type(name)}'.")
        elif name == "":
            raise ValueError("Actor name cannot be an empty string.")
    if namespace is not None:
        ray._private.utils.validate_namespace(namespace)

    # Check whether the name is already taken.
    # TODO(edoakes): this check has a race condition because two drivers
    # could pass the check and then create the same named actor. We should
    # instead check this when we create the actor, but that's currently an
    # async call.
    if name is not None:
        try:
            ray.get_actor(name, namespace=namespace)
        except ValueError:  # Name is not taken.
            pass
        else:
            raise ValueError(
                f"The name {name} (namespace={namespace}) is already "
                "taken. Please use "
                "a different name or get the existing actor using "
                f"ray.get_actor('{name}', namespace='{namespace}')")

    if lifetime is None:
        detached = False
    elif lifetime == "detached":
        detached = True
    else:
        raise ValueError(
            "actor `lifetime` argument must be either `None` or 'detached'"
        )

    # Set the actor's default resources if not already set. The first four
    # conditions check that no resources were specified in the decorator.
    # The last four conditions check that no resources were specified when
    # _remote() was called.
    if (meta.num_cpus is None and meta.num_gpus is None
            and meta.resources is None and meta.accelerator_type is None
            and num_cpus is None and num_gpus is None and resources is None
            and accelerator_type is None):
        # In the default case, actors acquire no resources for
        # their lifetime, and actor methods will require 1 CPU.
        cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
    else:
        # If any resources are specified (here or in decorator), then
        # all resources are acquired for the actor's lifetime and no
        # resources are associated with methods.
        cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                       if meta.num_cpus is None else meta.num_cpus)
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

    # LOCAL_MODE cannot handle cross_language
    if worker.mode == ray.LOCAL_MODE:
        assert not meta.is_cross_language, \
            "Cross language ActorClass cannot be executed locally."

    # Export the actor.
    if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                       worker.current_session_and_job):
        # If this actor class was not exported in this session and job,
        # we need to export this function again, because current GCS
        # doesn't have it.
        meta.last_export_session_and_job = (worker.current_session_and_job)
        # After serialize / deserialize modified class, the __module__
        # of modified class will be ray.cloudpickle.cloudpickle.
        # So, here pass actor_creation_function_descriptor to make
        # sure export actor class correct.
        worker.function_actor_manager.export_actor_class(
            meta.modified_class, meta.actor_creation_function_descriptor,
            meta.method_meta.methods.keys())

    resources = ray._private.utils.resources_from_resource_arguments(
        cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
        meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
        object_store_memory, resources, accelerator_type)

    # If the actor methods require CPU resources, then set the required
    # placement resources. If actor_placement_resources is empty, then
    # the required placement resources will be the same as resources.
    actor_placement_resources = {}
    assert actor_method_cpu in [0, 1]
    if actor_method_cpu == 1:
        actor_placement_resources = resources.copy()
        actor_placement_resources["CPU"] += 1
    if meta.is_cross_language:
        creation_args = cross_language.format_args(worker, args, kwargs)
    else:
        function_signature = meta.method_meta.signatures["__init__"]
        creation_args = signature.flatten_args(function_signature, args,
                                               kwargs)

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)
    placement_group = configure_placement_group_based_on_context(
        placement_group_capture_child_tasks,
        placement_group_bundle_index,
        resources,
        actor_placement_resources,
        meta.class_name,
        placement_group=placement_group)

    if runtime_env:
        if isinstance(runtime_env, str):
            # Serialized protobuf runtime env from Ray client.
            new_runtime_env = runtime_env
        elif isinstance(runtime_env, ParsedRuntimeEnv):
            new_runtime_env = runtime_env.serialize()
        else:
            raise TypeError(f"Error runtime env type {type(runtime_env)}")
    else:
        new_runtime_env = meta.runtime_env

    concurrency_groups_dict = {}
    for cg_name in meta.concurrency_groups:
        concurrency_groups_dict[cg_name] = {
            "name": cg_name,
            "max_concurrency": meta.concurrency_groups[cg_name],
            "function_descriptors": [],
        }

    # Update methods
    for method_name in meta.method_meta.concurrency_group_for_methods:
        cg_name = meta.method_meta.concurrency_group_for_methods[
            method_name]
        assert cg_name in concurrency_groups_dict

        module_name = meta.actor_creation_function_descriptor.module_name
        class_name = meta.actor_creation_function_descriptor.class_name
        concurrency_groups_dict[cg_name]["function_descriptors"].append(
            PythonFunctionDescriptor(module_name, method_name, class_name))

    # Fall back to the class-level settings only when the caller gave no
    # value. Using `or` here would silently discard an explicit 0 (e.g.
    # "never restart") because 0 is falsy.
    if max_restarts is None:
        max_restarts = meta.max_restarts
    if max_task_retries is None:
        max_task_retries = meta.max_task_retries

    actor_id = worker.core_worker.create_actor(
        meta.language,
        meta.actor_creation_function_descriptor,
        creation_args,
        max_restarts,
        max_task_retries,
        resources,
        actor_placement_resources,
        max_concurrency,
        detached,
        name if name is not None else "",
        namespace if namespace is not None else "",
        is_asyncio,
        placement_group.id,
        placement_group_bundle_index,
        placement_group_capture_child_tasks,
        # Store actor_method_cpu in actor handle's extension data.
        extension_data=str(actor_method_cpu),
        serialized_runtime_env=new_runtime_env or "{}",
        concurrency_groups_dict=concurrency_groups_dict or dict())

    actor_handle = ActorHandle(
        meta.language,
        actor_id,
        meta.method_meta.decorators,
        meta.method_meta.signatures,
        meta.method_meta.num_returns,
        actor_method_cpu,
        meta.actor_creation_function_descriptor,
        worker.current_session_and_job,
        original_handle=True)

    return actor_handle
def disable():
    """Publish the client-hook state seen inside and after the disable scope.

    Two values are put on ``q``: the first while ``disable_client_hook()``
    is active, the second after it has exited. Between the two the function
    blocks on ``lock.acquire()`` while still inside the context manager.
    """
    with disable_client_hook():
        state_inside = client_mode_should_convert(auto_init=True)
        q.put(state_inside)
        # Stay inside the disabled scope until the lock can be acquired.
        lock.acquire()

    state_after = client_mode_should_convert(auto_init=True)
    q.put(state_after)
def _remote(
        self,
        args=None,
        kwargs=None,
        num_returns=None,
        num_cpus=None,
        num_gpus=None,
        memory=None,
        object_store_memory=None,
        accelerator_type=None,
        resources=None,
        max_retries=None,
        retry_exceptions=None,
        placement_group="default",
        placement_group_bundle_index=-1,
        placement_group_capture_child_tasks=None,
        runtime_env=None,
        name="",
        scheduling_strategy: SchedulingStrategyT = None,
):
    """Submit the remote function for execution.

    In client mode the call is forwarded unchanged to
    ``client_mode_convert_function``. Otherwise the function is (re)exported
    if it has not been exported for the worker's current session and job,
    per-call options left as None fall back to the values stored on this
    remote function, the scheduling strategy is resolved, and the task is
    submitted through the core worker.

    Returns:
        The single object ref when the task produced exactly one, the list
        of object refs when it produced several, and None when it produced
        none.

    Raises:
        TypeError: If pickling the function raises TypeError during export.
        ValueError: If ``placement_group`` is not "default" while a
            scheduling strategy is set (per call or stored on this remote
            function).
    """
    if client_mode_should_convert(auto_init=True):
        return client_mode_convert_function(
            self,
            args,
            kwargs,
            num_returns=num_returns,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            accelerator_type=accelerator_type,
            resources=resources,
            max_retries=max_retries,
            retry_exceptions=retry_exceptions,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            name=name,
            scheduling_strategy=scheduling_strategy,
        )

    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    stale_export = (
        not self._is_cross_language
        and self._last_export_session_and_job !=
        worker.current_session_and_job)
    if stale_export:
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._uuid)
        # The function is deliberately re-pickled here: if a later driver in
        # the same script reuses this remote function, its behavior should
        # not depend on whether an earlier driver already invoked it (e.g.
        # variables captured in the closure may have changed since then).
        try:
            self._pickled_function = pickle.dumps(self._function)
        except TypeError as e:
            msg = (
                "Could not serialize the function "
                f"{self._function_descriptor.repr}. Check "
                "https://docs.ray.io/en/master/serialization.html#troubleshooting "  # noqa
                "for more information.")
            raise TypeError(msg) from e

        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    if kwargs is None:
        kwargs = {}
    if args is None:
        args = []

    # Per-call options that were not given fall back to the stored defaults.
    if num_returns is None:
        num_returns = self._num_returns
    if max_retries is None:
        max_retries = self._max_retries
    if retry_exceptions is None:
        retry_exceptions = self._retry_exceptions
    if scheduling_strategy is None:
        scheduling_strategy = self._scheduling_strategy

    resources = ray._private.utils.resources_from_resource_arguments(
        self._num_cpus,
        self._num_gpus,
        self._memory,
        self._object_store_memory,
        self._resources,
        self._accelerator_type,
        num_cpus,
        num_gpus,
        memory,
        object_store_memory,
        resources,
        accelerator_type,
    )

    if placement_group != "default" and scheduling_strategy is not None:
        raise ValueError("Placement groups should be specified via the "
                         "scheduling_strategy option. "
                         "The placement_group option is deprecated.")

    pg_strategy_given = isinstance(scheduling_strategy,
                                   PlacementGroupSchedulingStrategy)
    if scheduling_strategy is None or pg_strategy_given:
        if pg_strategy_given:
            # An explicit placement-group strategy overrides the individual
            # placement_group* arguments.
            placement_group = scheduling_strategy.placement_group
            placement_group_bundle_index = (
                scheduling_strategy.placement_group_bundle_index)
            placement_group_capture_child_tasks = (
                scheduling_strategy.placement_group_capture_child_tasks)

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)
        if placement_group == "default":
            placement_group = self._placement_group
        placement_group = configure_placement_group_based_on_context(
            placement_group_capture_child_tasks,
            placement_group_bundle_index,
            resources,
            {},  # no placement_resources for tasks
            self._function_descriptor.function_name,
            placement_group=placement_group,
        )
        if placement_group.is_empty:
            scheduling_strategy = DEFAULT_SCHEDULING_STRATEGY
        else:
            scheduling_strategy = PlacementGroupSchedulingStrategy(
                placement_group,
                placement_group_bundle_index,
                placement_group_capture_child_tasks,
            )

    # A missing or empty-JSON runtime env falls back to the stored one.
    if not runtime_env or runtime_env == "{}":
        runtime_env = self._runtime_env

    def invocation(args, kwargs):
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not (args or kwargs or self._function_signature):
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert not self._is_cross_language, (
                "Cross language remote function "
                "cannot be executed locally.")

        object_refs = worker.core_worker.submit_task(
            self._language,
            self._function_descriptor,
            list_args,
            name,
            num_returns,
            resources,
            max_retries,
            retry_exceptions,
            scheduling_strategy,
            worker.debugger_breakpoint,
            runtime_env or "{}",
        )
        # Reset worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""

        ref_count = len(object_refs)
        if ref_count == 1:
            return object_refs[0]
        if ref_count > 1:
            return object_refs
        return None

    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
def _remote(self,
            args=None,
            kwargs=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            accelerator_type=None,
            max_concurrency=None,
            max_restarts=None,
            max_task_retries=None,
            name=None,
            lifetime=None,
            placement_group="default",
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            override_environment_variables=None):
    """Create an actor.

    This method allows more flexibility than the remote method because
    resource requirements can be specified and override the defaults in the
    decorator.

    Args:
        args: The arguments to forward to the actor constructor.
        kwargs: The keyword arguments to forward to the actor constructor.
        num_cpus: The number of CPUs required by the actor creation task.
        num_gpus: The number of GPUs required by the actor creation task.
        memory: Restrict the heap memory usage of this actor.
        object_store_memory: Restrict the object store memory used by
            this actor when creating objects.
        resources: The custom resources required by the actor creation
            task.
        accelerator_type: Accelerator type requirement; it is combined
            with the other resource arguments into the actor's resource
            request.
        max_concurrency: The max number of concurrent calls to allow for
            this actor. This only works with direct actor calls. The max
            concurrency defaults to 1 for threaded execution, and 1000 for
            asyncio execution. Note that the execution order is not
            guaranteed when max_concurrency > 1.
        max_restarts: Overrides the ``max_restarts`` value configured on
            the actor class. ``None`` keeps the class value; any other
            value, including 0, is used as given.
        max_task_retries: Overrides the ``max_task_retries`` value
            configured on the actor class. ``None`` keeps the class
            value; any other value, including 0, is used as given.
        name: The globally unique name for the actor, which can be used
            to retrieve the actor via ray.get_actor(name) as long as the
            actor is still alive. A single leading ``namespace/`` prefix
            is split off; the remaining name may not contain '/'.
        lifetime: Either `None`, which defaults to the actor will fate
            share with its creator and will be deleted once its refcount
            drops to zero, or "detached", which means the actor will live
            as a global object independent of the creator.
        placement_group: the placement group this actor belongs to,
            or None if it doesn't belong to any group. Setting to "default"
            autodetects the placement group based on the current setting of
            placement_group_capture_child_tasks.
        placement_group_bundle_index: the index of the bundle
            if the actor belongs to a placement group, which may be -1 to
            specify any available bundle.
        placement_group_capture_child_tasks: Whether or not children tasks
            of this actor should implicitly use the same placement group
            as its parent. When None, the current worker's
            ``should_capture_child_tasks_in_placement_group`` setting is
            used.
        runtime_env (Dict[str, Any]): Specifies the runtime environment for
            this actor or task and its children (see
            ``runtime_env.py`` for more details). Falls back to the
            runtime_env configured on the actor class when None.
        override_environment_variables: Deprecated. Environment variables
            to override and/or introduce for this actor. This is a
            dictionary mapping variable names to their values.

    Returns:
        A handle to the newly created actor.

    Raises:
        TypeError: If ``name`` is neither None nor a string.
        ValueError: If ``max_concurrency`` is < 1, ``name`` is empty,
            contains '/' after the namespace prefix or is already taken,
            or ``lifetime`` is neither None nor "detached".
        NotImplementedError: If the effective runtime_env sets
            ``working_dir``.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    meta = self.__ray_metadata__

    # An actor is treated as asyncio-based as soon as its class defines at
    # least one coroutine function.
    actor_has_async_methods = len(
        inspect.getmembers(
            meta.modified_class,
            predicate=inspect.iscoroutinefunction)) > 0
    is_asyncio = actor_has_async_methods

    if max_concurrency is None:
        if is_asyncio:
            max_concurrency = 1000
        else:
            max_concurrency = 1

    if max_concurrency < 1:
        raise ValueError("max_concurrency must be >= 1")

    # In client mode the whole request is forwarded untouched; the
    # max_restarts / max_task_retries values stay as passed by the caller.
    if client_mode_should_convert():
        return client_mode_convert_actor(
            self,
            args,
            kwargs,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            resources=resources,
            accelerator_type=accelerator_type,
            max_concurrency=max_concurrency,
            max_restarts=max_restarts,
            max_task_retries=max_task_retries,
            name=name,
            lifetime=lifetime,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            override_environment_variables=(
                override_environment_variables))

    worker = ray.worker.global_worker
    worker.check_connected()

    if name is not None:
        if not isinstance(name, str):
            raise TypeError(
                f"name must be None or a string, got: '{type(name)}'.")
        elif name == "":
            raise ValueError("Actor name cannot be an empty string.")
        # "namespace/name" -> (namespace, name); a bare name has no
        # namespace.
        split_names = name.split("/", maxsplit=1)
        if len(split_names) <= 1:
            name = split_names[0]
            namespace = ""
        else:
            # must be length 2
            namespace, name = split_names
        if "/" in name:
            raise ValueError("Actor name may not contain '/'.")
    else:
        namespace = ""

    # Check whether the name is already taken.
    # TODO(edoakes): this check has a race condition because two drivers
    # could pass the check and then create the same named actor. We should
    # instead check this when we create the actor, but that's currently an
    # async call.
    if name is not None:
        try:
            ray.get_actor(name)
        except ValueError:  # Name is not taken.
            pass
        else:
            raise ValueError(
                f"The name {name} is already taken. Please use "
                "a different name or get the existing actor using "
                f"ray.get_actor('{name}')")

    if lifetime is None:
        detached = False
    elif lifetime == "detached":
        detached = True
    else:
        raise ValueError(
            "actor `lifetime` argument must be either `None` or 'detached'"
        )

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    if placement_group == "default":
        if placement_group_capture_child_tasks:
            placement_group = get_current_placement_group()
        else:
            placement_group = PlacementGroup.empty()

    if not placement_group:
        placement_group = PlacementGroup.empty()

    check_placement_group_index(placement_group,
                                placement_group_bundle_index)

    # Set the actor's default resources if not already set. First four
    # conditions are to check that no resources were specified in the
    # decorator. Last four conditions are to check that no resources were
    # specified when _remote() was called.
    if (meta.num_cpus is None and meta.num_gpus is None
            and meta.resources is None and meta.accelerator_type is None
            and num_cpus is None and num_gpus is None and resources is None
            and accelerator_type is None):
        # In the default case, actors acquire no resources for
        # their lifetime, and actor methods will require 1 CPU.
        cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
    else:
        # If any resources are specified (here or in decorator), then
        # all resources are acquired for the actor's lifetime and no
        # resources are associated with methods.
        cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                       if meta.num_cpus is None else meta.num_cpus)
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

    # LOCAL_MODE cannot handle cross_language
    if worker.mode == ray.LOCAL_MODE:
        assert not meta.is_cross_language, \
            "Cross language ActorClass cannot be executed locally."

    # Export the actor.
    if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                       worker.current_session_and_job):
        # If this actor class was not exported in this session and job,
        # we need to export this function again, because current GCS
        # doesn't have it.
        meta.last_export_session_and_job = (worker.current_session_and_job)
        # After serialize / deserialize modified class, the __module__
        # of modified class will be ray.cloudpickle.cloudpickle.
        # So, here pass actor_creation_function_descriptor to make
        # sure export actor class correct.
        worker.function_actor_manager.export_actor_class(
            meta.modified_class, meta.actor_creation_function_descriptor,
            meta.method_meta.methods.keys())

    resources = ray._private.utils.resources_from_resource_arguments(
        cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
        meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
        object_store_memory, resources, accelerator_type)

    # If the actor methods require CPU resources, then set the required
    # placement resources. If actor_placement_resources is empty, then
    # the required placement resources will be the same as resources.
    actor_placement_resources = {}
    assert actor_method_cpu in [0, 1]
    if actor_method_cpu == 1:
        actor_placement_resources = resources.copy()
        actor_placement_resources["CPU"] += 1

    if meta.is_cross_language:
        creation_args = cross_language.format_args(worker, args, kwargs)
    else:
        function_signature = meta.method_meta.signatures["__init__"]
        creation_args = signature.flatten_args(function_signature, args,
                                               kwargs)

    if runtime_env is None:
        runtime_env = meta.runtime_env
    if runtime_env:
        if runtime_env.get("working_dir"):
            raise NotImplementedError(
                "Overriding working_dir for actors is not supported. "
                "Please use ray.init(runtime_env={'working_dir': ...}) "
                "to configure per-job environment instead.")
        runtime_env_dict = runtime_support.RuntimeEnvDict(
            runtime_env).get_parsed_dict()
    else:
        runtime_env_dict = {}

    if override_environment_variables:
        logger.warning("override_environment_variables is deprecated and "
                       "will be removed in Ray 1.6.  Please use "
                       ".options(runtime_env={'env_vars': {...}}).remote() "
                       "instead.")

    # Only fall back to the class-level values when the caller passed
    # nothing. A truthiness test (`x or default`) would silently discard an
    # explicit 0, making it impossible to turn restarts/retries off for a
    # class declared with a non-zero default.
    effective_max_restarts = (meta.max_restarts
                              if max_restarts is None else max_restarts)
    effective_max_task_retries = (meta.max_task_retries if
                                  max_task_retries is None else
                                  max_task_retries)

    actor_id = worker.core_worker.create_actor(
        meta.language,
        meta.actor_creation_function_descriptor,
        creation_args,
        effective_max_restarts,
        effective_max_task_retries,
        resources,
        actor_placement_resources,
        max_concurrency,
        detached,
        name if name is not None else "",
        namespace,
        is_asyncio,
        placement_group.id,
        placement_group_bundle_index,
        placement_group_capture_child_tasks,
        # Store actor_method_cpu in actor handle's extension data.
        extension_data=str(actor_method_cpu),
        runtime_env_dict=runtime_env_dict,
        override_environment_variables=override_environment_variables
        or dict())

    actor_handle = ActorHandle(
        meta.language,
        actor_id,
        meta.method_meta.decorators,
        meta.method_meta.signatures,
        meta.method_meta.num_returns,
        actor_method_cpu,
        meta.actor_creation_function_descriptor,
        worker.current_session_and_job,
        original_handle=True)

    return actor_handle