def unique_nodes_pg() -> int:
    """Returns the number of distinct "node"-type resource keys requested
    by the current placement group's bundles, or 0 if there is none."""
    nodes = []
    if get_current_placement_group() is None:
        return 0
    else:
        for bundle in get_current_placement_group().bundle_specs:
            for resource in bundle:
                if "node" in resource:
                    nodes.append(resource)
        return len(set(nodes))
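# Hedged usage sketch (not from the source): unique_nodes_pg() counts the
# distinct "node:<ip>"-style resource keys appearing in the current
# placement group's bundles, so it only reports nodes that the bundles
# request explicitly. Run inside a captured task on a plain cluster, the
# bundles below carry no "node" keys and the result is 0.
import ray

ray.init(num_cpus=2)
pg = ray.util.placement_group([{"CPU": 1}, {"CPU": 1}])
ray.get(pg.ready())

@ray.remote(num_cpus=1)
def count_nodes():
    return unique_nodes_pg()  # assumes unique_nodes_pg is importable here

print(ray.get(count_nodes.options(
    placement_group=pg,
    placement_group_capture_child_tasks=True).remote()))  # -> 0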
def _create_placement_group(self, num_workers):
    """Creates a placement group for the workers.

    If this worker is already in a placement group then a new one will
    not be created. This is primarily for when Tune is the upstream and
    will allocate resources for SGD workers.

    If this worker is not in a placement group, a new one will be
    created and set. The placement group will have a single bundle for
    each worker and use the SPREAD strategy for an even distribution.
    """
    pg = get_current_placement_group()
    if pg is None:
        bundle = {
            "CPU": self._num_cpus_per_worker,
            "GPU": int(self._use_gpu)
        }
        bundles = [bundle] * num_workers
        pg = ray.util.placement_group(bundles, strategy="SPREAD")
        logger.debug("Waiting for placement group to start.")
        ready, _ = ray.wait(
            [pg.ready()], timeout=SGD_PLACEMENT_GROUP_TIMEOUT_S)
        if ready:
            logger.debug("Placement group has started.")
        else:
            raise TimeoutError(
                "Placement group creation timed out. Make sure "
                "your cluster either has enough resources or use "
                "an autoscaling cluster. Current resources "
                "available: {}, resources requested by the "
                "placement group: {}".format(ray.available_resources(),
                                             pg.bundle_specs))
    self._worker_placement_group = pg
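# Behavior sketch (illustrative, not from the source): the two call paths.
#
#   # Standalone: no ambient placement group, so a new SPREAD group with one
#   # {"CPU": num_cpus_per_worker, "GPU": 0 or 1} bundle per worker is
#   # created, awaited, and stored on self._worker_placement_group.
#   self._create_placement_group(num_workers=4)
#
#   # Under Tune: the trial already runs inside a placement group, so
#   # get_current_placement_group() is non-None and that group is stored
#   # and reused; no new group is created.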
def schedule_nested_actor(self):
    # Make sure we can capture the current placement group.
    assert get_current_placement_group() is not None
    # Actors should be implicitly captured.
    actor = NestedActor.remote()
    ray.get(actor.ready.remote())
    self.actors.append(actor)
def _create_strategy(self):
    assert self.num_workers is None or self.num_hosts is None
    use_pg = (self.use_current_placement_group
              and get_current_placement_group())
    if self.num_workers or use_pg:
        if use_pg:
            logger.info("Found an existing placement group, inheriting. "
                        "You can disable this behavior by setting "
                        "`use_current_placement_group=False`.")
        num_workers = self.num_workers or (
            self.num_workers_per_host * self.num_hosts)
        return PGStrategy(
            settings=self.settings,
            num_workers=num_workers,
            use_gpu=self.use_gpu,
            cpus_per_worker=self.cpus_per_worker,
            gpus_per_worker=self.gpus_per_worker,
            force_create_placement_group=(
                not self.use_current_placement_group))
    else:
        return ColocatedStrategy(
            settings=self.settings,
            num_hosts=self.num_hosts,
            num_workers_per_host=self.num_workers_per_host,
            use_gpu=self.use_gpu,
            cpus_per_worker=self.cpus_per_worker,
            gpus_per_worker=self.gpus_per_worker)
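# Decision sketch (illustrative): which strategy _create_strategy returns.
#
#   num_workers set, or an inheritable placement group found
#       -> PGStrategy (worker count from num_workers, or from
#          num_workers_per_host * num_hosts when inheriting)
#   otherwise
#       -> ColocatedStrategy (num_hosts x num_workers_per_host)
#
# Setting use_current_placement_group=False disables inheritance, so
# PGStrategy (chosen only when num_workers is set) is told to force-create
# a fresh placement group instead of reusing the ambient one.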
def create_nested_task(child_cpu, child_gpu, set_none=False):
    assert get_current_placement_group() is not None
    kwargs = {
        "num_cpus": child_cpu,
        "num_gpus": child_gpu,
    }
    if set_none:
        kwargs["placement_group"] = None
    return ray.get([task.options(**kwargs).remote() for _ in range(3)])
def _create_placement_group(self):
    """Creates a placement group if it does not exist.

    If a placement group is already detected (Tune), this will be a no-op.

    By default, the placement group will be created with the PACK strategy.
    This is optimized for colocating GPUs on a minimal number of nodes.
    This behavior can be overridden to use the SPREAD strategy by defining
    ``TRAIN_ENABLE_WORKER_SPREAD_ENV``.

    If a placement group is created, it will be stored as
    self._placement_group.
    """
    current_placement_group = get_current_placement_group()
    should_capture_child_tasks_in_placement_group = (
        ray.worker.global_worker.should_capture_child_tasks_in_placement_group
    )
    should_create_placement_group = (
        current_placement_group is None
        or not should_capture_child_tasks_in_placement_group
    )

    if should_create_placement_group:
        additional_resources_per_worker = (
            self._additional_resources_per_worker or {}
        )
        bundle = {
            "CPU": self._num_cpus_per_worker,
            "GPU": self._num_gpus_per_worker,
            **additional_resources_per_worker,
        }
        bundles = [bundle.copy() for _ in range(self._num_workers)]

        use_spread = bool(env_integer(TRAIN_ENABLE_WORKER_SPREAD_ENV, 0))
        strategy = "SPREAD" if use_spread else "PACK"

        placement_group = ray.util.placement_group(bundles, strategy=strategy)
        logger.debug("Waiting for placement group to start.")
        timeout = env_integer(TRAIN_PLACEMENT_GROUP_TIMEOUT_S_ENV, 100)
        ready, _ = ray.wait([placement_group.ready()], timeout=timeout)
        if ready:
            logger.debug("Placement group has started.")
        else:
            raise TimeoutError(
                "Placement group creation timed out. Make sure your "
                "cluster either has enough resources or use an "
                "autoscaling cluster. If you are running on a cluster, "
                "make sure you specify an address in `ray.init()`, for "
                'example, `ray.init("auto")`. You can also increase the '
                "timeout by setting the TRAIN_PLACEMENT_GROUP_TIMEOUT_S "
                "environment variable. Current resources available: {}, "
                "resources requested by the placement group: {}".format(
                    ray.available_resources(), placement_group.bundle_specs
                )
            )
        self._placement_group = placement_group
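# Configuration sketch (the literal environment-variable names below are an
# assumption; the code reads them through the TRAIN_ENABLE_WORKER_SPREAD_ENV
# and TRAIN_PLACEMENT_GROUP_TIMEOUT_S_ENV constants):
#
#   # Default: PACK, colocating worker bundles on as few nodes as possible.
#   backend_executor._create_placement_group()
#
#   # Spread workers across nodes and allow 5 minutes for the group to
#   # become ready:
#   #   export TRAIN_ENABLE_WORKER_SPREAD=1
#   #   export TRAIN_PLACEMENT_GROUP_TIMEOUT_S=300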
def check_override(self):
    assert self.scaling_config["num_workers"] == 1
    # Should do a deep update.
    assert not self.custom_arg["outer"]["inner"]
    assert self.custom_arg["outer"]["fixed"] == 1
    # Should merge with the base config.
    assert self.preprocessor.original

    pg = get_current_placement_group()
    assert len(pg.bundle_specs) == 2  # 1 trainer, 1 worker
def test_capture_child_actors(ray_start_cluster):
    cluster = ray_start_cluster
    total_num_actors = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_actors)
    ray.init(address=cluster.address)

    pg = ray.util.placement_group(
        [{
            "CPU": 2
        }, {
            "CPU": 2
        }], strategy="STRICT_PACK")
    ray.get(pg.ready(), timeout=5)

    # If get_current_placement_group is used when the current worker/driver
    # doesn't belong to any placement group, it should return None.
    assert get_current_placement_group() is None

    @ray.remote(num_cpus=1)
    class NestedActor:
        def ready(self):
            return True

    @ray.remote(num_cpus=1)
    class Actor:
        def __init__(self):
            self.actors = []

        def ready(self):
            return True

        def schedule_nested_actor(self):
            actor = NestedActor.options(
                placement_group=get_current_placement_group()).remote()
            ray.get(actor.ready.remote())
            self.actors.append(actor)

    a = Actor.options(placement_group=pg).remote()
    ray.get(a.ready.remote())
    # 1 top-level actor + 3 children.
    for _ in range(total_num_actors - 1):
        ray.get(a.schedule_nested_actor.remote())

    # Make sure all the actors are scheduled on the same node
    # (the placement group uses the STRICT_PACK strategy).
    node_id_set = set()
    for actor_info in ray.actors().values():
        node_id = actor_info["Address"]["NodeID"]
        node_id_set.add(node_id)

    # All node ids should be identical, so the set should have size 1.
    assert len(node_id_set) == 1
def test_capture_child_tasks(ray_start_cluster):
    cluster = ray_start_cluster
    total_num_tasks = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_tasks, num_gpus=total_num_tasks)
    ray.init(address=cluster.address)

    pg = ray.util.placement_group(
        [{
            "CPU": 2,
            "GPU": 2,
        }, {
            "CPU": 2,
            "GPU": 2,
        }],
        strategy="STRICT_PACK")
    ray.get(pg.ready())

    # If get_current_placement_group is used when the current worker/driver
    # doesn't belong to any placement group, it should return None.
    assert get_current_placement_group() is None

    # Test if tasks capture child tasks.
    @ray.remote
    def task():
        return get_current_placement_group()

    @ray.remote
    def create_nested_task(child_cpu, child_gpu):
        assert get_current_placement_group() is not None
        return ray.get([
            task.options(num_cpus=child_cpu, num_gpus=child_gpu).remote()
            for _ in range(3)
        ])

    t = create_nested_task.options(
        num_cpus=1, num_gpus=0, placement_group=pg).remote(1, 0)
    pgs = ray.get(t)
    # Every task should have the current placement group because child
    # tasks are implicitly captured by default.
    assert None not in pgs

    # Test that tasks don't capture child tasks when the option is off.
    t2 = create_nested_task.options(
        num_cpus=0,
        num_gpus=1,
        placement_group=pg,
        placement_group_capture_child_tasks=False).remote(0, 1)
    pgs = ray.get(t2)
    # All placement groups should be None because we don't capture child
    # tasks.
    assert not all(pgs)
def __init__(self,
             *,
             settings,
             num_workers,
             use_gpu,
             cpus_per_worker,
             gpus_per_worker,
             placement_group=None,
             force_create_placement_group=False):
    self.settings = settings
    self._num_workers = num_workers
    self.cpus_per_worker = cpus_per_worker
    self.gpus_per_worker = gpus_per_worker or 1
    self.use_gpu = use_gpu

    if force_create_placement_group:
        self.placement_group = None
    else:
        self.placement_group = (
            placement_group or get_current_placement_group())
    self._placement_group_bundles = (
        self.placement_group.bundle_specs if self.placement_group else None)
    self._created_placement_group = False
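# Construction sketch (argument values are illustrative; `settings` comes
# from the surrounding Horovod-on-Ray context):
#
#   # Inherit: reuse the caller's placement group, e.g. one created by Tune.
#   PGStrategy(settings=settings, num_workers=4, use_gpu=True,
#              cpus_per_worker=1, gpus_per_worker=1)
#
#   # Force-create: ignore any ambient group; self.placement_group starts
#   # as None and the strategy builds its own group later.
#   PGStrategy(settings=settings, num_workers=4, use_gpu=True,
#              cpus_per_worker=1, gpus_per_worker=1,
#              force_create_placement_group=True)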
def _remote(self,
            args=None,
            kwargs=None,
            num_returns=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            accelerator_type=None,
            resources=None,
            max_retries=None,
            placement_group=None,
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            override_environment_variables=None,
            name=""):
    """Submit the remote function for execution."""
    if client_mode_should_convert():
        return client_mode_convert_function(
            self,
            args,
            kwargs,
            num_returns=num_returns,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            accelerator_type=accelerator_type,
            resources=resources,
            max_retries=max_retries,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            override_environment_variables=override_environment_variables,
            name=name)

    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    if not self._is_cross_language and \
            self._last_export_session_and_job != \
            worker.current_session_and_job:
        # There is an interesting question here. If the remote function is
        # used by a subsequent driver (in the same script), should the
        # second driver pickle the function again? If yes, then the remote
        # function definition can differ in the second driver (e.g., if
        # variables in its closure have changed). We probably want the
        # behavior of the remote function in the second driver to be
        # independent of whether or not the function was invoked by the
        # first driver. This is an argument for repickling the function,
        # which we do here.
        self._pickled_function = pickle.dumps(self._function)
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._pickled_function)
        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    kwargs = {} if kwargs is None else kwargs
    args = [] if args is None else args

    if num_returns is None:
        num_returns = self._num_returns
    if max_retries is None:
        max_retries = self._max_retries

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    if placement_group is None:
        if placement_group_capture_child_tasks:
            placement_group = get_current_placement_group()

    if not placement_group:
        placement_group = PlacementGroup.empty()

    check_placement_group_index(placement_group,
                                placement_group_bundle_index)

    resources = ray._private.utils.resources_from_resource_arguments(
        self._num_cpus, self._num_gpus, self._memory,
        self._object_store_memory, self._resources, self._accelerator_type,
        num_cpus, num_gpus, memory, object_store_memory, resources,
        accelerator_type)

    if runtime_env:
        parsed_runtime_env = runtime_support.RuntimeEnvDict(runtime_env)
        override_environment_variables = (
            parsed_runtime_env.to_worker_env_vars(
                override_environment_variables))
    else:
        parsed_runtime_env = runtime_support.RuntimeEnvDict({})

    def invocation(args, kwargs):
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not args and not kwargs and not self._function_signature:
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert not self._is_cross_language, \
                "Cross language remote function " \
                "cannot be executed locally."
        object_refs = worker.core_worker.submit_task(
            self._language,
            self._function_descriptor,
            list_args,
            name,
            num_returns,
            resources,
            max_retries,
            placement_group.id,
            placement_group_bundle_index,
            placement_group_capture_child_tasks,
            worker.debugger_breakpoint,
            parsed_runtime_env,
            override_environment_variables=override_environment_variables
            or dict())
        # Reset the worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""
        if len(object_refs) == 1:
            return object_refs[0]
        elif len(object_refs) > 1:
            return object_refs

    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
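# Resolution order implemented above (sketch; `f` is a hypothetical
# @ray.remote function):
#
#   1. An explicit placement_group= passed via .options() wins.
#   2. Otherwise, if placement_group_capture_child_tasks is enabled
#      (defaulting to worker.should_capture_child_tasks_in_placement_group),
#      the caller's current placement group is used.
#   3. Otherwise the task gets PlacementGroup.empty(), i.e. no constraint.
#
#   f.options(placement_group=pg).remote()   # case 1
#   f.remote()                               # case 2 or 3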
def train_func():
    return get_current_placement_group().id
def task():
    return get_current_placement_group()
def test_capture_child_tasks(ray_start_cluster, connect_to_client):
    cluster = ray_start_cluster
    total_num_tasks = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_tasks, num_gpus=total_num_tasks)
    ray.init(address=cluster.address)

    with connect_to_client_or_not(connect_to_client):
        pg = ray.util.placement_group(
            [
                {
                    "CPU": 2,
                    "GPU": 2,
                },
                {
                    "CPU": 2,
                    "GPU": 2,
                },
            ],
            strategy="STRICT_PACK",
        )
        ray.get(pg.ready())

        # If get_current_placement_group is used when the current
        # worker/driver doesn't belong to any placement group, it should
        # return None.
        assert get_current_placement_group() is None

        # Test if tasks capture child tasks.
        @ray.remote
        def task():
            return get_current_placement_group()

        @ray.remote
        def create_nested_task(child_cpu, child_gpu, set_none=False):
            assert get_current_placement_group() is not None
            kwargs = {
                "num_cpus": child_cpu,
                "num_gpus": child_gpu,
            }
            if set_none:
                kwargs["placement_group"] = None
            return ray.get(
                [task.options(**kwargs).remote() for _ in range(3)])

        t = create_nested_task.options(
            num_cpus=1,
            num_gpus=0,
            placement_group=pg,
            placement_group_capture_child_tasks=True,
        ).remote(1, 0)
        pgs = ray.get(t)
        # Every task should have the current placement group because child
        # tasks are implicitly captured by default.
        assert None not in pgs

        t1 = create_nested_task.options(
            num_cpus=1,
            num_gpus=0,
            placement_group=pg,
            placement_group_capture_child_tasks=True,
        ).remote(1, 0, True)
        pgs = ray.get(t1)
        # Every task should have no placement group since it is explicitly
        # set to None.
        assert set(pgs) == {None}

        # Test that tasks don't capture child tasks when the option is off.
        t2 = create_nested_task.options(
            num_cpus=0, num_gpus=1, placement_group=pg).remote(0, 1)
        pgs = ray.get(t2)
        # All placement groups should be None since we don't capture child
        # tasks.
        assert not all(pgs)
def test_capture_child_actors(ray_start_cluster, connect_to_client):
    cluster = ray_start_cluster
    total_num_actors = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_actors)
    ray.init(address=cluster.address)

    with connect_to_client_or_not(connect_to_client):
        pg = ray.util.placement_group(
            [{
                "CPU": 2
            }, {
                "CPU": 2
            }], strategy="STRICT_PACK")
        ray.get(pg.ready())

        # If get_current_placement_group is used when the current
        # worker/driver doesn't belong to any placement group, it should
        # return None.
        assert get_current_placement_group() is None

        # Test actors first.
        @ray.remote(num_cpus=1)
        class NestedActor:
            def ready(self):
                return True

        @ray.remote(num_cpus=1)
        class Actor:
            def __init__(self):
                self.actors = []

            def ready(self):
                return True

            def schedule_nested_actor(self):
                # Make sure we can capture the current placement group.
                assert get_current_placement_group() is not None
                # Actors should be implicitly captured.
                actor = NestedActor.remote()
                ray.get(actor.ready.remote())
                self.actors.append(actor)

            def schedule_nested_actor_outside_pg(self):
                # Don't use the placement group.
                actor = NestedActor.options(placement_group=None).remote()
                ray.get(actor.ready.remote())
                self.actors.append(actor)

        a = Actor.options(
            placement_group=pg,
            placement_group_capture_child_tasks=True).remote()
        ray.get(a.ready.remote())
        # 1 top-level actor + 3 children.
        for _ in range(total_num_actors - 1):
            ray.get(a.schedule_nested_actor.remote())
        # Make sure all the actors are scheduled on the same node
        # (the placement group uses the STRICT_PACK strategy).
        node_id_set = set()
        for actor_info in ray.state.actors().values():
            if actor_info["State"] == convert_actor_state(
                    gcs_utils.ActorTableData.ALIVE):
                node_id = actor_info["Address"]["NodeID"]
                node_id_set.add(node_id)

        # All node ids should be identical, so the set should have size 1.
        assert len(node_id_set) == 1

        # Kill the actor and wait until it is killed.
        kill_actor_and_wait_for_failure(a)
        with pytest.raises(ray.exceptions.RayActorError):
            ray.get(a.ready.remote())

        # Now create an actor, but do not capture child tasks.
        a = Actor.options(placement_group=pg).remote()
        ray.get(a.ready.remote())
        # 1 top-level actor + 3 children.
        for _ in range(total_num_actors - 1):
            ray.get(a.schedule_nested_actor.remote())
        # Make sure the actors are not all scheduled on the same node,
        # because the nested actors are not scheduled in the placement
        # group.
        node_id_set = set()
        for actor_info in ray.state.actors().values():
            if actor_info["State"] == convert_actor_state(
                    gcs_utils.ActorTableData.ALIVE):
                node_id = actor_info["Address"]["NodeID"]
                node_id_set.add(node_id)

        assert len(node_id_set) == 2

        # Kill the actor and wait until it is killed.
        kill_actor_and_wait_for_failure(a)
        with pytest.raises(ray.exceptions.RayActorError):
            ray.get(a.ready.remote())

        # Lastly, make sure that when None is specified, actors are not
        # scheduled in the placement group.
        a = Actor.options(placement_group=pg).remote()
        ray.get(a.ready.remote())
        # 1 top-level actor + 3 children.
        for _ in range(total_num_actors - 1):
            ray.get(a.schedule_nested_actor_outside_pg.remote())
        # Make sure the actors are not all scheduled on the same node,
        # because the nested actors are not scheduled in the placement
        # group.
        node_id_set = set()
        for actor_info in ray.state.actors().values():
            if actor_info["State"] == convert_actor_state(
                    gcs_utils.ActorTableData.ALIVE):
                node_id = actor_info["Address"]["NodeID"]
                node_id_set.add(node_id)

        assert len(node_id_set) == 2
def _remote(self,
            args=None,
            kwargs=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            accelerator_type=None,
            max_concurrency=None,
            max_restarts=None,
            max_task_retries=None,
            name=None,
            lifetime=None,
            placement_group="default",
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            override_environment_variables=None):
    """Create an actor.

    This method allows more flexibility than the remote method because
    resource requirements can be specified and override the defaults in
    the decorator.

    Args:
        args: The arguments to forward to the actor constructor.
        kwargs: The keyword arguments to forward to the actor
            constructor.
        num_cpus: The number of CPUs required by the actor creation task.
        num_gpus: The number of GPUs required by the actor creation task.
        memory: Restrict the heap memory usage of this actor.
        object_store_memory: Restrict the object store memory used by
            this actor when creating objects.
        resources: The custom resources required by the actor creation
            task.
        max_concurrency: The max number of concurrent calls to allow for
            this actor. This only works with direct actor calls. The max
            concurrency defaults to 1 for threaded execution, and 1000
            for asyncio execution. Note that the execution order is not
            guaranteed when max_concurrency > 1.
        name: The globally unique name for the actor, which can be used
            to retrieve the actor via ray.get_actor(name) as long as the
            actor is still alive. Names may not contain '/'.
        lifetime: Either `None`, in which case the actor fate-shares
            with its creator and is deleted once its refcount drops to
            zero, or "detached", which means the actor will live as a
            global object independent of the creator.
        placement_group: the placement group this actor belongs to, or
            None if it doesn't belong to any group. Setting to "default"
            autodetects the placement group based on the current setting
            of placement_group_capture_child_tasks.
        placement_group_bundle_index: the index of the bundle if the
            actor belongs to a placement group, which may be -1 to
            specify any available bundle.
        placement_group_capture_child_tasks: Whether or not children
            tasks of this actor should implicitly use the same placement
            group as its parent. It is True by default.
        runtime_env (Dict[str, Any]): Specifies the runtime environment
            for this actor or task and its children (see
            ``runtime_env.py`` for more details).
        override_environment_variables: Environment variables to
            override and/or introduce for this actor. This is a
            dictionary mapping variable names to their values.

    Returns:
        A handle to the newly created actor.
""" if args is None: args = [] if kwargs is None: kwargs = {} meta = self.__ray_metadata__ actor_has_async_methods = len( inspect.getmembers( meta.modified_class, predicate=inspect.iscoroutinefunction)) > 0 is_asyncio = actor_has_async_methods if max_concurrency is None: if is_asyncio: max_concurrency = 1000 else: max_concurrency = 1 if max_concurrency < 1: raise ValueError("max_concurrency must be >= 1") if client_mode_should_convert(): return client_mode_convert_actor( self, args, kwargs, num_cpus=num_cpus, num_gpus=num_gpus, memory=memory, object_store_memory=object_store_memory, resources=resources, accelerator_type=accelerator_type, max_concurrency=max_concurrency, max_restarts=max_restarts, max_task_retries=max_task_retries, name=name, lifetime=lifetime, placement_group=placement_group, placement_group_bundle_index=placement_group_bundle_index, placement_group_capture_child_tasks=( placement_group_capture_child_tasks), runtime_env=runtime_env, override_environment_variables=( override_environment_variables)) worker = ray.worker.global_worker worker.check_connected() if name is not None: if not isinstance(name, str): raise TypeError( f"name must be None or a string, got: '{type(name)}'.") elif name == "": raise ValueError("Actor name cannot be an empty string.") split_names = name.split("/", maxsplit=1) if len(split_names) <= 1: name = split_names[0] namespace = "" else: # must be length 2 namespace, name = split_names if "/" in name: raise ValueError("Actor name may not contain '/'.") else: namespace = "" # Check whether the name is already taken. # TODO(edoakes): this check has a race condition because two drivers # could pass the check and then create the same named actor. We should # instead check this when we create the actor, but that's currently an # async call. if name is not None: try: ray.get_actor(name) except ValueError: # Name is not taken. pass else: raise ValueError( f"The name {name} is already taken. Please use " "a different name or get the existing actor using " f"ray.get_actor('{name}')") if lifetime is None: detached = False elif lifetime == "detached": detached = True else: raise ValueError( "actor `lifetime` argument must be either `None` or 'detached'" ) if placement_group_capture_child_tasks is None: placement_group_capture_child_tasks = ( worker.should_capture_child_tasks_in_placement_group) if placement_group == "default": if placement_group_capture_child_tasks: placement_group = get_current_placement_group() else: placement_group = PlacementGroup.empty() if not placement_group: placement_group = PlacementGroup.empty() check_placement_group_index(placement_group, placement_group_bundle_index) # Set the actor's default resources if not already set. First three # conditions are to check that no resources were specified in the # decorator. Last three conditions are to check that no resources were # specified when _remote() was called. if (meta.num_cpus is None and meta.num_gpus is None and meta.resources is None and meta.accelerator_type is None and num_cpus is None and num_gpus is None and resources is None and accelerator_type is None): # In the default case, actors acquire no resources for # their lifetime, and actor methods will require 1 CPU. cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE else: # If any resources are specified (here or in decorator), then # all resources are acquired for the actor's lifetime and no # resources are associated with methods. 
        cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                       if meta.num_cpus is None else meta.num_cpus)
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

    # LOCAL_MODE cannot handle cross_language.
    if worker.mode == ray.LOCAL_MODE:
        assert not meta.is_cross_language, \
            "Cross language ActorClass cannot be executed locally."

    # Export the actor.
    if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                       worker.current_session_and_job):
        # If this actor class was not exported in this session and job,
        # we need to export this function again, because the current GCS
        # doesn't have it.
        meta.last_export_session_and_job = worker.current_session_and_job
        # After serializing / deserializing the modified class, its
        # __module__ will be ray.cloudpickle.cloudpickle. So pass
        # actor_creation_function_descriptor here to make sure the actor
        # class is exported correctly.
        worker.function_actor_manager.export_actor_class(
            meta.modified_class, meta.actor_creation_function_descriptor,
            meta.method_meta.methods.keys())

    resources = ray._private.utils.resources_from_resource_arguments(
        cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
        meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
        object_store_memory, resources, accelerator_type)

    # If the actor methods require CPU resources, then set the required
    # placement resources. If actor_placement_resources is empty, then
    # the required placement resources will be the same as resources.
    actor_placement_resources = {}
    assert actor_method_cpu in [0, 1]
    if actor_method_cpu == 1:
        actor_placement_resources = resources.copy()
        actor_placement_resources["CPU"] += 1
    if meta.is_cross_language:
        creation_args = cross_language.format_args(worker, args, kwargs)
    else:
        function_signature = meta.method_meta.signatures["__init__"]
        creation_args = signature.flatten_args(function_signature, args,
                                               kwargs)

    if runtime_env is None:
        runtime_env = meta.runtime_env
    if runtime_env:
        runtime_env_dict = runtime_support.RuntimeEnvDict(
            runtime_env).get_parsed_dict()
    else:
        runtime_env_dict = {}

    if override_environment_variables:
        logger.warning("override_environment_variables is deprecated and "
                       "will be removed in Ray 1.6. Please use "
                       ".options(runtime_env={'env_vars': {...}}).remote() "
                       "instead.")

    actor_id = worker.core_worker.create_actor(
        meta.language,
        meta.actor_creation_function_descriptor,
        creation_args,
        max_restarts or meta.max_restarts,
        max_task_retries or meta.max_task_retries,
        resources,
        actor_placement_resources,
        max_concurrency,
        detached,
        name if name is not None else "",
        namespace,
        is_asyncio,
        placement_group.id,
        placement_group_bundle_index,
        placement_group_capture_child_tasks,
        # Store actor_method_cpu in the actor handle's extension data.
        extension_data=str(actor_method_cpu),
        runtime_env_dict=runtime_env_dict,
        override_environment_variables=override_environment_variables
        or dict())

    actor_handle = ActorHandle(
        meta.language,
        actor_id,
        meta.method_meta.decorators,
        meta.method_meta.signatures,
        meta.method_meta.num_returns,
        actor_method_cpu,
        meta.actor_creation_function_descriptor,
        worker.current_session_and_job,
        original_handle=True)

    return actor_handle
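# Hedged end-to-end sketch (not from the source; `Worker` is a hypothetical
# actor class): pin an actor to a bundle of a placement group and let its
# children inherit the group via placement_group_capture_child_tasks.
import ray

ray.init(num_cpus=2)

@ray.remote(num_cpus=1)
class Worker:
    def ping(self):
        return "ok"

pg = ray.util.placement_group([{"CPU": 1}, {"CPU": 1}], strategy="PACK")
ray.get(pg.ready())

# placement_group="default" (the default) autodetects the caller's group;
# passing pg explicitly pins the actor to bundle 0 instead.
w = Worker.options(
    placement_group=pg,
    placement_group_bundle_index=0,
    placement_group_capture_child_tasks=True).remote()
assert ray.get(w.ping.remote()) == "ok"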
def create_nested_task(child_cpu, child_gpu):
    assert get_current_placement_group() is not None
    return ray.get([
        task.options(num_cpus=child_cpu, num_gpus=child_gpu).remote()
        for _ in range(3)
    ])
def schedule_nested_actor(self):
    actor = NestedActor.options(
        placement_group=get_current_placement_group()).remote()
    ray.get(actor.ready.remote())
    self.actors.append(actor)