Example #1
def unique_nodes_pg() -> int:
    # Count the distinct node resources referenced by the bundles of the
    # current placement group; return 0 when not running inside one.
    pg = get_current_placement_group()
    if pg is None:
        return 0
    nodes = []
    for bundle in pg.bundle_specs:
        for resource in bundle:
            if "node" in resource:
                nodes.append(resource)
    return len(set(nodes))
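The helper above only returns a meaningful count when it runs inside a placement group. A minimal sketch of exercising it from a task, assuming a local Ray cluster (the count is 0 here because the bundle reserves no per-node resources):

import ray
from ray.util import get_current_placement_group

ray.init(num_cpus=2)

@ray.remote(num_cpus=1)
def count_nodes():
    # The surrounding placement group is implicitly captured, so
    # get_current_placement_group() is not None inside this task.
    return unique_nodes_pg()

pg = ray.util.placement_group([{"CPU": 1}], strategy="PACK")
ray.get(pg.ready())
print(ray.get(count_nodes.options(placement_group=pg).remote()))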
Example #2
    def _create_placement_group(self, num_workers):
        """Creates a placement group for the workers.

        If this worker is already in a placement group then a new one will
        not be created. This is primarily for the case where Tune is
        upstream and allocates resources for the SGD workers.

        If this worker is not in a placement group, a new one will be created
        and set. The placement group will have a single bundle for each worker
        and use the SPREAD strategy for an even distribution.
        """
        pg = get_current_placement_group()
        if pg is None:
            bundle = {
                "CPU": self._num_cpus_per_worker,
                "GPU": int(self._use_gpu)
            }
            bundles = [bundle] * num_workers
            pg = ray.util.placement_group(bundles, strategy="SPREAD")
            logger.debug("Waiting for placement group to start.")
            ready, _ = ray.wait([pg.ready()],
                                timeout=SGD_PLACEMENT_GROUP_TIMEOUT_S)
            if ready:
                logger.debug("Placement group has started.")
            else:
                raise TimeoutError(
                    "Placement group creation timed out. Make sure your "
                    "cluster has enough resources, or use an autoscaling "
                    "cluster. Current resources available: {}, resources "
                    "requested by the placement group: {}".format(
                        ray.available_resources(), pg.bundle_specs))
            self._worker_placement_group = pg
Example #3
 def schedule_nested_actor(self):
     # Make sure we can capture the current placement group.
     assert get_current_placement_group() is not None
     # Actors should be implicitly captured.
     actor = NestedActor.remote()
     ray.get(actor.ready.remote())
     self.actors.append(actor)
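Capture like this can be steered from the parent; a short sketch of the two knobs these tests use, assuming Actor, NestedActor, and pg are defined as in the surrounding examples:

# Opt in explicitly: children of this actor inherit its placement group.
a = Actor.options(placement_group=pg,
                  placement_group_capture_child_tasks=True).remote()

# Opt out for a single child: an explicit None keeps the nested actor
# outside the captured group.
actor = NestedActor.options(placement_group=None).remote()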
Example #4
 def _create_strategy(self):
     assert self.num_workers is None or self.num_hosts is None
     use_pg = (self.use_current_placement_group
               and get_current_placement_group())
     if self.num_workers or use_pg:
         if use_pg:
             logger.info("Found an existing placement group, inheriting. "
                         "You can disable this behavior by setting "
                         "`use_current_placement_group=False`.")
         num_workers = self.num_workers or self.num_workers_per_host * self.num_hosts
         return PGStrategy(settings=self.settings,
                           num_workers=num_workers,
                           use_gpu=self.use_gpu,
                           cpus_per_worker=self.cpus_per_worker,
                           gpus_per_worker=self.gpus_per_worker,
                           force_create_placement_group=(
                               not self.use_current_placement_group))
     else:
         return ColocatedStrategy(
             settings=self.settings,
             num_hosts=self.num_hosts,
             num_workers_per_host=self.num_workers_per_host,
             use_gpu=self.use_gpu,
             cpus_per_worker=self.cpus_per_worker,
             gpus_per_worker=self.gpus_per_worker)
Example #5
 def create_nested_task(child_cpu, child_gpu, set_none=False):
     assert get_current_placement_group() is not None
     kwargs = {
         "num_cpus": child_cpu,
         "num_gpus": child_gpu,
     }
     if set_none:
         kwargs["placement_group"] = None
     return ray.get([task.options(**kwargs).remote() for _ in range(3)])
Example #6
    def _create_placement_group(self):
        """Creates a placement group if it does not exist.

        If a placement group is already detected (Tune) this will be a no-op.

        By default the placement group will be created with the PACK
        strategy, which is optimized for colocating GPUs on a minimal
        number of nodes. This behavior can be overridden to use the
        SPREAD strategy by defining ``TRAIN_ENABLE_WORKER_SPREAD_ENV``.

        If a placement group is created it will be stored as
        self._placement_group.
        """
        current_placement_group = get_current_placement_group()
        should_capture_child_tasks_in_placement_group = (
            ray.worker.global_worker.should_capture_child_tasks_in_placement_group
        )
        should_create_placement_group = (
            current_placement_group is None
            or not should_capture_child_tasks_in_placement_group
        )

        if should_create_placement_group:
            additional_resources_per_worker = (
                self._additional_resources_per_worker or {}
            )
            bundle = {
                "CPU": self._num_cpus_per_worker,
                "GPU": self._num_gpus_per_worker,
                **additional_resources_per_worker,
            }
            bundles = [bundle.copy() for _ in range(self._num_workers)]

            use_spread = bool(env_integer(TRAIN_ENABLE_WORKER_SPREAD_ENV, 0))
            strategy = "SPREAD" if use_spread else "PACK"

            placement_group = ray.util.placement_group(bundles, strategy=strategy)
            logger.debug("Waiting for placement group to start.")
            timeout = env_integer(TRAIN_PLACEMENT_GROUP_TIMEOUT_S_ENV, 100)
            ready, _ = ray.wait([placement_group.ready()], timeout=timeout)
            if ready:
                logger.debug("Placement group has started.")
            else:
                raise TimeoutError(
                    "Placement group creation timed out. Make sure your "
                    "cluster either has enough resources or use an "
                    "autoscaling cluster. If you are running on a cluster, "
                    "make sure you specify an address in `ray.init()`, for example, "
                    '`ray.init("auto")`. You can also increase the timeout by setting '
                    "the TRAIN_PLACEMENT_GROUP_TIMEOUT_S environment variable. "
                    "Current resources available: {}, resources requested by the "
                    "placement group: {}".format(
                        ray.available_resources(), placement_group.bundle_specs
                    )
                )
            self._placement_group = placement_group
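The wait-with-timeout pattern used by both trainers above, distilled into a standalone sketch (assuming a running Ray instance; 100 s mirrors the default timeout read via env_integer):

import ray

ray.init(num_cpus=1)
pg = ray.util.placement_group([{"CPU": 1}], strategy="PACK")

# pg.ready() returns an ObjectRef that resolves once every bundle is
# reserved; ray.wait bounds how long we block on it.
ready, _ = ray.wait([pg.ready()], timeout=100)
if not ready:
    raise TimeoutError(
        "Placement group creation timed out. Available: {}, requested: "
        "{}".format(ray.available_resources(), pg.bundle_specs))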
Example #7
    def check_override(self):
        assert self.scaling_config["num_workers"] == 1
        # Should do deep update.
        assert not self.custom_arg["outer"]["inner"]
        assert self.custom_arg["outer"]["fixed"] == 1
        # Should merge with base config.
        assert self.preprocessor.original

        pg = get_current_placement_group()
        assert len(pg.bundle_specs) == 2  # 1 trainer, 1 worker
Example #8
def test_capture_child_tasks(ray_start_cluster):
    cluster = ray_start_cluster
    total_num_actors = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_actors)
    ray.init(address=cluster.address)

    pg = ray.util.placement_group(
        [{"CPU": 2}, {"CPU": 2}], strategy="STRICT_PACK")
    ray.get(pg.ready(), timeout=5)

    # If get_current_placement_group is used when the current worker or
    # driver doesn't belong to any placement group, it should return None.
    assert get_current_placement_group() is None

    @ray.remote(num_cpus=1)
    class NestedActor:
        def ready(self):
            return True

    @ray.remote(num_cpus=1)
    class Actor:
        def __init__(self):
            self.actors = []

        def ready(self):
            return True

        def schedule_nested_actor(self):
            actor = NestedActor.options(
                placement_group=get_current_placement_group()).remote()
            ray.get(actor.ready.remote())
            self.actors.append(actor)

    a = Actor.options(placement_group=pg).remote()
    ray.get(a.ready.remote())
    # 1 top level actor + 3 children.
    for _ in range(total_num_actors - 1):
        ray.get(a.schedule_nested_actor.remote())
    # Make sure all the actors are scheduled on the same node
    # (the placement group uses the STRICT_PACK strategy).
    node_id_set = set()
    for actor_info in ray.actors().values():
        node_id = actor_info["Address"]["NodeID"]
        node_id_set.add(node_id)

    # Since all node ids should be identical, the set size should be 1.
    assert len(node_id_set) == 1
Example #9
def test_capture_child_tasks(ray_start_cluster):
    cluster = ray_start_cluster
    total_num_tasks = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_tasks, num_gpus=total_num_tasks)
    ray.init(address=cluster.address)

    pg = ray.util.placement_group(
        [{"CPU": 2, "GPU": 2}, {"CPU": 2, "GPU": 2}],
        strategy="STRICT_PACK")
    ray.get(pg.ready())

    # If get_current_placement_group is used when the current worker or
    # driver doesn't belong to any placement group, it should return None.
    assert get_current_placement_group() is None

    # Test if tasks capture child tasks.
    @ray.remote
    def task():
        return get_current_placement_group()

    @ray.remote
    def create_nested_task(child_cpu, child_gpu):
        assert get_current_placement_group() is not None
        return ray.get([
            task.options(num_cpus=child_cpu, num_gpus=child_gpu).remote()
            for _ in range(3)
        ])

    t = create_nested_task.options(
        num_cpus=1, num_gpus=0, placement_group=pg).remote(1, 0)
    pgs = ray.get(t)
    # Every task should have the current placement group because child
    # tasks are implicitly captured by default.
    assert None not in pgs

    # Test if tasks don't capture child tasks when the option is off.
    t2 = create_nested_task.options(
        num_cpus=0,
        num_gpus=1,
        placement_group=pg,
        placement_group_capture_child_tasks=False).remote(0, 1)
    pgs = ray.get(t2)
    # All placement groups should be None because we don't capture child
    # tasks.
    assert not all(pgs)
Example #10
 def __init__(self,
              *,
              settings,
              num_workers,
              use_gpu,
              cpus_per_worker,
              gpus_per_worker,
              placement_group=None,
              force_create_placement_group=False):
     self.settings = settings
     self._num_workers = num_workers
     self.cpus_per_worker = cpus_per_worker
     self.gpus_per_worker = gpus_per_worker or 1
     self.use_gpu = use_gpu
     if force_create_placement_group:
         self.placement_group = None
     else:
         self.placement_group = (placement_group
                                 or get_current_placement_group())
     self._placement_group_bundles = (
         self.placement_group.bundle_specs
         if self.placement_group else None)
     self._created_placement_group = False
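The inheritance rule in this constructor reduces to a few lines; an illustrative sketch, not Horovod's actual API:

def resolve_strategy_pg(placement_group, force_create):
    # force_create wins: ignore any surrounding group so that a fresh
    # one is created later.
    if force_create:
        return None
    # Otherwise prefer an explicitly passed group, then fall back to the
    # group the current worker was scheduled into, if any.
    return placement_group or get_current_placement_group()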
Example #11
    def _remote(self,
                args=None,
                kwargs=None,
                num_returns=None,
                num_cpus=None,
                num_gpus=None,
                memory=None,
                object_store_memory=None,
                accelerator_type=None,
                resources=None,
                max_retries=None,
                placement_group=None,
                placement_group_bundle_index=-1,
                placement_group_capture_child_tasks=None,
                runtime_env=None,
                override_environment_variables=None,
                name=""):
        """Submit the remote function for execution."""
        if client_mode_should_convert():
            return client_mode_convert_function(
                self,
                args,
                kwargs,
                num_returns=num_returns,
                num_cpus=num_cpus,
                num_gpus=num_gpus,
                memory=memory,
                object_store_memory=object_store_memory,
                accelerator_type=accelerator_type,
                resources=resources,
                max_retries=max_retries,
                placement_group=placement_group,
                placement_group_bundle_index=placement_group_bundle_index,
                placement_group_capture_child_tasks=(
                    placement_group_capture_child_tasks),
                runtime_env=runtime_env,
                override_environment_variables=override_environment_variables,
                name=name)

        worker = ray.worker.global_worker
        worker.check_connected()

        # If this function was not exported in this session and job, we need to
        # export this function again, because the current GCS doesn't have it.
        if not self._is_cross_language and \
                self._last_export_session_and_job != \
                worker.current_session_and_job:
            # There is an interesting question here. If the remote function is
            # used by a subsequent driver (in the same script), should the
            # second driver pickle the function again? If yes, then the remote
            # function definition can differ in the second driver (e.g., if
            # variables in its closure have changed). We probably want the
            # behavior of the remote function in the second driver to be
            # independent of whether or not the function was invoked by the
            # first driver. This is an argument for repickling the function,
            # which we do here.
            self._pickled_function = pickle.dumps(self._function)

            self._function_descriptor = PythonFunctionDescriptor.from_function(
                self._function, self._pickled_function)

            self._last_export_session_and_job = worker.current_session_and_job
            worker.function_actor_manager.export(self)

        kwargs = {} if kwargs is None else kwargs
        args = [] if args is None else args

        if num_returns is None:
            num_returns = self._num_returns
        if max_retries is None:
            max_retries = self._max_retries

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)

        if placement_group is None:
            if placement_group_capture_child_tasks:
                placement_group = get_current_placement_group()

        if not placement_group:
            placement_group = PlacementGroup.empty()

        check_placement_group_index(placement_group,
                                    placement_group_bundle_index)

        resources = ray._private.utils.resources_from_resource_arguments(
            self._num_cpus, self._num_gpus, self._memory,
            self._object_store_memory, self._resources, self._accelerator_type,
            num_cpus, num_gpus, memory, object_store_memory, resources,
            accelerator_type)

        if runtime_env:
            parsed_runtime_env = runtime_support.RuntimeEnvDict(runtime_env)
            override_environment_variables = (
                parsed_runtime_env.to_worker_env_vars(
                    override_environment_variables))
        else:
            parsed_runtime_env = runtime_support.RuntimeEnvDict({})

        def invocation(args, kwargs):
            if self._is_cross_language:
                list_args = cross_language.format_args(worker, args, kwargs)
            elif not args and not kwargs and not self._function_signature:
                list_args = []
            else:
                list_args = ray._private.signature.flatten_args(
                    self._function_signature, args, kwargs)

            if worker.mode == ray.worker.LOCAL_MODE:
                assert not self._is_cross_language, \
                    "Cross language remote function " \
                    "cannot be executed locally."
            object_refs = worker.core_worker.submit_task(
                self._language,
                self._function_descriptor,
                list_args,
                name,
                num_returns,
                resources,
                max_retries,
                placement_group.id,
                placement_group_bundle_index,
                placement_group_capture_child_tasks,
                worker.debugger_breakpoint,
                parsed_runtime_env,
                override_environment_variables=override_environment_variables
                or dict())
            # Reset worker's debug context from the last "remote" command
            # (which applies only to this .remote call).
            worker.debugger_breakpoint = b""
            if len(object_refs) == 1:
                return object_refs[0]
            elif len(object_refs) > 1:
                return object_refs

        if self._decorator is not None:
            invocation = self._decorator(invocation)

        return invocation(args, kwargs)
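The placement-group resolution buried in _remote reduces to a small precedence rule; a sketch for illustration only:

def resolve_task_pg(placement_group, capture_enabled):
    # 1. An explicitly passed group always wins.
    # 2. With capture enabled, inherit the caller's current group.
    # 3. Otherwise fall back to the empty (no-op) group.
    if placement_group is None and capture_enabled:
        placement_group = get_current_placement_group()
    return placement_group or PlacementGroup.empty()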
Example #12
 def train_func():
     return get_current_placement_group().id
Example #13
 def task():
     return get_current_placement_group()
Example #14
def test_capture_child_tasks(ray_start_cluster, connect_to_client):
    cluster = ray_start_cluster
    total_num_tasks = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_tasks, num_gpus=total_num_tasks)
    ray.init(address=cluster.address)

    with connect_to_client_or_not(connect_to_client):
        pg = ray.util.placement_group(
            [
                {
                    "CPU": 2,
                    "GPU": 2,
                },
                {
                    "CPU": 2,
                    "GPU": 2,
                },
            ],
            strategy="STRICT_PACK",
        )
        ray.get(pg.ready())

        # If get_current_placement_group is used when the current worker or
        # driver doesn't belong to any placement group, it should return None.
        assert get_current_placement_group() is None

        # Test if tasks capture child tasks.
        @ray.remote
        def task():
            return get_current_placement_group()

        @ray.remote
        def create_nested_task(child_cpu, child_gpu, set_none=False):
            assert get_current_placement_group() is not None
            kwargs = {
                "num_cpus": child_cpu,
                "num_gpus": child_gpu,
            }
            if set_none:
                kwargs["placement_group"] = None
            return ray.get([task.options(**kwargs).remote() for _ in range(3)])

        t = create_nested_task.options(
            num_cpus=1,
            num_gpus=0,
            placement_group=pg,
            placement_group_capture_child_tasks=True,
        ).remote(1, 0)
        pgs = ray.get(t)
        # Every task should have the current placement group because
        # child tasks are implicitly captured by default.
        assert None not in pgs

        t1 = create_nested_task.options(
            num_cpus=1,
            num_gpus=0,
            placement_group=pg,
            placement_group_capture_child_tasks=True,
        ).remote(1, 0, True)
        pgs = ray.get(t1)
        # Every task should have no placement group since it was
        # explicitly set to None.
        assert set(pgs) == {None}

        # Test that child tasks are not captured when the capture option
        # is not enabled.
        t2 = create_nested_task.options(num_cpus=0,
                                        num_gpus=1,
                                        placement_group=pg).remote(0, 1)
        pgs = ray.get(t2)
        # All placement groups should be None since we don't capture child
        # tasks.
        assert not all(pgs)
Example #15
def test_capture_child_actors(ray_start_cluster, connect_to_client):
    cluster = ray_start_cluster
    total_num_actors = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_actors)
    ray.init(address=cluster.address)

    with connect_to_client_or_not(connect_to_client):
        pg = ray.util.placement_group(
            [{"CPU": 2}, {"CPU": 2}], strategy="STRICT_PACK")
        ray.get(pg.ready())

        # If get_current_placement_group is used when the current worker or
        # driver doesn't belong to any placement group, it should return None.
        assert get_current_placement_group() is None

        # Test actors first.
        @ray.remote(num_cpus=1)
        class NestedActor:
            def ready(self):
                return True

        @ray.remote(num_cpus=1)
        class Actor:
            def __init__(self):
                self.actors = []

            def ready(self):
                return True

            def schedule_nested_actor(self):
                # Make sure we can capture the current placement group.
                assert get_current_placement_group() is not None
                # Actors should be implicitly captured.
                actor = NestedActor.remote()
                ray.get(actor.ready.remote())
                self.actors.append(actor)

            def schedule_nested_actor_outside_pg(self):
                # Don't use placement group.
                actor = NestedActor.options(placement_group=None).remote()
                ray.get(actor.ready.remote())
                self.actors.append(actor)

        a = Actor.options(placement_group=pg,
                          placement_group_capture_child_tasks=True).remote()
        ray.get(a.ready.remote())
        # 1 top level actor + 3 children.
        for _ in range(total_num_actors - 1):
            ray.get(a.schedule_nested_actor.remote())
        # Make sure all the actors are scheduled on the same node
        # (the placement group uses the STRICT_PACK strategy).
        node_id_set = set()
        for actor_info in ray.state.actors().values():
            if actor_info["State"] == convert_actor_state(
                    gcs_utils.ActorTableData.ALIVE):
                node_id = actor_info["Address"]["NodeID"]
                node_id_set.add(node_id)

        # Since all node ids should be identical, the set size should be 1.
        assert len(node_id_set) == 1

        # Kill an actor and wait until it is killed.
        kill_actor_and_wait_for_failure(a)
        with pytest.raises(ray.exceptions.RayActorError):
            ray.get(a.ready.remote())

        # Now create an actor, but do not capture child tasks.
        a = Actor.options(placement_group=pg).remote()
        ray.get(a.ready.remote())
        # 1 top level actor + 3 children.
        for _ in range(total_num_actors - 1):
            ray.get(a.schedule_nested_actor.remote())
        # Make sure the actors are not all scheduled on the same node,
        # since the child actors are not scheduled in the same placement
        # group.
        node_id_set = set()
        for actor_info in ray.state.actors().values():
            if actor_info["State"] == convert_actor_state(
                    gcs_utils.ActorTableData.ALIVE):
                node_id = actor_info["Address"]["NodeID"]
                node_id_set.add(node_id)

        assert len(node_id_set) == 2

        # Kill an actor and wait until it is killed.
        kill_actor_and_wait_for_failure(a)
        with pytest.raises(ray.exceptions.RayActorError):
            ray.get(a.ready.remote())

        # Lastly, make sure that when None is specified, child actors are
        # not scheduled in the placement group.
        a = Actor.options(placement_group=pg).remote()
        ray.get(a.ready.remote())
        # 1 top level actor + 3 children.
        for _ in range(total_num_actors - 1):
            ray.get(a.schedule_nested_actor_outside_pg.remote())
        # Make sure the actors are not all scheduled on the same node,
        # since the child actors are not scheduled in the same placement
        # group.
        node_id_set = set()
        for actor_info in ray.state.actors().values():
            if actor_info["State"] == convert_actor_state(
                    gcs_utils.ActorTableData.ALIVE):
                node_id = actor_info["Address"]["NodeID"]
                node_id_set.add(node_id)

        assert len(node_id_set) == 2
Example #16
    def _remote(self,
                args=None,
                kwargs=None,
                num_cpus=None,
                num_gpus=None,
                memory=None,
                object_store_memory=None,
                resources=None,
                accelerator_type=None,
                max_concurrency=None,
                max_restarts=None,
                max_task_retries=None,
                name=None,
                lifetime=None,
                placement_group="default",
                placement_group_bundle_index=-1,
                placement_group_capture_child_tasks=None,
                runtime_env=None,
                override_environment_variables=None):
        """Create an actor.

        This method allows more flexibility than the remote method because
        resource requirements can be specified and override the defaults in the
        decorator.

        Args:
            args: The arguments to forward to the actor constructor.
            kwargs: The keyword arguments to forward to the actor constructor.
            num_cpus: The number of CPUs required by the actor creation task.
            num_gpus: The number of GPUs required by the actor creation task.
            memory: Restrict the heap memory usage of this actor.
            object_store_memory: Restrict the object store memory used by
                this actor when creating objects.
            resources: The custom resources required by the actor creation
                task.
            max_concurrency: The max number of concurrent calls to allow for
                this actor. This only works with direct actor calls. The max
                concurrency defaults to 1 for threaded execution, and 1000 for
                asyncio execution. Note that the execution order is not
                guaranteed when max_concurrency > 1.
            name: The globally unique name for the actor, which can be used
                to retrieve the actor via ray.get_actor(name) as long as the
                actor is still alive. Names may not contain '/'.
            lifetime: Either `None`, which means the actor fate-shares
                with its creator and is deleted once its refcount drops
                to zero, or "detached", which means the actor lives as a
                global object independent of the creator.
            placement_group: the placement group this actor belongs to,
                or None if it doesn't belong to any group. Setting to "default"
                autodetects the placement group based on the current setting of
                placement_group_capture_child_tasks.
            placement_group_bundle_index: the index of the bundle
                if the actor belongs to a placement group, which may be -1 to
                specify any available bundle.
            placement_group_capture_child_tasks: Whether or not child
                tasks of this actor should implicitly use the same
                placement group as the parent. Defaults to True.
            runtime_env (Dict[str, Any]): Specifies the runtime environment for
                this actor or task and its children (see ``runtime_env.py`` for
                more details).
            override_environment_variables: Environment variables to override
                and/or introduce for this actor.  This is a dictionary mapping
                variable names to their values.

        Returns:
            A handle to the newly created actor.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}
        meta = self.__ray_metadata__
        actor_has_async_methods = len(
            inspect.getmembers(
                meta.modified_class,
                predicate=inspect.iscoroutinefunction)) > 0
        is_asyncio = actor_has_async_methods

        if max_concurrency is None:
            if is_asyncio:
                max_concurrency = 1000
            else:
                max_concurrency = 1

        if max_concurrency < 1:
            raise ValueError("max_concurrency must be >= 1")

        if client_mode_should_convert():
            return client_mode_convert_actor(
                self,
                args,
                kwargs,
                num_cpus=num_cpus,
                num_gpus=num_gpus,
                memory=memory,
                object_store_memory=object_store_memory,
                resources=resources,
                accelerator_type=accelerator_type,
                max_concurrency=max_concurrency,
                max_restarts=max_restarts,
                max_task_retries=max_task_retries,
                name=name,
                lifetime=lifetime,
                placement_group=placement_group,
                placement_group_bundle_index=placement_group_bundle_index,
                placement_group_capture_child_tasks=(
                    placement_group_capture_child_tasks),
                runtime_env=runtime_env,
                override_environment_variables=(
                    override_environment_variables))

        worker = ray.worker.global_worker
        worker.check_connected()

        if name is not None:
            if not isinstance(name, str):
                raise TypeError(
                    f"name must be None or a string, got: '{type(name)}'.")
            elif name == "":
                raise ValueError("Actor name cannot be an empty string.")
            split_names = name.split("/", maxsplit=1)
            if len(split_names) <= 1:
                name = split_names[0]
                namespace = ""
            else:
                # must be length 2
                namespace, name = split_names
            if "/" in name:
                raise ValueError("Actor name may not contain '/'.")
        else:
            namespace = ""

        # Check whether the name is already taken.
        # TODO(edoakes): this check has a race condition because two drivers
        # could pass the check and then create the same named actor. We should
        # instead check this when we create the actor, but that's currently an
        # async call.
        if name is not None:
            try:
                ray.get_actor(name)
            except ValueError:  # Name is not taken.
                pass
            else:
                raise ValueError(
                    f"The name {name} is already taken. Please use "
                    "a different name or get the existing actor using "
                    f"ray.get_actor('{name}')")

        if lifetime is None:
            detached = False
        elif lifetime == "detached":
            detached = True
        else:
            raise ValueError(
                "actor `lifetime` argument must be either `None` or 'detached'"
            )

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)

        if placement_group == "default":
            if placement_group_capture_child_tasks:
                placement_group = get_current_placement_group()
            else:
                placement_group = PlacementGroup.empty()

        if not placement_group:
            placement_group = PlacementGroup.empty()

        check_placement_group_index(placement_group,
                                    placement_group_bundle_index)

        # Set the actor's default resources if not already set. First three
        # conditions are to check that no resources were specified in the
        # decorator. Last three conditions are to check that no resources were
        # specified when _remote() was called.
        if (meta.num_cpus is None and meta.num_gpus is None
                and meta.resources is None and meta.accelerator_type is None
                and num_cpus is None and num_gpus is None and resources is None
                and accelerator_type is None):
            # In the default case, actors acquire no resources for
            # their lifetime, and actor methods will require 1 CPU.
            cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
            actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
        else:
            # If any resources are specified (here or in decorator), then
            # all resources are acquired for the actor's lifetime and no
            # resources are associated with methods.
            cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                           if meta.num_cpus is None else meta.num_cpus)
            actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

        # LOCAL_MODE cannot handle cross_language
        if worker.mode == ray.LOCAL_MODE:
            assert not meta.is_cross_language, \
                "Cross language ActorClass cannot be executed locally."

        # Export the actor.
        if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                           worker.current_session_and_job):
            # If this actor class was not exported in this session and
            # job, we need to export it again, because the current GCS
            # doesn't have it.
            meta.last_export_session_and_job = (worker.current_session_and_job)
            # After serializing and deserializing the modified class, its
            # __module__ will be ray.cloudpickle.cloudpickle, so we pass
            # actor_creation_function_descriptor here to make sure the
            # actor class is exported correctly.
            worker.function_actor_manager.export_actor_class(
                meta.modified_class, meta.actor_creation_function_descriptor,
                meta.method_meta.methods.keys())

        resources = ray._private.utils.resources_from_resource_arguments(
            cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
            meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
            object_store_memory, resources, accelerator_type)

        # If the actor methods require CPU resources, then set the required
        # placement resources. If actor_placement_resources is empty, then
        # the required placement resources will be the same as resources.
        actor_placement_resources = {}
        assert actor_method_cpu in [0, 1]
        if actor_method_cpu == 1:
            actor_placement_resources = resources.copy()
            actor_placement_resources["CPU"] += 1
        if meta.is_cross_language:
            creation_args = cross_language.format_args(worker, args, kwargs)
        else:
            function_signature = meta.method_meta.signatures["__init__"]
            creation_args = signature.flatten_args(function_signature, args,
                                                   kwargs)
        if runtime_env is None:
            runtime_env = meta.runtime_env
        if runtime_env:
            runtime_env_dict = runtime_support.RuntimeEnvDict(
                runtime_env).get_parsed_dict()
        else:
            runtime_env_dict = {}

        if override_environment_variables:
            logger.warning("override_environment_variables is deprecated and "
                           "will be removed in Ray 1.6.  Please use "
                           ".options(runtime_env={'env_vars': {...}}).remote()"
                           "instead.")

        actor_id = worker.core_worker.create_actor(
            meta.language,
            meta.actor_creation_function_descriptor,
            creation_args,
            max_restarts or meta.max_restarts,
            max_task_retries or meta.max_task_retries,
            resources,
            actor_placement_resources,
            max_concurrency,
            detached,
            name if name is not None else "",
            namespace,
            is_asyncio,
            placement_group.id,
            placement_group_bundle_index,
            placement_group_capture_child_tasks,
            # Store actor_method_cpu in actor handle's extension data.
            extension_data=str(actor_method_cpu),
            runtime_env_dict=runtime_env_dict,
            override_environment_variables=override_environment_variables
            or dict())

        actor_handle = ActorHandle(
            meta.language,
            actor_id,
            meta.method_meta.decorators,
            meta.method_meta.signatures,
            meta.method_meta.num_returns,
            actor_method_cpu,
            meta.actor_creation_function_descriptor,
            worker.current_session_and_job,
            original_handle=True)

        return actor_handle
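Actors differ from tasks in one detail: the default here is the sentinel string "default" rather than None, so an explicit None opts out of capture entirely. A sketch of the rule implemented above (illustrative only):

def resolve_actor_pg(placement_group, capture_enabled):
    if placement_group == "default":
        # Autodetect: inherit the current group only if capture is on.
        placement_group = (get_current_placement_group()
                           if capture_enabled else PlacementGroup.empty())
    # An explicit None falls through to the empty group.
    return placement_group or PlacementGroup.empty()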
Example #17
 def create_nested_task(child_cpu, child_gpu):
     assert get_current_placement_group() is not None
     return ray.get([
         task.options(num_cpus=child_cpu, num_gpus=child_gpu).remote()
         for _ in range(3)
     ])
Example #18
 def schedule_nested_actor(self):
     actor = NestedActor.options(
         placement_group=get_current_placement_group()).remote()
     ray.get(actor.ready.remote())
     self.actors.append(actor)