def test_get_conda_env_dir(tmp_path):
    """Check `get_conda_env_dir` from inside a named env and from (base).

    For context, `conda env list` typically prints something like:

    base                 /Users/scaly/anaconda3
    my_env_1             /Users/scaly/anaconda3/envs/my_env_1

    In this test, `tmp_path` plays the role of `Users/scaly/anaconda3`.
    """
    envs_root = tmp_path / "envs"
    tf1_dir = envs_root / "tf1"
    tf2_dir = envs_root / "tf2"
    tf1_dir.mkdir(parents=True)

    # Case 1: the active env is the named env tf1.
    active_tf1 = {
        "CONDA_PREFIX": str(tf1_dir),
        "CONDA_DEFAULT_ENV": "tf1",
    }
    with mock.patch.dict(os.environ, active_tf1):
        # tf2 has not been created yet, so the lookup must fail.
        with pytest.raises(ValueError):
            get_conda_env_dir("tf2")
        tf2_dir.mkdir(parents=True)
        assert get_conda_env_dir("tf2") == str(tf2_dir)

    # Case 2: the active env is (base); its prefix is tmp_path itself.
    active_base = {
        "CONDA_PREFIX": str(tmp_path),
        "CONDA_DEFAULT_ENV": "base",
    }
    with mock.patch.dict(os.environ, active_base):
        # tf3 was never created.
        with pytest.raises(ValueError):
            get_conda_env_dir("tf3")
        # tf2 was created in case 1 and is still on disk.
        assert get_conda_env_dir("tf2") == str(tf2_dir)
Esempio n. 2
0
def test_get_conda_env_dir(tmp_path):
    """Check that `get_conda_env_dir` finds env tf2 only once it exists."""
    # Pretend the active env is tf1, living directly under tmp_path.
    tf1_dir = tmp_path / "tf1"
    tf1_dir.mkdir()
    tf2_dir = tmp_path / "tf2"
    fake_environ = {"CONDA_PREFIX": str(tf1_dir), "CONDA_DEFAULT_ENV": "tf1"}
    with mock.patch.dict(os.environ, fake_environ):
        # tf2 has not been created yet, so the lookup must fail.
        with pytest.raises(ValueError):
            get_conda_env_dir("tf2")
        tf2_dir.mkdir()
        assert get_conda_env_dir("tf2") == str(tf2_dir)
Esempio n. 3
0
    def create_package_env(env_name, package_version: str):
        """Recreate conda env `env_name` with Ray and a pinned `requests`.

        Any existing env of that name is deleted first. A fresh env is then
        created with the current Python version, Ray is injected into it, and
        Ray's install-from-source dependencies plus
        `requests==<package_version>` are pip-installed inside the env.

        Args:
            env_name: Name of the conda environment to (re)create.
            package_version: Version of `requests` to pin in the env.

        Raises:
            AssertionError: If `conda create` or the pip install exits with a
                non-zero status.
        """
        delete_env(env_name)
        create_proc = subprocess.run([
            "conda", "create", "-n", env_name, "-y",
            f"python={_current_py_version()}"
        ])
        # Fail here rather than later with a confusing "env not found" error
        # from the steps that assume the env exists.
        if create_proc.returncode != 0:
            raise AssertionError(
                f"conda create failed, returned {create_proc.returncode}")

        _inject_ray_to_conda_site(get_conda_env_dir(env_name))
        ray_deps: List[str] = _resolve_install_from_source_ray_dependencies()
        ray_deps.append(f"requests=={package_version}")
        with tempfile.NamedTemporaryFile("w") as f:
            f.writelines(line + "\n" for line in ray_deps)
            # pip reads the file by name from another process, so the
            # buffered contents must reach the disk before it runs.
            f.flush()

            commands = [
                init_cmd, f"conda activate {env_name}",
                f"python -m pip install -r {f.name}", "conda deactivate"
            ]
            # With shell=True the command is given as one string; a
            # one-element list is equivalent on POSIX only.
            proc = subprocess.run(" && ".join(commands),
                                  shell=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
            if proc.returncode != 0:
                print("pip install failed")
                print(proc.stdout.decode())
                print(proc.stderr.decode())
                # Explicit raise: a bare `assert False` is stripped under -O.
                raise AssertionError("pip install failed")
Esempio n. 4
0
 def __init__(self,
              worker_env=None,
              num_java_workers_per_process=1,
              jvm_options=None,
              code_search_path=None,
              runtime_env=None):
     """Store the job settings, deriving PYTHONHOME from a conda runtime env.

     Args:
         worker_env: Optional dict of environment variables for workers.
             It is copied, so the caller's dict is never modified.
         num_java_workers_per_process: Stored as given.
         jvm_options: Stored as given; None becomes an empty list.
         code_search_path: Stored as given; None becomes an empty list.
         runtime_env: Optional dict. If it has a non-None "conda_env"
             entry, PYTHONHOME in `worker_env` is set to the directory
             returned by `get_conda_env_dir` for that env.

     Raises:
         ValueError: If `runtime_env` names a conda env while `worker_env`
             already carries a non-None PYTHONHOME.
     """
     # Copy instead of aliasing: the PYTHONHOME update below must not leak
     # into the dict owned by the caller.
     self.worker_env = dict(worker_env) if worker_env is not None else {}
     if runtime_env:
         conda_env = runtime_env.get("conda_env")
         if conda_env is not None:
             conda_env_dir = get_conda_env_dir(conda_env)
             if self.worker_env.get("PYTHONHOME") is not None:
                 raise ValueError(
                     f"worker_env specifies PYTHONHOME="
                     f"{self.worker_env['PYTHONHOME']} which "
                     f"conflicts with PYTHONHOME={conda_env_dir} "
                     f"required by the specified conda env "
                     f"{runtime_env['conda_env']}.")
             self.worker_env.update(PYTHONHOME=conda_env_dir)
     self.num_java_workers_per_process = num_java_workers_per_process
     self.jvm_options = jvm_options or []
     self.code_search_path = code_search_path or []
     self.runtime_env = runtime_env or dict()
    def create_package_env(env_name, package_version: str):
        """Rebuild conda env `env_name` with Ray plus a pinned `requests`.

        Steps: delete any existing env of that name, create a fresh one with
        the current Python version, inject Ray into it, then pip-install
        Ray's install-from-source dependencies together with
        `requests==<package_version>` from a generated requirements file.
        Each external command's output is printed on failure and the
        function then fails with an AssertionError.
        """
        delete_env(env_name)
        create_argv = [
            "conda",
            "create",
            "-n",
            env_name,
            "-y",
            f"python={_current_py_version()}",
        ]
        create_result = subprocess.run(
            create_argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if create_result.returncode != 0:
            print("conda create failed, returned %d" %
                  create_result.returncode)
            print(create_result.stdout.decode())
            print(create_result.stderr.decode())
            assert False

        _inject_ray_to_conda_site(get_conda_env_dir(env_name))
        requirements: List[str] = (
            _resolve_install_from_source_ray_dependencies())
        requirements.append(f"requests=={package_version}")

        # One requirement per line, each terminated by a newline.
        reqs_file = tmp_path_factory.mktemp("reqs") / "requirements.txt"
        with reqs_file.open("wt") as out:
            for requirement in requirements:
                out.write(requirement + "\n")

        install_steps = [
            f"conda activate {env_name}",
            f"python -m pip install -r {str(reqs_file)}",
            "conda deactivate",
        ]
        if _WIN32:
            # Windows: hand the shell a single command string.
            command = " && ".join(install_steps)
        else:
            # Elsewhere: run init_cmd first and wrap the joined command
            # string in a one-element list.
            command = [" && ".join([init_cmd] + install_steps)]
        install_result = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if install_result.returncode != 0:
            print("conda/pip install failed, returned %d" %
                  install_result.returncode)
            print("command", command)
            print(install_result.stdout.decode())
            print(install_result.stderr.decode())
            assert False
Esempio n. 6
0
    def to_worker_env_vars(self, override_environment_variables: dict) -> dict:
        """Return worker env vars extended with this runtime env's settings.

        The given mapping is copied and never modified, so a caller can reuse
        the same dict across several calls without settings leaking between
        them.

        TODO(ekl): env vars is probably not the right long term impl.

        Args:
            override_environment_variables: Existing worker env vars, or None
                to start from an empty mapping.

        Returns:
            A new dict holding the given variables, plus PYTHONHOME (the
            directory returned by `get_conda_env_dir`) when `self.conda` is
            set, and RAY_RUNTIME_ENV_FILES when `self.working_dir` is set.
        """
        env_vars = dict(override_environment_variables or {})
        if self.conda:
            env_vars["PYTHONHOME"] = get_conda_env_dir(self.conda)
        if self.working_dir:
            env_vars["RAY_RUNTIME_ENV_FILES"] = self.working_dir
        return env_vars
Esempio n. 7
0
    def _remote(self,
                args=None,
                kwargs=None,
                num_cpus=None,
                num_gpus=None,
                memory=None,
                object_store_memory=None,
                resources=None,
                accelerator_type=None,
                max_concurrency=None,
                max_restarts=None,
                max_task_retries=None,
                name=None,
                lifetime=None,
                placement_group=None,
                placement_group_bundle_index=-1,
                placement_group_capture_child_tasks=None,
                runtime_env=None,
                override_environment_variables=None):
        """Create an actor.

        This method allows more flexibility than the remote method because
        resource requirements can be specified and override the defaults in the
        decorator.

        Args:
            args: The arguments to forward to the actor constructor.
            kwargs: The keyword arguments to forward to the actor constructor.
            num_cpus: The number of CPUs required by the actor creation task.
            num_gpus: The number of GPUs required by the actor creation task.
            memory: Restrict the heap memory usage of this actor.
            object_store_memory: Restrict the object store memory used by
                this actor when creating objects.
            resources: The custom resources required by the actor creation
                task.
            accelerator_type: Combined with the decorator's accelerator type
                when the actor's resource requirements are computed.
            max_concurrency: The max number of concurrent calls to allow for
                this actor. This only works with direct actor calls. The max
                concurrency defaults to 1 for threaded execution, and 1000 for
                asyncio execution. Note that the execution order is not
                guaranteed when max_concurrency > 1.
            max_restarts: Passed to the core worker when the actor is
                created. A falsy value (None or 0) falls back to the value
                given in the decorator.
            max_task_retries: Passed to the core worker when the actor is
                created. A falsy value (None or 0) falls back to the value
                given in the decorator.
            name: The globally unique name for the actor, which can be used
                to retrieve the actor via ray.get_actor(name) as long as the
                actor is still alive.
            lifetime: Either `None`, which defaults to the actor will fate
                share with its creator and will be deleted once its refcount
                drops to zero, or "detached", which means the actor will live
                as a global object independent of the creator.
            placement_group: the placement group this actor belongs to,
                or None if it doesn't belong to any group.
            placement_group_bundle_index: the index of the bundle
                if the actor belongs to a placement group, which may be -1 to
                specify any available bundle.
            placement_group_capture_child_tasks: Whether or not children tasks
                of this actor should implicitly use the same placement group
                as its parent. When None, the current worker's
                `should_capture_child_tasks_in_placement_group` setting is
                used.
            runtime_env (Dict[str, Any]): Specifies the runtime environment for
                this actor or task and its children.  Currently supports the
                key "conda_env", whose value should be a string which is the
                name of the desired conda environment.
            override_environment_variables: Environment variables to override
                and/or introduce for this actor.  This is a dictionary mapping
                variable names to their values. The dictionary passed in is
                not modified.

        Returns:
            A handle to the newly created actor.

        Raises:
            TypeError: If `name` is neither None nor a string.
            ValueError: If `max_concurrency` is below 1, `name` is empty or
                already taken, or `lifetime` is neither None nor "detached".
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}
        meta = self.__ray_metadata__
        actor_has_async_methods = len(
            inspect.getmembers(meta.modified_class,
                               predicate=inspect.iscoroutinefunction)) > 0
        is_asyncio = actor_has_async_methods

        if max_concurrency is None:
            if is_asyncio:
                max_concurrency = 1000
            else:
                max_concurrency = 1

        if max_concurrency < 1:
            raise ValueError("max_concurrency must be >= 1")

        if client_mode_should_convert():
            return client_mode_convert_actor(
                self,
                args,
                kwargs,
                num_cpus=num_cpus,
                num_gpus=num_gpus,
                memory=memory,
                object_store_memory=object_store_memory,
                resources=resources,
                accelerator_type=accelerator_type,
                max_concurrency=max_concurrency,
                max_restarts=max_restarts,
                max_task_retries=max_task_retries,
                name=name,
                lifetime=lifetime,
                placement_group=placement_group,
                placement_group_bundle_index=placement_group_bundle_index,
                placement_group_capture_child_tasks=(
                    placement_group_capture_child_tasks),
                runtime_env=runtime_env,
                override_environment_variables=(
                    override_environment_variables))

        worker = ray.worker.global_worker
        worker.check_connected()

        if name is not None:
            if not isinstance(name, str):
                raise TypeError(
                    f"name must be None or a string, got: '{type(name)}'.")
            if name == "":
                raise ValueError("Actor name cannot be an empty string.")

        # Check whether the name is already taken.
        # TODO(edoakes): this check has a race condition because two drivers
        # could pass the check and then create the same named actor. We should
        # instead check this when we create the actor, but that's currently an
        # async call.
        if name is not None:
            try:
                ray.get_actor(name)
            except ValueError:  # Name is not taken.
                pass
            else:
                raise ValueError(
                    f"The name {name} is already taken. Please use "
                    "a different name or get the existing actor using "
                    f"ray.get_actor('{name}')")

        if lifetime is None:
            detached = False
        elif lifetime == "detached":
            detached = True
        else:
            raise ValueError(
                "actor `lifetime` argument must be either `None` or 'detached'"
            )

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)

        if placement_group is None:
            if placement_group_capture_child_tasks:
                placement_group = get_current_placement_group()

        if not placement_group:
            placement_group = PlacementGroup.empty()

        check_placement_group_index(placement_group,
                                    placement_group_bundle_index)

        # Set the actor's default resources if not already set. The first
        # four conditions check that no resources were specified in the
        # decorator. The last four conditions check that no resources were
        # specified when _remote() was called.
        if (meta.num_cpus is None and meta.num_gpus is None
                and meta.resources is None and meta.accelerator_type is None
                and num_cpus is None and num_gpus is None and resources is None
                and accelerator_type is None):
            # In the default case, actors acquire no resources for
            # their lifetime, and actor methods will require 1 CPU.
            cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
            actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
        else:
            # If any resources are specified (here or in decorator), then
            # all resources are acquired for the actor's lifetime and no
            # resources are associated with methods.
            cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                           if meta.num_cpus is None else meta.num_cpus)
            actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

        # LOCAL_MODE cannot handle cross_language
        if worker.mode == ray.LOCAL_MODE:
            assert not meta.is_cross_language, \
                "Cross language ActorClass cannot be executed locally."

        # Export the actor.
        if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                           worker.current_session_and_job):
            # If this actor class was not exported in this session and job,
            # we need to export this function again, because current GCS
            # doesn't have it.
            meta.last_export_session_and_job = (worker.current_session_and_job)
            # After serialize / deserialize modified class, the __module__
            # of modified class will be ray.cloudpickle.cloudpickle.
            # So, here pass actor_creation_function_descriptor to make
            # sure export actor class correct.
            worker.function_actor_manager.export_actor_class(
                meta.modified_class, meta.actor_creation_function_descriptor,
                meta.method_meta.methods.keys())

        resources = ray._private.utils.resources_from_resource_arguments(
            cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
            meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
            object_store_memory, resources, accelerator_type)

        # If the actor methods require CPU resources, then set the required
        # placement resources. If actor_placement_resources is empty, then
        # the required placement resources will be the same as resources.
        actor_placement_resources = {}
        assert actor_method_cpu in [0, 1]
        if actor_method_cpu == 1:
            actor_placement_resources = resources.copy()
            actor_placement_resources["CPU"] += 1
        if meta.is_cross_language:
            creation_args = cross_language.format_args(worker, args, kwargs)
        else:
            function_signature = meta.method_meta.signatures["__init__"]
            creation_args = signature.flatten_args(function_signature, args,
                                                   kwargs)
        if runtime_env:
            conda_env = runtime_env.get("conda_env")
            if conda_env is not None:
                conda_env_dir = get_conda_env_dir(conda_env)
                # Build a new dict rather than updating in place, so that
                # PYTHONHOME does not leak into a dict the caller may reuse
                # for other actors.
                override_environment_variables = dict(
                    override_environment_variables or {},
                    PYTHONHOME=conda_env_dir)

        # NOTE(review): `x or meta.x` below treats an explicit 0 for
        # max_restarts / max_task_retries the same as None - confirm that
        # this is intended.
        actor_id = worker.core_worker.create_actor(
            meta.language,
            meta.actor_creation_function_descriptor,
            creation_args,
            max_restarts or meta.max_restarts,
            max_task_retries or meta.max_task_retries,
            resources,
            actor_placement_resources,
            max_concurrency,
            detached,
            name if name is not None else "",
            is_asyncio,
            placement_group.id,
            placement_group_bundle_index,
            placement_group_capture_child_tasks,
            # Store actor_method_cpu in actor handle's extension data.
            extension_data=str(actor_method_cpu),
            override_environment_variables=override_environment_variables
            or dict())

        actor_handle = ActorHandle(meta.language,
                                   actor_id,
                                   meta.method_meta.decorators,
                                   meta.method_meta.signatures,
                                   meta.method_meta.num_returns,
                                   actor_method_cpu,
                                   meta.actor_creation_function_descriptor,
                                   worker.current_session_and_job,
                                   original_handle=True)

        return actor_handle
Esempio n. 8
0
    def _remote(self,
                args=None,
                kwargs=None,
                num_returns=None,
                num_cpus=None,
                num_gpus=None,
                memory=None,
                object_store_memory=None,
                accelerator_type=None,
                resources=None,
                max_retries=None,
                placement_group=None,
                placement_group_bundle_index=-1,
                placement_group_capture_child_tasks=None,
                runtime_env=None,
                override_environment_variables=None,
                name=""):
        """Submit the remote function for execution.

        In client mode the call is handed to `client_mode_convert_function`
        unchanged. Otherwise the function is exported if this session and job
        has not seen it yet, and a task is submitted to the core worker.

        Args:
            args: Positional arguments for the function; None means none.
            kwargs: Keyword arguments for the function; None means none.
            num_returns: Number of return values; None uses the value stored
                on this remote function.
            num_cpus, num_gpus, memory, object_store_memory,
            accelerator_type, resources: Per-call resource requirements,
                combined with the values stored on this remote function.
            max_retries: None uses the value stored on this remote function.
            placement_group: Placement group to run in. When None and child
                task capture is enabled, the current placement group is used.
            placement_group_bundle_index: Bundle index inside the placement
                group; validated by `check_placement_group_index`.
            placement_group_capture_child_tasks: When None, the current
                worker's `should_capture_child_tasks_in_placement_group`
                setting is used.
            runtime_env: Optional dict. If it has a non-None "conda_env"
                entry, PYTHONHOME is set for the task to the directory
                returned by `get_conda_env_dir` for that env.
            override_environment_variables: Optional dict of environment
                variables for the task. The dict passed in is not modified.
            name: Passed to the core worker as the task name.

        Returns:
            The single object ref when the task yields exactly one, the
            collection of refs when it yields several, and None when it
            yields none. If a decorator is set on this remote function, the
            result of the decorated invocation is returned instead.
        """
        if client_mode_should_convert():
            return client_mode_convert_function(
                self,
                args,
                kwargs,
                num_returns=num_returns,
                num_cpus=num_cpus,
                num_gpus=num_gpus,
                memory=memory,
                object_store_memory=object_store_memory,
                accelerator_type=accelerator_type,
                resources=resources,
                max_retries=max_retries,
                placement_group=placement_group,
                placement_group_bundle_index=placement_group_bundle_index,
                placement_group_capture_child_tasks=(
                    placement_group_capture_child_tasks),
                runtime_env=runtime_env,
                override_environment_variables=override_environment_variables,
                name=name)

        worker = ray.worker.global_worker
        worker.check_connected()

        # If this function was not exported in this session and job, we need to
        # export this function again, because the current GCS doesn't have it.
        if not self._is_cross_language and \
                self._last_export_session_and_job != \
                worker.current_session_and_job:
            # There is an interesting question here. If the remote function is
            # used by a subsequent driver (in the same script), should the
            # second driver pickle the function again? If yes, then the remote
            # function definition can differ in the second driver (e.g., if
            # variables in its closure have changed). We probably want the
            # behavior of the remote function in the second driver to be
            # independent of whether or not the function was invoked by the
            # first driver. This is an argument for repickling the function,
            # which we do here.
            self._pickled_function = pickle.dumps(self._function)

            self._function_descriptor = PythonFunctionDescriptor.from_function(
                self._function, self._pickled_function)

            self._last_export_session_and_job = worker.current_session_and_job
            worker.function_actor_manager.export(self)

        kwargs = {} if kwargs is None else kwargs
        args = [] if args is None else args

        if num_returns is None:
            num_returns = self._num_returns
        if max_retries is None:
            max_retries = self._max_retries

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)

        if placement_group is None:
            if placement_group_capture_child_tasks:
                placement_group = get_current_placement_group()

        if not placement_group:
            placement_group = PlacementGroup.empty()

        check_placement_group_index(placement_group,
                                    placement_group_bundle_index)

        resources = ray._private.utils.resources_from_resource_arguments(
            self._num_cpus, self._num_gpus, self._memory,
            self._object_store_memory, self._resources, self._accelerator_type,
            num_cpus, num_gpus, memory, object_store_memory, resources,
            accelerator_type)

        if runtime_env:
            conda_env = runtime_env.get("conda_env")
            if conda_env is not None:
                conda_env_dir = get_conda_env_dir(conda_env)
                # Build a new dict rather than updating in place, so that
                # PYTHONHOME does not leak into a dict the caller may reuse
                # for other tasks.
                override_environment_variables = dict(
                    override_environment_variables or {},
                    PYTHONHOME=conda_env_dir)

        def invocation(args, kwargs):
            if self._is_cross_language:
                list_args = cross_language.format_args(worker, args, kwargs)
            elif not args and not kwargs and not self._function_signature:
                list_args = []
            else:
                list_args = ray._private.signature.flatten_args(
                    self._function_signature, args, kwargs)

            if worker.mode == ray.worker.LOCAL_MODE:
                assert not self._is_cross_language, \
                    "Cross language remote function " \
                    "cannot be executed locally."
            object_refs = worker.core_worker.submit_task(
                self._language,
                self._function_descriptor,
                list_args,
                name,
                num_returns,
                resources,
                max_retries,
                placement_group.id,
                placement_group_bundle_index,
                placement_group_capture_child_tasks,
                worker.debugger_breakpoint,
                override_environment_variables=override_environment_variables
                or dict())
            # Reset worker's debug context from the last "remote" command
            # (which applies only to this .remote call).
            worker.debugger_breakpoint = b""
            # Zero refs falls through and returns None.
            if len(object_refs) == 1:
                return object_refs[0]
            elif len(object_refs) > 1:
                return object_refs

        if self._decorator is not None:
            invocation = self._decorator(invocation)

        return invocation(args, kwargs)