def test_get_conda_env_dir(tmp_path):
    """
    Typical output of `conda env list`, for context:

        base                  /Users/scaly/anaconda3
        my_env_1              /Users/scaly/anaconda3/envs/my_env_1

    For this test, `tmp_path` is a stand-in for `/Users/scaly/anaconda3`.
    """
    # Simulate starting in an env named tf1.
    d = tmp_path / "envs" / "tf1"
    Path.mkdir(d, parents=True)
    with mock.patch.dict(
            os.environ,
            {"CONDA_PREFIX": str(d), "CONDA_DEFAULT_ENV": "tf1"}):
        with pytest.raises(ValueError):
            # Env tf2 should not exist.
            env_dir = get_conda_env_dir("tf2")
        tf2_dir = tmp_path / "envs" / "tf2"
        Path.mkdir(tf2_dir, parents=True)
        env_dir = get_conda_env_dir("tf2")
        assert env_dir == str(tmp_path / "envs" / "tf2")

    # Simulate starting in the (base) conda env.
    with mock.patch.dict(
            os.environ,
            {"CONDA_PREFIX": str(tmp_path), "CONDA_DEFAULT_ENV": "base"}):
        with pytest.raises(ValueError):
            # Env tf3 should not exist.
            env_dir = get_conda_env_dir("tf3")
        # Env tf2 still should exist.
        env_dir = get_conda_env_dir("tf2")
        assert env_dir == str(tmp_path / "envs" / "tf2")
def test_get_conda_env_dir(tmp_path):
    # Simulate starting in an env named tf1.
    d = tmp_path / "tf1"
    d.mkdir()
    with mock.patch.dict(
            os.environ,
            {"CONDA_PREFIX": str(d), "CONDA_DEFAULT_ENV": "tf1"}):
        with pytest.raises(ValueError):
            # Env tf2 should not exist.
            env_dir = get_conda_env_dir("tf2")
        tf2_dir = tmp_path / "tf2"
        tf2_dir.mkdir()
        env_dir = get_conda_env_dir("tf2")
        assert env_dir == str(tmp_path / "tf2")
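# A minimal sketch of what `get_conda_env_dir` could look like, inferred from
# the two tests above; it is an illustration, not the actual implementation.
# It resolves an env name to a directory using the CONDA_PREFIX and
# CONDA_DEFAULT_ENV variables that `conda activate` sets, and raises
# ValueError when the target directory does not exist.
import os


def get_conda_env_dir_sketch(env_name):
    conda_prefix = os.environ.get("CONDA_PREFIX")
    if conda_prefix is None:
        raise ValueError("Not currently inside a conda environment.")
    if os.environ.get("CONDA_DEFAULT_ENV") == "base":
        # In (base), CONDA_PREFIX is the install root; envs live under envs/.
        env_dir = os.path.join(conda_prefix, "envs", env_name)
    else:
        # In a named env, CONDA_PREFIX is <root>/envs/<current>, so the
        # sibling env lives one directory up.
        env_dir = os.path.join(os.path.dirname(conda_prefix), env_name)
    if not os.path.isdir(env_dir):
        raise ValueError(
            f"conda env {env_name} not found at {env_dir}; "
            "please verify the env exists.")
    return env_dir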
def create_package_env(env_name, package_version: str):
    delete_env(env_name)
    subprocess.run([
        "conda", "create", "-n", env_name, "-y",
        f"python={_current_py_version()}"
    ])
    _inject_ray_to_conda_site(get_conda_env_dir(env_name))
    ray_deps: List[str] = _resolve_install_from_source_ray_dependencies()
    ray_deps.append(f"requests=={package_version}")

    with tempfile.NamedTemporaryFile("w") as f:
        f.writelines([line + "\n" for line in ray_deps])
        f.flush()
        commands = [
            init_cmd, f"conda activate {env_name}",
            f"python -m pip install -r {f.name}", "conda deactivate"
        ]
        proc = subprocess.run(
            [" && ".join(commands)],
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        if proc.returncode != 0:
            print("pip install failed")
            print(proc.stdout.decode())
            print(proc.stderr.decode())
            assert False
def __init__(self,
             worker_env=None,
             num_java_workers_per_process=1,
             jvm_options=None,
             code_search_path=None,
             runtime_env=None):
    if worker_env is None:
        self.worker_env = dict()
    else:
        self.worker_env = worker_env
    if runtime_env:
        conda_env = runtime_env.get("conda_env")
        if conda_env is not None:
            conda_env_dir = get_conda_env_dir(conda_env)
            if self.worker_env.get("PYTHONHOME") is not None:
                raise ValueError(
                    f"worker_env specifies PYTHONHOME="
                    f"{self.worker_env['PYTHONHOME']} which "
                    f"conflicts with PYTHONHOME={conda_env_dir} "
                    f"required by the specified conda env "
                    f"{runtime_env['conda_env']}.")
            self.worker_env.update(PYTHONHOME=conda_env_dir)
    self.num_java_workers_per_process = num_java_workers_per_process
    self.jvm_options = jvm_options or []
    self.code_search_path = code_search_path or []
    self.runtime_env = runtime_env or dict()
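# Hedged usage sketch for the constructor above. The enclosing class name is
# not shown in this excerpt; it is assumed here to be ray.job_config.JobConfig,
# and the example assumes a conda env named "tf2" exists locally. Specifying a
# conda env while worker_env already pins PYTHONHOME triggers the ValueError
# raised above.
from ray.job_config import JobConfig  # assumed import path

config = JobConfig(runtime_env={"conda_env": "tf2"})
# PYTHONHOME now points at the tf2 env directory for Python workers.
print(config.worker_env["PYTHONHOME"])

try:
    JobConfig(
        worker_env={"PYTHONHOME": "/opt/other_python"},
        runtime_env={"conda_env": "tf2"})
except ValueError as err:
    print("conflicting PYTHONHOME rejected:", err)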
def create_package_env(env_name, package_version: str):
    delete_env(env_name)
    proc = subprocess.run(
        [
            "conda", "create", "-n", env_name, "-y",
            f"python={_current_py_version()}",
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if proc.returncode != 0:
        print("conda create failed, returned %d" % proc.returncode)
        print(proc.stdout.decode())
        print(proc.stderr.decode())
        assert False

    _inject_ray_to_conda_site(get_conda_env_dir(env_name))
    ray_deps: List[str] = _resolve_install_from_source_ray_dependencies()
    ray_deps.append(f"requests=={package_version}")

    reqs = tmp_path_factory.mktemp("reqs") / "requirements.txt"
    with reqs.open("wt") as fid:
        for line in ray_deps:
            fid.write(line)
            fid.write("\n")

    commands = [
        f"conda activate {env_name}",
        f"python -m pip install -r {str(reqs)}",
        "conda deactivate",
    ]
    if _WIN32:
        # As a string.
        command = " && ".join(commands)
    else:
        commands.insert(0, init_cmd)
        # As a list.
        command = [" && ".join(commands)]
    proc = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if proc.returncode != 0:
        print("conda/pip install failed, returned %d" % proc.returncode)
        print("command", command)
        print(proc.stdout.decode())
        print(proc.stderr.decode())
        assert False
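# A minimal sketch of the `delete_env` helper called by both versions of
# `create_package_env` above, assuming it simply removes the named env; the
# real helper (and the `init_cmd` shell prefix sourced before activation) may
# differ.
import subprocess


def delete_env_sketch(env_name):
    # `conda remove --name <env> --all -y` deletes an environment and all of
    # its packages without prompting.
    subprocess.run(
        ["conda", "remove", "--name", env_name, "--all", "-y"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)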
def to_worker_env_vars(self, override_environment_variables: dict) -> dict:
    """Given existing worker env vars, return an updated dict.

    This sets any env vars needed to set up the runtime env.
    TODO(ekl): env vars is probably not the right long-term impl.
    """
    if override_environment_variables is None:
        override_environment_variables = {}
    if self.conda:
        conda_env_dir = get_conda_env_dir(self.conda)
        override_environment_variables.update(PYTHONHOME=conda_env_dir)
    if self.working_dir:
        override_environment_variables.update(
            RAY_RUNTIME_ENV_FILES=self.working_dir)
    return override_environment_variables
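# Minimal illustration of `to_worker_env_vars` above. The enclosing class is
# not shown in this excerpt, so a stand-in object with the two attributes the
# method reads (`conda` and `working_dir`) is passed as `self`; the example
# also assumes a conda env named "tf2" exists so get_conda_env_dir resolves.
class _StubRuntimeEnv:
    conda = "tf2"
    working_dir = "/tmp/my_job_files"


env_vars = to_worker_env_vars(_StubRuntimeEnv(), {"FOO": "bar"})
# env_vars now holds FOO plus PYTHONHOME (the tf2 env dir) and
# RAY_RUNTIME_ENV_FILES="/tmp/my_job_files".
print(env_vars)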
def _remote(self,
            args=None,
            kwargs=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            accelerator_type=None,
            max_concurrency=None,
            max_restarts=None,
            max_task_retries=None,
            name=None,
            lifetime=None,
            placement_group=None,
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            override_environment_variables=None):
    """Create an actor.

    This method allows more flexibility than the remote method because
    resource requirements can be specified and override the defaults in the
    decorator.

    Args:
        args: The arguments to forward to the actor constructor.
        kwargs: The keyword arguments to forward to the actor constructor.
        num_cpus: The number of CPUs required by the actor creation task.
        num_gpus: The number of GPUs required by the actor creation task.
        memory: Restrict the heap memory usage of this actor.
        object_store_memory: Restrict the object store memory used by
            this actor when creating objects.
        resources: The custom resources required by the actor creation
            task.
        max_concurrency: The max number of concurrent calls to allow for
            this actor. This only works with direct actor calls. The max
            concurrency defaults to 1 for threaded execution, and 1000 for
            asyncio execution. Note that the execution order is not
            guaranteed when max_concurrency > 1.
        name: The globally unique name for the actor, which can be used
            to retrieve the actor via ray.get_actor(name) as long as the
            actor is still alive.
        lifetime: Either `None`, in which case the actor fate-shares with
            its creator and is deleted once its refcount drops to zero, or
            "detached", which means the actor will live as a global object
            independent of the creator.
        placement_group: The placement group this actor belongs to, or
            None if it doesn't belong to any group.
        placement_group_bundle_index: The index of the bundle if the actor
            belongs to a placement group, which may be -1 to specify any
            available bundle.
        placement_group_capture_child_tasks: Whether or not children tasks
            of this actor should implicitly use the same placement group
            as its parent. It is True by default.
        runtime_env (Dict[str, Any]): Specifies the runtime environment
            for this actor or task and its children. Currently supports
            the key "conda_env", whose value should be a string which is
            the name of the desired conda environment.
        override_environment_variables: Environment variables to override
            and/or introduce for this actor. This is a dictionary mapping
            variable names to their values.

    Returns:
        A handle to the newly created actor.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    meta = self.__ray_metadata__
    actor_has_async_methods = len(
        inspect.getmembers(
            meta.modified_class,
            predicate=inspect.iscoroutinefunction)) > 0
    is_asyncio = actor_has_async_methods

    if max_concurrency is None:
        if is_asyncio:
            max_concurrency = 1000
        else:
            max_concurrency = 1

    if max_concurrency < 1:
        raise ValueError("max_concurrency must be >= 1")

    if client_mode_should_convert():
        return client_mode_convert_actor(
            self,
            args,
            kwargs,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            resources=resources,
            accelerator_type=accelerator_type,
            max_concurrency=max_concurrency,
            max_restarts=max_restarts,
            max_task_retries=max_task_retries,
            name=name,
            lifetime=lifetime,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            override_environment_variables=(
                override_environment_variables))

    worker = ray.worker.global_worker
    worker.check_connected()

    if name is not None:
        if not isinstance(name, str):
            raise TypeError(
                f"name must be None or a string, got: '{type(name)}'.")
        if name == "":
            raise ValueError("Actor name cannot be an empty string.")

    # Check whether the name is already taken.
    # TODO(edoakes): this check has a race condition because two drivers
    # could pass the check and then create the same named actor. We should
    # instead check this when we create the actor, but that's currently an
    # async call.
    if name is not None:
        try:
            ray.get_actor(name)
        except ValueError:
            # Name is not taken.
            pass
        else:
            raise ValueError(
                f"The name {name} is already taken. Please use "
                "a different name or get the existing actor using "
                f"ray.get_actor('{name}')")

    if lifetime is None:
        detached = False
    elif lifetime == "detached":
        detached = True
    else:
        raise ValueError(
            "actor `lifetime` argument must be either `None` or 'detached'")

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    if placement_group is None:
        if placement_group_capture_child_tasks:
            placement_group = get_current_placement_group()

    if not placement_group:
        placement_group = PlacementGroup.empty()

    check_placement_group_index(placement_group,
                                placement_group_bundle_index)

    # Set the actor's default resources if not already set. First three
    # conditions are to check that no resources were specified in the
    # decorator. Last three conditions are to check that no resources were
    # specified when _remote() was called.
    if (meta.num_cpus is None and meta.num_gpus is None
            and meta.resources is None and meta.accelerator_type is None
            and num_cpus is None and num_gpus is None and resources is None
            and accelerator_type is None):
        # In the default case, actors acquire no resources for
        # their lifetime, and actor methods will require 1 CPU.
        cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
    else:
        # If any resources are specified (here or in decorator), then
        # all resources are acquired for the actor's lifetime and no
        # resources are associated with methods.
        cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                       if meta.num_cpus is None else meta.num_cpus)
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

    # LOCAL_MODE cannot handle cross_language.
    if worker.mode == ray.LOCAL_MODE:
        assert not meta.is_cross_language, \
            "Cross language ActorClass cannot be executed locally."

    # Export the actor.
    if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                       worker.current_session_and_job):
        # If this actor class was not exported in this session and job,
        # we need to export this function again, because the current GCS
        # doesn't have it.
        meta.last_export_session_and_job = (worker.current_session_and_job)
        # After serializing / deserializing the modified class, its
        # __module__ will be ray.cloudpickle.cloudpickle. So pass
        # actor_creation_function_descriptor here to make sure the actor
        # class is exported correctly.
        worker.function_actor_manager.export_actor_class(
            meta.modified_class, meta.actor_creation_function_descriptor,
            meta.method_meta.methods.keys())

    resources = ray._private.utils.resources_from_resource_arguments(
        cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
        meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
        object_store_memory, resources, accelerator_type)

    # If the actor methods require CPU resources, then set the required
    # placement resources. If actor_placement_resources is empty, then
    # the required placement resources will be the same as resources.
    actor_placement_resources = {}
    assert actor_method_cpu in [0, 1]
    if actor_method_cpu == 1:
        actor_placement_resources = resources.copy()
        actor_placement_resources["CPU"] += 1
    if meta.is_cross_language:
        creation_args = cross_language.format_args(worker, args, kwargs)
    else:
        function_signature = meta.method_meta.signatures["__init__"]
        creation_args = signature.flatten_args(function_signature, args,
                                               kwargs)

    if runtime_env:
        conda_env = runtime_env.get("conda_env")
        if conda_env is not None:
            conda_env_dir = get_conda_env_dir(conda_env)
            if override_environment_variables is None:
                override_environment_variables = {}
            override_environment_variables.update(
                PYTHONHOME=conda_env_dir)

    actor_id = worker.core_worker.create_actor(
        meta.language,
        meta.actor_creation_function_descriptor,
        creation_args,
        max_restarts or meta.max_restarts,
        max_task_retries or meta.max_task_retries,
        resources,
        actor_placement_resources,
        max_concurrency,
        detached,
        name if name is not None else "",
        is_asyncio,
        placement_group.id,
        placement_group_bundle_index,
        placement_group_capture_child_tasks,
        # Store actor_method_cpu in the actor handle's extension data.
        extension_data=str(actor_method_cpu),
        override_environment_variables=override_environment_variables
        or dict())

    actor_handle = ActorHandle(
        meta.language,
        actor_id,
        meta.method_meta.decorators,
        meta.method_meta.signatures,
        meta.method_meta.num_returns,
        actor_method_cpu,
        meta.actor_creation_function_descriptor,
        worker.current_session_and_job,
        original_handle=True)

    return actor_handle
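# Hedged end-to-end sketch of the actor-side plumbing above: passing
# runtime_env={"conda_env": ...} into _remote makes the new actor's worker
# start with PYTHONHOME pointed at that env's directory. It calls the private
# _remote directly, which this excerpt shows accepts runtime_env; the public
# .options()/.remote() surface may differ. It assumes a conda env named "tf2"
# exists on the cluster nodes with ray installed into it.
import ray


@ray.remote
class VersionActor:
    def get_requests_version(self):
        import requests
        return requests.__version__


ray.init()
actor = VersionActor._remote(
    args=[], kwargs={}, runtime_env={"conda_env": "tf2"})
print(ray.get(actor.get_requests_version.remote()))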
def _remote(self,
            args=None,
            kwargs=None,
            num_returns=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            accelerator_type=None,
            resources=None,
            max_retries=None,
            placement_group=None,
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            override_environment_variables=None,
            name=""):
    """Submit the remote function for execution."""
    if client_mode_should_convert():
        return client_mode_convert_function(
            self,
            args,
            kwargs,
            num_returns=num_returns,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            accelerator_type=accelerator_type,
            resources=resources,
            max_retries=max_retries,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            override_environment_variables=override_environment_variables,
            name=name)

    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    if not self._is_cross_language and \
            self._last_export_session_and_job != \
            worker.current_session_and_job:
        # There is an interesting question here. If the remote function is
        # used by a subsequent driver (in the same script), should the
        # second driver pickle the function again? If yes, then the remote
        # function definition can differ in the second driver (e.g., if
        # variables in its closure have changed). We probably want the
        # behavior of the remote function in the second driver to be
        # independent of whether or not the function was invoked by the
        # first driver. This is an argument for repickling the function,
        # which we do here.
        self._pickled_function = pickle.dumps(self._function)
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._pickled_function)
        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    kwargs = {} if kwargs is None else kwargs
    args = [] if args is None else args

    if num_returns is None:
        num_returns = self._num_returns
    if max_retries is None:
        max_retries = self._max_retries

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    if placement_group is None:
        if placement_group_capture_child_tasks:
            placement_group = get_current_placement_group()

    if not placement_group:
        placement_group = PlacementGroup.empty()

    check_placement_group_index(placement_group,
                                placement_group_bundle_index)

    resources = ray._private.utils.resources_from_resource_arguments(
        self._num_cpus, self._num_gpus, self._memory,
        self._object_store_memory, self._resources, self._accelerator_type,
        num_cpus, num_gpus, memory, object_store_memory, resources,
        accelerator_type)

    if runtime_env:
        conda_env = runtime_env.get("conda_env")
        if conda_env is not None:
            conda_env_dir = get_conda_env_dir(conda_env)
            if override_environment_variables is None:
                override_environment_variables = {}
            override_environment_variables.update(
                PYTHONHOME=conda_env_dir)

    def invocation(args, kwargs):
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not args and not kwargs and not self._function_signature:
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert not self._is_cross_language, \
                "Cross language remote function " \
                "cannot be executed locally."
        object_refs = worker.core_worker.submit_task(
            self._language,
            self._function_descriptor,
            list_args,
            name,
            num_returns,
            resources,
            max_retries,
            placement_group.id,
            placement_group_bundle_index,
            placement_group_capture_child_tasks,
            worker.debugger_breakpoint,
            override_environment_variables=override_environment_variables
            or dict())
        # Reset worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""
        if len(object_refs) == 1:
            return object_refs[0]
        elif len(object_refs) > 1:
            return object_refs

    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
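# The same hedged sketch for the task path above: _remote on a remote function
# also accepts runtime_env, and the code turns a "conda_env" entry into a
# PYTHONHOME override for the worker executing the task. As before, the env
# name "tf2" is an assumption, and the private _remote is called only because
# this excerpt shows it directly.
import ray


@ray.remote
def get_requests_version():
    import requests
    return requests.__version__


ray.init()
ref = get_requests_version._remote(
    args=[], kwargs={}, runtime_env={"conda_env": "tf2"})
print(ray.get(ref))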