def test_ray_commit_injection(self, is_task_or_actor):
    """Check when ``_ray_commit`` ends up in a parsed runtime env."""
    commit_key = "_ray_commit"

    def parse(raw_env):
        return ParsedRuntimeEnv(raw_env, is_task_or_actor=is_task_or_actor)

    # Should not be injected if no pip and conda.
    assert commit_key not in parse({"env_vars": {"hi": "hi"}})

    # Should be injected if pip or conda present.
    assert commit_key in parse({"pip": ["requests"]})
    assert commit_key in parse({"conda": "env_name"})

    # Should not override if passed.
    explicit = parse({"conda": "env_name", "_ray_commit": "Blah"})
    assert explicit[commit_key] == "Blah"
def test_serialization(self, is_task_or_actor):
    """Equal envs must serialize identically and survive a round trip."""
    pip_first = {
        "pip": ["requests"],
        "env_vars": {
            "hi1": "hi1",
            "hi2": "hi2"
        },
    }
    env_vars_first = {
        "env_vars": {
            "hi2": "hi2",
            "hi1": "hi1"
        },
        "pip": ["requests"],
    }
    env1 = ParsedRuntimeEnv(pip_first, is_task_or_actor=is_task_or_actor)
    env2 = ParsedRuntimeEnv(
        env_vars_first, is_task_or_actor=is_task_or_actor)
    assert env1 == env2

    blob1 = env1.serialize()
    blob2 = env2.serialize()
    # Key ordering shouldn't matter.
    assert blob1 == blob2

    restored1 = ParsedRuntimeEnv.deserialize(blob1)
    restored2 = ParsedRuntimeEnv.deserialize(blob2)
    assert env1 == restored1 == env2 == restored2
def _ray_from_function_descriptor(cls, language,
                                  actor_creation_function_descriptor,
                                  max_restarts, max_task_retries, num_cpus,
                                  num_gpus, memory, object_store_memory,
                                  resources, accelerator_type, runtime_env):
    """Build an ActorClass whose metadata comes from a function descriptor.

    ``runtime_env`` may be falsy (stored as None), an already-serialized
    string (stored unchanged), or anything else, which is parsed with
    ``ParsedRuntimeEnv`` and stored in serialized form.

    Returns:
        The new ActorClass instance with ``__ray_metadata__`` populated.
    """
    # Parse local pip/conda config files here. If we instead did it in
    # .remote(), it would get run in the Ray Client server, which runs on
    # a remote node where the files aren't available.
    if not runtime_env:
        serialized_env = None
    elif isinstance(runtime_env, str):
        serialized_env = runtime_env
    else:
        serialized_env = ParsedRuntimeEnv(runtime_env).serialize()

    self = ActorClass.__new__(ActorClass)
    # The second and fourth positional arguments are passed as None and the
    # last one as an empty list.
    self.__ray_metadata__ = ActorClassMetadata(
        language,
        None,
        actor_creation_function_descriptor,
        None,
        max_restarts,
        max_task_retries,
        num_cpus,
        num_gpus,
        memory,
        object_store_memory,
        resources,
        accelerator_type,
        serialized_env,
        [],
    )
    return self
def test_inject_current_ray(self):
    """Check how ``_inject_current_ray`` reacts to the dev-mode env var.

    The flag must be absent when ``RAY_RUNTIME_ENV_LOCAL_DEV_MODE`` is not
    set, be truthy when it is set, and keep an explicitly passed value.
    """
    dev_mode_var = "RAY_RUNTIME_ENV_LOCAL_DEV_MODE"

    # Should not be injected if not provided by env var.
    result = ParsedRuntimeEnv({"env_vars": {"hi": "hi"}})
    assert "_inject_current_ray" not in result

    os.environ[dev_mode_var] = "1"
    try:
        # Should be injected if provided by env var.
        result = ParsedRuntimeEnv({})
        assert result["_inject_current_ray"]

        # Should be preserved if passed.
        result = ParsedRuntimeEnv({"_inject_current_ray": False})
        assert not result["_inject_current_ray"]
    finally:
        # Clean up even when an assertion above fails, so the variable
        # cannot leak into tests that run later in the same process.
        del os.environ[dev_mode_var]
def set_runtime_env(self, runtime_env: Optional[Dict[str, Any]]) -> None:
    """Replace the runtime env of this config and drop the cached proto.

    Args:
        runtime_env: The raw runtime env dict, or None for an empty one.
            An optional ``"eager_install"`` key must hold a bool; it
            defaults to True when absent.

    Raises:
        AssertionError: If ``eager_install`` is not a bool.
    """
    # TODO(edoakes): this is really unfortunate, but JobConfig is imported
    # all over the place so this causes circular imports. We should remove
    # this dependency and pass in a validated runtime_env instead.
    from ray._private.runtime_env.validation import ParsedRuntimeEnv

    # Parse and validate everything first so that a failure leaves the
    # object (including its cached proto) exactly as it was.
    parsed_runtime_env = ParsedRuntimeEnv(runtime_env or {})

    eager_install = True
    if runtime_env and "eager_install" in runtime_env:
        eager_install = runtime_env["eager_install"]
    # Raised explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type is kept for callers.
    if not isinstance(eager_install, bool):
        raise AssertionError(f"The type of eager_install is incorrect: "
                             f"{type(eager_install)}"
                             f", the bool type is needed.")

    self._parsed_runtime_env = parsed_runtime_env
    self.runtime_env = runtime_env or dict()
    self.runtime_env_eager_install = eager_install
    self._cached_pb = None
def test_get_conda_dict_with_ray_inserted_m1_wheel(monkeypatch):
    """On a simulated M1 Mac, a plain ``ray==<version>`` pin is inserted."""
    # Disable dev mode to prevent Ray dependencies being automatically inserted
    # into the conda dict.
    for env_key in ("RAY_RUNTIME_ENV_LOCAL_DEV_MODE",
                    "RAY_CI_POST_WHEEL_TESTS"):
        if env_key in os.environ:
            monkeypatch.delenv(env_key)

    monkeypatch.setattr(ray, "__version__", "1.9.0")
    monkeypatch.setattr(ray, "__commit__",
                        "92599d9127e228fe8d0a2d94ca75754ec21c4ae4")
    monkeypatch.setattr(sys, "version_info", (3, 9, 7, "final", 0))
    # Simulate running on an M1 Mac.
    monkeypatch.setattr(sys, "platform", "darwin")
    monkeypatch.setattr(platform, "machine", lambda: "arm64")

    conda_spec = {"dependencies": ["blah", "pip", {"pip": ["pip_pkg"]}]}
    serialized_env = ParsedRuntimeEnv({"conda": conda_spec}).serialize()
    output_conda = _get_conda_dict_with_ray_inserted(
        RuntimeEnv(serialized_env))

    # M1 wheels are not uploaded to AWS S3. So rather than have an S3 URL
    # inserted as a dependency, we should just have the string "ray==1.9.0".
    expected_conda = {
        "dependencies": [
            "blah",
            "pip",
            {
                "pip": ["ray==1.9.0", "ray[default]", "pip_pkg"]
            },
            "python=3.9.7",
        ]
    }
    assert output_conda == expected_conda
async def get_job_info(self):
    """Return info for each job, keyed by hex job id.

    Here a job is a Ray driver. Each value holds the job's status fields,
    liveness, start/end times and a ``config`` dict with the namespace,
    metadata and deserialized runtime env.
    """
    reply = await self._gcs_job_info_stub.GetAllJobInfo(
        gcs_service_pb2.GetAllJobInfoRequest(), timeout=5)

    jobs = {}
    for job_entry in reply.job_info_list:
        job_config = job_entry.config
        metadata = dict(job_config.metadata)
        runtime_env = ParsedRuntimeEnv.deserialize(
            job_config.runtime_env_info.serialized_runtime_env)

        # Status fields stay None when no job data is found for the metadata.
        data = self._get_job_data(metadata)
        if data is None:
            status, status_message = None, None
        else:
            status, status_message = data.status, data.message

        jobs[job_entry.job_id.hex()] = {
            "status": status,
            "status_message": status_message,
            "is_dead": job_entry.is_dead,
            "start_time": job_entry.start_time,
            "end_time": job_entry.end_time,
            "config": {
                "namespace": job_config.ray_namespace,
                "metadata": metadata,
                "runtime_env": runtime_env,
            },
        }
    return jobs
def __init__(self, language, function, function_descriptor, num_cpus,
             num_gpus, memory, object_store_memory, resources,
             accelerator_type, num_returns, max_calls, max_retries,
             retry_exceptions, runtime_env, placement_group):
    """Record the defaults of a remote function and install ``remote``.

    Arguments left as None for ``num_cpus``, ``num_returns``, ``max_calls``,
    ``max_retries`` and ``retry_exceptions`` fall back to the corresponding
    ``DEFAULT_REMOTE_FUNCTION_*`` constants.

    Args:
        language: Language of the function; anything other than
            ``Language.PYTHON`` marks the function as cross-language.
        function: The function to run remotely. Must not be ``async def``.
        function_descriptor: Descriptor stored for the function.
        object_store_memory: Must be None; not implemented for tasks.
        runtime_env: Falsy (stored as None), an already-serialized string
            (stored unchanged), or a raw env that is parsed and serialized.
        placement_group: Default placement group stored for the function.

    Raises:
        ValueError: If ``function`` is a coroutine function.
        NotImplementedError: If ``object_store_memory`` is not None.
    """
    if inspect.iscoroutinefunction(function):
        raise ValueError("'async def' should not be used for remote "
                         "tasks. You can wrap the async function with "
                         "`asyncio.get_event_loop()"
                         ".run_until_complete(f())`. "
                         "See more at docs.ray.io/async_api.html")
    self._language = language
    self._function = _inject_tracing_into_function(function)
    self._function_name = (function.__module__ + "." + function.__name__)
    self._function_descriptor = function_descriptor
    self._is_cross_language = language != Language.PYTHON
    self._num_cpus = (DEFAULT_REMOTE_FUNCTION_CPUS
                      if num_cpus is None else num_cpus)
    self._num_gpus = num_gpus
    self._memory = memory
    if object_store_memory is not None:
        raise NotImplementedError(
            "setting object_store_memory is not implemented for tasks")
    self._object_store_memory = None
    self._resources = resources
    self._accelerator_type = accelerator_type
    self._num_returns = (DEFAULT_REMOTE_FUNCTION_NUM_RETURN_VALS
                         if num_returns is None else num_returns)
    self._max_calls = (DEFAULT_REMOTE_FUNCTION_MAX_CALLS
                       if max_calls is None else max_calls)
    self._max_retries = (DEFAULT_REMOTE_FUNCTION_NUM_TASK_RETRIES
                         if max_retries is None else max_retries)
    self._retry_exceptions = (DEFAULT_REMOTE_FUNCTION_RETRY_EXCEPTIONS
                              if retry_exceptions is None else
                              retry_exceptions)
    # Parse local pip/conda config files here. If we instead did it in
    # .remote(), it would get run in the Ray Client server, which runs on
    # a remote node where the files aren't available.
    if not runtime_env:
        self._runtime_env = None
    elif isinstance(runtime_env, str):
        # Already serialized; keep as-is.
        self._runtime_env = runtime_env
    else:
        self._runtime_env = ParsedRuntimeEnv(runtime_env).serialize()
    self._placement_group = placement_group
    self._decorator = getattr(function, "__ray_invocation_decorator__",
                              None)
    self._function_signature = ray._private.signature.extract_signature(
        self._function)

    self._last_export_session_and_job = None
    self._uuid = uuid.uuid4()

    # Override task.remote's signature and docstring
    @wraps(function)
    def _remote_proxy(*args, **kwargs):
        return self._remote(args=args, kwargs=kwargs)

    self.remote = _remote_proxy
def test_reject_pip_and_conda(self, is_task_or_actor):
    """A runtime env with both ``pip`` and ``conda`` raises ValueError."""
    conflicting_env = {"pip": ["requests"], "conda": "env_name"}
    with pytest.raises(ValueError):
        ParsedRuntimeEnv(conflicting_env, is_task_or_actor=is_task_or_actor)
def runtime_env(self):
    """Get the runtime env dict used for the current driver or worker.

    Returns:
        The runtime env currently used by this worker, deserialized from
        the string returned by ``get_runtime_env_string()``.
    """
    serialized_env = self.get_runtime_env_string()
    return ParsedRuntimeEnv.deserialize(serialized_env)
def _validate_runtime_env(self):
    """Parse ``self.runtime_env`` and extract its ``eager_install`` flag.

    Returns:
        A ``(ParsedRuntimeEnv, eager_install)`` tuple; ``eager_install``
        defaults to True when the key is absent.

    Raises:
        TypeError: If ``eager_install`` is present but not a bool.
    """
    # TODO(edoakes): this is really unfortunate, but JobConfig is imported
    # all over the place so this causes circular imports. We should remove
    # this dependency and pass in a validated runtime_env instead.
    from ray._private.runtime_env.validation import ParsedRuntimeEnv

    eager_install = self.runtime_env.get("eager_install", True)
    if isinstance(eager_install, bool):
        return ParsedRuntimeEnv(self.runtime_env), eager_install
    raise TypeError("eager_install must be a boolean.")
def _ray_from_modified_class(cls, modified_class, class_id, max_restarts,
                             max_task_retries, num_cpus, num_gpus, memory,
                             object_store_memory, resources, accelerator_type,
                             runtime_env, concurrency_groups):
    """Build an actor class object that derives from ``modified_class``.

    A warning is logged for every attribute of ``modified_class`` that this
    wrapper is about to shadow. ``runtime_env`` may be falsy (stored as
    None), an already-serialized string (stored unchanged), or a raw env
    that is parsed and serialized.

    Returns:
        An instance of the derived class with ``__ray_metadata__`` set.
    """
    shadowed_attributes = (
        "remote",
        "_remote",
        "_ray_from_modified_class",
        "_ray_from_function_descriptor",
    )
    for attribute in shadowed_attributes:
        if not hasattr(modified_class, attribute):
            continue
        logger.warning("Creating an actor from class "
                       f"{modified_class.__name__} overwrites "
                       f"attribute {attribute} of that class")

    # Make sure the actor class we are constructing inherits from the
    # original class so it retains all class properties.
    class DerivedActorClass(cls, modified_class):
        pass

    derived_name = f"ActorClass({modified_class.__name__})"
    DerivedActorClass.__module__ = modified_class.__module__
    DerivedActorClass.__name__ = derived_name
    DerivedActorClass.__qualname__ = derived_name

    # Construct the base object.
    self = DerivedActorClass.__new__(DerivedActorClass)

    # Actor creation function descriptor.
    creation_descriptor = PythonFunctionDescriptor.from_class(
        modified_class.__ray_actor_class__)

    # Parse local pip/conda config files here. If we instead did it in
    # .remote(), it would get run in the Ray Client server, which runs on
    # a remote node where the files aren't available.
    if not runtime_env:
        serialized_env = None
    elif isinstance(runtime_env, str):
        serialized_env = runtime_env
    else:
        serialized_env = ParsedRuntimeEnv(runtime_env).serialize()

    self.__ray_metadata__ = ActorClassMetadata(
        Language.PYTHON,
        modified_class,
        creation_descriptor,
        class_id,
        max_restarts,
        max_task_retries,
        num_cpus,
        num_gpus,
        memory,
        object_store_memory,
        resources,
        accelerator_type,
        serialized_env,
        concurrency_groups,
    )
    return self
def run_setup_with_logger():
    """Set up the runtime env for one request and return its context.

    Deserializes the runtime env, runs each manager's ``setup`` against a
    fresh ``RuntimeEnvContext``, records URI -> serialized-env mappings and
    finally invokes the configured plugins.
    """
    runtime_env: dict = ParsedRuntimeEnv.deserialize(serialized_runtime_env
                                                     or "{}")
    allocated_resource: dict = json.loads(
        serialized_allocated_resource_instances or "{}")

    # Use a separate logger for each job.
    job_logger = self.get_or_create_logger(request.job_id)
    # TODO(chenk008): Add log about allocated_resource to
    # avoid lint error. That will be moved to cgroup plugin.
    job_logger.debug(f"Worker has resource :{allocated_resource}")

    context = RuntimeEnvContext(env_vars=runtime_env.get("env_vars"))
    managers = (
        self._conda_manager,
        self._py_modules_manager,
        self._working_dir_manager,
        self._container_manager,
    )
    for manager in managers:
        manager.setup(runtime_env, context, logger=job_logger)

    # Add the mapping of URIs -> the serialized environment to be
    # used for cache invalidation.
    for plugin_uri in runtime_env.get_uris():
        self._uris_to_envs[plugin_uri].add(serialized_runtime_env)

    # Run setup function from all the plugins
    for plugin_class_path in runtime_env.get("plugins", {}):
        plugin_class = import_attr(plugin_class_path)
        # TODO(simon): implement uri support
        plugin_class.create("uri not implemented", runtime_env, context)
        plugin_class.modify_context("uri not implemented", runtime_env,
                                    context)

    return context
def test_validate_excludes_empty_list(self):
    """An empty ``excludes`` list parses to an empty runtime env."""
    parsed = ParsedRuntimeEnv({"excludes": []})
    assert parsed == {}
def _remote(self,
            args=None,
            kwargs=None,
            num_returns=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            accelerator_type=None,
            resources=None,
            max_retries=None,
            retry_exceptions=None,
            placement_group="default",
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            name=""):
    """Submit the remote function for execution.

    Arguments left as None for ``num_returns``, ``max_retries`` and
    ``retry_exceptions`` fall back to the values stored on this object.
    A ``runtime_env`` that is falsy falls back to ``self._runtime_env``.

    Returns:
        The single object ref when the task has exactly one, the list of
        object refs when it has more than one, and None when it has none
        (unless ``self._decorator`` wraps the invocation and returns
        something else).
    """
    # In client mode the call is delegated with all arguments unchanged.
    if client_mode_should_convert(auto_init=True):
        return client_mode_convert_function(
            self,
            args,
            kwargs,
            num_returns=num_returns,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            accelerator_type=accelerator_type,
            resources=resources,
            max_retries=max_retries,
            retry_exceptions=retry_exceptions,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            name=name)

    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    if not self._is_cross_language and \
            self._last_export_session_and_job != \
            worker.current_session_and_job:
        # There is an interesting question here. If the remote function is
        # used by a subsequent driver (in the same script), should the
        # second driver pickle the function again? If yes, then the remote
        # function definition can differ in the second driver (e.g., if
        # variables in its closure have changed). We probably want the
        # behavior of the remote function in the second driver to be
        # independent of whether or not the function was invoked by the
        # first driver. This is an argument for repickling the function,
        # which we do here.
        self._pickled_function = pickle.dumps(self._function)

        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._uuid)

        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    kwargs = {} if kwargs is None else kwargs
    args = [] if args is None else args

    # Per-call values win; None means "use the value stored at construction".
    if num_returns is None:
        num_returns = self._num_returns
    if max_retries is None:
        max_retries = self._max_retries
    if retry_exceptions is None:
        retry_exceptions = self._retry_exceptions

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    # A stored placement group other than "default" takes precedence over
    # the `placement_group` argument; a falsy stored value means "empty".
    if self._placement_group != "default":
        if self._placement_group:
            placement_group = self._placement_group
        else:
            placement_group = PlacementGroup.empty()
    elif placement_group == "default":
        # Neither the stored value nor the argument names a group: use the
        # current one only when child-task capture is enabled.
        if placement_group_capture_child_tasks:
            placement_group = get_current_placement_group()
        else:
            placement_group = PlacementGroup.empty()

    # Normalize any remaining falsy value to the empty placement group.
    if not placement_group:
        placement_group = PlacementGroup.empty()

    check_placement_group_index(placement_group,
                                placement_group_bundle_index)

    # Stored defaults are passed first, per-call overrides second.
    resources = ray._private.utils.resources_from_resource_arguments(
        self._num_cpus, self._num_gpus, self._memory,
        self._object_store_memory, self._resources, self._accelerator_type,
        num_cpus, num_gpus, memory, object_store_memory, resources,
        accelerator_type)

    # A truthy non-ParsedRuntimeEnv value is parsed, a ParsedRuntimeEnv is
    # used as-is, and anything else falls back to the stored runtime env.
    if runtime_env and not isinstance(runtime_env, ParsedRuntimeEnv):
        runtime_env = ParsedRuntimeEnv(runtime_env)
    elif isinstance(runtime_env, ParsedRuntimeEnv):
        pass
    else:
        runtime_env = self._runtime_env

    parent_runtime_env = worker.core_worker.get_current_runtime_env()
    parsed_runtime_env = override_task_or_actor_runtime_env(
        runtime_env, parent_runtime_env)

    def invocation(args, kwargs):
        # Build the flat argument list expected by submit_task.
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not args and not kwargs and not self._function_signature:
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert not self._is_cross_language, \
                "Cross language remote function " \
                "cannot be executed locally."
        object_refs = worker.core_worker.submit_task(
            self._language, self._function_descriptor, list_args, name,
            num_returns, resources, max_retries, retry_exceptions,
            placement_group.id, placement_group_bundle_index,
            placement_group_capture_child_tasks,
            worker.debugger_breakpoint, parsed_runtime_env.serialize(),
            parsed_runtime_env.get("uris") or [])
        # Reset worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""
        # Falls through (returning None) when there are no object refs.
        if len(object_refs) == 1:
            return object_refs[0]
        elif len(object_refs) > 1:
            return object_refs

    # Let a decorator found on the function wrap the invocation.
    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
class JobConfig:
    """A class used to store the configurations of a job.

    Attributes:
        num_java_workers_per_process (int): The number of java workers per
            worker process.
        jvm_options (str[]): The jvm options for java workers of the job.
        code_search_path (list): A list of directories or jar files that
            specify the search path for user code. This will be used as
            `CLASSPATH` in Java and `PYTHONPATH` in Python.
        runtime_env (dict): A runtime environment dictionary (see
            ``runtime_env.py`` for detailed documentation).
        client_job (bool): A boolean represent the source of the job.
        metadata (dict): Key/value pairs copied into the job config proto.
        ray_namespace (str): Namespace written to the job config proto; a
            random UUID is used when it is None.
    """

    def __init__(self,
                 num_java_workers_per_process=1,
                 jvm_options=None,
                 code_search_path=None,
                 runtime_env=None,
                 client_job=False,
                 metadata=None,
                 ray_namespace=None):
        """Store the job settings and parse ``runtime_env``.

        Raises:
            AssertionError: If ``code_search_path`` is neither a list nor a
                tuple, or if ``runtime_env["eager_install"]`` is not a bool.
        """
        self.num_java_workers_per_process = num_java_workers_per_process
        self.jvm_options = jvm_options or []
        self.code_search_path = code_search_path or []
        # It's difficult to find the error that caused by the
        # code_search_path is a string. So we check it here. The check is an
        # explicit raise so that it still runs under ``python -O``.
        if not isinstance(self.code_search_path, (list, tuple)):
            raise AssertionError(
                f"The type of code search path is incorrect: "
                f"{type(code_search_path)}")
        self.client_job = client_job
        self.metadata = metadata or {}
        self.ray_namespace = ray_namespace
        self.set_runtime_env(runtime_env)

    def set_metadata(self, key: str, value: str) -> None:
        """Store one metadata entry.

        NOTE(review): this does not reset ``_cached_pb``, so a proto built
        earlier will not contain the new entry -- confirm this is intended.
        """
        self.metadata[key] = value

    def serialize(self):
        """Serialize the struct into protobuf string"""
        return self.get_proto_job_config().SerializeToString()

    def set_runtime_env(self,
                        runtime_env: Optional[Dict[str, Any]]) -> None:
        """Replace the runtime env of this config and drop the cached proto.

        Args:
            runtime_env: The raw runtime env dict, or None for an empty one.
                An optional ``"eager_install"`` key must hold a bool; it
                defaults to True when absent.

        Raises:
            AssertionError: If ``eager_install`` is not a bool.
        """
        # TODO(edoakes): this is really unfortunate, but JobConfig is imported
        # all over the place so this causes circular imports. We should remove
        # this dependency and pass in a validated runtime_env instead.
        from ray._private.runtime_env.validation import ParsedRuntimeEnv

        # Parse and validate everything first so that a failure leaves the
        # object (including its cached proto) exactly as it was.
        parsed_runtime_env = ParsedRuntimeEnv(runtime_env or {})

        eager_install = True
        if runtime_env and "eager_install" in runtime_env:
            eager_install = runtime_env["eager_install"]
        # Raised explicitly instead of via `assert` so the check is not
        # stripped under ``python -O``; the exception type is kept.
        if not isinstance(eager_install, bool):
            raise AssertionError(f"The type of eager_install is incorrect: "
                                 f"{type(eager_install)}"
                                 f", the bool type is needed.")

        self._parsed_runtime_env = parsed_runtime_env
        self.runtime_env = runtime_env or dict()
        self.runtime_env_eager_install = eager_install
        self._cached_pb = None

    def set_ray_namespace(self, ray_namespace: str) -> None:
        """Change the namespace, dropping the cached proto if it differs."""
        if ray_namespace != self.ray_namespace:
            self.ray_namespace = ray_namespace
            self._cached_pb = None

    def get_proto_job_config(self):
        """Return the prototype structure of JobConfig"""
        # The proto is built lazily and reused until a setter resets it.
        if self._cached_pb is None:
            self._cached_pb = gcs_utils.JobConfig()
            if self.ray_namespace is None:
                # A fresh random namespace is generated on every rebuild.
                self._cached_pb.ray_namespace = str(uuid.uuid4())
            else:
                self._cached_pb.ray_namespace = self.ray_namespace
            self._cached_pb.num_java_workers_per_process = (
                self.num_java_workers_per_process)
            self._cached_pb.jvm_options.extend(self.jvm_options)
            self._cached_pb.code_search_path.extend(self.code_search_path)
            self._cached_pb.runtime_env.uris[:] = self.get_runtime_env_uris()
            serialized_env = self.get_serialized_runtime_env()
            self._cached_pb.runtime_env.serialized_runtime_env = serialized_env
            for k, v in self.metadata.items():
                self._cached_pb.metadata[k] = v
            self._cached_pb.runtime_env.runtime_env_eager_install = \
                self.runtime_env_eager_install
        return self._cached_pb

    def get_runtime_env_uris(self):
        """Get the uris of runtime environment"""
        return self._parsed_runtime_env.get("uris") or []

    def get_serialized_runtime_env(self) -> str:
        """Return the JSON-serialized parsed runtime env dict"""
        return self._parsed_runtime_env.serialize()

    def set_runtime_env_uris(self, uris):
        """Store ``uris`` in both the raw and the parsed runtime env.

        NOTE(review): ``_cached_pb`` is not reset here either -- confirm.
        """
        self.runtime_env["uris"] = uris
        self._parsed_runtime_env["uris"] = uris
def test_empty(self):
    """An empty input dict parses to an empty runtime env."""
    parsed = ParsedRuntimeEnv({})
    assert parsed == {}
def _remote(self,
            args=None,
            kwargs=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            accelerator_type=None,
            max_concurrency=None,
            max_restarts=None,
            max_task_retries=None,
            name=None,
            namespace=None,
            lifetime=None,
            placement_group="default",
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None):
    """Create an actor.

    This method allows more flexibility than the remote method because
    resource requirements can be specified and override the defaults in the
    decorator.

    Args:
        args: The arguments to forward to the actor constructor.
        kwargs: The keyword arguments to forward to the actor constructor.
        num_cpus: The number of CPUs required by the actor creation task.
        num_gpus: The number of GPUs required by the actor creation task.
        memory: Restrict the heap memory usage of this actor.
        object_store_memory: Restrict the object store memory used by
            this actor when creating objects.
        resources: The custom resources required by the actor creation
            task.
        accelerator_type: Passed, together with the other resource
            arguments, to ``resources_from_resource_arguments``.
        max_concurrency: The max number of concurrent calls to allow for
            this actor. This only works with direct actor calls. The max
            concurrency defaults to 1 for threaded execution, and 1000 for
            asyncio execution. Note that the execution order is not
            guaranteed when max_concurrency > 1.
        max_restarts: Overrides the value stored in the actor metadata
            when it is not None (an explicit 0 is honored).
        max_task_retries: Overrides the value stored in the actor metadata
            when it is not None (an explicit 0 is honored).
        name: The globally unique name for the actor, which can be used
            to retrieve the actor via ray.get_actor(name) as long as the
            actor is still alive.
        namespace: Override the namespace to use for the actor. By default,
            actors are created in an anonymous namespace. The actor can
            be retrieved via ray.get_actor(name=name, namespace=namespace).
        lifetime: Either `None`, which defaults to the actor will fate
            share with its creator and will be deleted once its refcount
            drops to zero, or "detached", which means the actor will live
            as a global object independent of the creator.
        placement_group: the placement group this actor belongs to,
            or None if it doesn't belong to any group. Setting to "default"
            autodetects the placement group based on the current setting of
            placement_group_capture_child_tasks.
        placement_group_bundle_index: the index of the bundle
            if the actor belongs to a placement group, which may be -1 to
            specify any available bundle.
        placement_group_capture_child_tasks: Whether or not children tasks
            of this actor should implicitly use the same placement group
            as its parent. It is True by default.
        runtime_env (Dict[str, Any]): Specifies the runtime environment for
            this actor or task and its children (see
            :ref:`runtime-environments` for details).  This API is in beta
            and may change before becoming stable.

    Returns:
        A handle to the newly created actor.

    Raises:
        ValueError: If ``max_concurrency`` is below 1, ``name`` is empty or
            already taken, or ``lifetime`` is not None or "detached".
        TypeError: If ``name`` is neither None nor a string.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    meta = self.__ray_metadata__
    # The actor is treated as asyncio when the class has any coroutine
    # function member.
    actor_has_async_methods = len(
        inspect.getmembers(
            meta.modified_class,
            predicate=inspect.iscoroutinefunction)) > 0
    is_asyncio = actor_has_async_methods

    if max_concurrency is None:
        if is_asyncio:
            max_concurrency = 1000
        else:
            max_concurrency = 1

    if max_concurrency < 1:
        raise ValueError("max_concurrency must be >= 1")

    # In client mode the call is delegated with all arguments unchanged.
    if client_mode_should_convert(auto_init=True):
        return client_mode_convert_actor(
            self,
            args,
            kwargs,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            resources=resources,
            accelerator_type=accelerator_type,
            max_concurrency=max_concurrency,
            max_restarts=max_restarts,
            max_task_retries=max_task_retries,
            name=name,
            namespace=namespace,
            lifetime=lifetime,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env)

    worker = ray.worker.global_worker
    worker.check_connected()

    if name is not None:
        if not isinstance(name, str):
            raise TypeError(
                f"name must be None or a string, got: '{type(name)}'.")
        elif name == "":
            raise ValueError("Actor name cannot be an empty string.")
    if namespace is not None:
        ray._private.utils.validate_namespace(namespace)

    # Check whether the name is already taken.
    # TODO(edoakes): this check has a race condition because two drivers
    # could pass the check and then create the same named actor. We should
    # instead check this when we create the actor, but that's currently an
    # async call.
    if name is not None:
        try:
            ray.get_actor(name, namespace=namespace)
        except ValueError:  # Name is not taken.
            pass
        else:
            raise ValueError(
                f"The name {name} (namespace={namespace}) is already "
                "taken. Please use "
                "a different name or get the existing actor using "
                f"ray.get_actor('{name}', namespace='{namespace}')")

    if lifetime is None:
        detached = False
    elif lifetime == "detached":
        detached = True
    else:
        raise ValueError(
            "actor `lifetime` argument must be either `None` or 'detached'"
        )

    # Set the actor's default resources if not already set. First four
    # conditions are to check that no resources were specified in the
    # decorator. Last four conditions are to check that no resources were
    # specified when _remote() was called.
    if (meta.num_cpus is None and meta.num_gpus is None
            and meta.resources is None and meta.accelerator_type is None
            and num_cpus is None and num_gpus is None and resources is None
            and accelerator_type is None):
        # In the default case, actors acquire no resources for
        # their lifetime, and actor methods will require 1 CPU.
        cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
    else:
        # If any resources are specified (here or in decorator), then
        # all resources are acquired for the actor's lifetime and no
        # resources are associated with methods.
        cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                       if meta.num_cpus is None else meta.num_cpus)
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

    # LOCAL_MODE cannot handle cross_language
    if worker.mode == ray.LOCAL_MODE:
        assert not meta.is_cross_language, \
            "Cross language ActorClass cannot be executed locally."

    # Export the actor.
    if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                       worker.current_session_and_job):
        # If this actor class was not exported in this session and job,
        # we need to export this function again, because current GCS
        # doesn't have it.
        meta.last_export_session_and_job = (worker.current_session_and_job)
        # After serialize / deserialize modified class, the __module__
        # of modified class will be ray.cloudpickle.cloudpickle.
        # So, here pass actor_creation_function_descriptor to make
        # sure export actor class correct.
        worker.function_actor_manager.export_actor_class(
            meta.modified_class, meta.actor_creation_function_descriptor,
            meta.method_meta.methods.keys())

    # Decorator-level values are passed first, per-call overrides second.
    resources = ray._private.utils.resources_from_resource_arguments(
        cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
        meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
        object_store_memory, resources, accelerator_type)

    # If the actor methods require CPU resources, then set the required
    # placement resources. If actor_placement_resources is empty, then
    # the required placement resources will be the same as resources.
    actor_placement_resources = {}
    assert actor_method_cpu in [0, 1]
    if actor_method_cpu == 1:
        actor_placement_resources = resources.copy()
        actor_placement_resources["CPU"] += 1
    if meta.is_cross_language:
        creation_args = cross_language.format_args(worker, args, kwargs)
    else:
        function_signature = meta.method_meta.signatures["__init__"]
        creation_args = signature.flatten_args(function_signature, args,
                                               kwargs)

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)
    placement_group = configure_placement_group_based_on_context(
        placement_group_capture_child_tasks,
        placement_group_bundle_index,
        resources,
        actor_placement_resources,
        meta.class_name,
        placement_group=placement_group)

    # A truthy non-ParsedRuntimeEnv value is parsed, a ParsedRuntimeEnv is
    # used as-is, and anything else falls back to the decorator's value.
    if runtime_env and not isinstance(runtime_env, ParsedRuntimeEnv):
        runtime_env = ParsedRuntimeEnv(runtime_env)
    elif isinstance(runtime_env, ParsedRuntimeEnv):
        pass
    else:
        runtime_env = meta.runtime_env

    parent_runtime_env = worker.core_worker.get_current_runtime_env()
    parsed_runtime_env = override_task_or_actor_runtime_env(
        runtime_env, parent_runtime_env)

    concurrency_groups_dict = {}
    for cg_name in meta.concurrency_groups:
        concurrency_groups_dict[cg_name] = {
            "name": cg_name,
            "max_concurrency": meta.concurrency_groups[cg_name],
            "function_descriptors": [],
        }

    # Update methods
    for method_name in meta.method_meta.concurrency_group_for_methods:
        cg_name = meta.method_meta.concurrency_group_for_methods[
            method_name]
        assert cg_name in concurrency_groups_dict

        module_name = meta.actor_creation_function_descriptor.module_name
        class_name = meta.actor_creation_function_descriptor.class_name
        concurrency_groups_dict[cg_name]["function_descriptors"].append(
            PythonFunctionDescriptor(module_name, method_name, class_name))

    # Only None means "not overridden". Using `or` here would silently
    # replace an explicit 0 (a falsy but valid value) with the decorator's.
    effective_max_restarts = (max_restarts if max_restarts is not None
                              else meta.max_restarts)
    effective_max_task_retries = (max_task_retries
                                  if max_task_retries is not None else
                                  meta.max_task_retries)

    actor_id = worker.core_worker.create_actor(
        meta.language,
        meta.actor_creation_function_descriptor,
        creation_args,
        effective_max_restarts,
        effective_max_task_retries,
        resources,
        actor_placement_resources,
        max_concurrency,
        detached,
        name if name is not None else "",
        namespace if namespace is not None else "",
        is_asyncio,
        placement_group.id,
        placement_group_bundle_index,
        placement_group_capture_child_tasks,
        # Store actor_method_cpu in actor handle's extension data.
        extension_data=str(actor_method_cpu),
        serialized_runtime_env=parsed_runtime_env.serialize(),
        runtime_env_uris=parsed_runtime_env.get_uris(),
        concurrency_groups_dict=concurrency_groups_dict or dict())

    actor_handle = ActorHandle(
        meta.language,
        actor_id,
        meta.method_meta.decorators,
        meta.method_meta.signatures,
        meta.method_meta.num_returns,
        actor_method_cpu,
        meta.actor_creation_function_descriptor,
        worker.current_session_and_job,
        original_handle=True)

    return actor_handle
def test_key_with_value_none():
    """A key whose value is None is dropped from the parsed runtime env."""
    parsed = ParsedRuntimeEnv({"pip": None})
    assert parsed == {}
def options(
        self,
        args=None,
        kwargs=None,
        num_returns=None,
        num_cpus=None,
        num_gpus=None,
        memory=None,
        object_store_memory=None,
        accelerator_type=None,
        resources=None,
        max_retries=None,
        retry_exceptions=None,
        placement_group="default",
        placement_group_bundle_index=-1,
        placement_group_capture_child_tasks=None,
        runtime_env=None,
        name="",
        scheduling_strategy: SchedulingStrategyT = None,
):
    """Configures and overrides the task invocation parameters.

    The arguments are the same as those that can be passed to
    :obj:`ray.remote`. Overriding `max_calls` is not supported.

    Returns:
        A wrapper object whose ``remote(*args, **kwargs)`` forwards the
        call to ``self._remote`` with the options given here.

    Examples:

    .. code-block:: python

        @ray.remote(num_gpus=1, max_calls=1, num_returns=2)
        def f():
           return 1, 2
        # Task f will require 2 gpus instead of 1.
        g = f.options(num_gpus=2)
    """
    func_cls = self

    # Parse local pip/conda config files here. If we instead did it in
    # .remote(), it would get run in the Ray Client server, which runs on
    # a remote node where the files aren't available.
    if not runtime_env:
        # Keep the runtime_env as None. In .remote(), we need to know if
        # runtime_env is None to know whether or not to fall back to the
        # runtime_env specified in the @ray.remote decorator.
        new_runtime_env = None
    elif isinstance(runtime_env, str):
        # Serialized protobuf runtime env from Ray client.
        new_runtime_env = runtime_env
    else:
        new_runtime_env = ParsedRuntimeEnv(runtime_env).serialize()

    # Everything except the positional/keyword call arguments is fixed at
    # options() time, so collect it once.
    call_options = dict(
        num_returns=num_returns,
        num_cpus=num_cpus,
        num_gpus=num_gpus,
        memory=memory,
        object_store_memory=object_store_memory,
        accelerator_type=accelerator_type,
        resources=resources,
        max_retries=max_retries,
        retry_exceptions=retry_exceptions,
        placement_group=placement_group,
        placement_group_bundle_index=placement_group_bundle_index,
        placement_group_capture_child_tasks=(
            placement_group_capture_child_tasks),
        runtime_env=new_runtime_env,
        name=name,
        scheduling_strategy=scheduling_strategy,
    )

    class FuncWrapper:
        def remote(self, *args, **kwargs):
            return func_cls._remote(args=args, kwargs=kwargs, **call_options)

    return FuncWrapper()
def options(self,
            args=None,
            kwargs=None,
            num_returns=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            accelerator_type=None,
            resources=None,
            max_retries=None,
            retry_exceptions=None,
            placement_group="default",
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            name=""):
    """Configures and overrides the task invocation parameters.

    The arguments are the same as those that can be passed to
    :obj:`ray.remote`.

    Returns:
        A wrapper object whose ``remote(*args, **kwargs)`` forwards the
        call to ``self._remote`` with the options given here.

    Examples:

    .. code-block:: python

        @ray.remote(num_gpus=1, max_calls=1, num_returns=2)
        def f():
           return 1, 2
        # Task f will require 2 gpus instead of 1.
        g = f.options(num_gpus=2, max_calls=None)
    """
    func_cls = self

    # Parse local pip/conda config files here. If we instead did it in
    # .remote(), it would get run in the Ray Client server, which runs on
    # a remote node where the files aren't available.
    parsed_env = ParsedRuntimeEnv(runtime_env or {}, is_task_or_actor=True)

    # Everything except the positional/keyword call arguments is fixed at
    # options() time, so collect it once.
    call_options = dict(
        num_returns=num_returns,
        num_cpus=num_cpus,
        num_gpus=num_gpus,
        memory=memory,
        object_store_memory=object_store_memory,
        accelerator_type=accelerator_type,
        resources=resources,
        max_retries=max_retries,
        retry_exceptions=retry_exceptions,
        placement_group=placement_group,
        placement_group_bundle_index=placement_group_bundle_index,
        placement_group_capture_child_tasks=(
            placement_group_capture_child_tasks),
        runtime_env=parsed_env,
        name=name,
    )

    class FuncWrapper:
        def remote(self, *args, **kwargs):
            return func_cls._remote(args=args, kwargs=kwargs, **call_options)

    return FuncWrapper()
def options(self,
            args=None,
            kwargs=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            accelerator_type=None,
            max_concurrency=None,
            max_restarts=None,
            max_task_retries=None,
            name=None,
            namespace=None,
            lifetime=None,
            placement_group="default",
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None):
    """Configures and overrides the actor instantiation parameters.

    The arguments are the same as those that can be passed
    to :obj:`ray.remote`.

    Returns:
        A wrapper object whose ``remote(*args, **kwargs)`` forwards the
        call to ``self._remote`` with the options given here.

    Examples:

    .. code-block:: python

        @ray.remote(num_cpus=2, resources={"CustomResource": 1})
        class Foo:
            def method(self):
                return 1
        # Class Foo will require 1 cpu instead of 2.
        # It will also require no custom resources.
        Bar = Foo.options(num_cpus=1, resources=None)
    """
    actor_cls = self

    # Parse local pip/conda config files here. If we instead did it in
    # .remote(), it would get run in the Ray Client server, which runs on
    # a remote node where the files aren't available.
    if not runtime_env:
        # Keep the new_runtime_env as None.  In .remote(), we need to know
        # if runtime_env is None to know whether or not to fall back to the
        # runtime_env specified in the @ray.remote decorator.
        new_runtime_env = None
    elif isinstance(runtime_env, str):
        new_runtime_env = runtime_env
    else:
        new_runtime_env = ParsedRuntimeEnv(runtime_env).serialize()

    # Everything except the positional/keyword call arguments is fixed at
    # options() time, so collect it once.
    call_options = dict(
        num_cpus=num_cpus,
        num_gpus=num_gpus,
        memory=memory,
        object_store_memory=object_store_memory,
        resources=resources,
        accelerator_type=accelerator_type,
        max_concurrency=max_concurrency,
        max_restarts=max_restarts,
        max_task_retries=max_task_retries,
        name=name,
        namespace=namespace,
        lifetime=lifetime,
        placement_group=placement_group,
        placement_group_bundle_index=placement_group_bundle_index,
        placement_group_capture_child_tasks=(
            placement_group_capture_child_tasks),
        runtime_env=new_runtime_env,
    )

    class ActorOptionWrapper:
        def remote(self, *args, **kwargs):
            return actor_cls._remote(
                args=args, kwargs=kwargs, **call_options)

    return ActorOptionWrapper()
def test_empty(self, is_task_or_actor):
    """An empty input dict parses to an empty runtime env."""
    parsed = ParsedRuntimeEnv({}, is_task_or_actor=is_task_or_actor)
    assert parsed == {}