def _deserialization_helper(self, state, ray_forking):
    """This is defined in order to make pickling work.

    Args:
        state: The serialized state of the actor handle.
        ray_forking: True if this is being called because Ray is forking
            the actor handle and False if it is being called by pickling.
    """
    worker = ray.worker.get_global_worker()
    worker.check_connected()

    if state["ray_forking"]:
        actor_handle_id = state["actor_handle_id"]
    else:
        # Right now, if the actor handle has been pickled, we create a
        # temporary actor handle id for invocations.
        # TODO(pcm): This still leads to a lot of actor handles being
        # created, there should be a better way to handle pickled
        # actor handles.
        # TODO(swang): Accessing the worker's current task ID is not
        # thread-safe.
        # TODO(swang): Unpickling the same actor handle twice in the same
        # task will break the application, and unpickling it twice in the
        # same actor is likely a performance bug. We should consider
        # logging a warning in these cases.
        actor_handle_id = compute_actor_handle_id_non_forked(
            state["actor_handle_id"], worker.current_task_id)

    self.__init__(
        state["actor_id"],
        state["module_name"],
        state["class_name"],
        state["actor_cursor"],
        state["actor_method_names"],
        state["method_signatures"],
        state["method_num_return_vals"],
        state["actor_creation_dummy_object_id"],
        state["actor_method_cpus"],
        # This is the driver ID of the driver that owns the actor, not
        # necessarily the driver that owns this actor handle.
        state["actor_driver_id"],
        actor_handle_id=actor_handle_id)
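# Illustrative sketch (the handle round-trip below is an assumption about
# how this helper is reached, not part of the original source): pickling
# an actor handle goes through _serialization_helper /
# _deserialization_helper with ray_forking=False, so the unpickled handle
# receives a temporary, non-forked handle ID.
import ray
import ray.cloudpickle as pickle

@ray.remote
class Counter:
    def __init__(self):
        self.value = 0

    def increment(self):
        self.value += 1
        return self.value

counter = Counter.remote()
serialized = pickle.dumps(counter)   # -> _serialization_helper(ray_forking=False)
restored = pickle.loads(serialized)  # -> _deserialization_helper(state, False)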
def free(object_ids, local_only=False):
    """Free a list of IDs from object stores.

    This function is a low-level API which should be used in restricted
    scenarios.

    If local_only is False, the request will be sent to all object stores.

    This method will not return any value to indicate whether the deletion
    is successful or not. This function is an instruction to the object
    store. If some of the objects are in use, the object stores will
    delete them later when the reference count drops to 0.

    Args:
        object_ids (List[ObjectID]): List of object IDs to delete.
        local_only (bool): Whether to delete the objects only in the
            local object store or in all object stores.
    """
    worker = ray.worker.get_global_worker()

    if isinstance(object_ids, ray.ObjectID):
        object_ids = [object_ids]

    if not isinstance(object_ids, list):
        raise TypeError("free() expects a list of ObjectID, got {}".format(
            type(object_ids)))

    # Make sure that the values are object IDs.
    for object_id in object_ids:
        if not isinstance(object_id, ray.ObjectID):
            raise TypeError("Attempting to call `free` on the value {}, "
                            "which is not a ray.ObjectID.".format(object_id))

    worker.check_connected()
    with profiling.profile("ray.free"):
        if len(object_ids) == 0:
            return

        worker.raylet_client.free_objects(object_ids, local_only)
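# Usage sketch (assumes free() is reachable from user code, e.g. through an
# internal API module): freeing is best-effort -- objects still in use are
# deleted only once their reference count reaches 0.
import ray

x_id = ray.put("payload")
free([x_id])                   # Ask all object stores to delete the object.
free([x_id], local_only=True)  # Restrict the request to the local store.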
def _actor_method_call(self,
                       method_name,
                       args=None,
                       kwargs=None,
                       num_return_vals=None):
    """Method execution stub for an actor handle.

    This is the function that executes when
    `actor.method_name.remote(*args, **kwargs)` is called. Instead of
    executing locally, the method is packaged as a task and scheduled
    to the remote actor instance.

    Args:
        method_name: The name of the actor method to execute.
        args: A list of arguments for the actor method.
        kwargs: A dictionary of keyword arguments for the actor method.
        num_return_vals (int): The number of return values for the method.

    Returns:
        object_ids: A list of object IDs returned by the remote actor
            method.
    """
    worker = ray.worker.get_global_worker()

    worker.check_connected()

    function_signature = self._ray_method_signatures[method_name]

    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    args = signature.extend_args(function_signature, args, kwargs)

    # Execute functions locally if Ray is run in LOCAL_MODE.
    # Copy args to prevent the function from mutating them.
    if worker.mode == ray.LOCAL_MODE:
        return getattr(worker.actors[self._ray_actor_id],
                       method_name)(*copy.deepcopy(args))

    function_descriptor = FunctionDescriptor(
        self._ray_module_name, method_name, self._ray_class_name)
    with self._ray_actor_lock:
        object_ids = worker.submit_task(
            function_descriptor,
            args,
            actor_id=self._ray_actor_id,
            actor_handle_id=self._ray_actor_handle_id,
            actor_counter=self._ray_actor_counter,
            actor_creation_dummy_object_id=(
                self._ray_actor_creation_dummy_object_id),
            execution_dependencies=[self._ray_actor_cursor],
            new_actor_handles=self._ray_new_actor_handles,
            # We add one for the dummy return ID.
            num_return_vals=num_return_vals + 1,
            resources={"CPU": self._ray_actor_method_cpus},
            placement_resources={},
            driver_id=self._ray_actor_driver_id)
        # Update the actor counter and cursor to reflect the most recent
        # invocation.
        self._ray_actor_counter += 1
        # The last object returned is the dummy object that should be
        # passed in to the next actor method. Do not return it to the
        # user.
        self._ray_actor_cursor = object_ids.pop()
        # We have notified the backend of the new actor handles to expect
        # since the last task was submitted, so clear the list.
        self._ray_new_actor_handles = []

    if len(object_ids) == 1:
        object_ids = object_ids[0]
    elif len(object_ids) == 0:
        object_ids = None

    return object_ids
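# Usage sketch: each .remote() call below is routed through
# _actor_method_call, which packages the call as a task chained on the
# actor's cursor, so methods run on the actor in submission order.
import ray

@ray.remote
class Adder:
    def __init__(self):
        self.total = 0

    def add(self, x):
        self.total += x
        return self.total

adder = Adder.remote()
result_id = adder.add.remote(3)  # Returns an object ID, not the value.
assert ray.get(result_id) == 3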
def _actor_method_call(self,
                       method_name,
                       args=None,
                       kwargs=None,
                       num_return_vals=None):
    """Method execution stub for an actor handle.

    This is the function that executes when
    `actor.method_name.remote(*args, **kwargs)` is called. Instead of
    executing locally, the method is packaged as a task and scheduled
    to the remote actor instance. The task implicitly depends on the
    dummy object returned by the preceding actor task (the cursor), so
    methods execute on the actor in submission order.

    Args:
        method_name: The name of the actor method to execute.
        args: A list of arguments for the actor method.
        kwargs: A dictionary of keyword arguments for the actor method.
        num_return_vals (int): The number of return values for the method.

    Returns:
        object_ids: A list of object IDs returned by the remote actor
            method.
    """
    worker = ray.worker.get_global_worker()

    worker.check_connected()

    function_signature = self._ray_method_signatures[method_name]

    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    args = signature.extend_args(function_signature, args, kwargs)

    # Execute functions locally if Ray is run in LOCAL_MODE.
    # Copy args to prevent the function from mutating them.
    if worker.mode == ray.LOCAL_MODE:
        return getattr(worker.actors[self._ray_actor_id],
                       method_name)(*copy.deepcopy(args))

    is_actor_checkpoint_method = (method_name == "__ray_checkpoint__")

    function_descriptor = FunctionDescriptor(
        self._ray_module_name, method_name, self._ray_class_name)
    with self._ray_actor_lock:
        object_ids = worker.submit_task(
            function_descriptor,
            args,
            actor_id=self._ray_actor_id,
            actor_handle_id=self._ray_actor_handle_id,
            actor_counter=self._ray_actor_counter,
            is_actor_checkpoint_method=is_actor_checkpoint_method,
            actor_creation_dummy_object_id=(
                self._ray_actor_creation_dummy_object_id),
            execution_dependencies=[self._ray_actor_cursor],
            new_actor_handles=self._ray_new_actor_handles,
            # We add one for the dummy return ID.
            num_return_vals=num_return_vals + 1,
            resources={"CPU": self._ray_actor_method_cpus},
            placement_resources={},
            driver_id=self._ray_actor_driver_id)
        # Update the actor counter and cursor to reflect the most recent
        # invocation.
        self._ray_actor_counter += 1
        # The last object returned is the dummy object that should be
        # passed in to the next actor method. Do not return it to the
        # user.
        self._ray_actor_cursor = object_ids.pop()
        # We have notified the backend of the new actor handles to expect
        # since the last task was submitted, so clear the list.
        self._ray_new_actor_handles = []

    if len(object_ids) == 1:
        object_ids = object_ids[0]
    elif len(object_ids) == 0:
        object_ids = None

    return object_ids
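# Hypothetical sketch (this call is not from the original source): only the
# reserved method name "__ray_checkpoint__" distinguishes a checkpoint task,
# so given some ActorHandle `handle`, a manual checkpoint request would flow
# through the same stub, with is_actor_checkpoint_method=True forwarded to
# worker.submit_task.
handle._actor_method_call(
    "__ray_checkpoint__", args=[], kwargs={}, num_return_vals=0)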
def _remote(self,
            args=None,
            kwargs=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            accelerator_type=None,
            max_concurrency=None,
            max_restarts=None,
            max_task_retries=None,
            name=None,
            lifetime=None,
            placement_group=None,
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            override_environment_variables=None):
    """Create an actor.

    This method allows more flexibility than the remote method because
    resource requirements can be specified and override the defaults in
    the decorator.

    Args:
        args: The arguments to forward to the actor constructor.
        kwargs: The keyword arguments to forward to the actor constructor.
        num_cpus: The number of CPUs required by the actor creation task.
        num_gpus: The number of GPUs required by the actor creation task.
        memory: Restrict the heap memory usage of this actor.
        object_store_memory: Restrict the object store memory used by
            this actor when creating objects.
        resources: The custom resources required by the actor creation
            task.
        max_concurrency: The max number of concurrent calls to allow for
            this actor. This only works with direct actor calls. The max
            concurrency defaults to 1 for threaded execution, and 1000
            for asyncio execution. Note that the execution order is not
            guaranteed when max_concurrency > 1.
        name: The globally unique name for the actor, which can be used
            to retrieve the actor via ray.get_actor(name) as long as the
            actor is still alive.
        lifetime: Either `None`, which means that the actor fate-shares
            with its creator and is deleted once its refcount drops to
            zero, or "detached", which means that the actor lives as a
            global object independent of the creator.
        placement_group: The placement group this actor belongs to, or
            None if it doesn't belong to any group.
        placement_group_bundle_index: The index of the bundle if the
            actor belongs to a placement group, which may be -1 to
            specify any available bundle.
        placement_group_capture_child_tasks: Whether or not children
            tasks of this actor should implicitly use the same placement
            group as its parent. It is True by default.
        override_environment_variables: Environment variables to override
            and/or introduce for this actor. This is a dictionary mapping
            variable names to their values.

    Returns:
        A handle to the newly created actor.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    meta = self.__ray_metadata__
    actor_has_async_methods = len(
        inspect.getmembers(
            meta.modified_class,
            predicate=inspect.iscoroutinefunction)) > 0
    is_asyncio = actor_has_async_methods

    if max_concurrency is None:
        if is_asyncio:
            max_concurrency = 1000
        else:
            max_concurrency = 1

    if max_concurrency < 1:
        raise ValueError("max_concurrency must be >= 1")

    worker = ray.worker.global_worker
    worker.check_connected()

    if name is not None:
        if not isinstance(name, str):
            raise TypeError(
                f"name must be None or a string, got: '{type(name)}'.")
        if name == "":
            raise ValueError("Actor name cannot be an empty string.")

    # Check whether the name is already taken.
    # TODO(edoakes): this check has a race condition because two drivers
    # could pass the check and then create the same named actor. We should
    # instead check this when we create the actor, but that's currently an
    # async call.
    if name is not None:
        try:
            ray.get_actor(name)
        except ValueError:  # Name is not taken.
            pass
        else:
            raise ValueError(
                f"The name {name} is already taken. Please use "
                "a different name or get the existing actor using "
                f"ray.get_actor('{name}')")

    if lifetime is None:
        detached = False
    elif lifetime == "detached":
        detached = True
    else:
        raise ValueError("lifetime must be either `None` or 'detached'")

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    if placement_group is None:
        if placement_group_capture_child_tasks:
            placement_group = get_current_placement_group()

    if not placement_group:
        placement_group = PlacementGroup.empty()

    check_placement_group_index(placement_group,
                                placement_group_bundle_index)

    # Set the actor's default resources if not already set. First three
    # conditions are to check that no resources were specified in the
    # decorator. Last three conditions are to check that no resources were
    # specified when _remote() was called.
    if (meta.num_cpus is None and meta.num_gpus is None
            and meta.resources is None and meta.accelerator_type is None
            and num_cpus is None and num_gpus is None and resources is None
            and accelerator_type is None):
        # In the default case, actors acquire no resources for
        # their lifetime, and actor methods will require 1 CPU.
        cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
    else:
        # If any resources are specified (here or in decorator), then
        # all resources are acquired for the actor's lifetime and no
        # resources are associated with methods.
        cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                       if meta.num_cpus is None else meta.num_cpus)
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

    # LOCAL_MODE cannot handle cross_language.
    if worker.mode == ray.LOCAL_MODE:
        assert not meta.is_cross_language, \
            "Cross language ActorClass cannot be executed locally."

    # Export the actor.
    if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                       worker.current_session_and_job):
        # If this actor class was not exported in this session and job,
        # we need to export this function again, because the current GCS
        # doesn't have it.
        meta.last_export_session_and_job = worker.current_session_and_job
        # After serializing / deserializing the modified class, its
        # __module__ will be ray.cloudpickle.cloudpickle. So pass
        # actor_creation_function_descriptor here to make sure the actor
        # class is exported correctly.
        worker.function_actor_manager.export_actor_class(
            meta.modified_class, meta.actor_creation_function_descriptor,
            meta.method_meta.methods.keys())

    resources = ray.utils.resources_from_resource_arguments(
        cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
        meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
        object_store_memory, resources, accelerator_type)

    # If the actor methods require CPU resources, then set the required
    # placement resources. If actor_placement_resources is empty, then
    # the required placement resources will be the same as resources.
    actor_placement_resources = {}
    assert actor_method_cpu in [0, 1]
    if actor_method_cpu == 1:
        actor_placement_resources = resources.copy()
        actor_placement_resources["CPU"] += 1
    if meta.is_cross_language:
        creation_args = cross_language.format_args(worker, args, kwargs)
    else:
        function_signature = meta.method_meta.signatures["__init__"]
        creation_args = signature.flatten_args(function_signature, args,
                                               kwargs)
    actor_id = worker.core_worker.create_actor(
        meta.language,
        meta.actor_creation_function_descriptor,
        creation_args,
        max_restarts or meta.max_restarts,
        max_task_retries or meta.max_task_retries,
        resources,
        actor_placement_resources,
        max_concurrency,
        detached,
        name if name is not None else "",
        is_asyncio,
        placement_group.id,
        placement_group_bundle_index,
        placement_group_capture_child_tasks,
        # Store actor_method_cpu in actor handle's extension data.
        extension_data=str(actor_method_cpu),
        override_environment_variables=override_environment_variables
        or dict())

    actor_handle = ActorHandle(
        meta.language,
        actor_id,
        meta.method_meta.decorators,
        meta.method_meta.signatures,
        meta.method_meta.num_returns,
        actor_method_cpu,
        meta.actor_creation_function_descriptor,
        worker.current_session_and_job,
        original_handle=True)

    return actor_handle
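# Usage sketch: the public entry points route here. Assuming the standard
# decorator API, both calls below end up in _remote(); the second overrides
# the decorator's defaults with per-call options.
import ray

@ray.remote(num_cpus=1)
class Worker:
    def ping(self):
        return "pong"

w1 = Worker.remote()  # Equivalent to Worker._remote(args=[], kwargs={}).
w2 = Worker.options(num_cpus=2, name="pinger", lifetime="detached").remote()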
def _actor_method_call(self,
                       method_name,
                       args=None,
                       kwargs=None,
                       num_return_vals=None):
    """Method execution stub for an actor handle.

    This is the function that executes when
    `actor.method_name.remote(*args, **kwargs)` is called. Instead of
    executing locally, the method is packaged as a task and scheduled
    to the remote actor instance.

    Args:
        method_name: The name of the actor method to execute.
        args: A list of arguments for the actor method.
        kwargs: A dictionary of keyword arguments for the actor method.
        num_return_vals (int): The number of return values for the method.

    Returns:
        object_ids: A list of object IDs returned by the remote actor
            method.
    """
    worker = ray.worker.get_global_worker()

    worker.check_connected()

    function_signature = self._ray_method_signatures[method_name]

    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    args = signature.extend_args(function_signature, args, kwargs)

    function_descriptor = FunctionDescriptor(
        self._ray_module_name, method_name, self._ray_class_name)
    if worker.mode == ray.LOCAL_MODE:
        function = getattr(worker.actors[self._ray_actor_id], method_name)
        object_ids = worker.local_mode_manager.execute(
            function, function_descriptor, args, num_return_vals)
    else:
        with self._ray_actor_lock:
            object_ids = worker.submit_task(
                function_descriptor,
                args,
                actor_id=self._ray_actor_id,
                actor_handle_id=self._ray_actor_handle_id,
                actor_counter=self._ray_actor_counter,
                actor_creation_dummy_object_id=(
                    self._ray_actor_creation_dummy_object_id),
                previous_actor_task_dummy_object_id=self._ray_actor_cursor,
                new_actor_handles=self._ray_new_actor_handles,
                # We add one for the dummy return ID.
                num_return_vals=num_return_vals + 1,
                resources={"CPU": self._ray_actor_method_cpus},
                placement_resources={},
                job_id=self._ray_actor_job_id)
            # Update the actor counter and cursor to reflect the most
            # recent invocation.
            self._ray_actor_counter += 1
            # The last object returned is the dummy object that should be
            # passed in to the next actor method. Do not return it to the
            # user.
            self._ray_actor_cursor = object_ids.pop()
            # We have notified the backend of the new actor handles to
            # expect since the last task was submitted, so clear the list.
            self._ray_new_actor_handles = []

    if len(object_ids) == 1:
        object_ids = object_ids[0]
    elif len(object_ids) == 0:
        object_ids = None

    return object_ids
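# Usage sketch: with local mode enabled, the same call bypasses
# worker.submit_task and executes through worker.local_mode_manager
# in-process, while still returning object IDs.
import ray

ray.init(local_mode=True)

@ray.remote
class Echo:
    def say(self, msg):
        return msg

echo = Echo.remote()
print(ray.get(echo.say.remote("hi")))  # Runs in-process; prints "hi".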