def init(self, env):
    """Initialize this streaming operator actor.

    Args:
        env: pickled runtime environment; deserialized and stored on self.

    Returns:
        True on successful initialization.
    """
    # SECURITY NOTE(review): pickle.loads on the incoming payload — callers
    # must be trusted (actor-to-actor within the job); do not feed external data.
    env = pickle.loads(env)
    self.env = env
    logger.info("init operator instance %s", self.processor_name)
    # Native channels need C++-side reader/writer clients wired to this
    # actor's message-handler methods (addressed by function descriptor).
    if env.config.channel_type == Config.NATIVE_CHANNEL:
        core_worker = ray.worker.global_worker.core_worker
        reader_async_func = PythonFunctionDescriptor(
            __name__, self.on_reader_message.__name__,
            self.__class__.__name__)
        reader_sync_func = PythonFunctionDescriptor(
            __name__, self.on_reader_message_sync.__name__,
            self.__class__.__name__)
        self.reader_client = _streaming.ReaderClient(
            core_worker, reader_async_func, reader_sync_func)
        writer_async_func = PythonFunctionDescriptor(
            __name__, self.on_writer_message.__name__,
            self.__class__.__name__)
        writer_sync_func = PythonFunctionDescriptor(
            __name__, self.on_writer_message_sync.__name__,
            self.__class__.__name__)
        self.writer_client = _streaming.WriterClient(
            core_worker, writer_async_func, writer_sync_func)
    # Source operators have no input channels; sinks have no output channels.
    if len(self.input_channels) > 0:
        self.input_gate = DataInput(env, self.input_channels)
        self.input_gate.init()
    if len(self.output_channels) > 0:
        self.output_gate = DataOutput(
            env, self.output_channels,
            self.operator.partitioning_strategies)
        self.output_gate.init()
    logger.info("init operator instance %s succeed", self.processor_name)
    return True
def init_reader(self, input_channel, writer_actor):
    """Create a native-channel DataReader for a single input channel.

    Registers this class's writer-message handlers as the peer function
    descriptors, then builds the reader against the pickled writer actor.
    """
    cls_name = self.__class__.__name__
    async_descriptor = PythonFunctionDescriptor(
        __name__, self.on_writer_message.__name__, cls_name)
    sync_descriptor = PythonFunctionDescriptor(
        __name__, self.on_writer_message_sync.__name__, cls_name)
    transfer.ChannelCreationParametersBuilder \
        .set_python_writer_function_descriptor(async_descriptor,
                                               sync_descriptor)
    channel_conf = {Config.CHANNEL_TYPE: Config.NATIVE_CHANNEL}
    self.reader = transfer.DataReader(
        [input_channel], [pickle.loads(writer_actor)], channel_conf)
def init_writer(self, output_channel, reader_actor):
    """Create a native-channel DataWriter for a single output channel.

    Registers this class's reader-message handlers as the peer function
    descriptors, builds the writer against the pickled reader actor, and
    records the channel id for later qualifier lookups.
    """
    cls_name = self.__class__.__name__
    async_descriptor = PythonFunctionDescriptor(
        __name__, self.on_reader_message.__name__, cls_name)
    sync_descriptor = PythonFunctionDescriptor(
        __name__, self.on_reader_message_sync.__name__, cls_name)
    transfer.ChannelCreationParametersBuilder \
        .set_python_reader_function_descriptor(async_descriptor,
                                               sync_descriptor)
    channel_conf = {Config.CHANNEL_TYPE: Config.NATIVE_CHANNEL}
    self.writer = transfer.DataWriter(
        [output_channel], [pickle.loads(reader_actor)], channel_conf)
    self.output_channel_id = transfer.ChannelID(output_channel)
def load_actor_class(self, job_id, actor_creation_function_descriptor):
    """Load the actor class.

    Loads (and caches) the class, then registers execution info for
    every method so the worker can execute actor tasks against it.

    Args:
        job_id: job ID of the actor.
        actor_creation_function_descriptor: Function descriptor of
            the actor constructor.

    Returns:
        The actor class.
    """
    function_id = actor_creation_function_descriptor.function_id
    # Check if the actor class already exists in the cache.
    actor_class = self._loaded_actor_classes.get(function_id, None)
    if actor_class is None:
        # Load actor class.
        if self._worker.load_code_from_local:
            # Local code is not namespaced by job, so resolve with a nil id.
            job_id = ray.JobID.nil()
            # Load actor class from local code.
            actor_class = self._load_actor_class_from_local(
                job_id, actor_creation_function_descriptor)
        else:
            # Load actor class from GCS.
            actor_class = self._load_actor_class_from_gcs(
                job_id, actor_creation_function_descriptor)
        # Save the loaded actor class in cache.
        self._loaded_actor_classes[function_id] = actor_class

        # Generate execution info for the methods of this actor class.
        module_name = actor_creation_function_descriptor.module_name
        actor_class_name = actor_creation_function_descriptor.class_name
        actor_methods = inspect.getmembers(
            actor_class, predicate=is_function_or_method)
        for actor_method_name, actor_method in actor_methods:
            # Actor creation function descriptor use a unique function
            # hash to solve actor name conflict. When constructing an
            # actor, the actor creation function descriptor will be the
            # key to find __init__ method execution info. So, here we
            # use actor creation function descriptor as method descriptor
            # for generating __init__ method execution info.
            if actor_method_name == "__init__":
                method_descriptor = actor_creation_function_descriptor
            else:
                method_descriptor = PythonFunctionDescriptor(
                    module_name, actor_method_name, actor_class_name)
            method_id = method_descriptor.function_id
            executor = self._make_actor_method_executor(
                actor_method_name,
                actor_method,
                actor_imported=True,
            )
            self._function_execution_info[job_id][method_id] = (
                FunctionExecutionInfo(
                    function=executor,
                    function_name=actor_method_name,
                    max_calls=0,
                ))
            self._num_task_executions[job_id][method_id] = 0
        # Also track executions keyed by the creation descriptor itself.
        self._num_task_executions[job_id][function_id] = 0
    return actor_class
def _ray_from_modified_class(cls, modified_class, class_id, max_restarts,
                             num_cpus, num_gpus, memory,
                             object_store_memory, resources):
    """Construct an ActorClass wrapper for a user-defined class.

    Warns when the user class already defines attributes that the
    wrapper will shadow, builds a derived class that keeps the original
    class's properties, and attaches the actor metadata.
    """
    shadowed = [
        "remote", "_remote", "_ray_from_modified_class",
        "_ray_from_function_descriptor"
    ]
    for attribute in shadowed:
        if hasattr(modified_class, attribute):
            logger.warning("Creating an actor from class {} overwrites "
                           "attribute {} of that class".format(
                               modified_class.__name__, attribute))

    # Make sure the actor class we are constructing inherits from the
    # original class so it retains all class properties.
    class DerivedActorClass(cls, modified_class):
        pass

    # Re-label the derived class so reprs point at the user's class.
    name = "ActorClass({})".format(modified_class.__name__)
    DerivedActorClass.__module__ = modified_class.__module__
    DerivedActorClass.__name__ = name
    DerivedActorClass.__qualname__ = name

    # Construct the base object without running any user __init__.
    self = DerivedActorClass.__new__(DerivedActorClass)

    # Actor creation function descriptor.
    actor_creation_function_descriptor = \
        PythonFunctionDescriptor.from_class(
            modified_class.__ray_actor_class__)

    self.__ray_metadata__ = ActorClassMetadata(
        Language.PYTHON, modified_class,
        actor_creation_function_descriptor, class_id, max_restarts,
        num_cpus, num_gpus, memory, object_store_memory, resources)

    return self
def __init__(self):
    """Build reader/writer RPC clients; data channels start unbound."""
    cls_name = self.__class__.__name__
    on_writer_async = PythonFunctionDescriptor(
        __name__, self.on_writer_message.__name__, cls_name)
    on_writer_sync = PythonFunctionDescriptor(
        __name__, self.on_writer_message_sync.__name__, cls_name)
    on_reader_async = PythonFunctionDescriptor(
        __name__, self.on_reader_message.__name__, cls_name)
    on_reader_sync = PythonFunctionDescriptor(
        __name__, self.on_reader_message_sync.__name__, cls_name)
    self.writer_client = _streaming.WriterClient(on_writer_async,
                                                 on_writer_sync)
    self.reader_client = _streaming.ReaderClient(on_reader_async,
                                                 on_reader_sync)
    # Channels are created later by init_reader / init_writer.
    self.writer = None
    self.output_channel_id = None
    self.reader = None
def init(self, worker_context_bytes):
    """Initialize the JobWorker from a serialized WorkerContext.

    Parses the protobuf context, resolves this worker's execution task
    and node from the execution graph, builds the stream processor, and
    (for native channels) wires up the C++ reader/writer clients before
    starting the stream task.

    Args:
        worker_context_bytes: serialized remote_call_pb.WorkerContext.

    Returns:
        True on successful initialization.
    """
    worker_context = remote_call_pb.WorkerContext()
    worker_context.ParseFromString(worker_context_bytes)
    self.worker_context = worker_context
    self.task_id = worker_context.task_id
    self.config = worker_context.conf
    execution_graph = ExecutionGraph(worker_context.graph)
    self.execution_graph = execution_graph
    self.execution_task = self.execution_graph. \
        get_execution_task_by_task_id(self.task_id)
    self.execution_node = self.execution_graph. \
        get_execution_node_by_task_id(self.task_id)
    operator = self.execution_node.stream_operator
    self.stream_processor = processor.build_processor(operator)
    logger.info(
        "Initializing JobWorker, task_id: {}, operator: {}.".format(
            self.task_id, self.stream_processor))
    # BUG FIX: the previous code was
    #     if self.config.get(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL):
    # which is truthy for ANY non-empty channel-type string, so native
    # reader/writer clients were created even for non-native channels.
    # Compare against NATIVE_CHANNEL explicitly (matching the operator
    # init elsewhere in this file).
    if self.config.get(Config.CHANNEL_TYPE,
                       Config.NATIVE_CHANNEL) == Config.NATIVE_CHANNEL:
        core_worker = ray.worker.global_worker.core_worker
        reader_async_func = PythonFunctionDescriptor(
            __name__, self.on_reader_message.__name__,
            self.__class__.__name__)
        reader_sync_func = PythonFunctionDescriptor(
            __name__, self.on_reader_message_sync.__name__,
            self.__class__.__name__)
        self.reader_client = _streaming.ReaderClient(
            core_worker, reader_async_func, reader_sync_func)
        writer_async_func = PythonFunctionDescriptor(
            __name__, self.on_writer_message.__name__,
            self.__class__.__name__)
        writer_sync_func = PythonFunctionDescriptor(
            __name__, self.on_writer_message_sync.__name__,
            self.__class__.__name__)
        self.writer_client = _streaming.WriterClient(
            core_worker, writer_async_func, writer_sync_func)
    self.task = self.create_stream_task()
    self.task.start()
    logger.info("JobWorker init succeed")
    return True
def _ray_from_modified_class(cls, modified_class, class_id, max_restarts,
                             max_task_retries, num_cpus, num_gpus, memory,
                             object_store_memory, resources,
                             accelerator_type, runtime_env,
                             concurrency_groups):
    """Construct an ActorClass wrapper for a user-defined class.

    Warns about attributes the wrapper shadows, derives a class that
    retains the user class's properties, serializes the runtime env
    eagerly (client-side), and attaches the actor metadata.
    """
    for attribute in [
            "remote",
            "_remote",
            "_ray_from_modified_class",
            "_ray_from_function_descriptor",
    ]:
        if hasattr(modified_class, attribute):
            logger.warning("Creating an actor from class "
                           f"{modified_class.__name__} overwrites "
                           f"attribute {attribute} of that class")

    # Make sure the actor class we are constructing inherits from the
    # original class so it retains all class properties.
    class DerivedActorClass(cls, modified_class):
        pass

    name = f"ActorClass({modified_class.__name__})"
    DerivedActorClass.__module__ = modified_class.__module__
    DerivedActorClass.__name__ = name
    DerivedActorClass.__qualname__ = name
    # Construct the base object (bypasses any user __init__).
    self = DerivedActorClass.__new__(DerivedActorClass)
    # Actor creation function descriptor.
    actor_creation_function_descriptor = \
        PythonFunctionDescriptor.from_class(
            modified_class.__ray_actor_class__)
    # Parse local pip/conda config files here. If we instead did it in
    # .remote(), it would get run in the Ray Client server, which runs on
    # a remote node where the files aren't available.
    if runtime_env:
        if isinstance(runtime_env, str):
            # Already-serialized runtime env (e.g. from Ray client).
            new_runtime_env = runtime_env
        else:
            new_runtime_env = ParsedRuntimeEnv(runtime_env).serialize()
    else:
        new_runtime_env = None

    self.__ray_metadata__ = ActorClassMetadata(
        Language.PYTHON, modified_class,
        actor_creation_function_descriptor, class_id, max_restarts,
        max_task_retries, num_cpus, num_gpus, memory, object_store_memory,
        resources, accelerator_type, new_runtime_env, concurrency_groups)

    return self
def load_actor_class(self, job_id, function_descriptor):
    """Load the actor class.

    Returns the cached class when available; otherwise loads it (from
    local code or the GCS), caches it, and registers execution info for
    each of its methods.

    Args:
        job_id: job ID of the actor.
        function_descriptor: Function descriptor of the actor
            constructor.

    Returns:
        The actor class.
    """
    function_id = function_descriptor.function_id
    cached = self._loaded_actor_classes.get(function_id, None)
    if cached is not None:
        return cached

    # Cache miss: resolve the class definition.
    if self._worker.load_code_from_local:
        # Local code is not namespaced by job; use a nil job id.
        job_id = ray.JobID.nil()
        actor_class = self._load_actor_from_local(job_id,
                                                  function_descriptor)
    else:
        actor_class = self._load_actor_class_from_gcs(
            job_id, function_descriptor)
    self._loaded_actor_classes[function_id] = actor_class

    # Register execution info for every method of this actor class.
    module_name = function_descriptor.module_name
    class_name = function_descriptor.class_name
    for method_name, method in inspect.getmembers(
            actor_class, predicate=is_function_or_method):
        method_id = PythonFunctionDescriptor(module_name, method_name,
                                             class_name).function_id
        executor = self._make_actor_method_executor(
            method_name,
            method,
            actor_imported=True,
        )
        self._function_execution_info[job_id][method_id] = (
            FunctionExecutionInfo(
                function=executor,
                function_name=method_name,
                max_calls=0,
            ))
        self._num_task_executions[job_id][method_id] = 0
    # Also track executions keyed by the constructor descriptor itself.
    self._num_task_executions[job_id][function_id] = 0
    return actor_class
def __init__(self,
             language,
             actor_id,
             method_decorators,
             method_signatures,
             method_num_return_vals,
             actor_method_cpus,
             is_cross_language,
             actor_creation_function_descriptor,
             session_and_job,
             original_handle=False):
    """Construct an actor handle.

    Args:
        language: the actor's implementation language.
        actor_id: id of the actor this handle refers to.
        method_decorators: per-method invocation decorators.
        method_signatures: mapping of method name -> signature.
        method_num_return_vals: mapping of method name -> return count.
        actor_method_cpus: CPUs required per method call.
        is_cross_language: True if the actor is implemented in another
            language; Python method descriptors are then not built.
        actor_creation_function_descriptor: descriptor of the actor's
            constructor.
        session_and_job: the (session, job) this handle belongs to.
        original_handle: True only for the handle returned directly by
            actor creation (as opposed to deserialized copies).
    """
    self._ray_actor_language = language
    self._ray_actor_id = actor_id
    self._ray_original_handle = original_handle
    self._ray_method_decorators = method_decorators
    self._ray_method_signatures = method_signatures
    self._ray_method_num_return_vals = method_num_return_vals
    self._ray_actor_method_cpus = actor_method_cpus
    self._ray_session_and_job = session_and_job
    self._ray_is_cross_language = is_cross_language
    self._ray_actor_creation_function_descriptor = \
        actor_creation_function_descriptor
    self._ray_function_descriptor = {}

    if not self._ray_is_cross_language:
        assert isinstance(actor_creation_function_descriptor,
                          PythonFunctionDescriptor)
        module_name = actor_creation_function_descriptor.module_name
        class_name = actor_creation_function_descriptor.class_name
        # Pre-build one function descriptor and one bound ActorMethod
        # per method, so `handle.method.remote(...)` works directly.
        for method_name in self._ray_method_signatures.keys():
            function_descriptor = PythonFunctionDescriptor(
                module_name, method_name, class_name)
            self._ray_function_descriptor[
                method_name] = function_descriptor
            method = ActorMethod(
                self,
                method_name,
                self._ray_method_num_return_vals[method_name],
                decorator=self._ray_method_decorators.get(method_name))
            setattr(self, method_name, method)
def _remote(self,
            args=None,
            kwargs=None,
            num_returns=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            accelerator_type=None,
            resources=None,
            max_retries=None,
            placement_group=None,
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None,
            override_environment_variables=None,
            name=""):
    """Submit the remote function for execution."""
    # In Ray client mode, forward the call to the client-side converter
    # instead of submitting locally.
    if client_mode_should_convert():
        return client_mode_convert_function(
            self,
            args,
            kwargs,
            num_returns=num_returns,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            accelerator_type=accelerator_type,
            resources=resources,
            max_retries=max_retries,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            override_environment_variables=override_environment_variables,
            name=name)
    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    if not self._is_cross_language and \
            self._last_export_session_and_job != \
            worker.current_session_and_job:
        # There is an interesting question here. If the remote function is
        # used by a subsequent driver (in the same script), should the
        # second driver pickle the function again? If yes, then the remote
        # function definition can differ in the second driver (e.g., if
        # variables in its closure have changed). We probably want the
        # behavior of the remote function in the second driver to be
        # independent of whether or not the function was invoked by the
        # first driver. This is an argument for repickling the function,
        # which we do here.
        self._pickled_function = pickle.dumps(self._function)
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._pickled_function)
        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    kwargs = {} if kwargs is None else kwargs
    args = [] if args is None else args

    # Fall back to the defaults captured at @ray.remote time.
    if num_returns is None:
        num_returns = self._num_returns
    if max_retries is None:
        max_retries = self._max_retries

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    if placement_group is None:
        if placement_group_capture_child_tasks:
            placement_group = get_current_placement_group()

    if not placement_group:
        placement_group = PlacementGroup.empty()

    check_placement_group_index(placement_group,
                                placement_group_bundle_index)

    resources = ray._private.utils.resources_from_resource_arguments(
        self._num_cpus, self._num_gpus, self._memory,
        self._object_store_memory, self._resources, self._accelerator_type,
        num_cpus, num_gpus, memory, object_store_memory, resources,
        accelerator_type)

    if runtime_env:
        parsed_runtime_env = runtime_support.RuntimeEnvDict(runtime_env)
        # Fold runtime-env vars into the worker env var overrides.
        override_environment_variables = (
            parsed_runtime_env.to_worker_env_vars(
                override_environment_variables))
    else:
        parsed_runtime_env = runtime_support.RuntimeEnvDict({})

    def invocation(args, kwargs):
        # Flatten args per the function signature (or cross-language format).
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not args and not kwargs and not self._function_signature:
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert not self._is_cross_language, \
                "Cross language remote function " \
                "cannot be executed locally."
        object_refs = worker.core_worker.submit_task(
            self._language,
            self._function_descriptor,
            list_args,
            name,
            num_returns,
            resources,
            max_retries,
            placement_group.id,
            placement_group_bundle_index,
            placement_group_capture_child_tasks,
            worker.debugger_breakpoint,
            parsed_runtime_env,
            override_environment_variables=override_environment_variables
            or dict())
        # Reset worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""
        if len(object_refs) == 1:
            return object_refs[0]
        elif len(object_refs) > 1:
            return object_refs

    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
def _remote(self,
            args=None,
            kwargs=None,
            num_return_vals=None,
            is_direct_call=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            max_retries=None):
    """Submit the remote function for execution."""
    worker = ray.worker.get_global_worker()
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    if not self._is_cross_language and \
            self._last_export_session_and_job != \
            worker.current_session_and_job:
        # There is an interesting question here. If the remote function is
        # used by a subsequent driver (in the same script), should the
        # second driver pickle the function again? If yes, then the remote
        # function definition can differ in the second driver (e.g., if
        # variables in its closure have changed). We probably want the
        # behavior of the remote function in the second driver to be
        # independent of whether or not the function was invoked by the
        # first driver. This is an argument for repickling the function,
        # which we do here.
        self._pickled_function = pickle.dumps(self._function)
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._pickled_function)
        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    kwargs = {} if kwargs is None else kwargs
    args = [] if args is None else args

    if num_return_vals is None:
        num_return_vals = self._num_return_vals
    # `is_direct_call` is a legacy flag; only direct calls are supported.
    if is_direct_call is not None and not is_direct_call:
        raise ValueError("Non-direct call tasks are no longer supported.")
    if max_retries is None:
        max_retries = self._max_retries

    resources = ray.utils.resources_from_resource_arguments(
        self._num_cpus, self._num_gpus, self._memory,
        self._object_store_memory, self._resources, num_cpus, num_gpus,
        memory, object_store_memory, resources)

    def invocation(args, kwargs):
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not args and not kwargs and not self._function_signature:
            list_args = []
        else:
            list_args = ray.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert not self._is_cross_language, \
                "Cross language remote function " \
                "cannot be executed locally."
            # Local mode executes in-process instead of submitting a task.
            object_ids = worker.local_mode_manager.execute(
                self._function, self._function_descriptor, args, kwargs,
                num_return_vals)
        else:
            object_ids = worker.core_worker.submit_task(
                self._language, self._function_descriptor, list_args,
                num_return_vals, resources, max_retries)

        if len(object_ids) == 1:
            return object_ids[0]
        elif len(object_ids) > 1:
            return object_ids

    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
def _remote(self, args=None, kwargs=None, **task_options):
    """Submit the remote function for execution."""
    # We pop the "max_calls" coming from "@ray.remote" here. We no longer need
    # it in "_remote()".
    task_options.pop("max_calls", None)
    if client_mode_should_convert(auto_init=True):
        return client_mode_convert_function(self, args, kwargs,
                                            **task_options)

    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    if (not self._is_cross_language
            and self._last_export_session_and_job !=
            worker.current_session_and_job):
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._uuid)
        # There is an interesting question here. If the remote function is
        # used by a subsequent driver (in the same script), should the
        # second driver pickle the function again? If yes, then the remote
        # function definition can differ in the second driver (e.g., if
        # variables in its closure have changed). We probably want the
        # behavior of the remote function in the second driver to be
        # independent of whether or not the function was invoked by the
        # first driver. This is an argument for repickling the function,
        # which we do here.
        try:
            self._pickled_function = pickle.dumps(self._function)
        except TypeError as e:
            msg = (
                "Could not serialize the function "
                f"{self._function_descriptor.repr}. Check "
                "https://docs.ray.io/en/master/ray-core/objects/serialization.html#troubleshooting "  # noqa
                "for more information.")
            raise TypeError(msg) from e

        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    kwargs = {} if kwargs is None else kwargs
    args = [] if args is None else args

    # fill task required options
    for k, v in ray_option_utils.task_options.items():
        task_options[k] = task_options.get(k, v.default_value)
    # "max_calls" already takes effects and should not apply again.
    # Remove the default value here.
    task_options.pop("max_calls", None)

    # TODO(suquark): cleanup these fields
    name = task_options["name"]
    runtime_env = parse_runtime_env(task_options["runtime_env"])
    placement_group = task_options["placement_group"]
    placement_group_bundle_index = task_options[
        "placement_group_bundle_index"]
    placement_group_capture_child_tasks = task_options[
        "placement_group_capture_child_tasks"]
    scheduling_strategy = task_options["scheduling_strategy"]
    num_returns = task_options["num_returns"]
    max_retries = task_options["max_retries"]
    retry_exceptions = task_options["retry_exceptions"]

    resources = ray._private.utils.resources_from_ray_options(task_options)

    # Legacy placement-group options are normalized into a
    # PlacementGroupSchedulingStrategy (or "DEFAULT").
    if scheduling_strategy is None or isinstance(
            scheduling_strategy, PlacementGroupSchedulingStrategy):
        if isinstance(scheduling_strategy,
                      PlacementGroupSchedulingStrategy):
            placement_group = scheduling_strategy.placement_group
            placement_group_bundle_index = (
                scheduling_strategy.placement_group_bundle_index)
            placement_group_capture_child_tasks = (
                scheduling_strategy.placement_group_capture_child_tasks)

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)
        placement_group = configure_placement_group_based_on_context(
            placement_group_capture_child_tasks,
            placement_group_bundle_index,
            resources,
            {},  # no placement_resources for tasks
            self._function_descriptor.function_name,
            placement_group=placement_group,
        )
        if not placement_group.is_empty:
            scheduling_strategy = PlacementGroupSchedulingStrategy(
                placement_group,
                placement_group_bundle_index,
                placement_group_capture_child_tasks,
            )
        else:
            scheduling_strategy = "DEFAULT"

    serialized_runtime_env_info = None
    if runtime_env is not None:
        serialized_runtime_env_info = get_runtime_env_info(
            runtime_env,
            is_job_runtime_env=False,
            serialize=True,
        )

    def invocation(args, kwargs):
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not args and not kwargs and not self._function_signature:
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)

        if worker.mode == ray.worker.LOCAL_MODE:
            assert (
                not self._is_cross_language
            ), "Cross language remote function cannot be executed locally."
        object_refs = worker.core_worker.submit_task(
            self._language,
            self._function_descriptor,
            list_args,
            name if name is not None else "",
            num_returns,
            resources,
            max_retries,
            retry_exceptions,
            scheduling_strategy,
            worker.debugger_breakpoint,
            serialized_runtime_env_info or "{}",
        )
        # Reset worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""
        if len(object_refs) == 1:
            return object_refs[0]
        elif len(object_refs) > 1:
            return object_refs

    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
def _remote(self,
            args=None,
            kwargs=None,
            num_cpus=None,
            num_gpus=None,
            memory=None,
            object_store_memory=None,
            resources=None,
            accelerator_type=None,
            max_concurrency=None,
            max_restarts=None,
            max_task_retries=None,
            name=None,
            namespace=None,
            lifetime=None,
            placement_group="default",
            placement_group_bundle_index=-1,
            placement_group_capture_child_tasks=None,
            runtime_env=None):
    """Create an actor.

    This method allows more flexibility than the remote method because
    resource requirements can be specified and override the defaults in the
    decorator.

    Args:
        args: The arguments to forward to the actor constructor.
        kwargs: The keyword arguments to forward to the actor constructor.
        num_cpus: The number of CPUs required by the actor creation task.
        num_gpus: The number of GPUs required by the actor creation task.
        memory: Restrict the heap memory usage of this actor.
        object_store_memory: Restrict the object store memory used by
            this actor when creating objects.
        resources: The custom resources required by the actor creation
            task.
        max_concurrency: The max number of concurrent calls to allow for
            this actor. This only works with direct actor calls. The max
            concurrency defaults to 1 for threaded execution, and 1000 for
            asyncio execution. Note that the execution order is not
            guaranteed when max_concurrency > 1.
        name: The globally unique name for the actor, which can be used
            to retrieve the actor via ray.get_actor(name) as long as the
            actor is still alive.
        namespace: Override the namespace to use for the actor. By default,
            actors are created in an anonymous namespace. The actor can
            be retrieved via ray.get_actor(name=name, namespace=namespace).
        lifetime: Either `None`, which defaults to the actor will fate
            share with its creator and will be deleted once its refcount
            drops to zero, or "detached", which means the actor will live
            as a global object independent of the creator.
        placement_group: the placement group this actor belongs to,
            or None if it doesn't belong to any group. Setting to "default"
            autodetects the placement group based on the current setting of
            placement_group_capture_child_tasks.
        placement_group_bundle_index: the index of the bundle
            if the actor belongs to a placement group, which may be -1 to
            specify any available bundle.
        placement_group_capture_child_tasks: Whether or not children tasks
            of this actor should implicitly use the same placement group
            as its parent. It is True by default.
        runtime_env (Dict[str, Any]): Specifies the runtime environment for
            this actor or task and its children (see
            :ref:`runtime-environments` for details). This API is in beta
            and may change before becoming stable.

    Returns:
        A handle to the newly created actor.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    meta = self.__ray_metadata__
    # Any coroutine method makes the whole actor run in asyncio mode.
    actor_has_async_methods = len(
        inspect.getmembers(
            meta.modified_class,
            predicate=inspect.iscoroutinefunction)) > 0
    is_asyncio = actor_has_async_methods

    if max_concurrency is None:
        if is_asyncio:
            max_concurrency = 1000
        else:
            max_concurrency = 1

    if max_concurrency < 1:
        raise ValueError("max_concurrency must be >= 1")

    if client_mode_should_convert(auto_init=True):
        return client_mode_convert_actor(
            self,
            args,
            kwargs,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            resources=resources,
            accelerator_type=accelerator_type,
            max_concurrency=max_concurrency,
            max_restarts=max_restarts,
            max_task_retries=max_task_retries,
            name=name,
            namespace=namespace,
            lifetime=lifetime,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env)

    worker = ray.worker.global_worker
    worker.check_connected()

    if name is not None:
        if not isinstance(name, str):
            raise TypeError(
                f"name must be None or a string, got: '{type(name)}'.")
        elif name == "":
            raise ValueError("Actor name cannot be an empty string.")
    if namespace is not None:
        ray._private.utils.validate_namespace(namespace)

    # Check whether the name is already taken.
    # TODO(edoakes): this check has a race condition because two drivers
    # could pass the check and then create the same named actor. We should
    # instead check this when we create the actor, but that's currently an
    # async call.
    if name is not None:
        try:
            ray.get_actor(name, namespace=namespace)
        except ValueError:
            # Name is not taken.
            pass
        else:
            raise ValueError(
                f"The name {name} (namespace={namespace}) is already "
                "taken. Please use "
                "a different name or get the existing actor using "
                f"ray.get_actor('{name}', namespace='{namespace}')")

    if lifetime is None:
        detached = False
    elif lifetime == "detached":
        detached = True
    else:
        raise ValueError(
            "actor `lifetime` argument must be either `None` or 'detached'"
        )

    # Set the actor's default resources if not already set. First three
    # conditions are to check that no resources were specified in the
    # decorator. Last three conditions are to check that no resources were
    # specified when _remote() was called.
    if (meta.num_cpus is None and meta.num_gpus is None
            and meta.resources is None and meta.accelerator_type is None
            and num_cpus is None and num_gpus is None
            and resources is None and accelerator_type is None):
        # In the default case, actors acquire no resources for
        # their lifetime, and actor methods will require 1 CPU.
        cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
    else:
        # If any resources are specified (here or in decorator), then
        # all resources are acquired for the actor's lifetime and no
        # resources are associated with methods.
        cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                       if meta.num_cpus is None else meta.num_cpus)
        actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

    # LOCAL_MODE cannot handle cross_language
    if worker.mode == ray.LOCAL_MODE:
        assert not meta.is_cross_language, \
            "Cross language ActorClass cannot be executed locally."

    # Export the actor.
    if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                       worker.current_session_and_job):
        # If this actor class was not exported in this session and job,
        # we need to export this function again, because current GCS
        # doesn't have it.
        meta.last_export_session_and_job = (worker.current_session_and_job)
        # After serialize / deserialize modified class, the __module__
        # of modified class will be ray.cloudpickle.cloudpickle.
        # So, here pass actor_creation_function_descriptor to make
        # sure export actor class correct.
        worker.function_actor_manager.export_actor_class(
            meta.modified_class, meta.actor_creation_function_descriptor,
            meta.method_meta.methods.keys())

    resources = ray._private.utils.resources_from_resource_arguments(
        cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
        meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
        object_store_memory, resources, accelerator_type)

    # If the actor methods require CPU resources, then set the required
    # placement resources. If actor_placement_resources is empty, then
    # the required placement resources will be the same as resources.
    actor_placement_resources = {}
    assert actor_method_cpu in [0, 1]
    if actor_method_cpu == 1:
        actor_placement_resources = resources.copy()
        actor_placement_resources["CPU"] += 1
    if meta.is_cross_language:
        creation_args = cross_language.format_args(worker, args, kwargs)
    else:
        function_signature = meta.method_meta.signatures["__init__"]
        creation_args = signature.flatten_args(function_signature, args,
                                               kwargs)

    if placement_group_capture_child_tasks is None:
        placement_group_capture_child_tasks = (
            worker.should_capture_child_tasks_in_placement_group)

    placement_group = configure_placement_group_based_on_context(
        placement_group_capture_child_tasks,
        placement_group_bundle_index,
        resources,
        actor_placement_resources,
        meta.class_name,
        placement_group=placement_group)

    if runtime_env:
        if isinstance(runtime_env, str):
            # Serialzed protobuf runtime env from Ray client.
            new_runtime_env = runtime_env
        elif isinstance(runtime_env, ParsedRuntimeEnv):
            new_runtime_env = runtime_env.serialize()
        else:
            raise TypeError(f"Error runtime env type {type(runtime_env)}")
    else:
        new_runtime_env = meta.runtime_env

    # Build the concurrency-group spec passed to the core worker.
    concurrency_groups_dict = {}
    for cg_name in meta.concurrency_groups:
        concurrency_groups_dict[cg_name] = {
            "name": cg_name,
            "max_concurrency": meta.concurrency_groups[cg_name],
            "function_descriptors": [],
        }

    # Update methods
    for method_name in meta.method_meta.concurrency_group_for_methods:
        cg_name = meta.method_meta.concurrency_group_for_methods[
            method_name]
        assert cg_name in concurrency_groups_dict

        module_name = meta.actor_creation_function_descriptor.module_name
        class_name = meta.actor_creation_function_descriptor.class_name
        concurrency_groups_dict[cg_name]["function_descriptors"].append(
            PythonFunctionDescriptor(module_name, method_name, class_name))

    actor_id = worker.core_worker.create_actor(
        meta.language,
        meta.actor_creation_function_descriptor,
        creation_args,
        max_restarts or meta.max_restarts,
        max_task_retries or meta.max_task_retries,
        resources,
        actor_placement_resources,
        max_concurrency,
        detached,
        name if name is not None else "",
        namespace if namespace is not None else "",
        is_asyncio,
        placement_group.id,
        placement_group_bundle_index,
        placement_group_capture_child_tasks,
        # Store actor_method_cpu in actor handle's extension data.
        extension_data=str(actor_method_cpu),
        serialized_runtime_env=new_runtime_env or "{}",
        concurrency_groups_dict=concurrency_groups_dict or dict())

    actor_handle = ActorHandle(
        meta.language,
        actor_id,
        meta.method_meta.decorators,
        meta.method_meta.signatures,
        meta.method_meta.num_returns,
        actor_method_cpu,
        meta.actor_creation_function_descriptor,
        worker.current_session_and_job,
        original_handle=True)

    return actor_handle
def _remote(
        self,
        args=None,
        kwargs=None,
        num_returns=None,
        num_cpus=None,
        num_gpus=None,
        memory=None,
        object_store_memory=None,
        accelerator_type=None,
        resources=None,
        max_retries=None,
        retry_exceptions=None,
        placement_group="default",
        placement_group_bundle_index=-1,
        placement_group_capture_child_tasks=None,
        runtime_env=None,
        name="",
        scheduling_strategy: SchedulingStrategyT = None,
):
    """Submit the remote function for execution.

    Per-call options override the per-function defaults stored on ``self``
    (``self._num_returns``, ``self._max_retries``, etc.).  Returns a single
    ObjectRef when the task has one return value, or a list of ObjectRefs
    when it has several.
    """
    # In Ray client mode, forward the whole call to the client-side
    # conversion path instead of submitting through the local core worker.
    if client_mode_should_convert(auto_init=True):
        return client_mode_convert_function(
            self,
            args,
            kwargs,
            num_returns=num_returns,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            memory=memory,
            object_store_memory=object_store_memory,
            accelerator_type=accelerator_type,
            resources=resources,
            max_retries=max_retries,
            retry_exceptions=retry_exceptions,
            placement_group=placement_group,
            placement_group_bundle_index=placement_group_bundle_index,
            placement_group_capture_child_tasks=(
                placement_group_capture_child_tasks),
            runtime_env=runtime_env,
            name=name,
            scheduling_strategy=scheduling_strategy,
        )
    worker = ray.worker.global_worker
    worker.check_connected()

    # If this function was not exported in this session and job, we need to
    # export this function again, because the current GCS doesn't have it.
    if (not self._is_cross_language
            and self._last_export_session_and_job !=
            worker.current_session_and_job):
        self._function_descriptor = PythonFunctionDescriptor.from_function(
            self._function, self._uuid)
        # There is an interesting question here. If the remote function is
        # used by a subsequent driver (in the same script), should the
        # second driver pickle the function again? If yes, then the remote
        # function definition can differ in the second driver (e.g., if
        # variables in its closure have changed). We probably want the
        # behavior of the remote function in the second driver to be
        # independent of whether or not the function was invoked by the
        # first driver. This is an argument for repickling the function,
        # which we do here.
        try:
            self._pickled_function = pickle.dumps(self._function)
        except TypeError as e:
            msg = (
                "Could not serialize the function "
                f"{self._function_descriptor.repr}. Check "
                "https://docs.ray.io/en/master/serialization.html#troubleshooting "  # noqa
                "for more information.")
            raise TypeError(msg) from e

        self._last_export_session_and_job = worker.current_session_and_job
        worker.function_actor_manager.export(self)

    kwargs = {} if kwargs is None else kwargs
    args = [] if args is None else args

    # Fall back to the defaults captured when the remote function was
    # declared for any option not supplied on this call.
    if num_returns is None:
        num_returns = self._num_returns
    if max_retries is None:
        max_retries = self._max_retries
    if retry_exceptions is None:
        retry_exceptions = self._retry_exceptions
    if scheduling_strategy is None:
        scheduling_strategy = self._scheduling_strategy

    # Merge declaration-time and call-time resource arguments into one
    # resource dict for the scheduler.
    resources = ray._private.utils.resources_from_resource_arguments(
        self._num_cpus, self._num_gpus, self._memory,
        self._object_store_memory, self._resources, self._accelerator_type,
        num_cpus, num_gpus, memory, object_store_memory, resources,
        accelerator_type,
    )

    # The legacy placement_group option may not be combined with the newer
    # scheduling_strategy option.
    if (placement_group != "default") and (scheduling_strategy is not None):
        raise ValueError("Placement groups should be specified via the "
                         "scheduling_strategy option. "
                         "The placement_group option is deprecated.")

    # Resolve placement-group inputs (legacy option or a
    # PlacementGroupSchedulingStrategy) into a final scheduling strategy.
    if scheduling_strategy is None or isinstance(
            scheduling_strategy, PlacementGroupSchedulingStrategy):
        if isinstance(scheduling_strategy,
                      PlacementGroupSchedulingStrategy):
            # Unpack the strategy's fields so they flow through the same
            # configuration path as the legacy options.
            placement_group = scheduling_strategy.placement_group
            placement_group_bundle_index = (
                scheduling_strategy.placement_group_bundle_index)
            placement_group_capture_child_tasks = (
                scheduling_strategy.placement_group_capture_child_tasks)

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)
        if placement_group == "default":
            placement_group = self._placement_group
        placement_group = configure_placement_group_based_on_context(
            placement_group_capture_child_tasks,
            placement_group_bundle_index,
            resources,
            {},  # no placement_resources for tasks
            self._function_descriptor.function_name,
            placement_group=placement_group,
        )
        if not placement_group.is_empty:
            scheduling_strategy = PlacementGroupSchedulingStrategy(
                placement_group,
                placement_group_bundle_index,
                placement_group_capture_child_tasks,
            )
        else:
            scheduling_strategy = DEFAULT_SCHEDULING_STRATEGY

    # Empty/placeholder runtime envs fall back to the function's own.
    if not runtime_env or runtime_env == "{}":
        runtime_env = self._runtime_env

    def invocation(args, kwargs):
        # Flatten the call arguments into the wire format expected by the
        # core worker for this function's language.
        if self._is_cross_language:
            list_args = cross_language.format_args(worker, args, kwargs)
        elif not args and not kwargs and not self._function_signature:
            list_args = []
        else:
            list_args = ray._private.signature.flatten_args(
                self._function_signature, args, kwargs)
        if worker.mode == ray.worker.LOCAL_MODE:
            assert not self._is_cross_language, (
                "Cross language remote function "
                "cannot be executed locally.")
        object_refs = worker.core_worker.submit_task(
            self._language,
            self._function_descriptor,
            list_args,
            name,
            num_returns,
            resources,
            max_retries,
            retry_exceptions,
            scheduling_strategy,
            worker.debugger_breakpoint,
            runtime_env or "{}",
        )
        # Reset worker's debug context from the last "remote" command
        # (which applies only to this .remote call).
        worker.debugger_breakpoint = b""
        # Single return value -> one ObjectRef; several -> a list; zero
        # returns implicitly yields None.
        if len(object_refs) == 1:
            return object_refs[0]
        elif len(object_refs) > 1:
            return object_refs

    # A user-supplied decorator (e.g. from @ray.remote wrappers) may wrap
    # the submission path itself.
    if self._decorator is not None:
        invocation = self._decorator(invocation)

    return invocation(args, kwargs)
class ChannelCreationParametersBuilder:
    """Fluent builder collecting the initial parameters a streaming queue
    needs, one ``ChannelCreationParameter`` per peer actor handle.

    Class-level function descriptors name the reader/writer callback
    methods on the Java and Python JobWorker implementations; the Python
    descriptors can be swapped out process-wide via the two static
    setters below.
    """

    # Java JobWorker callback descriptors (class, method, JNI signature).
    _java_reader_async_function_descriptor = JavaFunctionDescriptor(
        "io.ray.streaming.runtime.worker.JobWorker", "onReaderMessage",
        "([B)V")
    _java_reader_sync_function_descriptor = JavaFunctionDescriptor(
        "io.ray.streaming.runtime.worker.JobWorker", "onReaderMessageSync",
        "([B)[B")
    _java_writer_async_function_descriptor = JavaFunctionDescriptor(
        "io.ray.streaming.runtime.worker.JobWorker", "onWriterMessage",
        "([B)V")
    _java_writer_sync_function_descriptor = JavaFunctionDescriptor(
        "io.ray.streaming.runtime.worker.JobWorker", "onWriterMessageSync",
        "([B)[B")
    # Python JobWorker callback descriptors (module, method, class).
    _python_reader_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.worker", "on_reader_message", "JobWorker")
    _python_reader_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.worker", "on_reader_message_sync", "JobWorker")
    _python_writer_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.worker", "on_writer_message", "JobWorker")
    _python_writer_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.worker", "on_writer_message_sync", "JobWorker")

    def __init__(self):
        # Accumulated ChannelCreationParameter objects, in append order.
        self._parameters = []

    def get_parameters(self):
        """Return the list of parameters built so far."""
        return self._parameters

    def build_input_queue_parameters(self, from_actors):
        """Add parameters for input queues fed by ``from_actors``.

        Input queues talk to the peers' *writer* callbacks.
        """
        return self.build_parameters(
            from_actors,
            self._java_writer_async_function_descriptor,
            self._java_writer_sync_function_descriptor,
            self._python_writer_async_function_descriptor,
            self._python_writer_sync_function_descriptor,
        )

    def build_output_queue_parameters(self, to_actors):
        """Add parameters for output queues draining into ``to_actors``.

        Output queues talk to the peers' *reader* callbacks.
        """
        return self.build_parameters(
            to_actors,
            self._java_reader_async_function_descriptor,
            self._java_reader_sync_function_descriptor,
            self._python_reader_async_function_descriptor,
            self._python_reader_sync_function_descriptor,
        )

    def build_parameters(self, actors, java_async_func, java_sync_func,
                         py_async_func, py_sync_func):
        """Append one creation parameter per handle in ``actors``,
        selecting the descriptor pair by each handle's actor language.
        Returns ``self`` for chaining.
        """
        for actor_handle in actors:
            if actor_handle._ray_actor_language == Language.PYTHON:
                async_func, sync_func = py_async_func, py_sync_func
            else:
                async_func, sync_func = java_async_func, java_sync_func
            self._parameters.append(
                _streaming.ChannelCreationParameter(
                    actor_handle._ray_actor_id, async_func, sync_func))
        return self

    @staticmethod
    def set_python_writer_function_descriptor(async_function, sync_function):
        """Replace the Python writer callback descriptors class-wide."""
        cls = ChannelCreationParametersBuilder
        cls._python_writer_async_function_descriptor = async_function
        cls._python_writer_sync_function_descriptor = sync_function

    @staticmethod
    def set_python_reader_function_descriptor(async_function, sync_function):
        """Replace the Python reader callback descriptors class-wide."""
        cls = ChannelCreationParametersBuilder
        cls._python_reader_async_function_descriptor = async_function
        cls._python_reader_sync_function_descriptor = sync_function