Example #1
    def init(self, env):
        """init streaming actor"""
        env = pickle.loads(env)
        self.env = env
        logger.info("init operator instance %s", self.processor_name)

        if env.config.channel_type == Config.NATIVE_CHANNEL:
            core_worker = ray.worker.global_worker.core_worker
            reader_async_func = PythonFunctionDescriptor(
                __name__, self.on_reader_message.__name__,
                self.__class__.__name__)
            reader_sync_func = PythonFunctionDescriptor(
                __name__, self.on_reader_message_sync.__name__,
                self.__class__.__name__)
            self.reader_client = _streaming.ReaderClient(
                core_worker, reader_async_func, reader_sync_func)
            writer_async_func = PythonFunctionDescriptor(
                __name__, self.on_writer_message.__name__,
                self.__class__.__name__)
            writer_sync_func = PythonFunctionDescriptor(
                __name__, self.on_writer_message_sync.__name__,
                self.__class__.__name__)
            self.writer_client = _streaming.WriterClient(
                core_worker, writer_async_func, writer_sync_func)
        if len(self.input_channels) > 0:
            self.input_gate = DataInput(env, self.input_channels)
            self.input_gate.init()
        if len(self.output_channels) > 0:
            self.output_gate = DataOutput(
                env, self.output_channels,
                self.operator.partitioning_strategies)
            self.output_gate.init()
        logger.info("init operator instance %s succeed", self.processor_name)
        return True

    def init_reader(self, input_channel, writer_actor):
        """Init the data reader for the given input channel."""
        conf = {Config.CHANNEL_TYPE: Config.NATIVE_CHANNEL}
        writer_async_func = PythonFunctionDescriptor(
            __name__, self.on_writer_message.__name__, self.__class__.__name__)
        writer_sync_func = PythonFunctionDescriptor(
            __name__, self.on_writer_message_sync.__name__,
            self.__class__.__name__)
        transfer.ChannelCreationParametersBuilder.set_python_writer_function_descriptor(
            writer_async_func, writer_sync_func)
        self.reader = transfer.DataReader(
            [input_channel], [pickle.loads(writer_actor)], conf)

    def init_writer(self, output_channel, reader_actor):
        """Init the data writer for the given output channel."""
        conf = {Config.CHANNEL_TYPE: Config.NATIVE_CHANNEL}
        reader_async_func = PythonFunctionDescriptor(
            __name__, self.on_reader_message.__name__, self.__class__.__name__)
        reader_sync_func = PythonFunctionDescriptor(
            __name__, self.on_reader_message_sync.__name__,
            self.__class__.__name__)
        transfer.ChannelCreationParametersBuilder.set_python_reader_function_descriptor(
            reader_async_func, reader_sync_func)
        self.writer = transfer.DataWriter(
            [output_channel], [pickle.loads(reader_actor)], conf)
        self.output_channel_id = transfer.ChannelID(output_channel)
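
All three methods above build descriptors from the current module (`__name__`) and a bound method's name. As a minimal standalone sketch of that three-argument form, assuming `PythonFunctionDescriptor` is imported from `ray._raylet` and reusing the streaming `JobWorker` names that appear in Example #16 purely for illustration:

from ray._raylet import PythonFunctionDescriptor

# Descriptor for an async reader callback: (module name, function name, class name).
reader_async_func = PythonFunctionDescriptor(
    "ray.streaming.runtime.worker", "on_reader_message", "JobWorker")
# Descriptor for the matching synchronous callback.
reader_sync_func = PythonFunctionDescriptor(
    "ray.streaming.runtime.worker", "on_reader_message_sync", "JobWorker")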
Example #4
    def load_actor_class(self, job_id, actor_creation_function_descriptor):
        """Load the actor class.

        Args:
            job_id: job ID of the actor.
            actor_creation_function_descriptor: Function descriptor of
                the actor constructor.

        Returns:
            The actor class.
        """
        function_id = actor_creation_function_descriptor.function_id
        # Check if the actor class already exists in the cache.
        actor_class = self._loaded_actor_classes.get(function_id, None)
        if actor_class is None:
            # Load actor class.
            if self._worker.load_code_from_local:
                job_id = ray.JobID.nil()
                # Load actor class from local code.
                actor_class = self._load_actor_class_from_local(
                    job_id, actor_creation_function_descriptor)
            else:
                # Load actor class from GCS.
                actor_class = self._load_actor_class_from_gcs(
                    job_id, actor_creation_function_descriptor)
            # Save the loaded actor class in cache.
            self._loaded_actor_classes[function_id] = actor_class

            # Generate execution info for the methods of this actor class.
            module_name = actor_creation_function_descriptor.module_name
            actor_class_name = actor_creation_function_descriptor.class_name
            actor_methods = inspect.getmembers(actor_class,
                                               predicate=is_function_or_method)
            for actor_method_name, actor_method in actor_methods:
                # The actor creation function descriptor uses a unique
                # function hash to resolve actor name conflicts. When
                # constructing an actor, the actor creation function
                # descriptor is the key used to look up the __init__
                # method's execution info, so we use it here as the
                # method descriptor when generating that execution info.
                if actor_method_name == "__init__":
                    method_descriptor = actor_creation_function_descriptor
                else:
                    method_descriptor = PythonFunctionDescriptor(
                        module_name, actor_method_name, actor_class_name)
                method_id = method_descriptor.function_id
                executor = self._make_actor_method_executor(
                    actor_method_name,
                    actor_method,
                    actor_imported=True,
                )
                self._function_execution_info[job_id][method_id] = (
                    FunctionExecutionInfo(
                        function=executor,
                        function_name=actor_method_name,
                        max_calls=0,
                    ))
                self._num_task_executions[job_id][method_id] = 0
            self._num_task_executions[job_id][function_id] = 0
        return actor_class
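
Example #4 caches the loaded actor class and its method executors under `function_id`, and reads `module_name` / `class_name` off the creation descriptor. A minimal sketch of that attribute access, assuming the same `ray._raylet` import; the module and class names below are hypothetical:

from ray._raylet import PythonFunctionDescriptor

desc = PythonFunctionDescriptor("my_module", "__init__", "MyActor")
# Plain attributes read by the example above.
print(desc.module_name)    # "my_module"
print(desc.class_name)     # "MyActor"
print(desc.function_name)  # "__init__"
# function_id is derived from the descriptor's fields and serves as the
# cache key for loaded actor classes and method execution info.
print(desc.function_id)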
Example #5
    def _ray_from_modified_class(cls, modified_class, class_id, max_restarts,
                                 num_cpus, num_gpus, memory,
                                 object_store_memory, resources):
        for attribute in [
                "remote", "_remote", "_ray_from_modified_class",
                "_ray_from_function_descriptor"
        ]:
            if hasattr(modified_class, attribute):
                logger.warning("Creating an actor from class {} overwrites "
                               "attribute {} of that class".format(
                                   modified_class.__name__, attribute))

        # Make sure the actor class we are constructing inherits from the
        # original class so it retains all class properties.
        class DerivedActorClass(cls, modified_class):
            pass

        name = "ActorClass({})".format(modified_class.__name__)
        DerivedActorClass.__module__ = modified_class.__module__
        DerivedActorClass.__name__ = name
        DerivedActorClass.__qualname__ = name
        # Construct the base object.
        self = DerivedActorClass.__new__(DerivedActorClass)
        # Actor creation function descriptor.
        actor_creation_function_descriptor = \
            PythonFunctionDescriptor.from_class(
                modified_class.__ray_actor_class__)

        self.__ray_metadata__ = ActorClassMetadata(
            Language.PYTHON, modified_class,
            actor_creation_function_descriptor, class_id, max_restarts,
            num_cpus, num_gpus, memory, object_store_memory, resources)

        return self
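
Example #5 derives the actor-creation descriptor from the class object itself via `from_class`. A hedged sketch of that call in isolation; `MyActor` is a placeholder class, whereas the example above passes Ray's wrapped `__ray_actor_class__`:

from ray._raylet import PythonFunctionDescriptor

class MyActor:  # placeholder class for illustration
    def ping(self):
        return "pong"

creation_desc = PythonFunctionDescriptor.from_class(MyActor)
# The descriptor records the class's module and name.
print(creation_desc.module_name, creation_desc.class_name)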
Example #6
    def __init__(self):
        writer_async_func = PythonFunctionDescriptor(
            __name__, self.on_writer_message.__name__, self.__class__.__name__)
        writer_sync_func = PythonFunctionDescriptor(
            __name__, self.on_writer_message_sync.__name__,
            self.__class__.__name__)
        self.writer_client = _streaming.WriterClient(
            writer_async_func, writer_sync_func)
        reader_async_func = PythonFunctionDescriptor(
            __name__, self.on_reader_message.__name__, self.__class__.__name__)
        reader_sync_func = PythonFunctionDescriptor(
            __name__, self.on_reader_message_sync.__name__,
            self.__class__.__name__)
        self.reader_client = _streaming.ReaderClient(
            reader_async_func, reader_sync_func)
        self.writer = None
        self.output_channel_id = None
        self.reader = None
Example #7
    def init(self, worker_context_bytes):
        worker_context = remote_call_pb.WorkerContext()
        worker_context.ParseFromString(worker_context_bytes)
        self.worker_context = worker_context
        self.task_id = worker_context.task_id
        self.config = worker_context.conf
        execution_graph = ExecutionGraph(worker_context.graph)
        self.execution_graph = execution_graph
        self.execution_task = self.execution_graph. \
            get_execution_task_by_task_id(self.task_id)
        self.execution_node = self.execution_graph. \
            get_execution_node_by_task_id(self.task_id)
        operator = self.execution_node.stream_operator
        self.stream_processor = processor.build_processor(operator)
        logger.info(
            "Initializing JobWorker, task_id: {}, operator: {}.".format(
                self.task_id, self.stream_processor))

        if self.config.get(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL):
            core_worker = ray.worker.global_worker.core_worker
            reader_async_func = PythonFunctionDescriptor(
                __name__, self.on_reader_message.__name__,
                self.__class__.__name__)
            reader_sync_func = PythonFunctionDescriptor(
                __name__, self.on_reader_message_sync.__name__,
                self.__class__.__name__)
            self.reader_client = _streaming.ReaderClient(
                core_worker, reader_async_func, reader_sync_func)
            writer_async_func = PythonFunctionDescriptor(
                __name__, self.on_writer_message.__name__,
                self.__class__.__name__)
            writer_sync_func = PythonFunctionDescriptor(
                __name__, self.on_writer_message_sync.__name__,
                self.__class__.__name__)
            self.writer_client = _streaming.WriterClient(
                core_worker, writer_async_func, writer_sync_func)

        self.task = self.create_stream_task()
        self.task.start()
        logger.info("JobWorker init succeed")
        return True
Example #8
    def _ray_from_modified_class(cls, modified_class, class_id, max_restarts,
                                 max_task_retries, num_cpus, num_gpus, memory,
                                 object_store_memory, resources,
                                 accelerator_type, runtime_env,
                                 concurrency_groups):
        for attribute in [
                "remote",
                "_remote",
                "_ray_from_modified_class",
                "_ray_from_function_descriptor",
        ]:
            if hasattr(modified_class, attribute):
                logger.warning("Creating an actor from class "
                               f"{modified_class.__name__} overwrites "
                               f"attribute {attribute} of that class")

        # Make sure the actor class we are constructing inherits from the
        # original class so it retains all class properties.
        class DerivedActorClass(cls, modified_class):
            pass

        name = f"ActorClass({modified_class.__name__})"
        DerivedActorClass.__module__ = modified_class.__module__
        DerivedActorClass.__name__ = name
        DerivedActorClass.__qualname__ = name
        # Construct the base object.
        self = DerivedActorClass.__new__(DerivedActorClass)
        # Actor creation function descriptor.
        actor_creation_function_descriptor = \
            PythonFunctionDescriptor.from_class(
                modified_class.__ray_actor_class__)

        # Parse local pip/conda config files here. If we instead did it in
        # .remote(), it would get run in the Ray Client server, which runs on
        # a remote node where the files aren't available.
        if runtime_env:
            if isinstance(runtime_env, str):
                new_runtime_env = runtime_env
            else:
                new_runtime_env = ParsedRuntimeEnv(runtime_env).serialize()
        else:
            new_runtime_env = None

        self.__ray_metadata__ = ActorClassMetadata(
            Language.PYTHON, modified_class,
            actor_creation_function_descriptor, class_id, max_restarts,
            max_task_retries, num_cpus, num_gpus, memory, object_store_memory,
            resources, accelerator_type, new_runtime_env, concurrency_groups)

        return self
Example #9
    def load_actor_class(self, job_id, function_descriptor):
        """Load the actor class.

        Args:
            job_id: job ID of the actor.
            function_descriptor: Function descriptor of the actor constructor.

        Returns:
            The actor class.
        """
        function_id = function_descriptor.function_id
        # Check if the actor class already exists in the cache.
        actor_class = self._loaded_actor_classes.get(function_id, None)
        if actor_class is None:
            # Load actor class.
            if self._worker.load_code_from_local:
                job_id = ray.JobID.nil()
                # Load actor class from local code.
                actor_class = self._load_actor_from_local(
                    job_id, function_descriptor)
            else:
                # Load actor class from GCS.
                actor_class = self._load_actor_class_from_gcs(
                    job_id, function_descriptor)
            # Save the loaded actor class in cache.
            self._loaded_actor_classes[function_id] = actor_class

            # Generate execution info for the methods of this actor class.
            module_name = function_descriptor.module_name
            actor_class_name = function_descriptor.class_name
            actor_methods = inspect.getmembers(actor_class,
                                               predicate=is_function_or_method)
            for actor_method_name, actor_method in actor_methods:
                method_descriptor = PythonFunctionDescriptor(
                    module_name, actor_method_name, actor_class_name)
                method_id = method_descriptor.function_id
                executor = self._make_actor_method_executor(
                    actor_method_name,
                    actor_method,
                    actor_imported=True,
                )
                self._function_execution_info[job_id][method_id] = (
                    FunctionExecutionInfo(
                        function=executor,
                        function_name=actor_method_name,
                        max_calls=0,
                    ))
                self._num_task_executions[job_id][method_id] = 0
            self._num_task_executions[job_id][function_id] = 0
        return actor_class
Example #10
    def __init__(self,
                 language,
                 actor_id,
                 method_decorators,
                 method_signatures,
                 method_num_return_vals,
                 actor_method_cpus,
                 is_cross_language,
                 actor_creation_function_descriptor,
                 session_and_job,
                 original_handle=False):
        self._ray_actor_language = language
        self._ray_actor_id = actor_id
        self._ray_original_handle = original_handle
        self._ray_method_decorators = method_decorators
        self._ray_method_signatures = method_signatures
        self._ray_method_num_return_vals = method_num_return_vals
        self._ray_actor_method_cpus = actor_method_cpus
        self._ray_session_and_job = session_and_job
        self._ray_is_cross_language = is_cross_language
        self._ray_actor_creation_function_descriptor = \
            actor_creation_function_descriptor
        self._ray_function_descriptor = {}

        if not self._ray_is_cross_language:
            assert isinstance(actor_creation_function_descriptor,
                              PythonFunctionDescriptor)
            module_name = actor_creation_function_descriptor.module_name
            class_name = actor_creation_function_descriptor.class_name
            for method_name in self._ray_method_signatures.keys():
                function_descriptor = PythonFunctionDescriptor(
                    module_name, method_name, class_name)
                self._ray_function_descriptor[
                    method_name] = function_descriptor
                method = ActorMethod(
                    self,
                    method_name,
                    self._ray_method_num_return_vals[method_name],
                    decorator=self._ray_method_decorators.get(method_name))
                setattr(self, method_name, method)
Example #11
    def _remote(self,
                args=None,
                kwargs=None,
                num_returns=None,
                num_cpus=None,
                num_gpus=None,
                memory=None,
                object_store_memory=None,
                accelerator_type=None,
                resources=None,
                max_retries=None,
                placement_group=None,
                placement_group_bundle_index=-1,
                placement_group_capture_child_tasks=None,
                runtime_env=None,
                override_environment_variables=None,
                name=""):
        """Submit the remote function for execution."""
        if client_mode_should_convert():
            return client_mode_convert_function(
                self,
                args,
                kwargs,
                num_returns=num_returns,
                num_cpus=num_cpus,
                num_gpus=num_gpus,
                memory=memory,
                object_store_memory=object_store_memory,
                accelerator_type=accelerator_type,
                resources=resources,
                max_retries=max_retries,
                placement_group=placement_group,
                placement_group_bundle_index=placement_group_bundle_index,
                placement_group_capture_child_tasks=(
                    placement_group_capture_child_tasks),
                runtime_env=runtime_env,
                override_environment_variables=override_environment_variables,
                name=name)

        worker = ray.worker.global_worker
        worker.check_connected()

        # If this function was not exported in this session and job, we need to
        # export this function again, because the current GCS doesn't have it.
        if not self._is_cross_language and \
                self._last_export_session_and_job != \
                worker.current_session_and_job:
            # There is an interesting question here. If the remote function is
            # used by a subsequent driver (in the same script), should the
            # second driver pickle the function again? If yes, then the remote
            # function definition can differ in the second driver (e.g., if
            # variables in its closure have changed). We probably want the
            # behavior of the remote function in the second driver to be
            # independent of whether or not the function was invoked by the
            # first driver. This is an argument for repickling the function,
            # which we do here.
            self._pickled_function = pickle.dumps(self._function)

            self._function_descriptor = PythonFunctionDescriptor.from_function(
                self._function, self._pickled_function)

            self._last_export_session_and_job = worker.current_session_and_job
            worker.function_actor_manager.export(self)

        kwargs = {} if kwargs is None else kwargs
        args = [] if args is None else args

        if num_returns is None:
            num_returns = self._num_returns
        if max_retries is None:
            max_retries = self._max_retries

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)

        if placement_group is None:
            if placement_group_capture_child_tasks:
                placement_group = get_current_placement_group()

        if not placement_group:
            placement_group = PlacementGroup.empty()

        check_placement_group_index(placement_group,
                                    placement_group_bundle_index)

        resources = ray._private.utils.resources_from_resource_arguments(
            self._num_cpus, self._num_gpus, self._memory,
            self._object_store_memory, self._resources, self._accelerator_type,
            num_cpus, num_gpus, memory, object_store_memory, resources,
            accelerator_type)

        if runtime_env:
            parsed_runtime_env = runtime_support.RuntimeEnvDict(runtime_env)
            override_environment_variables = (
                parsed_runtime_env.to_worker_env_vars(
                    override_environment_variables))
        else:
            parsed_runtime_env = runtime_support.RuntimeEnvDict({})

        def invocation(args, kwargs):
            if self._is_cross_language:
                list_args = cross_language.format_args(worker, args, kwargs)
            elif not args and not kwargs and not self._function_signature:
                list_args = []
            else:
                list_args = ray._private.signature.flatten_args(
                    self._function_signature, args, kwargs)

            if worker.mode == ray.worker.LOCAL_MODE:
                assert not self._is_cross_language, \
                    "Cross language remote function " \
                    "cannot be executed locally."
            object_refs = worker.core_worker.submit_task(
                self._language,
                self._function_descriptor,
                list_args,
                name,
                num_returns,
                resources,
                max_retries,
                placement_group.id,
                placement_group_bundle_index,
                placement_group_capture_child_tasks,
                worker.debugger_breakpoint,
                parsed_runtime_env,
                override_environment_variables=override_environment_variables
                or dict())
            # Reset worker's debug context from the last "remote" command
            # (which applies only to this .remote call).
            worker.debugger_breakpoint = b""
            if len(object_refs) == 1:
                return object_refs[0]
            elif len(object_refs) > 1:
                return object_refs

        if self._decorator is not None:
            invocation = self._decorator(invocation)

        return invocation(args, kwargs)
Example #12
    def _remote(self,
                args=None,
                kwargs=None,
                num_return_vals=None,
                is_direct_call=None,
                num_cpus=None,
                num_gpus=None,
                memory=None,
                object_store_memory=None,
                resources=None,
                max_retries=None):
        """Submit the remote function for execution."""
        worker = ray.worker.get_global_worker()
        worker.check_connected()

        # If this function was not exported in this session and job, we need to
        # export this function again, because the current GCS doesn't have it.
        if not self._is_cross_language and \
                self._last_export_session_and_job != \
                worker.current_session_and_job:
            # There is an interesting question here. If the remote function is
            # used by a subsequent driver (in the same script), should the
            # second driver pickle the function again? If yes, then the remote
            # function definition can differ in the second driver (e.g., if
            # variables in its closure have changed). We probably want the
            # behavior of the remote function in the second driver to be
            # independent of whether or not the function was invoked by the
            # first driver. This is an argument for repickling the function,
            # which we do here.
            self._pickled_function = pickle.dumps(self._function)

            self._function_descriptor = PythonFunctionDescriptor.from_function(
                self._function, self._pickled_function)

            self._last_export_session_and_job = worker.current_session_and_job
            worker.function_actor_manager.export(self)

        kwargs = {} if kwargs is None else kwargs
        args = [] if args is None else args

        if num_return_vals is None:
            num_return_vals = self._num_return_vals
        if is_direct_call is not None and not is_direct_call:
            raise ValueError("Non-direct call tasks are no longer supported.")
        if max_retries is None:
            max_retries = self._max_retries

        resources = ray.utils.resources_from_resource_arguments(
            self._num_cpus, self._num_gpus, self._memory,
            self._object_store_memory, self._resources, num_cpus, num_gpus,
            memory, object_store_memory, resources)

        def invocation(args, kwargs):
            if self._is_cross_language:
                list_args = cross_language.format_args(worker, args, kwargs)
            elif not args and not kwargs and not self._function_signature:
                list_args = []
            else:
                list_args = ray.signature.flatten_args(
                    self._function_signature, args, kwargs)

            if worker.mode == ray.worker.LOCAL_MODE:
                assert not self._is_cross_language, \
                    "Cross language remote function " \
                    "cannot be executed locally."
                object_ids = worker.local_mode_manager.execute(
                    self._function, self._function_descriptor, args, kwargs,
                    num_return_vals)
            else:
                object_ids = worker.core_worker.submit_task(
                    self._language, self._function_descriptor, list_args,
                    num_return_vals, resources, max_retries)

            if len(object_ids) == 1:
                return object_ids[0]
            elif len(object_ids) > 1:
                return object_ids

        if self._decorator is not None:
            invocation = self._decorator(invocation)

        return invocation(args, kwargs)
Example #13
    def _remote(self, args=None, kwargs=None, **task_options):
        """Submit the remote function for execution."""
        # We pop the "max_calls" coming from "@ray.remote" here. We no longer need
        # it in "_remote()".
        task_options.pop("max_calls", None)
        if client_mode_should_convert(auto_init=True):
            return client_mode_convert_function(self, args, kwargs,
                                                **task_options)

        worker = ray.worker.global_worker
        worker.check_connected()

        # If this function was not exported in this session and job, we need to
        # export this function again, because the current GCS doesn't have it.
        if (not self._is_cross_language and self._last_export_session_and_job
                != worker.current_session_and_job):
            self._function_descriptor = PythonFunctionDescriptor.from_function(
                self._function, self._uuid)
            # There is an interesting question here. If the remote function is
            # used by a subsequent driver (in the same script), should the
            # second driver pickle the function again? If yes, then the remote
            # function definition can differ in the second driver (e.g., if
            # variables in its closure have changed). We probably want the
            # behavior of the remote function in the second driver to be
            # independent of whether or not the function was invoked by the
            # first driver. This is an argument for repickling the function,
            # which we do here.
            try:
                self._pickled_function = pickle.dumps(self._function)
            except TypeError as e:
                msg = (
                    "Could not serialize the function "
                    f"{self._function_descriptor.repr}. Check "
                    "https://docs.ray.io/en/master/ray-core/objects/serialization.html#troubleshooting "  # noqa
                    "for more information.")
                raise TypeError(msg) from e

            self._last_export_session_and_job = worker.current_session_and_job
            worker.function_actor_manager.export(self)

        kwargs = {} if kwargs is None else kwargs
        args = [] if args is None else args

        # fill task required options
        for k, v in ray_option_utils.task_options.items():
            task_options[k] = task_options.get(k, v.default_value)
        # "max_calls" already takes effects and should not apply again.
        # Remove the default value here.
        task_options.pop("max_calls", None)

        # TODO(suquark): cleanup these fields
        name = task_options["name"]
        runtime_env = parse_runtime_env(task_options["runtime_env"])
        placement_group = task_options["placement_group"]
        placement_group_bundle_index = task_options[
            "placement_group_bundle_index"]
        placement_group_capture_child_tasks = task_options[
            "placement_group_capture_child_tasks"]
        scheduling_strategy = task_options["scheduling_strategy"]
        num_returns = task_options["num_returns"]
        max_retries = task_options["max_retries"]
        retry_exceptions = task_options["retry_exceptions"]

        resources = ray._private.utils.resources_from_ray_options(task_options)

        if scheduling_strategy is None or isinstance(
                scheduling_strategy, PlacementGroupSchedulingStrategy):
            if isinstance(scheduling_strategy,
                          PlacementGroupSchedulingStrategy):
                placement_group = scheduling_strategy.placement_group
                placement_group_bundle_index = (
                    scheduling_strategy.placement_group_bundle_index)
                placement_group_capture_child_tasks = (
                    scheduling_strategy.placement_group_capture_child_tasks)

            if placement_group_capture_child_tasks is None:
                placement_group_capture_child_tasks = (
                    worker.should_capture_child_tasks_in_placement_group)
            placement_group = configure_placement_group_based_on_context(
                placement_group_capture_child_tasks,
                placement_group_bundle_index,
                resources,
                {},  # no placement_resources for tasks
                self._function_descriptor.function_name,
                placement_group=placement_group,
            )
            if not placement_group.is_empty:
                scheduling_strategy = PlacementGroupSchedulingStrategy(
                    placement_group,
                    placement_group_bundle_index,
                    placement_group_capture_child_tasks,
                )
            else:
                scheduling_strategy = "DEFAULT"

        serialized_runtime_env_info = None
        if runtime_env is not None:
            serialized_runtime_env_info = get_runtime_env_info(
                runtime_env,
                is_job_runtime_env=False,
                serialize=True,
            )

        def invocation(args, kwargs):
            if self._is_cross_language:
                list_args = cross_language.format_args(worker, args, kwargs)
            elif not args and not kwargs and not self._function_signature:
                list_args = []
            else:
                list_args = ray._private.signature.flatten_args(
                    self._function_signature, args, kwargs)

            if worker.mode == ray.worker.LOCAL_MODE:
                assert (
                    not self._is_cross_language
                ), "Cross language remote function cannot be executed locally."
            object_refs = worker.core_worker.submit_task(
                self._language,
                self._function_descriptor,
                list_args,
                name if name is not None else "",
                num_returns,
                resources,
                max_retries,
                retry_exceptions,
                scheduling_strategy,
                worker.debugger_breakpoint,
                serialized_runtime_env_info or "{}",
            )
            # Reset worker's debug context from the last "remote" command
            # (which applies only to this .remote call).
            worker.debugger_breakpoint = b""
            if len(object_refs) == 1:
                return object_refs[0]
            elif len(object_refs) > 1:
                return object_refs

        if self._decorator is not None:
            invocation = self._decorator(invocation)

        return invocation(args, kwargs)
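
Examples #11 and #12 call `from_function(function, pickled_function)`, while the newer code in Examples #13 and #15 passes a UUID as the second argument; in both variants the second argument is only used to derive the descriptor's function hash. A hedged sketch of the newer form (the function `add` is a placeholder, and the exact signature depends on the installed Ray version):

import uuid

from ray._raylet import PythonFunctionDescriptor

def add(x, y):  # placeholder function for illustration
    return x + y

# Newer Ray (as in Examples #13 and #15): a UUID feeds the function hash.
desc = PythonFunctionDescriptor.from_function(add, uuid.uuid4())
# Older Ray (as in Examples #11 and #12) passed the pickled function instead:
#     desc = PythonFunctionDescriptor.from_function(add, pickle.dumps(add))
print(desc.function_name)  # "add"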
Example #14
    def _remote(self,
                args=None,
                kwargs=None,
                num_cpus=None,
                num_gpus=None,
                memory=None,
                object_store_memory=None,
                resources=None,
                accelerator_type=None,
                max_concurrency=None,
                max_restarts=None,
                max_task_retries=None,
                name=None,
                namespace=None,
                lifetime=None,
                placement_group="default",
                placement_group_bundle_index=-1,
                placement_group_capture_child_tasks=None,
                runtime_env=None):
        """Create an actor.

        This method allows more flexibility than the remote method because
        resource requirements can be specified and override the defaults in the
        decorator.

        Args:
            args: The arguments to forward to the actor constructor.
            kwargs: The keyword arguments to forward to the actor constructor.
            num_cpus: The number of CPUs required by the actor creation task.
            num_gpus: The number of GPUs required by the actor creation task.
            memory: Restrict the heap memory usage of this actor.
            object_store_memory: Restrict the object store memory used by
                this actor when creating objects.
            resources: The custom resources required by the actor creation
                task.
            max_concurrency: The max number of concurrent calls to allow for
                this actor. This only works with direct actor calls. The max
                concurrency defaults to 1 for threaded execution, and 1000 for
                asyncio execution. Note that the execution order is not
                guaranteed when max_concurrency > 1.
            name: The globally unique name for the actor, which can be used
                to retrieve the actor via ray.get_actor(name) as long as the
                actor is still alive.
            namespace: Override the namespace to use for the actor. By default,
                actors are created in an anonymous namespace. The actor can
                be retrieved via ray.get_actor(name=name, namespace=namespace).
            lifetime: Either `None`, which means the actor will fate-share
                with its creator and be deleted once its reference count
                drops to zero, or "detached", which means the actor will live
                as a global object independent of the creator.
            placement_group: the placement group this actor belongs to,
                or None if it doesn't belong to any group. Setting to "default"
                autodetects the placement group based on the current setting of
                placement_group_capture_child_tasks.
            placement_group_bundle_index: the index of the bundle
                if the actor belongs to a placement group, which may be -1 to
                specify any available bundle.
            placement_group_capture_child_tasks: Whether or not child tasks
                of this actor should implicitly use the same placement group
                as their parent. It is True by default.
            runtime_env (Dict[str, Any]): Specifies the runtime environment for
                this actor or task and its children (see
                :ref:`runtime-environments` for details).  This API is in beta
                and may change before becoming stable.

        Returns:
            A handle to the newly created actor.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}
        meta = self.__ray_metadata__
        actor_has_async_methods = len(
            inspect.getmembers(meta.modified_class,
                               predicate=inspect.iscoroutinefunction)) > 0
        is_asyncio = actor_has_async_methods

        if max_concurrency is None:
            if is_asyncio:
                max_concurrency = 1000
            else:
                max_concurrency = 1

        if max_concurrency < 1:
            raise ValueError("max_concurrency must be >= 1")

        if client_mode_should_convert(auto_init=True):
            return client_mode_convert_actor(
                self,
                args,
                kwargs,
                num_cpus=num_cpus,
                num_gpus=num_gpus,
                memory=memory,
                object_store_memory=object_store_memory,
                resources=resources,
                accelerator_type=accelerator_type,
                max_concurrency=max_concurrency,
                max_restarts=max_restarts,
                max_task_retries=max_task_retries,
                name=name,
                namespace=namespace,
                lifetime=lifetime,
                placement_group=placement_group,
                placement_group_bundle_index=placement_group_bundle_index,
                placement_group_capture_child_tasks=(
                    placement_group_capture_child_tasks),
                runtime_env=runtime_env)

        worker = ray.worker.global_worker
        worker.check_connected()

        if name is not None:
            if not isinstance(name, str):
                raise TypeError(
                    f"name must be None or a string, got: '{type(name)}'.")
            elif name == "":
                raise ValueError("Actor name cannot be an empty string.")
        if namespace is not None:
            ray._private.utils.validate_namespace(namespace)

        # Check whether the name is already taken.
        # TODO(edoakes): this check has a race condition because two drivers
        # could pass the check and then create the same named actor. We should
        # instead check this when we create the actor, but that's currently an
        # async call.
        if name is not None:
            try:
                ray.get_actor(name, namespace=namespace)
            except ValueError:  # Name is not taken.
                pass
            else:
                raise ValueError(
                    f"The name {name} (namespace={namespace}) is already "
                    "taken. Please use "
                    "a different name or get the existing actor using "
                    f"ray.get_actor('{name}', namespace='{namespace}')")

        if lifetime is None:
            detached = False
        elif lifetime == "detached":
            detached = True
        else:
            raise ValueError(
                "actor `lifetime` argument must be either `None` or 'detached'"
            )

        # Set the actor's default resources if not already set. The first
        # four conditions check that no resources were specified in the
        # decorator. The last four check that no resources were specified
        # when _remote() was called.
        if (meta.num_cpus is None and meta.num_gpus is None
                and meta.resources is None and meta.accelerator_type is None
                and num_cpus is None and num_gpus is None and resources is None
                and accelerator_type is None):
            # In the default case, actors acquire no resources for
            # their lifetime, and actor methods will require 1 CPU.
            cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
            actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
        else:
            # If any resources are specified (here or in decorator), then
            # all resources are acquired for the actor's lifetime and no
            # resources are associated with methods.
            cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                           if meta.num_cpus is None else meta.num_cpus)
            actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

        # LOCAL_MODE cannot handle cross_language
        if worker.mode == ray.LOCAL_MODE:
            assert not meta.is_cross_language, \
                "Cross language ActorClass cannot be executed locally."

        # Export the actor.
        if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                           worker.current_session_and_job):
            # If this actor class was not exported in this session and job,
            # we need to export it again because the current GCS doesn't
            # have it.
            meta.last_export_session_and_job = (worker.current_session_and_job)
            # After the modified class is serialized and deserialized, its
            # __module__ becomes ray.cloudpickle.cloudpickle, so we pass
            # actor_creation_function_descriptor here to make sure the
            # actor class is exported correctly.
            worker.function_actor_manager.export_actor_class(
                meta.modified_class, meta.actor_creation_function_descriptor,
                meta.method_meta.methods.keys())

        resources = ray._private.utils.resources_from_resource_arguments(
            cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
            meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
            object_store_memory, resources, accelerator_type)

        # If the actor methods require CPU resources, then set the required
        # placement resources. If actor_placement_resources is empty, then
        # the required placement resources will be the same as resources.
        actor_placement_resources = {}
        assert actor_method_cpu in [0, 1]
        if actor_method_cpu == 1:
            actor_placement_resources = resources.copy()
            actor_placement_resources["CPU"] += 1
        if meta.is_cross_language:
            creation_args = cross_language.format_args(worker, args, kwargs)
        else:
            function_signature = meta.method_meta.signatures["__init__"]
            creation_args = signature.flatten_args(function_signature, args,
                                                   kwargs)

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)
        placement_group = configure_placement_group_based_on_context(
            placement_group_capture_child_tasks,
            placement_group_bundle_index,
            resources,
            actor_placement_resources,
            meta.class_name,
            placement_group=placement_group)

        if runtime_env:
            if isinstance(runtime_env, str):
                # Serialized protobuf runtime env from the Ray client.
                new_runtime_env = runtime_env
            elif isinstance(runtime_env, ParsedRuntimeEnv):
                new_runtime_env = runtime_env.serialize()
            else:
                raise TypeError(f"Error runtime env type {type(runtime_env)}")
        else:
            new_runtime_env = meta.runtime_env

        concurrency_groups_dict = {}
        for cg_name in meta.concurrency_groups:
            concurrency_groups_dict[cg_name] = {
                "name": cg_name,
                "max_concurrency": meta.concurrency_groups[cg_name],
                "function_descriptors": [],
            }

        # Update methods
        for method_name in meta.method_meta.concurrency_group_for_methods:
            cg_name = meta.method_meta.concurrency_group_for_methods[
                method_name]
            assert cg_name in concurrency_groups_dict

            module_name = meta.actor_creation_function_descriptor.module_name
            class_name = meta.actor_creation_function_descriptor.class_name
            concurrency_groups_dict[cg_name]["function_descriptors"].append(
                PythonFunctionDescriptor(module_name, method_name, class_name))

        actor_id = worker.core_worker.create_actor(
            meta.language,
            meta.actor_creation_function_descriptor,
            creation_args,
            max_restarts or meta.max_restarts,
            max_task_retries or meta.max_task_retries,
            resources,
            actor_placement_resources,
            max_concurrency,
            detached,
            name if name is not None else "",
            namespace if namespace is not None else "",
            is_asyncio,
            placement_group.id,
            placement_group_bundle_index,
            placement_group_capture_child_tasks,
            # Store actor_method_cpu in actor handle's extension data.
            extension_data=str(actor_method_cpu),
            serialized_runtime_env=new_runtime_env or "{}",
            concurrency_groups_dict=concurrency_groups_dict or dict())

        actor_handle = ActorHandle(meta.language,
                                   actor_id,
                                   meta.method_meta.decorators,
                                   meta.method_meta.signatures,
                                   meta.method_meta.num_returns,
                                   actor_method_cpu,
                                   meta.actor_creation_function_descriptor,
                                   worker.current_session_and_job,
                                   original_handle=True)

        return actor_handle
Example #15
    def _remote(
        self,
        args=None,
        kwargs=None,
        num_returns=None,
        num_cpus=None,
        num_gpus=None,
        memory=None,
        object_store_memory=None,
        accelerator_type=None,
        resources=None,
        max_retries=None,
        retry_exceptions=None,
        placement_group="default",
        placement_group_bundle_index=-1,
        placement_group_capture_child_tasks=None,
        runtime_env=None,
        name="",
        scheduling_strategy: SchedulingStrategyT = None,
    ):
        """Submit the remote function for execution."""

        if client_mode_should_convert(auto_init=True):
            return client_mode_convert_function(
                self,
                args,
                kwargs,
                num_returns=num_returns,
                num_cpus=num_cpus,
                num_gpus=num_gpus,
                memory=memory,
                object_store_memory=object_store_memory,
                accelerator_type=accelerator_type,
                resources=resources,
                max_retries=max_retries,
                retry_exceptions=retry_exceptions,
                placement_group=placement_group,
                placement_group_bundle_index=placement_group_bundle_index,
                placement_group_capture_child_tasks=(
                    placement_group_capture_child_tasks),
                runtime_env=runtime_env,
                name=name,
                scheduling_strategy=scheduling_strategy,
            )

        worker = ray.worker.global_worker
        worker.check_connected()

        # If this function was not exported in this session and job, we need to
        # export this function again, because the current GCS doesn't have it.
        if (not self._is_cross_language and self._last_export_session_and_job
                != worker.current_session_and_job):
            self._function_descriptor = PythonFunctionDescriptor.from_function(
                self._function, self._uuid)
            # There is an interesting question here. If the remote function is
            # used by a subsequent driver (in the same script), should the
            # second driver pickle the function again? If yes, then the remote
            # function definition can differ in the second driver (e.g., if
            # variables in its closure have changed). We probably want the
            # behavior of the remote function in the second driver to be
            # independent of whether or not the function was invoked by the
            # first driver. This is an argument for repickling the function,
            # which we do here.
            try:
                self._pickled_function = pickle.dumps(self._function)
            except TypeError as e:
                msg = (
                    "Could not serialize the function "
                    f"{self._function_descriptor.repr}. Check "
                    "https://docs.ray.io/en/master/serialization.html#troubleshooting "  # noqa
                    "for more information.")
                raise TypeError(msg) from e

            self._last_export_session_and_job = worker.current_session_and_job
            worker.function_actor_manager.export(self)

        kwargs = {} if kwargs is None else kwargs
        args = [] if args is None else args

        if num_returns is None:
            num_returns = self._num_returns
        if max_retries is None:
            max_retries = self._max_retries
        if retry_exceptions is None:
            retry_exceptions = self._retry_exceptions
        if scheduling_strategy is None:
            scheduling_strategy = self._scheduling_strategy

        resources = ray._private.utils.resources_from_resource_arguments(
            self._num_cpus,
            self._num_gpus,
            self._memory,
            self._object_store_memory,
            self._resources,
            self._accelerator_type,
            num_cpus,
            num_gpus,
            memory,
            object_store_memory,
            resources,
            accelerator_type,
        )

        if (placement_group != "default") and (scheduling_strategy
                                               is not None):
            raise ValueError("Placement groups should be specified via the "
                             "scheduling_strategy option. "
                             "The placement_group option is deprecated.")

        if scheduling_strategy is None or isinstance(
                scheduling_strategy, PlacementGroupSchedulingStrategy):
            if isinstance(scheduling_strategy,
                          PlacementGroupSchedulingStrategy):
                placement_group = scheduling_strategy.placement_group
                placement_group_bundle_index = (
                    scheduling_strategy.placement_group_bundle_index)
                placement_group_capture_child_tasks = (
                    scheduling_strategy.placement_group_capture_child_tasks)

            if placement_group_capture_child_tasks is None:
                placement_group_capture_child_tasks = (
                    worker.should_capture_child_tasks_in_placement_group)
            if placement_group == "default":
                placement_group = self._placement_group
            placement_group = configure_placement_group_based_on_context(
                placement_group_capture_child_tasks,
                placement_group_bundle_index,
                resources,
                {},  # no placement_resources for tasks
                self._function_descriptor.function_name,
                placement_group=placement_group,
            )
            if not placement_group.is_empty:
                scheduling_strategy = PlacementGroupSchedulingStrategy(
                    placement_group,
                    placement_group_bundle_index,
                    placement_group_capture_child_tasks,
                )
            else:
                scheduling_strategy = DEFAULT_SCHEDULING_STRATEGY

        if not runtime_env or runtime_env == "{}":
            runtime_env = self._runtime_env

        def invocation(args, kwargs):
            if self._is_cross_language:
                list_args = cross_language.format_args(worker, args, kwargs)
            elif not args and not kwargs and not self._function_signature:
                list_args = []
            else:
                list_args = ray._private.signature.flatten_args(
                    self._function_signature, args, kwargs)

            if worker.mode == ray.worker.LOCAL_MODE:
                assert not self._is_cross_language, (
                    "Cross language remote function "
                    "cannot be executed locally.")
            object_refs = worker.core_worker.submit_task(
                self._language,
                self._function_descriptor,
                list_args,
                name,
                num_returns,
                resources,
                max_retries,
                retry_exceptions,
                scheduling_strategy,
                worker.debugger_breakpoint,
                runtime_env or "{}",
            )
            # Reset worker's debug context from the last "remote" command
            # (which applies only to this .remote call).
            worker.debugger_breakpoint = b""
            if len(object_refs) == 1:
                return object_refs[0]
            elif len(object_refs) > 1:
                return object_refs

        if self._decorator is not None:
            invocation = self._decorator(invocation)

        return invocation(args, kwargs)
Example #16
class ChannelCreationParametersBuilder:
    """
    Wraps the initial parameters needed by a streaming queue.
    """

    _java_reader_async_function_descriptor = JavaFunctionDescriptor(
        "io.ray.streaming.runtime.worker.JobWorker", "onReaderMessage",
        "([B)V")
    _java_reader_sync_function_descriptor = JavaFunctionDescriptor(
        "io.ray.streaming.runtime.worker.JobWorker", "onReaderMessageSync",
        "([B)[B")
    _java_writer_async_function_descriptor = JavaFunctionDescriptor(
        "io.ray.streaming.runtime.worker.JobWorker", "onWriterMessage",
        "([B)V")
    _java_writer_sync_function_descriptor = JavaFunctionDescriptor(
        "io.ray.streaming.runtime.worker.JobWorker", "onWriterMessageSync",
        "([B)[B")
    _python_reader_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.worker", "on_reader_message", "JobWorker")
    _python_reader_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.worker", "on_reader_message_sync", "JobWorker")
    _python_writer_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.worker", "on_writer_message", "JobWorker")
    _python_writer_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.worker", "on_writer_message_sync", "JobWorker")

    def get_parameters(self):
        return self._parameters

    def __init__(self):
        self._parameters = []

    def build_input_queue_parameters(self, from_actors):
        self.build_parameters(
            from_actors,
            self._java_writer_async_function_descriptor,
            self._java_writer_sync_function_descriptor,
            self._python_writer_async_function_descriptor,
            self._python_writer_sync_function_descriptor,
        )
        return self

    def build_output_queue_parameters(self, to_actors):
        self.build_parameters(
            to_actors,
            self._java_reader_async_function_descriptor,
            self._java_reader_sync_function_descriptor,
            self._python_reader_async_function_descriptor,
            self._python_reader_sync_function_descriptor,
        )
        return self

    def build_parameters(self, actors, java_async_func, java_sync_func,
                         py_async_func, py_sync_func):
        for handle in actors:
            parameter = None
            if handle._ray_actor_language == Language.PYTHON:
                parameter = _streaming.ChannelCreationParameter(
                    handle._ray_actor_id, py_async_func, py_sync_func)
            else:
                parameter = _streaming.ChannelCreationParameter(
                    handle._ray_actor_id, java_async_func, java_sync_func)
            self._parameters.append(parameter)
        return self

    @staticmethod
    def set_python_writer_function_descriptor(async_function, sync_function):
        ChannelCreationParametersBuilder._python_writer_async_function_descriptor = (
            async_function)
        ChannelCreationParametersBuilder._python_writer_sync_function_descriptor = (
            sync_function)

    @staticmethod
    def set_python_reader_function_descriptor(async_function, sync_function):
        ChannelCreationParametersBuilder._python_reader_async_function_descriptor = (
            async_function)
        ChannelCreationParametersBuilder._python_reader_sync_function_descriptor = (
            sync_function)
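
The static setters at the end of this class are what Example #1's `init_reader` / `init_writer` call to point the default callbacks at a different worker class. A hedged sketch of such an override, given the class above and the `PythonFunctionDescriptor` import used throughout; the module name `my_pkg.my_worker` and class name `MyJobWorker` are hypothetical placeholders:

# Hypothetical override of the default Python writer callbacks.
custom_writer_async = PythonFunctionDescriptor(
    "my_pkg.my_worker", "on_writer_message", "MyJobWorker")
custom_writer_sync = PythonFunctionDescriptor(
    "my_pkg.my_worker", "on_writer_message_sync", "MyJobWorker")
ChannelCreationParametersBuilder.set_python_writer_function_descriptor(
    custom_writer_async, custom_writer_sync)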