def remote(self, *args, **kwargs):
    if len(args) != 0:
        raise RayServeException(
            "handle.remote must be invoked with keyword arguments.")

    # Pop slo_ms before enqueuing the query.
    request_slo_ms = kwargs.pop("slo_ms", None)
    if request_slo_ms is not None:
        try:
            request_slo_ms = float(request_slo_ms)
            if request_slo_ms < 0:
                raise ValueError(
                    "Request SLO must be non-negative, it is {}".format(
                        request_slo_ms))
        except ValueError as e:
            raise RayServeException(str(e))

    result_object_id_bytes = ray.get(
        self.router_handle.enqueue_request.remote(
            service=self.endpoint_name,
            request_args=(),
            request_kwargs=kwargs,
            request_context=TaskContext.Python,
            request_slo_ms=request_slo_ms))
    return ray.ObjectID(result_object_id_bytes)

def remote(self, *args, **kwargs):
    if len(args) != 0:
        raise RayServeException(
            "handle.remote must be invoked with keyword arguments.")

    result_object_id_bytes = ray.get(
        self.router_handle.enqueue_request.remote(
            service=self.endpoint_name,
            request_args=(),
            request_kwargs=kwargs,
            request_context=TaskContext.Python))
    return ray.ObjectID(result_object_id_bytes)

def remote(self, *args, **kwargs):
    if len(args) != 0:
        raise RayServeException(
            "handle.remote must be invoked with keyword arguments.")

    # Create a RequestMetadata instance carrying the routing context and
    # SLO settings for this query.
    request_in_object = RequestMetadata(self.endpoint_name,
                                        TaskContext.Python,
                                        self.relative_slo_ms,
                                        self.absolute_slo_ms)
    return self.router_handle.enqueue_request.remote(
        request_in_object, **kwargs)

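# A minimal usage sketch, not part of this source: how a caller typically
# reaches the remote() variants above. The endpoint name "my_endpoint" and
# the keyword argument "data" are assumptions for illustration, and the
# import path of serve.get_handle varied across early Ray Serve releases.
def _example_handle_call():
    import ray
    from ray import serve

    handle = serve.get_handle("my_endpoint")  # hypothetical endpoint name
    # handle.remote rejects positional arguments; queries are keyword-only.
    object_id = handle.remote(data=42)
    return ray.get(object_id)
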
def _check_slo_ms(self, slo_value):
    if slo_value is not None:
        try:
            slo_value = float(slo_value)
            if slo_value < 0:
                raise ValueError(
                    "Request SLO must be non-negative, it is {}".format(
                        slo_value))
            return slo_value
        except ValueError as e:
            raise RayServeException(str(e))
    return None

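# Worked illustration (not from the source) of _check_slo_ms on a handle
# instance h:
#
#     h._check_slo_ms("35.5")  # -> 35.5; strings coercible to float pass
#     h._check_slo_ms(None)    # -> None; no SLO was requested
#     h._check_slo_ms(-1)      # -> raises RayServeException (negative SLO)
#     h._check_slo_ms("abc")   # -> raises RayServeException (not a float)
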
def __setattr__(self, name, value):
    raise RayServeException(_not_in_web_context_error)

def __getattribute__(self, name):
    raise RayServeException(_not_in_web_context_error)

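# Context sketch (an assumption about the enclosing class): these two dunder
# methods typically live on a placeholder such as FakeFlaskRequest (used by
# invoke_batch below), so that touching the request object outside a real
# web request fails immediately:
#
#     fake = FakeFlaskRequest()
#     fake.headers         # raises RayServeException(_not_in_web_context_error)
#     fake.method = "GET"  # raises RayServeException(_not_in_web_context_error)
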
def check(*args, **kwargs):
    if _get_global_state() is None:
        raise RayServeException("Please run serve.init to initialize or "
                                "connect to an existing Ray Serve cluster.")
    return f(*args, **kwargs)

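# Sketch of the enclosing decorator (an assumption: check() above reads like
# the inner wrapper of a connection guard, since f is a free variable; the
# decorator name ensure_connected is hypothetical).
from functools import wraps

def ensure_connected(f):
    @wraps(f)
    def check(*args, **kwargs):
        if _get_global_state() is None:
            raise RayServeException("Please run serve.init to initialize or "
                                    "connect to an existing Ray Serve "
                                    "cluster.")
        return f(*args, **kwargs)

    return check
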
def create_backend(func_or_class,
                   backend_tag,
                   *actor_init_args,
                   backend_config=BackendConfig()):
    """Create a backend using func_or_class and assign it backend_tag.

    Args:
        func_or_class (callable, class): a function or a class implementing
            the __call__ protocol.
        backend_tag (str): a unique tag assigned to this backend. It will
            be used to associate services in the traffic policy.
        backend_config (BackendConfig): an object defining backend
            properties for starting a backend.
        *actor_init_args (optional): the arguments to pass to the class
            initialization method.
    """
    assert isinstance(backend_config, BackendConfig), (
        "backend_config must be an instance of BackendConfig")
    backend_config_dict = dict(backend_config)

    should_accept_batch = backend_config.max_batch_size is not None
    batch_annotation_not_found = RayServeException(
        "max_batch_size is set in the config but the function or method "
        "does not accept batching. Please use @serve.accept_batch to "
        "explicitly mark the function or method as batchable, taking lists "
        "as arguments.")

    arg_list = []
    if inspect.isfunction(func_or_class):
        if should_accept_batch and not hasattr(func_or_class,
                                               "serve_accept_batch"):
            raise batch_annotation_not_found

        # The arg list for a function is the function itself.
        arg_list = [func_or_class]

        # Ignore lint on the lambda expression.
        creator = lambda kwargs: TaskRunnerActor._remote(**kwargs)  # noqa: E731
    elif inspect.isclass(func_or_class):
        if should_accept_batch and not hasattr(func_or_class.__call__,
                                               "serve_accept_batch"):
            raise batch_annotation_not_found

        # Python inheritance order is right-to-left. We put RayServeMixin
        # on the left to make sure its methods are not overridden.
        @ray.remote
        class CustomActor(RayServeMixin, func_or_class):
            pass

        arg_list = actor_init_args

        # Ignore lint on the lambda expression.
        creator = lambda kwargs: CustomActor._remote(**kwargs)  # noqa: E731
    else:
        raise TypeError(
            "Backend must be a function or class, it is {}.".format(
                type(func_or_class)))

    # Save the creator, which starts the replicas.
    global_state.backend_table.register_backend(backend_tag, creator)

    # Save the configuration needed to start the replicas.
    global_state.backend_table.register_info(backend_tag, backend_config_dict)

    # Save the initial arguments needed by the replicas.
    global_state.backend_table.save_init_args(backend_tag, arg_list)

    # Set the backend config inside the router, particularly
    # max_batch_size.
    ray.get(global_state.init_or_get_router().set_backend_config.remote(
        backend_tag, backend_config_dict))
    scale(backend_tag, backend_config_dict["num_replicas"])

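# A minimal usage sketch under stated assumptions: @serve.accept_batch is the
# marker named in the error message above (presumably setting the
# serve_accept_batch attribute that create_backend checks for); the names
# echo_batch and "echo:v1" are hypothetical. A function backend receives the
# (possibly fake) Flask request as its first argument.
@serve.accept_batch
def echo_batch(flask_request, data=None):
    # Under batching, every argument arrives as a list (see invoke_batch
    # below); return exactly one result per query.
    return list(data)

def _example_register_backend():
    create_backend(echo_batch, "echo:v1",
                   backend_config=BackendConfig(max_batch_size=4))
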
def invoke_batch(self, request_item_list):
    # TODO(alind): create no-http services. The enqueues from such
    # services will always be TaskContext.Python.

    # Assumption: all the requests in a batch have the same serve context.

    # For batching, kwargs are modified as follows:
    #   kwargs [Python context]: key, val
    #   kwargs_list:             key, [val1, val2, ..., valn]
    # or
    #   args [Web context]: val
    #   arg_list:           [val1, val2, ..., valn]
    # where n (the current batch size) <= max_batch_size of a backend.

    arg_list = []
    kwargs_list = defaultdict(list)
    context_flags = set()
    batch_size = len(request_item_list)

    for item in request_item_list:
        args, kwargs, is_web_context = parse_request_item(item)
        context_flags.add(is_web_context)

        if is_web_context:
            # Web context only has one positional argument: the Flask
            # request.
            flask_request = args[0]
            arg_list.append(flask_request)
        else:
            # Python context only has kwargs.
            for k, v in kwargs.items():
                kwargs_list[k].append(v)

            # Append a fake Flask request to conform with batching
            # semantics: in batching mode, each argument is turned into
            # a list.
            arg_list.append(FakeFlaskRequest())

    try:
        # Check for mixing of query contexts; a unified context is
        # required within a batch.
        if len(context_flags) != 1:
            raise RayServeException(
                "Batched queries contain mixed context. Please only send "
                "the same type of requests in batching mode.")

        serve_context.web = context_flags.pop()
        serve_context.batch_size = batch_size

        start_timestamp = time.time()
        # Pass the (real or fake) Flask requests as a single list-valued
        # positional argument.
        result_list = self.__call__(arg_list, **kwargs_list)
        self._serve_metric_latency_list.append(time.time() -
                                               start_timestamp)
        if (not isinstance(result_list, list)
                or len(result_list) != batch_size):
            raise RayServeException("__call__ function "
                                    "doesn't preserve batch size. "
                                    "Please return a list of results "
                                    "with length equal to the batch "
                                    "size.")
        return result_list
    except Exception as e:
        wrapped_exception = wrap_to_ray_error(e)
        self._serve_metric_error_counter += batch_size
        return [wrapped_exception for _ in range(batch_size)]

def invoke_batch(self, request_item_list):
    # TODO(alind): create no-http services. The enqueues from such
    # services will always be TaskContext.Python.

    # Assumption: all the requests in a batch have the same serve context.

    # For batching, kwargs are modified as follows:
    #   kwargs [Python context]: key, val
    #   kwargs_list:             key, [val1, val2, ..., valn]
    # or
    #   args [Web context]: val
    #   arg_list:           [val1, val2, ..., valn]
    # where n (the current batch size) <= max_batch_size of a backend.

    kwargs_list = defaultdict(list)
    result_object_ids, context_flag_list, arg_list = [], [], []
    curr_batch_size = len(request_item_list)

    for item in request_item_list:
        args, kwargs, is_web_context, result_object_id = (
            parse_request_item(item))
        context_flag_list.append(is_web_context)

        # Web context only has one positional argument (the Flask
        # request); Python context only has kwargs.
        if is_web_context:
            arg_list.append(args[0])
        else:
            for k, v in kwargs.items():
                kwargs_list[k].append(v)
        result_object_ids.append(result_object_id)

    try:
        # Check for mixing of query contexts; a unified context is
        # required within a batch.
        if len(set(context_flag_list)) != 1:
            raise RayServeException("Batched queries contain mixed context.")
        serve_context.web = all(context_flag_list)

        if serve_context.web:
            args = (arg_list, )
        else:
            # Pass fake Flask requests as a list to conform with batching
            # semantics: in batching mode, each argument is turned into
            # a list.
            fake_flask_request_list = [
                FakeFlaskRequest() for _ in range(curr_batch_size)
            ]
            args = (fake_flask_request_list, )

        # Set the current batch size (n) for serve_context.
        serve_context.batch_size = len(result_object_ids)

        start_timestamp = time.time()
        result_list = self.__call__(*args, **kwargs_list)

        if (not isinstance(result_list, list)
                or len(result_list) != len(result_object_ids)):
            raise RayServeException("__call__ function "
                                    "doesn't preserve batch size. "
                                    "Please return a list of results "
                                    "with length equal to the batch "
                                    "size.")
        for result, result_object_id in zip(result_list, result_object_ids):
            ray.worker.global_worker.put_object(result, result_object_id)
        self._serve_metric_latency_list.append(time.time() - start_timestamp)
    except Exception as e:
        wrapped_exception = wrap_to_ray_error(e)
        self._serve_metric_error_counter += len(result_object_ids)
        for result_object_id in result_object_ids:
            ray.worker.global_worker.put_object(wrapped_exception,
                                                result_object_id)

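# Illustration under stated assumptions: a class backend compatible with
# invoke_batch above. The class name Adder is hypothetical;
# @serve.accept_batch is the marker named in create_backend's error message,
# applied to __call__ because create_backend inspects func_or_class.__call__
# for it.
class Adder:
    def __init__(self, increment):
        self.increment = increment

    @serve.accept_batch
    def __call__(self, flask_request, number=None):
        # flask_request and number each arrive as lists of length n; return
        # a same-length list, or invoke_batch raises RayServeException for
        # breaking batch-size preservation.
        return [n + self.increment for n in number]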