def parse_request_item(request_item):
    """Unpack a request item into ``(args, kwargs, is_web_context)``.

    Web-context items carry ``(asgi_scope, body_bytes)`` in
    ``request_args``; these are turned into a single Flask request
    positional argument with empty kwargs.  All other items are treated
    as Python-context: their ``request_kwargs`` pass through unchanged
    and a placeholder ``FakeFlaskRequest`` fills the positional slot.
    """
    if request_item.request_context != TaskContext.Web:
        # Python context: payload travels via kwargs; positional arg
        # is only a stub so the callee signature stays uniform.
        return (FakeFlaskRequest(), ), request_item.request_kwargs, False

    asgi_scope, body_bytes = request_item.request_args
    flask_request = build_flask_request(asgi_scope, io.BytesIO(body_bytes))
    return (flask_request, ), {}, True
def parse_request_item(request_item):
    """Unpack a request item into ``(args, kwargs, is_web_context)``.

    For web-context items the ASGI scope and raw HTTP body are converted
    into a Flask request (the sole positional argument, kwargs empty).
    Python-context items keep their ``request_kwargs`` and receive a
    ``FakeFlaskRequest`` placeholder as the positional argument.
    """
    if request_item.request_context == TaskContext.Web:
        asgi_scope, body_bytes = request_item.request_args
        # http_body_bytes enclosed in list due to
        # https://github.com/ray-project/ray/issues/6944
        # TODO(alind): remove list enclosing after issue is fixed
        flask_request = build_flask_request(asgi_scope,
                                            io.BytesIO(body_bytes[0]))
        return (flask_request, ), {}, True

    return (FakeFlaskRequest(), ), request_item.request_kwargs, False
async def invoke_batch(self, request_item_list):
    """Run one batched backend invocation over a list of request items.

    All items in a batch must share the same serve context (web or
    python) and the same runner method; mixing either raises
    ``RayServeException``.  Returns a list of per-item results of length
    ``len(request_item_list)``; on any failure every slot of the batch
    is filled with the same wrapped exception.
    """
    arg_list = []
    kwargs_list = defaultdict(list)
    context_flags = set()
    batch_size = len(request_item_list)
    call_methods = set()

    for item in request_item_list:
        args, kwargs, is_web_context = parse_request_item(item)
        context_flags.add(is_web_context)
        call_method = self.get_runner_method(item)
        call_methods.add(call_method)

        if is_web_context:
            # Web context: each item has exactly one positional
            # argument, the Flask request.
            flask_request = args[0]
            arg_list.append(flask_request)
        else:
            # Python context: the payload arrives via kwargs only; fan
            # each value out into the per-key batch lists.
            for k, v in kwargs.items():
                kwargs_list[k].append(v)

            # Set the flask request as a list to conform with batching
            # semantics: when in batching mode, each argument is turned
            # into a list.
            if self.has_positional_args(call_method):
                arg_list.append(FakeFlaskRequest())

    try:
        # Check mixing of query context (unified context needed).
        if len(context_flags) != 1:
            raise RayServeException(
                "Batched queries contain mixed context. Please only send "
                "the same type of requests in batching mode.")
        serve_context.web = context_flags.pop()

        if len(call_methods) != 1:
            raise RayServeException(
                "Queries contain mixed calling methods. Please only send "
                "the same type of requests in batching mode.")
        call_method = ensure_async(call_methods.pop())

        serve_context.batch_size = batch_size
        # Flask requests are passed to __call__ as a list
        arg_list = [arg_list]

        self.request_counter.add(batch_size)
        result_list = await call_method(*arg_list, **kwargs_list)

        # dict and set are iterable but unordered, so they cannot map
        # results back to batch positions.
        if not isinstance(result_list, Iterable) or isinstance(
                result_list, (dict, set)):
            error_message = ("RayServe expects an ordered iterable object "
                             "but the worker returned a {}".format(
                                 type(result_list)))
            raise RayServeException(error_message)

        # Normalize the result into a list type. This operation is fast
        # in Python because it doesn't copy anything.
        result_list = list(result_list)

        if len(result_list) != batch_size:
            error_message = ("Worker doesn't preserve batch size. The "
                             "input has length {} but the returned list "
                             "has length {}. Please return a list of "
                             "results with length equal to the batch size"
                             ".".format(batch_size, len(result_list)))
            raise RayServeException(error_message)
        self._reset_context()
        return result_list
    except Exception as e:
        wrapped_exception = wrap_to_ray_error(e)
        self.error_counter.add()
        self._reset_context()
        # Every caller in the batch receives the same wrapped error.
        return [wrapped_exception for _ in range(batch_size)]
async def invoke_batch(self, request_item_list):
    """Execute one backend call over a batch of request items.

    All items must share the same serve context and runner method.
    Returns a list of per-item results of length
    ``len(request_item_list)``; on failure every slot of the batch is
    filled with the same wrapped exception.
    """
    # TODO(alind) : create no-http services. The enqueues
    # from such services will always be TaskContext.Python.

    # Assumption : all the requests in a batch
    # have same serve context.

    # For batching kwargs are modified as follows -
    # kwargs [Python Context] : key,val
    # kwargs_list : key, [val1,val2, ... , valn]
    # or
    # args[Web Context] : val
    # args_list : [val1,val2, ...... , valn]
    # where n (current batch size) <= max_batch_size of a backend
    arg_list = []
    kwargs_list = defaultdict(list)
    context_flags = set()
    batch_size = len(request_item_list)
    call_methods = set()

    for item in request_item_list:
        args, kwargs, is_web_context = parse_request_item(item)
        context_flags.add(is_web_context)
        call_method = self.get_runner_method(item)
        call_methods.add(call_method)

        if is_web_context:
            # Web context: each item has exactly one positional
            # argument, the Flask request.
            flask_request = args[0]
            arg_list.append(flask_request)
        else:
            # Python context: the payload arrives via kwargs only; fan
            # each value out into the per-key batch lists.
            for k, v in kwargs.items():
                kwargs_list[k].append(v)

            # Set the flask request as a list to conform with batching
            # semantics: when in batching mode, each argument is turned
            # into a list.
            if self.has_positional_args(call_method):
                arg_list.append(FakeFlaskRequest())

    try:
        # Check mixing of query context (unified context needed).
        if len(context_flags) != 1:
            raise RayServeException(
                "Batched queries contain mixed context. Please only send "
                "the same type of requests in batching mode.")
        serve_context.web = context_flags.pop()

        if len(call_methods) != 1:
            raise RayServeException(
                "Queries contain mixed calling methods. Please only send "
                "the same type of requests in batching mode.")
        call_method = ensure_async(call_methods.pop())
        serve_context.batch_size = batch_size
        # Flask requests are passed to __call__ as a list
        arg_list = [arg_list]

        start_timestamp = time.time()
        result_list = await call_method(*arg_list, **kwargs_list)
        self.latency_list.append(time.time() - start_timestamp)

        # NOTE(review): if the worker returns a non-sized non-list, the
        # len() call in the message raises TypeError, which is caught
        # and wrapped below instead of producing this message.
        if (not isinstance(result_list, list)) or \
                (len(result_list) != batch_size):
            error_message = ("Worker doesn't preserve batch size. The "
                             "input has length {} but the returned list "
                             "has length {}. Please return a list of "
                             "results with length equal to the batch size"
                             ".".format(batch_size, len(result_list)))
            raise RayServeException(error_message)
        return result_list
    except Exception as e:
        wrapped_exception = wrap_to_ray_error(e)
        self.error_counter += batch_size
        # Every caller in the batch receives the same wrapped error.
        return [wrapped_exception for _ in range(batch_size)]
def invoke_batch(self, request_item_list):
    """Invoke ``self.__call__`` once over a batch of request items.

    All items must share the same serve context (web or python);
    mixing raises ``RayServeException``.  Returns a list of per-item
    results of length ``len(request_item_list)``; on failure every
    slot of the batch is filled with the same wrapped exception.
    """
    # TODO(alind) : create no-http services. The enqueues
    # from such services will always be TaskContext.Python.

    # Assumption : all the requests in a batch
    # have same serve context.

    # For batching kwargs are modified as follows -
    # kwargs [Python Context] : key,val
    # kwargs_list : key, [val1,val2, ... , valn]
    # or
    # args[Web Context] : val
    # args_list : [val1,val2, ...... , valn]
    # where n (current batch size) <= max_batch_size of a backend
    arg_list = []
    kwargs_list = defaultdict(list)
    context_flags = set()
    batch_size = len(request_item_list)

    for item in request_item_list:
        args, kwargs, is_web_context = parse_request_item(item)
        context_flags.add(is_web_context)

        if is_web_context:
            # Web context: each item has exactly one positional
            # argument, the Flask request.
            flask_request = args[0]
            arg_list.append(flask_request)
        else:
            # Python context: the payload arrives via kwargs only; fan
            # each value out into the per-key batch lists.
            for k, v in kwargs.items():
                kwargs_list[k].append(v)

            # Set the flask request as a list to conform with batching
            # semantics: when in batching mode, each argument is turned
            # into a list.
            arg_list.append(FakeFlaskRequest())

    try:
        # check mixing of query context
        # unified context needed
        if len(context_flags) != 1:
            raise RayServeException(
                "Batched queries contain mixed context. Please only send "
                "the same type of requests in batching mode.")
        serve_context.web = context_flags.pop()
        serve_context.batch_size = batch_size
        # Flask requests are passed to __call__ as a list
        arg_list = [arg_list]

        start_timestamp = time.time()
        result_list = self.__call__(*arg_list, **kwargs_list)
        self._serve_metric_latency_list.append(time.time() -
                                               start_timestamp)
        if (not isinstance(result_list, list)) or \
                (len(result_list) != batch_size):
            raise RayServeException("__call__ function "
                                    "doesn't preserve batch-size. "
                                    "Please return a list of result "
                                    "with length equals to the batch "
                                    "size.")
        return result_list
    except Exception as e:
        wrapped_exception = wrap_to_ray_error(e)
        self._serve_metric_error_counter += batch_size
        # Every caller in the batch receives the same wrapped error.
        return [wrapped_exception for _ in range(batch_size)]