def _ray_serve_get_runner_method(self, request_item):
    """Resolve the backend method that a request asks to be invoked.

    Args:
        request_item: Request object whose ``call_method`` attribute names
            the method to look up on this backend.

    Returns:
        The attribute of ``self`` named by ``request_item.call_method``.

    Raises:
        RayServeException: If ``self`` has no attribute with that name.
    """
    requested = request_item.call_method
    if hasattr(self, requested):
        return getattr(self, requested)

    # Include dir(self) so the caller can see which names do exist.
    error_template = (
        "Backend doesn't have method {} "
        "which is specified in the request. "
        "The avaiable methods are {}"
    )
    raise RayServeException(error_template.format(requested, dir(self)))
def remote(self, *args, **kwargs):
    """Enqueue a request on the router using keyword arguments only.

    Args:
        *args: Must be empty; any positional argument is rejected.
        **kwargs: Forwarded unchanged to the router's ``enqueue_request``.

    Returns:
        Whatever ``self.router_handle.enqueue_request.remote`` returns.

    Raises:
        RayServeException: If any positional argument is supplied.
    """
    if args:
        raise RayServeException(
            "handle.remote must be invoked with keyword arguments."
        )

    # Resolve the remote entry point first, then build the metadata that
    # accompanies the keyword arguments.
    enqueue = self.router_handle.enqueue_request.remote
    return enqueue(self._make_metadata(), **kwargs)
def _check_slo_ms(self, slo_value):
    """Validate a request SLO and normalise it to a float.

    Args:
        slo_value: The SLO to check, or ``None`` when no SLO was given.

    Returns:
        ``None`` if ``slo_value`` is ``None``; otherwise ``slo_value``
        converted with ``float()``.

    Raises:
        RayServeException: If the conversion raises ``ValueError`` or the
            converted value is below zero.
    """
    if slo_value is None:
        return None

    try:
        slo_ms = float(slo_value)
        if slo_ms < 0:
            # Raised inside the try so that it is reported through the same
            # RayServeException path as a failed conversion.
            raise ValueError(
                "Request SLO must be positive, it is {}".format(slo_ms)
            )
    except ValueError as error:
        raise RayServeException(str(error))
    return slo_ms
def invoke_batch(self, request_item_list):
    """Run a single backend call over a whole batch of requests.

    Arguments are regrouped for batching as follows, where ``n`` is the
    current batch size (expected to be <= the backend's max_batch_size):

    * Python context: each ``key: val`` kwarg becomes
      ``key: [val1, val2, ..., valn]``.
    * Web context: the positional ``val`` becomes
      ``[val1, val2, ..., valn]``.

    All requests in a batch are assumed to share the same serve_benchmark
    context and the same target method; a mixed batch is reported as an
    error for every request in it.

    TODO(alind): create no-http services. The enqueues from such services
    will always be TaskContext.Python.

    Args:
        request_item_list: The request items that make up the batch.

    Returns:
        A list with one entry per request: either the list returned by the
        target method, or the same wrapped error repeated
        ``len(request_item_list)`` times when validation or the call raised.
    """
    positional_batch = []
    keyword_batch = defaultdict(list)
    seen_contexts = set()
    seen_methods = set()
    num_requests = len(request_item_list)

    for request_item in request_item_list:
        args, kwargs, is_web_context = parse_request_item(request_item)
        seen_contexts.add(is_web_context)

        runner = self._ray_serve_get_runner_method(request_item)
        seen_methods.add(runner)

        if is_web_context:
            # Web context: only the first positional argument (the flask
            # request) is collected.
            positional_batch.append(args[0])
            continue

        # Python context: only keyword arguments are collected, one list
        # of values per key.
        for key, value in kwargs.items():
            keyword_batch[key].append(value)

        # When the target method takes positional parameters, supply a
        # placeholder flask request so the positional list still lines up
        # with batching semantics (every argument becomes a list).
        if self._ray_serve_count_num_positional(runner):
            positional_batch.append(FakeFlaskRequest())

    try:
        # The batch must be homogeneous in context ...
        if len(seen_contexts) != 1:
            raise RayServeException(
                "Batched queries contain mixed context. Please only send "
                "the same type of requests in batching mode."
            )
        serve_context.web = seen_contexts.pop()

        # ... and in the method being called.
        if len(seen_methods) != 1:
            raise RayServeException(
                "Queries contain mixed calling methods. Please only send "
                "the same type of requests in batching mode."
            )
        runner = seen_methods.pop()

        serve_context.batch_size = num_requests
        # The collected flask requests are handed over as one list argument.
        results = runner(positional_batch, **keyword_batch)

        if not (isinstance(results, list) and len(results) == num_requests):
            raise RayServeException(
                "__call__ function "
                "doesn't preserve batch-size. "
                "Please return a list of result "
                "with length equals to the batch "
                "size."
            )
        return results
    except Exception as error:
        # Every request in the batch receives the same wrapped failure.
        wrapped = wrap_to_ray_error(error)
        return [wrapped] * num_requests
def __setattr__(self, name, value):
    """Reject every attribute assignment with the not-in-web-context error."""
    error = RayServeException(_not_in_web_context_error)
    raise error
def __getattribute__(self, name):
    """Reject every attribute read with the not-in-web-context error."""
    error = RayServeException(_not_in_web_context_error)
    raise error
def check(*args, **kwargs):
    # Forward the call to the wrapped ``f`` only when global state is
    # available; otherwise tell the caller how to initialise it.
    if _get_global_state() is not None:
        return f(*args, **kwargs)

    raise RayServeException(
        "Please run serve_benchmark.init to initialize or "
        "connect to existing ray serve_benchmark cluster.")