Example #1
    def invoke_single(self, request_item):
        args, kwargs, is_web_context = parse_request_item(request_item)
        serve_context.web = is_web_context
        start_timestamp = time.time()

        try:
            result = self.__call__(*args, **kwargs)
        except Exception as e:
            result = wrap_to_ray_error(e)
            self._serve_metric_error_counter += 1

        self._serve_metric_latency_list.append(time.time() - start_timestamp)
        return result
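This variant hands the result (or the wrapped error) straight back to the caller. Below is a minimal, self-contained sketch of the same pattern (timing the call and converting exceptions into a returned value instead of letting them propagate); parse_request_item and wrap_to_ray_error are Ray Serve helpers, so the sketch simply takes already-parsed arguments and a plain handler:

    import time

    def timed_call(handler, args, kwargs):
        # Sketch of the single-request path: measure latency and turn any
        # exception into a value so the caller never sees a raised error.
        start_timestamp = time.time()
        try:
            result = handler(*args, **kwargs)
        except Exception as e:
            result = e  # Ray Serve wraps this via wrap_to_ray_error
        latency = time.time() - start_timestamp
        return result, latency

For instance, timed_call(lambda x: x + 1, (41, ), {}) returns (42, <latency in seconds>), while a handler that raises returns the exception object instead.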
Example #2
 def invoke_single(self, request_item):
     args, kwargs, is_web_context, result_object_id = parse_request_item(
         request_item)
     serve_context.web = is_web_context
     start_timestamp = time.time()
     try:
         result = self.__call__(*args, **kwargs)
         ray.worker.global_worker.put_object(result, result_object_id)
     except Exception as e:
         wrapped_exception = wrap_to_ray_error(e)
         self._serve_metric_error_counter += 1
         ray.worker.global_worker.put_object(wrapped_exception,
                                             result_object_id)
     self._serve_metric_latency_list.append(time.time() - start_timestamp)
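Unlike Example #1, this version returns nothing: the result, or the wrapped exception, is written directly into the pre-allocated result_object_id via ray.worker.global_worker.put_object, so whichever actor enqueued the request can simply block on that ID. A hedged sketch of the consuming side (the result_object_id handle itself is created elsewhere in Ray Serve; the helper name below is invented and only standard ray.get is assumed):

    import ray

    def wait_for_result(result_object_id):
        # Sketch: block until the worker has put the result (or the wrapped
        # exception) into the pre-allocated object ID, then unwrap it.
        result = ray.get(result_object_id)
        if isinstance(result, Exception):
            raise result
        return result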
Example #3
    def invoke_batch(self, request_item_list):
        # TODO(alind) : create no-http services. The enqueues
        # from such services will always be TaskContext.Python.

        # Assumption: all the requests in a batch
        # have the same serve context.

        # For batching kwargs are modified as follows -
        # kwargs [Python Context] : key,val
        # kwargs_list             : key, [val1,val2, ... , valn]
        # or
        # args[Web Context]       : val
        # args_list               : [val1,val2, ...... , valn]
        # where n (current batch size) <= max_batch_size of a backend

        arg_list = []
        kwargs_list = defaultdict(list)
        context_flags = set()
        batch_size = len(request_item_list)

        for item in request_item_list:
            args, kwargs, is_web_context = parse_request_item(item)
            context_flags.add(is_web_context)

            if is_web_context:
                # A web context has a single positional argument:
                # the Flask request.
                flask_request = args[0]
                arg_list.append(flask_request)
            else:
                # A Python context only has kwargs.
                for k, v in kwargs.items():
                    kwargs_list[k].append(v)

                # Append a placeholder Flask request so that, in keeping
                # with batching semantics, this positional argument also
                # becomes a list (one entry per request).
                arg_list.append(FakeFlaskRequest())

        try:
            # check mixing of query context
            # unified context needed
            if len(context_flags) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context. Please only send "
                    "the same type of requests in batching mode.")

            serve_context.web = context_flags.pop()
            serve_context.batch_size = batch_size
            start_timestamp = time.time()

            result_list = self.__call__(arg_list, **kwargs_list)

            self._serve_metric_latency_list.append(time.time() -
                                                   start_timestamp)
            if (not isinstance(result_list,
                               list)) or (len(result_list) != batch_size):
                raise RayServeException("__call__ function "
                                        "doesn't preserve batch size. "
                                        "Please return a list of results "
                                        "with length equal to the batch "
                                        "size.")
            return result_list
        except Exception as e:
            wrapped_exception = wrap_to_ray_error(e)
            self._serve_metric_error_counter += batch_size
            return [wrapped_exception for _ in range(batch_size)]
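The heart of this batching path is the reshaping of per-request kwargs into per-key lists, plus the check that the backend preserves the batch size. A self-contained sketch of just that transformation, independent of Ray Serve (the handler stands in for self.__call__):

    from collections import defaultdict

    def batch_kwargs(per_request_kwargs):
        # Turn [{"x": 1, "y": 2}, {"x": 3, "y": 4}] into
        # {"x": [1, 3], "y": [2, 4]}, mirroring kwargs_list above.
        kwargs_list = defaultdict(list)
        for kwargs in per_request_kwargs:
            for k, v in kwargs.items():
                kwargs_list[k].append(v)
        return kwargs_list

    def call_batched(handler, per_request_kwargs):
        batch_size = len(per_request_kwargs)
        result_list = handler(**batch_kwargs(per_request_kwargs))
        # The backend must preserve batch size: exactly one result per request.
        if not isinstance(result_list, list) or len(result_list) != batch_size:
            raise ValueError("handler must return a list with one entry "
                             "per request in the batch")
        return result_list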
Example #4
    def invoke_batch(self, request_item_list):
        # TODO(alind) : create no-http services. The enqueues
        # from such services will always be TaskContext.Python.

        # Assumption: all the requests in a batch
        # have the same serve context.

        # For batching kwargs are modified as follows -
        # kwargs [Python Context] : key,val
        # kwargs_list             : key, [val1,val2, ... , valn]
        # or
        # args[Web Context]       : val
        # args_list               : [val1,val2, ...... , valn]
        # where n (current batch size) <= max_batch_size of a backend

        kwargs_list = defaultdict(list)
        result_object_ids, context_flag_list, arg_list = [], [], []
        curr_batch_size = len(request_item_list)

        for item in request_item_list:
            args, kwargs, is_web_context, result_object_id = (
                parse_request_item(item))
            context_flag_list.append(is_web_context)

            # A web context has a single positional argument (the Flask
            # request); a Python context only has kwargs.
            if is_web_context:
                arg_list.append(args[0])
            else:
                for k, v in kwargs.items():
                    kwargs_list[k].append(v)
            result_object_ids.append(result_object_id)

        try:
            # check mixing of query context
            # unified context needed
            if len(set(context_flag_list)) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context.")
            serve_context.web = all(context_flag_list)
            if serve_context.web:
                args = (arg_list, )
            else:
                # Append placeholder Flask requests so that, in keeping
                # with batching semantics, this positional argument also
                # becomes a list (one entry per request).
                fake_flask_request_lst = [
                    FakeFlaskRequest() for _ in range(curr_batch_size)
                ]
                args = (fake_flask_request_lst, )
            # set the current batch size (n) for serve_context
            serve_context.batch_size = len(result_object_ids)
            start_timestamp = time.time()
            result_list = self.__call__(*args, **kwargs_list)
            if (not isinstance(result_list, list)) or (len(result_list) !=
                                                       len(result_object_ids)):
                raise RayServeException("__call__ function "
                                        "doesn't preserve batch-size. "
                                        "Please return a list of result "
                                        "with length equals to the batch "
                                        "size.")
            for result, result_object_id in zip(result_list,
                                                result_object_ids):
                ray.worker.global_worker.put_object(result, result_object_id)
            self._serve_metric_latency_list.append(time.time() -
                                                   start_timestamp)
        except Exception as e:
            wrapped_exception = wrap_to_ray_error(e)
            self._serve_metric_error_counter += len(result_object_ids)
            for result_object_id in result_object_ids:
                ray.worker.global_worker.put_object(wrapped_exception,
                                                    result_object_id)
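Both batch variants rely on the same contract for the backend's __call__: every argument arrives as a list, and the return value must be a list of the same length. A hedged sketch of a backend that satisfies that contract (the class name and the increment logic are invented for illustration):

    class BatchAdder:
        # Sketch of a batched backend: in batching mode the Flask-request
        # positional argument and every kwarg arrive as lists, and exactly
        # one result per request must be returned.
        def __call__(self, flask_request_list, value=None):
            if value is None:
                value = []
            return [v + 1 for v in value]

    # Example: BatchAdder()([None, None, None], value=[1, 2, 3]) -> [2, 3, 4]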