コード例 #1
0
    async def _flush_service_queues(self):
        """Drain each service queue, assigning requests to backend buffers.

        Implements "power of two choices" load balancing: when a service
        has two or more backends, two are sampled (weighted by the traffic
        policy) and the one with the shorter buffer queue wins.
        """
        # perform traffic splitting for requests
        for service, queue in self.service_queues.items():
            # while there are incoming requests and there are backends
            while queue.qsize() and len(self.traffic[service]):
                backend_names = list(self.traffic[service].keys())
                backend_weights = list(self.traffic[service].values())
                if len(self.traffic[service]) >= 2:
                    # randomly pick 2 distinct backends, weighted by traffic
                    backend1, backend2 = np.random.choice(backend_names,
                                                          2,
                                                          replace=False,
                                                          p=backend_weights)

                    # see the length of buffer queues of the two backends
                    # and pick the one which has less no. of queries
                    # in the buffer
                    if (len(self.buffer_queues[backend1]) <= len(
                            self.buffer_queues[backend2])):
                        chosen_backend = backend1
                    else:
                        chosen_backend = backend2
                    # NOTE: fixed typo "chocies" -> "choices" in log message
                    logger.debug("[Power of two choices] found two backends "
                                 "{} and {}: choosing {}.".format(
                                     backend1, backend2, chosen_backend))
                else:
                    # single backend: the weighted choice degenerates to it
                    chosen_backend = np.random.choice(
                        backend_names, replace=False,
                        p=backend_weights).squeeze()
                request = await queue.get()
                self.buffer_queues[chosen_backend].add(request)
コード例 #2
0
 def set_traffic(self, service, traffic_dict):
     """Replace the traffic weights for *service*, rebuild its
     round-robin backend iterator, and flush pending work."""
     logger.debug("Setting traffic for service %s to %s", service,
                  traffic_dict)
     self.traffic[service] = traffic_dict
     names = list(traffic_dict.keys())
     self.round_robin_iterator_map[service] = itertools.cycle(names)
     self.flush()
コード例 #3
0
 def set_traffic(self, service, traffic_dict):
     """Replace the traffic weights for *service* and rebuild the
     fixed-packing iterator, which yields each backend ``packing_num``
     times in a row before moving to the next; then flush."""
     logger.debug("Setting traffic for service %s to %s", service,
                  traffic_dict)
     self.traffic[service] = traffic_dict
     repeated = (itertools.repeat(name, self.packing_num)
                 for name in traffic_dict.keys())
     self.fixed_packing_iterator_map[service] = itertools.cycle(
         itertools.chain.from_iterable(repeated))
     self.flush()
コード例 #4
0
    def register_service(self, route: str, service: str):
        """Add a route -> service mapping to the routing table.

        Args:
            route: http path name. Must begin with '/'.
            service: service name. This is the name http actor will push
                the request to.
        """
        message = "[KV] Registering route {} to service {}.".format(
            route, service)
        logger.debug(message)
        self.routing_table.put(route, service)
コード例 #5
0
    def wait_until_http_ready(self, num_retries=5, backoff_time_s=1):
        """Block until the HTTP server has served at least one
        routing-table request.

        Polls the KV store actor's request counter, sleeping
        ``backoff_time_s`` seconds between polls.

        Args:
            num_retries: maximum number of polls before giving up.
            backoff_time_s: seconds to sleep between failed polls.

        Raises:
            Exception: if the server is still not ready after
                ``num_retries`` polls.
        """
        retries = num_retries

        while True:
            routing_table_request_count = (ray.get(
                self.kv_store_actor_handle.get_request_count.remote()))
            # Bug fix: check readiness immediately so success on the final
            # retry no longer raises (and we don't sleep after success).
            if routing_table_request_count:
                return
            logger.debug((LOG_PREFIX + "Checking if HTTP server is ready."
                          "{} retries left.").format(retries))
            retries -= 1
            if retries == 0:
                raise Exception(
                    "HTTP server not ready after {} retries.".format(
                        num_retries))
            time.sleep(backoff_time_s)
コード例 #6
0
    async def _flush_service_queues(self):
        """Move queued requests into backend buffer queues, choosing a
        backend at random according to each service's traffic weights."""
        for service, queue in self.service_queues.items():
            # Drain while requests remain and the service has backends.
            while queue.qsize() and len(self.traffic[service]):
                policy = self.traffic[service]
                names = list(policy.keys())
                weights = list(policy.values())
                # weighted random pick of a single backend per query
                chosen_backend = np.random.choice(
                    names, replace=False, p=weights).squeeze()
                logger.debug("Matching service {} to backend {}".format(
                    service, chosen_backend))

                request = await queue.get()
                self.buffer_queues[chosen_backend].add(request)
コード例 #7
0
    async def enqueue_request(self,
                              service,
                              request_args,
                              request_kwargs,
                              request_context,
                              request_slo_ms=None):
        """Queue a request for *service*, flush, and await its result."""
        logger.debug("Received a request for service {}".format(service))

        adjusted_slo_ms = _adjust_latency_slo(request_slo_ms)
        query = Query(request_args, request_kwargs, request_context,
                      adjusted_slo_ms)
        await self.service_queues[service].put(query)
        await self.flush()

        # Note: a future change can be to directly return the ObjectID from
        # replica task submission
        return await query.async_future
コード例 #8
0
    def register_service(self, route: Union[str, None], service: str):
        """Record a service in the routing table.

        Services without an HTTP route (``route is None``) are appended
        to the JSON list stored under the default key rather than getting
        their own entry.

        Args:
            route: http path name. Must begin with '/'.
            service: service name. This is the name http actor will push
                the request to.
        """
        logger.debug("[KV] Registering route {} to service {}.".format(
            route, service))

        if route is not None:
            self.routing_table.put(route, service)
            return

        # put no route services in default key
        stored = self.routing_table.get(NO_ROUTE_KEY, "[]")
        no_http_services = json.loads(stored)
        no_http_services.append(service)
        self.routing_table.put(NO_ROUTE_KEY, json.dumps(no_http_services))
コード例 #9
0
    def wait_until_http_ready(self, num_retries=5, backoff_time_s=1):
        """Block until the HTTP server at ``self.http_address`` answers
        with status 200.

        Args:
            num_retries: maximum number of attempts before giving up.
            backoff_time_s: seconds to sleep between failed attempts.

        Raises:
            Exception: if the server is still not ready after
                ``num_retries`` attempts.
        """
        retries = num_retries

        while True:
            try:
                resp = requests.get(self.http_address)
                # Explicit check instead of `assert` (stripped under -O).
                if resp.status_code == 200:
                    # Bug fix: return immediately so success on the final
                    # attempt no longer raises, and we skip the last sleep.
                    return
            except Exception:
                # Server not reachable yet; fall through and retry.
                pass

            logger.debug((LOG_PREFIX + "Checking if HTTP server is ready."
                          "{} retries left.").format(retries))
            retries -= 1
            if retries == 0:
                raise Exception(
                    "HTTP server not ready after {} retries.".format(
                        num_retries))
            time.sleep(backoff_time_s)
コード例 #10
0
ファイル: queues.py プロジェクト: skyofwinter/ray
    async def enqueue_request(self, request_in_object, *request_args,
                              **request_kwargs):
        """Queue an incoming request object, flush, and await its result."""
        service = request_in_object.service
        logger.debug("Received a request for service {}".format(service))

        # An absolute SLO is taken as-is (wall-clock deadline); otherwise
        # the deadline is derived from the relative SLO.
        if request_in_object.absolute_slo_ms is None:
            request_slo_ms = request_in_object.adjust_relative_slo_ms()
        else:
            request_slo_ms = request_in_object.absolute_slo_ms

        query = Query(request_args, request_kwargs,
                      request_in_object.request_context, request_slo_ms)
        await self.service_queues[service].put(query)
        await self.flush()

        # Note: a future change can be to directly return the ObjectID from
        # replica task submission
        return await query.async_future
コード例 #11
0
ファイル: queues.py プロジェクト: skyofwinter/ray
    async def _flush_buffer_queues(self):
        """Hand buffered queries to backends that have available workers."""
        for service in self.traffic.keys():
            for backend in self._get_available_backends(service):
                buffer_queue = self.buffer_queues[backend]
                # Nothing buffered for this backend -- skip it.
                if len(buffer_queue) == 0:
                    continue

                worker_queue = self.worker_queues[backend]

                logger.debug("Assigning queries for backend {} with buffer "
                             "queue size {} and worker queue size {}".format(
                                 backend, len(buffer_queue),
                                 worker_queue.qsize()))

                # Batch size is only bounded when config info is known.
                max_batch_size = None
                if backend in self.backend_info:
                    max_batch_size = (
                        self.backend_info[backend]["max_batch_size"])

                await self._assign_query_to_worker(buffer_queue, worker_queue,
                                                   max_batch_size)
コード例 #12
0
 def set_traffic(self, service, traffic_dict):
     """Replace the traffic policy for *service*, then flush queues."""
     logger.debug(
         "Setting traffic for service %s to %s", service, traffic_dict)
     self.traffic[service] = traffic_dict
     self.flush()
コード例 #13
0
 def link(self, service, backend):
     """Give *backend* weight 1.0 in *service*'s traffic policy and flush."""
     logger.debug("Link %s with %s", service, backend)
     self.traffic[service][backend] = 1.0
     self.flush()
コード例 #14
0
 def set_backend_config(self, backend, config_dict):
     """Store the configuration dict for *backend*."""
     logger.debug(
         "Setting backend config for "
         "backend {} to {}".format(backend, config_dict))
     self.backend_info[backend] = config_dict
コード例 #15
0
 def link(self, service, backend):
     """Route 100% of *service*'s traffic to *backend*."""
     logger.debug("Link %s with %s", service, backend)
     # Delegate to set_traffic so iterator state is rebuilt consistently.
     self.set_traffic(service, {backend: 1.0})
コード例 #16
0
ファイル: queues.py プロジェクト: skyofwinter/ray
 async def dequeue_request(self, backend, replica_handle):
     """Make *replica_handle* available to serve *backend*, then flush."""
     logger.debug(
         "Received a dequeue request for backend {}".format(backend))
     worker_queue = self.worker_queues[backend]
     await worker_queue.put(replica_handle)
     await self.flush()