async def _flush_service_queues(self):
    """Route pending service requests into backend buffer queues.

    Uses the "power of two choices" strategy when a service has at
    least two backends: sample two distinct backends weighted by the
    traffic split, then pick the one with the shorter buffer queue.
    With a single backend, sample it directly from the weights.
    """
    # perform traffic splitting for requests
    for service, queue in self.service_queues.items():
        # while there are incoming requests and there are backends
        while queue.qsize() and len(self.traffic[service]):
            backend_names = list(self.traffic[service].keys())
            backend_weights = list(self.traffic[service].values())
            if len(self.traffic[service]) >= 2:
                # randomly pick 2 backends
                backend1, backend2 = np.random.choice(
                    backend_names, 2, replace=False, p=backend_weights)

                # see the length of buffer queues of the two backends
                # and pick the one which has less no. of queries
                # in the buffer
                if (len(self.buffer_queues[backend1]) <= len(
                        self.buffer_queues[backend2])):
                    chosen_backend = backend1
                else:
                    chosen_backend = backend2
                # NOTE: fixed typo in the log message ("chocies").
                logger.debug("[Power of two choices] found two backends "
                             "{} and {}: choosing {}.".format(
                                 backend1, backend2, chosen_backend))
            else:
                chosen_backend = np.random.choice(
                    backend_names, replace=False, p=backend_weights).squeeze()
            request = await queue.get()
            self.buffer_queues[chosen_backend].add(request)
def set_traffic(self, service, traffic_dict):
    """Replace the traffic split for *service* and rebuild the
    round-robin iterator over its backends, then flush."""
    logger.debug("Setting traffic for service %s to %s", service,
                 traffic_dict)
    self.traffic[service] = traffic_dict
    # Cycle endlessly over the backends registered for this service.
    self.round_robin_iterator_map[service] = itertools.cycle(
        list(self.traffic[service].keys()))
    self.flush()
def set_traffic(self, service, traffic_dict):
    """Replace the traffic split for *service* and rebuild its
    fixed-packing iterator (each backend repeated ``packing_num``
    times per cycle), then flush."""
    logger.debug("Setting traffic for service %s to %s", service,
                 traffic_dict)
    self.traffic[service] = traffic_dict
    # Build an endless sequence like: b1,b1,...,b2,b2,...,b1,b1,...
    repeated_backends = (itertools.repeat(name, self.packing_num)
                         for name in self.traffic[service].keys())
    self.fixed_packing_iterator_map[service] = itertools.cycle(
        itertools.chain.from_iterable(repeated_backends))
    self.flush()
def register_service(self, route: str, service: str):
    """Create an entry in the routing table.

    Args:
        route: http path name. Must begin with '/'.
        service: service name. This is the name http actor will push
            the request to.
    """
    message = "[KV] Registering route {} to service {}.".format(
        route, service)
    logger.debug(message)
    self.routing_table.put(route, service)
def wait_until_http_ready(self, num_retries=5, backoff_time_s=1):
    """Block until the HTTP proxy has served at least one routing-table
    request, polling the KV store actor's request counter.

    Args:
        num_retries: number of polls before giving up.
        backoff_time_s: seconds to sleep between failed polls.

    Raises:
        Exception: if the server did not become ready in time.
    """
    for retries_left in range(num_retries, 0, -1):
        routing_table_request_count = ray.get(
            self.kv_store_actor_handle.get_request_count.remote())
        if routing_table_request_count:
            # Server has handled at least one request — it is ready.
            # (The original slept one extra backoff period even after
            # readiness was observed.)
            return
        logger.debug((LOG_PREFIX + "Checking if HTTP server is ready."
                      "{} retries left.").format(retries_left))
        time.sleep(backoff_time_s)
    raise Exception(
        "HTTP server not ready after {} retries.".format(num_retries))
async def _flush_service_queues(self):
    """Dispatch pending requests by sampling one backend per query,
    weighted by the service's configured traffic split."""
    for service, queue in self.service_queues.items():
        # Drain while requests are pending and backends are registered.
        while queue.qsize() and len(self.traffic[service]):
            names = list(self.traffic[service].keys())
            weights = list(self.traffic[service].values())
            # Pick a backend at random for this single query.
            chosen_backend = np.random.choice(
                names, replace=False, p=weights).squeeze()
            logger.debug("Matching service {} to backend {}".format(
                service, chosen_backend))
            request = await queue.get()
            self.buffer_queues[chosen_backend].add(request)
async def enqueue_request(self, service, request_args, request_kwargs,
                          request_context, request_slo_ms=None):
    """Buffer a query for *service*, trigger a flush, and await its
    result.

    The latency SLO is normalized via ``_adjust_latency_slo`` before
    the query is enqueued.
    """
    logger.debug("Received a request for service {}".format(service))
    adjusted_slo_ms = _adjust_latency_slo(request_slo_ms)
    query = Query(request_args, request_kwargs, request_context,
                  adjusted_slo_ms)
    await self.service_queues[service].put(query)
    await self.flush()
    # Note: a future change can be to directly return the ObjectID from
    # replica task submission
    return await query.async_future
def register_service(self, route: Union[str, None], service: str):
    """Create an entry in the routing table.

    Args:
        route: http path name. Must begin with '/'. ``None`` means the
            service is not exposed over HTTP.
        service: service name. This is the name http actor will push
            the request to.
    """
    logger.debug("[KV] Registering route {} to service {}.".format(
        route, service))
    if route is not None:
        self.routing_table.put(route, service)
        return
    # Services without a route are accumulated under a default key as
    # a JSON-encoded list.
    no_http_services = json.loads(
        self.routing_table.get(NO_ROUTE_KEY, "[]"))
    no_http_services.append(service)
    self.routing_table.put(NO_ROUTE_KEY, json.dumps(no_http_services))
def wait_until_http_ready(self, num_retries=5, backoff_time_s=1):
    """Poll ``self.http_address`` until it answers with HTTP 200.

    Args:
        num_retries: number of polls before giving up.
        backoff_time_s: seconds to sleep between failed polls.

    Raises:
        Exception: if the server did not become ready in time.
    """
    for retries_left in range(num_retries, 0, -1):
        try:
            resp = requests.get(self.http_address)
            # Explicit comparison instead of `assert`: asserts are
            # stripped under `python -O`, which would have made any
            # response count as ready.
            if resp.status_code == 200:
                # Ready — return immediately rather than sleeping one
                # extra backoff period as the original did.
                return
        except Exception:
            # Server not accepting connections yet; retry after backoff.
            pass
        logger.debug((LOG_PREFIX + "Checking if HTTP server is ready."
                      "{} retries left.").format(retries_left))
        time.sleep(backoff_time_s)
    raise Exception(
        "HTTP server not ready after {} retries.".format(num_retries))
async def enqueue_request(self, request_in_object, *request_args,
                          **request_kwargs):
    """Buffer an incoming query, trigger a flush, and await its result.

    The query deadline comes from the request object: an absolute
    wall-clock SLO takes precedence; otherwise the relative SLO is
    converted via ``adjust_relative_slo_ms``.
    """
    service = request_in_object.service
    logger.debug("Received a request for service {}".format(service))
    # check if the slo specified is directly the wall clock time
    absolute_slo = request_in_object.absolute_slo_ms
    if absolute_slo is None:
        request_slo_ms = request_in_object.adjust_relative_slo_ms()
    else:
        request_slo_ms = absolute_slo
    query = Query(request_args, request_kwargs,
                  request_in_object.request_context, request_slo_ms)
    await self.service_queues[service].put(query)
    await self.flush()
    # Note: a future change can be to directly return the ObjectID from
    # replica task submission
    return await query.async_future
async def _flush_buffer_queues(self):
    """Hand buffered queries to workers for every backend that has
    both pending work and an available replica."""
    for service in self.traffic.keys():
        for backend in self._get_available_backends(service):
            buffer_queue = self.buffer_queues[backend]
            if len(buffer_queue) == 0:
                # no work available
                continue
            worker_queue = self.worker_queues[backend]
            logger.debug("Assigning queries for backend {} with buffer "
                         "queue size {} and worker queue size {}".format(
                             backend, len(buffer_queue),
                             worker_queue.qsize()))
            # Pass along the batching limit when one was configured.
            max_batch_size = (self.backend_info[backend]["max_batch_size"]
                              if backend in self.backend_info else None)
            await self._assign_query_to_worker(buffer_queue, worker_queue,
                                               max_batch_size)
def set_traffic(self, service, traffic_dict):
    """Replace the traffic split for *service*, then flush so the new
    weights take effect immediately."""
    logger.debug("Setting traffic for service %s to %s", service,
                 traffic_dict)
    self.traffic[service] = traffic_dict
    self.flush()
def link(self, service, backend):
    """Route 100% of *service*'s traffic to *backend*.

    Delegates to ``set_traffic`` with a single-backend split of 1.0.
    The previous direct mutation (``self.traffic[service][backend] =
    1.0``) left any other backends' weights in place, so the weights no
    longer summed to one — which breaks the weighted sampling done with
    ``np.random.choice(..., p=weights)`` at flush time.
    """
    logger.debug("Link %s with %s", service, backend)
    self.set_traffic(service, {backend: 1.0})
def set_backend_config(self, backend, config_dict):
    """Record the configuration dictionary for *backend*."""
    logger.debug(
        "Setting backend config for backend {} to {}".format(
            backend, config_dict))
    self.backend_info[backend] = config_dict
def link(self, service, backend):
    """Send all of *service*'s traffic to *backend* by installing a
    single-entry traffic split."""
    logger.debug("Link %s with %s", service, backend)
    full_weight = {backend: 1.0}
    self.set_traffic(service, full_weight)
async def dequeue_request(self, backend, replica_handle):
    """Register an idle replica for *backend*, then flush so buffered
    queries can be assigned to it."""
    logger.debug(
        "Received a dequeue request for backend {}".format(backend))
    await self.worker_queues[backend].put(replica_handle)
    await self.flush()