def __process(self, request: AllocateRequest, req_type: str, is_delete: bool) -> AllocateResponse:
    """Send an isolation request to the remote server and deserialize the reply.

    Args:
        request: the allocation request; when it is an AllocateThreadsRequest,
            the targeted workload id is extracted for delete filtering/logging.
        req_type: human-readable request type, attached to the request
            metadata for server-side logging.
        is_delete: when True, the targeted workload is excluded from the
            tasks to place (it is being removed).

    Returns:
        The deserialized AllocateResponse.

    Raises:
        grpc.RpcError: if the remote call fails.
        Exception: if the response cannot be deserialized.
    """
    req_wid = ''
    if isinstance(request, AllocateThreadsRequest):
        req_wid = request.get_workload_id()

    req = self.__build_base_req(request.get_cpu())
    req.metadata[REQ_TYPE_METADATA_KEY] = req_type  # for logging purposes server side

    for wid, w in request.get_workloads().items():
        req.task_to_job_id[wid] = w.get_job_id()
        # A workload being deleted must not be re-placed.
        if is_delete and wid == req_wid:
            continue
        req.tasks_to_place.append(wid)

    try:
        log.info("remote %s (tasks_to_place=%s)", req_type, req.tasks_to_place)
        response = self.__stub.ComputeIsolation(
            req, timeout=self.__call_timeout_secs)
    except grpc.RpcError as e:
        log.error("remote %s failed (tasks_to_place=%s):\n%s",
                  req_type, req.tasks_to_place, repr(e))
        # Bare raise re-raises the active exception with its original traceback.
        raise

    try:
        return self.__deser(response)
    except Exception as e:
        # Fixed typo in the original message ("deseralize").
        log.error("failed to deserialize response for remote %s of %s:\n%s",
                  req_type, req_wid, repr(e))
        raise
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Ignore the rebalance request and return the CPU unchanged.

    This allocator never moves workloads; it only reports the current
    per-workload allocations for the CPU as-is.
    """
    # Lazy %-args: the workload map is only stringified if INFO is enabled.
    log.info("Ignoring attempt to rebalance workloads: '%s'",
             request.get_workloads())
    cpu = request.get_cpu()
    return AllocateResponse(
        cpu,
        get_workload_allocations(cpu, list(request.get_workloads().values())),
        self.get_name())
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Rebalance workloads across the CPU and report the resulting allocations.

    Placement is delegated to the module-level rebalance() helper, which also
    populates the metadata dict passed along in the response.
    """
    workloads = request.get_workloads()
    metadata = {}
    balanced_cpu = rebalance(
        request.get_cpu(), workloads, self.__free_thread_provider, metadata)
    allocations = get_workload_allocations(balanced_cpu, workloads.values())
    return AllocateResponse(balanced_cpu, allocations, self.get_name(), metadata)
def __get_rebalance_request(self):
    """Build an AllocateRequest snapshotting current workloads and resource usage."""
    workloads = self.get_workload_map_copy()
    usage = self.__wmm.get_resource_usage(workloads.keys())
    log.debug("resource_usage: %s", json.dumps(usage.serialize()))

    # Each usage getter may legitimately yield nothing; substitute {} then.
    def _or_empty(getter):
        return self.__get_optional_default(getter, {})

    return AllocateRequest(
        cpu=self.get_cpu_copy(),
        workloads=workloads,
        resource_usage=usage,
        cpu_usage=_or_empty(usage.get_cpu_usage),
        mem_usage=_or_empty(usage.get_mem_usage),
        net_recv_usage=_or_empty(usage.get_net_recv_usage),
        net_trans_usage=_or_empty(usage.get_net_trans_usage),
        disk_usage=_or_empty(usage.get_disk_usage),
        metadata=self.__get_request_metadata("rebalance"))
def get_no_usage_rebalance_request(cpu: Cpu, workloads: List[Workload]):
    """Build a rebalance AllocateRequest whose usage maps are all empty (test helper)."""
    # Every usage channel gets its own fresh empty dict.
    usage_kwargs = {
        key: {}
        for key in ("cpu_usage", "mem_usage", "net_recv_usage",
                    "net_trans_usage", "disk_usage")
    }
    return AllocateRequest(
        cpu=cpu,
        workloads=__workloads_list_to_map(workloads),
        resource_usage=GlobalResourceUsage({}),
        metadata=DEFAULT_TEST_REQUEST_METADATA,
        **usage_kwargs)
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Rebalance using workload predictions; no-op for an empty workload set.

    Tracks call counts and per-call metadata for observability.
    """
    self.__call_meta = {}
    cpu = request.get_cpu()
    cpu_usage = request.get_cpu_usage()
    workloads = request.get_workloads()
    self.__cnt_rebalance_calls += 1

    # Nothing to place: record the fact and return the CPU untouched.
    if not workloads:
        log.warning("Ignoring rebalance of empty CPU.")
        self.__call_meta['rebalance_empty'] = 1
        return AllocateResponse(cpu, self.get_name(), self.__call_meta)

    log.info("Rebalancing with predictions...")
    current_assignment = cpu.get_workload_ids_to_thread_ids()
    new_cpu = self.__compute_allocation(
        cpu, None, workloads, current_assignment, cpu_usage, None)
    return AllocateResponse(new_cpu, self.get_name(), self.__call_meta)
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Ask the remote server to rebalance threads via HTTP PUT.

    Returns:
        The deserialized AllocateResponse on HTTP 200.

    Raises:
        CpuAllocationException: on any non-200 response.
    """
    url = "{}/rebalance".format(self.__url)
    body = request.to_dict()
    # Lazy %-args: the (potentially large) body is only formatted when
    # DEBUG logging is actually enabled.
    log.debug("url: %s, body: %s", url, body)
    response = requests.put(
        url, json=body, headers=self.__headers, timeout=self.__timeout)
    log.debug("rebalance response code: %s", response.status_code)
    if response.status_code == 200:
        return deserialize_response(response.headers, response.json())
    raise CpuAllocationException(
        "Failed to rebalance threads: {}".format(response.text))
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Rebalance via the primary allocator, falling back to the secondary on failure.

    Both call counters are maintained so callers can observe how often
    the fallback path is exercised.
    """
    try:
        self.__primary_rebalance_call_count += 1
        return self.__primary_allocator.rebalance(request)
    # A bare `except:` would also swallow KeyboardInterrupt/SystemExit;
    # only ordinary errors should trigger the fallback.
    except Exception:
        log.exception(
            "Failed to rebalance workloads: '%s' with primary allocator: '%s', falling back to: '%s'",
            [w.get_id() for w in request.get_workloads().values()],
            self.__primary_allocator.__class__.__name__,
            self.__secondary_allocator.__class__.__name__)
        self.__secondary_rebalance_call_count += 1
        return self.__secondary_allocator.rebalance(request)
def __get_rebalance_request(self):
    """Build an AllocateRequest populated from current PCP usage data."""
    pcp_usage = self.__wmm.get_pcp_usage()
    usage_of = pcp_usage.get  # one bound-method lookup for the repeated gets
    return AllocateRequest(
        cpu=self.get_cpu_copy(),
        workloads=self.get_workload_map_copy(),
        cpu_usage=usage_of(CPU_USAGE, {}),
        mem_usage=usage_of(MEM_USAGE, {}),
        net_recv_usage=usage_of(NET_RECV_USAGE, {}),
        net_trans_usage=usage_of(NET_TRANS_USAGE, {}),
        disk_usage=usage_of(DISK_USAGE, {}),
        metadata=self.__get_request_metadata("rebalance"))
def test_balance_forecast_ip(self):
    """Static workload keeps exactly 2 threads and burst keeps >= 4 across repeated rebalances."""
    cpu = get_cpu()
    static_w = get_test_workload("a", 2, STATIC)
    burst_w = get_test_workload("b", 4, BURST)
    allocator = forecast_ip_alloc_simple

    def both():
        # Fresh map per request, mirroring independent request construction.
        return {"a": static_w, "b": burst_w}

    cpu = allocator.assign_threads(AllocateThreadsRequest(
        cpu, "a", {"a": static_w}, {}, DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
    cpu = allocator.assign_threads(AllocateThreadsRequest(
        cpu, "b", both(), {}, DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
    cpu = allocator.rebalance(AllocateRequest(
        cpu, both(), {}, DEFAULT_TEST_REQUEST_METADATA)).get_cpu()

    self.assertLessEqual(2 + 4, len(cpu.get_claimed_threads()))

    def check_assignment(c):
        ids_to_threads = c.get_workload_ids_to_thread_ids()
        self.assertEqual(2, len(ids_to_threads["a"]))
        self.assertLessEqual(4, len(ids_to_threads["b"]))  # burst got at least 4

    check_assignment(cpu)

    # Repeated rebalancing must keep the assignment stable.
    for _ in range(20):
        cpu = allocator.rebalance(AllocateRequest(
            cpu, both(), {}, DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
        check_assignment(cpu)
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Try the primary allocator; on any error, delegate to the secondary.

    __should_fallback_immediately() can force the fallback path (e.g. for
    testing) by raising before the primary allocator is consulted.
    """
    self.__primary_rebalance_call_count += 1
    try:
        self.__should_fallback_immediately()
        return self.__primary_allocator.rebalance(request)
    except Exception as e:
        workload_ids = [w.get_id() for w in request.get_workloads().values()]
        primary_name = self.__primary_allocator.__class__.__name__
        secondary_name = self.__secondary_allocator.__class__.__name__
        log.error(
            "Failed to rebalance workloads: '{}' with primary allocator: '{}', falling back to: '{}' because '{}'".format(
                workload_ids, primary_name, secondary_name, e))
        self.__secondary_rebalance_call_count += 1
        return self.__secondary_allocator.rebalance(request)
def test_forecast_ip_big_burst_pool_if_empty_instance(self):
    """Burst pool is large when alone, shrinks when a static workload arrives, and grows back once it leaves."""
    allocator = forecast_ip_alloc_simple
    burst_w = get_test_workload("a", 1, BURST)

    cpu = get_cpu()
    cpu = allocator.assign_threads(AllocateThreadsRequest(
        cpu, "a", {"a": burst_w}, {}, DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
    original_burst_claim_sz = len(cpu.get_claimed_threads())
    # should at least consume all the cores:
    self.assertLessEqual(len(cpu.get_threads()) / 2, original_burst_claim_sz)

    static_w = get_test_workload("b", 3, STATIC)
    cpu = allocator.assign_threads(AllocateThreadsRequest(
        cpu, "b", {"a": burst_w, "b": static_w}, {},
        DEFAULT_TEST_REQUEST_METADATA)).get_cpu()

    new_burst_claim_sz = len(get_threads_with_workload(cpu, static_w.get_id()))
    self.assertLess(new_burst_claim_sz, original_burst_claim_sz)
    self.assertLessEqual(3 + 1, len(cpu.get_claimed_threads()))
    self.assertLessEqual(1, new_burst_claim_sz)

    # there shouldn't be an empty core
    for package in cpu.get_packages():
        for core in package.get_cores():
            claimed_on_core = sum(t.is_claimed() for t in core.get_threads())
            self.assertLess(0, claimed_on_core)

    # Free the static workload, then rebalance with only the burst workload:
    # the burst pool should reclaim its original size.
    cpu = allocator.free_threads(AllocateThreadsRequest(
        cpu, "b", {"a": burst_w, "b": static_w}, {},
        DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
    cpu = allocator.rebalance(AllocateRequest(
        cpu, {"a": burst_w}, {}, DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
    self.assertEqual(original_burst_claim_sz, len(cpu.get_claimed_threads()))
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Rebalance threads via the remote isolation server.

    Returns:
        The deserialized AllocateResponse on HTTP 200.

    Raises:
        requests.exceptions.Timeout: if the remote call times out.
        CpuAllocationException: on any non-200 response.
    """
    url = "{}/rebalance".format(self.__url)
    body = request.to_dict()
    try:
        log.info("rebalancing threads remotely")
        response = requests.put(
            url, json=body, headers=self.__headers, timeout=self.__timeout)
    except requests.exceptions.Timeout:
        log.info("rebalancing threads remotely timed out")
        # Bare raise re-raises with the original traceback intact.
        raise

    if response.status_code == 200:
        log.info("rebalanced threads remotely")
        return deserialize_response(response.headers, response.json())

    log.error("failed to rebalance threads remotely with status code: %d",
              response.status_code)
    raise CpuAllocationException("Failed to rebalance threads: {}".format(
        response.text))
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Leave thread assignments untouched: echo the request's CPU back."""
    unchanged_cpu = request.get_cpu()
    return AllocateResponse(unchanged_cpu, self.get_name())
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Report current allocations for the request's CPU without moving anything."""
    cpu = request.get_cpu()
    workload_allocations = get_workload_allocations(
        cpu, request.get_workloads().values())
    return AllocateResponse(cpu, workload_allocations, self.get_name())
def __get_rebalance_request(self):
    """Snapshot the current CPU, workload map, and CPU usage into an AllocateRequest."""
    cpu_snapshot = self.get_cpu_copy()
    workload_snapshot = self.get_workload_map_copy()
    cpu_usage = self.__get_cpu_usage()
    metadata = self.__get_request_metadata("rebalance")
    return AllocateRequest(cpu_snapshot, workload_snapshot, cpu_usage, metadata)
def get_cpu_event(request: AllocateRequest, response: AllocateResponse):
    """Pair a request and its response as a serializable event dict."""
    event = {}
    event["request"] = request.to_dict()
    event["response"] = response.to_dict()
    return event
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    """Rebalance workloads across the CPU via the module-level rebalance() helper."""
    workloads = request.get_workloads()
    balanced_cpu = rebalance(
        request.get_cpu(), workloads, self.__free_thread_provider)
    return AllocateResponse(balanced_cpu, self.get_name())