Example #1
0
    def __process(self, request: AllocateRequest, req_type: str,
                  is_delete: bool) -> AllocateResponse:
        """Build a ComputeIsolation request from *request*, send it to the
        remote stub, and return the deserialized AllocateResponse.

        :param request: allocation request carrying the cpu and workloads
        :param req_type: request-type tag stored in metadata for server-side
            logging
        :param is_delete: when True, the workload id taken from an
            AllocateThreadsRequest is excluded from the placement list
        :raises grpc.RpcError: if the remote call fails
        :raises Exception: if the response cannot be deserialized
        """
        req_wid = ''
        if isinstance(request, AllocateThreadsRequest):
            req_wid = request.get_workload_id()
        req = self.__build_base_req(request.get_cpu())
        req.metadata[
            REQ_TYPE_METADATA_KEY] = req_type  # for logging purposes server side

        for wid, w in request.get_workloads().items():
            req.task_to_job_id[wid] = w.get_job_id()
            # do not ask the server to place a workload that is being deleted
            if is_delete and wid == req_wid:
                continue
            req.tasks_to_place.append(wid)

        try:
            log.info("remote %s (tasks_to_place=%s)", req_type,
                     req.tasks_to_place)
            response = self.__stub.ComputeIsolation(
                req, timeout=self.__call_timeout_secs)
        except grpc.RpcError as e:
            log.error("remote %s failed (tasks_to_place=%s):\n%s", req_type,
                      req.tasks_to_place, repr(e))
            # bare raise preserves the original traceback
            raise

        try:
            return self.__deser(response)
        except Exception as e:
            # fixed typo in the log message: "deseralize" -> "deserialize"
            log.error("failed to deserialize response for remote %s of %s:\n%s",
                      req_type, req_wid, repr(e))
            raise
 def rebalance(self, request: AllocateRequest) -> AllocateResponse:
     """Deliberately skip rebalancing and report the current allocations."""
     workloads = request.get_workloads()
     cpu = request.get_cpu()
     log.info("Ignoring attempt to rebalance workloads: '{}'".format(workloads))
     allocations = get_workload_allocations(cpu, list(workloads.values()))
     return AllocateResponse(cpu, allocations, self.get_name())
Example #3
0
    def rebalance(self, request: AllocateRequest) -> AllocateResponse:
        """Rebalance workloads on the cpu and report per-workload allocations."""
        workload_map = request.get_workloads()
        call_meta = {}
        # the module-level rebalance() helper performs the actual placement
        # and may record details into call_meta
        balanced_cpu = rebalance(request.get_cpu(), workload_map,
                                 self.__free_thread_provider, call_meta)
        allocations = get_workload_allocations(balanced_cpu,
                                               workload_map.values())
        return AllocateResponse(balanced_cpu, allocations, self.get_name(),
                                call_meta)
    def __get_rebalance_request(self):
        """Snapshot cpu, workloads and resource usage into an AllocateRequest
        suitable for a rebalance call."""
        workload_map = self.get_workload_map_copy()
        usage = self.__wmm.get_resource_usage(workload_map.keys())
        log.debug("resource_usage: %s", json.dumps(usage.serialize()))

        # each usage getter may be unavailable; fall back to an empty mapping
        cpu_usage, mem_usage, net_recv_usage, net_trans_usage, disk_usage = (
            self.__get_optional_default(getter, {}) for getter in (
                usage.get_cpu_usage,
                usage.get_mem_usage,
                usage.get_net_recv_usage,
                usage.get_net_trans_usage,
                usage.get_disk_usage))

        return AllocateRequest(
            cpu=self.get_cpu_copy(),
            workloads=workload_map,
            resource_usage=usage,
            cpu_usage=cpu_usage,
            mem_usage=mem_usage,
            net_recv_usage=net_recv_usage,
            net_trans_usage=net_trans_usage,
            disk_usage=disk_usage,
            metadata=self.__get_request_metadata("rebalance"))
Example #5
0
def get_no_usage_rebalance_request(cpu: Cpu, workloads: List[Workload]):
    """Build a rebalance AllocateRequest whose per-resource usage maps are all
    empty (test fixture helper)."""
    empty_usages = {name: {} for name in (
        "cpu_usage", "mem_usage", "net_recv_usage", "net_trans_usage",
        "disk_usage")}
    return AllocateRequest(cpu=cpu,
                           workloads=__workloads_list_to_map(workloads),
                           resource_usage=GlobalResourceUsage({}),
                           metadata=DEFAULT_TEST_REQUEST_METADATA,
                           **empty_usages)
    def rebalance(self, request: AllocateRequest) -> AllocateResponse:
        """Rebalance placements using usage predictions; no-op when the
        request carries no workloads."""
        self.__call_meta = {}
        cpu = request.get_cpu()
        cpu_usage = request.get_cpu_usage()
        workloads = request.get_workloads()
        self.__cnt_rebalance_calls += 1

        if not workloads:
            log.warning("Ignoring rebalance of empty CPU.")
            self.__call_meta['rebalance_empty'] = 1
            return AllocateResponse(cpu, self.get_name(), self.__call_meta)

        log.info("Rebalancing with predictions...")
        current_placements = cpu.get_workload_ids_to_thread_ids()
        allocation = self.__compute_allocation(
            cpu, None, workloads, current_placements, cpu_usage, None)
        return AllocateResponse(allocation, self.get_name(), self.__call_meta)
    def rebalance(self, request: AllocateRequest) -> AllocateResponse:
        """Delegate rebalancing to the remote isolation service over HTTP."""
        endpoint = "{}/rebalance".format(self.__url)
        payload = request.to_dict()
        log.debug("url: {}, body: {}".format(endpoint, payload))
        resp = requests.put(endpoint, json=payload, headers=self.__headers,
                            timeout=self.__timeout)
        log.debug("rebalance response code: {}".format(resp.status_code))

        if resp.status_code != 200:
            raise CpuAllocationException(
                "Failed to rebalance threads: {}".format(resp.text))

        return deserialize_response(resp.headers, resp.json())
Example #8
0
 def rebalance(self, request: AllocateRequest) -> AllocateResponse:
     """Rebalance with the primary allocator; on failure log the error and
     fall back to the secondary allocator.

     :raises: whatever the secondary allocator raises if it also fails
     """
     try:
         self.__primary_rebalance_call_count += 1
         return self.__primary_allocator.rebalance(request)
     except Exception:
         # was a bare `except:`, which would also swallow SystemExit and
         # KeyboardInterrupt; only fall back on ordinary errors
         log.exception(
             "Failed to rebalance workloads: '{}' with primary allocator: '{}', falling back to: '{}'".format(
                 [w.get_id() for w in request.get_workloads().values()],
                 self.__primary_allocator.__class__.__name__,
                 self.__secondary_allocator.__class__.__name__))
         self.__secondary_rebalance_call_count += 1
         return self.__secondary_allocator.rebalance(request)
Example #9
0
    def __get_rebalance_request(self):
        """Snapshot cpu, workloads and pcp usage into an AllocateRequest
        suitable for a rebalance call."""
        pcp_usage = self.__wmm.get_pcp_usage()

        def usage_of(key):
            # missing usage categories default to empty mappings
            return pcp_usage.get(key, {})

        return AllocateRequest(
            cpu=self.get_cpu_copy(),
            workloads=self.get_workload_map_copy(),
            cpu_usage=usage_of(CPU_USAGE),
            mem_usage=usage_of(MEM_USAGE),
            net_recv_usage=usage_of(NET_RECV_USAGE),
            net_trans_usage=usage_of(NET_TRANS_USAGE),
            disk_usage=usage_of(DISK_USAGE),
            metadata=self.__get_request_metadata("rebalance"))
Example #10
0
    def test_balance_forecast_ip(self):
        """The static workload keeps exactly 2 threads and the burst workload
        keeps at least 4 across repeated rebalances."""
        static_w = get_test_workload("a", 2, STATIC)
        burst_w = get_test_workload("b", 4, BURST)
        allocator = forecast_ip_alloc_simple

        cpu = get_cpu()
        cpu = allocator.assign_threads(
            AllocateThreadsRequest(cpu, "a", {"a": static_w}, {},
                                   DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
        cpu = allocator.assign_threads(
            AllocateThreadsRequest(cpu, "b", {"a": static_w, "b": burst_w},
                                   {}, DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
        cpu = allocator.rebalance(
            AllocateRequest(cpu, {"a": static_w, "b": burst_w}, {},
                            DEFAULT_TEST_REQUEST_METADATA)).get_cpu()

        self.assertLessEqual(2 + 4, len(cpu.get_claimed_threads()))

        mapping = cpu.get_workload_ids_to_thread_ids()
        self.assertEqual(2, len(mapping["a"]))
        self.assertLessEqual(4, len(mapping["b"]))  # burst got at least 4

        # repeated rebalancing must not disturb the invariant
        for _ in range(20):
            cpu = allocator.rebalance(
                AllocateRequest(cpu, {"a": static_w, "b": burst_w}, {},
                                DEFAULT_TEST_REQUEST_METADATA)).get_cpu()

        mapping = cpu.get_workload_ids_to_thread_ids()
        self.assertEqual(2, len(mapping["a"]))
        self.assertLessEqual(4, len(mapping["b"]))
 def rebalance(self, request: AllocateRequest) -> AllocateResponse:
     """Try the primary allocator, falling back to the secondary allocator on
     any error raised along the primary path."""
     try:
         self.__primary_rebalance_call_count += 1
         # hook that may raise to force the fallback path immediately
         self.__should_fallback_immediately()
         return self.__primary_allocator.rebalance(request)
     except Exception as e:
         workload_ids = [w.get_id() for w in request.get_workloads().values()]
         log.error(
             "Failed to rebalance workloads: '{}' with primary allocator: '{}', falling back to: '{}' because '{}'".format(
                 workload_ids,
                 self.__primary_allocator.__class__.__name__,
                 self.__secondary_allocator.__class__.__name__,
                 e))
         self.__secondary_rebalance_call_count += 1
         return self.__secondary_allocator.rebalance(request)
Example #12
0
    def test_forecast_ip_big_burst_pool_if_empty_instance(self):
        """A lone burst workload claims most of the cpu; its claim shrinks
        when a static workload arrives and recovers once it is freed."""
        allocator = forecast_ip_alloc_simple
        burst_w = get_test_workload("a", 1, BURST)

        cpu = allocator.assign_threads(
            AllocateThreadsRequest(get_cpu(), "a", {"a": burst_w}, {},
                                   DEFAULT_TEST_REQUEST_METADATA)).get_cpu()

        original_burst_claim_sz = len(cpu.get_claimed_threads())
        # should at least consume all the cores:
        self.assertLessEqual(len(cpu.get_threads()) / 2,
                             original_burst_claim_sz)

        static_w = get_test_workload("b", 3, STATIC)
        both_workloads = {"a": burst_w, "b": static_w}
        cpu = allocator.assign_threads(
            AllocateThreadsRequest(cpu, "b", both_workloads, {},
                                   DEFAULT_TEST_REQUEST_METADATA)).get_cpu()

        new_burst_claim_sz = len(
            get_threads_with_workload(cpu, static_w.get_id()))
        self.assertLess(new_burst_claim_sz, original_burst_claim_sz)

        total_claim_sz = len(cpu.get_claimed_threads())
        self.assertLessEqual(3 + 1, total_claim_sz)
        self.assertLessEqual(1, new_burst_claim_sz)

        # there shouldn't be an empty core
        for package in cpu.get_packages():
            for core in package.get_cores():
                claimed = sum(t.is_claimed() for t in core.get_threads())
                self.assertLess(0, claimed)

        cpu = allocator.free_threads(
            AllocateThreadsRequest(cpu, "b", both_workloads, {},
                                   DEFAULT_TEST_REQUEST_METADATA)).get_cpu()

        cpu = allocator.rebalance(
            AllocateRequest(cpu, {"a": burst_w}, {},
                            DEFAULT_TEST_REQUEST_METADATA)).get_cpu()
        self.assertEqual(original_burst_claim_sz,
                         len(cpu.get_claimed_threads()))
    def rebalance(self, request: AllocateRequest) -> AllocateResponse:
        """Ask the remote isolation service to rebalance threads.

        :raises requests.exceptions.Timeout: if the remote call times out
        :raises CpuAllocationException: on any non-200 response
        """
        endpoint = "{}/rebalance".format(self.__url)
        payload = request.to_dict()

        try:
            log.info("rebalancing threads remotely")
            resp = requests.put(endpoint,
                                json=payload,
                                headers=self.__headers,
                                timeout=self.__timeout)
        except requests.exceptions.Timeout as e:
            log.info("rebalancing threads remotely timed out")
            raise e

        if resp.status_code != 200:
            log.error("failed to rebalance threads remotely with status code: %d",
                      resp.status_code)
            raise CpuAllocationException("Failed to rebalance threads: {}".format(
                resp.text))

        log.info("rebalanced threads remotely")
        return deserialize_response(resp.headers, resp.json())
 def rebalance(self, request: AllocateRequest) -> AllocateResponse:
     """No-op rebalance: return the cpu exactly as it arrived."""
     unchanged_cpu = request.get_cpu()
     return AllocateResponse(unchanged_cpu, self.get_name())
Example #15
0
 def rebalance(self, request: AllocateRequest) -> AllocateResponse:
     """Report current allocations without moving any workloads."""
     cpu = request.get_cpu()
     allocations = get_workload_allocations(
         cpu, request.get_workloads().values())
     return AllocateResponse(cpu, allocations, self.get_name())
 def __get_rebalance_request(self):
     """Build an AllocateRequest from copies of the current cpu and workload
     state plus current cpu usage."""
     return AllocateRequest(self.get_cpu_copy(),
                            self.get_workload_map_copy(),
                            self.__get_cpu_usage(),
                            self.__get_request_metadata("rebalance"))
Example #17
0
def get_cpu_event(request: AllocateRequest, response: AllocateResponse):
    return {
        "request": request.to_dict(),
        "response": response.to_dict(),
    }
    def rebalance(self, request: AllocateRequest) -> AllocateResponse:
        """Rebalance via the module-level rebalance() helper and wrap the
        resulting cpu in a response."""
        balanced_cpu = rebalance(request.get_cpu(), request.get_workloads(),
                                 self.__free_thread_provider)
        return AllocateResponse(balanced_cpu, self.get_name())