def test_simple_usage(self):
    """Verify per-workload usage is readable directly and after a serialize/deserialize round trip."""
    first_id = str(uuid.uuid4())
    second_id = str(uuid.uuid4())

    first_series = [0.0, 1.0, 2.0]
    second_series = [3.0, 4.0, 5.0]

    # Every resource type reports the same series for a given workload.
    resource_keys = (CPU_USAGE, MEM_USAGE, NET_RECV_USAGE, NET_TRANS_USAGE, DISK_USAGE)
    raw_usage = {
        resource_key: {first_id: first_series, second_id: second_series}
        for resource_key in resource_keys
    }

    def check_getters(usage, series, workload_id):
        # The per-resource getters return maps indexed by workload id.
        self.assertEqual(series, usage.get_cpu_usage()[workload_id])
        self.assertEqual(series, usage.get_mem_usage()[workload_id])
        self.assertEqual(series, usage.get_net_trans_usage()[workload_id])
        self.assertEqual(series, usage.get_net_recv_usage()[workload_id])
        self.assertEqual(series, usage.get_disk_usage()[workload_id])

    def check_workload_view(series, usage_by_resource):
        # The per-workload view is indexed by resource type.
        self.assertEqual(series, usage_by_resource[CPU_USAGE])
        self.assertEqual(series, usage_by_resource[MEM_USAGE])
        self.assertEqual(series, usage_by_resource[NET_RECV_USAGE])
        self.assertEqual(series, usage_by_resource[NET_TRANS_USAGE])
        self.assertEqual(series, usage_by_resource[DISK_USAGE])

    def check_everything(usage):
        check_getters(usage, first_series, first_id)
        check_getters(usage, second_series, second_id)
        check_workload_view(first_series, usage.get_all_usage_for_workload(first_id))
        check_workload_view(second_series, usage.get_all_usage_for_workload(second_id))

    original_usage = GlobalResourceUsage(raw_usage)
    check_everything(original_usage)

    # The same expectations must hold for the deserialized copy.
    restored_usage = deserialize_global_resource_usage(original_usage.serialize())
    check_everything(restored_usage)
 def get_resource_usage(self, workload_ids: List[str]) -> GlobalResourceUsage:
     """
     Build a GlobalResourceUsage for the given workload ids.

     Any exception raised while gathering, wrapping or debug-logging the usage is caught: the
     failure is logged together with its traceback, the failure counter is incremented under the
     metric lock, and an empty GlobalResourceUsage is returned instead.

     :param workload_ids: ids of the workloads whose usage is requested
     :return: the collected usage, or an empty GlobalResourceUsage on failure
     """
     try:
         global_usage = GlobalResourceUsage(self.__get_usage_dict(workload_ids))
         log.debug("Got resource usage: %s", json.dumps(global_usage.serialize(), sort_keys=True, separators=(',', ':')))
         return global_usage
     except Exception:
         # log.exception records the active traceback alongside the message, so the root cause
         # of the failure is not lost when falling back to empty usage.
         log.exception("failed to get resource usage, returning empty usage")
         with self.__metric_lock:
             self.__get_resource_usage_failure_count += 1
         return GlobalResourceUsage({})
    def get_cpu_predictions(self, workloads: List[Workload], resource_usage: GlobalResourceUsage) \
            -> Optional[Dict[str, float]]:
        """
        Run ``self.predict`` for every workload that has an entry in the CPU usage map.

        Returns a dict mapping workload id to the value returned by ``self.predict``.  Workloads
        without CPU usage are skipped with a warning.  An empty dict is returned when the config
        manager is not set or when ``resource_usage`` has no CPU usage.
        """
        config_manager = get_config_manager()
        if config_manager is None:
            log.warning("Config manager is not yet set")
            return {}

        usage_by_workload = resource_usage.get_cpu_usage()
        if usage_by_workload is None:
            log.warning("No cpu usage")
            return {}

        # The prediction environment is the same for every workload in this call.
        pred_env = PredEnvironment(
            config_manager.get_region(),
            config_manager.get_environment(),
            datetime.utcnow().hour)

        predictions = {}
        for workload in workloads:
            workload_id = workload.get_id()
            raw_samples = usage_by_workload.get(workload_id)
            if raw_samples is None:
                log.warning("No CPU usage for workload: %s", workload_id)
                continue

            # Samples are coerced to float before being handed to the predictor.
            samples = [float(sample) for sample in raw_samples]
            predictions[workload_id] = self.predict(workload, samples, pred_env)

        return predictions
예제 #4
0
def get_no_usage_rebalance_request(cpu: Cpu, workloads: List[Workload]):
    """Build an AllocateRequest for ``cpu`` and ``workloads`` in which every usage field is empty."""
    workload_map = __workloads_list_to_map(workloads)
    no_usage = GlobalResourceUsage({})

    return AllocateRequest(
        cpu=cpu,
        workloads=workload_map,
        resource_usage=no_usage,
        cpu_usage={},
        mem_usage={},
        net_recv_usage={},
        net_trans_usage={},
        disk_usage={},
        metadata=DEFAULT_TEST_REQUEST_METADATA,
    )
    def test_empty_usage(self):
        """An empty usage object must look empty both directly and after a serialize/deserialize round trip."""

        def check_is_empty(usage):
            self.assertIsNotNone(usage)
            self.assertEqual({}, usage.serialize())
            self.assertEqual({}, usage.get_all_usage_for_workload("foo"))
            # With no data, every per-resource getter returns None.
            self.assertIsNone(usage.get_cpu_usage())
            self.assertIsNone(usage.get_mem_usage())
            self.assertIsNone(usage.get_net_recv_usage())
            self.assertIsNone(usage.get_net_trans_usage())
            self.assertIsNone(usage.get_disk_usage())

        empty_usage = GlobalResourceUsage({})
        check_is_empty(empty_usage)

        # Round-trip through the serialized form and verify again.
        serialized = empty_usage.serialize()
        restored_usage = deserialize_global_resource_usage(serialized)
        check_is_empty(restored_usage)
예제 #6
0
 def __get_job_body(self, pod: V1Pod, resource_usage: GlobalResourceUsage):
     """Assemble the request entry for ``pod``: its id, job descriptor, start time and past usage."""
     job_id = pod.metadata.name
     job_descriptor = get_job_descriptor(pod)
     started_ts_ms = str(get_start_time(pod))

     # Past usage is looked up by the pod name and translated before being sent.
     workload_usage = resource_usage.get_all_usage_for_workload(job_id)
     past_usage = self.__translate_usage(workload_usage)

     task_data = {
         "started_ts_ms": started_ts_ms,
         "past_usage": past_usage,
     }
     return {
         "job_id": job_id,
         "job_descriptor": job_descriptor,
         "task_data": task_data,
     }
    def __get_rebalance_request(self):
        """Build an AllocateRequest for a rebalance from copies of the CPU and workload map plus the current PCP usage."""
        pcp_usage = self.__wmm.get_pcp_usage()
        resource_usage = GlobalResourceUsage(pcp_usage)

        def _usage_for(resource_key):
            # Resource types missing from the PCP usage are reported as an empty dict.
            return pcp_usage.get(resource_key, {})

        return AllocateRequest(
            cpu=self.get_cpu_copy(),
            workloads=self.get_workload_map_copy(),
            resource_usage=resource_usage,
            cpu_usage=_usage_for(CPU_USAGE),
            mem_usage=_usage_for(MEM_USAGE),
            net_recv_usage=_usage_for(NET_RECV_USAGE),
            net_trans_usage=_usage_for(NET_TRANS_USAGE),
            disk_usage=_usage_for(DISK_USAGE),
            metadata=self.__get_request_metadata("rebalance"),
        )
    def __get_threads_request(self, workload_id, workload_map, request_type):
        """
        Build an AllocateThreadsRequest for ``workload_id`` using the current PCP usage.

        ``workload_map`` is passed through as the request's workloads and ``request_type`` is
        used to produce the request metadata.
        """
        pcp_usage = self.__wmm.get_pcp_usage()
        global_usage = GlobalResourceUsage(pcp_usage)
        cpu_snapshot = self.get_cpu_copy()

        # Resource types missing from the PCP usage are reported as an empty dict.
        cpu_usage = pcp_usage.get(CPU_USAGE, {})
        mem_usage = pcp_usage.get(MEM_USAGE, {})
        net_recv_usage = pcp_usage.get(NET_RECV_USAGE, {})
        net_trans_usage = pcp_usage.get(NET_TRANS_USAGE, {})
        disk_usage = pcp_usage.get(DISK_USAGE, {})
        metadata = self.__get_request_metadata(request_type)

        return AllocateThreadsRequest(
            cpu=cpu_snapshot,
            workload_id=workload_id,
            workloads=workload_map,
            resource_usage=global_usage,
            cpu_usage=cpu_usage,
            mem_usage=mem_usage,
            net_recv_usage=net_recv_usage,
            net_trans_usage=net_trans_usage,
            disk_usage=disk_usage,
            metadata=metadata,
        )
예제 #9
0
 def __get_job_body(self, pod: V1Pod, resource_usage: GlobalResourceUsage):
     """Assemble the request entry for ``pod``: its id, job descriptor, start time and past usage."""
     pod_name = pod.metadata.name

     # Note that on v1 pods job_descriptor is None.
     # That is OK though, because the resource prediction service will fetch the job
     # descriptor itself in that case.
     descriptor = get_job_descriptor(pod)

     start_time = str(get_start_time(pod))
     translated_usage = self.__translate_usage(
         resource_usage.get_all_usage_for_workload(pod_name))

     return {
         "job_id": pod_name,
         "job_descriptor": descriptor,
         "task_data": {
             "started_ts_ms": start_time,
             "past_usage": translated_usage,
         },
     }
예제 #10
0
    def __get_simple_cpu_predictions(self) -> Dict[str, float]:
        """
        Ask the current CPU predictor for predictions over all managed workloads.

        Returns the predictor's result, or an empty dict when no predictor is available or the
        predictor returns None.
        """
        predictor = self.__cpu_usage_predictor_manager.get_cpu_predictor()
        if predictor is None:
            log.error("Failed to get cpu predictor")
            return {}

        current_workloads = self.workload_manager.get_workloads()
        pcp_usage = self.__workload_monitor_manager.get_pcp_usage()
        usage = GlobalResourceUsage(pcp_usage)

        log.info("Getting simple cpu predictions...")
        predictions = predictor.get_cpu_predictions(current_workloads, usage)
        if predictions is None:
            log.error("Failed to get cpu predictions")
            return {}

        log.info("Got simple cpu predictions: %s", json.dumps(predictions))
        return predictions
    def get_predictions(
        self, pods: List[V1Pod], resource_usage: GlobalResourceUsage
    ) -> Optional[ResourceUsagePredictions]:
        """
        Request resource usage predictions for the pods that already appear in ``resource_usage``.

        Pods whose name is not among ``resource_usage.get_workload_ids()`` are treated as not yet
        running and are left out of the request.

        :param pods: candidate pods
        :param resource_usage: usage whose workload ids determine which pods are included
        :return: the wrapped predictions, or None when the config manager is not set, the
                 credential paths or URL cannot be produced, the request body cannot be built,
                 or the prediction call returns None
        """
        config_manager = get_config_manager()
        if config_manager is None:
            log.warning("Config manager not yet set.")
            return None

        # The set of reporting workloads does not change while filtering, so fetch it once
        # instead of once per pod; a set also keeps each membership test constant time.
        reporting_workload_ids = set(resource_usage.get_workload_ids())

        running_pods = []
        for p in pods:
            if p.metadata.name in reporting_workload_ids:
                running_pods.append(p)
            else:
                log.info("Pod is not yet running: %s", p.metadata.name)

        client_crt = get_client_cert_path(config_manager)
        client_key = get_client_key_path(config_manager)
        if client_crt is None or client_key is None:
            log.error("Failed to generate credential paths")
            return None

        url = get_url(config_manager)
        if url is None:
            log.error("Unable to generate prediction service url")
            return None

        body = self.__get_body(running_pods, resource_usage)
        if body is None:
            log.error("Unable to generate a prediction request body")
            return None

        # The bare name resolves to a module-level get_predictions function, not to this
        # method: class attributes are not visible as plain names inside a method body.
        predictions = get_predictions(client_crt, client_key, url, body)
        if predictions is None:
            log.error("Failed to get predictions")
            return None

        return ResourceUsagePredictions(predictions)
예제 #12
0
 def get_resource_usage(self,
                        workload_ids: List[str]) -> GlobalResourceUsage:
     """Return an empty GlobalResourceUsage; ``workload_ids`` is not consulted."""
     empty_usage = GlobalResourceUsage({})
     return empty_usage