def test_single_burst_workload_lifecycle(self):
    """A lone burst workload gets every thread, yet claims none in the model."""
    for alloc_cls in [GreedyCpuAllocator, IntegerProgramCpuAllocator]:
        requested_threads = 2
        burst_workload = Workload(uuid.uuid4(), requested_threads, BURST)
        cgroup_manager = MockCgroupManager()
        manager = WorkloadManager(get_cpu(), cgroup_manager, allocator_class=alloc_cls)

        # Add workload
        manager.add_workload(burst_workload)
        self.assertEqual(
            2, cgroup_manager.container_update_counts[burst_workload.get_id()])

        # All threads should have been assigned to the only burst workload.
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT,
            len(cgroup_manager.container_update_map[burst_workload.get_id()]))

        # No threads should have been consumed from the cpu model perspective.
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT,
            len(manager.get_cpu().get_empty_threads()))

        # Remove workload: the cpu stays fully empty.
        manager.remove_workload(burst_workload.get_id())
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT,
            len(manager.get_cpu().get_empty_threads()))
def test_remove_unknown_workload(self):
    """Removing an id that was never added must leave the cpu model untouched.

    BUG FIX: two calls passed ``[unknown_workload_id]`` (a one-element list)
    where every other call site in this file passes a plain workload id.
    The test only passed because a list is also an unknown key; pass the id
    itself so the test exercises the intended API.
    """
    for allocator_class in [GreedyCpuAllocator, IntegerProgramCpuAllocator]:
        unknown_workload_id = "unknown"
        thread_count = 2
        workload = Workload(uuid.uuid4(), thread_count, STATIC)

        workload_manager = WorkloadManager(
            get_cpu(), MockCgroupManager(), allocator_class=allocator_class)

        # Remove from empty set
        workload_manager.remove_workload(unknown_workload_id)

        # Add workload
        workload_manager.add_workload(workload)
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT - thread_count,
            len(workload_manager.get_cpu().get_empty_threads()))

        # Removal of an unknown workload should have no effect
        workload_manager.remove_workload(unknown_workload_id)
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT - thread_count,
            len(workload_manager.get_cpu().get_empty_threads()))

        # Remove workload with unknown workload, real workload should be removed
        workload_manager.remove_workload(unknown_workload_id)
        workload_manager.remove_workload(workload.get_id())
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT,
            len(workload_manager.get_cpu().get_empty_threads()))
def test_single_static_workload_lifecycle(self):
    """Adding then removing one static workload returns the cpu to empty."""
    for alloc_cls in [GreedyCpuAllocator, IntegerProgramCpuAllocator]:
        requested = 2
        static_workload = Workload(uuid.uuid4(), requested, STATIC)
        cgroups = MockCgroupManager()
        manager = WorkloadManager(get_cpu(), cgroups, allocator_class=alloc_cls)

        # Add: the claimed threads disappear from the empty pool and the
        # container cgroup is updated exactly once.
        manager.add_workload(static_workload)
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT - requested,
            len(manager.get_cpu().get_empty_threads()))
        self.assertEqual(
            1, cgroups.container_update_counts[static_workload.get_id()])

        # Remove: every thread is free again.
        manager.remove_workload(static_workload.get_id())
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT,
            len(manager.get_cpu().get_empty_threads()))
def test_assign_two_workloads_empty_cpu_ip(self):
    """
    Workload 0: 2 threads --> (p:0 c:0 t:0) (p:0 c:1 t:0)
    Workload 1: 1 thread  --> (p:1 c:0 t:0)
    """
    cpu = get_cpu()
    allocator = IntegerProgramCpuAllocator(cpu)
    first = Workload(uuid.uuid4(), 2, STATIC)
    second = Workload(uuid.uuid4(), 1, STATIC)

    allocator.assign_threads(first)
    allocator.assign_threads(second)
    self.assertEqual(3, len(cpu.get_claimed_threads()))

    packages = cpu.get_packages()

    # Workload 0: thread 0 of cores 0 and 1 on package 0 (ids 0 and 1).
    for core_index, expected_id in [(0, 0), (1, 1)]:
        thread = packages[0].get_cores()[core_index].get_threads()[0]
        self.assertEqual(expected_id, thread.get_id())
        self.assertTrue(thread.is_claimed())

    # Workload 1: thread 0 of core 0 on package 1 (id 4).
    lone_thread = packages[1].get_cores()[0].get_threads()[0]
    self.assertEqual(4, lone_thread.get_id())
    self.assertTrue(lone_thread.is_claimed())
def test_cache_ip(self):
    """
    [add a=2, add b=2, remove b=2, add c=2, remove a=2, add d=2]
    should lead to the following cache entries:
    (state=[], req=[2])
    (state=[2], req=[2,2])
    (state=[2,2], req=[2,0])
    [cache hit]
    [cache hit]
    (state=[2,2], req=[2,2]) but different layout
    """
    cpu = get_cpu()
    allocator = IntegerProgramCpuAllocator(cpu)

    def cache_size():
        # Re-read the name-mangled private attribute on each call in case
        # the allocator rebinds it between operations.
        return len(allocator._IntegerProgramCpuAllocator__cache)

    allocator.assign_threads(Workload("a", 2, STATIC))
    self.assertEqual(1, cache_size())

    allocator.assign_threads(Workload("b", 2, STATIC))
    self.assertEqual(2, cache_size())

    allocator.free_threads("b")
    self.assertEqual(3, cache_size())

    allocator.assign_threads(Workload("c", 2, STATIC))
    self.assertEqual(3, cache_size())  # cache hit

    allocator.free_threads("a")
    self.assertEqual(4, cache_size())

    allocator.assign_threads(Workload("d", 2, STATIC))
    self.assertEqual(5, cache_size())
def test_fill_cpu(self):
    """
    Workload 0: 8 cores
    Workload 1: 4 cores
    Workload 2: 2 cores
    Workload 3: 1 core
    Workload 4: 1 core
    --------------------
    Total:     16 cores
    """
    for allocator_class in ALLOCATORS:
        cpu = get_cpu()
        allocator = allocator_class(cpu)
        workloads = [
            Workload(uuid.uuid4(), size, STATIC)
            for size in (8, 4, 2, 1, 1)]

        for workload in workloads:
            allocator.assign_threads(workload)

        # Every requested thread must end up claimed.
        expected_total = sum(w.get_thread_count() for w in workloads)
        self.assertEqual(expected_total, len(cpu.get_claimed_threads()))
def test_invalid_workload(self):
    """The Workload constructor must reject invalid arguments."""
    invalid_arguments = [
        (uuid.uuid4(), 1, "UNKNOWN_WORKLOAD_TYPE"),  # unknown workload type
        (uuid.uuid4(), -1, BURST),                   # negative thread count
        (BURST, 1, STATIC),                          # invalid identifier
    ]
    for identifier, count, workload_type in invalid_arguments:
        with self.assertRaises(ValueError):
            Workload(identifier, count, workload_type)
def test_construction(self):
    """Accessors must echo back exactly what the constructor received."""
    workload_id = uuid.uuid4()
    requested_threads = 2
    w = Workload(workload_id, requested_threads, STATIC)

    self.assertEqual(workload_id, w.get_id())
    self.assertEqual(requested_threads, w.get_thread_count())
    self.assertEqual(STATIC, w.get_type())
def test_assign_threads(self):
    """The noop-reset allocator hands the whole cpu to the workload in one update."""
    cpu = get_cpu()
    cgroups = MockCgroupManager()
    allocator = NoopResetCpuAllocator(cpu)
    allocator.set_cgroup_manager(cgroups)

    workload = Workload(uuid.uuid4(), 1, STATIC)
    allocator.assign_threads(workload)

    # Exactly one cgroup update, containing every thread of the cpu.
    self.assertEqual(1, cgroups.container_update_counts[workload.get_id()])
    self.assertEqual(
        len(cpu.get_threads()),
        len(cgroups.container_update_map[workload.get_id()]))
def test_assign_to_full_cpu_fails(self):
    """Once every thread is claimed, a further assignment must raise."""
    for allocator_class in ALLOCATORS:
        cpu = get_cpu()
        allocator = allocator_class(cpu)

        # Saturate the cpu with a single workload.
        filler = Workload(uuid.uuid4(), DEFAULT_TOTAL_THREAD_COUNT, STATIC)
        allocator.assign_threads(filler)
        self.assertTrue(is_cpu_full(cpu))

        # One more thread cannot be claimed.
        overflow = Workload(uuid.uuid4(), 1, STATIC)
        with self.assertRaises(ValueError):
            allocator.assign_threads(overflow)
def test_free_cpu(self):
    """Freeing a workload's threads restores the empty-thread count."""
    for allocator_class in ALLOCATORS:
        cpu = get_cpu()
        allocator = allocator_class(cpu)
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT, len(cpu.get_empty_threads()))

        workload = Workload(uuid.uuid4(), 3, STATIC)
        allocator.assign_threads(workload)
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT - workload.get_thread_count(),
            len(cpu.get_empty_threads()))

        allocator.free_threads(workload.get_id())
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT, len(cpu.get_empty_threads()))
def handle(self, event):
    """Turn a relevant container event into a workload and register it."""
    if not self.__relevant(event):
        return

    # Argument order preserves the original extraction order:
    # name, cpu count, workload type.
    workload = Workload(
        get_container_name(event),
        get_cpu_count(event),
        get_workload_type(event))

    self.handling_event(event, "adding workload: '{}'".format(workload.get_id()))
    self.workload_manager.add_workload(workload)
    self.handled_event(event, "added workload: '{}'".format(workload.get_id()))
def assign_threads(self, workload):
    """Claim empty threads for *workload*, packing the emptiest package first.

    Strategy (greedy): pick the emptiest package, then repeatedly take empty
    threads from that package's emptiest core until either the request is
    satisfied or the package runs out. Any remainder is handled by recursing
    with a same-id workload for the leftover count, which re-selects the
    (now) emptiest package — this is how a request spills across packages.

    :param workload: workload whose id claims the threads; its thread count
                     is the number of threads requested.
    :return: list of the claimed thread objects (empty list for a request of 0).
    :raises ValueError: if the CPU has no empty threads at all.
    """
    thread_count = workload.get_thread_count()
    claimed_threads = []

    # Recursion base case: nothing (left) to claim.
    if thread_count == 0:
        return claimed_threads

    log.info("Assigning '{}' thread(s) to workload: '{}'".format(workload.get_thread_count(), workload.get_id()))

    if is_cpu_full(self.__cpu):
        raise ValueError("Cannot assign workload: '{}' to full CPU.".format(workload.get_id()))

    package = self.__cpu.get_emptiest_package()

    # Drain this one package core by core while demand and capacity remain.
    while thread_count > 0 and len(package.get_empty_threads()) > 0:
        core = get_emptiest_core(package)
        # Take at most the remaining request from this core.
        empty_threads = core.get_empty_threads()[:thread_count]

        for empty_thread in empty_threads:
            log.debug("Claiming package:core:thread '{}:{}:{}' for workload '{}'".format(
                package.get_id(), core.get_id(), empty_thread.get_id(), workload.get_id()))
            empty_thread.claim(workload.get_id())
            claimed_threads.append(empty_thread)
            thread_count -= 1

    # Spill any unmet remainder to another package via recursion
    # (a fresh Workload carries the leftover count under the same id).
    return claimed_threads + self.assign_threads(Workload(workload.get_id(), thread_count, workload.get_type()))
def get_workload_from_event(event):
    """Translate a container event into a fully-populated Workload.

    Each field is pulled from the event by its dedicated extractor; the
    keyword arguments below preserve the original extraction order.
    """
    return Workload(
        identifier=get_container_name(event),
        thread_count=get_cpu(event),
        mem=get_mem(event),
        disk=get_disk(event),
        network=get_network(event),
        app_name=get_app_name(event),
        owner_email=get_owner_email(event),
        image=get_image(event),
        command=get_command(event),
        entrypoint=get_entrypoint(event),
        job_type=get_job_type(event),
        workload_type=get_workload_type(event))
def test_free_cpu_on_container_die(self):
    """A die event must free every thread its create event claimed."""
    workload_name = str(uuid.uuid4())
    workload = Workload(workload_name, DEFAULT_CPU_COUNT, STATIC)
    docker_client = MockDockerClient([MockContainer(workload)])

    events = [
        get_container_create_event(
            DEFAULT_CPU_COUNT, STATIC, workload_name, workload_name),
        get_container_die_event(workload_name)
    ]
    expected_count = len(events)
    # The second argument forces in-order event processing for the test.
    event_iterable = MockEventProvider(events, 1)

    test_context = TestContext(docker_client)
    manager = EventManager(
        event_iterable,
        test_context.get_event_handlers(),
        get_mock_file_manager(),
        DEFAULT_TEST_EVENT_TIMEOUT_SECS)

    wait_until(lambda: expected_count == manager.get_processed_count())
    self.assertEqual(0, manager.get_queue_depth())
    # Create claimed threads; die released them all.
    self.assertEqual(
        DEFAULT_TOTAL_THREAD_COUNT,
        len(test_context.get_cpu().get_empty_threads()))
    self.assertEqual(
        1, test_context.get_create_event_handler().get_handled_event_count())
    self.assertEqual(
        1, test_context.get_free_event_handler().get_handled_event_count())

    manager.stop_processing_events()
def test_one_cross_package_violation(self):
    """Nine threads exceed one package, forcing exactly one cross-package split."""
    cpu = get_cpu()
    allocator = IntegerProgramCpuAllocator(cpu)

    allocator.assign_threads(Workload(uuid.uuid4(), 9, STATIC))

    self.assertEqual(1, len(get_cross_package_violations(cpu)))
def test_no_fallback_should_error(self):
    """With fallback disabled, a crashing allocator surfaces as an error count."""
    manager = WorkloadManager(
        get_cpu(2, 2, 2),
        MockCgroupManager(),
        allocator_class=CrashingAssignAllocator,
        fallback_allocator_class=None)

    manager.add_workload(Workload("foo", 1, STATIC))

    self.assertEqual(1, manager.get_error_count())
def test_no_cross_packages_placement_no_bad_affinity_ip(self):
    """Four workloads (3+2+1+2) exactly fill a 2x2x2 cpu without package splits."""
    cpu = get_cpu(package_count=2, cores_per_package=2, threads_per_core=2)
    workload_manager = WorkloadManager(cpu, MockCgroupManager())

    for identifier, size in [("a", 3), ("b", 2), ("c", 1), ("d", 2)]:
        workload_manager.add_workload(Workload(identifier, size, STATIC))

    final_cpu = workload_manager.get_cpu()
    self.assertEqual(0, len(get_cross_package_violations(final_cpu)))
    # self.assertEqual(1, len(get_shared_core_violations(workload_manager.get_cpu())))  # todo: fix me
    self.assertEqual(0, len(final_cpu.get_empty_threads()))
def parse_workload(workload_dict: dict) -> Workload:
    """Deserialize a workload dict (as emitted by the API) into a Workload.

    Also restores the creation timestamp, which is serialized as e.g.
    "2019-03-23 18:03:50.668041".
    """
    workload = Workload(
        identifier=workload_dict['id'],
        thread_count=workload_dict['thread_count'],
        mem=workload_dict['mem'],
        disk=workload_dict['disk'],
        network=workload_dict['network'],
        app_name=workload_dict['app_name'],
        owner_email=workload_dict['owner_email'],
        image=workload_dict['image'],
        command=workload_dict['command'],
        entrypoint=workload_dict['entrypoint'],
        job_type=workload_dict['job_type'],
        workload_type=workload_dict['type'])

    parsed_creation_time = datetime.datetime.strptime(
        workload_dict["creation_time"], '%Y-%m-%d %H:%M:%S.%f')
    workload.set_creation_time(parsed_creation_time)
    return workload
def get_test_workload(identifier, thread_count, workload_type):
    """Build a Workload for tests.

    Only identifier, thread count and workload type vary per test; every
    other resource/metadata field is filled from the shared test defaults.
    """
    return Workload(
        identifier=identifier,
        thread_count=thread_count,
        mem=DEFAULT_TEST_MEM,
        disk=DEFAULT_TEST_DISK,
        network=DEFAULT_TEST_NETWORK,
        app_name=DEFAULT_TEST_APP_NAME,
        owner_email=DEFAULT_TEST_OWNER_EMAIL,
        image=DEFAULT_TEST_IMAGE,
        command=DEFAULT_TEST_CMD,
        entrypoint=DEFAULT_TEST_ENTRYPOINT,
        job_type=DEFAULT_TEST_JOB_TYPE,
        workload_type=workload_type)
def test_no_change_populated_cpu(self):
    """The same workload greedily placed on two fresh cpus yields equal isolation."""
    workload = Workload(uuid.uuid4(), 4, STATIC)
    cur_cpu = get_cpu()
    new_cpu = get_cpu()

    GreedyCpuAllocator(cur_cpu).assign_threads(workload)
    GreedyCpuAllocator(new_cpu).assign_threads(workload)

    self.assertFalse(has_better_isolation(cur_cpu, new_cpu))
def test_free_cpu_3_workloads(self):
    # Add 3 workloads sequentially, and then remove the 2nd one added.
    for allocator_class in ALLOCATORS:
        cpu = get_cpu()
        allocator = allocator_class(cpu)

        w0 = Workload(123, 3, STATIC)
        w1 = Workload(456, 2, STATIC)
        w2 = Workload(789, 4, STATIC)
        for workload in (w0, w1, w2):
            allocator.assign_threads(workload)
        self.assertEqual(3 + 4 + 2, len(cpu.get_claimed_threads()))

        allocator.free_threads(w1.get_id())
        self.assertEqual(3 + 4, len(cpu.get_claimed_threads()))

        # Only the first and third workloads should still own threads.
        remaining_ids = {
            t.get_workload_id() for t in cpu.get_threads() if t.is_claimed()}
        self.assertListEqual(sorted(remaining_ids), [123, 789])
def predict(self, workload: Workload, cpu_usage_last_hour: np.array, pred_env: PredEnvironment) -> float:
    """Return a canned cpu-usage prediction keyed off the workload id.

    Known ids map to a fixed per-thread usage factor; any other id yields
    no prediction (None), matching the original fall-through behavior.
    """
    usage_factor_by_id = {
        'static_a': 0.8,
        'static_b': 0.01,
        'burst_c': 0.9,
    }
    factor = usage_factor_by_id.get(workload.get_id())
    if factor is None:
        return None
    return workload.get_thread_count() * factor
def test_ip_fallback(self):
    """Every operation should route through the fallback when the primary crashes."""
    w_a = Workload("a", 3, STATIC)
    w_b = Workload("b", 2, STATIC)
    w_c = Workload("c", 1, STATIC)
    w_d = Workload("d", 2, STATIC)

    cpu = get_cpu(package_count=2, cores_per_package=2, threads_per_core=2)
    manager = WorkloadManager(
        cpu, MockCgroupManager(), allocator_class=CrashingAllocator)

    # Interleave adds and removes: a, b, -a, c, -b, d.
    manager.add_workload(w_a)
    manager.add_workload(w_b)
    manager.remove_workload("a")
    manager.add_workload(w_c)
    manager.remove_workload("b")
    manager.add_workload(w_d)

    # c (1) + d (2) remain claimed, in both views of the cpu.
    self.assertEqual(3, len(manager.get_cpu().get_claimed_threads()))
    self.assertEqual(
        3, len(manager.get_allocator().get_cpu().get_claimed_threads()))
    # All six operations went through the fallback allocator.
    self.assertEqual(6, manager.get_fallback_allocator_calls_count())
def test_assign_one_thread_empty_cpu(self):
    """
    Workload 0: 1 thread --> (p:0 c:0 t:0)
    """
    for allocator_class in ALLOCATORS:
        cpu = get_cpu()
        allocator = allocator_class(cpu)
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT, len(cpu.get_empty_threads()))

        allocator.assign_threads(Workload(uuid.uuid4(), 1, STATIC))

        claimed = cpu.get_claimed_threads()
        self.assertEqual(
            DEFAULT_TOTAL_THREAD_COUNT - 1, len(cpu.get_empty_threads()))
        self.assertEqual(1, len(claimed))
        # The very first thread (id 0) is the one claimed.
        self.assertEqual(0, claimed[0].get_id())
def test_assign_two_threads_empty_cpu_greedy(self):
    """
    Workload 0: 2 threads --> (p:0 c:0 t:0) (p:0 c:1 t:1)
    """
    cpu = get_cpu()
    allocator = GreedyCpuAllocator(cpu)
    allocator.assign_threads(Workload(uuid.uuid4(), 2, STATIC))
    self.assertEqual(2, len(cpu.get_claimed_threads()))

    # Greedy packs both hyperthreads of core 0 on package 0 (ids 0 and 8).
    core00 = cpu.get_packages()[0].get_cores()[0]
    for thread_index, expected_id in [(0, 0), (1, 8)]:
        thread = core00.get_threads()[thread_index]
        self.assertEqual(expected_id, thread.get_id())
        self.assertTrue(thread.is_claimed())
def test_get_workloads_endpoint(self):
    """The workloads endpoint reflects additions made to the manager."""
    override_config_manager(ConfigManager(TestPropertyProvider({})))

    thread_count = 2
    workload_id = str(uuid.uuid4())
    workload = Workload(workload_id, thread_count, STATIC)
    workload_manager = WorkloadManager(get_cpu(), MockCgroupManager())
    set_wm(workload_manager)

    # Nothing reported before any workload is added.
    self.assertEqual(0, len(json.loads(get_workloads())))

    workload_manager.add_workload(workload)
    reported = json.loads(get_workloads())
    self.assertEqual(workload_id, reported[0]["id"])
    self.assertEqual(STATIC, reported[0]["type"])
    self.assertEqual(thread_count, reported[0]["thread_count"])
def get_current_workloads(docker_client):
    """Return Workload objects for all running containers with the expected labels.

    Containers missing the required labels are skipped with a warning; label
    parse failures are logged (with traceback) and the container is skipped.

    :param docker_client: docker client whose ``containers.list()`` yields
                          running containers with ``name`` and ``labels``.
    :return: list of Workload objects.
    """
    workloads = []

    for container in docker_client.containers.list():
        workload_id = container.name
        if __has_required_labels(container):
            try:
                labels = container.labels
                cpu = int(__get_value(labels, CPU_LABEL_KEY, -1))
                mem = int(__get_value(labels, MEM_LABEL_KEY, -1))
                disk = int(__get_value(labels, DISK_LABEL_KEY, -1))
                # BUG FIX: network was previously read from DISK_LABEL_KEY.
                network = int(__get_value(labels, NETWORK_LABEL_KEY, -1))
                app_name = __get_value(labels, APP_NAME_LABEL_KEY)
                owner_email = __get_value(labels, OWNER_EMAIL_LABEL_KEY)
                command = __get_value(labels, COMMAND_LABEL_KEY)
                entrypoint = __get_value(labels, ENTRYPOINT_LABEL_KEY)
                job_type = __get_value(labels, JOB_TYPE_LABEL_KEY)
                workload_type = __get_value(labels, WORKLOAD_TYPE_LABEL_KEY)
                image = __get_image(container)

                workloads.append(
                    Workload(
                        identifier=workload_id,
                        thread_count=cpu,
                        mem=mem,
                        disk=disk,
                        network=network,
                        app_name=app_name,
                        owner_email=owner_email,
                        image=image,
                        command=command,
                        entrypoint=entrypoint,
                        job_type=job_type,
                        workload_type=workload_type))
                log.info("Found running workload: '{}'".format(workload_id))
            except Exception:
                # Narrowed from a bare except so KeyboardInterrupt/SystemExit
                # propagate; still best-effort per container.
                log.exception(
                    "Failed to parse labels for container: '{}'".format(
                        container.name))
        else:
            # BUG FIX: the placeholder was never filled (missing .format()).
            log.warning(
                "Found running workload: '{}' without expected labels".format(
                    workload_id))

    return workloads
def get_current_workloads(docker_client):
    """Return Workload objects for running containers exposing cpu/type labels.

    Containers lacking the required labels are skipped with a warning; label
    parse failures are logged (with traceback) and the container is skipped.

    :param docker_client: docker client whose ``containers.list()`` yields
                          running containers with ``name`` and ``labels``.
    :return: list of Workload objects.
    """
    workloads = []

    for container in docker_client.containers.list():
        workload_id = container.name
        if __has_required_labels(container):
            try:
                cpu = int(container.labels[CPU_LABEL_KEY])
                workload_type = container.labels[WORKLOAD_TYPE_LABEL_KEY]
                workloads.append(Workload(workload_id, cpu, workload_type))
                log.info("Found running workload: '{}'".format(workload_id))
            except Exception:
                # BUG FIX: narrowed from a bare except so KeyboardInterrupt/
                # SystemExit propagate; still best-effort per container.
                log.exception(
                    "Failed to parse labels for container: '{}'".format(
                        container.name))
        else:
            log.warning(
                "Found running workload: '{}' without expected label: '{}'".
                format(workload_id, CPU_LABEL_KEY))

    return workloads
def test_assign_ten_threads_empty_cpu_ip(self):
    """
    Workload 0: 10 threads --> (p:0 c:[0-7] t:[0-9])
    | 1 | 1 | 1 | 1 |
    | 1 | 1 |   |   |
    | ------------- |
    | 1 | 1 | 1 | 1 |
    |   |   |   |   |
    """
    cpu = get_cpu()
    allocator = IntegerProgramCpuAllocator(cpu)
    allocator.assign_threads(Workload(uuid.uuid4(), 10, STATIC))

    claimed = cpu.get_claimed_threads()
    self.assertEqual(10, len(claimed))

    expected_thread_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 12]
    self.assertEqual(
        expected_thread_ids,
        sorted(thread.get_id() for thread in claimed))