def test_get_wm_status_endpoint(self):
    """Check the shape of the document returned by ``get_wm_status``.

    The status is decoded as JSON and must contain exactly one top-level
    entry, whose ``"workload_manager"`` value in turn has three entries.
    """
    set_workload_manager(self.__get_default_workload_manager())

    # No events and no handlers: the manager only needs to be running.
    event_manager = EventManager(MockEventProvider([]), [], 0.01)
    set_event_manager(event_manager)
    event_manager.start_processing_events()
    try:
        s = json.loads(get_wm_status())
        self.assertEqual(1, len(s))
        self.assertEqual(3, len(s["workload_manager"]))
    finally:
        # Stop event processing even when an assertion above fails, so a
        # failing test does not leave the manager running.
        event_manager.stop_processing_events()
reconcile_event_handler = ReconcileEventHandler(reconciler) oversub_event_handler = None if is_kubernetes(): oversub_event_handler = OversubscribeEventHandler( workload_manager, KubernetesOpportunisticWindowPublisher()) event_handlers = [ h for h in [ create_event_handler, free_event_handler, rebalance_event_handler, reconcile_event_handler, oversub_event_handler ] if h is not None ] # Start event processing log.info("Starting Docker event handling...") event_manager = EventManager(docker.from_env().events(), event_handlers) set_event_manager(event_manager) log.info("Starting health check thread...") threading.Thread(target=health_check, args=[exit_handler, event_manager, get_config_manager()]).start() # Report metrics log.info("Starting metrics reporting...") metrics_reporters = [ m for m in [ cgroup_manager, event_log_manager, event_manager, reconciler, workload_manager, workload_monitor_manager, oversub_event_handler ] if m is not None ]
def test_free_cpu_on_container_die(self):
    """A create event followed by a die event leaves every thread empty.

    Both events target the same workload name. After they are processed
    the create and free handlers must each have handled one event, and
    the reported gauges must show no failures.
    """
    registry = Registry()
    workload_name = str(uuid.uuid4())
    events = [
        get_container_create_event(DEFAULT_CPU_COUNT, STATIC, workload_name,
                                   workload_name),
        get_container_die_event(workload_name)
    ]
    event_count = len(events)
    # Force in order event processing for the test
    event_iterable = MockEventProvider(events, 1)

    test_context = TestContext()
    manager = EventManager(event_iterable,
                           test_context.get_event_handlers(),
                           DEFAULT_TEST_EVENT_TIMEOUT_SECS)
    manager.set_registry(registry)
    manager.start_processing_events()
    try:
        wait_until(lambda: event_count == manager.get_processed_count())

        self.assertEqual(0, manager.get_queue_depth())
        self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                         len(test_context.get_cpu().get_empty_threads()))
        self.assertEqual(
            1,
            test_context.get_create_event_handler().get_handled_event_count())
        self.assertEqual(
            1,
            test_context.get_free_event_handler().get_handled_event_count())
    finally:
        # Stop event processing even when an assertion above fails, so a
        # failing test does not leave the manager running.
        manager.stop_processing_events()

    manager.report_metrics({})
    self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
    self.assertTrue(
        gauge_value_equals(
            registry, EVENT_SUCCEEDED_KEY,
            event_count * len(test_context.get_event_handlers())))
    self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 0))
    self.assertTrue(
        gauge_value_equals(registry, EVENT_PROCESSED_KEY, event_count))
# Setup the event handlers log.info("Setting up the Docker event handlers...") create_event_handler = CreateEventHandler(workload_manager) free_event_handler = FreeEventHandler(workload_manager) rebalance_event_handler = RebalanceEventHandler(workload_manager) reconciler = Reconciler(cgroup_manager, RealExitHandler()) reconcile_event_handler = ReconcileEventHandler(reconciler) event_handlers = [ create_event_handler, free_event_handler, rebalance_event_handler, reconcile_event_handler ] # Start event processing log.info("Starting Docker event handling...") event_manager = EventManager(docker.from_env().events(), event_handlers) set_event_manager(event_manager) # Report metrics log.info("Starting metrics reporting...") MetricsManager([ cgroup_manager, event_log_manager, event_manager, reconciler, workload_manager, workload_monitor_manager ]) # Initialize currently running containers as workloads log.info("Isolating currently running workloads...") for workload in get_current_workloads(docker.from_env()): try: workload_manager.add_workload(workload) except:
def test_rebalance(self):
    """A single rebalance event reaches only the rebalance handler.

    The create and free handlers must report zero handled events, the
    rebalance handler exactly one, and all threads must still be empty.
    """
    registry = Registry()
    events = [REBALANCE_EVENT]
    event_count = len(events)
    event_iterable = MockEventProvider(events)

    test_context = TestContext()
    manager = EventManager(event_iterable,
                           test_context.get_event_handlers(),
                           DEFAULT_TEST_EVENT_TIMEOUT_SECS)
    manager.set_registry(registry)
    manager.start_processing_events()
    try:
        wait_until(lambda: event_count == manager.get_processed_count())

        self.assertEqual(0, manager.get_queue_depth())
        self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                         len(test_context.get_cpu().get_empty_threads()))
        self.assertEqual(
            0,
            test_context.get_create_event_handler().get_handled_event_count())
        self.assertEqual(
            0,
            test_context.get_free_event_handler().get_handled_event_count())
        self.assertEqual(
            1,
            test_context.get_rebalance_event_handler()
            .get_handled_event_count())
    finally:
        # Stop event processing even when an assertion above fails, so a
        # failing test does not leave the manager running.
        manager.stop_processing_events()

    manager.report_metrics({})
    self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
    self.assertTrue(
        gauge_value_equals(
            registry, EVENT_SUCCEEDED_KEY,
            event_count * len(test_context.get_event_handlers())))
    self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 0))
    self.assertTrue(
        gauge_value_equals(registry, EVENT_PROCESSED_KEY, event_count))
def test_unknown_workload_type_label(self):
    """An unrecognised workload-type label yields one error, not a stall.

    The first create event carries ``WORKLOAD_TYPE_LABEL_KEY: "unknown"``
    and the second is a valid create event. The manager must record one
    error and still process both events.
    """
    registry = Registry()
    test_context = TestContext()
    unknown_event = get_event(
        CONTAINER, CREATE, uuid.uuid4(), {
            NAME: "container-name",
            APP_NAME_LABEL_KEY: DEFAULT_TEST_APP_NAME,
            CPU_LABEL_KEY: "1",
            MEM_LABEL_KEY: str(DEFAULT_TEST_MEM),
            DISK_LABEL_KEY: str(DEFAULT_TEST_DISK),
            NETWORK_LABEL_KEY: str(DEFAULT_TEST_NETWORK),
            JOB_TYPE_LABEL_KEY: DEFAULT_TEST_JOB_TYPE,
            WORKLOAD_TYPE_LABEL_KEY: "unknown",
            OWNER_EMAIL_LABEL_KEY: DEFAULT_TEST_OWNER_EMAIL,
            IMAGE_LABEL_KEY: DEFAULT_TEST_IMAGE,
        })
    valid_event = get_container_create_event(1)
    event_iterable = MockEventProvider([unknown_event, valid_event])

    manager = EventManager(event_iterable,
                           test_context.get_event_handlers(),
                           DEFAULT_TEST_EVENT_TIMEOUT_SECS)
    manager.set_registry(registry)
    manager.start_processing_events()
    try:
        wait_until(lambda: manager.get_error_count() == 1)
        wait_until(lambda: manager.get_processed_count() == 2)

        self.assertEqual(0, manager.get_queue_depth())
    finally:
        # Stop event processing even when a wait or assertion above fails,
        # so a failing test does not leave the manager running.
        manager.stop_processing_events()

    manager.report_metrics({})
    self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
    self.assertTrue(gauge_value_equals(registry, EVENT_SUCCEEDED_KEY, 5))
    self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 1))
    self.assertTrue(gauge_value_equals(registry, EVENT_PROCESSED_KEY, 2))
def test_absent_workload_type_label(self):
    """A create event without a workload-type label is ignored.

    The event carries only a CPU label and a name. The create handler
    must count it as ignored, and the reported gauges must show one
    processed event with no failures.
    """
    registry = Registry()
    test_context = TestContext()
    name = str(uuid.uuid4())
    unknown_event = get_event(CONTAINER, CREATE, name, {
        CPU_LABEL_KEY: "1",
        NAME: name
    })
    event_handlers = test_context.get_event_handlers()
    event_iterable = MockEventProvider([unknown_event])

    manager = EventManager(event_iterable, event_handlers,
                           DEFAULT_TEST_EVENT_TIMEOUT_SECS)
    manager.set_registry(registry)
    manager.start_processing_events()
    try:
        wait_until(lambda: test_context.get_create_event_handler().
                   get_ignored_event_count() == 1)

        self.assertEqual(0, manager.get_queue_depth())
    finally:
        # Stop event processing even when the wait or assertion above
        # fails, so a failing test does not leave the manager running.
        manager.stop_processing_events()

    manager.report_metrics({})
    self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
    self.assertTrue(
        gauge_value_equals(registry, EVENT_SUCCEEDED_KEY,
                           len(test_context.get_event_handlers())))
    self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 0))
    self.assertTrue(gauge_value_equals(registry, EVENT_PROCESSED_KEY, 1))
def test_unknown_action(self):
    """A container event with an unrecognised action is ignored.

    The event's action is the literal ``"unknown"`` and it has no labels.
    The create handler must count it as ignored, and the reported gauges
    must show one processed event with no failures.
    """
    registry = Registry()
    test_context = TestContext()
    unknown_event = get_event(CONTAINER, "unknown", uuid.uuid4(), {})
    event_iterable = MockEventProvider([unknown_event])

    manager = EventManager(event_iterable,
                           test_context.get_event_handlers(),
                           DEFAULT_TEST_EVENT_TIMEOUT_SECS)
    manager.set_registry(registry)
    manager.start_processing_events()
    try:
        wait_until(lambda: test_context.get_create_event_handler().
                   get_ignored_event_count() == 1)

        self.assertEqual(0, manager.get_queue_depth())
    finally:
        # Stop event processing even when the wait or assertion above
        # fails, so a failing test does not leave the manager running.
        manager.stop_processing_events()

    manager.report_metrics({})
    self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
    self.assertTrue(
        gauge_value_equals(registry, EVENT_SUCCEEDED_KEY,
                           len(test_context.get_event_handlers())))
    self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 0))
    self.assertTrue(gauge_value_equals(registry, EVENT_PROCESSED_KEY, 1))
def test_update_mock_container(self):
    """A create event for a registered test pod claims its CPU threads.

    The simple test pod is stored in the pod manager and its name is used
    as the workload name of a single create event. After processing, the
    workload manager must report one success per event and the number of
    empty threads must have dropped by ``DEFAULT_CPU_COUNT``.
    """
    registry = Registry()
    test_pod = get_simple_test_pod()
    get_pod_manager().set_pod(test_pod)
    workload_name = test_pod.metadata.name

    events = [
        get_container_create_event(DEFAULT_CPU_COUNT, STATIC, workload_name,
                                   workload_name)
    ]
    event_count = len(events)
    event_iterable = MockEventProvider(events)

    test_context = TestContext()
    manager = EventManager(event_iterable,
                           test_context.get_event_handlers(),
                           DEFAULT_TEST_EVENT_TIMEOUT_SECS)
    manager.set_registry(registry, {})
    manager.start_processing_events()
    try:
        wait_until(lambda: event_count == manager.get_processed_count())

        self.assertEqual(0, manager.get_queue_depth())
        self.assertEqual(
            event_count,
            test_context.get_workload_manager().get_success_count())
        self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT - DEFAULT_CPU_COUNT,
                         len(test_context.get_cpu().get_empty_threads()))
        self.assertEqual(
            1,
            test_context.get_create_event_handler().get_handled_event_count())
    finally:
        # Stop event processing even when an assertion above fails, so a
        # failing test does not leave the manager running.
        manager.stop_processing_events()

    manager.report_metrics({})
    self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
    self.assertTrue(
        counter_value_equals(
            registry, EVENT_SUCCEEDED_KEY,
            event_count * len(test_context.get_event_handlers())))
    self.assertTrue(counter_value_equals(registry, EVENT_FAILED_KEY, 0))
    self.assertTrue(
        counter_value_equals(registry, EVENT_PROCESSED_KEY, event_count))