Ejemplo n.º 1
0
    def test_get_wm_status_endpoint(self):
        """Check the shape of the JSON document returned by ``get_wm_status``.

        Installs the default workload manager and an event manager built
        from an empty event provider and an empty handler list, starts event
        processing, and then asserts that the decoded status has exactly one
        top-level entry and that its ``"workload_manager"`` value has three
        entries.
        """
        set_workload_manager(self.__get_default_workload_manager())

        event_manager = EventManager(MockEventProvider([]), [], 0.01)
        set_event_manager(event_manager)
        event_manager.start_processing_events()

        # Always stop event processing, even when an assertion below fails,
        # so a failing test does not leave the event manager running.
        try:
            status = json.loads(get_wm_status())
            self.assertEqual(1, len(status))
            self.assertEqual(3, len(status["workload_manager"]))
        finally:
            event_manager.stop_processing_events()
Ejemplo n.º 2
0
    reconcile_event_handler = ReconcileEventHandler(reconciler)
    oversub_event_handler = None
    if is_kubernetes():
        oversub_event_handler = OversubscribeEventHandler(
            workload_manager, KubernetesOpportunisticWindowPublisher())

    event_handlers = [
        h for h in [
            create_event_handler, free_event_handler, rebalance_event_handler,
            reconcile_event_handler, oversub_event_handler
        ] if h is not None
    ]

    # Start event processing
    log.info("Starting Docker event handling...")
    event_manager = EventManager(docker.from_env().events(), event_handlers)
    set_event_manager(event_manager)

    log.info("Starting health check thread...")
    threading.Thread(target=health_check,
                     args=[exit_handler, event_manager,
                           get_config_manager()]).start()

    # Report metrics
    log.info("Starting metrics reporting...")
    metrics_reporters = [
        m for m in [
            cgroup_manager, event_log_manager, event_manager, reconciler,
            workload_manager, workload_monitor_manager, oversub_event_handler
        ] if m is not None
    ]
Ejemplo n.º 3
0
    def test_free_cpu_on_container_die(self):
        """A create event followed by a die event leaves all threads empty.

        Feeds a container create event and a container die event for the
        same workload name through an ``EventManager`` and checks that:

        * both events are processed and the queue is drained,
        * the number of empty CPU threads equals
          ``DEFAULT_TOTAL_THREAD_COUNT``,
        * the create and free handlers each handled exactly one event,
        * the reported gauges show no failures and one success per
          (event, handler) pair.
        """
        registry = Registry()
        workload_name = str(uuid.uuid4())

        events = [
            get_container_create_event(DEFAULT_CPU_COUNT, STATIC,
                                       workload_name, workload_name),
            get_container_die_event(workload_name)
        ]
        event_count = len(events)
        event_iterable = MockEventProvider(
            events, 1)  # Force in order event processing for the test

        test_context = TestContext()
        manager = EventManager(event_iterable,
                               test_context.get_event_handlers(),
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)
        manager.set_registry(registry)
        manager.start_processing_events()

        # Always stop event processing, even when the wait or an assertion
        # fails, so a failing test does not leave the manager running.
        try:
            wait_until(lambda: event_count == manager.get_processed_count())
            self.assertEqual(0, manager.get_queue_depth())
            self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                             len(test_context.get_cpu().get_empty_threads()))
            self.assertEqual(
                1,
                test_context.get_create_event_handler()
                .get_handled_event_count())
            self.assertEqual(
                1,
                test_context.get_free_event_handler()
                .get_handled_event_count())
        finally:
            manager.stop_processing_events()

        # Metrics are reported only after processing has stopped.
        manager.report_metrics({})
        self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
        self.assertTrue(
            gauge_value_equals(
                registry, EVENT_SUCCEEDED_KEY,
                event_count * len(test_context.get_event_handlers())))
        self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 0))
        self.assertTrue(
            gauge_value_equals(registry, EVENT_PROCESSED_KEY, event_count))
Ejemplo n.º 4
0
    # Setup the event handlers
    log.info("Setting up the Docker event handlers...")
    create_event_handler = CreateEventHandler(workload_manager)
    free_event_handler = FreeEventHandler(workload_manager)
    rebalance_event_handler = RebalanceEventHandler(workload_manager)
    reconciler = Reconciler(cgroup_manager, RealExitHandler())
    reconcile_event_handler = ReconcileEventHandler(reconciler)
    event_handlers = [
        create_event_handler, free_event_handler, rebalance_event_handler,
        reconcile_event_handler
    ]

    # Start event processing
    log.info("Starting Docker event handling...")
    event_manager = EventManager(docker.from_env().events(), event_handlers)
    set_event_manager(event_manager)

    # Report metrics
    log.info("Starting metrics reporting...")
    MetricsManager([
        cgroup_manager, event_log_manager, event_manager, reconciler,
        workload_manager, workload_monitor_manager
    ])

    # Initialize currently running containers as workloads
    log.info("Isolating currently running workloads...")
    for workload in get_current_workloads(docker.from_env()):
        try:
            workload_manager.add_workload(workload)
        except:
Ejemplo n.º 5
0
    def test_rebalance(self):
        """A single rebalance event reaches only the rebalance handler.

        Feeds ``REBALANCE_EVENT`` through an ``EventManager`` and checks that
        the event is processed, the queue is drained, the number of empty CPU
        threads equals ``DEFAULT_TOTAL_THREAD_COUNT``, the create and free
        handlers handled nothing, the rebalance handler handled exactly one
        event, and the reported gauges show no failures.
        """
        registry = Registry()

        events = [REBALANCE_EVENT]
        event_count = len(events)
        event_iterable = MockEventProvider(events)

        test_context = TestContext()
        manager = EventManager(event_iterable,
                               test_context.get_event_handlers(),
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)
        manager.set_registry(registry)
        manager.start_processing_events()

        # Always stop event processing, even when the wait or an assertion
        # fails, so a failing test does not leave the manager running.
        try:
            wait_until(lambda: event_count == manager.get_processed_count())
            self.assertEqual(0, manager.get_queue_depth())
            self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                             len(test_context.get_cpu().get_empty_threads()))
            self.assertEqual(
                0,
                test_context.get_create_event_handler()
                .get_handled_event_count())
            self.assertEqual(
                0,
                test_context.get_free_event_handler()
                .get_handled_event_count())
            self.assertEqual(
                1,
                test_context.get_rebalance_event_handler()
                .get_handled_event_count())
        finally:
            manager.stop_processing_events()

        # Metrics are reported only after processing has stopped.
        manager.report_metrics({})
        self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
        self.assertTrue(
            gauge_value_equals(
                registry, EVENT_SUCCEEDED_KEY,
                event_count * len(test_context.get_event_handlers())))
        self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 0))
        self.assertTrue(
            gauge_value_equals(registry, EVENT_PROCESSED_KEY, event_count))
Ejemplo n.º 6
0
    def test_unknown_workload_type_label(self):
        """An unknown workload type label counts as one error.

        Feeds two create events through an ``EventManager``: one whose
        ``WORKLOAD_TYPE_LABEL_KEY`` label is ``"unknown"`` and one built by
        ``get_container_create_event(1)``. The test waits for the manager to
        report one error and two processed events, then checks the queue
        depth and the reported gauges.
        """
        registry = Registry()
        test_context = TestContext()
        # NOTE(review): the id is passed as a UUID object rather than a str;
        # confirm get_event accepts that.
        unknown_event = get_event(
            CONTAINER, CREATE, uuid.uuid4(), {
                NAME: "container-name",
                APP_NAME_LABEL_KEY: DEFAULT_TEST_APP_NAME,
                CPU_LABEL_KEY: "1",
                MEM_LABEL_KEY: str(DEFAULT_TEST_MEM),
                DISK_LABEL_KEY: str(DEFAULT_TEST_DISK),
                NETWORK_LABEL_KEY: str(DEFAULT_TEST_NETWORK),
                JOB_TYPE_LABEL_KEY: DEFAULT_TEST_JOB_TYPE,
                WORKLOAD_TYPE_LABEL_KEY: "unknown",
                OWNER_EMAIL_LABEL_KEY: DEFAULT_TEST_OWNER_EMAIL,
                IMAGE_LABEL_KEY: DEFAULT_TEST_IMAGE,
            })
        valid_event = get_container_create_event(1)
        event_iterable = MockEventProvider([unknown_event, valid_event])
        manager = EventManager(event_iterable,
                               test_context.get_event_handlers(),
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)
        manager.set_registry(registry)
        manager.start_processing_events()

        # Always stop event processing, even when a wait or an assertion
        # fails, so a failing test does not leave the manager running.
        try:
            wait_until(lambda: manager.get_error_count() == 1)
            wait_until(lambda: manager.get_processed_count() == 2)
            self.assertEqual(0, manager.get_queue_depth())
        finally:
            manager.stop_processing_events()

        # Metrics are reported only after processing has stopped.
        manager.report_metrics({})
        self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
        # Presumably 2 events x 3 handlers minus the 1 failure -- TODO confirm
        # against the handler list returned by TestContext.
        self.assertTrue(gauge_value_equals(registry, EVENT_SUCCEEDED_KEY, 5))
        self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 1))
        self.assertTrue(gauge_value_equals(registry, EVENT_PROCESSED_KEY, 2))
Ejemplo n.º 7
0
    def test_absent_workload_type_label(self):
        """A create event without a workload type label is ignored.

        The event carries only the ``CPU_LABEL_KEY`` and ``NAME`` labels. The
        test waits until the create handler reports one ignored event, then
        checks that the queue is drained and that the reported gauges show
        one processed event, no failures, and one success per handler.
        """
        registry = Registry()
        test_context = TestContext()
        name = str(uuid.uuid4())
        unknown_event = get_event(CONTAINER, CREATE, name, {
            CPU_LABEL_KEY: "1",
            NAME: name
        })
        event_handlers = test_context.get_event_handlers()
        event_iterable = MockEventProvider([unknown_event])
        manager = EventManager(event_iterable, event_handlers,
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)
        manager.set_registry(registry)
        manager.start_processing_events()

        # Always stop event processing, even when the wait or an assertion
        # fails, so a failing test does not leave the manager running.
        try:
            wait_until(lambda: test_context.get_create_event_handler().
                       get_ignored_event_count() == 1)
            self.assertEqual(0, manager.get_queue_depth())
        finally:
            manager.stop_processing_events()

        # Metrics are reported only after processing has stopped.
        manager.report_metrics({})
        self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
        self.assertTrue(
            gauge_value_equals(registry, EVENT_SUCCEEDED_KEY,
                               len(test_context.get_event_handlers())))
        self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 0))
        self.assertTrue(gauge_value_equals(registry, EVENT_PROCESSED_KEY, 1))
Ejemplo n.º 8
0
    def test_unknown_action(self):
        """A container event with an unrecognised action is ignored.

        Feeds one ``CONTAINER`` event whose action is ``"unknown"`` and whose
        label dict is empty. The test waits until the create handler reports
        one ignored event, then checks that the queue is drained and that the
        reported gauges show one processed event, no failures, and one
        success per handler.
        """
        registry = Registry()
        test_context = TestContext()
        # NOTE(review): the id is passed as a UUID object rather than a str;
        # confirm get_event accepts that.
        unknown_event = get_event(CONTAINER, "unknown", uuid.uuid4(), {})
        event_iterable = MockEventProvider([unknown_event])
        manager = EventManager(event_iterable,
                               test_context.get_event_handlers(),
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)
        manager.set_registry(registry)
        manager.start_processing_events()

        # Always stop event processing, even when the wait or an assertion
        # fails, so a failing test does not leave the manager running.
        try:
            wait_until(lambda: test_context.get_create_event_handler().
                       get_ignored_event_count() == 1)
            self.assertEqual(0, manager.get_queue_depth())
        finally:
            manager.stop_processing_events()

        # Metrics are reported only after processing has stopped.
        manager.report_metrics({})
        self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
        self.assertTrue(
            gauge_value_equals(registry, EVENT_SUCCEEDED_KEY,
                               len(test_context.get_event_handlers())))
        self.assertTrue(gauge_value_equals(registry, EVENT_FAILED_KEY, 0))
        self.assertTrue(gauge_value_equals(registry, EVENT_PROCESSED_KEY, 1))
Ejemplo n.º 9
0
    def test_update_mock_container(self):
        """A create event for a registered test pod claims CPU threads.

        Registers the pod returned by ``get_simple_test_pod()`` with the pod
        manager, feeds one create event named after that pod through an
        ``EventManager``, and checks that:

        * the event is processed and the queue is drained,
        * the workload manager's success count equals the event count,
        * the number of empty CPU threads dropped by ``DEFAULT_CPU_COUNT``,
        * the create handler handled exactly one event,
        * the reported queue-depth gauge and event counters show no
          failures.
        """
        registry = Registry()
        test_pod = get_simple_test_pod()
        get_pod_manager().set_pod(test_pod)
        workload_name = test_pod.metadata.name

        events = [
            get_container_create_event(DEFAULT_CPU_COUNT, STATIC,
                                       workload_name, workload_name)
        ]
        event_count = len(events)
        event_iterable = MockEventProvider(events)

        test_context = TestContext()
        manager = EventManager(event_iterable,
                               test_context.get_event_handlers(),
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)
        manager.set_registry(registry, {})
        manager.start_processing_events()

        # Always stop event processing, even when the wait or an assertion
        # fails, so a failing test does not leave the manager running.
        try:
            wait_until(lambda: event_count == manager.get_processed_count())
            self.assertEqual(0, manager.get_queue_depth())
            self.assertEqual(
                event_count,
                test_context.get_workload_manager().get_success_count())
            self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT - DEFAULT_CPU_COUNT,
                             len(test_context.get_cpu().get_empty_threads()))
            self.assertEqual(
                1,
                test_context.get_create_event_handler()
                .get_handled_event_count())
        finally:
            manager.stop_processing_events()

        # Metrics are reported only after processing has stopped.
        manager.report_metrics({})
        self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
        self.assertTrue(
            counter_value_equals(
                registry, EVENT_SUCCEEDED_KEY,
                event_count * len(test_context.get_event_handlers())))
        self.assertTrue(counter_value_equals(registry, EVENT_FAILED_KEY, 0))
        self.assertTrue(
            counter_value_equals(registry, EVENT_PROCESSED_KEY, event_count))