Beispiel #1
0
    def test_free_cpu_on_container_die(self):
        """Create then die for one workload must leave every thread empty.

        A single static workload is created and then killed.  Once both
        events are processed the queue must be drained, the number of empty
        CPU threads must equal DEFAULT_TOTAL_THREAD_COUNT, and the create
        and free handlers must each have handled exactly one event.
        """
        workload_name = str(uuid.uuid4())
        workload = Workload(workload_name, DEFAULT_CPU_COUNT, STATIC)
        docker_client = MockDockerClient([MockContainer(workload)])

        events = [
            get_container_create_event(DEFAULT_CPU_COUNT, STATIC,
                                       workload_name, workload_name),
            get_container_die_event(workload_name)
        ]
        event_count = len(events)
        event_iterable = MockEventProvider(
            events, 1)  # Force in order event processing for the test

        test_context = TestContext(docker_client)
        manager = EventManager(event_iterable,
                               test_context.get_event_handlers(),
                               get_mock_file_manager(),
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)

        try:
            wait_until(lambda: event_count == manager.get_processed_count())
            self.assertEqual(0, manager.get_queue_depth())
            self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                             len(test_context.get_cpu().get_empty_threads()))
            self.assertEqual(
                1,
                test_context.get_create_event_handler()
                .get_handled_event_count())
            self.assertEqual(
                1,
                test_context.get_free_event_handler()
                .get_handled_event_count())
        finally:
            # Always stop the manager, even when a wait or assertion above
            # fails, so a failing test does not leave it processing events.
            manager.stop_processing_events()
    def test_add_metrics(self):
        """Adding one workload must be reflected in the internal metrics.

        A single static create event is processed, then an
        InternalMetricsReporter is pointed at a fresh Registry and asked to
        report.  Each gauge is polled until it shows the expected value.
        """
        test_context = TestContext()
        workload_name = str(uuid.uuid4())
        events = [
            get_container_create_event(DEFAULT_CPU_COUNT, STATIC,
                                       workload_name, workload_name)
        ]
        event_count = len(events)
        event_manager = EventManager(MockEventProvider(events),
                                     test_context.get_event_handlers(),
                                     get_mock_file_manager(), 5.0)

        try:
            wait_until(
                lambda: event_count == event_manager.get_processed_count())

            log.info("Event manager has processed {} events.".format(
                event_manager.get_processed_count()))

            workload_manager = test_context.get_workload_manager()
            registry = Registry()
            reporter = InternalMetricsReporter(workload_manager,
                                               event_manager)
            reporter.set_registry(registry)
            reporter.report_metrics({})

            wait_until(
                lambda: self.__gauge_value_equals(registry, RUNNING, 1))
            wait_until(
                lambda: self.__gauge_value_equals(registry, ADDED_KEY, 1))
            wait_until(
                lambda: self.__gauge_value_equals(registry, REMOVED_KEY, 0))
            wait_until(
                lambda: self.__gauge_value_equals(registry, SUCCEEDED_KEY, 1))
            wait_until(
                lambda: self.__gauge_value_equals(registry, FAILED_KEY, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, QUEUE_DEPTH_KEY, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, WORKLOAD_COUNT_KEY, 1))
            wait_until(lambda: self.__gauge_value_equals(
                registry, PACKAGE_VIOLATIONS_KEY, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, CORE_VIOLATIONS_KEY, 0))
            # NOTE(review): 3 successes for 1 processed event -- presumably
            # one per registered event handler; confirm against TestContext.
            wait_until(lambda: self.__gauge_value_equals(
                registry, EVENT_SUCCEEDED_KEY, 3))
            wait_until(lambda: self.__gauge_value_equals(
                registry, EVENT_FAILED_KEY, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, EVENT_PROCESSED_KEY, 1))
            wait_until(lambda: self.__gauge_value_equals(
                registry, FALLBACK_ALLOCATOR_COUNT, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, IP_ALLOCATOR_TIMEBOUND_COUNT, 0))
        finally:
            # Always stop the manager, even when a wait above fails, so a
            # failing test does not leave it processing events.
            event_manager.stop_processing_events()
Beispiel #3
0
    def test_unknown_workload_type_label(self):
        """An unknown workload type label counts as one error, not a stall.

        The first event carries WORKLOAD_TYPE_LABEL_KEY set to "unknown";
        the second is a valid create event.  The manager must report exactly
        one error, still process both events, and end with an empty queue.
        """
        test_context = TestContext()
        unknown_event = get_event(
            CONTAINER, CREATE, uuid.uuid4(), {
                NAME: "container-name",
                CPU_LABEL_KEY: "1",
                WORKLOAD_TYPE_LABEL_KEY: "unknown"
            })
        valid_event = get_container_create_event(1)
        event_iterable = MockEventProvider([unknown_event, valid_event])
        manager = EventManager(event_iterable,
                               test_context.get_event_handlers(),
                               get_mock_file_manager(),
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)

        try:
            wait_until(lambda: manager.get_error_count() == 1)
            wait_until(lambda: manager.get_processed_count() == 2)
            self.assertEqual(0, manager.get_queue_depth())
        finally:
            # Always stop the manager, even when a wait or assertion above
            # fails, so a failing test does not leave it processing events.
            manager.stop_processing_events()
    def test_crash_ip_allocator_metrics(self):
        """A crashing allocator must be counted in the fallback metric.

        The test context is built with one CPU layout, then the allocator is
        handed a different, smaller layout so that it crashes on the create
        event.  The workload must still be reported as added and running,
        and FALLBACK_ALLOCATOR_COUNT must read 1.
        """
        cpu = get_cpu(2, 16, 2)
        test_context = TestContext(cpu=cpu)

        # now override the cpu seen by the allocator to crash it
        test_context.get_workload_manager().get_allocator().set_cpu(
            get_cpu(2, 2, 2))

        events = [get_container_create_event(10, name="foo", id="bar")]
        event_count = len(events)
        event_manager = EventManager(MockEventProvider(events),
                                     test_context.get_event_handlers(),
                                     get_mock_file_manager(), 5.0)

        try:
            wait_until(
                lambda: event_count == event_manager.get_processed_count())

            log.info("Event manager has processed {} events.".format(
                event_manager.get_processed_count()))

            workload_manager = test_context.get_workload_manager()
            registry = Registry()
            reporter = InternalMetricsReporter(workload_manager,
                                               event_manager)
            reporter.set_registry(registry)
            reporter.report_metrics({})

            wait_until(
                lambda: self.__gauge_value_equals(registry, RUNNING, 1))
            wait_until(
                lambda: self.__gauge_value_equals(registry, ADDED_KEY, 1))
            wait_until(
                lambda: self.__gauge_value_equals(registry, REMOVED_KEY, 0))
            wait_until(
                lambda: self.__gauge_value_equals(registry, SUCCEEDED_KEY, 1))
            wait_until(
                lambda: self.__gauge_value_equals(registry, FAILED_KEY, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, WORKLOAD_COUNT_KEY, 1))
            wait_until(lambda: self.__gauge_value_equals(
                registry, FALLBACK_ALLOCATOR_COUNT, 1))
        finally:
            # Always stop the manager, even when a wait above fails, so a
            # failing test does not leave it processing events.
            event_manager.stop_processing_events()
    def test_edge_case_ip_allocator_metrics(self):
        """A hard placement sequence must trip the solver's time bound.

        This is a specific scenario causing troubles to the solver: with the
        solver runtime capped at 0.01 seconds we should hit the time-bound
        limit and report it.

        Event sequence (26 events in total):
          * 15 creates of 2 CPUs each, named "0" .. "14"
          * 1 create of 1 CPU, named "15"
          * 9 creates of 2 CPUs each, named "16" .. "24"
          * 1 die event for workload "15"
        """
        cpu = get_cpu(2, 16, 2)
        test_context = TestContext(cpu=cpu)
        test_context.get_workload_manager().get_allocator(
        ).set_solver_max_runtime_secs(0.01)

        events = [
            get_container_create_event(2, name=str(i), id=str(i))
            for i in range(15)
        ]
        events.append(get_container_create_event(1, name="15", id="15"))
        events.extend(
            get_container_create_event(2, name=str(i), id=str(i))
            for i in range(16, 25))
        events.append(get_container_die_event(name="15", id="15"))

        event_count = len(events)
        event_manager = EventManager(MockEventProvider(events),
                                     test_context.get_event_handlers(),
                                     get_mock_file_manager(), 5.0)

        try:
            wait_until(
                lambda: event_count == event_manager.get_processed_count(),
                timeout=20)

            log.info("Event manager has processed {} events.".format(
                event_manager.get_processed_count()))

            workload_manager = test_context.get_workload_manager()
            registry = Registry()
            reporter = InternalMetricsReporter(workload_manager,
                                               event_manager)
            reporter.set_registry(registry)
            reporter.report_metrics({})

            wait_until(
                lambda: self.__gauge_value_equals(registry, RUNNING, 1))
            # 25 create events, 1 die event: 24 workloads remain.
            wait_until(
                lambda: self.__gauge_value_equals(registry, ADDED_KEY, 25))
            wait_until(
                lambda: self.__gauge_value_equals(registry, REMOVED_KEY, 1))
            wait_until(lambda: self.__gauge_value_equals(
                registry, SUCCEEDED_KEY, 26))
            wait_until(
                lambda: self.__gauge_value_equals(registry, FAILED_KEY, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, QUEUE_DEPTH_KEY, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, WORKLOAD_COUNT_KEY, 24))
            wait_until(lambda: self.__gauge_value_equals(
                registry, PACKAGE_VIOLATIONS_KEY, 0))
            # NOTE(review): 3 successes per processed event -- presumably
            # one per registered event handler; confirm against TestContext.
            wait_until(lambda: self.__gauge_value_equals(
                registry, EVENT_SUCCEEDED_KEY, 3 * 26))
            wait_until(lambda: self.__gauge_value_equals(
                registry, EVENT_FAILED_KEY, 0))
            wait_until(lambda: self.__gauge_value_equals(
                registry, EVENT_PROCESSED_KEY, 26))
            # These two use __gauge_value_reached rather than an exact
            # match; presumably a lower-bound check -- confirm in the helper.
            wait_until(lambda: self.__gauge_value_reached(
                registry, IP_ALLOCATOR_TIMEBOUND_COUNT, 1))
            wait_until(lambda: self.__gauge_value_reached(
                registry, ALLOCATOR_CALL_DURATION, 0.1))
        finally:
            # Always stop the manager, even when a wait above fails, so a
            # failing test does not leave it processing events.
            event_manager.stop_processing_events()