Ejemplo n.º 1
0
    def test_no_cross_package_violation(self):
        """Check that no cross-package violations exist before or after an assignment.

        The CPU returned by ``get_cpu()`` is inspected first.  A STATIC workload
        (built with the value 4 -- presumably its thread count) is then passed to
        ``IntegerProgramCpuAllocator.assign_threads`` and the same CPU object is
        inspected again.
        """
        cpu = get_cpu()
        ip_allocator = IntegerProgramCpuAllocator(cpu)
        workload = Workload(uuid.uuid4(), 4, STATIC)

        # Baseline: nothing has been assigned yet.
        self.assertEqual(0, len(get_cross_package_violations(cpu)))

        # The allocator was constructed with `cpu`, so that same object is
        # re-checked once the workload has been assigned.
        ip_allocator.assign_threads(workload)
        self.assertEqual(0, len(get_cross_package_violations(cpu)))
Ejemplo n.º 2
0
    def test_no_cross_package_violation(self):
        """Check that no cross-package violations exist before or after an assignment.

        The initial CPU is inspected, then a request built by
        ``get_no_usage_threads_request`` for a single STATIC test workload is
        passed to ``IntegerProgramCpuAllocator.assign_threads``; the CPU carried
        by the response is inspected afterwards.
        """
        initial_cpu = get_cpu()
        ip_allocator = IntegerProgramCpuAllocator()
        workload = get_test_workload(uuid.uuid4(), 4, STATIC)

        # Baseline: nothing has been assigned yet.
        self.assertEqual(0, len(get_cross_package_violations(initial_cpu)))

        request = get_no_usage_threads_request(initial_cpu, [workload])
        response = ip_allocator.assign_threads(request)

        # The verdict is taken from the CPU returned in the response.
        resulting_cpu = response.get_cpu()
        self.assertEqual(0, len(get_cross_package_violations(resulting_cpu)))
Ejemplo n.º 3
0
    def test_no_cross_package_violation(self):
        """Check that no cross-package violations exist before or after an assignment.

        The initial CPU is inspected, then an ``AllocateThreadsRequest`` for a
        single STATIC test workload is passed to
        ``IntegerProgramCpuAllocator.assign_threads``; the CPU carried by the
        response is inspected afterwards.
        """
        initial_cpu = get_cpu()
        ip_allocator = IntegerProgramCpuAllocator()
        workload = get_test_workload(uuid.uuid4(), 4, STATIC)

        # Baseline: nothing has been assigned yet.
        self.assertEqual(0, len(get_cross_package_violations(initial_cpu)))

        # Positional arguments are kept in their original order; the meaning of
        # the empty dict is not visible from this test.
        request = AllocateThreadsRequest(
            initial_cpu,
            workload.get_id(),
            {workload.get_id(): workload},
            {},
            DEFAULT_TEST_REQUEST_METADATA)
        response = ip_allocator.assign_threads(request)

        # The verdict is taken from the CPU returned in the response.
        resulting_cpu = response.get_cpu()
        self.assertEqual(0, len(get_cross_package_violations(resulting_cpu)))
Ejemplo n.º 4
0
def get_violations():
    """
    Serialize the current placement violations of the workload manager's CPU.

    The CPU is fetched from ``get_workload_manager()`` separately for each
    violation kind.

    :return: a JSON string with the keys "cross_package" and "shared_core"
    """
    cross_package = get_cross_package_violations(
        get_workload_manager().get_cpu())
    shared_core = get_shared_core_violations(
        get_workload_manager().get_cpu())

    report = {"cross_package": cross_package, "shared_core": shared_core}
    return json.dumps(report)
Ejemplo n.º 5
0
    def report_metrics(self, tags):
        """
        Publish this object's gauges to the registry, then let the CPU allocator
        and the cgroup manager report their own metrics.

        :param tags: passed unchanged to every gauge lookup and to both
                     sub-components
        """
        # Work from copies so every CPU-derived gauge below is computed from
        # the same objects.
        cpu = self.get_cpu_copy()
        workload_map = self.get_workload_map_copy()
        registry = self.__reg

        registry.gauge(RUNNING, tags).set(1)

        # Event and workload counts
        registry.gauge(ADDED_KEY, tags).set(self.get_added_count())
        registry.gauge(REMOVED_KEY, tags).set(self.get_removed_count())
        registry.gauge(REBALANCED_KEY, tags).set(self.get_rebalanced_count())
        registry.gauge(SUCCEEDED_KEY, tags).set(self.get_success_count())
        registry.gauge(FAILED_KEY, tags).set(self.get_error_count())
        registry.gauge(WORKLOAD_COUNT_KEY, tags).set(
            len(self.get_workloads()))
        registry.gauge(ADDED_TO_FULL_CPU_ERROR_KEY, tags).set(
            self.__added_to_full_cpu_count)

        registry.gauge(ALLOCATOR_CALL_DURATION, tags).set(
            self.get_allocator_call_duration_sum_secs())

        # Isolation violations
        package_violations = len(get_cross_package_violations(cpu))
        core_violations = len(get_shared_core_violations(cpu))
        registry.gauge(PACKAGE_VIOLATIONS_KEY, tags).set(package_violations)
        registry.gauge(CORE_VIOLATIONS_KEY, tags).set(core_violations)

        # Allocation / request sizes
        registry.gauge(ALLOCATED_SIZE_KEY, tags).set(get_allocated_size(cpu))
        registry.gauge(UNALLOCATED_SIZE_KEY, tags).set(
            get_unallocated_size(cpu))
        registry.gauge(STATIC_ALLOCATED_SIZE_KEY, tags).set(
            get_static_allocated_size(cpu, workload_map))
        registry.gauge(BURST_ALLOCATED_SIZE_KEY, tags).set(
            get_burst_allocated_size(cpu, workload_map))
        registry.gauge(BURST_REQUESTED_SIZE_KEY, tags).set(
            get_burst_request_size(list(workload_map.values())))
        registry.gauge(OVERSUBSCRIBED_THREADS_KEY, tags).set(
            get_oversubscribed_thread_count(cpu, workload_map))

        # Sub-components report last
        self.__cpu_allocator.report_metrics(tags)
        self.__cgroup_manager.report_metrics(tags)
Ejemplo n.º 6
0
    def report_metrics(self, tags):
        """
        Publish this object's gauges and counters to the registry, reset the
        per-interval event counts, then let the CPU allocator and the cgroup
        manager report their own metrics.

        :param tags: passed unchanged to every gauge/counter lookup and to both
                     sub-components
        """
        # Work from copies so every CPU-derived gauge below is computed from
        # the same objects.
        cpu = self.get_cpu_copy()
        workload_map = self.get_workload_map_copy()
        registry = self.__reg

        registry.gauge(RUNNING, tags).set(1)
        registry.gauge(WORKLOAD_COUNT_KEY, tags).set(len(self.get_workloads()))

        # Counters are incremented by the counts accumulated so far ...
        registry.counter(ADDED_KEY, tags).increment(self.get_added_count())
        registry.counter(REMOVED_KEY, tags).increment(self.get_removed_count())
        registry.counter(REBALANCED_KEY, tags).increment(
            self.get_rebalanced_count())
        registry.counter(SUCCEEDED_KEY, tags).increment(
            self.get_success_count())
        registry.counter(FAILED_KEY, tags).increment(self.get_error_count())

        # ... and the counts are then zeroed, so this must stay after the
        # increments above.
        # NOTE(review): the read and the reset are separate steps; confirm the
        # caller serializes this against whatever updates these counts.
        # NOTE(review): no success count is reset here -- presumably it is
        # derived from the counts below; verify.
        self.__added_count = 0
        self.__removed_count = 0
        self.__rebalanced_count = 0
        self.__error_count = 0

        # Isolation violations
        package_violations = len(get_cross_package_violations(cpu))
        core_violations = len(get_shared_core_violations(cpu))
        registry.gauge(PACKAGE_VIOLATIONS_KEY, tags).set(package_violations)
        registry.gauge(CORE_VIOLATIONS_KEY, tags).set(core_violations)

        # Allocation / request sizes
        registry.gauge(ALLOCATED_SIZE_KEY, tags).set(get_allocated_size(cpu))
        registry.gauge(UNALLOCATED_SIZE_KEY, tags).set(
            get_unallocated_size(cpu))
        registry.gauge(STATIC_ALLOCATED_SIZE_KEY, tags).set(
            get_static_allocated_size(cpu, workload_map))
        registry.gauge(BURST_ALLOCATED_SIZE_KEY, tags).set(
            get_burst_allocated_size(cpu, workload_map))
        registry.gauge(BURST_REQUESTED_SIZE_KEY, tags).set(
            get_burst_request_size(list(workload_map.values())))
        registry.gauge(OVERSUBSCRIBED_THREADS_KEY, tags).set(
            get_oversubscribed_thread_count(cpu, workload_map))

        # Gauges computed from the most recent allocator response
        registry.gauge(BURSTABLE_THREADS_KEY, tags).set(
            self.__get_free_thread_count(self.__last_response))
        registry.gauge(OVERSUBSCRIBABLE_THREADS_KEY, tags).set(
            self.__get_oversubscribable_thread_count(self.__last_response))
        registry.gauge(OVERSUBSCRIBE_CONSUMED_CPU_COUNT, tags).set(
            self.__get_consumed_opportunistic_cpu_count())

        # Sub-components report last
        self.__cpu_allocator.report_metrics(tags)
        self.__cgroup_manager.report_metrics(tags)
Ejemplo n.º 7
0
    def report_metrics(self, tags):
        """
        Publish workload-manager, allocator, event-manager and CPU gauges to the
        registry.

        Reporting is best effort: any ``Exception`` raised along the way is
        logged and swallowed.  ``KeyboardInterrupt`` and ``SystemExit`` are
        deliberately not caught, so interpreter shutdown is not suppressed.

        :param tags: passed unchanged to every gauge lookup on the registry
        """
        log.debug("Reporting metrics")
        try:
            # Workload manager metrics
            self.__reg.gauge(RUNNING, tags).set(1)

            self.__reg.gauge(ADDED_KEY, tags).set(
                self.__workload_manager.get_added_count())
            self.__reg.gauge(REMOVED_KEY, tags).set(
                self.__workload_manager.get_removed_count())
            self.__reg.gauge(SUCCEEDED_KEY, tags).set(
                self.__workload_manager.get_success_count())
            self.__reg.gauge(FAILED_KEY, tags).set(
                self.__workload_manager.get_error_count())
            self.__reg.gauge(WORKLOAD_COUNT_KEY, tags).set(
                len(self.__workload_manager.get_workloads()))

            # Allocator metrics
            self.__reg.gauge(ALLOCATOR_CALL_DURATION, tags).set(
                self.__workload_manager.get_allocator_call_duration_sum_secs())
            self.__reg.gauge(FALLBACK_ALLOCATOR_COUNT, tags).set(
                self.__workload_manager.get_fallback_allocator_calls_count())
            self.__reg.gauge(IP_ALLOCATOR_TIMEBOUND_COUNT, tags).set(
                self.__workload_manager.
                get_time_bound_ip_allocator_solution_count())

            # Event manager metrics
            self.__reg.gauge(QUEUE_DEPTH_KEY,
                             tags).set(self.__event_manager.get_queue_depth())
            self.__reg.gauge(EVENT_SUCCEEDED_KEY, tags).set(
                self.__event_manager.get_success_count())
            self.__reg.gauge(EVENT_FAILED_KEY,
                             tags).set(self.__event_manager.get_error_count())
            self.__reg.gauge(EVENT_PROCESSED_KEY, tags).set(
                self.__event_manager.get_processed_count())

            # CPU metrics: fetch the CPU once so both violation counts are
            # computed from the same object.
            cpu = self.__workload_manager.get_cpu()
            cross_package_violation_count = len(
                get_cross_package_violations(cpu))
            shared_core_violation_count = len(get_shared_core_violations(cpu))
            self.__reg.gauge(PACKAGE_VIOLATIONS_KEY,
                             tags).set(cross_package_violation_count)
            self.__reg.gauge(CORE_VIOLATIONS_KEY,
                             tags).set(shared_core_violation_count)
            log.debug("Reported metrics")

        except Exception:
            # A bare `except:` would also trap KeyboardInterrupt/SystemExit.
            log.exception("Failed to report metric")
    def test_no_cross_packages_placement_no_bad_affinity_ip(self):
        """Add four STATIC workloads and expect no cross-package violations and no empty threads.

        The CPU is built with 2 packages x 2 cores x 2 threads.  The workloads are
        created with the values 3, 2, 1 and 2 (presumably thread counts) and added
        to a ``WorkloadManager`` that uses ``IntegerProgramCpuAllocator``.
        """
        workload_specs = (("a", 3), ("b", 2), ("c", 1), ("d", 2))
        workloads = [
            get_test_workload(name, size, STATIC)
            for name, size in workload_specs
        ]

        cpu = get_cpu(package_count=2, cores_per_package=2, threads_per_core=2)
        manager = WorkloadManager(
            cpu, MockCgroupManager(), IntegerProgramCpuAllocator())

        # Workloads are added in the order a, b, c, d.
        for workload in workloads:
            manager.add_workload(workload)

        violations = get_cross_package_violations(manager.get_cpu())
        self.assertEqual(0, len(violations))

        empty_threads = manager.get_cpu().get_empty_threads()
        self.assertEqual(0, len(empty_threads))
Ejemplo n.º 9
0
    def test_no_cross_packages_placement_no_bad_affinity_ip(self):
        """Add four STATIC workloads and expect no cross-package violations and no empty threads.

        The CPU is built with 2 packages x 2 cores x 2 threads.  The workloads are
        created with the values 3, 2, 1 and 2 (presumably thread counts) and added
        to a ``WorkloadManager`` constructed without an explicit allocator.
        """
        workload_specs = (("a", 3), ("b", 2), ("c", 1), ("d", 2))
        workloads = [
            Workload(name, size, STATIC) for name, size in workload_specs
        ]

        cpu = get_cpu(package_count=2, cores_per_package=2, threads_per_core=2)
        manager = WorkloadManager(cpu, MockCgroupManager())

        # Workloads are added in the order a, b, c, d.
        for workload in workloads:
            manager.add_workload(workload)

        violations = get_cross_package_violations(manager.get_cpu())
        self.assertEqual(0, len(violations))

        # TODO: fix and re-enable the shared-core check that was disabled here:
        #   self.assertEqual(1, len(get_shared_core_violations(manager.get_cpu())))

        empty_threads = manager.get_cpu().get_empty_threads()
        self.assertEqual(0, len(empty_threads))
Ejemplo n.º 10
0
def has_better_isolation(cur_cpu, new_cpu):
    """
    Decide whether the placement on new_cpu is an improvement over cur_cpu.

    Two kinds of violation are counted for each CPU:
       1. cross_package: reported by get_cross_package_violations
          (a workload is placed on multiple packages)
       2. shared_core: reported by get_shared_core_violations
          (a core is shared by multiple workloads)

    The cross_package count takes precedence.  The shared_core count is only
    consulted when the cross_package count is unchanged.  With

        '-' meaning the count decreased
        '0' meaning the count did not change
        '+' meaning the count increased

    the answer to "is the new placement better?" is:

                                    shared_core_count
                                    -      0      +

                                -   T      T      T

            cross_package_count 0   T      F      F

                                +   F      F      F

    The two mixed corners are resolved in favour of avoiding cross package
    placement:

        upper-right: fewer cross_package violations win even though
                     shared_core violations increased
        lower-left:  more cross_package violations lose even though
                     shared_core violations decreased

    NOTE: The original documentation states that burst workloads are not
          considered in this comparison; that depends on the violation helpers
          and cannot be confirmed from this function alone.

    :param cur_cpu: the CPU describing the current placement
    :param new_cpu: the CPU describing the proposed placement
    :return: True if the new_cpu has better placement, False otherwise
    """
    cross_package_before = len(get_cross_package_violations(cur_cpu))
    cross_package_after = len(get_cross_package_violations(new_cpu))

    shared_core_before = len(get_shared_core_violations(cur_cpu))
    shared_core_after = len(get_shared_core_violations(new_cpu))

    # Top and bottom rows of the matrix: any change in the cross_package count
    # settles the question on its own.
    if cross_package_after != cross_package_before:
        return cross_package_after < cross_package_before

    # Middle row of the matrix: only a strict decrease in shared_core counts.
    return shared_core_after < shared_core_before