def __remove_workload(self, workload_id):
    log.info("Removing workload: {}".format(workload_id))
    if workload_id not in self.__workloads:
        log.error("Attempted to remove unknown workload: '{}'".format(
            workload_id))
        return

    workload_map = self.get_workload_map_copy()
    request = self.__get_threads_request(workload_id, workload_map, "free")
    response = self.__cpu_allocator.free_threads(request)
    workload_map.pop(workload_id)

    self.__update_state(response, workload_map)
    report_cpu_event(request, response)
def __init__(self):
    self.__address = self.__get_address()
    log.info("Set keystone address to: {}".format(self.__address))
    self.__enabled = self.__address is not None

    self.__q = Queue()
    self.__reg = None
    self.__succeeded_msg_count = 0
    self.__retry_msg_count = 0
    self.__failed_msg_count = 0

    self.__processing_thread = Thread(target=self.__process_events)
    self.__processing_thread.start()
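# Hedged sketch (not part of the original class): the __process_events target
# started above is not shown in this excerpt. A typical consumer for this
# Queue/Thread pattern blocks on the queue and updates the counters that are
# initialized above; the delivery call below is an assumption for illustration.
#
#     def __process_events(self):
#         while True:
#             event = self.__q.get()  # blocks until an event is enqueued
#             try:
#                 self.__send(event)  # hypothetical delivery call
#                 self.__succeeded_msg_count += 1
#             except Exception:
#                 self.__failed_msg_count += 1
#             finally:
#                 self.__q.task_done()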
def __init__(self, free_thread_provider):
    config_manager = get_config_manager()
    self.__url = config_manager.get_str(REMOTE_ALLOCATOR_URL,
                                        "http://localhost:7501")
    solver_max_runtime_secs = config_manager.get_float(
        MAX_SOLVER_RUNTIME, DEFAULT_MAX_SOLVER_RUNTIME)
    solver_max_connect_secs = config_manager.get_float(
        MAX_SOLVER_CONNECT_SEC, DEFAULT_MAX_SOLVER_CONNECT_SEC)
    # requests interprets a 2-tuple timeout as (connect timeout, read timeout)
    self.__timeout = (solver_max_connect_secs, solver_max_runtime_secs)
    self.__headers = {'Content-Type': "application/json"}
    self.__reg = None
    log.info("remote allocator max_connect_secs: %s, max_runtime_secs: %s",
             solver_max_connect_secs, solver_max_runtime_secs)
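# Hedged sketch of how the (connect, read) timeout tuple built above is
# consumed by the `requests` library when calling the remote allocator; the
# "/assign_threads" path and the helper name are assumptions for illustration.
def _example_remote_allocator_call(url, body, headers, timeout):
    import requests

    # With a 2-tuple timeout, a slow solver fails after the read timeout
    # (solver_max_runtime_secs) instead of hanging the caller indefinitely.
    return requests.put(url + "/assign_threads", json=body,
                        headers=headers, timeout=timeout)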
def free_threads():
    try:
        body = request.get_json()
        log.info("Processing free threads request: {}".format(body))
        threads_request = get_threads_request(body)
        response = get_free_cpu_allocator().free_threads(threads_request)

        global free_threads_success_count
        free_threads_success_count += 1

        return jsonify(response.to_dict())
    except:
        log.exception("Failed to free threads")

        global free_threads_failure_count
        free_threads_failure_count += 1

        return "Failed to free threads", 500
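# Hedged usage sketch: assuming this handler is bound to a "/free_threads"
# route (the Flask decorator is not shown in this excerpt) and the default
# remote allocator port seen above, a client call would look roughly like:
#
#     curl -X PUT http://localhost:7501/free_threads \
#          -H "Content-Type: application/json" \
#          -d @free_request.json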
def test_assign_one_workload_empty_cpu(self):
    cpu = get_cpu()
    self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                     len(cpu.get_empty_threads()))

    w = get_test_workload(uuid.uuid4(), 1, STATIC)
    request = get_no_usage_threads_request(cpu, [w])
    cpu = noop_reset_allocator.assign_threads(request).get_cpu()
    log.info(cpu)

    self.assertEqual(0, len(cpu.get_empty_threads()))
    self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                     len(cpu.get_claimed_threads()))

    for t in cpu.get_threads():
        self.assertEqual(1, len(t.get_workload_ids()))
        self.assertEqual(w.get_id(), t.get_workload_ids()[0])
def __watch(self):
    while True:
        try:
            instance_id = get_config_manager().get_str("EC2_INSTANCE_ID")
            field_selector = "spec.nodeName={}".format(instance_id)
            log.info("Watching pods with field selector: %s", field_selector)

            v1 = client.CoreV1Api()
            w = watch.Watch()
            for event in w.stream(v1.list_pod_for_all_namespaces,
                                  field_selector=field_selector):
                self.__handle_event(event)
        except:
            log.exception("pod watch thread failed")
def rebalance():
    try:
        body = request.get_json()
        log.info("Processing rebalance threads request: {}".format(body))
        rebalance_request = get_rebalance_request(body)
        response = get_rebalance_cpu_allocator().rebalance(rebalance_request)

        global rebalance_success_count
        rebalance_success_count += 1

        return jsonify(response.to_dict())
    except:
        log.exception("Failed to rebalance")

        global rebalance_failure_count
        rebalance_failure_count += 1

        return "Failed to rebalance", 500
def test_assign_more_than_available_threads_with_one_workload(self):
    for allocator in OVER_ALLOCATORS:
        cpu = get_cpu()
        w_jumbo = get_test_workload("jumbo",
                                    DEFAULT_TOTAL_THREAD_COUNT * 1.5,
                                    STATIC)

        request = AllocateThreadsRequest(cpu,
                                         w_jumbo.get_id(),
                                         {w_jumbo.get_id(): w_jumbo},
                                         {},
                                         DEFAULT_TEST_REQUEST_METADATA)
        cpu = allocator.assign_threads(request).get_cpu()
        log.info(cpu)

        self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                         len(cpu.get_claimed_threads()))
        self.assertEqual(
            [w_jumbo.get_id()],
            list(cpu.get_workload_ids_to_thread_ids().keys()))
def __is_long_enough(self, workload) -> bool:
    min_duration_sec = 60 * self.__config_manager.get_int(
        OVERSUBSCRIBE_WINDOW_SIZE_MINUTES_KEY,
        DEFAULT_OVERSUBSCRIBE_WINDOW_SIZE_MINUTES)
    workload_duration_sec = self.__get_workload_duration(
        workload, min_duration_sec)

    if workload_duration_sec < min_duration_sec:
        log.info(
            "Workload: {} is too short. workload_duration_sec: {} < min_duration_sec: {}"
            .format(workload.get_id(), workload_duration_sec,
                    min_duration_sec))
        return False

    log.info(
        "Workload: {} is long enough. workload_duration_sec: {} >= min_duration_sec: {}"
        .format(workload.get_id(), workload_duration_sec, min_duration_sec))
    return True
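# Worked example of the threshold above: with an (illustrative) window size of
# 10 minutes, min_duration_sec = 60 * 10 = 600, so a workload whose observed
# duration is 540 seconds is rejected, while one at 600 seconds or more passes.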
def __init__(self, relative_start_sec: int, interval_sec: int,
             sample_interval_sec: int = DEFAULT_SAMPLE_FREQUENCY_SEC,
             query_timeout_sec: int = DEFAULT_METRICS_QUERY_TIMEOUT_SEC):
    self.__relative_start_sec = relative_start_sec
    self.__interval_sec = interval_sec
    self.__query_timeout_sec = query_timeout_sec
    self.__interval_count = int(relative_start_sec / interval_sec)
    self.__usages = None
    self.__lock = Lock()

    self.__snapshot_usage_raw()
    log.info("Scheduling pcp metrics collecting every {} seconds".format(
        sample_interval_sec))
    schedule.every(sample_interval_sec).seconds.do(self.__snapshot_usage_raw)
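# Note: schedule.every(...).seconds.do(...) only registers the job; the
# `schedule` library runs nothing by itself. A runner elsewhere in the process
# must pump it, along these lines (a minimal sketch, with an assumed one-second
# poll interval):
def _example_schedule_runner():
    import time
    import schedule

    while True:
        schedule.run_pending()  # executes any registered jobs that are due
        time.sleep(1)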
def __init__(
        self,
        config_manager: ConfigManager,
        exit_handler: ExitHandler,
        properties: List[str],
        detection_interval: int = PROPERTY_CHANGE_DETECTION_INTERVAL_SEC):
    self.__config_manager = config_manager
    self.__exit_handler = exit_handler
    self.__properties = properties

    log.info("Starting to watch for changes to properties: {}".format(
        properties))
    for p in properties:
        v = config_manager.get_cached_str(p)
        log.info("{}: {}".format(p, v))

    schedule.every(detection_interval).seconds.do(self.detect_changes)
def rebalance(self, request: AllocateRequest) -> AllocateResponse:
    self.__call_meta = {}
    cpu = request.get_cpu()
    cpu_usage = request.get_cpu_usage()
    workloads = request.get_workloads()
    self.__cnt_rebalance_calls += 1

    if len(workloads) == 0:
        log.warning("Ignoring rebalance of empty CPU.")
        self.__call_meta['rebalance_empty'] = 1
        return AllocateResponse(cpu, self.get_name(), self.__call_meta)

    log.info("Rebalancing with predictions...")
    curr_ids_per_workload = cpu.get_workload_ids_to_thread_ids()

    return AllocateResponse(
        self.__compute_allocation(cpu, None, workloads,
                                  curr_ids_per_workload, cpu_usage, None),
        self.get_name(),
        self.__call_meta)
def rebalance():
    try:
        request_ip = request.headers.get(FORWARDED_FOR_HEADER)
        log.info("Processing rebalance threads request (from, proxy): {}".format(
            request_ip))
        body = request.get_json()
        rebalance_request = get_rebalance_request(body)
        response = get_rebalance_cpu_allocator().rebalance(rebalance_request)

        global rebalance_success_count
        rebalance_success_count += 1

        log.info("Processed rebalance threads request (from, proxy): {}".format(
            request_ip))
        return jsonify(response.to_dict())
    except:
        log.exception("Failed to rebalance")

        global rebalance_failure_count
        rebalance_failure_count += 1

        return "Failed to rebalance", 500
def get_current_workloads(docker_client):
    workloads = []
    for container in docker_client.containers.list():
        workload_id = container.name
        if __has_required_labels(container):
            try:
                labels = container.labels
                cpu = int(__get_value(labels, CPU_LABEL_KEY, -1))
                mem = int(__get_value(labels, MEM_LABEL_KEY, -1))
                disk = int(__get_value(labels, DISK_LABEL_KEY, -1))
                network = int(__get_value(labels, NETWORK_LABEL_KEY, -1))
                app_name = __get_value(labels, APP_NAME_LABEL_KEY)
                owner_email = __get_value(labels, OWNER_EMAIL_LABEL_KEY)
                command = __get_value(labels, COMMAND_LABEL_KEY)
                entrypoint = __get_value(labels, ENTRYPOINT_LABEL_KEY)
                job_type = __get_value(labels, JOB_TYPE_LABEL_KEY)
                workload_type = __get_value(labels, WORKLOAD_TYPE_LABEL_KEY)
                image = __get_image(container)

                workloads.append(
                    Workload(identifier=workload_id,
                             thread_count=cpu,
                             mem=mem,
                             disk=disk,
                             network=network,
                             app_name=app_name,
                             owner_email=owner_email,
                             image=image,
                             command=command,
                             entrypoint=entrypoint,
                             job_type=job_type,
                             workload_type=workload_type))
                log.info("Found running workload: '{}'".format(workload_id))
            except:
                log.exception(
                    "Failed to parse labels for container: '{}'".format(
                        container.name))
        else:
            log.warning(
                "Found running workload: '{}' without expected labels".format(
                    workload_id))
    return workloads
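# Hedged sketch of the __get_value helper used above (its definition is not
# part of this excerpt); it is assumed to behave like dict.get with a default.
# The name below is hypothetical, chosen to avoid colliding with the real one:
def _assumed_get_value(labels, key, default=None):
    # Return the label's value when present, otherwise the supplied default.
    return labels.get(key, default)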
def get_current_workloads(docker_client):
    workloads = []
    for container in docker_client.containers.list():
        workload_id = container.name
        if __has_required_labels(container):
            try:
                cpu = int(container.labels[CPU_LABEL_KEY])
                workload_type = container.labels[WORKLOAD_TYPE_LABEL_KEY]
                workloads.append(Workload(workload_id, cpu, workload_type))
                log.info("Found running workload: '{}'".format(workload_id))
            except:
                log.exception(
                    "Failed to parse labels for container: '{}'".format(
                        container.name))
        else:
            log.warning(
                "Found running workload: '{}' without expected label: '{}'".format(
                    workload_id, CPU_LABEL_KEY))
    return workloads
def __get_simple_cpu_predictions(self) -> Dict[str, float]:
    cpu_predictor = self.__cpu_usage_predictor_manager.get_cpu_predictor()
    if cpu_predictor is None:
        log.error("Failed to get cpu predictor")
        return {}

    workloads = self.workload_manager.get_workloads()
    resource_usage = GlobalResourceUsage(
        self.__workload_monitor_manager.get_pcp_usage())

    log.info("Getting simple cpu predictions...")
    cpu_predictions = cpu_predictor.get_cpu_predictions(
        workloads, resource_usage)
    if cpu_predictions is None:
        log.error("Failed to get cpu predictions")
        return {}

    log.info("Got simple cpu predictions: %s", json.dumps(cpu_predictions))
    return cpu_predictions
def test_assign_one_thread_empty_cpu(self):
    """
    Workload 0: 1 thread --> (p:0 c:0 t:0)
    """
    for allocator in ALLOCATORS:
        cpu = get_cpu()
        self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                         len(cpu.get_empty_threads()))

        w = get_test_workload(uuid.uuid4(), 1, STATIC)
        request = get_no_usage_threads_request(cpu, [w])
        cpu = allocator.assign_threads(request).get_cpu()
        log.info(cpu)

        self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT - 1,
                         len(cpu.get_empty_threads()))
        self.assertEqual(1, len(cpu.get_claimed_threads()))
        self.assertEqual(
            w.get_id(),
            cpu.get_claimed_threads()[0].get_workload_ids()[0])
def __init__(self, cpu: Cpu, cgroup_manager: CgroupManager,
             cpu_allocator: CpuAllocator):
    self.__reg = None
    self.__lock = Lock()
    self.__instance_id = get_config_manager().get_str(EC2_INSTANCE_ID)
    self.__cpu_allocator = cpu_allocator

    self.__error_count = 0
    self.__added_count = 0
    self.__removed_count = 0
    self.__rebalanced_count = 0
    self.__added_to_full_cpu_count = 0
    self.__allocator_call_duration_sum_secs = 0

    self.__cpu = cpu
    self.__cgroup_manager = cgroup_manager
    self.__wmm = get_workload_monitor_manager()
    self.__workloads = {}
    log.info("Created workload manager")
def get_workload_from_kubernetes(identifier) -> Optional[KubernetesWorkload]:
    if not managers_are_initialized():
        log.error(
            "Cannot get workload from kubernetes because managers aren't initialized")
        return None

    retry_count = get_config_manager().get_int(
        GET_WORKLOAD_RETRY_COUNT, DEFAULT_GET_WORKLOAD_RETRY_COUNT)
    retry_interval = get_config_manager().get_float(
        GET_WORKLOAD_RETRY_INTERVAL_SEC,
        DEFAULT_GET_WORKLOAD_RETRY_INTERVAL_SEC)

    pod_manager = get_pod_manager()
    for _ in range(retry_count):
        log.info("Getting pod from kubernetes: %s", identifier)
        pod = pod_manager.get_pod(identifier)
        if pod is not None:
            log.info("Got pod from kubernetes: %s", identifier)
            return KubernetesWorkload(pod)
        log.info("Retrying getting pod from kubernetes in %s seconds",
                 retry_interval)
        time.sleep(retry_interval)

    log.error("Failed to get pod from kubernetes: %s", identifier)
    return None
def test_external_cpu_manipulation(self):
    cpu = get_cpu()
    violations = get_shared_core_violations(cpu)
    log.info("shared core violations: {}".format(violations))
    self.assertEqual(0, len(violations))

    # Claim 1 thread on every core
    dummy_workload_id = uuid.uuid4()
    for p in cpu.get_packages():
        for c in p.get_cores():
            c.get_threads()[0].claim(dummy_workload_id)

    violations = get_shared_core_violations(cpu)
    log.info("shared core violations: {}".format(violations))
    self.assertEqual(0, len(violations))

    # Assign another workload which will force core sharing
    allocator = GreedyCpuAllocator()
    w = get_test_workload(uuid.uuid4(), 2, STATIC)
    workloads = {w.get_id(): w}
    request = AllocateThreadsRequest(cpu, w.get_id(), workloads, {},
                                     DEFAULT_TEST_REQUEST_METADATA)
    cpu = allocator.assign_threads(request).get_cpu()

    violations = get_shared_core_violations(cpu)
    log.info("shared core violations: {}".format(violations))
    self.assertEqual(2, len(violations))
def __apply_isolation(self, response: AllocateResponse):
    last_w_responses = self.__get_workload_allocation_dict(
        self.__last_response)

    for w_alloc in response.get_workload_allocations():
        last_w_alloc = last_w_responses.get(w_alloc.get_workload_id(), None)
        if w_alloc == last_w_alloc:
            log.info("Skipping update of workload: {}".format(
                w_alloc.get_workload_id()))
            continue

        workload_id = w_alloc.get_workload_id()
        thread_ids = w_alloc.get_thread_ids()
        quota = w_alloc.get_cpu_quota()
        shares = w_alloc.get_cpu_shares()
        log.info("updating workload: '{}' cpuset: '{}', quota: '{}', shares: '{}'".format(
            workload_id, thread_ids, quota, shares))

        # This ordering is important for reporting whether a workload is
        # isolated. We must always set the "cpuset" first.
        self.__cgroup_manager.set_cpuset(workload_id, thread_ids)
        self.__cgroup_manager.set_quota(workload_id, quota)
        self.__cgroup_manager.set_shares(workload_id, shares)
def __apply_isolation(self, response: AllocateResponse):
    last_w_responses = self.__get_workload_allocation_dict(
        self.__last_response)

    for w_alloc in response.get_workload_allocations():
        last_w_alloc = last_w_responses.get(w_alloc.get_workload_id(), None)
        if w_alloc == last_w_alloc:
            log.info("Skipping update of workload: {}".format(
                w_alloc.get_workload_id()))
            continue

        workload_id = w_alloc.get_workload_id()
        thread_ids = w_alloc.get_thread_ids()
        quota = w_alloc.get_cpu_quota()
        shares = w_alloc.get_cpu_shares()
        memory_migrate = w_alloc.get_memory_migrate()
        memory_spread_page = w_alloc.get_memory_spread_page()
        memory_spread_slab = w_alloc.get_memory_spread_slab()

        log.info(f'updating workload: {workload_id} '
                 f'cpuset: {thread_ids}, '
                 f'quota: {quota}, '
                 f'shares: {shares}, '
                 f'memory_migrate: {memory_migrate}, '
                 f'memory_spread_page: {memory_spread_page}, '
                 f'memory_spread_slab: {memory_spread_slab}')

        # This ordering is important for reporting whether a workload is
        # isolated. We must always set the "cpuset" first.
        self.__cgroup_manager.set_cpuset(workload_id, thread_ids)
        self.__cgroup_manager.set_quota(workload_id, quota)
        self.__cgroup_manager.set_shares(workload_id, shares)
        self.__cgroup_manager.set_memory_migrate(workload_id, memory_migrate)
        self.__cgroup_manager.set_memory_spread_page(
            workload_id, memory_spread_page)
        self.__cgroup_manager.set_memory_spread_slab(
            workload_id, memory_spread_slab)
def __predict_usage(self, workloads, cpu_usage):
    res = {}
    cpu_usage_predictor = self.__get_cpu_usage_predictor()

    cm = self.__config_manager
    pred_env = PredEnvironment(cm.get_region(), cm.get_environment(),
                               dt.utcnow().hour)

    start_time = time.time()
    for w in workloads.values():  # TODO: batch the call
        pred = cpu_usage_predictor.predict(
            w, cpu_usage.get(w.get_id(), None), pred_env)
        res[w.get_id()] = pred
    stop_time = time.time()
    self.__call_meta['pred_cpu_usage_dur_secs'] = stop_time - start_time

    try:
        self.__call_meta['pred_cpu_usage_model_id'] = \
            cpu_usage_predictor.get_model().meta_data['model_training_titus_task_id']
    except:
        self.__call_meta['pred_cpu_usage_model_id'] = 'unknown'

    log.info("Usage prediction per workload: " + str(res))
    if len(res) > 0:
        self.__call_meta['pred_cpu_usage'] = dict(res)
    return res
def test_crash_ip_allocator_metrics(self):
    cpu = get_cpu(2, 16, 2)
    test_context = TestContext(cpu=cpu)
    # now override the cpu seen by the allocator to crash it
    test_context.get_workload_manager().get_allocator().set_cpu(
        get_cpu(2, 2, 2))

    events = [get_container_create_event(10, name="foo", id="bar")]
    event_count = len(events)
    event_manager = EventManager(MockEventProvider(events),
                                 test_context.get_event_handlers(),
                                 get_mock_file_manager(),
                                 5.0)
    wait_until(lambda: event_count == event_manager.get_processed_count())
    log.info("Event manager has processed {} events.".format(
        event_manager.get_processed_count()))

    workload_manager = test_context.get_workload_manager()
    registry = Registry()
    reporter = InternalMetricsReporter(workload_manager, event_manager)
    reporter.set_registry(registry)
    reporter.report_metrics({})

    wait_until(lambda: self.__gauge_value_equals(registry, RUNNING, 1))
    wait_until(lambda: self.__gauge_value_equals(registry, ADDED_KEY, 1))
    wait_until(lambda: self.__gauge_value_equals(registry, REMOVED_KEY, 0))
    wait_until(lambda: self.__gauge_value_equals(registry, SUCCEEDED_KEY, 1))
    wait_until(lambda: self.__gauge_value_equals(registry, FAILED_KEY, 0))
    wait_until(lambda: self.__gauge_value_equals(
        registry, WORKLOAD_COUNT_KEY, 1))
    wait_until(lambda: self.__gauge_value_equals(
        registry, FALLBACK_ALLOCATOR_COUNT, 1))

    event_manager.stop_processing_events()
def __init__(self, exit_handler: ExitHandler):
    self.__exit_handler = exit_handler
    self.__config_manager = get_config_manager()
    self.__node_name = self.__config_manager.get_str(EC2_INSTANCE_ID)

    kubeconfig = self.get_kubeconfig_path()
    self.__core_api = kubernetes.client.CoreV1Api(
        kubernetes.config.new_client_from_config(config_file=kubeconfig))
    self.__custom_api = kubernetes.client.CustomObjectsApi(
        kubernetes.config.new_client_from_config(config_file=kubeconfig))

    self.__lock = Lock()
    self.__opportunistic_resources = {}

    oversubscribe_frequency = self.__config_manager.get_float(
        OVERSUBSCRIBE_FREQUENCY_KEY, DEFAULT_OVERSUBSCRIBE_FREQUENCY)
    if oversubscribe_frequency > 0:
        watch_thread = Thread(target=self.__watch)
        watch_thread.start()
    else:
        log.info("Skipping opportunistic resource watch, as opportunistic "
                 "publishing is not configured.")
def __init__(self, config_manager, exit_handler, properties,
             detection_interval=PROPERTY_CHANGE_DETECTION_INTERVAL_SEC):
    self.__config_manager = config_manager
    self.__exit_handler = exit_handler
    self.__properties = properties

    self.__original_properties = {}
    for p in properties:
        self.__original_properties[p] = config_manager.get_str(p)

    self.__original_primary_allocator_name = \
        get_fallback_allocator(config_manager).get_primary_allocator().__class__.__name__

    log.info("Starting to watch for changes to properties: {}".format(
        properties))
    for k, v in self.__original_properties.items():
        log.info("{}: {}".format(k, v))

    schedule.every(detection_interval).seconds.do(self.detect_changes)
def __init__(self, cpu, cgroup_manager,
             allocator_class=IntegerProgramCpuAllocator,
             fallback_allocator_class=GreedyCpuAllocator):
    self.__lock = Lock()

    self.__error_count = 0
    self.__added_count = 0
    self.__removed_count = 0
    self.__allocator_call_duration_sum_secs = 0
    self.__fallback_allocator_calls_count = 0
    self.__time_bound_ip_allocator_solution_count = 0

    self.__cpu = cpu
    self.__cgroup_manager = cgroup_manager
    self.__workloads = {}
    self.__cpu_allocator = allocator_class(cpu)

    self.__is_ip_allocator_used = False
    self.__fallback_cpu_allocator = None
    if isinstance(self.__cpu_allocator, IntegerProgramCpuAllocator):
        self.__is_ip_allocator_used = True
    if fallback_allocator_class is not None:
        self.__fallback_cpu_allocator = fallback_allocator_class(cpu)

    log.info("Created workload manager with allocator: '{}'".format(
        self.__cpu_allocator.__class__.__name__))
def __init__(self, cpu: Cpu, cgroup_manager: CgroupManager,
             cpu_allocator: CpuAllocator):
    self.__reg = None
    self.__tags = None
    self.__lock = Lock()
    self.__instance_id = get_config_manager().get_str(EC2_INSTANCE_ID)
    self.__cpu_allocator = cpu_allocator

    self.__error_count = 0
    self.__added_count = 0
    self.__removed_count = 0
    self.__rebalanced_count = 0
    self.__workload_processing_duration_sec = 0
    self.__update_state_duration_sec = 0

    self.__cpu = cpu
    self.__cgroup_manager = cgroup_manager
    self.__wmm = get_workload_monitor_manager()
    self.__workloads = {}
    self.__last_response = None
    log.info("Created workload manager")
def get_predictions(
        self,
        pods: List[V1Pod],
        resource_usage: GlobalResourceUsage
) -> Optional[ResourceUsagePredictions]:
    config_manager = get_config_manager()
    if config_manager is None:
        log.warning("Config manager not yet set.")
        return None

    running_pods = []
    for p in pods:
        if self.is_running(p):
            running_pods.append(p)
        else:
            log.info("Pod is not yet running: %s", p.metadata.name)

    client_crt = get_client_cert_path(config_manager)
    client_key = get_client_key_path(config_manager)
    if client_crt is None or client_key is None:
        log.error("Failed to generate credential paths")
        return None

    url = get_url(config_manager)
    if url is None:
        log.error("Unable to generate prediction service url")
        return None

    body = self.__get_body(running_pods, resource_usage)
    if body is None:
        log.error("Unable to generate a prediction request body")
        return None

    predictions = get_predictions(client_crt, client_key, url, body)
    if predictions is None:
        log.error("Failed to get predictions")
        return None

    return ResourceUsagePredictions(predictions)
def __watch(self):
    label_selector = "{}={}".format(
        OPPORTUNISTIC_RESOURCE_NODE_NAME_LABEL_KEY, self.__node_name)
    log.info("Starting opportunistic resource watch...")

    stream = None
    try:
        stream = watch.Watch().stream(
            self.__custom_api.list_cluster_custom_object,
            group="titus.netflix.com",
            version="v1",
            plural="opportunistic-resources",
            label_selector=label_selector)
        for event in stream:
            log.info("Event: %s", event)
            if self.__is_expired_error(event):
                raise Exception("Opportunistic resource expired")

            event_type = event['type']
            if event_type not in HANDLED_EVENTS:
                log.warning("Ignoring unhandled event: %s", event)
                continue

            event_metadata_name = event['object']['metadata']['name']
            with self.__lock:
                if event_type == ADDED:
                    self.__opportunistic_resources[event_metadata_name] = event
                elif event_type == DELETED:
                    self.__opportunistic_resources.pop(
                        event_metadata_name, None)
    except Exception:
        if stream is not None:
            stream.close()
        log.exception("Watch of opportunistic resources failed")
        self.__exit_handler.exit(OPPORTUNISTIC_WATCH_FAILURE)