def place_pod_on_node(self, pod: Pod, node: Node):
    """
    Record the placement of ``pod`` on ``node`` so the remaining resources of the node can be calculated.

    For every container of the pod:

    * the normalized container image is registered in ``self.images_on_nodes[node.name]``; the first time an
      image shows up on the node, the ``num_nodes`` counter of its image state is incremented
    * the container's CPU and memory requests (or the defaults exposed by ``container.resources`` when no
      request is set) are subtracted from ``node.allocatable``

    Finally the pod is appended to ``node.pods``.

    :param pod: the pod that has been placed
    :param node: the node the pod has been placed on (mutated in place)
    """
    for container in pod.spec.containers:
        image_name = normalize_image_name(container.image)
        # One lookup of the per-node image mapping is enough: the previous code stored the image state
        # twice into the very same mapping (once through a local alias, once through self).
        node_images = self.images_on_nodes[node.name]
        if image_name not in node_images:
            image_state = self.get_image_state(image_name)
            image_state.num_nodes += 1
            node_images[image_name] = image_state

        resources = container.resources
        required_cpu_millis = resources.requests.get('cpu', resources.default_milli_cpu_request)
        required_memory = resources.requests.get('memory', resources.default_mem_request)
        node.allocatable.cpu_millis -= required_cpu_millis
        node.allocatable.memory -= required_memory

    node.pods.append(pod)
def estimate(self, context: ClusterContext, pod: Pod, scheduling_result: SchedulingResult) -> Tuple[str, str]:
    """
    Estimate the startup time of ``pod`` on the host suggested by the scheduler.

    For each container, ``self.durations`` (queried via ``.query``; presumably a pandas DataFrame) is filtered
    by host type, image, bandwidth to the registry and whether the image is already present on the host; one
    random ``duration`` sample of the matching rows is added to the total.

    :return: the tuple ``('startup_time', <total as str>)``, or ``('startup_time', None)`` when there is no
        scheduling result or no suggested host
    :raises ValueError: if no duration data matches a container's parameters
    """
    target = None if scheduling_result is None else scheduling_result.suggested_host
    if target is None:
        return 'startup_time', None

    host_name = target.name
    # the host type is whatever follows the last underscore of the host name
    host_type = host_name[host_name.rindex('_') + 1:]
    # startup time depends on the bandwidth between the host and the registry
    registry_bandwidth = context.get_bandwidth_graph()[host_name]['registry']

    total = 0
    for container in pod.spec.containers:
        image = container.image
        # the image counts as present when the scheduler did not list it as needed
        cached = normalize_image_name(image) not in scheduling_result.needed_images

        query = (f'host == "{host_type}" and '
                 f'image == "{image}" and '
                 f'bandwidth == "{registry_bandwidth}" and '
                 f'image_present == {cached}')
        matches = self.durations.query(query)
        if matches.empty:
            raise ValueError('no data for %s, %s, %s, %s' % (host_type, image, registry_bandwidth, cached))

        total += matches['duration'].sample().values[0]

    return 'startup_time', str(total)
def estimate(self, context: ClusterContext, pod: Pod, scheduling_result: SchedulingResult) -> Tuple[str, str]:
    """
    Estimate the startup time of ``pod`` on the suggested host, excluding the image download time.

    For each container a startup time is drawn from the sampler returned by ``self.get_sampler`` for the
    host type, image and image presence. If the image still has to be pulled, the time a download of the
    image (size looked up for the host's architecture) would take at a fixed assumed bandwidth is subtracted
    from the sample, never going below zero.

    :return: the tuple ``('startup_time', <total as str>)``, or ``('startup_time', None)`` when there is no
        scheduling result or no suggested host
    """
    if scheduling_result is None:
        return 'startup_time', None
    target = scheduling_result.suggested_host
    if target is None:
        return 'startup_time', None

    host_name = target.name
    arch = target.labels['beta.kubernetes.io/arch']
    # the host type is whatever follows the last underscore of the host name
    host_type = host_name[host_name.rindex('_') + 1:]
    # fixed value; the original author's note says a 100 mbit link is always assumed
    assumed_bandwidth = int(1.25e7)

    total = 0
    for container in pod.spec.containers:
        image = container.image
        normalized = normalize_image_name(image)
        cached = normalized not in scheduling_result.needed_images

        duration = self.get_sampler(host_type, image, cached).sample()
        if not cached:
            # remove the share of the sampled time that a download at the assumed bandwidth accounts for
            size = context.get_image_state(normalized).size[arch]
            duration = max(0, duration - size / assumed_bandwidth)

        total += duration

    return 'startup_time', str(total)
def estimate(self, context: ClusterContext, pod: Pod, scheduling_result: SchedulingResult) -> Tuple[str, str]:
    """
    Estimate the startup time of ``pod`` on the suggested host using pre-built samplers.

    For each container the sampler stored in ``self.startup_time_samplers`` under the key
    ``(host_type, image, image_present, bandwidth)`` is asked for one sample; the samples are summed up.

    :return: the tuple ``('startup_time', <total as str>)``, or ``('startup_time', None)`` when there is no
        scheduling result or no suggested host
    :raises ValueError: carrying the lookup key, if no sampler exists for a container's parameters
    """
    no_target = scheduling_result is None or scheduling_result.suggested_host is None
    if no_target:
        return 'startup_time', None

    host_name = scheduling_result.suggested_host.name
    # the host type is whatever follows the last underscore of the host name
    host_type = host_name[host_name.rindex('_') + 1:]
    # startup time depends on the bandwidth between the host and the registry
    registry_bandwidth = context.get_bandwidth_graph()[host_name]['registry']

    total = 0
    for container in pod.spec.containers:
        image = container.image
        # the image counts as present when the scheduler did not list it as needed
        cached = normalize_image_name(image) not in scheduling_result.needed_images

        key = (host_type, image, cached, registry_bandwidth)
        if key not in self.startup_time_samplers:
            raise ValueError(key)
        total += self.startup_time_samplers[key].sample()

    return 'startup_time', str(total)
def remove_pod_images_from_node(self, pod: Pod, node: Node):
    """
    Unregister the container images of ``pod`` from ``node``.

    Every image of the pod that is currently tracked in ``self.images_on_nodes[node.name]`` is removed from
    that mapping and the ``num_nodes`` counter of its image state is decremented. Images not tracked for the
    node are left untouched.
    """
    for container in pod.spec.containers:
        image_name = normalize_image_name(container.image)
        if image_name not in self.images_on_nodes[node.name]:
            # nothing recorded for this image on the node
            continue

        self.get_image_state(image_name).num_nodes -= 1
        del self.images_on_nodes[node.name][image_name]
def sum_image_scores(self, context: ClusterContext, pod: Pod, node: Node) -> int:
    """
    Sum up the scaled image scores of all container images of ``pod`` that are already present on ``node``.

    :param context: cluster context providing ``list_nodes()`` and the ``images_on_nodes`` mapping
    :param pod: the pod whose container images are scored
    :param node: the node the images are looked up on
    :return: the sum of ``self.scaled_image_score`` over all images found on the node; 0 if the pod has no
        containers (``None`` or empty) or none of its images are on the node
    """
    calc_sum = 0
    total_num_nodes = len(context.list_nodes())

    containers = pod.spec.containers
    # The guard used to compare against the ``Node`` class (always true), so a pod without a container
    # list crashed when iterating ``None``. The intended check is against ``None``.
    if containers is None:
        return calc_sum

    for container in containers:
        try:
            image_state: ImageState = context.images_on_nodes[node.name][normalize_image_name(container.image)]
            calc_sum += self.scaled_image_score(node, image_state, total_num_nodes)
        except KeyError:
            # Best effort: an image that is not on the node contributes nothing. The scoring call
            # deliberately stays inside the try block so the set of swallowed errors is unchanged.
            pass

    return calc_sum
def get_size(self, context: ClusterContext, pod: Pod, node: Node) -> int:
    """
    Calculate the total size of the container images of ``pod`` that are not yet present on ``node``.

    The size of each image is looked up for the architecture given by the node's
    ``beta.kubernetes.io/arch`` label. If an image has no size entry for that architecture, the first
    architecture known for the image is used as an estimate and an error is logged.

    :param context: cluster context providing ``images_on_nodes`` and ``get_image_state``
    :param pod: the pod whose container images are considered
    :param node: the node the pod would be placed on
    :return: the summed size of all images that are missing on the node
    :raises IndexError: if an image state has no size entries at all
    """
    size = 0
    node_arch = node.labels['beta.kubernetes.io/arch']

    for container in pod.spec.containers:
        image_name = normalize_image_name(container.image)

        if image_name in context.images_on_nodes[node.name]:
            # node already has the image
            continue

        image_sizes = context.get_image_state(image_name).size

        # The fallback is decided per image. Previously it overwrote ``node_arch``, so one image lacking
        # the node's architecture made every following image use (and report) the replacement instead.
        arch = node_arch
        if arch not in image_sizes:
            arch = list(image_sizes.keys())[0]
            logger.error("could not resolve node arch '%s' for image '%s', estimating using '%s' instead",
                         node_arch, image_name, arch)

        size += image_sizes[arch]

    return size
def simulate_docker_pull(env: FaasSimEnvironment, replica: FunctionReplica, result: SchedulingResult):
    """
    Generator that simulates pulling the images a replica still needs onto its suggested host.

    The sizes of the needed images (for the host's architecture) are summed up and transferred as a single
    flow from the registry to the host. The generator yields whatever ``flow.start()`` returns; afterwards
    the transferred amount is logged per hop and once for the whole flow. Nothing is yielded or logged if no
    data has to be transferred.

    :param env: simulation environment providing ``now``, ``cluster``, ``topology`` and ``metrics``
    :param replica: the function replica whose pod images are pulled
    :param result: scheduling result providing the suggested host and the needed images
    """
    pull_started = env.now

    # TODO: fidelity could be improved considerably, e.g. by modelling image layers or extraction time
    host = result.suggested_host
    image_sizes = env.cluster.get_image_sizes(replica.function.pod, host.labels['beta.kubernetes.io/arch'])

    # the scheduler already normalized the names in needed_images, so normalize before comparing
    to_transfer = sum(
        image_size
        for image_name, image_size in image_sizes.items()
        if normalize_image_name(image_name) in result.needed_images
    )
    if to_transfer <= 0:
        return

    # FIXME: crude layer sharing model - once the node holds more than one image,
    #  90% of the data is assumed to be shared across images
    if len(env.cluster.images_on_nodes[replica.node.name]) > 1:
        to_transfer = to_transfer * 0.1

    route = env.topology.get_route(env.topology.get_registry(), host)
    yield SafeFlow(env, to_transfer, route).start()

    for hop in route.hops:
        env.metrics.log_network(to_transfer, 'docker_pull', hop)
    env.metrics.log_flow(to_transfer, env.now - pull_started, route.source, route.destination, 'docker_pull')