def start_container_policy(host: Host, NAHM):
    """Start QUEUED inactive containers while host memory headroom remains.

    Walks the inactive containers (longest-inactive first) and starts each
    QUEUED one whose minimum memory limit fits within ``NAHM`` and whose CPU
    request can be satisfied. Started containers move from the host's
    inactive list to its active list.

    Args:
        host: host whose container lists and core allocation are mutated.
        NAHM: available host memory headroom (same unit as the
            ``get*MemoryLimitPG`` accessors — presumably bytes; confirm).

    Returns:
        The remaining headroom after all starts. (Bug fix: the original
        decremented the local ``NAHM`` but never returned it, so the caller
        could not observe the updated value.)
    """
    # Longest-inactive containers get first claim on the headroom.
    candidates = sorted(host.container_inactive_list,
                        key=lambda c: c.getInactiveTime(),
                        reverse=True)
    index = 0
    while (NAHM > 0) and (index < len(candidates)):
        container = candidates[index]
        if (container.getContainerState() == 'QUEUED'):
            if (container.getMinMemoryLimitPG() <= NAHM) and (
                    host.has_free_cores() >= container.request_cpus):
                cpu_allocation = host.get_available_cores(
                    container.request_cpus)
                if parser['Container']['type'] == 'LXC':
                    # LXC: start first, then apply memory/CPU limits.
                    container.startContainer()
                    container.setMemLimit2(container.getMinMemoryLimitPG())
                    container.setCPUCores(cpu_allocation)
                elif parser['Container']['type'] == 'DOCKER':
                    # Docker takes limits at start time; swap limit includes
                    # all host swap on top of the container maximum.
                    swap = container.getMaxMemoryLimit() + psutil.swap_memory(
                    ).total
                    container.startContainer(
                        memory_limit=container.request_mem,
                        swap_limit=swap,
                        cpuset=cpu_allocation)
                host.container_active_list.append(container)
                host.container_inactive_list.remove(container)
                logging.info(
                    'Container %s moved during Start from Inactive -> Active with status %s.',
                    container.name, container.state)
                container.inactive_time = 0
                NAHM -= container.getMemoryLimitPG()
                logging.info('new NAHM\u2193: %d', NAHM)
        index += 1
    return NAHM
def resume_policy(host: Host):
    """Resume SUSPENDED containers that have been memory-starved too long.

    A suspended container whose memory state has been 'STEAL' for more than
    10 state-time units is resumed (in a separate spawned process) provided
    the host has enough free cores for its CPU request. Resumed containers
    move from the inactive list to the active list.

    Args:
        host: host whose container lists are mutated in place.
    """
    # Iterate over a snapshot: the body removes entries from
    # host.container_inactive_list, and mutating a list while iterating it
    # silently skips the element after each removal (bug in the original).
    for container in list(host.container_inactive_list):
        if (container.state == 'SUSPENDED'):
            if (container.getMemoryState() == 'STEAL') and (
                    container.getMemoryStateTime() > 10):
                if (host.has_free_cores() >= container.request_cpus):
                    cpu_allocation = host.get_available_cores(
                        container.request_cpus)
                    container.setContainerState('RESUMING')
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    container.inactive_time = 0
                    print('Resuming container:', container.name)
                    # Resume in a fresh process ('spawn' avoids inheriting
                    # this process's state) so the policy loop is not blocked.
                    ctx = mp.get_context('spawn')
                    proc = ctx.Process(target=container.resumeContainer,
                                       args=(cpu_allocation, ))
                    proc.start()
                    log_plc.info(
                        'Container %s moved during Resume from Inactive -> Active with status %s.',
                        container.name, container.state)
def qos_start_policy(self, host: Host):
    """Start QUEUED inactive containers according to the QoS level.

    Levels (read from ``self.level``):
      * GUARANTEED — a container starts only if its *maximum* memory limit
        fits in ``self.NAHM``; it gets that maximum as its limit.
      * BEST — headroom is divided evenly over the inactive containers; a
        container starts if its *minimum* limit fits in its share, and gets
        exactly that share.
      * FAIR — like BEST, but the granted limit is capped at the
        container's maximum memory limit.

    Args:
        host: host whose container lists and core allocation are mutated.

    Side effects: moves started containers Inactive -> Active and
    decrements ``self.NAHM`` by each started container's memory limit.
    """
    sorted_list = sorted(host.container_inactive_list,
                         key=lambda container: container.getInactiveTime(),
                         reverse=True)
    index = 0
    log_basic.info('Available NAHM: %d', self.NAHM)
    if self.level == 'GUARANTEED':
        while (self.NAHM > 0) and (index < len(sorted_list)):
            container = sorted_list[index]
            if (container.getContainerState() == 'QUEUED'):
                if (container.getMaxMemoryLimitPG() <= self.NAHM) and (
                        host.has_free_cores() >= container.request_cpus):
                    cpu_allocation = host.get_available_cores(
                        container.request_cpus)
                    if parser['Container']['type'] == 'LXC':
                        container.startContainer()
                        container.setMemLimit2(
                            container.getMaxMemoryLimitPG())
                        container.setCPUCores(cpu_allocation)
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    log_basic.info(
                        'Container %s moved during Start from Inactive -> Active with status %s.',
                        container.name, container.state)
                    container.inactive_time = 0
                    self.NAHM -= container.getMemoryLimitPG()
                    log_basic.info('C: %s, CML: %d, new NAHM\u2193: %d',
                                   container.name,
                                   container.getMemoryLimitPG(), self.NAHM)
            index += 1
    elif self.level in ('BEST', 'FAIR'):
        # BEST and FAIR differ only in whether the per-container share is
        # capped at the container's maximum limit; the loops were otherwise
        # identical, so they are merged here.
        inactive_count = host.inactive_list_counter()
        if inactive_count == 0:
            # Guard: the original divided by this count unconditionally and
            # raised ZeroDivisionError when the inactive list was empty.
            return
        limit_division = round(self.NAHM / inactive_count)
        while (self.NAHM > 0) and (index < len(sorted_list)):
            container = sorted_list[index]
            if (container.getContainerState() == 'QUEUED'):
                if (container.getMinMemoryLimitPG() <= limit_division) and (
                        host.has_free_cores() >= container.request_cpus):
                    cpu_allocation = host.get_available_cores(
                        container.request_cpus)
                    if self.level == 'FAIR':
                        # FAIR never grants more than the container's max.
                        new_limit = min(limit_division,
                                        container.getMaxMemoryLimitPG())
                    else:
                        new_limit = limit_division
                    if parser['Container']['type'] == 'LXC':
                        container.startContainer()
                        container.setMemLimit2(new_limit)
                        container.setCPUCores(cpu_allocation)
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    log_basic.info(
                        'Container %s moved during Start from Inactive -> Active with status %s.',
                        container.name, container.state)
                    container.inactive_time = 0
                    self.NAHM -= container.getMemoryLimitPG()
                    log_basic.info('C: %s, CML: %d, new NAHM\u2193: %d',
                                   container.name,
                                   container.getMemoryLimitPG(), self.NAHM)
            index += 1
def memory_shaping_policy(host: Host):
    """Rebalance memory limits across the host's active containers.

    Phases (in order):
      1. Classification — from a 10-sample consumption history, sort RUNNING
         containers into ``need_list`` (RISING and about to hit their limit),
         ``urgent_list`` (incurring major page faults) and ``stable_list``.
      2. Light recovery — shave 10% off stable containers below a 90%
         memory threshold to grow the distributable pool.
      3. Distribution — grant the full need/urgent deltas when the pool
         covers them; otherwise (Critical State 1) grant urgent deltas to
         the longest-running containers while the pool lasts.
      4. Heavy recovery — reclaim inactive anonymous memory from stable
         containers; if still short (Critical State 2), suspend the
         shortest-running non-stable containers.
      5. Start/Resume — only if nothing was stolen/suspended, use leftover
         pool to resume SUSPENDED or start CREATED/NEW containers.

    Args:
        host: host whose container lists, core allocation and limits are
            mutated in place.
    """
    need_list = []
    urgent_list = []
    stable_list = []
    mem_need = 0
    mem_urgent_need = 0

    # --- Phase 1: Classification ------------------------------------------
    print('Classification Phase', flush=True)
    for container in host.container_active_list:
        if container.state == 'RUNNING':
            consumption = database.get_container_memory_consumption2(
                container.name, 10)
            container.setMemoryState(consumption)
            mem_limit = container.getMemoryLimit()
            mem_used = container.getUsedMemory()
            print('Container: ', container.name, ' Using: ', mem_used,
                  ' Limit: ', mem_limit, ' Mem_State: ', container.mem_state,
                  ' MU: ', consumption['memory'], ' SU: ',
                  consumption['swap'], 'MJF: ', consumption['major_faults'])
            if container.getMemoryState() == 'RISING':
                # Projected growth over the window: RAM + swap deltas.
                delta = consumption['memory'] + consumption['swap']
                if (mem_used + delta) >= mem_limit:
                    need_list.append({'container': container, 'delta': delta})
                    logging.info(
                        'Need Container: %s, Using: %d, Delta: %d, Limit: %d',
                        container.name, mem_used, delta, mem_limit)
                    mem_need += delta
                if consumption['major_faults'] > 0:
                    # Major faults mean the container is actively thrashing;
                    # size the urgent grant by faulted pages.
                    delta = consumption['major_faults'] * mmap.PAGESIZE
                    urgent_list.append({
                        'container': container,
                        'delta': delta
                    })
                    logging.info(
                        'Urgent Container: %s, Using: %d, Delta: %d, Limit: %d',
                        container.name, mem_used, delta, mem_limit)
                    mem_urgent_need += delta
            else:
                # Only containers stable for a while are safe steal targets.
                if container.getMemoryStateTime() > 10:
                    stable_list.append(container)
                    logging.info('Stable Container: %s, Using: %d, Limit: %d',
                                 container.name, mem_used, mem_limit)

    # --- Phase 2: Light recovery ------------------------------------------
    # Take 10% back from stable containers below a 90% usage threshold.
    available_limit = host.get_available_limit()
    logging.info('Available Limit to be distribute: %d', available_limit)
    print('Light Recovery Phase', flush=True)
    print('Available: ', available_limit, ' Need: ', mem_need, ' Urgent: ',
          mem_urgent_need, flush=True)
    for container in stable_list:
        if container.getMemoryThreshold() < 90:
            delta = container.getMemoryLimit() // 10
            container.setMemLimit(limit=str(container.mem_limit - delta),
                                  swap=str(container.mem_swap_limit - delta))
            available_limit += delta
            print('Available: ', available_limit, flush=True)

    # --- Phase 3: Distribution --------------------------------------------
    print('Distribution Phase', flush=True)
    print('Available: ', available_limit, ' Need: ', mem_need, ' Urgent: ',
          mem_urgent_need, flush=True)
    if (mem_need > 0) and (mem_need <= available_limit):
        # Pool covers every "need" request: grant them all.
        for item in need_list:
            container = item['container']
            delta = item['delta']
            old_limit = container.getMemoryLimit()
            old_swap_limit = container.getSwapLimit()
            container.setMemLimit(limit=str(old_limit + delta),
                                  swap=str(old_swap_limit + delta))
            print('Container ', container.name, ' updated limit to ',
                  old_limit + delta, flush=True)
            available_limit -= delta
            print('Available: ', available_limit, flush=True)
    elif (mem_urgent_need > 0) and (mem_urgent_need <= available_limit):
        # Pool covers the urgent requests only.
        for item in urgent_list:
            container = item['container']
            delta = item['delta']
            old_limit = container.getMemoryLimit()
            old_swap_limit = container.getSwapLimit()
            container.setMemLimit(limit=str(container.mem_limit + delta),
                                  swap=str(container.mem_swap_limit + delta))
            print('Container ', container.name, ' updated limit to ',
                  old_limit + delta, flush=True)
            available_limit -= delta
            print('Available: ', available_limit, flush=True)
    elif (mem_urgent_need > 0):
        print(
            'Critical State 1: Insufficient Memory for all Urgent Containers')
        # Longest-running urgent containers are served first.
        urgent_list.sort(key=lambda item: item['container'].getRunningTime(),
                         reverse=True)
        index = 0
        while (available_limit > 0) and (index < len(urgent_list)):
            container = urgent_list[index]['container']
            needed = urgent_list[index]['delta']
            print('Container: ', container.name, ' Needed: ', needed)
            if (available_limit - needed) > 0:
                old_limit = container.getMemoryLimit()
                old_swap_limit = container.getSwapLimit()
                container.setMemLimit(
                    limit=str(container.mem_limit + needed),
                    swap=str(container.mem_swap_limit + needed))
                # Bug fix: the original printed `old_limit + delta`, where
                # `delta` was a stale leftover from an earlier loop; the
                # amount actually granted here is `needed`.
                print('Container ', container.name, ' updated limit to ',
                      old_limit + needed, flush=True)
                available_limit -= needed
                print('Available: ', available_limit, flush=True)
            index += 1

    # --- Phase 4: Heavy recovery ------------------------------------------
    # Reclaim inactive anonymous memory (force swap use) from stable
    # containers; escalate to suspension if still short of mem_need.
    print('Heavy Recovery Phase', flush=True)
    print('Available: ', available_limit, ' Need: ', mem_need, ' Urgent: ',
          mem_urgent_need, flush=True)
    steal_check = False
    if (available_limit <= mem_need):
        if stable_list:
            for container in stable_list:
                delta = int(container.mem_stats['inactive_anon'])
                if delta > 0:
                    container.setMemLimit(
                        limit=str(container.mem_limit - delta),
                        swap=str(container.mem_swap_limit - delta))
                    available_limit += delta
                    print('Available: ', available_limit, flush=True)
                    steal_check = True
    if (available_limit <= mem_need):
        print('Critical State 2: Suspend a Container')
        # Suspend shortest-running, non-stable containers until the pool
        # covers mem_need. sorted() snapshots the list, so removing from
        # host.container_active_list inside the loop is safe.
        sorted_list = sorted(host.container_active_list,
                             key=lambda container: container.getRunningTime())
        index = 0
        while (available_limit <= mem_need) and (index < len(sorted_list)):
            container = sorted_list[index]
            if container not in stable_list:
                available_limit += container.getMemoryLimit()
                # Parallel suspension: mark state, free its cores, move it
                # to the inactive list, then suspend in a background thread.
                container.state = 'SUSPENDING'
                core_list = container.cpu_set.split()
                for core in core_list:
                    host.core_allocation[int(core)] = False
                container.inactive_time = datetime.now()
                host.container_inactive_list.append(container)
                host.container_active_list.remove(container)
                logging.info(
                    'Container %s moved during Suspension from Active -> Inactive with status %s.',
                    container.name, container.state)
                print('Container: ', container.name, ' State: ',
                      container.state)
                print('Available: ', available_limit, flush=True)
                Thread(target=container.suspendContainer).start()
                steal_check = True
            index += 1

    # --- Phase 5: Start/Resume --------------------------------------------
    # Only when nothing was stolen or suspended this round: spend leftover
    # pool on resuming suspended or starting new containers.
    if steal_check == False:
        print('Start/Resume Phase', flush=True)
        print('Available: ', available_limit, ' Need: ', mem_need,
              ' Urgent: ', mem_urgent_need, flush=True)
        sorted_list = sorted(host.container_inactive_list,
                             key=lambda container: container.getInactiveTime(),
                             reverse=True)
        print('Lista Ordenada:', sorted_list)
        index = 0
        while (available_limit > 0) and (index < len(sorted_list)):
            container = sorted_list[index]
            if (container.state == 'SUSPENDED'):
                if (container.getMemoryLimit() <= available_limit) and (
                        host.has_free_cores() >= container.request_cpus):
                    print('Restart container ', container.name)
                    cpu_allocation = host.get_available_cores(
                        container.request_cpus)
                    container.state = 'RESUMING'
                    Thread(target=container.resumeContainer,
                           args=(cpu_allocation, )).start()
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    logging.info(
                        'Container %s moved during Resume from Inactive -> Active with status %s.',
                        container.name, container.state)
                    container.inactive_time = 0
                    available_limit -= container.mem_limit
                    print('Available: ', available_limit, flush=True)
            elif (container.state in ['CREATED', 'NEW']):
                if (container.request_mem <= available_limit) and (
                        host.has_free_cores() >= container.request_cpus):
                    cpu_allocation = host.get_available_cores(
                        container.request_cpus)
                    swap = container.request_mem + psutil.swap_memory().total
                    if (cpu_allocation != ''):
                        if parser['Container']['type'] == 'LXC':
                            container.startContainer()
                            container.setMemLimit(str(container.request_mem),
                                                  str(swap))
                            container.setCPUCores(cpu_allocation)
                        elif parser['Container']['type'] == 'DOCKER':
                            container.startContainer(
                                memory_limit=container.request_mem,
                                swap_limit=swap,
                                cpuset=cpu_allocation)
                        host.container_active_list.append(container)
                        host.container_inactive_list.remove(container)
                        logging.info(
                            'Container %s moved during Start from Inactive -> Active with status %s.',
                            container.name, container.state)
                        container.inactive_time = 0
                        available_limit -= container.request_mem
                        print('Available: ', available_limit, flush=True)
            index += 1