def qos_recovery_limit_policy(self, host: Host):
    """Redistribute the remaining NAHM among active containers (FAIR/BEST)."""
    limit_division = round(self.NAHM / host.active_list_counter())
    for container in host.container_active_list:
        mem_limit = container.getMemoryLimitPG()
        max_limit = container.getMaxMemoryLimitPG()
        log_basic.info('C: %s, CML: %d', container.name, mem_limit)
        if self.level == 'FAIR':
            if ((mem_limit + limit_division) > max_limit) and (mem_limit != max_limit):
                # The equal share would overshoot the cap: grant only up to the max.
                self.NAHM -= max_limit - mem_limit
                container.setMemLimit2(max_limit)
                log_basic.info(
                    'Readjusting to Max Container: %s, new CML T1\u25B2: %d, new NAHM: %d\u25BC',
                    container.name, container.getMemoryLimitPG(), self.NAHM)
            elif (mem_limit + limit_division) <= max_limit:
                # '<=' (was '<') so a share landing exactly on the cap is still granted.
                new_limit = mem_limit + limit_division
                self.NAHM -= limit_division
                container.setMemLimit2(new_limit)
                log_basic.info(
                    'Readjusting Container: %s, new CML T1\u25B2: %d, new NAHM\u25BC: %d',
                    container.name, container.getMemoryLimitPG(), self.NAHM)
        elif self.level == 'BEST':
            # Best effort: grant the equal share unconditionally, with no cap check.
            new_limit = mem_limit + limit_division
            self.NAHM -= limit_division
            container.setMemLimit2(new_limit)
            log_basic.info(
                'Readjusting Container: %s, new CML T1\u25B2: %d, new NAHM\u25BC: %d',
                container.name, container.getMemoryLimitPG(), self.NAHM)
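# A minimal, self-contained sketch of the FAIR top-up arithmetic above
# (hypothetical helper on plain integers; `limits` holds (current, max) pairs).
# Each container receives min(share, headroom), which is exactly what the two
# FAIR branches implement.
def _fair_topup_sketch(nahm: int, limits: list) -> tuple:
    share = round(nahm / len(limits))
    new_limits = []
    for current, cap in limits:
        grant = min(share, cap - current)   # never exceed the per-container cap
        nahm -= grant
        new_limits.append(current + grant)
    return nahm, new_limits

# Example: 300 spare pages over three containers; the first is nearly capped.
# _fair_topup_sketch(300, [(450, 500), (200, 600), (100, 600)])
# -> (50, [500, 300, 200])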
def create_host(self, ip_addr):
    """Create a new host, or return the existing one if the IP is already known."""
    if self.ip_exists(ip_addr):
        return self.get_host_by_ip(ip_addr)
    new_host = Host()
    new_host.id += str(self.host_counter)
    new_host.ip = ip_addr
    new_host.add_ip(ip_addr)
    self.host_counter += 1
    self.all_hosts.append(new_host)
    self.add_to_host_list(new_host)
    return new_host
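# Hedged usage sketch: create_host is idempotent per IP address. Assuming a
# registry-like owner object (hypothetical name) starting with host_counter == 0:
#
#     h1 = registry.create_host('10.0.0.5')
#     h2 = registry.create_host('10.0.0.5')   # same IP -> same Host instance
#     assert h1 is h2
#     assert registry.host_counter == 1       # counter advances only for new hosts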
def start_container_policy(host: Host, NAHM):
    """Start QUEUED containers, oldest-waiting first, while budget remains."""
    sorted_list = sorted(host.container_inactive_list,
                         key=lambda container: container.getInactiveTime(),
                         reverse=True)
    index = 0
    while (NAHM > 0) and (index < len(sorted_list)):
        container = sorted_list[index]
        if container.getContainerState() == 'QUEUED':
            if (container.getMinMemoryLimitPG() <= NAHM) and (
                    host.has_free_cores() >= container.request_cpus):
                cpu_allocation = host.get_available_cores(container.request_cpus)
                if parser['Container']['type'] == 'LXC':
                    container.startContainer()
                    container.setMemLimit2(container.getMinMemoryLimitPG())
                    container.setCPUCores(cpu_allocation)
                elif parser['Container']['type'] == 'DOCKER':
                    swap = container.getMaxMemoryLimit() + psutil.swap_memory().total
                    container.startContainer(memory_limit=container.request_mem,
                                             swap_limit=swap,
                                             cpuset=cpu_allocation)
                host.container_active_list.append(container)
                host.container_inactive_list.remove(container)
                logging.info(
                    'Container %s moved during Start from Inactive -> Active with status %s.',
                    container.name, container.state)
                container.inactive_time = 0
                NAHM -= container.getMemoryLimitPG()
                logging.info('new NAHM\u2193: %d', NAHM)
        index += 1
    # Integers are passed by value, so return the spent-down budget to the caller.
    return NAHM
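# Hedged usage note: because the budget is a plain int, callers must capture
# the return value for the post-policy accounting to be accurate, e.g.:
#
#     free_mem = start_container_policy(host, free_mem)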
def resume_policy(host: Host):
    """Resume SUSPENDED containers that have been stealing memory for over 10 s."""
    # Iterate over a copy: the loop removes containers from the inactive list.
    for container in list(host.container_inactive_list):
        if container.state == 'SUSPENDED':
            if (container.getMemoryState() == 'STEAL') and (container.getMemoryStateTime() > 10):
                if host.has_free_cores() >= container.request_cpus:
                    cpu_allocation = host.get_available_cores(container.request_cpus)
                    container.setContainerState('RESUMING')
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    container.inactive_time = 0
                    print('Resuming container:', container.name)
                    # A spawned process (rather than the earlier Thread approach)
                    # keeps a slow resume from blocking the scheduler loop.
                    ctx = mp.get_context('spawn')
                    proc = ctx.Process(target=container.resumeContainer,
                                       args=(cpu_allocation, ))
                    proc.start()
                    log_plc.info(
                        'Container %s moved during Resume from Inactive -> Active with status %s.',
                        container.name, container.state)
def qos_share_limit_policy(self, host: Host):
    """Redistribute memory limits across containers according to the QoS level."""
    total_containers = host.active_list_counter() + host.inactive_list_counter()
    total_max_memory = host.get_max_usable_memoryPG()
    if self.level == 'BEST':
        if host.active_list_counter() > 0:
            total_used = host.get_container_total_usedPG()
            local_NAHM = total_max_memory - total_used
            log_basic.info('Effective Not Used NAHM: %d', local_NAHM)
            shared_local_NAHM = round(local_NAHM / total_containers)
            for container in host.container_active_list:
                if (container.getContainerState() == 'RUNNING') and (shared_local_NAHM > 0):
                    mem_used = container.getUsedMemoryPG()
                    mem_limit = container.getMemoryLimitPG()
                    log_basic.info('C: %s, CMU: %d, CML: %d', container.name,
                                   mem_used, mem_limit)
                    new_limit = mem_used + shared_local_NAHM
                    local_NAHM -= shared_local_NAHM
                    container.setMemLimit2(new_limit)
                    log_basic.info('Best Effort Adjusts Container: %s, new CML: %d',
                                   container.name, container.getMemoryLimitPG())
            self.NAHM = local_NAHM
            log_basic.info('Remaining NAHM to start new containers: %d', self.NAHM)
    elif self.level == 'FAIR':
        fair_share = round(total_max_memory / total_containers)
        if host.active_list_counter() > 0:
            for container in host.container_active_list:
                if container.getContainerState() == 'RUNNING':
                    mem_limit = container.getMemoryLimitPG()
                    log_basic.info('C: %s, CML: %d', container.name, mem_limit)
                    # Compute the target per container (previously a single
                    # reused variable, which let one container's raised floor
                    # leak into the next iteration).
                    new_limit = max(fair_share, container.getMinMemoryLimitPG())
                    delta = mem_limit - new_limit
                    self.NAHM += delta
                    container.setMemLimit2(new_limit)
                    log_basic.info(
                        'Fair Share Stolen Container: %s, Delta: %d, new CML T1\u25BC: %d, new NAHM\u25B2: %d',
                        container.name, delta, container.getMemoryLimitPG(), self.NAHM)
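# A minimal sketch of the FAIR-share math above (hypothetical helper, plain
# ints): every running container converges to total_max / n, floored at its
# own minimum limit, and whatever is freed (or consumed) lands in NAHM.
def _fair_share_sketch(total_max: int, containers: list) -> int:
    """containers holds (current_limit, min_limit) pairs; returns the NAHM delta."""
    share = round(total_max / len(containers))
    nahm = 0
    for current, floor in containers:
        target = max(share, floor)
        nahm += current - target   # positive when the container shrinks
    return nahm

# Example: 900 pages over three containers -> share = 300.
# _fair_share_sketch(900, [(500, 100), (250, 100), (150, 400)])
# -> 200 + (-50) + (-250) = -100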
def suspend_pressure_policy(host: Host):
    """Suspend RUNNING containers whose usage has reached their hard memory cap."""
    # Iterate over a copy: the loop removes containers from the active list.
    for container in list(host.container_active_list):
        if (container.state == 'RUNNING') and (not container.mem_steal_check):
            if container.getUsedMemory() >= container.getMaxMemoryLimit():
                container.setContainerState('SUSPENDING')
                # Release the container's cores back to the host pool.
                for core in container.cpu_set.split():
                    host.core_allocation[int(core)] = False
                container.inactive_time = datetime.now()
                host.container_inactive_list.append(container)
                host.container_active_list.remove(container)
                print('Suspending container:', container.name)
                container.mem_steal_check = True
                # A spawned process (rather than the earlier Thread approach)
                # keeps a slow suspension from blocking the scheduler loop.
                ctx = mp.get_context('spawn')
                proc = ctx.Process(target=container.suspendContainer)
                proc.start()
                log_plc.info(
                    'Container %s moved during Suspension from Active -> Inactive with status %s.',
                    container.name, container.state)
def qos_start_policy(self, host: Host):
    """Start QUEUED containers, oldest-waiting first, sized by the QoS level."""
    sorted_list = sorted(host.container_inactive_list,
                         key=lambda container: container.getInactiveTime(),
                         reverse=True)
    index = 0
    log_basic.info('Available NAHM: %d', self.NAHM)
    if self.level == 'GUARANTEED':
        # Admit only if the container's *maximum* limit fits in the budget.
        while (self.NAHM > 0) and (index < len(sorted_list)):
            container = sorted_list[index]
            if container.getContainerState() == 'QUEUED':
                if (container.getMaxMemoryLimitPG() <= self.NAHM) and (
                        host.has_free_cores() >= container.request_cpus):
                    cpu_allocation = host.get_available_cores(container.request_cpus)
                    if parser['Container']['type'] == 'LXC':
                        container.startContainer()
                        container.setMemLimit2(container.getMaxMemoryLimitPG())
                        container.setCPUCores(cpu_allocation)
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    log_basic.info(
                        'Container %s moved during Start from Inactive -> Active with status %s.',
                        container.name, container.state)
                    container.inactive_time = 0
                    self.NAHM -= container.getMemoryLimitPG()
                    log_basic.info('C: %s, CML: %d, new NAHM\u2193: %d',
                                   container.name, container.getMemoryLimitPG(),
                                   self.NAHM)
            index += 1
    elif self.level == 'BEST':
        # Admit at an equal share of the budget, if it covers the minimum limit.
        limit_division = round(self.NAHM / host.inactive_list_counter())
        while (self.NAHM > 0) and (index < len(sorted_list)):
            container = sorted_list[index]
            if container.getContainerState() == 'QUEUED':
                if (container.getMinMemoryLimitPG() <= limit_division) and (
                        host.has_free_cores() >= container.request_cpus):
                    cpu_allocation = host.get_available_cores(container.request_cpus)
                    if parser['Container']['type'] == 'LXC':
                        container.startContainer()
                        container.setMemLimit2(limit_division)
                        container.setCPUCores(cpu_allocation)
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    log_basic.info(
                        'Container %s moved during Start from Inactive -> Active with status %s.',
                        container.name, container.state)
                    container.inactive_time = 0
                    self.NAHM -= container.getMemoryLimitPG()
                    log_basic.info('C: %s, CML: %d, new NAHM\u2193: %d',
                                   container.name, container.getMemoryLimitPG(),
                                   self.NAHM)
            index += 1
    elif self.level == 'FAIR':
        # Same as BEST, but the granted share is capped at the maximum limit.
        limit_division = round(self.NAHM / host.inactive_list_counter())
        while (self.NAHM > 0) and (index < len(sorted_list)):
            container = sorted_list[index]
            if container.getContainerState() == 'QUEUED':
                if (container.getMinMemoryLimitPG() <= limit_division) and (
                        host.has_free_cores() >= container.request_cpus):
                    cpu_allocation = host.get_available_cores(container.request_cpus)
                    new_limit = min(limit_division, container.getMaxMemoryLimitPG())
                    if parser['Container']['type'] == 'LXC':
                        container.startContainer()
                        container.setMemLimit2(new_limit)
                        container.setCPUCores(cpu_allocation)
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    log_basic.info(
                        'Container %s moved during Start from Inactive -> Active with status %s.',
                        container.name, container.state)
                    container.inactive_time = 0
                    self.NAHM -= container.getMemoryLimitPG()
                    log_basic.info('C: %s, CML: %d, new NAHM\u2193: %d',
                                   container.name, container.getMemoryLimitPG(),
                                   self.NAHM)
            index += 1
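# Admission sizing summary for qos_start_policy (derived from the branches above):
#
#   GUARANTEED: start at getMaxMemoryLimitPG(); admit only if the max fits in NAHM.
#   BEST:       start at round(NAHM / inactive_count); admit if the share covers the min limit.
#   FAIR:       as BEST, but the granted share is capped at getMaxMemoryLimitPG().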
def container_manager2(shared_list: list, entry_queue: mp.Queue):
    # Algorithm 1
    logCM = logging.getLogger('Container_Manager')
    logCM.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
                                  datefmt='%d/%m/%Y %H:%M:%S')
    file_handler = logging.FileHandler(filename='./log/container-manager2.log',
                                       mode='a')
    file_handler.setFormatter(formatter)
    file_handler.setLevel(logging.INFO)
    logCM.addHandler(file_handler)

    config = ConfigParser()
    config.read('./config/local-config.txt')

    host = Host()
    scheduler = VEMOC()
    scheduler.long_interval = int(config['Scheduler']['sched_interval'])
    scheduler.short_interval = max(int(scheduler.long_interval / 2), 3)
    scheduler.mem_write_rate = 9999
    scheduler.swapout_rate = 33000
    scheduler.swapin_rate = 7000
    # HMUT: host memory threshold, expressed in pages (22.8 GiB).
    HMUT = round((22.8 * 2**30) / mmap.PAGESIZE)
    maxMU = 0.997  # previously read from config['Scheduler']['MUE']
    scheduler.latency = 0.1
    sched_counter = 1

    while True:
        start_time = datetime.now()
        logCM.info('========================================================')
        logCM.info('Sched counter: %d', sched_counter)
        logCM.info('Sched init timestamp: %s', start_time)
        scheduler.reset()
        scheduler.sched_interval = scheduler.long_interval
        scheduler.sched_start_time = start_time

        # Add created containers
        while not entry_queue.empty():
            container = entry_queue.get()
            logCM.info('New Container: %s', container.name)
            container.inactive_time = datetime.now()
            container.setContainerState('QUEUED')
            host.container_inactive_list.append(container)

        # Count inactive container memory
        inactive_memory = 0
        host.update()
        host.update_containers2()
        for container in host.container_inactive_list:
            if container.state == 'QUEUED':
                inactive_memory += container.getMinMemoryLimitPG()
            elif container.state == 'SUSPENDED':
                inactive_memory += container.getMemoryLimitPG() + container.getDeltaMemory()

        TCML, NAHM, HAM = host.get_host_memory_info()
        scheduler.setNAHM(NAHM)
        # Degrade the MUE target when available host memory (HAM, in pages;
        # 262144 pages = 1 GiB at 4 KiB pages) runs low.
        if HAM < 262144:
            scheduler.setMUE(
                min(maxMU,
                    maxMU - (HAM + TCML - HMUT) / ((20 * 10**9) / mmap.PAGESIZE)))
        else:
            scheduler.setMUE(maxMU)
        scheduler.spare_mem_cap = round(
            scheduler.mem_write_rate * (scheduler.sched_interval + scheduler.latency))
        logCM.info('NAHM: %d, HAM: %d, TCML: %d', scheduler.NAHM, HAM, TCML)
        logCM.info('MUE: %f, spare_mem_cap: %d, latency: %f', scheduler.MUE,
                   scheduler.spare_mem_cap, scheduler.latency)
        logCM.info('Active List: %s', host.container_active_list)
        logCM.info('Inactive List: %s', host.container_inactive_list)

        # Call algorithm 2
        if len(host.container_active_list) > 0:
            logCM.info('---------------------------------------------------------')
            logCM.info('Executing Demand Estimation:')
            scheduler.mem_demand_estimation2(host)

        # Call algorithm 5
        if scheduler.getNAHM() < (scheduler.getMemoryNeeded() +
                                  scheduler.getMemoryUrgent() +
                                  scheduler.getPauseDemand() + inactive_memory):
            logCM.info('---------------------------------------------------------')
            logCM.info('Executing Passive Memory Reduction: %s',
                       scheduler.provider_list)
            scheduler.passive_memory_reduction2()

        # Call algorithm 6
        if scheduler.getNAHM() < (scheduler.getMemoryNeeded() +
                                  scheduler.getMemoryUrgent() +
                                  scheduler.getPauseDemand()):
            logCM.info('---------------------------------------------------------')
            logCM.info('Executing Active Memory Reduction: %s',
                       scheduler.provider_list)
            scheduler.active_memory_recovery3()

        # Call algorithm 7
        if (scheduler.getMemoryUrgent() > 0) or (scheduler.getMemoryNeeded() > 0) or (
                scheduler.getPauseDemand() > 0):
            logCM.info('---------------------------------------------------------')
            logCM.info('Executing Container Limits Adjusts:')
            scheduler.increase_container_memory_limits(host)

        # Call algorithm 8
        if (scheduler.getMemoryUrgent() > 0) or (scheduler.getMemoryNeeded() > 0) or (
                scheduler.getPauseDemand() > 0):
            logCM.info('---------------------------------------------------------')
            logCM.info('Executing Pause/Suspend Running Containers:')
            scheduler.pause_suspend_running_containers(host)
        # Call algorithm 10
        elif (not scheduler.getStealCheck()) and (len(host.container_inactive_list) != 0):
            logCM.info('---------------------------------------------------------')
            logCM.info('Executing Start/Resume Inactive Containers:')
            scheduler.start_resume_inactive_container(host)

        # Host updates
        host.update()
        host.update_containers2()
        shared_list[0] = host.container_active_list
        shared_list[1] = host.container_inactive_list
        shared_list[2] = host.core_allocation

        # Calculate process sleep time; the 0.007 s constant compensates for
        # wake-up and loop overhead.
        stop_time = datetime.now()
        scheduler.latency = (stop_time - start_time).total_seconds()
        logCM.info('Sched end timestamp: %s', stop_time)
        logCM.info('New Sched Latency: %f', scheduler.latency)
        logCM.info('Sleep time: %f seconds',
                   scheduler.sched_interval - scheduler.latency - 0.007)
        logCM.info('========================================================')
        sched_counter += 1
        if scheduler.latency < scheduler.sched_interval:
            time.sleep(scheduler.sched_interval - scheduler.latency - 0.007)
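# The loop above targets a fixed scheduling period: it sleeps for
# interval - latency - 0.007, re-measuring latency every cycle. A minimal
# drift-free sketch of the same idea (hypothetical helper, monotonic clock):
def _paced_loop_sketch(interval: float, body, cycles: int):
    next_deadline = time.monotonic()
    for _ in range(cycles):
        body()                                  # one scheduling pass
        next_deadline += interval               # fixed-rate deadline
        time.sleep(max(0.0, next_deadline - time.monotonic()))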
def container_manager(shared_list: list, entry_queue: mp.Queue):
    logCM = logging.getLogger('Container_Manager')
    logCM.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
                                  datefmt='%d/%m/%Y %H:%M:%S')
    file_handler = logging.FileHandler(filename='./log/container-manager.log',
                                       mode='a')
    file_handler.setFormatter(formatter)
    file_handler.setLevel(logging.INFO)
    logCM.addHandler(file_handler)

    host = Host()
    while True:
        logCM.debug('Starting Container Manager')
        # Add created containers
        while not entry_queue.empty():
            container = entry_queue.get()
            logCM.info('New Container: %s', container.name)
            container.inactive_time = datetime.now()
            container.setContainerState('QUEUED')
            host.container_inactive_list.append(container)

        host.update()
        host.update_containers()
        free_mem = host.get_available_limit()
        logCM.info('Free Memory Before Policy: %d MiB', free_mem // 2**20)
        if (free_mem > 0) and host.has_free_cores() and host.has_inactive_containers():
            # Capture the returned budget: the int argument is passed by value.
            free_mem = policies.start_container_policy(host, free_mem)
        logCM.info('Free Memory After Policy: %d MiB', free_mem // 2**20)

        host.update()
        host.update_containers()
        shared_list[0] = host.container_active_list
        shared_list[1] = host.container_inactive_list
        shared_list[2] = host.core_allocation

        logCM.debug('Container Manager Sleeping')
        time.sleep(5)
def host_monitor(shared_list: list):
    logging.basicConfig(filename='./log/host-monitor.log',
                        filemode='a',
                        format='%(asctime)s %(levelname)s:%(message)s',
                        datefmt='%d/%m/%Y %H:%M:%S',
                        level=logging.INFO)
    host = Host()
    while True:
        logging.info('========================================================')
        logging.debug('Starting Host Monitor')
        start_time = datetime.now()
        try:
            update_lat_init = datetime.now()
            host.update()
            host.container_active_list = shared_list[0]
            host.container_inactive_list = shared_list[1]
            host.update_containers2()
            update_lat_end = datetime.now()
            logging.info('Get Host + Containers Info Latency: %f',
                         (update_lat_end - update_lat_init).total_seconds())

            publish_lat_init = datetime.now()
            container_list = host.container_active_list + host.container_inactive_list
            logging.info('Container List: %s', container_list)
            for container in container_list:
                if container.checkContainer():
                    logging.debug('Publish Container %s Info', container.name)
                    nosqlbase.publish_container_history(container)
            publish_lat_end = datetime.now()
            logging.info('Local publish Container Info Latency: %f',
                         (publish_lat_end - publish_lat_init).total_seconds())

            send_lat_init = datetime.now()
            logging.debug('Send Monitoring Data to Manager')
            logging.debug('Sent Host Data: %s', vars(host))
            communication.send_monitor_data(host)
            send_lat_end = datetime.now()
            logging.info('Send Host Info to CM Latency: %f',
                         (send_lat_end - send_lat_init).total_seconds())

            host.remove_finished_containers()
        except Exception as err:
            logging.error('Monitor error: %s', err)

        # Target a 1 Hz monitoring loop.
        stop_time = datetime.now()
        monitor_time = (stop_time - start_time).total_seconds()
        logging.info('Monitor Total Time: %f, Next Sleep Time: %f', monitor_time,
                     1 - monitor_time)
        logging.info('========================================================')
        if monitor_time < 1:
            logging.debug('Host Monitor Sleeping')
            time.sleep(1 - monitor_time)
def no_manager(shared_list: list, entry_queue: mp.Queue):
    logNM = logging.getLogger('No_Manager')  # was 'Container_Manager', a copy-paste slip
    logNM.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
                                  datefmt='%d/%m/%Y %H:%M:%S')
    file_handler = logging.FileHandler(filename='./log/no-manager.log', mode='a')
    file_handler.setFormatter(formatter)
    file_handler.setLevel(logging.DEBUG)
    logNM.addHandler(file_handler)

    sched = Basic()
    config = ConfigParser()
    config.read('./config/local-config.txt')
    sched.setLevel(config['QoS']['level'])
    host = Host()
    sched_counter = 1

    while True:
        start_time = datetime.now()
        logNM.info('========================================================')
        logNM.info('Sched counter: %d', sched_counter)
        logNM.info('Sched init timestamp: %s', start_time)
        print(sched_counter, datetime.now())

        # Add created containers
        while not entry_queue.empty():
            container = entry_queue.get()
            logNM.info('New Container: %s', container.name)
            container.inactive_time = datetime.now()
            container.setContainerState('QUEUED')
            host.container_inactive_list.append(container)

        host.update()
        host.update_containers2()
        TCML, NAHM, HAM = host.get_host_memory_info()
        sched.setNAHM(NAHM)
        logNM.info('NAHM: %d, HAM: %d, TCML: %d', sched.getNAHM(), HAM, TCML)
        logNM.info('Active List: %s', host.container_active_list)
        logNM.info('Inactive List: %s', host.container_inactive_list)
        logNM.info('QoS Level: %s', sched.getLevel())

        if host.inactive_list_counter() != 0:
            logNM.info('---------------------------------------------------------')
            logNM.info('Executing Limit Redistribution Policy:')
            sched.qos_share_limit_policy(host)
            logNM.info('---------------------------------------------------------')
            logNM.info('Executing Start Inactive Containers:')
            sched.qos_start_policy(host)
        elif (sched.getNAHM() > 0) and (sched.getLevel() in ['BEST', 'FAIR']) and (
                host.active_list_counter() > 0):
            logNM.info('---------------------------------------------------------')
            logNM.info('Executing NAHM Redistribution:')
            sched.qos_recovery_limit_policy(host)

        host.update()
        host.update_containers2()
        shared_list[0] = host.container_active_list
        shared_list[1] = host.container_inactive_list

        # Target a 1 Hz loop.
        stop_time = datetime.now()
        logNM.info('Sched end timestamp: %s', stop_time)
        latency = (stop_time - start_time).total_seconds()
        logNM.info('New Sched Latency: %f', latency)
        logNM.info('Sleep time: %f seconds', 1 - latency)
        logNM.info('========================================================')
        sched_counter += 1
        if latency < 1:
            time.sleep(1 - latency)
def memory_shaping_policy(host: Host):
    need_list = []
    urgent_list = []
    stable_list = []
    mem_need = 0
    mem_urgent_need = 0

    # Classification:
    # Calculate memory consumption based on the historical info window,
    # categorize the memory behavior, and organize containers into lists.
    print('Classification Phase', flush=True)
    for container in host.container_active_list:
        if container.state == 'RUNNING':
            consumption = database.get_container_memory_consumption2(container.name, 10)
            container.setMemoryState(consumption)
            mem_limit = container.getMemoryLimit()
            mem_used = container.getUsedMemory()
            print('Container: ', container.name, ' Using: ', mem_used,
                  ' Limit: ', mem_limit, ' Mem_State: ', container.mem_state,
                  ' MU: ', consumption['memory'], ' SU: ', consumption['swap'],
                  ' MJF: ', consumption['major_faults'])
            if container.getMemoryState() == 'RISING':
                delta = consumption['memory'] + consumption['swap']
                if (mem_used + delta) >= mem_limit:
                    need_list.append({'container': container, 'delta': delta})
                    logging.info('Need Container: %s, Using: %d, Delta: %d, Limit: %d',
                                 container.name, mem_used, delta, mem_limit)
                    mem_need += delta
                if consumption['major_faults'] > 0:
                    delta = consumption['major_faults'] * mmap.PAGESIZE
                    urgent_list.append({'container': container, 'delta': delta})
                    logging.info('Urgent Container: %s, Using: %d, Delta: %d, Limit: %d',
                                 container.name, mem_used, delta, mem_limit)
                    mem_urgent_need += delta
            else:
                if container.getMemoryStateTime() > 10:
                    stable_list.append(container)
                    logging.info('Stable Container: %s, Using: %d, Limit: %d',
                                 container.name, mem_used, mem_limit)

    # First Recovery:
    # Recover some memory from FALLING and STABLE containers whose usage
    # threshold is below 90%.
    available_limit = host.get_available_limit()
    logging.info('Available Limit to be distributed: %d', available_limit)
    print('Light Recovery Phase', flush=True)
    print('Available: ', available_limit, ' Need: ', mem_need, ' Urgent: ',
          mem_urgent_need, flush=True)
    for container in stable_list:
        if container.getMemoryThreshold() < 90:
            delta = container.getMemoryLimit() // 10
            container.setMemLimit(limit=str(container.mem_limit - delta),
                                  swap=str(container.mem_swap_limit - delta))
            available_limit += delta
            print('Available: ', available_limit, flush=True)

    # Distribution:
    # Grant memory to the containers if the request fits the available limit.
    print('Distribution Phase', flush=True)
    print('Available: ', available_limit, ' Need: ', mem_need, ' Urgent: ',
          mem_urgent_need, flush=True)
    if (mem_need > 0) and (mem_need <= available_limit):
        for item in need_list:
            container = item['container']
            delta = item['delta']
            old_limit = container.getMemoryLimit()
            old_swap_limit = container.getSwapLimit()
            container.setMemLimit(limit=str(old_limit + delta),
                                  swap=str(old_swap_limit + delta))
            print('Container ', container.name, ' updated limit to ',
                  old_limit + delta, flush=True)
            available_limit -= delta
            print('Available: ', available_limit, flush=True)
    elif (mem_urgent_need > 0) and (mem_urgent_need <= available_limit):
        for item in urgent_list:
            container = item['container']
            delta = item['delta']
            old_limit = container.getMemoryLimit()
            old_swap_limit = container.getSwapLimit()
            container.setMemLimit(limit=str(old_limit + delta),
                                  swap=str(old_swap_limit + delta))
            print('Container ', container.name, ' updated limit to ',
                  old_limit + delta, flush=True)
            available_limit -= delta
            print('Available: ', available_limit, flush=True)
    elif mem_urgent_need > 0:
        print('Critical State 1: Insufficient Memory for all Urgent Containers')
        urgent_list.sort(key=lambda item: item['container'].getRunningTime(),
                         reverse=True)
        index = 0
        while (available_limit > 0) and (index < len(urgent_list)):
            container = urgent_list[index]['container']
            needed = urgent_list[index]['delta']
            print('Container: ', container.name, ' Needed: ', needed)
            if (available_limit - needed) > 0:
                old_limit = container.getMemoryLimit()
                old_swap_limit = container.getSwapLimit()
                container.setMemLimit(limit=str(old_limit + needed),
                                      swap=str(old_swap_limit + needed))
                # Was 'old_limit + delta': a stale variable from the loops above.
                print('Container ', container.name, ' updated limit to ',
                      old_limit + needed, flush=True)
                available_limit -= needed
                print('Available: ', available_limit, flush=True)
            index += 1

    # Heavy Recovery:
    # Reclaim memory from stable containers, forcing inactive pages to swap.
    print('Heavy Recovery Phase', flush=True)
    print('Available: ', available_limit, ' Need: ', mem_need, ' Urgent: ',
          mem_urgent_need, flush=True)
    steal_check = False
    if available_limit <= mem_need:
        if stable_list:
            for container in stable_list:
                delta = int(container.mem_stats['inactive_anon'])
                if delta > 0:
                    container.setMemLimit(limit=str(container.mem_limit - delta),
                                          swap=str(container.mem_swap_limit - delta))
                    available_limit += delta
                    print('Available: ', available_limit, flush=True)
                    steal_check = True
        if available_limit <= mem_need:
            print('Critical State 2: Suspend a Container')
            sorted_list = sorted(host.container_active_list,
                                 key=lambda container: container.getRunningTime())
            index = 0
            while (available_limit <= mem_need) and (index < len(sorted_list)):
                container = sorted_list[index]
                if container not in stable_list:
                    available_limit += container.getMemoryLimit()
                    # Parallel suspension thread creation and execution
                    container.state = 'SUSPENDING'
                    for core in container.cpu_set.split():
                        host.core_allocation[int(core)] = False
                    container.inactive_time = datetime.now()
                    host.container_inactive_list.append(container)
                    host.container_active_list.remove(container)
                    logging.info(
                        'Container %s moved during Suspension from Active -> Inactive with status %s.',
                        container.name, container.state)
                    print('Container: ', container.name, ' State: ', container.state)
                    print('Available: ', available_limit, flush=True)
                    Thread(target=container.suspendContainer).start()
                    steal_check = True
                index += 1

    # Start new containers or resume suspended ones.
    if not steal_check:
        print('Start/Resume Phase', flush=True)
        print('Available: ', available_limit, ' Need: ', mem_need, ' Urgent: ',
              mem_urgent_need, flush=True)
        sorted_list = sorted(host.container_inactive_list,
                             key=lambda container: container.getInactiveTime(),
                             reverse=True)
        print('Sorted list:', sorted_list)
        index = 0
        while (available_limit > 0) and (index < len(sorted_list)):
            container = sorted_list[index]
            if container.state == 'SUSPENDED':
                if (container.getMemoryLimit() <= available_limit) and (
                        host.has_free_cores() >= container.request_cpus):
                    print('Restart container ', container.name)
                    cpu_allocation = host.get_available_cores(container.request_cpus)
                    container.state = 'RESUMING'
                    Thread(target=container.resumeContainer,
                           args=(cpu_allocation, )).start()
                    host.container_active_list.append(container)
                    host.container_inactive_list.remove(container)
                    logging.info(
                        'Container %s moved during Resume from Inactive -> Active with status %s.',
                        container.name, container.state)
                    container.inactive_time = 0
                    available_limit -= container.mem_limit
                    print('Available: ', available_limit, flush=True)
            elif container.state in ['CREATED', 'NEW']:
                if (container.request_mem <= available_limit) and (
                        host.has_free_cores() >= container.request_cpus):
                    cpu_allocation = host.get_available_cores(container.request_cpus)
                    swap = container.request_mem + psutil.swap_memory().total
                    if cpu_allocation != '':
                        if parser['Container']['type'] == 'LXC':
                            container.startContainer()
                            container.setMemLimit(str(container.request_mem), str(swap))
                            container.setCPUCores(cpu_allocation)
                        elif parser['Container']['type'] == 'DOCKER':
                            container.startContainer(memory_limit=container.request_mem,
                                                     swap_limit=swap,
                                                     cpuset=cpu_allocation)
                        host.container_active_list.append(container)
                        host.container_inactive_list.remove(container)
                        logging.info(
                            'Container %s moved during Start from Inactive -> Active with status %s.',
                            container.name, container.state)
                        container.inactive_time = 0
                        available_limit -= container.request_mem
                        print('Available: ', available_limit, flush=True)
            index += 1
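# A minimal sketch of the classification rules above (hypothetical helper):
# a RISING container is "needy" when its projected usage crosses its limit,
# and "urgent" when it is already taking major page faults, in which case
# the extra grant is sized from the fault count.
import mmap

def _classify_sketch(used: int, limit: int, consumption: dict):
    delta = consumption['memory'] + consumption['swap']   # projected growth
    needy = (used + delta) >= limit
    urgent = consumption['major_faults'] > 0
    urgent_delta = consumption['major_faults'] * mmap.PAGESIZE
    return needy, delta, urgent, urgent_delta

# Example:
# _classify_sketch(900, 1000, {'memory': 80, 'swap': 40, 'major_faults': 3})
# -> (True, 120, True, 3 * mmap.PAGESIZE)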