def rebalance_containers(self, config):
    """Run one CPU rebalancing pass over the containers of each eligible application.

    An application is eligible when it either has no "rebalance" field or has
    it set to True, owns more than one container, and is accepted by
    ``__app_containers_can_be_rebalanced``.

    :param config: service configuration; stored on the instance and used to
        read the "DEBUG" value.
    :return: None. Returns early if fetching the structures raises an HTTPError.
    """
    self.__config = config
    self.__debug = get_config_value(self.__config, CONFIG_DEFAULT_VALUES, "DEBUG")

    log_info("_______________", self.__debug)
    log_info("Performing CONTAINER CPU Balancing", self.__debug)

    # Fetch both structure types up front; an HTTP failure on either aborts the pass
    try:
        applications = get_structures(self.__couchdb_handler, self.__debug, subtype="application")
        containers = get_structures(self.__couchdb_handler, self.__debug, subtype="container")
    except requests.exceptions.HTTPError as http_error:
        log_error("Couldn't get applications", self.__debug)
        log_error(str(http_error), self.__debug)
        return

    # Keep only the applications that accept rebalancing and have something to rebalance
    eligible_apps = []
    for candidate in applications:
        # TODO Improve this management
        accepts_rebalancing = "rebalance" not in candidate or candidate["rebalance"] == True
        if (accepts_rebalancing
                and len(candidate["containers"]) > 1
                and self.__app_containers_can_be_rebalanced(candidate)):
            eligible_apps.append(candidate)

    # Group the containers by the application they belong to, enriched with usage info
    containers_by_app = {}
    for candidate in eligible_apps:
        name = candidate["name"]
        member_names = candidate["containers"]
        members = [item for item in containers if item["name"] in member_names]
        containers_by_app[name] = self.__fill_containers_with_usage_info(members)

    # Rebalance applications, one at a time
    for candidate in eligible_apps:
        name = candidate["name"]
        log_info("Going to rebalance {0} now".format(name), self.__debug)
        self.__rebalance_containers_by_pair_swapping(containers_by_app[name], name)

    log_info("_______________", self.__debug)
def get_container_resources_dict():
    """Build a mapping from container name to its document, enriched with resources.

    The container documents are fetched from the database, the rescaler
    address of every distinct host is collected, and ``fill_container_dict``
    is asked for the per-container information. Each container found in that
    result gets it stored under its "resources" key.

    :return: dict keyed by container name, or None when no containers are
        available. Containers missing from the retrieved info are left out
        (a warning is logged for each of them).
    """
    # Remote database operation
    containers = get_structures(db_handler, debug, subtype="container")
    if not containers:
        return None

    # Collect the rescaler endpoint of each distinct host, taken from the
    # first container seen on that host
    hosts_info = {}
    for entry in containers:
        host_name = entry["host"]
        if host_name in hosts_info:
            continue
        hosts_info[host_name] = {
            "host_rescaler_ip": entry["host_rescaler_ip"],
            "host_rescaler_port": entry["host_rescaler_port"],
        }

    # For each host, retrieve its containers and persist the ones we look for
    container_info = fill_container_dict(hosts_info, containers)

    container_resources_dict = {}
    for entry in containers:
        name = entry["name"]
        if name in container_info:
            entry["resources"] = container_info[name]
            container_resources_dict[name] = entry
        else:
            log_warning(
                "Container info for {0} not found, check that it is really living in its supposed host '{1}', and that "
                "the host is alive and with the Node Scaler service running".format(name, entry["host"]),
                debug)

    return container_resources_dict
def persist_containers(container_resources_dict):
    """Persist every properly configured container, one worker thread per container.

    :param container_resources_dict: per-container resource information that
        is handed to each ``thread_persist_container`` worker.
    :return: None. Returns immediately when no containers are available;
        otherwise returns once all started workers have been joined.
    """
    # Try to get the containers, if unavailable, return
    # Remote database operation
    containers = get_structures(db_handler, debug, subtype="container")
    if not containers:
        return

    workers = []
    for doc in containers:
        # Check that the document has been properly initialized, otherwise it might be overwritten with just
        # the "current" value without possibility of correcting it. Every offending resource is reported.
        badly_configured = False
        for res in resources_persisted:
            properly_set = res in doc["resources"] and "max" in doc["resources"][res]
            if not properly_set:
                log_error(
                    "Container {0} has not a proper config for the resource {1}".format(doc["name"], res),
                    debug)
                badly_configured = True

        if not badly_configured:
            worker = Thread(target=thread_persist_container, args=(doc, container_resources_dict,))
            worker.start()
            workers.append(worker)

    # Wait for every persisting thread to finish
    for worker in workers:
        worker.join()
def refeed(self, ):
    """Main service loop of the refeeder; it never returns.

    On each epoch the loop:
      1. reads the service document and sends a heartbeat,
      2. reloads the configuration (DEBUG, WINDOW_TIMELAPSE, WINDOW_DELAY, ACTIVE),
      3. if the service is active and container structures exist, starts
         ``self.refeed_thread`` in a background thread,
      4. sleeps for the configured window, waits for the thread and closes the epoch.

    When the service is active but no containers are available, the epoch is
    closed right after sleeping and the loop moves on to the next iteration.
    """
    my_config = MyConfig(CONFIG_DEFAULT_VALUES)
    logging.basicConfig(filename=SERVICE_NAME + '.log', level=logging.INFO)
    while True:
        # Get service info
        service = get_service(self.couchdb_handler, SERVICE_NAME)

        # Heartbeat
        beat(self.couchdb_handler, SERVICE_NAME)

        # CONFIG
        my_config.set_config(service["config"])
        self.debug = my_config.get_value("DEBUG")
        debug = self.debug
        self.window_difference = my_config.get_value("WINDOW_TIMELAPSE")
        self.window_delay = my_config.get_value("WINDOW_DELAY")
        service_is_activated = my_config.get_value("ACTIVE")

        t0 = start_epoch(self.debug)

        log_info("Config is as follows:", debug)
        log_info(".............................................", debug)
        log_info("Time window lapse -> {0}".format(self.window_difference), debug)
        log_info("Delay -> {0}".format(self.window_delay), debug)
        log_info(".............................................", debug)

        # Stays None when nothing is launched during this epoch
        thread = None
        if service_is_activated:
            # Remote database operation. The containers are only used as a gate here:
            # the worker thread fetches the applications on its own.
            containers = get_structures(self.couchdb_handler, debug, subtype="container")
            if not containers:
                # As no container info is available, no application information will be able to be generated
                log_info("No structures to process", debug)
                time.sleep(self.window_difference)
                end_epoch(self.debug, self.window_difference, t0)
                continue
            else:
                thread = Thread(target=self.refeed_thread, args=())
                thread.start()
        else:
            log_warning("Refeeder is not activated", debug)

        time.sleep(self.window_difference)

        wait_operation_thread(thread, debug)
        log_info("Refeed processed", debug)

        end_epoch(self.debug, self.window_difference, t0)
def persist_applications(container_resources_dict):
    """Recompute each application's "current" resource values and store the application.

    For every application, the "current" value of each persisted resource is
    reset to 0 and then incremented with the value found in each of its
    containers, read under the resource's "limit_label" key. Missing
    information is logged and skipped, and the application is updated in the
    database regardless.

    :param container_resources_dict: mapping from container name to a
        document holding a "resources" entry.
    :return: None. Returns immediately when no applications are available.
    """
    # Try to get the applications, if unavailable, return
    applications = get_structures(db_handler, debug, subtype="application")
    if not applications:
        return

    for app in applications:
        # Reset the counters of the resources this application declares
        for res in resources_persisted:
            if res in app["resources"]:
                app["resources"][res]["current"] = 0
            else:
                log_error("Application {0} is missing info of resource {1}".format(app["name"], res), debug)

        # Aggregate the values of every container that belongs to the application
        for member in app["containers"]:
            if member not in container_resources_dict:
                log_error(
                    "Container info {0} is missing for app : {1}, app info will not be totally accurate".format(
                        member, app["name"]),
                    debug)
                continue

            for res in resources_persisted:
                try:
                    member_resources = container_resources_dict[member]["resources"]
                    if res in member_resources and member_resources[res]:
                        value_label = translate_map[res]["limit_label"]
                        app["resources"][res]["current"] += member_resources[res][value_label]
                    else:
                        log_error(
                            "Unable to get info for resource {0} for container {1} when computing app {2} resources".format(
                                res, member, app["name"]),
                            debug)
                except KeyError:
                    # Only report the missing key when both names are available
                    if "name" in container_resources_dict[member] and "name" in app:
                        log_error(
                            "Container info {0} is missing for app: {1} and resource {2} resource,".format(
                                member, app["name"], res)
                            + " app info will not be totally accurate",
                            debug)

        # Remote database operation
        update_structure(app, db_handler, debug)
def guard(self, ):
    """Main service loop of the guardian; it never returns.

    On each epoch the loop:
      1. reads the service document and sends a heartbeat,
      2. reloads the configuration into the instance attributes,
      3. validates it with ``self.invalid_conf()``; on an invalid configuration
         the error is logged, a window shorter than 5 is replaced with the
         default WINDOW_TIMELAPSE, and the epoch is closed after sleeping,
      4. if the service is active and structures of the guarded type exist,
         starts ``self.guard_structures`` on them in a background thread,
      5. sleeps for the configured window, waits for the thread and closes the epoch.
    """
    my_config = MyConfig(CONFIG_DEFAULT_VALUES)
    logging.basicConfig(filename=SERVICE_NAME + '.log', level=logging.INFO)
    while True:
        # Get service info
        service = get_service(self.couchdb_handler, SERVICE_NAME)

        # Heartbeat
        beat(self.couchdb_handler, SERVICE_NAME)

        # CONFIG
        my_config.set_config(service["config"])
        self.debug = my_config.get_value("DEBUG")
        debug = self.debug
        self.guardable_resources = my_config.get_value("GUARDABLE_RESOURCES")
        self.cpu_shares_per_watt = my_config.get_value("CPU_SHARES_PER_WATT")
        self.window_difference = my_config.get_value("WINDOW_TIMELAPSE")
        self.window_delay = my_config.get_value("WINDOW_DELAY")
        self.structure_guarded = my_config.get_value("STRUCTURE_GUARDED")
        self.event_timeout = my_config.get_value("EVENT_TIMEOUT")
        service_is_activated = my_config.get_value("ACTIVE")

        t0 = start_epoch(self.debug)

        log_info("Config is as follows:", debug)
        log_info(".............................................", debug)
        log_info("Time window lapse -> {0}".format(self.window_difference), debug)
        log_info("Delay -> {0}".format(self.window_delay), debug)
        log_info("Event timeout -> {0}".format(self.event_timeout), debug)
        log_info("Resources guarded are -> {0}".format(self.guardable_resources), debug)
        log_info("Structure type guarded is -> {0}".format(self.structure_guarded), debug)
        log_info(".............................................", debug)

        ## CHECK INVALID CONFIG ##
        invalid, message = self.invalid_conf()
        if invalid:
            log_error(message, debug)
            if self.window_difference < 5:
                # Fall back to the default so that the sleep below uses a sane window
                log_error(
                    "Window difference is too short, replacing with DEFAULT value '{0}'".format(
                        CONFIG_DEFAULT_VALUES["WINDOW_TIMELAPSE"]),
                    self.debug)
                self.window_difference = CONFIG_DEFAULT_VALUES["WINDOW_TIMELAPSE"]
            time.sleep(self.window_difference)
            end_epoch(self.debug, self.window_difference, t0)
            continue

        # Stays None when nothing is launched during this epoch
        thread = None
        if service_is_activated:
            # Remote database operation
            structures = get_structures(self.couchdb_handler, debug, subtype=self.structure_guarded)
            if structures:
                log_info("{0} Structures to process, launching threads".format(len(structures)), debug)
                thread = Thread(name="guard_structures", target=self.guard_structures, args=(structures,))
                thread.start()
            else:
                log_info("No structures to process", debug)
        else:
            log_warning("Guardian is not activated", debug)

        time.sleep(self.window_difference)

        wait_operation_thread(thread, debug)

        # The first argument is the debug flag, as in the invalid-config branch above;
        # the epoch start time only belongs in the last position.
        end_epoch(self.debug, self.window_difference, t0)
def refeed_thread(self, ):
    """Fetch the application structures and refeed them, if there are any.

    :return: None.
    """
    # Remote database operation
    applications = get_structures(self.couchdb_handler, self.debug, subtype="application")
    if not applications:
        # Nothing to refeed
        return
    self.refeed_applications(applications)