def rebalance_containers(self, config):
        """Rebalance the CPU of the containers of every eligible application.

        The given ``config`` is stored on the instance and its "DEBUG" value is
        read. Application and container structures are then fetched; if that
        raises an HTTP error, the error is logged and the method returns.

        An application is rebalanced only if all of the following hold:
          * it has no "rebalance" field, or that field compares equal to True
          * it lists more than one container
          * ``__app_containers_can_be_rebalanced`` accepts it

        :param config: configuration from which the "DEBUG" value is looked up
        :return: None
        """
        self.__config = config
        self.__debug = get_config_value(self.__config, CONFIG_DEFAULT_VALUES, "DEBUG")

        log_info("_______________", self.__debug)
        log_info("Performing CONTAINER CPU Balancing", self.__debug)

        # Retrieve both kinds of structures, give up on this round if the request fails
        try:
            applications = get_structures(self.__couchdb_handler, self.__debug, subtype="application")
            containers = get_structures(self.__couchdb_handler, self.__debug, subtype="container")
        except requests.exceptions.HTTPError as e:
            log_error("Couldn't get applications", self.__debug)
            log_error(str(e), self.__debug)
            return

        # Keep only the applications that accept rebalancing and need an internal rebalance
        rebalanceable_apps = list()
        for candidate in applications:
            # TODO Improve this management
            # The explicit comparison against True is intentional: any present value
            # that is not equal to True disables the rebalancing of the application
            if "rebalance" in candidate and candidate["rebalance"] != True:
                continue
            # With a single container (or none) there is nothing to balance between
            if len(candidate["containers"]) < 2:
                continue
            if not self.__app_containers_can_be_rebalanced(candidate):
                continue
            rebalanceable_apps.append(candidate)

        # Group the containers under the application they belong to, with their usages filled in
        app_containers = dict()
        for candidate in rebalanceable_apps:
            member_names = candidate["containers"]
            members = [container for container in containers if container["name"] in member_names]
            app_containers[candidate["name"]] = self.__fill_containers_with_usage_info(members)

        # Rebalance the applications, one at a time
        for candidate in rebalanceable_apps:
            name = candidate["name"]
            log_info("Going to rebalance {0} now".format(name), self.__debug)
            self.__rebalance_containers_by_pair_swapping(app_containers[name], name)

        log_info("_______________", self.__debug)
def get_container_resources_dict():
    """Build a mapping from container name to its structure plus resource info.

    The container structures are fetched from the database and the rescaler
    address of every distinct host is collected. ``fill_container_dict`` is
    then asked for the per-container info. Each container found in that result
    gets the info stored under its "resources" key (the container document is
    modified in place); containers missing from it are reported and left out.

    :return: dict keyed by container name, or None when no container structure
             was retrieved
    """
    # Remote database operation
    containers = get_structures(db_handler, debug, subtype="container")
    if not containers:
        return

    # Collect the rescaler endpoint of each distinct host, taken from the first
    # container seen on that host
    hosts_info = dict()
    for structure in containers:
        host_name = structure["host"]
        if host_name in hosts_info:
            continue
        hosts_info[host_name] = {
            "host_rescaler_ip": structure["host_rescaler_ip"],
            "host_rescaler_port": structure["host_rescaler_port"],
        }

    # For each host, retrieve its containers and persist the ones we look for
    # NOTE(review): the result is used below as a mapping keyed by container name
    container_info = fill_container_dict(hosts_info, containers)

    container_resources_dict = dict()
    for structure in containers:
        name = structure["name"]
        if name in container_info:
            # The container document itself is reused and its "resources" replaced
            structure["resources"] = container_info[name]
            container_resources_dict[name] = structure
            continue
        log_warning(
            "Container info for {0} not found, check that it is really living in its supposed host '{1}', and that "
            "the host is alive and with the Node Scaler service running".format(name, structure["host"]),
            debug)

    return container_resources_dict
def persist_containers(container_resources_dict):
    """Persist every properly configured container, one thread per container.

    The container structures are fetched from the database. A container is
    skipped when any resource in ``resources_persisted`` is missing from its
    "resources" or lacks a "max" entry; every such resource is logged. For the
    remaining containers a thread running ``thread_persist_container`` is
    started, and all the threads are joined before returning.

    :param container_resources_dict: passed untouched to each persisting thread
    :return: None
    """
    # Try to get the containers, if unavailable, return
    # Remote database operation
    containers = get_structures(db_handler, debug, subtype="container")
    if not containers:
        return

    workers = []
    for container in containers:
        # Check that the document has been properly initialized, otherwise it might be overwritten with just
        # the "current" value without possibility of correcting it.
        # All the resources are checked (no early exit) so that every problem gets logged.
        properly_initialized = True
        for resource in resources_persisted:
            configured = container["resources"]
            if resource in configured and "max" in configured[resource]:
                continue
            log_error(
                "Container {0} has not a proper config for the resource {1}".format(container["name"], resource),
                debug)
            properly_initialized = False

        if not properly_initialized:
            continue

        worker = Thread(target=thread_persist_container, args=(container, container_resources_dict))
        worker.start()
        workers.append(worker)

    # Wait until every container has been persisted
    for worker in workers:
        worker.join()
# Example #4
0
    def refeed(self, ):
        """Run the refeeder main loop; this method never returns.

        On every iteration the service document is fetched, a heartbeat is
        sent and the configuration is reloaded ("DEBUG", "WINDOW_TIMELAPSE",
        "WINDOW_DELAY" and "ACTIVE"). If the service is active and container
        structures are available, ``refeed_thread`` is launched in a thread.
        The loop then sleeps for the configured time window, waits for the
        thread and closes the epoch.
        """
        my_config = MyConfig(CONFIG_DEFAULT_VALUES)
        logging.basicConfig(filename=SERVICE_NAME + '.log', level=logging.INFO)

        while True:
            # Get the service info and send the heartbeat
            service = get_service(self.couchdb_handler, SERVICE_NAME)
            beat(self.couchdb_handler, SERVICE_NAME)

            # Reload the configuration, as it may have changed since the last iteration
            my_config.set_config(service["config"])
            self.debug = my_config.get_value("DEBUG")
            debug = self.debug
            self.window_difference = my_config.get_value("WINDOW_TIMELAPSE")
            self.window_delay = my_config.get_value("WINDOW_DELAY")
            service_is_activated = my_config.get_value("ACTIVE")

            t0 = start_epoch(self.debug)

            separator = "............................................."
            config_report = (
                "Config is as follows:",
                separator,
                "Time window lapse -> {0}".format(self.window_difference),
                "Delay -> {0}".format(self.window_delay),
                separator,
            )
            for line in config_report:
                log_info(line, debug)

            thread = None
            if not service_is_activated:
                log_warning("Refeeder is not activated", debug)
            else:
                # Remote database operation
                containers = get_structures(self.couchdb_handler, debug, subtype="container")
                if not containers:
                    # As no container info is available, no application information will be able to be generated
                    log_info("No structures to process", debug)
                    time.sleep(self.window_difference)
                    end_epoch(self.debug, self.window_difference, t0)
                    continue
                thread = Thread(target=self.refeed_thread, args=())
                thread.start()

            time.sleep(self.window_difference)

            # thread is still None here when the service is not activated
            wait_operation_thread(thread, debug)
            log_info("Refeed processed", debug)

            end_epoch(self.debug, self.window_difference, t0)
def persist_applications(container_resources_dict):
    """Recompute the "current" resource values of every application and store them.

    For each application fetched from the database, the "current" value of
    every resource in ``resources_persisted`` is reset to 0 and then increased
    with the value each of its containers reports under the resource's
    "limit_label" (taken from ``translate_map``). Missing containers or
    resources are logged and skipped, so the totals may be partial. Each
    application is finally written back with ``update_structure``.

    :param container_resources_dict: container name -> container document with a
                                     "resources" entry
    :return: None
    """
    # Try to get the applications, if unavailable, return
    applications = get_structures(db_handler, debug, subtype="application")
    if not applications:
        return

    for app in applications:
        # Start the aggregation of every known resource from zero
        for resource in resources_persisted:
            if resource in app["resources"]:
                app["resources"][resource]["current"] = 0
                continue
            log_error(
                "Application {0} is missing info of resource {1}".format(app["name"], resource),
                debug)

        for container_name in app["containers"]:
            if container_name not in container_resources_dict:
                log_error(
                    "Container info {0} is missing for app : {1}, app info will not be totally accurate".format(
                        container_name, app["name"]),
                    debug)
                continue

            for resource in resources_persisted:
                try:
                    container_resources = container_resources_dict[container_name]["resources"]
                    if resource in container_resources and container_resources[resource]:
                        limit_label = translate_map[resource]["limit_label"]
                        app["resources"][resource]["current"] += container_resources[resource][limit_label]
                    else:
                        log_error(
                            "Unable to get info for resource {0} for container {1} when computing app {2} resources".format(
                                resource, container_name, app["name"]),
                            debug)
                except KeyError:
                    # Any key missing above ends up here; the error can only be
                    # reported when both names are available
                    names_available = "name" in container_resources_dict[container_name] and "name" in app
                    if names_available:
                        log_error(
                            "Container info {0} is missing for app: {1} and resource {2} resource,".format(
                                container_name, app["name"], resource) + " app info will not be totally accurate",
                            debug)

        # Remote database operation
        update_structure(app, db_handler, debug)
# Example #6
0
    def guard(self, ):
        """Run the guardian main loop; this method never returns.

        On every iteration:
          1. the service document is fetched and a heartbeat is sent
          2. the configuration is reloaded into the instance attributes
          3. the configuration is validated with ``self.invalid_conf()``; if it
             is invalid the error is logged, the loop sleeps for the time
             window (replaced by the default when shorter than 5) and the
             iteration is abandoned
          4. if the service is active, the structures of the guarded subtype
             are fetched and, if any, ``self.guard_structures`` is run on them
             in a thread
          5. the loop sleeps for the time window, waits for the thread and
             closes the epoch
        """
        myConfig = MyConfig(CONFIG_DEFAULT_VALUES)
        logging.basicConfig(filename=SERVICE_NAME + '.log', level=logging.INFO)

        while True:
            # Get service info
            service = get_service(self.couchdb_handler, SERVICE_NAME)

            # Heartbeat
            beat(self.couchdb_handler, SERVICE_NAME)

            # CONFIG, reloaded on every iteration
            myConfig.set_config(service["config"])
            self.debug = myConfig.get_value("DEBUG")
            debug = self.debug
            self.guardable_resources = myConfig.get_value(
                "GUARDABLE_RESOURCES")
            self.cpu_shares_per_watt = myConfig.get_value(
                "CPU_SHARES_PER_WATT")
            self.window_difference = myConfig.get_value("WINDOW_TIMELAPSE")
            self.window_delay = myConfig.get_value("WINDOW_DELAY")
            self.structure_guarded = myConfig.get_value("STRUCTURE_GUARDED")
            self.event_timeout = myConfig.get_value("EVENT_TIMEOUT")
            SERVICE_IS_ACTIVATED = myConfig.get_value("ACTIVE")

            t0 = start_epoch(self.debug)

            log_info("Config is as follows:", debug)
            log_info(".............................................", debug)
            log_info("Time window lapse -> {0}".format(self.window_difference),
                     debug)
            log_info("Delay -> {0}".format(self.window_delay), debug)
            log_info("Event timeout -> {0}".format(self.event_timeout), debug)
            log_info(
                "Resources guarded are -> {0}".format(
                    self.guardable_resources), debug)
            log_info(
                "Structure type guarded is -> {0}".format(
                    self.structure_guarded), debug)
            log_info(".............................................", debug)

            ## CHECK INVALID CONFIG ##
            invalid, message = self.invalid_conf()
            if invalid:
                log_error(message, debug)
                # A too short window would make this loop spin while the config
                # stays invalid, so fall back to the default value
                if self.window_difference < 5:
                    log_error(
                        "Window difference is too short, replacing with DEFAULT value '{0}'"
                        .format(CONFIG_DEFAULT_VALUES["WINDOW_TIMELAPSE"]),
                        self.debug)
                    self.window_difference = CONFIG_DEFAULT_VALUES[
                        "WINDOW_TIMELAPSE"]
                time.sleep(self.window_difference)
                end_epoch(self.debug, self.window_difference, t0)
                continue

            thread = None
            if SERVICE_IS_ACTIVATED:
                # Remote database operation
                structures = get_structures(self.couchdb_handler,
                                            debug,
                                            subtype=self.structure_guarded)
                if structures:
                    log_info(
                        "{0} Structures to process, launching threads".format(
                            len(structures)), debug)
                    thread = Thread(name="guard_structures",
                                    target=self.guard_structures,
                                    args=(structures, ))
                    thread.start()
                else:
                    log_info("No structures to process", debug)
            else:
                log_warning("Guardian is not activated", debug)

            time.sleep(self.window_difference)

            # thread is still None here when nothing was launched
            wait_operation_thread(thread, debug)

            # The first argument is the debug flag, as in the invalid-config
            # branch above (the epoch start value was passed here by mistake)
            end_epoch(self.debug, self.window_difference, t0)
# Example #7
0
 def refeed_thread(self, ):
     """Fetch the application structures and refeed them, if there are any."""
     # Remote database operation
     applications = get_structures(self.couchdb_handler, self.debug, subtype="application")
     if not applications:
         return
     self.refeed_applications(applications)