def rebalance_containers(self, config):
        self.__config = config
        self.__debug = get_config_value(self.__config, CONFIG_DEFAULT_VALUES, "DEBUG")

        log_info("_______________", self.__debug)
        log_info("Performing CONTAINER CPU Balancing", self.__debug)

        # Get the containers and applications
        try:
            applications = get_structures(self.__couchdb_handler, self.__debug, subtype="application")
            containers = get_structures(self.__couchdb_handler, self.__debug, subtype="container")
        except requests.exceptions.HTTPError as e:
            log_error("Couldn't get applications", self.__debug)
            log_error(str(e), self.__debug)
            return

        # Filter out the ones that do not accept rebalancing or that do not need any internal rebalancing
        rebalanceable_apps = list()
        for app in applications:
            # TODO Improve this management
            if "rebalance" not in app or app["rebalance"] == True:
                pass
            else:
                continue
            if len(app["containers"]) <= 1:
                continue

            if self.__app_containers_can_be_rebalanced(app):
                rebalanceable_apps.append(app)

        # Group the containers by the application they belong to
        app_containers = dict()
        for app in rebalanceable_apps:
            app_name = app["name"]
            app_containers[app_name] = list()
            app_containers_names = app["containers"]
            for container in containers:
                if container["name"] in app_containers_names:
                    app_containers[app_name].append(container)
            # Get the container usages
            app_containers[app_name] = self.__fill_containers_with_usage_info(app_containers[app_name])

        # Rebalance applications
        for app in rebalanceable_apps:
            app_name = app["name"]
            log_info("Going to rebalance {0} now".format(app_name), self.__debug)
            self.__rebalance_containers_by_pair_swapping(app_containers[app_name], app_name)

        log_info("_______________", self.__debug)
Example #2
    def refeed_user_used_energy(self, applications, users, db_handler, debug):
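        # Aggregate the CPU and energy usage of each user's applications and
        # persist the per-user totals in the database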
        for user in users:
            if "cpu" not in user:
                user["cpu"] = {}
            if "energy" not in user:
                user["energy"] = {}
            total_user = {"cpu": 0, "energy": 0}
            total_user_current_cpu = 0
            user_apps = get_user_apps(applications, user)
            for app in user_apps:
                for resource in ["energy", "cpu"]:
                    if "usage" in app["resources"][resource] and app[
                            "resources"][resource]["usage"]:
                        total_user[resource] += app["resources"][resource][
                            "usage"]
                    else:
                        log_error(
                            "Application {0} of user {1} has no used {2} field or value"
                            .format(app["name"], user["name"],
                                    resource), debug)

                if "current" in app["resources"]["cpu"] and app["resources"][
                        "cpu"]["usage"]:
                    total_user_current_cpu += app["resources"][resource][
                        "current"]
                else:
                    log_error(
                        "Application {0} of user {1} has no current cpu field or value"
                        .format(app["name"], user["name"]), debug)

            user["energy"]["used"] = total_user["energy"]
            user["cpu"]["usage"] = total_user["cpu"]
            user["cpu"]["current"] = total_user_current_cpu
            db_handler.update_user(user)
            log_info(
                "Updated energy consumed by user {0}".format(user["name"]),
                debug)
Example #3
    def print_structure_info(self, container, usages, limits, triggered_events,
                             triggered_requests):
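        # Log a one-line summary of the container's guarded resources along with
        # any triggered events and requests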
        resources = container["resources"]

        container_name_str = "@" + container["name"]
        resources_str = "| "
        for resource in self.guardable_resources:
            if container["resources"][resource]["guard"]:
                resources_str += resource + "({0})".format(
                    self.get_resource_summary(resource, resources, limits,
                                              usages)) + " | "

        ev, req = list(), list()
        for event in triggered_events:
            ev.append(event["name"])
        for request in triggered_requests:
            req.append(request["action"])
        triggered_requests_and_events = "#TRIGGERED EVENTS {0} AND TRIGGERED REQUESTS {1}".format(
            str(ev), str(req))
        log_info(
            " ".join([
                container_name_str, resources_str,
                triggered_requests_and_events
            ]), self.debug)
def persist_thread():
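    # Snapshot the resource information of applications and containers, timing each step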
    t0 = time.time()
    container_resources_dict = get_container_resources_dict()
    t1 = time.time()
    persist_applications(container_resources_dict)
    t2 = time.time()
    persist_containers(container_resources_dict)
    t3 = time.time()

    log_info(
        "It took {0} seconds to get container info".format(
            str("%.2f" % (t1 - t0))), debug)
    log_info(
        "It took {0} seconds to snapshot applications".format(
            str("%.2f" % (t2 - t1))), debug)
    log_info(
        "It took {0} seconds to snapshot containers".format(
            str("%.2f" % (t3 - t2))), debug)
def persist_docs(funct):
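    # Retrieve the documents of the given type and send them, logging the time spent on each step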

    t0 = time.time()
    docs = get_data(funct)
    t1 = time.time()

    if docs:
        log_info(
            "It took {0} seconds to get {1} info".format(
                str("%.2f" % (t1 - t0)), funct), debug)
        num_docs = send_data(docs)
        t2 = time.time()
        if num_docs > 0:
            log_info(
                "It took {0} seconds to send {1} info".format(
                    str("%.2f" % (t2 - t1)), funct), debug)
            log_info(
                "Post was done with {0} documents of '{1}'".format(
                    str(num_docs), funct), debug)
def persist():
    logging.basicConfig(filename=SERVICE_NAME + '.log', level=logging.INFO)

    global debug

    myConfig = MyConfig(CONFIG_DEFAULT_VALUES)

    while True:
        log_info("----------------------", debug)
        log_info("Starting Epoch", debug)
        t0 = time.time()

        # Get service info
        service = get_service(db_handler,
                              SERVICE_NAME)  # Remote database operation

        # Heartbeat
        beat(db_handler, SERVICE_NAME)  # Remote database operation

        # CONFIG
        myConfig.set_config(service["config"])
        polling_frequency = myConfig.get_value("POLLING_FREQUENCY")
        debug = myConfig.get_value("DEBUG")
        documents_persisted = myConfig.get_value("DOCUMENTS_PERSISTED")
        SERVICE_IS_ACTIVATED = myConfig.get_value("ACTIVE")

        log_info("Config is as follows:", debug)
        log_info(".............................................", debug)
        log_info("Polling frequency -> {0}".format(polling_frequency), debug)
        log_info(
            "Documents to be persisted are -> {0}".format(documents_persisted),
            debug)
        log_info(".............................................", debug)

        ## CHECK INVALID CONFIG ##
        # TODO This code is duplicated on the structures and database snapshoters
        invalid, message = invalid_conf(myConfig)
        if invalid:
            log_error(message, debug)
            if polling_frequency < 4:
                log_error(
                    "Polling frequency is too short, replacing with DEFAULT value '{0}'"
                    .format(CONFIG_DEFAULT_VALUES["POLLING_FREQUENCY"]), debug)
                polling_frequency = CONFIG_DEFAULT_VALUES["POLLING_FREQUENCY"]

            log_info("----------------------\n", debug)
            time.sleep(polling_frequency)
            continue

        if SERVICE_IS_ACTIVATED:
            for docType in documents_persisted:
                persist_docs(docType)
        else:
            log_warning(
                "Database snapshoter is not activated, will not do anything",
                debug)

        t1 = time.time()
        log_info("Epoch processed in {0} seconds ".format("%.2f" % (t1 - t0)),
                 debug)
        log_info("----------------------\n", debug)

        time.sleep(polling_frequency)
Example #7
    def guard(self):
        myConfig = MyConfig(CONFIG_DEFAULT_VALUES)
        logging.basicConfig(filename=SERVICE_NAME + '.log', level=logging.INFO)

        while True:
            # Get service info
            service = get_service(self.couchdb_handler, SERVICE_NAME)

            # Heartbeat
            beat(self.couchdb_handler, SERVICE_NAME)

            # CONFIG
            myConfig.set_config(service["config"])
            self.debug = myConfig.get_value("DEBUG")
            debug = self.debug
            self.guardable_resources = myConfig.get_value(
                "GUARDABLE_RESOURCES")
            self.cpu_shares_per_watt = myConfig.get_value(
                "CPU_SHARES_PER_WATT")
            self.window_difference = myConfig.get_value("WINDOW_TIMELAPSE")
            self.window_delay = myConfig.get_value("WINDOW_DELAY")
            self.structure_guarded = myConfig.get_value("STRUCTURE_GUARDED")
            self.event_timeout = myConfig.get_value("EVENT_TIMEOUT")
            SERVICE_IS_ACTIVATED = myConfig.get_value("ACTIVE")

            t0 = start_epoch(self.debug)

            log_info("Config is as follows:", debug)
            log_info(".............................................", debug)
            log_info("Time window lapse -> {0}".format(self.window_difference),
                     debug)
            log_info("Delay -> {0}".format(self.window_delay), debug)
            log_info("Event timeout -> {0}".format(self.event_timeout), debug)
            log_info(
                "Resources guarded are -> {0}".format(
                    self.guardable_resources), debug)
            log_info(
                "Structure type guarded is -> {0}".format(
                    self.structure_guarded), debug)
            log_info(".............................................", debug)

            ## CHECK INVALID CONFIG ##
            invalid, message = self.invalid_conf()
            if invalid:
                log_error(message, debug)
                if self.window_difference < 5:
                    log_error(
                        "Window difference is too short, replacing with DEFAULT value '{0}'"
                        .format(CONFIG_DEFAULT_VALUES["WINDOW_TIMELAPSE"]),
                        self.debug)
                    self.window_difference = CONFIG_DEFAULT_VALUES[
                        "WINDOW_TIMELAPSE"]
                time.sleep(self.window_difference)
                end_epoch(self.debug, self.window_difference, t0)
                continue

            thread = None
            if SERVICE_IS_ACTIVATED:
                # Remote database operation
                structures = get_structures(self.couchdb_handler,
                                            debug,
                                            subtype=self.structure_guarded)
                if structures:
                    log_info(
                        "{0} Structures to process, launching threads".format(
                            len(structures)), debug)
                    thread = Thread(name="guard_structures",
                                    target=self.guard_structures,
                                    args=(structures, ))
                    thread.start()
                else:
                    log_info("No structures to process", debug)
            else:
                log_warning("Guardian is not activated", debug)

            time.sleep(self.window_difference)

            wait_operation_thread(thread, debug)

            end_epoch(self.debug, self.window_difference, t0)
    def __rebalance_containers_by_pair_swapping(self, containers, app_name):
        # Filter the containers between donors and receivers, according to usage and rules
        donors = self.__get_container_donors(containers)
        receivers = self.__get_container_receivers(containers)

        log_info("Nodes that will give: {0}".format(str([c["name"] for c in donors])), self.__debug)
        log_info("Nodes that will receive:  {0}".format(str([c["name"] for c in receivers])), self.__debug)

        if not receivers:
            log_info("No containers in need of rebalancing for {0}".format(app_name), self.__debug)
            return
        else:
            # Order the containers from lowest to highest current CPU limit
            receivers = sorted(receivers, key=lambda c: c["resources"]["cpu"]["current"])

        # Steal resources from the low-usage containers (givers), create 'slices' of resources
        donor_slices = list()
        id = 0
        for container in donors:
            # Ensure that this request will be successfully processed, otherwise we are 'giving' away extra resources
            current_value = container["resources"]["cpu"]["current"]
            min_value = container["resources"]["cpu"]["min"]
            usage_value = container["resources"]["cpu"]["usage"]
            # Give away at most half of the slack between the current allocation and
            # the larger of the minimum limit and the observed usage
            stolen_amount = 0.5 * (current_value - max(min_value, usage_value))

            slice_amount = 25
            acum = 0
            while acum + slice_amount < stolen_amount:
                donor_slices.append((container, slice_amount, id))
                acum += slice_amount
                id += 1

            # Remaining amount, if it adds up to at least a whole share
            remainder = int(stolen_amount - acum)
            if remainder > 0:
                donor_slices.append((container, remainder, id))
                id += 1

        donor_slices = sorted(donor_slices, key=lambda c: c[1])
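        # The smallest slices will be handed out first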
        print("Donor slices are")
        for c in donor_slices:
            print(c[0]["name"], c[1])

        # Remove those donors that are of no use (there are no possible receivers for them)
        viable_donors = list()
        for c in donor_slices:
            viable = False
            for r in receivers:
                if r["host"] == c[0]["host"]:
                    viable = True
                    break
            if viable:
                viable_donors.append(c)
        print("VIABLE donor slices are")
        for c in viable_donors:
            print(c[0]["name"], c[1], c[2])
        donor_slices = viable_donors

        # Give the resources to the bottlenecked containers
        requests = dict()
        while donor_slices:
            print("Donor slices are")
            for c in donor_slices:
                print(c[0]["name"], c[1], c[2])

            for receiver in receivers:
                # Look for a donor container on the same host
                amount_to_scale, donor, id = None, None, None
                for c, amount, i in donor_slices:
                    if c["host"] == receiver["host"]:
                        amount_to_scale = amount
                        donor = c
                        id = i
                        break

                if not amount_to_scale:
                    log_info("No more donors on its host, container {0} left out".format(receiver["name"]), self.__debug)
                    continue

                # Remove this slice from the list
                donor_slices = list(filter(lambda x: x[2] != id, donor_slices))

                max_receiver_amount = receiver["resources"]["cpu"]["max"] - receiver["resources"]["cpu"]["current"]
                # If this container can't be scaled anymore, skip
                if max_receiver_amount == 0:
                    continue

                # Trim the amount to scale if needed
                if amount_to_scale > max_receiver_amount:
                    amount_to_scale = max_receiver_amount

                # Create the pair of scaling requests
                # TODO This should use Guardians method to generate requests
                request = dict(
                    type="request",
                    resource="cpu",
                    amount=int(amount_to_scale),
                    structure=receiver["name"],
                    action="CpuRescaleUp",
                    timestamp=int(time.time()),
                    structure_type="container",
                    host=receiver["host"],
                    host_rescaler_ip=receiver["host_rescaler_ip"],
                    host_rescaler_port=receiver["host_rescaler_port"]
                )

                if receiver["name"] not in requests:
                    requests[receiver["name"]] = list()
                requests[receiver["name"]].append(request)

                # TODO This should use Guardians method to generate requests
                request = dict(
                    type="request",
                    resource="cpu",
                    amount=int(-amount_to_scale),
                    structure=donor["name"],
                    action="CpuRescaleDown",
                    timestamp=int(time.time()),
                    structure_type="container",
                    host=donor["host"],
                    host_rescaler_ip=donor["host_rescaler_ip"],
                    host_rescaler_port=donor["host_rescaler_port"]
                )

                if donor["name"] not in requests:
                    requests[donor["name"]] = list()
                requests[donor["name"]].append(request)
                log_info("Resource swap between {0}(donor) and {1}(receiver)".format(donor["name"], receiver["name"]), self.__debug)

        log_info("No more donors", self.__debug)

        final_requests = list()
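        # Merge each container's requests into a single net request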
        for container in requests:
            # Copy the first request as the base request
            flat_request = dict(requests[container][0])
            flat_request["amount"] = 0
            for request in requests[container]:
                flat_request["amount"] += request["amount"]
            final_requests.append(flat_request)

        log_info("REQUESTS ARE:", self.__debug)
        for c in requests.values():
            for r in c:
                print(r)

        # TODO
        # Adjust request amounts according to the maximums (trim), otherwise the scaling down will be performed but not the scaling up, and shares will be lost
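        # One option: cap each receiver's net scale-up at its remaining 'max' headroom
        # and shrink the paired donor request by the same excess before posting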

        log_info("FINAL REQUESTS ARE:", self.__debug)
        for r in final_requests:
            print(r)
            self.__couchdb_handler.add_request(r)
Example #9
    def refeed(self):
        myConfig = MyConfig(CONFIG_DEFAULT_VALUES)
        logging.basicConfig(filename=SERVICE_NAME + '.log', level=logging.INFO)

        while True:
            # Get service info
            service = get_service(self.couchdb_handler, SERVICE_NAME)

            # Heartbeat
            beat(self.couchdb_handler, SERVICE_NAME)

            # CONFIG
            myConfig.set_config(service["config"])
            self.debug = myConfig.get_value("DEBUG")
            debug = self.debug
            self.window_difference = myConfig.get_value("WINDOW_TIMELAPSE")
            self.window_delay = myConfig.get_value("WINDOW_DELAY")
            SERVICE_IS_ACTIVATED = myConfig.get_value("ACTIVE")

            t0 = start_epoch(self.debug)

            log_info("Config is as follows:", debug)
            log_info(".............................................", debug)
            log_info("Time window lapse -> {0}".format(self.window_difference),
                     debug)
            log_info("Delay -> {0}".format(self.window_delay), debug)
            log_info(".............................................", debug)

            thread = None
            if SERVICE_IS_ACTIVATED:
                # Remote database operation
                host_info_cache = dict()
                containers = get_structures(self.couchdb_handler,
                                            debug,
                                            subtype="container")
                if not containers:
                    # Without container info, no application information can be generated
                    log_info("No structures to process", debug)
                    time.sleep(self.window_difference)
                    end_epoch(self.debug, self.window_difference, t0)
                    continue
                else:
                    thread = Thread(target=self.refeed_thread, args=())
                    thread.start()
            else:
                log_warning("Refeeder is not activated", debug)

            time.sleep(self.window_difference)

            wait_operation_thread(thread, debug)
            log_info("Refeed processed", debug)

            end_epoch(self.debug, self.window_difference, t0)
def persist():
    logging.basicConfig(filename=SERVICE_NAME + '.log', level=logging.INFO)

    global resources_persisted
    global debug

    myConfig = MyConfig(CONFIG_DEFAULT_VALUES)

    while True:
        log_info("----------------------", debug)
        log_info("Starting Epoch", debug)
        t0 = time.time()

        # Get service info
        service = get_service(db_handler,
                              SERVICE_NAME)  # Remote database operation

        # Heartbeat
        beat(db_handler, SERVICE_NAME)  # Remote database operation

        # CONFIG
        myConfig.set_config(service["config"])
        polling_frequency = myConfig.get_value("POLLING_FREQUENCY")
        debug = myConfig.get_value("DEBUG")
        resources_persisted = myConfig.get_value("RESOURCES_PERSISTED")
        SERVICE_IS_ACTIVATED = myConfig.get_value("ACTIVE")
        log_info(
            "Going to snapshot resources: {0}".format(resources_persisted),
            debug)

        log_info("Config is as follows:", debug)
        log_info(".............................................", debug)
        log_info("Polling frequency -> {0}".format(polling_frequency), debug)
        log_info(
            "Resources to be snapshoter are -> {0}".format(
                resources_persisted), debug)
        log_info(".............................................", debug)

        ## CHECK INVALID CONFIG ##
        # TODO This code is duplicated on the structures and database snapshoters
        invalid, message = invalid_conf(myConfig)
        if invalid:
            log_error(message, debug)
            if polling_frequency < 3:
                log_error(
                    "Polling frequency is too short, replacing with DEFAULT value '{0}'"
                    .format(CONFIG_DEFAULT_VALUES["POLLING_FREQUENCY"]), debug)
                polling_frequency = CONFIG_DEFAULT_VALUES["POLLING_FREQUENCY"]

            log_info("----------------------\n", debug)
            time.sleep(polling_frequency)
            continue

        thread = None
        if SERVICE_IS_ACTIVATED:
            thread = Thread(target=persist_thread, args=())
            thread.start()
        else:
            log_warning(
                "Structure snapshoter is not activated, will not do anything",
                debug)

        time.sleep(polling_frequency)

        wait_operation_thread(thread, debug)

        t1 = time.time()
        time_proc = "%.2f" % (t1 - t0 - polling_frequency)
        time_total = "%.2f" % (t1 - t0)
        log_info(
            "Epoch processed in {0} seconds ({1} processing and {2} sleeping)".
            format(time_total, time_proc, str(polling_frequency)), debug)
        log_info("----------------------\n", debug)