Example #1
    def group_hosts_by_outlet_temp(self):
        """Group hosts based on outlet temp meters"""
        nodes = self.compute_model.get_all_compute_nodes()
        size_cluster = len(nodes)
        if size_cluster == 0:
            raise wexc.ClusterEmpty()

        hosts_need_release = []
        hosts_target = []
        for node in nodes.values():
            resource_id = node.uuid

            outlet_temp = self.ceilometer.statistic_aggregation(
                resource_id=resource_id,
                meter_name=self._meter,
                period="30",
                aggregate='avg')
            # some hosts may not have outlet temp meters, remove from target
            if outlet_temp is None:
                LOG.warning(_LW("%s: no outlet temp data"), resource_id)
                continue

            LOG.debug("%s: outlet temperature %f" % (resource_id, outlet_temp))
            instance_data = {'node': node, 'outlet_temp': outlet_temp}
            if outlet_temp >= self.threshold:
                # mark the node to release resources
                hosts_need_release.append(instance_data)
            else:
                hosts_target.append(instance_data)
        return hosts_need_release, hosts_target
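The core of this example is a simple threshold partition: hosts whose average outlet temperature is at or above self.threshold are marked for release, the rest become migration targets, and hosts with no reading are skipped. A minimal standalone sketch of that pattern, using a hypothetical dict of per-host readings in place of the Watcher compute model and the Ceilometer client:

# Minimal sketch of the threshold-partition pattern above. `readings` is a
# hypothetical {host_id: outlet_temp or None} mapping, not a Watcher API.
def partition_by_outlet_temp(readings, threshold):
    hosts_need_release, hosts_target = [], []
    for host_id, temp in readings.items():
        if temp is None:
            # hosts without an outlet temperature meter are skipped
            continue
        entry = {'node': host_id, 'outlet_temp': temp}
        if temp >= threshold:
            hosts_need_release.append(entry)
        else:
            hosts_target.append(entry)
    return hosts_need_release, hosts_target

# Hosts at or above 35.0 degrees are marked for release.
hot, cool = partition_by_outlet_temp(
    {'node-1': 37.2, 'node-2': 31.0, 'node-3': None}, 35.0)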
Example #2
    def group_hosts_by_airflow(self):
        """Group hosts based on airflow meters"""

        nodes = self.compute_model.get_all_compute_nodes()
        if not nodes:
            raise wexc.ClusterEmpty()
        overload_hosts = []
        nonoverload_hosts = []
        for node_id in nodes:
            node = self.compute_model.get_node_by_uuid(node_id)
            resource_id = node.uuid
            airflow = self.ceilometer.statistic_aggregation(
                resource_id=resource_id,
                meter_name=self.meter_name_airflow,
                period=self._period,
                aggregate='avg')
            # some hosts may not have airflow meter, remove from target
            if airflow is None:
                LOG.warning("%s: no airflow data", resource_id)
                continue

            LOG.debug("%s: airflow %f" % (resource_id, airflow))
            nodemap = {'node': node, 'airflow': airflow}
            if airflow >= self.threshold_airflow:
                # mark the node to release resources
                overload_hosts.append(nodemap)
            else:
                nonoverload_hosts.append(nodemap)
        return overload_hosts, nonoverload_hosts
Example #3
    def group_hosts_by_outlet_temp(self, cluster_data_model):
        """Group hosts based on outlet temp meters"""

        hypervisors = cluster_data_model.get_all_hypervisors()
        size_cluster = len(hypervisors)
        if size_cluster == 0:
            raise wexc.ClusterEmpty()

        hosts_need_release = []
        hosts_target = []
        for hypervisor_id in hypervisors:
            hypervisor = cluster_data_model.get_hypervisor_from_id(
                hypervisor_id)
            resource_id = hypervisor.uuid

            outlet_temp = self.ceilometer.statistic_aggregation(
                resource_id=resource_id,
                meter_name=self._meter,
                period="30",
                aggregate='avg')
            # some hosts may not have outlet temp meters, remove from target
            if outlet_temp is None:
                LOG.warning(_LW("%s: no outlet temp data"), resource_id)
                continue

            LOG.debug("%s: outlet temperature %f" % (resource_id, outlet_temp))
            hvmap = {'hv': hypervisor, 'outlet_temp': outlet_temp}
            if outlet_temp >= self.threshold:
                # mark the hypervisor to release resources
                hosts_need_release.append(hvmap)
            else:
                hosts_target.append(hvmap)
        return hosts_need_release, hosts_target
Example #4
    def group_hosts_by_cpu_util(self):
        """Calculate the workloads of each node

        try to find out the nodes which have reached threshold
        and the nodes which are under threshold.
        and also calculate the average workload value of all nodes.
        and also generate the instance workload map.
        """

        nodes = self.compute_model.get_all_compute_nodes()
        cluster_size = len(nodes)
        if not nodes:
            raise wexc.ClusterEmpty()
        overload_hosts = []
        nonoverload_hosts = []
        # total workload of cluster
        cluster_workload = 0.0
        # use workload_cache to store the workload of VMs for reuse purpose
        workload_cache = {}
        for node_id in nodes:
            node = self.compute_model.get_node_by_uuid(node_id)
            instances = self.compute_model.get_node_instances(node)
            node_workload = 0.0
            for instance in instances:
                try:
                    cpu_util = self.ceilometer.statistic_aggregation(
                        resource_id=instance.uuid,
                        meter_name=self._meter,
                        period=self._period,
                        aggregate='avg')
                except Exception as exc:
                    LOG.exception(exc)
                    LOG.error(_LE("Can not get cpu_util from Ceilometer"))
                    continue
                if cpu_util is None:
                    LOG.debug("Instance (%s): cpu_util is None", instance.uuid)
                    continue
                workload_cache[instance.uuid] = cpu_util * instance.vcpus / 100
                node_workload += workload_cache[instance.uuid]
                LOG.debug("VM (%s): cpu_util %f", instance.uuid, cpu_util)
            node_cpu_util = node_workload / node.vcpus * 100

            cluster_workload += node_workload

            instance_data = {
                'node': node,
                "cpu_util": node_cpu_util,
                'workload': node_workload
            }
            if node_cpu_util >= self.threshold:
                # mark the node to release resources
                overload_hosts.append(instance_data)
            else:
                nonoverload_hosts.append(instance_data)

        avg_workload = cluster_workload / cluster_size

        return overload_hosts, nonoverload_hosts, avg_workload, workload_cache
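The arithmetic in this example converts each instance's CPU utilization percentage into an absolute workload expressed in vCPUs (cpu_util * vcpus / 100), sums those per node, and then turns the sum back into a node-level percentage (node_workload / node.vcpus * 100). A short worked illustration with made-up numbers:

# Worked example of the workload arithmetic above (illustrative numbers only).
# An instance at 50% CPU with 4 vCPUs contributes 2.0 vCPUs of workload:
instance_workload = 50.0 * 4 / 100      # 2.0
# A 16-vCPU node carrying 6.4 vCPUs of workload is at 40% utilization:
node_cpu_util = 6.4 / 16 * 100          # 40.0
# The average workload is the cluster total divided by the number of nodes:
avg_workload = (6.4 + 3.2) / 2          # 4.8 vCPUs per node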
Example #5
    def pre_execute(self):
        LOG.info(_LI("Initializing Server Consolidation"))
        if not self.compute_model:
            raise exception.ClusterStateNotDefined()

        if len(self.compute_model.get_all_compute_nodes()) == 0:
            raise exception.ClusterEmpty()

        LOG.debug(self.compute_model.to_string())
Example #6
    def group_hosts_by_outlet_temp(self):
        """Group hosts based on outlet temp meters"""
        nodes = self.compute_model.get_all_compute_nodes()
        size_cluster = len(nodes)
        if size_cluster == 0:
            raise wexc.ClusterEmpty()

        hosts_need_release = []
        hosts_target = []
        metric_name = self.METRIC_NAMES[
            self.config.datasource]['host_outlet_temp']
        for node in nodes.values():
            resource_id = node.uuid
            outlet_temp = None

            if self.config.datasource == "ceilometer":
                outlet_temp = self.ceilometer.statistic_aggregation(
                    resource_id=resource_id,
                    meter_name=metric_name,
                    period=self.period,
                    aggregate='avg'
                )
            elif self.config.datasource == "gnocchi":
                stop_time = datetime.datetime.utcnow()
                start_time = stop_time - datetime.timedelta(
                    seconds=int(self.period))
                outlet_temp = self.gnocchi.statistic_aggregation(
                    resource_id=resource_id,
                    metric=metric_name,
                    granularity=self.granularity,
                    start_time=start_time,
                    stop_time=stop_time,
                    aggregation='mean'
                )
            # some hosts may not have outlet temp meters, remove from target
            if outlet_temp is None:
                LOG.warning("%s: no outlet temp data", resource_id)
                continue

            LOG.debug("%s: outlet temperature %f" % (resource_id, outlet_temp))
            instance_data = {'node': node, 'outlet_temp': outlet_temp}
            if outlet_temp >= self.threshold:
                # mark the node to release resources
                hosts_need_release.append(instance_data)
            else:
                hosts_target.append(instance_data)
        return hosts_need_release, hosts_target
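The Gnocchi branch has no period parameter, so the example derives an explicit time window: the aggregation covers the last period seconds ending at the current UTC time. A minimal sketch of that window computation, with an assumed period value:

import datetime

period = 300  # assumed value; the strategy reads this from its configuration
stop_time = datetime.datetime.utcnow()
start_time = stop_time - datetime.timedelta(seconds=int(period))
# These bounds are then passed to gnocchi.statistic_aggregation() together
# with granularity and aggregation='mean'.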
Example #7
    def group_hosts_by_airflow(self):
        """Group hosts based on airflow meters"""

        nodes = self.compute_model.get_all_compute_nodes()
        if not nodes:
            raise wexc.ClusterEmpty()
        overload_hosts = []
        nonoverload_hosts = []
        for node_id in nodes:
            airflow = None
            node = self.compute_model.get_node_by_uuid(
                node_id)
            resource_id = node.uuid
            if self.config.datasource == "ceilometer":
                airflow = self.ceilometer.statistic_aggregation(
                    resource_id=resource_id,
                    meter_name=self.meter_name_airflow,
                    period=self._period,
                    aggregate='avg')
            elif self.config.datasource == "gnocchi":
                stop_time = datetime.datetime.utcnow()
                start_time = stop_time - datetime.timedelta(
                    seconds=int(self._period))
                airflow = self.gnocchi.statistic_aggregation(
                    resource_id=resource_id,
                    metric=self.meter_name_airflow,
                    granularity=self.granularity,
                    start_time=start_time,
                    stop_time=stop_time,
                    aggregation='mean')
            # some hosts may not have airflow meter, remove from target
            if airflow is None:
                LOG.warning("%s: no airflow data", resource_id)
                continue

            LOG.debug("%s: airflow %f" % (resource_id, airflow))
            nodemap = {'node': node, 'airflow': airflow}
            if airflow >= self.threshold_airflow:
                # mark the node to release resources
                overload_hosts.append(nodemap)
            else:
                nonoverload_hosts.append(nodemap)
        return overload_hosts, nonoverload_hosts
Example #8
    def group_hosts_by_outlet_temp(self):
        """Group hosts based on outlet temp meters"""
        nodes = self.get_available_compute_nodes()
        size_cluster = len(nodes)
        if size_cluster == 0:
            raise wexc.ClusterEmpty()

        hosts_need_release = []
        hosts_target = []
        metric_name = self.METRIC_NAMES[
            self.config.datasource]['host_outlet_temp']
        for node in nodes.values():
            resource_id = node.uuid
            outlet_temp = None

            outlet_temp = self.datasource_backend.statistic_aggregation(
                resource_id=resource_id,
                meter_name=metric_name,
                period=self.period,
                granularity=self.granularity,
            )

            # some hosts may not have outlet temp meters, remove from target
            if outlet_temp is None:
                LOG.warning("%s: no outlet temp data", resource_id)
                continue

            LOG.debug("%(resource)s: outlet temperature %(temp)f", {
                'resource': resource_id,
                'temp': outlet_temp
            })
            instance_data = {'node': node, 'outlet_temp': outlet_temp}
            if outlet_temp >= self.threshold:
                # mark the node to release resources
                hosts_need_release.append(instance_data)
            else:
                hosts_target.append(instance_data)
        return hosts_need_release, hosts_target
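Unlike Examples #6 and #7, this version does not branch on self.config.datasource; it delegates to a single self.datasource_backend object that hides whether Ceilometer or Gnocchi answers the query. A hypothetical sketch of such an abstraction (not Watcher's actual helper class) to illustrate the design choice:

import datetime

# Hypothetical datasource facade; Watcher's real backend object differs, this
# only illustrates the idea of hiding the Ceilometer/Gnocchi split behind one
# statistic_aggregation() call.
class DataSourceBackend(object):
    def __init__(self, ceilometer=None, gnocchi=None):
        self.ceilometer = ceilometer
        self.gnocchi = gnocchi

    def statistic_aggregation(self, resource_id, meter_name, period,
                              granularity):
        if self.ceilometer is not None:
            return self.ceilometer.statistic_aggregation(
                resource_id=resource_id, meter_name=meter_name,
                period=period, aggregate='avg')
        stop_time = datetime.datetime.utcnow()
        start_time = stop_time - datetime.timedelta(seconds=int(period))
        return self.gnocchi.statistic_aggregation(
            resource_id=resource_id, metric=meter_name,
            granularity=granularity, start_time=start_time,
            stop_time=stop_time, aggregation='mean')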
Example #9
    def execute(self, original_model):
        LOG.info(_LI("Initializing Sercon Consolidation"))

        if original_model is None:
            raise exception.ClusterStateNotDefined()

        # todo(jed) clone model
        current_model = original_model

        self.efficacy = 100
        unsuccessful_migration = 0

        first_migration = True
        size_cluster = len(current_model.get_all_hypervisors())
        if size_cluster == 0:
            raise exception.ClusterEmpty()

        self.compute_attempts(size_cluster)

        for hypervisor_id in current_model.get_all_hypervisors():
            hypervisor = current_model.get_hypervisor_from_id(hypervisor_id)
            count = current_model.get_mapping().get_node_vms_from_id(
                hypervisor_id)
            if len(count) == 0:
                if hypervisor.state == hyper_state.HypervisorState.ONLINE:
                    self.add_change_service_state(
                        hypervisor_id,
                        hyper_state.HypervisorState.OFFLINE.value)

        while self.get_allowed_migration_attempts() >= unsuccessful_migration:
            if not first_migration:
                self.efficacy = self.calculate_migration_efficacy()
                if self.efficacy < float(self.target_efficacy):
                    break
            first_migration = False
            score = []

            score = self.score_of_nodes(current_model, score)
            ''' sort compute nodes by Score decreasing '''
            sorted_score = sorted(score, reverse=True, key=lambda x: (x[1]))
            LOG.debug("Hypervisor(s) BFD {0}".format(sorted_score))
            ''' get Node to be released '''
            if len(score) == 0:
                LOG.warning(
                    _LW("The workloads of the compute nodes"
                        " of the cluster are zero"))
                break

            node_to_release, vm_score = self.node_and_vm_score(
                sorted_score, score, current_model)
            ''' sort VMs by Score '''
            sorted_vms = sorted(vm_score, reverse=True, key=lambda x: (x[1]))
            # BFD: Best Fit Decrease
            LOG.debug("VM(s) BFD {0}".format(sorted_vms))

            migrations = self.calculate_num_migrations(sorted_vms,
                                                       current_model,
                                                       node_to_release,
                                                       sorted_score)

            unsuccessful_migration = self.unsuccessful_migration_actualization(
                migrations, unsuccessful_migration)
        infos = {
            "number_of_migrations": self.number_of_migrations,
            "number_of_nodes_released": self.number_of_released_nodes,
            "efficacy": self.efficacy
        }
        LOG.debug(infos)
        self.solution.model = current_model
        self.solution.efficacy = self.efficacy
        return self.solution
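The consolidation loop above keeps migrating while two conditions hold: the number of unsuccessful migrations has not exceeded the allowed attempts, and (after the first pass) the measured efficacy is still at or above the target. A compact restatement of that stopping logic, using the same names as the example:

# Restatement of the loop's continuation test (names follow the example).
def should_continue(allowed_attempts, unsuccessful_migration,
                    efficacy, target_efficacy, first_migration):
    if allowed_attempts < unsuccessful_migration:
        return False
    if not first_migration and efficacy < float(target_efficacy):
        return False
    return True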
Example #10
    def group_hosts(self):

        nodes = self.compute_model.get_all_compute_nodes()
        size_cluster = len(nodes)
        if size_cluster == 0:
            raise wexc.ClusterEmpty()

        hosts_need_release = {}
        hosts_target = []

        for node in nodes.values():
            instances_of_node = self.compute_model.get_node_instances(node)
            node_instance_count = len(instances_of_node)

            # Flag that tells us whether to skip the node. If True, the node
            # is skipped. Set to True when a noisy instance is found or when
            # the potential priority instance turns out to be the same as the
            # potential noisy instance.
            loop_break_flag = False

            if node_instance_count > 1:

                instance_priority_list = []

                for instance in instances_of_node:
                    instance_priority_list.append(instance)

                # If an instance has no watcher-priority metadata,
                # DEFAULT_WATCHER_PRIORITY is used as its priority.
                instance_priority_list.sort(
                    key=lambda a: a.get('metadata').get(
                        'watcher-priority', self.DEFAULT_WATCHER_PRIORITY))

                instance_priority_list_reverse = list(instance_priority_list)
                instance_priority_list_reverse.reverse()

                for potential_priority_instance in instance_priority_list:

                    priority_instance = self.find_priority_instance(
                        potential_priority_instance)

                    if (priority_instance is not None):

                        for potential_noisy_instance in (
                                instance_priority_list_reverse):
                            if (potential_noisy_instance ==
                                    potential_priority_instance):
                                loop_break_flag = True
                                break

                            noisy_instance = self.find_noisy_instance(
                                potential_noisy_instance)

                            if noisy_instance is not None:
                                hosts_need_release[node.uuid] = {
                                    'priority_vm': potential_priority_instance,
                                    'noisy_vm': potential_noisy_instance
                                }
                                LOG.debug("Priority VM found: %s",
                                          potential_priority_instance.uuid)
                                LOG.debug("Noisy VM found: %s",
                                          potential_noisy_instance.uuid)
                                loop_break_flag = True
                                break

                    # No need to check other instances in the node
                    if loop_break_flag is True:
                        break

            if node.uuid not in hosts_need_release:
                hosts_target.append(node)

        return hosts_need_release, hosts_target
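The grouping above hinges on sorting a node's instances by their 'watcher-priority' metadata, falling back to DEFAULT_WATCHER_PRIORITY when the key is absent, and then scanning the list from both ends for a priority/noisy pair. A minimal sketch of just the sorting step, with hypothetical dict-based instances:

# Minimal sketch of the priority sort (hypothetical dict-based instances).
DEFAULT_WATCHER_PRIORITY = 5

instances = [
    {'uuid': 'vm-a', 'metadata': {'watcher-priority': '1'}},
    {'uuid': 'vm-b', 'metadata': {}},
    {'uuid': 'vm-c', 'metadata': {'watcher-priority': '3'}},
]
instances.sort(
    key=lambda a: int(a['metadata'].get('watcher-priority',
                                        DEFAULT_WATCHER_PRIORITY)))
# Resulting order: vm-a (priority 1), vm-c (priority 3), vm-b (default 5)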
Example #11
    def group_hosts_by_cpu_or_ram_util(self):
        """Calculate the workloads of each node

        try to find out the nodes which have reached threshold
        and the nodes which are under threshold.
        and also calculate the average workload value of all nodes.
        and also generate the instance workload map.
        """

        nodes = self.get_available_compute_nodes()
        cluster_size = len(nodes)
        if not nodes:
            raise wexc.ClusterEmpty()
        overload_hosts = []
        nonoverload_hosts = []
        # total workload of cluster
        cluster_workload = 0.0
        # use workload_cache to store the workload of VMs for reuse purpose
        workload_cache = {}
        for node_id in nodes:
            node = self.compute_model.get_node_by_uuid(node_id)
            instances = self.compute_model.get_node_instances(node)
            node_workload = 0.0
            for instance in instances:
                util = None
                try:
                    util = self.datasource_backend.statistic_aggregation(
                        instance.uuid,
                        self._meter,
                        self._period,
                        self._granularity,
                        aggregation='mean',
                        dimensions=dict(resource_id=instance.uuid))
                except Exception as exc:
                    LOG.exception(exc)
                    LOG.error("Can not get %s from %s", self._meter,
                              self.config.datasource)
                    continue
                if util is None:
                    LOG.debug("Instance (%s): %s is None", instance.uuid,
                              self._meter)
                    continue
                if self._meter == self.CPU_METER_NAME:
                    workload_cache[instance.uuid] = (util * instance.vcpus /
                                                     100)
                else:
                    workload_cache[instance.uuid] = util
                node_workload += workload_cache[instance.uuid]
                LOG.debug("VM (%s): %s %f", instance.uuid, self._meter, util)

            cluster_workload += node_workload
            if self._meter == self.CPU_METER_NAME:
                node_util = node_workload / node.vcpus * 100
            else:
                node_util = node_workload / node.memory * 100

            instance_data = {
                'node': node,
                self._meter: node_util,
                'workload': node_workload
            }
            if node_util >= self.threshold:
                # mark the node to release resources
                overload_hosts.append(instance_data)
            else:
                nonoverload_hosts.append(instance_data)

        avg_workload = cluster_workload / cluster_size

        return overload_hosts, nonoverload_hosts, avg_workload, workload_cache
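The only difference from the CPU-only variant in Example #4 is the normalization: for the CPU meter the per-instance value is scaled by vcpus / 100 and the node total is divided by node.vcpus, while for the RAM meter the values are summed as-is and divided by node.memory. A short worked illustration with made-up numbers:

# Illustrative numbers only: how the node-level utilization differs per meter.
# CPU meter: 6.4 vCPUs of workload on a 16-vCPU node -> 40% utilization.
cpu_node_util = 6.4 / 16 * 100           # 40.0
# RAM meter: 4096 MB of summed instance memory usage on a 16384 MB node
# -> 25% utilization (no per-instance scaling is applied).
ram_node_util = 4096.0 / 16384 * 100     # 25.0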