def _source(self, node):
    """Resolve the telemetry source (host identifier) for a node.

    Depending on the node layer/type the source is read from the node
    attributes or looked up through the landscape neighbours.  Returns
    None when no source can be determined.
    """
    attrs = InfoGraphNode.get_attributes(node)
    node_type = InfoGraphNode.get_type(node)

    if InfoGraphNode.get_layer(node) == GRAPH_LAYER.PHYSICAL:
        if 'allocation' in attrs:
            return attrs['allocation']
        # Workaround for the landscape format: the real attributes can be
        # nested under one or more 'attributes' keys.
        while attrs.get('attributes', None):
            attrs = attrs['attributes']
        if 'allocation' in attrs:
            return attrs['allocation']

    if node_type == NODE_TYPE.VIRTUAL_MACHINE and 'vm_name' in attrs:
        return attrs['vm_name']

    if node_type == NODE_TYPE.INSTANCE_DISK:
        # A libvirt disk: the physical machine hosting the VM is the source.
        disk_name = InfoGraphNode.get_name(node)
        vm = self.landscape.get_neighbour_by_type(
            disk_name, NODE_TYPE.VIRTUAL_MACHINE)
        return self.landscape.get_neighbour_by_type(
            vm, NODE_TYPE.PHYSICAL_MACHINE)

    if node_type == NODE_TYPE.PHYSICAL_MACHINE and 'name' in attrs:
        return attrs['name']

    if node_type == NODE_TYPE.DOCKER_CONTAINER:
        docker_node = self.landscape.get_neighbour_by_type(
            InfoGraphNode.get_name(node), 'docker_node')
        if docker_node:
            return self.landscape.get_neighbour_by_type(
                docker_node, 'machine')

    return None
# Ejemplo n.º 2
# 0
    def extract_infrastructure_graph(workload_name, ts_from, ts_to):
        """
        Returns the entire landscape at the current time.

        :param workload_name: workload name (currently unused - the whole
            landscape graph is returned; TODO confirm and remove)
        :param ts_from: start timestamp (currently unused)
        :param ts_to: end timestamp (currently unused)
        :return: the landscape graph, with node attributes deserialized
            from strings to dicts
        """
        landscape_ip = ConfigHelper.get("LANDSCAPE", "host")
        landscape_port = ConfigHelper.get("LANDSCAPE", "port")
        subgraph_extraction = SubGraphExtraction(landscape_ip=landscape_ip,
                                                 landscape_port=landscape_port)
        res = landscape.get_graph()
        if PARALLEL:
            i = 0
            threads = []
            cpu_count = multiprocessing.cpu_count()
            # Target pool size so the nodes spread over all CPUs.
            no_node_thread = len(res.nodes()) / cpu_count
            node_pool = []

            for node in res.nodes(data=True):
                if i < no_node_thread:
                    node_pool.append(node)
                    i = i + 1
                else:
                    thread1 = ParallelLandscape(
                        i, "Thread-{}".format(InfoGraphNode.get_name(node)), i,
                        node_pool)
                    thread1.start()
                    threads.append(thread1)
                    # BUG FIX: the node that triggered the flush used to be
                    # dropped entirely; seed the next pool with it instead
                    # (same pattern as get_annotated_graph).
                    i = 1
                    node_pool = [node]
            # Leftover nodes that did not fill a complete pool.
            if len(node_pool) != 0:
                thread1 = ParallelLandscape(
                    i, "Thread-{}".format(InfoGraphNode.get_name(node)), i,
                    node_pool)
                thread1.start()
                threads.append(thread1)

            [t.join() for t in threads]
        else:
            for node in res.nodes(data=True):
                attrs = InfoGraphNode.get_attributes(node)
                attrs = InfoGraphUtilities.str_to_dict(attrs)
                InfoGraphNode.set_attributes(node, attrs)
        return res
# Ejemplo n.º 3
# 0
 def annotate_machine_disk_util(internal_graph, node):
     """Propagate a disk node's utilization onto its hosting machine node.

     Adds one column (named after the disk node) to the machine's disk
     utilization frame, unless the machine already carries the USE disk
     metric.
     """
     machine_name = InfoGraphNode.get_attributes(node)['allocation']
     machine = InfoGraphNode.get_node(internal_graph, machine_name)
     machine_util = InfoGraphNode.get_disk_utilization(machine)
     if 'intel/use/disk/utilization' in machine_util.columns:
         LOG.debug('Found use disk for node {}'.format(InfoGraphNode.get_name(node)))
         return
     disk_metric = 'intel/procfs/disk/utilization_percentage'
     disk_util_df = InfoGraphNode.get_disk_utilization(node)
     if disk_metric not in disk_util_df.columns:
         LOG.info('Disk util not Found use for node {}'.format(InfoGraphNode.get_name(node)))
         return
     disk_util = disk_util_df[disk_metric].fillna(0)
     machine_util[InfoGraphNode.get_attributes(node)['name']] = disk_util
     InfoGraphNode.set_disk_utilization(machine, machine_util)
# Ejemplo n.º 4
# 0
    def get_metrics(graph, metrics='all'):
        """
        Returns all the metrics associated with the input graph.

        :param graph: (NetworkX Graph) Graph to be annotated with data
        :param metrics: metric type to be considered. default = all
        :return: the list of metric names, formatted as
            "<node>@<layer>@<type>@<metric>" with dots replaced by
            underscores
        """
        metric_list = []
        for node in graph.nodes(data=True):
            name = InfoGraphNode.get_name(node)
            layer = InfoGraphNode.get_layer(node)
            kind = InfoGraphNode.get_type(node)
            # Export either the raw telemetry metrics or the derived
            # utilization metrics, depending on the 'metrics' flag.
            if metrics == 'all':
                data = InfoGraphNode.get_telemetry_data(node)
            else:
                data = InfoGraphNode.get_utilization(node)
            for column in data.columns.values:
                if column == 'timestamp':
                    continue
                entry = "{}@{}@{}@{}".format(name, layer, kind, column)
                metric_list.append(entry.replace(".", "_"))
        return metric_list
# Ejemplo n.º 5
# 0
    def get_compute_node_view(self,
                              compute_node_hostnames,
                              ts_from=None,
                              ts_to=None,
                              name_filtering_support=False):
        """
        Returns a view for the compute node(s).

        A single hostname yields that node's subgraph; a list of
        hostnames yields the network subgraph with each non-empty compute
        node subgraph merged in.
        """
        res = None
        if isinstance(compute_node_hostnames, str):
            res = self._get_compute_node_subgraph(compute_node_hostnames,
                                                  ts_from, ts_to)
        elif isinstance(compute_node_hostnames, list):
            res = self._get_network_subgraph(ts_from, ts_to)
            string_hostnames = [h for h in compute_node_hostnames
                                if isinstance(h, str)]
            for hostname in string_hostnames:
                subgraph = self._get_compute_node_subgraph(
                    hostname, ts_from, ts_to)
                if len(subgraph.nodes()) > 0:
                    # NOTE(review): return value discarded here -
                    # presumably merge_graph mutates 'res' in place;
                    # confirm (get_workload_view_graph reassigns instead).
                    graphs.merge_graph(res, subgraph)

        if name_filtering_support:
            for node in res.nodes(data=True):
                InfoGraphNode.set_attribute(
                    node, 'node_name', InfoGraphNode.get_name(node))
        return res
# Ejemplo n.º 6
# 0
    def _create_pandas_data_frame_from_graph(graph, metrics='all'):
        """
        Build one pandas DataFrame collecting the telemetry of every node
        of the graph.

        Each metric becomes a column named
        "<node>@<layer>@<type>@<metric>" (dots replaced by underscores);
        the per-node frames are outer-merged on the rounded integer
        'timestamp' column.

        :param graph: (NetworkX Graph) Graph annotated with telemetry data
        :param metrics: 'all' exports the raw telemetry; any other value
            exports the utilization data instead
        :return: pandas.DataFrame with all node metrics aligned on time
        """
        result = pandas.DataFrame()
        for node in graph.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            node_layer = InfoGraphNode.get_layer(node)
            node_type = InfoGraphNode.get_type(node)

            # This method supports export of either normal metrics coming
            #  from telemetry agent or utilization type of metrics.
            if metrics == 'all':
                node_telemetry_data = InfoGraphNode.get_telemetry_data(node)
            else:
                node_telemetry_data = InfoGraphNode.get_utilization(node)

            # Normalize timestamps to integers so frames from different
            # nodes can be joined on the same key.
            # NOTE(review): this mutates the node's DataFrame in place -
            # confirm callers do not rely on the original float values.
            node_telemetry_data['timestamp'] = node_telemetry_data[
                'timestamp'].astype(float)
            node_telemetry_data['timestamp'] = node_telemetry_data[
                'timestamp'].round()
            node_telemetry_data['timestamp'] = node_telemetry_data[
                'timestamp'].astype(int)
            for metric_name in node_telemetry_data.columns.values:
                if metric_name == 'timestamp':
                    continue
                # Qualify each column with the node identity; dots are
                # replaced to keep downstream naming consistent.
                col_name = "{}@{}@{}@{}".\
                    format(node_name, node_layer, node_type, metric_name)
                col_name = col_name.replace(".", "_")
                node_telemetry_data = node_telemetry_data.rename(
                    columns={metric_name: col_name})

            # Skip nodes with no usable data (empty or timestamp-only).
            if node_telemetry_data.empty or len(
                    node_telemetry_data.columns) <= 1:
                continue
            if result.empty:
                result = node_telemetry_data.copy()
            else:
                node_telemetry_data = \
                    node_telemetry_data.drop_duplicates(subset='timestamp')
                result = pandas.merge(result,
                                      node_telemetry_data,
                                      how='outer',
                                      on='timestamp')
            # TODO: Try with this removed
            # result.set_index(['timestamp'])
        return result
# Ejemplo n.º 7
# 0
 def annotate_machine_network_util(internal_graph, node):
     """Propagate a NIC node's utilization onto its hosting machine node.

     Adds one column (named after the NIC node) to the machine's network
     utilization frame, unless the machine already carries the USE
     network metric.
     """
     machine_name = InfoGraphNode.get_attributes(node)['allocation']
     machine = InfoGraphNode.get_node(internal_graph, machine_name)
     machine_util = InfoGraphNode.get_network_utilization(machine)
     if 'intel/use/network/utilization' in machine_util.columns:
         LOG.debug('Found use network for node {}'.format(InfoGraphNode.get_name(node)))
         return
     net_metric = 'intel/psutil/net/utilization_percentage'
     net_util_df = InfoGraphNode.get_network_utilization(node)
     if net_metric not in net_util_df.columns:
         LOG.info('Net util not Found use for node {}'.format(InfoGraphNode.get_name(node)))
         return
     net_util = net_util_df[net_metric].fillna(0)
     machine_util[InfoGraphNode.get_attributes(node)['name']] = net_util
     InfoGraphNode.set_network_utilization(machine, machine_util)
# Ejemplo n.º 8
# 0
 def annotate_machine_pu_util(internal_graph, node):
     """Propagate a PU (core) node's CPU utilization onto its machine node.

     Adds one column (named after the PU node) to the machine's compute
     utilization frame, unless the machine already carries the USE
     compute metric.
     """
     source = InfoGraphNode.get_machine_name_of_pu(node)
     machine = InfoGraphNode.get_node(internal_graph, source)
     machine_util = InfoGraphNode.get_compute_utilization(machine)
     if 'intel/use/compute/utilization' not in machine_util.columns:
         # FIX: removed unused local 'sum_util = None'.
         cpu_metric = 'intel/procfs/cpu/utilization_percentage'
         pu_util_df = InfoGraphNode.get_compute_utilization(node)
         if cpu_metric in pu_util_df.columns:
             pu_util = pu_util_df[cpu_metric]
             pu_util = pu_util.fillna(0)
             machine_util[InfoGraphNode.get_attributes(node)['name']] = pu_util
             InfoGraphNode.set_compute_utilization(machine, machine_util)
         else:
             LOG.info('CPU util not Found use for node {}'.format(InfoGraphNode.get_name(node)))
     else:
         LOG.debug('Found use for node {}'.format(InfoGraphNode.get_name(node)))
 def _nova_uuid(self, node):
     """Return the nova VM identifier associated with a node, or None."""
     node_type = InfoGraphNode.get_type(node)
     if node_type == NODE_TYPE.INSTANCE_DISK:
         # The VM owning the libvirt disk.
         return self.landscape.get_neighbour_by_type(
             InfoGraphNode.get_name(node), "vm")
     if node_type == NODE_TYPE.PHYSICAL_MACHINE:
         # Consume one of the VMs previously cached by _get_nova_uuids.
         return self.vms.pop()
     return None
# Ejemplo n.º 10
# 0
    def filter_graph(graph):
        """
        Returns the graph filtered removing all the nodes with no telemetry.

        Service-node 'template' attributes are temporarily stripped, the
        attribute dicts are serialized to JSON-friendly strings for the
        analytics step, and then converted back to dicts with the
        template re-attached to service-compute nodes.
        """
        template_mapping = dict()

        res = graph.copy()
        for node in res.nodes(data=True):
            template = node[1]['attributes']['template'] \
                if 'template' in node[1]['attributes'] else None

            # If node is a service node, need to remove the template
            if template:
                template_mapping[InfoGraphNode.get_name(node)] = template
                node[1]['attributes'].pop('template')

            # Fix format for conversion to JSON (happening in analytics)
            node[1]['attributes'] = \
                str(misc.convert_unicode_dict_to_string(node[1]['attributes'])).\
                    replace("'", '"')

        for node in res.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            telemetry = InfoGraphNode.get_telemetry_data(node)
            layer = InfoGraphNode.get_layer(node)

            # A single column means only 'timestamp' is present, i.e. no
            # real telemetry: drop the node (service nodes always kept).
            if len(telemetry.columns) <= 1 and \
                    not layer == InfoGraphNodeLayer.SERVICE:
                InfoGraphNode.set_telemetry_data(node, dict())
                res.filter_nodes('node_name', node_name)

        # Convert attributes back to dict()
        for node in res.nodes(data=True):
            string = InfoGraphNode.get_attributes(node)
            attrs = InfoGraphUtilities.str_to_dict(string)
            if InfoGraphNode.get_type(node) == \
                    InfoGraphNodeType.SERVICE_COMPUTE:
                attrs['template'] = \
                    template_mapping[InfoGraphNode.get_name(node)]
            InfoGraphNode.set_attributes(node, attrs)
        return res
 def _disk(self, node):
     """Return the disk device name for a node, or None if not a disk."""
     node_type = InfoGraphNode.get_type(node)
     if node_type in (NODE_TYPE.PHYSICAL_DISK, NODE_TYPE.PHYSICAL_MACHINE):
         attrs = InfoGraphNode.get_attributes(node)
         return attrs.get("osdev_storage-name")
     if node_type == NODE_TYPE.INSTANCE_DISK:
         # Instance disk node names look like "<vm>_<device>".
         return InfoGraphNode.get_name(node).split("_")[1]
     return None
# Ejemplo n.º 12
# 0
    def get_annotated_graph(self,
                            graph,
                            ts_from,
                            ts_to,
                            utilization=True,
                            saturation=True):
        """
        Annotate a copy of the graph with telemetry data, in parallel.

        Nodes are split into one pool per CPU; each pool is annotated by
        a ParallelTelemetryAnnotation worker thread.  Afterwards the
        PU/disk/NIC utilization is propagated up to the machine nodes.

        :param graph: graph to annotate (not modified; a copy is returned)
        :param ts_from: start timestamp for the telemetry
        :param ts_to: end timestamp for the telemetry
        :param utilization: unused in this body - TODO confirm and remove
        :param saturation: unused in this body - TODO confirm and remove
        :return: the annotated copy of the graph
        """
        internal_graph = graph.copy()
        i = 0
        threads = []
        cpu_count = multiprocessing.cpu_count()
        # Target pool size so the nodes spread over all CPUs.
        no_node_thread = len(internal_graph.nodes()) / (cpu_count)
        node_pool = []
        node_pools = []
        for node in internal_graph.nodes(data=True):
            if i < no_node_thread:
                node_pool.append(node)
                i = i + 1
            else:
                # Pool full: create a worker for it and start the next
                # pool seeded with the current node.
                thread1 = ParallelTelemetryAnnotation(
                    i, "Thread-{}".format(InfoGraphNode.get_name(node)), i,
                    node_pool, internal_graph, self.telemetry, ts_to, ts_from)
                threads.append(thread1)
                node_pools.append(node_pool)
                i = 1
                node_pool = [node]
        # Leftover nodes that did not fill a complete pool.
        if len(node_pool) != 0:
            node_pools.append(node_pool)
            thread1 = ParallelTelemetryAnnotation(
                i, "Thread-{}".format(InfoGraphNode.get_name(node)), i,
                node_pool, internal_graph, self.telemetry, ts_to, ts_from)
            threads.append(thread1)

        [t.start() for t in threads]
        [t.join() for t in threads]

        # Propagate component-level utilization up to the machine nodes.
        for node in internal_graph.nodes(data=True):
            if InfoGraphNode.get_type(node) == InfoGraphNodeType.PHYSICAL_PU:
                self.utils.annotate_machine_pu_util(internal_graph, node)
            elif InfoGraphNode.node_is_disk(node):
                self.utils.annotate_machine_disk_util(internal_graph, node)
            elif InfoGraphNode.node_is_nic(node):
                self.utils.annotate_machine_network_util(internal_graph, node)
        return internal_graph
 def _stack(self, node):
     """Return the stack name associated with a VM node, or None."""
     if InfoGraphNode.get_type(node) != NODE_TYPE.VIRTUAL_MACHINE:
         return None
     # The stack name lives on the service-compute node preceding the VM
     # in the landscape.
     for predecessor in self.landscape.predecessors(
             InfoGraphNode.get_name(node)):
         predecessor_node = self.landscape.node[predecessor]
         if predecessor_node['type'] == NODE_TYPE.SERVICE_COMPUTE \
                 and 'stack_name' in predecessor_node:
             return predecessor_node["stack_name"]
     return None
# Ejemplo n.º 14
# 0
    def get_correlation(node_a, node_b, metric_a, metric_b):
        """
        Pearson correlation between one metric of node_a and one of node_b.

        The two series are outer-merged on 'timestamp' and rows with
        missing values are dropped before computing the correlation.

        :param node_a: graph node providing metric_a
        :param node_b: graph node providing metric_b
        :param metric_a: column name; the special value 'utilization'
            selects the utilization frame instead of raw telemetry
        :param metric_b: same, for node_b
        :return: correlation coefficient, or 0 if both frames are empty
        :raises ValueError: if a metric is missing from its node's data
        """
        # TODO: Add node validation
        node_name_a = InfoGraphNode.get_name(node_a)
        node_name_b = InfoGraphNode.get_name(node_b)

        if metric_a == 'utilization':
            telemetry_a = InfoGraphNode.get_utilization(node_a)
        else:
            telemetry_a = InfoGraphNode.get_telemetry_data(node_a)

        if metric_b == 'utilization':
            telemetry_b = InfoGraphNode.get_utilization(node_b)
        else:
            telemetry_b = InfoGraphNode.get_telemetry_data(node_b)

        if metric_a not in telemetry_a.columns.values:
            raise ValueError(
                "Metric {} is not in Telemetry data of Node {}".format(
                    metric_a, node_name_a))
        if metric_b not in telemetry_b.columns.values:
            raise ValueError(
                "Metric {} is not in Telemetry data of Node {}".format(
                    metric_b, node_name_b))
        if telemetry_a.empty and telemetry_b.empty:
            return 0

        # FIX: removed a frame-wide corrwith() result that was computed
        # here and immediately discarded (dead code).
        df_a = telemetry_a.\
            rename(columns={metric_a: "a-{}".format(metric_a)}).astype(float)
        df_b = telemetry_b.\
            rename(columns={metric_b: "b-{}".format(metric_b)}).astype(float)
        correlation = pandas.merge(df_a, df_b, how='outer', on='timestamp')
        correlation = correlation.dropna()
        res = correlation["a-{}".format(metric_a)].\
            corr(correlation["b-{}".format(metric_b)])
        return res
    def _source_metrics(self, node):
        """
        Retrieve the metric types available for the source of a node.

        Physical nodes (and instance disks) are identified by the machine
        name alone; virtual nodes additionally need the stack name;
        docker containers need the docker id together with the source
        host.
        """
        metric_types = []
        node_layer = InfoGraphNode.get_layer(node)
        node_type = InfoGraphNode.get_type(node)

        if node_layer == GRAPH_LAYER.PHYSICAL \
                or node_type == NODE_TYPE.INSTANCE_DISK:
            try:
                source = self._source(node)
                metric_types = self._cached_metrics(
                    source, {"source": source})
            except Exception as ex:
                LOG.error('Malformed graph: {}'.format(
                    InfoGraphNode.get_name(node)))
                LOG.error(ex)
        elif node_layer == GRAPH_LAYER.VIRTUAL:
            source = self._source(node)
            stack = self._stack(node)
            if stack is not None:
                identifier = "{}-{}".format(source, stack)
                metric_types = self._cached_metrics(
                    identifier, {"stack_name": stack})
        elif node_type == NODE_TYPE.DOCKER_CONTAINER:
            source = self._source(node)
            docker_id = InfoGraphNode.get_docker_id(node)
            if docker_id is not None and source is not None:
                identifier = "{}-{}".format(source, docker_id)
                metric_types = self._cached_metrics(
                    identifier, {"docker_id": docker_id, "source": source})

        return metric_types
# Ejemplo n.º 16
# 0
 def _node_is_nic_on_management_net(node, graph, mng_net_name):
     """True when node is a virtual NIC attached to the management net."""
     nic_types = (InfoGraphNodeType.VIRTUAL_NIC,
                  InfoGraphNodeType.VIRTUAL_NIC_2)
     if InfoGraphNode.get_type(node) not in nic_types:
         return False
     node_name = InfoGraphNode.get_name(node)
     for neighbor_name in graph.neighbors(node_name):
         neighbor = InfoGraphNode.get_node(graph, neighbor_name)
         if InfoGraphNode.get_type(neighbor) != \
                 InfoGraphNodeType.VIRTUAL_NETWORK:
             continue
         network_name = InfoGraphNode.get_attributes(neighbor)['name']
         if network_name == mng_net_name:
             return True
     return False
# Ejemplo n.º 17
# 0
    def machine_capacity_usage(annotated_subgraph):
        """
        Average utilization per resource category over the physical
        machine nodes of the graph (infrastructure-side fingerprint).
        """
        # TODO: Validate graph
        categories = [
            InfoGraphNodeCategory.COMPUTE,
            InfoGraphNodeCategory.NETWORK,
            # TODO: Add a Volume to the workloads to get HD usage
            InfoGraphNodeCategory.STORAGE,
            # TODO: Get telemetry for Memory
            InfoGraphNodeCategory.MEMORY,
        ]
        fingerprint = {category: 0 for category in categories}
        counter = {category: 0 for category in categories}

        # Work on a copy restricted to the physical machine resources.
        local_subgraph = annotated_subgraph.copy()
        local_subgraph.filter_nodes('layer', "virtual")
        local_subgraph.filter_nodes('layer', "service")
        local_subgraph.filter_nodes('type', 'machine')

        for node in local_subgraph.nodes(data=True):
            category = InfoGraphNode.get_category(node)
            utilization = InfoGraphNode.get_utilization(node)
            if 'utilization' in utilization.columns.values:
                fingerprint[category] += utilization['utilization'].mean()
                counter[category] += 1

        # Plain per-category average.
        # TODO: Improve the average
        for category in categories:
            if counter[category] > 0:
                fingerprint[category] = \
                    fingerprint[category] / counter[category]
        return fingerprint
# Ejemplo n.º 18
# 0
    def get_workload_view_graph(self,
                                stack_names,
                                ts_from=None,
                                ts_to=None,
                                name_filtering_support=False):
        """
        Returns a graph which only includes the resources related to the
        execution of the stack names indicated in the input parameter.

        :param stack_names: a stack name (str) or a list of stack names
        :param ts_from: start timestamp for the subgraph extraction
        :param ts_to: end timestamp for the subgraph extraction
        :param name_filtering_support: when True, copy each node's name
            into its 'node_name' attribute
        :return: the (merged) workload subgraph
        """
        res = None

        if isinstance(stack_names, str):
            res = self._get_workload_subgraph(stack_names, ts_from, ts_to)

        # TODO - URGENT: Check this with the new Lanscape
        elif isinstance(stack_names, list):
            non_empty = []
            for stack_name in stack_names:
                graph = self._get_workload_subgraph(str(stack_name), ts_from,
                                                    ts_to)
                if len(graph.nodes()) > 0:
                    non_empty.append(graph)
            for graph in non_empty:
                if not res:
                    res = graph
                else:
                    # TODO - URGENT: Fix this. Put Merge within the analytics
                    res = graphs.merge_graph(res, graph)

        # FIX: removed a dead loop that counted machine nodes into an
        # unused 'machine_count' variable.
        # NOTE(review): res stays None when stack_names is neither str nor
        # list; the loop below would then raise AttributeError - confirm
        # callers never pass other types.
        if name_filtering_support:
            for node in res.nodes(data=True):
                name = InfoGraphNode.get_name(node)
                InfoGraphNode.set_attribute(node, 'node_name', name)

        return res
 def _source(self, node):
     """Resolve the telemetry source (host) name for a node, or None."""
     attrs = InfoGraphNode.get_attributes(node)
     node_type = InfoGraphNode.get_type(node)

     if InfoGraphNode.get_layer(node) == GRAPH_LAYER.PHYSICAL \
             and 'allocation' in attrs:
         return attrs['allocation']

     if node_type == NODE_TYPE.VIRTUAL_MACHINE:
         if 'vm_name' in attrs:
             return attrs['vm_name']
         if 'name' in attrs:
             return attrs['name']

     if node_type == NODE_TYPE.INSTANCE_DISK:
         # A libvirt disk: the hosting physical machine is the source.
         vm = self.landscape.get_neighbour_by_type(
             InfoGraphNode.get_name(node), NODE_TYPE.VIRTUAL_MACHINE)
         return self.landscape.get_neighbour_by_type(
             vm, NODE_TYPE.PHYSICAL_MACHINE)

     if node_type == NODE_TYPE.PHYSICAL_MACHINE and 'name' in attrs:
         return attrs['name']

     return None
# Ejemplo n.º 20
# 0
    def saturation_scores(graph):
        """
        Returns a dictionary with the saturation scores of
        all the nodes of the graph.

        Each node gets 'compute', 'disk', 'network' and 'memory' scores
        in [0, 1]: the mean of the corresponding intel/use/*/saturation
        metric divided by 100, or 0 when the data is missing.

        :param graph: InfoGraph
        :return: dict[node_name] = dict of per-resource scores
        """
        res = dict()
        for node in graph.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            res[node_name] = {
                'compute': 0,
                'disk': 0,
                'network': 0,
                'memory': 0,
            }
            # FIX: removed a per-iteration 'import analytics_engine.common'
            # plus an unused local LOG rebinding (dead code in the loop).
            sat = InfoGraphNode.get_saturation(node)
            # Skip nodes with no saturation data (None or empty frame).
            if sat is None or \
                    (isinstance(sat, pandas.DataFrame) and sat.empty):
                continue

            if 'intel/use/compute/saturation' in sat:
                res[node_name]['compute'] = (
                    sat.get('intel/use/compute/saturation').mean()) / 100.0
            if 'intel/use/memory/saturation' in sat:
                res[node_name]['memory'] = (
                    sat.get('intel/use/memory/saturation').mean()) / 100.0
            if 'intel/use/disk/saturation' in sat:
                res[node_name]['disk'] = (
                    sat.get('intel/use/disk/saturation').mean()) / 100.0
            if 'intel/use/network/saturation' in sat:
                res[node_name]['network'] = (
                    sat.get('intel/use/network/saturation').mean()) / 100.0
        return res
    def get_queries(self, graph, node, ts_from, ts_to):
        """
        Build the telemetry queries for every metric of a node.

        :param graph: landscape graph (unused here; kept for interface
            compatibility)
        :param node: graph node to build queries for
        :param ts_from: start timestamp
        :param ts_to: end timestamp
        :return: list of {"<metric>_<node_name>": query} dicts
        """
        node_name = InfoGraphNode.get_name(node)
        node_layer = InfoGraphNode.get_layer(node)
        queries = list()
        # No point to ask for Service Resources
        if node_layer == InfoGraphNodeLayer.SERVICE:
            return queries

        for metric in self._get_metrics(node):
            try:
                query = self._build_query(metric, node, ts_from, ts_to)
            except Exception as e:
                LOG.error('Exception for metric: {}'.format(metric))
                LOG.error(e)
                # BUG FIX: the append below previously still executed with
                # 'query' unbound (NameError) or stale from the previous
                # iteration; skip this metric instead.
                continue
            queries.append({"{}_{}".format(metric, node_name): query})

        return queries
 def _get_nova_uuids(self, node):
     """Cache the VM neighbours of a physical machine node on self.vms."""
     if InfoGraphNode.get_type(node) != NODE_TYPE.PHYSICAL_MACHINE:
         return
     machine_name = InfoGraphNode.get_name(node)
     self.vms = self.landscape.get_neighbours_by_type(machine_name, "vm")
# Ejemplo n.º 23
# 0
    def compute_node_resources(annotated_subgraph, hostname=None):
        """
        Per-node utilization statistics for the physical resources of the
        graph (infrastructure-side fingerprint).

        :param annotated_subgraph: graph annotated with utilization data
        :param hostname: when set, only consider nodes allocated to it
        :return: [data, statistics] where data maps node name to its
            utilization DataFrame (timestamp column removed) and
            statistics maps node name to mean/median/min/max/var/std_dev
            of the 'utilization' column (all zeros when no data)
        """
        # TODO: Validate graph
        data = dict()
        statistics = dict()

        # Calculation of the fingerprint on top of the physical resources.
        local_subgraph = annotated_subgraph.copy()

        for node in local_subgraph.nodes(data=True):
            layer = InfoGraphNode.get_layer(node)
            if layer == InfoGraphNodeLayer.VIRTUAL:
                continue
            if layer == InfoGraphNodeLayer.SERVICE:
                continue
            node_type = InfoGraphNode.get_type(node)
            if node_type == 'core':
                continue

            # If hostname has been specified, take into account only the
            # nodes that are allocated to that specific host.
            attrs = InfoGraphNode.get_attributes(node)
            allocation = attrs.get('allocation')
            if hostname and not hostname == allocation:
                continue

            name = InfoGraphNode.get_name(node)
            utilization = InfoGraphNode.get_utilization(node)
            # BUG FIX: drop('timestamp', 1) raises KeyError (not the
            # ValueError that was being caught) when the column is absent,
            # and the positional axis argument is removed in pandas >= 2.0.
            utilization = utilization.drop(columns='timestamp',
                                           errors='ignore')
            data[name] = utilization

            if not data[name].empty:
                mean = data[name]['utilization'].mean()
                median = data[name]['utilization'].median()
                minimum = data[name]['utilization'].min()
                maximum = data[name]['utilization'].max()
                var = data[name]['utilization'].var()
                std_dev = math.sqrt(var)
            else:
                mean = median = minimum = maximum = var = std_dev = 0
            statistics[name] = \
                {'mean': mean,
                 'median': median,
                 'min': minimum,
                 'max': maximum,
                 'var': var,
                 'std_dev': std_dev}

        return [data, statistics]
# Ejemplo n.º 24
# 0
    def run(self, workload, optimal_node_type='machine'):
        """
        Ranks nodes of ``optimal_node_type`` by resource utilization.

        Builds one row per matching node with utilization/saturation
        scores, sorts by the configured utilization fields (ascending)
        and appends, at the bottom, nodes that had no telemetry data.

        :param workload: Contains workload related info and results.
        :param optimal_node_type: landscape node type to rank
                                  (default: 'machine').

        :return: heuristic results (pandas DataFrame)
        :raises KeyError: if the workload has no graph to process.
        """
        workload_config = workload.get_configuration()
        graph = workload.get_latest_graph()
        if not graph:
            raise KeyError('No graph to be processed.')

        scores = LandscapeScore.utilization_scores(graph)
        scores_sat = LandscapeScore.saturation_scores(graph)
        heuristic_results = pd.DataFrame(columns=[
            'node_name',
            'type',
            'ipaddress',
            'compute utilization',
            'compute saturation',
            'memory utilization',
            'memory saturation',
            'network utilization',
            'network saturation',
            'disk utilization',
            'disk saturation',
        ])
        # Nodes lacking telemetry data are collected separately and
        # appended after the ranked (telemetry-backed) nodes.
        heuristic_results_nt = heuristic_results.copy()
        device_id_col_name = None
        project = None
        if workload_config.get('project'):
            project = workload_config['project']
            device_id_col_name = workload_config['project'] + '_device_id'
            heuristic_results[device_id_col_name] = None

        telemetry_filter = workload_config.get('telemetry_filter')
        for node in graph.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            node_type = InfoGraphNode.get_type(node)
            list_node_name = node_name
            if node_type == optimal_node_type:
                # VMs are listed under their VM name when it is known.
                if InfoGraphNode.node_is_vm(node):
                    vm_name = InfoGraphNode.get_properties(node).get('vm_name')
                    if vm_name:
                        list_node_name = vm_name
                data = {
                    'node_name':
                    list_node_name,
                    'type':
                    node_type,
                    'ipaddress':
                    InfoGraphNode.get_attributes(node).get('ipaddress'),
                    'compute utilization':
                    scores[node_name]['compute'],
                    'compute saturation':
                    scores_sat[node_name]['compute'],
                    'memory utilization':
                    scores[node_name]['memory'],
                    'memory saturation':
                    scores_sat[node_name]['memory'],
                    'network utilization':
                    scores[node_name]['network'],
                    'network saturation':
                    scores_sat[node_name]['network'],
                    'disk utilization':
                    scores[node_name]['disk'],
                    'disk saturation':
                    scores_sat[node_name]['disk']
                }
                if device_id_col_name:
                    dev_id = InfoGraphNode.get_properties(node).get(
                        device_id_col_name)
                    # mf2c device ids use '-' rather than '_'; guard
                    # against a missing device id property.
                    if project == 'mf2c' and dev_id:
                        dev_id = dev_id.replace('_', '-')
                    data[device_id_col_name] = dev_id
                if InfoGraphNode.get_properties(node).get(
                        "telemetry_data") is not None:
                    heuristic_results = heuristic_results.append(
                        data, ignore_index=True)
                elif not telemetry_filter:
                    # BUG FIX: this previously appended to
                    # heuristic_results (and assigned the copy to
                    # heuristic_results_nt), which duplicated every
                    # ranked row at the end and dropped all but the last
                    # no-telemetry node.
                    heuristic_results_nt = heuristic_results_nt.append(
                        data, ignore_index=True)

            if not workload.get_workload_name().startswith('optimal_'):
                # Containers get a row of their own (utilization only;
                # no saturation scores are computed for them).
                if InfoGraphNode.get_type(
                        node
                ) == "docker_container" and optimal_node_type == 'machine':
                    node_name = InfoGraphNode.get_docker_id(node)
                    heuristic_results = heuristic_results.append(
                        {
                            'node_name': node_name,
                            'type': node_type,
                            'ipaddress': None,
                            'compute utilization':
                            scores[node_name]['compute'],
                            'compute saturation': None,
                            'memory utilization': scores[node_name]['memory'],
                            'memory saturation': None,
                            'network utilization':
                            scores[node_name]['network'],
                            'network saturation': None,
                            'disk utilization': scores[node_name]['disk'],
                            'disk saturation': None
                        },
                        ignore_index=True)
        # Map the configured sort order onto DataFrame column names;
        # default is CPU utilization only.
        sort_fields = ['compute utilization']
        sort_order = workload_config.get('sort_order')
        if sort_order:
            sort_fields = []
            for val in sort_order:
                if val == 'cpu':
                    sort_fields.append('compute utilization')
                if val == 'memory':
                    sort_fields.append('memory utilization')
                if val == 'network':
                    sort_fields.append('network utilization')
                if val == 'disk':
                    sort_fields.append('disk utilization')
        heuristic_results_nt = heuristic_results_nt.replace([0], [None])
        heuristic_results = heuristic_results.sort_values(by=sort_fields,
                                                          ascending=True)
        heuristic_results = heuristic_results.append(heuristic_results_nt,
                                                     ignore_index=True)
        workload.append_metadata(self.__filter_name__, heuristic_results)
        LOG.info('AVG: {}'.format(heuristic_results))
        return heuristic_results
# ==== Ejemplo n.º 25 (scraped snippet separator; "0" was a vote count) ====
    def utilization_scores(graph):
        """
        Return utilization scores for every node of the graph.

        For each node a dict with keys 'compute', 'memory', 'network'
        and 'disk' is built, each holding the mean utilization over the
        node's telemetry window scaled to [0, 1] (0 when no telemetry
        is available).

        :param graph: InfoGraph
        :return: dict[node_name] = {'compute': .., 'memory': ..,
                                    'network': .., 'disk': ..}
        """
        res = dict()
        for node in graph.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            res[node_name] = dict()
            util = InfoGraphNode.get_utilization(node)

            res[node_name]['compute'] = 0
            res[node_name]['disk'] = 0
            res[node_name]['network'] = 0
            res[node_name]['memory'] = 0
            # Skip nodes with no telemetry: either an empty DataFrame or
            # no DataFrame at all.  FIX: 'util == None' replaced with
            # 'util is None' (identity check; '==' is element-wise on
            # pandas objects and non-idiomatic for None tests).  The
            # unused per-iteration 'import analytics_engine.common' /
            # LOG binding was removed as dead code.
            if (isinstance(util, pandas.DataFrame) and
                    util.empty) or \
                    (not isinstance(util, pandas.DataFrame) and
                             util is None):
                continue

            # Prefer the intel/use/* metrics; fall back to procfs /
            # psutil percentage metrics when they are absent.
            if 'intel/use/compute/utilization' in util:
                res[node_name]['compute'] = (
                    util.get('intel/use/compute/utilization').mean()) / 100.0
            elif 'intel/procfs/cpu/utilization_percentage' in util:
                res[node_name]['compute'] = (util.get(
                    'intel/procfs/cpu/utilization_percentage').mean()) / 100.0
            if 'intel/use/memory/utilization' in util:
                res[node_name]['memory'] = (
                    util.get('intel/use/memory/utilization').mean()) / 100.0
            elif 'intel/procfs/memory/utilization_percentage' in util:
                res[node_name]['memory'] = (
                    util.get('intel/procfs/memory/utilization_percentage'
                             ).mean()) / 100.0
            if 'intel/use/disk/utilization' in util:
                res[node_name]['disk'] = (
                    util.get('intel/use/disk/utilization').mean()) / 100.0
            elif 'intel/procfs/disk/utilization_percentage' in util:
                res[node_name]['disk'] = (util.get(
                    'intel/procfs/disk/utilization_percentage').mean()) / 100.0
            if 'intel/use/network/utilization' in util:
                res[node_name]['network'] = (
                    util.get('intel/use/network/utilization').mean()) / 100.0
            elif 'intel/psutil/net/utilization_percentage' in util:
                res[node_name]['network'] = (util.get(
                    'intel/psutil/net/utilization_percentage').mean()) / 100.0

            # special handling of cpu, disk & network utilization if node is a machine
            if InfoGraphNode.node_is_machine(node):
                # Per-row mean across all cpu columns.  NOTE(review):
                # adding the 'total' column mutates the DataFrame stored
                # on the node — downstream code may rely on it, so the
                # in-place update is kept.
                cpu_util = InfoGraphNode.get_compute_utilization(node)
                cpu_util['total'] = [
                    sum(row) / len(row) for index, row in cpu_util.iterrows()
                ]
                res[node_name]['compute'] = cpu_util['total'].mean() / 100
                # mean from all disk columns
                disk_util = InfoGraphNode.get_disk_utilization(node)
                if disk_util.empty:
                    res[node_name]['disk'] = 0.0
                else:
                    disk_util['total'] = [
                        sum(row) / len(row)
                        for index, row in disk_util.iterrows()
                    ]
                    res[node_name]['disk'] = disk_util['total'].mean() / 100
                # mean from all nic columns
                net_util = InfoGraphNode.get_network_utilization(node)
                if net_util.empty:
                    res[node_name]['network'] = 0.0
                else:
                    net_util['total'] = [
                        sum(row) / len(row)
                        for index, row in net_util.iterrows()
                    ]
                    res[node_name]['network'] = net_util['total'].mean() / 100
                # custom metric

            # Docker containers are re-keyed by docker id and scored
            # from cgroups metrics.
            if InfoGraphNode.get_type(
                    node) == InfoGraphNodeType.DOCKER_CONTAINER:
                node_name = InfoGraphNode.get_docker_id(node)
                res[node_name] = {}
                if 'intel/docker/stats/cgroups/cpu_stats/cpu_usage/percentage' in util.columns:
                    res[node_name]['compute'] = util[
                        'intel/docker/stats/cgroups/cpu_stats/cpu_usage/percentage'].mean(
                        ) / 100
                else:
                    res[node_name]['compute'] = 0
                if 'intel/docker/stats/cgroups/memory_stats/usage/percentage' in util.columns:
                    res[node_name]['memory'] = util[
                        'intel/docker/stats/cgroups/memory_stats/usage/percentage'].mean(
                        ) / 100
                else:
                    res[node_name]['memory'] = 0
                if 'intel/docker/stats/network/utilization_percentage' in util.columns:
                    res[node_name]['network'] = util[
                        'intel/docker/stats/network/utilization_percentage'].mean(
                        ) / 100
                else:
                    res[node_name]['network'] = 0
                if 'intel/docker/stats/cgroups/blkio_stats/io_time_recursive/percentage' in util.columns:
                    res[node_name]['disk'] = util[
                        'intel/docker/stats/cgroups/blkio_stats/io_time_recursive/percentage'].mean(
                        ) / 100
                else:
                    res[node_name]['disk'] = 0
        return res
# ==== Ejemplo n.º 26 (scraped snippet separator; "0" was a vote count) ====
    def get_annotated_graph(self,
                            graph,
                            ts_from,
                            ts_to,
                            utilization=False,
                            saturation=False):
        """
        Collect data from cimmaron tsdb in relation to the specified graph and
         time windows and store an annotated subgraph in specified directory

        Works on a copy of the input graph; each node is annotated in
        place with telemetry queries/data, and optionally with
        utilization and saturation results.

        :param graph: (NetworkX Graph) Graph to be annotated with data
        :param ts_from: (str) Epoch time representation of start time
        :param ts_to: (str) Epoch time representation of stop time
        :param utilization: (bool) if True the method calculates also
                                    utilization for each node, if available
        :param saturation: (bool) if True, saturation is also computed
                           (Snap and default telemetry paths only; the
                           Prometheus path currently skips it)
        :return: NetworkX Graph annotated with telemetry data
        """
        TelemetryAnnotation._get_annotated_graph_input_validation(
            graph, ts_from, ts_to)
        # Annotate a copy so the caller's graph is left untouched.
        internal_graph = graph.copy()
        self.internal_graph = internal_graph
        for node in internal_graph.nodes(data=True):
            # --- Snap telemetry back end ---
            if isinstance(self.telemetry, SnapAnnotation):
                queries = list()
                try:
                    queries = self.telemetry.get_queries(
                        internal_graph, node, ts_from, ts_to)
                    # queries = self.telemetry.get_queries(graph, node, ts_from, ts_to)
                except Exception as e:
                    # Query building is best-effort: log and continue
                    # with an empty query list for this node.
                    LOG.error("Exception: {}".format(e))
                    LOG.error(e)
                    import traceback
                    traceback.print_exc()
                if len(queries) != 0:
                    InfoGraphNode.set_queries(node, queries)

                    telemetry_data = self.telemetry.get_data(node)
                    InfoGraphNode.set_telemetry_data(node, telemetry_data)
                    if utilization and not telemetry_data.empty:
                        SnapUtils.utilization(internal_graph, node,
                                              self.telemetry)
                        # if only procfs is available, results needs to be
                        # propagated at machine level
                        if InfoGraphNode.get_type(
                                node) == InfoGraphNodeType.PHYSICAL_PU:
                            SnapUtils.annotate_machine_pu_util(
                                internal_graph, node)
                        if InfoGraphNode.node_is_disk(node):
                            SnapUtils.annotate_machine_disk_util(
                                internal_graph, node)
                        if InfoGraphNode.node_is_nic(node):
                            SnapUtils.annotate_machine_network_util(
                                internal_graph, node)
                    if saturation:
                        SnapUtils.saturation(internal_graph, node,
                                             self.telemetry)
            # --- Prometheus telemetry back end (utilization/saturation
            # propagation is currently disabled, see commented code) ---
            elif isinstance(self.telemetry, PrometheusAnnotation):
                queries = list()
                try:
                    queries = self.telemetry.get_queries(
                        internal_graph, node, ts_from, ts_to)
                    # queries = self.telemetry.get_queries(graph, node, ts_from, ts_to)
                except Exception as e:
                    # Best-effort, as in the Snap branch above.
                    LOG.error("Exception: {}".format(e))
                    LOG.error(e)
                    import traceback
                    traceback.print_exc()
                if len(queries) != 0:
                    InfoGraphNode.set_queries(node, queries)

                    telemetry_data = self.telemetry.get_data(node)
                    InfoGraphNode.set_telemetry_data(node, telemetry_data)
                    # if utilization and not telemetry_data.empty:
                    #PrometheusUtils.utilization(internal_graph, node, self.telemetry)
                    # if only procfs is available, results needs to be
                    # propagated at machine level
                    #if InfoGraphNode.get_type(node) == InfoGraphNodeType.PHYSICAL_PU:
                    #    PrometheusUtils.annotate_machine_pu_util(internal_graph, node)
                    #if InfoGraphNode.node_is_disk(node):
                    #    PrometheusUtils.annotate_machine_disk_util(internal_graph, node)
                    #if InfoGraphNode.node_is_nic(node):
                    #    PrometheusUtils.annotate_machine_network_util(internal_graph, node)
                    #if saturation:
                    #PrometheusUtils.saturation(internal_graph, node, self.telemetry)
            # --- default / other telemetry back ends ---
            else:
                telemetry_data = self.telemetry.get_data(node)
                InfoGraphNode.set_telemetry_data(node, telemetry_data)
                if utilization and not telemetry_data.empty:
                    SnapUtils.utilization(internal_graph, node, self.telemetry)
                    # if only procfs is available, results needs to be
                    # propagated at machine level
                    if InfoGraphNode.get_type(
                            node) == InfoGraphNodeType.PHYSICAL_PU:
                        # Aggregate this PU's procfs utilization into its
                        # machine node, unless the machine already has
                        # intel/use data.
                        source = InfoGraphNode.get_machine_name_of_pu(node)
                        machine = InfoGraphNode.get_node(
                            internal_graph, source)
                        machine_util = InfoGraphNode.get_compute_utilization(
                            machine)
                        if '/intel/use/compute/utilization' not in machine_util.columns:
                            sum_util = None
                            pu_util = InfoGraphNode.get_compute_utilization(
                                node
                            )['intel/procfs/cpu/utilization_percentage']
                            pu_util = pu_util.fillna(0)
                            if 'intel/procfs/cpu/utilization_percentage' in machine_util.columns:

                                machine_util = machine_util[
                                    'intel/procfs/cpu/utilization_percentage']
                                machine_util = machine_util.fillna(0)
                                # Element-wise sum of machine + PU series.
                                sum_util = machine_util.add(pu_util,
                                                            fill_value=0)
                            else:
                                sum_util = pu_util
                            if isinstance(sum_util, pandas.Series):
                                # sum_util.index.name = None
                                sum_util = pandas.DataFrame(
                                    sum_util,
                                    columns=[
                                        'intel/procfs/cpu/utilization_percentage'
                                    ])
                            InfoGraphNode.set_compute_utilization(
                                machine, sum_util)
                        else:
                            LOG.debug('Found use for node {}'.format(
                                InfoGraphNode.get_name(node)))
                if saturation:
                    self._saturation(internal_graph, node, self.telemetry)
        return internal_graph