예제 #1
0
    def _create_pandas_data_frame_from_graph(graph, metrics='all'):
        """
        Merge the per-node data of a graph into a single pandas data frame.

        Every node contributes one column per metric, named
        "<node name>@<layer>@<type>@<metric>" with dots replaced by
        underscores. The per-node frames are outer-joined on 'timestamp',
        after the timestamps have been rounded to whole integers.

        Nodes whose frame is empty or holds at most one column are skipped.

        NOTE: the timestamp normalisation assigns into the frame handed out
        by InfoGraphNode; if that is the object stored on the graph, the
        node data is modified in place.

        :param graph: (NetworkX Graph) graph whose nodes carry the data
        :param metrics: 'all' to export the telemetry data of each node,
            any other value to export its utilization data
        :return: pandas.DataFrame with a 'timestamp' column plus one column
            per node metric; an empty frame if no node contributed data
        """
        result = pandas.DataFrame()
        for node in graph.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            node_layer = InfoGraphNode.get_layer(node)
            node_type = InfoGraphNode.get_type(node)

            # This method supports export of either normal metrics coming
            #  from telemetry agent or utilization type of metrics.
            if metrics == 'all':
                node_telemetry_data = InfoGraphNode.get_telemetry_data(node)
            else:
                node_telemetry_data = InfoGraphNode.get_utilization(node)

            # Skip nodes without usable data *before* touching 'timestamp':
            # a frame without data may not have that column at all.
            if node_telemetry_data.empty or len(
                    node_telemetry_data.columns) <= 1:
                continue

            # Normalise timestamps to integers so that frames coming from
            # different nodes line up when merged.
            node_telemetry_data['timestamp'] = node_telemetry_data[
                'timestamp'].astype(float).round().astype(int)

            # Qualify every metric column with the node identity; a single
            # rename avoids re-creating the frame once per column.
            renamed_columns = {
                metric_name: "{}@{}@{}@{}".format(
                    node_name, node_layer, node_type,
                    metric_name).replace(".", "_")
                for metric_name in node_telemetry_data.columns.values
                if metric_name != 'timestamp'
            }
            node_telemetry_data = node_telemetry_data.rename(
                columns=renamed_columns)

            if result.empty:
                result = node_telemetry_data.copy()
            else:
                # Duplicated timestamps would multiply rows in the join.
                node_telemetry_data = \
                    node_telemetry_data.drop_duplicates(subset='timestamp')
                result = pandas.merge(result,
                                      node_telemetry_data,
                                      how='outer',
                                      on='timestamp')
        return result
    def get_queries(self, landscape, node, ts_from, ts_to):
        """
        Return queries to use for telemetry for the specific node.

        One query is built per metric returned by ``self._get_metrics``.
        A metric whose query cannot be built is logged (message and
        exception) and skipped, so one failing metric does not prevent the
        remaining queries from being returned.

        :param landscape: landscape object; stored on ``self.landscape``
        :param node: graph node the queries are built for
        :param ts_from: start of the time window, forwarded to
            ``self._build_query``
        :param ts_to: end of the time window, forwarded to
            ``self._build_query``
        :return: list with the queries that were built successfully
        """
        self.landscape = landscape
        queries = []
        for metric in self._get_metrics(node):
            try:
                query = self._build_query(metric, node, ts_from, ts_to)
            except Exception as e:
                LOG.error('Exception for metric: {}'.format(metric))
                LOG.error(e)
                continue
            queries.append(query)

        return queries
 def _source(self, node):
     """
     Resolve the source (host) identifier of ``node``.

     The checks are applied in this order, the first hit wins:

     * physical layer: the 'allocation' attribute, looked up on the node
       attributes or, failing that, on the innermost nested 'attributes'
       dict;
     * virtual machine: the 'vm_name' attribute;
     * instance disk: the physical machine neighbouring the disk's
       virtual machine in ``self.landscape``;
     * physical machine: the 'name' attribute;
     * docker container: the 'machine' neighbour of its 'docker_node'.

     :param node: graph node to resolve
     :return: the source found, or None when no rule applies
     """
     attributes = InfoGraphNode.get_attributes(node)

     if InfoGraphNode.get_layer(node) == GRAPH_LAYER.PHYSICAL:
         if 'allocation' not in attributes:
             # fix due to the landscape: descend through the nested
             # 'attributes' dicts down to the innermost one
             nested = attributes.get('attributes', None)
             while nested:
                 attributes = nested
                 nested = attributes.get('attributes', None)
         if 'allocation' in attributes:
             return attributes['allocation']

     node_type = InfoGraphNode.get_type(node)

     if node_type == NODE_TYPE.VIRTUAL_MACHINE and 'vm_name' in attributes:
         return attributes['vm_name']

     if node_type == NODE_TYPE.INSTANCE_DISK:
         # The machine is the source as this is a libvirt disk.
         vm = self.landscape.get_neighbour_by_type(
             InfoGraphNode.get_name(node), NODE_TYPE.VIRTUAL_MACHINE)
         return self.landscape.get_neighbour_by_type(
             vm, NODE_TYPE.PHYSICAL_MACHINE)

     if node_type == NODE_TYPE.PHYSICAL_MACHINE and 'name' in attributes:
         return attributes['name']

     if node_type == NODE_TYPE.DOCKER_CONTAINER:
         container_name = InfoGraphNode.get_name(node)
         docker_node = self.landscape.get_neighbour_by_type(
             container_name, 'docker_node')
         if docker_node:
             return self.landscape.get_neighbour_by_type(
                 docker_node, 'machine')

     return None
예제 #4
0
    def get_metrics(graph, metrics='all'):
        """
        List the qualified metric names of all the nodes of the graph.

        Each name has the form "<node name>@<layer>@<type>@<metric>" with
        dots replaced by underscores; the 'timestamp' column is left out.

        :param graph: (NetworkX Graph) graph whose nodes carry the data
        :param metrics: 'all' to read the telemetry data of each node, any
            other value to read its utilization data. default = all
        :return: the list of metrics associated with the graph
        """
        use_telemetry = metrics == 'all'
        metric_list = []
        for node in graph.nodes(data=True):
            name = InfoGraphNode.get_name(node)
            layer = InfoGraphNode.get_layer(node)
            kind = InfoGraphNode.get_type(node)

            # Either the normal metrics coming from the telemetry agent or
            # the utilization type of metrics.
            if use_telemetry:
                frame = InfoGraphNode.get_telemetry_data(node)
            else:
                frame = InfoGraphNode.get_utilization(node)

            for metric_name in frame.columns.values:
                if metric_name != 'timestamp':
                    qualified = "{}@{}@{}@{}".format(
                        name, layer, kind, metric_name)
                    metric_list.append(qualified.replace(".", "_"))
        return metric_list
    def _source_metrics(self, node):
        """
        Retrieve the metric types available for the source behind a node.

        * Physical layer nodes and instance disks are looked up by source
          only; any failure is logged as a malformed graph and yields an
          empty list.
        * Virtual layer nodes are looked up by stack name (the cache
          identifier combines source and stack); without a stack nothing
          is looked up.
        * Docker containers are looked up by docker id and source, and
          only when both are known.

        :param node: graph node to collect metric types for
        :return: the metric types found, or an empty list
        """
        layer = InfoGraphNode.get_layer(node)
        kind = InfoGraphNode.get_type(node)

        if layer == GRAPH_LAYER.PHYSICAL \
                or kind == NODE_TYPE.INSTANCE_DISK:
            try:
                host = self._source(node)
                return self._cached_metrics(host, {"source": host})
            except Exception as ex:
                LOG.error('Malformed graph: {}'.format(
                    InfoGraphNode.get_name(node)))
                LOG.error(ex)
                return []

        if layer == GRAPH_LAYER.VIRTUAL:
            host = self._source(node)
            stack = self._stack(node)
            if stack is None:
                return []
            # Only the stack name is used as query tag; the source is just
            # part of the cache identifier.
            return self._cached_metrics(
                "{}-{}".format(host, stack), {"stack_name": stack})

        if kind == NODE_TYPE.DOCKER_CONTAINER:
            host = self._source(node)
            docker_id = InfoGraphNode.get_docker_id(node)
            if docker_id is not None and host is not None:
                return self._cached_metrics(
                    "{}-{}".format(host, docker_id),
                    {"docker_id": docker_id, "source": host})

        return []
예제 #6
0
    def filter_graph(graph):
        """
        Return a copy of the graph filtered of the nodes with no telemetry.

        Works on ``graph.copy()`` in three passes:

        1. every node's attributes are turned into a JSON-like string
           (the 'template' entry, when present and truthy, is taken out
           first and remembered per node name);
        2. every non-service node whose telemetry frame has at most one
           column gets its telemetry reset to an empty dict and is passed
           to ``res.filter_nodes``;
        3. the attribute strings are converted back to dicts and the
           template is put back on SERVICE_COMPUTE nodes.

        NOTE(review): the return value of ``res.filter_nodes`` is ignored,
        so this presumably relies on it removing the node in place --
        confirm against its implementation.
        NOTE(review): step 3 indexes ``template_mapping`` directly, so a
        SERVICE_COMPUTE node that had no template in step 1 raises
        KeyError.

        :param graph: graph to filter; its ``copy()`` is what gets modified
        :return: the processed copy
        """
        # node name -> template removed from that node's attributes
        template_mapping = dict()

        res = graph.copy()
        for node in res.nodes(data=True):
            # node is indexed as node[1] to reach its data dict
            # (presumably the (name, data) pair yielded by nodes(data=True))
            template = node[1]['attributes']['template'] \
                if 'template' in node[1]['attributes'] else None

            # If node is a service node, need to remove the template
            if template:
                template_mapping[InfoGraphNode.get_name(node)] = template
                node[1]['attributes'].pop('template')

            # Fix format for conversion to JSON (happening in analytics):
            # the attributes become a string with double quotes only
            node[1]['attributes'] = \
                str(misc.convert_unicode_dict_to_string(node[1]['attributes'])).\
                    replace("'", '"')

        for node in res.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            telemetry = InfoGraphNode.get_telemetry_data(node)
            layer = InfoGraphNode.get_layer(node)
            # At most one column means no metric column next to (presumably)
            # the timestamp; service layer nodes are always kept.

            if len(telemetry.columns) <= 1 and \
                    not layer == InfoGraphNodeLayer.SERVICE:
                InfoGraphNode.set_telemetry_data(node, dict())
                res.filter_nodes('node_name', node_name)

        # Convert attributes back to dict()
        for node in res.nodes(data=True):
            string = InfoGraphNode.get_attributes(node)
            attrs = InfoGraphUtilities.str_to_dict(string)
            # Restore the template taken out in the first pass
            if InfoGraphNode.get_type(node) == \
                    InfoGraphNodeType.SERVICE_COMPUTE:
                attrs['template'] = \
                    template_mapping[InfoGraphNode.get_name(node)]
            InfoGraphNode.set_attributes(node, attrs)
        return res
 def _source(self, node):
     """
     Resolve the source (host) identifier of ``node``.

     The checks are applied in this order, the first hit wins:

     * physical layer: the 'allocation' attribute;
     * virtual machine: the 'vm_name' attribute, else the 'name' one;
     * instance disk: the physical machine neighbouring the disk's
       virtual machine in ``self.landscape``;
     * physical machine: the 'name' attribute.

     :param node: graph node to resolve
     :return: the source found, or None when no rule applies
     """
     attributes = InfoGraphNode.get_attributes(node)

     on_physical_layer = \
         InfoGraphNode.get_layer(node) == GRAPH_LAYER.PHYSICAL
     if on_physical_layer and 'allocation' in attributes:
         return attributes['allocation']

     node_type = InfoGraphNode.get_type(node)

     if node_type == NODE_TYPE.VIRTUAL_MACHINE:
         for key in ('vm_name', 'name'):
             if key in attributes:
                 return attributes[key]

     if node_type == NODE_TYPE.INSTANCE_DISK:
         # The machine is the source as this is a libvirt disk.
         vm = self.landscape.get_neighbour_by_type(
             InfoGraphNode.get_name(node), NODE_TYPE.VIRTUAL_MACHINE)
         return self.landscape.get_neighbour_by_type(
             vm, NODE_TYPE.PHYSICAL_MACHINE)

     if node_type == NODE_TYPE.PHYSICAL_MACHINE and 'name' in attributes:
         return attributes['name']

     return None
    def get_queries(self, graph, node, ts_from, ts_to):
        """
        Build the telemetry queries for a node.

        Service layer nodes get no queries. For any other node one query
        is built per metric returned by ``self._get_metrics``; a metric
        whose query cannot be built is logged and skipped.

        :param graph: not used by this method
        :param node: graph node the queries are built for
        :param ts_from: start of the time window, forwarded to
            ``self._build_query``
        :param ts_to: end of the time window, forwarded to
            ``self._build_query``
        :return: list of single-entry dicts mapping
            "<metric>_<node name>" to the query built for that metric
        """
        node_name = InfoGraphNode.get_name(node)
        node_layer = InfoGraphNode.get_layer(node)
        queries = list()
        # No point to ask for Service Resources
        if node_layer == InfoGraphNodeLayer.SERVICE:
            return queries

        for metric in self._get_metrics(node):
            try:
                query = self._build_query(metric, node, ts_from, ts_to)
            except Exception as e:
                LOG.error('Exception for metric: {}'.format(metric))
                LOG.error(e)
                # Without a query there is nothing to register: appending
                # here would hit an unbound name or reuse the query of the
                # previous metric.
                continue
            queries.append({"{}_{}".format(metric, node_name): query})

        return queries
예제 #9
0
    def workload(nodes):
        """
        This is a type of fingerprint from the infrastructure perspective.

        Prepares, for each of the compute, network, storage and memory
        categories, an (empty) data frame and a statistics dict with the
        keys 'mean', 'median', 'min', 'max', 'var' and 'std_dev'.

        NOTE: no utilization is collected from ``nodes`` yet, so every
        frame stays empty and every statistic is 0. Only the mean is
        derived from the data; the other statistics are fixed to 0.

        :param nodes: iterable of graph nodes
        :return: two-element list ``[data, statistics]``, both dicts keyed
            by category
        """
        # TODO: Validate graph
        categories = (
            InfoGraphNodeCategory.COMPUTE,
            InfoGraphNodeCategory.NETWORK,
            InfoGraphNodeCategory.STORAGE,
            InfoGraphNodeCategory.MEMORY,
        )
        data = {category: pandas.DataFrame() for category in categories}
        statistics = dict()

        # Calculation of the fingerprint on top of the virtual resources
        for node in nodes:
            layer = InfoGraphNode.get_layer(node)
            if InfoGraphNode.node_is_machine(node):
                continue
            if layer == InfoGraphNodeLayer.PHYSICAL:
                continue
            if layer == InfoGraphNodeLayer.SERVICE:
                continue
            # TODO: accumulate the utilization of the remaining nodes into
            # data[<category>]; nothing is collected at the moment.

        for category in categories:
            frame = data[category]
            # An empty frame has no 'utilization' column: reading it would
            # raise KeyError, so fall back to 0 as for the other values.
            if frame.empty:
                mean = 0
            else:
                mean = frame['utilization'].mean()
            statistics[category] = {
                'mean': mean,
                'median': 0,
                'min': 0,
                'max': 0,
                'var': 0,
                'std_dev': 0,
            }

        return [data, statistics]
예제 #10
0
    def compute_node_resources(annotated_subgraph, hostname=None):
        """
        This is a type of fingerprint from the infrastructure perspective.

        Computes utilization statistics for every individual node of the
        subgraph that is neither on the virtual nor on the service layer
        and is not of type 'core'.

        :param annotated_subgraph: graph whose nodes carry utilization
            data; a copy of it is iterated
        :param hostname: when given, only nodes whose 'allocation'
            attribute equals it are taken into account
        :return: two-element list ``[data, statistics]``, both dicts keyed
            by node name: ``data`` holds the utilization frame (without
            its 'timestamp' column), ``statistics`` a dict with 'mean',
            'median', 'min', 'max', 'var' and 'std_dev' of the
            'utilization' column (all 0 when the frame is empty)
        """
        # TODO: Validate graph
        data = dict()
        statistics = dict()

        # Calculation of the fingerprint on top of the virtual resources
        local_subgraph = annotated_subgraph.copy()

        for node in local_subgraph.nodes(data=True):
            layer = InfoGraphNode.get_layer(node)
            if layer == InfoGraphNodeLayer.VIRTUAL:
                continue
            if layer == InfoGraphNodeLayer.SERVICE:
                continue
            node_type = InfoGraphNode.get_type(node)
            if node_type == 'core':
                continue

            # If hostname has been specified, need to take into account only
            # nodes that are related to the specific host
            attrs = InfoGraphNode.get_attributes(node)
            allocation = attrs['allocation'] if 'allocation' in attrs \
                else None
            if hostname and hostname != allocation:
                continue

            name = InfoGraphNode.get_name(node)
            utilization = InfoGraphNode.get_utilization(node)
            # Drop the timestamp only when it is there: recent pandas
            # raises KeyError (not ValueError) for a missing label and no
            # longer accepts the axis as a positional argument.
            if 'timestamp' in utilization.columns:
                utilization = utilization.drop('timestamp', axis=1)
            data[name] = utilization

            if not utilization.empty:
                series = utilization['utilization']
                variance = series.var()
                statistics[name] = {
                    'mean': series.mean(),
                    'median': series.median(),
                    'min': series.min(),
                    'max': series.max(),
                    'var': variance,
                    'std_dev': math.sqrt(variance),
                }
            else:
                statistics[name] = {
                    'mean': 0,
                    'median': 0,
                    'min': 0,
                    'max': 0,
                    'var': 0,
                    'std_dev': 0,
                }

        return [data, statistics]
예제 #11
0
    def compute_node(annotated_subgraph, hostname=None):
        """
        This is a type of fingerprint from the infrastructure perspective.

        Concatenates, per category (compute, network, storage, memory),
        the utilization of the nodes of the subgraph that are not
        machines and are neither on the virtual nor on the service layer,
        then computes statistics over each category.

        :param annotated_subgraph: graph whose nodes carry utilization
            data; a copy of it is iterated
        :param hostname: when given, only nodes whose 'allocation'
            attribute equals it are taken into account
        :return: two-element list ``[data, statistics]``, both dicts keyed
            by category: ``data`` holds the concatenated utilization
            frames (without 'timestamp' column), ``statistics`` a dict
            with 'mean', 'median', 'min', 'max', 'var' and 'std_dev' of
            the 'utilization' column (all 0 when the frame is empty)
        """
        # TODO: Validate graph
        categories = (
            InfoGraphNodeCategory.COMPUTE,
            InfoGraphNodeCategory.NETWORK,
            InfoGraphNodeCategory.STORAGE,
            InfoGraphNodeCategory.MEMORY,
        )
        data = {category: pandas.DataFrame() for category in categories}
        statistics = dict()

        # Calculation of the fingerprint on top of the virtual resources
        local_subgraph = annotated_subgraph.copy()

        for node in local_subgraph.nodes(data=True):
            layer = InfoGraphNode.get_layer(node)
            if InfoGraphNode.node_is_machine(node):
                continue
            if layer == InfoGraphNodeLayer.VIRTUAL:
                continue
            if layer == InfoGraphNodeLayer.SERVICE:
                continue
            # If hostname has been specified, need to take into account only
            # nodes that are related to the specific host
            attrs = InfoGraphNode.get_attributes(node)
            allocation = attrs['allocation'] if 'allocation' in attrs \
                else None
            if hostname and hostname != allocation:
                continue

            category = InfoGraphNode.get_category(node)
            utilization = InfoGraphNode.get_utilization(node)
            # Drop the timestamp only when it is there: recent pandas
            # raises KeyError (not ValueError) for a missing label and no
            # longer accepts the axis as a positional argument.
            if 'timestamp' in utilization.columns:
                utilization = utilization.drop('timestamp', axis=1)
            data[category] = pandas.concat([data[category], utilization])

        for category in categories:
            frame = data[category]
            if not frame.empty:
                series = frame['utilization']
                variance = series.var()
                statistics[category] = {
                    'mean': series.mean(),
                    'median': series.median(),
                    'min': series.min(),
                    'max': series.max(),
                    'var': variance,
                    'std_dev': math.sqrt(variance),
                }
            else:
                statistics[category] = {
                    'mean': 0,
                    'median': 0,
                    'min': 0,
                    'max': 0,
                    'var': 0,
                    'std_dev': 0,
                }

        return [data, statistics]