예제 #1
0
    def _create_pandas_data_frame_from_graph(graph, metrics='all'):
        """
        Save on csv files the data in the graph.
        Stores one csv per node of the graph

        :param graph: (NetworkX Graph) Graph to be annotated with data
        :param directory: (str) directory where to store csv files
        :return: NetworkX Graph annotated with telemetry data
        """
        result = pandas.DataFrame()
        for node in graph.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            node_layer = InfoGraphNode.get_layer(node)
            node_type = InfoGraphNode.get_type(node)

            # This method supports export of either normal metrics coming
            #  from telemetry agent or utilization type of metrics.
            if metrics == 'all':
                node_telemetry_data = InfoGraphNode.get_telemetry_data(node)
            else:
                node_telemetry_data = InfoGraphNode.get_utilization(node)
            # df = node_telemetry_data.copy()

            # LOG.info("Node Name: {} -- Telemetry: {}".format(
            #     InfoGraphNode.get_name(node),
            #     InfoGraphNode.get_telemetry_data(node).columns.values
            # ))

            node_telemetry_data['timestamp'] = node_telemetry_data[
                'timestamp'].astype(float)
            node_telemetry_data['timestamp'] = node_telemetry_data[
                'timestamp'].round()
            node_telemetry_data['timestamp'] = node_telemetry_data[
                'timestamp'].astype(int)
            for metric_name in node_telemetry_data.columns.values:
                if metric_name == 'timestamp':
                    continue
                col_name = "{}@{}@{}@{}".\
                    format(node_name, node_layer, node_type, metric_name)
                col_name = col_name.replace(".", "_")
                node_telemetry_data = node_telemetry_data.rename(
                    columns={metric_name: col_name})

                # LOG.info("TELEMETRIA: {}".format(node_telemetry_data.columns.values))

            if node_telemetry_data.empty or len(
                    node_telemetry_data.columns) <= 1:
                continue
            if result.empty:
                result = node_telemetry_data.copy()
            else:
                node_telemetry_data = \
                    node_telemetry_data.drop_duplicates(subset='timestamp')
                result = pandas.merge(result,
                                      node_telemetry_data,
                                      how='outer',
                                      on='timestamp')
            # TODO: Try with this removed
            # result.set_index(['timestamp'])
        return result
예제 #2
0
    def get_metrics(graph, metrics='all'):
        """
        Collect the qualified names of all metrics found on the graph nodes.

        A qualified name has the form
        "<node name>@<node layer>@<node type>@<metric name>" with every "."
        replaced by "_". The 'timestamp' column is never reported.

        :param graph: (NetworkX Graph) Graph whose nodes carry the data
        :param metrics: 'all' to inspect the telemetry data of each node,
            any other value to inspect its utilization data instead
        :return: (list) qualified metric names, in node iteration order
        """
        use_telemetry = metrics == 'all'
        qualified_names = []
        for node in graph.nodes(data=True):
            name = InfoGraphNode.get_name(node)
            layer = InfoGraphNode.get_layer(node)
            kind = InfoGraphNode.get_type(node)

            # Either the regular telemetry agent metrics or the
            # utilization metrics are inspected, never both.
            if use_telemetry:
                frame = InfoGraphNode.get_telemetry_data(node)
            else:
                frame = InfoGraphNode.get_utilization(node)

            for column in frame.columns.values:
                if column == 'timestamp':
                    continue
                qualified = "{}@{}@{}@{}".format(name, layer, kind, column)
                qualified_names.append(qualified.replace(".", "_"))
        return qualified_names
예제 #3
0
    def get_correlation(node_a, node_b, metric_a, metric_b):
        """
        Correlate one metric of node_a with one metric of node_b.

        The two series are aligned on their 'timestamp' column; only the
        timestamps for which both metrics have a value are used.

        :param node_a: first graph node
        :param node_b: second graph node
        :param metric_a: column of node_a to correlate; 'utilization' reads
            the utilization data of the node instead of its telemetry data
        :param metric_b: column of node_b to correlate; 'utilization' reads
            the utilization data of the node instead of its telemetry data
        :return: 0 when both nodes have no data at all, otherwise the value
            computed by pandas Series.corr (NaN when it is undefined)
        :raises ValueError: if a metric is not a column of its node's data
        """
        # TODO: Add node validation

        node_name_a = InfoGraphNode.get_name(node_a)
        node_name_b = InfoGraphNode.get_name(node_b)

        if metric_a == 'utilization':
            telemetry_a = InfoGraphNode.get_utilization(node_a)
        else:
            telemetry_a = InfoGraphNode.get_telemetry_data(node_a)

        if metric_b == 'utilization':
            telemetry_b = InfoGraphNode.get_utilization(node_b)
        else:
            telemetry_b = InfoGraphNode.get_telemetry_data(node_b)

        if metric_a not in telemetry_a.columns.values:
            raise ValueError(
                "Metric {} is not in Telemetry data of Node {}".format(
                    metric_a, node_name_a))
        if metric_b not in telemetry_b.columns.values:
            raise ValueError(
                "Metric {} is not in Telemetry data of Node {}".format(
                    metric_b, node_name_b))
        # NOTE(review): when only one side is empty the code falls through
        # and the result is NaN - confirm whether `or` was intended here.
        if telemetry_a.empty and telemetry_b.empty:
            return 0

        col_a = "a-{}".format(metric_a)
        col_b = "b-{}".format(metric_b)

        # Keep only the columns that take part in the correlation: the
        # float cast cannot fail on an unrelated column, and dropna() does
        # not discard a row because some unrelated metric is missing.
        df_a = telemetry_a[['timestamp', metric_a]].\
            rename(columns={metric_a: col_a}).astype(float)
        df_b = telemetry_b[['timestamp', metric_b]].\
            rename(columns={metric_b: col_b}).astype(float)

        correlation = pandas.merge(df_a, df_b, how='outer', on='timestamp')
        correlation = correlation.dropna()
        return correlation[col_a].corr(correlation[col_b])
예제 #4
0
    def filter_graph(graph):
        """
        Return a copy of the graph filtered of the nodes with no telemetry.

        The work is done in three passes over the copy:
          1. the 'template' attribute of each node is set aside and the
             remaining attributes are turned into a JSON-like string;
          2. every node outside of the service layer whose telemetry has at
             most one column gets its telemetry reset and is handed to
             ``filter_nodes``;
          3. the attribute strings are converted back to dicts and the
             templates set aside in pass 1 are restored on the service
             compute nodes.

        :param graph: graph to filter; it must provide ``copy()``,
            ``nodes(data=True)`` and ``filter_nodes()``
        :return: the filtered copy of the graph
        """
        template_mapping = dict()

        res = graph.copy()
        for node in res.nodes(data=True):
            template = node[1]['attributes']['template'] \
                if 'template' in node[1]['attributes'] else None

            # If node is a service node, need to remove the template
            if template:
                template_mapping[InfoGraphNode.get_name(node)] = template
                node[1]['attributes'].pop('template')

            # Fix format for conversion to JSON (happening in analytics)
            node[1]['attributes'] = \
                str(misc.convert_unicode_dict_to_string(
                    node[1]['attributes'])).replace("'", '"')

        # Iterate over a snapshot of the nodes: filter_nodes() is called
        # inside the loop and presumably alters the node set of the graph
        # (TODO confirm), which must not happen under a live iteration.
        for node in list(res.nodes(data=True)):
            node_name = InfoGraphNode.get_name(node)
            telemetry = InfoGraphNode.get_telemetry_data(node)
            layer = InfoGraphNode.get_layer(node)

            if len(telemetry.columns) <= 1 and \
                    not layer == InfoGraphNodeLayer.SERVICE:
                InfoGraphNode.set_telemetry_data(node, dict())
                res.filter_nodes('node_name', node_name)

        # Convert attributes back to dict()
        for node in res.nodes(data=True):
            string = InfoGraphNode.get_attributes(node)
            attrs = InfoGraphUtilities.str_to_dict(string)
            if InfoGraphNode.get_type(node) == \
                    InfoGraphNodeType.SERVICE_COMPUTE:
                node_name = InfoGraphNode.get_name(node)
                # Only restore what the first pass set aside: a service
                # compute node that had no template must not raise KeyError.
                if node_name in template_mapping:
                    attrs['template'] = template_mapping[node_name]
            InfoGraphNode.set_attributes(node, attrs)
        return res
    def run(self, workload):
        """
        Merge the latest service subgraphs of the workload into one graph.

        The telemetry of all subgraph nodes is first gathered in a dict
        (keyed by node id, or by the 'vm_name' attribute for VM nodes) and
        removed from the nodes, then the subgraphs are merged, and finally
        the gathered telemetry is set on the nodes of the merged graph.
        The merged graph is stored through ``workload.save_results``.

        :param workload: object providing ``get_latest_graph()`` and
            ``save_results()``
        :return: the merged graph, or None when there are no subgraphs or
            none of them contains a node
        """
        # Extract data from Info Core
        service_subgraphs = workload.get_latest_graph()
        if not service_subgraphs:
            return None

        # Single-argument print(...) yields the same output under the
        # Python 2 print statement and the Python 3 print function.
        print("Data merger started " + str(time.time()))

        # First add telemetry data of all nodes to a dictionary
        telemetry = {}
        for subgraph in service_subgraphs:
            for node in subgraph.nodes(data=True):
                node_id = node[0]
                node_tm = InfoGraphNode.get_telemetry_data(node)
                is_vm = InfoGraphNode.node_is_vm(node)
                if not node_tm.empty:
                    if is_vm:
                        node_tm.columns = \
                            tm_utils.clean_vm_telemetry_colnames(
                                node_tm.columns)
                        vm_name = \
                            InfoGraphNode.get_attributes(node).get("vm_name")
                        if vm_name:
                            node_id = vm_name
                    known_tm = telemetry.get(node_id)
                    if isinstance(known_tm, pd.DataFrame):
                        telemetry[node_id] = pd.concat([known_tm, node_tm])
                    else:
                        telemetry[node_id] = node_tm
                # The data now lives in the dictionary: drop it from the
                # node before the subgraphs are merged.
                InfoGraphNode.set_telemetry_data(node, pd.DataFrame())
        print("Data merger finished " + str(time.time()))
        print(telemetry.keys())
        print(len(telemetry))

        # Merge subgraphs, ignoring the ones without nodes
        graph = None
        for subgraph in service_subgraphs:
            if len(subgraph.nodes()) == 0:
                continue
            if graph is None:
                graph = subgraph
            else:
                graphs.merge_graph(graph, subgraph)

        # Every subgraph was empty: there is nothing to annotate or save,
        # and graph.nodes() below would fail on None.
        if graph is None:
            return None

        # Set telemetry data on merged graph
        for node in graph.nodes(data=True):
            node_id = node[0]
            if InfoGraphNode.node_is_vm(node):
                vm_name = InfoGraphNode.get_attributes(node).get("vm_name")
                if vm_name:
                    node_id = vm_name
            tm = telemetry.get(node_id)
            if isinstance(tm, pd.DataFrame):
                if not tm.empty:
                    InfoGraphNode.set_telemetry_data(node, tm)
                    # delete telemetry data so that only one copy exists
                    # in the graph
                    del telemetry[node_id]
            else:
                InfoGraphNode.set_telemetry_data(node, None)
        print("Set telemetry data of merged graph")
        workload.save_results(self.__filter_name__, graph)
        return graph