def _create_pandas_data_frame_from_graph(graph, metrics='all'):
    """
    Merge the telemetry of every node in the graph into one pandas
    DataFrame, outer-joined on the 'timestamp' column.

    Each metric column is renamed to
    "<node_name>@<layer>@<type>@<metric_name>" (with '.' replaced by
    '_') so columns from different nodes never collide.

    :param graph: (NetworkX Graph) graph whose nodes carry telemetry
    :param metrics: 'all' to export the raw telemetry of each node,
                    anything else to export its utilization data
    :return: (pandas.DataFrame) one row per integer timestamp, one
             column per node metric
    """
    result = pandas.DataFrame()
    for node in graph.nodes(data=True):
        node_name = InfoGraphNode.get_name(node)
        node_layer = InfoGraphNode.get_layer(node)
        node_type = InfoGraphNode.get_type(node)

        # This method supports export of either normal metrics coming
        # from the telemetry agent or utilization type of metrics.
        if metrics == 'all':
            node_telemetry_data = InfoGraphNode.get_telemetry_data(node)
        else:
            node_telemetry_data = InfoGraphNode.get_utilization(node)

        # Normalize timestamps to integers so frames from different
        # nodes join on exactly-equal keys.
        # NOTE(review): this mutates the node's own DataFrame in place —
        # confirm callers do not rely on the original float timestamps.
        node_telemetry_data['timestamp'] = \
            node_telemetry_data['timestamp'].astype(float).round().astype(int)

        # Build the whole rename map once instead of calling rename()
        # (which copies the frame) once per metric column.
        rename_map = {
            metric_name: "{}@{}@{}@{}".format(
                node_name, node_layer, node_type,
                metric_name).replace(".", "_")
            for metric_name in node_telemetry_data.columns.values
            if metric_name != 'timestamp'
        }
        node_telemetry_data = node_telemetry_data.rename(columns=rename_map)

        # Skip nodes carrying no metric besides the timestamp column.
        if node_telemetry_data.empty or len(node_telemetry_data.columns) <= 1:
            continue

        if result.empty:
            result = node_telemetry_data.copy()
        else:
            node_telemetry_data = \
                node_telemetry_data.drop_duplicates(subset='timestamp')
            result = pandas.merge(result, node_telemetry_data,
                                  how='outer', on='timestamp')
    return result
def get_metrics(graph, metrics='all'):
    """
    Collect the fully-qualified metric names available on a graph.

    :param graph: (NetworkX Graph) graph whose nodes carry telemetry
    :param metrics: 'all' for plain telemetry metrics, anything else
                    for utilization metrics
    :return: (list) names shaped as "<node>@<layer>@<type>@<metric>",
             with every '.' replaced by '_'
    """
    collected = []
    for graph_node in graph.nodes(data=True):
        # Pick the same data source the exporter uses: raw telemetry
        # by default, utilization data otherwise.
        if metrics == 'all':
            frame = InfoGraphNode.get_telemetry_data(graph_node)
        else:
            frame = InfoGraphNode.get_utilization(graph_node)

        prefix = "{}@{}@{}".format(
            InfoGraphNode.get_name(graph_node),
            InfoGraphNode.get_layer(graph_node),
            InfoGraphNode.get_type(graph_node))
        for column in frame.columns.values:
            if column == 'timestamp':
                continue
            collected.append(
                "{}@{}".format(prefix, column).replace(".", "_"))
    return collected
def get_correlation(node_a, node_b, metric_a, metric_b):
    """
    Return the Pearson correlation between metric_a of node_a and
    metric_b of node_b, computed over the timestamps both share.

    :param node_a: graph node providing the first metric
    :param node_b: graph node providing the second metric
    :param metric_a: column name in node_a's data; the literal string
                     'utilization' selects the utilization frame
    :param metric_b: column name in node_b's data; same convention
    :return: (float) correlation coefficient, or 0 when both frames
             are empty
    :raises ValueError: if a metric is missing from its node's data
    """
    # TODO: Add node validation
    # InfoGraphNode.validateNode(node_a)
    # InfoGraphNode.validateNode(node_b)
    node_name_a = InfoGraphNode.get_name(node_a)
    node_name_b = InfoGraphNode.get_name(node_b)

    if metric_a == 'utilization':
        telemetry_a = InfoGraphNode.get_utilization(node_a)
    else:
        telemetry_a = InfoGraphNode.get_telemetry_data(node_a)
    if metric_b == 'utilization':
        telemetry_b = InfoGraphNode.get_utilization(node_b)
    else:
        telemetry_b = InfoGraphNode.get_telemetry_data(node_b)

    if metric_a not in telemetry_a.columns.values:
        raise ValueError(
            "Metric {} is not in Telemetry data of Node {}".format(
                metric_a, node_name_a))
    if metric_b not in telemetry_b.columns.values:
        raise ValueError(
            "Metric {} is not in Telemetry data of Node {}".format(
                metric_b, node_name_b))

    # NOTE(review): this only short-circuits when BOTH frames are
    # empty; a single empty frame falls through and yields NaN from
    # corr() — confirm that is the intended contract.
    if telemetry_a.empty and telemetry_b.empty:
        return 0

    # Removed: a dead `telemetry_a.corrwith(telemetry_b)` call whose
    # (expensive) result was immediately overwritten below.

    # Prefix the metric columns so they remain distinguishable after
    # the merge, even when metric_a == metric_b.
    df_a = telemetry_a.\
        rename(columns={metric_a: "a-{}".format(metric_a)}).astype(float)
    df_b = telemetry_b.\
        rename(columns={metric_b: "b-{}".format(metric_b)}).astype(float)

    # Align the two series on timestamp and drop rows either side lacks.
    correlation = pandas.merge(df_a, df_b, how='outer', on='timestamp')
    correlation = correlation.dropna()
    res = correlation["a-{}".format(metric_a)].\
        corr(correlation["b-{}".format(metric_b)])
    return res
def filter_graph(graph):
    """
    Returns the graph filtered removing all the nodes with no telemetry.

    Works on a copy of the input graph.  Node attributes are converted
    to a JSON-friendly string form (service templates stashed aside
    first), telemetry-less non-service nodes are cleared and passed to
    filter_nodes(), and attributes are then converted back to dicts
    with the templates restored.
    """
    template_mapping = dict()
    res = graph.copy()
    for node in res.nodes(data=True):
        # If node is a service node, need to remove the template
        template = node[1]['attributes']['template'] \
            if 'template' in node[1]['attributes'] else None
        if template:
            # Stash the template keyed by node name so it can be
            # restored after the attributes round-trip below.
            template_mapping[InfoGraphNode.get_name(node)] = template
            node[1]['attributes'].pop('template')
        # Fix format for conversion to JSON (happening in analytics):
        # stringify the dict and swap quotes so it parses as JSON.
        node[1]['attributes'] = \
            str(misc.convert_unicode_dict_to_string(node[1]['attributes'])).\
            replace("'", '"')

    for node in res.nodes(data=True):
        node_name = InfoGraphNode.get_name(node)
        telemetry = InfoGraphNode.get_telemetry_data(node)
        layer = InfoGraphNode.get_layer(node)
        # A frame with <= 1 column carries only 'timestamp', i.e. no
        # actual metric data.  Service-layer nodes are kept regardless.
        # if len(telemetry.columns.values) <= 1:
        if len(telemetry.columns) <= 1 and \
                not layer == InfoGraphNodeLayer.SERVICE:
            InfoGraphNode.set_telemetry_data(node, dict())
            # NOTE(review): 'node_name' is passed as a literal property
            # name — confirm filter_nodes(property, value) is the
            # intended signature of the graph class.
            res.filter_nodes('node_name', node_name)

    # Convert attributes back to dict()
    for node in res.nodes(data=True):
        string = InfoGraphNode.get_attributes(node)
        attrs = InfoGraphUtilities.str_to_dict(string)
        if InfoGraphNode.get_type(node) == \
                InfoGraphNodeType.SERVICE_COMPUTE:
            # presumably every SERVICE_COMPUTE node had a template
            # stashed above — a missing entry raises KeyError; verify.
            attrs['template'] = \
                template_mapping[InfoGraphNode.get_name(node)]
        InfoGraphNode.set_attributes(node, attrs)
    return res
def run(self, workload):
    """
    Merge the workload's latest service subgraphs into a single graph
    carrying consolidated telemetry.

    Telemetry of every node is first pulled into a per-node-id dict
    (VM nodes keyed by their 'vm_name' attribute when present) and
    stripped from the subgraph nodes, the subgraphs are merged, and the
    accumulated telemetry is set back on the merged graph's nodes.
    The result is saved via workload.save_results() and returned.
    """
    tmp_path = "/media/iolie/WORK/data/"
    # Extract data from Info Core
    service_subgraphs = workload.get_latest_graph()
    telemetry = {}
    cols = []
    if not service_subgraphs or len(service_subgraphs) == 0:
        return

    # first add telemetry data of all nodes to a dictionary
    print "Data merger started " + str(time.time())
    for subgraph in service_subgraphs:
        for node in subgraph.nodes(data=True):
            node_id = node[0]
            node_tm = InfoGraphNode.get_telemetry_data(node)
            if InfoGraphNode.node_is_vm(node):
                if not node_tm.empty:
                    node_tm.columns = tm_utils.clean_vm_telemetry_colnames(node_tm.columns)
                # Key VM telemetry by vm_name so the same VM seen in
                # several subgraphs accumulates into one entry.
                vm_name = InfoGraphNode.get_attributes(node).get("vm_name")
                if vm_name:
                    node_id = vm_name
            if not node_tm.empty:
                tm = telemetry.get(node_id)
                if not isinstance(tm, pd.DataFrame):
                    # First sighting of this node id.
                    telemetry[node_id] = node_tm
                else:
                    # Subsequent sighting: append rows.
                    telemetry[node_id] = pd.concat([tm, node_tm])
            # Strip telemetry off the subgraph node to keep the merge
            # below cheap; it is restored on the merged graph later.
            InfoGraphNode.set_telemetry_data(node, pd.DataFrame())
    print "Data merger finished " + str(time.time())
    print telemetry.keys()
    print len(telemetry)

    # merge subgraphs (the first non-empty one seeds the result)
    graph = None
    counter = 0
    for subgraph in service_subgraphs:
        counter = counter + 1
        if not graph and len(subgraph.nodes()) > 0:
            graph = subgraph
        elif len(subgraph.nodes()) > 0:
            graphs.merge_graph(graph, subgraph)

    # set telemetry data on merged graph
    for node in graph.nodes(data=True):
        node_id = node[0]
        if InfoGraphNode.node_is_vm(node):
            # Same vm_name keying as in the collection pass above.
            vm_name = InfoGraphNode.get_attributes(node).get("vm_name")
            if vm_name:
                node_id = vm_name
        tm = telemetry.get(node_id)
        # NOTE(review): nesting of the del/else below reconstructed
        # from a whitespace-mangled source — verify against history.
        if isinstance(tm, pd.DataFrame):
            if not tm.empty:
                InfoGraphNode.set_telemetry_data(node, tm)
                # delete telemetry data so that only one copy exists
                # in the graph
                del telemetry[node_id]
        else:
            InfoGraphNode.set_telemetry_data(node, None)
    print "Set telemetry data of merged graph"
    workload.save_results(self.__filter_name__, graph)
    return graph