def get_metrics(graph, metrics='all'):
    """
    List the fully qualified metric names available on the nodes of a graph.

    Each entry has the form
    "<node name>@<node layer>@<node type>@<metric name>", with every "."
    replaced by "_". The 'timestamp' column is never reported.

    :param graph: graph whose nodes carry telemetry/utilization data
    :param metrics: 'all' to read the telemetry data of each node; any
        other value reads the utilization data instead
    :return: list of metric names, in node iteration order
    """
    use_telemetry = metrics == 'all'
    collected = []
    for node in graph.nodes(data=True):
        name = InfoGraphNode.get_name(node)
        layer = InfoGraphNode.get_layer(node)
        kind = InfoGraphNode.get_type(node)

        # Either the raw telemetry frame or the utilization frame is used
        # as the source of column names.
        if use_telemetry:
            frame = InfoGraphNode.get_telemetry_data(node)
        else:
            frame = InfoGraphNode.get_utilization(node)

        for column in frame.columns.values:
            if column == 'timestamp':
                continue
            label = "{}@{}@{}@{}".format(name, layer, kind, column)
            collected.append(label.replace(".", "_"))
    return collected
def _create_pandas_data_frame_from_graph(graph, metrics='all'):
    """
    Merge the metrics of every node of the graph into one pandas DataFrame.

    For each node the metric columns are renamed to
    "<node name>@<node layer>@<node type>@<metric name>" (with "." replaced
    by "_"), the 'timestamp' column is rounded to whole numbers, and the
    per-node frames are outer-joined on 'timestamp'.

    Nodes without data (None, empty frame, or a frame with a single column)
    are skipped. The frames stored on the nodes are not modified.

    :param graph: graph whose nodes carry telemetry/utilization data
    :param metrics: 'all' to use the telemetry data of each node; any other
        value uses the utilization data instead
    :return: (pandas.DataFrame) merged frame, empty if no node has data
    """
    result = None
    for node in graph.nodes(data=True):
        node_name = InfoGraphNode.get_name(node)
        node_layer = InfoGraphNode.get_layer(node)
        node_type = InfoGraphNode.get_type(node)

        # This method supports export of either normal metrics coming
        # from telemetry agent or utilization type of metrics.
        if metrics == 'all':
            source_data = InfoGraphNode.get_telemetry_data(node)
        else:
            source_data = InfoGraphNode.get_utilization(node)

        # Skip nodes with nothing to export *before* touching the
        # 'timestamp' column, so a column-less frame does not raise.
        if source_data is None or source_data.empty \
                or len(source_data.columns) <= 1:
            continue

        # Work on a copy: the conversion below must not alter the frame
        # that is attached to the graph node.
        node_data = source_data.copy()
        node_data['timestamp'] = \
            node_data['timestamp'].astype(float).round().astype(int)

        # Rename all metric columns in a single pass.
        renamed = dict(
            (metric_name,
             "{}@{}@{}@{}".format(
                 node_name, node_layer, node_type, metric_name
             ).replace(".", "_"))
            for metric_name in node_data.columns.values
            if metric_name != 'timestamp')
        node_data = node_data.rename(columns=renamed)

        # Rounding can produce repeated timestamps; keep one row per
        # timestamp for every node (including the first one) so the outer
        # join does not multiply rows.
        node_data = node_data.drop_duplicates(subset='timestamp')

        if result is None:
            result = node_data
        else:
            result = pandas.merge(result, node_data,
                                  how='outer', on='timestamp')

    if result is None:
        return pandas.DataFrame()
    return result
def get_correlation(node_a, node_b, metric_a, metric_b):
    """
    Correlate one metric of node_a with one metric of node_b.

    The two metric series are aligned on the 'timestamp' column (outer
    join), rows where either value is missing are discarded, and the
    correlation is computed with pandas ``Series.corr`` using its default
    arguments.

    :param node_a: first graph node
    :param node_b: second graph node
    :param metric_a: metric of node_a; the value 'utilization' selects the
        utilization data of the node, anything else its telemetry data
    :param metric_b: metric of node_b, same convention as metric_a
    :return: 0 if both data frames are empty, otherwise the value returned
        by ``Series.corr`` (NaN when there are no overlapping samples)
    :raises ValueError: if a metric is not a column of its node's data
    """
    # TODO: Add node validation
    # InfoGraphNode.validateNode(node_a)
    # InfoGraphNode.validateNode(node_b)
    node_name_a = InfoGraphNode.get_name(node_a)
    node_name_b = InfoGraphNode.get_name(node_b)

    if metric_a == 'utilization':
        telemetry_a = InfoGraphNode.get_utilization(node_a)
    else:
        telemetry_a = InfoGraphNode.get_telemetry_data(node_a)

    if metric_b == 'utilization':
        telemetry_b = InfoGraphNode.get_utilization(node_b)
    else:
        telemetry_b = InfoGraphNode.get_telemetry_data(node_b)

    if metric_a not in telemetry_a.columns.values:
        raise ValueError(
            "Metric {} is not in Telemetry data of Node {}".format(
                metric_a, node_name_a))
    if metric_b not in telemetry_b.columns.values:
        raise ValueError(
            "Metric {} is not in Telemetry data of Node {}".format(
                metric_b, node_name_b))

    if telemetry_a.empty and telemetry_b.empty:
        return 0

    column_a = "a-{}".format(metric_a)
    column_b = "b-{}".format(metric_b)

    # Keep only the timestamp and the requested metric: unrelated columns
    # must neither break the float conversion nor cause dropna() to throw
    # away rows that are valid for the two metrics being compared.
    df_a = telemetry_a[['timestamp', metric_a]].\
        rename(columns={metric_a: column_a}).astype(float)
    df_b = telemetry_b[['timestamp', metric_b]].\
        rename(columns={metric_b: column_b}).astype(float)

    correlation = pandas.merge(df_a, df_b, how='outer', on='timestamp')
    correlation = correlation.dropna()
    return correlation[column_a].corr(correlation[column_b])
def machine_capacity_usage(annotated_subgraph):
    """
    This is a type of fingerprint from the infrastructure perspective.

    Works on a copy of the subgraph after applying the 'layer' and 'type'
    node filters, and averages, per resource category, the mean of the
    'utilization' column of every remaining node.

    Nodes without utilization data, without a 'utilization' column, or
    whose mean is NaN (no usable samples) are ignored so that they cannot
    turn a whole category into NaN.

    :param annotated_subgraph: graph annotated with utilization data
    :return: dict[category] = average utilization (0 when no node of that
        category contributed)
    """
    # TODO: Validate graph
    categories = [
        InfoGraphNodeCategory.COMPUTE,
        InfoGraphNodeCategory.NETWORK,
        # TODO: Add a Volume to the workloads to get HD usage
        InfoGraphNodeCategory.STORAGE,
        # TODO: Get telemetry for Memory
        InfoGraphNodeCategory.MEMORY,
    ]

    fingerprint = dict()
    counter = dict()
    for category in categories:
        fingerprint[category] = 0
        counter[category] = 0

    # The filters are applied on a copy so the caller's graph is untouched.
    local_subgraph = annotated_subgraph.copy()
    local_subgraph.filter_nodes('layer', "virtual")
    local_subgraph.filter_nodes('layer', "service")
    local_subgraph.filter_nodes('type', 'machine')

    for node in local_subgraph.nodes(data=True):
        # if Fingerprint._node_is_nic_on_management_net(
        #         node, annotated_subgraph, mng_net_name):
        #     continue
        category = InfoGraphNode.get_category(node)
        utilization = InfoGraphNode.get_utilization(node)
        if utilization is None \
                or 'utilization' not in utilization.columns.values:
            continue

        mean = utilization['utilization'].mean()
        # The mean of an empty / all-NaN column is NaN; skip it instead of
        # poisoning the running total of the category.
        if pandas.isnull(mean):
            continue

        fingerprint[category] += mean
        counter[category] += 1

    # This is just an average
    # TODO: Improve the average
    for category in categories:
        if counter[category] > 0:
            fingerprint[category] = \
                fingerprint[category] / counter[category]
    return fingerprint
def utilization_scores(graph):
    """
    Returns a dictionary with the scores of all the nodes of the graph.

    Every score is the mean of a utilization percentage column divided by
    100. For each node the 'intel/use/...' column of a resource is used
    when present, otherwise the procfs/psutil fallback column. Machine
    nodes have compute, disk and network recomputed from their dedicated
    utilization frames (mean over all columns of each row, then mean over
    the rows). Docker container nodes get an additional entry keyed by
    their docker id, computed from the 'intel/docker/stats/...' columns.

    :param graph: InfoGraph
    :return: dict[node_name] = {'compute': score, 'disk': score,
        'network': score, 'memory': score}
    """
    # (resource, preferred column, fallback column)
    generic_columns = (
        ('compute',
         'intel/use/compute/utilization',
         'intel/procfs/cpu/utilization_percentage'),
        ('memory',
         'intel/use/memory/utilization',
         'intel/procfs/memory/utilization_percentage'),
        ('disk',
         'intel/use/disk/utilization',
         'intel/procfs/disk/utilization_percentage'),
        ('network',
         'intel/use/network/utilization',
         'intel/psutil/net/utilization_percentage'),
    )
    # (resource, docker stats column)
    docker_columns = (
        ('compute',
         'intel/docker/stats/cgroups/cpu_stats/cpu_usage/percentage'),
        ('memory',
         'intel/docker/stats/cgroups/memory_stats/usage/percentage'),
        ('network',
         'intel/docker/stats/network/utilization_percentage'),
        ('disk',
         'intel/docker/stats/cgroups/blkio_stats/io_time_recursive/'
         'percentage'),
    )

    def _mean_of_row_means(frame):
        # Average every row across its columns, then average the rows.
        # The result is computed on the side so that no helper column is
        # written into the frame returned by the node getters.
        row_means = [sum(row) / len(row) for _, row in frame.iterrows()]
        return pandas.Series(row_means).mean() / 100

    res = dict()
    for node in graph.nodes(data=True):
        node_name = InfoGraphNode.get_name(node)
        res[node_name] = {'compute': 0, 'disk': 0, 'network': 0, 'memory': 0}

        util = InfoGraphNode.get_utilization(node)
        # Nodes without utilization data keep the all-zero scores.
        if util is None or \
                (isinstance(util, pandas.DataFrame) and util.empty):
            continue

        # intel/use/ metrics first, procfs/psutil metrics as fallback
        for resource, preferred, fallback in generic_columns:
            if preferred in util:
                res[node_name][resource] = \
                    util.get(preferred).mean() / 100.0
            elif fallback in util:
                res[node_name][resource] = \
                    util.get(fallback).mean() / 100.0

        # special handling of cpu, disk & network utilization if node is a
        # machine
        if InfoGraphNode.node_is_machine(node):
            # mean from all cpu columns; an empty frame scores 0.0, the
            # same way the disk and network frames do
            cpu_util = InfoGraphNode.get_compute_utilization(node)
            if cpu_util.empty:
                res[node_name]['compute'] = 0.0
            else:
                res[node_name]['compute'] = _mean_of_row_means(cpu_util)

            # mean from all disk columns
            disk_util = InfoGraphNode.get_disk_utilization(node)
            if disk_util.empty:
                res[node_name]['disk'] = 0.0
            else:
                res[node_name]['disk'] = _mean_of_row_means(disk_util)

            # mean from all nic columns
            net_util = InfoGraphNode.get_network_utilization(node)
            if net_util.empty:
                res[node_name]['network'] = 0.0
            else:
                res[node_name]['network'] = _mean_of_row_means(net_util)

        # custom metric: containers are additionally reported under their
        # docker id, using the docker stats columns only
        if InfoGraphNode.get_type(
                node) == InfoGraphNodeType.DOCKER_CONTAINER:
            node_name = InfoGraphNode.get_docker_id(node)
            res[node_name] = {}
            for resource, column in docker_columns:
                if column in util.columns:
                    res[node_name][resource] = util[column].mean() / 100
                else:
                    res[node_name][resource] = 0
    return res
def compute_node_resources(annotated_subgraph, hostname=None):
    """
    This is a type of fingerprint from the infrastructure perspective.

    Collects, per node, the utilization data (without the 'timestamp'
    column) and summary statistics of its 'utilization' column. Nodes of
    the virtual and service layers and nodes of type 'core' are skipped.

    :param annotated_subgraph: graph annotated with utilization data
    :param hostname: when given, only nodes whose 'allocation' attribute
        equals this value are considered
    :return: [data, statistics] where data[node_name] is the utilization
        DataFrame and statistics[node_name] is a dict with the keys
        'mean', 'median', 'min', 'max', 'var' and 'std_dev' (all 0 when
        the node has no utilization samples)
    """
    # TODO: Validate graph
    data = dict()
    statistics = dict()

    local_subgraph = annotated_subgraph.copy()

    for node in local_subgraph.nodes(data=True):
        layer = InfoGraphNode.get_layer(node)
        if layer == InfoGraphNodeLayer.VIRTUAL:
            continue
        if layer == InfoGraphNodeLayer.SERVICE:
            continue
        if InfoGraphNode.get_type(node) == 'core':
            continue

        # If hostname has been specified, need to take into account only
        # nodes that are related to the specific host
        attrs = InfoGraphNode.get_attributes(node)
        allocation = attrs['allocation'] if 'allocation' in attrs \
            else None
        if hostname and not hostname == allocation:
            continue

        name = InfoGraphNode.get_name(node)
        utilization = InfoGraphNode.get_utilization(node)
        # axis is passed by keyword (the positional form is rejected by
        # recent pandas) and a missing 'timestamp' column is ignored
        # instead of relying on a specific exception type.
        utilization = utilization.drop(
            'timestamp', axis=1, errors='ignore')
        data[name] = utilization

        if not utilization.empty:
            samples = utilization['utilization']
            variance = samples.var()
            statistics[name] = {
                'mean': samples.mean(),
                'median': samples.median(),
                'min': samples.min(),
                'max': samples.max(),
                'var': variance,
                'std_dev': math.sqrt(variance),
            }
        else:
            statistics[name] = {
                'mean': 0,
                'median': 0,
                'min': 0,
                'max': 0,
                'var': 0,
                'std_dev': 0,
            }

    return [data, statistics]
def compute_node(annotated_subgraph, hostname=None):
    """
    This is a type of fingerprint from the infrastructure perspective.

    Concatenates, per resource category (compute, network, storage,
    memory), the utilization data of the nodes and computes summary
    statistics of the 'utilization' column of each category. Machine nodes
    and nodes of the virtual and service layers are skipped.

    :param annotated_subgraph: graph annotated with utilization data
    :param hostname: when given, only nodes whose 'allocation' attribute
        equals this value are considered
    :return: [data, statistics] where data[category] is the concatenated
        utilization DataFrame (without 'timestamp') and
        statistics[category] is a dict with the keys 'mean', 'median',
        'min', 'max', 'var' and 'std_dev' (all 0 when the category has no
        samples)
    """
    def _summarize(frame):
        # Summary statistics of the 'utilization' column of a frame.
        if frame.empty:
            return {'mean': 0, 'median': 0, 'min': 0, 'max': 0,
                    'var': 0, 'std_dev': 0}
        samples = frame['utilization']
        variance = samples.var()
        return {'mean': samples.mean(),
                'median': samples.median(),
                'min': samples.min(),
                'max': samples.max(),
                'var': variance,
                'std_dev': math.sqrt(variance)}

    # TODO: Validate graph
    categories = (
        InfoGraphNodeCategory.COMPUTE,
        InfoGraphNodeCategory.NETWORK,
        InfoGraphNodeCategory.STORAGE,
        InfoGraphNodeCategory.MEMORY,
    )
    data = dict()
    for category in categories:
        data[category] = pandas.DataFrame()

    local_subgraph = annotated_subgraph.copy()

    for node in local_subgraph.nodes(data=True):
        layer = InfoGraphNode.get_layer(node)
        is_machine = InfoGraphNode.node_is_machine(node)
        if is_machine:
            continue
        if layer == InfoGraphNodeLayer.VIRTUAL:
            continue
        if layer == InfoGraphNodeLayer.SERVICE:
            continue

        # If hostname has been specified, need to take into account only
        # nodes that are related to the specific host
        attrs = InfoGraphNode.get_attributes(node)
        allocation = attrs['allocation'] if 'allocation' in attrs \
            else None
        if hostname and not hostname == allocation:
            continue

        category = InfoGraphNode.get_category(node)
        utilization = InfoGraphNode.get_utilization(node)
        # axis is passed by keyword (the positional form is rejected by
        # recent pandas) and a missing 'timestamp' column is ignored
        # instead of relying on a specific exception type.
        utilization = utilization.drop(
            'timestamp', axis=1, errors='ignore')
        data[category] = pandas.concat([data[category], utilization])

    statistics = dict()
    for category in categories:
        statistics[category] = _summarize(data[category])

    return [data, statistics]