def annotate_machine_network_util(internal_graph, node):
    """Copy this node's NIC utilization series onto its hosting machine.

    No-op (debug log only) when the host already carries the
    'intel/use/network/utilization' column.
    """
    host_id = InfoGraphNode.get_attributes(node)['allocation']
    host = InfoGraphNode.get_node(internal_graph, host_id)
    host_net_df = InfoGraphNode.get_network_utilization(host)
    if 'intel/use/network/utilization' in host_net_df.columns:
        LOG.debug('Found use network for node {}'.format(InfoGraphNode.get_name(node)))
        return
    metric_name = 'intel/psutil/net/utilization_percentage'
    node_net_df = InfoGraphNode.get_network_utilization(node)
    if metric_name not in node_net_df.columns:
        LOG.info('Net util not Found use for node {}'.format(InfoGraphNode.get_name(node)))
        return
    # Store the node's series as a column named after the node itself.
    series = node_net_df[metric_name].fillna(0)
    host_net_df[InfoGraphNode.get_attributes(node)['name']] = series
    InfoGraphNode.set_network_utilization(host, host_net_df)
def _source(self, node):
    """Resolve the physical host identifier that `node` is allocated on.

    Returns None when no source can be determined.
    """
    attrs = InfoGraphNode.get_attributes(node)
    if InfoGraphNode.get_layer(node) == GRAPH_LAYER.PHYSICAL:
        if 'allocation' in attrs:
            return attrs['allocation']
        # fix due to the landscape: 'allocation' may be buried inside
        # nested 'attributes' dicts — descend until found or exhausted.
        while attrs.get('attributes', None):
            attrs = attrs['attributes']
            if 'allocation' in attrs:
                return attrs['allocation']
    if InfoGraphNode.get_type(node) == NODE_TYPE.VIRTUAL_MACHINE and 'vm_name' in attrs:
        return attrs['vm_name']
    if InfoGraphNode.get_type(node) == NODE_TYPE.INSTANCE_DISK:
        # The machine is the source as this is a libvirt disk:
        # disk -> owning VM -> physical machine.
        disk_name = InfoGraphNode.get_name(node)
        vm = self.landscape.get_neighbour_by_type(
            disk_name, NODE_TYPE.VIRTUAL_MACHINE)
        return self.landscape.get_neighbour_by_type(
            vm, NODE_TYPE.PHYSICAL_MACHINE)
    if InfoGraphNode.get_type(node) == NODE_TYPE.PHYSICAL_MACHINE and 'name' in attrs:
        return attrs['name']
    if InfoGraphNode.get_type(node) == NODE_TYPE.DOCKER_CONTAINER:
        docker_node = self.landscape.get_neighbour_by_type(
            InfoGraphNode.get_name(node), 'docker_node')
        if docker_node:
            return self.landscape.get_neighbour_by_type(
                docker_node, 'machine')
    return None
def _get_workload_subgraph(self, stack_name, ts_from=None, ts_to=None):
    """Fetch the landscape subgraph for a stack (or, failing that, a service).

    :param stack_name: stack or service name to look up.
    :param ts_from: window start timestamp (optional).
    :param ts_to: window end timestamp (optional).
    :return: subgraph with node attributes parsed into dicts, or None.
    """
    res = None
    try:
        # Get the node ID for the stack_name and query the landscape.
        properties = [("stack_name", stack_name)]
        try:
            time_window = ts_to - ts_from
        except TypeError:
            # Either bound missing/non-numeric: default 10-minute window.
            time_window = 600
        landscape_res = landscape.get_node_by_properties(
            properties, ts_from, time_window)
        if not landscape_res:
            LOG.info("No graph for a stack returned from analytics")
            # Try a service name instead.
            properties = [("service_name", stack_name)]
            landscape_res = landscape.get_node_by_properties(
                properties, ts_from, time_window)
            if not landscape_res:
                LOG.info("No graph for a service returned from analytics")
                return None
        res = landscape.get_subgraph(landscape_res.nodes()[0], ts_from,
                                     time_window)
    except Exception as e:
        LOG.debug('Something went seriously wrong.')
        LOG.error(e)
    # Bug fix: previously a failed lookup left res = None and the loop
    # below raised AttributeError; bail out instead.
    if res is None:
        return None
    # Node attributes arrive stringified; parse them back into dicts.
    for node in res.nodes(data=True):
        attrs = InfoGraphNode.get_attributes(node)
        attrs = InfoGraphUtilities.str_to_dict(attrs)
        InfoGraphNode.set_attributes(node, attrs)
    return res
def annotate_machine_disk_util(internal_graph, node):
    """Copy this node's disk utilization series onto its hosting machine.

    No-op (debug log only) when the host already carries the
    'intel/use/disk/utilization' column.
    """
    host_id = InfoGraphNode.get_attributes(node)['allocation']
    host = InfoGraphNode.get_node(internal_graph, host_id)
    host_disk_df = InfoGraphNode.get_disk_utilization(host)
    if 'intel/use/disk/utilization' in host_disk_df.columns:
        LOG.debug('Found use disk for node {}'.format(InfoGraphNode.get_name(node)))
        return
    metric_name = 'intel/procfs/disk/utilization_percentage'
    node_disk_df = InfoGraphNode.get_disk_utilization(node)
    if metric_name not in node_disk_df.columns:
        LOG.info('Disk util not Found use for node {}'.format(InfoGraphNode.get_name(node)))
        return
    # Store the node's series as a column named after the node itself.
    series = node_disk_df[metric_name].fillna(0)
    host_disk_df[InfoGraphNode.get_attributes(node)['name']] = series
    InfoGraphNode.set_disk_utilization(host, host_disk_df)
def run(self, workload, service_type="stack", telemetry_system='snap'):
    """Extract and annotate historical service subgraphs for `workload`.

    Queries the landscape for historical service nodes, pulls at most
    SUBGRAPH_LIMIT subgraphs (newest first) and annotates each with
    telemetry, saving the list on the workload under this filter's name.

    :param workload: workload object providing name and result storage.
    :param service_type: landscape service node type (default "stack").
    :param telemetry_system: telemetry backend tag passed to annotation.
    :return: list of annotated subgraphs (possibly empty).
    """
    # Extract data from Info Core
    service_subgraphs = list()
    try:
        LOG.debug("Workload: {}".format(workload.get_workload_name()))
        landscape_ip = ConfigHelper.get("LANDSCAPE", "host")
        landscape_port = ConfigHelper.get("LANDSCAPE", "port")
        landscape.set_landscape_server(host=landscape_ip,
                                       port=landscape_port)
        sge = SubGraphExtraction(landscape_ip, landscape_port)
        res = sge.get_hist_service_nodes(service_type,
                                         workload.get_workload_name())
        # (node_id, from_ts, to_ts) triples for every returned node.
        nodes = [(node[0],
                  InfoGraphNode.get_attributes(node).get('from'),
                  InfoGraphNode.get_attributes(node).get('to'))
                 for node in res.nodes(data=True)]
        # Newest first, per self.node_sort.
        nodes.sort(reverse=True, key=self.node_sort)
        #pr = cProfile.Profile()
        counter = 0
        for node in nodes:
            #pr.enable()
            node_id = node[0]
            # NOTE(review): window is anchored at "now" with a negative
            # span (tf) rather than the node's own 'to' timestamp —
            # presumably the landscape API treats a negative value as
            # "look back"; confirm against landscape.get_subgraph.
            from_ts = int(time.time())
            # to_ts = int(attrs['to'])
            tf = from_ts * -1
            subgraph = landscape.get_subgraph(node_id, from_ts, tf)
            if len(subgraph.nodes()) > 0:
                annotated_subgraph = SubgraphUtilities.graph_telemetry_annotation(
                    subgraph, node[1], node[2], telemetry_system)
                service_subgraphs.append(annotated_subgraph)
            #print "cProfile Stats"+node_id
            #print "=============="
            #pr.print_stats(sort='time')
            #print "=============="
            #pr.disable()
            counter = counter + 1
            # Cap the amount of history processed.
            if counter == SUBGRAPH_LIMIT:
                break
    except Exception as e:
        LOG.error(e)
        LOG.error("No topology data has been found for the selected "
                  "workload.")
        import traceback
        traceback.print_exc()
        exit()
    workload.save_results(self.__filter_name__, service_subgraphs)
    return service_subgraphs
def _tags(self, metric, node): tags = {} attrs = InfoGraphNode.get_attributes(node) tag_keys = self._tag_keys(metric, node) for tag_key in tag_keys: tag_value = self._tag_value(tag_key, node, metric) tags[tag_key] = tag_value return tags
def _nic(self, node):
    """Return the OS-level NIC name for a physical-NIC node, else None."""
    if InfoGraphNode.get_type(node) != NODE_TYPE.PHYSICAL_NIC:
        return None
    attrs = InfoGraphNode.get_attributes(node)
    return attrs.get('osdev_network-name')
def _disk(self, node):
    """Return the storage device name associated with `node`, if any."""
    node_type = InfoGraphNode.get_type(node)
    if node_type in (NODE_TYPE.PHYSICAL_DISK, NODE_TYPE.PHYSICAL_MACHINE):
        return InfoGraphNode.get_attributes(node).get('osdev_storage-name')
    if node_type == NODE_TYPE.INSTANCE_DISK:
        # Instance-disk node names embed the device after the first '_'.
        return InfoGraphNode.get_name(node).split("_")[1]
    return None
def _pu(self, node, metric):
    """Return the PU identifier for `node`, 'cpuN'-prefixed for per-cpu metrics."""
    pu = None
    node_type = InfoGraphNode.get_type(node)
    if node_type in (NODE_TYPE.PHYSICAL_PU, NODE_TYPE.PHYSICAL_MACHINE):
        pu = InfoGraphNode.get_attributes(node).get('os_index')
    # Some collectors tag with 'cpu<N>' rather than the bare index.
    is_per_cpu_metric = ('intel/proc/schedstat/cpu/' in metric or
                         'intel/psutil/cpu/' in metric)
    if pu and is_per_cpu_metric:
        pu = "cpu{}".format(pu)
    return pu
def _get_compute_node_subgraph(self, compute_node, ts_from=None, ts_to=None):
    """Fetch the machine-typed subgraph at `ts_to` with attributes parsed.

    NOTE(review): `compute_node` and `ts_from` are accepted but unused —
    presumably kept for interface symmetry with sibling extractors.
    """
    graph = self.db.get_subgraph('type', 'machine', timestamp=ts_to)
    for node in graph.nodes(data=True):
        raw_attrs = InfoGraphNode.get_attributes(node)
        InfoGraphNode.set_attributes(node,
                                     InfoGraphUtilities.str_to_dict(raw_attrs))
    return graph
def get_node_subgraph(self, node_id, ts_from=None, ts_to=None):
    """Fetch the subgraph rooted at `node_id` with attributes parsed.

    :param node_id: landscape node identifier.
    :param ts_from: window start timestamp (optional).
    :param ts_to: window end timestamp (optional).
    :return: subgraph with stringified node attributes parsed to dicts.
    """
    try:
        time_window = ts_to - ts_from
    except TypeError:
        # Bug fix: was a bare `except:`. Either bound missing/non-numeric
        # falls back to a 10-minute window.
        time_window = 600
    landscape_res = landscape.get_subgraph(node_id, ts_from, time_window)
    for node in landscape_res.nodes(data=True):
        attrs = InfoGraphNode.get_attributes(node)
        attrs = InfoGraphUtilities.str_to_dict(attrs)
        InfoGraphNode.set_attributes(node, attrs)
    return landscape_res
def _nic(self, node, tag_key):
    """Resolve the NIC identifier for a physical-NIC node.

    For tag_key 'hardware_addr' the MAC address is returned; otherwise
    the OS device name or plain name attribute, in that preference order.
    """
    if InfoGraphNode.get_type(node) != NODE_TYPE.PHYSICAL_NIC:
        return None
    attrs = InfoGraphNode.get_attributes(node)
    if tag_key == "hardware_addr":
        return attrs["address"]
    if 'osdev_network-name' in attrs:
        return attrs["osdev_network-name"]
    if 'name' in attrs:
        return attrs["name"]
    return None
def run(self, workload):
    """Extract the topology subgraph for `workload` and save it.

    When the workload config carries a 'device_id' the most recent node
    matching '<project>_device_id' is resolved and its subgraph fetched;
    otherwise the service-name-based extraction path is used.

    :param workload: workload object with configuration and time bounds.
    :return: the extracted subgraph (or None on the early-return path).
    """
    # Extract data from Info Core
    subgraph = None
    try:
        LOG.debug("Workload: {}".format(workload.get_workload_name()))
        workload_config = workload.get_configuration()
        if workload_config.get('device_id'):
            # Device-scoped lookup, e.g. 'mf2c_device_id' = <id>.
            prop_name = workload_config.get('project', '') + '_device_id'
            device_id = workload_config['device_id']
            properties = [(prop_name, device_id)]
            ls_utils = LandscapeUtils()
            res = ls_utils.get_node_by_properties(properties, inactive=False)
            nodes = [(node[0],
                      InfoGraphNode.get_attributes(node).get('from'),
                      InfoGraphNode.get_attributes(node).get('to'))
                     for node in res.nodes(data=True)]
            if len(nodes) == 0:
                # No matching device node: nothing to save.
                return
            # Newest node first, per self.node_sort.
            nodes.sort(reverse=True, key=self.node_sort)
            node_id = nodes[0][0]
            sge = SubGraphExtraction()
            subgraph = sge.get_node_subgraph(node_id,
                                             workload.get_ts_from(),
                                             workload.get_ts_to())
        else:
            subgraph = SubgraphUtilities.extract_workload_subgraphs(
                workload.get_service_name(), workload.get_ts_from(),
                workload.get_ts_to())
    except Exception as e:
        LOG.error(e)
        LOG.error("No topology data has been found for the selected "
                  "workload.")
        import traceback
        traceback.print_exc()
        exit()
    workload.save_results(self.__filter_name__, subgraph)
    return subgraph
def extract_infrastructure_graph(workload_name, ts_from, ts_to):
    """
    Returns the entire landscape at the current time, with every node's
    stringified attributes parsed back into dicts (in parallel batches
    when PARALLEL is set).

    NOTE(review): PARALLEL is presumably a module-level flag (the local
    assignment is commented out) — confirm it is defined at import time.

    :param workload_name: unused here; kept for interface compatibility.
    :return: the full landscape graph.
    """
    landscape_ip = ConfigHelper.get("LANDSCAPE", "host")
    landscape_port = ConfigHelper.get("LANDSCAPE", "port")
    subgraph_extraction = SubGraphExtraction(landscape_ip=landscape_ip,
                                             landscape_port=landscape_port)
    # res = subgraph_extraction.get_workload_view_graph(
    #     workload_name, int(ts_from), int(ts_to),
    #     name_filtering_support=True)
    res = landscape.get_graph()
    #PARALLEL = True
    if PARALLEL:
        i = 0
        threads = []
        cpu_count = multiprocessing.cpu_count()
        all_node = res.nodes(data=True)
        # Batch size: nodes per worker thread (integer division).
        no_node_thread = len(res.nodes()) / cpu_count
        node_pool = []
        for node in all_node:
            if i < no_node_thread:
                node_pool.append(node)
                i = i + 1
            else:
                thread1 = ParallelLandscape(
                    i, "Thread-{}".format(InfoGraphNode.get_name(node)), i,
                    node_pool)
                # thread1 = ParallelTelemetryAnnotation(i, "Thread-{}".format(InfoGraphNode.get_name(node)), i,
                #                                       node_pool, internal_graph, self.telemetry, ts_to, ts_from)
                thread1.start()
                threads.append(thread1)
                # Bug fix: the node that triggered the flush was
                # previously dropped (never added to any pool); start the
                # next batch with it instead.
                node_pool = [node]
                i = 1
        if len(node_pool) != 0:
            # Flush the final partial batch.
            thread1 = ParallelLandscape(
                i, "Thread-{}".format(InfoGraphNode.get_name(node)), i,
                node_pool)
            thread1.start()
            threads.append(thread1)
        [t.join() for t in threads]
    else:
        for node in res.nodes(data=True):
            attrs = InfoGraphNode.get_attributes(node)
            attrs = InfoGraphUtilities.str_to_dict(attrs)
            InfoGraphNode.set_attributes(node, attrs)
    return res
def _node_is_nic_on_management_net(node, graph, mng_net_name):
    """True iff `node` is a virtual NIC attached to the named management net."""
    node_type = InfoGraphNode.get_type(node)
    if node_type not in (InfoGraphNodeType.VIRTUAL_NIC,
                         InfoGraphNodeType.VIRTUAL_NIC_2):
        return False
    node_name = InfoGraphNode.get_name(node)
    for neighbor_name in graph.neighbors(node_name):
        neighbor = InfoGraphNode.get_node(graph, neighbor_name)
        if InfoGraphNode.get_type(neighbor) != \
                InfoGraphNodeType.VIRTUAL_NETWORK:
            continue
        network_name = InfoGraphNode.get_attributes(neighbor)['name']
        if network_name == mng_net_name:
            return True
    return False
def annotate_machine_pu_util(internal_graph, node):
    """Copy a PU node's CPU utilization series onto its host machine.

    No-op (debug log only) when the host already carries the
    'intel/use/compute/utilization' column.
    """
    source = InfoGraphNode.get_machine_name_of_pu(node)
    machine = InfoGraphNode.get_node(internal_graph, source)
    machine_util = InfoGraphNode.get_compute_utilization(machine)
    if 'intel/use/compute/utilization' not in machine_util.columns:
        # Removed dead local `sum_util` (assigned, never read).
        cpu_metric = 'intel/procfs/cpu/utilization_percentage'
        pu_util_df = InfoGraphNode.get_compute_utilization(node)
        if cpu_metric in pu_util_df.columns:
            # Store the PU's series as a column named after the node.
            pu_util = pu_util_df[cpu_metric].fillna(0)
            machine_util[InfoGraphNode.get_attributes(node)['name']] = pu_util
            InfoGraphNode.set_compute_utilization(machine, machine_util)
        else:
            LOG.info('CPU util not Found use for node {}'.format(InfoGraphNode.get_name(node)))
    else:
        LOG.debug('Found use for node {}'.format(InfoGraphNode.get_name(node)))
def _pu(self, node, metric):
    """Return the PU identifier for `node`, 'cpuN'-prefixed for per-cpu metrics.

    Unwraps landscaper's occasionally nested 'attributes' dicts before
    reading 'os_index'.
    """
    pu = None
    node_type = InfoGraphNode.get_type(node)
    if node_type in (NODE_TYPE.PHYSICAL_PU, NODE_TYPE.PHYSICAL_MACHINE):
        attrs = InfoGraphNode.get_attributes(node)
        # fix attributes from landscaper - fixing permanently on the fly
        # if needed: descend to the innermost 'attributes' dict.
        while attrs.get('attributes', None):
            attrs = attrs['attributes']
        pu = attrs.get('os_index', None)
    # Some collectors tag with 'cpu<N>' rather than the bare index.
    if pu and ('intel/proc/schedstat/cpu/' in metric or
               'intel/psutil/cpu/' in metric):
        pu = "cpu{}".format(pu)
    return pu
def filter_graph(graph):
    """
    Returns the graph filtered removing all the nodes with no telemetry.

    Side work: service-node 'template' attributes are temporarily removed
    (remembered in template_mapping) while attributes are stringified for
    JSON transport, then restored after the attributes are parsed back.
    """
    template_mapping = dict()
    res = graph.copy()
    # Pass 1: strip templates and stringify attributes for JSON.
    for node in res.nodes(data=True):
        # for p in node[1]['attributes']:
        #     p = str(p)
        template = node[1]['attributes']['template'] \
            if 'template' in node[1]['attributes'] else None
        # If node is a service node, need to remove the template
        if template:
            template_mapping[InfoGraphNode.get_name(node)] = template
            node[1]['attributes'].pop('template')
        # Fix format for conversion to JSON (happening in analytics)
        node[1]['attributes'] = \
            str(misc.convert_unicode_dict_to_string(node[1]['attributes'])).\
            replace("'", '"')
    # Pass 2: drop non-service nodes whose telemetry frame is empty
    # (a lone 'timestamp' column counts as empty).
    for node in res.nodes(data=True):
        node_name = InfoGraphNode.get_name(node)
        telemetry = InfoGraphNode.get_telemetry_data(node)
        layer = InfoGraphNode.get_layer(node)
        # if len(telemetry.columns.values) <= 1:
        if len(telemetry.columns) <= 1 and \
                not layer == InfoGraphNodeLayer.SERVICE:
            InfoGraphNode.set_telemetry_data(node, dict())
            res.filter_nodes('node_name', node_name)
    # Convert attributes back to dict()
    for node in res.nodes(data=True):
        string = InfoGraphNode.get_attributes(node)
        attrs = InfoGraphUtilities.str_to_dict(string)
        # Restore the template removed in pass 1 for service nodes.
        if InfoGraphNode.get_type(node) == \
                InfoGraphNodeType.SERVICE_COMPUTE:
            attrs['template'] = \
                template_mapping[InfoGraphNode.get_name(node)]
        InfoGraphNode.set_attributes(node, attrs)
    return res
def _source(self, node):
    """Resolve the physical host identifier for `node`, or None."""
    attrs = InfoGraphNode.get_attributes(node)
    node_type = InfoGraphNode.get_type(node)
    if InfoGraphNode.get_layer(node) == GRAPH_LAYER.PHYSICAL and \
            'allocation' in attrs:
        return attrs['allocation']
    if node_type == NODE_TYPE.VIRTUAL_MACHINE:
        if 'vm_name' in attrs:
            return attrs['vm_name']
        if 'name' in attrs:
            return attrs['name']
    if node_type == NODE_TYPE.INSTANCE_DISK:
        # The machine is the source as this is a libvirt disk:
        # disk -> owning VM -> physical machine.
        disk_name = InfoGraphNode.get_name(node)
        vm = self.landscape.get_neighbour_by_type(
            disk_name, NODE_TYPE.VIRTUAL_MACHINE)
        return self.landscape.get_neighbour_by_type(
            vm, NODE_TYPE.PHYSICAL_MACHINE)
    if node_type == NODE_TYPE.PHYSICAL_MACHINE and 'name' in attrs:
        return attrs['name']
    return None
def _get_network_subgraph(self, ts_from=None, ts_to=None):
    """Fetch one subgraph per switch node, with attributes parsed.

    :return: list of subgraphs (one per switch-typed landscape result).
    """
    # TODO - URGENT: Wait for the network information to be in the graph and test this again
    properties = [
        ("type", "switch"),
    ]
    try:
        time_window = ts_to - ts_from
    except TypeError:
        # Bug fix: was a bare `except:`. Missing/non-numeric bounds fall
        # back to a 10-minute window.
        time_window = 600
    landscape_res = landscape.get_node_by_properties(
        properties, ts_from, time_window)
    subgs = [landscape.get_subgraph(lr.nodes()[0], ts_from, time_window)
             for lr in landscape_res]
    for subg in subgs:
        for node in subg.nodes(data=True):
            attrs = InfoGraphNode.get_attributes(node)
            attrs = InfoGraphUtilities.str_to_dict(attrs)
            InfoGraphNode.set_attributes(node, attrs)
    return subgs
def _tag_value(self, tag_key, node, metric): # TODO: fully qualify this with metric name, if metric is this and tag tag_value = None if tag_key == "instance": tag_value = self._source(node) elif tag_key == "source": tag_value = self._source(node) if tag_value is None: node_type = InfoGraphNode.get_type(node) if node_type == NODE_TYPE.PHYSICAL_DISK: tag_value = self._disk(node) elif node_type == NODE_TYPE.PHYSICAL_PU: tag_value = self._pu(node, metric) elif node_type == NODE_TYPE.PHYSICAL_NIC: attrs = InfoGraphNode.get_attributes(node) tag_value = self._nic(node) elif node_type == NODE_TYPE.VIRTUAL_MACHINE: tag_value = self._vm(node) return tag_value
def run(self):
    """Parse each pooled node's stringified attributes back into a dict."""
    for node in self.node_pool:
        raw_attrs = InfoGraphNode.get_attributes(node)
        InfoGraphNode.set_attributes(
            node, InfoGraphUtilities.str_to_dict(raw_attrs))
def run(self, workload, optimal_node_type='machine'):
    """
    Ranks machines by CPU utilization.

    :param workload: Contains workload related info and results.
    :param optimal_node_type: node type considered for ranking.
    :return: heuristic results (pandas.DataFrame), nodes with telemetry
        first, sorted by the configured utilization fields, followed by
        nodes without telemetry.
    """
    workload_config = workload.get_configuration()
    graph = workload.get_latest_graph()
    if not graph:
        raise KeyError('No graph to be processed.')
    scores = LandscapeScore.utilization_scores(graph)
    scores_sat = LandscapeScore.saturation_scores(graph)
    heuristic_results = pd.DataFrame(columns=[
        'node_name', 'type', 'ipaddress',
        'compute utilization', 'compute saturation',
        'memory utilization', 'memory saturation',
        'network utilization', 'network saturation',
        'disk utilization', 'disk saturation',
    ])
    # Separate frame for nodes lacking telemetry ("nt"); appended last.
    heuristic_results_nt = heuristic_results.copy()
    device_id_col_name = None
    project = None
    if workload_config.get('project'):
        project = workload_config['project']
        device_id_col_name = workload_config['project'] + '_device_id'
        heuristic_results[device_id_col_name] = None
    telemetry_filter = workload_config.get('telemetry_filter')
    for node in graph.nodes(data=True):
        node_name = InfoGraphNode.get_name(node)
        node_type = InfoGraphNode.get_type(node)
        list_node_name = node_name
        if node_type == optimal_node_type:
            if InfoGraphNode.node_is_vm(node):
                # Prefer the human-readable VM name when present.
                vm_name = InfoGraphNode.get_properties(node).get('vm_name')
                if vm_name:
                    list_node_name = vm_name
            data = {
                'node_name': list_node_name,
                'type': node_type,
                'ipaddress':
                    InfoGraphNode.get_attributes(node).get('ipaddress'),
                'compute utilization': scores[node_name]['compute'],
                'compute saturation': scores_sat[node_name]['compute'],
                'memory utilization': scores[node_name]['memory'],
                'memory saturation': scores_sat[node_name]['memory'],
                'network utilization': scores[node_name]['network'],
                'network saturation': scores_sat[node_name]['network'],
                'disk utilization': scores[node_name]['disk'],
                'disk saturation': scores_sat[node_name]['disk']
            }
            if device_id_col_name:
                dev_id = InfoGraphNode.get_properties(node).get(
                    device_id_col_name)
                # Bug fix: guard against a missing device id before
                # normalising mf2c ids ('_' -> '-').
                if project == 'mf2c' and dev_id:
                    dev_id = dev_id.replace('_', '-')
                data[device_id_col_name] = dev_id
            if InfoGraphNode.get_properties(node).get(
                    "telemetry_data") is not None:
                heuristic_results = heuristic_results.append(
                    data, ignore_index=True)
            elif not telemetry_filter:
                # Bug fix: previously appended to heuristic_results,
                # discarding earlier no-telemetry rows.
                heuristic_results_nt = heuristic_results_nt.append(
                    data, ignore_index=True)
        if not workload.get_workload_name().startswith('optimal_'):
            # Also rank docker containers when machines are the target.
            if InfoGraphNode.get_type(
                    node) == "docker_container" and \
                    optimal_node_type == 'machine':
                node_name = InfoGraphNode.get_docker_id(node)
                heuristic_results = heuristic_results.append(
                    {
                        'node_name': node_name,
                        'type': node_type,
                        'ipaddress': None,
                        'compute utilization':
                            scores[node_name]['compute'],
                        'compute saturation': None,
                        'memory utilization': scores[node_name]['memory'],
                        'memory saturation': None,
                        'network utilization':
                            scores[node_name]['network'],
                        'network saturation': None,
                        'disk utilization': scores[node_name]['disk'],
                        'disk saturation': None
                    },
                    ignore_index=True)
    # Sort fields default to CPU; overridable via 'sort_order' config.
    sort_fields = ['compute utilization']
    sort_order = workload_config.get('sort_order')
    if sort_order:
        sort_fields = []
        for val in sort_order:
            if val == 'cpu':
                sort_fields.append('compute utilization')
            if val == 'memory':
                sort_fields.append('memory utilization')
            if val == 'network':
                sort_fields.append('network utilization')
            if val == 'disk':
                sort_fields.append('disk utilization')
    heuristic_results_nt = heuristic_results_nt.replace([0], [None])
    heuristic_results = heuristic_results.sort_values(by=sort_fields,
                                                      ascending=True)
    heuristic_results = heuristic_results.append(heuristic_results_nt,
                                                 ignore_index=True)
    workload.append_metadata(self.__filter_name__, heuristic_results)
    LOG.info('AVG: {}'.format(heuristic_results))
    return heuristic_results
def _vm(self, node):
    """Return the libvirt instance id for a virtual-machine node, else None."""
    if InfoGraphNode.get_type(node) == NODE_TYPE.VIRTUAL_MACHINE:
        attrs = InfoGraphNode.get_attributes(node)
        # .get avoids a KeyError for VMs lacking the libvirt annotation;
        # callers (tag resolution) treat None as "no value".
        return attrs.get('libvirt_instance')
    # Non-VM nodes previously fell off the end implicitly; made explicit.
    return None
def _create_pandas_data_frame_from_graph(graph, metrics='all'):
    """
    Merge every node's telemetry (or utilization) frame into one
    DataFrame keyed on 'timestamp', one column per node/metric pair
    named '<node>@<layer>@<type>@<metric>' (dots replaced by '_').

    :param graph: (NetworkX Graph) annotated graph to export
    :param metrics: 'all' exports raw telemetry; anything else exports
        the utilization frames instead
    :return: (pandas.DataFrame) wide frame, outer-joined on timestamp
    """
    result = pandas.DataFrame()
    for node in graph.nodes(data=True):
        node_name = InfoGraphNode.get_name(node)
        node_layer = InfoGraphNode.get_layer(node)
        node_type = InfoGraphNode.get_type(node)
        if node_type == 'vm':
            # Prefer the human-readable VM name in column labels.
            node_attrs = InfoGraphNode.get_attributes(node)
            node_name = node_attrs['vm_name'] if node_attrs.get(
                'vm_name') else node_name
        # This method supports export of either normal metrics coming
        # from telemetry agent or utilization type of metrics.
        if metrics == 'all':
            node_telemetry_data = InfoGraphNode.get_telemetry_data(node)
        else:
            node_telemetry_data = InfoGraphNode.get_utilization(node)
        # df = node_telemetry_data.copy()
        # LOG.info("Node Name: {} -- Telemetry: {}".format(
        #     InfoGraphNode.get_name(node),
        #     InfoGraphNode.get_telemetry_data(node).columns.values
        # ))
        # Skip nodes without a usable (non-empty) DataFrame.
        if isinstance(node_telemetry_data, pandas.DataFrame):
            if node_telemetry_data.empty:
                continue
            node_telemetry_data = node_telemetry_data.reset_index()
        else:
            continue
        # Normalise timestamps to rounded ints so joins line up.
        node_telemetry_data['timestamp'] = node_telemetry_data[
            'timestamp'].astype(float)
        node_telemetry_data['timestamp'] = node_telemetry_data[
            'timestamp'].round()
        node_telemetry_data['timestamp'] = node_telemetry_data[
            'timestamp'].astype(int)
        renames = {}
        for metric_name in node_telemetry_data.columns.values:
            if metric_name == 'timestamp':
                continue
            col_name = "{}@{}@{}@{}".\
                format(node_name, node_layer, node_type, metric_name)
            col_name = col_name.replace(".", "_")
            renames[metric_name] = col_name
        node_telemetry_data = node_telemetry_data.rename(columns=renames)
        # LOG.info("TELEMETRIA: {}".format(node_telemetry_data.columns.values))
        # A lone timestamp column carries no data; skip.
        if node_telemetry_data.empty or len(
                node_telemetry_data.columns) <= 1:
            continue
        if result.empty:
            result = node_telemetry_data.copy()
        else:
            # Outer-join keeps rows for timestamps missing on either side.
            node_telemetry_data = \
                node_telemetry_data.drop_duplicates(subset='timestamp')
            result = pandas.merge(result, node_telemetry_data,
                                  how='outer', on='timestamp')
    # TODO: Try with this removed
    # result.set_index(['timestamp'])
    return result
def annotate_machine_network_util(internal_graph, node):
    # NOTE(review): this variant clears the host machine's network
    # utilization frame to an empty DataFrame; `machine_util` is fetched
    # but never used. Presumably a stub that disables network annotation
    # (cf. the sibling annotate_machine_network_util that aggregates NIC
    # series) — confirm intent before relying on it.
    source = InfoGraphNode.get_attributes(node)['allocation']
    machine = InfoGraphNode.get_node(internal_graph, source)
    machine_util = InfoGraphNode.get_network_utilization(machine)
    InfoGraphNode.set_network_utilization(machine, pandas.DataFrame())
def compute_node(annotated_subgraph, hostname=None):
    """
    This is a type of fingerprint from the infrastructure perspective:
    concatenates per-category (compute/network/storage/memory)
    utilization frames for physical, non-machine nodes and derives
    summary statistics per category.

    :param annotated_subgraph: telemetry-annotated graph.
    :param hostname: when given, only nodes allocated on this host count.
    :return: [data, statistics] — per-category DataFrames and stats dicts.
    """
    # TODO: Validate graph
    data = dict()
    statistics = dict()
    # Pre-seed every category with an empty frame and zeroed stats so the
    # result always carries all four categories.
    compute = InfoGraphNodeCategory.COMPUTE
    data[compute] = pandas.DataFrame()
    statistics[compute] = {
        'mean': 0, 'median': 0, 'min': 0, 'max': 0, 'var': 0, 'std_dev': 0
    }
    network = InfoGraphNodeCategory.NETWORK
    data[network] = pandas.DataFrame()
    statistics[network] = {
        'mean': 0, 'median': 0, 'min': 0, 'max': 0, 'var': 0, 'std_dev': 0
    }
    storage = InfoGraphNodeCategory.STORAGE
    data[storage] = pandas.DataFrame()
    statistics[storage] = {
        'mean': 0, 'median': 0, 'min': 0, 'max': 0, 'var': 0, 'std_dev': 0
    }
    memory = InfoGraphNodeCategory.MEMORY
    data[memory] = pandas.DataFrame()
    statistics[memory] = {
        'mean': 0, 'median': 0, 'min': 0, 'max': 0, 'var': 0, 'std_dev': 0
    }
    # Calculation of the fingerprint on top of the virtual resources
    local_subgraph = annotated_subgraph.copy()
    for node in local_subgraph.nodes(data=True):
        layer = InfoGraphNode.get_layer(node)
        is_machine = InfoGraphNode.node_is_machine(node)
        # Only physical, non-machine resource nodes contribute.
        if is_machine:
            continue
        if layer == InfoGraphNodeLayer.VIRTUAL:
            continue
        if layer == InfoGraphNodeLayer.SERVICE:
            continue
        # If hostname has been specified, need to take into account only
        # nodes that are related to the specific host
        attrs = InfoGraphNode.get_attributes(node)
        allocation = attrs['allocation'] if 'allocation' in attrs \
            else None
        if hostname and not hostname == allocation:
            continue
        category = InfoGraphNode.get_category(node)
        utilization = InfoGraphNode.get_utilization(node)
        try:
            # Drop the timestamp column before aggregation; older pandas
            # raises ValueError when the label is absent.
            utilization = utilization.drop('timestamp', 1)
        except ValueError:
            utilization = InfoGraphNode.get_utilization(node)
        data[category] = pandas.concat([data[category], utilization])
    for category in statistics:
        if not data[category].empty:
            mean = data[category]['utilization'].mean()
            median = (data[category]['utilization']).median()
            min = data[category]['utilization'].min()
            maximum = data[category]['utilization'].max()
            var = data[category]['utilization'].var()
            std_dev = math.sqrt(var)
        else:
            mean = 0
            median = 0
            min = 0
            maximum = 0
            var = 0
            std_dev = 0
        statistics[category] = \
            {'mean': mean,
             'median': median,
             'min': min,
             'max': maximum,
             'var': var,
             'std_dev': std_dev}
    return [data, statistics]
def run(self, workload):
    """Merge the workload's subgraphs into one graph with pooled telemetry.

    Pass 1 collects every node's telemetry frames into a dict (keyed by
    node id, or vm_name for VMs) and blanks them on the nodes; pass 2
    merges the subgraphs; pass 3 re-attaches the pooled telemetry to the
    merged graph, keeping a single copy per node.
    """
    # NOTE(review): tmp_path only feeds the commented-out CSV spill path.
    tmp_path = "/media/iolie/WORK/data/"
    # Extract data from Info Core
    service_subgraphs = workload.get_latest_graph()
    telemetry = {}
    cols = []
    if not service_subgraphs or len(service_subgraphs) == 0:
        return
    # first add telemetry data of all nodes to a dictionary
    print "Data merger started " + str(time.time())
    for subgraph in service_subgraphs:
        for node in subgraph.nodes(data=True):
            node_id = node[0]
            node_tm = InfoGraphNode.get_telemetry_data(node)
            if InfoGraphNode.node_is_vm(node):
                if not node_tm.empty:
                    node_tm.columns = tm_utils.clean_vm_telemetry_colnames(node_tm.columns)
                # Key VM telemetry by the human-readable vm_name.
                vm_name = InfoGraphNode.get_attributes(node).get("vm_name")
                if vm_name:
                    node_id = vm_name
            if not node_tm.empty:
                tm = telemetry.get(node_id)
                if not isinstance(tm, pd.DataFrame):
                    #if not tm:
                    telemetry[node_id] = node_tm
                    #telemetry[node_id] = [node_tm]
                    #node_tm.to_csv(tmp_path+node_id, index=False)
                else:
                    # Same node seen in another subgraph: stack frames.
                    telemetry[node_id] = pd.concat([tm, node_tm])
                    #telemetry[node_id].append(node_tm)
                    #node_tm.to_csv(tmp_path + node_id, mode='a', header=False, index=False)
            # Blank the node's copy to avoid duplicating data in memory.
            InfoGraphNode.set_telemetry_data(node, pd.DataFrame())
    print "Data merger finished " + str(time.time())
    print telemetry.keys()
    print len(telemetry)
    # merge subgraphs
    graph = None
    counter = 0
    for subgraph in service_subgraphs:
        counter = counter + 1
        if not graph and len(subgraph.nodes()) > 0:
            graph = subgraph
        elif len(subgraph.nodes()) > 0:
            graphs.merge_graph(graph, subgraph)
    #print "Merged {} subgraphs out of {} subgraphs in all".format(counter, len(service_subgraphs))
    # merge telemetry data
    #for key in telemetry.keys():
    #    val = telemetry[key]
    #    print key + ' {}'.format(len(val))
    #    if len(val) > 1:
    #        telemetry[key] = pd.concat(val)
    #    elif len(val) == 1:
    #        telemetry[key] = val[0]
    #    print node_id + ', ' + str(time.time())
    #print "Merged telemetry data of {} nodes out of {} nodes in all".format(counter, len(telemetry.keys()))
    # set telemetry data on merged graph
    for node in graph.nodes(data=True):
        node_id = node[0]
        if InfoGraphNode.node_is_vm(node):
            vm_name = InfoGraphNode.get_attributes(node).get("vm_name")
            if vm_name:
                node_id = vm_name
        tm = telemetry.get(node_id)
        #try:
        #    tm = pd.read_csv(tmp_path + node_id)
        #except:
        #    tm = pd.DataFrame()
        if isinstance(tm, pd.DataFrame):
            if not tm.empty:
                InfoGraphNode.set_telemetry_data(node, tm)
                # delete telemetry data so that only one copy exists in the graph
                del telemetry[node_id]
        else:
            InfoGraphNode.set_telemetry_data(node, None)
        # print "Set telemetry data of node {}".format(node_id)
    print "Set telemetry data of merged graph"
    workload.save_results(self.__filter_name__, graph)
    return graph
def utilization(internal_graph, node, telemetry):
    """Annotate `node` with compute/memory/disk/network utilization frames.

    Prefers the pre-computed 'intel/use/...' metrics when present, then
    derives utilization percentages from raw procfs/psutil/docker
    counters (rate metrics are differenced against the previous sample).

    Fixes vs. the original:
    - the procfs iface fallback checked 'bytes_recv' twice instead of
      also requiring 'bytes_sent' (the body reads both);
    - `fillna(0)` results were discarded (not in-place); now assigned.
    """
    # machine usage
    telemetry_data = telemetry.get_data(node)
    if 'intel/use/compute/utilization' in telemetry_data:
        InfoGraphNode.set_compute_utilization(
            node,
            pandas.DataFrame(
                telemetry_data['intel/use/compute/utilization'],
                columns=['intel/use/compute/utilization']))
    # pu usage
    if 'intel/procfs/cpu/utilization_percentage' in telemetry_data:
        InfoGraphNode.set_compute_utilization(
            node,
            pandas.DataFrame(
                telemetry_data['intel/procfs/cpu/utilization_percentage'],
                columns=['intel/procfs/cpu/utilization_percentage']))
    if 'intel/use/memory/utilization' in telemetry_data:
        InfoGraphNode.set_memory_utilization(
            node,
            pandas.DataFrame(telemetry_data['intel/use/memory/utilization']))
    if 'intel/use/disk/utilization' in telemetry_data:
        InfoGraphNode.set_disk_utilization(
            node,
            pandas.DataFrame(telemetry_data['intel/use/disk/utilization']))
    if 'intel/use/network/utilization' in telemetry_data:
        InfoGraphNode.set_network_utilization(
            node,
            pandas.DataFrame(telemetry_data['intel/use/network/utilization']))
    # supporting not available /use/ metrics
    if 'intel/procfs/meminfo/mem_total' in telemetry_data and \
            'intel/procfs/meminfo/mem_used' in telemetry_data:
        # LOG.info('Found memory utilization procfs')
        mem_used = telemetry_data['intel/procfs/meminfo/mem_used'].fillna(0)
        mem_total = telemetry_data['intel/procfs/meminfo/mem_total'].fillna(0)
        mem_util = mem_used * 100 / mem_total
        mem_util.name = 'intel/procfs/memory/utilization_percentage'
        InfoGraphNode.set_memory_utilization(node, pandas.DataFrame(mem_util))
    if 'intel/procfs/disk/io_time' in telemetry_data:
        # io_time is ms spent doing I/O per sample second -> percentage.
        io_time = telemetry_data['intel/procfs/disk/io_time'].fillna(0)
        disk_util = io_time * 100 / 1000
        disk_util.name = 'intel/procfs/disk/utilization_percentage'
        InfoGraphNode.set_disk_utilization(node, pandas.DataFrame(disk_util))
    if 'intel/psutil/net/bytes_recv' in telemetry_data and \
            'intel/psutil/net/bytes_sent' in telemetry_data:
        source = telemetry._source(node)
        machine = InfoGraphNode.get_node(internal_graph, source)
        nic_speed = InfoGraphNode.get_nic_speed_mbps(machine) * 1000000
        net_data = telemetry_data.filter(
            ['timestamp', 'intel/psutil/net/bytes_recv',
             'intel/psutil/net/bytes_sent'], axis=1)
        # Bug fix: fillna is not in-place; assign the result.
        net_data = net_data.fillna(0)
        net_data['intel/psutil/net/bytes_total'] = \
            net_data['intel/psutil/net/bytes_recv'] + \
            net_data['intel/psutil/net/bytes_sent']
        # Counters are cumulative; diff() yields per-interval bytes.
        net_data_interval = net_data.set_index('timestamp').diff()
        net_data_interval['intel/psutil/net/utilization_percentage'] = \
            net_data_interval['intel/psutil/net/bytes_total'] * 100 / nic_speed
        net_data_pct = pandas.DataFrame(
            net_data_interval['intel/psutil/net/utilization_percentage'])
        InfoGraphNode.set_network_utilization(node, net_data_pct)
    elif 'intel/procfs/iface/bytes_recv' in telemetry_data and \
            'intel/procfs/iface/bytes_sent' in telemetry_data:
        # Bug fix: second operand previously re-checked bytes_recv, so a
        # frame missing bytes_sent crashed below.
        source = telemetry._source(node)
        machine = InfoGraphNode.get_node(internal_graph, source)
        nic_speed = InfoGraphNode.get_nic_speed_mbps(machine) * 1000000
        net_data = telemetry_data.filter(
            ['timestamp', 'intel/procfs/iface/bytes_recv',
             'intel/procfs/iface/bytes_sent'], axis=1)
        net_data = net_data.fillna(0)
        net_data['intel/psutil/net/bytes_total'] = \
            net_data['intel/procfs/iface/bytes_recv'] + \
            net_data['intel/procfs/iface/bytes_sent']
        net_data_interval = net_data.set_index('timestamp').diff()
        net_data_interval['intel/psutil/net/utilization_percentage'] = \
            net_data_interval['intel/psutil/net/bytes_total'] * 100 / nic_speed
        net_data_pct = pandas.DataFrame(
            net_data_interval['intel/psutil/net/utilization_percentage'])
        InfoGraphNode.set_network_utilization(node, net_data_pct)
    if 'intel/docker/stats/cgroups/cpu_stats/cpu_usage/total' in telemetry_data:
        # Container node
        # cpu util
        cpu_data = telemetry_data.filter(
            ['timestamp',
             'intel/docker/stats/cgroups/cpu_stats/cpu_usage/total'], axis=1)
        cpu_data_interval = cpu_data.set_index('timestamp').diff()
        # util data in nanoseconds
        cpu_data_interval['intel/docker/stats/cgroups/cpu_stats/cpu_usage/percentage'] = \
            cpu_data_interval['intel/docker/stats/cgroups/cpu_stats/cpu_usage/total'] / 10000000
        cpu_data_pct = pandas.DataFrame(
            cpu_data_interval['intel/docker/stats/cgroups/cpu_stats/cpu_usage/percentage'])
        InfoGraphNode.set_compute_utilization(node, cpu_data_pct)
    if "intel/docker/stats/cgroups/memory_stats/usage/usage" in telemetry_data:
        # container mem util, relative to the host's local memory
        source = telemetry._source(node)
        machine = InfoGraphNode.get_node(internal_graph, source)
        local_mem = int(
            InfoGraphNode.get_attributes(machine).get("local_memory"))
        mem_data = telemetry_data.filter(
            ['timestamp',
             "intel/docker/stats/cgroups/memory_stats/usage/usage"], axis=1)
        mem_data["intel/docker/stats/cgroups/memory_stats/usage/percentage"] = \
            mem_data["intel/docker/stats/cgroups/memory_stats/usage/usage"] / local_mem * 100
        mem_data_pct = pandas.DataFrame(
            mem_data["intel/docker/stats/cgroups/memory_stats/usage/percentage"])
        InfoGraphNode.set_memory_utilization(node, mem_data_pct)
    if "intel/docker/stats/network/tx_bytes" in telemetry_data:
        # container network util
        source = telemetry._source(node)
        machine = InfoGraphNode.get_node(internal_graph, source)
        nic_speed = InfoGraphNode.get_nic_speed_mbps(machine) * 1000000
        net_data = telemetry_data.filter(
            ['timestamp', "intel/docker/stats/network/tx_bytes",
             "intel/docker/stats/network/rx_bytes"], axis=1)
        net_data = net_data.fillna(0)
        net_data['intel/docker/stats/network/bytes_total'] = \
            net_data["intel/docker/stats/network/tx_bytes"] + \
            net_data["intel/docker/stats/network/rx_bytes"]
        net_data_interval = net_data.set_index('timestamp').diff()
        net_data_interval['intel/docker/stats/network/utilization_percentage'] = \
            net_data_interval['intel/docker/stats/network/bytes_total'] * 100 / nic_speed
        net_data_pct = pandas.DataFrame(
            net_data_interval['intel/docker/stats/network/utilization_percentage'])
        InfoGraphNode.set_network_utilization(node, net_data_pct)
    if "intel/docker/stats/cgroups/blkio_stats/io_time_recursive/value" in telemetry_data:
        # container disk util
        disk_data = telemetry_data.filter(
            ['timestamp',
             "intel/docker/stats/cgroups/blkio_stats/io_time_recursive/value"],
            axis=1)
        disk_data_interval = disk_data.set_index('timestamp').diff()
        # util data in milliseconds
        disk_data_interval["intel/docker/stats/cgroups/blkio_stats/io_time_recursive/percentage"] = \
            disk_data_interval["intel/docker/stats/cgroups/blkio_stats/io_time_recursive/value"] / 1000000
        disk_data_pct = pandas.DataFrame(
            disk_data_interval["intel/docker/stats/cgroups/blkio_stats/io_time_recursive/percentage"])
        InfoGraphNode.set_disk_utilization(node, disk_data_pct)
def compute_node_resources(annotated_subgraph, hostname=None): """ This is a type of fingerprint from the infrastructure perspective """ # TODO: Validate graph data = dict() statistics = dict() # Calculation of the fingerprint on top of the virtual resources local_subgraph = annotated_subgraph.copy() for node in local_subgraph.nodes(data=True): layer = InfoGraphNode.get_layer(node) if layer == InfoGraphNodeLayer.VIRTUAL: continue if layer == InfoGraphNodeLayer.SERVICE: continue type = InfoGraphNode.get_type(node) if type == 'core': continue # If hostname has been specified, need to take into account only # nodes that are related to the specific host attrs = InfoGraphNode.get_attributes(node) allocation = attrs['allocation'] if 'allocation' in attrs \ else None if hostname and not hostname == allocation: continue name = InfoGraphNode.get_name(node) statistics[name] = { 'mean': 0, 'median': 0, 'min': 0, 'max': 0, 'var': 0, 'std_dev': 0 } utilization = InfoGraphNode.get_utilization(node) try: utilization = utilization.drop('timestamp', 1) except ValueError: utilization = InfoGraphNode.get_utilization(node) data[name] = utilization if not data[name].empty: mean = data[name]['utilization'].mean() median = (data[name]['utilization']).median() min = data[name]['utilization'].min() maximum = data[name]['utilization'].max() var = data[name]['utilization'].var() std_dev = math.sqrt(var) else: mean = 0 median = 0 min = 0 maximum = 0 var = 0 std_dev = 0 statistics[name] = \ {'mean': mean, 'median': median, 'min': min, 'max': maximum, 'var': var, 'std_dev': std_dev} return [data, statistics]