def annotate_machine_pu_util(internal_graph, node):
    # Propagate per-PU procfs CPU utilization up to the owning machine node.
    source = InfoGraphNode.get_machine_name_of_pu(node)
    machine = InfoGraphNode.get_node(internal_graph, source)
    machine_util = InfoGraphNode.get_compute_utilization(machine)
    if 'intel/use/compute/utilization' not in machine_util.columns:
        cpu_metric = 'intel/procfs/cpu/utilization_percentage'
        pu_util_df = InfoGraphNode.get_compute_utilization(node)
        if cpu_metric in pu_util_df.columns:
            pu_util = pu_util_df[cpu_metric]
            pu_util = pu_util.fillna(0)
            machine_util[InfoGraphNode.get_attributes(node)['name']] = pu_util
            InfoGraphNode.set_compute_utilization(machine, machine_util)
        else:
            LOG.info('CPU utilization not found for node {}'.format(
                InfoGraphNode.get_name(node)))
    else:
        LOG.debug('Found use metrics for node {}'.format(
            InfoGraphNode.get_name(node)))
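# Hedged sketch (not part of the module above): shows, with toy pandas frames,
# how annotate_machine_pu_util attaches a per-PU column to the machine's
# compute-utilization DataFrame. The PU names 'pu-0'/'pu-1' are invented for
# illustration; only standard pandas behaviour is assumed.
import pandas

machine_util = pandas.DataFrame({'pu-0': [10.0, 20.0]})  # column of an already-annotated PU
pu_util = pandas.Series([30.0, None]).fillna(0)          # new PU series, NaNs zeroed
machine_util['pu-1'] = pu_util                           # same as machine_util[attrs['name']] = pu_util
# machine_util now holds one utilization column per PU:
#    pu-0  pu-1
# 0  10.0  30.0
# 1  20.0   0.0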
def get_annotated_graph(self, graph, ts_from, ts_to, utilization=False,
                        saturation=False):
    """
    Collect data from cimmaron tsdb in relation to the specified graph
    and time window, and store an annotated subgraph in the specified
    directory.

    :param graph: (NetworkX Graph) Graph to be annotated with data
    :param ts_from: (str) Epoch time representation of start time
    :param ts_to: (str) Epoch time representation of stop time
    :param utilization: (bool) If True, also calculate utilization for
        each node, where available
    :param saturation: (bool) If True, also calculate saturation for
        each node, where available
    :return: NetworkX Graph annotated with telemetry data
    """
    TelemetryAnnotation._get_annotated_graph_input_validation(
        graph, ts_from, ts_to)
    internal_graph = graph.copy()
    self.internal_graph = internal_graph
    for node in internal_graph.nodes(data=True):
        if isinstance(self.telemetry, SnapAnnotation):
            queries = list()
            try:
                queries = self.telemetry.get_queries(
                    internal_graph, node, ts_from, ts_to)
            except Exception as e:
                LOG.error("Exception: {}".format(e))
                import traceback
                traceback.print_exc()
            if len(queries) != 0:
                InfoGraphNode.set_queries(node, queries)
                telemetry_data = self.telemetry.get_data(node)
                InfoGraphNode.set_telemetry_data(node, telemetry_data)
                if utilization and not telemetry_data.empty:
                    SnapUtils.utilization(
                        internal_graph, node, self.telemetry)
                    # If only procfs is available, results need to be
                    # propagated at machine level.
                    if InfoGraphNode.get_type(node) == \
                            InfoGraphNodeType.PHYSICAL_PU:
                        SnapUtils.annotate_machine_pu_util(
                            internal_graph, node)
                    if InfoGraphNode.node_is_disk(node):
                        SnapUtils.annotate_machine_disk_util(
                            internal_graph, node)
                    if InfoGraphNode.node_is_nic(node):
                        SnapUtils.annotate_machine_network_util(
                            internal_graph, node)
                if saturation:
                    SnapUtils.saturation(
                        internal_graph, node, self.telemetry)
        elif isinstance(self.telemetry, PrometheusAnnotation):
            queries = list()
            try:
                queries = self.telemetry.get_queries(
                    internal_graph, node, ts_from, ts_to)
            except Exception as e:
                LOG.error("Exception: {}".format(e))
                import traceback
                traceback.print_exc()
            if len(queries) != 0:
                InfoGraphNode.set_queries(node, queries)
                telemetry_data = self.telemetry.get_data(node)
                InfoGraphNode.set_telemetry_data(node, telemetry_data)
                # if utilization and not telemetry_data.empty:
                #     PrometheusUtils.utilization(
                #         internal_graph, node, self.telemetry)
                #     # If only procfs is available, results need to be
                #     # propagated at machine level.
                #     if InfoGraphNode.get_type(node) == \
                #             InfoGraphNodeType.PHYSICAL_PU:
                #         PrometheusUtils.annotate_machine_pu_util(
                #             internal_graph, node)
                #     if InfoGraphNode.node_is_disk(node):
                #         PrometheusUtils.annotate_machine_disk_util(
                #             internal_graph, node)
                #     if InfoGraphNode.node_is_nic(node):
                #         PrometheusUtils.annotate_machine_network_util(
                #             internal_graph, node)
                # if saturation:
                #     PrometheusUtils.saturation(
                #         internal_graph, node, self.telemetry)
        else:
            telemetry_data = self.telemetry.get_data(node)
            InfoGraphNode.set_telemetry_data(node, telemetry_data)
            if utilization and not telemetry_data.empty:
                SnapUtils.utilization(internal_graph, node, self.telemetry)
                # If only procfs is available, results need to be
                # propagated at machine level.
                if InfoGraphNode.get_type(node) == \
                        InfoGraphNodeType.PHYSICAL_PU:
                    source = InfoGraphNode.get_machine_name_of_pu(node)
                    machine = InfoGraphNode.get_node(internal_graph, source)
                    machine_util = \
                        InfoGraphNode.get_compute_utilization(machine)
                    if 'intel/use/compute/utilization' not in \
                            machine_util.columns:
                        pu_util = InfoGraphNode.get_compute_utilization(
                            node)['intel/procfs/cpu/utilization_percentage']
                        pu_util = pu_util.fillna(0)
                        if 'intel/procfs/cpu/utilization_percentage' in \
                                machine_util.columns:
                            machine_util = machine_util[
                                'intel/procfs/cpu/utilization_percentage']
                            machine_util = machine_util.fillna(0)
                            sum_util = machine_util.add(pu_util, fill_value=0)
                        else:
                            sum_util = pu_util
                        if isinstance(sum_util, pandas.Series):
                            sum_util = pandas.DataFrame(
                                sum_util,
                                columns=[
                                    'intel/procfs/cpu/utilization_percentage'
                                ])
                        InfoGraphNode.set_compute_utilization(
                            machine, sum_util)
                    else:
                        LOG.debug('Found use metrics for node {}'.format(
                            InfoGraphNode.get_name(node)))
            if saturation:
                self._saturation(internal_graph, node, self.telemetry)
    return internal_graph
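# Hedged sketch (illustrative only, not from the code base): the pandas
# mechanics the procfs fallback above relies on - two per-PU series are summed
# with fill_value=0 and wrapped back into a one-column DataFrame under the
# same metric name. All sample values are invented.
import pandas

col = 'intel/procfs/cpu/utilization_percentage'
machine = pandas.Series([50.0, None, 20.0], name=col)  # utilization already on the machine
pu = pandas.Series([10.0, 5.0, None], name=col)        # utilization of one more PU
summed = machine.fillna(0).add(pu.fillna(0), fill_value=0)
frame = pandas.DataFrame(summed, columns=[col])        # values: 60.0, 5.0, 20.0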
def annotate_machine_pu_util(internal_graph, node):
    # NOTE: this variant currently discards any existing machine-level data
    # and resets the machine's compute utilization to an empty DataFrame.
    source = InfoGraphNode.get_machine_name_of_pu(node)
    machine = InfoGraphNode.get_node(internal_graph, source)
    machine_util = InfoGraphNode.get_compute_utilization(machine)
    InfoGraphNode.set_compute_utilization(machine, pandas.DataFrame())
def utilization(internal_graph, node, telemetry):
    # machine usage
    telemetry_data = telemetry.get_data(node)
    if 'intel/use/compute/utilization' in telemetry_data:
        InfoGraphNode.set_compute_utilization(
            node,
            pandas.DataFrame(
                telemetry_data['intel/use/compute/utilization'],
                columns=['intel/use/compute/utilization']))
    # pu usage
    if 'intel/procfs/cpu/utilization_percentage' in telemetry_data:
        InfoGraphNode.set_compute_utilization(
            node,
            pandas.DataFrame(
                telemetry_data['intel/procfs/cpu/utilization_percentage'],
                columns=['intel/procfs/cpu/utilization_percentage']))
    if 'intel/use/memory/utilization' in telemetry_data:
        InfoGraphNode.set_memory_utilization(
            node,
            pandas.DataFrame(telemetry_data['intel/use/memory/utilization']))
    if 'intel/use/disk/utilization' in telemetry_data:
        InfoGraphNode.set_disk_utilization(
            node,
            pandas.DataFrame(telemetry_data['intel/use/disk/utilization']))
    if 'intel/use/network/utilization' in telemetry_data:
        InfoGraphNode.set_network_utilization(
            node,
            pandas.DataFrame(telemetry_data['intel/use/network/utilization']))
    # Fallbacks for when the /use/ metrics are not available.
    if ('intel/procfs/meminfo/mem_total' in telemetry_data and
            'intel/procfs/meminfo/mem_used' in telemetry_data):
        # memory utilization from procfs
        mem_used = telemetry_data['intel/procfs/meminfo/mem_used'].fillna(0)
        mem_total = telemetry_data['intel/procfs/meminfo/mem_total'].fillna(0)
        mem_util = mem_used * 100 / mem_total
        mem_util.name = 'intel/procfs/memory/utilization_percentage'
        InfoGraphNode.set_memory_utilization(node, pandas.DataFrame(mem_util))
    if 'intel/procfs/disk/io_time' in telemetry_data:
        # io_time (milliseconds) scaled to a utilization percentage
        io_time = telemetry_data['intel/procfs/disk/io_time'].fillna(0)
        disk_util = io_time * 100 / 1000
        disk_util.name = 'intel/procfs/disk/utilization_percentage'
        InfoGraphNode.set_disk_utilization(node, pandas.DataFrame(disk_util))
    if ('intel/psutil/net/bytes_recv' in telemetry_data and
            'intel/psutil/net/bytes_sent' in telemetry_data):
        # NIC utilization from cumulative byte counters (a standalone sketch
        # of this counter-delta arithmetic follows this function).
        source = telemetry._source(node)
        machine = InfoGraphNode.get_node(internal_graph, source)
        nic_speed = InfoGraphNode.get_nic_speed_mbps(machine) * 1000000
        net_data = telemetry_data.filter(
            ['timestamp',
             'intel/psutil/net/bytes_recv',
             'intel/psutil/net/bytes_sent'], axis=1)
        net_data = net_data.fillna(0)
        net_data['intel/psutil/net/bytes_total'] = (
            net_data['intel/psutil/net/bytes_recv'] +
            net_data['intel/psutil/net/bytes_sent'])
        net_data_interval = net_data.set_index('timestamp').diff()
        net_data_interval['intel/psutil/net/utilization_percentage'] = (
            net_data_interval['intel/psutil/net/bytes_total'] * 100 /
            nic_speed)
        net_data_pct = pandas.DataFrame(
            net_data_interval['intel/psutil/net/utilization_percentage'])
        InfoGraphNode.set_network_utilization(node, net_data_pct)
    elif ('intel/procfs/iface/bytes_recv' in telemetry_data and
            'intel/procfs/iface/bytes_sent' in telemetry_data):
        source = telemetry._source(node)
        machine = InfoGraphNode.get_node(internal_graph, source)
        nic_speed = InfoGraphNode.get_nic_speed_mbps(machine) * 1000000
        net_data = telemetry_data.filter(
            ['timestamp',
             'intel/procfs/iface/bytes_recv',
             'intel/procfs/iface/bytes_sent'], axis=1)
        net_data = net_data.fillna(0)
        net_data['intel/psutil/net/bytes_total'] = (
            net_data['intel/procfs/iface/bytes_recv'] +
            net_data['intel/procfs/iface/bytes_sent'])
        net_data_interval = net_data.set_index('timestamp').diff()
        net_data_interval['intel/psutil/net/utilization_percentage'] = (
            net_data_interval['intel/psutil/net/bytes_total'] * 100 /
            nic_speed)
        net_data_pct = pandas.DataFrame(
            net_data_interval['intel/psutil/net/utilization_percentage'])
        InfoGraphNode.set_network_utilization(node, net_data_pct)
    if 'intel/docker/stats/cgroups/cpu_stats/cpu_usage/total' in telemetry_data:
        # Container node: cpu utilization
        cpu_data = telemetry_data.filter(
            ['timestamp',
             'intel/docker/stats/cgroups/cpu_stats/cpu_usage/total'], axis=1)
        cpu_data_interval = cpu_data.set_index('timestamp').diff()
        # util data in nanoseconds
        cpu_data_interval['intel/docker/stats/cgroups/cpu_stats/cpu_usage/percentage'] = (
            cpu_data_interval['intel/docker/stats/cgroups/cpu_stats/cpu_usage/total'] /
            10000000)
        cpu_data_pct = pandas.DataFrame(
            cpu_data_interval['intel/docker/stats/cgroups/cpu_stats/cpu_usage/percentage'])
        InfoGraphNode.set_compute_utilization(node, cpu_data_pct)
    if 'intel/docker/stats/cgroups/memory_stats/usage/usage' in telemetry_data:
        # container memory utilization
        source = telemetry._source(node)
        machine = InfoGraphNode.get_node(internal_graph, source)
        local_mem = int(
            InfoGraphNode.get_attributes(machine).get('local_memory'))
        mem_data = telemetry_data.filter(
            ['timestamp',
             'intel/docker/stats/cgroups/memory_stats/usage/usage'], axis=1)
        mem_data['intel/docker/stats/cgroups/memory_stats/usage/percentage'] = (
            mem_data['intel/docker/stats/cgroups/memory_stats/usage/usage'] /
            local_mem * 100)
        mem_data_pct = pandas.DataFrame(
            mem_data['intel/docker/stats/cgroups/memory_stats/usage/percentage'])
        InfoGraphNode.set_memory_utilization(node, mem_data_pct)
    if 'intel/docker/stats/network/tx_bytes' in telemetry_data:
        # container network utilization
        source = telemetry._source(node)
        machine = InfoGraphNode.get_node(internal_graph, source)
        nic_speed = InfoGraphNode.get_nic_speed_mbps(machine) * 1000000
        net_data = telemetry_data.filter(
            ['timestamp',
             'intel/docker/stats/network/tx_bytes',
             'intel/docker/stats/network/rx_bytes'], axis=1)
        net_data = net_data.fillna(0)
        net_data['intel/docker/stats/network/bytes_total'] = (
            net_data['intel/docker/stats/network/tx_bytes'] +
            net_data['intel/docker/stats/network/rx_bytes'])
        net_data_interval = net_data.set_index('timestamp').diff()
        net_data_interval['intel/docker/stats/network/utilization_percentage'] = (
            net_data_interval['intel/docker/stats/network/bytes_total'] * 100 /
            nic_speed)
        net_data_pct = pandas.DataFrame(
            net_data_interval['intel/docker/stats/network/utilization_percentage'])
        InfoGraphNode.set_network_utilization(node, net_data_pct)
    if 'intel/docker/stats/cgroups/blkio_stats/io_time_recursive/value' in telemetry_data:
        # container disk utilization
        disk_data = telemetry_data.filter(
            ['timestamp',
             'intel/docker/stats/cgroups/blkio_stats/io_time_recursive/value'],
            axis=1)
        disk_data_interval = disk_data.set_index('timestamp').diff()
        # util data in milliseconds
        disk_data_interval['intel/docker/stats/cgroups/blkio_stats/io_time_recursive/percentage'] = (
            disk_data_interval['intel/docker/stats/cgroups/blkio_stats/io_time_recursive/value'] /
            1000000)
        disk_data_pct = pandas.DataFrame(
            disk_data_interval['intel/docker/stats/cgroups/blkio_stats/io_time_recursive/percentage'])
        InfoGraphNode.set_disk_utilization(node, disk_data_pct)
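# Hedged sketch (standalone, illustrative data only): the counter-delta pattern
# used above for NIC and container-CPU utilization - diff a cumulative counter
# over the timestamp index, then scale the per-interval delta to a percentage.
# NIC speed, sampling interval and all sample values are invented here.
import pandas

# NIC bytes: delta * 100 / (speed_mbps * 1e6), mirroring the code above.
net = pandas.DataFrame({
    'timestamp': [0, 1, 2],
    'bytes_recv': [0, 40_000_000, 90_000_000],
    'bytes_sent': [0, 10_000_000, 20_000_000],
})
net['bytes_total'] = net['bytes_recv'] + net['bytes_sent']
nic_speed = 1000 * 1000000                        # e.g. get_nic_speed_mbps(...) * 1000000
net_interval = net.set_index('timestamp').diff()  # first row becomes NaN
net_interval['utilization_percentage'] = (
    net_interval['bytes_total'] * 100 / nic_speed)
# -> NaN, 5.0, 6.0

# Container CPU: cpu_usage/total is a cumulative counter in nanoseconds; with
# one-second samples, delta / 1e7 gives a percentage (1e9 ns == 100%).
cpu = pandas.DataFrame({
    'timestamp': [0, 1, 2],
    'cpu_usage_total': [0, 250_000_000, 1_250_000_000],
})
cpu_interval = cpu.set_index('timestamp').diff()
cpu_interval['cpu_percentage'] = cpu_interval['cpu_usage_total'] / 10000000
# -> NaN, 25.0, 100.0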