def __init__(self, traces, columns, templates, pivot, filters,
             window=None, zip_constraints=True):
    self._ip_vec = []
    self._ip_vec.append(listify(traces))
    self._ip_vec.append(listify(columns))
    self._ip_vec.append(listify(templates))

    self._lens = list(map(len, self._ip_vec))
    self._max_len = max(self._lens)
    self._pivot = pivot
    self._filters = filters
    self.window = window
    self._constraints = []

    self._trace_expanded = False
    self._expand()
    if zip_constraints:
        self._populate_zip_constraints()
    else:
        self._populate_constraints()
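# Every snippet in this section leans on listify() to accept either a
# scalar or a list argument. It is not defined here, so this is a minimal
# sketch of the behaviour the call sites below assume (not necessarily
# the exact library implementation):
def listify(value):
    """Return `value` as a list: wrap scalars, pass lists through."""
    if not isinstance(value, list):
        value = [value]
    return value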
def get_trace_event_data(trace, execnames=None, pids=None):
    """Create a list of objects that can be consumed by EventPlot to plot
    task residency like kernelshark
    """
    if execnames:
        execnames = listify(execnames)
    if pids:
        pids = listify(pids)

    data = collections.defaultdict(list)
    pmap = {}

    data_frame = trace.sched_switch.data_frame
    start_idx = data_frame.index.values[0]
    end_idx = data_frame.index.values[-1]

    procs = set()

    for index, row in data_frame.iterrows():
        prev_pid = row["prev_pid"]
        next_pid = row["next_pid"]
        next_comm = row["next_comm"]

        if prev_pid in pmap:
            name = pmap[prev_pid]
            data[name][-1][1] = index
            del pmap[prev_pid]

        name = "{}-{}".format(next_comm, next_pid)

        if next_pid in pmap:
            # Corrupted trace probably due to dropped events. We
            # don't know when the pid in pmap finished. We just
            # ignore it and don't plot it
            warn_str = "Corrupted trace (dropped events) for PID {} at time {}". \
                       format(next_pid, index)
            warnings.warn(warn_str)
            del pmap[next_pid]
            del data[name][-1]

        if next_pid != 0 and not next_comm.startswith("migration"):
            if execnames and next_comm not in execnames:
                continue

            if pids and next_pid not in pids:
                continue

            data[name].append([index, end_idx, row["__cpu"]])
            pmap[next_pid] = name
            procs.add(name)

    return data, procs, [start_idx, end_idx]
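# A hedged usage sketch for the helper above, intended (per the docstring)
# to feed trappy.EventPlot. The trace file name is hypothetical; the
# output shapes below are read directly from the function body:
import trappy

trace = trappy.FTrace("trace.dat", scope="sched")   # hypothetical path
data, procs, domain = get_trace_event_data(trace, execnames=["top"])
# data:   {"comm-pid": [[start, end, cpu], ...]} residency intervals
# procs:  set of "comm-pid" keys
# domain: [first_timestamp, last_timestamp]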
def plotCPU(self, cpus=None):
    """
    Plot CPU-related signals for both big and LITTLE clusters.

    :param cpus: list of CPUs to be plotted
    :type cpus: list(int)
    """
    if not self._trace.hasEvents('sched_load_avg_cpu'):
        logging.warn('Events [sched_load_avg_cpu] not found, '
                     'plot DISABLED!')
        return

    # Filter on specified cpus
    if cpus is None:
        cpus = sorted(self._platform['clusters']['little'] +
                      self._platform['clusters']['big'])
    cpus = listify(cpus)

    # Plot: big CPUs
    bcpus = set(cpus) & set(self._platform['clusters']['big'])
    if bcpus:
        self._plotCPU(bcpus, "big")

    # Plot: LITTLE CPUs
    lcpus = set(cpus) & set(self._platform['clusters']['little'])
    if lcpus:
        self._plotCPU(lcpus, "LITTLE")
def __init__(self, name="", normalize_time=True, scope="all", events=[], window=(0, None), abs_window=(0, None)): super(GenericFTrace, self).__init__(name) if not hasattr(self, "needs_raw_parsing"): self.needs_raw_parsing = False self.class_definitions.update(self.dynamic_classes.items()) self.__add_events(listify(events)) if scope == "thermal": self.class_definitions.update(self.thermal_classes.items()) elif scope == "sched": self.class_definitions.update(self.sched_classes.items()) elif scope != "custom": self.class_definitions.update(self.thermal_classes.items() + self.sched_classes.items()) for attr, class_def in self.class_definitions.iteritems(): trace_class = class_def() setattr(self, attr, trace_class) self.trace_classes.append(trace_class) self.__parse_trace_file(self.trace_path, window, abs_window) if self.needs_raw_parsing and (self.trace_path_raw is not None): self.__parse_trace_file(self.trace_path_raw, window, abs_window, raw=True) self.finalize_objects() if normalize_time: self.normalize_time()
def plotClusterIdleStateResidency(self, clusters=None, pct=False): """ Plot per-cluster idle state residency in a given cluster, i.e. the amount of time cluster `cluster` spent in idle state `i`. By default, both 'big' and 'LITTLE' clusters data are plotted. Requires cpu_idle following trace events. :param clusters: name of the clusters to be plotted (all of them by default) :type clusters: str ot list(str) """ if not self._trace.hasEvents('cpu_idle'): logging.warn('Events [cpu_idle] not found, plot DISABLED!') return # Sanitize clusters if clusters is None: _clusters = self._platform['clusters'].keys() else: _clusters = listify(clusters) # Precompute residencies for each cluster residencies = [] xmax = 0.0 for c in _clusters: r = self._dfg_cluster_idle_state_residency(c.lower()) residencies.append(ResidencyData('{} Cluster'.format(c), r)) max_time = r.max().values[0] if xmax < max_time: xmax = max_time self._plotIdleStateResidency(residencies, 'cluster', xmax, pct=pct)
def _parse_value(self, signal_def):
    """Parse a signal definition into a (template, column, color) tuple

    :param signal_def: A signal definition. E.g. "trace_class:column"
    :type signal_def: str
    """
    match = re.match(r"(?P<event>[^:]+):(?P<column>[^:]+)(?P<color>:.+)?",
                     signal_def)
    event = match.group("event")
    column = match.group("column")
    color_match = match.group("color")
    if color_match:
        color_list = color_match[1:].split(",", 2)
        color = [int(n, 16) if n.startswith("0x") else int(n)
                 for n in color_list]
    else:
        color = None

    try:
        return self._event_map[event], column, color
    except KeyError:
        for trace in listify(self.traces):
            if event in trace.class_definitions:
                self._event_map[event] = trace.class_definitions[event]
                return self._event_map[event], column, color

        raise ValueError("Event: " + event + " not found in Trace Object")
def _get_data_frame(self, cls):
    """Get the data frame from the BareTrace object, applying the window
    and the filters"""
    data_frame = getattr(self.data, cls.name).data_frame

    if data_frame.empty:
        return data_frame
    elif self._window[1] is None:
        data_frame = data_frame.loc[self._window[0]:]
    else:
        data_frame = data_frame.loc[self._window[0]:self._window[1]]

    if self._filters:
        criterion = pd.Series([True] * len(data_frame),
                              index=data_frame.index)

        for filter_col, wanted_vals in self._filters.iteritems():
            try:
                dfr_col = data_frame[filter_col]
            except KeyError:
                continue

            criterion &= dfr_col.isin(listify(wanted_vals))

        data_frame = data_frame[criterion]

    return data_frame
def _get_data_frame(self, cls):
    """Get the data frame from the BareTrace object, applying the window
    and the filters"""
    data_frame = getattr(self.data, cls.name).data_frame

    if self._window[1] is None:
        data_frame = data_frame.loc[self._window[0]:]
    else:
        data_frame = data_frame.loc[self._window[0]:self._window[1]]

    if self._filters:
        criterion = pd.Series([True] * len(data_frame),
                              index=data_frame.index)

        for filter_col, wanted_vals in self._filters.iteritems():
            try:
                dfr_col = data_frame[filter_col]
            except KeyError:
                continue

            criterion &= dfr_col.isin(listify(wanted_vals))

        data_frame = data_frame[criterion]

    return data_frame
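# For reference, a self-contained sketch of the window-then-filter logic
# the two _get_data_frame variants above implement; the frame content and
# filter values are made up, and listify is the sketch given earlier:
import pandas as pd

df = pd.DataFrame({"cpu": [0, 1, 0, 1], "load": [10, 20, 30, 40]},
                  index=[0.0, 0.5, 1.0, 1.5])

window = (0.5, 1.5)                 # applied via label-based .loc slicing
df = df.loc[window[0]:window[1]]

filters = {"cpu": 0}                # scalars accepted thanks to listify
criterion = pd.Series([True] * len(df), index=df.index)
for col, wanted in filters.items():
    criterion &= df[col].isin(listify(wanted))
print(df[criterion])                # rows inside the window with cpu == 0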
def plotClusterIdleStateResidency(self, clusters=None, pct=False): """ Plot per-cluster idle state residency in a given cluster, i.e. the amount of time cluster `cluster` spent in idle state `i`. By default, both 'big' and 'LITTLE' clusters data are plotted. Requires cpu_idle following trace events. :param clusters: name of the clusters to be plotted (all of them by default) :type clusters: str ot list(str) """ if not self._trace.hasEvents('cpu_idle'): self._log.warning('Events [cpu_idle] not found, plot DISABLED!') return # Sanitize clusters if clusters is None: _clusters = self._platform['clusters'].keys() else: _clusters = listify(clusters) # Precompute residencies for each cluster residencies = [] xmax = 0.0 for c in _clusters: r = self._dfg_cluster_idle_state_residency(c.lower()) residencies.append(ResidencyData('{} Cluster'.format(c), r)) max_time = r.max().values[0] if xmax < max_time: xmax = max_time self._plotIdleStateResidency(residencies, 'cluster', xmax, pct=pct)
def __init__(self, path=".", name="", normalize_time=True, scope="all", events=[], window=(0, None), abs_window=(0, None)): super(FTrace, self).__init__(name) self.trace_path, self.trace_path_raw = self.__process_path(path) self.class_definitions.update(self.dynamic_classes.items()) self.__add_events(listify(events)) if scope == "thermal": self.class_definitions.update(self.thermal_classes.items()) elif scope == "sched": self.class_definitions.update(self.sched_classes.items()) elif scope != "custom": self.class_definitions.update(self.thermal_classes.items() + self.sched_classes.items()) for attr, class_def in self.class_definitions.iteritems(): trace_class = class_def() setattr(self, attr, trace_class) self.trace_classes.append(trace_class) self.__parse_trace_file(window, abs_window) self.__parse_trace_file(window, abs_window, raw=True) self.finalize_objects() if normalize_time: self.normalize_time()
def plotClusterFrequencyResidency(self, clusters=None, pct=False, active=False): """ Plot the frequency residency in a given cluster, i.e. the amount of time cluster `cluster` spent at frequency `f_i`. By default, both 'big' and 'LITTLE' clusters data are plotted. Requires the following trace events: - cpu_frequency - cpu_idle :param clusters: name of the clusters to be plotted (all of them by default) :type clusters: str ot list(str) :param pct: plot residencies in percentage :type pct: bool :param active: for percentage plot specify whether to plot active or total time. Default is TOTAL time :type active: bool """ if not self._trace.hasEvents('cpu_frequency'): self._log.warning( 'Events [cpu_frequency] not found, plot DISABLED!') return if not self._trace.hasEvents('cpu_idle'): self._log.warning('Events [cpu_idle] not found, plot DISABLED!') return # Assumption: all CPUs in a cluster run at the same frequency, i.e. the # frequency is scaled per-cluster not per-CPU. Hence, we can limit the # cluster frequencies data to a single CPU if not self._trace.freq_coherency: self._log.warning( 'Cluster frequency is not coherent, plot DISABLED!') return # Sanitize clusters if clusters is None: _clusters = self._platform['clusters'].keys() else: _clusters = listify(clusters) # Precompute active and total time for each cluster residencies = [] xmax = 0.0 for cluster in _clusters: res = self._getFrequencyResidency( self._platform['clusters'][cluster.lower()]) residencies.append(ResidencyData('{} Cluster'.format(cluster), res)) max_time = res.total.max().values[0] if xmax < max_time: xmax = max_time self._plotFrequencyResidency(residencies, 'cluster', xmax, pct, active)
def __init__(self, name="", normalize_time=True, scope="all", events=[], window=(0, None), abs_window=(0, None)): super(GenericFTrace, self).__init__(name) self.class_definitions.update(self.dynamic_classes.items()) self.__add_events(listify(events)) if scope == "thermal": self.class_definitions.update(self.thermal_classes.items()) elif scope == "sched": self.class_definitions.update(self.sched_classes.items()) elif scope != "custom": self.class_definitions.update(self.thermal_classes.items() + self.sched_classes.items()) for attr, class_def in self.class_definitions.iteritems(): trace_class = class_def() setattr(self, attr, trace_class) self.trace_classes.append(trace_class) # save parameters to complete init later self.normalize_time = normalize_time self.window = window self.abs_window = abs_window self.max_window = (0, None) self._do_parse()
def plotClusterFrequencyResidency(self, clusters=None, pct=False, active=False): """ Plot the frequency residency in a given cluster, i.e. the amount of time cluster `cluster` spent at frequency `f_i`. By default, both 'big' and 'LITTLE' clusters data are plotted. Requires the following trace events: - cpu_frequency - cpu_idle :param clusters: name of the clusters to be plotted (all of them by default) :type clusters: str ot list(str) :param pct: plot residencies in percentage :type pct: bool :param active: for percentage plot specify whether to plot active or total time. Default is TOTAL time :type active: bool """ if not self._trace.hasEvents('cpu_frequency'): self._log.warning('Events [cpu_frequency] not found, plot DISABLED!') return if not self._trace.hasEvents('cpu_idle'): self._log.warning('Events [cpu_idle] not found, plot DISABLED!') return if 'clusters' not in self._platform: self._log.warning('No platform cluster info. Plot DISABLED!') return # Assumption: all CPUs in a cluster run at the same frequency, i.e. the # frequency is scaled per-cluster not per-CPU. Hence, we can limit the # cluster frequencies data to a single CPU if not self._trace.freq_coherency: self._log.warning('Cluster frequency is not coherent, plot DISABLED!') return # Sanitize clusters if clusters is None: _clusters = self._platform['clusters'].keys() else: _clusters = listify(clusters) # Precompute active and total time for each cluster residencies = [] xmax = 0.0 for cluster in _clusters: res = self._getFrequencyResidency( self._platform['clusters'][cluster.lower()]) residencies.append(ResidencyData('{} Cluster'.format(cluster), res)) max_time = res.total.max().values[0] if xmax < max_time: xmax = max_time self._plotFrequencyResidency(residencies, 'cluster', xmax, pct, active)
def plotCPUFrequencyResidency(self, cpus=None, pct=False, active=False): """ Plot per-CPU frequency residency. big CPUs are plotted first and then LITTLEs. Requires the following trace events: - cpu_frequency - cpu_idle :param cpus: list of CPU IDs. By default plot all CPUs :type cpus: list(int) or int :param pct: plot residencies in percentage :type pct: bool :param active: for percentage plot specify whether to plot active or total time. Default is TOTAL time :type active: bool """ if not self._trace.hasEvents('cpu_frequency'): self._log.warning( 'Events [cpu_frequency] not found, plot DISABLED!') return if not self._trace.hasEvents('cpu_idle'): self._log.warning('Events [cpu_idle] not found, plot DISABLED!') return if cpus is None: # Generate plots only for available CPUs cpufreq_data = self._dfg_trace_event('cpu_frequency') _cpus = range(cpufreq_data.cpu.max() + 1) else: _cpus = listify(cpus) # Split between big and LITTLE CPUs ordered from higher to lower ID _cpus.reverse() big_cpus = [c for c in _cpus if c in self._platform['clusters']['big']] little_cpus = [ c for c in _cpus if c in self._platform['clusters']['little'] ] _cpus = big_cpus + little_cpus # Precompute active and total time for each CPU residencies = [] xmax = 0.0 for cpu in _cpus: res = self._getFrequencyResidency(cpu) residencies.append(ResidencyData('CPU{}'.format(cpu), res)) max_time = res.total.max().values[0] if xmax < max_time: xmax = max_time self._plotFrequencyResidency(residencies, 'cpu', xmax, pct, active)
def __init__(self, traces, columns, templates, pivot, filters,
             zip_constraints=True):
    self._ip_vec = []
    self._ip_vec.append(listify(traces))
    self._ip_vec.append(listify(columns))
    self._ip_vec.append(listify(templates))

    self._lens = map(len, self._ip_vec)
    self._max_len = max(self._lens)
    self._pivot = pivot
    self._filters = filters
    self._constraints = []

    self._trace_expanded = False
    self._expand()
    if zip_constraints:
        self._populate_zip_constraints()
    else:
        self._populate_constraints()
def plotCPUFrequencyResidency(self, cpus=None, pct=False, active=False): """ Plot per-CPU frequency residency. big CPUs are plotted first and then LITTLEs. Requires the following trace events: - cpu_frequency - cpu_idle :param cpus: List of cpus. By default plot all CPUs :type cpus: list(str) :param pct: plot residencies in percentage :type pct: bool :param active: for percentage plot specify whether to plot active or total time. Default is TOTAL time :type active: bool """ if not self._trace.hasEvents('cpu_frequency'): logging.warn('Events [cpu_frequency] not found, plot DISABLED!') return if not self._trace.hasEvents('cpu_idle'): logging.warn('Events [cpu_idle] not found, plot DISABLED!') return if cpus is None: # Generate plots only for available CPUs cpufreq_data = self._dfg_trace_event('cpu_frequency') _cpus = range(cpufreq_data.cpu.max()+1) else: _cpus = listify(cpus) # Split between big and LITTLE CPUs ordered from higher to lower ID _cpus.reverse() big_cpus = [c for c in _cpus if c in self._platform['clusters']['big']] little_cpus = [c for c in _cpus if c in self._platform['clusters']['little']] _cpus = big_cpus + little_cpus # Precompute active and total time for each CPU residencies = [] xmax = 0.0 for cpu in _cpus: res = self._getCPUFrequencyResidency(cpu) residencies.append(ResidencyData('CPU{}'.format(cpu), res)) max_time = res.total.max().values[0] if xmax < max_time: xmax = max_time self._plotFrequencyResidency(residencies, 'cpu', xmax, pct, active)
def aggregate(self, **kwargs):
    """
    Aggregate implementation that aggregates triggers for a given
    topological level. All the arguments passed to it are forwarded
    to the aggregator function except level (if present)

    :return: A scalar or a vector aggregated result. Each group in the
        level produces an element in the result list with a one to one
        index correspondence
        ::

            groups["level"] = [[1, 2], [3, 4]]
            result = [result_1, result_2]
    """
    level = kwargs.pop("level", "all")

    # This function is a hot spot in the code. It is
    # worth considering a memoize decorator to cache
    # the function. The memoization can also be
    # maintained by the aggregator object. This will
    # help the code scale efficiently
    level_groups = self.topology.get_level(level)
    result = []

    if not self._aggregated:
        self._aggregate_base()

    for group in level_groups:
        group = listify(group)
        if self._aggfunc is not None:
            level_res = self._aggfunc(self._result[group[0]], **kwargs)
        else:
            level_res = self._result[group[0]]

        for node in group[1:]:
            if self._aggfunc is not None:
                node_res = self._aggfunc(self._result[node], **kwargs)
            else:
                node_res = self._result[node]

            level_res += node_res

        result.append(level_res)

    return result
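# A hedged call-site sketch for aggregate(); the aggregator object and
# its topology are hypothetical, with the group layout taken from the
# docstring example:
#
#   topology.get_level("cluster") == [[1, 2], [3, 4]]
#
# Each group contributes one element to the result: _aggfunc is applied
# to the first node and the remaining nodes are accumulated with `+=`.
per_cluster = aggregator.aggregate(level="cluster")   # [res_1, res_2]
overall = aggregator.aggregate(level="all")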
def _describe_signals(self):
    """Internal Function for populating templates and columns
    from signals
    """
    if "column" in self._attr or self.templates:
        raise ValueError("column/templates specified with values")

    self._attr["column"] = []
    if self.templates is None:
        self.templates = []

    for value in listify(self._attr["signals"]):
        template, column = self._value_parser.parseString(value)[0]
        self.templates.append(template)
        self._attr["column"].append(column)
def _check_data(self):
    """Internal function to check the received data"""
    data = listify(self.traces)

    if len(data):
        mask = map(lambda x: isinstance(x, DataFrame), data)
        data_frame = reduce(lambda x, y: x and y, mask)
        sig_or_template = self.templates or "signals" in self._attr

        if not data_frame and not sig_or_template:
            raise ValueError(
                "Cannot understand data. Accepted DataFormats are "
                "pandas.DataFrame or trappy.FTrace/BareTrace/SysTrace "
                "(with templates)")
        elif data_frame and not self._attr["column"]:
            raise ValueError("Column not specified for DataFrame input")
    else:
        raise ValueError("Empty Data received")
def plotProfilingStats(self, functions=None, metrics='avg'): """ Plot functions profiling metrics for the specified kernel functions. For each speficied metric a barplot is generated which report the value of the metric when the kernel function has been executed on each CPU. By default all the kernel functions are plotted. :param functions: the name of list of name of kernel functions to plot :type functions: str or list(str) :param metrics: the metrics to plot avg - average execution time time - total execution time :type metrics: srt or list(str) """ if not hasattr(self._trace, '_functions_stats_df'): logging.warning('Functions stats data not available') return metrics = listify(metrics) df = self._trace.data_frame.functions_stats(functions) # Check that all the required metrics are acutally availabe available_metrics = df.columns.tolist() if not set(metrics).issubset(set(available_metrics)): msg = 'Metrics {} not supported, available metrics are {}'\ .format(set(metrics) - set(available_metrics), available_metrics) raise ValueError(msg) for metric in metrics: if metric.upper() == 'AVG': title = 'Average Completion Time per CPUs' ylabel = 'Completion Time [us]' if metric.upper() == 'TIME': title = 'Total Execution Time per CPUs' ylabel = 'Execution Time [us]' data = df[metric.lower()].unstack() axes = data.plot(kind='bar', figsize=(16, 8), legend=True, title=title, table=True) axes.set_ylabel(ylabel) axes.get_xaxis().set_visible(False)
def _check_data(self):
    """Internal function to check the received data"""
    data = listify(self.traces)

    if len(data):
        mask = map(lambda x: isinstance(x, DataFrame), data)
        data_frame = reduce(lambda x, y: x and y, mask)
        sig_or_template = self.templates or "signals" in self._attr

        if not data_frame and not sig_or_template:
            raise ValueError(
                "Cannot understand data. Accepted DataFormats are "
                "pandas.DataFrame or trappy.FTrace/BareTrace/SysTrace "
                "(with templates)")
        elif data_frame and "column" not in self._attr:
            raise ValueError("Column not specified for DataFrame input")
    else:
        raise ValueError("Empty Data received")
def _parse_value(self, tokens):
    """Grammar parser function to parse a signal"""
    event, column = tokens[0]

    try:
        return self._event_map[event], column
    except KeyError:
        for trace in listify(self.traces):
            if event in trace.class_definitions:
                self._event_map[event] = trace.class_definitions[event]
                return self._event_map[event], column

        raise ValueError(
            "Event: " + event + " not found in Trace Object")
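# The _parse_value variants above handle the same "event:column[:color]"
# signal grammar; a small runnable illustration using the regex from the
# color-aware variant (the event name is hypothetical):
import re

SIGNAL_RE = r"(?P<event>[^:]+):(?P<column>[^:]+)(?P<color>:.+)?"
m = re.match(SIGNAL_RE, "sched_load_avg_cpu:util_avg:0xFF,0,0")
print(m.group("event"))    # 'sched_load_avg_cpu'
print(m.group("column"))   # 'util_avg'
print(m.group("color"))    # ':0xFF,0,0' -> parsed into [255, 0, 0]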
def plotCPUIdleStateResidency(self, cpus=None, pct=False):
    """
    Plot per-CPU idle state residency. big CPUs are plotted first and then
    LITTLEs.

    Requires cpu_idle trace events.

    :param cpus: list of CPU IDs. By default plot all CPUs
    :type cpus: list(int) or int

    :param pct: plot residencies in percentage
    :type pct: bool
    """
    if not self._trace.hasEvents('cpu_idle'):
        logging.warn('Events [cpu_idle] not found, '
                     'plot DISABLED!')
        return

    if cpus is None:
        # Generate plots only for available CPUs
        cpuidle_data = self._dfg_trace_event('cpu_idle')
        _cpus = range(cpuidle_data.cpu_id.max() + 1)
    else:
        _cpus = listify(cpus)

    # Split between big and LITTLE CPUs ordered from higher to lower ID
    _cpus.reverse()
    big_cpus = [c for c in _cpus if c in self._platform['clusters']['big']]
    little_cpus = [c for c in _cpus
                   if c in self._platform['clusters']['little']]
    _cpus = big_cpus + little_cpus

    residencies = []
    xmax = 0.0
    for cpu in _cpus:
        r = self._dfg_cpu_idle_state_residency(cpu)
        residencies.append(ResidencyData('CPU{}'.format(cpu), r))

        max_time = r.max().values[0]
        if xmax < max_time:
            xmax = max_time

    self._plotIdleStateResidency(residencies, 'cpu', xmax, pct=pct)
def plotProfilingStats(self, functions=None, metrics='avg'): """ Plot functions profiling metrics for the specified kernel functions. For each speficied metric a barplot is generated which report the value of the metric when the kernel function has been executed on each CPU. By default all the kernel functions are plotted. :param functions: the name of list of name of kernel functions to plot :type functions: str or list(str) :param metrics: the metrics to plot avg - average execution time time - total execution time :type metrics: srt or list(str) """ if not hasattr(self._trace, '_functions_stats_df'): self._log.warning('Functions stats data not available') return metrics = listify(metrics) df = self._trace.data_frame.functions_stats(functions) # Check that all the required metrics are acutally availabe available_metrics = df.columns.tolist() if not set(metrics).issubset(set(available_metrics)): msg = 'Metrics {} not supported, available metrics are {}'\ .format(set(metrics) - set(available_metrics), available_metrics) raise ValueError(msg) for metric in metrics: if metric.upper() == 'AVG': title = 'Average Completion Time per CPUs' ylabel = 'Completion Time [us]' if metric.upper() == 'TIME': title = 'Total Execution Time per CPUs' ylabel = 'Execution Time [us]' data = df[metric.lower()].unstack() axes = data.plot(kind='bar', figsize=(16, 8), legend=True, title=title, table=True) axes.set_ylabel(ylabel) axes.get_xaxis().set_visible(False)
def __init__(self, name="", normalize_time=True, scope="all", events=[], window=(0, None), abs_window=(0, None)): super(GenericFTrace, self).__init__(name) self.__add_events(listify(events)) if scope == "thermal": self.class_definitions.update(self.thermal_classes) self.class_definitions.update(self.dynamic_classes) elif scope == "sched": self.class_definitions.update(self.sched_classes) self.class_definitions.update(self.dynamic_classes) elif scope != "custom": self.class_definitions.update(self.thermal_classes) self.class_definitions.update(self.sched_classes) self.class_definitions.update(self.dynamic_classes) # Sanity check on the unique words for cls1, cls2 in itertools.combinations( self.class_definitions.values(), 2): if cls1.unique_word in cls2.unique_word or \ cls2.unique_word in cls1.unique_word: raise RuntimeError( 'Events unique words must not be a substring of the unique word of another event: "{cls1.unique_word}" {cls1} and "{cls2.unique_word}" {cls2}' .format(cls1=cls1, cls2=cls2)) for attr, class_def in self.class_definitions.items(): trace_class = class_def() setattr(self, attr, trace_class) self.trace_classes.append(trace_class) # save parameters to complete init later self.normalize_time = normalize_time self.window = window self.abs_window = abs_window self.max_window = (0, None) self._do_parse()
def _describe_signals(self):
    """Internal Function for populating templates and columns
    from signals
    """
    if "column" in self._attr or self.templates:
        raise ValueError("column/templates specified with values")

    self._attr["column"] = []
    self.templates = []
    colors = []

    for value in listify(self._attr["signals"]):
        template, column, color = self._parse_value(value)
        self.templates.append(template)
        self._attr["column"].append(column)
        colors.append(color)

    if any(colors):
        self._attr["colors"] = colors
def __init__(self, name="", normalize_time=True, scope="all", events=[], event_callbacks={}, window=(0, None), abs_window=(0, None), build_df=True): super(GenericFTrace, self).__init__(name, build_df) self.normalized_time = normalize_time if not hasattr(self, "needs_raw_parsing"): self.needs_raw_parsing = False self.class_definitions.update(self.dynamic_classes.items()) self.__add_events(listify(events)) if scope == "thermal": self.class_definitions.update(self.thermal_classes.items()) elif scope == "sched": self.class_definitions.update(self.sched_classes.items()) elif scope != "custom": self.class_definitions.update(self.thermal_classes.items() + self.sched_classes.items()) for attr, class_def in self.class_definitions.iteritems(): trace_class = class_def() if event_callbacks.has_key(attr): trace_class.callback = event_callbacks[attr] setattr(self, attr, trace_class) self.trace_classes.append(trace_class) self.__parse_trace_file(self.trace_path, window, abs_window) if self.needs_raw_parsing and (self.trace_path_raw is not None): self.__parse_trace_file(self.trace_path_raw, window, abs_window, raw=True) self.finalize_objects()
def _dfg_functions_stats(self, functions=None):
    """
    Get a DataFrame of specified kernel functions profile data

    For each profiled function a DataFrame is returned which reports
    stats on kernel functions execution time. The reported stats are
    per-CPU and include: number of times the function has been executed
    (hits), average execution time (avg), overall execution time (time)
    and samples variance (s_2).
    By default returns a DataFrame of all the functions profiled.

    :param functions: the name of the function or a list of function
        names to report
    :type functions: str or list(str)
    """
    if not hasattr(self, '_functions_stats_df'):
        return None
    df = self._functions_stats_df
    if not functions:
        return df
    return df.loc[df.index.get_level_values(1).isin(listify(functions))]
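# A hedged usage sketch tying _dfg_functions_stats to the
# plotProfilingStats callers above. The kernel function names are
# hypothetical; `trace` is assumed to expose the data_frame accessor
# already used there:
stats = trace.data_frame.functions_stats(['schedule', '__schedule'])
# Per-CPU 'avg' metric, one column per function, unstacked the same way
# plotProfilingStats prepares its barplot data:
print(stats['avg'].unstack())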
def apply_filter_kv(key, value, data_frame, mask):
    """Internal function to apply a key value filter to a data_frame and
    update the initial condition provided in mask.

    :param key: the column of the data to be filtered
    :type key: str

    :param value: The value to be checked for

    :param data_frame: The data to be filtered
    :type data_frame: :mod:`pandas.DataFrame`

    :param mask: Initial Condition Mask
    :type mask: :mod:`pandas.Series`

    :return: A **mask** to index the data frame
    """
    value = listify(value)
    if key not in data_frame.columns:
        return mask
    else:
        for val in value:
            mask = mask & (data_frame[key] == val)
        return mask
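# A self-contained example of apply_filter_kv; the frame content is made
# up. Note that multiple values for one key are AND-ed together, so
# passing several distinct values for the same column yields an
# all-False mask:
import pandas as pd

df = pd.DataFrame({"cpu": [0, 1, 2], "load": [10, 20, 30]})
mask = pd.Series([True] * len(df), index=df.index)

mask = apply_filter_kv("cpu", 1, df, mask)        # scalar is listified
print(df[mask])                                   # row with cpu == 1

mask = apply_filter_kv("comm", "top", df, mask)   # unknown column: no-op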
def plotClusterFrequencyTransitions(self, clusters=None, pct=False):
    """
    Plot frequency transitions count of the specified clusters
    (all of them if not specified).

    Requires cpu_frequency events to be available in the trace.

    Notice that we assume that frequency is scaled at cluster level,
    therefore we always consider the first CPU of a cluster for this
    computation.

    :param clusters: name of the clusters to be plotted (all of them
        by default)
    :type clusters: str or list(str)

    :param pct: plot frequency transitions in percentage
    :type pct: bool
    """
    if not self._trace.hasEvents('cpu_frequency'):
        self._log.warn('Events [cpu_frequency] not found, plot DISABLED!')
        return
    if not self._platform or 'clusters' not in self._platform:
        self._log.warn('No platform cluster info, plot DISABLED!')
        return

    if clusters is None:
        _clusters = self._platform['clusters'].keys()
    else:
        _clusters = listify(clusters)

    n_plots = len(_clusters)
    gs = gridspec.GridSpec(n_plots, 1)
    fig = plt.figure()

    # Precompute frequency transitions
    transitions = {}
    xmax = 0
    for c in _clusters:
        # We assume frequency is scaled at cluster level and we therefore
        # pick information from the first CPU in the cluster.
        cpu_id = self._platform['clusters'][c.lower()][0]
        t = self._dfg_cpu_frequency_transitions(cpu_id)

        if pct:
            tot = t.transitions.sum()
            t = t.apply(lambda x: x * 100.0 / tot)

        transitions[c] = t
        max_cnt = t.transitions.max()
        if xmax < max_cnt:
            xmax = max_cnt

    if pct:
        yrange = 0.4 * max(6, len(t)) * n_plots
        figtype = "_pct"
        labeltype = " [%]"
    else:
        yrange = 3 * n_plots
        figtype = ""
        labeltype = ""

    for idx, c in enumerate(_clusters):
        t = transitions[c]

        axes = fig.add_subplot(gs[idx])
        if pct:
            t.T.plot.barh(ax=axes, figsize=(16, yrange),
                          stacked=True, title='{} Cluster'.format(c))
            axes.legend(loc='lower center', ncol=7)
            axes.set_xlim(0, 100)
            axes.set_yticklabels([])
        else:
            t.plot.barh(ax=axes, figsize=(16, yrange),
                        color='g', legend=False,
                        title='{} Cluster'.format(c))
            axes.set_xlim(0, xmax * 1.05)
            axes.grid(True)
            axes.set_ylabel('Frequency [MHz]')

        if idx + 1 < n_plots:
            axes.set_xticklabels([])

    axes = fig.axes[0]
    legend_y = axes.get_ylim()[1]
    axes.annotate('OPP Transitions{}'.format(labeltype),
                  xy=(0, legend_y), xytext=(-50, 25),
                  textcoords='offset points', fontsize=18)
    fig.axes[-1].set_xlabel('Number of transitions{}'.format(labeltype))

    figname = '{}cluster_freq_transitions{}.png'.format(
        self._trace.plots_prefix, figtype)
    fig.savefig(os.path.join(self._trace.plots_dir, figname),
                bbox_inches='tight')
def plotCPUFrequencyTransitions(self, cpus=None, pct=False):
    """
    Plot frequency transitions count of the specified CPUs (or all if
    not specified).

    Requires cpu_frequency events to be available in the trace.

    :param cpus: list of CPU IDs (all CPUs by default)
    :type cpus: int or list(int)

    :param pct: plot frequency transitions in percentage
    :type pct: bool
    """
    if not self._trace.hasEvents('cpu_frequency'):
        self._log.warn('Events [cpu_frequency] not found, plot DISABLED!')
        return

    df = self._dfg_trace_event('cpu_frequency')

    if cpus is None:
        _cpus = range(df.cpu.max() + 1)
    else:
        _cpus = listify(cpus)

    n_plots = len(_cpus)
    gs = gridspec.GridSpec(n_plots, 1)
    fig = plt.figure()

    # Precompute frequency transitions
    transitions = {}
    xmax = 0
    for cpu_id in _cpus:
        t = self._dfg_cpu_frequency_transitions(cpu_id)

        if pct:
            tot = t.transitions.sum()
            t = t.apply(lambda x: x * 100.0 / tot)

        transitions[cpu_id] = t
        max_cnt = t.transitions.max()
        if xmax < max_cnt:
            xmax = max_cnt

    if pct:
        yrange = 0.4 * max(6, len(t)) * n_plots
        figtype = "_pct"
        labeltype = " [%]"
    else:
        yrange = 3 * n_plots
        figtype = ""
        labeltype = ""

    for idx, cpu_id in enumerate(_cpus):
        t = transitions[cpu_id]

        axes = fig.add_subplot(gs[idx])
        if pct:
            t.T.plot.barh(ax=axes, figsize=(16, yrange),
                          stacked=True, title='CPU{}'.format(cpu_id))
            axes.legend(loc='lower center', ncol=7)
            axes.set_xlim(0, 100)
            axes.set_yticklabels([])
        else:
            t.plot.barh(ax=axes, figsize=(16, yrange),
                        color='g', legend=False,
                        title='CPU{}'.format(cpu_id))
            axes.set_xlim(0, xmax * 1.05)
            axes.grid(True)
            axes.set_ylabel('Frequency [MHz]')

        if idx + 1 < n_plots:
            axes.set_xticklabels([])

    axes = fig.axes[0]
    legend_y = axes.get_ylim()[1]
    axes.annotate('OPP Transitions{}'.format(labeltype),
                  xy=(0, legend_y), xytext=(-50, 25),
                  textcoords='offset points', fontsize=18)
    fig.axes[-1].set_xlabel('Number of transitions{}'.format(labeltype))

    figname = '{}cpu_freq_transitions{}.png'.format(
        self._trace.plots_prefix, figtype)
    fig.savefig(os.path.join(self._trace.plots_dir, figname),
                bbox_inches='tight')
def plotCPUFrequencies(self, cpus=None):
    """
    Plot frequency for the specified CPUs (or all if not specified).

    If sched_overutilized events are available, the plots will also show
    the intervals of time where the system was overutilized.

    The generated plots are also saved as PNG images under the folder
    specified by the `plots_dir` parameter of :class:`Trace`.

    :param cpus: the list of CPUs to plot, if None it generates a plot
                 for each available CPU
    :type cpus: int or list(int)

    :return: a dictionary of average frequency for each CPU.
    """
    if not self._trace.hasEvents('cpu_frequency'):
        self._log.warning('Events [cpu_frequency] not found, '
                          'plot DISABLED!')
        return
    df = self._dfg_trace_event('cpu_frequency')

    if cpus is None:
        # Generate plots only for available CPUs
        cpus = range(df.cpu.max() + 1)
    else:
        # Generate plots only specified CPUs
        cpus = listify(cpus)

    chained_assignment = pd.options.mode.chained_assignment
    pd.options.mode.chained_assignment = None

    freq = {}
    for cpu_id in listify(cpus):
        # Extract CPUs' frequencies and scale them to [MHz]
        _df = df[df.cpu == cpu_id]
        if _df.empty:
            self._log.warning('No [cpu_frequency] events for CPU%d, '
                              'plot DISABLED!', cpu_id)
            continue
        _df['frequency'] = _df.frequency / 1e3

        # Compute AVG frequency for this CPU
        avg_freq = 0
        if len(_df) > 1:
            timespan = _df.index[-1] - _df.index[0]
            avg_freq = area_under_curve(_df['frequency'],
                                        method='rect') / timespan

        # Store DF for plotting
        freq[cpu_id] = {
            'df'  : _df,
            'avg' : avg_freq,
        }

    pd.options.mode.chained_assignment = chained_assignment

    plots_count = len(freq)
    if not plots_count:
        return

    # Setup CPUs plots
    fig, pltaxes = plt.subplots(len(freq), 1,
                                figsize=(16, 4 * plots_count))

    avg_freqs = {}
    for plot_idx, cpu_id in enumerate(freq):

        # CPU frequencies and average value
        _df = freq[cpu_id]['df']
        _avg = freq[cpu_id]['avg']

        # Plot average frequency
        try:
            axes = pltaxes[plot_idx]
        except TypeError:
            axes = pltaxes
        axes.set_title('CPU{:2d} Frequency'.format(cpu_id))
        axes.axhline(_avg, color='r', linestyle='--', linewidth=2)

        # Set plot limit based on CPU min/max frequencies
        if 'clusters' in self._platform:
            for cluster, cpus in self._platform['clusters'].iteritems():
                if cpu_id not in cpus:
                    continue
                freqs = self._platform['freqs'][cluster]
                break
        else:
            freqs = df['frequency'].unique()
        axes.set_ylim((min(freqs) - 100000) / 1e3,
                      (max(freqs) + 100000) / 1e3)

        # Plot CPU frequency transitions
        _df['frequency'].plot(style=['r-'], ax=axes,
                              drawstyle='steps-post', alpha=0.4)

        # Plot overutilized regions (if signal available)
        self._trace.analysis.status.plotOverutilized(axes)

        # Finalize plot
        axes.set_xlim(self._trace.x_min, self._trace.x_max)
        axes.set_ylabel('MHz')
        axes.grid(True)
        if plot_idx + 1 < plots_count:
            axes.set_xticklabels([])
            axes.set_xlabel('')

        avg_freqs[cpu_id] = _avg / 1e3
        self._log.info('CPU%02d average frequency: %.3f GHz',
                       cpu_id, avg_freqs[cpu_id])

    # Save generated plots into datadir
    figname = '{}/{}cpus_freqs.png'\
              .format(self._trace.plots_dir, self._trace.plots_prefix)
    pl.savefig(figname, bbox_inches='tight')

    return avg_freqs
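# A hedged call-site sketch. The `trace.analysis.frequency` accessor is
# an assumption, inferred from the LISA-style accessor already used in
# the method body (`self._trace.analysis.status.plotOverutilized`):
avg_freqs = trace.analysis.frequency.plotCPUFrequencies(cpus=[0, 4])
# avg_freqs maps each plotted CPU ID to its average frequency in GHz.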
def __init__(self, triggers):
    self._triggers = listify(triggers)
    super(MultiTriggerIndexer, self).__init__(self._unify())
def plotTasks(self, tasks=None, signals=None):
    """
    Generate a common set of useful plots for each of the specified tasks

    This method allows filtering of which signals should be plotted, if
    data are available in the input trace. The list of supported
    signals is:

    Tasks signals plot:
        load_avg, util_avg, boosted_util, sched_overutilized
    Tasks residencies on CPUs:
        residencies, sched_overutilized
    Tasks PELT signals:
        load_sum, util_sum, period_contrib, sched_overutilized

    Note:
        sched_overutilized: enable the plotting of overutilization bands
                            on top of each subplot
        residencies: enable the generation of the CPUs residencies plot

    :param tasks: the list of task names and/or PIDs to plot.
                  Numerical PIDs and string task names can be mixed
                  in the same list.
                  default: all tasks defined at Trace creation time are
                  plotted
    :type tasks: list(str) or list(int)

    :param signals: list of signals (and thus plots) to generate
                    default: all the plots and signals available in the
                    current trace
    :type signals: list(str)
    """
    if not signals:
        signals = ['load_avg', 'util_avg', 'boosted_util',
                   'sched_overutilized',
                   'load_sum', 'util_sum', 'period_contrib',
                   'residencies']

    # Check for the minimum required signals to be available
    if not self._trace.hasEvents('sched_load_avg_task'):
        logging.warn('Events [sched_load_avg_task] not found, '
                     'plot DISABLED!')
        return

    # Defined list of tasks to plot
    if tasks and \
       not isinstance(tasks, str) and \
       not isinstance(tasks, list):
        raise ValueError('Wrong format for tasks parameter')

    if tasks:
        tasks_to_plot = listify(tasks)
    elif self._tasks:
        tasks_to_plot = sorted(self._tasks)
    else:
        raise ValueError('No tasks to plot specified')

    # Compute number of plots to produce
    plots_count = 0
    plots_signals = [
        # First plot: task's utilization
        {'load_avg', 'util_avg', 'boosted_util'},
        # Second plot: task residency
        {'residencies'},
        # Third plot: task's load
        {'load_sum', 'util_sum', 'period_contrib'}
    ]
    for signals_to_plot in plots_signals:
        signals_to_plot = signals_to_plot.intersection(signals)
        if len(signals_to_plot):
            plots_count = plots_count + 1

    # Grid
    gs = gridspec.GridSpec(plots_count, 1, height_ratios=[2, 1, 1])
    gs.update(wspace=0.1, hspace=0.1)

    # Build list of all PIDs for each task_name to plot
    pids_to_plot = []
    for task in tasks_to_plot:
        # Add specified PIDs to the list
        if isinstance(task, int):
            pids_to_plot.append(task)
            continue
        # Otherwise: add all the PIDs for task with the specified name
        pids_to_plot.extend(self._trace.getTaskByName(task))

    for tid in pids_to_plot:
        task_name = self._trace.getTaskByPid(tid)
        if len(task_name) == 1:
            task_name = task_name[0]
            logging.info('Plotting %5d: %s...', tid, task_name)
        else:
            logging.info('Plotting %5d: %s...', tid, ', '.join(task_name))
        plot_id = 0

        # For each task create a figure with plots_count plots
        plt.figure(figsize=(16, 2 * 6 + 3))
        plt.suptitle("Task Signals", y=.94, fontsize=16,
                     horizontalalignment='center')

        # Plot load and utilization
        signals_to_plot = {'load_avg', 'util_avg', 'boosted_util'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            axes = plt.subplot(gs[plot_id, 0])
            axes.set_title('Task [{0:d}:{1:s}] Signals'
                           .format(tid, task_name))
            plot_id = plot_id + 1
            is_last = (plot_id == plots_count)
            if 'sched_overutilized' in signals:
                signals_to_plot.append('sched_overutilized')
            self._plotTaskSignals(axes, tid, signals_to_plot, is_last)

        # Plot CPUs residency
        signals_to_plot = {'residencies'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            axes = plt.subplot(gs[plot_id, 0])
            axes.set_title(
                'Task [{0:d}:{1:s}] Residency (green: LITTLE, red: big)'
                .format(tid, task_name)
            )
            plot_id = plot_id + 1
            is_last = (plot_id == plots_count)
            if 'sched_overutilized' in signals:
                signals_to_plot.append('sched_overutilized')
            self._plotTaskResidencies(axes, tid, signals_to_plot, is_last)

        # Plot PELT signals
        signals_to_plot = {'load_sum', 'util_sum', 'period_contrib'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            axes = plt.subplot(gs[plot_id, 0])
            axes.set_title('Task [{0:d}:{1:s}] PELT Signals'
                           .format(tid, task_name))
            plot_id = plot_id + 1
            if 'sched_overutilized' in signals:
                signals_to_plot.append('sched_overutilized')
            self._plotTaskPelt(axes, tid, signals_to_plot)

        # Save generated plots into datadir
        if isinstance(task_name, list):
            task_name = re.sub('[:/]', '_', task_name[0])
        else:
            task_name = re.sub('[:/]', '_', task_name)
        figname = '{}/{}task_util_{}_{}.png'\
                  .format(self._trace.plots_dir,
                          self._trace.plots_prefix, tid, task_name)
        pl.savefig(figname, bbox_inches='tight')
def _plot_generic(self, dfr, pivot, filters=None, columns=None, prettify_name=None, width=16, height=4, drawstyle="default", ax=None, title=""): """ Generic trace plotting method The values in the column 'pivot' will be used as discriminant Let's consider a df with these columns: | time | cpu | load_avg | util_avg | ==================================== | 42 | 2 | 1812 | 400 | ------------------------------------ | 43 | 0 | 1337 | 290 | ------------------------------------ | .. | ... | .. | .. | To plot the 'util_avg' value of CPU2, the function would be used like so: :: plot_generic(df, pivot='cpu', filters={'cpu' : [2]}, columns='util_avg') CPUs could be compared by using: :: plot_generic(df, pivot='cpu', filters={'cpu' : [2, 3]}, columns='util_avg') :param dfr: Trace dataframe :type dfr: `pandas.DataFrame` :param pivot: Name of column that will serve as a pivot :type pivot: str :param filters: Dataframe column filters :type filters: dict :param columns: Name of columns whose data will be plotted :type columns: str or list(str) :param prettify_name: user-friendly stringify function for pivot values :type prettify_name: callable[str] :param width: The width of the plot :type width: int :param height: The height of the plot :type height: int :param drawstyle: The drawstyle setting of the plot :type drawstyle: str """ if prettify_name is None: def prettify_name(name): return '{}={}'.format(pivot, name) if pivot not in dfr.columns: raise ValueError( 'Invalid "pivot" parameter value: no {} column'.format(pivot)) if columns is None: # Find available columns columns = dfr.columns.tolist() columns.remove(pivot) else: # Filter out unwanted columns columns = listify(columns) try: dfr = dfr[columns + [pivot]] except KeyError as err: raise ValueError( 'Invalid "columns" parameter value: {}'.format( err.message)) # Apply filters if filters is None: filters = {} for col, vals in filters.iteritems(): dfr = dfr[dfr[col].isin(vals)] setup_plot = False if ax is None: _, ax = self._plot_setup(width, height) setup_plot = True matches = dfr[pivot].unique().tolist() for match in matches: renamed_cols = [] for col in columns: renamed_cols.append('{} {}'.format(prettify_name(match), col)) plot_dfr = dfr[dfr[pivot] == match][columns] plot_dfr.columns = renamed_cols plot_dfr.plot(ax=ax, drawstyle=drawstyle) if setup_plot: ax.set_title(title) ax.set_xlim(self._trace.x_min, self._trace.x_max) # Extend ylim for better visibility cur_lim = ax.get_ylim() lim = (cur_lim[0] - 0.1 * (cur_lim[1] - cur_lim[0]), cur_lim[1] + 0.1 * (cur_lim[1] - cur_lim[0])) ax.set_ylim(lim) plt.legend() return ax
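# A usage sketch mirroring the _plot_generic docstring examples;
# `analysis` and `df` are hypothetical stand-ins for the owning object
# and a trace dataframe with a 'cpu' column:
ax = analysis._plot_generic(df, pivot='cpu',
                            filters={'cpu': [2, 3]},
                            columns='util_avg',
                            drawstyle='steps-post',
                            title='Per-CPU util_avg')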
def plotTasks(self, tasks, signals=None):
    """
    Generate a common set of useful plots for each of the specified tasks

    This method allows filtering of which signals should be plotted, if
    data are available in the input trace. The list of supported
    signals is:

    Tasks signals plot:
        load_avg, util_avg, boosted_util, sched_overutilized
    Tasks residencies on CPUs:
        residencies, sched_overutilized
    Tasks PELT signals:
        load_sum, util_sum, period_contrib, sched_overutilized

    At least one of the previous signals must be specified to get a valid
    plot.

    Additional custom signals can be specified and they will be
    represented in the "Task signals plots" if they represent valid keys
    of the task load/utilization trace event (e.g. sched_load_avg_task).

    Note:
        sched_overutilized: enable the plotting of overutilization bands
                            on top of each subplot
        residencies: enable the generation of the CPUs residencies plot

    :param tasks: the list of task names and/or PIDs to plot.
                  Numerical PIDs and string task names can be mixed
                  in the same list.
    :type tasks: list(str) or list(int)

    :param signals: list of signals (and thus plots) to generate
                    default: all the plots and signals available in the
                    current trace
    :type signals: list(str)
    """
    if not signals:
        signals = ['load_avg', 'util_avg', 'boosted_util',
                   'sched_overutilized',
                   'load_sum', 'util_sum', 'period_contrib',
                   'residencies']

    # Check for the minimum required signals to be available
    if self._dfg_task_load_events() is None:
        self._log.warning('No trace events for task signals, '
                          'plot DISABLED')
        return

    # Defined list of tasks to plot
    if tasks and \
       not isinstance(tasks, str) and \
       not isinstance(tasks, list):
        raise ValueError('Wrong format for tasks parameter')

    if tasks:
        tasks_to_plot = listify(tasks)
    else:
        raise ValueError('No tasks to plot specified')

    # Compute number of plots to produce
    plots_count = 0
    plots_signals = [
        # First plot: task's utilization
        {'load_avg', 'util_avg', 'boosted_util'},
        # Second plot: task residency
        {'residencies'},
        # Third plot: task's load
        {'load_sum', 'util_sum', 'period_contrib'}
    ]
    hr = []
    ysize = 0
    for plot_id, signals_to_plot in enumerate(plots_signals):
        signals_to_plot = signals_to_plot.intersection(signals)
        if len(signals_to_plot):
            plots_count = plots_count + 1
            # Use bigger size only for the first plot
            hr.append(3 if plot_id == 0 else 1)
            ysize = ysize + (8 if plot_id else 4)

    # Grid
    gs = gridspec.GridSpec(plots_count, 1, height_ratios=hr)
    gs.update(wspace=0.1, hspace=0.1)

    # Build list of all PIDs for each task_name to plot
    pids_to_plot = []
    for task in tasks_to_plot:
        # Add specified PIDs to the list
        if isinstance(task, int):
            pids_to_plot.append(task)
            continue
        # Otherwise: add all the PIDs for task with the specified name
        pids_to_plot.extend(self._trace.getTaskByName(task))

    for tid in pids_to_plot:
        savefig = False

        task_name = self._trace.getTaskByPid(tid)
        self._log.info('Plotting [%d:%s]...', tid, task_name)
        plot_id = 0

        # For each task create a figure with plots_count plots
        plt.figure(figsize=(16, ysize))
        plt.suptitle('Task Signals',
                     y=.94, fontsize=16, horizontalalignment='center')

        # Plot load and utilization
        signals_to_plot = {'load_avg', 'util_avg', 'boosted_util'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            axes = plt.subplot(gs[plot_id, 0])
            axes.set_title('Task [{0:d}:{1:s}] Signals'
                           .format(tid, task_name))
            plot_id = plot_id + 1
            is_last = (plot_id == plots_count)
            self._plotTaskSignals(axes, tid, signals, is_last)
            savefig = True

        # Plot CPUs residency
        signals_to_plot = {'residencies'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            if not self._trace.has_big_little:
                self._log.warning(
                    'No big.LITTLE platform data, residencies plot '
                    'disabled')
            else:
                axes = plt.subplot(gs[plot_id, 0])
                axes.set_title(
                    'Task [{0:d}:{1:s}] Residency (green: LITTLE, red: big)'
                    .format(tid, task_name)
                )
                plot_id = plot_id + 1
                is_last = (plot_id == plots_count)
                if 'sched_overutilized' in signals:
                    signals_to_plot.append('sched_overutilized')
                self._plotTaskResidencies(axes, tid, signals_to_plot,
                                          is_last)
                savefig = True

        # Plot PELT signals
        signals_to_plot = {'load_sum', 'util_sum', 'period_contrib'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            axes = plt.subplot(gs[plot_id, 0])
            axes.set_title('Task [{0:d}:{1:s}] PELT Signals'
                           .format(tid, task_name))
            plot_id = plot_id + 1
            if 'sched_overutilized' in signals:
                signals_to_plot.append('sched_overutilized')
            self._plotTaskPelt(axes, tid, signals_to_plot)
            savefig = True

        if not savefig:
            self._log.warning('Nothing to plot for %s', task_name)
            continue

        # Save generated plots into datadir
        if isinstance(task_name, list):
            task_name = re.sub('[:/]', '_', task_name[0])
        else:
            task_name = re.sub('[:/]', '_', task_name)
        figname = '{}/{}task_util_{}_{}.png'\
                  .format(self._trace.plots_dir,
                          self._trace.plots_prefix, tid, task_name)
        pl.savefig(figname, bbox_inches='tight')
def _getFrequencyResidency(self, cluster):
    """
    Get a DataFrame with per cluster frequency residency, i.e. amount of
    time spent at a given frequency in each cluster.

    :param cluster: this can be either a single CPU ID or a list of CPU
        IDs belonging to a cluster
    :type cluster: int or list(int)

    :returns: namedtuple(ResidencyTime) - tuple of total and active time
        dataframes
    """
    if not self._trace.hasEvents('cpu_frequency'):
        self._log.warning('Events [cpu_frequency] not found, '
                          'frequency residency computation not possible!')
        return None
    if not self._trace.hasEvents('cpu_idle'):
        self._log.warning('Events [cpu_idle] not found, '
                          'frequency residency computation not possible!')
        return None

    _cluster = listify(cluster)

    freq_df = self._dfg_trace_event('cpu_frequency')
    # Assumption: all CPUs in a cluster run at the same frequency, i.e. the
    # frequency is scaled per-cluster not per-CPU. Hence, we can limit the
    # cluster frequencies data to a single CPU. This assumption is verified
    # by the Trace module when parsing the trace.
    if len(_cluster) > 1 and not self._trace.freq_coherency:
        self._log.warning('Cluster frequency is NOT coherent, '
                          'cannot compute residency!')
        return None
    cluster_freqs = freq_df[freq_df.cpu == _cluster[0]]

    # Compute TOTAL Time
    time_intervals = cluster_freqs.index[1:] - cluster_freqs.index[:-1]
    total_time = pd.DataFrame({
        'time': time_intervals,
        'frequency': [f / 1000.0
                      for f in cluster_freqs.iloc[:-1].frequency]
    })
    total_time = total_time.groupby(['frequency']).sum()

    # Compute ACTIVE Time
    cluster_active = self._trace.getClusterActiveSignal(_cluster)

    # In order to compute the active time spent at each frequency we
    # multiply 2 square waves:
    # - cluster_active, a square wave of the form:
    #     cluster_active[t] == 1 if at least one CPU is reported to be
    #                            non-idle by CPUFreq at time t
    #     cluster_active[t] == 0 otherwise
    # - freq_active, square wave of the form:
    #     freq_active[t] == 1 if at time t the frequency is f
    #     freq_active[t] == 0 otherwise
    available_freqs = sorted(cluster_freqs.frequency.unique())
    cluster_freqs = cluster_freqs.join(
        cluster_active.to_frame(name='active'), how='outer')
    cluster_freqs.fillna(method='ffill', inplace=True)
    nonidle_time = []
    for f in available_freqs:
        freq_active = cluster_freqs.frequency.apply(
            lambda x: 1 if x == f else 0)
        active_t = cluster_freqs.active * freq_active
        # Compute total time by integrating the square wave
        nonidle_time.append(self._trace.integrate_square_wave(active_t))

    active_time = pd.DataFrame({'time': nonidle_time},
                               index=[f / 1000.0
                                      for f in available_freqs])
    active_time.index.name = 'frequency'
    return ResidencyTime(total_time, active_time)
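# A minimal sketch of the square-wave integration that
# _getFrequencyResidency delegates to self._trace.integrate_square_wave;
# this assumes a 0/1 series indexed by timestamps and uses the rectangle
# rule (a sketch, not necessarily the exact library implementation):
import pandas as pd

def integrate_square_wave(series):
    """Sum of (interval length * value) over consecutive samples."""
    durations = series.index.values[1:] - series.index.values[:-1]
    return (durations * series.values[:-1]).sum()

wave = pd.Series([1, 0, 1, 0], index=[0.0, 0.3, 0.5, 1.0])
print(integrate_square_wave(wave))   # 0.3*1 + 0.2*0 + 0.5*1 = 0.8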
def plotTasks(self, tasks=None, signals=None):
    """
    Generate a common set of useful plots for each of the specified tasks

    This method allows filtering of which signals should be plotted, if
    data are available in the input trace. The list of supported
    signals is:

    Tasks signals plot:
        load_avg, util_avg, boosted_util, sched_overutilized
    Tasks residencies on CPUs:
        residencies, sched_overutilized
    Tasks PELT signals:
        load_sum, util_sum, period_contrib, sched_overutilized

    Note:
        sched_overutilized: enable the plotting of overutilization bands
                            on top of each subplot
        residencies: enable the generation of the CPUs residencies plot

    :param tasks: the list of task names and/or PIDs to plot.
                  Numerical PIDs and string task names can be mixed
                  in the same list.
                  default: all tasks defined at Trace creation time are
                  plotted
    :type tasks: list(str) or list(int)

    :param signals: list of signals (and thus plots) to generate
                    default: all the plots and signals available in the
                    current trace
    :type signals: list(str)
    """
    if not signals:
        signals = ['load_avg', 'util_avg', 'boosted_util',
                   'sched_overutilized',
                   'load_sum', 'util_sum', 'period_contrib',
                   'residencies']

    # Check for the minimum required signals to be available
    if not self._trace.hasEvents('sched_load_avg_task'):
        self._log.warning('Events [sched_load_avg_task] not found, '
                          'plot DISABLED!')
        return

    # Defined list of tasks to plot
    if tasks and \
       not isinstance(tasks, str) and \
       not isinstance(tasks, list):
        raise ValueError('Wrong format for tasks parameter')

    if tasks:
        tasks_to_plot = listify(tasks)
    elif self._tasks:
        tasks_to_plot = sorted(self._tasks)
    else:
        raise ValueError('No tasks to plot specified')

    # Compute number of plots to produce
    plots_count = 0
    plots_signals = [
        # First plot: task's utilization
        {'load_avg', 'util_avg', 'boosted_util'},
        # Second plot: task residency
        {'residencies'},
        # Third plot: task's load
        {'load_sum', 'util_sum', 'period_contrib'}
    ]
    for signals_to_plot in plots_signals:
        signals_to_plot = signals_to_plot.intersection(signals)
        if len(signals_to_plot):
            plots_count = plots_count + 1

    # Grid
    gs = gridspec.GridSpec(plots_count, 1, height_ratios=[2, 1, 1])
    gs.update(wspace=0.1, hspace=0.1)

    # Build list of all PIDs for each task_name to plot
    pids_to_plot = []
    for task in tasks_to_plot:
        # Add specified PIDs to the list
        if isinstance(task, int):
            pids_to_plot.append(task)
            continue
        # Otherwise: add all the PIDs for task with the specified name
        pids_to_plot.extend(self._trace.getTaskByName(task))

    for tid in pids_to_plot:
        task_name = self._trace.getTaskByPid(tid)
        if len(task_name) == 1:
            task_name = task_name[0]
            self._log.info('Plotting %5d: %s...', tid, task_name)
        else:
            self._log.info('Plotting %5d: %s...', tid,
                           ', '.join(task_name))
        plot_id = 0

        # For each task create a figure with plots_count plots
        plt.figure(figsize=(16, 2 * 6 + 3))
        plt.suptitle('Task Signals', y=.94, fontsize=16,
                     horizontalalignment='center')

        # Plot load and utilization
        signals_to_plot = {'load_avg', 'util_avg', 'boosted_util'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            axes = plt.subplot(gs[plot_id, 0])
            axes.set_title('Task [{0:d}:{1:s}] Signals'
                           .format(tid, task_name))
            plot_id = plot_id + 1
            is_last = (plot_id == plots_count)
            if 'sched_overutilized' in signals:
                signals_to_plot.append('sched_overutilized')
            self._plotTaskSignals(axes, tid, signals_to_plot, is_last)

        # Plot CPUs residency
        signals_to_plot = {'residencies'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            axes = plt.subplot(gs[plot_id, 0])
            axes.set_title(
                'Task [{0:d}:{1:s}] Residency (green: LITTLE, red: big)'
                .format(tid, task_name)
            )
            plot_id = plot_id + 1
            is_last = (plot_id == plots_count)
            if 'sched_overutilized' in signals:
                signals_to_plot.append('sched_overutilized')
            self._plotTaskResidencies(axes, tid, signals_to_plot, is_last)

        # Plot PELT signals
        signals_to_plot = {'load_sum', 'util_sum', 'period_contrib'}
        signals_to_plot = list(signals_to_plot.intersection(signals))
        if len(signals_to_plot) > 0:
            axes = plt.subplot(gs[plot_id, 0])
            axes.set_title('Task [{0:d}:{1:s}] PELT Signals'
                           .format(tid, task_name))
            plot_id = plot_id + 1
            if 'sched_overutilized' in signals:
                signals_to_plot.append('sched_overutilized')
            self._plotTaskPelt(axes, tid, signals_to_plot)

        # Save generated plots into datadir
        if isinstance(task_name, list):
            task_name = re.sub('[:/]', '_', task_name[0])
        else:
            task_name = re.sub('[:/]', '_', task_name)
        figname = '{}/{}task_util_{}_{}.png'\
                  .format(self._trace.plots_dir,
                          self._trace.plots_prefix, tid, task_name)
        pl.savefig(figname, bbox_inches='tight')