def show_core_runs(df, task_re, label, duration):
    """Plot a task-versus-core heat map and write it out as an HTML chart.

    Only 'sched__sched_switch' events whose task name matches ``task_re``
    are considered. Each (task, core) cell is colored from a palette
    according to a percentage computed for that cell, and a color legend is
    drawn to the right of the heat map.

    Args:
        df: pandas DataFrame of trace events. It must provide the columns
            'event', 'task_name', 'cpu' and 'duration' as well as
            'next_pid', 'pid', 'usecs' and 'next_comm' (the latter four are
            dropped). The caller's frame is not modified.
        task_re: regular expression applied to the task names with
            ``Series.str.match``.
        label: free text inserted in the chart title.
        duration: when true, the cells show the share of the capture window
            a task spent running on each core and an extra 'IDLE' column
            holds the remainder of the window; when false, the cells show
            the number of switch events per task and core.

    Returns:
        None. The chart is handed to ``output_html``; when nothing matches
        ``task_re`` a message is printed and no chart is produced.
    """
    time_span_msec = get_time_span_msec(df)

    # remove unneeded columns (drop() returns a new frame, so the caller's
    # DataFrame is left untouched)
    df = df.drop(['next_pid', 'pid', 'usecs', 'next_comm'], axis=1)
    # filter out all events except the switch events
    df = df[df.event == 'sched__sched_switch']
    df = df.drop('event', axis=1)
    df = df[df['task_name'].str.match(task_re)]
    # at this point we have a df that looks like this:
    #          task_name  cpu  duration
    # 0      ASA.1.vcpu0    8      7954
    # 1      ASA.1.vcpu0    9      5475
    # 2      ASA.1.vcpu0   10      4151
    # 8     ASA.10.vcpu0    8      4248
    # 9     ASA.10.vcpu0    9      3534
    # etc...
    if df.empty:
        # single-argument print() behaves the same on Python 2 and Python 3
        print('')
        print('No selection matching "%s"' % task_re)
        return

    gb = df.groupby(['task_name', 'cpu'], as_index=False)
    if duration:
        # add duration values
        df = gb.aggregate(np.sum)
        max_core = df.cpu.max()
        dfsum = df.drop('cpu', axis=1)
        gb = dfsum.groupby('task_name', as_index=False)
        dfsum = gb.aggregate(np.sum)
        # dfsum is the sum of all duration for given task
        # 0      ASA.1.vcpu0     78152
        # 1     ASA.10.vcpu0     65637
        # For each task, the maximum runtime is the time_span_msec
        # (100% of 1 core). The idle time for each task is therefore
        # time_span_msec * 1000 - sum(duration)
        dfsum['cpu'] = 'IDLE'
        time_span_usec = time_span_msec * 1000
        dfsum['duration'] = time_span_usec - dfsum['duration']
        # now we need to reinsert that data back to the df: this adds one
        # 'IDLE' row per task, shown as an extra heat map column
        dfm = pandas.concat([df, dfsum], ignore_index=True)
        # Add a % column with one decimal digit; the float divisor makes the
        # last division a true division on any interpreter version
        dfm['percent'] = ((dfm['duration'] * 100 * 10) // time_span_usec) / 10.0
        # This is for the legend
        min_count = 0
        max_count = 100
        range_unit = '%'
        # many core-pinned system tasks have a duration of 0
        # (swapper, watchdog...)
        dfm.fillna(100, inplace=True)
        dfm.drop(['duration'], axis=1, inplace=True)
        tooltip_count = ("time", "@percent% of core @cpu")
        title = "Task Run Time %% per Core (%s, %d msec window)" % (label, time_span_msec)
        # this is YlGnBu9[::-1] (Reverse the color order so dark is highest
        # value) with an extra intermediate color to make it 10
        palette = ['#ffffd9', '#edf8b1', '#c7e9b4', '#a3dbb7', '#7fcdbb',
                   '#41b6c4', '#1d91c0', '#225ea8', '#253494', '#081d58']
        html_prefix = 'core_runtime'
    else:
        # count number of rows with same task and cpu
        dfm = DataFrame(gb.size())
        dfm.reset_index(inplace=True)
        dfm.rename(columns={0: 'count'}, inplace=True)
        min_count = dfm['count'].min()
        max_count = dfm['count'].max()
        range_unit = ''
        spread = max_count - min_count
        # Add a % column (position of the count inside [min_count, max_count])
        if spread:
            dfm['percent'] = ((dfm['count'] - min_count) * 100) / spread
        else:
            # every cell has the same count (e.g. a single task on a single
            # core): dividing by the zero spread would give NaN percentages
            dfm['percent'] = 0.0
        tooltip_count = ("context switches", "@count")
        title = "Task Context Switches per Core (%s, %d msec window)" % (label, time_span_msec)
        palette = YlOrRd9[::-1]
        html_prefix = 'core_switches'
        max_core = dfm.cpu.max()

    # get the list of cores
    # round up to next multiple of 4 - 1
    # 0..3 -> 3
    # 4..7 -> 7 etc
    max_core |= 0x03
    max_core = max(max_core, 3)
    core_list = [str(x) for x in range(max_core + 1)]
    if duration:
        core_list.append('IDLE')
    # make room for the legend by adding 3 empty columns
    core_list += ['', '', '']

    dfm['cpu'] = dfm['cpu'].astype(str)
    # replace ':' with '_' as it would cause bokeh to misplace the labels
    # on the chart
    dfm['task_name'] = dfm['task_name'].str.replace(':', '_')
    normalize_df_task_name(dfm)

    # Y axis: sorted list of the unique task names
    task_list = pandas.unique(dfm.task_name.ravel()).tolist()
    task_list.sort()

    # Add a color column
    dfm['color'] = dfm.apply(lambda row: get_color(row['percent'], palette), axis=1)
    # switch to str type to prevent the tooltip to
    # display percent value with 3 digits
    dfm.percent = dfm.percent.astype(str)

    # make enough vertical space for the legend
    # the legend needs 1 row per palette color + 1 to fit the max value
    # so we need at least len(palette) + 1 rows
    if len(task_list) < len(palette) + 1:
        task_list += [''] * (len(palette) + 1 - len(task_list))

    TOOLS = "resize,hover,save"
    p = figure(title=title, tools=TOOLS, x_range=core_list, y_range=task_list, **title_style)
    p.plot_width = 1000
    p.plot_height = 80 + len(task_list) * 16
    p.toolbar_location = "left"
    source = ColumnDataSource(dfm)
    # the name is to flag these rectangles to enable tooltip hover on them
    # (and not enable tooltips on the legend patches)
    p.rect("cpu", "task_name", width=1, height=0.9, source=source,
           fill_alpha=0.8, color="color", name='patches')
    p.grid.grid_line_color = None

    # trace separator lines to isolate blocks across core groups
    # (numa sockets) and task-like names
    max_y = len(task_list)
    # trace a vertical line every 8 cores
    for seg_x in range(8, max_core + 7, 8):
        p.segment(x0=[seg_x + 0.5], y0=[0], x1=[seg_x + 0.5], y1=[max_y + 0.5],
                  color=GRAY, line_width=2)
    # trace a horizontal line around every group of tasks that have the
    # same first 3 characters
    prev_task_name = None
    for y, cur_task_name in enumerate(task_list):
        # an empty (padding) previous name is falsy and never starts a group
        if prev_task_name and prev_task_name[0:3] != cur_task_name[0:3]:
            p.segment(x0=[0], y0=[y + 0.5], x1=[max_core + 1.5], y1=[y + 0.5],
                      color=GRAY, line_width=0.5)
        prev_task_name = cur_task_name

    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("task", "@task_name"),
        ("core", "@cpu"),
        tooltip_count
    ])
    # only enable tooltip on rectangles with name 'patches'
    hover.names = ['patches']

    # legend to the right
    # we try to center the legend vertically
    legend_base_y = 0
    palette_len = len(palette)
    if max_y > palette_len + 1:
        legend_base_y = (max_y - palette_len - 1) // 2
    if duration:
        # IDLE cpu inserted so shift the legend by 1 position to the right
        max_core += 1

    # pass 1 is to draw the color patches
    # prepare a data source with a x, y and a color column
    x_values = np.empty(palette_len)
    x_values.fill(max_core + 2.5)
    y_values = np.arange(legend_base_y + 1.5, legend_base_y + palette_len + 1, 1)
    dfl = DataFrame({'x': x_values, 'y': y_values, 'color': palette})
    source = ColumnDataSource(dfl)
    p.rect(x='x', y='y', color='color', width=1, height=1, source=source)

    # pass 2 is to draw the text describing the ranges for the color patches
    color_value_list = get_color_value_list(min_count, max_count, palette, range_unit)
    x_values = np.empty(len(color_value_list))
    x_values.fill(max_core + 3.1)
    y_values = np.arange(legend_base_y + 0.7, legend_base_y + len(color_value_list), 1)
    dfl = DataFrame({'x': x_values, 'y': y_values, 'color_values': color_value_list})
    source = ColumnDataSource(dfl)
    p.text(x='x', y='y', text='color_values', source=source, text_font_size='8pt')

    output_html(p, html_prefix, task_re)
def show_kvm_exit_types(df, task_re, label): df = df[df['event'] == 'kvm_exit'] df = df[df['task_name'].str.match(task_re)] # the next_comm column contains the exit code exit_codes = Series(KVM_EXIT_REASONS) # add new column congaining the exit reason in clear text df['exit_reason'] = df['next_comm'].map(exit_codes) time_span_msec = get_time_span_msec(df) df.drop(['cpu', 'duration', 'event', 'next_pid', 'pid', 'next_comm', 'usecs'], inplace=True, axis=1) # Get the list of exit reasons, sorted alphabetically reasons = pandas.unique(df.exit_reason.ravel()).tolist() reasons.sort() # group by task name then exit reasons gb = df.groupby(['task_name', 'exit_reason']) # number of exit types size_series = gb.size() df = size_series.to_frame('count') df.reset_index(inplace=True) p = Bar(df, label='task_name', values='count', stack='exit_reason', title="KVM Exit types per task (%s, %d msec window)" % (label, time_span_msec), legend='top_right', tools="resize,hover,save", width=1000, height=800) p._xaxis.axis_label = "Task Name" p._xaxis.axis_label_text_font_size = "12pt" p._yaxis.axis_label = "Exit Count (sum)" p._yaxis.axis_label_text_font_size = "12pt" # Cannot find a way to display the exit reason in the tooltip # from bokeh.models.renderers import GlyphRenderer # glr = p.select(dict(type=GlyphRenderer)) # bar_source = glr[0].data_source # print bar_source.data # bar_source = glr[1].data_source # bar_source.data['exit_reason'] = ['HOHO'] hover = p.select(dict(type=HoverTool)) hover.tooltips = OrderedDict([ ("task", "$x"), # {"reason", "@exit_reason"}, ("count", "@height") ]) # specify how to output the plot(s) # table with counts gb = df.groupby(['exit_reason']) keys = gb.groups.keys() dfr_list = [] for reason in keys: dfr = gb.get_group(reason) # drop the exit reason column dfr = dfr.drop(['exit_reason'], axis=1) # rename the count column with the reason name dfr.rename(columns={'count': reason}, inplace=True) # set the task name as the index dfr.set_index('task_name', 
inplace=True) dfr_list.append(dfr) # concatenate all task columns into 1 dataframe that has the exit reason as the index # counts for missing exit reasons will be set to NaN dft = pandas.concat(dfr_list, axis=1) dft.fillna(0, inplace=True) # Add a total column dft['TOTAL'] = dft.sum(axis=1) sfmt = StringFormatter(text_align='center', font_style='bold') nfmt = NumberFormatter(format='0,0') col_names = list(dft.columns.values) col_names.sort() # move 'TOTAL' at end of list col_names.remove('TOTAL') col_names.append('TOTAL') # convert index to column name dft.reset_index(level=0, inplace=True) dft.rename(columns={'index': 'Task'}, inplace=True) columns = [TableColumn(field=name, title=name, formatter=nfmt) for name in col_names] columns.insert(0, TableColumn(field='Task', title='Task', formatter=sfmt)) table = DataTable(source=ColumnDataSource(dft), columns=columns, width=1000, row_headers=False, height='auto') output_html(vplot(p, table), 'kvm-types', task_re) '''
def show_core_runs(df, task_re, label, duration): time_span_msec = get_time_span_msec(df) # remove unneeded columns df = df.drop('next_pid', axis=1) df.drop('pid', axis=1, inplace=True) df.drop('usecs', axis=1, inplace=True) df.drop('next_comm', axis=1, inplace=True) # filter out all events except the switch events df = df[df.event == 'sched__sched_switch'] df = df.drop('event', axis=1) df = df[df['task_name'].str.match(task_re)] # at this point we have a df that looks like this: # task_name cpu duration # 0 ASA.1.vcpu0 8 7954 # 1 ASA.1.vcpu0 9 5475 # 2 ASA.1.vcpu0 10 4151 # 3 ASA.1.vcpu0 11 12391 # 4 ASA.1.vcpu0 12 21025 # 5 ASA.1.vcpu0 13 6447 # 6 ASA.1.vcpu0 14 16798 # 7 ASA.1.vcpu0 15 3911 # 8 ASA.10.vcpu0 8 4248 # 9 ASA.10.vcpu0 9 3534 # 10 ASA.10.vcpu0 10 15624 # 11 ASA.10.vcpu0 11 6925 # etc... if len(df) == 0: print print 'No selection matching "%s"' % (task_re) return gb = df.groupby(['task_name', 'cpu'], as_index=False) if duration: # add duration values df = gb.aggregate(np.sum) max_core = df.cpu.max() dfsum = df.drop('cpu', axis=1) gb = dfsum.groupby('task_name', as_index=False) dfsum = gb.aggregate(np.sum) # dfsum is the sum of all duration for given task # 0 ASA.1.vcpu0 78152 # 1 ASA.10.vcpu0 65637 # 2 ASA.11.vcpu0 81525 # 3 ASA.12.vcpu0 56488 # For each task, the maximum runtime is the time_span_msec (100% of 1 core) # The idle time for each task is therefore time_span_msec * 1000 - sum(duration) dfsum['cpu'] = 'IDLE' time_span_usec = time_span_msec * 1000 dfsum['duration'] = time_span_usec - dfsum['duration'] # now we need to reinsert that data back to the df dfm = pandas.concat([df, dfsum], ignore_index=True) # task_name cpu duration total # 0 ASA.1.vcpu0 8 7954 78152 # 1 ASA.1.vcpu0 9 5475 78152 # 2 ASA.1.vcpu0 10 4151 78152 # Add a % column dfm['percent'] = ((dfm['duration'] * 100 * 10) // time_span_usec) / 10 # This is for the legend min_count = 0 max_count = 100 range_unit = '%' # many core-pinned system tasks have a duration of 0 (swapper, 
watchdog...) dfm.fillna(100, inplace=True) dfm.drop(['duration'], axis=1, inplace=True) tooltip_count = ("time", "@percent% of core @cpu") title = "Task Run Time %% per Core (%s, %d msec window)" % ( label, time_span_msec) # this is YlGnBu9[::-1] (Reverse the color order so dark is highest value) # with an extra intermediate color to make it 10 palette = [ '#ffffd9', '#edf8b1', '#c7e9b4', '#a3dbb7', '#7fcdbb', '#41b6c4', '#1d91c0', '#225ea8', '#253494', '#081d58' ] html_prefix = 'core_runtime' # add 1 extra column in the heatmap for core "IDLE" to represent the IDLE time # this requires adding 1 row per real core that has a runtime set to # capture window size - sum(duration on all cores) else: # count number of rows with same task and cpu dfm = DataFrame(gb.size()) dfm.reset_index(inplace=True) dfm.rename(columns={0: 'count'}, inplace=True) min_count = dfm['count'].min() max_count = dfm['count'].max() range_unit = '' spread = max_count - min_count # Add a % column dfm['percent'] = ((dfm['count'] - min_count) * 100) / spread tooltip_count = ("context switches", "@count") title = "Task Context Switches per Core (%s, %d msec window)" % ( label, time_span_msec) palette = YlOrRd9[::-1] html_prefix = 'core_switches' max_core = dfm.cpu.max() # get the list of cores # round up to next mutliple of 4 - 1 # 0..3 -> 3 # 4..7 -> 7 etc max_core |= 0x03 max_core = max(max_core, 3) core_list = [str(x) for x in range(max_core + 1)] if duration: core_list.append('IDLE') # make room for the legend by adding 3 empty columns core_list += ['', '', ''] dfm['cpu'] = dfm['cpu'].astype(str) # replace ':' with '_' as it would cause bokeh to misplace the labels on the chart dfm['task_name'] = dfm['task_name'].str.replace(':', '_') normalize_df_task_name(dfm) # Add a column for the Y axis # each task name should be associated to a unique Y index # Get a unique list of task names sorted task_list = pandas.unique(dfm.task_name.ravel()).tolist() task_list.sort() # Add a color column dfm['color'] 
= dfm.apply(lambda row: get_color(row['percent'], palette), axis=1) # switch to str type to prevent the tooltip to # display percent value with 3 digits dfm.percent = dfm.percent.astype(str) # make enough vertical space for the legend # the legend needs 1 row per palette color + 1 to fit the max value # so we need at least len(palette) + 1 rows if len(task_list) < len(palette) + 1: task_list += ['' for _ in range(len(palette) + 1 - len(task_list))] TOOLS = "resize,hover,save" p = figure(title=title, tools=TOOLS, x_range=core_list, y_range=task_list, **title_style) p.plot_width = 1000 p.plot_height = 80 + len(task_list) * 16 p.toolbar_location = "left" source = ColumnDataSource(dfm) # the name is to flag these rectangles to enable tooltip hover on them # (and not enable tooltips on the legend patches) p.rect("cpu", "task_name", width=1, height=0.9, source=source, fill_alpha=0.8, color="color", name='patches') p.grid.grid_line_color = None # trace separator lines to isolate blocks across core groups (numa sockets) and task-like names max_y = len(task_list) # trace a vertical line every 8 cores for seg_x in range(8, max_core + 7, 8): p.segment(x0=[seg_x + 0.5], y0=[0], x1=[seg_x + 0.5], y1=[max_y + 0.5], color=GRAY, line_width=2) prev_task_name = None # trace a horizontal line around every group of tasks that have the same first 3 characters for y in range(max_y): cur_task_name = task_list[y] if prev_task_name: if prev_task_name[0:3] != cur_task_name[0:3]: p.segment(x0=[0], y0=[y + 0.5], x1=[max_core + 1.5], y1=[y + 0.5], color=GRAY, line_width=0.5) prev_task_name = task_list[y] hover = p.select(dict(type=HoverTool)) hover.tooltips = OrderedDict([("task", "@task_name"), ("core", "@cpu"), tooltip_count]) # only enable tooltip on rectangles with name 'patches' hover.names = ['patches'] # legend to the right # we try to center the legend vertically legend_base_y = 0 palette_len = len(palette) if max_y > palette_len + 1: legend_base_y = (max_y - palette_len - 1) // 2 if 
duration: # IDLE cpu inserted so shift the legend by 1 position to the right max_core += 1 # pass 1 is to draw the color patches # prepare a data source with a x, y and a color column x_values = np.empty(palette_len) x_values.fill(max_core + 2.5) y_values = np.arange(legend_base_y + 1.5, legend_base_y + palette_len + 1, 1) dfl = DataFrame({'x': x_values, 'y': y_values, 'color': palette}) source = ColumnDataSource(dfl) p.rect(x='x', y='y', color='color', width=1, height=1, source=source) # pass 2 is to draw the text describing the ranges for the color patches color_value_list = get_color_value_list(min_count, max_count, palette, range_unit) x_values = np.empty(len(color_value_list)) x_values.fill(max_core + 3.1) y_values = np.arange(legend_base_y + 0.7, legend_base_y + len(color_value_list), 1) dfl = DataFrame({ 'x': x_values, 'y': y_values, 'color_values': color_value_list }) source = ColumnDataSource(dfl) p.text(x='x', y='y', text='color_values', source=source, text_font_size='8pt') output_html(p, html_prefix, task_re)
def show_kvm_exit_types(df, task_re, label): df = df[df['event'] == 'kvm_exit'] df = df[df['task_name'].str.match(task_re)] # the next_comm column contains the exit code exit_codes = Series(KVM_EXIT_REASONS) # add new column congaining the exit reason in clear text df['exit_reason'] = df['next_comm'].map(exit_codes) time_span_msec = get_time_span_msec(df) df.drop( ['cpu', 'duration', 'event', 'next_pid', 'pid', 'next_comm', 'usecs'], inplace=True, axis=1) # Get the list of exit reasons, sorted alphabetically reasons = pandas.unique(df.exit_reason.ravel()).tolist() reasons.sort() # group by task name then exit reasons gb = df.groupby(['task_name', 'exit_reason']) # number of exit types size_series = gb.size() df = size_series.to_frame('count') df.reset_index(inplace=True) p = Bar(df, label='task_name', values='count', stack='exit_reason', title="KVM Exit types per task (%s, %d msec window)" % (label, time_span_msec), legend='top_right', tools="resize,hover,save", width=1000, height=800) p._xaxis.axis_label = "Task Name" p._xaxis.axis_label_text_font_size = "12pt" p._yaxis.axis_label = "Exit Count (sum)" p._yaxis.axis_label_text_font_size = "12pt" # Cannot find a way to display the exit reason in the tooltip # from bokeh.models.renderers import GlyphRenderer # glr = p.select(dict(type=GlyphRenderer)) # bar_source = glr[0].data_source # print bar_source.data # bar_source = glr[1].data_source # bar_source.data['exit_reason'] = ['HOHO'] hover = p.select(dict(type=HoverTool)) hover.tooltips = OrderedDict([ ("task", "$x"), # {"reason", "@exit_reason"}, ("count", "@height") ]) # specify how to output the plot(s) # table with counts gb = df.groupby(['exit_reason']) keys = gb.groups.keys() dfr_list = [] for reason in keys: dfr = gb.get_group(reason) # drop the exit reason column dfr = dfr.drop(['exit_reason'], axis=1) # rename the count column with the reason name dfr.rename(columns={'count': reason}, inplace=True) # set the task name as the index dfr.set_index('task_name', 
inplace=True) dfr_list.append(dfr) # concatenate all task columns into 1 dataframe that has the exit reason as the index # counts for missing exit reasons will be set to NaN dft = pandas.concat(dfr_list, axis=1) dft.fillna(0, inplace=True) # Add a total column dft['TOTAL'] = dft.sum(axis=1) sfmt = StringFormatter(text_align='center', font_style='bold') nfmt = NumberFormatter(format='0,0') col_names = list(dft.columns.values) col_names.sort() # move 'TOTAL' at end of list col_names.remove('TOTAL') col_names.append('TOTAL') # convert index to column name dft.reset_index(level=0, inplace=True) dft.rename(columns={'index': 'Task'}, inplace=True) columns = [ TableColumn(field=name, title=name, formatter=nfmt) for name in col_names ] columns.insert(0, TableColumn(field='Task', title='Task', formatter=sfmt)) table = DataTable(source=ColumnDataSource(dft), columns=columns, width=1000, row_headers=False, height='auto') output_html(vplot(p, table), 'kvm-types', task_re) '''