Example #1
0
def show_core_runs(df, task_re, label, duration):
    """Render a task-versus-core heatmap and hand it to output_html().

    Only 'sched__sched_switch' events whose task name matches task_re are
    considered. The color of each (task, core) cell encodes either:
      - duration truthy: the share of the capture window spent by the task on
        that core, with an extra 'IDLE' column holding the rest of the window
      - duration falsy: the number of context switches of the task on that
        core, rescaled to 0..100 between the smallest and the largest count

    :param df: events frame; the code reads the columns event, task_name, cpu
               and duration and discards next_pid, pid, usecs and next_comm
    :param task_re: regular expression applied with str.match on task names
    :param label: free text inserted in the chart title
    :param duration: selects run time percentages (truthy) or switch counts
    :return: None; a message is printed instead of a chart when nothing matches
    """
    time_span_msec = get_time_span_msec(df)

    # remove unneeded columns (drop returns a new frame, the caller's is untouched)
    df = df.drop(['next_pid', 'pid', 'usecs', 'next_comm'], axis=1)

    # filter out all events except the switch events
    df = df[df.event == 'sched__sched_switch']
    df = df.drop('event', axis=1)
    df = df[df['task_name'].str.match(task_re)]

    # at this point we have a df that looks like this:
    #         task_name  cpu  duration
    # 0     ASA.1.vcpu0    8      7954
    # 1     ASA.1.vcpu0    9      5475
    # 2     ASA.1.vcpu0   10      4151
    # 3     ASA.1.vcpu0   11     12391
    # 8    ASA.10.vcpu0    8      4248
    # 9    ASA.10.vcpu0    9      3534
    # etc...
    if df.empty:
        # print() with exactly one argument behaves the same on python 2 and 3
        print('')
        print('No selection matching "%s"' % (task_re))
        return
    gb = df.groupby(['task_name', 'cpu'], as_index=False)
    if duration:
        # add duration values per (task, cpu)
        df = gb.aggregate(np.sum)
        max_core = df.cpu.max()

        dfsum = df.drop('cpu', axis=1)
        dfsum = dfsum.groupby('task_name', as_index=False).aggregate(np.sum)
        # dfsum is the sum of all duration for given task
        # 0    ASA.1.vcpu0     78152
        # 1   ASA.10.vcpu0     65637
        # 2   ASA.11.vcpu0     81525
        # For each task, the maximum runtime is the time_span_msec (100% of 1 core)
        # The idle time for each task is therefore time_span_msec * 1000 - sum(duration)
        # It is shown in 1 extra heatmap column for a pseudo core named "IDLE"
        dfsum['cpu'] = 'IDLE'
        time_span_usec = time_span_msec * 1000
        dfsum['duration'] = time_span_usec - dfsum['duration']

        # append the IDLE rows (1 per task) to the per core rows
        #         task_name   cpu  duration
        # 0     ASA.1.vcpu0     8      7954
        # 1     ASA.1.vcpu0     9      5475
        # ...
        # n     ASA.1.vcpu0  IDLE    <window - 78152>
        dfm = pandas.concat([df, dfsum], ignore_index=True)

        # Add a % column, truncated to 1 decimal
        dfm['percent'] = ((dfm['duration'] * 100 * 10) // time_span_usec) / 10

        # This is for the legend
        min_count = 0
        max_count = 100
        range_unit = '%'

        # many core-pinned system tasks have a duration of 0 (swapper, watchdog...)
        dfm.fillna(100, inplace=True)
        dfm.drop(['duration'], axis=1, inplace=True)
        tooltip_count = ("time", "@percent% of core @cpu")
        title = "Task Run Time %% per Core (%s, %d msec window)" % (label, time_span_msec)
        # this is YlGnBu9[::-1]  (Reverse the color order so dark is highest value)
        # with an extra intermediate color to make it 10
        palette = ['#ffffd9', '#edf8b1', '#c7e9b4', '#a3dbb7', '#7fcdbb',
                   '#41b6c4', '#1d91c0', '#225ea8', '#253494', '#081d58']
        html_prefix = 'core_runtime'
    else:
        # count number of rows with same task and cpu
        dfm = DataFrame(gb.size())
        dfm.reset_index(inplace=True)
        dfm.rename(columns={0: 'count'}, inplace=True)
        min_count = dfm['count'].min()
        max_count = dfm['count'].max()
        range_unit = ''
        spread = max_count - min_count
        # Add a % column
        if spread:
            dfm['percent'] = ((dfm['count'] - min_count) * 100) / spread
        else:
            # all counts are identical (e.g. a single task on a single core):
            # 0 / 0 would fill the column with NaN, which would then be
            # passed to get_color(), so use the bottom of the scale instead
            dfm['percent'] = 0.0
        tooltip_count = ("context switches", "@count")
        title = "Task Context Switches per Core (%s, %d msec window)" % (label, time_span_msec)
        palette = YlOrRd9[::-1]
        html_prefix = 'core_switches'
        max_core = dfm.cpu.max()

    # get the list of cores
    # round up to next multiple of 4 - 1
    # 0..3 -> 3
    # 4..7 -> 7 etc
    max_core |= 0x03
    max_core = max(max_core, 3)
    core_list = [str(x) for x in range(max_core + 1)]
    if duration:
        core_list.append('IDLE')
    # make room for the legend by adding 3 empty columns
    core_list += ['', '', '']
    dfm['cpu'] = dfm['cpu'].astype(str)
    # replace ':' with '_' as it would cause bokeh to misplace the labels on the chart
    dfm['task_name'] = dfm['task_name'].str.replace(':', '_')

    normalize_df_task_name(dfm)

    # The Y axis is categorical: 1 row per unique task name, sorted
    task_list = sorted(pandas.unique(dfm.task_name.ravel()).tolist())

    # Add a color column
    dfm['color'] = dfm.apply(lambda row: get_color(row['percent'],
                                                   palette), axis=1)
    # switch to str type to prevent the tooltip to
    # display percent value with 3 digits
    dfm.percent = dfm.percent.astype(str)
    # make enough vertical space for the legend
    # the legend needs 1 row per palette color + 1 to fit the max value
    # so we need at least len(palette) + 1 rows
    if len(task_list) < len(palette) + 1:
        task_list += [''] * (len(palette) + 1 - len(task_list))
    TOOLS = "resize,hover,save"
    p = figure(title=title, tools=TOOLS, x_range=core_list, y_range=task_list, **title_style)

    p.plot_width = 1000
    p.plot_height = 80 + len(task_list) * 16
    p.toolbar_location = "left"
    source = ColumnDataSource(dfm)
    # the name is to flag these rectangles to enable tooltip hover on them
    # (and not enable tooltips on the legend patches)
    p.rect("cpu", "task_name", width=1, height=0.9, source=source, fill_alpha=0.8, color="color", name='patches')
    p.grid.grid_line_color = None
    # trace separator lines to isolate blocks across core groups (numa sockets) and task-like names
    max_y = len(task_list)
    # trace a vertical line every 8 cores
    for seg_x in range(8, max_core + 7, 8):
        p.segment(x0=[seg_x + 0.5], y0=[0], x1=[seg_x + 0.5],
                  y1=[max_y + 0.5], color=GRAY, line_width=2)
    # trace a horizontal line around every group of tasks that have the same first 3 characters
    # (no line is traced after an empty padding row since '' is falsy)
    prev_task_name = None
    for y, cur_task_name in enumerate(task_list):
        if prev_task_name and prev_task_name[0:3] != cur_task_name[0:3]:
            p.segment(x0=[0], y0=[y + 0.5], x1=[max_core + 1.5],
                      y1=[y + 0.5], color=GRAY, line_width=0.5)
        prev_task_name = cur_task_name

    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("task", "@task_name"),
        ("core", "@cpu"),
        tooltip_count
    ])
    # only enable tooltip on rectangles with name 'patches'
    hover.names = ['patches']

    # legend to the right
    # we try to center the legend vertically
    legend_base_y = 0
    palette_len = len(palette)
    if max_y > palette_len + 1:
        legend_base_y = (max_y - palette_len - 1) // 2

    if duration:
        # IDLE cpu inserted so shift the legend by 1 position to the right
        max_core += 1
    # pass 1 is to draw the color patches
    # prepare a data source with a x, y and a color column
    x_values = np.empty(palette_len)
    x_values.fill(max_core + 2.5)
    y_values = np.arange(legend_base_y + 1.5, legend_base_y + palette_len + 1, 1)
    dfl = DataFrame({'x': x_values, 'y': y_values, 'color': palette})
    source = ColumnDataSource(dfl)
    p.rect(x='x', y='y', color='color', width=1, height=1, source=source)

    # pass 2 is to draw the text describing the ranges for the color patches
    color_value_list = get_color_value_list(min_count, max_count, palette, range_unit)
    x_values = np.empty(len(color_value_list))
    x_values.fill(max_core + 3.1)
    y_values = np.arange(legend_base_y + 0.7, legend_base_y + len(color_value_list), 1)

    dfl = DataFrame({'x': x_values, 'y': y_values, 'color_values': color_value_list})
    source = ColumnDataSource(dfl)
    p.text(x='x', y='y', text='color_values', source=source,
           text_font_size='8pt')

    output_html(p, html_prefix, task_re)
def show_kvm_exit_types(df, task_re, label):
    """Chart the KVM exit reasons of the matching tasks.

    The result handed to output_html() stacks a bar chart (one bar per task,
    one stack segment per exit reason) on top of a table holding the same
    counts plus a TOTAL column.

    :param df: events frame; the code reads the columns event, task_name and
               next_comm (which carries the exit code of kvm_exit events)
    :param task_re: regular expression applied with str.match on task names
    :param label: free text inserted in the chart title
    :return: None; a message is printed instead of a chart when nothing matches
    """
    df = df[df['event'] == 'kvm_exit']
    # work on a private copy: a column is added below and it must not be
    # written through a slice of the caller's frame
    df = df[df['task_name'].str.match(task_re)].copy()
    if df.empty:
        # nothing to plot, and pandas.concat below would raise on an empty list
        # print() with exactly one argument behaves the same on python 2 and 3
        print('')
        print('No selection matching "%s"' % (task_re))
        return

    # the next_comm column contains the exit code,
    # add a new column containing the exit reason in clear text
    df['exit_reason'] = df['next_comm'].map(Series(KVM_EXIT_REASONS))
    time_span_msec = get_time_span_msec(df)

    # number of exits per task name and exit reason; the other columns do not
    # influence size() so they do not need to be dropped first
    counts = df.groupby(['task_name', 'exit_reason']).size().to_frame('count')
    counts.reset_index(inplace=True)

    p = Bar(counts, label='task_name', values='count', stack='exit_reason',
            title="KVM Exit types per task (%s, %d msec window)" % (label, time_span_msec),
            legend='top_right',
            tools="resize,hover,save",
            width=1000, height=800)
    p._xaxis.axis_label = "Task Name"
    p._xaxis.axis_label_text_font_size = "12pt"
    p._yaxis.axis_label = "Exit Count (sum)"
    p._yaxis.axis_label_text_font_size = "12pt"

    # Known limitation: no way was found to display the exit reason
    # in the tooltip, only the task and the count are shown
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("task", "$x"),
        ("count", "@height")
    ])

    # table with counts: build 1 single-column frame per exit reason,
    # indexed by task name, with the column named after the reason
    dfr_list = []
    for reason, dfr in counts.groupby('exit_reason'):
        dfr = dfr.drop(['exit_reason'], axis=1)
        dfr = dfr.rename(columns={'count': reason})
        dfr_list.append(dfr.set_index('task_name'))
    # concatenate all reason columns into 1 dataframe that has the task name as the index
    # counts for missing exit reasons will be set to NaN, then replaced with 0
    dft = pandas.concat(dfr_list, axis=1)
    dft.fillna(0, inplace=True)
    # exit reasons sorted alphabetically, with 'TOTAL' at the end
    col_names = sorted(dft.columns) + ['TOTAL']
    # Add a total column
    dft['TOTAL'] = dft[col_names[:-1]].sum(axis=1)
    sfmt = StringFormatter(text_align='center', font_style='bold')
    nfmt = NumberFormatter(format='0,0')

    # convert the index to a 'Task' column; naming the index explicitly makes
    # this independent of whether concat preserved the 'task_name' index name
    dft.index.name = 'Task'
    dft.reset_index(inplace=True)
    columns = [TableColumn(field=name, title=name, formatter=nfmt) for name in col_names]
    columns.insert(0, TableColumn(field='Task', title='Task', formatter=sfmt))
    table = DataTable(source=ColumnDataSource(dft), columns=columns, width=1000,
                      row_headers=False,
                      height='auto')
    output_html(vplot(p, table), 'kvm-types', task_re)

    '''
Example #3
0
def show_core_runs(df, task_re, label, duration):
    """Render a task-versus-core heatmap and hand it to output_html().

    Only 'sched__sched_switch' events whose task name matches task_re are
    considered. The color of each (task, core) cell encodes either:
      - duration truthy: the share of the capture window spent by the task on
        that core, with an extra 'IDLE' column holding the rest of the window
      - duration falsy: the number of context switches of the task on that
        core, rescaled to 0..100 between the smallest and the largest count

    :param df: events frame; the code reads the columns event, task_name, cpu
               and duration and discards next_pid, pid, usecs and next_comm
    :param task_re: regular expression applied with str.match on task names
    :param label: free text inserted in the chart title
    :param duration: selects run time percentages (truthy) or switch counts
    :return: None; a message is printed instead of a chart when nothing matches
    """
    time_span_msec = get_time_span_msec(df)

    # remove unneeded columns (drop returns a new frame, the caller's is untouched)
    df = df.drop(['next_pid', 'pid', 'usecs', 'next_comm'], axis=1)

    # filter out all events except the switch events
    df = df[df.event == 'sched__sched_switch']
    df = df.drop('event', axis=1)
    df = df[df['task_name'].str.match(task_re)]

    # at this point we have a df that looks like this:
    #         task_name  cpu  duration
    # 0     ASA.1.vcpu0    8      7954
    # 1     ASA.1.vcpu0    9      5475
    # 2     ASA.1.vcpu0   10      4151
    # 3     ASA.1.vcpu0   11     12391
    # 8    ASA.10.vcpu0    8      4248
    # 9    ASA.10.vcpu0    9      3534
    # etc...
    if df.empty:
        # print() with exactly one argument behaves the same on python 2 and 3
        print('')
        print('No selection matching "%s"' % (task_re))
        return
    gb = df.groupby(['task_name', 'cpu'], as_index=False)
    if duration:
        # add duration values per (task, cpu)
        df = gb.aggregate(np.sum)
        max_core = df.cpu.max()

        dfsum = df.drop('cpu', axis=1)
        dfsum = dfsum.groupby('task_name', as_index=False).aggregate(np.sum)
        # dfsum is the sum of all duration for given task
        # 0    ASA.1.vcpu0     78152
        # 1   ASA.10.vcpu0     65637
        # 2   ASA.11.vcpu0     81525
        # For each task, the maximum runtime is the time_span_msec (100% of 1 core)
        # The idle time for each task is therefore time_span_msec * 1000 - sum(duration)
        # It is shown in 1 extra heatmap column for a pseudo core named "IDLE"
        dfsum['cpu'] = 'IDLE'
        time_span_usec = time_span_msec * 1000
        dfsum['duration'] = time_span_usec - dfsum['duration']

        # append the IDLE rows (1 per task) to the per core rows
        #         task_name   cpu  duration
        # 0     ASA.1.vcpu0     8      7954
        # 1     ASA.1.vcpu0     9      5475
        # ...
        # n     ASA.1.vcpu0  IDLE    <window - 78152>
        dfm = pandas.concat([df, dfsum], ignore_index=True)

        # Add a % column, truncated to 1 decimal
        dfm['percent'] = ((dfm['duration'] * 100 * 10) // time_span_usec) / 10

        # This is for the legend
        min_count = 0
        max_count = 100
        range_unit = '%'

        # many core-pinned system tasks have a duration of 0 (swapper, watchdog...)
        dfm.fillna(100, inplace=True)
        dfm.drop(['duration'], axis=1, inplace=True)
        tooltip_count = ("time", "@percent% of core @cpu")
        title = "Task Run Time %% per Core (%s, %d msec window)" % (label, time_span_msec)
        # this is YlGnBu9[::-1]  (Reverse the color order so dark is highest value)
        # with an extra intermediate color to make it 10
        palette = ['#ffffd9', '#edf8b1', '#c7e9b4', '#a3dbb7', '#7fcdbb',
                   '#41b6c4', '#1d91c0', '#225ea8', '#253494', '#081d58']
        html_prefix = 'core_runtime'
    else:
        # count number of rows with same task and cpu
        dfm = DataFrame(gb.size())
        dfm.reset_index(inplace=True)
        dfm.rename(columns={0: 'count'}, inplace=True)
        min_count = dfm['count'].min()
        max_count = dfm['count'].max()
        range_unit = ''
        spread = max_count - min_count
        # Add a % column
        if spread:
            dfm['percent'] = ((dfm['count'] - min_count) * 100) / spread
        else:
            # all counts are identical (e.g. a single task on a single core):
            # 0 / 0 would fill the column with NaN, which would then be
            # passed to get_color(), so use the bottom of the scale instead
            dfm['percent'] = 0.0
        tooltip_count = ("context switches", "@count")
        title = "Task Context Switches per Core (%s, %d msec window)" % (label, time_span_msec)
        palette = YlOrRd9[::-1]
        html_prefix = 'core_switches'
        max_core = dfm.cpu.max()

    # get the list of cores
    # round up to next multiple of 4 - 1
    # 0..3 -> 3
    # 4..7 -> 7 etc
    max_core |= 0x03
    max_core = max(max_core, 3)
    core_list = [str(x) for x in range(max_core + 1)]
    if duration:
        core_list.append('IDLE')
    # make room for the legend by adding 3 empty columns
    core_list += ['', '', '']
    dfm['cpu'] = dfm['cpu'].astype(str)
    # replace ':' with '_' as it would cause bokeh to misplace the labels on the chart
    dfm['task_name'] = dfm['task_name'].str.replace(':', '_')

    normalize_df_task_name(dfm)

    # The Y axis is categorical: 1 row per unique task name, sorted
    task_list = sorted(pandas.unique(dfm.task_name.ravel()).tolist())

    # Add a color column
    dfm['color'] = dfm.apply(lambda row: get_color(row['percent'],
                                                   palette), axis=1)
    # switch to str type to prevent the tooltip to
    # display percent value with 3 digits
    dfm.percent = dfm.percent.astype(str)
    # make enough vertical space for the legend
    # the legend needs 1 row per palette color + 1 to fit the max value
    # so we need at least len(palette) + 1 rows
    if len(task_list) < len(palette) + 1:
        task_list += [''] * (len(palette) + 1 - len(task_list))
    TOOLS = "resize,hover,save"
    p = figure(title=title, tools=TOOLS, x_range=core_list, y_range=task_list, **title_style)

    p.plot_width = 1000
    p.plot_height = 80 + len(task_list) * 16
    p.toolbar_location = "left"
    source = ColumnDataSource(dfm)
    # the name is to flag these rectangles to enable tooltip hover on them
    # (and not enable tooltips on the legend patches)
    p.rect("cpu", "task_name", width=1, height=0.9, source=source, fill_alpha=0.8, color="color", name='patches')
    p.grid.grid_line_color = None
    # trace separator lines to isolate blocks across core groups (numa sockets) and task-like names
    max_y = len(task_list)
    # trace a vertical line every 8 cores
    for seg_x in range(8, max_core + 7, 8):
        p.segment(x0=[seg_x + 0.5], y0=[0], x1=[seg_x + 0.5],
                  y1=[max_y + 0.5], color=GRAY, line_width=2)
    # trace a horizontal line around every group of tasks that have the same first 3 characters
    # (no line is traced after an empty padding row since '' is falsy)
    prev_task_name = None
    for y, cur_task_name in enumerate(task_list):
        if prev_task_name and prev_task_name[0:3] != cur_task_name[0:3]:
            p.segment(x0=[0], y0=[y + 0.5], x1=[max_core + 1.5],
                      y1=[y + 0.5], color=GRAY, line_width=0.5)
        prev_task_name = cur_task_name

    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("task", "@task_name"),
        ("core", "@cpu"),
        tooltip_count
    ])
    # only enable tooltip on rectangles with name 'patches'
    hover.names = ['patches']

    # legend to the right
    # we try to center the legend vertically
    legend_base_y = 0
    palette_len = len(palette)
    if max_y > palette_len + 1:
        legend_base_y = (max_y - palette_len - 1) // 2

    if duration:
        # IDLE cpu inserted so shift the legend by 1 position to the right
        max_core += 1
    # pass 1 is to draw the color patches
    # prepare a data source with a x, y and a color column
    x_values = np.empty(palette_len)
    x_values.fill(max_core + 2.5)
    y_values = np.arange(legend_base_y + 1.5, legend_base_y + palette_len + 1, 1)
    dfl = DataFrame({'x': x_values, 'y': y_values, 'color': palette})
    source = ColumnDataSource(dfl)
    p.rect(x='x', y='y', color='color', width=1, height=1, source=source)

    # pass 2 is to draw the text describing the ranges for the color patches
    color_value_list = get_color_value_list(min_count, max_count, palette, range_unit)
    x_values = np.empty(len(color_value_list))
    x_values.fill(max_core + 3.1)
    y_values = np.arange(legend_base_y + 0.7, legend_base_y + len(color_value_list), 1)

    dfl = DataFrame({'x': x_values, 'y': y_values, 'color_values': color_value_list})
    source = ColumnDataSource(dfl)
    p.text(x='x', y='y', text='color_values', source=source,
           text_font_size='8pt')

    output_html(p, html_prefix, task_re)
def show_kvm_exit_types(df, task_re, label):
    """Chart the KVM exit reasons of the matching tasks.

    The result handed to output_html() stacks a bar chart (one bar per task,
    one stack segment per exit reason) on top of a table holding the same
    counts plus a TOTAL column.

    :param df: events frame; the code reads the columns event, task_name and
               next_comm (which carries the exit code of kvm_exit events)
    :param task_re: regular expression applied with str.match on task names
    :param label: free text inserted in the chart title
    :return: None; a message is printed instead of a chart when nothing matches
    """
    df = df[df['event'] == 'kvm_exit']
    # work on a private copy: a column is added below and it must not be
    # written through a slice of the caller's frame
    df = df[df['task_name'].str.match(task_re)].copy()
    if df.empty:
        # nothing to plot, and pandas.concat below would raise on an empty list
        # print() with exactly one argument behaves the same on python 2 and 3
        print('')
        print('No selection matching "%s"' % (task_re))
        return

    # the next_comm column contains the exit code,
    # add a new column containing the exit reason in clear text
    df['exit_reason'] = df['next_comm'].map(Series(KVM_EXIT_REASONS))
    time_span_msec = get_time_span_msec(df)

    # number of exits per task name and exit reason; the other columns do not
    # influence size() so they do not need to be dropped first
    counts = df.groupby(['task_name', 'exit_reason']).size().to_frame('count')
    counts.reset_index(inplace=True)

    p = Bar(counts, label='task_name', values='count', stack='exit_reason',
            title="KVM Exit types per task (%s, %d msec window)" % (label, time_span_msec),
            legend='top_right',
            tools="resize,hover,save",
            width=1000, height=800)
    p._xaxis.axis_label = "Task Name"
    p._xaxis.axis_label_text_font_size = "12pt"
    p._yaxis.axis_label = "Exit Count (sum)"
    p._yaxis.axis_label_text_font_size = "12pt"

    # Known limitation: no way was found to display the exit reason
    # in the tooltip, only the task and the count are shown
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("task", "$x"),
        ("count", "@height")
    ])

    # table with counts: build 1 single-column frame per exit reason,
    # indexed by task name, with the column named after the reason
    dfr_list = []
    for reason, dfr in counts.groupby('exit_reason'):
        dfr = dfr.drop(['exit_reason'], axis=1)
        dfr = dfr.rename(columns={'count': reason})
        dfr_list.append(dfr.set_index('task_name'))
    # concatenate all reason columns into 1 dataframe that has the task name as the index
    # counts for missing exit reasons will be set to NaN, then replaced with 0
    dft = pandas.concat(dfr_list, axis=1)
    dft.fillna(0, inplace=True)
    # exit reasons sorted alphabetically, with 'TOTAL' at the end
    col_names = sorted(dft.columns) + ['TOTAL']
    # Add a total column
    dft['TOTAL'] = dft[col_names[:-1]].sum(axis=1)
    sfmt = StringFormatter(text_align='center', font_style='bold')
    nfmt = NumberFormatter(format='0,0')

    # convert the index to a 'Task' column; naming the index explicitly makes
    # this independent of whether concat preserved the 'task_name' index name
    dft.index.name = 'Task'
    dft.reset_index(inplace=True)
    columns = [TableColumn(field=name, title=name, formatter=nfmt) for name in col_names]
    columns.insert(0, TableColumn(field='Task', title='Task', formatter=sfmt))
    table = DataTable(source=ColumnDataSource(dft), columns=columns, width=1000,
                      row_headers=False,
                      height='auto')
    output_html(vplot(p, table), 'kvm-types', task_re)
    '''