Code Example #1
def _parse_entity(cmd_opts, csv_fields, post_process_entity=None, debug=False):
    """Run nvidia-smi with the given query options and parse its CSV output
    into a list of dicts, one per line, keyed by csv_fields."""
    format_opts = [
        'csv',
        'noheader',
        'nounits',
    ]

    cmd = [
        NVIDIA_SMI_EXEC,
        '--format={opts}'.format(opts=','.join(format_opts)),
    ] + cmd_opts

    if debug:
        print_cmd(cmd)
    # output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode('utf-8')
    # lines = output.split(os.linesep)
    lines = run_cmd(cmd)
    entities = []
    for line in lines:
        if re.search(r'^\s*$', line):
            continue
        fields = line.split(', ')
        # Each line must match the expected schema exactly; log the
        # mismatch before failing when debugging.
        if len(fields) != len(csv_fields):
            if debug:
                logger.info(
                    pprint_msg({
                        'fields': fields,
                        'csv_fields': csv_fields,
                    }))
            assert len(fields) == len(csv_fields)
        dic = dict()
        for k, v in zip(csv_fields, fields):
            value = parse_value(k, v)
            dic[k] = value
        entities.append(dic)
    if post_process_entity is not None:
        for entity in entities:
            post_process_entity(entity)
    return entities
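
As a usage sketch (hedged: the wrapper name query_gpus and the exact field list are illustrative, not from the original source), _parse_entity can be driven through nvidia-smi's --query-gpu interface:

def query_gpus(debug=False):
    # Hypothetical wrapper: ask nvidia-smi for a few per-GPU fields.
    # _parse_entity appends the --format options and returns one dict
    # per GPU, keyed by the field names below.
    csv_fields = ['index', 'name', 'utilization.gpu', 'memory.used']
    cmd_opts = ['--query-gpu={fields}'.format(fields=','.join(csv_fields))]
    return _parse_entity(cmd_opts, csv_fields, debug=debug)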
Code Example #2
        def add_percent_bar_labels(df, ax):
            """Annotate each instrumented bar with its profiling-overhead
            percentage, centered above the bar."""
            xticklabels = ax.get_xticklabels()
            xticks = ax.get_xticks()
            ins_df = df[df['config'].apply(
                lambda config: bool(config_is_instrumented(config)))]
            bar_width = ax.patches[0].get_width()
            xticklabel_to_xtick = dict()

            # One bar per config; assign each config a 0-based bar index
            # in order of first appearance in df.
            num_bars = len(set(df['config']))
            bar_order = dict()
            i = 0
            for config in df['config']:
                if config not in bar_order:
                    bar_order[config] = i
                    i += 1

            logger.info(
                pprint_msg({
                    'len(patches)': len(ax.patches),
                    'len(df)': len(df),
                    'bar_width': bar_width,
                    'bar_order': bar_order,
                }))

            for xtick, xticklabel in zip(xticks, xticklabels):
                xticklabel_to_xtick[xticklabel.get_text()] = xtick

            for i in range(len(ins_df)):
                row = ins_df.iloc[i]

                x_field = row['x_field']
                config = row['config']

                # Keep single decimal place.
                # bar_label = "{perc:.1f}%".format(
                #     perc=df.loc[i]['profiling_overhead_percent'])

                # Round to nearest percent.
                # bar_label = "{perc:.0f}%".format(
                #     perc=df.loc[i]['profiling_overhead_percent'])

                profiling_overhead_percent = row['profiling_overhead_percent']
                bar_label = "{perc:.0f}%".format(
                    perc=profiling_overhead_percent)

                total_trace_time_sec = row['total_trace_time_sec']
                #  _   _
                # | |_| |_
                # | | | | |
                # |_|_|_|_|
                #     |
                # ---------
                # bar_width
                # bar_order = 0, 1, 2, 3
                #
                # Middle tick "|" is at xtick.
                # Bars are located at:
                # 1) xtick - 2*bar_width
                # 2) xtick - 1*bar_width
                # 3) xtick
                # 4) xtick + 1*bar_width
                #
                # num_bars = 4
                #
                # In general, bars are located at:
                #   xtick + (bar_order - num_bars/2)*bar_width

                xtick = xticklabel_to_xtick[x_field]
                # pos = (xtick - bar_width / 2, total_trace_time_sec)
                # Left edge of this config's bar, plus bar_width/2 to center
                # the label on the bar.
                pos = (xtick + (bar_order[config] - num_bars / 2) * bar_width +
                       bar_width / 2, total_trace_time_sec)

                logger.info(
                    pprint_msg({
                        'bar_label': bar_label,
                        'x_field': x_field,
                        'pos': pos,
                        'total_trace_time_sec': total_trace_time_sec,
                    }))

                ax.annotate(bar_label,
                            pos,
                            ha='center',
                            va='center',
                            xytext=(0, 10),
                            textcoords='offset points')
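
A quick numeric check of the label-position formula above (values made up): with four bars of width 0.2 grouped around xtick = 1.0, the labels land evenly spaced and centered on the tick.

xtick, num_bars, bar_width = 1.0, 4, 0.2
for bar_order in range(num_bars):
    # Left edge of the bar, plus half a bar width to center the label.
    x = xtick + (bar_order - num_bars / 2) * bar_width + bar_width / 2
    print(bar_order, round(x, 2))
# -> 0 0.7, 1 0.9, 2 1.1, 3 1.3  (mean 1.0, i.e. centered on the tick)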
Code Example #3
    def plot(self):

        # figlegend.tight_layout()
        # figlegend.savefig(self.legend_path, bbox_inches='tight', pad_inches=0)
        # plt.close(figlegend)

        if self.width is not None and self.height is not None:
            figsize = (self.width, self.height)
            logger.info("Setting figsize = {fig}".format(fig=figsize))
            # sns.set_context({"figure.figsize": figsize})
        else:
            figsize = None
        # This is causing an XIO error....
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111)
        figlegend = plt.figure()
        ax_leg = figlegend.add_subplot(111)

        # ax = fig.add_subplot(111)
        # ax2 = None
        # if self.y2_field is not None:
        #     ax2 = ax.twinx()
        #     # Need to do this, otherwise, training time bar is ABOVE gridlines from ax.
        #     ax.set_zorder(ax2.get_zorder()+1)
        #     # Need to do this, otherwise training time bar is invisible.
        #     ax.patch.set_visible(False)

        # def is_cpu(device_name):
        #     if re.search(r'Intel|Xeon|CPU', device_name):
        #         return True
        #     return False
        #
        # def is_gpu(device_name):
        #     return not is_cpu(device_name)
        #
        # def should_keep(row):
        #     if row['machine_name'] == 'reddirtx-ubuntu':
        #         # Ignore 'Tesla K40c' (unused, 0 util)
        #         return row['device_name'] == 'GeForce RTX 2080 Ti'
        #     return True
        #
        # self.df_gpu = self.df
        #
        # self.df_gpu = self.df_gpu[self.df_gpu['device_name'].apply(is_gpu)]
        #
        # self.df_gpu = self.df_gpu[self.df_gpu.apply(should_keep, axis=1)]

        logger.info(pprint_msg(self.df))

        # ax = sns.violinplot(x=self.df_gpu['x_field'], y=100*self.df_gpu['util'],
        #                     inner="box",
        #                     # cut=0.,
        #                     )

        # ax = sns.boxplot(x=self.df['x_field'], y=100*self.df['util'],
        #                  showfliers=False,
        #                  )

        logger.info(pprint_msg(self.df[self.plot_data_fields]))
        ax = sns.barplot(x='x_field',
                         y='total_trace_time_sec',
                         hue='config_pretty',
                         data=self.df,
                         ax=ax)
        ax.get_legend().remove()

        # leg = ax.legend()
        # leg.set_title(None)

        # PROBLEM: (a2c, half-cheetah) profile percent is shown as 188%, but it's actually 222...
        # 188 is the (ppo, half-cheetah) result...
        # TODO: index by x_field, retrieve x_field from plot/patches.

        def add_percent_bar_labels(df, ax):
            """Annotate each instrumented bar with its profiling-overhead
            percentage, centered above the bar."""
            xticklabels = ax.get_xticklabels()
            xticks = ax.get_xticks()
            ins_df = df[df['config'].apply(
                lambda config: bool(config_is_instrumented(config)))]
            bar_width = ax.patches[0].get_width()
            xticklabel_to_xtick = dict()

            # One bar per config; assign each config a 0-based bar index
            # in order of first appearance in df.
            num_bars = len(set(df['config']))
            bar_order = dict()
            i = 0
            for config in df['config']:
                if config not in bar_order:
                    bar_order[config] = i
                    i += 1

            logger.info(
                pprint_msg({
                    'len(patches)': len(ax.patches),
                    'len(df)': len(df),
                    'bar_width': bar_width,
                    'bar_order': bar_order,
                }))

            for xtick, xticklabel in zip(xticks, xticklabels):
                xticklabel_to_xtick[xticklabel.get_text()] = xtick

            for i in range(len(ins_df)):
                row = ins_df.iloc[i]

                x_field = row['x_field']
                config = row['config']

                # Keep single decimal place.
                # bar_label = "{perc:.1f}%".format(
                #     perc=df.loc[i]['profiling_overhead_percent'])

                # Round to nearest percent.
                # bar_label = "{perc:.0f}%".format(
                #     perc=df.loc[i]['profiling_overhead_percent'])

                profiling_overhead_percent = row['profiling_overhead_percent']
                bar_label = "{perc:.0f}%".format(
                    perc=profiling_overhead_percent)

                total_trace_time_sec = row['total_trace_time_sec']
                #  _   _
                # | |_| |_
                # | | | | |
                # |_|_|_|_|
                #     |
                # ---------
                # bar_width
                # bar_order = 0, 1, 2, 3
                #
                # Middle tick "|" is at xtick.
                # Bars are located at:
                # 1) xtick - 2*bar_width
                # 2) xtick - 1*bar_width
                # 3) xtick
                # 4) xtick + 1*bar_width
                #
                # num_bars = 4
                #
                # In general, bars are located at:
                #   xtick + (bar_order - num_bars/2)*bar_width

                xtick = xticklabel_to_xtick[x_field]
                # pos = (xtick - bar_width / 2, total_trace_time_sec)
                # Left edge of this config's bar, plus bar_width/2 to center
                # the label on the bar.
                pos = (xtick + (bar_order[config] - num_bars / 2) * bar_width +
                       bar_width / 2, total_trace_time_sec)

                logger.info(
                    pprint_msg({
                        'bar_label': bar_label,
                        'x_field': x_field,
                        'pos': pos,
                        'total_trace_time_sec': total_trace_time_sec,
                    }))

                ax.annotate(bar_label,
                            pos,
                            ha='center',
                            va='center',
                            xytext=(0, 10),
                            textcoords='offset points')

        add_percent_bar_labels(self.df, ax)

        # groupby_cols = ['algo', 'env_id']
        # # label_df = self.df_gpu[list(set(groupby_cols + ['x_field', 'util']))]
        # label_df = self.df_gpu.groupby(groupby_cols).mean()
        # add_hierarchical_labels(fig, ax, self.df_gpu, label_df, groupby_cols)

        # df = self.df
        # ax = sns.violinplot(x=df['x_field'], y=100*df['util'],
        #                     # hue=df['algo'],
        #                     # hue=df['env_id'],
        #                     inner="box", cut=0.)

        if self.rotation is not None:
            # ax = bottom_plot.axes
            ax.set_xticklabels(ax.get_xticklabels(), rotation=self.rotation)

        # Remove legend-title that seaborn adds:
        # https://stackoverflow.com/questions/51579215/remove-seaborn-lineplot-legend-title?rq=1
        # handles, labels = ax.get_legend_handles_labels()
        # ax.legend(handles=handles[1:], labels=labels[1:])

        # Default ylim for violinplot is slightly passed bottom/top of data:
        #   ipdb> ax.get_ylim()
        #   (-2.3149999976158147, 48.614999949932105)
        #   ipdb> np.min(100*self.df['util'])
        #   0.0
        #   ipdb> np.max(100*self.df['util'])
        #   46.29999995231629
        ymin, ymax = ax.get_ylim()
        ax.set_ylim(0., ymax)

        ax.set_xlabel(self.x_axis_label)
        if self.y_title is not None:
            ax.set_ylabel(self.y_title)

        png_path = self._get_plot_path('png')
        logger.info('Save figure to {path}'.format(path=png_path))
        fig.tight_layout()
        fig.savefig(png_path)
        plt.close(fig)

        leg = ax_leg.legend(*ax.get_legend_handles_labels(), loc='center')
        ax_leg.axis('off')
        leg.set_title(None)
        figlegend.tight_layout()
        figlegend.savefig(self.legend_path('png'),
                          bbox_inches='tight',
                          pad_inches=0)
        plt.close(figlegend)
        trim_border(self.legend_path('png'))

        return

        # NOTE: everything below this return is unreachable legacy plotting
        # code (it references ax2, which is only defined in the commented-out
        # twinx() block above); kept for reference.

        # Set general plot properties

        sns.set_style("white")

        # ax = plt.subplot()
        # ax_list = fig.axes
        # plt.subplot()
        # fig, ax = plt.subplots()
        # ax.set_xs
        # sns.set_context({"figure.figsize": (24, 10)})

        if self.fontsize is not None:
            # sns.set_style('font', {...}) is not valid seaborn API;
            # set the font size via matplotlib's rc interface instead.
            plt.rc('font', size=self.fontsize)

        # plt.rc('xtick', rotation=40)
        # sns.set_style('xtick', {
        #     'rotation': 40,
        # })

        # TODO:
        # - Make it so plot legends appear to right of the plot
        # - Make it so we can choose NOT to show plot legend (ideally just make it invisible...)
        # - All fonts should be same size

        if self.y2_field is not None:
            # Total training time bar gets its own color.
            num_colors = len(self.groups) + 1
        else:
            num_colors = len(self.groups)
        self.colors = sns.color_palette("hls", num_colors)

        if self.y2_field is not None:
            bar_width = 0.25
        else:
            bar_width = 0.5

        ind = np.arange(len(self.data[self.x_field]))
        ax.set_xticks(ind + bar_width / 2)
        ax.set_xticklabels(self.data[self.x_field])

        n_bars = len(self.data[self.groups[0]])
        accum_ys = np.zeros(n_bars)
        barplot_kwargs = []
        bar_zorder = 0
        # bar_zorder = -1
        grid_zorder = 1
        for i, group in enumerate(self.groups):
            accum_ys += self.data[group]
            ys = copy.copy(accum_ys)
            if self.y2_field is not None:
                xs = ind
            else:
                xs = ind + bar_width / 2
            bar_kwargs = {
                'x': xs,
                # 'y': ys,
                'height': ys,
                'color': self.colors[i],
                # 'ax': ax,
                # 'position': 0,
                'zorder': bar_zorder,
            }
            if bar_width is not None:
                bar_kwargs['width'] = bar_width
            barplot_kwargs.append(bar_kwargs)

        if self.y2_field is not None:
            # TODO: we need to group rows and sum them based on matching df[group]...?
            # for i, group in enumerate(self.groups):
            y_color = self.colors[-1]
            bar_kwargs = {
                # 'x': self.data[self.x_field],
                'x': ind + bar_width,
                'height': self.data[self.y2_field],
                # 'y': self.data[self.y2_field],
                'color': y_color,
                # 'ax': ax2,
                # 'position': 1,
                'zorder': bar_zorder,
            }
            if bar_width is not None:
                bar_kwargs['width'] = bar_width
            # sns.barplot(**bar_kwargs)
            # plt.bar(**bar_kwargs)
            ax2.bar(**bar_kwargs)

        barplots = []
        for kwargs in reversed(barplot_kwargs):
            # TODO: color?
            # barplot = sns.barplot(**kwargs)
            # barplot = plt.bar(**kwargs)
            barplot = ax.bar(**kwargs)
            barplots.append(barplot)
        barplots.reverse()

        if self.y2_field is not None and self.y2_logscale:
            # ax2.set_yscale('log')
            ax2.set_yscale('log', basey=2)

            # ax2.set_yscale('log')
            # ax2.set_yticks([1,10,100] + [max(y)])
            # from matplotlib.ticker import FormatStrFormatter

            # ax2.yaxis.set_major_formatter(mpl_ticker.FormatStrFormatter('%.d'))
            ax2.yaxis.set_major_formatter(DaysHoursMinutesSecondsFormatter())

        # #Plot 1 - background - "total" (top) series
        # sns.barplot(x = self.data.Group, y = self.data.total, color = "red")
        #
        # #Plot 2 - overlay - "bottom" series
        # bottom_plot = sns.barplot(x = self.data.Group, y = self.data.Series1, color = "#0000A3")

        bottom_plot = barplots[-1]

        figlegend = plt.figure()
        self._add_legend(
            figlegend,
            loc='center',
            bbox_to_anchor=None,
        )

        if self.show_legend:
            self._add_legend(
                fig,
                loc='upper left',
                bbox_to_anchor=(1.05, 1),
            )

        # , prop={'size': self.fontsize}

        # topbar = plt.Rectangle((0,0),1,1,fc="red", edgecolor = 'none')
        # bottombar = plt.Rectangle((0,0),1,1,fc='#0000A3',  edgecolor = 'none')
        # l = plt.legend([bottombar, topbar], ['Bottom Bar', 'Top Bar'], loc=1, ncol = 2, prop={'size':16})

        # Optional: make the plot look nicer
        sns.despine(fig=fig, left=True)
        # bottom_plot.set_ylabel(self.y_axis_label)
        # bottom_plot.set_xlabel(self.x_axis_label)
        ax.set_ylabel(self.y_axis_label)
        if self.title is not None:
            # bottom_plot.set_title(self.title)
            ax.set_title(self.title)

        # Suggestions about how to prevent x-label overlap with matplotlib:
        #
        # https://stackoverflow.com/questions/42528921/how-to-prevent-overlapping-x-axis-labels-in-sns-countplot

        # #Set fonts to consistent 16pt size
        # for item in ([bottom_plot.xaxis.label, bottom_plot.yaxis.label] +
        #              bottom_plot.get_xticklabels() + bottom_plot.get_yticklabels()):
        #     if self.fontsize is not None:
        #         item.set_fontsize(self.fontsize)

        ax.grid(zorder=grid_zorder)
        if self.y2_field is not None:
            # ax2.grid(True)

            if self.y2_axis_label is not None:
                ax2.set_ylabel(self.y2_axis_label)

            # Align training time against percent.
            # (weird training time labels).
            #
            # l = ax.get_ylim()
            # l2 = ax2.get_ylim()
            # f = lambda x : l2[0]+(x-l[0])/(l[1]-l[0])*(l2[1]-l2[0])
            # ticks = f(ax.get_yticks())
            # ax2.yaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks))

            # Align percent against training time.
            # (weird percent labels).
            #
            # l = ax2.get_ylim()
            # l2 = ax.get_ylim()
            # f = lambda x : l2[0]+(x-l[0])/(l[1]-l[0])*(l2[1]-l2[0])
            # ticks = f(ax2.get_yticks())
            # ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks))

        logger.info('Save figure to {path}'.format(path=self.path))
        fig.tight_layout()
        fig.savefig(self.path)
        plt.close(fig)
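
The legend-export pattern used above (render the legend into its own figure, save it, then trim the border) also works standalone; a minimal sketch, assuming any labeled artists on ax:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.bar([0, 1], [1.0, 2.0], width=0.4, label='uninstrumented')
ax.bar([0.4, 1.4], [1.5, 2.5], width=0.4, label='instrumented')

# Render the legend into a dedicated figure so it can be placed
# independently of the plot (e.g. in a paper layout).
figlegend = plt.figure()
ax_leg = figlegend.add_subplot(111)
leg = ax_leg.legend(*ax.get_legend_handles_labels(), loc='center')
ax_leg.axis('off')
leg.set_title(None)
figlegend.tight_layout()
figlegend.savefig('legend.png', bbox_inches='tight', pad_inches=0)
plt.close(figlegend)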
Code Example #4
def my_label_len(label_df, col):
    """Return (label, run_length) pairs for consecutive runs of identical
    values in label_df[col]."""
    # labels = my_index.get_level_values(level)
    labels = label_df[col]
    ret = [(k, sum(1 for _ in g)) for k, g in itertools.groupby(labels)]
    logger.info(pprint_msg({'label_len': ret}))
    return ret
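
Since itertools.groupby only groups consecutive equal values, my_label_len assumes label_df is already sorted by col. For example:

import itertools

labels = ['a2c', 'a2c', 'ppo', 'ppo', 'ppo']
print([(k, sum(1 for _ in g)) for k, g in itertools.groupby(labels)])
# -> [('a2c', 2), ('ppo', 3)]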
Code Example #5
    def run(self):
        """
        # PSEUDOCODE for computing total training time without profiling overhead

        end_time = latest event.end_time_us recorded in the Event table

        tps = select all TrainingProgress rows up to end_training_time_us = end_time
        events = select all Event rows from the start of the trace up to end_time_us = end_time

        def compute_training_time(events):
          Use split_op_stacks on events.
          Instead of computing overlap, just compute the total event time that doesn't include "profiling overhead".
          op_stack = []
          # NOTE: CPU events may overlap with GPU events...
          # We just want time covered by all CPU/GPU events, MINUS time covered by "profiling overhead" events.
          # for event in sorted(events, by=event.start_time_usec):
          total_time_us = 0
          while len(events) > 0:
              go to the next start or end of an event
              if start of event:
                  if is_op_event(event):
                      op_event = event
                  elif is_cpu_event(event):
                      start_t = event.start_time_usec
                  elif is_gpu_event(event):
                      # GPU events: presumably tracked the same way as CPU events.
              elif end of event:
                  if is_op_event(event):
                      total_time_us += event.end_time_us - start_t
                      start_t = None

        # total training time
        trace_time_sec = compute_training_time(events)
        last_tps = tps[-1]
        timesteps_per_sec = (last_tps['end_num_timesteps'] - last_tps['start_num_timesteps']) / trace_time_sec
        total_trace_time_sec = (1 / timesteps_per_sec) * last_tps['total_timesteps']

        - Q: How do we compute total training time; particularly, how do we do it for minigo?
            - We need to be able to compute the "critical path", then take the "total training time" summed across that path
            - How do we get the critical path?
                - Follow all paths from leaf-node to root node, and determine which among those paths was the longest.
                - If there are multiple leaf-nodes, collect paths from all starting leaf-nodes.
                - NOTE: the length of a node should be the extrapolated total training time of that node.
            - For visualizing the minigo figure, to find the start time of a particular (process, phase), we must shift its start time by however much extrapolation lengthened its parent nodes.

        PSEUDOCODE:
        Type: Path = ListOf[Phase]

        def path_length(path):
            return sum(phase.extrapolated_total_training_time for phase in path)

        # PROBLEM: the topology of phases gathered from the minigo script isn't
        # reflective of the fork-join pattern of the scripts...
        # In particular, sgd_updates is NOT the child of selfplay_worker_1 and selfplay_worker_2.
        # In reality, there's a shell-script that coordinates launching these phases in a serialized order:
        # - loop_main.sh
        #   - loop_selfplay.py
        #     - loop_selfplay_worker.py [1]
        #     - loop_selfplay_worker.py [2]
        # - loop_train_eval.py
        #   - sgd_updates
        #   - evaluate
        #
        # However, conceptually it makes more sense to think of as a dependency graph.
        # Currently, the dependency graph structure cannot be recovered from this fork-join pattern,
        # and it must be hard-coded.
        # So, we cannot use it to determine the "paths" needed for computing total training time.
        # HACK: If there is more than one phase, require the user to specify the dependencies
        # in a dependency.json file:
        dependency.json:
        {
            'directed_edges': [
                [
                    # A -> B
                    [A.machine_name, A.process_name, A.phase_name],
                    [B.machine_name, B.process_name, B.phase_name],
                ],
            ]
        }

        def find_all_paths(leaf):
            def _find_all_paths(phase):
                if phase.has_parents:
                    for parent in phase.parents:
                        for path in _find_all_paths(parent):
                            path = list(path)
                            path.append(phase)
                            yield path
                else:
                    # Base case:
                    yield [phase]
            return _find_all_paths(leaf)

        # Return all the Phase's that have NO children.
        leaves = sql_reader.leaf_nodes()
        paths = []
        for leaf in leaves:
            leaf_paths = find_all_paths(leaf)
            paths.extend(leaf_paths)
        # The critical path is the longest of these paths.
        critical_path = max(paths, key=path_length)
        total_training_time = path_length(critical_path)
        """
        self.sql_reader = SQLCategoryTimesReader(self.db_path,
                                                 host=self.host,
                                                 user=self.user,
                                                 password=self.password)

        machines = self.sql_reader.machines()
        for machine in machines:
            processes = self.sql_reader.processes(
                machine_name=machine.machine_name)
            for process in processes:
                phases = self.sql_reader.phases(
                    machine_name=machine.machine_name,
                    process_name=process.process_name)
                logger.info(
                    pprint_msg({
                        'machines': machines,
                        'processes': processes,
                        'phases': phases
                    }))
                if len(phases) == 1:
                    self.csv_single_phase(machine, process, phases[0])
                else:
                    raise NotImplementedError(
                        "Haven't implemented total training time extrapolation for multi-phase apps."
                    )

        self.sql_reader.close()
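
The docstring's critical-path computation can be checked with a tiny self-contained model. Phase below is a stand-in for whatever the SQL reader actually returns, and the times are made up:

class Phase:
    def __init__(self, name, time_sec, parents=()):
        self.name = name
        self.extrapolated_total_training_time = time_sec
        self.parents = list(parents)

def path_length(path):
    return sum(p.extrapolated_total_training_time for p in path)

def find_all_paths(leaf):
    # Walk every root-to-leaf path by recursing through parents.
    def _find(phase):
        if phase.parents:
            for parent in phase.parents:
                for path in _find(parent):
                    yield path + [phase]
        else:
            yield [phase]
    return _find(leaf)

# Mirror the minigo dependency graph sketched in the docstring:
# selfplay workers -> sgd_updates -> evaluate.
w1 = Phase('selfplay_worker_1', 100.0)
w2 = Phase('selfplay_worker_2', 120.0)
sgd = Phase('sgd_updates', 50.0, parents=[w1, w2])
ev = Phase('evaluate', 10.0, parents=[sgd])

paths = list(find_all_paths(ev))
critical_path = max(paths, key=path_length)
print([p.name for p in critical_path], path_length(critical_path))
# -> ['selfplay_worker_2', 'sgd_updates', 'evaluate'] 180.0

The timestep extrapolation in the docstring is plain arithmetic: if the trace covers timesteps 1,000 to 6,000 in 100 s (50 timesteps/s) and the full run is 1,000,000 timesteps, the extrapolated total is 1,000,000 / 50 = 20,000 s.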