import os
import warnings
from operator import itemgetter

import jinja2

# plot_utils, metaprogram_utils and utils are project-local modules; the bare
# names used below (StatQuery, style_plot, make_plots, the postprocess*
# helpers and the various match-processor callbacks) live in project modules
# not shown in this listing. Plot, Line and PlotLayout are assumed to come
# from the boomslang plotting library.
import metaprogram_utils
import plot_utils
import utils
from boomslang import Line, Plot, PlotLayout


def histogram_plot(experiment_log_dir, plot_spec_string, output_filename,
                   has_legend, x_limit, verbose):
    queries = [
        plot_utils.plot_spec_string_to_query(plot_spec_string, 0, "HIST")]

    plot_data = metaprogram_utils.process_queries(
        queries, experiment_log_dir, verbose)

    if "plot_points" not in plot_data:
        warnings.warn("No data to plot!")
        return

    histogram_data = plot_data["plot_points"][0]

    layout = PlotLayout()
    layout.dpi = 250

    for stat_name in histogram_data:
        # Per-stat bin counts, accumulated across all keys, for the
        # cumulative histogram drawn after this stat's plot.
        cumulative_histogram = {}

        plot = Plot()
        plot.setTitle(stat_name)
        if has_legend:
            plot.hasLegend(labelSize=8)

        if x_limit is not None:
            plot.setXLimits(0, x_limit)

        style_plot(plot, stat_name)

        for key, points in sorted(histogram_data[stat_name].items()):
            # Fold this line's per-bin counts into the running cumulative
            # histogram.
            for size, count in zip(points["bin"], points["count"]):
                cumulative_histogram[size] = (
                    cumulative_histogram.get(size, 0) + count)

            line = Line()
            line.stepFunction("pre")
            line.label = str(key)
            line.xValues = points["bin"]
            line.yValues = points["count"]
            plot.add(line)

        layout.addPlot(plot)

        cumulative_plot = Plot()

        if x_limit is not None:
            cumulative_plot.setXLimits(0, x_limit)

        cumulative_plot.setTitle("Cumulative Histogram for " + stat_name)
        style_plot(cumulative_plot, stat_name)
        line = Line()
        line.stepFunction("pre")
        line.xValues = sorted(cumulative_histogram.keys())
        line.yValues = [cumulative_histogram[key] for key in line.xValues]

        cumulative_plot.add(line)
        layout.addPlot(cumulative_plot)
    layout.save(output_filename)
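

# A minimal usage sketch for histogram_plot. The log directory, plot-spec
# string and output path below are placeholders; the spec syntax is whatever
# plot_utils.plot_spec_string_to_query accepts.
def example_histogram_plot():
    histogram_plot(
        experiment_log_dir="/tmp/experiment_logs",
        plot_spec_string="phase_one.*.worker.*.runtime",
        output_filename="runtime_histograms.png",
        has_legend=True,
        x_limit=None,
        verbose=False)
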
def calculate_rate(input_directory, skip_phase_zero, skip_phase_one,
                   skip_phase_two, verbose):

    phase_times_query = StatQuery("DATM", ("phase_name", None),
                                  ("epoch", None), ("logger_name", None),
                                  ("stat_name", "phase_runtime"),
                                  ("start_time", None))
    phase_times_query.match_processor_function = handleTimestampQueryMatch

    disk_count_query = StatQuery(
        "DATM", ("phase_name", None), ("epoch", None),
        ("logger_name", "mapreduce"),
        ("stat_name", ["num_input_disks", "num_intermediate_disks"]),
        ("uint_value", None))
    disk_count_query.match_processor_function = handleDiskCountMatch

    input_size_query = StatQuery("DATM", ("phase_name", None),
                                 ("epoch", None), ("stage_name", "reader"),
                                 ("id", None),
                                 ("stat_name", "bytes_produced"),
                                 ("uint_value", None))
    input_size_query.match_processor_function = handleReaderInputMatch

    writer_output_query = StatQuery("DATM", ("phase_name", None),
                                    ("epoch", None), ("stage_name", "writer"),
                                    ("id", None),
                                    ("stat_name", "bytes_consumed"),
                                    ("uint_value", None))
    writer_output_query.match_processor_function = handleWriterOutputMatch

    queries = [
        phase_times_query, disk_count_query, input_size_query,
        writer_output_query
    ]

    skipped_phases = []
    if skip_phase_zero:
        skipped_phases.append("phase_zero")
    if skip_phase_one:
        skipped_phases.append("phase_one")
    if skip_phase_two:
        skipped_phases.append("phase_two")
    output_data = utils.process_queries(queries, input_directory, verbose,
                                        skipped_phases)

    data_for_display = postprocess_rate_data(output_data)

    # Build the Jinja environment and template once; only the rendered data
    # varies per key.
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
        trim_blocks=True)
    template = env.get_template('rate_summary_template.jinja')

    for key in sorted(data_for_display.keys()):
        rendered_template = template.render(**data_for_display[key])
        print(rendered_template.strip() + "\n")
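

# A minimal usage sketch for calculate_rate; the directory is a placeholder,
# and the three skip flags simply control which phases are excluded from the
# rate summary.
def example_calculate_rate():
    calculate_rate(
        input_directory="/tmp/experiment_logs",
        skip_phase_zero=True,
        skip_phase_one=False,
        skip_phase_two=False,
        verbose=False)
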
def list_time_series(experiment_log_dir, plot_spec_strings, output_filename,
                     verbose):
    queries = []
    queries.extend(get_list_time_series_queries())

    time_series_data = utils.process_queries(
        queries, experiment_log_dir, verbose)
    time_series_keys = time_series_data["time_series_keys"]

    # Write one key per line, ordered by the keys' first, second, fourth and
    # then third elements.
    with open(output_filename, 'w') as output_fp:
        for time_series_key in sorted(time_series_keys,
                                      key=itemgetter(0, 1, 3, 2)):
            print(time_series_tuple_to_str(time_series_key), file=output_fp)
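

# The itemgetter(0, 1, 3, 2) sort above orders key tuples by their first,
# second, fourth and then third elements. A small illustration with made-up
# four-element keys (the real key layout comes from utils.process_queries):
def example_time_series_key_sort():
    keys = [("phase_one", 0, "reader", "host2"),
            ("phase_one", 0, "reader", "host1")]
    # Elements 0 and 1 tie, so element 3 ("host1" vs. "host2") decides the
    # order before element 2 is consulted.
    return sorted(keys, key=itemgetter(0, 1, 3, 2))
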
def time_series_plot(experiment_log_dir, plot_spec_strings, make_legend,
                     split_by_host, group_by_query, verbose):
    queries = []

    for i, plot_spec_string in enumerate(plot_spec_strings):
        queries.append(
            plot_utils.plot_spec_string_to_query(plot_spec_string, i, "COLL"))

    plot_data = metaprogram_utils.process_queries(queries, experiment_log_dir,
                                                  verbose)
    plots = make_plots(plot_data, make_legend, split_by_host, group_by_query)

    if not plots:
        warnings.warn("No data to plot!")
        return None

    return plots
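

# A minimal usage sketch for time_series_plot; the plot-spec string is a
# placeholder. Unlike histogram_plot, this function returns the plots rather
# than saving them, so the caller decides how to render them.
def example_time_series_plot():
    plots = time_series_plot(
        experiment_log_dir="/tmp/experiment_logs",
        plot_spec_strings=["phase_one.*.reader.*.bytes_produced"],
        make_legend=True,
        split_by_host=False,
        group_by_query=True,
        verbose=False)
    return plots if plots is not None else []
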
def gather_runtime_info(experiment_directory, verbose, skipped_phases=None):
    # Avoid a mutable default argument; None means "skip no phases".
    if skipped_phases is None:
        skipped_phases = []

    total_runtime_query = StatQuery("SUMM", ("phase_name", None),
                                    ("epoch", None), ("stage_name", None),
                                    ("id", None), ("stat_name", "runtime"),
                                    ("summary_stat_name", "sum"))
    total_runtime_query.match_processor_function = \
        stat_container_append_matcher("total_runtime", "value")

    total_idle_time_query = StatQuery("SUMM", ("phase_name", None),
                                      ("epoch", None), ("stage_name", None),
                                      ("id", None), ("stat_name", "wait"),
                                      ("summary_stat_name", "sum"))
    total_idle_time_query.match_processor_function = \
        stat_container_append_matcher("total_idle_time", "value")

    pipeline_saturation_time_query = StatQuery(
        "SUMM", ("phase_name", None), ("epoch", None), ("stage_name", None),
        ("id", None), ("stat_name", "pipeline_saturation_wait"),
        ("summary_stat_name", "sum"))
    pipeline_saturation_time_query.match_processor_function = (
        stat_container_append_matcher("pipeline_saturation_time", "value"))

    num_workers_query = StatQuery("DATM", ("phase_name", None),
                                  ("epoch", None), ("logger_name", None),
                                  ("stat_name", "num_workers"),
                                  ("uint_value", None))
    num_workers_query.match_processor_function = stat_container_append_matcher(
        "num_workers", "uint_value")

    teardown_time_query = StatQuery("DATM", ("phase_name", None),
                                    ("epoch", None), ("stage_name", None),
                                    ("id", None), ("stat_name", "teardown"),
                                    ("start_time", None))
    teardown_time_query.match_processor_function = \
        stat_container_append_matcher("total_teardown_time", "elapsed_time")

    stage_runtime_query = StatQuery("DATM", ("phase_name", None),
                                    ("epoch", None), ("logger_name", None),
                                    ("stat_name", "stage_runtime"),
                                    ("start_time", None))
    stage_runtime_query.match_processor_function = \
        stat_container_append_matcher("stage_runtime", "elapsed_time")

    input_size_query = StatQuery("DATM", ("phase_name", None), ("epoch", None),
                                 ("stage_name", None), ("id", None),
                                 ("stat_name", "bytes_consumed"),
                                 ("uint_value", None))
    input_size_query.match_processor_function = stat_container_append_matcher(
        "total_bytes_in", "uint_value")

    output_size_query = StatQuery("DATM", ("phase_name", None),
                                  ("epoch", None), ("stage_name", None),
                                  ("id", None),
                                  ("stat_name", "bytes_produced"),
                                  ("uint_value", None))
    output_size_query.match_processor_function = stat_container_append_matcher(
        "total_bytes_out", "uint_value")

    allocation_time_query = StatQuery("SUMM", ("phase_name", None),
                                      ("epoch", None), ("stage_name", None),
                                      ("id", None),
                                      ("stat_name", "allocation_wait_time"),
                                      ("summary_stat_name", "sum"))
    allocation_time_query.match_processor_function = \
        stat_container_append_matcher("total_mem_wait_time", "value")

    enqueue_block_time_query = StatQuery(
        "SUMM", ("phase_name", None), ("epoch", None), ("stage_name", None),
        ("id", None), ("stat_name", "queue_saturation_block_time"),
        ("summary_stat_name", "sum"))
    enqueue_block_time_query.match_processor_function = \
        stat_container_append_matcher(
            "total_enqueue_block_time", "value")

    worker_type_query = StatQuery("DATM", ("phase_name", None),
                                  ("epoch", None), ("stage_name", None),
                                  ("id", None), ("stat_name", "worker_type"),
                                  ("str_value", None))
    worker_type_query.match_processor_function = set_stage_value_matcher

    would_have_blocked_query = StatQuery("DATM", ("phase_name", None),
                                         ("epoch", None), ("stage_name", None),
                                         ("id", None),
                                         ("stat_name", "would_have_blocked"),
                                         ("uint_value", None))
    would_have_blocked_query.match_processor_function = \
        stat_container_append_matcher("would_have_blocked", "uint_value")

    total_ios_query = StatQuery("DATM", ("phase_name", None), ("epoch", None),
                                ("stage_name", None), ("id", None),
                                ("stat_name", "total_ios"),
                                ("uint_value", None))
    total_ios_query.match_processor_function = stat_container_append_matcher(
        "total_ios", "uint_value")

    worker_start_time_query = StatQuery("DATM", ("phase_name", None),
                                        ("epoch", None), ("stage_name", None),
                                        ("id", None),
                                        ("stat_name", "worker_start_time"),
                                        ("uint_value", None))
    worker_start_time_query.match_processor_function = \
        stat_container_append_matcher("worker_start_time", "uint_value")

    worker_stop_time_query = StatQuery("DATM", ("phase_name", None),
                                       ("epoch", None), ("stage_name", None),
                                       ("id", None),
                                       ("stat_name", "worker_stop_time"),
                                       ("uint_value", None))
    worker_stop_time_query.match_processor_function = \
        stat_container_append_matcher("worker_stop_time", "uint_value")

    queries = [
        total_runtime_query, total_idle_time_query,
        pipeline_saturation_time_query, num_workers_query, teardown_time_query,
        stage_runtime_query, input_size_query, output_size_query,
        allocation_time_query, enqueue_block_time_query, worker_type_query,
        would_have_blocked_query, total_ios_query, worker_start_time_query,
        worker_stop_time_query
    ]

    runtime_info = utils.process_queries(queries, experiment_directory,
                                         verbose, skipped_phases)

    runtime_info = postprocess(runtime_info, experiment_directory)

    return runtime_info
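

# Every query in gather_runtime_info follows the same shape: a StatQuery
# selects matching log records, and a match-processor callback folds each
# match into a shared results container. A hypothetical matcher in the same
# spirit as stat_container_append_matcher (sketch only; the real helper's
# signature may differ):
def example_append_matcher(dest_key, value_field):
    def matcher(match, results):
        # Append the matched record's value_field under dest_key, creating
        # the list on first use.
        results.setdefault(dest_key, []).append(match[value_field])
    return matcher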