import os
import re

import jinja2

# log_utils, utils, StatQuery, and the matcher helpers (handle_worker_runtime,
# stat_container_append_matcher, gather_timestamped_points_matcher, ...) are
# assumed to come from the surrounding log-analysis package; the bare-module
# imports below reflect how the code refers to them.
import log_utils
import utils


def worker_completion_plot(log_directory, verbose):
    queries = []

    # Get each runtime for each worker in the cluster
    query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "worker_runtime"),
        ("start_time", None))
    query.match_processor_function = handle_worker_runtime
    queries.append(query)

    worker_runtime_data = log_utils.process_queries(
        queries, log_directory, verbose)

    for (job, phase), phase_data in worker_runtime_data.items():
        plot = plot_timeline_for_phase(
            log_directory, job, phase, phase_data)
        plot.dpi = 250
        print "Saving plot for job %s phase %s" % (job, phase)
        plot.save("%s_%s_completion_plot.png" % (job, phase))

    return 0
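
# A hedged sketch of a thin CLI wrapper around worker_completion_plot; the
# argparse flags are illustrative assumptions, not options defined by the
# original tool.
def _completion_plot_main():
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Plot per-worker completion timelines from log data")
    parser.add_argument("log_directory", help="directory containing the logs")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()
    sys.exit(worker_completion_plot(args.log_directory, args.verbose))
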
def gen_worker_query(
    query_number, stat_type, hostname, phase, stage, worker_id, stat):
    """
    This function generates a worker stat query. Worker stat queries operate
    over statistics logged by individual workers. As such, they specify either
    a particular worker ID or all worker IDs. If the query applies to all
    worker IDs, worker_id is None.
    """
    if worker_id is not None:
        worker_id = int(worker_id)

    query_parts = [stat_type, ("phase_name", phase), ("epoch", None),
                   ("stage_name", stage), ("id", worker_id)]

    if stat_type == "COLL":
        query_parts.append(("collection_stat_name", stat))
    elif stat_type == "HIST":
        query_parts.append(("stat_name", stat))

    query = StatQuery(*query_parts)

    if hostname is not None:
        hostname_regex = re.compile(hostname)
    else:
        hostname_regex = None

    def match_function(match, data):
        match_hostname = match["hostname"].split('.')[0]
        phase = match["phase_name"]
        stage = match["stage_name"]
        worker_id = match["id"]

        # hostname_regex is set exactly when a hostname filter was given.
        if (hostname_regex is not None and
                hostname_regex.match(match_hostname) is None):
            return

        if stat_type == "COLL":
            stat_type_matcher_function = gather_timestamped_points_matcher
        elif stat_type == "HIST":
            stat_type_matcher_function = gather_histogram_points_matcher

        stat_type_matcher_function(
            query_number, match, data,
            (match_hostname, phase, stage, worker_id))

    query.match_processor_function = match_function
    return query
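
# A hedged usage sketch: request the "runtime" histogram for every worker in
# the "reader" stage of phase_one, restricted to hosts matching "node.*".
# The stat, stage, and hostname values are illustrative assumptions.
def _example_worker_query():
    return gen_worker_query(
        query_number=0, stat_type="HIST", hostname=r"node.*",
        phase="phase_one", stage="reader", worker_id=None, stat="runtime")
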
def gen_logger_query(
    query_number, stat_type, hostname, phase, logger_name, stat):
    """
    This function generates general time-series log queries. In general,
    time-series logs can be logged by anything as long as the logger provides a
    logger name that distinguishes it from other loggers. In particular,
    queries of this form cover stats logged by memory allocators and stage
    trackers.
    """
    query_parts = [stat_type, ("phase_name", phase), ("epoch", None),
                   ("logger_name", logger_name)]

    if stat_type == "COLL":
        query_parts.append(("collection_stat_name", stat))
    elif stat_type == "HIST":
        query_parts.append(("stat_name", stat))

    query = StatQuery(*query_parts)

    if hostname is not None:
        hostname_regex = re.compile(hostname)
    else:
        hostname_regex = None

    def match_function(match, data):
        match_hostname = match["hostname"].split('.')[0]
        phase = match["phase_name"]
        logger_name = match["logger_name"]

        if (hostname_regex is not None and
                hostname_regex.match(match_hostname) is None):
            return

        if stat_type == "COLL":
            stat_type_matcher_function = gather_timestamped_points_matcher
        elif stat_type == "HIST":
            stat_type_matcher_function = gather_histogram_points_matcher

        stat_type_matcher_function(
            query_number, match, data, (match_hostname, phase, logger_name))


    query.match_processor_function = match_function

    return query
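
# A hedged usage sketch: request a collection stat from a named logger, as the
# docstring above describes for memory allocators and stage trackers. The
# logger and stat names are illustrative assumptions.
def _example_logger_query():
    return gen_logger_query(
        query_number=1, stat_type="COLL", hostname=None,
        phase="phase_one", logger_name="memory_allocator", stat="usage")
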
def get_list_time_series_queries():
    """
    Generates queries that enumerate the distinct time-series keys
    (phase, stage, worker ID, stat name) present in a log directory, for both
    per-worker stats and named-logger stats.
    """
    worker_query = StatQuery(
        "COLL",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("collection_stat_name", None))

    def worker_match_function(match, data):
        phase = match["phase_name"]
        stage = match["stage_name"]
        worker_id = str(match["id"])
        stat_name = match["collection_stat_name"]

        if "time_series_keys" not in data:
            data["time_series_keys"] = set()

        time_series_key = (phase, stage, worker_id, stat_name)
        data["time_series_keys"].add(time_series_key)

    worker_query.match_processor_function = worker_match_function

    tracker_query = StatQuery(
        "COLL",
        ("phase_name", None),
        ("epoch", None),
        ("logger_name", None),
        ("collection_stat_name", None))

    def tracker_match_function(match, data):
        phase = match["phase_name"]
        stage = match["logger_name"]
        stat_name = match["collection_stat_name"]

        if "time_series_keys" not in data:
            data["time_series_keys"] = set()

        time_series_key = (phase, stage, None, stat_name)
        data["time_series_keys"].add(time_series_key)

    tracker_query.match_processor_function = tracker_match_function

    return (worker_query, tracker_query)
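
# A hedged sketch of running the two listing queries; it assumes
# log_utils.process_queries returns a dict of per-job dicts, matching its use
# in worker_completion_plot above.
def _example_list_time_series(log_directory, verbose=False):
    worker_query, tracker_query = get_list_time_series_queries()
    data = log_utils.process_queries(
        [worker_query, tracker_query], log_directory, verbose)
    for job_key, job_data in data.items():
        for time_series_key in job_data.get("time_series_keys", []):
            print("%s: %s" % (job_key, time_series_key))
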
def calculate_rate(input_directory, skip_phase_zero, skip_phase_one,
                   skip_phase_two, verbose):
    """
    Gathers phase runtimes, disk counts, reader input sizes, and writer output
    sizes from the logs in input_directory, then renders and prints a rate
    summary for each entry in the post-processed data.
    """
    phaseTimesQuery = StatQuery("DATM", ("phase_name", None), ("epoch", None),
                                ("logger_name", None),
                                ("stat_name", "phase_runtime"),
                                ("start_time", None))
    phaseTimesQuery.match_processor_function = handleTimestampQueryMatch

    diskCountQuery = StatQuery(
        "DATM", ("phase_name", None), ("epoch", None),
        ("logger_name", "mapreduce"),
        ("stat_name", ["num_input_disks", "num_intermediate_disks"]),
        ("uint_value", None))
    diskCountQuery.match_processor_function = handleDiskCountMatch

    inputSizeQuery = StatQuery("DATM", ("phase_name", None), ("epoch", None),
                               ("stage_name", "reader"), ("id", None),
                               ("stat_name", "bytes_produced"),
                               ("uint_value", None))
    inputSizeQuery.match_processor_function = handleReaderInputMatch

    writerOutputQuery = StatQuery("DATM", ("phase_name", None),
                                  ("epoch", None), ("stage_name", "writer"),
                                  ("id", None),
                                  ("stat_name", "bytes_consumed"),
                                  ("uint_value", None))
    writerOutputQuery.match_processor_function = handleWriterOutputMatch

    queries = [
        phaseTimesQuery, diskCountQuery, inputSizeQuery, writerOutputQuery
    ]

    skipped_phases = []
    if skip_phase_zero:
        skipped_phases.append("phase_zero")
    if skip_phase_one:
        skipped_phases.append("phase_one")
    if skip_phase_two:
        skipped_phases.append("phase_two")
    output_data = utils.process_queries(queries, input_directory, verbose,
                                        skipped_phases)

    data_for_display = postprocess_rate_data(output_data)

    # The environment and template only need to be constructed once, not once
    # per key.
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
        trim_blocks=True)
    template = env.get_template('rate_summary_template.jinja')

    for key in sorted(data_for_display.keys()):
        rendered_template = template.render(**data_for_display[key])

        print(rendered_template.strip() + "\n")
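
# The four handle* matchers above are defined elsewhere in the project. As a
# point of reference, a minimal sketch of a disk-count matcher, assuming
# matches expose the fields named in the query tuples (this is not the
# project's handleDiskCountMatch):
def _example_disk_count_matcher(match, data):
    counts = data.setdefault("disk_counts", {})
    stat_name = match["stat_name"]
    counts[stat_name] = counts.get(stat_name, 0) + match["uint_value"]
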
def gather_runtime_info(experiment_directory, verbose, skipped_phases=None):
    # Use None instead of a mutable default argument.
    if skipped_phases is None:
        skipped_phases = []
    total_runtime_query = StatQuery("SUMM", ("phase_name", None),
                                    ("epoch", None), ("stage_name", None),
                                    ("id", None), ("stat_name", "runtime"),
                                    ("summary_stat_name", "sum"))
    total_runtime_query.match_processor_function = \
        stat_container_append_matcher("total_runtime", "value")

    total_idle_time_query = StatQuery("SUMM", ("phase_name", None),
                                      ("epoch", None), ("stage_name", None),
                                      ("id", None), ("stat_name", "wait"),
                                      ("summary_stat_name", "sum"))
    total_idle_time_query.match_processor_function = \
        stat_container_append_matcher("total_idle_time", "value")

    pipeline_saturation_time_query = StatQuery(
        "SUMM", ("phase_name", None), ("epoch", None), ("stage_name", None),
        ("id", None), ("stat_name", "pipeline_saturation_wait"),
        ("summary_stat_name", "sum"))
    pipeline_saturation_time_query.match_processor_function = (
        stat_container_append_matcher("pipeline_saturation_time", "value"))

    num_workers_query = StatQuery("DATM", ("phase_name", None),
                                  ("epoch", None), ("logger_name", None),
                                  ("stat_name", "num_workers"),
                                  ("uint_value", None))
    num_workers_query.match_processor_function = stat_container_append_matcher(
        "num_workers", "uint_value")

    teardown_time_query = StatQuery("DATM", ("phase_name", None),
                                    ("epoch", None), ("stage_name", None),
                                    ("id", None), ("stat_name", "teardown"),
                                    ("start_time", None))
    teardown_time_query.match_processor_function = \
        stat_container_append_matcher("total_teardown_time", "elapsed_time")

    stage_runtime_query = StatQuery("DATM", ("phase_name", None),
                                    ("epoch", None), ("logger_name", None),
                                    ("stat_name", "stage_runtime"),
                                    ("start_time", None))
    stage_runtime_query.match_processor_function = \
        stat_container_append_matcher("stage_runtime", "elapsed_time")

    input_size_query = StatQuery("DATM", ("phase_name", None), ("epoch", None),
                                 ("stage_name", None), ("id", None),
                                 ("stat_name", "bytes_consumed"),
                                 ("uint_value", None))
    input_size_query.match_processor_function = stat_container_append_matcher(
        "total_bytes_in", "uint_value")

    output_size_query = StatQuery("DATM", ("phase_name", None),
                                  ("epoch", None), ("stage_name", None),
                                  ("id", None),
                                  ("stat_name", "bytes_produced"),
                                  ("uint_value", None))
    output_size_query.match_processor_function = stat_container_append_matcher(
        "total_bytes_out", "uint_value")

    allocation_time_query = StatQuery("SUMM", ("phase_name", None),
                                      ("epoch", None), ("stage_name", None),
                                      ("id", None),
                                      ("stat_name", "allocation_wait_time"),
                                      ("summary_stat_name", "sum"))
    allocation_time_query.match_processor_function = \
        stat_container_append_matcher("total_mem_wait_time", "value")

    enqueue_block_time_query = StatQuery(
        "SUMM", ("phase_name", None), ("epoch", None), ("stage_name", None),
        ("id", None), ("stat_name", "queue_saturation_block_time"),
        ("summary_stat_name", "sum"))
    enqueue_block_time_query.match_processor_function = \
        stat_container_append_matcher(
        "total_enqueue_block_time", "value")

    worker_type_query = StatQuery("DATM", ("phase_name", None),
                                  ("epoch", None), ("stage_name", None),
                                  ("id", None), ("stat_name", "worker_type"),
                                  ("str_value", None))
    worker_type_query.match_processor_function = set_stage_value_matcher

    would_have_blocked_query = StatQuery("DATM", ("phase_name", None),
                                         ("epoch", None), ("stage_name", None),
                                         ("id", None),
                                         ("stat_name", "would_have_blocked"),
                                         ("uint_value", None))
    would_have_blocked_query.match_processor_function = \
        stat_container_append_matcher("would_have_blocked", "uint_value")

    total_ios_query = StatQuery("DATM", ("phase_name", None), ("epoch", None),
                                ("stage_name", None), ("id", None),
                                ("stat_name", "total_ios"),
                                ("uint_value", None))
    total_ios_query.match_processor_function = stat_container_append_matcher(
        "total_ios", "uint_value")

    worker_start_time_query = StatQuery("DATM", ("phase_name", None),
                                        ("epoch", None), ("stage_name", None),
                                        ("id", None),
                                        ("stat_name", "worker_start_time"),
                                        ("uint_value", None))
    worker_start_time_query.match_processor_function = \
        stat_container_append_matcher("worker_start_time", "uint_value")

    worker_stop_time_query = StatQuery("DATM", ("phase_name", None),
                                       ("epoch", None), ("stage_name", None),
                                       ("id", None),
                                       ("stat_name", "worker_stop_time"),
                                       ("uint_value", None))
    worker_stop_time_query.match_processor_function = \
        stat_container_append_matcher("worker_stop_time", "uint_value")

    queries = [
        total_runtime_query, total_idle_time_query,
        pipeline_saturation_time_query, num_workers_query, teardown_time_query,
        stage_runtime_query, input_size_query, output_size_query,
        allocation_time_query, enqueue_block_time_query, worker_type_query,
        would_have_blocked_query, total_ios_query, worker_start_time_query,
        worker_stop_time_query
    ]

    runtime_info = utils.process_queries(queries, experiment_directory,
                                         verbose, skipped_phases)

    runtime_info = postprocess(runtime_info, experiment_directory)

    return runtime_info
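
# stat_container_append_matcher is used above as a factory that binds a
# destination key and a match field. A minimal sketch of such a factory,
# assuming matches are dicts as in the match functions earlier in this file
# (the real implementation lives elsewhere in the project):
def _example_append_matcher(container_key, match_field):
    def match_function(match, data):
        data.setdefault(container_key, []).append(match[match_field])
    return match_function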