def worker_completion_plot(log_directory, verbose):
    """
    Plot a timeline of worker completion for every (job, phase) pair found
    in the logs, saving one PNG per pair.
    """
    queries = []

    # Get each runtime for each worker in the cluster
    query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "worker_runtime"),
        ("start_time", None))
    query.match_processor_function = handle_worker_runtime

    queries.append(query)

    worker_runtime_data = log_utils.process_queries(
        queries, log_directory, verbose)

    for (job, phase) in worker_runtime_data:
        phase_data = worker_runtime_data[(job, phase)]

        plot = plot_timeline_for_phase(
            log_directory, job, phase, phase_data)
        plot.dpi = 250

        print "Saving plot for job %s phase %s" % (job, phase)
        plot.save("%s_%s_completion_plot.png" % (job, phase))

    return 0
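# handle_worker_runtime is defined elsewhere in the repository. For
# illustration only, a minimal sketch of the shape such a match processor
# could take is below; it is NOT the real implementation, and the
# "job_name" field in particular is an assumption.
def handle_worker_runtime_sketch(match, data):
    # Timestamp (DATM ... start_time) matches in this codebase also expose
    # an elapsed_time field; group each worker's runtime by (job, phase).
    key = (match["job_name"], match["phase_name"])  # hypothetical key
    data.setdefault(key, []).append(
        (match["id"], match["start_time"], match["elapsed_time"]))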
def gen_worker_query(
        query_number, stat_type, hostname, phase, stage, worker_id, stat):
    """
    This function generates a worker stat query. Worker stat queries operate
    over statistics logged by individual workers. As such, they specify
    either a particular worker ID or all worker IDs. If the query applies to
    all worker IDs, worker_id is None.
    """
    if worker_id is not None:
        worker_id = int(worker_id)

    query_parts = [
        stat_type,
        ("phase_name", phase),
        ("epoch", None),
        ("stage_name", stage),
        ("id", worker_id)]

    if stat_type == "COLL":
        query_parts.append(("collection_stat_name", stat))
    elif stat_type == "HIST":
        query_parts.append(("stat_name", stat))

    query = StatQuery(*query_parts)

    if hostname is not None:
        hostname_regex = re.compile(hostname)
    else:
        hostname_regex = None

    def match_function(match, data):
        match_hostname = match["hostname"].split('.')[0]
        phase = match["phase_name"]
        stage = match["stage_name"]
        worker_id = match["id"]

        # Skip matches from hosts that fail the hostname filter
        if (hostname_regex is not None and
            hostname_regex.match(match_hostname) is None):
            return

        if stat_type == "COLL":
            stat_type_matcher_function = gather_timestamped_points_matcher
        elif stat_type == "HIST":
            stat_type_matcher_function = gather_histogram_points_matcher

        stat_type_matcher_function(
            query_number, match, data,
            (match_hostname, phase, stage, worker_id))

    query.match_processor_function = match_function

    return query
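# Example usage, as a sketch: build a query over the "runtime" histogram of
# every worker in a "sorter" stage during phase_one, with no hostname
# filter. The stage and stat names here are illustrative assumptions, not a
# fixed part of the logging schema.
def _example_worker_query():
    return gen_worker_query(
        0,             # query_number: tags this query's matched points
        "HIST",        # histogram stats are selected via "stat_name"
        None,          # hostname: None disables the regex filter
        "phase_one",   # phase
        "sorter",      # stage (assumed name)
        None,          # worker_id: None matches all workers
        "runtime")     # stat (assumed name)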
def gen_logger_query(
        query_number, stat_type, hostname, phase, logger_name, stat):
    """
    This function generates general time-series log queries. In general,
    time-series logs can be logged by anything as long as the logger provides
    a logger name that distinguishes it from other loggers. In particular,
    queries of this form cover stats logged by memory allocators and stage
    trackers.
    """
    query_parts = [
        stat_type,
        ("phase_name", phase),
        ("epoch", None),
        ("logger_name", logger_name)]

    if stat_type == "COLL":
        query_parts.append(("collection_stat_name", stat))
    elif stat_type == "HIST":
        query_parts.append(("stat_name", stat))

    query = StatQuery(*query_parts)

    if hostname is not None:
        hostname_regex = re.compile(hostname)
    else:
        hostname_regex = None

    def match_function(match, data):
        match_hostname = match["hostname"].split('.')[0]
        phase = match["phase_name"]
        logger_name = match["logger_name"]

        # Skip matches from hosts that fail the hostname filter
        if (hostname_regex is not None and
            hostname_regex.match(match_hostname) is None):
            return

        if stat_type == "COLL":
            stat_type_matcher_function = gather_timestamped_points_matcher
        elif stat_type == "HIST":
            stat_type_matcher_function = gather_histogram_points_matcher

        stat_type_matcher_function(
            query_number, match, data, (match_hostname, phase, logger_name))

    query.match_processor_function = match_function

    return query
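# A companion sketch for gen_logger_query: a COLL query over a memory
# allocator's time series in phase_one, restricted to hosts whose short
# name matches a regex. The logger and stat names are assumptions.
def _example_logger_query():
    return gen_logger_query(
        1,                    # query_number
        "COLL",               # collection stats use "collection_stat_name"
        r"node\d+",           # keep only matching hostnames
        "phase_one",          # phase
        "memory_allocator",   # logger_name (assumed)
        "capacity_usage")     # stat (assumed)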
def get_list_time_series_queries():
    """
    Build a pair of queries that enumerate every time-series key available
    in the logs: one over per-worker collection stats, one over
    logger-tracked collection stats. Both record their findings in
    data["time_series_keys"].
    """
    worker_query = StatQuery(
        "COLL",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("collection_stat_name", None))

    def worker_match_function(match, data):
        phase = match["phase_name"]
        stage = match["stage_name"]
        worker_id = str(match["id"])
        stat_name = match["collection_stat_name"]

        if "time_series_keys" not in data:
            data["time_series_keys"] = set()

        time_series_key = (phase, stage, worker_id, stat_name)
        data["time_series_keys"].add(time_series_key)

    worker_query.match_processor_function = worker_match_function

    tracker_query = StatQuery(
        "COLL",
        ("phase_name", None),
        ("epoch", None),
        ("logger_name", None),
        ("collection_stat_name", None))

    def tracker_match_function(match, data):
        phase = match["phase_name"]
        stage = match["logger_name"]
        stat_name = match["collection_stat_name"]

        if "time_series_keys" not in data:
            data["time_series_keys"] = set()

        # Tracker stats have no worker ID, so that slot is always None
        time_series_key = (phase, stage, None, stat_name)
        data["time_series_keys"].add(time_series_key)

    tracker_query.match_processor_function = tracker_match_function

    return (worker_query, tracker_query)
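# A sketch of how the discovery queries above might be driven, assuming (as
# in worker_completion_plot) that process_queries returns the data dict that
# the match processors populated. _list_time_series is a hypothetical
# helper, not part of the original module.
def _list_time_series(log_directory, verbose=False):
    worker_query, tracker_query = get_list_time_series_queries()
    data = log_utils.process_queries(
        [worker_query, tracker_query], log_directory, verbose)
    for key in sorted(data.get("time_series_keys", set())):
        print "%s %s %s %s" % key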
def calculate_rate(
        input_directory, skip_phase_zero, skip_phase_one, skip_phase_two,
        verbose):
    phaseTimesQuery = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("logger_name", None),
        ("stat_name", "phase_runtime"),
        ("start_time", None))
    phaseTimesQuery.match_processor_function = handleTimestampQueryMatch

    diskCountQuery = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("logger_name", "mapreduce"),
        ("stat_name", ["num_input_disks", "num_intermediate_disks"]),
        ("uint_value", None))
    diskCountQuery.match_processor_function = handleDiskCountMatch

    inputSizeQuery = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", "reader"),
        ("id", None),
        ("stat_name", "bytes_produced"),
        ("uint_value", None))
    inputSizeQuery.match_processor_function = handleReaderInputMatch

    writerOutputQuery = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", "writer"),
        ("id", None),
        ("stat_name", "bytes_consumed"),
        ("uint_value", None))
    writerOutputQuery.match_processor_function = handleWriterOutputMatch

    queries = [
        phaseTimesQuery, diskCountQuery, inputSizeQuery, writerOutputQuery]

    skipped_phases = []
    if skip_phase_zero:
        skipped_phases.append("phase_zero")
    if skip_phase_one:
        skipped_phases.append("phase_one")
    if skip_phase_two:
        skipped_phases.append("phase_two")

    output_data = utils.process_queries(
        queries, input_directory, verbose, skipped_phases)

    data_for_display = postprocess_rate_data(output_data)

    # Build the template environment once, rather than once per key
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
        trim_blocks=True)
    template = env.get_template('rate_summary_template.jinja')

    for key in sorted(data_for_display.keys()):
        rendered_template = template.render(**data_for_display[key])
        print rendered_template.strip() + "\n"
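# handleDiskCountMatch is defined elsewhere; for illustration, one plausible
# shape for it is sketched below. The query above pins down phase_name,
# stat_name, and uint_value, but the container layout in data is an
# assumption.
def handle_disk_count_sketch(match, data):
    counts = data.setdefault("disk_counts", {})
    counts[(match["phase_name"], match["stat_name"])] = match["uint_value"]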
def gather_runtime_info(experiment_directory, verbose, skipped_phases=None):
    # A mutable default argument would be shared across calls, so default
    # to None and create a fresh list per call.
    if skipped_phases is None:
        skipped_phases = []

    total_runtime_query = StatQuery(
        "SUMM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "runtime"),
        ("summary_stat_name", "sum"))
    total_runtime_query.match_processor_function = \
        stat_container_append_matcher("total_runtime", "value")

    total_idle_time_query = StatQuery(
        "SUMM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "wait"),
        ("summary_stat_name", "sum"))
    total_idle_time_query.match_processor_function = \
        stat_container_append_matcher("total_idle_time", "value")

    pipeline_saturation_time_query = StatQuery(
        "SUMM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "pipeline_saturation_wait"),
        ("summary_stat_name", "sum"))
    pipeline_saturation_time_query.match_processor_function = \
        stat_container_append_matcher("pipeline_saturation_time", "value")

    num_workers_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("logger_name", None),
        ("stat_name", "num_workers"),
        ("uint_value", None))
    num_workers_query.match_processor_function = \
        stat_container_append_matcher("num_workers", "uint_value")

    teardown_time_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "teardown"),
        ("start_time", None))
    teardown_time_query.match_processor_function = \
        stat_container_append_matcher("total_teardown_time", "elapsed_time")

    stage_runtime_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("logger_name", None),
        ("stat_name", "stage_runtime"),
        ("start_time", None))
    stage_runtime_query.match_processor_function = \
        stat_container_append_matcher("stage_runtime", "elapsed_time")

    input_size_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "bytes_consumed"),
        ("uint_value", None))
    input_size_query.match_processor_function = \
        stat_container_append_matcher("total_bytes_in", "uint_value")

    output_size_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "bytes_produced"),
        ("uint_value", None))
    output_size_query.match_processor_function = \
        stat_container_append_matcher("total_bytes_out", "uint_value")

    allocation_time_query = StatQuery(
        "SUMM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "allocation_wait_time"),
        ("summary_stat_name", "sum"))
    allocation_time_query.match_processor_function = \
        stat_container_append_matcher("total_mem_wait_time", "value")

    enqueue_block_time_query = StatQuery(
        "SUMM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "queue_saturation_block_time"),
        ("summary_stat_name", "sum"))
    enqueue_block_time_query.match_processor_function = \
        stat_container_append_matcher("total_enqueue_block_time", "value")

    worker_type_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "worker_type"),
        ("str_value", None))
    worker_type_query.match_processor_function = set_stage_value_matcher

    would_have_blocked_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "would_have_blocked"),
        ("uint_value", None))
    would_have_blocked_query.match_processor_function = \
        stat_container_append_matcher("would_have_blocked", "uint_value")

    total_ios_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "total_ios"),
        ("uint_value", None))
    total_ios_query.match_processor_function = \
        stat_container_append_matcher("total_ios", "uint_value")

    worker_start_time_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "worker_start_time"),
        ("uint_value", None))
    worker_start_time_query.match_processor_function = \
        stat_container_append_matcher("worker_start_time", "uint_value")

    worker_stop_time_query = StatQuery(
        "DATM",
        ("phase_name", None),
        ("epoch", None),
        ("stage_name", None),
        ("id", None),
        ("stat_name", "worker_stop_time"),
        ("uint_value", None))
    worker_stop_time_query.match_processor_function = \
        stat_container_append_matcher("worker_stop_time", "uint_value")

    queries = [
        total_runtime_query, total_idle_time_query,
        pipeline_saturation_time_query, num_workers_query,
        teardown_time_query, stage_runtime_query, input_size_query,
        output_size_query, allocation_time_query, enqueue_block_time_query,
        worker_type_query, would_have_blocked_query, total_ios_query,
        worker_start_time_query, worker_stop_time_query]

    runtime_info = utils.process_queries(
        queries, experiment_directory, verbose, skipped_phases)

    runtime_info = postprocess(runtime_info, experiment_directory)

    return runtime_info