def main(argv):
    # Copies the logs from the driver, then parses the event log and writes
    # utilization, load-balancing, and runtime summaries alongside it.
    (local_event_log_file, continuous_monitor_file) = copy_logs.copy_logs(argv)
    analyzer = parse_event_logs.Analyzer(local_event_log_file, shuffle_job_filterer.filter)
    analyzer.output_utilizations(local_event_log_file)
    analyzer.output_load_balancing_badness(local_event_log_file)
    analyzer.output_runtimes(local_event_log_file)
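# NOTE: shuffle_job_filterer is defined elsewhere in the repository. The sketch below is
# an assumption about its behavior (keep only the jobs that shuffle data), included for
# illustration; it is not the repository's actual implementation.
def shuffle_job_filter_sketch(all_jobs_dict):
    """ Returns only the jobs whose stages include at least one shuffle read. """
    return {
        job_id: job
        for job_id, job in all_jobs_dict.iteritems()
        if any(stage.has_shuffle_read() for stage in job.stages.itervalues())
    }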
def __get_jcts_from_logs(log_dir, warmup_count):
    """ Returns a tuple of (list of write job JCTs, list of read job JCTs) parsed from the
    event log contained in the provided directory. """
    event_log_filepath = path.join(log_dir, "event_log")
    sorted_job_pairs = sorted(parse_event_logs.Analyzer(event_log_filepath).jobs.iteritems())
    return (__get_jcts_for_phase(sorted_job_pairs, warmup_count, phase="write"),
            __get_jcts_for_phase(sorted_job_pairs, warmup_count, phase="read"))
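# __get_jcts_for_phase is defined elsewhere in this file. A minimal sketch, assuming the
# write and read jobs alternate (write first) and that the first warmup_count trials of
# each phase are discarded; the real helper may differ.
def __get_jcts_for_phase_sketch(sorted_job_pairs, warmup_count, phase):
    """ Returns the JCTs (in seconds) for the write or read jobs, warmup trials dropped. """
    offset = 0 if phase == "write" else 1
    phase_jobs = [job for i, (_, job) in enumerate(sorted_job_pairs) if i % 2 == offset]
    return [float(job.runtime()) / 1000 for job in phase_jobs[warmup_count:]]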
def __get_num_tasks_to_jcts(log_dir, num_warmup_trials):
    """ Returns a mapping from number of tasks to a list of the JCTs from the jobs that used
    that number of tasks. """
    num_tasks_to_event_log = __get_num_tasks_to_event_log(log_dir)
    partial_filterer = functools.partial(__filterer, num_warmup_trials)
    return {
        num_tasks: [
            float(job.runtime()) / 1000
            for job in parse_event_logs.Analyzer(event_log, partial_filterer).jobs.itervalues()
        ]
        for num_tasks, event_log in num_tasks_to_event_log.iteritems()
    }
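# __filterer is defined elsewhere in this file; parse_event_logs.Analyzer calls the
# partially-applied filterer with a dictionary mapping job_id to job. A plausible sketch
# (an assumption, for illustration): sort the jobs by id and drop the warmup trials.
def __filterer_sketch(num_warmup_trials, all_jobs_dict):
    """ Returns the provided jobs with the first num_warmup_trials jobs removed. """
    sorted_job_ids = sorted(all_jobs_dict.iterkeys())
    return {job_id: all_jobs_dict[job_id] for job_id in sorted_job_ids[num_warmup_trials:]}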
def __add_jct_results(data_file, event_log, query_name, num_warmup_trials, x_coordinate,
                      estimate):
    """ Parses the provided event log, extracts the JCTs, and writes the median, min, and max
    JCTs to the provided data file. """
    # Each trial of queries 3abc and 4 consists of two jobs.
    has_two_jobs_per_trial = ("3" in query_name) or ("4" in query_name)
    num_warmup_jobs = 2 * num_warmup_trials if has_two_jobs_per_trial else num_warmup_trials
    filterer = functools.partial(__drop_warmup_filterer, num_warmup_jobs)
    analyzer = parse_event_logs.Analyzer(event_log, filterer)

    if not estimate:
        analyzer.output_stage_resource_metrics(event_log)
        analyzer.output_job_resource_metrics(event_log)
        analyzer.output_utilizations(event_log)
        analyzer.output_ideal_time_metrics(event_log)
        jcts = [job.runtime() for _, job in sorted(analyzer.jobs.iteritems())]
    else:
        jcts = []
        for _, job in sorted(analyzer.jobs.iteritems()):
            job_runtime = job.runtime()
            for s_id, stage in job.stages.iteritems():
                (cpu, network, disk) = stage.get_ideal_times_from_metrics(10)
                ser = stage.get_ideal_ser_deser_time_s()
                if ser > 0:
                    disk_read = stage.get_disk_read_time_s()
                    print "ser time is", str(ser), "and disk read is", disk_read, "of", disk
                    old_ideal = max(cpu, network, disk)
                    new_ideal = max(cpu - ser, network, disk - disk_read)
                    print "old ideal", old_ideal, "new ideal", new_ideal
                    multiplier = float(new_ideal) / old_ideal
                    stage_time = multiplier * stage.runtime()
                    # This strategy avoids needing to deal with concurrent stages.
                    print "Adjusting job runtime from", job_runtime
                    job_runtime = job_runtime - stage.runtime() + stage_time
                    print "to", job_runtime
            jcts.append(job_runtime)

    if has_two_jobs_per_trial:
        # We sum adjacent JCTs together in order to get the total JCT for each trial.
        jcts = __sum_adjacent_items(jcts)
    data_values = [numpy.median(jcts), min(jcts), max(jcts)]
    data_file.write(__build_data_line(query_name, x_coordinate, data_values))
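# __sum_adjacent_items is defined elsewhere in this file. A minimal sketch of the behavior
# the comment above describes (assumed, not copied from the repository): collapse
# [a, b, c, d] into [a + b, c + d], so each pair of per-job JCTs becomes one trial JCT.
def __sum_adjacent_items_sketch(items):
    """ Returns a list in which each pair of adjacent input items has been summed. """
    assert len(items) % 2 == 0, "Expected an even number of items"
    return [items[i] + items[i + 1] for i in xrange(0, len(items), 2)]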
def __add_jct_results(data_file, event_log, query_name, num_warmup_trials, x_coordinate):
    """ Parses the provided event log, extracts the JCTs, and writes the median, min, and max
    JCTs to the provided data file. """
    # Each trial of queries 3abc and 4 consists of two jobs.
    has_two_jobs_per_trial = ("3" in query_name) or ("4" in query_name)
    num_warmup_jobs = 2 * num_warmup_trials if has_two_jobs_per_trial else num_warmup_trials
    filterer = functools.partial(__drop_warmup_filterer, num_warmup_jobs)
    analyzer = parse_event_logs.Analyzer(event_log, filterer)

    analyzer.output_stage_resource_metrics(event_log)
    analyzer.output_job_resource_metrics(event_log)
    analyzer.output_utilizations(event_log)
    analyzer.output_ideal_time_metrics(event_log)

    jcts = [job.runtime() for _, job in sorted(analyzer.jobs.iteritems())]
    if has_two_jobs_per_trial:
        # We sum adjacent JCTs together in order to get the total JCT for each trial.
        jcts = __sum_adjacent_items(jcts)
    data_values = [numpy.median(jcts), min(jcts), max(jcts)]
    data_file.write(__build_data_line(query_name, x_coordinate, data_values))
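# __drop_warmup_filterer and __build_data_line are defined elsewhere in this file. Minimal
# sketches of how they appear to be used above (assumptions, for illustration only): the
# filterer drops the first num_warmup_jobs jobs, and the data line is a tab-separated row.
def __drop_warmup_filterer_sketch(num_warmup_jobs, all_jobs_dict):
    """ Returns the provided jobs with the first num_warmup_jobs jobs removed. """
    sorted_job_ids = sorted(all_jobs_dict.iterkeys())
    return {job_id: all_jobs_dict[job_id] for job_id in sorted_job_ids[num_warmup_jobs:]}

def __build_data_line_sketch(query_name, x_coordinate, data_values):
    """ Returns one tab-separated data-file line: name, x-coordinate, then the values. """
    return "\t".join([query_name, str(x_coordinate)] + [str(v) for v in data_values]) + "\n"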
def main(argv):
    if len(argv) < 2:
        print("Usage: parse_vary_num_tasks.py output_directory [opt (to copy data): driver_hostname " +
              "identity_file num_experiments [opt username]]")
        sys.exit(1)

    output_prefix = argv[1]
    if not os.path.exists(output_prefix):
        os.mkdir(output_prefix)

    num_cores = 8
    if len(argv) >= 5:
        driver_hostname = argv[2]
        if "millennium" in driver_hostname:
            # The millennium machines have 16 cores.
            num_cores = 16
        identity_file = argv[3]
        num_experiments = argv[4]
        if len(argv) >= 6:
            username = argv[5]
        else:
            username = "******"
        utils.copy_latest_zipped_logs(driver_hostname, identity_file, output_prefix,
                                      num_experiments, username)

    all_dirnames = [
        d for d in os.listdir(output_prefix) if "experiment" in d and "tar.gz" not in d
    ]
    all_dirnames.sort(key=lambda d: int(re.search('experiment_log_([0-9]*)_', d).group(1)))

    output_filename = os.path.join(output_prefix, "actual_runtimes")
    output_file = open(output_filename, "w")
    for dirname in all_dirnames:
        local_event_log_filename = os.path.join(output_prefix, dirname, "event_log")
        print "Parsing event log in %s" % local_event_log_filename
        analyzer = parse_event_logs.Analyzer(local_event_log_filename, job_filterer=filter)
        all_jobs = analyzer.jobs.values()
        num_tasks_values = [
            len(stage.tasks)
            for job in all_jobs
            for (stage_id, stage) in job.stages.iteritems()
        ]
        # Assumes all of the map and reduce stages use the same number of tasks.
        num_tasks = num_tasks_values[0]

        ideal_runtimes_millis = []
        ideal_map_runtimes_millis = []
        actual_map_runtimes_millis = []
        ideal_reduce_runtimes_millis = []
        actual_reduce_runtimes_millis = []
        for job in all_jobs:
            job_ideal_millis = 0
            for (stage_id, stage) in job.stages.iteritems():
                stage_ideal_millis = 1000 * stage.ideal_time_s(
                    metrics.AWS_M24XLARGE_MAX_NETWORK_GIGABITS_PER_S,
                    num_cores_per_executor=num_cores)
                job_ideal_millis += stage_ideal_millis
                if stage.has_shuffle_read():
                    ideal_reduce_runtimes_millis.append(stage_ideal_millis)
                    actual_reduce_runtimes_millis.append(stage.runtime())
                else:
                    ideal_map_runtimes_millis.append(stage_ideal_millis)
                    actual_map_runtimes_millis.append(stage.runtime())
            ideal_runtimes_millis.append(job_ideal_millis)

        print "Ideal runtimes:", ideal_runtimes_millis
        print "Ideal map runtimes:", ideal_map_runtimes_millis
        print "Ideal reduce runtimes:", ideal_reduce_runtimes_millis

        actual_runtimes_millis = [job.runtime() for job in all_jobs]
        actual_over_ideal = [
            actual / ideal
            for actual, ideal in zip(actual_runtimes_millis, ideal_runtimes_millis)
        ]
        print "Actual runtimes:", actual_runtimes_millis

        # The inline comments track the 1-indexed column numbers used by the gnuplot files.
        data_to_write = [
            num_tasks,
            min(actual_runtimes_millis),
            numpy.percentile(actual_runtimes_millis, 50),  # 3
            max(actual_runtimes_millis),
            min(ideal_runtimes_millis),
            numpy.percentile(ideal_runtimes_millis, 50),  # 6
            max(ideal_runtimes_millis),
            min(actual_over_ideal),
            numpy.percentile(actual_over_ideal, 50),  # 9
            max(actual_over_ideal),
            min(ideal_runtimes_millis),
            numpy.percentile(ideal_runtimes_millis, 50),  # 12
            max(ideal_runtimes_millis),
            min(actual_map_runtimes_millis),
            numpy.percentile(actual_map_runtimes_millis, 50),  # 15
            max(actual_map_runtimes_millis),
            min(ideal_map_runtimes_millis),
            numpy.percentile(ideal_map_runtimes_millis, 50),  # 18
            max(ideal_map_runtimes_millis),
            min(actual_reduce_runtimes_millis),
            numpy.percentile(actual_reduce_runtimes_millis, 50),  # 21
            max(actual_reduce_runtimes_millis),
            min(ideal_reduce_runtimes_millis),
            numpy.percentile(ideal_reduce_runtimes_millis, 50),  # 24
            max(ideal_reduce_runtimes_millis)
        ]
        output_file.write("\t".join([str(x) for x in data_to_write]))
output_file.write("\n") output_file.close() plot(output_prefix, "actual_runtimes", "actual_runtimes.gp", "gnuplot_files/plot_vary_num_tasks_base.gp") plot(output_prefix, "actual_runtimes", "actual_runtimes_map_reduce.gp", "gnuplot_files/plot_vary_num_tasks_map_reduce_base.gp")