def median_progress_rate_speedup(self, prefix): """ Returns how fast the job would have run if all tasks had the median progress rate. """ total_median_progress_rate_runtime = 0 runtimes_for_combined_stages = [] all_start_finish_times = [] for id, stage in self.stages.iteritems(): median_rate_runtimes = stage.task_runtimes_with_median_progress_rate() if id in self.stages_to_combine: runtimes_for_combined_stages.extend(median_rate_runtimes) else: no_stragglers_runtime, start_finish_times = simulate.simulate( median_rate_runtimes, concurrency.get_max_concurrency(stage.tasks)) start_finish_times_adjusted = [ (start + total_median_progress_rate_runtime, finish + total_median_progress_rate_runtime) \ for start, finish in start_finish_times] total_median_progress_rate_runtime += no_stragglers_runtime all_start_finish_times.append(start_finish_times_adjusted) print "No stragglers runtime: ", no_stragglers_runtime print "MAx concurrency: ", concurrency.get_max_concurrency(stage.tasks) if len(runtimes_for_combined_stages) > 0: no_stragglers_runtime, start_finish_times = simulate.simulate( runtimes_for_combined_stages, self.combined_stages_concurrency) start_finish_times_adjusted = [ (start + total_median_progress_rate_runtime, finish + total_median_progress_rate_runtime) \ for start, finish in start_finish_times] total_median_progress_rate_runtime += no_stragglers_runtime all_start_finish_times.append(start_finish_times_adjusted) self.write_simulated_waterfall(all_start_finish_times, "%s_sim_median_progress_rate" % prefix) return total_median_progress_rate_runtime * 1.0 / self.get_simulated_runtime()
def median_progress_rate_speedup(self, prefix):
    """
    Ratio of the simulated job runtime when every task runs at the median
    progress rate to the value of self.get_simulated_runtime().

    Stages listed in self.stages_to_combine have their runtimes pooled and
    simulated in one go (with self.combined_stages_concurrency) after all the
    other stages, each of which is simulated individually with the max
    concurrency derived from its own tasks. The resulting per-simulation
    (start, finish) lists, offset by the runtime accumulated before each
    simulation, are passed to self.write_simulated_waterfall together with
    the name "%s_sim_median_progress_rate" % prefix.
    """
    elapsed = 0
    pooled_runtimes = []
    waterfall = []

    def shifted(intervals, offset):
        # Move every (start, finish) pair later by `offset`.
        return [(begin + offset, end + offset) for begin, end in intervals]

    for stage_id, stage in self.stages.iteritems():
        runtimes = stage.task_runtimes_with_median_progress_rate()
        if stage_id in self.stages_to_combine:
            pooled_runtimes.extend(runtimes)
            continue
        stage_runtime, intervals = simulate.simulate(
            runtimes, concurrency.get_max_concurrency(stage.tasks))
        waterfall.append(shifted(intervals, elapsed))
        elapsed += stage_runtime

    if pooled_runtimes:
        # The combined stages are placed after everything simulated above.
        pooled_runtime, intervals = simulate.simulate(
            pooled_runtimes, self.combined_stages_concurrency)
        waterfall.append(shifted(intervals, elapsed))
        elapsed += pooled_runtime

    output_name = "%s_sim_median_progress_rate" % prefix
    self.write_simulated_waterfall(waterfall, output_name)
    return elapsed * 1.0 / self.get_simulated_runtime()
def median_progress_rate_speedup(self):
    """
    Ratio of the simulated job runtime when every task runs at the median
    progress rate to the value of self.get_simulated_runtime().

    Stages not listed in self.stages_to_combine are simulated one at a time;
    the runtimes of the listed stages are pooled and simulated once at the
    end. Only element [0] of each simulate.simulate() result is used, and it
    is summed as that simulation's runtime.
    """
    simulated_total = 0
    pooled_runtimes = []
    for stage_id, stage in self.stages.iteritems():
        runtimes = stage.task_runtimes_with_median_progress_rate()
        if stage_id not in self.stages_to_combine:
            simulated_total += simulate.simulate(runtimes)[0]
            continue
        # Combined stages are simulated together after the loop.
        pooled_runtimes.extend(runtimes)

    if pooled_runtimes:
        simulated_total += simulate.simulate(pooled_runtimes)[0]

    return simulated_total * 1.0 / self.get_simulated_runtime()