def fraction_time_scheduler_delay(self):
    """Return the fraction of total task time spent waiting on the scheduler.

    Of the total time spent across all machines in the cluster, what
    fraction of time was spent waiting on the scheduler?

    The numerator is the sum of ``scheduler_delay`` over every task of every
    stage in ``self.stages``; the denominator is the sum of
    ``stage.total_runtime()`` over the same stages.

    Returns:
        The ratio as a float, or 0.0 when the total runtime is zero (for
        example, when there are no stages), so callers never hit a
        ZeroDivisionError.
    """
    total_scheduler_delay = 0
    total_runtime = 0
    # Only the stage objects are needed, so iterate the values directly.
    # values() exists on both Python 2 and Python 3 mappings.
    for stage in self.stages.values():
        total_scheduler_delay += sum(task.scheduler_delay for task in stage.tasks)
        total_runtime += stage.total_runtime()

    if not total_runtime:
        # No runtime recorded at all: report no scheduler delay instead of
        # dividing by zero.
        return 0.0
    # float() forces true division under Python 2 integer semantics.
    return float(total_scheduler_delay) / total_runtime
def no_stragglers_perfect_parallelism_speedup(self):
    """Return the ideal (perfectly parallel) runtime relative to the simulated one.

    The ideal runtime is built up per stage:

    * A stage whose id is NOT in ``self.stages_to_combine`` contributes
      ``stage.total_runtime()`` divided by
      ``concurrency.get_max_concurrency(stage.tasks)`` (presumably the peak
      number of simultaneously running tasks -- confirm against the
      ``concurrency`` module).
    * Stages whose id IS in ``self.stages_to_combine`` have their total
      runtimes summed, and that sum is divided once by
      ``self.combined_stages_concurrency``.

    Progress information is printed to stdout along the way.

    Returns:
        ``ideal_runtime / self.get_simulated_runtime()`` as a float.
    """
    # Start from a float so the result is a float even when no stage adds
    # anything to the ideal runtime.
    ideal_runtime = 0.0
    total_runtime_combined_stages = 0
    for stage_id, stage in self.stages.items():
        if stage_id in self.stages_to_combine:
            total_runtime_combined_stages += stage.total_runtime()
        else:
            new_runtime = (float(stage.total_runtime()) /
                           concurrency.get_max_concurrency(stage.tasks))
            print("New runtime: %s, original runtime: %s" %
                  (new_runtime, stage.finish_time() - stage.start_time))
            ideal_runtime += new_runtime

    print("Total runtime combined: %s (concurrency %d)" %
          (total_runtime_combined_stages, self.combined_stages_concurrency))
    if total_runtime_combined_stages:
        # Skip the division when nothing was combined: the term would be
        # zero anyway, and the combined concurrency may itself be zero.
        ideal_runtime += (float(total_runtime_combined_stages) /
                          self.combined_stages_concurrency)

    print("Getting simulated runtime")
    simulated_actual_runtime = self.get_simulated_runtime()
    print("Ideal runtime for all: %s, simulated: %s" %
          (ideal_runtime, simulated_actual_runtime))
    return ideal_runtime / simulated_actual_runtime
def fraction_time_waiting_on_shuffle_read(self):
    """Return the fraction of total task time spent waiting on shuffle reads.

    Of the total time spent across all machines in the cluster, what
    fraction of time was spent waiting on the network?

    The numerator is the sum of ``stage.total_fetch_wait()`` and the
    denominator is the sum of ``stage.total_runtime()`` over every stage in
    ``self.stages``.

    Returns:
        The ratio as a float, or 0.0 when the total runtime is zero (for
        example, when there are no stages).

    Raises:
        AssertionError: if the per-stage totals are inconsistent, i.e. the
            fetch wait plus the runtime without remote shuffle reads does
            not add up to the total runtime.
    """
    total_fetch_wait = 0
    # This is just used as a sanity check: total_runtime_no_shuffle_read +
    # total_fetch_wait should equal total_runtime.
    total_runtime_no_shuffle_read = 0
    total_runtime = 0
    # Only the stage objects are needed, so iterate the values directly.
    # values() exists on both Python 2 and Python 3 mappings.
    for stage in self.stages.values():
        total_fetch_wait += stage.total_fetch_wait()
        total_runtime_no_shuffle_read += stage.total_runtime_no_remote_shuffle_read()
        total_runtime += stage.total_runtime()

    assert total_runtime == total_fetch_wait + total_runtime_no_shuffle_read

    if not total_runtime:
        # No runtime recorded at all: report no fetch wait instead of
        # dividing by zero.
        return 0.0
    # float() forces true division under Python 2 integer semantics.
    return float(total_fetch_wait) / total_runtime