def make_cdfs_for_performance_model(self, prefix):
    """ Writes plot files for CDFs of the per-task compute / network / write rates.

    Each list of rates is handed to write_cdf together with a name built from
    prefix: "<prefix>_compute_rate_cdf", "<prefix>_network_rate_cdf" and
    "<prefix>_write_rate_cdf", in that order.

    A task whose input_data or shuffle_mb_written is zero raises
    ZeroDivisionError; outputs already passed to write_cdf before that point
    are not undone.
    """
    tasks = self.all_tasks()

    # Compute time per unit of input data.
    compute_rates = []
    for task in tasks:
        compute_rates.append(task.compute_time() * 1.0 / task.input_data)
    write_cdf(compute_rates, "%s_compute_rate_cdf" % prefix)

    # NOTE(review): this is the same expression as the compute rate above, so
    # the "network" CDF currently duplicates the compute CDF. It looks like a
    # copy-paste slip -- confirm which network time / byte-count attributes
    # should be used here.
    network_rates = []
    for task in tasks:
        network_rates.append(task.compute_time() * 1.0 / task.input_data)
    write_cdf(network_rates, "%s_network_rate_cdf" % prefix)

    # Shuffle write time per unit of shuffle data written.
    write_rates = []
    for task in tasks:
        write_rates.append(task.shuffle_write_time * 1.0 / task.shuffle_mb_written)
    write_cdf(write_rates, "%s_write_rate_cdf" % prefix)
def fraction_time_serializing(self):
    """ Returns the fraction of time spent serializing and deserializing data.

    The numerator is the sum, over every task of every stage in self.stages,
    of estimated_serialization_millis + estimated_deserialization_millis; the
    denominator is the sum of task.runtime() over the same tasks.
    (Assumes runtime() is expressed in the same unit as the *_millis
    attributes -- TODO confirm.)

    A warning is printed for any task whose estimated (de)serialization time
    exceeds its compute_time(); such tasks are still included in the totals.

    Returns 0.0 when there are no tasks or the total runtime is zero, rather
    than raising ZeroDivisionError.
    """
    total_serialize_time = 0
    total_runtime = 0
    for stage in self.stages.values():
        for task in stage.tasks:
            serialize_time = (task.estimated_serialization_millis +
                              task.estimated_deserialization_millis)
            compute_time = task.compute_time()
            if serialize_time > compute_time:
                print("!!!! Warning: For task %s, serialize time (%s) is larger than compute time (%s)" %
                      (task, serialize_time, compute_time))
            total_serialize_time += serialize_time
            total_runtime += task.runtime()

    # An empty job (or one with no recorded runtime) has no meaningful ratio;
    # report zero instead of dividing by zero.
    if total_runtime == 0:
        return 0.0
    # "* 1.0" forces float division even when the totals are ints.
    return total_serialize_time * 1.0 / total_runtime
def fraction_time_computing(self):
    """ Returns the fraction of total task runtime spent computing.

    The numerator is the sum of task.compute_time() and the denominator is the
    sum of task.runtime(), both taken over every task of every stage in
    self.stages.

    Returns 0.0 when there are no tasks or the total runtime is zero, rather
    than raising ZeroDivisionError.
    """
    total_compute_time = 0
    total_runtime = 0
    for stage in self.stages.values():
        for task in stage.tasks:
            total_compute_time += task.compute_time()
            total_runtime += task.runtime()

    # An empty job (or one with no recorded runtime) has no meaningful ratio;
    # report zero instead of dividing by zero.
    if total_runtime == 0:
        return 0.0
    # "* 1.0" forces float division even when the totals are ints.
    return total_compute_time * 1.0 / total_runtime