Ejemplo n.º 1
0
  def median_progress_rate_speedup(self, prefix):
    """ Returns how fast the job would have run if all tasks had the median progress rate. """
    total_median_progress_rate_runtime = 0
    runtimes_for_combined_stages = []
    all_start_finish_times = []
    for id, stage in self.stages.iteritems():
      median_rate_runtimes = stage.task_runtimes_with_median_progress_rate()
      if id in self.stages_to_combine:
        runtimes_for_combined_stages.extend(median_rate_runtimes)
      else:
        no_stragglers_runtime, start_finish_times = simulate.simulate(
          median_rate_runtimes, concurrency.get_max_concurrency(stage.tasks))
        start_finish_times_adjusted = [
          (start + total_median_progress_rate_runtime, finish + total_median_progress_rate_runtime) \
          for start, finish in start_finish_times]
        total_median_progress_rate_runtime += no_stragglers_runtime
        all_start_finish_times.append(start_finish_times_adjusted)
        print "No stragglers runtime: ", no_stragglers_runtime
        print "MAx concurrency: ", concurrency.get_max_concurrency(stage.tasks)

    if len(runtimes_for_combined_stages) > 0:
      no_stragglers_runtime, start_finish_times = simulate.simulate(
        runtimes_for_combined_stages, self.combined_stages_concurrency)
      start_finish_times_adjusted = [
        (start + total_median_progress_rate_runtime, finish + total_median_progress_rate_runtime) \
        for start, finish in start_finish_times]
      total_median_progress_rate_runtime += no_stragglers_runtime
      all_start_finish_times.append(start_finish_times_adjusted)

    self.write_simulated_waterfall(all_start_finish_times, "%s_sim_median_progress_rate" % prefix)
    return total_median_progress_rate_runtime * 1.0 / self.get_simulated_runtime()
Ejemplo n.º 2
0
  def get_simulated_runtime(self, waterfall_prefix=""):
    """ Simulates the job and returns the total simulated runtime.

    Each stage outside self.stages_to_combine is simulated on its own, with its tasks
    ordered by start time; the tasks of all combined stages are pooled and simulated
    together using self.combined_stages_concurrency. The returned value is the sum of
    the simulated runtimes.

    When waterfall_prefix is non-empty, the simulated (start, finish) times are also
    written as a waterfall named "<waterfall_prefix>_simulated".
    """
    by_start_time = lambda task: task.start_time
    elapsed = 0
    waterfall = []
    pooled_tasks = []
    for stage_id, stage in self.stages.iteritems():
      if stage_id in self.stages_to_combine:
        pooled_tasks.extend(stage.tasks)
        continue
      ordered = sorted(stage.tasks, key = by_start_time)
      durations = [task.runtime() for task in ordered]
      stage_runtime, intervals = simulate.simulate(
        durations, concurrency.get_max_concurrency(ordered))
      # The stage starts once all previously simulated stages have finished.
      waterfall.append([(begin + elapsed, end + elapsed) for begin, end in intervals])
      elapsed += stage_runtime

    if len(pooled_tasks) > 0:
      ordered = sorted(pooled_tasks, key = by_start_time)
      pooled_runtime, intervals = simulate.simulate(
        [task.runtime() for task in ordered], self.combined_stages_concurrency)
      # Subtracting the simulated runtime shifts the combined stages to before time zero,
      # i.e. ahead of every other stage on the waterfall.
      waterfall.append(
        [(begin - pooled_runtime, end - pooled_runtime) for begin, end in intervals])
      elapsed += pooled_runtime

    if waterfall_prefix:
      self.write_simulated_waterfall(waterfall, "%s_simulated" % waterfall_prefix)
    return elapsed
Ejemplo n.º 3
0
  def get_simulated_runtime(self, waterfall_prefix=""):
    """ Simulates the job and returns the total simulated runtime.

    Each stage outside self.stages_to_combine is simulated on its own, with its tasks
    ordered by start time; the tasks of all combined stages are pooled and simulated
    together using self.combined_stages_concurrency. The returned value is the sum of
    the simulated runtimes.

    When waterfall_prefix is non-empty, the simulated (start, finish) times are also
    written as a waterfall named "<waterfall_prefix>_simulated".
    """
    by_start_time = lambda task: task.start_time
    elapsed = 0
    waterfall = []
    pooled_tasks = []
    for stage_id, stage in self.stages.iteritems():
      if stage_id in self.stages_to_combine:
        pooled_tasks.extend(stage.tasks)
        continue
      ordered = sorted(stage.tasks, key = by_start_time)
      durations = [task.runtime() for task in ordered]
      stage_runtime, intervals = simulate.simulate(
        durations, concurrency.get_max_concurrency(ordered))
      # The stage starts once all previously simulated stages have finished.
      waterfall.append([(begin + elapsed, end + elapsed) for begin, end in intervals])
      elapsed += stage_runtime

    if len(pooled_tasks) > 0:
      ordered = sorted(pooled_tasks, key = by_start_time)
      pooled_runtime, intervals = simulate.simulate(
        [task.runtime() for task in ordered], self.combined_stages_concurrency)
      # Subtracting the simulated runtime shifts the combined stages to before time zero,
      # i.e. ahead of every other stage on the waterfall.
      waterfall.append(
        [(begin - pooled_runtime, end - pooled_runtime) for begin, end in intervals])
      elapsed += pooled_runtime

    if waterfall_prefix:
      self.write_simulated_waterfall(waterfall, "%s_simulated" % waterfall_prefix)
    return elapsed
Ejemplo n.º 4
0
 def __str__(self):
     """ Returns a one-line summary of the stage: task count, runtimes, I/O and straggler counts. """
     longest_task_runtime = max([task.runtime() for task in self.tasks])
     # The first task is used to describe how the whole stage read its input.
     first_task = self.tasks[0]
     input_method = "shuffle" if first_task.has_fetch else first_task.input_read_method
     template = (
         "%s tasks (avg runtime: %s, max runtime: %s) Start: %s, runtime: %s, "
         "Max concurrency: %s, "
         "Input MB: %s (from %s), Output MB: %s, Straggers: %s, Progress rate straggers: %s, "
         "Progress rate stragglers explained by scheduler delay (%s), HDFS read (%s), "
         "HDFS and read (%s), GC (%s), Network (%s), JIT (%s), output rate stragglers: %s")
     values = (
         len(self.tasks),
         self.average_task_runtime(),
         longest_task_runtime,
         self.start_time,
         self.finish_time() - self.start_time,
         concurrency.get_max_concurrency(self.tasks),
         self.input_mb(),
         input_method,
         self.output_mb(),
         self.traditional_stragglers(),
         self.progress_rate_stragglers()[0],
         self.scheduler_delay_stragglers()[0],
         self.hdfs_read_stragglers()[0],
         self.hdfs_read_and_scheduler_delay_stragglers()[0],
         self.gc_stragglers()[0],
         self.network_stragglers()[0],
         # JIT stragglers are deliberately not computed here (doing so screws up the
         # calculation), so a -1 placeholder fills the JIT slot instead.
         -1,
         self.output_progress_rate_stragglers()[0])
     return template % values
Ejemplo n.º 5
0
  def replace_stragglers_with_median_speedup(self, threshold_fn):
    """ Returns how much faster the job would have run if there were no stragglers.

    For each stage, passes the list of task runtimes into threshold_fn, which should
    return a threshold runtime. Then, replaces all task runtimes greater than or equal to
    the given threshold with the stage's median runtime.

    For example, to replace the tasks with the longest 5% of runtimes with the median:
      self.replace_stragglers_with_median_speedup(
        lambda runtimes: numpy.percentile(runtimes, 95))

    Returns the simulated no-straggler runtime divided by self.get_simulated_runtime().
    Raises AssertionError if threshold_fn selects a task that is faster than the median.
    """
    self.print_heading("Computing speedup from replacing straggler tasks with median")
    total_no_stragglers_runtime = 0
    # (start time, runtime) pairs for the stages that are simulated together: once with
    # stragglers replaced, and once untouched for the "original" comparison figures.
    start_and_runtimes_for_combined_stages = []
    original_start_and_runtimes_for_combined_stages = []
    num_stragglers_combined_stages = 0
    for id, stage in self.stages.iteritems():
      runtimes = [task.runtime() for task in stage.tasks]
      median_runtime = numpy.percentile(runtimes, 50)
      threshold_runtime = threshold_fn(runtimes)
      no_straggler_start_and_runtimes = []
      num_stragglers = 0
      sorted_stage_tasks = sorted(stage.tasks, key = lambda t: t.runtime())
      for task in sorted_stage_tasks:
        if task.runtime() >= threshold_runtime:
          # A straggler must never be made slower by the replacement.
          assert(median_runtime <= task.runtime())
          no_straggler_start_and_runtimes.append((task.start_time, median_runtime))
          num_stragglers += 1
        else:
          no_straggler_start_and_runtimes.append((task.start_time, task.runtime()))
      if id in self.stages_to_combine:
        start_and_runtimes_for_combined_stages.extend(no_straggler_start_and_runtimes)
        original_start_and_runtimes_for_combined_stages.extend(
          [(t.start_time, t.runtime()) for t in stage.tasks])
        num_stragglers_combined_stages += num_stragglers
      else:
        max_concurrency = concurrency.get_max_concurrency(stage.tasks)
        no_stragglers_runtime = simulate.simulate(
          [x[1] for x in no_straggler_start_and_runtimes], max_concurrency)[0]
        total_no_stragglers_runtime += no_stragglers_runtime
        original_runtime = simulate.simulate(
          [task.runtime() for task in sorted_stage_tasks], max_concurrency)[0]
        print ("%s: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
          (id, stage.finish_time() - stage.start_time, original_runtime, no_stragglers_runtime,
           num_stragglers))
    if len(start_and_runtimes_for_combined_stages) > 0:
      # The "Original" figure is the observed wall-clock span of the combined stages, so it
      # has to come from the unmodified runtimes rather than the straggler-replaced ones.
      original_start_time = min(
        [x[0] for x in original_start_and_runtimes_for_combined_stages])
      original_finish_time = max(
        [x[0] + x[1] for x in original_start_and_runtimes_for_combined_stages])
      # Simulate the combined tasks in start-time order.
      start_and_runtimes_for_combined_stages.sort()
      runtimes_for_combined_stages = [x[1] for x in start_and_runtimes_for_combined_stages]
      new_runtime = simulate.simulate(
        runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
      original_runtime = simulate.simulate(
        [x[1] for x in sorted(original_start_and_runtimes_for_combined_stages)],
        self.combined_stages_concurrency)[0]
      print ("Combined: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
        (original_finish_time - original_start_time, original_runtime, new_runtime,
         num_stragglers_combined_stages))
      total_no_stragglers_runtime += new_runtime
    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
Ejemplo n.º 6
0
  def replace_stragglers_with_median_speedup(self, threshold_fn):
    """ Returns how much faster the job would have run if there were no stragglers.

    For each stage, passes the list of task runtimes into threshold_fn, which should
    return a threshold runtime. Then, replaces all task runtimes greater than or equal to
    the given threshold with the stage's median runtime.

    For example, to replace the tasks with the longest 5% of runtimes with the median:
      self.replace_stragglers_with_median_speedup(
        lambda runtimes: numpy.percentile(runtimes, 95))

    Returns the simulated no-straggler runtime divided by self.get_simulated_runtime().
    Raises AssertionError if threshold_fn selects a task that is faster than the median.
    """
    self.print_heading("Computing speedup from replacing straggler tasks with median")
    total_no_stragglers_runtime = 0
    # (start time, runtime) pairs for the stages that are simulated together: once with
    # stragglers replaced, and once untouched for the "original" comparison figures.
    start_and_runtimes_for_combined_stages = []
    original_start_and_runtimes_for_combined_stages = []
    num_stragglers_combined_stages = 0
    for id, stage in self.stages.iteritems():
      runtimes = [task.runtime() for task in stage.tasks]
      median_runtime = numpy.percentile(runtimes, 50)
      threshold_runtime = threshold_fn(runtimes)
      no_straggler_start_and_runtimes = []
      num_stragglers = 0
      sorted_stage_tasks = sorted(stage.tasks, key = lambda t: t.runtime())
      for task in sorted_stage_tasks:
        if task.runtime() >= threshold_runtime:
          # A straggler must never be made slower by the replacement.
          assert(median_runtime <= task.runtime())
          no_straggler_start_and_runtimes.append((task.start_time, median_runtime))
          num_stragglers += 1
        else:
          no_straggler_start_and_runtimes.append((task.start_time, task.runtime()))
      if id in self.stages_to_combine:
        start_and_runtimes_for_combined_stages.extend(no_straggler_start_and_runtimes)
        original_start_and_runtimes_for_combined_stages.extend(
          [(t.start_time, t.runtime()) for t in stage.tasks])
        num_stragglers_combined_stages += num_stragglers
      else:
        max_concurrency = concurrency.get_max_concurrency(stage.tasks)
        no_stragglers_runtime = simulate.simulate(
          [x[1] for x in no_straggler_start_and_runtimes], max_concurrency)[0]
        total_no_stragglers_runtime += no_stragglers_runtime
        original_runtime = simulate.simulate(
          [task.runtime() for task in sorted_stage_tasks], max_concurrency)[0]
        print ("%s: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
          (id, stage.finish_time() - stage.start_time, original_runtime, no_stragglers_runtime,
           num_stragglers))
    if len(start_and_runtimes_for_combined_stages) > 0:
      # The "Original" figure is the observed wall-clock span of the combined stages, so it
      # has to come from the unmodified runtimes rather than the straggler-replaced ones.
      original_start_time = min(
        [x[0] for x in original_start_and_runtimes_for_combined_stages])
      original_finish_time = max(
        [x[0] + x[1] for x in original_start_and_runtimes_for_combined_stages])
      # Simulate the combined tasks in start-time order.
      start_and_runtimes_for_combined_stages.sort()
      runtimes_for_combined_stages = [x[1] for x in start_and_runtimes_for_combined_stages]
      new_runtime = simulate.simulate(
        runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
      original_runtime = simulate.simulate(
        [x[1] for x in sorted(original_start_and_runtimes_for_combined_stages)],
        self.combined_stages_concurrency)[0]
      print ("Combined: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
        (original_finish_time - original_start_time, original_runtime, new_runtime,
         num_stragglers_combined_stages))
      total_no_stragglers_runtime += new_runtime
    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
Ejemplo n.º 7
0
    def add_tasks_to_totals(unsorted_tasks):
      # Sort the tasks by the start time, not the finish time -- otherwise the longest tasks
      # end up getting run last, which can artificially inflate job completion time.
      tasks = sorted(unsorted_tasks, key = lambda task: task.start_time)
      max_concurrency = concurrency.get_max_concurrency(tasks)

      # Get the runtime for the stage
      task_runtimes = [compute_base_runtime(task) for task in tasks]
      base_runtime = simulate.simulate(task_runtimes, max_concurrency)[0]
      total_time[0] += base_runtime

      faster_runtimes = [compute_faster_runtime(task) for task in tasks]
      faster_runtime = simulate.simulate(faster_runtimes, max_concurrency)[0]
      total_faster_time[0] += faster_runtime
      print "Base: %s, faster: %s" % (base_runtime, faster_runtime)
Ejemplo n.º 8
0
    def add_tasks_to_totals(unsorted_tasks):
      # Sort the tasks by the start time, not the finish time -- otherwise the longest tasks
      # end up getting run last, which can artificially inflate job completion time.
      tasks = sorted(unsorted_tasks, key = lambda task: task.start_time)
      max_concurrency = concurrency.get_max_concurrency(tasks)

      # Get the runtime for the stage
      task_runtimes = [compute_base_runtime(task) for task in tasks]
      base_runtime = simulate.simulate(task_runtimes, max_concurrency)[0]
      total_time[0] += base_runtime

      faster_runtimes = [compute_faster_runtime(task) for task in tasks]
      faster_runtime = simulate.simulate(faster_runtimes, max_concurrency)[0]
      total_faster_time[0] += faster_runtime
      print "Base: %s, faster: %s" % (base_runtime, faster_runtime)
Ejemplo n.º 9
0
  def initialize_job(self, time, initialTime):
    """ Should be called after adding all events to the job. """
    # Drop empty stages.
    self.submittingTime = time
    stages_to_drop = []
    for id, s in self.stages.iteritems():
      if len(s.tasks) == 0:
        stages_to_drop.append(id)
    for id in stages_to_drop:
      print "Dropping stage %s" % id
      del self.stages[id]

    # Compute the amount of overlapped time between stages
    # (there should just be two stages, at the beginning, that overlap and run concurrently).
    # This computation assumes that not more than two stages overlap.
    #print "  ", ["%s: %s tasks" % (id, len(s.tasks)) for id, s in self.stages.iteritems()]
    start_and_finish_times = [(id, s.start_time, s.conservative_finish_time())
        for id, s in self.stages.iteritems()]
    start_and_finish_times.sort(key = lambda x: x[1])
    self.overlap = 0
    old_end = 0
    previous_id = ""
    self.stages_to_combine = set()
    ts = 0.0
    for id, start, finish in start_and_finish_times:
      print "id, submission, start, finish, runTime, duration"
      print id, self.submittingTime-initialTime, start-initialTime, finish-initialTime, finish-start, finish-self.submittingTime
      return finish-self.submittingTime
      if start < old_end:
        self.overlap += old_end - start
        print "   Overlap:", self.overlap, "between ", id, "and", previous_id
        self.stages_to_combine.add(id)
        self.stages_to_combine.add(previous_id)
        old_end = max(old_end, finish)
      if finish > old_end:
        old_end = finish
        previous_id = id

#    print "Stages to combine: ", self.stages_to_combine

    self.combined_stages_concurrency = -1
    if len(self.stages_to_combine) > 0:
      tasks_for_combined_stages = []
      for stage_id in self.stages_to_combine:
        tasks_for_combined_stages.extend(self.stages[stage_id].tasks)
      self.combined_stages_concurrency = concurrency.get_max_concurrency(tasks_for_combined_stages)
Ejemplo n.º 10
0
    def initialize_job(self):
        """ Should be called after adding all events to the job. """
        # Drop empty stages.
        stages_to_drop = []
        for id, s in self.stages.iteritems():
            if len(s.tasks) == 0:
                stages_to_drop.append(id)
        for id in stages_to_drop:
            print "Dropping stage %s" % id
            del self.stages[id]

        # Compute the amount of overlapped time between stages
        # (there should just be two stages, at the beginning, that overlap and run concurrently).
        # This computation assumes that not more than two stages overlap.
        print[
            "%s: %s tasks" % (id, len(s.tasks))
            for id, s in self.stages.iteritems()
        ]
        start_and_finish_times = [(id, s.start_time,
                                   s.conservative_finish_time())
                                  for id, s in self.stages.iteritems()]
        start_and_finish_times.sort(key=lambda x: x[1])
        self.overlap = 0
        old_end = 0
        previous_id = ""
        self.stages_to_combine = set()
        for id, start, finish in start_and_finish_times:
            if start < old_end:
                self.overlap += old_end - start
                print "Overlap:", self.overlap, "between ", id, "and", previous_id
                self.stages_to_combine.add(id)
                self.stages_to_combine.add(previous_id)
                old_end = max(old_end, finish)
            if finish > old_end:
                old_end = finish
                previous_id = id

        print "Stages to combine: ", self.stages_to_combine

        self.combined_stages_concurrency = -1
        if len(self.stages_to_combine) > 0:
            tasks_for_combined_stages = []
            for stage_id in self.stages_to_combine:
                tasks_for_combined_stages.extend(self.stages[stage_id].tasks)
            self.combined_stages_concurrency = concurrency.get_max_concurrency(
                tasks_for_combined_stages)
Ejemplo n.º 11
0
 def __str__(self):
   """ Returns a one-line summary of the stage: task count, runtimes, I/O and straggler counts. """
   longest_task_runtime = max([task.runtime() for task in self.tasks])
   # The first task is used to describe how the whole stage read its input.
   first_task = self.tasks[0]
   input_method = "shuffle" if first_task.has_fetch else first_task.input_read_method
   template = (
     "%s tasks (avg runtime: %s, max runtime: %s) Start: %s, runtime: %s, "
     "Max concurrency: %s, "
     "Input MB: %s (from %s), Output MB: %s, Straggers: %s, Progress rate straggers: %s, "
     "Progress rate stragglers explained by scheduler delay (%s), HDFS read (%s), "
     "HDFS and read (%s), GC (%s), Network (%s), JIT (%s), output rate stragglers: %s")
   values = (
     len(self.tasks),
     self.average_task_runtime(),
     longest_task_runtime,
     self.start_time,
     self.finish_time() - self.start_time,
     concurrency.get_max_concurrency(self.tasks),
     self.input_mb(),
     input_method,
     self.output_mb(),
     self.traditional_stragglers(),
     self.progress_rate_stragglers()[0],
     self.scheduler_delay_stragglers()[0],
     self.hdfs_read_stragglers()[0],
     self.hdfs_read_and_scheduler_delay_stragglers()[0],
     self.gc_stragglers()[0],
     self.network_stragglers()[0],
     self.jit_stragglers()[0],
     self.output_progress_rate_stragglers()[0])
   return template % values
Ejemplo n.º 12
0
  def no_stragglers_perfect_parallelism_speedup(self):
    """ Returns how fast the job would have run if time were perfectly spread across 32 slots. """
    ideal_runtime = 0
    total_runtime_combined_stages = 0
    for id, stage in self.stages.iteritems():
      if id in self.stages_to_combine:
        total_runtime_combined_stages += stage.total_runtime()
      else:
        new_runtime = float(stage.total_runtime()) / concurrency.get_max_concurrency(stage.tasks)
        print "New runtime: %s, original runtime: %s" % (new_runtime, stage.finish_time() - stage.start_time)
        ideal_runtime += new_runtime


    print "Total runtime combined: %s (concurrency %d" % (total_runtime_combined_stages, self.combined_stages_concurrency)
    ideal_runtime += float(total_runtime_combined_stages) / self.combined_stages_concurrency
    print "Getting simulated runtime"
    simulated_actual_runtime = self.get_simulated_runtime()
    print "Ideal runtime for all: %s, simulated: %s" % (ideal_runtime, simulated_actual_runtime)
    return ideal_runtime / simulated_actual_runtime
Ejemplo n.º 13
0
  def no_stragglers_perfect_parallelism_speedup(self):
    """ Returns how fast the job would have run if time were perfectly spread across 32 slots. """
    ideal_runtime = 0
    total_runtime_combined_stages = 0
    for id, stage in self.stages.iteritems():
      if id in self.stages_to_combine:
        total_runtime_combined_stages += stage.total_runtime()
      else:
        new_runtime = float(stage.total_runtime()) / concurrency.get_max_concurrency(stage.tasks)
        print "New runtime: %s, original runtime: %s" % (new_runtime, stage.finish_time() - stage.start_time)
        ideal_runtime += new_runtime


    print "Total runtime combined: %s (concurrency %d" % (total_runtime_combined_stages, self.combined_stages_concurrency)
    ideal_runtime += float(total_runtime_combined_stages) / self.combined_stages_concurrency
    print "Getting simulated runtime"
    simulated_actual_runtime = self.get_simulated_runtime()
    print "Ideal runtime for all: %s, simulated: %s" % (ideal_runtime, simulated_actual_runtime)
    return ideal_runtime / simulated_actual_runtime
Ejemplo n.º 14
0
    def replace_all_tasks_with_average_speedup(self, prefix):
        """ Estimates the speedup from giving every task its stage's average runtime.

        Every task runtime is replaced by stage.average_task_runtime() of the stage it
        belongs to, and the job is re-simulated. The resulting schedule is written as a
        waterfall named "<prefix>_sim_no_stragglers".

        Returns the averaged-out simulated runtime divided by
        self.get_simulated_runtime().
        """
        self.print_heading("Computing speedup by averaging out stragglers")
        elapsed = 0
        waterfall = []
        pooled_runtimes = []
        for stage_id, stage in self.stages.iteritems():
            uniform_runtimes = [stage.average_task_runtime()] * len(stage.tasks)
            if stage_id in self.stages_to_combine:
                pooled_runtimes.extend(uniform_runtimes)
                continue
            stage_runtime, intervals = simulate.simulate(
                uniform_runtimes, concurrency.get_max_concurrency(stage.tasks))
            # Offset the stage by the time consumed by the stages simulated before it.
            waterfall.append(
                [(begin + elapsed, end + elapsed) for begin, end in intervals])
            elapsed += stage_runtime

        if len(pooled_runtimes) > 0:
            pooled_runtime, intervals = simulate.simulate(
                pooled_runtimes, self.combined_stages_concurrency)
            # Hack: shift the combined stages to negative times so that they show up at
            # the beginning of the waterfall, which is when they usually occur.
            waterfall.append(
                [(begin - pooled_runtime, end - pooled_runtime)
                 for begin, end in intervals])
            elapsed += pooled_runtime

        self.write_simulated_waterfall(waterfall, "%s_sim_no_stragglers" % prefix)
        return elapsed * 1.0 / self.get_simulated_runtime()
Ejemplo n.º 15
0
  def replace_all_tasks_with_median_speedup(self):
    """ Returns how much faster the job would have run if there were no stragglers.

    Removes stragglers by replacing all task runtimes with the median runtime for tasks in
    the stage. Stages in self.stages_to_combine are pooled and simulated together using
    self.combined_stages_concurrency; every other stage is simulated on its own.

    Returns the simulated no-straggler runtime divided by self.get_simulated_runtime().
    """
    total_no_stragglers_runtime = 0
    runtimes_for_combined_stages = []
    for id, stage in self.stages.iteritems():
      # Compute the stage median once and reuse it for every task in the stage.
      median_runtime = numpy.median([task.runtime() for task in stage.tasks])
      no_straggler_runtimes = [median_runtime] * len(stage.tasks)
      if id in self.stages_to_combine:
        runtimes_for_combined_stages.extend(no_straggler_runtimes)
      else:
        total_no_stragglers_runtime += simulate.simulate(
          no_straggler_runtimes, concurrency.get_max_concurrency(stage.tasks))[0]
    if len(runtimes_for_combined_stages) > 0:
      total_no_stragglers_runtime += simulate.simulate(
        runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
Ejemplo n.º 16
0
  def replace_all_tasks_with_median_speedup(self):
    """ Returns how much faster the job would have run if there were no stragglers.

    Removes stragglers by replacing all task runtimes with the median runtime for tasks in
    the stage. Stages in self.stages_to_combine are pooled and simulated together using
    self.combined_stages_concurrency; every other stage is simulated on its own.

    Returns the simulated no-straggler runtime divided by self.get_simulated_runtime().
    """
    total_no_stragglers_runtime = 0
    runtimes_for_combined_stages = []
    for id, stage in self.stages.iteritems():
      # Compute the stage median once and reuse it for every task in the stage.
      median_runtime = numpy.median([task.runtime() for task in stage.tasks])
      no_straggler_runtimes = [median_runtime] * len(stage.tasks)
      if id in self.stages_to_combine:
        runtimes_for_combined_stages.extend(no_straggler_runtimes)
      else:
        total_no_stragglers_runtime += simulate.simulate(
          no_straggler_runtimes, concurrency.get_max_concurrency(stage.tasks))[0]
    if len(runtimes_for_combined_stages) > 0:
      total_no_stragglers_runtime += simulate.simulate(
        runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
Ejemplo n.º 17
0
  def replace_all_tasks_with_average_speedup(self, prefix):
    """ Estimates the speedup from giving every task its stage's average runtime.

    Every task runtime is replaced by stage.average_task_runtime() of the stage it belongs
    to, and the job is re-simulated. The resulting schedule is written as a waterfall named
    "<prefix>_sim_no_stragglers".

    Returns the averaged-out simulated runtime divided by self.get_simulated_runtime().
    """
    self.print_heading("Computing speedup by averaging out stragglers")
    elapsed = 0
    waterfall = []
    pooled_runtimes = []
    for stage_id, stage in self.stages.iteritems():
      uniform_runtimes = [stage.average_task_runtime()] * len(stage.tasks)
      if stage_id in self.stages_to_combine:
        pooled_runtimes.extend(uniform_runtimes)
        continue
      stage_runtime, intervals = simulate.simulate(
        uniform_runtimes, concurrency.get_max_concurrency(stage.tasks))
      # Offset the stage by the time consumed by the stages simulated before it.
      waterfall.append([(begin + elapsed, end + elapsed) for begin, end in intervals])
      elapsed += stage_runtime

    if len(pooled_runtimes) > 0:
      pooled_runtime, intervals = simulate.simulate(
        pooled_runtimes, self.combined_stages_concurrency)
      # Hack: shift the combined stages to negative times so that they show up at the
      # beginning of the waterfall, which is when they usually occur.
      waterfall.append(
        [(begin - pooled_runtime, end - pooled_runtime) for begin, end in intervals])
      elapsed += pooled_runtime

    self.write_simulated_waterfall(waterfall, "%s_sim_no_stragglers" % prefix)
    return elapsed * 1.0 / self.get_simulated_runtime()
Ejemplo n.º 18
0
        print "Overlap:", self.overlap, "between ", id, "and", previous_id
        self.stages_to_combine.add(id)
        self.stages_to_combine.add(previous_id)
        old_end = max(old_end, finish)
      if finish > old_end:
        old_end = finish
        previous_id = id

    print "Stages to combine: ", self.stages_to_combine

    self.combined_stages_concurrency = -1
    if len(self.stages_to_combine) > 0:
      tasks_for_combined_stages = []
      for stage_id in self.stages_to_combine:
        tasks_for_combined_stages.extend(self.stages[stage_id].tasks)
      self.combined_stages_concurrency = concurrency.get_max_concurrency(tasks_for_combined_stages)

  def all_tasks(self):
    """ Collects the tasks of every stage into a single flat list. """
    collected = []
    for stage in self.stages.values():
      collected.extend(stage.tasks)
    return collected

  def print_stage_info(self):
    for id, stage in self.stages.iteritems():
      print "STAGE %s: %s" % (id, stage.verbose_str())

  def print_heading(self, text):
    print "\n******** %s ********" % text

  def get_simulated_runtime(self, waterfall_prefix=""):
    """ Returns the simulated runtime for the job.