Example #1
0
  def write_stage_info(self, query_id, prefix):
    """ Appends one line describing the job's last real stage to "<prefix>_stage_info".

    The line is tab-separated and holds query_id, the runtime of the last stage, and
    self.original_runtime(). The stage runtime is written as -1 when no stage in
    self.stages ran for longer than 1000 (i.e., 1s, per the note below).
    """
    last_stage_runtime = -1
    last_stage_finish_time = 0
    for stage in self.stages.values():
      finish_time = stage.finish_time()
      runtime = finish_time - stage.start_time
      # This is a hack! Count the most recent stage with runtime > 1s as the "last".
      # Shark produces 1-2 very short stages at the end that do not seem to do anything (and
      # certainly aren't doing the output write we're trying to account for).
      if runtime > 1000 and finish_time > last_stage_finish_time:
        last_stage_finish_time = finish_time
        last_stage_runtime = runtime

    # Build the line before opening the file, and let the with-block close the file even if
    # the write fails, so that an exception never leaks an open file handle.
    line = "%s\t%s\t%s\n" % (query_id, last_stage_runtime, self.original_runtime())
    with open("%s_stage_info" % prefix, "a") as f:
      f.write(line)
Example #2
0
  def write_stage_info(self, query_id, prefix):
    """ Appends one line describing the job's last real stage to "<prefix>_stage_info".

    The line is tab-separated and holds query_id, the runtime of the last stage, and
    self.original_runtime(). The stage runtime is written as -1 when no stage in
    self.stages ran for longer than 1000 (i.e., 1s, per the note below).
    """
    last_stage_runtime = -1
    last_stage_finish_time = 0
    for stage in self.stages.values():
      finish_time = stage.finish_time()
      runtime = finish_time - stage.start_time
      # This is a hack! Count the most recent stage with runtime > 1s as the "last".
      # Shark produces 1-2 very short stages at the end that do not seem to do anything (and
      # certainly aren't doing the output write we're trying to account for).
      if runtime > 1000 and finish_time > last_stage_finish_time:
        last_stage_finish_time = finish_time
        last_stage_runtime = runtime

    # Build the line before opening the file, and let the with-block close the file even if
    # the write fails, so that an exception never leaks an open file handle.
    line = "%s\t%s\t%s\n" % (query_id, last_stage_runtime, self.original_runtime())
    with open("%s_stage_info" % prefix, "a") as f:
      f.write(line)
Example #3
0
  def replace_stragglers_with_median_speedup(self, threshold_fn):
    """ Returns the job's runtime without stragglers, relative to its simulated runtime.

    For each stage, passes the list of task runtimes into threshold_fn, which should
    return a threshold runtime. Then, replaces all task runtimes greater than or equal to
    that threshold with the stage's median task runtime.

    Stages whose id is in self.stages_to_combine are pooled and simulated together using
    self.combined_stages_concurrency; every other stage is simulated on its own using the
    concurrency returned by concurrency.get_max_concurrency for its tasks.

    For example, to replace the tasks with the longest 5% of runtimes with the median:
      self.replace_stragglers_with_median_speedup(
        lambda runtimes: numpy.percentile(runtimes, 95))

    The returned value is the summed simulated runtime with stragglers replaced, divided by
    self.get_simulated_runtime(), so smaller values mean a bigger improvement.
    """
    self.print_heading("Computing speedup from replacing straggler tasks with median")
    total_no_stragglers_runtime = 0
    start_and_runtimes_for_combined_stages = []
    original_start_and_runtimes_for_combined_stages = []
    num_stragglers_combined_stages = 0
    # items() rather than iteritems() so the loop runs on both Python 2 and Python 3.
    for stage_id, stage in self.stages.items():
      runtimes = [task.runtime() for task in stage.tasks]
      median_runtime = numpy.percentile(runtimes, 50)
      threshold_runtime = threshold_fn(runtimes)
      no_straggler_start_and_runtimes = []
      num_stragglers = 0
      sorted_stage_tasks = sorted(stage.tasks, key = lambda t: t.runtime())
      for task in sorted_stage_tasks:
        task_runtime = task.runtime()
        if task_runtime >= threshold_runtime:
          # Sanity check: this fails if threshold_fn returned a threshold below the median,
          # because the "replacement" would then make a task slower.
          assert(median_runtime <= task_runtime)
          no_straggler_start_and_runtimes.append((task.start_time, median_runtime))
          num_stragglers += 1
        else:
          no_straggler_start_and_runtimes.append((task.start_time, task_runtime))
      if stage_id in self.stages_to_combine:
        # Combined stages are simulated together after the loop; just collect their tasks.
        start_and_runtimes_for_combined_stages.extend(no_straggler_start_and_runtimes)
        original_start_and_runtimes_for_combined_stages.extend(
          [(t.start_time, t.runtime()) for t in stage.tasks])
        num_stragglers_combined_stages += num_stragglers
      else:
        max_concurrency = concurrency.get_max_concurrency(stage.tasks)
        no_stragglers_runtime = simulate.simulate(
          [x[1] for x in no_straggler_start_and_runtimes], max_concurrency)[0]
        total_no_stragglers_runtime += no_stragglers_runtime
        original_runtime = simulate.simulate(
          [task.runtime() for task in sorted_stage_tasks], max_concurrency)[0]
        print ("%s: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
          (stage_id, stage.finish_time() - stage.start_time, original_runtime,
           no_stragglers_runtime, num_stragglers))
    if len(start_and_runtimes_for_combined_stages) > 0:
      # The "Original" span must come from the untouched task runtimes. Using the list with
      # stragglers already replaced by the median would understate the original finish time.
      original_start_time = min(
        [x[0] for x in original_start_and_runtimes_for_combined_stages])
      original_finish_time = max(
        [x[0] + x[1] for x in original_start_and_runtimes_for_combined_stages])
      # Feed the simulator the tasks ordered by (start time, runtime).
      start_and_runtimes_for_combined_stages.sort()
      runtimes_for_combined_stages = [x[1] for x in start_and_runtimes_for_combined_stages]
      new_runtime = simulate.simulate(
        runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
      original_runtime = simulate.simulate(
        [x[1] for x in sorted(original_start_and_runtimes_for_combined_stages)],
        self.combined_stages_concurrency)[0]
      print ("Combined: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
        (original_finish_time - original_start_time, original_runtime, new_runtime,
         num_stragglers_combined_stages))
      total_no_stragglers_runtime += new_runtime
    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
Example #4
0
  def replace_stragglers_with_median_speedup(self, threshold_fn):
    """ Returns the job's runtime without stragglers, relative to its simulated runtime.

    For each stage, passes the list of task runtimes into threshold_fn, which should
    return a threshold runtime. Then, replaces all task runtimes greater than or equal to
    that threshold with the stage's median task runtime.

    Stages whose id is in self.stages_to_combine are pooled and simulated together using
    self.combined_stages_concurrency; every other stage is simulated on its own using the
    concurrency returned by concurrency.get_max_concurrency for its tasks.

    For example, to replace the tasks with the longest 5% of runtimes with the median:
      self.replace_stragglers_with_median_speedup(
        lambda runtimes: numpy.percentile(runtimes, 95))

    The returned value is the summed simulated runtime with stragglers replaced, divided by
    self.get_simulated_runtime(), so smaller values mean a bigger improvement.
    """
    self.print_heading("Computing speedup from replacing straggler tasks with median")
    total_no_stragglers_runtime = 0
    start_and_runtimes_for_combined_stages = []
    original_start_and_runtimes_for_combined_stages = []
    num_stragglers_combined_stages = 0
    # items() rather than iteritems() so the loop runs on both Python 2 and Python 3.
    for stage_id, stage in self.stages.items():
      runtimes = [task.runtime() for task in stage.tasks]
      median_runtime = numpy.percentile(runtimes, 50)
      threshold_runtime = threshold_fn(runtimes)
      no_straggler_start_and_runtimes = []
      num_stragglers = 0
      sorted_stage_tasks = sorted(stage.tasks, key = lambda t: t.runtime())
      for task in sorted_stage_tasks:
        task_runtime = task.runtime()
        if task_runtime >= threshold_runtime:
          # Sanity check: this fails if threshold_fn returned a threshold below the median,
          # because the "replacement" would then make a task slower.
          assert(median_runtime <= task_runtime)
          no_straggler_start_and_runtimes.append((task.start_time, median_runtime))
          num_stragglers += 1
        else:
          no_straggler_start_and_runtimes.append((task.start_time, task_runtime))
      if stage_id in self.stages_to_combine:
        # Combined stages are simulated together after the loop; just collect their tasks.
        start_and_runtimes_for_combined_stages.extend(no_straggler_start_and_runtimes)
        original_start_and_runtimes_for_combined_stages.extend(
          [(t.start_time, t.runtime()) for t in stage.tasks])
        num_stragglers_combined_stages += num_stragglers
      else:
        max_concurrency = concurrency.get_max_concurrency(stage.tasks)
        no_stragglers_runtime = simulate.simulate(
          [x[1] for x in no_straggler_start_and_runtimes], max_concurrency)[0]
        total_no_stragglers_runtime += no_stragglers_runtime
        original_runtime = simulate.simulate(
          [task.runtime() for task in sorted_stage_tasks], max_concurrency)[0]
        print ("%s: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
          (stage_id, stage.finish_time() - stage.start_time, original_runtime,
           no_stragglers_runtime, num_stragglers))
    if len(start_and_runtimes_for_combined_stages) > 0:
      # The "Original" span must come from the untouched task runtimes. Using the list with
      # stragglers already replaced by the median would understate the original finish time.
      original_start_time = min(
        [x[0] for x in original_start_and_runtimes_for_combined_stages])
      original_finish_time = max(
        [x[0] + x[1] for x in original_start_and_runtimes_for_combined_stages])
      # Feed the simulator the tasks ordered by (start time, runtime).
      start_and_runtimes_for_combined_stages.sort()
      runtimes_for_combined_stages = [x[1] for x in start_and_runtimes_for_combined_stages]
      new_runtime = simulate.simulate(
        runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
      original_runtime = simulate.simulate(
        [x[1] for x in sorted(original_start_and_runtimes_for_combined_stages)],
        self.combined_stages_concurrency)[0]
      print ("Combined: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
        (original_finish_time - original_start_time, original_runtime, new_runtime,
         num_stragglers_combined_stages))
      total_no_stragglers_runtime += new_runtime
    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
Example #5
0
  def no_stragglers_perfect_parallelism_speedup(self):
    """ Returns the job's ideal runtime relative to its simulated runtime.

    The ideal runtime assumes each stage's total task time is spread perfectly evenly over
    the available slots. Stages whose id is in self.stages_to_combine are pooled and divided
    by self.combined_stages_concurrency; every other stage is divided by the concurrency
    returned by concurrency.get_max_concurrency for its tasks.

    The returned value is that ideal runtime divided by self.get_simulated_runtime(), so
    smaller values mean a bigger potential improvement.
    """
    # Start from a float so the final division is never an integer division on Python 2.
    ideal_runtime = 0.0
    total_runtime_combined_stages = 0
    # items() rather than iteritems() so the loop runs on both Python 2 and Python 3.
    for stage_id, stage in self.stages.items():
      if stage_id in self.stages_to_combine:
        total_runtime_combined_stages += stage.total_runtime()
      else:
        new_runtime = float(stage.total_runtime()) / concurrency.get_max_concurrency(stage.tasks)
        print("New runtime: %s, original runtime: %s" %
          (new_runtime, stage.finish_time() - stage.start_time))
        ideal_runtime += new_runtime

    print("Total runtime combined: %s (concurrency %d)" %
      (total_runtime_combined_stages, self.combined_stages_concurrency))
    # A zero total contributes nothing, so skip the division. This avoids a
    # ZeroDivisionError if the combined concurrency is 0 when no stage was combined.
    if total_runtime_combined_stages:
      ideal_runtime += float(total_runtime_combined_stages) / self.combined_stages_concurrency
    print("Getting simulated runtime")
    simulated_actual_runtime = self.get_simulated_runtime()
    print("Ideal runtime for all: %s, simulated: %s" % (ideal_runtime, simulated_actual_runtime))
    return ideal_runtime / simulated_actual_runtime
Example #6
0
  def no_stragglers_perfect_parallelism_speedup(self):
    """ Returns the job's ideal runtime relative to its simulated runtime.

    The ideal runtime assumes each stage's total task time is spread perfectly evenly over
    the available slots. Stages whose id is in self.stages_to_combine are pooled and divided
    by self.combined_stages_concurrency; every other stage is divided by the concurrency
    returned by concurrency.get_max_concurrency for its tasks.

    The returned value is that ideal runtime divided by self.get_simulated_runtime(), so
    smaller values mean a bigger potential improvement.
    """
    # Start from a float so the final division is never an integer division on Python 2.
    ideal_runtime = 0.0
    total_runtime_combined_stages = 0
    # items() rather than iteritems() so the loop runs on both Python 2 and Python 3.
    for stage_id, stage in self.stages.items():
      if stage_id in self.stages_to_combine:
        total_runtime_combined_stages += stage.total_runtime()
      else:
        new_runtime = float(stage.total_runtime()) / concurrency.get_max_concurrency(stage.tasks)
        print("New runtime: %s, original runtime: %s" %
          (new_runtime, stage.finish_time() - stage.start_time))
        ideal_runtime += new_runtime

    print("Total runtime combined: %s (concurrency %d)" %
      (total_runtime_combined_stages, self.combined_stages_concurrency))
    # A zero total contributes nothing, so skip the division. This avoids a
    # ZeroDivisionError if the combined concurrency is 0 when no stage was combined.
    if total_runtime_combined_stages:
      ideal_runtime += float(total_runtime_combined_stages) / self.combined_stages_concurrency
    print("Getting simulated runtime")
    simulated_actual_runtime = self.get_simulated_runtime()
    print("Ideal runtime for all: %s, simulated: %s" % (ideal_runtime, simulated_actual_runtime))
    return ideal_runtime / simulated_actual_runtime