Example 1
def RunBenchmark(spec, collector):
    """Runs a single benchmark and adds the results to the collector.

    Args:
        spec: The BenchmarkSpec object with run information.
        collector: The SampleCollector object to add samples to.
    """
    spec.status = benchmark_status.FAILED
    # Modify the logger prompt for messages logged within this function.
    label_extension = '{}({}/{})'.format(spec.name, spec.sequence_number,
                                         spec.total_benchmarks)
    context.SetThreadBenchmarkSpec(spec)
    log_context = log_util.GetThreadLogContext()
    with log_context.ExtendLabel(label_extension):
        with spec.RedirectGlobalFlags():
            end_to_end_timer = timing_util.IntervalTimer()
            detailed_timer = timing_util.IntervalTimer()
            try:
                with end_to_end_timer.Measure('End to End'):
                    if stages.PROVISION in FLAGS.run_stage:
                        DoProvisionPhase(spec, detailed_timer)

                    if stages.PREPARE in FLAGS.run_stage:
                        DoPreparePhase(spec, detailed_timer)

                    if stages.RUN in FLAGS.run_stage:
                        DoRunPhase(spec, collector, detailed_timer)

                    if stages.CLEANUP in FLAGS.run_stage:
                        DoCleanupPhase(spec, detailed_timer)

                    if stages.TEARDOWN in FLAGS.run_stage:
                        DoTeardownPhase(spec, detailed_timer)

                # Add timing samples.
                if (FLAGS.run_stage == stages.STAGES
                        and timing_util.EndToEndRuntimeMeasurementEnabled()):
                    collector.AddSamples(end_to_end_timer.GenerateSamples(),
                                         spec.name, spec)
                if timing_util.RuntimeMeasurementsEnabled():
                    collector.AddSamples(detailed_timer.GenerateSamples(),
                                         spec.name, spec)

            except:
                # Resource cleanup (below) can take a long time. Log the error to give
                # immediate feedback, then re-throw.
                logging.exception('Error during benchmark %s', spec.name)
                # If the particular benchmark requests us to always call cleanup, do it
                # here.
                if stages.CLEANUP in FLAGS.run_stage and spec.always_call_cleanup:
                    DoCleanupPhase(spec, detailed_timer)
                raise
            finally:
                if stages.TEARDOWN in FLAGS.run_stage:
                    spec.Delete()
                events.benchmark_end.send(benchmark_spec=spec)
                # Pickle spec to save final resource state.
                spec.Pickle()
    spec.status = benchmark_status.SUCCEEDED
    def testBackgroundWorkloadSpec(self):
        """ Check that the benchmark spec calls the prepare, stop, and start
    methods on the vms """

        with mock_flags.PatchFlags() as mocked_flags:
            self.setupCommonFlags(mocked_flags)
            mocked_flags.background_cpu_threads = 1
            collector = mock.MagicMock()
            config = configs.LoadConfig(ping_benchmark.BENCHMARK_CONFIG, {},
                                        NAME)
            spec = benchmark_spec.BenchmarkSpec(config, NAME, UID)
            vm0 = mock.MagicMock()
            vm1 = mock.MagicMock()
            spec.ConstructVirtualMachines()
            spec.vms = [vm0, vm1]
            timer = timing_util.IntervalTimer()
            pkb.DoPreparePhase(ping_benchmark, NAME, spec, timer)
            for vm in spec.vms:
                self.assertEqual(vm.PrepareBackgroundWorkload.call_count, 1)

            with mock.patch(ping_benchmark.__name__ + '.Run'):
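                # expected_last_call encodes the required ordering: start the
                # background workload (0), run the benchmark (1), then stop the
                # background workload (2).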
                ping_benchmark.Run.side_effect = functools.partial(
                    self._CheckAndIncrement, expected_last_call=1)
                vm0.StartBackgroundWorkload.side_effect = functools.partial(
                    self._CheckAndIncrement, expected_last_call=0)
                vm0.StopBackgroundWorkload.side_effect = functools.partial(
                    self._CheckAndIncrement, expected_last_call=2)
                pkb.DoRunPhase(ping_benchmark, NAME, spec, collector, timer)
                self.assertEqual(ping_benchmark.Run.call_count, 1)
                for vm in spec.vms:
                    self.assertEqual(vm.StartBackgroundWorkload.call_count, 1)
                    self.assertEqual(vm.StopBackgroundWorkload.call_count, 1)
Example 3
 def testGenerateSamplesMeasureNotCalled(self):
   """GenerateSamples should return an empty list if Measure was not called."""
   timer = timing_util.IntervalTimer()
   self.assertEqual(timer.intervals, [])
   samples = timer.GenerateSamples()
   self.assertEqual(timer.intervals, [])
   self.assertEqual(samples, [])
  def testBackgroundWorkloadSpec(self):
    """ Check that the benchmark spec calls the prepare, stop, and start
    methods on the vms """

    config = configs.LoadConfig(ping_benchmark.BENCHMARK_CONFIG, {}, NAME)
    config_spec = benchmark_config_spec.BenchmarkConfigSpec(
        NAME, flag_values=self.mocked_flags, **config)
    spec = benchmark_spec.BenchmarkSpec(config_spec, NAME, UID)
    vm0 = mock.MagicMock()
    vm1 = mock.MagicMock()
    spec.ConstructVirtualMachines()
    spec.vms = [vm0, vm1]
    timer = timing_util.IntervalTimer()
    pkb.DoPreparePhase(ping_benchmark, NAME, spec, timer)
    for vm in spec.vms:
      self.assertEqual(vm.PrepareBackgroundWorkload.call_count, 1)

    with mock.patch(ping_benchmark.__name__ + '.Run'):
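      # StopBackgroundWorkload is expected to be the first tracked call
      # during cleanup (expected_last_call=0).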
      vm0.StopBackgroundWorkload.side_effect = functools.partial(
          self._CheckAndIncrement, expected_last_call=0)
      pkb.DoCleanupPhase(ping_benchmark, NAME, spec, timer)
      for vm in spec.vms:
        self.assertEqual(vm.StartBackgroundWorkload.call_count, 1)
        self.assertEqual(vm.StopBackgroundWorkload.call_count, 1)
        self.assertEqual(vm.PrepareBackgroundWorkload.call_count, 1)
 def testGenerateSamplesNoRuntimeNoTimestamps(self):
     """No samples when include_runtime and include_timestamps are False."""
     timer = timing_util.IntervalTimer()
     with timer.Measure('First Interval'):
         pass
     with timer.Measure('Second Interval'):
         pass
     samples = timer.GenerateSamples(include_runtime=False,
                                     include_timestamps=False)
     self.assertEqual(samples, [])
Example 6
 def testGenerateSamplesRuntimeNoTimestamps(self):
   """Test generating runtime sample but no timestamp samples."""
   timer = timing_util.IntervalTimer()
   with timer.Measure('First'):
     pass
   with timer.Measure('Second'):
     pass
   start0 = timer.intervals[0][1]
   stop0 = timer.intervals[0][2]
   start1 = timer.intervals[1][1]
   stop1 = timer.intervals[1][2]
   samples = timer.GenerateSamples()
   exp_samples = [
       sample.Sample('First Runtime', stop0 - start0, 'seconds'),
       sample.Sample('Second Runtime', stop1 - start1, 'seconds')]
   self.assertSampleListsEqualUpToTimestamp(samples, exp_samples)
 def testGenerateSamplesTimestampsNoRuntime(self):
     """Test generating timestamp samples but no runtime sample."""
     timer = timing_util.IntervalTimer()
     with timer.Measure('First'):
         pass
     with timer.Measure('Second'):
         pass
     start0 = timer.intervals[0][1]
     stop0 = timer.intervals[0][2]
     start1 = timer.intervals[1][1]
     stop1 = timer.intervals[1][2]
     samples = timer.GenerateSamples(include_runtime=False,
                                     include_timestamps=True)
     exp_samples = [
         sample.Sample('First Start Timestamp', start0, 'seconds'),
         sample.Sample('First Stop Timestamp', stop0, 'seconds'),
         sample.Sample('Second Start Timestamp', start1, 'seconds'),
         sample.Sample('Second Stop Timestamp', stop1, 'seconds')
     ]
     self.assertSampleListsEqualUpToTimestamp(samples, exp_samples)
Example 8
 def testGenerateSamplesRuntimeAndTimestamps(self):
     """Test generating both runtime and timestamp samples."""
     timer = timing_util.IntervalTimer()
     with timer.Measure('First'):
         pass
     with timer.Measure('Second'):
         pass
     start0 = timer.intervals[0][1]
     stop0 = timer.intervals[0][2]
     start1 = timer.intervals[1][1]
     stop1 = timer.intervals[1][2]
     samples = timer.GenerateSamples(include_runtime=True,
                                     include_timestamps=True)
     exp_samples = [
         sample.Sample('First Runtime', stop0 - start0, 'seconds'),
         sample.Sample('First Start Timestamp', start0, 'seconds'),
         sample.Sample('First Stop Timestamp', stop0, 'seconds'),
         sample.Sample('Second Runtime', stop1 - start1, 'seconds'),
         sample.Sample('Second Start Timestamp', start1, 'seconds'),
         sample.Sample('Second Stop Timestamp', stop1, 'seconds')
     ]
     self.assertEqual(samples, exp_samples)
Example 9
 def testMeasureNested(self):
   """Verify correct interval tuple generation in nested measurements."""
   timer = timing_util.IntervalTimer()
   self.assertEqual(timer.intervals, [])
   with timer.Measure('Outer Interval'):
     with timer.Measure('Inner Interval'):
       pass
   self.assertEqual(len(timer.intervals), 2)
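    # Intervals are appended as each Measure() context exits, so the inner
    # interval closes first and appears at index 0.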
   inner_interval = timer.intervals[0]
   self.assertEqual(len(inner_interval), 3)
   inner_name = inner_interval[0]
   inner_start = inner_interval[1]
   inner_stop = inner_interval[2]
   self.assertEqual(inner_name, 'Inner Interval')
   outer_interval = timer.intervals[1]
   self.assertEqual(len(outer_interval), 3)
   outer_name = outer_interval[0]
   outer_start = outer_interval[1]
   outer_stop = outer_interval[2]
   self.assertEqual(outer_name, 'Outer Interval')
   self.assertLessEqual(outer_start, inner_start)
   self.assertLessEqual(inner_start, inner_stop)
   self.assertLessEqual(inner_stop, outer_stop)
Example 10
 def testGenerateSamplesRuntimeAndTimestamps(self):
   """Test generating both runtime and timestamp samples."""
   timer = timing_util.IntervalTimer()
   with timer.Measure('First'):
     pass
   with timer.Measure('Second'):
     pass
   start0 = timer.intervals[0][1]
   stop0 = timer.intervals[0][2]
   start1 = timer.intervals[1][1]
   stop1 = timer.intervals[1][2]
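    # Runtime samples are emitted by default; patching
    # TimestampMeasurementsEnabled to return True makes GenerateSamples emit
    # the timestamp samples as well.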
   with mock.patch(
       'perfkitbenchmarker.timing_util.TimestampMeasurementsEnabled',
       return_value=True):
     samples = timer.GenerateSamples()
   exp_samples = [
       sample.Sample('First Runtime', stop0 - start0, 'seconds'),
       sample.Sample('First Start Timestamp', start0, 'seconds'),
       sample.Sample('First Stop Timestamp', stop0, 'seconds'),
       sample.Sample('Second Runtime', stop1 - start1, 'seconds'),
       sample.Sample('Second Start Timestamp', start1, 'seconds'),
       sample.Sample('Second Stop Timestamp', stop1, 'seconds')]
   self.assertSampleListsEqualUpToTimestamp(samples, exp_samples)
Example 11
 def testMeasureSequential(self):
   """Verify correct interval tuple generation in sequential measurements."""
   timer = timing_util.IntervalTimer()
   self.assertEqual(timer.intervals, [])
   with timer.Measure('First Interval'):
     pass
   with timer.Measure('Second Interval'):
     pass
   self.assertEqual(len(timer.intervals), 2)
   first_interval = timer.intervals[0]
   self.assertEqual(len(first_interval), 3)
   first_name = first_interval[0]
   first_start = first_interval[1]
   first_stop = first_interval[2]
   self.assertEqual(first_name, 'First Interval')
   second_interval = timer.intervals[1]
   self.assertEqual(len(second_interval), 3)
   second_name = second_interval[0]
   second_start = second_interval[1]
   second_stop = second_interval[2]
   self.assertEqual(second_name, 'Second Interval')
   self.assertLessEqual(first_start, first_stop)
   self.assertLessEqual(first_stop, second_start)
   self.assertLessEqual(second_start, second_stop)
Example 12
def RunBenchmark(benchmark, collector, sequence_number, total_benchmarks):
    """Runs a single benchmark and adds the results to the collector.

    Args:
        benchmark: The benchmark module to be run.
        collector: The SampleCollector object to add samples to.
        sequence_number: The sequence number of when the benchmark was started
            relative to the other benchmarks in the suite.
        total_benchmarks: The total number of benchmarks in the suite.
    """
    benchmark_info = benchmark.GetInfo()
    if not ValidateBenchmarkInfo(benchmark_info):
        return
    benchmark_name = benchmark_info['name']

    # Modify the logger prompt for messages logged within this function.
    label_extension = '{}({}/{})'.format(benchmark_name, sequence_number,
                                         total_benchmarks)
    log_context = log_util.GetThreadLogContext()
    with log_context.ExtendLabel(label_extension):
        # Optional prerequisite checking.
        check_prereqs = getattr(benchmark, 'CheckPrerequisites', None)
        if check_prereqs:
            try:
                check_prereqs()
            except:
                logging.exception('Prerequisite check failed for %s',
                                  benchmark_name)
                raise

        end_to_end_timer = timing_util.IntervalTimer()
        detailed_timer = timing_util.IntervalTimer()
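        # The spec starts as None so that the "finally" block below only
        # performs cleanup if the spec was actually created.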
        spec = None
        try:
            with end_to_end_timer.Measure('End to End'):
                if FLAGS.run_stage in [STAGE_ALL, STAGE_PREPARE]:
                    # It is important to create the spec outside of DoPreparePhase
                    # because if DoPreparePhase raises an exception, we still need
                    # a reference to the spec in order to delete it in the "finally"
                    # section below.
                    spec = benchmark_spec.BenchmarkSpec(benchmark_info)
                    DoPreparePhase(benchmark, benchmark_name, spec,
                                   detailed_timer)
                else:
                    spec = benchmark_spec.BenchmarkSpec.GetSpecFromFile(
                        benchmark_name)

                if FLAGS.run_stage in [STAGE_ALL, STAGE_RUN]:
                    DoRunPhase(benchmark, benchmark_name, spec, collector,
                               detailed_timer)

                if FLAGS.run_stage in [STAGE_ALL, STAGE_CLEANUP]:
                    DoCleanupPhase(benchmark, benchmark_name, spec,
                                   detailed_timer)

            # Add samples for any timed interval that was measured.
            include_end_to_end = (
                timing_util.EndToEndRuntimeMeasurementEnabled())
            include_runtimes = timing_util.RuntimeMeasurementsEnabled()
            include_timestamps = timing_util.TimestampMeasurementsEnabled()
            if FLAGS.run_stage == STAGE_ALL:
                collector.AddSamples(
                    end_to_end_timer.GenerateSamples(
                        include_runtime=include_end_to_end or include_runtimes,
                        include_timestamps=include_timestamps), benchmark_name,
                    spec)
            collector.AddSamples(
                detailed_timer.GenerateSamples(include_runtimes,
                                               include_timestamps),
                benchmark_name, spec)

        except Exception:
            # Resource cleanup (below) can take a long time. Log the error to give
            # immediate feedback, then re-throw.
            logging.exception('Error during benchmark %s', benchmark_name)
            # If the particular benchmark requests us to always call cleanup, do it
            # here.
            if (FLAGS.run_stage in [STAGE_ALL, STAGE_CLEANUP] and spec
                    and spec.always_call_cleanup):
                DoCleanupPhase(benchmark, benchmark_name, spec, detailed_timer)
            raise
        finally:
            if spec:
                if FLAGS.run_stage in [STAGE_ALL, STAGE_CLEANUP]:
                    spec.Delete()
                # Pickle spec to save final resource state.
                spec.PickleSpec()
Example 13
def RunBenchmark(spec, collector):
  """Runs a single benchmark and adds the results to the collector.

  Args:
    spec: The BenchmarkSpec object with run information.
    collector: The SampleCollector object to add samples to.
  """

  # Since there are issues with handling SIGINT/KeyboardInterrupt (see further
  # discussion in _BackgroundProcessTaskManager), this mechanism is provided as
  # defense in depth to force-skip pending runs after SIGINT.
  for f in _SKIP_PENDING_RUNS_CHECKS:
    if f():
      logging.warning('Skipping benchmark.')
      return

  spec.status = benchmark_status.FAILED
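  # Track the stage currently being executed so that a failed-run sample can
  # record where the failure occurred.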
  current_run_stage = stages.PROVISION
  # Modify the logger prompt for messages logged within this function.
  label_extension = '{}({}/{})'.format(
      spec.name, spec.sequence_number, spec.total_benchmarks)
  context.SetThreadBenchmarkSpec(spec)
  log_context = log_util.GetThreadLogContext()
  with log_context.ExtendLabel(label_extension):
    with spec.RedirectGlobalFlags():
      end_to_end_timer = timing_util.IntervalTimer()
      detailed_timer = timing_util.IntervalTimer()
      try:
        with end_to_end_timer.Measure('End to End'):
          if stages.PROVISION in FLAGS.run_stage:
            DoProvisionPhase(spec, detailed_timer)

          if stages.PREPARE in FLAGS.run_stage:
            current_run_stage = stages.PREPARE
            DoPreparePhase(spec, detailed_timer)

          if stages.RUN in FLAGS.run_stage:
            current_run_stage = stages.RUN
            DoRunPhase(spec, collector, detailed_timer)

          if stages.CLEANUP in FLAGS.run_stage:
            current_run_stage = stages.CLEANUP
            DoCleanupPhase(spec, detailed_timer)

          if stages.TEARDOWN in FLAGS.run_stage:
            current_run_stage = stages.TEARDOWN
            DoTeardownPhase(spec, detailed_timer)

        # Add timing samples.
        if (FLAGS.run_stage == stages.STAGES and
            timing_util.EndToEndRuntimeMeasurementEnabled()):
          collector.AddSamples(
              end_to_end_timer.GenerateSamples(), spec.name, spec)
        if timing_util.RuntimeMeasurementsEnabled():
          collector.AddSamples(
              detailed_timer.GenerateSamples(), spec.name, spec)

        # Add resource related samples.
        collector.AddSamples(spec.GetSamples(), spec.name, spec)

      except Exception as e:
        # Log specific type of failure, if known
        # TODO(dlott) Move to exception chaining with Python3 support
        if (isinstance(e, errors.Benchmarks.InsufficientCapacityCloudFailure)
            or 'InsufficientCapacityCloudFailure' in str(e)):
          spec.failed_substatus = (
              benchmark_status.FailedSubstatus.INSUFFICIENT_CAPACITY)
          spec.status_detail = str(e)
        elif (isinstance(e, errors.Benchmarks.QuotaFailure)
              or 'QuotaFailure' in str(e)):
          spec.failed_substatus = benchmark_status.FailedSubstatus.QUOTA
          spec.status_detail = str(e)

        # Resource cleanup (below) can take a long time. Log the error to give
        # immediate feedback, then re-throw.
        logging.exception('Error during benchmark %s', spec.name)
        if FLAGS.create_failed_run_samples:
          collector.AddSamples(MakeFailedRunSample(spec, str(e),
                                                   current_run_stage),
                               spec.name,
                               spec)
        # If the particular benchmark requests us to always call cleanup, do it
        # here.
        if stages.CLEANUP in FLAGS.run_stage and spec.always_call_cleanup:
          DoCleanupPhase(spec, detailed_timer)
        raise
      finally:
        # Deleting resources should happen first so any errors with publishing
        # don't prevent teardown.
        if stages.TEARDOWN in FLAGS.run_stage:
          spec.Delete()
        if FLAGS.publish_after_run:
          collector.PublishSamples()
        events.benchmark_end.send(benchmark_spec=spec)
        # Pickle spec to save final resource state.
        spec.Pickle()
  spec.status = benchmark_status.SUCCEEDED
Example 14
def RunBenchmark(benchmark, sequence_number, total_benchmarks,
                 benchmark_config, benchmark_uid, collector):
    """Runs a single benchmark and adds the results to the collector.

    Args:
        benchmark: The benchmark module to be run.
        sequence_number: The sequence number of when the benchmark was started
            relative to the other benchmarks in the suite.
        total_benchmarks: The total number of benchmarks in the suite.
        benchmark_config: BenchmarkConfigSpec. The config to run the benchmark
            with.
        benchmark_uid: An identifier unique to this run of the benchmark even
            if the same benchmark is run multiple times with different configs.
        collector: The SampleCollector object to add samples to.
    """
    benchmark_name = benchmark.BENCHMARK_NAME

    # Modify the logger prompt for messages logged within this function.
    label_extension = '{}({}/{})'.format(benchmark_name, sequence_number,
                                         total_benchmarks)
    log_context = log_util.GetThreadLogContext()
    with log_context.ExtendLabel(label_extension):
        spec = _GetBenchmarkSpec(benchmark_config, benchmark_name,
                                 benchmark_uid)
        with spec.RedirectGlobalFlags():
            end_to_end_timer = timing_util.IntervalTimer()
            detailed_timer = timing_util.IntervalTimer()
            try:
                with end_to_end_timer.Measure('End to End'):
                    if stages.PROVISION in FLAGS.run_stage:
                        DoProvisionPhase(benchmark_name, spec, detailed_timer)

                    if stages.PREPARE in FLAGS.run_stage:
                        DoPreparePhase(benchmark, benchmark_name, spec,
                                       detailed_timer)

                    if stages.RUN in FLAGS.run_stage:
                        DoRunPhase(benchmark, benchmark_name, spec, collector,
                                   detailed_timer)

                    if stages.CLEANUP in FLAGS.run_stage:
                        DoCleanupPhase(benchmark, benchmark_name, spec,
                                       detailed_timer)

                    if stages.TEARDOWN in FLAGS.run_stage:
                        DoTeardownPhase(benchmark_name, spec, detailed_timer)

                # Add samples for any timed interval that was measured.
                include_end_to_end = (
                    timing_util.EndToEndRuntimeMeasurementEnabled())
                include_runtimes = timing_util.RuntimeMeasurementsEnabled()
                include_timestamps = timing_util.TimestampMeasurementsEnabled()
                if FLAGS.run_stage == stages.STAGES:
                    # Ran all stages.
                    collector.AddSamples(
                        end_to_end_timer.GenerateSamples(
                            include_runtime=include_end_to_end
                            or include_runtimes,
                            include_timestamps=include_timestamps),
                        benchmark_name, spec)
                collector.AddSamples(
                    detailed_timer.GenerateSamples(include_runtimes,
                                                   include_timestamps),
                    benchmark_name, spec)

            except:
                # Resource cleanup (below) can take a long time. Log the error to give
                # immediate feedback, then re-throw.
                logging.exception('Error during benchmark %s', benchmark_name)
                # If the particular benchmark requests us to always call cleanup, do it
                # here.
                if stages.CLEANUP in FLAGS.run_stage and spec.always_call_cleanup:
                    DoCleanupPhase(benchmark, benchmark_name, spec,
                                   detailed_timer)
                raise
            finally:
                if stages.TEARDOWN in FLAGS.run_stage:
                    spec.Delete()
                events.benchmark_end.send(benchmark_spec=spec)
                # Pickle spec to save final resource state.
                spec.PickleSpec()
Example 15
def RunBenchmark(benchmark, collector, sequence_number, total_benchmarks,
                 benchmark_config, benchmark_uid):
    """Runs a single benchmark and adds the results to the collector.

    Args:
        benchmark: The benchmark module to be run.
        collector: The SampleCollector object to add samples to.
        sequence_number: The sequence number of when the benchmark was started
            relative to the other benchmarks in the suite.
        total_benchmarks: The total number of benchmarks in the suite.
        benchmark_config: The config to run the benchmark with.
        benchmark_uid: An identifier unique to this run of the benchmark even
            if the same benchmark is run multiple times with different configs.
    """
    benchmark_name = benchmark.BENCHMARK_NAME

    # Modify the logger prompt for messages logged within this function.
    label_extension = '{}({}/{})'.format(benchmark_name, sequence_number,
                                         total_benchmarks)
    log_context = log_util.GetThreadLogContext()
    with log_context.ExtendLabel(label_extension):
        # Optional prerequisite checking.
        check_prereqs = getattr(benchmark, 'CheckPrerequisites', None)
        if check_prereqs:
            try:
                check_prereqs()
            except:
                logging.exception('Prerequisite check failed for %s',
                                  benchmark_name)
                raise

        end_to_end_timer = timing_util.IntervalTimer()
        detailed_timer = timing_util.IntervalTimer()
        spec = None
        try:
            with end_to_end_timer.Measure('End to End'):
                if FLAGS.run_stage in [STAGE_ALL, STAGE_PROVISION]:
                    # It is important to create the spec outside of DoProvisionPhase
                    # because if DoPreparePhase raises an exception, we still need
                    # a reference to the spec in order to delete it in the "finally"
                    # section below.
                    spec = benchmark_spec.BenchmarkSpec(
                        benchmark_config, benchmark_name, benchmark_uid)
                    spec.ConstructVirtualMachines()
                    DoProvisionPhase(benchmark_name, spec, detailed_timer)
                else:
                    try:
                        spec = benchmark_spec.BenchmarkSpec.GetSpecFromFile(
                            benchmark_uid)
                    except IOError:
                        if FLAGS.run_stage == STAGE_PREPARE:
                            logging.error(
                                'We were unable to load the BenchmarkSpec. This may be '
                                'related to two additional run stages which have recently '
                                'been added. Please make sure to run the stage "provision" '
                                'before "prepare". Similarly, make sure to run "teardown" '
                                'after "cleanup".')
                        raise

                if FLAGS.run_stage in [STAGE_ALL, STAGE_PREPARE]:
                    DoPreparePhase(benchmark, benchmark_name, spec,
                                   detailed_timer)

                if FLAGS.run_stage in [STAGE_ALL, STAGE_RUN]:
                    DoRunPhase(benchmark, benchmark_name, spec, collector,
                               detailed_timer)

                if FLAGS.run_stage in [STAGE_ALL, STAGE_CLEANUP]:
                    DoCleanupPhase(benchmark, benchmark_name, spec,
                                   detailed_timer)

                if FLAGS.run_stage in [STAGE_ALL, STAGE_TEARDOWN]:
                    DoTeardownPhase(benchmark_name, spec, detailed_timer)

            # Add samples for any timed interval that was measured.
            include_end_to_end = (
                timing_util.EndToEndRuntimeMeasurementEnabled())
            include_runtimes = timing_util.RuntimeMeasurementsEnabled()
            include_timestamps = timing_util.TimestampMeasurementsEnabled()
            if FLAGS.run_stage == STAGE_ALL:
                collector.AddSamples(
                    end_to_end_timer.GenerateSamples(
                        include_runtime=include_end_to_end or include_runtimes,
                        include_timestamps=include_timestamps), benchmark_name,
                    spec)
            collector.AddSamples(
                detailed_timer.GenerateSamples(include_runtimes,
                                               include_timestamps),
                benchmark_name, spec)

        except:
            # Resource cleanup (below) can take a long time. Log the error to give
            # immediate feedback, then re-throw.
            logging.exception('Error during benchmark %s', benchmark_name)
            # If the particular benchmark requests us to always call cleanup, do it
            # here.
            if (FLAGS.run_stage in [STAGE_ALL, STAGE_CLEANUP] and spec
                    and spec.always_call_cleanup):
                DoCleanupPhase(benchmark, benchmark_name, spec, detailed_timer)
            raise
        finally:
            if spec:
                if FLAGS.run_stage in [STAGE_ALL, STAGE_TEARDOWN]:
                    spec.Delete()
                # Pickle spec to save final resource state.
                spec.PickleSpec()