Example #1
0
    def test_basic_sampler(self):
        """Sampled per-state msec counters should be close to the slept times."""
        # Build a sampler that ticks every millisecond.
        factory = CounterFactory()
        sampler = statesampler.StateSampler('basic-',
                                            factory,
                                            sampling_period_ms=1)

        # Spend ~100ms in statea, ~200ms in stateb and ~300ms in statec.
        sampler.start()
        with sampler.scoped_state('statea'):
            time.sleep(0.1)
            with sampler.scoped_state('stateb'):
                time.sleep(0.2 / 2)
                with sampler.scoped_state('statec'):
                    time.sleep(0.3)
                time.sleep(0.2 / 2)
        sampler.stop()
        sampler.commit_counters()

        # Every sampled timing must land within 25% of its expected value.
        expected_counter_values = {
            'basic-statea-msecs': 100,
            'basic-stateb-msecs': 200,
            'basic-statec-msecs': 300,
        }
        for sampled in factory.get_counters():
            self.assertIn(sampled.name, expected_counter_values)
            expected = expected_counter_values[sampled.name]
            actual = sampled.value()
            self.assertGreater(actual, expected * 0.75)
            self.assertLess(actual, expected * 1.25)
  def test_basic_sampler(self):
    """Sampled msec counters should roughly match the time slept per state."""
    # Sampler that polls the current state every millisecond.
    factory = CounterFactory()
    sampler = statesampler.StateSampler('basic', factory,
                                        sampling_period_ms=1)

    # Transition through three nested states for ~100/200/300 ms each.
    sampler.start()
    with sampler.scoped_state('statea'):
      time.sleep(0.1)
      with sampler.scoped_state('stateb'):
        time.sleep(0.2 / 2)
        with sampler.scoped_state('statec'):
          time.sleep(0.3)
        time.sleep(0.2 / 2)
    sampler.stop()
    sampler.commit_counters()

    # Each sampled timing should be within 25% of its expected value.
    expected_counter_values = {
        'basic-statea-msecs': 100,
        'basic-stateb-msecs': 200,
        'basic-statec-msecs': 300,
    }
    for sampled in factory.get_counters():
      self.assertIn(sampled.name, expected_counter_values)
      expected = expected_counter_values[sampled.name]
      actual = sampled.value()
      self.assertGreater(actual, expected * 0.75)
      self.assertLess(actual, expected * 1.25)
Example #3
0
  def test_basic_sampler(self):
    """Sampled msec counters should track the time spent in each state."""
    # Sampler that polls the current state every millisecond.
    counter_factory = CounterFactory()
    sampler = statesampler.StateSampler(
        'basic', counter_factory, sampling_period_ms=1)

    # Duration of the fastest state. Total test duration is 6 times longer.
    state_duration_ms = 1000
    margin_of_error = 0.25

    # Transition through three nested states, checking the reported current
    # state inside each one.
    sampler.start()
    with sampler.scoped_state('step1', 'statea'):
      time.sleep(state_duration_ms / 1000)
      self.assertEqual(
          sampler.current_state().name,
          CounterName('statea-msecs', step_name='step1', stage_name='basic'))
      with sampler.scoped_state('step1', 'stateb'):
        time.sleep(state_duration_ms / 1000)
        self.assertEqual(
            sampler.current_state().name,
            CounterName('stateb-msecs', step_name='step1', stage_name='basic'))
        with sampler.scoped_state('step1', 'statec'):
          time.sleep(3 * state_duration_ms / 1000)
          self.assertEqual(
              sampler.current_state().name,
              CounterName(
                  'statec-msecs', step_name='step1', stage_name='basic'))
        time.sleep(state_duration_ms / 1000)

    sampler.stop()
    sampler.commit_counters()

    if not statesampler.FAST_SAMPLER:
      # The slow sampler does not implement sampling, so we won't test it.
      return

    # Expected totals: statea 1 unit, stateb 2 units, statec 3 units.
    expected_counter_values = {
        CounterName('state%s-msecs' % suffix,
                    step_name='step1',
                    stage_name='basic'): units * state_duration_ms
        for suffix, units in [('a', 1), ('b', 2), ('c', 3)]
    }
    for sampled in counter_factory.get_counters():
      self.assertIn(sampled.name, expected_counter_values)
      expected = expected_counter_values[sampled.name]
      actual = sampled.value()
      deviation = float(abs(actual - expected)) / expected
      _LOGGER.info('Sampling deviation from expectation: %f', deviation)
      self.assertGreater(actual, expected * (1.0 - margin_of_error))
      self.assertLess(actual, expected * (1.0 + margin_of_error))
Example #4
0
  def test_basic_sampler(self):
    """Sampled state timings should approximate the slept durations."""
    # Sampler that polls the current state every millisecond.
    counter_factory = CounterFactory()
    sampler = statesampler.StateSampler('basic', counter_factory,
                                        sampling_period_ms=1)

    # Run a basic workload through three nested states, checking the
    # reported current state inside each one.
    sampler.start()
    with sampler.scoped_state('step1', 'statea'):
      time.sleep(0.1)
      self.assertEqual(
          sampler.current_state().name,
          CounterName(
              'statea-msecs', step_name='step1', stage_name='basic'))
      with sampler.scoped_state('step1', 'stateb'):
        time.sleep(0.2 / 2)
        self.assertEqual(
            sampler.current_state().name,
            CounterName(
                'stateb-msecs', step_name='step1', stage_name='basic'))
        with sampler.scoped_state('step1', 'statec'):
          time.sleep(0.3)
          self.assertEqual(
              sampler.current_state().name,
              CounterName(
                  'statec-msecs', step_name='step1', stage_name='basic'))
        time.sleep(0.2 / 2)

    sampler.stop()
    sampler.commit_counters()

    if not statesampler.FAST_SAMPLER:
      # The slow sampler does not implement sampling, so we won't test it.
      return

    # Expected per-state totals in milliseconds: a=100, b=200, c=300.
    expected_counter_values = {
        CounterName('state%s-msecs' % suffix,
                    step_name='step1',
                    stage_name='basic'): msecs
        for suffix, msecs in [('a', 100), ('b', 200), ('c', 300)]
    }
    for sampled in counter_factory.get_counters():
      self.assertIn(sampled.name, expected_counter_values)
      expected = expected_counter_values[sampled.name]
      actual = sampled.value()
      deviation = float(abs(actual - expected)) / expected
      logging.info('Sampling deviation from expectation: %f', deviation)
      self.assertGreater(actual, expected * 0.75)
      self.assertLess(actual, expected * 1.25)
Example #5
0
    def test_should_sample(self):
        """should_sample() hit rates per position should decay roughly as 10/i."""
        # Order of magnitude more buckets than highest constant in code under test.
        buckets = [0] * 300
        # The seed is arbitrary and exists just to ensure this test is robust.
        # If you don't like this seed, try your own; the test should still pass.
        random.seed(1720)
        # Do enough runs that the expected hits even in the last buckets
        # is big enough to expect some statistical smoothing.
        total_runs = 10 * len(buckets)

        # Fill the buckets.
        for _ in range(total_runs):
            opcounts = OperationCounters(CounterFactory(), 'some-name',
                                         coders.PickleCoder(), 0)
            for position in range(len(buckets)):
                if opcounts.should_sample():
                    buckets[position] += 1

        # The first ten positions are sampled on every run.
        for i in range(10):
            self.assertEqual(total_runs, buckets[i])
        # Later positions should land in a band around the expected 10/i rate.
        for i in range(10, len(buckets)):
            detail = ('i=%d, buckets[i]=%d, expected=%d, ratio=%f' %
                      (i, buckets[i], 10 * total_runs / i,
                       buckets[i] / (10.0 * total_runs / i)))
            self.assertTrue(buckets[i] > 7 * total_runs / i, detail)
            self.assertTrue(buckets[i] < 14 * total_runs / i, detail)
Example #6
0
    def test_sampler_transition_overhead(self):
        """Average per-transition sampler overhead should stay under 10us."""
        # Sampler polling every 10ms.
        factory = CounterFactory()
        sampler = statesampler.StateSampler('overhead-',
                                            factory,
                                            sampling_period_ms=10)

        # Pre-build the scoped states so the loop below times transitions only.
        state_a = sampler.scoped_state('step1', 'statea')
        state_b = sampler.scoped_state('step1', 'stateb')
        state_c = sampler.scoped_state('step1', 'statec')
        start_time = time.time()
        sampler.start()
        for _ in range(100000):
            with state_a, state_b:
                for _ in range(10):
                    with state_c:
                        pass
        sampler.stop()
        elapsed = time.time() - start_time
        transitions = sampler.get_info().transition_count
        overhead_us = 1000000.0 * elapsed / transitions

        # TODO: This test is flaky when it is run under load. A better solution
        # would be to change the test structure to not depend on specific timings.
        overhead_us = 2 * overhead_us

        _LOGGER.info('Overhead per transition: %fus', overhead_us)
        # Conservative upper bound on overhead in microseconds (we expect this to
        # take 0.17us when compiled in opt mode or 0.48 us when compiled with in
        # debug mode).
        self.assertLess(overhead_us, 10.0)
Example #7
0
  def test_sampler_transition_overhead(self):
    """Per-transition overhead of the state sampler should be below 10us."""
    factory = CounterFactory()
    sampler = statesampler.StateSampler('overhead-', factory,
                                        sampling_period_ms=10)

    # Pre-build the scoped states so the loop below measures transitions only.
    state_a = sampler.scoped_state('step1', 'statea')
    state_b = sampler.scoped_state('step1', 'stateb')
    state_c = sampler.scoped_state('step1', 'statec')
    start_time = time.time()
    sampler.start()
    for _ in range(100000):
      with state_a, state_b:
        for _ in range(10):
          with state_c:
            pass
    sampler.stop()
    elapsed = time.time() - start_time
    transitions = sampler.get_info().transition_count
    overhead_us = 1000000.0 * elapsed / transitions
    logging.info('Overhead per transition: %fus', overhead_us)
    # Conservative upper bound on overhead in microseconds (we expect this to
    # take 0.17us when compiled in opt mode or 0.48 us when compiled with in
    # debug mode).
    self.assertLess(overhead_us, 10.0)
Example #8
0
 def test_update_int(self):
     """Consuming a single int should advance the element count from 0 to 1."""
     counters = OperationCounters(
         CounterFactory(), 'some-name', coders.PickleCoder(), 0)
     self.verify_counters(counters, 0)
     windowed = GlobalWindows.windowed_value(1)
     counters.update_from(windowed)
     counters.update_collect()
     self.verify_counters(counters, 1)
Example #9
0
def run_benchmark(num_runs=50, input_per_source=4000, num_sources=4):
  """Benchmark iterating over multiple side-input sources.

  Args:
    num_runs: number of timed repetitions to perform.
    input_per_source: number of elements produced by each fake source.
    num_sources: number of fake sources iterated per run.

  Prints per-run times, the average runtime and the per-element time.
  """
  print("Number of runs:", num_runs)
  print("Input size:", num_sources * input_per_source)
  print("Sources:", num_sources)

  times = []
  for i in range(num_runs):
    counter_factory = CounterFactory()
    state_sampler = statesampler.StateSampler('basic', counter_factory)
    with state_sampler.scoped_state('step1', 'state'):
      si_counter = opcounters.SideInputReadCounter(
          counter_factory, state_sampler, 'step1', 1)
      # NOTE(review): the SideInputReadCounter above is immediately replaced
      # by the no-op counter below -- presumably one line is toggled when
      # benchmarking counter overhead; confirm before removing either.
      si_counter = opcounters.NoOpTransformIOCounter()
      sources = [
          FakeSource(long_generator(i, input_per_source))
          for i in range(num_sources)]
      iterator_fn = sideinputs.get_iterator_fn_for_sources(
          sources, read_counter=si_counter)
      start = time.time()
      list(iterator_fn())
      times.append(time.time() - start)

  print("Runtimes:", times)

  # Use true division: the runtimes are sub-second floats, so floor
  # division (//) would truncate the reported averages to 0.0.
  avg_runtime = sum(times) / len(times)
  print("Average runtime:", avg_runtime)
  print("Time per element:", avg_runtime / (input_per_source * num_sources))
Example #10
0
 def execute_map_tasks(self, ordered_map_tasks):
     """Run each MapTask in order, each with its own counters and sampler."""
     total_start = time.time()
     for task_index, (_, map_task) in enumerate(ordered_map_tasks):
         logging.info('Running %s', map_task)
         stage_start = time.time()
         stage_names, all_operations = zip(*map_task)
         # TODO(robertwb): The DataflowRunner worker receives system step names
         # (e.g. "s3") that are used to label the output msec counters.  We use the
         # operation names here, but this is not the same scheme used by the
         # DataflowRunner; the result is that the output msec counters are named
         # differently.
         system_names = stage_names
         # Create the CounterFactory and StateSampler for this MapTask.
         # TODO(robertwb): Output counters produced here are currently ignored.
         counter_factory = CounterFactory()
         state_sampler = statesampler.StateSampler('%s-' % task_index,
                                                   counter_factory)
         map_executor = operations.SimpleMapTaskExecutor(
             operation_specs.MapTask(all_operations, 'S%02d' % task_index,
                                     system_names, stage_names, system_names),
             counter_factory, state_sampler)
         self.executors.append(map_executor)
         map_executor.execute()
         logging.info('Stage %s finished: %0.3f sec', stage_names[0],
                      time.time() - stage_start)
     logging.info('Total time: %0.3f sec', time.time() - total_start)
Example #11
0
  def test_nested_with_per_thread_info(self):
    """Records formatted inside nested scoped states carry the inner step."""
    self.maxDiff = None
    tracker = statesampler.StateSampler('stage', CounterFactory())
    statesampler.set_current_tracker(tracker)
    formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid')

    def formatted_record():
      # Format a fresh sample record under the current logging context.
      return json.loads(
          formatter.format(self.create_log_record(**self.SAMPLE_RECORD)))

    with logger.PerThreadLoggingContext(work_item_id='workitem'):
      with tracker.scoped_state('step1', 'process'):
        log_output1 = formatted_record()

        with tracker.scoped_state('step2', 'process'):
          log_output2 = formatted_record()

        log_output3 = formatted_record()

    statesampler.set_current_tracker(None)
    log_output4 = formatted_record()

    self.assertEqual(
        log_output1,
        dict(self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step1'))
    self.assertEqual(
        log_output2,
        dict(self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step2'))
    self.assertEqual(
        log_output3,
        dict(self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step1'))
    self.assertEqual(log_output4, self.SAMPLE_OUTPUT)
 def test_update_str(self):
     """A string element should advance the count and set a size estimate."""
     coder = coders.PickleCoder()
     opcounts = OperationCounters(CounterFactory(), 'some-name', coder, 0)
     self.verify_counters(opcounts, 0, float('nan'))
     windowed = GlobalWindows.windowed_value('abcde')
     opcounts.update_from(windowed)
     self.verify_counters(opcounts, 1, coder.estimate_size(windowed))
 def test_update_old_object(self):
     """An object without __len__ should still get a coder size estimate."""
     coder = coders.PickleCoder()
     opcounts = OperationCounters(CounterFactory(), 'some-name', coder, 0)
     self.verify_counters(opcounts, 0, float('nan'))
     windowed = GlobalWindows.windowed_value(OldClassThatDoesNotImplementLen())
     opcounts.update_from(windowed)
     self.verify_counters(opcounts, 1, coder.estimate_size(windowed))
Example #14
0
    def test_basic_counters(self):
        """Side-input counters keep the declaring step ('step1') in their
        step_name while the currently-executing step shows up in io_target.
        """
        counter_factory = CounterFactory()
        sampler = statesampler.StateSampler('stage1', counter_factory)
        sampler.start()

        # Declare the counter while step1 is the current state.
        with sampler.scoped_state('step1', 'stateA'):
            counter = opcounters.SideInputReadCounter(counter_factory,
                                                      sampler,
                                                      declaring_step='step1',
                                                      input_index=1)
        # Consume bytes while step2 is the current state.
        with sampler.scoped_state('step2', 'stateB'):
            with counter:
                counter.add_bytes_read(10)

            counter.update_current_step()

        sampler.stop()
        sampler.commit_counters()

        # Set comprehension / set literal instead of set([...]) (idiom fix).
        actual_counter_names = {
            c.name for c in counter_factory.get_counters()}
        expected_counter_names = {
            # Counter names for STEP 1
            counters.CounterName('read-sideinput-msecs',
                                 stage_name='stage1',
                                 step_name='step1',
                                 io_target=counters.side_input_id('step1', 1)),
            counters.CounterName('read-sideinput-byte-count',
                                 step_name='step1',
                                 io_target=counters.side_input_id('step1', 1)),

            # Counter names for STEP 2
            counters.CounterName('read-sideinput-msecs',
                                 stage_name='stage1',
                                 step_name='step1',
                                 io_target=counters.side_input_id('step2', 1)),
            counters.CounterName('read-sideinput-byte-count',
                                 step_name='step1',
                                 io_target=counters.side_input_id('step2', 1)),
        }
        self.assertTrue(
            actual_counter_names.issuperset(expected_counter_names))
Example #15
0
  def test_basic_counters(self):
    """Side-input counters keep the declaring step ('step1') in their
    step_name while the currently-executing step shows up in io_target.
    """
    counter_factory = CounterFactory()
    sampler = statesampler.StateSampler('stage1', counter_factory)
    sampler.start()

    # Declare the counter while step1 is the current state.
    with sampler.scoped_state('step1', 'stateA'):
      counter = opcounters.SideInputReadCounter(counter_factory, sampler,
                                                declaring_step='step1',
                                                input_index=1)
    # Consume bytes while step2 is the current state.
    with sampler.scoped_state('step2', 'stateB'):
      with counter:
        counter.add_bytes_read(10)

      counter.update_current_step()

    sampler.stop()
    sampler.commit_counters()

    # Set comprehension / set literal instead of set([...]) (idiom fix).
    actual_counter_names = {c.name for c in counter_factory.get_counters()}
    expected_counter_names = {
        # Counter names for STEP 1
        counters.CounterName('read-sideinput-msecs',
                             stage_name='stage1',
                             step_name='step1',
                             io_target=counters.side_input_id('step1', 1)),
        counters.CounterName('read-sideinput-byte-count',
                             step_name='step1',
                             io_target=counters.side_input_id('step1', 1)),

        # Counter names for STEP 2
        counters.CounterName('read-sideinput-msecs',
                             stage_name='stage1',
                             step_name='step1',
                             io_target=counters.side_input_id('step2', 1)),
        counters.CounterName('read-sideinput-byte-count',
                             step_name='step1',
                             io_target=counters.side_input_id('step2', 1)),
    }
    self.assertTrue(actual_counter_names.issuperset(expected_counter_names))
Example #16
0
 def test_record_with_per_thread_info(self):
   """Records formatted inside a scoped state carry work/stage/step fields."""
   self.maxDiff = None
   tracker = statesampler.StateSampler('stage', CounterFactory())
   statesampler.set_current_tracker(tracker)
   formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid')
   try:
     with logger.PerThreadLoggingContext(work_item_id='workitem'):
       with tracker.scoped_state('step', 'process'):
         record = self.create_log_record(**self.SAMPLE_RECORD)
         log_output = json.loads(formatter.format(record))
   finally:
     # Always clear the global tracker so a failure above cannot leak
     # per-thread state into other tests (previously reset only after the
     # assertion succeeded).
     statesampler.set_current_tracker(None)
   expected_output = dict(self.SAMPLE_OUTPUT)
   expected_output.update(
       {'work': 'workitem', 'stage': 'stage', 'step': 'step'})
   self.assertEqual(log_output, expected_output)
 def test_update_multiple(self):
     """The mean element size estimate should track across several updates."""
     coder = coders.PickleCoder()
     opcounts = OperationCounters(CounterFactory(), 'some-name', coder, 0)
     self.verify_counters(opcounts, 0, float('nan'))
     sizes = []
     # Feed the first two elements, then check the running average.
     for element in ('abcde', 'defghij'):
         windowed = GlobalWindows.windowed_value(element)
         opcounts.update_from(windowed)
         sizes.append(coder.estimate_size(windowed))
     self.verify_counters(opcounts, 2, float(sum(sizes)) / 2)
     # A third element shifts the average again.
     windowed = GlobalWindows.windowed_value('klmnop')
     opcounts.update_from(windowed)
     sizes.append(coder.estimate_size(windowed))
     self.verify_counters(opcounts, 3, float(sum(sizes)) / 3)
Example #18
0
    def test_update_batch(self):
        """Batch updates should count per element and report a mean size."""
        coder = coders.FastPrimitivesCoder()
        # Converter that treats a plain list as a batch of Any elements.
        batch_converter = typehints.batch.BatchConverter.from_typehints(
            element_type=typehints.Any,
            batch_type=typehints.List[typehints.Any])
        opcounts = OperationCounters(
            CounterFactory(),
            'some-name',
            coder,
            0,
            producer_batch_converter=batch_converter)

        size_per_element = coder.estimate_size(50)

        self.verify_counters(opcounts, 0, float('nan'))

        # First batch of 100 elements.
        opcounts.update_from_batch(
            GlobalWindows.windowed_batch(list(range(100))))
        self.verify_counters(opcounts, 100, size_per_element)

        # Second batch of 100 more elements.
        opcounts.update_from_batch(
            GlobalWindows.windowed_batch(list(range(100, 200))))
        self.verify_counters(opcounts, 200, size_per_element)
Example #19
0
def for_test():
    """Install a fresh 'test' StateSampler as the current tracker.

    Returns the tracker that is now registered as current.
    """
    tracker = StateSampler('test', CounterFactory())
    set_current_tracker(tracker)
    return get_current_tracker()