Python MetricsContainer Examples, apache_beam.metrics.execution.MetricsContainer Python Examples

Example #1

0

Show file

File: executor.py Project: gamars/beam

  def call(self):
    """Evaluates the applied transform over this executor's input bundle.

    Every side input is resolved first. If one of them is not available yet,
    the method returns early without evaluating anything. Otherwise the
    evaluator processes the fired timers and the input elements, the bundle is
    finished, and the result is handed to the completion callback. Physical
    metrics are committed and ``complete(self)`` is called on the transform
    evaluation state whether or not evaluation succeeded.

    Returns:
      The value returned by the evaluator's ``finish_bundle()`` on success.
      ``None`` when a side input is still missing, or when evaluation raised
      (the exception is passed to the completion callback, not re-raised).
    """
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for side_input in self._applied_ptransform.side_inputs:
      if side_input not in self._side_input_values:
        has_result, value = (
            self._evaluation_context.get_value_or_schedule_after_output(
                side_input, self))
        if not has_result:
          # Monitor task will reschedule this executor once the side input is
          # available.
          return
        self._side_input_values[side_input] = value

    side_input_values = [self._side_input_values[side_input]
                         for side_input in self._applied_ptransform.side_inputs]

    try:
      evaluator = self._transform_evaluator_registry.get_evaluator(
          self._applied_ptransform, self._input_bundle,
          side_input_values, scoped_metrics_container)

      if self._fired_timers:
        for timer_firing in self._fired_timers:
          evaluator.process_timer_wrapper(timer_firing)

      if self._input_bundle:
        for value in self._input_bundle.get_elements_iterable():
          evaluator.process_element(value)

      with scoped_metrics_container:
        result = evaluator.finish_bundle()
        result.logical_metric_updates = metrics_container.get_cumulative()

      if self._evaluation_context.has_cache:
        for uncommitted_bundle in result.uncommitted_output_bundles:
          self._evaluation_context.append_to_cache(
              self._applied_ptransform, uncommitted_bundle.tag,
              uncommitted_bundle.get_elements_iterable())
        undeclared_tag_values = result.undeclared_tag_values
        if undeclared_tag_values:
          # items() exists on both Python 2 and Python 3, whereas iteritems()
          # was removed in Python 3.
          for tag, value in undeclared_tag_values.items():
            self._evaluation_context.append_to_cache(
                self._applied_ptransform, tag, value)

      self._completion_callback.handle_result(self, self._input_bundle, result)
      return result
    except Exception as e:  # pylint: disable=broad-except
      # Failures are reported through the callback rather than propagated.
      self._completion_callback.handle_exception(self, e)
    finally:
      self._evaluation_context.metrics().commit_physical(
          self._input_bundle,
          metrics_container.get_cumulative())
      self._transform_evaluation_state.complete(self)

Example #2

0

Show file

File: operations.py Project: ocadotechnology/incubator-beam

  def __init__(self, name_context, spec, counter_factory, state_sampler):
    """Builds a worker operation and creates its scoped sampler states.

    Args:
      name_context: Either a NameContext instance or (deprecated) a string,
        carrying the name information for this operation.
      spec: An operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    # TODO(BEAM-4028): Clean this up once it's completely migrated.
    # Anything that is not already a NameContext is wrapped in one. We use the
    # specific operation name that is used for metrics and state sampling.
    if not isinstance(name_context, common.NameContext):
      name_context = common.NameContext(name_context)
    self.name_context = name_context

    self.spec = spec
    self.counter_factory = counter_factory
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    container = MetricsContainer(name_context.metrics_name())
    self.metrics_container = container

    # One scoped state per phase, all reporting into the same container.
    self.state_sampler = state_sampler
    self.scoped_start_state = state_sampler.scoped_state(
        name_context, 'start', metrics_container=container)
    self.scoped_process_state = state_sampler.scoped_state(
        name_context, 'process', metrics_container=container)
    self.scoped_finish_state = state_sampler.scoped_state(
        name_context, 'finish', metrics_container=container)
    # TODO(ccy): the '-abort' state can be added when the abort is supported in
    # Operations.
    self.receivers = []

Example #3

0

Show file

File: operations.py Project: aljoscha/incubator-beam

  def __init__(self, operation_name, spec, counter_factory, state_sampler):
    """Builds a worker operation and creates its scoped sampler states.

    Args:
      operation_name: The system name assigned by the runner for this
        operation.
      spec: An operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    self.operation_name = operation_name
    self.spec = spec
    self.counter_factory = counter_factory
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    self.step_name = operation_name
    container = MetricsContainer(operation_name)
    self.metrics_container = container
    self.scoped_metrics_container = ScopedMetricsContainer(container)

    # One scoped state per phase of the operation.
    self.state_sampler = state_sampler
    self.scoped_start_state = state_sampler.scoped_state(
        operation_name, 'start')
    self.scoped_process_state = state_sampler.scoped_state(
        operation_name, 'process')
    self.scoped_finish_state = state_sampler.scoped_state(
        operation_name, 'finish')
    # TODO(ccy): the '-abort' state can be added when the abort is supported in
    # Operations.
    self.receivers = []

Example #4

0

Show file

File: execution_test.py Project: eljefe6a/incubator-beam

  def test_uses_right_container(self):
    """Checks that a counter reports to whichever container is current.

    The same counter is incremented once while ``c1`` is the current container
    and by three while ``c2`` is; each container must only hold the increment
    made while it was current.
    """
    c1 = MetricsContainer('step1')
    c2 = MetricsContainer('step2')
    counter = Metrics.counter('ns', 'name')
    MetricsEnvironment.set_current_container(c1)
    counter.inc()
    MetricsEnvironment.set_current_container(c2)
    counter.inc(3)
    MetricsEnvironment.unset_current_container()

    # On Python 3 items() returns a view that never compares equal to a list,
    # so it is materialized before the comparison.
    self.assertEqual(
        list(c1.get_cumulative().counters.items()),
        [(MetricKey('step1', MetricName('ns', 'name')), 1)])

    self.assertEqual(
        list(c2.get_cumulative().counters.items()),
        [(MetricKey('step2', MetricName('ns', 'name')), 3)])

Example #5

0

Show file

  def __init__(self,
               name_context,  # type: Union[str, common.NameContext]
               spec,
               counter_factory,
               state_sampler  # type: StateSampler
              ):
    """Builds a worker operation and creates its scoped sampler states.

    Args:
      name_context: Either a NameContext instance or (deprecated) a string,
        carrying the name information for this operation.
      spec: An operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    # TODO(BEAM-4028): Clean this up once it's completely migrated.
    # Anything that is not already a NameContext is wrapped in one. We use the
    # specific operation name that is used for metrics and state sampling.
    self.name_context = (
        name_context if isinstance(name_context, common.NameContext)
        else common.NameContext(name_context))

    self.spec = spec
    self.counter_factory = counter_factory
    self.execution_context = None  # type: Optional[ExecutionContext]
    self.consumers = collections.defaultdict(
        list)  # type: DefaultDict[int, List[Operation]]

    # These are overwritten in the legacy harness.
    container = MetricsContainer(self.name_context.metrics_name())
    self.metrics_container = container

    # One scoped state per phase, all reporting into the same container.
    self.state_sampler = state_sampler
    self.scoped_start_state = state_sampler.scoped_state(
        self.name_context, 'start', metrics_container=container)
    self.scoped_process_state = state_sampler.scoped_state(
        self.name_context, 'process', metrics_container=container)
    self.scoped_finish_state = state_sampler.scoped_state(
        self.name_context, 'finish', metrics_container=container)
    # TODO(ccy): the '-abort' state can be added when the abort is supported in
    # Operations.
    self.receivers = []  # type: List[ConsumerSet]
    # Legacy workers cannot call setup() until after setting additional state
    # on the operation.
    self.setup_done = False
    self.step_name = None  # type: Optional[str]

Example #6

0

Show file

    def call(self):
        """Resolves side inputs, then runs ``attempt_call`` with retries.

        Returns early (without committing metrics or completing) when a side
        input is not available yet. Otherwise ``attempt_call`` is invoked
        until it succeeds or the retry count reaches
        ``self._max_retries_per_bundle``; the last failure is passed to the
        completion callback. Afterwards physical metrics are committed and
        ``complete(self)`` is called on the transform evaluation state.
        """
        self._call_count += 1
        assert self._call_count <= (1 +
                                    len(self._applied_ptransform.side_inputs))
        metrics_container = MetricsContainer(
            self._applied_ptransform.full_label)
        scoped_metrics_container = ScopedMetricsContainer(metrics_container)

        for side_input in self._applied_ptransform.side_inputs:
            if side_input in self._side_input_values:
                continue
            has_result, value = (
                self._evaluation_context.get_value_or_schedule_after_output(
                    side_input, self))
            if not has_result:
                # Monitor task will reschedule this executor once the side
                # input is available.
                return
            self._side_input_values[side_input] = value

        side_input_values = []
        for side_input in self._applied_ptransform.side_inputs:
            side_input_values.append(self._side_input_values[side_input])

        while self._retry_count < self._max_retries_per_bundle:
            try:
                self.attempt_call(metrics_container, scoped_metrics_container,
                                  side_input_values)
            except Exception as e:
                self._retry_count += 1
                logging.error(
                    'Exception at bundle %r, due to an exception.\n %s',
                    self._input_bundle, traceback.format_exc())
                if self._retry_count != self._max_retries_per_bundle:
                    # Attempts remain: go around the loop again.
                    continue
                logging.error('Giving up after %s attempts.',
                              self._max_retries_per_bundle)
                if self._retry_count == 1:
                    logging.info(
                        'Use the experimental flag --direct_runner_bundle_retry'
                        ' to retry failed bundles (up to %d times).',
                        TransformExecutor._MAX_RETRY_PER_BUNDLE)
                self._completion_callback.handle_exception(self, e)
            else:
                break

        self._evaluation_context.metrics().commit_physical(
            self._input_bundle, metrics_container.get_cumulative())
        self._transform_evaluation_state.complete(self)

Example #7

0

Show file

File: operations.py Project: matzew/beam

  def __init__(self, name_context, spec, counter_factory, state_sampler):
    """Builds a worker operation and creates its scoped sampler states.

    Args:
      name_context: Either a NameContext instance or (deprecated) a string,
        carrying the name information for this operation.
      spec: An operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    # TODO(BEAM-4028): Clean this up once it's completely migrated.
    # Anything that is not already a NameContext is wrapped in one. We use the
    # specific operation name that is used for metrics and state sampling.
    if not isinstance(name_context, common.NameContext):
      name_context = common.NameContext(name_context)
    self.name_context = name_context

    # TODO(BEAM-4028): Remove following two lines. Rely on name context.
    self.operation_name = name_context.step_name
    self.step_name = name_context.logging_name()

    self.spec = spec
    self.counter_factory = counter_factory
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    container = MetricsContainer(name_context.metrics_name())
    self.metrics_container = container
    # TODO(BEAM-4094): Remove ScopedMetricsContainer after Dataflow no longer
    # depends on it.
    self.scoped_metrics_container = ScopedMetricsContainer()

    # One scoped state per phase, keyed by the metrics name and all reporting
    # into the same container.
    self.state_sampler = state_sampler
    self.scoped_start_state = state_sampler.scoped_state(
        name_context.metrics_name(), 'start', metrics_container=container)
    self.scoped_process_state = state_sampler.scoped_state(
        name_context.metrics_name(), 'process', metrics_container=container)
    self.scoped_finish_state = state_sampler.scoped_state(
        name_context.metrics_name(), 'finish', metrics_container=container)
    # TODO(ccy): the '-abort' state can be added when the abort is supported in
    # Operations.
    self.receivers = []

Example #8

0

Show file

File: executor.py Project: wangjie05/beam

  def call(self):
    """Resolves side inputs, then runs ``attempt_call`` with retries.

    For each side input the latest main input window is mapped onto the side
    input's window, and the end of that window is passed as ``block_until``
    when requesting the value. Returns early (without committing metrics or
    completing) when a requested value is falsy. Otherwise ``attempt_call`` is
    invoked until it succeeds or the retry count reaches
    ``self._max_retries_per_bundle``; the last failure is passed to the
    completion callback. Afterwards physical metrics are committed and
    ``complete(self)`` is called on the transform evaluation state.
    """
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for side_input in self._applied_ptransform.side_inputs:
      # Find the projection of main's window onto the side input's window.
      view_options = side_input._view_options()
      window_mapping_fn = view_options.get(
          'window_mapping_fn', sideinputs._global_window_mapping_fn)
      block_until = window_mapping_fn(self._latest_main_input_window).end

      if side_input in self._side_input_values:
        continue
      value = self._evaluation_context.get_value_or_block_until_ready(
          side_input, self, block_until)
      # NOTE(review): any falsy value is treated as "not ready" here; confirm
      # that get_value_or_block_until_ready never returns a falsy ready value.
      if not value:
        # Monitor task will reschedule this executor once the side input is
        # available.
        return
      self._side_input_values[side_input] = value

    side_input_values = []
    for side_input in self._applied_ptransform.side_inputs:
      side_input_values.append(self._side_input_values[side_input])

    while self._retry_count < self._max_retries_per_bundle:
      try:
        self.attempt_call(
            metrics_container, scoped_metrics_container, side_input_values)
      except Exception as e:
        self._retry_count += 1
        logging.error(
            'Exception at bundle %r, due to an exception.\n %s',
            self._input_bundle, traceback.format_exc())
        if self._retry_count != self._max_retries_per_bundle:
          # Attempts remain: go around the loop again.
          continue
        logging.error('Giving up after %s attempts.',
                      self._max_retries_per_bundle)
        self._completion_callback.handle_exception(self, e)
      else:
        break

    self._evaluation_context.metrics().commit_physical(
        self._input_bundle, metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)

Example #9

0

Show file

File: executor.py Project: eljefe6a/incubator-beam

  def call(self):
    """Resolves side inputs, then runs ``attempt_call`` with retries.

    Returns early (without committing metrics or completing) when a side input
    is not available yet. Otherwise ``attempt_call`` is invoked until it
    succeeds or the retry count reaches ``self._max_retries_per_bundle``; the
    last failure is passed to the completion callback. Afterwards physical
    metrics are committed and ``complete(self)`` is called on the transform
    evaluation state.
    """
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for side_input in self._applied_ptransform.side_inputs:
      if side_input in self._side_input_values:
        continue
      has_result, value = (
          self._evaluation_context.get_value_or_schedule_after_output(
              side_input, self))
      if not has_result:
        # Monitor task will reschedule this executor once the side input is
        # available.
        return
      self._side_input_values[side_input] = value

    side_input_values = []
    for side_input in self._applied_ptransform.side_inputs:
      side_input_values.append(self._side_input_values[side_input])

    while self._retry_count < self._max_retries_per_bundle:
      try:
        self.attempt_call(
            metrics_container, scoped_metrics_container, side_input_values)
      except Exception as e:
        self._retry_count += 1
        logging.error(
            'Exception at bundle %r, due to an exception.\n %s',
            self._input_bundle, traceback.format_exc())
        if self._retry_count != self._max_retries_per_bundle:
          # Attempts remain: go around the loop again.
          continue
        logging.error('Giving up after %s attempts.',
                      self._max_retries_per_bundle)
        if self._retry_count == 1:
          logging.info(
              'Use the experimental flag --direct_runner_bundle_retry'
              ' to retry failed bundles (up to %d times).',
              TransformExecutor._MAX_RETRY_PER_BUNDLE)
        self._completion_callback.handle_exception(self, e)
      else:
        break

    self._evaluation_context.metrics().commit_physical(
        self._input_bundle, metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)

Example #10

0

Show file

File: execution_test.py Project: NarasimhaKattunga/kafka-1

  def test_get_cumulative_or_updates(self):
    """Checks that get_cumulative() reports every cell that was updated.

    Ten counters, distributions and gauges are created and updated with the
    values 1..10; the cumulative result must contain ten entries of each kind,
    and the counter and gauge values must match the values written.
    """
    mc = MetricsContainer('astep')

    all_values = list(range(1, 11))
    for amount in all_values:
      label = 'name{}'.format(amount)
      counter = mc.get_counter(MetricName('namespace', label))
      distribution = mc.get_distribution(MetricName('namespace', label))
      gauge = mc.get_gauge(MetricName('namespace', label))

      counter.inc(amount)
      distribution.update(amount)
      gauge.set(amount)

    # Retrieve ALL updates.
    cumulative = mc.get_cumulative()
    for cells in (
        cumulative.counters, cumulative.distributions, cumulative.gauges):
      self.assertEqual(len(cells), 10)

    expected = set(all_values)
    self.assertEqual(
        expected, {value for _, value in cumulative.counters.items()})
    self.assertEqual(
        expected, {cell.value for _, cell in cumulative.gauges.items()})

Example #11

0

Show file

File: execution_test.py Project: eljefe6a/incubator-beam

  def test_scoped_container(self):
    """Checks that nested ScopedMetricsContainer blocks switch containers.

    Inside the outer scope ``c1`` is current; inside the nested scope ``c2``
    is current; after the nested scope exits ``c1`` is current again. Each
    container must only accumulate the increments made while it was current.
    """
    c1 = MetricsContainer('mystep')
    c2 = MetricsContainer('myinternalstep')
    with ScopedMetricsContainer(c1):
      self.assertEqual(c1, MetricsEnvironment.current_container())
      counter = Metrics.counter('ns', 'name')
      counter.inc(2)

      with ScopedMetricsContainer(c2):
        self.assertEqual(c2, MetricsEnvironment.current_container())
        counter = Metrics.counter('ns', 'name')
        counter.inc(3)
        # On Python 3 items() returns a view that never compares equal to a
        # list, so it is materialized before the comparison.
        self.assertEqual(
            list(c2.get_cumulative().counters.items()),
            [(MetricKey('myinternalstep', MetricName('ns', 'name')), 3)])

      self.assertEqual(c1, MetricsEnvironment.current_container())
      counter = Metrics.counter('ns', 'name')
      counter.inc(4)
      # 2 from before the nested scope plus 4 after it.
      self.assertEqual(
          list(c1.get_cumulative().counters.items()),
          [(MetricKey('mystep', MetricName('ns', 'name')), 6)])

Example #12

0

Show file

    def test_get_cumulative_or_updates(self):
        """Checks get_updates() against get_cumulative() on mixed cells.

        Ten counters, distributions and gauges are updated with 1..10. The
        cells for odd values are committed; the cells for even values are
        left uncommitted. get_updates() must report only the five
        uncommitted entries of each kind, get_cumulative() all ten.
        """
        mc = MetricsContainer('astep')

        clean_values = []
        dirty_values = []
        for i in range(1, 11):
            label = 'name{}'.format(i)
            counter = mc.get_counter(MetricName('namespace', label))
            distribution = mc.get_distribution(MetricName('namespace', label))
            gauge = mc.get_gauge(MetricName('namespace', label))

            counter.inc(i)
            distribution.update(i)
            gauge.set(i)
            if i % 2:
                # Assert: Counter/Distribution is DIRTY or COMMITTING (not
                # CLEAN)
                self.assertEqual(distribution.commit.before_commit(), True)
                self.assertEqual(counter.commit.before_commit(), True)
                self.assertEqual(gauge.commit.before_commit(), True)
                distribution.commit.after_commit()
                counter.commit.after_commit()
                gauge.commit.after_commit()
                # Assert: Counter/Distribution has been committed, therefore
                # it's CLEAN
                self.assertEqual(counter.commit.state, CellCommitState.CLEAN)
                self.assertEqual(distribution.commit.state,
                                 CellCommitState.CLEAN)
                self.assertEqual(gauge.commit.state, CellCommitState.CLEAN)
                clean_values.append(i)
            else:
                # Some are left to be DIRTY (i.e. not yet committed).
                # Some are left to be CLEAN (i.e. already committed).
                dirty_values.append(i)

        # Retrieve NON-COMMITTED updates.
        logical = mc.get_updates()
        for cells in (logical.counters, logical.distributions, logical.gauges):
            self.assertEqual(len(cells), 5)

        expected_dirty = set(dirty_values)
        self.assertEqual(expected_dirty,
                         {cell.value for _, cell in logical.gauges.items()})
        self.assertEqual(expected_dirty,
                         {value for _, value in logical.counters.items()})

        # Retrieve ALL updates.
        cumulative = mc.get_cumulative()
        for cells in (cumulative.counters, cumulative.distributions,
                      cumulative.gauges):
            self.assertEqual(len(cells), 10)

        expected_all = set(dirty_values + clean_values)
        self.assertEqual(expected_all,
                         {value for _, value in cumulative.counters.items()})
        self.assertEqual(expected_all,
                         {cell.value for _, cell in cumulative.gauges.items()})

Example #13

0

Show file

File: execution_test.py Project: eljefe6a/incubator-beam

  def test_get_cumulative_or_updates(self):
    """Checks get_updates() against get_cumulative() on mixed cells.

    Ten counters and distributions are updated with 1..10. The cells for odd
    values are committed; the cells for even values are left uncommitted.
    get_updates() must report only the five uncommitted entries of each kind,
    get_cumulative() all ten.
    """
    mc = MetricsContainer('astep')

    clean_values = []
    dirty_values = []
    for i in range(1, 11):
      label = 'name{}'.format(i)
      counter = mc.get_counter(MetricName('namespace', label))
      distribution = mc.get_distribution(MetricName('namespace', label))
      counter.inc(i)
      distribution.update(i)
      if i % 2:
        # Assert: Counter/Distribution is DIRTY or COMMITTING (not CLEAN)
        self.assertEqual(distribution.commit.before_commit(), True)
        self.assertEqual(counter.commit.before_commit(), True)
        distribution.commit.after_commit()
        counter.commit.after_commit()
        # Assert: Counter/Distribution has been committed, therefore it's CLEAN
        self.assertEqual(counter.commit.state, CellCommitState.CLEAN)
        self.assertEqual(distribution.commit.state, CellCommitState.CLEAN)
        clean_values.append(i)
      else:
        # Some are left to be DIRTY (i.e. not yet committed).
        # Some are left to be CLEAN (i.e. already committed).
        dirty_values.append(i)

    # Retrieve NON-COMMITTED updates.
    logical = mc.get_updates()
    self.assertEqual(len(logical.counters), 5)
    self.assertEqual(len(logical.distributions), 5)
    self.assertEqual(
        set(dirty_values), {value for _, value in logical.counters.items()})
    # Retrieve ALL updates.
    cumulative = mc.get_cumulative()
    self.assertEqual(len(cumulative.counters), 10)
    self.assertEqual(len(cumulative.distributions), 10)
    self.assertEqual(
        set(dirty_values + clean_values),
        {value for _, value in cumulative.counters.items()})

Example #14

0

Show file

    def test_uses_right_container(self):
        """Checks that a counter reports to whichever container is current.

        The same counter is incremented once while the first container is
        current and by three while the second is; each container must only
        hold the increment made while it was current.
        """
        first = MetricsContainer('step1')
        second = MetricsContainer('step2')
        counter = Metrics.counter('ns', 'name')

        MetricsEnvironment.set_current_container(first)
        counter.inc()
        MetricsEnvironment.set_current_container(second)
        counter.inc(3)
        MetricsEnvironment.unset_current_container()

        first_counters = list(first.get_cumulative().counters.items())
        self.assertEqual(
            first_counters,
            [(MetricKey('step1', MetricName('ns', 'name')), 1)])

        second_counters = list(second.get_cumulative().counters.items())
        self.assertEqual(
            second_counters,
            [(MetricKey('step2', MetricName('ns', 'name')), 3)])

Example #15

0

Show file

File: metric_test.py Project: wardva/beam

  def test_create_counter_distribution(self):
    """Checks that metric updates survive deletion of the metric objects.

    A counter and a distribution are created through ``Metrics`` while a
    'mystep' container is current, updated, and then deleted. The values must
    still be readable from the current container's cells.
    """
    MetricsEnvironment.set_current_container(MetricsContainer('mystep'))
    name = 'a_name'
    counter_ns = 'aCounterNamespace'
    distro_ns = 'aDistributionNamespace'
    counter = Metrics.counter(counter_ns, name)
    distro = Metrics.distribution(distro_ns, name)
    counter.inc(10)
    counter.dec(3)
    distro.update(10)
    distro.update(2)
    self.assertIsInstance(counter, Metrics.DelegatingCounter)
    self.assertIsInstance(distro, Metrics.DelegatingDistribution)

    del distro
    del counter

    container = MetricsEnvironment.current_container()
    counter_cell = container.counters[MetricName(counter_ns, name)]
    # 10 incremented, 3 decremented.
    self.assertEqual(counter_cell.get_cumulative(), 7)
    distro_cell = container.distributions[MetricName(distro_ns, name)]
    self.assertEqual(
        distro_cell.get_cumulative(), DistributionData(12, 2, 2, 10))

Example #16

0

Show file

    def test_create_counter_distribution(self):
        """Checks that metric updates survive deletion of the metric objects.

        A state sampler is installed as the current tracker and a scoped
        state carrying a 'mystep' metrics container is entered. Inside it a
        counter and a distribution are created, updated and deleted; the
        values must still be readable from the current container. The sampler
        is stopped even if an assertion fails.
        """
        sampler = statesampler.StateSampler('', counters.CounterFactory())
        statesampler.set_current_tracker(sampler)
        scoped_state = sampler.scoped_state(
            'mystep', 'myState', metrics_container=MetricsContainer('mystep'))

        try:
            sampler.start()
            with scoped_state:
                name = 'a_name'
                counter_ns = 'aCounterNamespace'
                distro_ns = 'aDistributionNamespace'
                counter = Metrics.counter(counter_ns, name)
                distro = Metrics.distribution(distro_ns, name)
                counter.inc(10)
                counter.dec(3)
                distro.update(10)
                distro.update(2)
                self.assertIsInstance(counter, Metrics.DelegatingCounter)
                self.assertIsInstance(distro, Metrics.DelegatingDistribution)

                del distro
                del counter

                container = MetricsEnvironment.current_container()
                counter_cell = container.get_counter(
                    MetricName(counter_ns, name))
                # 10 incremented, 3 decremented.
                self.assertEqual(counter_cell.get_cumulative(), 7)
                distro_cell = container.get_distribution(
                    MetricName(distro_ns, name))
                self.assertEqual(distro_cell.get_cumulative(),
                                 DistributionData(12, 2, 2, 10))
        finally:
            sampler.stop()

Example #17

0

Show file

    def test_scoped_container(self):
        """Checks that nested ScopedMetricsContainer blocks switch containers.

        Inside the outer scope the outer container is current; inside the
        nested scope the inner one is; after the nested scope exits the outer
        container is current again. Each container must only accumulate the
        increments made while it was current.
        """
        outer = MetricsContainer('mystep')
        inner = MetricsContainer('myinternalstep')
        with ScopedMetricsContainer(outer):
            self.assertEqual(outer, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(2)

            with ScopedMetricsContainer(inner):
                self.assertEqual(inner,
                                 MetricsEnvironment.current_container())
                counter = Metrics.counter('ns', 'name')
                counter.inc(3)
                inner_counters = list(inner.get_cumulative().counters.items())
                self.assertEqual(
                    inner_counters,
                    [(MetricKey('myinternalstep', MetricName('ns', 'name')),
                      3)])

            self.assertEqual(outer, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(4)
            # 2 from before the nested scope plus 4 after it.
            outer_counters = list(outer.get_cumulative().counters.items())
            self.assertEqual(
                outer_counters,
                [(MetricKey('mystep', MetricName('ns', 'name')), 6)])

Example #18

0

Show file

File: operations.py Project: aljoscha/incubator-beam

class Operation(object):
  """Live counterpart of a work item specification.

  An operation may expose one or more outputs, and every output may feed one
  or more receiver operations that consume it as their input.
  """

  def __init__(self, operation_name, spec, counter_factory, state_sampler):
    """Builds a worker operation and creates its scoped sampler states.

    Args:
      operation_name: The system name assigned by the runner for this
        operation.
      spec: An operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    self.operation_name = operation_name
    self.spec = spec
    self.counter_factory = counter_factory
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    self.step_name = operation_name
    container = MetricsContainer(operation_name)
    self.metrics_container = container
    self.scoped_metrics_container = ScopedMetricsContainer(container)

    # One scoped state per phase of the operation.
    self.state_sampler = state_sampler
    self.scoped_start_state = state_sampler.scoped_state(
        operation_name, 'start')
    self.scoped_process_state = state_sampler.scoped_state(
        operation_name, 'process')
    self.scoped_finish_state = state_sampler.scoped_state(
        operation_name, 'finish')
    # TODO(ccy): the '-abort' state can be added when the abort is supported in
    # Operations.
    self.receivers = []

  def start(self):
    """Start operation."""
    root_logger = logging.getLogger()
    self.debug_logging_enabled = root_logger.isEnabledFor(logging.DEBUG)
    # Everything except WorkerSideInputSource, which is not a
    # top-level operation, should have output_coders
    if not getattr(self.spec, 'output_coders', None):
      return
    # One ConsumerSet per output coder, wired to that output's consumers.
    receivers = []
    for index, coder in enumerate(self.spec.output_coders):
      receivers.append(
          ConsumerSet(self.counter_factory, self.step_name,
                      index, self.consumers[index], coder))
    self.receivers = receivers

  def finish(self):
    """Finish operation."""

  def process(self, o):
    """Process element in operation."""

  def output(self, windowed_value, output_index=0):
    """Passes ``windowed_value`` to the receiver at ``output_index``."""
    receiver = cython.cast(Receiver, self.receivers[output_index])
    receiver.receive(windowed_value)

  def add_receiver(self, operation, output_index=0):
    """Adds a receiver operation for the specified output."""
    consumers_for_output = self.consumers[output_index]
    consumers_for_output.append(operation)

  def progress_metrics(self):
    """Builds the Metrics.PTransform proto describing this operation.

    The time spent is the sum of the sampled seconds of the start, process and
    finish states. Output element counts are only filled in when there is
    exactly one receiver.
    """
    transform_pb = beam_fn_api_pb2.Metrics.PTransform
    total_seconds = (
        self.scoped_start_state.sampled_seconds()
        + self.scoped_process_state.sampled_seconds()
        + self.scoped_finish_state.sampled_seconds())
    # Multi-output operations should override this.
    if len(self.receivers) == 1:
      # If there is exactly one output, we can unambiguously
      # fix its name later, which we do.
      # TODO(robertwb): Plumb the actual name here.
      element_counts = {
          'ONLY_OUTPUT': self.receivers[0].opcounter.element_counter.value()}
    else:
      element_counts = None
    measured = transform_pb.Measured(
        total_time_spent=total_seconds,
        output_element_counts=element_counts)
    return transform_pb(
        processed_elements=transform_pb.ProcessedElements(measured=measured),
        user=self.metrics_container.to_runner_api())

  def __str__(self):
    """Generates a useful string for this object.

    Compactly displays interesting fields.  In particular, pickled
    fields are not displayed.  Note that we collapse the fields of the
    contained Worker* object into this object, since there is a 1-1
    mapping between Operation and operation_specs.Worker*.

    Returns:
      Compact string representing this object.
    """
    return self.str_internal()

  def str_internal(self, is_recursive=False):
    """Internal helper for __str__ that supports recursion.

    When recursing on receivers, keep the output short.
    Args:
      is_recursive: whether to omit some details, particularly receivers.
    Returns:
      Compact string representing this object.
    """
    printable_name = self.__class__.__name__
    if hasattr(self, 'step_name'):
      printable_name = '%s %s' % (printable_name, self.step_name)
      if is_recursive:
        # If we have a step name, stop here, no more detail needed.
        return '<%s>' % printable_name

    if self.spec is not None:
      printable_fields = operation_specs.worker_printable_fields(self.spec)
    else:
      printable_fields = []

    if not is_recursive and getattr(self, 'receivers', []):
      receivers_text = ', '.join(str(receiver) for receiver in self.receivers)
      printable_fields.append('receivers=[%s]' % receivers_text)

    return '<%s %s>' % (printable_name, ', '.join(printable_fields))

Example #19

0

Show file

File: operations.py Project: eralmas7/beam

class Operation(object):
  """An operation representing the live version of a work item specification.

  An operation can have one or more outputs and for each output it can have
  one or more receiver operations that will take that as input.
  """

  def __init__(self, name_context, spec, counter_factory, state_sampler):
    """Initializes a worker operation instance.

    Args:
      name_context: A NameContext instance or string(deprecated), with the
        name information for this operation.
      spec: A operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    if isinstance(name_context, common.NameContext):
      # TODO(BEAM-4028): Clean this up once it's completely migrated.
      # We use the specific operation name that is used for metrics and state
      # sampling.
      self.name_context = name_context
    else:
      # Anything that is not already a NameContext is wrapped in one.
      self.name_context = common.NameContext(name_context)

    self.spec = spec
    self.counter_factory = counter_factory
    self.execution_context = None
    # Maps an output index to the list of operations consuming that output.
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    self.metrics_container = MetricsContainer(self.name_context.metrics_name())

    # One scoped state per lifecycle phase; their sampled durations are what
    # progress_metrics() and execution_time_monitoring_infos() report.
    self.state_sampler = state_sampler
    self.scoped_start_state = self.state_sampler.scoped_state(
        self.name_context, 'start', metrics_container=self.metrics_container)
    self.scoped_process_state = self.state_sampler.scoped_state(
        self.name_context, 'process', metrics_container=self.metrics_container)
    self.scoped_finish_state = self.state_sampler.scoped_state(
        self.name_context, 'finish', metrics_container=self.metrics_container)
    # TODO(ccy): the '-abort' state can be added when the abort is supported in
    # Operations.
    # Populated by setup() with one ConsumerSet per output coder.
    self.receivers = []
    # Legacy workers cannot call setup() until after setting additional state
    # on the operation.
    self.setup_done = False

  def setup(self):
    """Builds the receivers for this operation and marks setup as done.

    Runs inside the 'start' scoped state. One receiver is created per entry
    of spec.output_coders; when the spec has no (or empty) output_coders the
    existing receivers list is left untouched.
    """
    with self.scoped_start_state:
      self.debug_logging_enabled = logging.getLogger().isEnabledFor(
          logging.DEBUG)
      # Everything except WorkerSideInputSource, which is not a
      # top-level operation, should have output_coders
      #TODO(pabloem): Define better what step name is used here.
      if getattr(self.spec, 'output_coders', None):
        self.receivers = [
            ConsumerSet.create(
                self.counter_factory,
                self.name_context.logging_name(),
                i,
                self.consumers[i], coder)
            for i, coder in enumerate(self.spec.output_coders)]
    self.setup_done = True

  def start(self):
    """Start operation."""
    if not self.setup_done:
      # For legacy workers.
      self.setup()

  def process(self, o):
    """Process element in operation."""
    pass

  def try_split(self, fraction_of_remainder):
    """Returns None; this base implementation never produces a split."""
    return None

  def finish(self):
    """Finish operation."""
    pass

  def reset(self):
    """Resets this operation's metrics container."""
    self.metrics_container.reset()

  def output(self, windowed_value, output_index=0):
    """Passes windowed_value to the receiver at output_index."""
    cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)

  def add_receiver(self, operation, output_index=0):
    """Adds a receiver operation for the specified output."""
    self.consumers[output_index].append(operation)

  def progress_metrics(self):
    """Returns a beam_fn_api_pb2.Metrics.PTransform for this operation.

    The message carries the summed sampled seconds of the start, process and
    finish states, the element count of the only receiver (when there is
    exactly one), and the user metrics from the metrics container.
    """
    return beam_fn_api_pb2.Metrics.PTransform(
        processed_elements=beam_fn_api_pb2.Metrics.PTransform.ProcessedElements(
            measured=beam_fn_api_pb2.Metrics.PTransform.Measured(
                total_time_spent=(
                    self.scoped_start_state.sampled_seconds()
                    + self.scoped_process_state.sampled_seconds()
                    + self.scoped_finish_state.sampled_seconds()),
                # Multi-output operations should override this.
                output_element_counts=(
                    # If there is exactly one output, we can unambiguously
                    # fix its name later, which we do.
                    # TODO(robertwb): Plumb the actual name here.
                    {'ONLY_OUTPUT': self.receivers[0].opcounter
                                    .element_counter.value()}
                    if len(self.receivers) == 1
                    else None))),
        user=self.metrics_container.to_runner_api())

  def monitoring_infos(self, transform_id):
    """Returns all MonitoringInfos collected by this operation.

    The result is the execution-time dict updated with the element-count
    and user MonitoringInfos, in that order.

    Args:
      transform_id: passed as the ptransform of every MonitoringInfo.
    Returns:
      A dict mapping monitoring_infos.to_key(mi) to mi.
    """
    all_monitoring_infos = self.execution_time_monitoring_infos(transform_id)
    all_monitoring_infos.update(
        self.element_count_monitoring_infos(transform_id))
    all_monitoring_infos.update(self.user_monitoring_infos(transform_id))
    return all_monitoring_infos

  def element_count_monitoring_infos(self, transform_id):
    """Returns the element count MonitoringInfo collected by this operation.

    Args:
      transform_id: passed as the ptransform of the MonitoringInfo.
    Returns:
      A single-entry dict keyed by monitoring_infos.to_key(mi) when there is
      exactly one receiver, otherwise an empty dict.
    """
    if len(self.receivers) == 1:
      # If there is exactly one output, we can unambiguously
      # fix its name later, which we do.
      # TODO(robertwb): Plumb the actual name here.
      mi = monitoring_infos.int64_counter(
          monitoring_infos.ELEMENT_COUNT_URN,
          self.receivers[0].opcounter.element_counter.value(),
          ptransform=transform_id,
          # This branch only runs with exactly one receiver, so the tag is
          # always the single-output placeholder.
          tag='ONLY_OUTPUT',
      )
      return {monitoring_infos.to_key(mi) : mi}
    return {}

  def user_monitoring_infos(self, transform_id):
    """Returns the user MonitoringInfos collected by this operation."""
    return self.metrics_container.to_runner_api_monitoring_infos(transform_id)

  def execution_time_monitoring_infos(self, transform_id):
    """Returns the start/process/finish/total time MonitoringInfos.

    Args:
      transform_id: passed as the ptransform of every MonitoringInfo.
    Returns:
      A dict mapping monitoring_infos.to_key(mi) to mi for the four counters.
    """
    # Read each sampled duration exactly once so that the reported total is
    # always the sum of the three per-state values reported alongside it.
    start_msecs = self.scoped_start_state.sampled_msecs_int()
    process_msecs = self.scoped_process_state.sampled_msecs_int()
    finish_msecs = self.scoped_finish_state.sampled_msecs_int()
    total_time_spent_msecs = start_msecs + process_msecs + finish_msecs
    mis = [
        monitoring_infos.int64_counter(
            monitoring_infos.START_BUNDLE_MSECS_URN,
            start_msecs,
            ptransform=transform_id
        ),
        monitoring_infos.int64_counter(
            monitoring_infos.PROCESS_BUNDLE_MSECS_URN,
            process_msecs,
            ptransform=transform_id
        ),
        monitoring_infos.int64_counter(
            monitoring_infos.FINISH_BUNDLE_MSECS_URN,
            finish_msecs,
            ptransform=transform_id
        ),
        monitoring_infos.int64_counter(
            monitoring_infos.TOTAL_MSECS_URN,
            total_time_spent_msecs,
            ptransform=transform_id
        ),
    ]
    return {monitoring_infos.to_key(mi) : mi for mi in mis}

  def __str__(self):
    """Generates a useful string for this object.

    Compactly displays interesting fields.  In particular, pickled
    fields are not displayed.  Note that we collapse the fields of the
    contained Worker* object into this object, since there is a 1-1
    mapping between Operation and operation_specs.Worker*.

    Returns:
      Compact string representing this object.
    """
    return self.str_internal()

  def str_internal(self, is_recursive=False):
    """Internal helper for __str__ that supports recursion.

    When recursing on receivers, keep the output short.
    Args:
      is_recursive: whether to omit some details, particularly receivers.
    Returns:
      Compact string representing this object.
    """
    printable_name = self.__class__.__name__
    # The name is only extended when a 'step_name' attribute has been set on
    # the instance; the text shown is the name context's logging name.
    if hasattr(self, 'step_name'):
      printable_name += ' %s' % self.name_context.logging_name()
      if is_recursive:
        # If we have a step name, stop here, no more detail needed.
        return '<%s>' % printable_name

    if self.spec is None:
      printable_fields = []
    else:
      printable_fields = operation_specs.worker_printable_fields(self.spec)

    if not is_recursive and getattr(self, 'receivers', []):
      printable_fields.append('receivers=[%s]' % ', '.join([
          str(receiver) for receiver in self.receivers]))

    return '<%s %s>' % (printable_name, ', '.join(printable_fields))

Example #20

0

Show file

File: operations.py Project: zhou1243991952/beam

class Operation(object):
    """Live, executable counterpart of a work item specification.

    Every operation exposes zero or more outputs, and each output can feed
    one or more receiver operations that consume it as their input.
    """
    def __init__(self, name_context, spec, counter_factory, state_sampler):
        """Sets up naming, metrics and state sampling for the operation.

        Args:
          name_context: A NameContext instance or string(deprecated), with
            the name information for this operation.
          spec: A operation_specs.Worker* instance.
          counter_factory: The CounterFactory to use for our counters.
          state_sampler: The StateSampler for the current operation.
        """
        # TODO(BEAM-4028): Clean this up once it's completely migrated.
        # A NameContext is stored as given (it carries the specific operation
        # name used for metrics and state sampling); anything else is wrapped.
        if not isinstance(name_context, common.NameContext):
            name_context = common.NameContext(name_context)
        self.name_context = name_context

        self.spec = spec
        self.counter_factory = counter_factory
        # Output index -> operations registered through add_receiver().
        self.consumers = collections.defaultdict(list)

        # These are overwritten in the legacy harness.
        container_name = self.name_context.metrics_name()
        self.metrics_container = MetricsContainer(container_name)

        self.state_sampler = state_sampler

        def scoped(state_name):
            # All three lifecycle states share the name context and the
            # metrics container; only the state name differs.
            return self.state_sampler.scoped_state(
                self.name_context,
                state_name,
                metrics_container=self.metrics_container)

        self.scoped_start_state = scoped('start')
        self.scoped_process_state = scoped('process')
        self.scoped_finish_state = scoped('finish')
        # TODO(ccy): the '-abort' state can be added when the abort is
        # supported in Operations.
        self.receivers = []

    def start(self):
        """Start operation: record the debug flag and build the receivers."""
        root_logger = logging.getLogger()
        self.debug_logging_enabled = root_logger.isEnabledFor(logging.DEBUG)
        # Everything except WorkerSideInputSource, which is not a
        # top-level operation, should have output_coders
        #TODO(pabloem): Define better what step name is used here.
        if not getattr(self.spec, 'output_coders', None):
            return
        built = []
        for index, coder in enumerate(self.spec.output_coders):
            built.append(
                ConsumerSet(self.counter_factory,
                            self.name_context.logging_name(), index,
                            self.consumers[index], coder))
        self.receivers = built

    def process(self, o):
        """Process element in operation (no-op in this base class)."""
        pass

    def finish(self):
        """Finish operation (no-op in this base class)."""
        pass

    def output(self, windowed_value, output_index=0):
        """Hands windowed_value to the receiver at output_index."""
        cython.cast(
            Receiver, self.receivers[output_index]
        ).receive(windowed_value)

    def add_receiver(self, operation, output_index=0):
        """Registers operation as a consumer of the given output index."""
        consumers_for_output = self.consumers[output_index]
        consumers_for_output.append(operation)

    def progress_metrics(self):
        """Returns a beam_fn_api_pb2.Metrics.PTransform for this operation.

        It reports the summed sampled seconds of the three scoped states,
        the element count of the sole receiver (None unless there is exactly
        one receiver) and the user metrics of the metrics container.
        """
        seconds_spent = (
            self.scoped_start_state.sampled_seconds() +
            self.scoped_process_state.sampled_seconds() +
            self.scoped_finish_state.sampled_seconds())

        # Multi-output operations should override this.
        # If there is exactly one output, we can unambiguously
        # fix its name later, which we do.
        # TODO(robertwb): Plumb the actual name here.
        if len(self.receivers) == 1:
            element_counter = self.receivers[0].opcounter.element_counter
            counts = {'ONLY_OUTPUT': element_counter.value()}
        else:
            counts = None

        ptransform_pb = beam_fn_api_pb2.Metrics.PTransform
        measured = ptransform_pb.Measured(
            total_time_spent=seconds_spent, output_element_counts=counts)
        processed = ptransform_pb.ProcessedElements(measured=measured)
        return ptransform_pb(
            processed_elements=processed,
            user=self.metrics_container.to_runner_api())

    def __str__(self):
        """Returns a compact description of this operation.

        The text comes from str_internal() called with default arguments.

        Returns:
          Compact string representing this object.
        """
        description = self.str_internal()
        return description

    def str_internal(self, is_recursive=False):
        """Builds the string used by __str__, with an optional short form.

        Args:
          is_recursive: whether to omit some details, particularly
            receivers.
        Returns:
          Compact string representing this object.
        """
        label = self.__class__.__name__
        # The logging name is only shown once a 'step_name' attribute has
        # been set on the instance.
        if hasattr(self, 'step_name'):
            label = label + ' %s' % self.name_context.logging_name()
            if is_recursive:
                # If we have a step name, stop here, no more detail needed.
                return '<%s>' % label

        fields = (
            [] if self.spec is None
            else operation_specs.worker_printable_fields(self.spec))

        # Receivers appear only in the full form and only when non-empty.
        if not is_recursive:
            if getattr(self, 'receivers', []):
                rendered = [str(receiver) for receiver in self.receivers]
                fields.append('receivers=[%s]' % ', '.join(rendered))

        return '<%s %s>' % (label, ', '.join(fields))

Example #21

0

Show file

def create_operation(operation_name,
                     spec,
                     counter_factory,
                     step_name,
                     state_sampler,
                     test_shuffle_source=None,
                     test_shuffle_sink=None,
                     is_streaming=False):
    """Builds the operation object that matches the type of spec.

    The spec is tested against the operation_specs.Worker* classes in a
    fixed order and the first match decides which operation is constructed.
    The dataflow_worker modules are imported only inside the branch that
    needs them.

    Args:
      operation_name: first constructor argument of the created operation.
      spec: an operation_specs.Worker* instance.
      counter_factory: forwarded to the operation constructor.
      step_name: stored on the result as step_name and used to name the
        MetricsContainer attached to it.
      state_sampler: forwarded to the operation constructor.
      test_shuffle_source: passed as shuffle_source to shuffle read
        operations.
      test_shuffle_sink: passed as shuffle_sink to the shuffle write
        operation.
      is_streaming: for WorkerMergeWindows specs, selects the streaming
        instead of the batch group-also-by-windows operation.

    Returns:
      The created operation, with step_name, metrics_container and
      scoped_metrics_container set on it.

    Raises:
      TypeError: if spec matches none of the handled Worker* classes.
    """
    # Positional arguments shared by every constructor call below.
    base_args = (operation_name, spec, counter_factory, state_sampler)

    if isinstance(spec, operation_specs.WorkerRead):
        if isinstance(spec.source, iobase.SourceBundle):
            op = ReadOperation(*base_args)
        else:
            from dataflow_worker.native_operations import NativeReadOperation
            op = NativeReadOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerWrite):
        from dataflow_worker.native_operations import NativeWriteOperation
        op = NativeWriteOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerCombineFn):
        op = CombineOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerPartialGroupByKey):
        op = create_pgbk_op(*base_args)
    elif isinstance(spec, operation_specs.WorkerDoFn):
        op = DoOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
        from dataflow_worker.shuffle_operations import GroupedShuffleReadOperation
        op = GroupedShuffleReadOperation(
            *base_args, shuffle_source=test_shuffle_source)
    elif isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
        from dataflow_worker.shuffle_operations import UngroupedShuffleReadOperation
        op = UngroupedShuffleReadOperation(
            *base_args, shuffle_source=test_shuffle_source)
    elif isinstance(spec, operation_specs.WorkerInMemoryWrite):
        op = InMemoryWriteOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerShuffleWrite):
        from dataflow_worker.shuffle_operations import ShuffleWriteOperation
        op = ShuffleWriteOperation(*base_args, shuffle_sink=test_shuffle_sink)
    elif isinstance(spec, operation_specs.WorkerFlatten):
        op = FlattenOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerMergeWindows):
        # Both variants are imported before choosing between them.
        from dataflow_worker.shuffle_operations import BatchGroupAlsoByWindowsOperation
        from dataflow_worker.shuffle_operations import StreamingGroupAlsoByWindowsOperation
        gabw_class = (
            StreamingGroupAlsoByWindowsOperation
            if is_streaming else BatchGroupAlsoByWindowsOperation)
        op = gabw_class(*base_args)
    elif isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
        from dataflow_worker.shuffle_operations import ReifyTimestampAndWindowsOperation
        op = ReifyTimestampAndWindowsOperation(*base_args)
    else:
        message = ('Expected an instance of operation_specs.Worker* class '
                   'instead of %s') % (spec, )
        raise TypeError(message)

    # Attach the step name and a metrics container named after it, replacing
    # whatever container the constructor created.
    op.step_name = step_name
    step_container = MetricsContainer(step_name)
    op.metrics_container = step_container
    op.scoped_metrics_container = ScopedMetricsContainer(op.metrics_container)
    return op

Example #22

0

Show file

 def test_add_to_counter(self):
   """A counter fetched again under an equal MetricName keeps its count."""
   container = MetricsContainer('astep')
   # Increment through the first lookup.
   container.get_counter(MetricName('namespace', 'name')).inc()
   # Look the counter up again with a freshly built, equal MetricName.
   fetched_again = container.get_counter(MetricName('namespace', 'name'))
   self.assertEqual(fetched_again.value, 1)

Example #23

0

Show file

 def test_create_new_counter(self):
   """get_counter registers the metric name in the container's counters."""
   container = MetricsContainer('astep')
   present_before = MetricName('namespace', 'name') in container.counters
   self.assertFalse(present_before)
   container.get_counter(MetricName('namespace', 'name'))
   present_after = MetricName('namespace', 'name') in container.counters
   self.assertTrue(present_after)

Example #24

0

Show file

class Operation(object):
    """An operation representing the live version of a work item specification.

    An operation can have one or more outputs and for each output it can have
    one or more receiver operations that will take that as input.
    """
    def __init__(
            self,
            name_context,  # type: Union[str, common.NameContext]
            spec,
            counter_factory,
            state_sampler  # type: StateSampler
    ):
        """Initializes a worker operation instance.

        Args:
          name_context: A NameContext instance or string(deprecated), with
            the name information for this operation.
          spec: A operation_specs.Worker* instance.
          counter_factory: The CounterFactory to use for our counters.
          state_sampler: The StateSampler for the current operation.
        """
        if isinstance(name_context, common.NameContext):
            # TODO(BEAM-4028): Clean this up once it's completely migrated.
            # We use the specific operation name that is used for metrics and state
            # sampling.
            self.name_context = name_context
        else:
            # Anything that is not already a NameContext is wrapped in one.
            self.name_context = common.NameContext(name_context)

        self.spec = spec
        self.counter_factory = counter_factory
        self.execution_context = None  # type: Optional[ExecutionContext]
        # Maps an output index to the operations consuming that output.
        self.consumers = collections.defaultdict(
            list)  # type: DefaultDict[int, List[Operation]]

        # These are overwritten in the legacy harness.
        self.metrics_container = MetricsContainer(
            self.name_context.metrics_name())

        # One scoped state per lifecycle phase; their sampled durations are
        # what progress_metrics() and execution_time_monitoring_infos()
        # report.
        self.state_sampler = state_sampler
        self.scoped_start_state = self.state_sampler.scoped_state(
            self.name_context,
            'start',
            metrics_container=self.metrics_container)
        self.scoped_process_state = self.state_sampler.scoped_state(
            self.name_context,
            'process',
            metrics_container=self.metrics_container)
        self.scoped_finish_state = self.state_sampler.scoped_state(
            self.name_context,
            'finish',
            metrics_container=self.metrics_container)
        # TODO(ccy): the '-abort' state can be added when the abort is supported in
        # Operations.
        # Populated by setup() with one ConsumerSet per output coder.
        self.receivers = []  # type: List[ConsumerSet]
        # Legacy workers cannot call setup() until after setting additional state
        # on the operation.
        self.setup_done = False
        self.step_name = None  # type: Optional[str]

    def setup(self):
        # type: () -> None
        """Set up operation.

        This must be called before any other methods of the operation.

        Runs inside the 'start' scoped state and creates one receiver per
        entry of spec.output_coders; when the spec has no (or empty)
        output_coders the existing receivers list is left untouched.
        """
        with self.scoped_start_state:
            self.debug_logging_enabled = logging.getLogger().isEnabledFor(
                logging.DEBUG)
            # Everything except WorkerSideInputSource, which is not a
            # top-level operation, should have output_coders
            #TODO(pabloem): Define better what step name is used here.
            if getattr(self.spec, 'output_coders', None):
                self.receivers = [
                    ConsumerSet.create(self.counter_factory,
                                       self.name_context.logging_name(), i,
                                       self.consumers[i], coder)
                    for i, coder in enumerate(self.spec.output_coders)
                ]
        self.setup_done = True

    def start(self):
        # type: () -> None
        """Start operation."""
        if not self.setup_done:
            # For legacy workers.
            self.setup()

    def process(self, o):
        # type: (WindowedValue) -> None
        """Process element in operation."""
        pass

    def finalize_bundle(self):
        # type: () -> None
        """Does nothing in this base implementation."""
        pass

    def needs_finalization(self):
        """Returns False in this base implementation."""
        return False

    def try_split(self, fraction_of_remainder):
        # type: (...) -> Optional[Any]
        """Returns None; this base implementation never produces a split."""
        return None

    def current_element_progress(self):
        """Returns None in this base implementation."""
        return None

    def finish(self):
        # type: () -> None
        """Finish operation."""
        pass

    def teardown(self):
        # type: () -> None
        """Tear down operation.

        No other methods of this operation should be called after this.
        """
        pass

    def reset(self):
        # type: () -> None
        """Resets this operation's metrics container."""
        self.metrics_container.reset()

    def output(self, windowed_value, output_index=0):
        # type: (WindowedValue, int) -> None
        """Passes windowed_value to the receiver at output_index."""
        cython.cast(Receiver,
                    self.receivers[output_index]).receive(windowed_value)

    def add_receiver(self, operation, output_index=0):
        # type: (Operation, int) -> None
        """Adds a receiver operation for the specified output."""
        self.consumers[output_index].append(operation)

    def progress_metrics(self):
        # type: () -> beam_fn_api_pb2.Metrics.PTransform
        """Returns the progress metrics message for this operation.

        The message carries the summed sampled seconds of the start, process
        and finish states, the element count of the only receiver (when
        there is exactly one), and the user metrics from the metrics
        container.
        """
        return beam_fn_api_pb2.Metrics.PTransform(
            processed_elements=beam_fn_api_pb2.Metrics.PTransform.
            ProcessedElements(
                measured=beam_fn_api_pb2.Metrics.PTransform.Measured(
                    total_time_spent=(
                        self.scoped_start_state.sampled_seconds() +
                        self.scoped_process_state.sampled_seconds() +
                        self.scoped_finish_state.sampled_seconds()),
                    # Multi-output operations should override this.
                    output_element_counts=(
                        # If there is exactly one output, we can unambiguously
                        # fix its name later, which we do.
                        # TODO(robertwb): Plumb the actual name here.
                        {
                            'ONLY_OUTPUT':
                            self.receivers[0].opcounter.element_counter.value(
                            )
                        } if len(self.receivers) == 1 else None))),
            user=self.metrics_container.to_runner_api())

    def monitoring_infos(self, transform_id):
        # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
        """Returns all MonitoringInfos collected by this operation.

        The result is the execution-time dict updated with the pcollection
        count and user MonitoringInfos, in that order.
        """
        all_monitoring_infos = self.execution_time_monitoring_infos(
            transform_id)
        all_monitoring_infos.update(
            self.pcollection_count_monitoring_infos(transform_id))
        all_monitoring_infos.update(self.user_monitoring_infos(transform_id))
        return all_monitoring_infos

    def pcollection_count_monitoring_infos(self, transform_id):
        """Returns the element count MonitoringInfo collected by this operation.

        Args:
          transform_id: passed as the ptransform of each MonitoringInfo.
        Returns:
          With exactly one receiver, a dict holding the element count and
          the sampled byte size MonitoringInfos, keyed by
          monitoring_infos.to_key(mi); otherwise an empty dict.
        """
        if len(self.receivers) == 1:
            # If there is exactly one output, we can unambiguously
            # fix its name later, which we do.
            # TODO(robertwb): Plumb the actual name here.
            # This branch only runs with exactly one receiver, so the tag is
            # always the single-output placeholder.
            only_output_tag = 'ONLY_OUTPUT'
            opcounter = self.receivers[0].opcounter
            elem_count_mi = monitoring_infos.int64_counter(
                monitoring_infos.ELEMENT_COUNT_URN,
                opcounter.element_counter.value(),
                ptransform=transform_id,
                tag=only_output_tag,
            )

            # Local names avoid shadowing the builtins sum/min/max; the mean
            # component of the value is not reported.
            (unused_mean, byte_sum, byte_count, byte_min,
             byte_max) = opcounter.mean_byte_counter.value()
            metric = metrics_pb2.Metric(
                distribution_data=metrics_pb2.DistributionData(
                    int_distribution_data=metrics_pb2.IntDistributionData(
                        count=byte_count,
                        sum=byte_sum,
                        min=byte_min,
                        max=byte_max)))
            sampled_byte_count = monitoring_infos.int64_distribution(
                monitoring_infos.SAMPLED_BYTE_SIZE_URN,
                metric,
                ptransform=transform_id,
                tag=only_output_tag,
            )
            return {
                monitoring_infos.to_key(elem_count_mi): elem_count_mi,
                monitoring_infos.to_key(sampled_byte_count): sampled_byte_count
            }
        return {}

    def user_monitoring_infos(self, transform_id):
        """Returns the user MonitoringInfos collected by this operation."""
        return self.metrics_container.to_runner_api_monitoring_infos(
            transform_id)

    def execution_time_monitoring_infos(self, transform_id):
        # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
        """Returns the start/process/finish/total time MonitoringInfos.

        The result maps monitoring_infos.to_key(mi) to mi for the four
        counters.
        """
        # Read each sampled duration exactly once so that the reported total
        # is always the sum of the three per-state values reported with it.
        start_msecs = self.scoped_start_state.sampled_msecs_int()
        process_msecs = self.scoped_process_state.sampled_msecs_int()
        finish_msecs = self.scoped_finish_state.sampled_msecs_int()
        total_time_spent_msecs = start_msecs + process_msecs + finish_msecs
        mis = [
            monitoring_infos.int64_counter(
                monitoring_infos.START_BUNDLE_MSECS_URN,
                start_msecs,
                ptransform=transform_id),
            monitoring_infos.int64_counter(
                monitoring_infos.PROCESS_BUNDLE_MSECS_URN,
                process_msecs,
                ptransform=transform_id),
            monitoring_infos.int64_counter(
                monitoring_infos.FINISH_BUNDLE_MSECS_URN,
                finish_msecs,
                ptransform=transform_id),
            monitoring_infos.int64_counter(monitoring_infos.TOTAL_MSECS_URN,
                                           total_time_spent_msecs,
                                           ptransform=transform_id),
        ]
        return {monitoring_infos.to_key(mi): mi for mi in mis}

    def __str__(self):
        """Generates a useful string for this object.

        Compactly displays interesting fields.  In particular, pickled
        fields are not displayed.  Note that we collapse the fields of the
        contained Worker* object into this object, since there is a 1-1
        mapping between Operation and operation_specs.Worker*.

        Returns:
          Compact string representing this object.
        """
        return self.str_internal()

    def str_internal(self, is_recursive=False):
        """Internal helper for __str__ that supports recursion.

        When recursing on receivers, keep the output short.

        Args:
          is_recursive: whether to omit some details, particularly
            receivers.
        Returns:
          Compact string representing this object.
        """
        printable_name = self.__class__.__name__
        # The check is on the presence of the 'step_name' attribute, but the
        # text shown is the name context's logging name.
        if hasattr(self, 'step_name'):
            printable_name += ' %s' % self.name_context.logging_name()
            if is_recursive:
                # If we have a step name, stop here, no more detail needed.
                return '<%s>' % printable_name

        if self.spec is None:
            printable_fields = []
        else:
            printable_fields = operation_specs.worker_printable_fields(
                self.spec)

        if not is_recursive and getattr(self, 'receivers', []):
            printable_fields.append(
                'receivers=[%s]' %
                ', '.join([str(receiver) for receiver in self.receivers]))

        return '<%s %s>' % (printable_name, ', '.join(printable_fields))