Example #1
    def call(self):
        self._call_count += 1
        assert self._call_count <= (1 +
                                    len(self._applied_transform.side_inputs))
        metrics_container = MetricsContainer(
            self._applied_transform.full_label)
        scoped_metrics_container = ScopedMetricsContainer(metrics_container)

        for side_input in self._applied_transform.side_inputs:
            if side_input not in self._side_input_values:
                has_result, value = (self._evaluation_context.
                                     get_value_or_schedule_after_output(
                                         side_input, self))
                if not has_result:
                    # Monitor task will reschedule this executor once the side input is
                    # available.
                    return
                self._side_input_values[side_input] = value

        side_input_values = [
            self._side_input_values[side_input]
            for side_input in self._applied_transform.side_inputs
        ]

        try:
            evaluator = self._transform_evaluator_registry.for_application(
                self._applied_transform, self._input_bundle, side_input_values,
                scoped_metrics_container)

            if self._input_bundle:
                for value in self._input_bundle.get_elements_iterable():
                    evaluator.process_element(value)

            with scoped_metrics_container:
                result = evaluator.finish_bundle()
                result.logical_metric_updates = (
                    metrics_container.get_cumulative())

            if self._evaluation_context.has_cache:
                for uncommitted_bundle in result.uncommitted_output_bundles:
                    self._evaluation_context.append_to_cache(
                        self._applied_transform, uncommitted_bundle.tag,
                        uncommitted_bundle.get_elements_iterable())
                undeclared_tag_values = result.undeclared_tag_values
                if undeclared_tag_values:
                    for tag, value in undeclared_tag_values.items():
                        self._evaluation_context.append_to_cache(
                            self._applied_transform, tag, value)

            self._completion_callback.handle_result(self._input_bundle, result)
            return result
        except Exception as e:  # pylint: disable=broad-except
            logging.warning('Task failed: %s',
                            traceback.format_exc(),
                            exc_info=True)
            self._completion_callback.handle_exception(e)
        finally:
            self._evaluation_context.metrics().commit_physical(
                self._input_bundle, metrics_container.get_cumulative())
            self._transform_evaluation_state.complete(self)
Example #2
  def call(self):
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for side_input in self._applied_ptransform.side_inputs:
      if side_input not in self._side_input_values:
        has_result, value = (
            self._evaluation_context.get_value_or_schedule_after_output(
                side_input, self))
        if not has_result:
          # Monitor task will reschedule this executor once the side input is
          # available.
          return
        self._side_input_values[side_input] = value

    side_input_values = [self._side_input_values[side_input]
                         for side_input in self._applied_ptransform.side_inputs]

    try:
      evaluator = self._transform_evaluator_registry.get_evaluator(
          self._applied_ptransform, self._input_bundle,
          side_input_values, scoped_metrics_container)

      if self._fired_timers:
        for timer_firing in self._fired_timers:
          evaluator.process_timer_wrapper(timer_firing)

      if self._input_bundle:
        for value in self._input_bundle.get_elements_iterable():
          evaluator.process_element(value)

      with scoped_metrics_container:
        result = evaluator.finish_bundle()
        result.logical_metric_updates = metrics_container.get_cumulative()

      if self._evaluation_context.has_cache:
        for uncommitted_bundle in result.uncommitted_output_bundles:
          self._evaluation_context.append_to_cache(
              self._applied_ptransform, uncommitted_bundle.tag,
              uncommitted_bundle.get_elements_iterable())
        undeclared_tag_values = result.undeclared_tag_values
        if undeclared_tag_values:
          for tag, value in undeclared_tag_values.items():
            self._evaluation_context.append_to_cache(
                self._applied_ptransform, tag, value)

      self._completion_callback.handle_result(self, self._input_bundle, result)
      return result
    except Exception as e:  # pylint: disable=broad-except
      self._completion_callback.handle_exception(self, e)
    finally:
      self._evaluation_context.metrics().commit_physical(
          self._input_bundle,
          metrics_container.get_cumulative())
      self._transform_evaluation_state.complete(self)
Example #3
    def test_uses_right_container(self):
        c1 = MetricsContainer('step1')
        c2 = MetricsContainer('step2')
        counter = Metrics.counter('ns', 'name')
        MetricsEnvironment.set_current_container(c1)
        counter.inc()
        MetricsEnvironment.set_current_container(c2)
        counter.inc(3)
        MetricsEnvironment.unset_current_container()

        self.assertEqual(list(c1.get_cumulative().counters.items()),
                         [(MetricKey('step1', MetricName('ns', 'name')), 1)])

        self.assertEqual(list(c2.get_cumulative().counters.items()),
                         [(MetricKey('step2', MetricName('ns', 'name')), 3)])
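
Example #3 omits its imports and relies on a surrounding test class. The following
minimal, self-contained sketch shows the same container-switching pattern; the import
paths are an assumption (none of the examples show them) and the step, namespace, and
metric names are purely illustrative.

# Minimal sketch of the container-switching pattern in Example #3.
# NOTE: the import paths below are assumed, not taken from the examples.
from apache_beam.metrics.execution import (MetricKey, MetricsContainer,
                                            MetricsEnvironment)
from apache_beam.metrics.metric import Metrics
from apache_beam.metrics.metricbase import MetricName

c1 = MetricsContainer('step1')
counter = Metrics.counter('ns', 'name')

MetricsEnvironment.set_current_container(c1)
counter.inc(5)  # attributed to c1 while it is the current container
MetricsEnvironment.unset_current_container()

key = MetricKey('step1', MetricName('ns', 'name'))
print(c1.get_cumulative().counters[key])  # expected output: 5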
Example #4
    def call(self, state_sampler):
        self._call_count += 1
        assert self._call_count <= (1 +
                                    len(self._applied_ptransform.side_inputs))
        metrics_container = MetricsContainer(
            self._applied_ptransform.full_label)
        start_state = state_sampler.scoped_state(
            self._applied_ptransform.full_label,
            'start',
            metrics_container=metrics_container)
        process_state = state_sampler.scoped_state(
            self._applied_ptransform.full_label,
            'process',
            metrics_container=metrics_container)
        finish_state = state_sampler.scoped_state(
            self._applied_ptransform.full_label,
            'finish',
            metrics_container=metrics_container)

        with start_state:
            # Side input initialization should be accounted for in start_state.
            for side_input in self._applied_ptransform.side_inputs:
                # Find the projection of main's window onto the side input's window.
                window_mapping_fn = side_input._view_options().get(
                    'window_mapping_fn', sideinputs._global_window_mapping_fn)
                main_onto_side_window = window_mapping_fn(
                    self._latest_main_input_window)
                block_until = main_onto_side_window.end

                if side_input not in self._side_input_values:
                    value = self._evaluation_context.get_value_or_block_until_ready(
                        side_input, self, block_until)
                    if not value:
                        # Monitor task will reschedule this executor once the side input is
                        # available.
                        return
                    self._side_input_values[side_input] = value
            side_input_values = [
                self._side_input_values[side_input]
                for side_input in self._applied_ptransform.side_inputs
            ]

        while self._retry_count < self._max_retries_per_bundle:
            try:
                self.attempt_call(metrics_container, side_input_values,
                                  start_state, process_state, finish_state)
                break
            except Exception as e:
                self._retry_count += 1
                _LOGGER.error(
                    'Exception at bundle %r, due to an exception.\n %s',
                    self._input_bundle, traceback.format_exc())
                if self._retry_count == self._max_retries_per_bundle:
                    _LOGGER.error('Giving up after %s attempts.',
                                  self._max_retries_per_bundle)
                    self._completion_callback.handle_exception(self, e)

        self._evaluation_context.metrics().commit_physical(
            self._input_bundle, metrics_container.get_cumulative())
        self._transform_evaluation_state.complete(self)
Example #5
  def test_get_cumulative_or_updates(self):
    mc = MetricsContainer('astep')

    all_values = []
    for i in range(1, 11):
      counter = mc.get_counter(MetricName('namespace', 'name{}'.format(i)))
      distribution = mc.get_distribution(
          MetricName('namespace', 'name{}'.format(i)))
      gauge = mc.get_gauge(MetricName('namespace', 'name{}'.format(i)))

      counter.inc(i)
      distribution.update(i)
      gauge.set(i)
      all_values.append(i)

    # Retrieve ALL updates.
    cumulative = mc.get_cumulative()
    self.assertEqual(len(cumulative.counters), 10)
    self.assertEqual(len(cumulative.distributions), 10)
    self.assertEqual(len(cumulative.gauges), 10)

    self.assertEqual(
        set(all_values), set([v for _, v in cumulative.counters.items()]))
    self.assertEqual(
        set(all_values), set([v.value for _, v in cumulative.gauges.items()]))
Example #6
  def test_uses_right_container(self):
    c1 = MetricsContainer('step1')
    c2 = MetricsContainer('step2')
    counter = Metrics.counter('ns', 'name')
    MetricsEnvironment.set_current_container(c1)
    counter.inc()
    MetricsEnvironment.set_current_container(c2)
    counter.inc(3)
    MetricsEnvironment.unset_current_container()

    self.assertEqual(
        list(c1.get_cumulative().counters.items()),
        [(MetricKey('step1', MetricName('ns', 'name')), 1)])

    self.assertEqual(
        list(c2.get_cumulative().counters.items()),
        [(MetricKey('step2', MetricName('ns', 'name')), 3)])
Example #7
    def test_get_cumulative_or_updates(self):
        mc = MetricsContainer('astep')

        clean_values = []
        dirty_values = []
        for i in range(1, 11):
            counter = mc.get_counter(
                MetricName('namespace', 'name{}'.format(i)))
            distribution = mc.get_distribution(
                MetricName('namespace', 'name{}'.format(i)))
            gauge = mc.get_gauge(MetricName('namespace', 'name{}'.format(i)))

            counter.inc(i)
            distribution.update(i)
            gauge.set(i)
            if i % 2 == 0:
                # Some are left to be DIRTY (i.e. not yet committed).
                # Some are left to be CLEAN (i.e. already committed).
                dirty_values.append(i)
                continue
            # Assert: Counter/Distribution is DIRTY or COMMITTING (not CLEAN)
            self.assertEqual(distribution.commit.before_commit(), True)
            self.assertEqual(counter.commit.before_commit(), True)
            self.assertEqual(gauge.commit.before_commit(), True)
            distribution.commit.after_commit()
            counter.commit.after_commit()
            gauge.commit.after_commit()
            # Assert: Counter/Distribution has been committed, therefore it's CLEAN
            self.assertEqual(counter.commit.state, CellCommitState.CLEAN)
            self.assertEqual(distribution.commit.state, CellCommitState.CLEAN)
            self.assertEqual(gauge.commit.state, CellCommitState.CLEAN)
            clean_values.append(i)

        # Retrieve NON-COMMITTED updates.
        logical = mc.get_updates()
        self.assertEqual(len(logical.counters), 5)
        self.assertEqual(len(logical.distributions), 5)
        self.assertEqual(len(logical.gauges), 5)

        self.assertEqual(set(dirty_values),
                         set([v.value for _, v in logical.gauges.items()]))
        self.assertEqual(set(dirty_values),
                         set([v for _, v in logical.counters.items()]))

        # Retrieve ALL updates.
        cumulative = mc.get_cumulative()
        self.assertEqual(len(cumulative.counters), 10)
        self.assertEqual(len(cumulative.distributions), 10)
        self.assertEqual(len(cumulative.gauges), 10)

        self.assertEqual(set(dirty_values + clean_values),
                         set([v for _, v in cumulative.counters.items()]))
        self.assertEqual(set(dirty_values + clean_values),
                         set([v.value for _, v in cumulative.gauges.items()]))
Example #8
  def test_get_cumulative_or_updates(self):
    mc = MetricsContainer('astep')

    clean_values = []
    dirty_values = []
    for i in range(1, 11):
      counter = mc.get_counter(MetricName('namespace', 'name{}'.format(i)))
      distribution = mc.get_distribution(
          MetricName('namespace', 'name{}'.format(i)))
      gauge = mc.get_gauge(MetricName('namespace', 'name{}'.format(i)))

      counter.inc(i)
      distribution.update(i)
      gauge.set(i)
      if i % 2 == 0:
        # Some are left to be DIRTY (i.e. not yet committed).
        # Some are left to be CLEAN (i.e. already committed).
        dirty_values.append(i)
        continue
      # Assert: Counter/Distribution is DIRTY or COMMITTING (not CLEAN)
      self.assertEqual(distribution.commit.before_commit(), True)
      self.assertEqual(counter.commit.before_commit(), True)
      self.assertEqual(gauge.commit.before_commit(), True)
      distribution.commit.after_commit()
      counter.commit.after_commit()
      gauge.commit.after_commit()
      # Assert: Counter/Distribution has been committed, therefore it's CLEAN
      self.assertEqual(counter.commit.state, CellCommitState.CLEAN)
      self.assertEqual(distribution.commit.state, CellCommitState.CLEAN)
      self.assertEqual(gauge.commit.state, CellCommitState.CLEAN)
      clean_values.append(i)

    # Retrieve NON-COMMITTED updates.
    logical = mc.get_updates()
    self.assertEqual(len(logical.counters), 5)
    self.assertEqual(len(logical.distributions), 5)
    self.assertEqual(len(logical.gauges), 5)

    self.assertEqual(set(dirty_values),
                     set([v.value for _, v in logical.gauges.items()]))
    self.assertEqual(set(dirty_values),
                     set([v for _, v in logical.counters.items()]))

    # Retrieve ALL updates.
    cumulative = mc.get_cumulative()
    self.assertEqual(len(cumulative.counters), 10)
    self.assertEqual(len(cumulative.distributions), 10)
    self.assertEqual(len(cumulative.gauges), 10)

    self.assertEqual(set(dirty_values + clean_values),
                     set([v for _, v in cumulative.counters.items()]))
    self.assertEqual(set(dirty_values + clean_values),
                     set([v.value for _, v in cumulative.gauges.items()]))
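
Examples #7 and #8 exercise the cell commit lifecycle across ten counters, distributions,
and gauges. The distilled single-cell sketch below shows the same DIRTY -> COMMITTING ->
CLEAN progression; the import paths are an assumption and the metric names are
illustrative.

# Single-cell sketch of the commit lifecycle from Examples #7 and #8.
# NOTE: the import paths below are assumed, not taken from the examples.
from apache_beam.metrics.cells import CellCommitState
from apache_beam.metrics.execution import MetricsContainer
from apache_beam.metrics.metricbase import MetricName

mc = MetricsContainer('astep')
counter = mc.get_counter(MetricName('ns', 'count'))
counter.inc(7)                                  # the cell becomes DIRTY

assert counter.commit.before_commit()          # True while DIRTY or COMMITTING
counter.commit.after_commit()                  # mark the cell CLEAN
assert counter.commit.state == CellCommitState.CLEAN

assert len(mc.get_updates().counters) == 0     # CLEAN cells are not re-reported
assert len(mc.get_cumulative().counters) == 1  # the cumulative view reports everything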
Example #9
    def test_scoped_container(self):
        c1 = MetricsContainer('mystep')
        c2 = MetricsContainer('myinternalstep')
        with ScopedMetricsContainer(c1):
            self.assertEqual(c1, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(2)

            with ScopedMetricsContainer(c2):
                self.assertEqual(c2, MetricsEnvironment.current_container())
                counter = Metrics.counter('ns', 'name')
                counter.inc(3)
                self.assertEqual(list(c2.get_cumulative().counters.items()),
                                 [(MetricKey('myinternalstep',
                                             MetricName('ns', 'name')), 3)])

            self.assertEqual(c1, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(4)
            self.assertEqual(
                list(c1.get_cumulative().counters.items()),
                [(MetricKey('mystep', MetricName('ns', 'name')), 6)])
Example #10
  def test_scoped_container(self):
    c1 = MetricsContainer('mystep')
    c2 = MetricsContainer('myinternalstep')
    with ScopedMetricsContainer(c1):
      self.assertEqual(c1, MetricsEnvironment.current_container())
      counter = Metrics.counter('ns', 'name')
      counter.inc(2)

      with ScopedMetricsContainer(c2):
        self.assertEqual(c2, MetricsEnvironment.current_container())
        counter = Metrics.counter('ns', 'name')
        counter.inc(3)
        self.assertEqual(
            list(c2.get_cumulative().counters.items()),
            [(MetricKey('myinternalstep', MetricName('ns', 'name')), 3)])

      self.assertEqual(c1, MetricsEnvironment.current_container())
      counter = Metrics.counter('ns', 'name')
      counter.inc(4)
      self.assertEqual(
          list(c1.get_cumulative().counters.items()),
          [(MetricKey('mystep', MetricName('ns', 'name')), 6)])
Example #11
    def call(self):
        self._call_count += 1
        assert self._call_count <= (1 +
                                    len(self._applied_ptransform.side_inputs))
        metrics_container = MetricsContainer(
            self._applied_ptransform.full_label)
        scoped_metrics_container = ScopedMetricsContainer(metrics_container)

        for side_input in self._applied_ptransform.side_inputs:
            if side_input not in self._side_input_values:
                has_result, value = (self._evaluation_context.
                                     get_value_or_schedule_after_output(
                                         side_input, self))
                if not has_result:
                    # Monitor task will reschedule this executor once the side input is
                    # available.
                    return
                self._side_input_values[side_input] = value
        side_input_values = [
            self._side_input_values[side_input]
            for side_input in self._applied_ptransform.side_inputs
        ]

        while self._retry_count < self._max_retries_per_bundle:
            try:
                self.attempt_call(metrics_container, scoped_metrics_container,
                                  side_input_values)
                break
            except Exception as e:
                self._retry_count += 1
                logging.error(
                    'Exception at bundle %r, due to an exception.\n %s',
                    self._input_bundle, traceback.format_exc())
                if self._retry_count == self._max_retries_per_bundle:
                    logging.error('Giving up after %s attempts.',
                                  self._max_retries_per_bundle)
                    if self._retry_count == 1:
                        logging.info(
                            'Use the experimental flag --direct_runner_bundle_retry'
                            ' to retry failed bundles (up to %d times).',
                            TransformExecutor._MAX_RETRY_PER_BUNDLE)
                    self._completion_callback.handle_exception(self, e)

        self._evaluation_context.metrics().commit_physical(
            self._input_bundle, metrics_container.get_cumulative())
        self._transform_evaluation_state.complete(self)
Example #12
  def call(self):
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for side_input in self._applied_ptransform.side_inputs:
      if side_input not in self._side_input_values:
        has_result, value = (
            self._evaluation_context.get_value_or_schedule_after_output(
                side_input, self))
        if not has_result:
          # Monitor task will reschedule this executor once the side input is
          # available.
          return
        self._side_input_values[side_input] = value
    side_input_values = [self._side_input_values[side_input]
                         for side_input in self._applied_ptransform.side_inputs]

    while self._retry_count < self._max_retries_per_bundle:
      try:
        self.attempt_call(metrics_container,
                          scoped_metrics_container,
                          side_input_values)
        break
      except Exception as e:
        self._retry_count += 1
        logging.error(
            'Exception at bundle %r, due to an exception.\n %s',
            self._input_bundle, traceback.format_exc())
        if self._retry_count == self._max_retries_per_bundle:
          logging.error('Giving up after %s attempts.',
                        self._max_retries_per_bundle)
          if self._retry_count == 1:
            logging.info(
                'Use the experimental flag --direct_runner_bundle_retry'
                ' to retry failed bundles (up to %d times).',
                TransformExecutor._MAX_RETRY_PER_BUNDLE)
          self._completion_callback.handle_exception(self, e)

    self._evaluation_context.metrics().commit_physical(
        self._input_bundle,
        metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)
Example #13
  def call(self):
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for side_input in self._applied_ptransform.side_inputs:
      # Find the projection of main's window onto the side input's window.
      window_mapping_fn = side_input._view_options().get(
          'window_mapping_fn', sideinputs._global_window_mapping_fn)
      main_onto_side_window = window_mapping_fn(self._latest_main_input_window)
      block_until = main_onto_side_window.end

      if side_input not in self._side_input_values:
        value = self._evaluation_context.get_value_or_block_until_ready(
            side_input, self, block_until)
        if not value:
          # Monitor task will reschedule this executor once the side input is
          # available.
          return
        self._side_input_values[side_input] = value
    side_input_values = [self._side_input_values[side_input]
                         for side_input in self._applied_ptransform.side_inputs]

    while self._retry_count < self._max_retries_per_bundle:
      try:
        self.attempt_call(metrics_container,
                          scoped_metrics_container,
                          side_input_values)
        break
      except Exception as e:
        self._retry_count += 1
        logging.error(
            'Exception at bundle %r, due to an exception.\n %s',
            self._input_bundle, traceback.format_exc())
        if self._retry_count == self._max_retries_per_bundle:
          logging.error('Giving up after %s attempts.',
                        self._max_retries_per_bundle)
          self._completion_callback.handle_exception(self, e)

    self._evaluation_context.metrics().commit_physical(
        self._input_bundle,
        metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)
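
Examples #11 through #13 wrap attempt_call in a bounded retry loop and only report the
failure to the completion callback once the retry budget is exhausted. The standalone
sketch below illustrates that pattern; run_with_retries, attempt_call, and
completion_callback are hypothetical stand-ins for this illustration, not Beam APIs.

# Simplified retry-then-report pattern from Examples #11-#13 (illustrative only).
import logging
import traceback

_MAX_RETRIES_PER_BUNDLE = 4  # illustrative bound, standing in for _max_retries_per_bundle


def run_with_retries(attempt_call, completion_callback, input_bundle):
  """Retries a bundle a bounded number of times, then reports the last failure."""
  retry_count = 0
  while retry_count < _MAX_RETRIES_PER_BUNDLE:
    try:
      attempt_call(input_bundle)
      break  # success: stop retrying
    except Exception as e:  # pylint: disable=broad-except
      retry_count += 1
      logging.error('Exception at bundle %r:\n%s',
                    input_bundle, traceback.format_exc())
      if retry_count == _MAX_RETRIES_PER_BUNDLE:
        logging.error('Giving up after %s attempts.', _MAX_RETRIES_PER_BUNDLE)
        completion_callback(e)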