def call(self):
  self._call_count += 1
  assert self._call_count <= (1 + len(self._applied_transform.side_inputs))
  metrics_container = MetricsContainer(self._applied_transform.full_label)
  scoped_metrics_container = ScopedMetricsContainer(metrics_container)

  # Collect the values of all side inputs; bail out (to be rescheduled) if
  # any of them is not yet available.
  for side_input in self._applied_transform.side_inputs:
    if side_input not in self._side_input_values:
      has_result, value = (
          self._evaluation_context.get_value_or_schedule_after_output(
              side_input, self))
      if not has_result:
        # Monitor task will reschedule this executor once the side input is
        # available.
        return
      self._side_input_values[side_input] = value

  side_input_values = [self._side_input_values[side_input]
                       for side_input in self._applied_transform.side_inputs]

  try:
    evaluator = self._transform_evaluator_registry.for_application(
        self._applied_transform, self._input_bundle,
        side_input_values, scoped_metrics_container)

    if self._input_bundle:
      for value in self._input_bundle.get_elements_iterable():
        evaluator.process_element(value)

    with scoped_metrics_container:
      result = evaluator.finish_bundle()
      result.logical_metric_updates = metrics_container.get_cumulative()

    if self._evaluation_context.has_cache:
      for uncommitted_bundle in result.uncommitted_output_bundles:
        self._evaluation_context.append_to_cache(
            self._applied_transform, uncommitted_bundle.tag,
            uncommitted_bundle.get_elements_iterable())
      undeclared_tag_values = result.undeclared_tag_values
      if undeclared_tag_values:
        for tag, value in undeclared_tag_values.items():
          self._evaluation_context.append_to_cache(
              self._applied_transform, tag, value)

    self._completion_callback.handle_result(self._input_bundle, result)
    return result
  except Exception as e:  # pylint: disable=broad-except
    logging.warning('Task failed: %s', traceback.format_exc(), exc_info=True)
    self._completion_callback.handle_exception(e)
  finally:
    # Physical metrics are committed even if the bundle failed.
    self._evaluation_context.metrics().commit_physical(
        self._input_bundle, metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)

def call(self):
  self._call_count += 1
  assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
  metrics_container = MetricsContainer(self._applied_ptransform.full_label)
  scoped_metrics_container = ScopedMetricsContainer(metrics_container)

  for side_input in self._applied_ptransform.side_inputs:
    if side_input not in self._side_input_values:
      has_result, value = (
          self._evaluation_context.get_value_or_schedule_after_output(
              side_input, self))
      if not has_result:
        # Monitor task will reschedule this executor once the side input is
        # available.
        return
      self._side_input_values[side_input] = value

  side_input_values = [self._side_input_values[side_input]
                       for side_input in self._applied_ptransform.side_inputs]

  try:
    evaluator = self._transform_evaluator_registry.get_evaluator(
        self._applied_ptransform, self._input_bundle,
        side_input_values, scoped_metrics_container)

    # Deliver any fired timers to the evaluator before processing elements.
    if self._fired_timers:
      for timer_firing in self._fired_timers:
        evaluator.process_timer_wrapper(timer_firing)

    if self._input_bundle:
      for value in self._input_bundle.get_elements_iterable():
        evaluator.process_element(value)

    with scoped_metrics_container:
      result = evaluator.finish_bundle()
      result.logical_metric_updates = metrics_container.get_cumulative()

    if self._evaluation_context.has_cache:
      for uncommitted_bundle in result.uncommitted_output_bundles:
        self._evaluation_context.append_to_cache(
            self._applied_ptransform, uncommitted_bundle.tag,
            uncommitted_bundle.get_elements_iterable())
      undeclared_tag_values = result.undeclared_tag_values
      if undeclared_tag_values:
        for tag, value in undeclared_tag_values.items():
          self._evaluation_context.append_to_cache(
              self._applied_ptransform, tag, value)

    self._completion_callback.handle_result(self, self._input_bundle, result)
    return result
  except Exception as e:  # pylint: disable=broad-except
    self._completion_callback.handle_exception(self, e)
  finally:
    # Physical metrics are committed even if the bundle failed.
    self._evaluation_context.metrics().commit_physical(
        self._input_bundle, metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)

def test_uses_right_container(self):
  c1 = MetricsContainer('step1')
  c2 = MetricsContainer('step2')
  counter = Metrics.counter('ns', 'name')
  MetricsEnvironment.set_current_container(c1)
  counter.inc()
  MetricsEnvironment.set_current_container(c2)
  counter.inc(3)
  MetricsEnvironment.unset_current_container()

  self.assertEqual(
      list(c1.get_cumulative().counters.items()),
      [(MetricKey('step1', MetricName('ns', 'name')), 1)])
  self.assertEqual(
      list(c2.get_cumulative().counters.items()),
      [(MetricKey('step2', MetricName('ns', 'name')), 3)])

def call(self, state_sampler):
  self._call_count += 1
  assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
  metrics_container = MetricsContainer(self._applied_ptransform.full_label)
  start_state = state_sampler.scoped_state(
      self._applied_ptransform.full_label,
      'start',
      metrics_container=metrics_container)
  process_state = state_sampler.scoped_state(
      self._applied_ptransform.full_label,
      'process',
      metrics_container=metrics_container)
  finish_state = state_sampler.scoped_state(
      self._applied_ptransform.full_label,
      'finish',
      metrics_container=metrics_container)

  with start_state:
    # Side input initialization should be accounted for in start_state.
    for side_input in self._applied_ptransform.side_inputs:
      # Find the projection of main's window onto the side input's window.
      window_mapping_fn = side_input._view_options().get(
          'window_mapping_fn', sideinputs._global_window_mapping_fn)
      main_onto_side_window = window_mapping_fn(
          self._latest_main_input_window)
      block_until = main_onto_side_window.end

      if side_input not in self._side_input_values:
        value = self._evaluation_context.get_value_or_block_until_ready(
            side_input, self, block_until)
        if not value:
          # Monitor task will reschedule this executor once the side input is
          # available.
          return
        self._side_input_values[side_input] = value
    side_input_values = [
        self._side_input_values[side_input]
        for side_input in self._applied_ptransform.side_inputs
    ]

  while self._retry_count < self._max_retries_per_bundle:
    try:
      self.attempt_call(metrics_container,
                        side_input_values,
                        start_state,
                        process_state,
                        finish_state)
      break
    except Exception as e:  # pylint: disable=broad-except
      self._retry_count += 1
      _LOGGER.error('Exception at bundle %r:\n%s',
                    self._input_bundle, traceback.format_exc())
      if self._retry_count == self._max_retries_per_bundle:
        _LOGGER.error('Giving up after %s attempts.',
                      self._max_retries_per_bundle)
        self._completion_callback.handle_exception(self, e)

  self._evaluation_context.metrics().commit_physical(
      self._input_bundle, metrics_container.get_cumulative())
  self._transform_evaluation_state.complete(self)

def test_get_cumulative_or_updates(self):
  mc = MetricsContainer('astep')

  all_values = []
  for i in range(1, 11):
    counter = mc.get_counter(MetricName('namespace', 'name{}'.format(i)))
    distribution = mc.get_distribution(
        MetricName('namespace', 'name{}'.format(i)))
    gauge = mc.get_gauge(MetricName('namespace', 'name{}'.format(i)))

    counter.inc(i)
    distribution.update(i)
    gauge.set(i)
    all_values.append(i)

  # Retrieve ALL updates.
  cumulative = mc.get_cumulative()
  self.assertEqual(len(cumulative.counters), 10)
  self.assertEqual(len(cumulative.distributions), 10)
  self.assertEqual(len(cumulative.gauges), 10)

  self.assertEqual(
      set(all_values), set([v for _, v in cumulative.counters.items()]))
  self.assertEqual(
      set(all_values), set([v.value for _, v in cumulative.gauges.items()]))

def test_get_cumulative_or_updates(self):
  mc = MetricsContainer('astep')

  clean_values = []
  dirty_values = []
  for i in range(1, 11):
    counter = mc.get_counter(MetricName('namespace', 'name{}'.format(i)))
    distribution = mc.get_distribution(
        MetricName('namespace', 'name{}'.format(i)))
    gauge = mc.get_gauge(MetricName('namespace', 'name{}'.format(i)))

    counter.inc(i)
    distribution.update(i)
    gauge.set(i)
    if i % 2 == 0:
      # Some are left to be DIRTY (i.e. not yet committed).
      # Some are left to be CLEAN (i.e. already committed).
      dirty_values.append(i)
      continue
    # Assert: Counter/Distribution is DIRTY or COMMITTING (not CLEAN).
    self.assertEqual(distribution.commit.before_commit(), True)
    self.assertEqual(counter.commit.before_commit(), True)
    self.assertEqual(gauge.commit.before_commit(), True)

    distribution.commit.after_commit()
    counter.commit.after_commit()
    gauge.commit.after_commit()
    # Assert: Counter/Distribution has been committed, therefore it's CLEAN.
    self.assertEqual(counter.commit.state, CellCommitState.CLEAN)
    self.assertEqual(distribution.commit.state, CellCommitState.CLEAN)
    self.assertEqual(gauge.commit.state, CellCommitState.CLEAN)
    clean_values.append(i)

  # Retrieve NON-COMMITTED updates.
  logical = mc.get_updates()
  self.assertEqual(len(logical.counters), 5)
  self.assertEqual(len(logical.distributions), 5)
  self.assertEqual(len(logical.gauges), 5)

  self.assertEqual(
      set(dirty_values), set([v.value for _, v in logical.gauges.items()]))
  self.assertEqual(
      set(dirty_values), set([v for _, v in logical.counters.items()]))

  # Retrieve ALL updates.
  cumulative = mc.get_cumulative()
  self.assertEqual(len(cumulative.counters), 10)
  self.assertEqual(len(cumulative.distributions), 10)
  self.assertEqual(len(cumulative.gauges), 10)

  self.assertEqual(
      set(dirty_values + clean_values),
      set([v for _, v in cumulative.counters.items()]))
  self.assertEqual(
      set(dirty_values + clean_values),
      set([v.value for _, v in cumulative.gauges.items()]))

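# A minimal sketch (not part of the suite above) of the cell-commit lifecycle
# that test exercises: an updated cell is DIRTY, before_commit() claims it for
# committing, and after_commit() marks it CLEAN, which excludes it from
# get_updates() but not from get_cumulative(). The import paths below assume
# the module layout of the Beam version these tests target.
from apache_beam.metrics.cells import CellCommitState
from apache_beam.metrics.execution import MetricsContainer
from apache_beam.metrics.metricbase import MetricName


def _commit_lifecycle_sketch():
  mc = MetricsContainer('sketch_step')
  counter = mc.get_counter(MetricName('ns', 'hits'))
  counter.inc()                                  # cell is now DIRTY
  assert counter.commit.before_commit()          # DIRTY/COMMITTING -> True
  counter.commit.after_commit()                  # cell is now CLEAN
  assert counter.commit.state == CellCommitState.CLEAN
  assert len(mc.get_updates().counters) == 0     # CLEAN cells are skipped
  assert len(mc.get_cumulative().counters) == 1  # cumulative keeps everything
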
def test_scoped_container(self):
  c1 = MetricsContainer('mystep')
  c2 = MetricsContainer('myinternalstep')
  with ScopedMetricsContainer(c1):
    self.assertEqual(c1, MetricsEnvironment.current_container())
    counter = Metrics.counter('ns', 'name')
    counter.inc(2)

    with ScopedMetricsContainer(c2):
      self.assertEqual(c2, MetricsEnvironment.current_container())
      counter = Metrics.counter('ns', 'name')
      counter.inc(3)
      self.assertEqual(
          list(c2.get_cumulative().counters.items()),
          [(MetricKey('myinternalstep', MetricName('ns', 'name')), 3)])

    self.assertEqual(c1, MetricsEnvironment.current_container())
    counter = Metrics.counter('ns', 'name')
    counter.inc(4)
    self.assertEqual(
        list(c1.get_cumulative().counters.items()),
        [(MetricKey('mystep', MetricName('ns', 'name')), 6)])

def call(self):
  self._call_count += 1
  assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
  metrics_container = MetricsContainer(self._applied_ptransform.full_label)
  scoped_metrics_container = ScopedMetricsContainer(metrics_container)

  for side_input in self._applied_ptransform.side_inputs:
    if side_input not in self._side_input_values:
      has_result, value = (
          self._evaluation_context.get_value_or_schedule_after_output(
              side_input, self))
      if not has_result:
        # Monitor task will reschedule this executor once the side input is
        # available.
        return
      self._side_input_values[side_input] = value

  side_input_values = [self._side_input_values[side_input]
                       for side_input in self._applied_ptransform.side_inputs]

  # Retry the bundle up to _max_retries_per_bundle times, reporting only the
  # final failure to the completion callback.
  while self._retry_count < self._max_retries_per_bundle:
    try:
      self.attempt_call(metrics_container,
                        scoped_metrics_container,
                        side_input_values)
      break
    except Exception as e:  # pylint: disable=broad-except
      self._retry_count += 1
      logging.error('Exception at bundle %r:\n%s',
                    self._input_bundle, traceback.format_exc())
      if self._retry_count == self._max_retries_per_bundle:
        logging.error('Giving up after %s attempts.',
                      self._max_retries_per_bundle)
        if self._retry_count == 1:
          logging.info(
              'Use the experimental flag --direct_runner_bundle_retry'
              ' to retry failed bundles (up to %d times).',
              TransformExecutor._MAX_RETRY_PER_BUNDLE)
        self._completion_callback.handle_exception(self, e)

  self._evaluation_context.metrics().commit_physical(
      self._input_bundle, metrics_container.get_cumulative())
  self._transform_evaluation_state.complete(self)

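# A generic, self-contained sketch of the bounded-retry loop the call()
# variants above implement. work() and report_failure() are hypothetical
# stand-ins for attempt_call() and _completion_callback.handle_exception();
# only the control flow is the point: retry until success or until the
# attempt budget is exhausted, and report only the final failure.
import logging


def run_with_retries(work, report_failure, max_retries=4):
  retry_count = 0
  while retry_count < max_retries:
    try:
      work()  # one attempt at processing the bundle
      break
    except Exception as e:  # pylint: disable=broad-except
      retry_count += 1
      logging.error('Attempt %d failed.', retry_count, exc_info=True)
      if retry_count == max_retries:
        logging.error('Giving up after %s attempts.', max_retries)
        report_failure(e)
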
def call(self):
  self._call_count += 1
  assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
  metrics_container = MetricsContainer(self._applied_ptransform.full_label)
  scoped_metrics_container = ScopedMetricsContainer(metrics_container)

  for side_input in self._applied_ptransform.side_inputs:
    # Find the projection of main's window onto the side input's window.
    window_mapping_fn = side_input._view_options().get(
        'window_mapping_fn', sideinputs._global_window_mapping_fn)
    main_onto_side_window = window_mapping_fn(self._latest_main_input_window)
    block_until = main_onto_side_window.end

    if side_input not in self._side_input_values:
      value = self._evaluation_context.get_value_or_block_until_ready(
          side_input, self, block_until)
      if not value:
        # Monitor task will reschedule this executor once the side input is
        # available.
        return
      self._side_input_values[side_input] = value

  side_input_values = [self._side_input_values[side_input]
                       for side_input in self._applied_ptransform.side_inputs]

  while self._retry_count < self._max_retries_per_bundle:
    try:
      self.attempt_call(metrics_container,
                        scoped_metrics_container,
                        side_input_values)
      break
    except Exception as e:  # pylint: disable=broad-except
      self._retry_count += 1
      logging.error('Exception at bundle %r:\n%s',
                    self._input_bundle, traceback.format_exc())
      if self._retry_count == self._max_retries_per_bundle:
        logging.error('Giving up after %s attempts.',
                      self._max_retries_per_bundle)
        self._completion_callback.handle_exception(self, e)

  self._evaluation_context.metrics().commit_physical(
      self._input_bundle, metrics_container.get_cumulative())
  self._transform_evaluation_state.complete(self)

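# A minimal usage sketch of the scoping pattern the call() variants rely on:
# metric updates recorded while a ScopedMetricsContainer is active are
# attributed to that container's step label and surface through its
# get_cumulative(). Import paths assume the same module layout as the tests
# above (MetricsContainer and ScopedMetricsContainer in
# apache_beam.metrics.execution, Metrics in apache_beam.metrics.metric).
from apache_beam.metrics.execution import MetricsContainer
from apache_beam.metrics.execution import ScopedMetricsContainer
from apache_beam.metrics.metric import Metrics


def _scoped_metrics_sketch():
  container = MetricsContainer('my_step')
  with ScopedMetricsContainer(container):
    Metrics.counter('ns', 'elements').inc()
  # One counter cell, keyed by step 'my_step' and name ('ns', 'elements'),
  # with cumulative value 1.
  print(container.get_cumulative().counters)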