def call(self):
    """Evaluate the applied transform once for this executor's input.

    Side-input values that are not cached yet are requested from the
    evaluation context. If any of them is not available, the method returns
    ``None`` immediately: no evaluator is created, no metrics are committed
    and the executor is not marked complete.

    Otherwise an evaluator is obtained from the registry, fired timers and
    the elements of the input bundle are fed to it, and the bundle is
    finished. The result is reported to the completion callback and
    returned. An exception raised during evaluation is reported to the
    completion callback instead of propagating. In both cases the physical
    metrics are committed and the executor is marked complete.

    Returns:
      The value returned by ``evaluator.finish_bundle()`` on success,
      otherwise ``None``.
    """
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for side_input in self._applied_ptransform.side_inputs:
        if side_input not in self._side_input_values:
            has_result, value = (
                self._evaluation_context.get_value_or_schedule_after_output(
                    side_input, self))
            if not has_result:
                # Monitor task will reschedule this executor once the side
                # input is available.
                return
            self._side_input_values[side_input] = value

    side_input_values = [
        self._side_input_values[side_input]
        for side_input in self._applied_ptransform.side_inputs
    ]

    try:
        evaluator = self._transform_evaluator_registry.get_evaluator(
            self._applied_ptransform, self._input_bundle,
            side_input_values, scoped_metrics_container)

        if self._fired_timers:
            for timer_firing in self._fired_timers:
                evaluator.process_timer_wrapper(timer_firing)

        if self._input_bundle:
            for value in self._input_bundle.get_elements_iterable():
                evaluator.process_element(value)

        with scoped_metrics_container:
            result = evaluator.finish_bundle()
            result.logical_metric_updates = metrics_container.get_cumulative()

        if self._evaluation_context.has_cache:
            for uncommitted_bundle in result.uncommitted_output_bundles:
                self._evaluation_context.append_to_cache(
                    self._applied_ptransform, uncommitted_bundle.tag,
                    uncommitted_bundle.get_elements_iterable())
            undeclared_tag_values = result.undeclared_tag_values
            if undeclared_tag_values:
                # items() rather than the Python-2-only iteritems(), so the
                # loop works on both Python 2 and Python 3.
                for tag, value in undeclared_tag_values.items():
                    self._evaluation_context.append_to_cache(
                        self._applied_ptransform, tag, value)

        self._completion_callback.handle_result(
            self, self._input_bundle, result)
        return result
    except Exception as e:  # pylint: disable=broad-except
        # Failures are reported through the callback, not raised.
        self._completion_callback.handle_exception(self, e)
    finally:
        # Runs on success and on failure alike.
        self._evaluation_context.metrics().commit_physical(
            self._input_bundle,
            metrics_container.get_cumulative())
        self._transform_evaluation_state.complete(self)
def __init__(self, name_context, spec, counter_factory, state_sampler):
    """Sets up the state shared by every worker operation.

    Args:
      name_context: A NameContext instance or string(deprecated), with the
        name information for this operation.
      spec: A operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    # TODO(BEAM-4028): Clean this up once it's completely migrated.
    # Anything that is not already a NameContext is wrapped in one, because
    # the context is what metrics and state sampling are keyed on below.
    if not isinstance(name_context, common.NameContext):
        name_context = common.NameContext(name_context)
    self.name_context = name_context

    self.spec = spec
    self.counter_factory = counter_factory
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    self.metrics_container = MetricsContainer(self.name_context.metrics_name())
    self.state_sampler = state_sampler

    def _scoped_state(phase):
        # One sampler state per lifecycle phase, all tied to this
        # operation's metrics container.
        return self.state_sampler.scoped_state(
            self.name_context,
            phase,
            metrics_container=self.metrics_container)

    self.scoped_start_state = _scoped_state('start')
    self.scoped_process_state = _scoped_state('process')
    self.scoped_finish_state = _scoped_state('finish')
    # TODO(ccy): the '-abort' state can be added when the abort is supported
    # in Operations.
    self.receivers = []
def __init__(self, operation_name, spec, counter_factory, state_sampler):
    """Sets up the state shared by every worker operation.

    Args:
      operation_name: The system name assigned by the runner for this
        operation.
      spec: A operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    self.operation_name = operation_name
    self.spec = spec
    self.counter_factory = counter_factory
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    self.step_name = operation_name
    container = MetricsContainer(self.step_name)
    self.metrics_container = container
    self.scoped_metrics_container = ScopedMetricsContainer(container)

    self.state_sampler = state_sampler
    # One sampler state per lifecycle phase, created in start/process/finish
    # order.
    sampled_states = []
    for phase in ('start', 'process', 'finish'):
        sampled_states.append(
            self.state_sampler.scoped_state(self.operation_name, phase))
    (self.scoped_start_state,
     self.scoped_process_state,
     self.scoped_finish_state) = sampled_states
    # TODO(ccy): the '-abort' state can be added when the abort is supported
    # in Operations.
    self.receivers = []
def test_uses_right_container(self):
    """A counter increment is recorded in whichever container is current."""
    c1 = MetricsContainer('step1')
    c2 = MetricsContainer('step2')
    counter = Metrics.counter('ns', 'name')

    MetricsEnvironment.set_current_container(c1)
    counter.inc()
    MetricsEnvironment.set_current_container(c2)
    counter.inc(3)
    MetricsEnvironment.unset_current_container()

    # items() is materialised with list() before comparing: on Python 3 it
    # returns a view object, and a view never compares equal to a list.
    self.assertEqual(
        list(c1.get_cumulative().counters.items()),
        [(MetricKey('step1', MetricName('ns', 'name')), 1)])

    self.assertEqual(
        list(c2.get_cumulative().counters.items()),
        [(MetricKey('step2', MetricName('ns', 'name')), 3)])
def __init__(self,
             name_context,  # type: Union[str, common.NameContext]
             spec,
             counter_factory,
             state_sampler  # type: StateSampler
            ):
    """Sets up the state shared by every worker operation.

    Args:
      name_context: A NameContext instance or string(deprecated), with the
        name information for this operation.
      spec: A operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    # TODO(BEAM-4028): Clean this up once it's completely migrated.
    # The NameContext is what metrics and state sampling are keyed on, so
    # any other value is wrapped in one.
    self.name_context = (
        name_context
        if isinstance(name_context, common.NameContext)
        else common.NameContext(name_context))

    self.spec = spec
    self.counter_factory = counter_factory
    self.execution_context = None  # type: Optional[ExecutionContext]
    self.consumers = collections.defaultdict(
        list)  # type: DefaultDict[int, List[Operation]]

    # These are overwritten in the legacy harness.
    self.metrics_container = MetricsContainer(self.name_context.metrics_name())
    self.state_sampler = state_sampler

    def _phase_state(phase):
        # Sampler state for one lifecycle phase, bound to this operation's
        # metrics container.
        return self.state_sampler.scoped_state(
            self.name_context,
            phase,
            metrics_container=self.metrics_container)

    self.scoped_start_state = _phase_state('start')
    self.scoped_process_state = _phase_state('process')
    self.scoped_finish_state = _phase_state('finish')
    # TODO(ccy): the '-abort' state can be added when the abort is supported
    # in Operations.
    self.receivers = []  # type: List[ConsumerSet]
    # Legacy workers cannot call setup() until after setting additional state
    # on the operation.
    self.setup_done = False
    self.step_name = None  # type: Optional[str]
def call(self):
    """Evaluate the bundle, retrying failed attempts up to the retry limit.

    Side-input values that are not cached yet are requested from the
    evaluation context; if one is not available the method returns early
    without attempting the bundle. Otherwise ``attempt_call`` is retried
    until it succeeds or the per-bundle retry limit is reached, at which
    point the last exception is passed to the completion callback. Physical
    metrics are then committed and the executor is marked complete.
    """
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for view in self._applied_ptransform.side_inputs:
        if view in self._side_input_values:
            continue
        has_result, resolved = (
            self._evaluation_context.get_value_or_schedule_after_output(
                view, self))
        if not has_result:
            # Monitor task will reschedule this executor once the side input
            # is available.
            return
        self._side_input_values[view] = resolved

    side_input_values = []
    for view in self._applied_ptransform.side_inputs:
        side_input_values.append(self._side_input_values[view])

    while self._retry_count < self._max_retries_per_bundle:
        try:
            self.attempt_call(
                metrics_container, scoped_metrics_container, side_input_values)
        except Exception as err:
            self._retry_count += 1
            logging.error(
                'Exception at bundle %r, due to an exception.\n %s',
                self._input_bundle, traceback.format_exc())
            if self._retry_count != self._max_retries_per_bundle:
                # Attempts remain; go around again.
                continue
            logging.error(
                'Giving up after %s attempts.', self._max_retries_per_bundle)
            if self._retry_count == 1:
                logging.info(
                    'Use the experimental flag --direct_runner_bundle_retry'
                    ' to retry failed bundles (up to %d times).',
                    TransformExecutor._MAX_RETRY_PER_BUNDLE)
            self._completion_callback.handle_exception(self, err)
        else:
            break

    self._evaluation_context.metrics().commit_physical(
        self._input_bundle, metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)
def __init__(self, name_context, spec, counter_factory, state_sampler):
    """Sets up the state shared by every worker operation.

    Args:
      name_context: A NameContext instance or string(deprecated), with the
        name information for this operation.
      spec: A operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    # TODO(BEAM-4028): Clean this up once it's completely migrated.
    # The NameContext is what metrics and state sampling are named from, so
    # any other value is wrapped in one.
    if not isinstance(name_context, common.NameContext):
        name_context = common.NameContext(name_context)
    self.name_context = name_context

    # TODO(BEAM-4028): Remove following two lines. Rely on name context.
    self.operation_name = self.name_context.step_name
    self.step_name = self.name_context.logging_name()

    self.spec = spec
    self.counter_factory = counter_factory
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    self.metrics_container = MetricsContainer(self.name_context.metrics_name())
    # TODO(BEAM-4094): Remove ScopedMetricsContainer after Dataflow no longer
    # depends on it.
    self.scoped_metrics_container = ScopedMetricsContainer()

    self.state_sampler = state_sampler

    def _phase_state(phase):
        # Sampler state for one lifecycle phase, named after the metrics
        # name and bound to this operation's metrics container.
        return self.state_sampler.scoped_state(
            self.name_context.metrics_name(),
            phase,
            metrics_container=self.metrics_container)

    self.scoped_start_state = _phase_state('start')
    self.scoped_process_state = _phase_state('process')
    self.scoped_finish_state = _phase_state('finish')
    # TODO(ccy): the '-abort' state can be added when the abort is supported
    # in Operations.
    self.receivers = []
def call(self):
    """Evaluate the bundle once its side inputs are ready, with retries.

    For every side input the latest main-input window is projected onto the
    side input's windowing, and the end of that window is passed to the
    evaluation context as the point to block until. A falsy value returned
    for a side input is treated as "not ready" and ends the call early.
    Otherwise ``attempt_call`` is retried until it succeeds or the
    per-bundle retry limit is reached, at which point the last exception is
    passed to the completion callback. Physical metrics are then committed
    and the executor is marked complete.
    """
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for view in self._applied_ptransform.side_inputs:
        # Find the projection of main's window onto the side input's window.
        mapping_fn = view._view_options().get(
            'window_mapping_fn', sideinputs._global_window_mapping_fn)
        projected_window = mapping_fn(self._latest_main_input_window)
        block_until = projected_window.end

        if view in self._side_input_values:
            continue
        resolved = self._evaluation_context.get_value_or_block_until_ready(
            view, self, block_until)
        if not resolved:
            # Monitor task will reschedule this executor once the side input
            # is available.
            return
        self._side_input_values[view] = resolved

    side_input_values = []
    for view in self._applied_ptransform.side_inputs:
        side_input_values.append(self._side_input_values[view])

    while self._retry_count < self._max_retries_per_bundle:
        try:
            self.attempt_call(
                metrics_container, scoped_metrics_container, side_input_values)
        except Exception as err:
            self._retry_count += 1
            logging.error(
                'Exception at bundle %r, due to an exception.\n %s',
                self._input_bundle, traceback.format_exc())
            if self._retry_count != self._max_retries_per_bundle:
                # Attempts remain; go around again.
                continue
            logging.error(
                'Giving up after %s attempts.', self._max_retries_per_bundle)
            self._completion_callback.handle_exception(self, err)
        else:
            break

    self._evaluation_context.metrics().commit_physical(
        self._input_bundle, metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)
def call(self):
    """Evaluate the bundle, retrying failed attempts up to the retry limit.

    Missing side-input values are requested from the evaluation context
    first; when one is not available the method returns early without
    attempting the bundle. ``attempt_call`` is then retried until it
    succeeds or the per-bundle retry limit is reached, and on the final
    failure the exception goes to the completion callback. Afterwards the
    physical metrics are committed and the executor is marked complete.
    """
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for pending in self._applied_ptransform.side_inputs:
        if pending not in self._side_input_values:
            lookup = self._evaluation_context.get_value_or_schedule_after_output(
                pending, self)
            has_result, value = lookup
            if not has_result:
                # Monitor task will reschedule this executor once the side
                # input is available.
                return
            self._side_input_values[pending] = value

    side_input_values = [
        self._side_input_values[key]
        for key in self._applied_ptransform.side_inputs
    ]

    while self._retry_count < self._max_retries_per_bundle:
        try:
            self.attempt_call(
                metrics_container, scoped_metrics_container, side_input_values)
            break
        except Exception as failure:
            self._retry_count += 1
            logging.error(
                'Exception at bundle %r, due to an exception.\n %s',
                self._input_bundle,
                traceback.format_exc())
            out_of_attempts = (
                self._retry_count == self._max_retries_per_bundle)
            if out_of_attempts:
                logging.error(
                    'Giving up after %s attempts.',
                    self._max_retries_per_bundle)
                if self._retry_count == 1:
                    logging.info(
                        'Use the experimental flag --direct_runner_bundle_retry'
                        ' to retry failed bundles (up to %d times).',
                        TransformExecutor._MAX_RETRY_PER_BUNDLE)
                self._completion_callback.handle_exception(self, failure)

    self._evaluation_context.metrics().commit_physical(
        self._input_bundle, metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)
def test_get_cumulative_or_updates(self):
    """get_cumulative() reports every counter, distribution and gauge."""
    mc = MetricsContainer('astep')

    def metric_name(index):
        return MetricName('namespace', 'name{}'.format(index))

    expected_values = []
    for index in range(1, 11):
        counter = mc.get_counter(metric_name(index))
        distribution = mc.get_distribution(metric_name(index))
        gauge = mc.get_gauge(metric_name(index))

        counter.inc(index)
        distribution.update(index)
        gauge.set(index)
        expected_values.append(index)

    # Retrieve ALL updates.
    cumulative = mc.get_cumulative()
    for cells in (cumulative.counters,
                  cumulative.distributions,
                  cumulative.gauges):
        self.assertEqual(len(cells), 10)

    self.assertEqual(
        set(expected_values),
        {total for _, total in cumulative.counters.items()})
    self.assertEqual(
        set(expected_values),
        {cell.value for _, cell in cumulative.gauges.items()})
def test_scoped_container(self):
    """Nested ScopedMetricsContainers route increments and restore the outer one."""
    c1 = MetricsContainer('mystep')
    c2 = MetricsContainer('myinternalstep')
    with ScopedMetricsContainer(c1):
        self.assertEqual(c1, MetricsEnvironment.current_container())
        counter = Metrics.counter('ns', 'name')
        counter.inc(2)

        with ScopedMetricsContainer(c2):
            self.assertEqual(c2, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(3)
            # items() is materialised with list() before comparing: on
            # Python 3 it returns a view object, and a view never compares
            # equal to a list.
            self.assertEqual(
                list(c2.get_cumulative().counters.items()),
                [(MetricKey('myinternalstep', MetricName('ns', 'name')), 3)])

        # Leaving the inner scope makes c1 current again.
        self.assertEqual(c1, MetricsEnvironment.current_container())
        counter = Metrics.counter('ns', 'name')
        counter.inc(4)
        self.assertEqual(
            list(c1.get_cumulative().counters.items()),
            [(MetricKey('mystep', MetricName('ns', 'name')), 6)])
def test_get_cumulative_or_updates(self):
    """get_updates() reports uncommitted cells; get_cumulative() reports all."""
    mc = MetricsContainer('astep')

    def metric_name(index):
        return MetricName('namespace', 'name{}'.format(index))

    clean_values = []
    dirty_values = []
    for index in range(1, 11):
        counter = mc.get_counter(metric_name(index))
        distribution = mc.get_distribution(metric_name(index))
        gauge = mc.get_gauge(metric_name(index))

        counter.inc(index)
        distribution.update(index)
        gauge.set(index)

        if index % 2 == 0:
            # Some are left to be DIRTY (i.e. not yet committed).
            # Some are left to be CLEAN (i.e. already committed).
            dirty_values.append(index)
        else:
            # Assert: Counter/Distribution is DIRTY or COMMITTING (not CLEAN)
            for cell in (distribution, counter, gauge):
                self.assertEqual(cell.commit.before_commit(), True)
            for cell in (distribution, counter, gauge):
                cell.commit.after_commit()
            # Assert: Counter/Distribution has been committed, therefore
            # it's CLEAN
            for cell in (counter, distribution, gauge):
                self.assertEqual(cell.commit.state, CellCommitState.CLEAN)
            clean_values.append(index)

    # Retrieve NON-COMMITTED updates.
    logical = mc.get_updates()
    for cells in (logical.counters, logical.distributions, logical.gauges):
        self.assertEqual(len(cells), 5)

    self.assertEqual(
        set(dirty_values),
        {cell.value for _, cell in logical.gauges.items()})
    self.assertEqual(
        set(dirty_values),
        {total for _, total in logical.counters.items()})

    # Retrieve ALL updates.
    cumulative = mc.get_cumulative()
    for cells in (cumulative.counters,
                  cumulative.distributions,
                  cumulative.gauges):
        self.assertEqual(len(cells), 10)

    self.assertEqual(
        set(dirty_values + clean_values),
        {total for _, total in cumulative.counters.items()})
    self.assertEqual(
        set(dirty_values + clean_values),
        {cell.value for _, cell in cumulative.gauges.items()})
def test_get_cumulative_or_updates(self):
    """get_updates() reports uncommitted cells; get_cumulative() reports all."""
    mc = MetricsContainer('astep')

    def metric_name(index):
        return MetricName('namespace', 'name{}'.format(index))

    clean_values = []
    dirty_values = []
    for index in range(1, 11):
        counter = mc.get_counter(metric_name(index))
        distribution = mc.get_distribution(metric_name(index))

        counter.inc(index)
        distribution.update(index)

        if index % 2 == 0:
            # Some are left to be DIRTY (i.e. not yet committed).
            # Some are left to be CLEAN (i.e. already committed).
            dirty_values.append(index)
        else:
            # Assert: Counter/Distribution is DIRTY or COMMITTING (not CLEAN)
            for cell in (distribution, counter):
                self.assertEqual(cell.commit.before_commit(), True)
            for cell in (distribution, counter):
                cell.commit.after_commit()
            # Assert: Counter/Distribution has been committed, therefore
            # it's CLEAN
            for cell in (counter, distribution):
                self.assertEqual(cell.commit.state, CellCommitState.CLEAN)
            clean_values.append(index)

    # Retrieve NON-COMMITTED updates.
    logical = mc.get_updates()
    self.assertEqual(len(logical.counters), 5)
    self.assertEqual(len(logical.distributions), 5)
    self.assertEqual(
        set(dirty_values),
        {total for _, total in logical.counters.items()})

    # Retrieve ALL updates.
    cumulative = mc.get_cumulative()
    self.assertEqual(len(cumulative.counters), 10)
    self.assertEqual(len(cumulative.distributions), 10)
    self.assertEqual(
        set(dirty_values + clean_values),
        {total for _, total in cumulative.counters.items()})
def test_uses_right_container(self):
    """A counter increment is recorded in whichever container is current."""
    first = MetricsContainer('step1')
    second = MetricsContainer('step2')
    counter = Metrics.counter('ns', 'name')

    MetricsEnvironment.set_current_container(first)
    counter.inc()
    MetricsEnvironment.set_current_container(second)
    counter.inc(3)
    MetricsEnvironment.unset_current_container()

    # Each container should hold only the increment made while it was current.
    recorded_first = list(first.get_cumulative().counters.items())
    self.assertEqual(
        recorded_first,
        [(MetricKey('step1', MetricName('ns', 'name')), 1)])

    recorded_second = list(second.get_cumulative().counters.items())
    self.assertEqual(
        recorded_second,
        [(MetricKey('step2', MetricName('ns', 'name')), 3)])
def test_create_counter_distribution(self):
    """Counter and distribution updates land in the current container."""
    MetricsEnvironment.set_current_container(MetricsContainer('mystep'))
    try:
        counter_ns = 'aCounterNamespace'
        distro_ns = 'aDistributionNamespace'
        name = 'a_name'
        counter = Metrics.counter(counter_ns, name)
        distro = Metrics.distribution(distro_ns, name)
        counter.inc(10)
        counter.dec(3)
        distro.update(10)
        distro.update(2)
        self.assertTrue(isinstance(counter, Metrics.DelegatingCounter))
        self.assertTrue(isinstance(distro, Metrics.DelegatingDistribution))

        # The recorded values must outlive the delegating objects.
        del distro
        del counter

        container = MetricsEnvironment.current_container()
        self.assertEqual(
            container.counters[MetricName(counter_ns, name)].get_cumulative(),
            7)
        self.assertEqual(
            container.distributions[
                MetricName(distro_ns, name)].get_cumulative(),
            DistributionData(12, 2, 2, 10))
    finally:
        # Remove the container installed above even when an assertion fails,
        # so it does not stay current for tests that run afterwards.
        MetricsEnvironment.unset_current_container()
def test_create_counter_distribution(self):
    """Counter and distribution updates land in the scoped state's container."""
    sampler = statesampler.StateSampler('', counters.CounterFactory())
    statesampler.set_current_tracker(sampler)
    state1 = sampler.scoped_state(
        'mystep', 'myState', metrics_container=MetricsContainer('mystep'))

    try:
        sampler.start()
        with state1:
            counter_ns = 'aCounterNamespace'
            distro_ns = 'aDistributionNamespace'
            name = 'a_name'

            counter = Metrics.counter(counter_ns, name)
            distro = Metrics.distribution(distro_ns, name)
            counter.inc(10)
            counter.dec(3)
            distro.update(10)
            distro.update(2)
            self.assertTrue(isinstance(counter, Metrics.DelegatingCounter))
            self.assertTrue(isinstance(distro, Metrics.DelegatingDistribution))

            # The recorded values must outlive the delegating objects.
            del distro
            del counter

            container = MetricsEnvironment.current_container()
            counter_cell = container.get_counter(MetricName(counter_ns, name))
            self.assertEqual(counter_cell.get_cumulative(), 7)
            distro_cell = container.get_distribution(
                MetricName(distro_ns, name))
            self.assertEqual(
                distro_cell.get_cumulative(), DistributionData(12, 2, 2, 10))
    finally:
        sampler.stop()
def test_scoped_container(self):
    """Nested ScopedMetricsContainers route increments and restore the outer one."""
    outer = MetricsContainer('mystep')
    inner = MetricsContainer('myinternalstep')

    with ScopedMetricsContainer(outer):
        self.assertEqual(outer, MetricsEnvironment.current_container())
        counter = Metrics.counter('ns', 'name')
        counter.inc(2)

        with ScopedMetricsContainer(inner):
            self.assertEqual(inner, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(3)
            inner_counters = list(inner.get_cumulative().counters.items())
            self.assertEqual(
                inner_counters,
                [(MetricKey('myinternalstep', MetricName('ns', 'name')), 3)])

        # Leaving the inner scope makes the outer container current again.
        self.assertEqual(outer, MetricsEnvironment.current_container())
        counter = Metrics.counter('ns', 'name')
        counter.inc(4)
        outer_counters = list(outer.get_cumulative().counters.items())
        self.assertEqual(
            outer_counters,
            [(MetricKey('mystep', MetricName('ns', 'name')), 6)])
class Operation(object):
    """Live counterpart of a work item specification.

    An operation may have one or more outputs, and every output may feed one
    or more receiver operations that take it as input.
    """

    def __init__(self, operation_name, spec, counter_factory, state_sampler):
        """Sets up the state shared by every worker operation.

        Args:
          operation_name: The system name assigned by the runner for this
            operation.
          spec: A operation_specs.Worker* instance.
          counter_factory: The CounterFactory to use for our counters.
          state_sampler: The StateSampler for the current operation.
        """
        self.operation_name = operation_name
        self.spec = spec
        self.counter_factory = counter_factory
        self.consumers = collections.defaultdict(list)

        # These are overwritten in the legacy harness.
        self.step_name = operation_name
        container = MetricsContainer(self.step_name)
        self.metrics_container = container
        self.scoped_metrics_container = ScopedMetricsContainer(container)

        self.state_sampler = state_sampler
        # One sampler state per lifecycle phase, created in
        # start/process/finish order.
        sampled_states = []
        for phase in ('start', 'process', 'finish'):
            sampled_states.append(
                self.state_sampler.scoped_state(self.operation_name, phase))
        (self.scoped_start_state,
         self.scoped_process_state,
         self.scoped_finish_state) = sampled_states
        # TODO(ccy): the '-abort' state can be added when the abort is
        # supported in Operations.
        self.receivers = []

    def start(self):
        """Start operation."""
        root_logger = logging.getLogger()
        self.debug_logging_enabled = root_logger.isEnabledFor(logging.DEBUG)
        # Everything except WorkerSideInputSource, which is not a
        # top-level operation, should have output_coders
        if not getattr(self.spec, 'output_coders', None):
            return
        consumer_sets = []
        for index, coder in enumerate(self.spec.output_coders):
            consumer_sets.append(
                ConsumerSet(
                    self.counter_factory,
                    self.step_name,
                    index,
                    self.consumers[index],
                    coder))
        self.receivers = consumer_sets

    def finish(self):
        """Finish operation."""
        pass

    def process(self, o):
        """Process element in operation."""
        pass

    def output(self, windowed_value, output_index=0):
        """Hands windowed_value to the receiver at output_index."""
        receiver = cython.cast(Receiver, self.receivers[output_index])
        receiver.receive(windowed_value)

    def add_receiver(self, operation, output_index=0):
        """Adds a receiver operation for the specified output."""
        self.consumers[output_index].append(operation)

    def progress_metrics(self):
        """Builds the Metrics.PTransform message for this operation.

        The message carries the summed sampled seconds of the start, process
        and finish states, the element count of the only output (when there
        is exactly one receiver), and the user metrics of the container.
        """
        ptransform_proto = beam_fn_api_pb2.Metrics.PTransform

        total_time_spent = (
            self.scoped_start_state.sampled_seconds()
            + self.scoped_process_state.sampled_seconds()
            + self.scoped_finish_state.sampled_seconds())

        # Multi-output operations should override this.
        if len(self.receivers) == 1:
            # If there is exactly one output, we can unambiguously
            # fix its name later, which we do.
            # TODO(robertwb): Plumb the actual name here.
            only_receiver = self.receivers[0]
            output_element_counts = {
                'ONLY_OUTPUT': only_receiver.opcounter.element_counter.value()
            }
        else:
            output_element_counts = None

        measured = ptransform_proto.Measured(
            total_time_spent=total_time_spent,
            output_element_counts=output_element_counts)
        processed_elements = ptransform_proto.ProcessedElements(
            measured=measured)
        return ptransform_proto(
            processed_elements=processed_elements,
            user=self.metrics_container.to_runner_api())

    def __str__(self):
        """Generates a useful string for this object.

        Compactly displays interesting fields.  In particular, pickled
        fields are not displayed.  Note that we collapse the fields of the
        contained Worker* object into this object, since there is a 1-1
        mapping between Operation and operation_specs.Worker*.

        Returns:
          Compact string representing this object.
        """
        return self.str_internal()

    def str_internal(self, is_recursive=False):
        """Internal helper for __str__ that supports recursion.

        When recursing on receivers, keep the output short.

        Args:
          is_recursive: whether to omit some details, particularly receivers.

        Returns:
          Compact string representing this object.
        """
        label = self.__class__.__name__
        if hasattr(self, 'step_name'):
            label += ' %s' % self.step_name
            if is_recursive:
                # If we have a step name, stop here, no more detail needed.
                return '<%s>' % label

        if self.spec is not None:
            fields = operation_specs.worker_printable_fields(self.spec)
        else:
            fields = []

        if not is_recursive and getattr(self, 'receivers', []):
            rendered = ', '.join(str(receiver) for receiver in self.receivers)
            fields.append('receivers=[%s]' % rendered)

        return '<%s %s>' % (label, ', '.join(fields))
class Operation(object):
    """An operation representing the live version of a work item specification.

    An operation can have one or more outputs and for each output it can have
    one or more receiver operations that will take that as input.
    """

    def __init__(self, name_context, spec, counter_factory, state_sampler):
        """Initializes a worker operation instance.

        Args:
          name_context: A NameContext instance or string(deprecated), with
            the name information for this operation.
          spec: A operation_specs.Worker* instance.
          counter_factory: The CounterFactory to use for our counters.
          state_sampler: The StateSampler for the current operation.
        """
        if isinstance(name_context, common.NameContext):
            # TODO(BEAM-4028): Clean this up once it's completely migrated.
            # We use the specific operation name that is used for metrics
            # and state sampling.
            self.name_context = name_context
        else:
            self.name_context = common.NameContext(name_context)

        self.spec = spec
        self.counter_factory = counter_factory
        self.execution_context = None
        self.consumers = collections.defaultdict(list)

        # These are overwritten in the legacy harness.
        self.metrics_container = MetricsContainer(
            self.name_context.metrics_name())

        self.state_sampler = state_sampler
        self.scoped_start_state = self.state_sampler.scoped_state(
            self.name_context, 'start',
            metrics_container=self.metrics_container)
        self.scoped_process_state = self.state_sampler.scoped_state(
            self.name_context, 'process',
            metrics_container=self.metrics_container)
        self.scoped_finish_state = self.state_sampler.scoped_state(
            self.name_context, 'finish',
            metrics_container=self.metrics_container)
        # TODO(ccy): the '-abort' state can be added when the abort is
        # supported in Operations.
        self.receivers = []
        # Legacy workers cannot call setup() until after setting additional
        # state on the operation.
        self.setup_done = False

    def setup(self):
        """Creates one receiver per output coder and marks setup as done.

        The work is performed inside the 'start' scoped state.
        """
        with self.scoped_start_state:
            self.debug_logging_enabled = logging.getLogger().isEnabledFor(
                logging.DEBUG)
            # Everything except WorkerSideInputSource, which is not a
            # top-level operation, should have output_coders
            # TODO(pabloem): Define better what step name is used here.
            if getattr(self.spec, 'output_coders', None):
                self.receivers = [
                    ConsumerSet.create(
                        self.counter_factory,
                        self.name_context.logging_name(),
                        i,
                        self.consumers[i],
                        coder)
                    for i, coder in enumerate(self.spec.output_coders)]
        self.setup_done = True

    def start(self):
        """Start operation."""
        if not self.setup_done:
            # For legacy workers.
            self.setup()

    def process(self, o):
        """Process element in operation."""
        pass

    def try_split(self, fraction_of_remainder):
        """Returns None; this base implementation performs no split."""
        return None

    def finish(self):
        """Finish operation."""
        pass

    def reset(self):
        """Resets the metrics container of this operation."""
        self.metrics_container.reset()

    def output(self, windowed_value, output_index=0):
        """Hands windowed_value to the receiver at output_index."""
        cython.cast(
            Receiver, self.receivers[output_index]).receive(windowed_value)

    def add_receiver(self, operation, output_index=0):
        """Adds a receiver operation for the specified output."""
        self.consumers[output_index].append(operation)

    def progress_metrics(self):
        """Builds the Metrics.PTransform message for this operation.

        The message carries the summed sampled seconds of the start, process
        and finish states, the element count of the only output (when there
        is exactly one receiver), and the user metrics of the container.
        """
        return beam_fn_api_pb2.Metrics.PTransform(
            processed_elements=(
                beam_fn_api_pb2.Metrics.PTransform.ProcessedElements(
                    measured=beam_fn_api_pb2.Metrics.PTransform.Measured(
                        total_time_spent=(
                            self.scoped_start_state.sampled_seconds()
                            + self.scoped_process_state.sampled_seconds()
                            + self.scoped_finish_state.sampled_seconds()),
                        # Multi-output operations should override this.
                        output_element_counts=(
                            # If there is exactly one output, we can
                            # unambiguously fix its name later, which we do.
                            # TODO(robertwb): Plumb the actual name here.
                            {'ONLY_OUTPUT': self.receivers[0].opcounter
                                            .element_counter.value()}
                            if len(self.receivers) == 1
                            else None)))),
            user=self.metrics_container.to_runner_api())

    def monitoring_infos(self, transform_id):
        """Returns the MonitoringInfos collected by this operation.

        The result is the dict returned by execution_time_monitoring_infos,
        updated with the element-count and then the user MonitoringInfos.
        """
        all_monitoring_infos = self.execution_time_monitoring_infos(
            transform_id)
        all_monitoring_infos.update(
            self.element_count_monitoring_infos(transform_id))
        all_monitoring_infos.update(self.user_monitoring_infos(transform_id))
        return all_monitoring_infos

    def element_count_monitoring_infos(self, transform_id):
        """Returns the element count MonitoringInfo collected by this operation.

        Returns:
          A single-entry dict keyed by monitoring_infos.to_key(...) when
          there is exactly one receiver, otherwise an empty dict.
        """
        if len(self.receivers) != 1:
            return {}
        # If there is exactly one output, we can unambiguously
        # fix its name later, which we do.
        # TODO(robertwb): Plumb the actual name here.
        mi = monitoring_infos.int64_counter(
            monitoring_infos.ELEMENT_COUNT_URN,
            self.receivers[0].opcounter.element_counter.value(),
            ptransform=transform_id,
            # The guard above already established a single receiver, so the
            # tag is always 'ONLY_OUTPUT' here.
            tag='ONLY_OUTPUT',
        )
        return {monitoring_infos.to_key(mi): mi}

    def user_monitoring_infos(self, transform_id):
        """Returns the user MonitoringInfos collected by this operation."""
        return self.metrics_container.to_runner_api_monitoring_infos(
            transform_id)

    def execution_time_monitoring_infos(self, transform_id):
        """Returns the execution-time MonitoringInfos of this operation.

        Each scoped state is read exactly once, so the reported total always
        equals the sum of the three per-phase values in the same result.

        Returns:
          A dict keyed by monitoring_infos.to_key(...) holding the start,
          process, finish and total millisecond counters.
        """
        start_msecs = self.scoped_start_state.sampled_msecs_int()
        process_msecs = self.scoped_process_state.sampled_msecs_int()
        finish_msecs = self.scoped_finish_state.sampled_msecs_int()
        total_time_spent_msecs = start_msecs + process_msecs + finish_msecs
        mis = [
            monitoring_infos.int64_counter(
                monitoring_infos.START_BUNDLE_MSECS_URN,
                start_msecs,
                ptransform=transform_id
            ),
            monitoring_infos.int64_counter(
                monitoring_infos.PROCESS_BUNDLE_MSECS_URN,
                process_msecs,
                ptransform=transform_id
            ),
            monitoring_infos.int64_counter(
                monitoring_infos.FINISH_BUNDLE_MSECS_URN,
                finish_msecs,
                ptransform=transform_id
            ),
            monitoring_infos.int64_counter(
                monitoring_infos.TOTAL_MSECS_URN,
                total_time_spent_msecs,
                ptransform=transform_id
            ),
        ]
        return {monitoring_infos.to_key(mi): mi for mi in mis}

    def __str__(self):
        """Generates a useful string for this object.

        Compactly displays interesting fields.  In particular, pickled
        fields are not displayed.  Note that we collapse the fields of the
        contained Worker* object into this object, since there is a 1-1
        mapping between Operation and operation_specs.Worker*.

        Returns:
          Compact string representing this object.
        """
        return self.str_internal()

    def str_internal(self, is_recursive=False):
        """Internal helper for __str__ that supports recursion.

        When recursing on receivers, keep the output short.

        Args:
          is_recursive: whether to omit some details, particularly receivers.

        Returns:
          Compact string representing this object.
        """
        printable_name = self.__class__.__name__
        # NOTE(review): __init__ in this class does not assign step_name, so
        # this branch is only taken when something else sets it (presumably
        # the legacy harness or a subclass) - confirm.
        if hasattr(self, 'step_name'):
            printable_name += ' %s' % self.name_context.logging_name()
            if is_recursive:
                # If we have a step name, stop here, no more detail needed.
                return '<%s>' % printable_name

        if self.spec is None:
            printable_fields = []
        else:
            printable_fields = operation_specs.worker_printable_fields(
                self.spec)

        if not is_recursive and getattr(self, 'receivers', []):
            printable_fields.append('receivers=[%s]' % ', '.join(
                [str(receiver) for receiver in self.receivers]))

        return '<%s %s>' % (printable_name, ', '.join(printable_fields))
class Operation(object):
    """Live, executable counterpart of a work item specification.

    Each operation may expose several outputs, and every output may feed
    one or more receiver operations that consume it as their input.
    """

    def __init__(self, name_context, spec, counter_factory, state_sampler):
        """Builds a worker operation.

        Args:
          name_context: A NameContext instance or string(deprecated), with
            the name information for this operation.
          spec: A operation_specs.Worker* instance.
          counter_factory: The CounterFactory to use for our counters.
          state_sampler: The StateSampler for the current operation.
        """
        # TODO(BEAM-4028): Clean this up once it's completely migrated.
        # The NameContext carries the specific operation name used for
        # metrics and state sampling; plain strings are wrapped in one.
        self.name_context = (
            name_context if isinstance(name_context, common.NameContext)
            else common.NameContext(name_context))

        self.spec = spec
        self.counter_factory = counter_factory
        self.consumers = collections.defaultdict(list)

        # These are overwritten in the legacy harness.
        self.metrics_container = MetricsContainer(
            self.name_context.metrics_name())
        self.state_sampler = state_sampler

        make_state = self.state_sampler.scoped_state
        self.scoped_start_state = make_state(
            self.name_context, 'start',
            metrics_container=self.metrics_container)
        self.scoped_process_state = make_state(
            self.name_context, 'process',
            metrics_container=self.metrics_container)
        self.scoped_finish_state = make_state(
            self.name_context, 'finish',
            metrics_container=self.metrics_container)
        # TODO(ccy): the '-abort' state can be added when the abort is
        # supported in Operations.
        self.receivers = []

    def start(self):
        """Start operation."""
        root_logger = logging.getLogger()
        self.debug_logging_enabled = root_logger.isEnabledFor(logging.DEBUG)

        # Everything except WorkerSideInputSource, which is not a
        # top-level operation, should have output_coders
        # TODO(pabloem): Define better what step name is used here.
        if not getattr(self.spec, 'output_coders', None):
            return

        # One ConsumerSet per declared output coder, in output order.
        consumer_sets = []
        for index, coder in enumerate(self.spec.output_coders):
            consumer_sets.append(
                ConsumerSet(self.counter_factory,
                            self.name_context.logging_name(),
                            index,
                            self.consumers[index],
                            coder))
        self.receivers = consumer_sets

    def process(self, o):
        """Process element in operation."""
        pass

    def finish(self):
        """Finish operation."""
        pass

    def output(self, windowed_value, output_index=0):
        """Hands windowed_value to the receiver at output_index."""
        receiver = cython.cast(Receiver, self.receivers[output_index])
        receiver.receive(windowed_value)

    def add_receiver(self, operation, output_index=0):
        """Adds a receiver operation for the specified output."""
        self.consumers[output_index].append(operation)

    def progress_metrics(self):
        """Builds the Metrics.PTransform proto for this operation.

        Multi-output operations should override this.
        """
        seconds_spent = (
            self.scoped_start_state.sampled_seconds() +
            self.scoped_process_state.sampled_seconds() +
            self.scoped_finish_state.sampled_seconds())

        # If there is exactly one output, we can unambiguously
        # fix its name later, which we do.
        # TODO(robertwb): Plumb the actual name here.
        if len(self.receivers) == 1:
            only_receiver = self.receivers[0]
            element_counts = {
                'ONLY_OUTPUT': only_receiver.opcounter.element_counter.value()
            }
        else:
            element_counts = None

        ptransform_pb = beam_fn_api_pb2.Metrics.PTransform
        measured = ptransform_pb.Measured(
            total_time_spent=seconds_spent,
            output_element_counts=element_counts)
        return ptransform_pb(
            processed_elements=ptransform_pb.ProcessedElements(
                measured=measured),
            user=self.metrics_container.to_runner_api())

    def __str__(self):
        """Generates a useful string for this object.

        Compactly displays interesting fields. In particular, pickled
        fields are not displayed. Note that we collapse the fields of the
        contained Worker* object into this object, since there is a 1-1
        mapping between Operation and operation_specs.Worker*.

        Returns:
          Compact string representing this object.
        """
        return self.str_internal()

    def str_internal(self, is_recursive=False):
        """Internal helper for __str__ that supports recursion.

        When recursing on receivers, keep the output short.

        Args:
          is_recursive: whether to omit some details, particularly
            receivers.

        Returns:
          Compact string representing this object.
        """
        label = self.__class__.__name__
        named = hasattr(self, 'step_name')
        if named:
            label += ' %s' % self.name_context.logging_name()
        if named and is_recursive:
            # If we have a step name, stop here, no more detail needed.
            return '<%s>' % label

        fields = (
            [] if self.spec is None
            else operation_specs.worker_printable_fields(self.spec))

        if not is_recursive:
            receivers = getattr(self, 'receivers', [])
            if receivers:
                rendered = ', '.join(str(r) for r in receivers)
                fields.append('receivers=[%s]' % rendered)

        return '<%s %s>' % (label, ', '.join(fields))
def create_operation(operation_name, spec, counter_factory, step_name,
                     state_sampler, test_shuffle_source=None,
                     test_shuffle_sink=None, is_streaming=False):
    """Create Operation object for given operation specification.

    The concrete Operation class is chosen from the type of ``spec``; the
    checks run in a fixed order and the first match wins.

    Args:
      operation_name: forwarded as the first constructor argument.
      spec: an operation_specs.Worker* instance selecting the operation.
      counter_factory: forwarded to the operation constructor.
      step_name: stored on the result as ``step_name`` and used to build
        its replacement MetricsContainer.
      state_sampler: forwarded to the operation constructor.
      test_shuffle_source: passed as ``shuffle_source`` to the shuffle
        read operations.
      test_shuffle_sink: passed as ``shuffle_sink`` to the shuffle write
        operation.
      is_streaming: for WorkerMergeWindows specs, picks the streaming
        group-also-by-windows operation instead of the batch one.

    Returns:
      The constructed operation, with ``step_name``, ``metrics_container``
      and ``scoped_metrics_container`` set on it.

    Raises:
      TypeError: if ``spec`` matches none of the handled Worker* types.
    """
    # Positional arguments shared by every constructor call below.
    base_args = (operation_name, spec, counter_factory, state_sampler)

    # dataflow_worker modules are imported lazily, only in the branch
    # that needs them.
    if isinstance(spec, operation_specs.WorkerRead):
        if isinstance(spec.source, iobase.SourceBundle):
            op = ReadOperation(*base_args)
        else:
            from dataflow_worker.native_operations import NativeReadOperation
            op = NativeReadOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerWrite):
        from dataflow_worker.native_operations import NativeWriteOperation
        op = NativeWriteOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerCombineFn):
        op = CombineOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerPartialGroupByKey):
        op = create_pgbk_op(*base_args)
    elif isinstance(spec, operation_specs.WorkerDoFn):
        op = DoOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
        from dataflow_worker.shuffle_operations import GroupedShuffleReadOperation
        op = GroupedShuffleReadOperation(
            *base_args, shuffle_source=test_shuffle_source)
    elif isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
        from dataflow_worker.shuffle_operations import UngroupedShuffleReadOperation
        op = UngroupedShuffleReadOperation(
            *base_args, shuffle_source=test_shuffle_source)
    elif isinstance(spec, operation_specs.WorkerInMemoryWrite):
        op = InMemoryWriteOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerShuffleWrite):
        from dataflow_worker.shuffle_operations import ShuffleWriteOperation
        op = ShuffleWriteOperation(*base_args, shuffle_sink=test_shuffle_sink)
    elif isinstance(spec, operation_specs.WorkerFlatten):
        op = FlattenOperation(*base_args)
    elif isinstance(spec, operation_specs.WorkerMergeWindows):
        from dataflow_worker.shuffle_operations import BatchGroupAlsoByWindowsOperation
        from dataflow_worker.shuffle_operations import StreamingGroupAlsoByWindowsOperation
        gabw_class = (
            StreamingGroupAlsoByWindowsOperation if is_streaming
            else BatchGroupAlsoByWindowsOperation)
        op = gabw_class(*base_args)
    elif isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
        from dataflow_worker.shuffle_operations import ReifyTimestampAndWindowsOperation
        op = ReifyTimestampAndWindowsOperation(*base_args)
    else:
        raise TypeError(
            'Expected an instance of operation_specs.Worker* class '
            'instead of %s' % (spec, ))

    # Attach the step name and replace the metrics container with one
    # keyed by that step name.
    op.step_name = step_name
    op.metrics_container = MetricsContainer(step_name)
    op.scoped_metrics_container = ScopedMetricsContainer(
        op.metrics_container)
    return op
def test_add_to_counter(self):
    """An increment is visible when the counter is fetched again by name."""
    container = MetricsContainer('astep')
    container.get_counter(MetricName('namespace', 'name')).inc()

    # Fetch again with a fresh but equal MetricName.
    fetched = container.get_counter(MetricName('namespace', 'name'))
    self.assertEqual(fetched.value, 1)
def test_create_new_counter(self):
    """get_counter registers a counter under a name not yet present."""
    mc = MetricsContainer('astep')
    # assertNotIn/assertIn report the member and the container on
    # failure, unlike assertFalse/assertTrue on a bare ``in`` expression.
    self.assertNotIn(MetricName('namespace', 'name'), mc.counters)
    mc.get_counter(MetricName('namespace', 'name'))
    self.assertIn(MetricName('namespace', 'name'), mc.counters)
class Operation(object):
    """An operation representing the live version of a work item specification.

    An operation can have one or more outputs and for each output it can
    have one or more receiver operations that will take that as input.
    """

    def __init__(
        self,
        name_context,  # type: Union[str, common.NameContext]
        spec,
        counter_factory,
        state_sampler  # type: StateSampler
    ):
        """Initializes a worker operation instance.

        Args:
          name_context: A NameContext instance or string(deprecated), with
            the name information for this operation.
          spec: A operation_specs.Worker* instance.
          counter_factory: The CounterFactory to use for our counters.
          state_sampler: The StateSampler for the current operation.
        """
        if isinstance(name_context, common.NameContext):
            # TODO(BEAM-4028): Clean this up once it's completely migrated.
            # We use the specific operation name that is used for metrics
            # and state sampling.
            self.name_context = name_context
        else:
            self.name_context = common.NameContext(name_context)

        self.spec = spec
        self.counter_factory = counter_factory
        self.execution_context = None  # type: Optional[ExecutionContext]
        self.consumers = collections.defaultdict(
            list)  # type: DefaultDict[int, List[Operation]]

        # These are overwritten in the legacy harness.
        self.metrics_container = MetricsContainer(
            self.name_context.metrics_name())

        self.state_sampler = state_sampler
        self.scoped_start_state = self.state_sampler.scoped_state(
            self.name_context, 'start',
            metrics_container=self.metrics_container)
        self.scoped_process_state = self.state_sampler.scoped_state(
            self.name_context, 'process',
            metrics_container=self.metrics_container)
        self.scoped_finish_state = self.state_sampler.scoped_state(
            self.name_context, 'finish',
            metrics_container=self.metrics_container)
        # TODO(ccy): the '-abort' state can be added when the abort is
        # supported in Operations.
        self.receivers = []  # type: List[ConsumerSet]
        # Legacy workers cannot call setup() until after setting additional
        # state on the operation.
        self.setup_done = False
        self.step_name = None  # type: Optional[str]

    def setup(self):
        # type: () -> None
        """Set up operation.

        This must be called before any other methods of the operation.
        Runs inside the 'start' scoped state, builds one ConsumerSet per
        output coder of the spec (when the spec has any) and marks the
        operation as set up.
        """
        with self.scoped_start_state:
            self.debug_logging_enabled = logging.getLogger().isEnabledFor(
                logging.DEBUG)
            # Everything except WorkerSideInputSource, which is not a
            # top-level operation, should have output_coders
            # TODO(pabloem): Define better what step name is used here.
            if getattr(self.spec, 'output_coders', None):
                self.receivers = [
                    ConsumerSet.create(self.counter_factory,
                                       self.name_context.logging_name(),
                                       i,
                                       self.consumers[i],
                                       coder)
                    for i, coder in enumerate(self.spec.output_coders)
                ]
        self.setup_done = True

    def start(self):
        # type: () -> None
        """Start operation."""
        if not self.setup_done:
            # For legacy workers.
            self.setup()

    def process(self, o):
        # type: (WindowedValue) -> None
        """Process element in operation."""
        pass

    def finalize_bundle(self):
        # type: () -> None
        """Bundle finalization hook; the base implementation does nothing."""
        pass

    def needs_finalization(self):
        """Returns False: the base operation never asks for finalization."""
        return False

    def try_split(self, fraction_of_remainder):
        # type: (...) -> Optional[Any]
        """Split hook; the base implementation returns None."""
        return None

    def current_element_progress(self):
        """Progress hook; the base implementation returns None."""
        return None

    def finish(self):
        # type: () -> None
        """Finish operation."""
        pass

    def teardown(self):
        # type: () -> None
        """Tear down operation.

        No other methods of this operation should be called after this.
        """
        pass

    def reset(self):
        # type: () -> None
        """Resets this operation's metrics container."""
        self.metrics_container.reset()

    def output(self, windowed_value, output_index=0):
        # type: (WindowedValue, int) -> None
        """Passes windowed_value to the receiver at output_index."""
        cython.cast(Receiver,
                    self.receivers[output_index]).receive(windowed_value)

    def add_receiver(self, operation, output_index=0):
        # type: (Operation, int) -> None
        """Adds a receiver operation for the specified output."""
        self.consumers[output_index].append(operation)

    def progress_metrics(self):
        # type: () -> beam_fn_api_pb2.Metrics.PTransform
        """Returns timing, output count and user metrics as a proto.

        Multi-output operations should override this: an output element
        count is only reported when there is exactly one receiver.
        """
        total_time_spent = (
            self.scoped_start_state.sampled_seconds() +
            self.scoped_process_state.sampled_seconds() +
            self.scoped_finish_state.sampled_seconds())
        # If there is exactly one output, we can unambiguously
        # fix its name later, which we do.
        # TODO(robertwb): Plumb the actual name here.
        if len(self.receivers) == 1:
            output_element_counts = {
                'ONLY_OUTPUT':
                    self.receivers[0].opcounter.element_counter.value()
            }
        else:
            output_element_counts = None
        return beam_fn_api_pb2.Metrics.PTransform(
            processed_elements=beam_fn_api_pb2.Metrics.PTransform.
            ProcessedElements(
                measured=beam_fn_api_pb2.Metrics.PTransform.Measured(
                    total_time_spent=total_time_spent,
                    output_element_counts=output_element_counts)),
            user=self.metrics_container.to_runner_api())

    def monitoring_infos(self, transform_id):
        # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
        """Returns the list of MonitoringInfos collected by this operation."""
        all_monitoring_infos = self.execution_time_monitoring_infos(
            transform_id)
        all_monitoring_infos.update(
            self.pcollection_count_monitoring_infos(transform_id))
        all_monitoring_infos.update(self.user_monitoring_infos(transform_id))
        return all_monitoring_infos

    def pcollection_count_monitoring_infos(self, transform_id):
        """Returns the element count MonitoringInfo collected by this operation.

        With exactly one receiver the result holds two entries keyed by
        monitoring_infos.to_key: the element count and the sampled byte
        size distribution. Otherwise an empty dict is returned.
        """
        if len(self.receivers) != 1:
            return {}

        # If there is exactly one output, we can unambiguously
        # fix its name later, which we do.
        # TODO(robertwb): Plumb the actual name here.
        only_output_tag = 'ONLY_OUTPUT'
        opcounter = self.receivers[0].opcounter

        elem_count_mi = monitoring_infos.int64_counter(
            monitoring_infos.ELEMENT_COUNT_URN,
            opcounter.element_counter.value(),
            ptransform=transform_id,
            tag=only_output_tag,
        )

        # Locals are prefixed so the builtins sum/min/max stay usable.
        (unused_mean, byte_sum, byte_count, byte_min,
         byte_max) = opcounter.mean_byte_counter.value()
        metric = metrics_pb2.Metric(
            distribution_data=metrics_pb2.DistributionData(
                int_distribution_data=metrics_pb2.IntDistributionData(
                    count=byte_count,
                    sum=byte_sum,
                    min=byte_min,
                    max=byte_max)))
        sampled_byte_count = monitoring_infos.int64_distribution(
            monitoring_infos.SAMPLED_BYTE_SIZE_URN,
            metric,
            ptransform=transform_id,
            tag=only_output_tag,
        )
        return {
            monitoring_infos.to_key(elem_count_mi): elem_count_mi,
            monitoring_infos.to_key(sampled_byte_count): sampled_byte_count
        }

    def user_monitoring_infos(self, transform_id):
        """Returns the user MonitoringInfos collected by this operation."""
        return self.metrics_container.to_runner_api_monitoring_infos(
            transform_id)

    def execution_time_monitoring_infos(self, transform_id):
        # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
        """Returns start/process/finish/total msec counters for this operation.

        The result is keyed by monitoring_infos.to_key; the total is the
        sum of the three sampled phase times.
        """
        total_time_spent_msecs = (
            self.scoped_start_state.sampled_msecs_int() +
            self.scoped_process_state.sampled_msecs_int() +
            self.scoped_finish_state.sampled_msecs_int())
        mis = [
            monitoring_infos.int64_counter(
                monitoring_infos.START_BUNDLE_MSECS_URN,
                self.scoped_start_state.sampled_msecs_int(),
                ptransform=transform_id),
            monitoring_infos.int64_counter(
                monitoring_infos.PROCESS_BUNDLE_MSECS_URN,
                self.scoped_process_state.sampled_msecs_int(),
                ptransform=transform_id),
            monitoring_infos.int64_counter(
                monitoring_infos.FINISH_BUNDLE_MSECS_URN,
                self.scoped_finish_state.sampled_msecs_int(),
                ptransform=transform_id),
            monitoring_infos.int64_counter(
                monitoring_infos.TOTAL_MSECS_URN,
                total_time_spent_msecs,
                ptransform=transform_id),
        ]
        return {monitoring_infos.to_key(mi): mi for mi in mis}

    def __str__(self):
        """Generates a useful string for this object.

        Compactly displays interesting fields. In particular, pickled
        fields are not displayed. Note that we collapse the fields of the
        contained Worker* object into this object, since there is a 1-1
        mapping between Operation and operation_specs.Worker*.

        Returns:
          Compact string representing this object.
        """
        return self.str_internal()

    def str_internal(self, is_recursive=False):
        """Internal helper for __str__ that supports recursion.

        When recursing on receivers, keep the output short.

        Args:
          is_recursive: whether to omit some details, particularly
            receivers.

        Returns:
          Compact string representing this object.
        """
        printable_name = self.__class__.__name__
        # __init__ always assigns step_name, so this only fails for
        # instances created without running __init__.
        if hasattr(self, 'step_name'):
            printable_name += ' %s' % self.name_context.logging_name()
            if is_recursive:
                # If we have a step name, stop here, no more detail needed.
                return '<%s>' % printable_name

        if self.spec is None:
            printable_fields = []
        else:
            printable_fields = operation_specs.worker_printable_fields(
                self.spec)

        if not is_recursive and getattr(self, 'receivers', []):
            printable_fields.append(
                'receivers=[%s]' %
                ', '.join([str(receiver) for receiver in self.receivers]))

        return '<%s %s>' % (printable_name, ', '.join(printable_fields))