def setup(self):
    # type: () -> None
    """Prepare this operation so that its DoFn can be run.

    Everything happens inside ``self.scoped_start_state``: the parent
    ``setup()`` is invoked, ``self.spec.serialized_fn`` is unpickled,
    ``self.tagged_receivers`` is populated from ``self.spec.output_tags``,
    ``self.side_input_maps`` is filled in when it is still ``None``, and
    ``self.dofn_runner`` is created, set up, and exposed through
    ``self.dofn_receiver``.

    Raises:
        ValueError: If an output tag is neither ``PropertyNames.OUT`` nor
            prefixed with ``PropertyNames.OUT + '_'``.
    """
    with self.scoped_start_state:
        super(DoOperation, self).setup()

        # See fn_data in dataflow_runner.py for the layout of this payload.
        payload = pickler.loads(self.spec.serialized_fn)
        do_fn, fn_args, fn_kwargs, side_input_tags, windowing = payload

        runner_state = common.DoFnState(self.counter_factory)
        runner_state.step_name = self.name_context.logging_name()

        # Receivers are keyed by the tag the DoFn emits with, so that side
        # output values can be dispatched to the right receiver. The main
        # output is stored under the key None.
        self.tagged_receivers = _TaggedReceivers(
            self.counter_factory, self.name_context.logging_name())

        tagged_prefix = PropertyNames.OUT + '_'
        for position, spec_tag in enumerate(self.spec.output_tags):
            if spec_tag == PropertyNames.OUT:
                receiver_key = None  # type: Optional[str]
            else:
                if not spec_tag.startswith(tagged_prefix):
                    raise ValueError(
                        'Unexpected output name for operation: %s' % spec_tag)
                # Strip the prefix to recover the tag used as receiver key.
                receiver_key = spec_tag[len(tagged_prefix):]
            self.tagged_receivers[receiver_key] = self.receivers[position]

        if self.user_state_context:
            self.user_state_context.update_timer_receivers(
                self.tagged_receivers)
            # Index the second element returned by get_dofn_specs by name.
            specs_by_name = {}
            for timer_spec in userstate.get_dofn_specs(do_fn)[1]:
                specs_by_name[timer_spec.name] = timer_spec
            self.timer_specs = specs_by_name

        # Side input maps that were already provided are left untouched.
        if self.side_input_maps is None:
            self.side_input_maps = (
                list(self._read_side_inputs(side_input_tags))
                if side_input_tags else [])

        self.dofn_runner = common.DoFnRunner(
            do_fn,
            fn_args,
            fn_kwargs,
            self.side_input_maps,
            windowing,
            tagged_receivers=self.tagged_receivers,
            step_name=self.name_context.logging_name(),
            state=runner_state,
            user_state_context=self.user_state_context,
            operation_name=self.name_context.metrics_name())
        self.dofn_runner.setup()

        # Use the runner directly when it is already a Receiver; otherwise
        # wrap it in a DoFnRunnerReceiver.
        if isinstance(self.dofn_runner, Receiver):
            self.dofn_receiver = self.dofn_runner
        else:
            self.dofn_receiver = DoFnRunnerReceiver(self.dofn_runner)
def start(self):
    """Build the DoFn runner for this operation and start it.

    Everything happens inside ``self.scoped_start_state``: the parent
    ``start()`` is invoked, ``self.spec.serialized_fn`` is unpickled,
    ``self.tagged_receivers`` is populated from ``self.spec.output_tags``,
    ``self.side_input_maps`` is filled in when it is still ``None``, and
    ``self.dofn_runner`` / ``self.dofn_receiver`` are created before the
    runner's ``start()`` is called.

    Raises:
        ValueError: If an output tag is neither ``PropertyNames.OUT`` nor
            prefixed with ``PropertyNames.OUT + '_'``.
    """
    with self.scoped_start_state:
        super(DoOperation, self).start()

        # See fn_data in dataflow_runner.py for the layout of this payload.
        payload = pickler.loads(self.spec.serialized_fn)
        do_fn, fn_args, fn_kwargs, side_input_tags, windowing = payload

        runner_state = common.DoFnState(self.counter_factory)
        runner_state.step_name = self.step_name

        # TODO(silviuc): What is the proper label here? PCollection being
        # processed?
        fn_context = common.DoFnContext('label', state=runner_state)

        # Receivers are keyed by the tag the DoFn emits with, so that side
        # output values can be dispatched to the right receiver. The main
        # output is stored under the key None.
        self.tagged_receivers = _TaggedReceivers(
            self.counter_factory, self.step_name)

        tagged_prefix = PropertyNames.OUT + '_'
        for position, spec_tag in enumerate(self.spec.output_tags):
            if spec_tag == PropertyNames.OUT:
                receiver_key = None
            else:
                if not spec_tag.startswith(tagged_prefix):
                    raise ValueError(
                        'Unexpected output name for operation: %s' % spec_tag)
                # Strip the prefix to recover the tag used as receiver key.
                receiver_key = spec_tag[len(tagged_prefix):]
            self.tagged_receivers[receiver_key] = self.receivers[position]

        # Side input maps that were already provided are left untouched.
        if self.side_input_maps is None:
            self.side_input_maps = (
                list(self._read_side_inputs(side_input_tags))
                if side_input_tags else [])

        self.dofn_runner = common.DoFnRunner(
            do_fn,
            fn_args,
            fn_kwargs,
            self.side_input_maps,
            windowing,
            fn_context,
            self.tagged_receivers,
            logger,
            self.step_name,
            scoped_metrics_container=self.scoped_metrics_container)

        # Use the runner directly when it is already a Receiver; otherwise
        # wrap it in a DoFnRunnerReceiver.
        if isinstance(self.dofn_runner, Receiver):
            self.dofn_receiver = self.dofn_runner
        else:
            self.dofn_receiver = DoFnRunnerReceiver(self.dofn_runner)

        self.dofn_runner.start()
def setup(self):
    # type: () -> None
    """Prepare this operation so that its DoFn can be run.

    Everything happens inside ``self.scoped_start_state``: the parent
    ``setup()`` is invoked, ``self.spec.serialized_fn`` is unpickled,
    ``self.tagged_receivers`` is populated from ``self.spec.output_tags``,
    ``self.side_input_maps`` is filled in when it is still ``None``, and
    ``self.dofn_runner`` is created and set up.
    """
    with self.scoped_start_state:
        super(DoOperation, self).setup()

        # See fn_data in dataflow_runner.py for the layout of this payload.
        payload = pickler.loads(self.spec.serialized_fn)
        do_fn, fn_args, fn_kwargs, side_input_tags, windowing = payload

        runner_state = common.DoFnState(self.counter_factory)
        runner_state.step_name = self.name_context.logging_name()

        # Receivers are keyed by output tag so that values emitted by the
        # DoFn can be dispatched to the right receiver. The main output is
        # additionally reachable under the key None: it is either the sole
        # output or the one whose tag is the string 'None'.
        self.tagged_receivers = _TaggedReceivers(
            self.counter_factory, self.name_context.logging_name())

        if len(self.spec.output_tags) != 1:
            for position, spec_tag in enumerate(self.spec.output_tags):
                self.tagged_receivers[spec_tag] = self.receivers[position]
                if spec_tag == 'None':
                    self.tagged_receivers[None] = self.receivers[position]
        else:
            # A single output is registered under both None and its own tag.
            self.tagged_receivers[None] = self.receivers[0]
            self.tagged_receivers[
                self.spec.output_tags[0]] = self.receivers[0]

        if self.user_state_context:
            # Index the second element returned by get_dofn_specs by name.
            specs_by_name = {}
            for timer_spec in userstate.get_dofn_specs(do_fn)[1]:
                specs_by_name[timer_spec.name] = timer_spec
            self.timer_specs = specs_by_name

        # Side input maps that were already provided are left untouched.
        if self.side_input_maps is None:
            self.side_input_maps = (
                list(self._read_side_inputs(side_input_tags))
                if side_input_tags else [])

        self.dofn_runner = common.DoFnRunner(
            do_fn,
            fn_args,
            fn_kwargs,
            self.side_input_maps,
            windowing,
            tagged_receivers=self.tagged_receivers,
            step_name=self.name_context.logging_name(),
            state=runner_state,
            user_state_context=self.user_state_context,
            operation_name=self.name_context.metrics_name())
        self.dofn_runner.setup()