Exemple #1
0
    def setup(self):
        # type: () -> None
        """Build the DoFn runner for this operation and call its ``setup()``.

        Everything runs inside ``self.scoped_start_state``. The pickled payload
        in ``self.spec.serialized_fn`` is unpacked into
        ``(fn, args, kwargs, tags_and_types, window_fn)`` and used to:

        * populate ``self.tagged_receivers`` from ``self.spec.output_tags``;
        * when ``self.user_state_context`` is truthy, hand it the tagged
          receivers and record ``self.timer_specs`` keyed by spec name;
        * fill ``self.side_input_maps`` if it is still ``None``;
        * create ``self.dofn_runner``, call its ``setup()``, and choose
          ``self.dofn_receiver``.

        Raises:
          ValueError: if an output tag is neither ``PropertyNames.OUT`` nor
            prefixed with ``PropertyNames.OUT + '_'``.
        """
        with self.scoped_start_state:
            super(DoOperation, self).setup()

            # See fn_data in dataflow_runner.py
            payload = pickler.loads(self.spec.serialized_fn)
            fn, args, kwargs, tags_and_types, window_fn = payload

            dofn_state = common.DoFnState(self.counter_factory)
            dofn_state.step_name = self.name_context.logging_name()

            # Maps each output tag to the receiver at the same index so that
            # values emitted by the DoFn reach the right consumer. The main
            # output is stored under the key None.
            self.tagged_receivers = _TaggedReceivers(
                self.counter_factory, self.name_context.logging_name())

            prefix = PropertyNames.OUT + '_'
            for position, output_tag in enumerate(self.spec.output_tags):
                if output_tag == PropertyNames.OUT:
                    receiver_key = None  # type: Optional[str]
                elif not output_tag.startswith(prefix):
                    raise ValueError(
                        'Unexpected output name for operation: %s' %
                        output_tag)
                else:
                    # Drop the '<OUT>_' prefix to recover the original tag.
                    receiver_key = output_tag[len(prefix):]
                self.tagged_receivers[receiver_key] = self.receivers[position]

            if self.user_state_context:
                self.user_state_context.update_timer_receivers(
                    self.tagged_receivers)
                # Element [1] of get_dofn_specs(fn) is iterated for objects
                # exposing a ``name`` attribute; index them by that name.
                dofn_timer_specs = userstate.get_dofn_specs(fn)[1]
                self.timer_specs = {
                    timer_spec.name: timer_spec
                    for timer_spec in dofn_timer_specs
                }

            # Only read side inputs when nobody has supplied them already.
            if self.side_input_maps is None:
                self.side_input_maps = (
                    list(self._read_side_inputs(tags_and_types))
                    if tags_and_types else [])

            self.dofn_runner = common.DoFnRunner(
                fn,
                args,
                kwargs,
                self.side_input_maps,
                window_fn,
                tagged_receivers=self.tagged_receivers,
                step_name=self.name_context.logging_name(),
                state=dofn_state,
                user_state_context=self.user_state_context,
                operation_name=self.name_context.metrics_name())
            self.dofn_runner.setup()

            # Use the runner directly when it already is a Receiver; otherwise
            # wrap it in an adapter.
            if isinstance(self.dofn_runner, Receiver):
                self.dofn_receiver = self.dofn_runner
            else:
                self.dofn_receiver = DoFnRunnerReceiver(self.dofn_runner)
Exemple #2
0
    def start(self):
        """Build the DoFn runner for this operation and start it.

        Everything runs inside ``self.scoped_start_state``. The pickled payload
        in ``self.spec.serialized_fn`` is unpacked into
        ``(fn, args, kwargs, tags_and_types, window_fn)`` and used to populate
        ``self.tagged_receivers``, fill ``self.side_input_maps`` if it is still
        ``None``, create ``self.dofn_runner`` and ``self.dofn_receiver``, and
        finally call ``self.dofn_runner.start()``.

        Raises:
          ValueError: if an output tag is neither ``PropertyNames.OUT`` nor
            prefixed with ``PropertyNames.OUT + '_'``.
        """
        with self.scoped_start_state:
            super(DoOperation, self).start()

            # See fn_data in dataflow_runner.py
            payload = pickler.loads(self.spec.serialized_fn)
            fn, args, kwargs, tags_and_types, window_fn = payload

            dofn_state = common.DoFnState(self.counter_factory)
            dofn_state.step_name = self.step_name

            # TODO(silviuc): What is the proper label here? PCollection being
            # processed?
            dofn_context = common.DoFnContext('label', state=dofn_state)

            # Maps each output tag to the receiver at the same index so that
            # values emitted by the DoFn reach the right consumer. The main
            # output is stored under the key None.
            self.tagged_receivers = _TaggedReceivers(self.counter_factory,
                                                     self.step_name)

            prefix = PropertyNames.OUT + '_'
            for position, output_tag in enumerate(self.spec.output_tags):
                if output_tag == PropertyNames.OUT:
                    receiver_key = None
                elif not output_tag.startswith(prefix):
                    raise ValueError(
                        'Unexpected output name for operation: %s' %
                        output_tag)
                else:
                    # Drop the '<OUT>_' prefix to recover the original tag.
                    receiver_key = output_tag[len(prefix):]
                self.tagged_receivers[receiver_key] = self.receivers[position]

            # Only read side inputs when nobody has supplied them already.
            if self.side_input_maps is None:
                self.side_input_maps = (
                    list(self._read_side_inputs(tags_and_types))
                    if tags_and_types else [])

            self.dofn_runner = common.DoFnRunner(
                fn,
                args,
                kwargs,
                self.side_input_maps,
                window_fn,
                dofn_context,
                self.tagged_receivers,
                logger,
                self.step_name,
                scoped_metrics_container=self.scoped_metrics_container)

            # Use the runner directly when it already is a Receiver; otherwise
            # wrap it in an adapter.
            if isinstance(self.dofn_runner, Receiver):
                self.dofn_receiver = self.dofn_runner
            else:
                self.dofn_receiver = DoFnRunnerReceiver(self.dofn_runner)

            self.dofn_runner.start()
Exemple #3
0
    def setup(self):
        # type: () -> None
        """Build the DoFn runner for this operation and call its ``setup()``.

        Everything runs inside ``self.scoped_start_state``. The pickled payload
        in ``self.spec.serialized_fn`` is unpacked into
        ``(fn, args, kwargs, tags_and_types, window_fn)`` and used to:

        * populate ``self.tagged_receivers`` from ``self.spec.output_tags``;
        * when ``self.user_state_context`` is truthy, record
          ``self.timer_specs`` keyed by spec name;
        * fill ``self.side_input_maps`` if it is still ``None``;
        * create ``self.dofn_runner`` and call its ``setup()``.
        """
        with self.scoped_start_state:
            super(DoOperation, self).setup()

            # See fn_data in dataflow_runner.py
            payload = pickler.loads(self.spec.serialized_fn)
            fn, args, kwargs, tags_and_types, window_fn = payload

            dofn_state = common.DoFnState(self.counter_factory)
            dofn_state.step_name = self.name_context.logging_name()

            # Maps each output tag to the receiver at the same index so that
            # values emitted by the DoFn reach the right consumer. The main
            # output is either the sole output or the one tagged 'None', and
            # is additionally reachable under the key None.
            self.tagged_receivers = _TaggedReceivers(
                self.counter_factory, self.name_context.logging_name())

            if len(self.spec.output_tags) == 1:
                # A single output is registered under both None and its tag.
                sole_receiver = self.receivers[0]
                self.tagged_receivers[None] = sole_receiver
                self.tagged_receivers[self.spec.output_tags[0]] = sole_receiver
            else:
                for position, output_tag in enumerate(self.spec.output_tags):
                    receiver = self.receivers[position]
                    self.tagged_receivers[output_tag] = receiver
                    if output_tag == 'None':
                        self.tagged_receivers[None] = receiver

            if self.user_state_context:
                # Element [1] of get_dofn_specs(fn) is iterated for objects
                # exposing a ``name`` attribute; index them by that name.
                dofn_timer_specs = userstate.get_dofn_specs(fn)[1]
                self.timer_specs = {
                    timer_spec.name: timer_spec
                    for timer_spec in dofn_timer_specs
                }

            # Only read side inputs when nobody has supplied them already.
            if self.side_input_maps is None:
                self.side_input_maps = (
                    list(self._read_side_inputs(tags_and_types))
                    if tags_and_types else [])

            self.dofn_runner = common.DoFnRunner(
                fn,
                args,
                kwargs,
                self.side_input_maps,
                window_fn,
                tagged_receivers=self.tagged_receivers,
                step_name=self.name_context.logging_name(),
                state=dofn_state,
                user_state_context=self.user_state_context,
                operation_name=self.name_context.metrics_name())
            self.dofn_runner.setup()