예제 #1
0
  def start_bundle(self):
    """Builds the per-bundle receivers and DoFnRunner, then starts the runner.

    One output bundle is created for every tag in the applied transform's
    outputs and registered in ``self._tagged_receivers``. The transform's DoFn
    is round-tripped through the pickler, wrapped with a runtime type check
    when the pipeline options enable it, wrapped with an output check, and
    handed to a new ``DoFnRunner`` stored on ``self.runner``.
    """
    applied = self._applied_ptransform
    context = self._evaluation_context
    transform = applied.transform

    receivers = _TaggedReceivers(context)
    self._tagged_receivers = receivers
    for tag in applied.outputs:
      pcoll = pvalue.PCollection(None, tag=tag)
      pcoll.producer = applied
      receivers[tag] = context.create_bundle(pcoll)
      # Label the stored bundle with the tag it was created for.
      receivers[tag].tag = tag

    self._counter_factory = counters.CounterFactory()

    # TODO(aaltay): Consider storing the serialized form as an optimization.
    serialized_dofn = pickler.dumps(transform.dofn)
    dofn = pickler.loads(serialized_dofn)

    # The type-check wrapper is added only when options exist and request it.
    options = context.pipeline_options
    if options is not None:
      if options.view_as(TypeOptions).runtime_type_check:
        dofn = TypeCheckWrapperDoFn(dofn, transform.get_type_hints())

    step_label = applied.full_label
    dofn = OutputCheckWrapperDoFn(dofn, step_label)
    self.runner = DoFnRunner(
        dofn,
        transform.args,
        transform.kwargs,
        self._side_inputs,
        applied.inputs[0].windowing,
        tagged_receivers=receivers,
        step_name=step_label,
        state=DoFnState(self._counter_factory),
        scoped_metrics_container=self.scoped_metrics_container)
    self.runner.start()
    def start_bundle(self):
        """Builds the per-bundle receivers and DoFnRunner, then starts it.

        One output bundle is created for every tag in the applied transform's
        outputs and registered in ``self._tagged_receivers``. The DoFn is
        round-tripped through the pickler only when
        ``self._perform_dofn_pickle_test`` is truthy; otherwise the
        transform's own DoFn object is used directly.
        """
        applied = self._applied_ptransform
        context = self._evaluation_context
        transform = applied.transform

        receivers = _TaggedReceivers(context)
        self._tagged_receivers = receivers
        for tag in applied.outputs:
            pcoll = pvalue.PCollection(None, tag=tag)
            pcoll.producer = applied
            receivers[tag] = context.create_bundle(pcoll)
            # Label the stored bundle with the tag it was created for.
            receivers[tag].tag = tag

        self._counter_factory = counters.CounterFactory()

        # TODO(aaltay): Consider storing the serialized form as an
        # optimization.
        if self._perform_dofn_pickle_test:
            dofn = pickler.loads(pickler.dumps(transform.dofn))
        else:
            dofn = transform.dofn

        # Transforms lacking args/kwargs attributes get empty defaults.
        args = getattr(transform, 'args', [])
        kwargs = getattr(transform, 'kwargs', {})

        self.runner = DoFnRunner(
            dofn,
            args,
            kwargs,
            self._side_inputs,
            applied.inputs[0].windowing,
            tagged_receivers=receivers,
            step_name=applied.full_label,
            state=DoFnState(self._counter_factory))
        self.runner.start()
예제 #3
0
    def start_bundle(self):
        """Builds the per-bundle receivers and DoFnRunner, then starts it.

        When the parent transform is a ``core._MultiParDo``, a bundle is
        created for each side output tag and the main output is taken from
        the parent's ``DoOutputsTuple``. Otherwise exactly one output is
        expected and used as the main output. The main bundle is registered
        under the tag ``None``. The DoFn is round-tripped through the
        pickler, wrapped with a runtime type check when the pipeline options
        enable it, wrapped with an output check, and passed to a new
        ``DoFnRunner`` stored on ``self.runner``.
        """
        applied = self._applied_ptransform
        context = self._evaluation_context
        transform = applied.transform

        receivers = _TaggedReceivers(context)
        self._tagged_receivers = receivers

        parent = applied.parent
        if not isinstance(
                parent.transform,
                core._MultiParDo):  # pylint: disable=protected-access
            # Single-output case: the only output is the main output.
            assert len(self._outputs) == 1
            main_pcoll = list(self._outputs)[0]
        else:
            outputs_tuple = parent.outputs[0]
            assert isinstance(outputs_tuple, pvalue.DoOutputsTuple)
            main_tag = (
                outputs_tuple._main_tag)  # pylint: disable=protected-access
            main_pcoll = outputs_tuple[main_tag]

            for side_tag in transform.side_output_tags:
                side_pcoll = outputs_tuple[side_tag]
                receivers[side_tag] = context.create_bundle(side_pcoll)
                receivers[side_tag].tag = side_tag

        receivers[None] = context.create_bundle(main_pcoll)
        receivers[None].tag = None  # main_tag is None.

        self._counter_factory = counters.CounterFactory()

        # TODO(aaltay): Consider storing the serialized form as an
        # optimization.
        serialized_dofn = pickler.dumps(transform.dofn)
        dofn = pickler.loads(serialized_dofn)

        options = context.pipeline_options
        if (options is not None
                and options.view_as(TypeOptions).runtime_type_check):
            # TODO(sourabhbajaj): Remove this if-else
            type_check_wrapper = (
                TypeCheckWrapperNewDoFn
                if isinstance(dofn, core.NewDoFn) else TypeCheckWrapperDoFn)
            dofn = type_check_wrapper(dofn, transform.get_type_hints())

        # TODO(sourabhbajaj): Remove this if-else
        # The class test is repeated on purpose: by now dofn may be the
        # type-check wrapper rather than the unpickled DoFn.
        output_check_wrapper = (
            OutputCheckWrapperNewDoFn
            if isinstance(dofn, core.NewDoFn) else OutputCheckWrapperDoFn)
        dofn = output_check_wrapper(dofn, applied.full_label)

        self.runner = DoFnRunner(
            dofn,
            transform.args,
            transform.kwargs,
            self._side_inputs,
            applied.inputs[0].windowing,
            tagged_receivers=receivers,
            step_name=applied.full_label,
            state=DoFnState(self._counter_factory),
            scoped_metrics_container=self.scoped_metrics_container)
        self.runner.start()
예제 #4
0
  def start_bundle(self):
    """Builds the per-bundle receivers and DoFnRunner, then starts the runner.

    One output bundle is created for every tag in the applied transform's
    outputs and registered in ``self._tagged_receivers``. The DoFn is
    round-tripped through the pickler only when
    ``self._perform_dofn_pickle_test`` is truthy. For a stateful DoFn a key
    coder, a ``DirectUserStateContext`` and a timer map keyed by
    ``'user/<timer name>'`` are prepared as well. The runner is set up and
    then started.
    """
    applied = self._applied_ptransform
    context = self._evaluation_context
    transform = applied.transform

    receivers = _TaggedReceivers(context)
    self._tagged_receivers = receivers
    for tag in applied.outputs:
      pcoll = pvalue.PCollection(None, tag=tag)
      pcoll.producer = applied
      receivers[tag] = context.create_bundle(pcoll)
      # Label the stored bundle with the tag it was created for.
      receivers[tag].tag = tag

    self._counter_factory = counters.CounterFactory()

    # TODO(aaltay): Consider storing the serialized form as an optimization.
    if self._perform_dofn_pickle_test:
      dofn = pickler.loads(pickler.dumps(transform.dofn))
    else:
      dofn = transform.dofn

    # Transforms lacking args/kwargs attributes get empty defaults.
    args = getattr(transform, 'args', [])
    kwargs = getattr(transform, 'kwargs', {})

    # Defaults for the non-stateful case; replaced below for stateful DoFns.
    self.user_state_context = None
    self.user_timer_map = {}
    if is_stateful_dofn(dofn):
      element_hint = applied.inputs[0].element_type
      # With a specific (non-Any) element hint, use its coder's key coder;
      # otherwise fall back to the coder registered for Any.
      self.key_coder = (
          coders.registry.get_coder(element_hint).key_coder()
          if element_hint and element_hint != Any else
          coders.registry.get_coder(Any))

      self.user_state_context = DirectUserStateContext(
          self._step_context, dofn, self.key_coder)
      _, timer_specs = get_dofn_specs(dofn)
      self.user_timer_map.update(
          ('user/%s' % spec.name, spec) for spec in timer_specs)

    self.runner = DoFnRunner(
        dofn,
        args,
        kwargs,
        self._side_inputs,
        applied.inputs[0].windowing,
        tagged_receivers=receivers,
        step_name=applied.full_label,
        state=DoFnState(self._counter_factory),
        user_state_context=self.user_state_context)
    self.runner.setup()
    self.runner.start()