Exemple #1
0
  def start_bundle(self):
    """Prepares output bundles and a DoFnRunner, then sets up and starts it.

    Creates one output bundle per output tag of the applied transform,
    resolves the DoFn (round-tripping it through the pickler when the pickle
    test is enabled), wires up user state and timers for stateful DoFns, and
    finally builds, sets up and starts the DoFnRunner for this bundle.
    """
    applied = self._applied_ptransform
    ptransform = applied.transform

    # One output bundle per declared output tag, each labelled with its tag.
    receivers = _TaggedReceivers(self._evaluation_context)
    self._tagged_receivers = receivers
    for tag in applied.outputs:
      pcoll = pvalue.PCollection(None, tag=tag)
      pcoll.producer = applied
      receivers[tag] = self._evaluation_context.create_bundle(pcoll)
      receivers[tag].tag = tag

    self._counter_factory = counters.CounterFactory()

    # TODO(aaltay): Consider storing the serialized form as an optimization.
    if self._perform_dofn_pickle_test:
      dofn = pickler.loads(pickler.dumps(ptransform.dofn))
    else:
      dofn = ptransform.dofn

    # Not every transform carries extra call arguments for its DoFn.
    args = getattr(ptransform, 'args', [])
    kwargs = getattr(ptransform, 'kwargs', {})

    self.user_state_context = None
    self.user_timer_map = {}
    if is_stateful_dofn(dofn):
      element_type = applied.inputs[0].element_type
      # Use the key coder of the element coder when a concrete type hint is
      # available; otherwise fall back to the coder registered for Any.
      self.key_coder = (
          coders.registry.get_coder(element_type).key_coder()
          if element_type and element_type != Any
          else coders.registry.get_coder(Any))

      self.user_state_context = DirectUserStateContext(
          self._step_context, dofn, self.key_coder)
      _, timer_specs = get_dofn_specs(dofn)
      # Timers are looked up later by their 'user/<name>' identifier.
      self.user_timer_map.update(
          ('user/%s' % spec.name, spec) for spec in timer_specs)

    runner = DoFnRunner(
        dofn,
        args,
        kwargs,
        self._side_inputs,
        applied.inputs[0].windowing,
        tagged_receivers=receivers,
        step_name=applied.full_label,
        state=DoFnState(self._counter_factory),
        user_state_context=self.user_state_context)
    self.runner = runner
    runner.setup()
    runner.start()
  def start_bundle(self):
    """Prepares output bundles and a DoFnRunner, then starts the runner.

    Creates one output bundle per output tag of the applied transform,
    resolves the DoFn (round-tripping it through the pickler when the pickle
    test is enabled), wires up user state and timers for stateful DoFns, and
    finally builds and starts the DoFnRunner for this bundle.
    """
    applied = self._applied_ptransform
    ptransform = applied.transform

    # One output bundle per declared output tag, each labelled with its tag.
    receivers = _TaggedReceivers(self._evaluation_context)
    self._tagged_receivers = receivers
    for tag in applied.outputs:
      pcoll = pvalue.PCollection(None, tag=tag)
      pcoll.producer = applied
      receivers[tag] = self._evaluation_context.create_bundle(pcoll)
      receivers[tag].tag = tag

    self._counter_factory = counters.CounterFactory()

    # TODO(aaltay): Consider storing the serialized form as an optimization.
    if self._perform_dofn_pickle_test:
      dofn = pickler.loads(pickler.dumps(ptransform.dofn))
    else:
      dofn = ptransform.dofn

    # Not every transform carries extra call arguments for its DoFn.
    args = getattr(ptransform, 'args', [])
    kwargs = getattr(ptransform, 'kwargs', {})

    self.user_state_context = None
    self.user_timer_map = {}
    if is_stateful_dofn(dofn):
      element_type = applied.inputs[0].element_type
      # Use the key coder of the element coder when a concrete type hint is
      # available; otherwise fall back to the coder registered for Any.
      self.key_coder = (
          coders.registry.get_coder(element_type).key_coder()
          if element_type and element_type != typehints.Any
          else coders.registry.get_coder(typehints.Any))

      self.user_state_context = DirectUserStateContext(
          self._step_context, dofn, self.key_coder)
      _, timer_specs = get_dofn_specs(dofn)
      # Timers are looked up later by their 'user/<name>' identifier.
      self.user_timer_map.update(
          ('user/%s' % spec.name, spec) for spec in timer_specs)

    runner = DoFnRunner(
        dofn,
        args,
        kwargs,
        self._side_inputs,
        applied.inputs[0].windowing,
        tagged_receivers=receivers,
        step_name=applied.full_label,
        state=DoFnState(self._counter_factory),
        user_state_context=self.user_state_context)
    self.runner = runner
    runner.start()
class _ParDoEvaluator(_TransformEvaluator):
    """TransformEvaluator for ParDo transform.

    Per bundle, the evaluator builds a DoFnRunner in start_bundle(), feeds it
    elements and user timer firings, and in finish_bundle() returns the
    output bundles together with the counters collected while running.
    """
    def __init__(
            self,
            evaluation_context,  # type: EvaluationContext
            applied_ptransform,  # type: AppliedPTransform
            input_committed_bundle,
            side_inputs,
            perform_dofn_pickle_test=True):
        """Initializes the evaluator.

        Args:
          evaluation_context: forwarded to the base evaluator; also used to
            create the output bundles.
          applied_ptransform: the applied ParDo transform to evaluate.
          input_committed_bundle: forwarded to the base evaluator.
          side_inputs: forwarded to the base evaluator.
          perform_dofn_pickle_test: when true, the DoFn is round-tripped
            through the pickler before each bundle is processed.
        """
        super(_ParDoEvaluator,
              self).__init__(evaluation_context, applied_ptransform,
                             input_committed_bundle, side_inputs)
        # This is a workaround for SDF implementation. SDF implementation adds state
        # to the SDF that is not picklable.
        self._perform_dofn_pickle_test = perform_dofn_pickle_test

    def start_bundle(self):
        """Creates the output bundles and starts a fresh DoFnRunner."""
        transform = self._applied_ptransform.transform

        # One output bundle per declared output tag, labelled with its tag.
        self._tagged_receivers = _TaggedReceivers(self._evaluation_context)
        for output_tag in self._applied_ptransform.outputs:
            output_pcollection = pvalue.PCollection(None, tag=output_tag)
            output_pcollection.producer = self._applied_ptransform
            self._tagged_receivers[output_tag] = (
                self._evaluation_context.create_bundle(output_pcollection))
            self._tagged_receivers[output_tag].tag = output_tag

        self._counter_factory = counters.CounterFactory()

        # TODO(aaltay): Consider storing the serialized form as an optimization.
        dofn = (pickler.loads(pickler.dumps(transform.dofn))
                if self._perform_dofn_pickle_test else transform.dofn)

        # Not every transform carries extra call arguments for its DoFn.
        args = getattr(transform, 'args', [])
        kwargs = getattr(transform, 'kwargs', {})

        self.user_state_context = None
        self.user_timer_map = {}
        # NOTE: self.key_coder is only assigned for stateful DoFns.
        if is_stateful_dofn(dofn):
            kv_type_hint = self._applied_ptransform.inputs[0].element_type
            if kv_type_hint and kv_type_hint != Any:
                coder = coders.registry.get_coder(kv_type_hint)
                self.key_coder = coder.key_coder()
            else:
                self.key_coder = coders.registry.get_coder(Any)

            self.user_state_context = DirectUserStateContext(
                self._step_context, dofn, self.key_coder)
            _, all_timer_specs = get_dofn_specs(dofn)
            # Timers are looked up later by their 'user/<name>' identifier.
            for timer_spec in all_timer_specs:
                self.user_timer_map['user/%s' % timer_spec.name] = timer_spec

        self.runner = DoFnRunner(dofn,
                                 args,
                                 kwargs,
                                 self._side_inputs,
                                 self._applied_ptransform.inputs[0].windowing,
                                 tagged_receivers=self._tagged_receivers,
                                 step_name=self._applied_ptransform.full_label,
                                 state=DoFnState(self._counter_factory),
                                 user_state_context=self.user_state_context)
        self.runner.start()

    def process_timer(self, timer_firing):
        """Dispatches a fired user timer to the DoFnRunner.

        A firing whose name is not registered in user_timer_map is logged
        with a warning and dropped.
        """
        if timer_firing.name not in self.user_timer_map:
            _LOGGER.warning('Unknown timer fired: %s', timer_firing)
            # Stop here: falling through would raise KeyError on the lookup
            # below (and key_coder is unset for non-stateful DoFns).
            return
        timer_spec = self.user_timer_map[timer_firing.name]
        self.runner.process_user_timer(
            timer_spec, self.key_coder.decode(timer_firing.encoded_key),
            timer_firing.window, timer_firing.timestamp)

    def process_element(self, element):
        """Passes a single input element to the DoFnRunner."""
        self.runner.process(element)

    def finish_bundle(self):
        """Finishes the runner, commits user state and returns the result.

        Returns:
          A TransformResult carrying the output bundles and the counters
          gathered by this bundle's counter factory.
        """
        self.runner.finish()
        bundles = list(self._tagged_receivers.values())
        result_counters = self._counter_factory.get_counters()
        # user_state_context is None unless the DoFn is stateful.
        if self.user_state_context:
            self.user_state_context.commit()
        return TransformResult(self, bundles, [], result_counters, None)
class _ParDoEvaluator(_TransformEvaluator):
  """TransformEvaluator for ParDo transform.

  Per bundle, the evaluator builds a DoFnRunner in start_bundle(), feeds it
  elements and user timer firings, and in finish_bundle() returns the output
  bundles together with the counters collected while running.
  """

  def __init__(self, evaluation_context, applied_ptransform,
               input_committed_bundle, side_inputs,
               perform_dofn_pickle_test=True):
    """Initializes the evaluator.

    Args:
      evaluation_context: forwarded to the base evaluator; also used to
        create the output bundles.
      applied_ptransform: the applied ParDo transform to evaluate.
      input_committed_bundle: forwarded to the base evaluator.
      side_inputs: forwarded to the base evaluator.
      perform_dofn_pickle_test: when true, the DoFn is round-tripped through
        the pickler before each bundle is processed.
    """
    super(_ParDoEvaluator, self).__init__(
        evaluation_context, applied_ptransform, input_committed_bundle,
        side_inputs)
    # This is a workaround for SDF implementation. SDF implementation adds state
    # to the SDF that is not picklable.
    self._perform_dofn_pickle_test = perform_dofn_pickle_test

  def start_bundle(self):
    """Creates the output bundles and starts a fresh DoFnRunner."""
    transform = self._applied_ptransform.transform

    # One output bundle per declared output tag, labelled with its tag.
    self._tagged_receivers = _TaggedReceivers(self._evaluation_context)
    for output_tag in self._applied_ptransform.outputs:
      output_pcollection = pvalue.PCollection(None, tag=output_tag)
      output_pcollection.producer = self._applied_ptransform
      self._tagged_receivers[output_tag] = (
          self._evaluation_context.create_bundle(output_pcollection))
      self._tagged_receivers[output_tag].tag = output_tag

    self._counter_factory = counters.CounterFactory()

    # TODO(aaltay): Consider storing the serialized form as an optimization.
    dofn = (pickler.loads(pickler.dumps(transform.dofn))
            if self._perform_dofn_pickle_test else transform.dofn)

    # Not every transform carries extra call arguments for its DoFn.
    args = getattr(transform, 'args', [])
    kwargs = getattr(transform, 'kwargs', {})

    self.user_state_context = None
    self.user_timer_map = {}
    # NOTE: self.key_coder is only assigned for stateful DoFns.
    if is_stateful_dofn(dofn):
      kv_type_hint = self._applied_ptransform.inputs[0].element_type
      if kv_type_hint and kv_type_hint != typehints.Any:
        coder = coders.registry.get_coder(kv_type_hint)
        self.key_coder = coder.key_coder()
      else:
        self.key_coder = coders.registry.get_coder(typehints.Any)

      self.user_state_context = DirectUserStateContext(
          self._step_context, dofn, self.key_coder)
      _, all_timer_specs = get_dofn_specs(dofn)
      # Timers are looked up later by their 'user/<name>' identifier.
      for timer_spec in all_timer_specs:
        self.user_timer_map['user/%s' % timer_spec.name] = timer_spec

    self.runner = DoFnRunner(
        dofn, args, kwargs,
        self._side_inputs,
        self._applied_ptransform.inputs[0].windowing,
        tagged_receivers=self._tagged_receivers,
        step_name=self._applied_ptransform.full_label,
        state=DoFnState(self._counter_factory),
        user_state_context=self.user_state_context)
    self.runner.start()

  def process_timer(self, timer_firing):
    """Dispatches a fired user timer to the DoFnRunner.

    A firing whose name is not registered in user_timer_map is logged with a
    warning and dropped.
    """
    if timer_firing.name not in self.user_timer_map:
      logging.warning('Unknown timer fired: %s', timer_firing)
      # Stop here: falling through would raise KeyError on the lookup below
      # (and key_coder is unset for non-stateful DoFns).
      return
    timer_spec = self.user_timer_map[timer_firing.name]
    self.runner.process_user_timer(
        timer_spec, self.key_coder.decode(timer_firing.encoded_key),
        timer_firing.window, timer_firing.timestamp)

  def process_element(self, element):
    """Passes a single input element to the DoFnRunner."""
    self.runner.process(element)

  def finish_bundle(self):
    """Finishes the runner, commits user state and returns the result.

    Returns:
      A TransformResult carrying the output bundles and the counters gathered
      by this bundle's counter factory.
    """
    self.runner.finish()
    bundles = list(self._tagged_receivers.values())
    result_counters = self._counter_factory.get_counters()
    # user_state_context is None unless the DoFn is stateful.
    if self.user_state_context:
      self.user_state_context.commit()
    return TransformResult(
        self, bundles, [], result_counters, None)