def finish_bundle(self):
        """Finish the wrapped ParDo evaluator and add this evaluator's holds.

        Returns:
            A TransformResult (attributed to this evaluator) built from the
            wrapped evaluator's result, with every entry of self.keyed_holds
            written into its keyed_watermark_holds.
        """
        inner = self._par_do_evaluator.finish_bundle()
        result = TransformResult(
            self,
            inner.uncommitted_output_bundles,
            inner.unprocessed_bundles,
            inner.counters,
            inner.keyed_watermark_holds,
            inner.undeclared_tag_values)

        # Holds kept on this evaluator replace same-key holds from the
        # wrapped evaluator's result.
        own_holds = self.keyed_holds
        for key in own_holds:
            result.keyed_watermark_holds[key] = own_holds[key]
        return result
  def finish_bundle(self):
    """Finish the wrapped ParDo evaluator and merge in this evaluator's holds.

    Returns:
      A TransformResult carrying the wrapped evaluator's output bundles,
      unprocessed bundles, counters and undeclared tag values, with every
      entry of self.keyed_holds written into its keyed_watermark_holds.
    """
    delegate_result = self._par_do_evaluator.finish_bundle()
    merged = TransformResult(
        self,
        delegate_result.uncommitted_output_bundles,
        delegate_result.unprocessed_bundles,
        delegate_result.counters,
        delegate_result.keyed_watermark_holds,
        delegate_result.undeclared_tag_values)

    # Entries from self.keyed_holds win over same-key entries of the delegate.
    local_holds = self.keyed_holds
    for hold_key in local_holds:
      merged.keyed_watermark_holds[hold_key] = local_holds[hold_key]
    return merged
예제 #3
0
  def finish_bundle(self):
    """Write the buffered elements once the final bundle has been reached.

    Incoming bundles are accumulated in memory until every bundle carrying
    data has been processed, so that only a single output shard is produced
    (some tests depend on this behavior). Empty bundles may still arrive after
    the output has been produced; they are ignored and generate no additional
    output files.

    Returns:
      A TransformResult with no output bundles. Its hold (under the None key)
      is WATERMARK_POS_INF for the final bundle and WATERMARK_NEG_INF
      otherwise.
    """
    # TODO(altay): Do not wait until the last bundle to write in a single shard.
    if not self._is_final_bundle:
      # Not the final bundle yet: set a watermark timer for WATERMARK_POS_INF
      # and report a WATERMARK_NEG_INF hold.
      self.global_state.set_timer(
          None, '', TimeDomain.WATERMARK, WatermarkManager.WATERMARK_POS_INF)
      return TransformResult(
          self._applied_ptransform, [], [], None,
          {None: WatermarkManager.WATERMARK_NEG_INF})

    buffered = self.global_state.get_state(
        None, _NativeWriteEvaluator.ELEMENTS_TAG)
    if not self._has_already_produced_output:
      self._sink.pipeline_options = self._evaluation_context.pipeline_options
      with self._sink.writer() as sink_writer:
        for windowed in buffered:
          sink_writer.Write(windowed.value)
    else:
      # Ignore empty bundles that arrive after the output is produced.
      assert buffered == []

    return TransformResult(
        self._applied_ptransform, [], [], None,
        {None: WatermarkManager.WATERMARK_POS_INF})
예제 #4
0
 def finish_bundle(self):
   """Finish the runner and package its outputs into a TransformResult.

   Returns:
     A TransformResult for self._applied_ptransform holding the tagged
     receivers' bundles as a list, no unprocessed bundles, the counters from
     the counter factory, no watermark holds, and the receivers' undeclared
     in-memory tag values.
   """
   self.runner.finish()
   # Materialize a list: on Python 3 a mapping's values() returns a live view
   # rather than a list, so the result would otherwise track later changes to
   # the receivers.
   bundles = list(self._tagged_receivers.values())
   result_counters = self._counter_factory.get_counters()
   return TransformResult(
       self._applied_ptransform, bundles, [], result_counters, None,
       self._tagged_receivers.undeclared_in_memory_tag_values)
예제 #5
0
 def finish_bundle(self):
     """Finish the runner, commit user state and package the outputs.

     Returns:
         A TransformResult holding a list of the tagged receivers' bundles,
         no unprocessed bundles, the counters from the counter factory and
         no watermark holds.
     """
     self.runner.finish()
     output_bundles = list(self._tagged_receivers.values())
     counters = self._counter_factory.get_counters()

     # A user state context is only committed when one is present.
     state_context = self.user_state_context
     if state_context:
         state_context.commit()

     return TransformResult(self, output_bundles, [], counters, None)
    def finish_bundle(self):
        """Read from Pub/Sub and emit what was read as one output bundle.

        Each message is output in the global window at the timestamp returned
        alongside it; the whole message is emitted when the source has
        with_attributes set, otherwise only its payload. An empty unprocessed
        bundle for the transform's input (or a PBegin when it has no inputs)
        is always included in the result.

        Returns:
            A TransformResult with zero or one output bundle, one unprocessed
            bundle, and the current wall-clock time as the hold under the
            None key.
        """
        output_bundles = []
        messages = self._read_from_pubsub(self.source.timestamp_attribute)
        if messages:
            first_output = list(self._outputs)[0]
            out = self._evaluation_context.create_bundle(first_output)
            # TODO(ccy): Respect the PubSub source's id_label field.
            for ts, msg in messages:
                value = msg if self.source.with_attributes else msg.payload
                out.output(GlobalWindows.windowed_value(value, timestamp=ts))
            output_bundles.append(out)

        transform_inputs = self._applied_ptransform.inputs
        if transform_inputs:
            input_pvalue = transform_inputs[0]
        else:
            input_pvalue = pvalue.PBegin(
                self._applied_ptransform.transform.pipeline)
        pending = self._evaluation_context.create_bundle(input_pvalue)

        # TODO(udim): Correct value for watermark hold.
        holds = {None: Timestamp.of(time.time())}
        return TransformResult(self, output_bundles, [pending], None, holds)
예제 #7
0
  def finish_bundle(self):
    """Emit the collected gabw_items as a single bundle, if there are any.

    Returns:
      A TransformResult with at most one output bundle, no unprocessed
      bundles, no counters, and self.keyed_holds as its watermark holds.
    """
    items = self.gabw_items
    if not items:
      return TransformResult(self, [], [], None, self.keyed_holds)

    out_bundle = self._evaluation_context.create_bundle(self.output_pcollection)
    for item in items:
      out_bundle.add(item)
    return TransformResult(self, [out_bundle], [], None, self.keyed_holds)
예제 #8
0
  def finish_bundle(self):
    """Wrap each (encoded key, values) entry of gbk_items in a KeyedWorkItem.

    All work items go into one output bundle, which is only created once the
    first entry is seen, so an empty gbk_items produces no bundle at all.

    Returns:
      A TransformResult with zero or one output bundle and no holds.
    """
    out = None
    output_bundles = []
    for encoded_key, values in iteritems(self.gbk_items):
      if not out:
        # Lazily create the single shared output bundle.
        out = self._evaluation_context.create_bundle(self.output_pcollection)
        output_bundles.append(out)
      work_item = KeyedWorkItem(encoded_key, elements=values)
      out.add(GlobalWindows.windowed_value(work_item))

    return TransformResult(self, output_bundles, [], None, None)
예제 #9
0
 def finish_bundle(self):
   """Return this step's bundles and schedule the next event, if any.

   While current_index is before the last index of test_stream.events, an
   unprocessed bundle carrying the next index (timestamped at the current
   watermark) is emitted and the watermark is reported as the hold.
   Otherwise there are no unprocessed bundles and the hold value is None.

   Returns:
     A TransformResult for self._applied_ptransform.
   """
   last_index = len(self.test_stream.events) - 1
   pending = []
   hold = None
   if self.current_index < last_index:
     begin = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
     continuation = self._evaluation_context.create_bundle(begin)
     next_event = GlobalWindows.windowed_value(
         self.current_index + 1, timestamp=self.watermark)
     continuation.add(next_event)
     pending.append(continuation)
     hold = self.watermark
   return TransformResult(
       self._applied_ptransform, self.bundles, pending, None, {None: hold})
예제 #10
0
  def finish_bundle(self):
    """Return this step's bundles and schedule the next event, if any.

    Unless test_stream.end() reports the next index as the end, an unprocessed
    bundle carrying that index (timestamped at the current watermark) is
    emitted.

    Returns:
      A TransformResult whose hold under the None key is the current
      watermark.
    """
    pending = []
    following = self.test_stream.next(self.current_index)
    if not self.test_stream.end(following):
      begin = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
      continuation = self._evaluation_context.create_bundle(begin)
      continuation.add(
          GlobalWindows.windowed_value(following, timestamp=self.watermark))
      pending.append(continuation)

    # Returning the watermark in the dict here is used as a watermark hold.
    holds = {None: self.watermark}
    return TransformResult(self, self.bundles, pending, None, holds)
예제 #11
0
    def finish_bundle(self):
        """Return this step's bundles and, unless done, a heartbeat bundle.

        Returns:
            A TransformResult whose hold under the None key is the current
            watermark, and whose unprocessed bundles contain one heartbeat
            bundle while self.is_done is false.
        """
        pending = []

        # Continue to send its own state to itself via an unprocessed bundle.
        # This acts as a heartbeat, where each element will read the next
        # event from the event stream.
        if not self.is_done:
            begin = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
            heartbeat = self._evaluation_context.create_bundle(begin)
            tick = GlobalWindows.windowed_value(b'', timestamp=self.watermark)
            heartbeat.add(tick)
            pending.append(heartbeat)

        # Returning the watermark in the dict here is used as a watermark hold.
        holds = {None: self.watermark}
        return TransformResult(self, self.bundles, pending, None, holds)
예제 #12
0
    def finish_bundle(self):
        """Emit the grouped values once the final bundle has been reached.

        Returns:
            A TransformResult for self._applied_ptransform. For a non-final
            bundle it has no output and a WATERMARK_NEG_INF hold. For the
            final bundle it has a WATERMARK_POS_INF hold and, unless the
            completion tag was already set, the (key, values) elements split
            into bundles.
        """
        if not self._is_final_bundle():
            # Not final yet: set a watermark timer for WATERMARK_POS_INF and
            # report a WATERMARK_NEG_INF hold.
            self.global_state.set_timer(None, '', TimeDomain.WATERMARK,
                                        WatermarkManager.WATERMARK_POS_INF)
            return TransformResult(
                self._applied_ptransform, [], [], None,
                {None: WatermarkManager.WATERMARK_NEG_INF})

        already_completed = self.global_state.get_state(
            None, _GroupByKeyOnlyEvaluator.COMPLETION_TAG)
        if already_completed:
            # Ignore empty bundles after emitting output. (This may happen
            # because empty bundles do not affect input watermarks.)
            output_bundles = []
        else:
            grouped = []
            # TODO(ccy): perhaps we can clean this up to not use this
            # internal attribute of the DirectStepContext.
            for encoded_key in self.step_context.keyed_existing_state:
                # The None key is global state, not a grouping key.
                if encoded_key is not None:
                    key = self.key_coder.decode(encoded_key)
                    keyed_state = self.step_context.get_keyed_state(
                        encoded_key)
                    values = keyed_state.get_state(
                        None, _GroupByKeyOnlyEvaluator.ELEMENTS_TAG)
                    grouped.append(
                        GlobalWindows.windowed_value((key, values)))

            def value_count(windowed):
                # Size of an element is the number of grouped values.
                _, vs = windowed.value
                return len(vs)

            output_bundles = self._split_list_into_bundles(
                self.output_pcollection, grouped,
                _GroupByKeyOnlyEvaluator.MAX_ELEMENT_PER_BUNDLE,
                value_count)

        # The completion tag is added on every final bundle, including the
        # ones ignored above.
        self.global_state.add_state(
            None, _GroupByKeyOnlyEvaluator.COMPLETION_TAG, True)
        return TransformResult(
            self._applied_ptransform, output_bundles, [], None,
            {None: WatermarkManager.WATERMARK_POS_INF})
    def finish_bundle(self):
        """Read the whole source and split its values into output bundles.

        A BoundedSource is read through a range tracker for its default
        range; any other source is read through its reader() context manager.

        Returns:
            A TransformResult with the output bundles and no holds.
        """
        assert len(self._outputs) == 1
        target = list(self._outputs)[0]

        def to_bundles(values):
            # Wrap every value in the global window, then chunk the list;
            # each element counts as size 1.
            windowed = list(map(GlobalWindows.windowed_value, values))
            return self._split_list_into_bundles(
                target, windowed,
                _BoundedReadEvaluator.MAX_ELEMENT_PER_BUNDLE, lambda _: 1)

        if not isinstance(self._source, io.iobase.BoundedSource):
            with self._source.reader() as source_reader:
                output_bundles = to_bundles(source_reader)
        else:
            # Getting a RangeTracker for the default range of the source and
            # reading the full source using that.
            tracker = self._source.get_range_tracker(None, None)
            output_bundles = to_bundles(self._source.read(tracker))

        return TransformResult(self, output_bundles, [], None, None)
예제 #14
0
 def finish_bundle(self):
   """Read from Pub/Sub and emit what was read as one output bundle.

   Every message is output in the global window, stamped with the wall-clock
   time taken once before the messages are emitted. An empty unprocessed
   bundle for the transform's input (or a PBegin when it has no inputs) is
   always included in the result.

   Returns:
     A TransformResult for self._applied_ptransform with zero or one output
     bundle, one unprocessed bundle, and the current wall-clock time as the
     hold under the None key.
   """
   output_bundles = []
   messages = self._read_from_pubsub()
   if messages:
     first_output = list(self._outputs)[0]
     out = self._evaluation_context.create_bundle(first_output)
     # TODO(ccy): we currently do not use the PubSub message timestamp or
     # respect the PubSub source's id_label field.
     read_time = Timestamp.of(time.time())
     for payload in messages:
       out.output(GlobalWindows.windowed_value(payload, timestamp=read_time))
     output_bundles.append(out)

   transform_inputs = self._applied_ptransform.inputs
   if transform_inputs:
     input_pvalue = transform_inputs[0]
   else:
     input_pvalue = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
   pending = self._evaluation_context.create_bundle(input_pvalue)

   holds = {None: Timestamp.of(time.time())}
   return TransformResult(
       self._applied_ptransform, output_bundles, [pending], None, holds)
예제 #15
0
 def finish_bundle(self):
     """Return the collected bundles with the stored watermark as the hold.

     Returns:
         A TransformResult with self.bundles as output, no unprocessed
         bundles, no counters, and self._watermark held under the None key.
     """
     output_bundles = self.bundles
     # The watermark hold we set here is the way we allow the TestStream
     # events to control the output watermark.
     holds = {None: self._watermark}
     return TransformResult(self, output_bundles, [], None, holds)
예제 #16
0
 def finish_bundle(self):
   """Return self.bundle as the only output bundle.

   Returns:
     A TransformResult for self._applied_ptransform with one output bundle,
     no unprocessed bundles, no counters and no holds.
   """
   output_bundles = [self.bundle]
   no_unprocessed = []
   return TransformResult(
       self._applied_ptransform, output_bundles, no_unprocessed, None, None)
 def finish_bundle(self):
     """Finish the runner and package its outputs into a TransformResult.

     Returns:
         A TransformResult holding the tagged receivers' bundles as a list,
         no unprocessed bundles, the counters from the counter factory and
         no watermark holds.
     """
     self.runner.finish()
     # Materialize a list: on Python 3 a mapping's values() returns a live
     # view rather than a list, so the result would otherwise track later
     # changes to the receivers.
     bundles = list(self._tagged_receivers.values())
     result_counters = self._counter_factory.get_counters()
     return TransformResult(self, bundles, [], result_counters, None)
 def finish_bundle(self):
     """Return self.bundle as the only output bundle.

     Returns:
         A TransformResult with one output bundle, no unprocessed bundles,
         no counters and no holds.
     """
     output_bundles = [self.bundle]
     no_unprocessed = []
     return TransformResult(self, output_bundles, no_unprocessed, None, None)
예제 #19
0
 def finish_bundle(self):
     """Return self.bundle as the only output bundle, with an empty holds dict.

     Returns:
         A TransformResult with one output bundle, no unprocessed bundles,
         no counters and an empty dict of holds.
     """
     output_bundles = [self.bundle]
     empty_holds = {}
     return TransformResult(self, output_bundles, [], None, empty_holds)
예제 #20
0
 def finish_bundle(self):
     """Emit a single empty-bytes element into the transform's only output.

     Returns:
         A TransformResult with one output bundle containing b'' in the
         global window, no unprocessed bundles, no counters and no holds.
     """
     assert len(self._outputs) == 1
     target = list(self._outputs)[0]
     out = self._evaluation_context.create_bundle(target)
     seed = GlobalWindows.windowed_value(b'')
     out.output(seed)
     return TransformResult(self, [out], [], None, None)