def finish_bundle(self):
    """Finish the wrapped ParDo evaluator and add this evaluator's holds.

    Returns:
        A TransformResult built from the wrapped evaluator's result, with
        every entry of ``self.keyed_holds`` assigned into its keyed
        watermark holds.
    """
    inner = self._par_do_evaluator.finish_bundle()
    result = TransformResult(
        self,
        inner.uncommitted_output_bundles,
        inner.unprocessed_bundles,
        inner.counters,
        inner.keyed_watermark_holds,
        inner.undeclared_tag_values)
    # Entries from self.keyed_holds replace same-keyed entries already there.
    for hold_key in self.keyed_holds:
        result.keyed_watermark_holds[hold_key] = self.keyed_holds[hold_key]
    return result
def finish_bundle(self):
    """Write the buffered elements to the sink once the final bundle arrives.

    Incoming bundles are accumulated in memory until all bundles carrying
    data have been processed, so that only a single output shard is produced
    (some tests depend on this behavior). Empty bundles may still arrive
    after the output has been produced; they are ignored and do not generate
    additional output files.

    Returns:
        A TransformResult with no output bundles and a single watermark hold
        under the ``None`` key: positive infinity on the final bundle,
        negative infinity otherwise.
    """
    # TODO(altay): Do not wait until the last bundle to write in a single shard.
    if not self._is_final_bundle:
        hold = WatermarkManager.WATERMARK_NEG_INF
        self.global_state.set_timer(
            None, '', TimeDomain.WATERMARK,
            WatermarkManager.WATERMARK_POS_INF)
    else:
        buffered = self.global_state.get_state(
            None, _NativeWriteEvaluator.ELEMENTS_TAG)
        if self._has_already_produced_output:
            # Ignore empty bundles that arrive after the output is produced.
            assert buffered == []
        else:
            self._sink.pipeline_options = (
                self._evaluation_context.pipeline_options)
            with self._sink.writer() as sink_writer:
                for windowed in buffered:
                    sink_writer.Write(windowed.value)
        hold = WatermarkManager.WATERMARK_POS_INF

    return TransformResult(
        self._applied_ptransform, [], [], None, {None: hold})
def finish_bundle(self):
    """Finish the runner and report its output bundles and counters.

    Returns:
        A TransformResult carrying the bundles held by
        ``self._tagged_receivers``, the counters from the counter factory,
        no watermark holds, and the receivers' undeclared in-memory tag
        values.
    """
    self.runner.finish()
    # Materialize the values: on Python 3 a mapping's values() is a live
    # view rather than a list (assumes _tagged_receivers is dict-like).
    bundles = list(self._tagged_receivers.values())
    result_counters = self._counter_factory.get_counters()
    return TransformResult(
        self._applied_ptransform,
        bundles,
        [],
        result_counters,
        None,
        self._tagged_receivers.undeclared_in_memory_tag_values)
def finish_bundle(self):
    """Finish the runner, commit user state if any, and report the outputs.

    Returns:
        A TransformResult carrying the bundles held by
        ``self._tagged_receivers`` and the counters from the counter
        factory, with no unprocessed bundles and no watermark holds.
    """
    self.runner.finish()
    output_bundles = list(self._tagged_receivers.values())
    counters = self._counter_factory.get_counters()
    user_state = self.user_state_context
    if user_state:
        # Commit only after the runner has finished the bundle.
        user_state.commit()
    return TransformResult(self, output_bundles, [], counters, None)
def finish_bundle(self):
    """Read available PubSub messages and emit them as one output bundle.

    Each message is output in the global window at its own timestamp; the
    full message is emitted when ``self.source.with_attributes`` is set,
    otherwise only its payload. An unprocessed bundle on the transform's
    input (or a PBegin when it has no inputs) is always returned as well.

    Returns:
        A TransformResult with zero or one output bundle, one unprocessed
        bundle, and a watermark hold at the current wall-clock time.
    """
    data = self._read_from_pubsub(self.source.timestamp_attribute)
    bundles = []
    if data:
        output_pcollection = list(self._outputs)[0]
        out_bundle = self._evaluation_context.create_bundle(output_pcollection)
        # TODO(ccy): Respect the PubSub source's id_label field.
        for timestamp, message in data:
            element = (
                message if self.source.with_attributes else message.payload)
            out_bundle.output(
                GlobalWindows.windowed_value(element, timestamp=timestamp))
        bundles.append(out_bundle)

    inputs = self._applied_ptransform.inputs
    if inputs:
        input_pvalue = inputs[0]
    else:
        input_pvalue = pvalue.PBegin(
            self._applied_ptransform.transform.pipeline)
    unprocessed_bundle = self._evaluation_context.create_bundle(input_pvalue)

    # TODO(udim): Correct value for watermark hold.
    return TransformResult(
        self, bundles, [unprocessed_bundle], None,
        {None: Timestamp.of(time.time())})
def finish_bundle(self):
    """Emit the collected ``gabw_items`` as a single output bundle.

    Returns:
        A TransformResult with ``self.keyed_holds`` as its watermark holds
        and either one bundle containing every item, or no bundles when
        there are no items.
    """
    if not self.gabw_items:
        return TransformResult(self, [], [], None, self.keyed_holds)

    out_bundle = self._evaluation_context.create_bundle(
        self.output_pcollection)
    for work_item in self.gabw_items:
        out_bundle.add(work_item)
    return TransformResult(self, [out_bundle], [], None, self.keyed_holds)
def finish_bundle(self):
    """Emit one KeyedWorkItem per encoded key, all in a single bundle.

    The output bundle is created lazily, so no bundle is produced when
    ``self.gbk_items`` is empty.

    Returns:
        A TransformResult with zero or one output bundle, no unprocessed
        bundles, no counters and no watermark holds.
    """
    bundles = []
    bundle = None
    for encoded_k, vs in iteritems(self.gbk_items):
        # Compare against None explicitly: a truthiness test would depend on
        # how the bundle object defines __bool__/__len__ and could create a
        # second bundle by mistake.
        if bundle is None:
            bundle = self._evaluation_context.create_bundle(
                self.output_pcollection)
            bundles.append(bundle)
        kwi = KeyedWorkItem(encoded_k, elements=vs)
        bundle.add(GlobalWindows.windowed_value(kwi))
    return TransformResult(self, bundles, [], None, None)
def finish_bundle(self):
    """Schedule the next test-stream event, if any, and set the hold.

    While events remain after ``self.current_index``, an unprocessed bundle
    carrying the next index (stamped with the current watermark) is
    returned and the watermark is used as the hold. Otherwise the hold is
    ``None``.

    Returns:
        A TransformResult with ``self.bundles`` as output, the optional
        unprocessed bundle, and the hold under the ``None`` key.
    """
    pending = []
    hold = None
    last_index = len(self.test_stream.events) - 1
    if self.current_index < last_index:
        begin = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
        next_bundle = self._evaluation_context.create_bundle(begin)
        next_bundle.add(
            GlobalWindows.windowed_value(
                self.current_index + 1, timestamp=self.watermark))
        pending.append(next_bundle)
        hold = self.watermark

    return TransformResult(
        self._applied_ptransform, self.bundles, pending, None, {None: hold})
def finish_bundle(self):
    """Schedule the next test-stream index unless the stream has ended.

    Returns:
        A TransformResult with ``self.bundles`` as output, an unprocessed
        bundle carrying the next index when the stream has not ended, and
        the current watermark as the hold under the ``None`` key.
    """
    pending = []
    next_index = self.test_stream.next(self.current_index)
    if not self.test_stream.end(next_index):
        begin = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
        next_bundle = self._evaluation_context.create_bundle(begin)
        next_bundle.add(
            GlobalWindows.windowed_value(
                next_index, timestamp=self.watermark))
        pending.append(next_bundle)

    # Returning the watermark in the dict here is used as a watermark hold.
    holds = {None: self.watermark}
    return TransformResult(self, self.bundles, pending, None, holds)
def finish_bundle(self):
    """Emit the output bundles and, until done, a heartbeat bundle.

    Returns:
        A TransformResult with ``self.bundles`` as output, an unprocessed
        heartbeat bundle while ``self.is_done`` is false, and the current
        watermark as the hold under the ``None`` key.
    """
    pending = []
    # Continue to send its own state to itself via an unprocessed bundle.
    # This acts as a heartbeat, where each element will read the next event
    # from the event stream.
    if not self.is_done:
        begin = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
        heartbeat = self._evaluation_context.create_bundle(begin)
        heartbeat.add(
            GlobalWindows.windowed_value(b'', timestamp=self.watermark))
        pending.append(heartbeat)

    # Returning the watermark in the dict here is used as a watermark hold.
    holds = {None: self.watermark}
    return TransformResult(self, self.bundles, pending, None, holds)
def finish_bundle(self):
    """Emit the grouped (key, values) results once the final bundle arrives.

    Before the final bundle, nothing is output: the watermark is held at
    negative infinity and a watermark timer is set at positive infinity. On
    the final bundle, every keyed state (the global ``None`` key excluded)
    is decoded into a ``(key, values)`` element and the elements are split
    into output bundles, unless the completion tag is already set.

    Returns:
        A TransformResult with the output bundles and a single watermark
        hold under the ``None`` key.
    """
    if not self._is_final_bundle():
        bundles = []
        hold = WatermarkManager.WATERMARK_NEG_INF
        self.global_state.set_timer(
            None, '', TimeDomain.WATERMARK,
            WatermarkManager.WATERMARK_POS_INF)
        return TransformResult(
            self._applied_ptransform, bundles, [], None, {None: hold})

    already_emitted = self.global_state.get_state(
        None, _GroupByKeyOnlyEvaluator.COMPLETION_TAG)
    if already_emitted:
        # Ignore empty bundles after emitting output. (This may happen
        # because empty bundles do not affect input watermarks.)
        bundles = []
    else:
        grouped = []
        # TODO(ccy): perhaps we can clean this up to not use this
        # internal attribute of the DirectStepContext.
        for encoded_key in self.step_context.keyed_existing_state:
            # Ignore global state.
            if encoded_key is None:
                continue
            key = self.key_coder.decode(encoded_key)
            keyed_state = self.step_context.get_keyed_state(encoded_key)
            values = keyed_state.get_state(
                None, _GroupByKeyOnlyEvaluator.ELEMENTS_TAG)
            grouped.append(GlobalWindows.windowed_value((key, values)))

        def len_element_fn(element):
            # Size of an element is the number of values grouped under it.
            _, grouped_values = element.value
            return len(grouped_values)

        bundles = self._split_list_into_bundles(
            self.output_pcollection,
            grouped,
            _GroupByKeyOnlyEvaluator.MAX_ELEMENT_PER_BUNDLE,
            len_element_fn)

    # NOTE(review): the completion tag is recorded on every final bundle,
    # not only the one that emitted output -- confirm this nesting level.
    self.global_state.add_state(
        None, _GroupByKeyOnlyEvaluator.COMPLETION_TAG, True)
    hold = WatermarkManager.WATERMARK_POS_INF
    return TransformResult(
        self._applied_ptransform, bundles, [], None, {None: hold})
def finish_bundle(self):
    """Read the whole source and split what was read into output bundles.

    A ``BoundedSource`` is read through a range tracker covering its default
    range; any other source is read through its ``reader()`` context
    manager. Every element counts as size 1 when splitting into bundles.

    Returns:
        A TransformResult with the resulting bundles and no unprocessed
        bundles, counters or watermark holds.
    """
    assert len(self._outputs) == 1
    output_pcollection = list(self._outputs)[0]

    def _unit_size(_):
        return 1

    def _to_bundles(reader):
        windowed = [GlobalWindows.windowed_value(e) for e in reader]
        return self._split_list_into_bundles(
            output_pcollection,
            windowed,
            _BoundedReadEvaluator.MAX_ELEMENT_PER_BUNDLE,
            _unit_size)

    if isinstance(self._source, io.iobase.BoundedSource):
        # Getting a RangeTracker for the default range of the source and
        # reading the full source using that.
        tracker = self._source.get_range_tracker(None, None)
        bundles = _to_bundles(self._source.read(tracker))
    else:
        with self._source.reader() as source_reader:
            bundles = _to_bundles(source_reader)

    return TransformResult(self, bundles, [], None, None)
def finish_bundle(self):
    """Read available PubSub messages and emit them as one output bundle.

    All messages of one call are output in the global window with the same
    wall-clock timestamp, taken once before they are emitted. An unprocessed
    bundle on the transform's input (or a PBegin when it has no inputs) is
    always returned as well.

    Returns:
        A TransformResult with zero or one output bundle, one unprocessed
        bundle, and a watermark hold at the current wall-clock time.
    """
    data = self._read_from_pubsub()
    bundles = []
    if data:
        output_pcollection = list(self._outputs)[0]
        out_bundle = self._evaluation_context.create_bundle(output_pcollection)
        # TODO(ccy): we currently do not use the PubSub message timestamp or
        # respect the PubSub source's id_label field.
        read_time = Timestamp.of(time.time())
        for message_data in data:
            out_bundle.output(
                GlobalWindows.windowed_value(
                    message_data, timestamp=read_time))
        bundles.append(out_bundle)

    inputs = self._applied_ptransform.inputs
    if inputs:
        input_pvalue = inputs[0]
    else:
        input_pvalue = pvalue.PBegin(
            self._applied_ptransform.transform.pipeline)
    unprocessed_bundle = self._evaluation_context.create_bundle(input_pvalue)

    return TransformResult(
        self._applied_ptransform, bundles, [unprocessed_bundle], None,
        {None: Timestamp.of(time.time())})
def finish_bundle(self):
    """Return the output bundles held at ``self._watermark``.

    Returns:
        A TransformResult with ``self.bundles`` as output and
        ``self._watermark`` as the hold under the ``None`` key.
    """
    # The watermark hold we set here is the way we allow the TestStream
    # events to control the output watermark.
    holds = {None: self._watermark}
    return TransformResult(self, self.bundles, [], None, holds)
def finish_bundle(self):
    """Return a result whose only output bundle is ``self.bundle``.

    Returns:
        A TransformResult for ``self._applied_ptransform`` with no
        unprocessed bundles, counters or watermark holds.
    """
    return TransformResult(
        self._applied_ptransform, [self.bundle], [], None, None)
def finish_bundle(self):
    """Finish the runner and report its output bundles and counters.

    Returns:
        A TransformResult carrying the bundles held by
        ``self._tagged_receivers`` and the counters from the counter
        factory, with no unprocessed bundles and no watermark holds.
    """
    self.runner.finish()
    # Materialize the values: on Python 3 a mapping's values() is a live
    # view rather than a list (assumes _tagged_receivers is dict-like).
    bundles = list(self._tagged_receivers.values())
    result_counters = self._counter_factory.get_counters()
    return TransformResult(self, bundles, [], result_counters, None)
def finish_bundle(self):
    """Return a result whose only output bundle is ``self.bundle``.

    Returns:
        A TransformResult with no unprocessed bundles, counters or
        watermark holds.
    """
    return TransformResult(self, [self.bundle], [], None, None)
def finish_bundle(self):
    """Return a result whose only output bundle is ``self.bundle``.

    Returns:
        A TransformResult with no unprocessed bundles, no counters and an
        empty dict of watermark holds.
    """
    output_bundles = [self.bundle]
    no_holds = {}
    return TransformResult(self, output_bundles, [], None, no_holds)
def finish_bundle(self):
    """Emit a single empty-bytes element in the global window.

    Returns:
        A TransformResult with one output bundle containing that element
        and no unprocessed bundles, counters or watermark holds.
    """
    assert len(self._outputs) == 1
    target = list(self._outputs)[0]
    out_bundle = self._evaluation_context.create_bundle(target)
    seed = GlobalWindows.windowed_value(b'')
    out_bundle.output(seed)
    return TransformResult(self, [out_bundle], [], None, None)