def finish_bundle(self):
    """Finish the wrapped runner and package what it produced.

    Returns:
      A TransformResult built from the applied transform, the values of the
      tagged receivers (as the output bundles), the counters obtained from
      the counter factory, and the receivers' undeclared in-memory tag
      values.
    """
    # Finish the runner first; the receivers and counters are read afterwards.
    self.runner.finish()

    output_bundles = self._tagged_receivers.values()
    counters = self._counter_factory.get_counters()

    return TransformResult(
        self._applied_ptransform,
        output_bundles,
        None,
        counters,
        None,
        self._tagged_receivers.undeclared_in_memory_tag_values)
def finish_bundle(self):
    """Emit the grouped output once the final bundle has been seen.

    For a non-final bundle nothing is emitted and the result carries
    WATERMARK_NEG_INF as its last argument. For the final bundle the
    accumulated ``self.state.output`` is decoded, wrapped in windowed values
    and split into bundles, unless ``self.state.completed`` is already set,
    in which case no bundles are emitted. The final-bundle result carries
    WATERMARK_POS_INF.

    Returns:
      A TransformResult carrying the output bundles, ``self.state`` and the
      watermark constant described above.
    """
    if not self._is_final_bundle:
        return TransformResult(
            self._applied_ptransform, [], self.state, None, None,
            WatermarkManager.WATERMARK_NEG_INF)

    if self.state.completed:
        # Ignore empty bundles after emitting output. (This may happen
        # because empty bundles do not affect input watermarks.)
        output_bundles = []
    else:
        def count_grouped_values(windowed_element):
            # Each element value is a (key, values) pair; size it by values.
            _, grouped_values = windowed_element.value
            return len(grouped_values)

        decoded_pairs = (
            (self.key_coder.decode(encoded_key), grouped_values)
            for encoded_key, grouped_values in self.state.output.iteritems())
        windowed_pairs = map(GlobalWindows.windowed_value, decoded_pairs)

        output_bundles = self._split_list_into_bundles(
            self.output_pcollection,
            windowed_pairs,
            _GroupByKeyOnlyEvaluator.MAX_ELEMENT_PER_BUNDLE,
            count_grouped_values)
        # Remember that output was produced so later calls emit nothing.
        self.state.completed = True

    return TransformResult(
        self._applied_ptransform, output_bundles, self.state, None, None,
        WatermarkManager.WATERMARK_POS_INF)
def finish_bundle(self):
    """Output every value of the applied transform into ``self.bundle``.

    Each entry of the transform's ``value`` is wrapped with
    ``GlobalWindows.windowed_value`` and written to ``self.bundle``.

    Returns:
      A TransformResult whose only output bundle is ``self.bundle``.
    """
    source_transform = self._applied_ptransform.transform
    assert source_transform.value is not None

    # Wrap all values up front, then output them one by one.
    windowed_values = [
        GlobalWindows.windowed_value(raw_value)
        for raw_value in source_transform.value
    ]
    for windowed_value in windowed_values:
        self.bundle.output(windowed_value)

    return TransformResult(
        self._applied_ptransform, [self.bundle], None, None, None, None)
def finish_bundle(self):
    """Emit the accumulated state as one bundle on the final bundle.

    For a non-final bundle no output is produced; ``self.state`` is passed
    along in the result together with WATERMARK_NEG_INF. For the final
    bundle every item of ``self.state`` is output into a newly created
    bundle, no state is passed along, and the result carries
    WATERMARK_POS_INF.

    Returns:
      A TransformResult describing the outcome above.
    """
    if not self._is_final_bundle:
        return TransformResult(
            self._applied_ptransform, [], self.state, None, None,
            WatermarkManager.WATERMARK_NEG_INF)

    output_bundle = self._evaluation_context.create_bundle(
        self.output_pcollection)
    for accumulated in self.state:
        output_bundle.output(accumulated)

    return TransformResult(
        self._applied_ptransform, [output_bundle], None, None, None,
        WatermarkManager.WATERMARK_POS_INF)
def finish_bundle(self):
    """Emit the grouped (key, values) output once the final bundle arrives.

    For a non-final bundle nothing is emitted and the result carries
    WATERMARK_NEG_INF. For the final bundle, the elements stored per key in
    the step context are collected, wrapped in windowed values and split
    into bundles, unless the completion tag is already set in the global
    state, in which case no bundles are emitted. The final-bundle result
    carries WATERMARK_POS_INF.

    Returns:
      A TransformResult carrying the output bundles and the watermark
      constant described above.
    """
    if not self._is_final_bundle:
        return TransformResult(
            self._applied_ptransform, [], None, None,
            WatermarkManager.WATERMARK_NEG_INF)

    already_completed = self.global_state.get_state(
        None, _GroupByKeyOnlyEvaluator.COMPLETION_TAG)
    if already_completed:
        # Ignore empty bundles after emitting output. (This may happen
        # because empty bundles do not affect input watermarks.)
        output_bundles = []
    else:
        def count_grouped_values(windowed_element):
            # Each element value is a (key, values) pair; size it by values.
            _, grouped_values = windowed_element.value
            return len(grouped_values)

        grouped_output = []
        # TODO(ccy): perhaps we can clean this up to not use this
        # internal attribute of the DirectStepContext.
        for encoded_key in self.step_context.keyed_existing_state:
            if encoded_key is not None:  # A None key is the global state.
                decoded_key = self.key_coder.decode(encoded_key)
                keyed_state = self.step_context.get_keyed_state(encoded_key)
                grouped_values = keyed_state.get_state(
                    None, _GroupByKeyOnlyEvaluator.ELEMENTS_TAG)
                grouped_output.append(
                    GlobalWindows.windowed_value(
                        (decoded_key, grouped_values)))

        output_bundles = self._split_list_into_bundles(
            self.output_pcollection,
            grouped_output,
            _GroupByKeyOnlyEvaluator.MAX_ELEMENT_PER_BUNDLE,
            count_grouped_values)
        # Record completion so that later calls emit nothing.
        self.global_state.add_state(
            None, _GroupByKeyOnlyEvaluator.COMPLETION_TAG, True)

    return TransformResult(
        self._applied_ptransform, output_bundles, None, None,
        WatermarkManager.WATERMARK_POS_INF)
def finish_bundle(self):
    """Read the whole source and return its elements as output bundles.

    A source that is an ``io.iobase.BoundedSource`` is read through a range
    tracker requested with ``(None, None)``; any other source is read
    through the context manager returned by its ``reader()`` method. Every
    element read is wrapped with ``GlobalWindows.windowed_value``.

    Returns:
      A TransformResult carrying the bundles produced from the read values.
    """
    assert len(self._outputs) == 1
    output_pcollection = list(self._outputs)[0]

    def to_bundles(element_iterable):
        # Each windowed value counts as one element when splitting.
        windowed = [
            GlobalWindows.windowed_value(item) for item in element_iterable
        ]
        return self._split_list_into_bundles(
            output_pcollection,
            windowed,
            _BoundedReadEvaluator.MAX_ELEMENT_PER_BUNDLE,
            lambda _: 1)

    if isinstance(self._source, io.iobase.BoundedSource):
        # Getting a RangeTracker for the default range of the source and
        # reading the full source using that.
        tracker = self._source.get_range_tracker(None, None)
        source_reader = self._source.read(tracker)
        output_bundles = to_bundles(source_reader)
    else:
        with self._source.reader() as source_reader:
            output_bundles = to_bundles(source_reader)

    return TransformResult(
        self._applied_ptransform, output_bundles, None, None, None)
def finish_bundle(self):
    """Write the buffered elements to the sink on the final bundle.

    Incoming bundles are accumulated in memory until all the bundles
    carrying data have been processed. This is done to produce only a single
    output shard (some tests depend on this behavior). Empty bundles may
    still arrive after the output has been produced; they are ignored and do
    not generate additional output files.

    Returns:
      A TransformResult with no output bundles, carrying WATERMARK_POS_INF
      for the final bundle and WATERMARK_NEG_INF otherwise.
    """
    # TODO(altay): Do not wait until the last bundle to write in a single
    # shard.
    if not self._is_final_bundle:
        return TransformResult(
            self._applied_ptransform, [], None, None,
            WatermarkManager.WATERMARK_NEG_INF)

    buffered_elements = self.global_state.get_state(
        None, _NativeWriteEvaluator.ELEMENTS_TAG)

    if self._has_already_produced_output:
        # Ignore empty bundles that arrive after the output is produced.
        assert buffered_elements == []
    else:
        self._sink.pipeline_options = (
            self._evaluation_context.pipeline_options)
        with self._sink.writer() as sink_writer:
            for windowed_element in buffered_elements:
                sink_writer.Write(windowed_element.value)

    return TransformResult(
        self._applied_ptransform, [], None, None,
        WatermarkManager.WATERMARK_POS_INF)
def finish_bundle(self):
    """Return a TransformResult whose only output bundle is ``self.bundle``."""
    return TransformResult(
        self._applied_ptransform, [self.bundle], None, None, None)