コード例 #1
0
 def finish_bundle(self):
     """Finish the wrapped runner and package what it produced.

     Calls ``self.runner.finish()`` first, then collects the values of the
     tagged receivers and the counters reported by the counter factory.

     Returns:
       A TransformResult built from the applied transform, the tagged
       receivers' values, the counters, and the receivers'
       ``undeclared_in_memory_tag_values``.
     """
     self.runner.finish()
     receivers = self._tagged_receivers
     output_bundles = receivers.values()
     counters = self._counter_factory.get_counters()
     return TransformResult(
         self._applied_ptransform,
         output_bundles,
         None,
         counters,
         None,
         receivers.undeclared_in_memory_tag_values)
コード例 #2
0
    def finish_bundle(self):
        """Emit the grouped output once the final bundle has been seen.

        On a non-final bundle nothing is emitted and the watermark hold is
        ``WatermarkManager.WATERMARK_NEG_INF``. On the final bundle the entries
        of ``self.state.output`` are key-decoded, wrapped with
        ``GlobalWindows.windowed_value`` and split into bundles, unless
        ``self.state.completed`` is already set, in which case no bundles are
        produced. The hold is then ``WatermarkManager.WATERMARK_POS_INF``.

        Returns:
          A TransformResult built from the applied transform, the output
          bundles, ``self.state`` and the watermark hold.
        """
        if self._is_final_bundle:
            if self.state.completed:
                # Ignore empty bundles after emitting output. (This may happen
                # because empty bundles do not affect input watermarks.)
                bundles = []
            else:
                # Build a concrete list and iterate with items(): iteritems()
                # does not exist on Python 3 and map() is lazy there, while
                # this form behaves the same on Python 2.
                gbk_result = [
                    GlobalWindows.windowed_value((self.key_coder.decode(k), v))
                    for k, v in self.state.output.items()
                ]

                def len_element_fn(element):
                    # element.value is a (key, values) pair; size by values.
                    _, v = element.value
                    return len(v)

                bundles = self._split_list_into_bundles(
                    self.output_pcollection, gbk_result,
                    _GroupByKeyOnlyEvaluator.MAX_ELEMENT_PER_BUNDLE,
                    len_element_fn)

            self.state.completed = True
            state = self.state
            hold = WatermarkManager.WATERMARK_POS_INF
        else:
            bundles = []
            state = self.state
            hold = WatermarkManager.WATERMARK_NEG_INF

        return TransformResult(self._applied_ptransform, bundles, state, None,
                               None, hold)
コード例 #3
0
    def finish_bundle(self):
        """Output every value of the applied transform into ``self.bundle``.

        Each item of the transform's ``value`` is wrapped with
        ``GlobalWindows.windowed_value`` before being output.

        Returns:
          A TransformResult built from the applied transform and a
          one-element list holding ``self.bundle``.
        """
        ptransform = self._applied_ptransform.transform
        assert ptransform.value is not None

        # Wrap all values up front so nothing is output if wrapping fails.
        windowed_values = [
            GlobalWindows.windowed_value(item) for item in ptransform.value
        ]
        for windowed_value in windowed_values:
            self.bundle.output(windowed_value)

        return TransformResult(
            self._applied_ptransform, [self.bundle], None, None, None, None)
コード例 #4
0
ファイル: transform_evaluator.py プロジェクト: wikier/beam
    def finish_bundle(self):
        """Emit the accumulated state as one bundle on the final bundle.

        Before the final bundle, no bundles are emitted, ``self.state`` is
        passed through and the hold is ``WatermarkManager.WATERMARK_NEG_INF``.
        On the final bundle every item of ``self.state`` is output into a
        freshly created bundle, no state is passed on, and the hold is
        ``WatermarkManager.WATERMARK_POS_INF``.

        Returns:
          A TransformResult built from the applied transform, the bundles,
          the state and the watermark hold.
        """
        if not self._is_final_bundle:
            return TransformResult(
                self._applied_ptransform, [], self.state, None, None,
                WatermarkManager.WATERMARK_NEG_INF)

        out_bundle = self._evaluation_context.create_bundle(
            self.output_pcollection)
        for item in self.state:
            out_bundle.output(item)

        return TransformResult(
            self._applied_ptransform, [out_bundle], None, None, None,
            WatermarkManager.WATERMARK_POS_INF)
コード例 #5
0
    def finish_bundle(self):
        """Emit the grouped (key, values) pairs once the final bundle is seen.

        Before the final bundle, nothing is emitted and the hold is
        ``WatermarkManager.WATERMARK_NEG_INF``. On the final bundle, one
        windowed (key, values) element is built per keyed state entry and the
        elements are split into bundles, unless the completion tag is already
        set in the global state. The completion tag is then recorded and the
        hold is ``WatermarkManager.WATERMARK_POS_INF``.

        Returns:
          A TransformResult built from the applied transform, the output
          bundles and the watermark hold.
        """
        if not self._is_final_bundle:
            return TransformResult(self._applied_ptransform, [], None, None,
                                   WatermarkManager.WATERMARK_NEG_INF)

        def values_count(element):
            # element.value is a (key, values) pair; size by values.
            return len(element.value[1])

        already_completed = self.global_state.get_state(
            None, _GroupByKeyOnlyEvaluator.COMPLETION_TAG)
        if already_completed:
            # Ignore empty bundles after emitting output. (This may happen
            # because empty bundles do not affect input watermarks.)
            output_bundles = []
        else:
            grouped = []
            # TODO(ccy): perhaps we can clean this up to not use this
            # internal attribute of the DirectStepContext.
            for encoded_key in self.step_context.keyed_existing_state:
                # A None key denotes global state, which is skipped.
                if encoded_key is not None:
                    key = self.key_coder.decode(encoded_key)
                    keyed_state = self.step_context.get_keyed_state(
                        encoded_key)
                    values = keyed_state.get_state(
                        None, _GroupByKeyOnlyEvaluator.ELEMENTS_TAG)
                    grouped.append(
                        GlobalWindows.windowed_value((key, values)))

            output_bundles = self._split_list_into_bundles(
                self.output_pcollection, grouped,
                _GroupByKeyOnlyEvaluator.MAX_ELEMENT_PER_BUNDLE,
                values_count)

        self.global_state.add_state(
            None, _GroupByKeyOnlyEvaluator.COMPLETION_TAG, True)
        return TransformResult(self._applied_ptransform, output_bundles, None,
                               None, WatermarkManager.WATERMARK_POS_INF)
コード例 #6
0
    def finish_bundle(self):
        """Read the whole source and split what was read into bundles.

        Exactly one output is expected. A ``BoundedSource`` is read through a
        range tracker for its default range; any other source is read through
        its ``reader()`` context manager.

        Returns:
          A TransformResult built from the applied transform and the bundles
          holding the windowed elements that were read.
        """
        assert len(self._outputs) == 1
        target_pcollection = list(self._outputs)[0]

        def bundle_up(element_reader):
            # Every element counts as size 1 when splitting into bundles.
            windowed = [
                GlobalWindows.windowed_value(item) for item in element_reader
            ]
            return self._split_list_into_bundles(
                target_pcollection, windowed,
                _BoundedReadEvaluator.MAX_ELEMENT_PER_BUNDLE, lambda _: 1)

        if not isinstance(self._source, io.iobase.BoundedSource):
            with self._source.reader() as source_reader:
                output_bundles = bundle_up(source_reader)
        else:
            # Get a RangeTracker for the default range of the source and read
            # the full source using that.
            tracker = self._source.get_range_tracker(None, None)
            output_bundles = bundle_up(self._source.read(tracker))

        return TransformResult(
            self._applied_ptransform, output_bundles, None, None, None)
コード例 #7
0
    def finish_bundle(self):
        """Write the buffered elements to the sink on the final bundle.

        Incoming bundles are kept in memory until all bundles carrying data
        have been processed, so that only a single output shard is produced
        (some tests depend on this behavior). Empty bundles may still arrive
        after the output is produced; they are ignored and do not generate
        additional output files.

        Returns:
          A TransformResult built from the applied transform, no output
          bundles, and a hold of ``WatermarkManager.WATERMARK_POS_INF`` on the
          final bundle or ``WatermarkManager.WATERMARK_NEG_INF`` otherwise.
        """
        # TODO(altay): Do not wait until the last bundle to write in a single
        # shard.
        if not self._is_final_bundle:
            return TransformResult(self._applied_ptransform, [], None, None,
                                   WatermarkManager.WATERMARK_NEG_INF)

        buffered = self.global_state.get_state(
            None, _NativeWriteEvaluator.ELEMENTS_TAG)
        if not self._has_already_produced_output:
            self._sink.pipeline_options = (
                self._evaluation_context.pipeline_options)
            with self._sink.writer() as sink_writer:
                for element in buffered:
                    sink_writer.Write(element.value)
        else:
            # Ignore empty bundles that arrive after the output is produced.
            assert buffered == []

        return TransformResult(self._applied_ptransform, [], None, None,
                               WatermarkManager.WATERMARK_POS_INF)
コード例 #8
0
 def finish_bundle(self):
     """Return a TransformResult whose only bundle is ``self.bundle``."""
     output_bundles = [self.bundle]
     return TransformResult(
         self._applied_ptransform, output_bundles, None, None, None)