def test_update_multiple(self):
    """Check the counters after two collect rounds of string elements.

    Two elements are fed before the first collect (expected count 2) and a
    third before the second collect (expected count 3).
    """
    counter_factory = CounterFactory()
    counters = OperationCounters(
        counter_factory, 'some-name', coders.PickleCoder(), 0)
    # Nothing has been fed in yet.
    self.verify_counters(counters, 0)

    # First round: two elements, then a single collect.
    for payload in ('abcde', 'defghij'):
        counters.update_from(GlobalWindows.WindowedValue(payload))
    counters.update_collect()
    self.verify_counters(counters, 2)

    # Second round: one more element on top of the previous two.
    counters.update_from(GlobalWindows.WindowedValue('klmnop'))
    counters.update_collect()
    self.verify_counters(counters, 3)
def test_update_int(self):
    """Feed one windowed int through the counters and expect a count of 1."""
    counter_factory = CounterFactory()
    counters = OperationCounters(
        counter_factory, 'some-name', coders.PickleCoder(), 0)
    # Nothing has been fed in yet.
    self.verify_counters(counters, 0)

    windowed_int = GlobalWindows.WindowedValue(1)
    counters.update_from(windowed_int)
    counters.update_collect()
    self.verify_counters(counters, 1)
def run_Create(self, transform_node):
    """Wrap the transform's values as windowed values and cache them.

    Args:
      transform_node: node whose ``transform.value`` supplies the elements and
        whose ``full_label`` keys the ``element_counts`` debug counter.
    """
    raw_values = transform_node.transform.value
    windowed = list(map(GlobalWindows.WindowedValue, raw_values))

    # Record how many elements this step produced, keyed by its full label.
    element_counts = self.debug_counters['element_counts']
    label = transform_node.full_label
    element_counts[label] += len(windowed)

    self._cache.cache_output(transform_node, windowed)
def test_update_old_object(self):
    """Feed an OldClassThatDoesNotImplementLen instance and expect count 1."""
    counter_factory = CounterFactory()
    counters = OperationCounters(
        counter_factory, 'some-name', coders.PickleCoder(), 0)
    # Nothing has been fed in yet.
    self.verify_counters(counters, 0)

    # The element under test is an instance of the len-less helper class.
    legacy_instance = OldClassThatDoesNotImplementLen()
    counters.update_from(GlobalWindows.WindowedValue(legacy_instance))
    counters.update_collect()
    self.verify_counters(counters, 1)
def run_Read(self, transform_node):
    """Read every element from the transform's source and cache the result.

    The source is handed the pipeline options of the node's first input
    before its reader is opened. Each element read is wrapped with
    ``GlobalWindows.WindowedValue``.

    Args:
      transform_node: node whose ``transform.source`` is read and whose
        ``full_label`` keys the ``element_counts`` debug counter.
    """
    # TODO(chamikara) Implement a more generic way for passing PipelineOptions
    # to sources and sinks when using DirectRunner.
    source = transform_node.transform.source
    source.pipeline_options = transform_node.inputs[0].pipeline.options

    with source.reader() as reader:
        windowed = []
        for element in reader:
            windowed.append(GlobalWindows.WindowedValue(element))

        # Record how many elements this step produced, keyed by its label.
        element_counts = self.debug_counters['element_counts']
        element_counts[transform_node.full_label] += len(windowed)

        self._cache.cache_output(transform_node, windowed)
def start(self):
    """Read every value from the spec's source and pass it to ``output``.

    Values are wrapped with ``GlobalWindows.WindowedValue`` unless the reader
    reports ``returns_windowed_values``, in which case they are emitted as-is.
    """
    # Reader progress is cached on the operation so that the progress
    # reporting thread does not get blocked by reader related operations.
    self._current_progress = None
    super(ReadOperation, self).start()

    with self.spec.source.reader() as source_reader:
        self._reader = source_reader
        for element in source_reader:
            # Refresh the cached progress before the element is emitted.
            self._current_progress = source_reader.get_progress()
            already_windowed = source_reader.returns_windowed_values
            self.output(
                element if already_windowed
                else GlobalWindows.WindowedValue(element))
def start(self):
    """Read all values from the shuffle source and pass them to ``output``.

    If no shuffle source is set, one is built from the spec and a windowed
    tuple coder is used when emitting; otherwise the existing source is read
    and values are emitted with ``coder=None``.
    """
    super(UngroupedShuffleReadOperation, self).start()

    output_coder = None
    if self.shuffle_source is None:
        spec = self.spec
        coder_pair = (BytesCoder(), spec.coder)
        output_coder = WindowedValueCoder(TupleCoder(coder_pair))
        self.shuffle_source = shuffle.UngroupedShuffleSource(
            spec.shuffle_reader_config,
            coder=coder_pair,
            start_position=spec.start_shuffle_position,
            end_position=spec.end_shuffle_position)

    with self.shuffle_source.reader() as shuffle_reader:
        for entry in shuffle_reader:
            # Assigned per value, so _reader is only set once the reader
            # has produced at least one entry.
            self._reader = shuffle_reader
            self.output(
                GlobalWindows.WindowedValue(entry), coder=output_coder)
def __iter__(self):
    """Yield one windowed value per message across all message bundles.

    Each message's data is decoded with the source's coder, and its timestamp
    is converted with ``windmill_to_harness_timestamp``.
    """
    bundles = self.source.context.work_item.message_bundles
    for bundle in bundles:
        for message in bundle.messages:
            decoded = self.source.coder.decode(message.data)
            event_time = windmill_to_harness_timestamp(message.timestamp)
            yield GlobalWindows.WindowedValue(decoded, timestamp=event_time)