Ejemplo n.º 1
0
 def test_update_multiple(self):
   """Exercise OperationCounters with several updates over two collects.

   verify_counters is called with 0 before any update, with 2 after the
   first two string values have been collected, and with 3 after a third
   value has been collected.
   """
   counters = OperationCounters(
       CounterFactory(), 'some-name', coders.PickleCoder(), 0)
   self.verify_counters(counters, 0)
   # First batch: two values followed by a single collect.
   for payload in ('abcde', 'defghij'):
     counters.update_from(GlobalWindows.windowed_value(payload))
   counters.update_collect()
   self.verify_counters(counters, 2)
   # Second batch: one more value and another collect.
   counters.update_from(GlobalWindows.windowed_value('klmnop'))
   counters.update_collect()
   self.verify_counters(counters, 3)
 def test_update_multiple(self):
     """Exercise OperationCounters with several updates over two collects.

     verify_counters is called with 0 before any update, with 2 after the
     first two string values have been collected, and with 3 after a third
     value has been collected.
     """
     counters = OperationCounters(
         CounterFactory(), 'some-name', coders.PickleCoder(), 0)
     self.verify_counters(counters, 0)
     # First batch: two values followed by a single collect.
     for payload in ('abcde', 'defghij'):
         counters.update_from(GlobalWindows.WindowedValue(payload))
     counters.update_collect()
     self.verify_counters(counters, 2)
     # Second batch: one more value and another collect.
     counters.update_from(GlobalWindows.WindowedValue('klmnop'))
     counters.update_collect()
     self.verify_counters(counters, 3)
Ejemplo n.º 3
0
 def test_update_int(self):
   """Exercise OperationCounters with a single integer value.

   verify_counters is called with 0 before the update and with 1 after
   the value has been fed in and collected.
   """
   counters = OperationCounters(
       CounterFactory(), 'some-name', coders.PickleCoder(), 0)
   self.verify_counters(counters, 0)
   element = GlobalWindows.windowed_value(1)
   counters.update_from(element)
   counters.update_collect()
   self.verify_counters(counters, 1)
 def test_update_int(self):
     """Exercise OperationCounters with a single integer value.

     verify_counters is called with 0 before the update and with 1 after
     the value has been fed in and collected.
     """
     counters = OperationCounters(
         CounterFactory(), 'some-name', coders.PickleCoder(), 0)
     self.verify_counters(counters, 0)
     element = GlobalWindows.WindowedValue(1)
     counters.update_from(element)
     counters.update_collect()
     self.verify_counters(counters, 1)
Ejemplo n.º 5
0
 def run_Create(self, transform_node):
     """Wrap the transform's values and cache them as the node's output.

     Each item of transform_node.transform.value is passed through
     GlobalWindows.WindowedValue; the number of items is added to the
     'element_counts' debug counter under the node's full label.
     """
     windowed_values = [
         GlobalWindows.WindowedValue(item)
         for item in transform_node.transform.value
     ]
     element_counts = self.debug_counters['element_counts']
     element_counts[transform_node.full_label] += len(windowed_values)
     self._cache.cache_output(transform_node, windowed_values)
 def test_update_old_object(self):
     """Exercise OperationCounters with an OldClassThatDoesNotImplementLen.

     verify_counters is called with 0 before the update and with 1 after
     the wrapped instance has been fed in and collected.
     """
     counters = OperationCounters(
         CounterFactory(), 'some-name', coders.PickleCoder(), 0)
     self.verify_counters(counters, 0)
     counters.update_from(
         GlobalWindows.WindowedValue(OldClassThatDoesNotImplementLen()))
     counters.update_collect()
     self.verify_counters(counters, 1)
Ejemplo n.º 7
0
 def test_update_old_object(self):
   """Exercise OperationCounters with an OldClassThatDoesNotImplementLen.

   verify_counters is called with 0 before the update and with 1 after
   the wrapped instance has been fed in and collected.
   """
   counters = OperationCounters(
       CounterFactory(), 'some-name', coders.PickleCoder(), 0)
   self.verify_counters(counters, 0)
   counters.update_from(
       GlobalWindows.windowed_value(OldClassThatDoesNotImplementLen()))
   counters.update_collect()
   self.verify_counters(counters, 1)
Ejemplo n.º 8
0
 def run_Read(self, transform_node):
   """Read everything from the transform's source and cache the result.

   The source is handed the pipeline options of the node's first input,
   then each element yielded by its reader is wrapped with
   GlobalWindows.windowed_value. The element count is added to the
   'element_counts' debug counter under the node's full label.
   """
   # TODO(chamikara) Implement a more generic way for passing PipelineOptions
   # to sources and sinks when using DirectRunner.
   source = transform_node.transform.source
   source.pipeline_options = transform_node.inputs[0].pipeline.options
   with source.reader() as reader:
     # Counting and caching stay inside the reader's context.
     elements = [GlobalWindows.windowed_value(item) for item in reader]
     element_counts = self.debug_counters['element_counts']
     element_counts[transform_node.full_label] += len(elements)
     self._cache.cache_output(transform_node, elements)
Ejemplo n.º 9
0
 def run_Read(self, transform_node):
     """Read everything from the transform's source and cache the result.

     The source is handed the pipeline options of the node's first input,
     then each element yielded by its reader is wrapped with
     GlobalWindows.WindowedValue. The element count is added to the
     'element_counts' debug counter under the node's full label.
     """
     # TODO(chamikara) Implement a more generic way for passing PipelineOptions
     # to sources and sinks when using DirectRunner.
     source = transform_node.transform.source
     source.pipeline_options = transform_node.inputs[0].pipeline.options
     with source.reader() as reader:
         # Counting and caching stay inside the reader's context.
         elements = [GlobalWindows.WindowedValue(item) for item in reader]
         element_counts = self.debug_counters['element_counts']
         element_counts[transform_node.full_label] += len(elements)
         self._cache.cache_output(transform_node, elements)
Ejemplo n.º 10
0
 def start(self):
     """Read every value from the spec's source and emit it via output().

     Values are emitted unchanged when the reader reports
     returns_windowed_values; otherwise they are wrapped with
     GlobalWindows.windowed_value first.
     """
     # We cache reader progress to make sure that the progress reporting
     # thread does not get blocked due to any reader related operations.
     self._current_progress = None
     super(ReadOperation, self).start()
     with self.spec.source.reader() as reader:
         self._reader = reader
         for element in reader:
             # Refresh the cached progress before emitting each element.
             self._current_progress = reader.get_progress()
             self.output(
                 element if reader.returns_windowed_values
                 else GlobalWindows.windowed_value(element))
Ejemplo n.º 11
0
 def start(self):
   """Read every value from the spec's source and emit it via output().

   Values are emitted unchanged when the reader reports
   returns_windowed_values; otherwise they are wrapped with
   GlobalWindows.windowed_value first.
   """
   # We cache reader progress to make sure that the progress reporting
   # thread does not get blocked due to any reader related operations.
   self._current_progress = None
   super(ReadOperation, self).start()
   with self.spec.source.reader() as reader:
     self._reader = reader
     for element in reader:
       # Refresh the cached progress before emitting each element.
       self._current_progress = reader.get_progress()
       self.output(
           element if reader.returns_windowed_values
           else GlobalWindows.windowed_value(element))
Ejemplo n.º 12
0
 def start(self):
   """Read values from the shuffle source and emit them via output().

   When no shuffle source has been set, one is built from the spec's
   shuffle reader config and start/end shuffle positions, and a windowed
   tuple coder is used for the emitted values; otherwise values are
   emitted with coder=None.
   """
   super(UngroupedShuffleReadOperation, self).start()
   write_coder = None
   if self.shuffle_source is None:
     element_coders = (BytesCoder(), self.spec.coder)
     write_coder = WindowedValueCoder(TupleCoder(element_coders))
     spec = self.spec
     self.shuffle_source = shuffle.UngroupedShuffleSource(
         spec.shuffle_reader_config,
         coder=element_coders,
         start_position=spec.start_shuffle_position,
         end_position=spec.end_shuffle_position)
   with self.shuffle_source.reader() as reader:
     for element in reader:
       # NOTE(review): _reader is reassigned on every iteration and is never
       # set when the reader yields nothing -- confirm whether that is
       # intended before hoisting it out of the loop.
       self._reader = reader
       self.output(GlobalWindows.windowed_value(element), coder=write_coder)
Ejemplo n.º 13
0
 def start(self):
   """Read values from the shuffle source and emit them via output().

   When no shuffle source has been set, one is built from the spec's
   shuffle reader config and start/end shuffle positions, and a windowed
   tuple coder is used for the emitted values; otherwise values are
   emitted with coder=None.
   """
   super(UngroupedShuffleReadOperation, self).start()
   write_coder = None
   if self.shuffle_source is None:
     element_coders = (BytesCoder(), self.spec.coder)
     write_coder = WindowedValueCoder(TupleCoder(element_coders))
     spec = self.spec
     self.shuffle_source = shuffle.UngroupedShuffleSource(
         spec.shuffle_reader_config,
         coder=element_coders,
         start_position=spec.start_shuffle_position,
         end_position=spec.end_shuffle_position)
   with self.shuffle_source.reader() as reader:
     for element in reader:
       # NOTE(review): _reader is reassigned on every iteration and is never
       # set when the reader yields nothing -- confirm whether that is
       # intended before hoisting it out of the loop.
       self._reader = reader
       self.output(GlobalWindows.WindowedValue(element), coder=write_coder)
Ejemplo n.º 14
0
 def read_values(reader):
   """Wrap every element of reader and cache the list as the node's output.

   self and transform_node are not parameters; they are taken from the
   enclosing scope, which is not visible here. The element count is added
   to the 'element_counts' debug counter under the node's full label.
   """
   elements = [GlobalWindows.windowed_value(item) for item in reader]
   element_counts = self.debug_counters['element_counts']
   element_counts[transform_node.full_label] += len(elements)
   self._cache.cache_output(transform_node, elements)
Ejemplo n.º 15
0
 def run_Create(self, transform_node):
   """Wrap the transform's values and cache them as the node's output.

   Each item of transform_node.transform.value is passed through
   GlobalWindows.windowed_value; the number of items is added to the
   'element_counts' debug counter under the node's full label.
   """
   windowed_values = [
       GlobalWindows.windowed_value(item)
       for item in transform_node.transform.value
   ]
   element_counts = self.debug_counters['element_counts']
   element_counts[transform_node.full_label] += len(windowed_values)
   self._cache.cache_output(transform_node, windowed_values)
Ejemplo n.º 16
0
 def read_values(reader):
     """Wrap every element of reader and cache the list as the node's output.

     self and transform_node are not parameters; they are taken from the
     enclosing scope, which is not visible here. The element count is added
     to the 'element_counts' debug counter under the node's full label.
     """
     elements = [GlobalWindows.windowed_value(item) for item in reader]
     element_counts = self.debug_counters['element_counts']
     element_counts[transform_node.full_label] += len(elements)
     self._cache.cache_output(transform_node, elements)
Ejemplo n.º 17
0
 def __iter__(self):
   """Yield one windowed value per message in the work item's bundles.

   Each message's data is decoded with the source's coder, and its
   timestamp is converted with windmill_to_harness_timestamp before being
   passed to GlobalWindows.WindowedValue.
   """
   for bundle in self.source.context.work_item.message_bundles:
     for message in bundle.messages:
       decoded = self.source.coder.decode(message.data)
       harness_timestamp = windmill_to_harness_timestamp(message.timestamp)
       yield GlobalWindows.WindowedValue(decoded, timestamp=harness_timestamp)