Esempio n. 1
0
 def start(self):
     """Start the operation and open a writer on the shuffle sink.

     Picks the (key, value) coder pair from the first output coder of the
     spec: for an 'ungrouped' shuffle the key side is a BytesCoder and the
     value side is the output coder itself; otherwise the output coder's
     own key and value coders are used. A ShuffleSink is created only when
     none has been set on the instance already.
     """
     super(ShuffleWriteOperation, self).start()
     self.is_ungrouped = self.spec.shuffle_kind == 'ungrouped'
     element_coder = self.spec.output_coders[0]
     key_value_coders = (
         (BytesCoder(), element_coder)
         if self.is_ungrouped
         else (element_coder.key_coder(), element_coder.value_coder()))
     self._write_coder = WindowedValueCoder(TupleCoder(key_value_coders))

     sink = self.shuffle_sink
     if sink is None:
         # No sink set on the instance: build one from the spec's config.
         sink = shuffle.ShuffleSink(
             self.spec.shuffle_writer_config, coder=key_value_coders)
         self.shuffle_sink = sink

     # The writer is entered by hand and is not exited in this method, so
     # it stays open after start() returns.
     self.writer = sink.writer()
     self.writer.__enter__()
Esempio n. 2
0
 def start(self):
   """Start the operation and emit every value read from the shuffle source.

   When no shuffle source has been set on the instance, an
   UngroupedShuffleSource is built from the spec (reader config, start and
   end shuffle positions) and a windowed tuple coder is passed along with
   each output. When a source was already set, outputs are emitted with
   coder=None.
   """
   super(UngroupedShuffleReadOperation, self).start()
   output_coder = None
   if self.shuffle_source is None:
     key_value_coders = (BytesCoder(), self.spec.coder)
     output_coder = WindowedValueCoder(TupleCoder(key_value_coders))
     self.shuffle_source = shuffle.UngroupedShuffleSource(
         self.spec.shuffle_reader_config,
         coder=key_value_coders,
         start_position=self.spec.start_shuffle_position,
         end_position=self.spec.end_shuffle_position)

   with self.shuffle_source.reader() as source_reader:
     for element in source_reader:
       # Assigned inside the loop on purpose: _reader is only set once the
       # reader has produced at least one value.
       self._reader = source_reader
       self.output(GlobalWindows.WindowedValue(element), coder=output_coder)