Example #1
0
def output_file_for(window, shard, pane):
  """Build the OutputFile record for one (window, shard, pane) combination.

  The log filename is only formatted when ``output_path`` (defined outside
  this function) is truthy; otherwise the filename passed on is None.

  Returns:
    an OutputFile constructed from the window's max timestamp, the shard,
    the pane index, the pane timing and the (possibly None) filename.
  """
  if output_path:
    filename = '%s/LOG-%s-%s-%03d-%s' % (
        output_path, window.max_timestamp(), shard, pane.index, pane.timing)
  else:
    filename = None
  return OutputFile(
      window.max_timestamp(), shard, pane.index, pane.timing, filename)
Example #2
0
 def finish_bundle(self):
   """Emit every non-empty pending batch, then reset the batching state.

   Each batch is yielded as a WindowedValue stamped with its window's max
   timestamp, inside the estimator's record_time context for the current
   batch size. Afterwards the batch store is cleared (set to None) and a
   fresh batch size is requested from the estimator.
   """
   for win, pending in self._batches.items():
     # Windows whose batch is empty produce no output.
     if not pending:
       continue
     with self._batch_size_estimator.record_time(self._batch_size):
       yield windowed_value.WindowedValue(
           pending, win.max_timestamp(), (win,))
   self._batches = None
   self._batch_size = self._batch_size_estimator.next_batch_size()
Example #3
0
File: util.py Project: xmarker/beam
 def finish_bundle(self):
   """Flush all remaining per-window batches at the end of the bundle.

   Yields one WindowedValue per window that still holds elements, using the
   window's max timestamp and wrapping the emission in the estimator's
   record_time context. Once everything is flushed, the batch mapping is
   dropped (None) and the next batch size is taken from the estimator.
   """
   for live_window, elements in self._batches.items():
     # Nothing to emit for a window with an empty batch.
     if not elements:
       continue
     with self._batch_size_estimator.record_time(self._batch_size):
       yield windowed_value.WindowedValue(
           elements, live_window.max_timestamp(), (live_window,))
   self._batches = None
   self._batch_size = self._batch_size_estimator.next_batch_size()
Example #4
0
 def process(self, element, window=DoFn.WindowParam):
   """Add ``element`` to its window's batch and flush a batch when needed.

   A batch is flushed in one of two situations:
     * the batch of ``window`` has reached the current target batch size, or
     * the number of windows with a pending batch exceeds
       ``_MAX_LIVE_WINDOWS``, in which case the largest pending batch (the
       first one encountered on ties) is flushed.

   Flushing yields the batch as a WindowedValue stamped with the window's
   max timestamp inside the estimator's record_time context, removes the
   batch from the store and asks the estimator for the next batch size.

   Args:
     element: the element to add to the batch.
     window: the window the element belongs to (used as the batch key).

   Yields:
     At most one WindowedValue holding the flushed batch.
   """
   # NOTE(review): relies on self._batches creating a list on first access
   # (e.g. a defaultdict(list)) -- confirm where it is initialised.
   self._batches[window].append(element)
   if len(self._batches[window]) >= self._batch_size:
     flush_window = window
   elif len(self._batches) > self._MAX_LIVE_WINDOWS:
     # max() returns the first maximal item, exactly what
     # sorted(..., reverse=True)[0] gave, without sorting every batch.
     flush_window, _ = max(
         self._batches.items(),
         key=lambda window_batch: len(window_batch[1]))
   else:
     return
   with self._batch_size_estimator.record_time(self._batch_size):
     yield windowed_value.WindowedValue(
         self._batches[flush_window],
         flush_window.max_timestamp(),
         (flush_window,))
   del self._batches[flush_window]
   self._batch_size = self._batch_size_estimator.next_batch_size()
Example #5
0
File: util.py Project: xmarker/beam
 def process(self, element, window=DoFn.WindowParam):
   """Buffer ``element`` under its window and emit a batch when one is due.

   After appending the element, exactly one of these happens:
     * the batch for ``window`` has reached the current batch size, so that
       batch is emitted;
     * more than ``_MAX_LIVE_WINDOWS`` windows hold a pending batch, so the
       largest pending batch is emitted (first one encountered on ties);
     * neither holds and nothing is emitted.

   An emitted batch is yielded as a WindowedValue with the window's max
   timestamp inside the estimator's record_time context; it is then deleted
   from the store and a new batch size is requested from the estimator.

   Args:
     element: the element to buffer.
     window: the window of the element, used as the key into the batches.

   Yields:
     At most one WindowedValue wrapping the emitted batch.
   """
   # NOTE(review): assumes self._batches auto-creates an empty list for a
   # new window (e.g. defaultdict(list)) -- verify against its setup code.
   self._batches[window].append(element)
   if len(self._batches[window]) >= self._batch_size:
     target = window
   elif len(self._batches) > self._MAX_LIVE_WINDOWS:
     # A linear max() replaces the full descending sort; both pick the first
     # item with the greatest batch length.
     target, _ = max(
         self._batches.items(),
         key=lambda window_batch: len(window_batch[1]))
   else:
     return
   with self._batch_size_estimator.record_time(self._batch_size):
     yield windowed_value.WindowedValue(
         self._batches[target], target.max_timestamp(), (target,))
   del self._batches[target]
   self._batch_size = self._batch_size_estimator.next_batch_size()
Example #6
0
def index_path_for(window):
  """Compute the location of the index file for ``window``.

  Returns:
    '<output_path>/INDEX-<window max timestamp>', the path to the index file
    containing all shard names, or None when output_path is not set.
  """
  if not output_path:
    return None
  return '%s/INDEX-%s' % (output_path, window.max_timestamp())