def test_windowed_value_coder(self):
     # Covers the cloud object description of a WindowedValueCoder, the exact
     # bytes it produces for a globally windowed 1, and check_coder runs on
     # unnested and tuple-nested windowed values.
     coder = coders.WindowedValueCoder(coders.VarIntCoder(),
                                       coders.GlobalWindowCoder())
     # Verify cloud object representation
     self.assertEqual(
         {
             '@type': 'kind:windowed_value',
             'is_wrapper': True,
             'component_encodings': [
                 coders.VarIntCoder().as_cloud_object(),
                 coders.GlobalWindowCoder().as_cloud_object(),
             ],
         }, coder.as_cloud_object())
     # Test binary representation. The expectation is a bytes literal so the
     # comparison with the encoded output can also succeed on Python 3, where
     # a str never compares equal to bytes (on Python 2 b'' and '' are the
     # same type, so nothing changes there).
     self.assertEqual(
         b'\x01\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01',
         coder.encode(window.GlobalWindows.windowed_value(1)))
     # Test unnested
     self.check_coder(coders.WindowedValueCoder(coders.VarIntCoder()),
                      windowed_value.WindowedValue(3, -100, ()),
                      windowed_value.WindowedValue(-1, 100, (1, 2, 3)))
     # Test nested
     self.check_coder(
         coders.TupleCoder(
             (coders.WindowedValueCoder(coders.FloatCoder()),
              coders.WindowedValueCoder(coders.StrUtf8Coder()))),
         (windowed_value.WindowedValue(1.5, 0, ()),
          windowed_value.WindowedValue("abc", 10, ('window', ))))
Beispiel #2
0
  def test_windowedvalue_coder_paneinfo(self):
    # Runs check_coder over windowed values that differ only in their
    # PaneInfo, both directly and nested pairwise inside a TupleCoder.
    coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())

    # Positional PaneInfo arguments for every non-default pane under test.
    pane_args = [
        (True, True, windowed_value.PaneInfoTiming.EARLY, 0, -1),
        (True, False, windowed_value.PaneInfoTiming.ON_TIME, 0, 0),
        (True, False, windowed_value.PaneInfoTiming.ON_TIME, 10, 0),
        (False, True, windowed_value.PaneInfoTiming.ON_TIME, 0, 23),
        (False, True, windowed_value.PaneInfoTiming.ON_TIME, 12, 23),
        (False, False, windowed_value.PaneInfoTiming.LATE, 0, 123),
    ]
    pane_infos = [windowed_value.PANE_INFO_UNKNOWN]
    pane_infos += [windowed_value.PaneInfo(*args) for args in pane_args]

    samples = []
    for pane in pane_infos:
      samples.append(
          windowed_value.WindowedValue(123, 234, (GlobalWindow(),), pane))

    # Test unnested.
    unknown_pane_value = windowed_value.WindowedValue(
        123, 234, (GlobalWindow(),), windowed_value.PANE_INFO_UNKNOWN)
    self.check_coder(coder, unknown_pane_value)
    for sample in samples:
      self.check_coder(coder, sample)

    # Test nested: every ordered pair of samples, a fresh TupleCoder each time.
    for left in samples:
      for right in samples:
        pair_coder = coders.TupleCoder((coder, coder))
        self.check_coder(pair_coder, (left, right))
 def test_timestamps(self):
   # The timestamp attribute must equal Timestamp.of() of the constructor
   # argument, for both an integer and a fractional input.
   wv = windowed_value.WindowedValue(None, 3, ())
   self.assertEqual(wv.timestamp, Timestamp.of(3))
   # Two reads of the attribute must give the very same object. assertIs
   # reports both operands on failure, unlike assertTrue(a is b).
   self.assertIs(wv.timestamp, wv.timestamp)
   self.assertEqual(
       windowed_value.WindowedValue(None, -2.5, ()).timestamp,
       Timestamp.of(-2.5))
Beispiel #4
0
    def test_homogeneous_windowed_batch_as_windowed_values(self):
        # A batch of two elements is expected to unpack into two
        # WindowedValues that share the batch's timestamp, windows and pane.
        pane_info = windowed_value.PaneInfo(
            True, True, windowed_value.PaneInfoTiming.ON_TIME, 0, 0)
        batch = windowed_value.HomogeneousWindowedBatch.of(
            ['foo', 'bar'], 3, (), pane_info)

        unpacked = list(batch.as_windowed_values(iter))
        expected = [
            windowed_value.WindowedValue(element, 3, (), pane_info)
            for element in ('foo', 'bar')
        ]
        self.assertEqual(unpacked, expected)
Beispiel #5
0
  def test_param_windowed_value_coder(self):
    # Builds a payload by encoding a windowed value, then checks
    # ParamWindowedValueCoder instances constructed from that payload.
    from apache_beam.transforms.window import IntervalWindow
    from apache_beam.utils.windowed_value import PaneInfo

    def param_coder(value_coder):
      # All coders under test share the payload and an IntervalWindowCoder.
      return coders.ParamWindowedValueCoder(
          payload, [value_coder, coders.IntervalWindowCoder()])

    def expected_value(value):
      # Same window and pane info as the value encoded into the payload.
      return windowed_value.WindowedValue(
          value,
          1,
          (window.IntervalWindow(11, 21),),
          PaneInfo(True, False, 1, 2, 3))

    # Milliseconds to microseconds
    timestamp_micros = 1000 * 1000
    wv = windowed_value.create(
        b'',
        timestamp_micros,
        (IntervalWindow(11, 21),),
        PaneInfo(True, False, 1, 2, 3))
    payload_coder = coders.WindowedValueCoder(
        coders.BytesCoder(), coders.IntervalWindowCoder())
    payload = payload_coder.encode(wv)
    coder = param_coder(coders.VarIntCoder())

    # Test binary representation
    encoded = coder.encode(window.GlobalWindows.windowed_value(1))
    self.assertEqual(b'\x01', encoded)

    # Test unnested
    self.check_coder(
        param_coder(coders.VarIntCoder()),
        expected_value(3),
        expected_value(1))

    # Test nested
    nested_coder = coders.TupleCoder((
        param_coder(coders.FloatCoder()),
        param_coder(coders.StrUtf8Coder())))
    self.check_coder(
        nested_coder, (expected_value(1.5), expected_value("abc")))
Beispiel #6
0
    def test_homogeneous_from_windowed_values(self):
        # Nine values over three timestamps are expected to come back as
        # three batches, one per timestamp, keeping the input order inside
        # each batch.
        pane_info = windowed_value.PaneInfo(
            True, True, windowed_value.PaneInfoTiming.ON_TIME, 0, 0)

        prefixes = ('foo', 'bar', 'baz')
        # Each suffix is tied to exactly one timestamp.
        suffix_timestamps = (('foo', 3), ('bar', 6), ('baz', 9))

        windowed_values = [
            windowed_value.WindowedValue(prefix + suffix, ts, (), pane_info)
            for prefix in prefixes
            for suffix, ts in suffix_timestamps
        ]

        batches = list(
            windowed_value.WindowedBatch.from_windowed_values(
                windowed_values, produce_fn=list))
        expected = [
            windowed_value.HomogeneousWindowedBatch.of(
                [prefix + suffix for prefix in prefixes], ts, (), pane_info)
            for suffix, ts in suffix_timestamps
        ]
        self.assertEqual(batches, expected)
Beispiel #7
0
    def test_equality(self):
        # Identical constructor arguments compare equal; a different value,
        # timestamp or windows tuple, or a foreign object, does not.
        make = windowed_value.WindowedValue

        self.assertEqual(make(1, 3, ()), make(1, 3, ()))

        unequal_candidates = [
            make(100, 3, ()),
            make(1, 300, ()),
            make(1, 300, ((), )),
            object(),
        ]
        for candidate in unequal_candidates:
            self.assertNotEqual(make(1, 3, ()), candidate)
def random_windowed_value(num_windows):
  """Builds a WindowedValue in ``num_windows`` interval windows.

  The value and each window's length come from ``small_int()``; window ``i``
  starts at ``i * 10``. The timestamp is fixed at 12345678.

  Args:
    num_windows: Number of IntervalWindows to attach.

  Returns:
    The constructed ``windowed_value.WindowedValue``.
  """
  value = small_int()
  intervals = []
  for index in range(num_windows):
    start = index * 10
    intervals.append(window.IntervalWindow(start, start + small_int()))
  return windowed_value.WindowedValue(
      value=value, timestamp=12345678, windows=tuple(intervals))
Beispiel #9
0
 def finish_bundle(self):
     """Emits the buffered parts of every (window, timestamp) key.

     For each key in ``self._parts`` one WindowedValue is yielded. Its value
     is a dict mapping every tag to the ``pd.concat`` of that tag's parts, or
     to ``None`` when the tag has no parts. Once all keys are emitted,
     ``self.start_bundle()`` is called (presumably to reset the buffers --
     confirm against start_bundle).

     Yields:
       ``windowed_value.WindowedValue`` objects carrying the key's timestamp
       and a one-element windows tuple holding the key's window.
     """
     for (window, timestamp), tagged_parts in self._parts.items():
         yield windowed_value.WindowedValue(
             {
                 # pd.concat raises ValueError when given nothing to
                 # concatenate, so an empty tag is emitted as None instead.
                 tag: pd.concat(parts) if parts else None
                 for tag, parts in tagged_parts.items()
             },
             timestamp,
             (window, ))
     self.start_bundle()
Beispiel #10
0
 def reify_timestamps(element,
                      timestamp=DoFn.TimestampParam,
                      window=DoFn.WindowParam):
     """Wraps the value of a (key, value) pair in a WindowedValue.

     Args:
       element: A two-item (key, value) iterable.
       timestamp: Timestamp recorded on the wrapped value.
       window: Window recorded (as a one-element list) on the wrapped value.

     Returns:
       A ``(key, WindowedValue)`` tuple.
     """
     key, payload = element
     # Transport the window as part of the value and restore it later.
     reified = windowed_value.WindowedValue(payload, timestamp, [window])
     return key, reified
Beispiel #11
0
 def finish_bundle(self):
   """Emits every non-empty per-window batch and resets the batching state.

   Each emission happens inside ``self._batch_size_estimator.record_time``.
   Afterwards ``self._batches`` is set to None and ``self._batch_size`` is
   refreshed from the estimator.

   Yields:
     One ``WindowedValue`` per non-empty batch, stamped with the window's
     ``max_timestamp()``.
   """
   for window, batch in self._batches.items():
     if not batch:
       # Nothing buffered for this window.
       continue
     with self._batch_size_estimator.record_time(self._batch_size):
       flushed = windowed_value.WindowedValue(
           batch, window.max_timestamp(), (window,))
       yield flushed
   self._batches = None
   self._batch_size = self._batch_size_estimator.next_batch_size()
Beispiel #12
0
 def restore_timestamps(element, window=DoFn.WindowParam):
   """Turns (key, reified values) into a list of windowed (key, value) pairs.

   Args:
     element: A (key, values) pair; each item of ``values`` exposes
       ``.value`` and ``.timestamp``.
     window: Window attached (as a one-element list) to every output.

   Returns:
     A list with one ``WindowedValue`` per item of ``values``.
   """
   key, values = element
   restored = []
   for reified in values:
     # Pass the current window since _IdentityWindowFn wouldn't know how
     # to generate it.
     restored.append(
         windowed_value.WindowedValue(
             (key, reified.value), reified.timestamp, [window]))
   return restored
Beispiel #13
0
 def finish_bundle(self):
     """Emits the buffered parts of every (window, timestamp) key.

     For each key in ``self._parts`` a dict is built that maps every tag to
     the ``pd.concat`` of its parts, or to ``None`` when the tag has no
     parts. After all keys are emitted ``self.start_bundle()`` is called.

     Yields:
       One ``windowed_value.WindowedValue`` per key, carrying the key's
       timestamp and window.
     """
     for key, tagged_parts in self._parts.items():
         window, timestamp = key
         combined = {}
         for tag, parts in tagged_parts.items():
             combined[tag] = pd.concat(parts) if parts else None
         yield windowed_value.WindowedValue(combined, timestamp, (window, ))
     self.start_bundle()
Beispiel #14
0
 def process(self, element, window=DoFn.WindowParam):
   """Buffers ``element`` under its window and emits a batch when one is due.

   A batch is emitted when the current window's buffer has reached
   ``self._batch_size``. Otherwise, when more than ``self._MAX_LIVE_WINDOWS``
   windows are buffered, the window holding the most elements is emitted.
   Emission happens inside ``self._batch_size_estimator.record_time``; the
   emitted buffer is then deleted and the next batch size is requested.

   Args:
     element: The element to append to the buffer for ``window``.
     window: Window of ``element``; defaults to ``DoFn.WindowParam``.

   Yields:
     At most one ``WindowedValue`` holding the flushed batch, stamped with
     the flushed window's ``max_timestamp()``.
   """
   # NOTE(review): relies on self._batches creating missing entries on lookup
   # (e.g. a defaultdict(list)) -- confirm against where it is initialised.
   self._batches[window].append(element)
   if len(self._batches[window]) >= self._batch_size:
     flush_window = window
   elif len(self._batches) > self._MAX_LIVE_WINDOWS:
     # max() returns the first maximal entry, which is the same entry a
     # stable descending sort followed by [0] selects, without sorting
     # every buffered batch.
     flush_window, _ = max(
         self._batches.items(),
         key=lambda window_batch: len(window_batch[1]))
   else:
     return
   with self._batch_size_estimator.record_time(self._batch_size):
     yield windowed_value.WindowedValue(
         self._batches[flush_window],
         flush_window.max_timestamp(),
         (flush_window,))
   del self._batches[flush_window]
   self._batch_size = self._batch_size_estimator.next_batch_size()
Beispiel #15
0
  def test_windowed_value_coder(self):
    # Covers the cloud object description, the exact encoded bytes, decoding
    # of a fixed byte string, and check_coder runs for unnested, globally
    # windowed and tuple-nested values.
    coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())

    # Verify cloud object representation
    expected_cloud_object = {
        '@type': 'kind:windowed_value',
        'is_wrapper': True,
        'component_encodings': [
            coders.VarIntCoder().as_cloud_object(),
            coders.GlobalWindowCoder().as_cloud_object(),
        ],
    }
    self.assertEqual(expected_cloud_object, coder.as_cloud_object())

    # Test binary representation
    encoded_one = coder.encode(window.GlobalWindows.windowed_value(1))
    self.assertEqual(
        b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', encoded_one)

    # Test decoding large timestamp
    decoded = coder.decode(
        b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00')
    self.assertEqual(
        decoded,
        windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(), )))

    # Test unnested
    unnested_coder = coders.WindowedValueCoder(coders.VarIntCoder())
    self.check_coder(
        unnested_coder,
        windowed_value.WindowedValue(3, -100, ()),
        windowed_value.WindowedValue(-1, 100, (1, 2, 3)))

    # Test Global Window
    global_coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())
    self.check_coder(global_coder, window.GlobalWindows.windowed_value(1))

    # Test nested
    nested_coder = coders.TupleCoder((
        coders.WindowedValueCoder(coders.FloatCoder()),
        coders.WindowedValueCoder(coders.StrUtf8Coder())))
    nested_value = (
        windowed_value.WindowedValue(1.5, 0, ()),
        windowed_value.WindowedValue("abc", 10, ('window', )))
    self.check_coder(nested_coder, nested_value)
    def process(self,
                element,
                window=DoFn.WindowParam,
                timestamp=DoFn.TimestampParam):
        """Yields the key paired with its value wrapped in a WindowedValue.

        Args:
          element: Expected to unpack into exactly (key, value).
          window: Window recorded (as a one-element list) on the wrapper.
          timestamp: Timestamp recorded on the wrapper.

        Yields:
          A single ``(key, WindowedValue)`` tuple.

        Raises:
          TypeCheckError: If unpacking ``element`` raises TypeError.
        """
        try:
            key, value = element
        except TypeError:
            raise TypeCheckError(
                'Input to GroupByKey must be a PCollection with '
                'elements compatible with KV[A, B]')

        reified = windowed_value.WindowedValue(value, timestamp, [window])
        yield (key, reified)
Beispiel #17
0
  def _window(self, output, add_window=False):
    """Optionally forces an output into the global window.

    While 'process' will output to the same window as its incoming element,
    'finish_bundle' has to specify a window to output into. Since we are
    dealing with a bounded input, we can use 'GlobalWindow'.

    Args:
      output: The function output that may need to be added to a window.
      add_window: When true, wraps output in a 'WindowedValue' with timestamp
        -1 in a 'GlobalWindow'.

    Returns:
      output unchanged, or output encapsulated in 'WindowedValue'.
    """
    if not add_window:
      return output
    global_windows = [window.GlobalWindow()]
    return windowed_value.WindowedValue(output, -1, global_windows)
Beispiel #18
0
    def test_nested_observables(self):
        # An observable iterator nested inside a WindowedValue or a tuple is
        # expected to be reported together with its element coder impl.
        class FakeObservableIterator(observable.ObservableMixin):
            def __iter__(self):
                return iter([1, 2, 3])

        def reported_observables(coder, value):
            # Item 1 of what get_estimated_size_and_observables returns.
            impl = coder.get_impl()
            return impl.get_estimated_size_and_observables(value)[1]

        # Coder for elements from the observable iterator.
        elem_coder = coders.VarIntCoder()
        iter_coder = coders.TupleSequenceCoder(elem_coder)
        observ = FakeObservableIterator()

        # Test nested WindowedValue observable.
        windowed_coder = coders.WindowedValueCoder(iter_coder)
        wrapped = windowed_value.WindowedValue(observ, 0, ())
        self.assertEqual(
            reported_observables(windowed_coder, wrapped),
            [(observ, elem_coder.get_impl())])

        # Test nested tuple observable.
        tuple_coder = coders.TupleCoder((coders.StrUtf8Coder(), iter_coder))
        pair = (u'123', observ)
        self.assertEqual(
            reported_observables(tuple_coder, pair),
            [(observ, elem_coder.get_impl())])
 def test_pickle(self):
   # A WindowedValue carrying a PaneInfo must survive a pickle round trip.
   pane_info = windowed_value.PaneInfo(
       True, True, windowed_value.PaneInfoTiming.ON_TIME, 0, 0)
   wv = windowed_value.WindowedValue(1, 3, (), pane_info)
   # assertEqual performs the same == comparison as assertTrue(a == b) but
   # shows both operands when it fails.
   self.assertEqual(pickle.loads(pickle.dumps(wv)), wv)
 def test_hash(self):
   # A shallow copy is a distinct object, yet it must find the dict entry
   # stored under the original, i.e. hash and equality agree.
   wv = windowed_value.WindowedValue(1, 3, ())
   wv_copy = copy.copy(wv)
   # assertIsNot reports both operands on failure, unlike
   # assertFalse(a is b).
   self.assertIsNot(wv, wv_copy)
   self.assertEqual({wv: 100}.get(wv_copy), 100)
 def test_with_value(self):
   # with_value must swap the value while keeping the timestamp, windows
   # and pane info.
   pane_info = windowed_value.PaneInfo(
       True, True, windowed_value.PaneInfoTiming.ON_TIME, 0, 0)
   original = windowed_value.WindowedValue(1, 3, (), pane_info)
   replaced = original.with_value(10)
   self.assertEqual(
       replaced, windowed_value.WindowedValue(10, 3, (), pane_info))
 def test_with_value(self):
     # with_value must swap the value while keeping timestamp and windows.
     original = windowed_value.WindowedValue(1, 3, ())
     replaced = original.with_value(10)
     self.assertEqual(replaced, windowed_value.WindowedValue(10, 3, ()))
 def test_pickle(self):
     # A WindowedValue must survive a pickle round trip unchanged.
     wv = windowed_value.WindowedValue(1, 3, ())
     # assertEqual performs the same == comparison as assertTrue(a == b) but
     # shows both operands when it fails.
     self.assertEqual(pickle.loads(pickle.dumps(wv)), wv)
Beispiel #24
0
 def finish_bundle(self):
     """Emits the concatenated parts of every (window, timestamp) key.

     After all keys of ``self._parts`` are emitted, ``self.start_bundle()``
     is called.

     Yields:
       One ``windowed_value.WindowedValue`` per key, holding
       ``_concat(parts)`` with the key's timestamp and window.
     """
     for key, parts in self._parts.items():
         window, timestamp = key
         merged = _concat(parts)
         yield windowed_value.WindowedValue(merged, timestamp, (window, ))
     self.start_bundle()
def globally_windowed_value():
  """Builds a WindowedValue in a single GlobalWindow.

  The value comes from ``small_int()`` and the timestamp is fixed at
  12345678.

  Returns:
    The constructed ``windowed_value.WindowedValue``.
  """
  value = small_int()
  global_windows = (window.GlobalWindow(), )
  return windowed_value.WindowedValue(
      value=value, timestamp=12345678, windows=global_windows)
 def finish_bundle(self):
     """Emits every buffered (key, values) pair in a GlobalWindow.

     Yields:
       One ``windowed_value.WindowedValue`` per entry of ``self.keys``,
       stamped with ``MIN_TIMESTAMP``.
     """
     for key, values in self.keys.items():
         grouped = (key, values)
         yield windowed_value.WindowedValue(
             grouped, MIN_TIMESTAMP, [GlobalWindow()])
Beispiel #27
0
 def set(self, ts):
   """Sends a timer record for ``ts`` to the receiver.

   Args:
     ts: Converted with ``timestamp.Timestamp.of`` before use; the result is
       both the record's ``timestamp`` entry and the WindowedValue timestamp.
   """
   ts = timestamp.Timestamp.of(ts)
   timer_record = (self._key, {'timestamp': ts})
   self._receiver.receive(
       windowed_value.WindowedValue(timer_record, ts, (self._window,)))
Beispiel #28
0
 def process(self, element, window=DoFn.WindowParam):
     """Yields (key, value) re-wrapped with the reified timestamp.

     Args:
       element: Indexable pair; item 0 is the key and item 1 exposes
         ``.value`` and ``.timestamp``.
       window: Window attached (as a one-element list) to the output.

     Yields:
       A single ``windowed_value.WindowedValue``.
     """
     key = element[0]
     reified = element[1]
     # Pass the current window since _IdentityWindowFn wouldn't know how
     # to generate it.
     yield windowed_value.WindowedValue(
         (key, reified.value), reified.timestamp, [window])
Beispiel #29
0
 def clear(self):
   """Sends a timer record whose timestamp lies past MAX_TIMESTAMP_MILLIS.

   The record's timestamp is one millisecond beyond
   ``common_urns.constants.MAX_TIMESTAMP_MILLIS``; the WindowedValue itself
   is stamped with 0.
   """
   max_millis = int(common_urns.constants.MAX_TIMESTAMP_MILLIS.constant)
   dummy_millis = max_millis + 1
   clear_ts = timestamp.Timestamp(micros=dummy_millis * 1000)
   timer_record = (self._key, {'timestamp': clear_ts})
   self._receiver.receive(
       windowed_value.WindowedValue(timer_record, 0, (self._window,)))