Esempio n. 1
0
  def test_no_window_context_fails(self):
    """Checks that _IdentityWindowFn fails when an element has no window.

    Elements are timestamped, verified to sit in the global window, passed
    through WindowInto(_IdentityWindowFn) and then re-timestamped by a second
    ParDo. Running the pipeline is expected to raise a ValueError whose
    message mentions the window, None and the 'add_timestamps2' step.
    """
    expected_timestamp = timestamp.Timestamp(5)
    # Assuming the default window function is window.GlobalWindows.
    expected_window = window.GlobalWindow()

    class AddTimestampDoFn(beam.DoFn):
      """Re-emits every element as a TimestampedValue at expected_timestamp."""
      def process(self, element):
        yield window.TimestampedValue(element, expected_timestamp)

    pipeline = TestPipeline()
    data = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]
    expected_windows = [
        TestWindowedValue(kv, expected_timestamp, [expected_window])
        for kv in data]
    before_identity = (pipeline
                       | 'start' >> beam.Create(data)
                       | 'add_timestamps' >> beam.ParDo(AddTimestampDoFn()))
    assert_that(before_identity, equal_to(expected_windows),
                label='before_identity', reify_windows=True)
    after_identity = (before_identity
                      | 'window' >> beam.WindowInto(
                          beam.transforms.util._IdentityWindowFn(
                              coders.GlobalWindowCoder()))
                      # This DoFn will return TimestampedValues, making
                      # WindowFn.AssignContext passed to IdentityWindowFn
                      # contain a window of None. IdentityWindowFn should
                      # raise an exception.
                      | 'add_timestamps2' >> beam.ParDo(AddTimestampDoFn()))
    assert_that(after_identity, equal_to(expected_windows),
                label='after_identity', reify_windows=True)
    # assertRaisesRegexp is a deprecated alias that no longer exists on
    # Python 3.12+. Prefer the current name and fall back to the old alias
    # only on interpreters that lack it.
    assert_raises_regex = getattr(
        self, 'assertRaisesRegex', None) or self.assertRaisesRegexp
    with assert_raises_regex(ValueError, r'window.*None.*add_timestamps2'):
      pipeline.run()
Esempio n. 2
0
  def test_windowedvalue_coder_paneinfo(self):
    """Round-trips globally windowed values carrying a variety of pane infos.

    Each sample value is checked on its own and as both halves of a tuple
    encoded with a TupleCoder of two WindowedValueCoders.
    """
    wv_coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())

    timing = windowed_value.PaneInfoTiming
    # Positional arguments handed to windowed_value.PaneInfo, one row per pane.
    pane_arguments = [
        (True, True, timing.EARLY, 0, -1),
        (True, False, timing.ON_TIME, 0, 0),
        (True, False, timing.ON_TIME, 10, 0),
        (False, True, timing.ON_TIME, 0, 23),
        (False, True, timing.ON_TIME, 12, 23),
        (False, False, timing.LATE, 0, 123),
    ]
    pane_infos = [windowed_value.PANE_INFO_UNKNOWN]
    pane_infos.extend(
        windowed_value.PaneInfo(*arguments) for arguments in pane_arguments)

    samples = [
        windowed_value.WindowedValue(123, 234, (GlobalWindow(),), pane_info)
        for pane_info in pane_infos
    ]

    # Test unnested.
    unknown_pane_sample = windowed_value.WindowedValue(
        123, 234, (GlobalWindow(),), windowed_value.PANE_INFO_UNKNOWN)
    self.check_coder(wv_coder, unknown_pane_sample)
    for sample in samples:
      self.check_coder(wv_coder, sample)

    # Test nested: every ordered pair of samples, first element varying slowest.
    sample_pairs = [(left, right) for left in samples for right in samples]
    for pair in sample_pairs:
      self.check_coder(coders.TupleCoder((wv_coder, wv_coder)), pair)
Esempio n. 3
0
 def to_runner_api(self, context):
     # type: (PipelineContext) -> beam_runner_api_pb2.TimerFamilySpec
     """Builds the TimerFamilySpec proto for this object's time domain.

     The timer family coder is a _TimerCoder over a UTF-8 string coder and a
     global window coder; its id is obtained from context.coders.get_id.
     """
     domain_proto = TimeDomain.to_runner_api(self.time_domain)
     timer_coder = coders._TimerCoder(
         coders.StrUtf8Coder(), coders.GlobalWindowCoder())
     timer_coder_id = context.coders.get_id(timer_coder)
     return beam_runner_api_pb2.TimerFamilySpec(
         time_domain=domain_proto, timer_family_coder_id=timer_coder_id)
Esempio n. 4
0
def run_coder_benchmarks(num_runs,
                         input_size,
                         seed,
                         verbose,
                         filter_regex='.*'):
    """Build the coder benchmarks and run those whose name matches a regex.

    Args:
        num_runs: Passed to utils.BenchmarkConfig for every selected benchmark.
        input_size: Passed to utils.BenchmarkConfig for every selected
            benchmark.
        seed: Value used to seed the `random` module before anything else.
        verbose: Forwarded to utils.run_benchmarks.
        filter_regex: Pattern searched case-insensitively in each benchmark's
            __name__; only matching benchmarks are run.
    """
    random.seed(seed)

    primitives = coders.FastPrimitivesCoder
    iterable = coders.IterableCoder
    windowed = coders.WindowedValueCoder
    map_message = test_message.MessageWithMap

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typing.Tuple[int, ...])
    # Each row pairs a freshly built coder with the generator of its inputs.
    coder_and_generator = [
        (primitives(), small_int),
        (primitives(), large_int),
        (primitives(), small_string),
        (primitives(), large_string),
        (primitives(), small_list),
        (iterable(primitives()), small_list),
        (primitives(), large_list),
        (iterable(primitives()), large_list),
        (iterable(primitives()), large_iterable),
        (primitives(), small_tuple),
        (primitives(), large_tuple),
        (primitives(), small_dict),
        (primitives(), large_dict),
        (coders.ProtoCoder(map_message), small_message_with_map),
        (coders.ProtoCoder(map_message), large_message_with_map),
        (coders.DeterministicProtoCoder(map_message),
         small_message_with_map),
        (coders.DeterministicProtoCoder(map_message),
         large_message_with_map),
        (windowed(primitives()), wv_with_one_window),
        (windowed(primitives(), coders.IntervalWindowCoder()),
         wv_with_multiple_windows),
        (windowed(primitives(), coders.GlobalWindowCoder()),
         globally_windowed_value),
        (coders.LengthPrefixCoder(primitives()), small_int),
    ]
    benchmarks = [
        coder_benchmark_factory(coder, generator)
        for coder, generator in coder_and_generator
    ]

    name_filter = re.compile(filter_regex, flags=re.I)
    suite = []
    for benchmark in benchmarks:
        if name_filter.search(benchmark.__name__):
            suite.append(
                utils.BenchmarkConfig(benchmark, input_size, num_runs))
    utils.run_benchmarks(suite, verbose=verbose)
Esempio n. 5
0
  def test_windowed_value_coder(self):
    """Checks WindowedValueCoder's cloud object, encoding and round-trips."""
    global_wv_coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())

    # Verify cloud object representation
    expected_cloud_object = {
        '@type': 'kind:windowed_value',
        'is_wrapper': True,
        'component_encodings': [
            coders.VarIntCoder().as_cloud_object(),
            coders.GlobalWindowCoder().as_cloud_object(),
        ],
    }
    self.assertEqual(expected_cloud_object, global_wv_coder.as_cloud_object())

    # Test binary representation
    encoded_one = b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01'
    self.assertEqual(
        encoded_one,
        global_wv_coder.encode(window.GlobalWindows.windowed_value(1)))

    # Test decoding large timestamp
    encoded_zero = b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'
    self.assertEqual(
        global_wv_coder.decode(encoded_zero),
        windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(), )))

    # Test unnested
    default_window_coder = coders.WindowedValueCoder(coders.VarIntCoder())
    self.check_coder(
        default_window_coder,
        windowed_value.WindowedValue(3, -100, ()),
        windowed_value.WindowedValue(-1, 100, (1, 2, 3)))

    # Test Global Window
    self.check_coder(
        coders.WindowedValueCoder(
            coders.VarIntCoder(), coders.GlobalWindowCoder()),
        window.GlobalWindows.windowed_value(1))

    # Test nested
    nested_coder = coders.TupleCoder((
        coders.WindowedValueCoder(coders.FloatCoder()),
        coders.WindowedValueCoder(coders.StrUtf8Coder())))
    nested_value = (
        windowed_value.WindowedValue(1.5, 0, ()),
        windowed_value.WindowedValue("abc", 10, ('window', )))
    self.check_coder(nested_coder, nested_value)
Esempio n. 6
0
 def test_global_window_coder(self):
   """Checks GlobalWindowCoder's cloud object, encoding and round-trips."""
   window_coder = coders.GlobalWindowCoder()
   global_window = window.GlobalWindow()

   # Verify cloud object representation
   expected_cloud_object = {'@type': 'kind:global_window'}
   self.assertEqual(expected_cloud_object, window_coder.as_cloud_object())

   # Test binary representation: the window maps to and from zero bytes.
   empty_encoding = b''
   self.assertEqual(empty_encoding, window_coder.encode(global_window))
   self.assertEqual(global_window, window_coder.decode(empty_encoding))

   # Test unnested
   self.check_coder(window_coder, global_window)

   # Test nested
   pair_coder = coders.TupleCoder((window_coder, window_coder))
   self.check_coder(pair_coder, (global_window, global_window))
Esempio n. 7
0
 def test_timer_coder(self):
     """Round-trips two timers through _TimerCoder with check_coder.

     One timer has clear_bit=True and no timestamps or pane info; the other
     has clear_bit=False with fire/hold timestamps and an unknown pane info.
     """
     timer_coder = coders._TimerCoder(
         coders.StrUtf8Coder(), coders.GlobalWindowCoder())

     cleared_timer = userstate.Timer(
         user_key="key",
         dynamic_timer_tag="tag",
         windows=(GlobalWindow(), ),
         clear_bit=True,
         fire_timestamp=None,
         hold_timestamp=None,
         paneinfo=None)
     set_timer = userstate.Timer(
         user_key="key",
         dynamic_timer_tag="tag",
         windows=(GlobalWindow(), ),
         clear_bit=False,
         fire_timestamp=timestamp.Timestamp.of(123),
         hold_timestamp=timestamp.Timestamp.of(456),
         paneinfo=windowed_value.PANE_INFO_UNKNOWN)

     self.check_coder(timer_coder, cleared_timer, set_timer)
Esempio n. 8
0
 def get_window_coder(self):
   """Returns a GlobalWindowCoder as the window coder."""
   window_coder = coders.GlobalWindowCoder()
   return window_coder
Esempio n. 9
0
 def get_window_coder(self):
   # type: () -> coders.GlobalWindowCoder
   """Returns a GlobalWindowCoder as the window coder."""
   window_coder = coders.GlobalWindowCoder()
   return window_coder