def test_no_window_context_fails(self):
    """Expect pipeline.run() to fail when _IdentityWindowFn gets no window.

    Elements are timestamped, re-windowed with _IdentityWindowFn and then
    passed through a second DoFn that yields TimestampedValues.  Per the
    inline note in the pipeline below, that leaves the AssignContext with a
    window of None, which must surface as a ValueError whose message matches
    r'window.*None.*add_timestamps2'.
    """
    expected_timestamp = timestamp.Timestamp(5)
    # Assuming the default window function is window.GlobalWindows.
    expected_window = window.GlobalWindow()

    class AddTimestampDoFn(beam.DoFn):
        def process(self, element):
            # Re-emit the element stamped with the fixed expected timestamp.
            yield window.TimestampedValue(element, expected_timestamp)

    pipeline = TestPipeline()
    data = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]
    expected_windows = [
        TestWindowedValue(kv, expected_timestamp, [expected_window])
        for kv in data
    ]

    before_identity = (
        pipeline
        | 'start' >> beam.Create(data)
        | 'add_timestamps' >> beam.ParDo(AddTimestampDoFn()))
    assert_that(
        before_identity,
        equal_to(expected_windows),
        label='before_identity',
        reify_windows=True)

    after_identity = (
        before_identity
        | 'window' >> beam.WindowInto(
            beam.transforms.util._IdentityWindowFn(
                coders.GlobalWindowCoder()))
        # This DoFn will return TimestampedValues, making
        # WindowFn.AssignContext passed to IdentityWindowFn
        # contain a window of None. IdentityWindowFn should
        # raise an exception.
        | 'add_timestamps2' >> beam.ParDo(AddTimestampDoFn()))
    assert_that(
        after_identity,
        equal_to(expected_windows),
        label='after_identity',
        reify_windows=True)

    # assertRaisesRegexp is a deprecated alias that Python 3.12 removed;
    # prefer assertRaisesRegex and fall back only where it does not exist.
    assert_raises_regex = (
        getattr(self, 'assertRaisesRegex', None) or self.assertRaisesRegexp)
    with assert_raises_regex(ValueError, r'window.*None.*add_timestamps2'):
        pipeline.run()
def test_windowedvalue_coder_paneinfo(self):
    """Run WindowedValues carrying assorted PaneInfos through check_coder.

    Uses a WindowedValueCoder built from VarIntCoder and GlobalWindowCoder,
    first on individual values and then on every ordered pair of values
    wrapped in a TupleCoder.
    """
    coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())

    timing = windowed_value.PaneInfoTiming
    pane_infos = [
        windowed_value.PANE_INFO_UNKNOWN,
        windowed_value.PaneInfo(True, True, timing.EARLY, 0, -1),
        windowed_value.PaneInfo(True, False, timing.ON_TIME, 0, 0),
        windowed_value.PaneInfo(True, False, timing.ON_TIME, 10, 0),
        windowed_value.PaneInfo(False, True, timing.ON_TIME, 0, 23),
        windowed_value.PaneInfo(False, True, timing.ON_TIME, 12, 23),
        windowed_value.PaneInfo(False, False, timing.LATE, 0, 123),
    ]

    # Same payload, timestamp and window every time; only the pane varies.
    samples = []
    for pane in pane_infos:
        samples.append(
            windowed_value.WindowedValue(123, 234, (GlobalWindow(),), pane))

    # Test unnested.
    unknown_pane_value = windowed_value.WindowedValue(
        123, 234, (GlobalWindow(),), windowed_value.PANE_INFO_UNKNOWN)
    self.check_coder(coder, unknown_pane_value)
    for sample in samples:
        self.check_coder(coder, sample)

    # Test nested.
    pairs = [(first, second) for first in samples for second in samples]
    for pair in pairs:
        self.check_coder(coders.TupleCoder((coder, coder)), pair)
def to_runner_api(self, context):
    # type: (PipelineContext) -> beam_runner_api_pb2.TimerFamilySpec
    """Build the TimerFamilySpec proto for this object.

    Args:
      context: pipeline context whose ``coders`` registry supplies the id
        recorded for the timer coder.

    Returns:
      A beam_runner_api_pb2.TimerFamilySpec carrying the converted time
      domain and the id of a _TimerCoder over StrUtf8Coder keys and
      GlobalWindowCoder windows.
    """
    domain = TimeDomain.to_runner_api(self.time_domain)
    timer_coder = coders._TimerCoder(
        coders.StrUtf8Coder(), coders.GlobalWindowCoder())
    coder_id = context.coders.get_id(timer_coder)
    return beam_runner_api_pb2.TimerFamilySpec(
        time_domain=domain, timer_family_coder_id=coder_id)
def run_coder_benchmarks(
        num_runs, input_size, seed, verbose, filter_regex='.*'):
    """Build the coder benchmark suite and hand it to utils.run_benchmarks.

    Args:
      num_runs: forwarded to utils.BenchmarkConfig for every benchmark.
      input_size: forwarded to utils.BenchmarkConfig for every benchmark.
      seed: value passed to random.seed() before the suite is built.
      verbose: forwarded to utils.run_benchmarks.
      filter_regex: pattern searched case-insensitively against each
        benchmark's ``__name__``; only matching benchmarks are run.
    """
    random.seed(seed)

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typing.Tuple[int, ...])
    #
    # Each entry is the (coder, input) argument pair handed to
    # coder_benchmark_factory, in the order the benchmarks are registered.
    cases = [
        (coders.FastPrimitivesCoder(), small_int),
        (coders.FastPrimitivesCoder(), large_int),
        (coders.FastPrimitivesCoder(), small_string),
        (coders.FastPrimitivesCoder(), large_string),
        (coders.FastPrimitivesCoder(), small_list),
        (coders.IterableCoder(coders.FastPrimitivesCoder()), small_list),
        (coders.FastPrimitivesCoder(), large_list),
        (coders.IterableCoder(coders.FastPrimitivesCoder()), large_list),
        (coders.IterableCoder(coders.FastPrimitivesCoder()), large_iterable),
        (coders.FastPrimitivesCoder(), small_tuple),
        (coders.FastPrimitivesCoder(), large_tuple),
        (coders.FastPrimitivesCoder(), small_dict),
        (coders.FastPrimitivesCoder(), large_dict),
        (coders.ProtoCoder(test_message.MessageWithMap),
         small_message_with_map),
        (coders.ProtoCoder(test_message.MessageWithMap),
         large_message_with_map),
        (coders.DeterministicProtoCoder(test_message.MessageWithMap),
         small_message_with_map),
        (coders.DeterministicProtoCoder(test_message.MessageWithMap),
         large_message_with_map),
        (coders.WindowedValueCoder(coders.FastPrimitivesCoder()),
         wv_with_one_window),
        (coders.WindowedValueCoder(
            coders.FastPrimitivesCoder(), coders.IntervalWindowCoder()),
         wv_with_multiple_windows),
        (coders.WindowedValueCoder(
            coders.FastPrimitivesCoder(), coders.GlobalWindowCoder()),
         globally_windowed_value),
        (coders.LengthPrefixCoder(coders.FastPrimitivesCoder()), small_int),
    ]
    benchmarks = [
        coder_benchmark_factory(coder, source) for coder, source in cases
    ]

    # Keep only the benchmarks whose name matches the filter.
    suite = []
    for benchmark in benchmarks:
        if re.search(filter_regex, benchmark.__name__, flags=re.I):
            suite.append(
                utils.BenchmarkConfig(benchmark, input_size, num_runs))

    utils.run_benchmarks(suite, verbose=verbose)
def test_windowed_value_coder(self):
    """Check WindowedValueCoder's cloud object, wire bytes and check_coder.

    Covers the cloud-object dictionary, one fixed encoding, one fixed
    decoding that yields MIN_TIMESTAMP, and check_coder runs for unnested,
    globally windowed and tuple-nested values.
    """
    coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())

    # Verify cloud object representation
    expected_cloud_object = {
        '@type': 'kind:windowed_value',
        'is_wrapper': True,
        'component_encodings': [
            coders.VarIntCoder().as_cloud_object(),
            coders.GlobalWindowCoder().as_cloud_object(),
        ],
    }
    self.assertEqual(expected_cloud_object, coder.as_cloud_object())

    # Test binary representation
    encoded = coder.encode(window.GlobalWindows.windowed_value(1))
    self.assertEqual(
        b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', encoded)

    # Test decoding large timestamp
    decoded = coder.decode(
        b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00')
    min_timestamp_value = windowed_value.create(
        0, MIN_TIMESTAMP.micros, (GlobalWindow(), ))
    self.assertEqual(decoded, min_timestamp_value)

    # Test unnested
    default_window_coder = coders.WindowedValueCoder(coders.VarIntCoder())
    self.check_coder(
        default_window_coder,
        windowed_value.WindowedValue(3, -100, ()),
        windowed_value.WindowedValue(-1, 100, (1, 2, 3)))

    # Test Global Window
    global_window_coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())
    self.check_coder(
        global_window_coder, window.GlobalWindows.windowed_value(1))

    # Test nested
    pair_coder = coders.TupleCoder((
        coders.WindowedValueCoder(coders.FloatCoder()),
        coders.WindowedValueCoder(coders.StrUtf8Coder()),
    ))
    pair_value = (
        windowed_value.WindowedValue(1.5, 0, ()),
        windowed_value.WindowedValue("abc", 10, ('window', )),
    )
    self.check_coder(pair_coder, pair_value)
def test_global_window_coder(self):
    """Check GlobalWindowCoder's cloud object, empty encoding and check_coder.

    A GlobalWindow is expected to encode to b'' and b'' to decode back to
    an equal GlobalWindow.
    """
    global_window = window.GlobalWindow()
    coder = coders.GlobalWindowCoder()

    # Verify cloud object representation
    cloud_object = coder.as_cloud_object()
    self.assertEqual({'@type': 'kind:global_window'}, cloud_object)

    # Test binary representation
    encoded = coder.encode(global_window)
    self.assertEqual(b'', encoded)
    decoded = coder.decode(b'')
    self.assertEqual(global_window, decoded)

    # Test unnested
    self.check_coder(coder, global_window)

    # Test nested
    pair_coder = coders.TupleCoder((coder, coder))
    self.check_coder(pair_coder, (global_window, global_window))
def test_timer_coder(self):
    """Run two Timers through check_coder with a _TimerCoder.

    One timer has clear_bit=True with no timestamps or pane info; the other
    has clear_bit=False with fire/hold timestamps and PANE_INFO_UNKNOWN.
    """
    timer_coder = coders._TimerCoder(
        coders.StrUtf8Coder(), coders.GlobalWindowCoder())

    cleared_timer = userstate.Timer(
        user_key="key",
        dynamic_timer_tag="tag",
        windows=(GlobalWindow(), ),
        clear_bit=True,
        fire_timestamp=None,
        hold_timestamp=None,
        paneinfo=None)
    scheduled_timer = userstate.Timer(
        user_key="key",
        dynamic_timer_tag="tag",
        windows=(GlobalWindow(), ),
        clear_bit=False,
        fire_timestamp=timestamp.Timestamp.of(123),
        hold_timestamp=timestamp.Timestamp.of(456),
        paneinfo=windowed_value.PANE_INFO_UNKNOWN)

    self.check_coder(timer_coder, cleared_timer, scheduled_timer)
def get_window_coder(self):
    """Return a coders.GlobalWindowCoder as this object's window coder."""
    window_coder = coders.GlobalWindowCoder()
    return window_coder
def get_window_coder(self):
    # type: () -> coders.GlobalWindowCoder
    """Return a coders.GlobalWindowCoder as this object's window coder."""
    window_coder = coders.GlobalWindowCoder()
    return window_coder