def test_sliding_windows_assignment_fraction(self):
    """Check window assignment when size, period and offset are fractional."""
    sliding = SlidingWindows(size=3.5, period=2.5, offset=1.5)

    # Timestamp 1.7 is expected to land in two overlapping windows.
    windows_at_1_7 = sliding.assign(context('v', 1.7))
    self.assertEqual(
        [IntervalWindow(1.5, 5.0), IntervalWindow(-1.0, 2.5)],
        windows_at_1_7)

    # Timestamp 3 is expected to land in a single window.
    windows_at_3 = sliding.assign(context('v', 3))
    self.assertEqual([IntervalWindow(1.5, 5.0)], windows_at_3)
def test_sliding_windows_assignment(self):
    """Check that timestamps 7, 8 and 11 are assigned the same three windows."""
    sliding = SlidingWindows(size=15, period=5, offset=2)
    expected = [
        IntervalWindow(7, 22),
        IntervalWindow(2, 17),
        IntervalWindow(-3, 12),
    ]
    # All three timestamps must produce the identical, identically ordered
    # list of windows.
    for timestamp in (7, 8, 11):
        self.assertEqual(expected, sliding.assign(context('v', timestamp)))
def test_sdf_with_windowed_timestamped_input(self):
    """Run ExpandStrings over keyed, timestamped input in sliding windows.

    Each input value t is emitted under keys 'A' and 'B' with event
    timestamp t, windowed with SlidingWindows(10, 5), and then passed through
    ExpandStrings(record_window=True) on the DirectRunner.
    """
    # NOTE(review): the expected strings look like 'key:value:window_start';
    # confirm against the ExpandStrings implementation.
    expected_result = [
        'A:1:-5',
        'A:1:0',
        'A:3:-5',
        'A:3:0',
        'A:5:0',
        'A:5:5',
        'A:10:5',
        'A:10:10',
        'B:1:-5',
        'B:1:0',
        'B:3:-5',
        'B:3:0',
        'B:5:0',
        'B:5:5',
        'B:10:5',
        'B:10:10',
    ]

    with TestPipeline(runner='DirectRunner') as p:
        # Fan each value out to both keys, using the value as its timestamp.
        timestamped = (
            p
            | beam.Create([1, 3, 5, 10])
            | beam.FlatMap(
                lambda ts: [
                    TimestampedValue(('A', ts), ts),
                    TimestampedValue(('B', ts), ts),
                ]))
        windowed = timestamped | beam.WindowInto(
            SlidingWindows(10, 5),
            accumulation_mode=AccumulationMode.DISCARDING)
        result = windowed | beam.ParDo(ExpandStrings(record_window=True))

        assert_that(result, equal_to(expected_result))
def test_windowfn_encoding(self):
    """Round-trip several WindowFns through their runner API form."""
    window_fns = (
        GlobalWindows(),
        FixedWindows(37),
        SlidingWindows(2, 389),
        Sessions(5077),
    )
    for window_fn in window_fns:
        # A fresh PipelineContext is used for every round trip.
        pipeline_ctx = pipeline_context.PipelineContext()
        encoded = window_fn.to_runner_api(pipeline_ctx)
        decoded = WindowFn.from_runner_api(encoded, pipeline_ctx)
        self.assertEqual(window_fn, decoded)
def test_sliding_windows(self):
    """Group values 1, 2, 3 under one key in sliding windows (size 4, period 2)."""
    expected = [
        ('key @ [-2.0, 2.0)', [1]),
        ('key @ [0.0, 4.0)', [1, 2, 3]),
        ('key @ [2.0, 6.0)', [2, 3]),
    ]
    with TestPipeline() as p:
        keyed = self.timestamped_key_values(p, 'key', 1, 2, 3)
        sliding = WindowInto(SlidingWindows(period=2, size=4))
        # '>>' binds tighter than '|', so 'w' labels only the WindowInto step.
        result = keyed | 'w' >> sliding | GroupByKey() | reify_windows
        assert_that(result, equal_to(expected))
def test_windowing_encoding(self):
    """Round-trip several Windowing configurations through the runner API."""
    global_default = Windowing(GlobalWindows())
    fixed_accumulating = Windowing(
        FixedWindows(1, 3),
        AfterCount(6),
        accumulation_mode=AccumulationMode.ACCUMULATING)
    sliding_discarding = Windowing(
        SlidingWindows(10, 15, 21),
        AfterCount(28),
        timestamp_combiner=TimestampCombiner.OUTPUT_AT_LATEST,
        accumulation_mode=AccumulationMode.DISCARDING)

    for windowing in (global_default, fixed_accumulating, sliding_discarding):
        # A fresh PipelineContext is used for every round trip.
        pipeline_ctx = pipeline_context.PipelineContext()
        encoded = windowing.to_runner_api(pipeline_ctx)
        decoded = Windowing.from_runner_api(encoded, pipeline_ctx)
        self.assertEqual(windowing, decoded)
def test_rewindow(self):
    """Check that re-windowing keeps the copies made by sliding windows."""
    expected = [
        ('key', sorted([0, 1, 2, 3, 4] * 3)),
        ('key', sorted([5, 6, 7, 8, 9] * 3)),
    ]
    with TestPipeline() as p:
        # Each integer k becomes the value k with event timestamp k.
        timestamped = (
            p
            | Create([(k, k) for k in range(10)])
            | Map(lambda pair: TimestampedValue(pair[0], pair[1])))
        # Per the model, each element is now duplicated across
        # three windows. Rewindowing must preserve this duplication.
        sliding = timestamped | 'window' >> WindowInto(
            SlidingWindows(period=2, size=6))
        rewindowed = (
            sliding
            | 'rewindow' >> WindowInto(FixedWindows(5))
            | 'rewindow2' >> WindowInto(FixedWindows(5)))
        result = (
            rewindowed
            | Map(lambda value: ('key', value))
            | GroupByKey())
        assert_that(result, equal_to(expected))
def test_window_param(self):
    """Check that DoFn.WindowParam exposes each window an element belongs to.

    Values 1 and 7 are timestamped with their own value and windowed with
    SlidingWindows(10, 5); the DoFn emits (element, (window start, window end))
    once per window.
    """
    class TestDoFn(DoFn):
        def process(self, element, window=DoFn.WindowParam):
            # Convert the bounds to floats so they compare equal to the
            # plain numbers in the expected output.
            yield (element, (float(window.start), float(window.end)))

    # Use the pipeline as a context manager, like the other pipeline tests
    # here, so it is run on leaving the block instead of via a manual run().
    with TestPipeline() as pipeline:
        pcoll = (
            pipeline
            | Create([1, 7])
            | Map(lambda x: TimestampedValue(x, x))
            | WindowInto(windowfn=SlidingWindows(10, 5))
            | ParDo(TestDoFn()))
        assert_that(
            pcoll,
            equal_to([
                (1, (-5, 5)),
                (1, (0, 10)),
                (7, (0, 10)),
                (7, (5, 15)),
            ]))
def test_reshuffle_sliding_window(self):
    """Check that Reshuffle does not re-apply sliding windows to its input.

    The grouped output is asserted both before and after the Reshuffle, and
    both must equal the same expected data.
    """
    data = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]
    window_size = 2
    # The per-key groups are expected once per window_size, i.e. twice here.
    expected_data = [(1, [1, 2, 4]), (2, [1, 2]), (3, [1])] * window_size

    # Use the pipeline as a context manager, like the other pipeline tests
    # here, so it is run on leaving the block instead of via a manual run().
    with TestPipeline() as pipeline:
        before_reshuffle = (
            pipeline
            | beam.Create(data)
            | beam.WindowInto(SlidingWindows(size=window_size, period=1))
            | beam.GroupByKey())
        assert_that(
            before_reshuffle, equal_to(expected_data), label='before_reshuffle')

        after_reshuffle = before_reshuffle | beam.Reshuffle()
        # If Reshuffle applies the sliding window function a second time there
        # should be extra values for each key.
        assert_that(
            after_reshuffle, equal_to(expected_data), label='after reshuffle')
def run():
    """Build and run the streaming pipeline from Pub/Sub to BigQuery.

    Reads messages from the 'iex-aggregate-<resolution>' subscription, decodes
    them as UTF-8 JSON, windows them with SlidingWindows, applies AddTimestamp
    and writes the result to the 'iex.quote' BigQuery table.
    """
    options = PipelineOptions(streaming=True)
    # `resolution` is read via .get() from the MyOptions view of the options.
    resolution = options.view_as(MyOptions).resolution.get()

    subscription_id = 'projects/iex-stream/subscriptions/iex-aggregate-' + str(
        resolution)
    schema = 'symbol:STRING,latest_price:FLOAT,window_end:TIMESTAMP,event_time:TIMESTAMP,resolution_minutes:INTEGER'

    with beam.Pipeline(options=options) as p:
        raw_messages = p | beam.io.ReadFromPubSub(
            subscription=subscription_id).with_output_types(bytes)
        decoded = raw_messages | 'decode' >> beam.Map(
            lambda payload: payload.decode('utf-8'))
        lines = decoded | beam.Map(json.loads)

        # NOTE(review): positional arguments are presumably (size, period,
        # offset), with 60 * resolution converting minutes to seconds -
        # confirm against the SlidingWindows signature.
        windowed = lines | 'CreateWindow' >> beam.WindowInto(
            SlidingWindows(60 * resolution, 10, 5))
        stamped = windowed | 'AddWindowEndTimestamp' >> beam.ParDo(
            AddTimestamp(resolution=resolution))
        stamped | 'WriteToBigQuery' >> beam.io.WriteToBigQuery(
            'iex.quote', schema=schema)
def test_sliding_windows_simple_watermark(self):
    """Drive the trigger manager with a TestStream over sliding windows.

    Elements for keys 'k1' and 'k2' are added while the watermark advances
    through 0, 1 and 2, and the output for each key and window is compared
    with the expected values.
    """
    # yapf: disable
    test_stream = (
        TestStream()
        .advance_watermark_to(0)
        .add_elements([('k1', 1), ('k2', 1), ('k1', 1), ('k2', 1)])
        .add_elements([('k1', 1), ('k2', 1)])
        .advance_watermark_to(1)
        .add_elements([('k1', 2), ('k2', 2)])
        .add_elements([('k1', 2), ('k2', 2)])
        .advance_watermark_to(2)
        .add_elements([('k1', 3), ('k2', 3)])
        .add_elements([('k1', 3), ('k2', 3)])
        .advance_watermark_to_infinity())
    # yapf: enable

    # Sliding windows built as SlidingWindows(2, 1); no trigger is passed to
    # Windowing, so whatever default it applies is used.
    windowing = Windowing(SlidingWindows(2, 1))

    expected = [
        ('k1', IntervalWindow(-1, 1), [1, 1, 1]),
        ('k2', IntervalWindow(-1, 1), [1, 1, 1]),
        ('k1', IntervalWindow(0, 2), [1, 1, 1, 2, 2]),
        ('k2', IntervalWindow(0, 2), [1, 1, 1, 2, 2]),
        ('k1', IntervalWindow(1, 3), [2, 2, 3, 3]),
        ('k2', IntervalWindow(1, 3), [2, 2, 3, 3]),
        ('k1', IntervalWindow(2, 4), [3, 3]),
        ('k2', IntervalWindow(2, 4), [3, 3]),
    ]

    with TestPipeline() as p:
        windowed = p | test_stream | WindowInto(windowing.windowfn)
        triggered = (
            windowed
            | ParDo(trigger_manager._ReifyWindows())
            | ParDo(trigger_manager._GroupBundlesByKey())
            | ParDo(trigger_manager.GeneralTriggerManagerDoFn(windowing)))
        # Reduce each output to (key, window of the first value, raw values).
        result = triggered | Map(
            lambda out: (
                out[0], out[1][0].windows[0], [v.value for v in out[1]]))
        assert_that(result, equal_to(expected))
def test_sliding_windows_assignment_fraction_large_offset(self):
    """Check fractional window assignment when the offset exceeds the period."""
    sliding = SlidingWindows(size=3.5, period=2.5, offset=4.0)

    # Timestamp 1.7 is expected to land in two overlapping windows.
    windows_at_1_7 = sliding.assign(context('v', 1.7))
    self.assertEqual(
        [IntervalWindow(1.5, 5.0), IntervalWindow(-1.0, 2.5)],
        windows_at_1_7)

    # Timestamp 4.5 is expected to land in the next pair of windows.
    windows_at_4_5 = sliding.assign(context('v', 4.5))
    self.assertEqual(
        [IntervalWindow(4.0, 7.5), IntervalWindow(1.5, 5.0)],
        windows_at_4_5)