def test_reshuffle_window_fn_preserved(self):
  """Check windowing before and after a Reshuffle, then after a GroupByKey.

  Keyed elements are timestamped with their second field and windowed with
  ``Sessions(gap_size=2)``. The test asserts that the reified windowed
  values match ``expected_windows`` both before and after
  ``beam.Reshuffle()``, and that a ``GroupByKey`` applied after the
  reshuffle produces ``expected_merged_windows``.
  """
  # The context manager runs the pipeline on exit, so the assertions below
  # cannot be skipped by a forgotten explicit run() call.
  with TestPipeline() as pipeline:
    data = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]

    # Expected ungrouped output: each element with timestamp t sits in the
    # interval [t, t + 2).
    expected_windows = [
        TestWindowedValue(v, t, [w]) for (v, t, w) in [
            ((1, 1), 1.0, IntervalWindow(1.0, 3.0)),
            ((2, 1), 1.0, IntervalWindow(1.0, 3.0)),
            ((3, 1), 1.0, IntervalWindow(1.0, 3.0)),
            ((1, 2), 2.0, IntervalWindow(2.0, 4.0)),
            ((2, 2), 2.0, IntervalWindow(2.0, 4.0)),
            ((1, 4), 4.0, IntervalWindow(4.0, 6.0)),
        ]
    ]

    # Expected grouped output: per-key values together with the windows
    # expected once GroupByKey has been applied.
    expected_merged_windows = [
        TestWindowedValue(v, t, [w]) for (v, t, w) in [
            ((1, contains_in_any_order([2, 1])), 4.0,
             IntervalWindow(1.0, 4.0)),
            ((2, contains_in_any_order([2, 1])), 4.0,
             IntervalWindow(1.0, 4.0)),
            ((3, [1]), 3.0, IntervalWindow(1.0, 3.0)),
            ((1, [4]), 6.0, IntervalWindow(4.0, 6.0)),
        ]
    ]

    before_reshuffle = (
        pipeline
        | 'start' >> beam.Create(data)
        | 'add_timestamp' >> beam.Map(lambda v: TimestampedValue(v, v[1]))
        | 'window' >> beam.WindowInto(Sessions(gap_size=2)))
    assert_that(
        before_reshuffle,
        equal_to(expected_windows),
        label='before_reshuffle',
        reify_windows=True)

    after_reshuffle = before_reshuffle | 'reshuffle' >> beam.Reshuffle()
    assert_that(
        after_reshuffle,
        equal_to(expected_windows),
        label='after_reshuffle',
        reify_windows=True)

    after_group = after_reshuffle | 'group_by_key' >> beam.GroupByKey()
    assert_that(
        after_group,
        equal_to(expected_merged_windows),
        label='after_group',
        reify_windows=True)
def test_reshuffle_window_fn_preserved(self):
  """Check windowing before and after a Reshuffle, then after a GroupByKey.

  Keyed elements are timestamped with their second field and windowed with
  ``Sessions(gap_size=2)``. The reified windowed values must equal
  ``expected_windows`` both before and after ``beam.Reshuffle()``; grouping
  the reshuffled collection by key must yield ``expected_merged_windows``.
  """
  # NOTE(review): another method with this exact name appears earlier in
  # this file. If both live in the same class, this definition shadows the
  # earlier one -- confirm which of the two is intended to remain.

  # Running via the context manager guarantees the pipeline (and therefore
  # every assert_that below) is executed when the block exits.
  with TestPipeline() as pipeline:
    data = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]

    # Expected ungrouped output: an element with timestamp t is expected in
    # the interval [t, t + 2).
    expected_windows = [
        TestWindowedValue(v, t, [w]) for (v, t, w) in [
            ((1, 1), 1.0, IntervalWindow(1.0, 3.0)),
            ((2, 1), 1.0, IntervalWindow(1.0, 3.0)),
            ((3, 1), 1.0, IntervalWindow(1.0, 3.0)),
            ((1, 2), 2.0, IntervalWindow(2.0, 4.0)),
            ((2, 2), 2.0, IntervalWindow(2.0, 4.0)),
            ((1, 4), 4.0, IntervalWindow(4.0, 6.0)),
        ]
    ]

    # Expected output of the GroupByKey applied after the reshuffle.
    expected_merged_windows = [
        TestWindowedValue(v, t, [w]) for (v, t, w) in [
            ((1, contains_in_any_order([2, 1])), 4.0,
             IntervalWindow(1.0, 4.0)),
            ((2, contains_in_any_order([2, 1])), 4.0,
             IntervalWindow(1.0, 4.0)),
            ((3, [1]), 3.0, IntervalWindow(1.0, 3.0)),
            ((1, [4]), 6.0, IntervalWindow(4.0, 6.0)),
        ]
    ]

    before_reshuffle = (
        pipeline
        | 'start' >> beam.Create(data)
        | 'add_timestamp' >> beam.Map(lambda v: TimestampedValue(v, v[1]))
        | 'window' >> beam.WindowInto(Sessions(gap_size=2)))
    assert_that(
        before_reshuffle,
        equal_to(expected_windows),
        label='before_reshuffle',
        reify_windows=True)

    after_reshuffle = before_reshuffle | beam.Reshuffle()
    assert_that(
        after_reshuffle,
        equal_to(expected_windows),
        label='after_reshuffle',
        reify_windows=True)

    after_group = after_reshuffle | beam.GroupByKey()
    assert_that(
        after_group,
        equal_to(expected_merged_windows),
        label='after_group',
        reify_windows=True)
def test_reshuffle_windows_unchanged(self):
  """Assert that Reshuffle does not alter already-grouped windowed values.

  The input is timestamped, windowed with ``Sessions(gap_size=2)`` and
  grouped by key. The same expected reified values are then asserted on the
  grouped collection both before and after ``beam.Reshuffle()``.
  """
  with TestPipeline() as pipeline:
    keyed_input = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]

    # (grouped value, upper timestamp bound, window) for each expected
    # output; the expected timestamp is the bound minus .001.
    grouped_spec = [
        ((1, contains_in_any_order([2, 1])), 4.0, IntervalWindow(1.0, 4.0)),
        ((2, contains_in_any_order([2, 1])), 4.0, IntervalWindow(1.0, 4.0)),
        ((3, [1]), 3.0, IntervalWindow(1.0, 3.0)),
        ((1, [4]), 6.0, IntervalWindow(4.0, 6.0)),
    ]
    expected_data = [
        TestWindowedValue(value, bound - .001, [window])
        for (value, bound, window) in grouped_spec
    ]

    # Build the pipeline one named stage at a time.
    created = pipeline | 'start' >> beam.Create(keyed_input)
    stamped = created | 'add_timestamp' >> beam.Map(
        lambda v: beam.window.TimestampedValue(v, v[1]))
    windowed = stamped | 'window' >> beam.WindowInto(Sessions(gap_size=2))
    before_reshuffle = windowed | 'group_by_key' >> beam.GroupByKey()

    assert_that(
        before_reshuffle,
        equal_to(expected_data),
        label='before_reshuffle',
        reify_windows=True)

    after_reshuffle = before_reshuffle | beam.Reshuffle()
    assert_that(
        after_reshuffle,
        equal_to(expected_data),
        label='after reshuffle',
        reify_windows=True)