def test_timestamped_value(self):
    """Check grouping of TimestampedValue elements under FixedWindows(5).

    Creates the pairs (k, k) for k in 0..9, converts each pair (x, t) into
    TimestampedValue(x, t), applies WindowInto with FixedWindows(5), puts
    every element under the single key 'key' and groups by key. The result
    is expected to be two groups: values 0-4 and values 5-9.
    """
    p = Pipeline('DirectPipelineRunner')
    result = (p
              | Create('start', [(k, k) for k in range(10)])
              # Unpack the (value, timestamp) pair at the call site: a tuple
              # parameter in the lambda signature ("lambda (x, t): ...") is a
              # syntax error on Python 3 (PEP 3113).
              | Map(lambda x_t: TimestampedValue(*x_t))
              | WindowInto('w', FixedWindows(5))
              | Map(lambda v: ('key', v))
              | GroupByKey())
    assert_that(result, equal_to([('key', [0, 1, 2, 3, 4]),
                                  ('key', [5, 6, 7, 8, 9])]))
    p.run()
def test_sessions(self):
    """Check grouping under Sessions(10) windowing.

    The input comes from self.timestamped_key_values (helper defined
    elsewhere; presumably each number is used as both value and timestamp --
    confirm against the helper). After WindowInto(Sessions(10)), GroupByKey,
    sort_values and reify_windows, the result is expected to contain one
    entry for window [1.0, 13.0) holding 1, 2, 3 and one for window
    [20.0, 45.0) holding 20, 27, 35.
    """
    pipeline = Pipeline('DirectPipelineRunner')
    keyed = self.timestamped_key_values(
        pipeline, 'key', 1, 2, 3, 20, 35, 27)
    windowed = keyed | WindowInto('w', Sessions(10))
    grouped = windowed | GroupByKey()
    # sort_values runs before reify_windows, exactly as in the one-expression
    # form of this pipeline.
    reified = grouped | sort_values | reify_windows
    assert_that(
        reified,
        equal_to([
            ('key @ [1.0, 13.0)', [1, 2, 3]),
            ('key @ [20.0, 45.0)', [20, 27, 35]),
        ]))
    pipeline.run()
def test_sliding_windows(self):
    """Check grouping under SlidingWindows(period=2, size=4).

    The input comes from self.timestamped_key_values with the numbers
    1, 2, 3 (helper defined elsewhere; presumably each number is used as
    both value and timestamp -- confirm against the helper). After
    windowing, GroupByKey and reify_windows, the expected entries are
    window [-2.0, 2.0) with [1], window [0.0, 4.0) with [1, 2, 3] and
    window [2.0, 6.0) with [2, 3].
    """
    pipeline = Pipeline('DirectPipelineRunner')
    keyed = self.timestamped_key_values(pipeline, 'key', 1, 2, 3)
    windowed = keyed | WindowInto('w', SlidingWindows(period=2, size=4))
    reified = windowed | GroupByKey() | reify_windows
    assert_that(
        reified,
        equal_to([
            ('key @ [-2.0, 2.0)', [1]),
            ('key @ [0.0, 4.0)', [1, 2, 3]),
            ('key @ [2.0, 6.0)', [2, 3]),
        ]))
    pipeline.run()
def test_window_transform(self):
    """Check WindowInto with a custom WindowFn that assigns two windows.

    Every element is assigned to the same two windows, so after GroupByKey
    each input pair (k, v) is expected to appear twice as (k, [v]).
    """

    class TestWindowFn(WindowFn):
        """WindowFn that puts every element into the same two windows."""

        def assign(self, assign_context):
            # The context is ignored: the windows do not depend on the
            # element being assigned.
            del assign_context
            # Yields IntervalWindow(10, 20) and IntervalWindow(20, 30).
            return [IntervalWindow(start, start + 10) for start in (10, 20)]

        def merge(self, existing_windows):
            # Hands its argument back unchanged.
            return existing_windows

    pairs = [(1, 10), (2, 20), (3, 30)]
    p = Pipeline('DirectPipelineRunner')
    kvs = p | Create('KVs', pairs)
    grouped = kvs | WindowInto('W', windowfn=TestWindowFn()) | GroupByKey('G')
    # One (key, [value]) entry per assigned window, two windows per element:
    # [(1, [10]), (1, [10]), (2, [20]), (2, [20]), (3, [30]), (3, [30])].
    expected = [(key, [value]) for key, value in pairs for _ in range(2)]
    assert_that(grouped, equal_to(expected))
    p.run()
def Count(pcoll):  # pylint: disable=invalid-name
    """A Count transform: v, ... => (v, n), ...

    Pairs every element with the integer 1, groups the pairs by element and
    replaces each group's list of ones with its sum.

    Args:
      pcoll: the input collection; it is piped into the first Map step.

    Returns:
      The result of the final Map step, producing (element, count) pairs.
    """
    return (pcoll
            | Map('AddCount', lambda x: (x, 1))
            | GroupByKey('GroupCounts')
            # Index into the (element, ones) pair rather than declaring a
            # tuple parameter: "lambda (x, ones): ..." is a syntax error on
            # Python 3 (PEP 3113). This form works on Python 2 as well.
            | Map('AddCounts', lambda x_ones: (x_ones[0], sum(x_ones[1]))))