def test_read_and_write_multiple_outputs(self): """An integration test between the Sink and Source with multiple outputs. This tests the funcionatlity that the StreamingCache reads from multiple files and combines them into a single sorted output. """ LETTERS_TAG = repr(CacheKey('letters', '', '', '')) NUMBERS_TAG = repr(CacheKey('numbers', '', '', '')) # Units here are in seconds. test_stream = (TestStream() .advance_watermark_to(0, tag=LETTERS_TAG) .advance_processing_time(5) .add_elements(['a', 'b', 'c'], tag=LETTERS_TAG) .advance_watermark_to(10, tag=NUMBERS_TAG) .advance_processing_time(1) .add_elements( [ TimestampedValue('1', 15), TimestampedValue('2', 15), TimestampedValue('3', 15) ], tag=NUMBERS_TAG)) # yapf: disable cache = StreamingCache(cache_dir=None, sample_resolution_sec=1.0) coder = SafeFastPrimitivesCoder() options = StandardOptions(streaming=True) with TestPipeline(options=options) as p: # pylint: disable=expression-not-assigned events = p | test_stream events[LETTERS_TAG] | 'Letters sink' >> cache.sink([LETTERS_TAG]) events[NUMBERS_TAG] | 'Numbers sink' >> cache.sink([NUMBERS_TAG]) reader = cache.read_multiple([[LETTERS_TAG], [NUMBERS_TAG]]) actual_events = list(reader) # Units here are in microseconds. expected_events = [ TestStreamPayload.Event( processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime( advance_duration=5 * 10**6)), TestStreamPayload.Event( watermark_event=TestStreamPayload.Event.AdvanceWatermark( new_watermark=0, tag=LETTERS_TAG)), TestStreamPayload.Event( element_event=TestStreamPayload.Event.AddElements( elements=[ TestStreamPayload.TimestampedElement( encoded_element=coder.encode('a'), timestamp=0), TestStreamPayload.TimestampedElement( encoded_element=coder.encode('b'), timestamp=0), TestStreamPayload.TimestampedElement( encoded_element=coder.encode('c'), timestamp=0), ], tag=LETTERS_TAG)), TestStreamPayload.Event( processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime( advance_duration=1 * 10**6)), TestStreamPayload.Event( watermark_event=TestStreamPayload.Event.AdvanceWatermark( new_watermark=10 * 10**6, tag=NUMBERS_TAG)), TestStreamPayload.Event( watermark_event=TestStreamPayload.Event.AdvanceWatermark( new_watermark=0, tag=LETTERS_TAG)), TestStreamPayload.Event( element_event=TestStreamPayload.Event.AddElements( elements=[ TestStreamPayload.TimestampedElement( encoded_element=coder.encode('1'), timestamp=15 * 10**6), TestStreamPayload.TimestampedElement( encoded_element=coder.encode('2'), timestamp=15 * 10**6), TestStreamPayload.TimestampedElement( encoded_element=coder.encode('3'), timestamp=15 * 10**6), ], tag=NUMBERS_TAG)), ] self.assertListEqual(actual_events, expected_events)
def test_multiple_readers(self): """Tests that the service advances the clock with multiple outputs. """ CACHED_LETTERS = repr(CacheKey('letters', '', '', '')) CACHED_NUMBERS = repr(CacheKey('numbers', '', '', '')) CACHED_LATE = repr(CacheKey('late', '', '', '')) letters = (FileRecordsBuilder(CACHED_LETTERS) .advance_processing_time(1) .advance_watermark(watermark_secs=0) .add_element(element='a', event_time_secs=0) .advance_processing_time(10) .advance_watermark(watermark_secs=10) .add_element(element='b', event_time_secs=10) .build()) # yapf: disable numbers = (FileRecordsBuilder(CACHED_NUMBERS) .advance_processing_time(2) .add_element(element=1, event_time_secs=0) .advance_processing_time(1) .add_element(element=2, event_time_secs=0) .advance_processing_time(1) .add_element(element=2, event_time_secs=0) .build()) # yapf: disable late = (FileRecordsBuilder(CACHED_LATE) .advance_processing_time(101) .add_element(element='late', event_time_secs=0) .build()) # yapf: disable cache = StreamingCache(cache_dir=None) cache.write(letters, CACHED_LETTERS) cache.write(numbers, CACHED_NUMBERS) cache.write(late, CACHED_LATE) reader = cache.read_multiple([[CACHED_LETTERS], [CACHED_NUMBERS], [CACHED_LATE]]) coder = coders.FastPrimitivesCoder() events = list(reader) # Units here are in microseconds. expected = [ # Advances clock from 0 to 1 TestStreamPayload.Event( processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime( advance_duration=1 * 10**6)), TestStreamPayload.Event( watermark_event=TestStreamPayload.Event.AdvanceWatermark( new_watermark=0, tag=CACHED_LETTERS)), TestStreamPayload.Event( element_event=TestStreamPayload.Event.AddElements( elements=[ TestStreamPayload.TimestampedElement( encoded_element=coder.encode('a'), timestamp=0) ], tag=CACHED_LETTERS)), # Advances clock from 1 to 2 TestStreamPayload.Event( processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime( advance_duration=1 * 10**6)), TestStreamPayload.Event( element_event=TestStreamPayload.Event.AddElements( elements=[ TestStreamPayload.TimestampedElement( encoded_element=coder.encode(1), timestamp=0) ], tag=CACHED_NUMBERS)), # Advances clock from 2 to 3 TestStreamPayload.Event( processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime( advance_duration=1 * 10**6)), TestStreamPayload.Event( element_event=TestStreamPayload.Event.AddElements( elements=[ TestStreamPayload.TimestampedElement( encoded_element=coder.encode(2), timestamp=0) ], tag=CACHED_NUMBERS)), # Advances clock from 3 to 4 TestStreamPayload.Event( processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime( advance_duration=1 * 10**6)), TestStreamPayload.Event( element_event=TestStreamPayload.Event.AddElements( elements=[ TestStreamPayload.TimestampedElement( encoded_element=coder.encode(2), timestamp=0) ], tag=CACHED_NUMBERS)), # Advances clock from 4 to 11 TestStreamPayload.Event( processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime( advance_duration=7 * 10**6)), TestStreamPayload.Event( watermark_event=TestStreamPayload.Event.AdvanceWatermark( new_watermark=10 * 10**6, tag=CACHED_LETTERS)), TestStreamPayload.Event( element_event=TestStreamPayload.Event.AddElements( elements=[ TestStreamPayload.TimestampedElement( encoded_element=coder.encode('b'), timestamp=10 * 10**6) ], tag=CACHED_LETTERS)), # Advances clock from 11 to 101 TestStreamPayload.Event( processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime( advance_duration=90 * 10**6)), TestStreamPayload.Event( element_event=TestStreamPayload.Event.AddElements( elements=[ TestStreamPayload.TimestampedElement( encoded_element=coder.encode('late'), timestamp=0) ], tag=CACHED_LATE)), ] self.assertSequenceEqual(events, expected)