def test_clear(self):
    """Clearing a captured label removes both its data and its capture key."""
    streaming_cache = StreamingCache(cache_dir=None)
    self.assertFalse(streaming_cache.exists('my_label'))

    # Register the label as a capture sink, then write one record under it.
    streaming_cache.sink(['my_label'], is_capture=True)
    streaming_cache.write([TestStreamFileRecord()], 'my_label')
    self.assertTrue(streaming_cache.exists('my_label'))
    self.assertEqual(streaming_cache.capture_keys, {'my_label'})

    # clear() reports success and drops the data along with the capture key.
    self.assertTrue(streaming_cache.clear('my_label'))
    self.assertFalse(streaming_cache.exists('my_label'))
    self.assertFalse(streaming_cache.capture_keys)
def test_capture_size_limit_not_reached_when_file_size_under_limit(self):
    """A tiny record under a 100-byte limit must not trip the size limit.

    NOTE(review): a later method in this file has this exact name and
    silently shadows this one at class-definition time -- one of the two
    definitions should be renamed or removed; confirm which is current.
    """
    ib.options.capture_size_limit = 100
    streaming_cache = StreamingCache(cache_dir=None)

    # Track the label as a capture by building a sink for it, then write a
    # single (empty) record so the file exists but stays small.
    streaming_cache.sink(['my_label'], is_capture=True)
    streaming_cache.write([TestStreamFileRecord()], 'my_label')
    self.assertTrue(streaming_cache.exists('my_label'))

    ie.current_env().set_cache_manager(streaming_cache)
    self.assertFalse(
        ie.current_env().options.capture_control.
        is_capture_size_limit_reached())
def test_capture_size_limit_not_reached_when_file_size_under_limit(self):
    """A tiny record under a 100-byte limit must not trigger the SizeLimiter."""
    ib.options.capture_size_limit = 100
    streaming_cache = StreamingCache(cache_dir=None)

    # Track the label as a capture by building a sink for it, then write a
    # single (empty) record so the file exists but stays small.
    streaming_cache.sink(['my_label'], is_capture=True)
    streaming_cache.write(
        [beam_interactive_api_pb2.TestStreamFileRecord()], 'my_label')
    self.assertTrue(streaming_cache.exists('my_label'))

    ie.current_env().set_cache_manager(streaming_cache, 'dummy pipeline')
    size_limiter = capture_limiters.SizeLimiter(ib.options.capture_size_limit)
    self.assertFalse(size_limiter.is_triggered())
def test_capture_size_limit_reached_when_file_size_above_limit(self):
    """A record bigger than a 1-byte limit must trip the size limit.

    NOTE(review): a later method in this file has this exact name and
    silently shadows this one at class-definition time -- one of the two
    definitions should be renamed or removed; confirm which is current.
    """
    ib.options.capture_size_limit = 1
    streaming_cache = StreamingCache(cache_dir=None)
    streaming_cache.sink(['my_label'], is_capture=True)

    # One encoded element; its serialized record exceeds the 1-byte limit.
    timestamped = TestStreamPayload.TimestampedElement(
        encoded_element=coders.FastPrimitivesCoder().encode('a'), timestamp=0)
    record = TestStreamFileRecord(
        recorded_event=TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[timestamped])))
    streaming_cache.write([record], 'my_label')
    self.assertTrue(streaming_cache.exists('my_label'))

    ie.current_env().set_cache_manager(streaming_cache)
    self.assertTrue(
        ie.current_env().options.capture_control.
        is_capture_size_limit_reached())
def test_capture_size_limit_reached_when_file_size_above_limit(self):
    """A record bigger than a 1-byte limit must trigger the SizeLimiter."""
    ib.options.capture_size_limit = 1
    streaming_cache = StreamingCache(cache_dir=None)
    streaming_cache.sink(['my_label'], is_capture=True)

    # One encoded element; its serialized record exceeds the 1-byte limit.
    timestamped = TestStreamPayload.TimestampedElement(
        encoded_element=coders.FastPrimitivesCoder().encode('a'), timestamp=0)
    record = TestStreamFileRecord(
        recorded_event=TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[timestamped])))
    streaming_cache.write([record], 'my_label')
    self.assertTrue(streaming_cache.exists('my_label'))

    pipeline = _build_an_empty_streaming_pipeline()
    ie.current_env().set_cache_manager(streaming_cache, pipeline)
    size_limiter = capture_limiters.SizeLimiter(1)
    self.assertTrue(size_limiter.is_triggered())
def test_read_and_write_multiple_outputs(self):
    """An integration test between the Sink and Source with multiple outputs.

    This tests the functionality that the StreamingCache reads from multiple
    files and combines them into a single sorted output.
    """
    LETTERS_TAG = repr(CacheKey('letters', '', '', ''))
    NUMBERS_TAG = repr(CacheKey('numbers', '', '', ''))

    # Units here are in seconds.
    test_stream = (
        TestStream()
        .advance_watermark_to(0, tag=LETTERS_TAG)
        .advance_processing_time(5)
        .add_elements(['a', 'b', 'c'], tag=LETTERS_TAG)
        .advance_watermark_to(10, tag=NUMBERS_TAG)
        .advance_processing_time(1)
        .add_elements(
            [
                TimestampedValue('1', 15),
                TimestampedValue('2', 15),
                TimestampedValue('3', 15)
            ],
            tag=NUMBERS_TAG))  # yapf: disable

    streaming_cache = StreamingCache(cache_dir=None, sample_resolution_sec=1.0)
    coder = SafeFastPrimitivesCoder()

    with TestPipeline(options=StandardOptions(streaming=True)) as p:
        # pylint: disable=expression-not-assigned
        events = p | test_stream
        events[LETTERS_TAG] | 'Letters sink' >> streaming_cache.sink(
            [LETTERS_TAG])
        events[NUMBERS_TAG] | 'Numbers sink' >> streaming_cache.sink(
            [NUMBERS_TAG])

    reader = streaming_cache.read_multiple([[LETTERS_TAG], [NUMBERS_TAG]])
    actual_events = list(reader)

    # Local builders keep the expected-event list readable. Units in the
    # payloads are in microseconds.
    def processing_time(advance_secs):
        return TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.
            AdvanceProcessingTime(advance_duration=advance_secs * 10**6))

    def watermark(new_watermark, tag):
        return TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=new_watermark, tag=tag))

    def add_elements(values, timestamp, tag):
        return TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode(v), timestamp=timestamp)
                    for v in values
                ],
                tag=tag))

    expected_events = [
        processing_time(5),
        watermark(0, LETTERS_TAG),
        add_elements(['a', 'b', 'c'], 0, LETTERS_TAG),
        processing_time(1),
        watermark(10 * 10**6, NUMBERS_TAG),
        watermark(0, LETTERS_TAG),
        add_elements(['1', '2', '3'], 15 * 10**6, NUMBERS_TAG),
    ]
    self.assertListEqual(actual_events, expected_events)
def test_read_and_write(self):
    """An integration test between the Sink and Source.

    This ensures that the sink and source speak the same language in terms
    of coders, protos, order, and units.

    NOTE(review): a later method in this file has this exact name and
    silently shadows this one at class-definition time -- one of the two
    definitions should be renamed or removed; confirm which is current.
    """
    CACHED_RECORDS = repr(CacheKey('records', '', '', ''))

    # Units here are in seconds.
    test_stream = (
        TestStream(output_tags=(CACHED_RECORDS))
        .advance_watermark_to(0, tag=CACHED_RECORDS)
        .advance_processing_time(5)
        .add_elements(['a', 'b', 'c'], tag=CACHED_RECORDS)
        .advance_watermark_to(10, tag=CACHED_RECORDS)
        .advance_processing_time(1)
        .add_elements(
            [
                TimestampedValue('1', 15),
                TimestampedValue('2', 15),
                TimestampedValue('3', 15)
            ],
            tag=CACHED_RECORDS))  # yapf: disable

    coder = SafeFastPrimitivesCoder()
    streaming_cache = StreamingCache(cache_dir=None, sample_resolution_sec=1.0)

    # No capture keys exist before anything is sunk.
    self.assertEqual(streaming_cache.capture_keys, set())

    with TestPipeline(options=StandardOptions(streaming=True)) as p:
        records = (p | test_stream)[CACHED_RECORDS]
        # pylint: disable=expression-not-assigned
        records | streaming_cache.sink([CACHED_RECORDS], is_capture=True)

    reader, _ = streaming_cache.read(CACHED_RECORDS)
    actual_events = list(reader)

    # The capture key was forwarded correctly through the sink.
    self.assertEqual(streaming_cache.capture_keys, {CACHED_RECORDS})

    # Local builders keep the expected-event list readable. Units in the
    # payloads are in microseconds.
    def processing_time(advance_secs):
        return TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.
            AdvanceProcessingTime(advance_duration=advance_secs * 10**6))

    def watermark(new_watermark):
        return TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=new_watermark, tag=CACHED_RECORDS))

    def add_elements(values, timestamp):
        return TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode(v), timestamp=timestamp)
                    for v in values
                ],
                tag=CACHED_RECORDS))

    expected_events = [
        processing_time(5),
        watermark(0),
        add_elements(['a', 'b', 'c'], 0),
        processing_time(1),
        watermark(10 * 10**6),
        add_elements(['1', '2', '3'], 15 * 10**6),
    ]
    self.assertEqual(actual_events, expected_events)
def test_read_and_write(self):
    """An integration test between the Sink and Source.

    This ensures that the sink and source speak the same language in terms
    of coders, protos, order, and units.
    """
    # Units here are in seconds.
    test_stream = (
        TestStream()
        .advance_watermark_to(0, tag='records')
        .advance_processing_time(5)
        .add_elements(['a', 'b', 'c'], tag='records')
        .advance_watermark_to(10, tag='records')
        .advance_processing_time(1)
        .add_elements(
            [
                TimestampedValue('1', 15),
                TimestampedValue('2', 15),
                TimestampedValue('3', 15)
            ],
            tag='records'))  # yapf: disable

    coder = SafeFastPrimitivesCoder()
    streaming_cache = StreamingCache(cache_dir=None, sample_resolution_sec=1.0)

    pipeline_options = StandardOptions(streaming=True)
    pipeline_options.view_as(DebugOptions).add_experiment(
        'passthrough_pcollection_output_ids')
    with TestPipeline(options=pipeline_options) as p:
        # pylint: disable=expression-not-assigned
        p | test_stream | streaming_cache.sink(['records'])

    reader, _ = streaming_cache.read('records')
    actual_events = list(reader)

    # Local builders keep the expected-event list readable. Units in the
    # payloads are in microseconds.
    def processing_time(advance_secs):
        return TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.
            AdvanceProcessingTime(advance_duration=advance_secs * 10**6))

    def watermark(new_watermark):
        return TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=new_watermark, tag='records'))

    def add_elements(values, timestamp):
        return TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode(v), timestamp=timestamp)
                    for v in values
                ],
                tag='records'))

    expected_events = [
        processing_time(5),
        watermark(0),
        add_elements(['a', 'b', 'c'], 0),
        processing_time(1),
        watermark(10 * 10**6),
        add_elements(['1', '2', '3'], 15 * 10**6),
    ]
    self.assertEqual(actual_events, expected_events)