def test_describe(self):
  """Recording.describe() reports total cached size and the start time."""
  p = beam.Pipeline(InteractiveRunner())
  numbers = p | 'numbers' >> beam.Create([0, 1, 2])
  letters = p | 'letters' >> beam.Create(['a', 'b', 'c'])
  ib.watch(locals())

  # Fake the run: a MockPipelineResult stands in for a real job so the
  # pipeline never has to execute in this test.
  fake_result = MockPipelineResult()
  ie.current_env().track_user_pipelines()
  ie.current_env().set_pipeline_result(p, fake_result)
  cache = InMemoryCache()
  ie.current_env().set_cache_manager(cache, p)

  # An arbitrary, recognizable start time to assert against later.
  expected_start = 100
  recording = Recording(
      p, [numbers, letters],
      fake_result,
      pi.PipelineInstrument(p),
      max_n=10,
      max_duration_secs=60,
      start_time_for_test=expected_start)

  # Pre-populate the cache under each stream's key so that describe() has
  # something to measure without a pipeline run.
  numbers_stream = recording.stream(numbers)
  cache.write([0, 1, 2], 'full', numbers_stream.cache_key)
  cache.save_pcoder(None, 'full', numbers_stream.cache_key)

  letters_stream = recording.stream(letters)
  cache.write(['a', 'b', 'c'], 'full', letters_stream.cache_key)
  cache.save_pcoder(None, 'full', letters_stream.cache_key)

  description = recording.describe()
  self.assertEqual(
      description['size'],
      cache.size('full', numbers_stream.cache_key) +
      cache.size('full', letters_stream.cache_key))
  self.assertEqual(description['start'], expected_start)
class ElementStreamTest(unittest.TestCase):
  """Tests for ElementStream read limits and termination handling."""

  def setUp(self):
    # A fake pipeline/result pair lets each test drive the "job" state
    # directly, without ever running anything for real.
    self.cache = InMemoryCache()
    self.p = beam.Pipeline()
    self.pcoll = self.p | beam.Create([])
    self.cache_key = str(CacheKey('pcoll', '', '', ''))

    self.mock_result = MockPipelineResult()
    ie.current_env().add_user_pipeline(self.p)
    ie.current_env().set_pipeline_result(self.p, self.mock_result)
    ie.current_env().set_cache_manager(self.cache, self.p)

  def test_read(self):
    """Test reading and if a stream is done no more elements are returned."""
    self.mock_result.set_state(PipelineState.DONE)
    self.cache.write(['expected'], 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    es = ElementStream(
        self.pcoll, '', self.cache_key, max_n=1, max_duration_secs=1)

    self.assertFalse(es.is_done())
    self.assertEqual(list(es.read())[0], 'expected')
    self.assertTrue(es.is_done())

  def test_done_if_terminated(self):
    """Test that terminating the job sets the stream as done."""
    self.cache.write(['expected'], 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    es = ElementStream(
        self.pcoll, '', self.cache_key, max_n=100, max_duration_secs=10)

    self.assertFalse(es.is_done())
    self.assertEqual(list(es.read(tail=False))[0], 'expected')
    # Neither max_n nor max_duration_secs has been reached, so more
    # elements could still arrive.
    self.assertFalse(es.is_done())

    self.mock_result.set_state(PipelineState.DONE)
    self.assertEqual(list(es.read(tail=False))[0], 'expected')
    # Once the underlying pipeline has terminated, the stream can never
    # yield new elements, so it is done.
    self.assertTrue(es.is_done())

  def test_read_n(self):
    """Test that the stream only reads 'n' elements."""
    self.mock_result.set_state(PipelineState.DONE)
    self.cache.write(list(range(5)), 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    # The final case asks for more elements than are cached; the stream
    # still returns everything available and completes.
    for n, expected in ((1, [0]),
                        (2, [0, 1]),
                        (5, [0, 1, 2, 3, 4]),
                        (10, [0, 1, 2, 3, 4])):
      es = ElementStream(
          self.pcoll, '', self.cache_key, max_n=n, max_duration_secs=1)
      self.assertEqual(list(es.read()), expected)
      self.assertTrue(es.is_done())

  def test_read_duration(self):
    """Test that the stream only reads a 'duration' of elements."""

    def in_global_window(value):
      return WindowedValueHolder(WindowedValue(value, 0, []))

    # Build a stream of five elements, one second of processing time apart.
    builder = FileRecordsBuilder(tag=self.cache_key)
    for value, event_time in ((0, 0), (1, 1), (2, 3), (3, 4), (4, 5)):
      builder = (builder
                 .advance_processing_time(1)
                 .add_element(element=in_global_window(value),
                              event_time_secs=event_time))  # yapf: disable
    events = builder.build()

    # Only the TestStreamFileRecord payloads are written to the cache;
    # every other event type is dropped.
    records = [
        e.recorded_event for e in events
        if isinstance(e, beam_interactive_api_pb2.TestStreamFileRecord)
    ]

    self.mock_result.set_state(PipelineState.DONE)
    self.cache.write(records, 'full', self.cache_key)
    self.cache.save_pcoder(coders.FastPrimitivesCoder(), 'full',
                           self.cache_key)

    # A progression of durations reads successively longer prefixes from
    # the cache.
    for duration, expected in ((1, [0]),
                               (2, [0, 1]),
                               (10, [0, 1, 2, 3, 4])):
      es = ElementStream(
          self.pcoll, '', self.cache_key,
          max_n=100, max_duration_secs=duration)
      self.assertSequenceEqual([e.value for e in es.read()], expected)
# NOTE(review): this class re-declares ElementStreamTest, which also appears
# earlier in this file; at import time this later definition shadows the
# earlier one, so only one set of tests is discovered. Confirm whether the
# duplication is intentional (e.g. a merge artifact) and rename one class.
class ElementStreamTest(unittest.TestCase):
  """Tests for ElementStream read limits and termination handling."""

  def setUp(self):
    # Start from a fresh interactive environment, then fake a run with a
    # MockPipelineResult so each test can drive the "job" state directly.
    ie.new_env()
    self.cache = InMemoryCache()
    self.p = beam.Pipeline()
    self.pcoll = self.p | beam.Create([])
    self.cache_key = str(pi.CacheKey('pcoll', '', '', ''))

    self.mock_result = MockPipelineResult()
    ie.current_env().track_user_pipelines()
    ie.current_env().set_pipeline_result(self.p, self.mock_result)
    ie.current_env().set_cache_manager(self.cache, self.p)

  def test_read(self):
    """Test reading and if a stream is done no more elements are returned."""
    self.mock_result.set_state(PipelineState.DONE)
    self.cache.write(['expected'], 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    es = ElementStream(
        self.pcoll, '', self.cache_key, max_n=1, max_duration_secs=1)

    self.assertFalse(es.is_done())
    self.assertEqual(list(es.read())[0], 'expected')
    self.assertTrue(es.is_done())

  def test_done_if_terminated(self):
    """Test that terminating the job sets the stream as done."""
    self.cache.write(['expected'], 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    es = ElementStream(
        self.pcoll, '', self.cache_key, max_n=100, max_duration_secs=10)

    self.assertFalse(es.is_done())
    self.assertEqual(list(es.read(tail=False))[0], 'expected')
    # Neither max_n nor max_duration_secs has been reached, so more
    # elements could still arrive.
    self.assertFalse(es.is_done())

    self.mock_result.set_state(PipelineState.DONE)
    self.assertEqual(list(es.read(tail=False))[0], 'expected')
    # Once the underlying pipeline has terminated, the stream can never
    # yield new elements, so it is done.
    self.assertTrue(es.is_done())

  def test_read_n(self):
    """Test that the stream only reads 'n' elements."""
    self.mock_result.set_state(PipelineState.DONE)
    self.cache.write(list(range(5)), 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    # The final case asks for more elements than are cached; the stream
    # still returns everything available and completes.
    for n, expected in ((1, [0]),
                        (2, [0, 1]),
                        (5, [0, 1, 2, 3, 4]),
                        (10, [0, 1, 2, 3, 4])):
      es = ElementStream(
          self.pcoll, '', self.cache_key, max_n=n, max_duration_secs=1)
      self.assertEqual(list(es.read()), expected)
      self.assertTrue(es.is_done())

  def test_read_duration(self):
    """Test that the stream only reads a 'duration' of elements."""
    # Build a stream of five elements, one second of processing time apart.
    builder = FileRecordsBuilder(tag=self.cache_key)
    for value, event_time in ((0, 0), (1, 1), (2, 3), (3, 4), (4, 5)):
      builder = (builder
                 .advance_processing_time(1)
                 .add_element(element=value,
                              event_time_secs=event_time))  # yapf: disable
    events = builder.build()

    self.mock_result.set_state(PipelineState.DONE)
    self.cache.write(events, 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    def decode_elements(events):
      # Cached entries are TestStreamFileRecords holding encoded elements;
      # pull the raw values back out of the element events.
      coder = coders.FastPrimitivesCoder()
      decoded = []
      for e in events:
        if (isinstance(e, TestStreamFileRecord) and
            e.recorded_event.element_event):
          decoded.extend(
              coder.decode(el.encoded_element)
              for el in e.recorded_event.element_event.elements)
      return decoded

    # A progression of durations reads successively longer prefixes from
    # the cache.
    for duration, expected in ((1, [0]),
                               (2, [0, 1]),
                               (10, [0, 1, 2, 3, 4])):
      es = ElementStream(
          self.pcoll, '', self.cache_key,
          max_n=100, max_duration_secs=duration)
      self.assertSequenceEqual(decode_elements(es.read()), expected)