def test_capture_control_evict_captured_data(
    self, mocked_test_stream_service_stop, mocked_background_caching_job_cancel):
  """Evicting captured data cancels jobs and clears all cached state."""
  pipeline = _build_an_empty_streaming_pipeline()
  ie.current_env().track_user_pipelines()
  self.assertNotEqual(ie.current_env().tracked_user_pipelines, set())

  caching_job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.RUNNING), limiters=[])
  ie.current_env().set_background_caching_job(pipeline, caching_job)
  _fake_a_running_test_stream_service(pipeline)
  # Pretend the main job is in the middle of being canceled.
  caching_job._pipeline_result = runner.PipelineResult(
      runner.PipelineState.CANCELLING)
  self.assertIsNotNone(
      ie.current_env().get_test_stream_service_controller(pipeline))
  ie.current_env().set_cached_source_signature(pipeline, 'a signature')
  ie.current_env().mark_pcollection_computed(['fake_pcoll'])

  capture_control.evict_captured_data()

  mocked_background_caching_job_cancel.assert_called()
  mocked_test_stream_service_stop.assert_called_once()
  # Neither timer nor capture size limit has triggered, so the cancelling
  # main job's background caching job is not yet considered done.
  self.assertFalse(caching_job.is_done())
  self.assertIsNone(
      ie.current_env().get_test_stream_service_controller(pipeline))
  self.assertEqual(ie.current_env().computed_pcollections, set())
  self.assertEqual(ie.current_env().get_cached_source_signature(pipeline), set())
def test_track_user_pipeline_cleanup_non_inspectable_pipeline(self):
  """Verifies cleanup runs only for state keyed by non-inspectable objects."""
  dummy_pipeline_1 = beam.Pipeline()
  dummy_pipeline_2 = beam.Pipeline()
  dummy_pipeline_3 = beam.Pipeline()
  dummy_pipeline_4 = beam.Pipeline()
  dummy_pcoll = dummy_pipeline_4 | beam.Create([1])
  dummy_pipeline_5 = beam.Pipeline()
  dummy_non_inspectable_pipeline = 'dummy'
  # Watching locals() registers the pipelines above by variable name; the
  # plain string is deliberately not an inspectable pipeline.
  ie.current_env().watch(locals())
  from apache_beam.runners.interactive.background_caching_job import BackgroundCachingJob
  # Attach per-pipeline state of every kind the environment tracks, spread
  # across distinct pipelines.
  ie.current_env().set_background_caching_job(
      dummy_pipeline_1,
      BackgroundCachingJob(
          runner.PipelineResult(runner.PipelineState.DONE), limiters=[]))
  ie.current_env().set_test_stream_service_controller(dummy_pipeline_2, None)
  ie.current_env().set_cache_manager(
      cache.FileBasedCacheManager(), dummy_pipeline_3)
  ie.current_env().mark_pcollection_computed([dummy_pcoll])
  # State keyed by the non-inspectable string value.
  ie.current_env().set_cached_source_signature(
      dummy_non_inspectable_pipeline, None)
  ie.current_env().set_pipeline_result(
      dummy_pipeline_5, runner.PipelineResult(runner.PipelineState.RUNNING))
  with patch('apache_beam.runners.interactive.interactive_environment'
             '.InteractiveEnvironment.cleanup') as mocked_cleanup:
    ie.current_env().track_user_pipelines()
    # Only the entry keyed by the non-inspectable 'dummy' string should be
    # cleaned up, hence exactly one cleanup call.
    mocked_cleanup.assert_called_once()
def test_auto_stop_dynamic_plotting_when_job_is_terminated(self):
  """is_terminated flips once the pipeline result reaches a terminal state."""
  running_result = runner.PipelineResult(runner.PipelineState.RUNNING)
  ie.current_env().set_pipeline_result(self._p, running_result)
  # While the job runs, dynamic plotting is not stopped.
  self.assertFalse(ie.current_env().is_terminated(self._p))

  done_result = runner.PipelineResult(runner.PipelineState.DONE)
  ie.current_env().set_pipeline_result(self._p, done_result)
  # Once the job is done, dynamic plotting is stopped.
  self.assertTrue(ie.current_env().is_terminated(self._p))
def test_auto_stop_dynamic_plotting_when_job_is_terminated(
    self, mocked_timeloop):
  """Async dynamic plotting only stops after the job reaches a terminal state."""
  running_result = runner.PipelineResult(runner.PipelineState.RUNNING)
  ie.current_env().set_pipeline_result(self._p, running_result)
  # Kick off non-stopping async dynamic plotting; it should keep iterating
  # until the job is terminated.
  pv.visualize(self._pcoll, dynamic_plotting_interval=0.001)
  # Block so the async task can execute some iterations.
  time.sleep(1)
  mocked_timeloop.assert_not_called()

  done_result = runner.PipelineResult(runner.PipelineState.DONE)
  ie.current_env().set_pipeline_result(self._p, done_result)
  # Block again so the async task can observe the terminal state.
  time.sleep(1)
  # "assert_called" is new in Python 3.6.
  mocked_timeloop.assert_called()
def _fake_a_running_background_caching_job(pipeline):
  """Installs and returns a RUNNING background caching job for *pipeline*."""
  job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.RUNNING),
      # Multithreaded limit checkers are disabled in tests.
      start_limit_checkers=False)
  ie.current_env().set_background_caching_job(pipeline, job)
  return job
def test_evict_pipeline_result(self):
  """Evicting a main-job pipeline result removes it from the environment."""
  pipeline_result = runner.PipelineResult(runner.PipelineState.DONE)
  ie.current_env().set_pipeline_result(
      self._p, pipeline_result, is_main_job=True)
  # Eviction hands back the previously stored result...
  self.assertIs(
      ie.current_env().evict_pipeline_result(self._p), pipeline_result)
  # ...and subsequent lookups find nothing. Idiom fix: use assertIsNone
  # instead of assertIs(..., None).
  self.assertIsNone(ie.current_env().pipeline_result(self._p))
def test_determine_terminal_state(self):
  """is_terminated reflects whether the stored result's state is terminal."""
  terminal_states = (
      runner.PipelineState.DONE,
      runner.PipelineState.FAILED,
      runner.PipelineState.CANCELLED,
      runner.PipelineState.UPDATED,
      runner.PipelineState.DRAINED)
  for state in terminal_states:
    ie.current_env().set_pipeline_result(
        self._p, runner.PipelineResult(state))
    self.assertTrue(ie.current_env().is_terminated(self._p))

  non_terminal_states = (
      runner.PipelineState.UNKNOWN,
      runner.PipelineState.STARTING,
      runner.PipelineState.STOPPED,
      runner.PipelineState.RUNNING,
      runner.PipelineState.DRAINING,
      runner.PipelineState.PENDING,
      runner.PipelineState.CANCELLING,
      runner.PipelineState.UNRECOGNIZED)
  for state in non_terminal_states:
    ie.current_env().set_pipeline_result(
        self._p, runner.PipelineResult(state))
    self.assertFalse(ie.current_env().is_terminated(self._p))
def test_fail_to_set_pipeline_result_key_not_pipeline(self):
  """set_pipeline_result must reject keys that are not beam.Pipeline."""

  class NotPipeline(object):
    pass

  with self.assertRaises(AssertionError) as ctx:
    ie.current_env().set_pipeline_result(
        NotPipeline(),
        runner.PipelineResult(runner.PipelineState.RUNNING))
  # BUG FIX: the original `'...' in ctx.exception` raises TypeError because
  # an exception instance is not iterable; the membership test must be done
  # against the exception's string representation.
  self.assertIn(
      'pipeline must be an instance of apache_beam.Pipeline '
      'or its subclass',
      str(ctx.exception))
def test_background_caching_job_not_start_when_such_job_is_done(self):
  """A recorded DONE non-main result is not replaced when the pipeline runs."""
  p = _build_a_test_stream_pipeline()
  done_result = runner.PipelineResult(runner.PipelineState.DONE)
  ie.current_env().set_pipeline_result(p, done_result, is_main_job=False)

  main_job_result = p.run()

  # No new background caching job started, so the non-main result is the
  # same object that was registered above.
  self.assertIs(
      done_result, ie.current_env().pipeline_result(p, is_main_job=False))
  # Running the pipeline registered a fresh main-job result.
  self.assertIs(main_job_result, ie.current_env().pipeline_result(p))
def test_background_caching_job_not_start_when_such_job_exists(self):
  """An existing RUNNING background caching job is kept as-is by p.run()."""
  p = _build_a_test_stream_pipeline()
  running_job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.RUNNING))
  ie.current_env().set_background_caching_job(p, running_job)

  main_job_result = p.run()

  # No new background caching job was started, so the registered job is the
  # same object that was installed above.
  self.assertIs(running_job, ie.current_env().get_background_caching_job(p))
  # The main job's result was freshly set by run().
  self.assertIs(main_job_result, ie.current_env().pipeline_result(p))
def test_background_caching_job_not_start_when_such_job_is_done(self):
  """A DONE background caching job is not replaced when the pipeline runs."""
  p = _build_a_test_stream_pipeline()
  _setup_test_streaming_cache(p)
  done_job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.DONE), limiters=[])
  ie.current_env().set_background_caching_job(p, done_job)

  main_job_result = p.run()

  # No new background caching job was started, so the registered job is the
  # same object that was installed above.
  self.assertIs(done_job, ie.current_env().get_background_caching_job(p))
  # The main job's result was freshly set by run().
  self.assertIs(main_job_result, ie.current_env().pipeline_result(p))
def test_dynamic_plotting_update_same_display(self, mocked_display_facets):
  """Iterations after the first all reuse one updating_pv in display_facets."""
  fake_result = runner.PipelineResult(runner.PipelineState.RUNNING)
  ie.current_env().set_pipeline_result(self._p, fake_result)
  # Start async dynamic plotting that never terminates within this test.
  handle = pv.visualize(self._pcoll, dynamic_plotting_interval=0.001)
  # Block so the async task can execute several iterations.
  time.sleep(1)
  calls = mocked_display_facets.call_args_list
  # The very first iteration passes no updating_pv to display_facets.
  _, first_kwargs = calls[0]
  self.assertEqual(first_kwargs, {})
  # Every later iteration reuses the updating_pv introduced by the second
  # call.
  _, second_kwargs = calls[1]
  updating_pv = second_kwargs['updating_pv']
  for _, kwargs in calls[2:]:
    self.assertIs(kwargs['updating_pv'], updating_pv)
  handle.stop()
def test_timer_terminates_capture_size_checker(self):
  """A triggered limiter marks the background caching job as done."""
  p = _build_an_empty_streaming_pipeline()

  class FakeLimiter(capture_limiters.Limiter):
    """Limiter whose triggered state the test toggles directly."""
    def __init__(self):
      self.trigger = False

    def is_triggered(self):
      return self.trigger

  limiter = FakeLimiter()
  caching_job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.CANCELLING),
      limiters=[limiter])
  ie.current_env().set_background_caching_job(p, caching_job)
  # Not done while the limiter has not fired...
  self.assertFalse(caching_job.is_done())
  limiter.trigger = True
  # ...and done as soon as it has.
  self.assertTrue(caching_job.is_done())