def test_capture_control_evict_captured_data(
    self, mocked_test_stream_service_stop, mocked_background_caching_job_cancel):
  """Evicting captured data tears down every piece of capture state.

  After eviction: the caching job is cancelled, the test stream service is
  stopped and deregistered, and the computed PCollections / cached source
  signature are cleared.
  """
  pipeline = _build_an_empty_streaming_pipeline()
  ie.current_env().track_user_pipelines()
  self.assertFalse(ie.current_env().tracked_user_pipelines == set())

  caching_job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.RUNNING), limiters=[])
  ie.current_env().set_background_caching_job(pipeline, caching_job)
  _fake_a_running_test_stream_service(pipeline)
  # Fake the canceling state of the main job.
  caching_job._pipeline_result = runner.PipelineResult(
      runner.PipelineState.CANCELLING)
  self.assertIsNotNone(
      ie.current_env().get_test_stream_service_controller(pipeline))
  ie.current_env().set_cached_source_signature(pipeline, 'a signature')
  ie.current_env().mark_pcollection_computed(['fake_pcoll'])

  capture_control.evict_captured_data()

  mocked_background_caching_job_cancel.assert_called()
  mocked_test_stream_service_stop.assert_called_once()
  # Neither timer nor capture size limit is reached, thus, the cancelling
  # main job's background caching job is not considered as done.
  self.assertFalse(caching_job.is_done())
  self.assertIsNone(
      ie.current_env().get_test_stream_service_controller(pipeline))
  self.assertTrue(ie.current_env().computed_pcollections == set())
  self.assertTrue(
      ie.current_env().get_cached_source_signature(pipeline) == set())
def _fake_a_running_background_caching_job(pipeline):
  """Registers a RUNNING background caching job for the given pipeline.

  Returns the created job so tests can inspect or mutate it later.
  """
  job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.RUNNING),
      # Do not start multithreaded checkers in tests.
      start_limit_checkers=False)
  ie.current_env().set_background_caching_job(pipeline, job)
  return job
def test_background_caching_job_not_start_when_such_job_is_done(self):
  """A finished background caching job is not replaced by a new one."""
  pipeline = _build_a_test_stream_pipeline()
  done_caching_job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.DONE))
  ie.current_env().set_background_caching_job(pipeline, done_caching_job)

  main_job_result = pipeline.run()

  # No new background caching job is started, so the recorded job is still
  # the pre-existing done one.
  self.assertIs(
      done_caching_job, ie.current_env().get_background_caching_job(pipeline))
  # A new main job is started so result of the main job is set.
  self.assertIs(main_job_result, ie.current_env().pipeline_result(pipeline))
def test_background_caching_job_not_start_when_such_job_exists(self):
  """An already-running background caching job is not replaced by a new one."""
  pipeline = _build_a_test_stream_pipeline()
  _setup_test_streaming_cache(pipeline)
  running_caching_job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.RUNNING), limiters=[])
  ie.current_env().set_background_caching_job(pipeline, running_caching_job)

  main_job_result = pipeline.run()

  # No background caching job is started so result is still the running one.
  self.assertIs(
      running_caching_job,
      ie.current_env().get_background_caching_job(pipeline))
  # A new main job is started so result of the main job is set.
  self.assertIs(main_job_result, ie.current_env().pipeline_result(pipeline))
def test_timer_terminates_capture_size_checker(self):
  """A cancelling caching job becomes done once any limiter triggers."""
  pipeline = _build_an_empty_streaming_pipeline()

  class _ManualLimiter(capture_limiters.Limiter):
    """Limiter whose trigger state is flipped directly by the test."""
    def __init__(self):
      self.trigger = False

    def is_triggered(self):
      return self.trigger

  limiter = _ManualLimiter()
  caching_job = bcj.BackgroundCachingJob(
      runner.PipelineResult(runner.PipelineState.CANCELLING),
      limiters=[limiter])
  ie.current_env().set_background_caching_job(pipeline, caching_job)

  # Not done while the limiter has not fired...
  self.assertFalse(caching_job.is_done())
  # ...and done as soon as it has.
  limiter.trigger = True
  self.assertTrue(caching_job.is_done())
def test_computed(self):
  """Tests that a PCollection is marked as computed only in a complete state.

  Because the background caching job is now long-lived, repeated runs of a
  PipelineFragment may yield different results for the same PCollection.
  """
  p = beam.Pipeline(InteractiveRunner())
  elems = p | beam.Create([0, 1, 2])
  # NOTE: `p` and `elems` are registered by their local names here, so those
  # names must not change.
  ib.watch(locals())

  # A MockPipelineResult controls the state of a fake run of the pipeline.
  fake_main_result = MockPipelineResult()
  ie.current_env().track_user_pipelines()
  ie.current_env().set_pipeline_result(p, fake_main_result)

  # A mock BackgroundCachingJob controls whether the PCollections may be
  # marked as computed.
  fake_bcj_result = MockPipelineResult()
  caching_job = bcj.BackgroundCachingJob(fake_bcj_result, [])

  recording = Recording(
      p, [elems], fake_main_result, max_n=10, max_duration_secs=60)

  # The background caching job and the recording isn't done yet so there may
  # be more elements to be recorded.
  self.assertFalse(recording.is_computed())
  self.assertFalse(recording.computed())
  self.assertTrue(recording.uncomputed())

  # The recording is finished but the background caching job is not. There
  # may still be more elements to record, or the intermediate PCollection may
  # have stopped caching in an incomplete state, e.g. before a window could
  # fire.
  fake_main_result.set_state(PipelineState.DONE)
  recording.wait_until_finish()
  self.assertFalse(recording.is_computed())
  self.assertFalse(recording.computed())
  self.assertTrue(recording.uncomputed())

  # The background caching job finished before we started a recording which
  # is a sure signal that there will be no more elements.
  fake_bcj_result.set_state(PipelineState.DONE)
  ie.current_env().set_background_caching_job(p, caching_job)
  recording = Recording(
      p, [elems], fake_main_result, max_n=10, max_duration_secs=60)
  recording.wait_until_finish()

  # There are no more elements and the recording finished, meaning that the
  # intermediate PCollections are in a complete state. They can now be marked
  # as computed.
  self.assertTrue(recording.is_computed())
  self.assertTrue(recording.computed())
  self.assertFalse(recording.uncomputed())