Exemple #1
0
    def test_should_run_beam_job(
        self,
        state_mock,
    ):
        """Execute a BeamJob on a test pipeline and verify its observable effects.

        Checks the counter produced by the driver, that a JobContext reached
        the driver, the labels set on the pipeline options, and the default
        value of ``execution_timeout_sec``.

        Args:
            state_mock: mock whose return value is forced to "DONE"
                (presumably injected by a patch decorator not visible here).
        """
        # given
        state_mock.return_value = "DONE"

        driver = CountWordsDriver()
        job = BeamJob(
            id='count_words',
            entry_point=driver.run,
            entry_point_arguments={
                'words_to_filter': ['valid', 'word'],
                'words_to_count': ['trash', 'valid', 'word', 'valid']
            },
            test_pipeline=self._test_pipeline_with_label('count_words'))

        # when
        job.execute(JobContext.make())

        # then executes the job with the arguments
        self.assertEqual(driver.counter, {'valid': 2, 'word': 1})

        # and passes the context
        # assertIsInstance already fails for None and reports the actual type,
        # so no separate not-None check is needed.
        self.assertIsInstance(driver.context, JobContext)

        # and labels the job
        self.assertEqual(driver.pipeline._options.get_all_options()['labels'],
                         ['workflow_id=count_words'])

        # and sets default value for execution_timeout_sec
        self.assertEqual(job.execution_timeout_sec,
                         DEFAULT_EXECUTION_TIMEOUT_IN_SECONDS)
Exemple #2
0
    def test_add_docker_to_pipelineoptions(
        self,
        get_project_spec_mock: mock.Mock,
        _create_pipeline_mock: mock.Mock,
    ):
        """Check the options passed to pipeline creation when use_docker_image=True.

        With the project spec stubbed to repository "my_repo" and version
        "1.2.3", the options handed to the pipeline factory must contain a
        job name, the container image "my_repo:1.2.3" and the
        "use_runner_v2" experiment.

        Args:
            get_project_spec_mock: mock standing in for the project spec lookup.
            _create_pipeline_mock: mock standing in for the pipeline factory.
        """
        # given
        project_spec = get_project_spec_mock.return_value
        project_spec.version = "1.2.3"
        project_spec.docker_repository = "my_repo"
        _create_pipeline_mock.return_value.run.return_value = RunnerResult(
            'DONE', None)

        word_counter = CountWordsDriver()
        beam_job = BeamJob(
            id='count_words',
            entry_point=word_counter.nope,
            pipeline_options={},
            use_docker_image=True,
        )

        # when
        beam_job.execute(JobContext.make())

        # then
        # call_args[1] holds the keyword arguments of the last factory call.
        factory_kwargs = _create_pipeline_mock.call_args[1]
        passed_options = factory_kwargs['options'].get_all_options(
            drop_default=True)

        self.assertIn('job_name', passed_options)
        self.assertEqual('my_repo:1.2.3',
                         passed_options['worker_harness_container_image'])
        self.assertIn('use_runner_v2', passed_options['experiments'])
Exemple #3
0
    def test_add_setuppy_onle_for_dataflow_runner(
        self,
        _create_pipeline_mock: mock.Mock,
    ):
        """Check that no 'setup_file' option is passed when the runner is DirectRunner.

        Args:
            _create_pipeline_mock: mock standing in for the pipeline factory;
                its run() result is stubbed to a 'DONE' RunnerResult.
        """
        # given
        _create_pipeline_mock.return_value.run.return_value = RunnerResult(
            'DONE', None)

        word_counter = CountWordsDriver()
        beam_job = BeamJob(
            id='count_words',
            entry_point=word_counter.nope,
            pipeline_options={
                'job_name': "custom-my-job",
                'runner': 'DirectRunner',
            },
        )

        # when
        beam_job.execute(JobContext.make())

        # then
        # call_args[1] holds the keyword arguments of the last factory call.
        factory_kwargs = _create_pipeline_mock.call_args[1]
        passed_options = factory_kwargs['options'].get_all_options(
            drop_default=True)

        self.assertNotIn('setup_file', passed_options)
Exemple #4
0
    def test_should_run_beam_job_with_timeout_without_cancel(
        self,
        wait_until_finish_mock,
        cancel_mock,
        state_mock,
    ):
        """Check that a job reporting 'DONE' is waited on but never cancelled.

        The wait is expected to be called with the configured timeout reduced
        by DEFAULT_PIPELINE_LEVEL_EXECUTION_TIMEOUT_SHIFT_IN_SECONDS and
        multiplied by 1000 (presumably a conversion to milliseconds).

        Args:
            wait_until_finish_mock: mock of the wait call, returning 'DONE'.
            cancel_mock: mock of the cancel call; must stay uncalled.
            state_mock: mock of the state lookup, returning 'DONE'.
        """
        # given
        timeout_sec = 600
        state_mock.return_value = 'DONE'
        wait_until_finish_mock.return_value = 'DONE'

        word_counter = CountWordsDriver()
        beam_job = BeamJob(
            id='count_words',
            entry_point=word_counter.nope,
            test_pipeline=self._test_pipeline_with_label('count_words'),
            execution_timeout_sec=timeout_sec)

        # when
        beam_job.execute(JobContext.make())

        # then
        self.assertEqual(cancel_mock.call_count, 0)
        shifted_timeout_sec = (
            timeout_sec -
            DEFAULT_PIPELINE_LEVEL_EXECUTION_TIMEOUT_SHIFT_IN_SECONDS)
        wait_until_finish_mock.assert_called_with(shifted_timeout_sec * 1000)
Exemple #5
0
    def test_create_pipeline_dict_options(
        self,
        _create_pipeline_mock: mock.Mock,
    ):
        """Check how dict pipeline options reach the pipeline factory.

        The options seen by the factory must carry a truthy 'setup_file',
        a 'runner' equal to 'DataflowRunner', and otherwise match the dict
        given to the job exactly.

        Args:
            _create_pipeline_mock: mock standing in for the pipeline factory;
                its run() result is stubbed to a 'DONE' RunnerResult.
        """
        # given
        _create_pipeline_mock.return_value.run.return_value = RunnerResult(
            'DONE', None)

        requested_options = {
            'job_name': "custom-my-job",
            'labels': ["lable1", "label2"],
            'streaming': True,
            'project': "my-gcp-project",
            'machine_type': "n2-standard-16",
        }
        word_counter = CountWordsDriver()
        beam_job = BeamJob(
            id='count_words',
            entry_point=word_counter.nope,
            pipeline_options=requested_options,
            execution_timeout_sec=10,
        )

        # when
        beam_job.execute(JobContext.make())

        # then
        # call_args[1] holds the keyword arguments of the last factory call.
        factory_kwargs = _create_pipeline_mock.call_args[1]
        effective_options = factory_kwargs['options'].get_all_options(
            drop_default=True)

        # Pop the two extra entries first so the rest can be compared
        # one-to-one with the requested dict.
        setup_file = effective_options.pop('setup_file')
        self.assertTrue(setup_file)
        runner = effective_options.pop('runner')
        self.assertEqual(runner, 'DataflowRunner')
        self.assertDictEqual(requested_options, effective_options)
Exemple #6
0
    def test_should_run_new_entry_point(
        self,
        state_mock,
    ):
        """Execute a BeamJob configured with entry_point_args / entry_point_kwargs.

        Checks the counter produced by the driver, that a JobContext reached
        the driver, and the labels set on the pipeline options.

        Args:
            state_mock: mock whose return value is forced to "DONE"
                (presumably injected by a patch decorator not visible here).
        """
        # given
        state_mock.return_value = "DONE"

        driver = CountWordsDriver()
        job = BeamJob(
            id='count_words',
            entry_point=driver.run_ex,
            entry_point_args=(['trash', 'valid', 'word', 'valid'], ),
            entry_point_kwargs={
                'words_to_filter': ['valid', 'word'],
            },
            test_pipeline=self._test_pipeline_with_label('count_words'))

        # when
        job.execute(JobContext.make())

        # then executes the job with the arguments
        self.assertEqual(driver.counter, {'valid': 2, 'word': 1})

        # and passes the context
        # assertIsInstance already fails for None and reports the actual type,
        # so no separate not-None check is needed.
        self.assertIsInstance(driver.context, JobContext)

        # and labels the job
        self.assertEqual(driver.pipeline._options.get_all_options()['labels'],
                         ['workflow_id=count_words'])
Exemple #7
0
    def test_should_run_beam_job_with_timeout_without_cancel(
            self, wait_until_finish_mock, cancel_mock,
            is_in_terminal_state_mock):
        """Run a Workflow with one BeamJob and check that it is never cancelled.

        The wait is expected to be called with the configured timeout reduced
        by DEFAULT_PIPELINE_LEVEL_EXECUTION_TIMEOUT_SHIFT_IN_SECONDS and
        multiplied by 1000 (presumably a conversion to milliseconds).

        Args:
            wait_until_finish_mock: mock of the wait call, returning 'DONE'.
            cancel_mock: mock of the cancel call; must stay uncalled.
            is_in_terminal_state_mock: mock of the terminal-state check,
                returning True.
        """
        # given
        timeout_sec = 600
        is_in_terminal_state_mock.return_value = True
        wait_until_finish_mock.return_value = 'DONE'

        word_counter = CountWordsDriver(CounterStater(CounterState()))
        driver_arguments = {
            'words_to_filter': ['valid', 'word'],
            'words_to_count': ['trash', 'valid', 'word', 'valid']
        }
        beam_job = BeamJob(
            id='count_words',
            entry_point=word_counter.run,
            entry_point_arguments=driver_arguments,
            test_pipeline=self._test_pipeline_with_label('count_words'),
            execution_timeout_sec=timeout_sec)
        workflow = Workflow(workflow_id='count_words', definition=[beam_job])

        # when
        workflow.run('2020-01-01')

        # then
        self.assertEqual(cancel_mock.call_count, 0)
        shifted_timeout_sec = (
            timeout_sec -
            DEFAULT_PIPELINE_LEVEL_EXECUTION_TIMEOUT_SHIFT_IN_SECONDS)
        wait_until_finish_mock.assert_called_with(shifted_timeout_sec * 1000)
Exemple #8
0
 def test_should_throw_if_pipeline_options_and_pipeline_both_not_provided(
     self, ):
     """Constructing a BeamJob without pipeline options or a test pipeline raises ValueError."""
     # given
     # The driver is built outside the assertRaises block so that only the
     # BeamJob constructor can satisfy the expected ValueError.
     driver = CountWordsDriver()

     # when / then
     with self.assertRaises(ValueError):
         BeamJob(id='count_words',
                 entry_point=driver.nope,
                 execution_timeout_sec=1)
Exemple #9
0
    def test_should_run_old_entry_point_withoutargs(
        self,
        state_mock,
    ):
        """Execute a BeamJob whose entry point is given no entry-point arguments.

        After execution the driver's ``context`` attribute must be truthy.

        Args:
            state_mock: mock whose return value is forced to "DONE"
                (presumably injected by a patch decorator not visible here).
        """
        # given
        state_mock.return_value = "DONE"

        driver = CountWordsDriver()
        job = BeamJob(
            id='count_words',
            entry_point=driver.nope_driverargs,
            test_pipeline=self._test_pipeline_with_label('count_words'))

        # when
        job.execute(JobContext.make())

        # then
        # The message is only displayed when the assertion fails, so it has to
        # describe the failure rather than the success.
        self.assertTrue(driver.context, "Driver was not called")
Exemple #10
0
    def test_should_create_pipeline_from_pipeline_options(
        self,
        _create_pipeline_mock: mock.Mock,
    ):
        """Check that a PipelineOptions object reaches the pipeline factory intact.

        The options received by the factory must expose the same
        ``get_all_options()`` mapping as the object given to the job.

        Args:
            _create_pipeline_mock: mock standing in for the pipeline factory;
                its run() result is stubbed to a 'DONE' RunnerResult.
        """
        # given
        _create_pipeline_mock.return_value.run.return_value = RunnerResult(
            'DONE', None)

        driver = CountWordsDriver()

        options = PipelineOptions()
        options.view_as(StandardOptions).runner = 'DataflowRunner'
        options.view_as(GoogleCloudOptions).project = 'gcp_project_id'
        options.view_as(GoogleCloudOptions).job_name = 'beam-wordcount-uuid'
        options.view_as(
            GoogleCloudOptions).staging_location = "gs://staging_location"
        options.view_as(
            GoogleCloudOptions).temp_location = "gs://temp_location"
        options.view_as(GoogleCloudOptions).region = 'region'
        options.view_as(
            GoogleCloudOptions).service_account_email = 'service-account'
        options.view_as(WorkerOptions).machine_type = 'n2-standard-8'
        options.view_as(WorkerOptions).max_num_workers = 2
        options.view_as(
            WorkerOptions).autoscaling_algorithm = 'THROUGHPUT_BASED'
        options.view_as(SetupOptions).setup_file = "/path/to/setup.py"

        job = BeamJob(
            id='count_words',
            entry_point=driver.nope,
            pipeline_options=options,
            execution_timeout_sec=10,
        )

        # when
        job.execute(JobContext.make())

        # then
        # call_args[1] holds the keyword arguments of the last factory call.
        self.assertDictEqual(
            options.get_all_options(),
            _create_pipeline_mock.call_args[1]['options'].get_all_options(),
        )
Exemple #11
0
 def test_should_throw_if_pipeline_options_and_pipeline_both_not_provided(
         self):
     """Constructing a BeamJob without pipeline options or a test pipeline raises ValueError."""
     # given
     # The driver is built outside the assertRaises block so that only the
     # BeamJob constructor can satisfy the expected ValueError.
     driver = CountWordsDriver(CounterStater(CounterState()))

     # when / then
     with self.assertRaises(ValueError):
         BeamJob(id='count_words',
                 entry_point=driver.run,
                 entry_point_arguments={
                     'words_to_filter': ['valid', 'word'],
                     'words_to_count': ['trash', 'valid', 'word', 'valid']
                 },
                 execution_timeout_sec=1)
Exemple #12
0
 def test_should_throw_if_wait_until_finish_set_to_false_and_execution_timeout_passed(
     self, ):
     """Passing execution_timeout_sec together with wait_until_finish=False raises ValueError."""
     # given
     # Fixtures are built outside the assertRaises block so that only the
     # BeamJob constructor can satisfy the expected ValueError.
     driver = CountWordsDriver()
     test_pipeline = self._test_pipeline_with_label('count_words')

     # when / then
     with self.assertRaises(ValueError):
         BeamJob(
             id='count_words',
             entry_point=driver.nope,
             test_pipeline=test_pipeline,
             execution_timeout_sec=1,
             wait_until_finish=False)
Exemple #13
0
 def test_should_throw_if_wait_until_finish_set_to_false_and_execution_timeout_passed(
         self):
     """Passing execution_timeout_sec together with wait_until_finish=False raises ValueError."""
     # given
     # Fixtures are built outside the assertRaises block so that only the
     # BeamJob constructor can satisfy the expected ValueError.
     driver = CountWordsDriver(CounterStater(CounterState()))
     test_pipeline = self._test_pipeline_with_label('count_words')

     # when / then
     with self.assertRaises(ValueError):
         BeamJob(
             id='count_words',
             entry_point=driver.run,
             entry_point_arguments={
                 'words_to_filter': ['valid', 'word'],
                 'words_to_count': ['trash', 'valid', 'word', 'valid']
             },
             test_pipeline=test_pipeline,
             execution_timeout_sec=1,
             wait_until_finish=False)
Exemple #14
0
    def test_should_create_pipeline_from_pipeline_options(
            self, _create_pipeline_mock, is_in_terminal_state_mock):
        """Run a Workflow with one BeamJob and check the factory receives its options.

        The pipeline factory mock must have been called with the very
        PipelineOptions object that was given to the job.

        Args:
            _create_pipeline_mock: mock standing in for the pipeline factory.
            is_in_terminal_state_mock: mock of the terminal-state check,
                returning True.
        """
        # given
        is_in_terminal_state_mock.return_value = True

        pipeline_options = PipelineOptions()
        word_counter = CountWordsDriver(CounterStater(CounterState()))
        driver_arguments = {
            'words_to_filter': ['valid', 'word'],
            'words_to_count': ['trash', 'valid', 'word', 'valid']
        }
        beam_job = BeamJob(
            id='count_words',
            entry_point=word_counter.run,
            entry_point_arguments=driver_arguments,
            pipeline_options=pipeline_options,
            execution_timeout_sec=1)
        workflow = Workflow(workflow_id='count_words', definition=[beam_job])

        # when
        workflow.run('2020-01-01')

        # then
        _create_pipeline_mock.assert_called_with(pipeline_options)