예제 #1
0
def run_beam_job(
    job_name: Optional[str] = None,
    job_class: Optional[Type[base_jobs.JobBase]] = None
) -> beam_job_domain.BeamJobRun:
    """Launches an Apache Beam job and reports metadata about that run.

    Args:
        job_name: str|None. The registered name of the job to launch. Only
            consulted when job_class is None.
        job_class: type(JobBase)|None. The JobBase subclass to launch. When
            provided, it is used directly and job_name is ignored.

    Returns:
        BeamJobRun. Metadata about the run's execution.

    Raises:
        ValueError. Neither job_class nor job_name was provided.
    """
    if job_class is None:
        if job_name is None:
            raise ValueError('Must specify the job class or name to run')
        # Checking job_name inside this branch narrows it to str, so the
        # registry lookup type-checks without a type-ignore comment.
        job_class = jobs_registry.get_job_class_by_name(job_name)

    # The emulator-mode flag is forwarded as the "run synchronously" argument.
    beam_job_run_model = jobs_manager.run_job(
        job_class, constants.EMULATOR_MODE)
    return get_beam_job_run_from_model(beam_job_run_model)
예제 #2
0
    def test_failing_sync_job(self) -> None:
        """Checks that a synchronous FailingJob run is recorded as FAILED."""
        failed_run = jobs_manager.run_job(
            FailingJob, True, namespace=self.namespace)

        self.assertEqual(failed_run.latest_job_state, 'FAILED')

        # The returned run should equal the model fetched by its ID.
        stored_model = beam_job_models.BeamJobRunModel.get(failed_run.id)
        self.assertEqual(failed_run, stored_model)

        # The result stored for the run should carry the failure text.
        run_result = beam_job_services.get_beam_job_run_result(failed_run.id)
        self.assertIn('uh-oh', run_result.stderr)
예제 #3
0
    def test_working_sync_job(self) -> None:
        """Checks that a synchronous WorkingJob run is recorded as DONE."""
        done_run = jobs_manager.run_job(
            WorkingJob, True, namespace=self.namespace)

        self.assertEqual(done_run.latest_job_state, 'DONE')

        # The returned run should equal the model fetched by its ID.
        stored_model = beam_job_models.BeamJobRunModel.get(done_run.id)
        self.assertEqual(done_run, stored_model)

        # The result stored for the run should hold the job's output streams.
        run_result = beam_job_services.get_beam_job_run_result(done_run.id)
        expected_output = {'stdout': 'o', 'stderr': 'e'}
        self.assertEqual(run_result.to_dict(), expected_output)
예제 #4
0
    def test_async_job(self) -> None:
        """Checks that an async run records the job ID and state reported by
        the pipeline's run() result.
        """
        # Stand-in for the value returned by pipeline.run(): it reports a
        # job with ID '123' in the PENDING state.
        fake_run_result = mock.Mock(has_job=True, state='PENDING')
        fake_run_result.job_id.return_value = '123'

        job_pipeline = beam.Pipeline(
            runner=runners.DirectRunner(),
            options=job_options.JobOptions(namespace=self.namespace))

        run_swap = self.swap_to_always_return(
            job_pipeline, 'run', value=fake_run_result)
        with run_swap:
            async_run = jobs_manager.run_job(
                WorkingJob, False, pipeline=job_pipeline)

        self.assertEqual(async_run.dataflow_job_id, '123')
        self.assertEqual(async_run.latest_job_state, 'PENDING')
예제 #5
0
    def test_async_job_that_does_not_start(self) -> None:
        """Checks that an async run is marked FAILED, with a deployment error
        in stderr, when the pipeline's run() result reports no job.
        """
        # Stand-in for the value returned by pipeline.run(): it reports that
        # no job exists, has no job ID, and is in the UNKNOWN state.
        fake_run_result = mock.Mock(has_job=False, state='UNKNOWN')
        fake_run_result.job_id.return_value = None

        job_pipeline = beam.Pipeline(
            runner=runners.DirectRunner(),
            options=job_options.JobOptions(namespace=self.namespace))

        run_swap = self.swap_to_always_return(
            job_pipeline, 'run', value=fake_run_result)
        with run_swap:
            unstarted_run = jobs_manager.run_job(
                WorkingJob, False, pipeline=job_pipeline)

        self.assertIsNone(unstarted_run.dataflow_job_id)
        self.assertEqual(unstarted_run.latest_job_state, 'FAILED')

        run_result = beam_job_services.get_beam_job_run_result(
            unstarted_run.id)
        self.assertIn('Failed to deploy WorkingJob', run_result.stderr)