def run_beam_job(
    job_name: Optional[str] = None,
    job_class: Optional[Type[base_jobs.JobBase]] = None
) -> beam_job_domain.BeamJobRun:
    """Launches an Apache Beam job and reports on the resulting run.

    The job may be identified either directly by its class or indirectly by
    its registered name. When both are supplied, the class wins and the name
    is never looked up.

    Args:
        job_name: str|None. The name of the job to run. Only consulted when
            job_class is None, in which case it is resolved through the jobs
            registry.
        job_class: type(JobBase)|None. A subclass of JobBase to begin running.
            This value takes precedence over job_name.

    Returns:
        BeamJobRun. Metadata about the run's execution.

    Raises:
        ValueError. Neither job_class nor job_name was provided.
    """
    if job_class is None:
        # Checking job_name inside this branch lets the type checker see that
        # it is a plain str by the time the registry lookup happens.
        if job_name is None:
            raise ValueError('Must specify the job class or name to run')
        job_class = jobs_registry.get_job_class_by_name(job_name)

    # The emulator-mode flag is forwarded as the "run synchronously" argument.
    beam_job_run_model = jobs_manager.run_job(
        job_class, constants.EMULATOR_MODE)
    return get_beam_job_run_from_model(beam_job_run_model)
def test_failing_sync_job(self) -> None:
    """Runs FailingJob synchronously and checks the recorded failure.

    The returned run must be in the 'FAILED' state, must equal the model
    fetched back by its ID, and its stored stderr must mention 'uh-oh'.
    """
    job_run = jobs_manager.run_job(
        FailingJob, True, namespace=self.namespace)

    self.assertEqual(job_run.latest_job_state, 'FAILED')

    # The model returned by run_job should match what is fetched by ID.
    stored_model = beam_job_models.BeamJobRunModel.get(job_run.id)
    self.assertEqual(job_run, stored_model)

    job_result = beam_job_services.get_beam_job_run_result(job_run.id)
    self.assertIn('uh-oh', job_result.stderr)
def test_working_sync_job(self) -> None:
    """Runs WorkingJob synchronously and checks the recorded output.

    The returned run must be in the 'DONE' state, must equal the model
    fetched back by its ID, and its stored result must serialize to the
    expected stdout/stderr pair.
    """
    job_run = jobs_manager.run_job(
        WorkingJob, True, namespace=self.namespace)

    self.assertEqual(job_run.latest_job_state, 'DONE')

    # The model returned by run_job should match what is fetched by ID.
    stored_model = beam_job_models.BeamJobRunModel.get(job_run.id)
    self.assertEqual(job_run, stored_model)

    expected_result_dict = {'stdout': 'o', 'stderr': 'e'}
    job_result = beam_job_services.get_beam_job_run_result(job_run.id)
    self.assertEqual(job_result.to_dict(), expected_result_dict)
def test_async_job(self) -> None:
    """Runs WorkingJob asynchronously against a stubbed pipeline result.

    pipeline.run is swapped to return a mock that reports a started job
    with ID '123' in the 'PENDING' state; the run returned by run_job is
    expected to carry that same job ID and state.
    """
    # Stand-in for the value pipeline.run() hands back.
    fake_run_result = mock.Mock(has_job=True, state='PENDING')
    fake_run_result.job_id.return_value = '123'

    job_pipeline = beam.Pipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(namespace=self.namespace))

    run_swap = self.swap_to_always_return(
        job_pipeline, 'run', value=fake_run_result)
    with run_swap:
        job_run = jobs_manager.run_job(
            WorkingJob, False, pipeline=job_pipeline)

    self.assertEqual(job_run.dataflow_job_id, '123')
    self.assertEqual(job_run.latest_job_state, 'PENDING')
def test_async_job_that_does_not_start(self) -> None:
    """Runs WorkingJob asynchronously when the pipeline reports no job.

    pipeline.run is swapped to return a mock with has_job False, a None job
    ID and state 'UNKNOWN'; the run returned by run_job is expected to have
    no Dataflow job ID, to be 'FAILED', and to record a deployment failure
    message in stderr.
    """
    # Stand-in for the value pipeline.run() hands back.
    fake_run_result = mock.Mock(has_job=False, state='UNKNOWN')
    fake_run_result.job_id.return_value = None

    job_pipeline = beam.Pipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(namespace=self.namespace))

    run_swap = self.swap_to_always_return(
        job_pipeline, 'run', value=fake_run_result)
    with run_swap:
        job_run = jobs_manager.run_job(
            WorkingJob, False, pipeline=job_pipeline)

    self.assertIsNone(job_run.dataflow_job_id)
    self.assertEqual(job_run.latest_job_state, 'FAILED')

    job_result = beam_job_services.get_beam_job_run_result(job_run.id)
    self.assertIn('Failed to deploy WorkingJob', job_result.stderr)