Exemple #1
0
    def test_overwritten_values(self):
        """JobOptions keeps the exact model_getter object it is given."""
        io_stub = stub_io.ModelIoStub()
        custom_getter = io_stub.get_models_ptransform

        job_opts = job_options.JobOptions(model_getter=custom_getter)

        self.assertIs(job_opts.model_getter, custom_getter)
Exemple #2
0
    def test_run_with_empty_model_getter(self):
        """Running the audit job with model_getter=None raises ValueError."""
        direct_runner = runners.DirectRunner()
        getterless_options = job_options.JobOptions(model_getter=None)
        pipeline = test_pipeline.TestPipeline(
            runner=direct_runner, options=getterless_options)
        # Build the job outside of the assertion so that only run() is
        # expected to raise.
        audit_job = audit_jobs.AuditAllStorageModelsJob(pipeline)

        with self.assertRaisesRegexp(
                ValueError, 'JobOptions.model_getter must not be None'):
            audit_job.run()
Exemple #3
0
def run_job(
    job_class: Type[base_jobs.JobBase],
    sync: bool,
    namespace: Optional[str] = None,
    pipeline: Optional[beam.Pipeline] = None
) -> beam_job_models.BeamJobRunModel:
    """Runs the specified job and returns the model that tracks the run.

    When sync is True, the function waits for the pipeline to finish before
    returning. Otherwise, it only records the job ID and the state reported
    by the pipeline's run result.

    Args:
        job_class: type(base_jobs.JobBase). The type of job to run.
        sync: bool. Whether to wait for the job to finish before returning.
        namespace: str|None. The namespace passed to the JobOptions of the
            pipeline that is created when no pipeline is provided.
        pipeline: Pipeline|None. The pipeline to run the job upon. If
            omitted, a new pipeline is created; it uses a DirectRunner when
            sync is True and a DataflowRunner otherwise.

    Returns:
        BeamJobRunModel. The model provided by the job bookkeeping context
        for this run.
    """
    if pipeline is None:
        if sync:
            runner = runners.DirectRunner()
        else:
            runner = runners.DataflowRunner()
        pipeline = beam.Pipeline(
            runner=runner,
            options=job_options.JobOptions(namespace=namespace))

    job_instance = job_class(pipeline)
    job_name = job_class.__name__

    # NOTE: Exceptions raised within this context are logged and suppressed,
    # which is why the deployment failure below is raised inside of it.
    with _job_bookkeeping_context(job_name) as run_model:
        _ = job_instance.run() | job_io.PutResults(run_model.id)

        pipeline_result = pipeline.run()

        if sync:
            pipeline_result.wait_until_finish()
            done_state = beam_job_models.BeamJobState.DONE.value
            run_model.latest_job_state = done_state

        elif not pipeline_result.has_job:
            raise RuntimeError(
                'Failed to deploy %s to the Dataflow service. '
                'Please try again after a few minutes.' % job_name)

        else:
            run_model.dataflow_job_id = pipeline_result.job_id()
            run_model.latest_job_state = pipeline_result.state

    return run_model
Exemple #4
0
    def test_async_job(self) -> None:
        """An async run copies the job ID and state from the run result."""
        fake_run_result = mock.Mock(has_job=True, state='PENDING')
        fake_run_result.job_id.return_value = '123'

        direct_runner = runners.DirectRunner()
        namespaced_options = job_options.JobOptions(namespace=self.namespace)
        pipeline = beam.Pipeline(
            runner=direct_runner, options=namespaced_options)

        # Make pipeline.run() hand back the fake result instead of running.
        run_swap = self.swap_to_always_return(
            pipeline, 'run', value=fake_run_result)
        with run_swap:
            run = jobs_manager.run_job(WorkingJob, False, pipeline=pipeline)

        self.assertEqual(run.dataflow_job_id, '123')
        self.assertEqual(run.latest_job_state, 'PENDING')
Exemple #5
0
    def test_async_job_that_does_not_start(self) -> None:
        """An async run whose result has no job is recorded as FAILED."""
        fake_run_result = mock.Mock(has_job=False, state='UNKNOWN')
        fake_run_result.job_id.return_value = None

        direct_runner = runners.DirectRunner()
        namespaced_options = job_options.JobOptions(namespace=self.namespace)
        pipeline = beam.Pipeline(
            runner=direct_runner, options=namespaced_options)

        # Make pipeline.run() hand back the fake result instead of running.
        run_swap = self.swap_to_always_return(
            pipeline, 'run', value=fake_run_result)
        with run_swap:
            run = jobs_manager.run_job(WorkingJob, False, pipeline=pipeline)

        self.assertIsNone(run.dataflow_job_id)
        self.assertEqual(run.latest_job_state, 'FAILED')
        run_result = beam_job_services.get_beam_job_run_result(run.id)
        self.assertIn('Failed to deploy WorkingJob', run_result.stderr)
Exemple #6
0
def run_job_sync(
        job_name: str,
        job_args: List[str],
        namespace: Optional[str] = None) -> beam_job_domain.BeamJobRun:
    """Runs the named job synchronously on a DirectRunner pipeline.

    The function returns only after the pipeline's context has exited and
    the outcome has been stored on the run model.

    Args:
        job_name: str. The name of the job to run.
        job_args: list(str). The arguments to the job's run() method.
        namespace: str|None. The namespace passed to the pipeline's
            JobOptions.

    Returns:
        BeamJobRun. Contains metadata related to the execution status of the
        job.
    """
    pipeline = beam.Pipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(namespace=namespace))
    job = registry.get_job_class_by_name(job_name)(pipeline)

    run_model = beam_job_services.create_beam_job_run_model(job_name, job_args)
    job_states = beam_job_models.BeamJobState

    try:
        with pipeline:
            _ = job.run(*job_args) | job_io.PutResults(run_model.id)
    except Exception as error:
        run_model.latest_job_state = job_states.FAILED.value
        # The pipeline could not put its results into storage, so store a
        # result built from the caught exception instead.
        error_text = python_utils.UNICODE(error)
        failure_result = beam_job_services.create_beam_job_run_result_model(
            run_model.id, '', error_text)
        failure_result.put()
    else:
        run_model.latest_job_state = job_states.DONE.value
    finally:
        # The run model is saved regardless of how the job ended.
        run_model.put()

    return beam_job_services.get_beam_job_run_from_model(run_model)
 def test_unsupported_values(self):
     """Unknown keyword options make JobOptions raise a ValueError."""
     expected_error = re.escape('Unsupported option(s): a, b')
     with self.assertRaisesRegexp(ValueError, expected_error):
         job_options.JobOptions(a=1, b=2)
    def test_overwritten_values(self):
        """JobOptions keeps the exact datastoreio_stub object it is given."""
        stub = stub_io.DatastoreioStub()
        job_opts = job_options.JobOptions(datastoreio_stub=stub)
        self.assertIs(job_opts.datastoreio_stub, stub)
    def test_default_values(self):
        """datastoreio_stub is None when JobOptions gets no arguments."""
        default_stub = job_options.JobOptions().datastoreio_stub
        self.assertIsNone(default_stub)
Exemple #10
0
    def test_overwritten_values(self):
        """JobOptions exposes the namespace value it was constructed with."""
        options = job_options.JobOptions(namespace='abc')

        # Compare by value rather than identity: whether two equal strings
        # are the same object depends on interpreter string interning, so
        # assertIs against a literal does not verify the option's value.
        self.assertEqual(options.namespace, 'abc')
Exemple #11
0
    def test_default_values(self):
        """namespace is None when JobOptions gets no arguments."""
        default_namespace = job_options.JobOptions().namespace
        self.assertIsNone(default_namespace)
Exemple #12
0
 def test_unsupported_values(self):
     """Unknown keyword options make JobOptions raise a ValueError."""
     with self.assertRaisesRegexp(
             ValueError, r'Unsupported option\(s\): a, b'):
         job_options.JobOptions(a=1, b=2)
Exemple #13
0
    def test_default_values(self):
        """model_getter is None when JobOptions gets no arguments."""
        default_getter = job_options.JobOptions().model_getter
        self.assertIsNone(default_getter)
Exemple #14
0
 def __init__(self, *args, **kwargs):
     """Initializes the test case with a DirectRunner-backed TestPipeline.

     Args:
         *args: list(*). Positional arguments forwarded to the parent
             initializer.
         **kwargs: dict(str: *). Keyword arguments forwarded to the parent
             initializer.
     """
     super(PipelinedTestBase, self).__init__(*args, **kwargs)
     direct_runner = runners.DirectRunner()
     namespaced_options = job_options.JobOptions(namespace=self.namespace)
     self.pipeline = test_pipeline.TestPipeline(
         runner=direct_runner, options=namespaced_options)
     # Starts out unset.
     self._pipeline_context_stack = None
Exemple #15
0
 def test_unsupported_values(self) -> None:
     """Unknown keyword options make JobOptions raise a ValueError."""
     error_pattern = r'Unsupported option\(s\)'
     with self.assertRaisesRegexp(
             ValueError, error_pattern):  # type: ignore[no-untyped-call]
         job_options.JobOptions(a='a', b='b')
Exemple #16
0
    def test_overwritten_values(self) -> None:
        """JobOptions exposes the namespace value it was constructed with."""
        custom_namespace = 'abc'
        job_opts = job_options.JobOptions(namespace=custom_namespace)
        self.assertEqual(job_opts.namespace, custom_namespace)