Ejemplo n.º 1
0
    def test_run_beam_job_using_job_class(self) -> None:
        """Checks that run_beam_job, when given a job class, returns a run
        equal to the one derived from the model produced by run_job.
        """
        stub_model = beam_job_services.create_beam_job_run_model('NoOpJob')
        # jobs_manager.run_job is stubbed so that it always hands back the
        # model created above.
        run_job_swap = self.swap_to_always_return(
            jobs_manager, 'run_job', value=stub_model)

        with run_job_swap:
            actual_run = beam_job_services.run_beam_job(job_class=NoOpJob)

        expected_dict = (
            beam_job_services.get_beam_job_run_from_model(stub_model).to_dict())
        actual_dict = actual_run.to_dict()
        self.assertEqual(expected_dict, actual_dict)
Ejemplo n.º 2
0
    def test_create_beam_job_run_model(self) -> None:
        """Checks that a stored run model is returned by get_beam_job_runs
        with the expected job name and as a non-synchronous run.
        """
        stored_model = beam_job_services.create_beam_job_run_model(
            'FooJob', dataflow_job_id='123')
        stored_model.put()

        fetched_runs = beam_job_services.get_beam_job_runs(refresh=False)

        # Exactly one model was stored, so exactly one run should come back.
        self.assertEqual(len(fetched_runs), 1)
        only_run = fetched_runs[0]
        self.assertEqual(only_run.job_name, 'FooJob')
        self.assertFalse(only_run.job_is_synchronous)
Ejemplo n.º 3
0
    def test_cancel_beam_job_which_has_no_dataflow_job_id_raises_an_error(
            self) -> None:
        """Checks that cancelling a run whose model has no Dataflow job ID
        raises a ValueError mentioning that it cannot be cancelled.
        """
        model_without_dataflow_id = (
            beam_job_services.create_beam_job_run_model(
                'NoOpJob', dataflow_job_id=None))
        model_without_dataflow_id.put()

        cancel_job_swap = self.swap_to_always_return(
            jobs_manager, 'cancel_job')

        with cancel_job_swap:
            with self.assertRaisesRegex(  # type: ignore[no-untyped-call]
                ValueError, 'cannot be cancelled'
            ):
                beam_job_services.cancel_beam_job(model_without_dataflow_id.id)
Ejemplo n.º 4
0
    def test_create_beam_job_run_model(self):
        """Checks that a stored run model is returned by get_beam_job_runs
        with the expected name, arguments, and non-synchronous flag.
        """
        stored_model = beam_job_services.create_beam_job_run_model(
            'FooJob', ['num_foos'], dataflow_job_id='123')
        stored_model.put()

        fetched_runs = beam_job_services.get_beam_job_runs()

        # Exactly one model was stored, so exactly one run should come back.
        self.assertEqual(len(fetched_runs), 1)
        only_run = fetched_runs[0]
        self.assertEqual(only_run.job_name, 'FooJob')
        self.assertEqual(only_run.job_arguments, ['num_foos'])
        self.assertFalse(only_run.job_is_synchronous)
Ejemplo n.º 5
0
    def test_cancel_beam_job(self) -> None:
        """Checks that cancel_beam_job returns a run equal to the one derived
        from the stored model when jobs_manager.cancel_job is stubbed out.
        """
        stored_model = beam_job_services.create_beam_job_run_model(
            'NoOpJob', dataflow_job_id='123')
        stored_model.put()

        cancel_job_swap = self.swap_to_always_return(
            jobs_manager, 'cancel_job')
        with cancel_job_swap:
            cancelled_run = beam_job_services.cancel_beam_job(stored_model.id)

        actual_dict = cancelled_run.to_dict()
        expected_dict = (
            beam_job_services.get_beam_job_run_from_model(
                stored_model).to_dict())
        self.assertEqual(actual_dict, expected_dict)
Ejemplo n.º 6
0
    def test_put_starts_new_job(self) -> None:
        """Checks that a PUT to /beam_job_run responds with the dict of the
        run derived from the model produced by jobs_manager.run_job.
        """
        stub_model = beam_job_services.create_beam_job_run_model('FooJob')
        run_job_swap = self.swap_to_always_return(
            jobs_manager, 'run_job', value=stub_model)

        with run_job_swap:
            csrf_token = self.get_new_csrf_token()  # type: ignore[no-untyped-call]
            response = self.put_json(  # type: ignore[no-untyped-call]
                '/beam_job_run', {'job_name': 'FooJob'},
                csrf_token=csrf_token)

        expected_response = (
            beam_job_services.get_beam_job_run_from_model(stub_model).to_dict())
        self.assertEqual(response, expected_response)
Ejemplo n.º 7
0
    def test_get_returns_job_output(self) -> None:
        """Checks that /beam_job_run_result responds with the stdout and
        stderr stored for the requested job ID.
        """
        run_model = beam_job_services.create_beam_job_run_model('WorkingJob')
        run_model.put()
        # Store a result with 'o' and an empty string as its two outputs.
        stored_result = beam_job_services.create_beam_job_run_result_model(
            run_model.id, 'o', '')
        stored_result.put()

        result_url = '/beam_job_run_result?job_id=%s' % run_model.id
        response = self.get_json(result_url)

        expected_response = {'stdout': 'o', 'stderr': ''}
        self.assertEqual(response, expected_response)
Ejemplo n.º 8
0
    def test_run_beam_job(self) -> None:
        """Checks that run_beam_job, when given a job name, returns a run
        equal to the one derived from the model produced by run_job.
        """
        stub_model = beam_job_services.create_beam_job_run_model('WorkingJob')
        # Both the registry lookup and the job launch are stubbed out; the
        # swaps are created up front and only entered below.
        registry_swap = self.swap_to_always_return(
            jobs_registry, 'get_job_class_by_name')
        manager_swap = self.swap_to_always_return(
            jobs_manager, 'run_job', value=stub_model)

        with registry_swap:
            with manager_swap:
                actual_run = beam_job_services.run_beam_job('WorkingJob')

        expected_dict = (
            beam_job_services.get_beam_job_run_from_model(stub_model).to_dict())
        actual_dict = actual_run.to_dict()
        self.assertEqual(expected_dict, actual_dict)
Ejemplo n.º 9
0
    def test_delete_cancels_job(self) -> None:
        """Checks that a DELETE to /beam_job_run responds with the dict of
        the run returned by beam_job_services.cancel_beam_job.
        """
        stored_model = beam_job_services.create_beam_job_run_model('FooJob')
        stored_model.put()

        first_timestamp = datetime.datetime.utcnow()
        second_timestamp = datetime.datetime.utcnow()
        cancelling_run = beam_job_domain.BeamJobRun(
            stored_model.id, 'FooJob', 'CANCELLING',
            first_timestamp, second_timestamp, False)

        # cancel_beam_job is stubbed so the handler receives the run above.
        cancel_swap = self.swap_to_always_return(
            beam_job_services, 'cancel_beam_job', value=cancelling_run)

        with cancel_swap:
            response = self.delete_json(  # type: ignore[no-untyped-call]
                '/beam_job_run', {'job_id': stored_model.id})

        self.assertEqual(response, cancelling_run.to_dict())
Ejemplo n.º 10
0
    def test_get_returns_all_runs(self) -> None:
        """Checks that a GET to /beam_job_run lists every stored run."""
        expected_job_names = ['FooJob'] * 3
        for job_name in expected_job_names:
            beam_job_services.create_beam_job_run_model(job_name).put()

        response = self.get_json('/beam_job_run')

        self.assertIn('runs', response)
        listed_runs = response['runs']
        self.assertEqual(len(listed_runs), 3)
        listed_job_names = [
            listed_run['job_name'] for listed_run in listed_runs]
        self.assertCountEqual(listed_job_names, expected_job_names)
Ejemplo n.º 11
0
    def setUp(self) -> None:
        """Prepares a run model, a Dataflow job, and a mocked Dataflow client
        for each test.

        The swap of dataflow.JobsV1Beta3Client is entered through an
        ExitStack, so it remains in effect until that stack is closed.
        NOTE(review): the stack is presumably closed in tearDown, which is not
        visible from here -- confirm.
        """
        super().setUp()

        self.run_model = beam_job_services.create_beam_job_run_model(
            'WorkingJob', dataflow_job_id='123')

        # The Dataflow job uses the same ID ('123') as the run model above.
        self.dataflow_job = dataflow.Job(
            id='123',
            project_id=feconf.OPPIA_PROJECT_ID,
            location=feconf.GOOGLE_APP_ENGINE_REGION,
            current_state=dataflow.JobState.JOB_STATE_CANCELLING,
            current_state_time=datetime.datetime.utcnow())

        client_mock = mock.Mock()
        client_mock.update_job.return_value = self.dataflow_job
        self.dataflow_client_mock = client_mock

        swap_stack = contextlib.ExitStack()
        self.exit_stack = swap_stack
        # Constructing a JobsV1Beta3Client now always yields the mock.
        client_swap = self.swap_to_always_return(
            dataflow, 'JobsV1Beta3Client', value=client_mock)
        swap_stack.enter_context(client_swap)
Ejemplo n.º 12
0
def run_job_sync(
        job_name: str,
        job_args: List[str],
        namespace: Optional[str] = None) -> beam_job_domain.BeamJobRun:
    """Executes the named job to completion on a DirectRunner pipeline.

    The call blocks until the pipeline has finished; only then is a value
    returned. An exception raised while the pipeline runs is not propagated:
    the run is marked as FAILED and the exception text is stored in a result
    model instead.

    Args:
        job_name: str. The name of the job to run.
        job_args: list(str). The arguments forwarded to the job's run() method.
        namespace: str|None. The namespace passed to the pipeline's job
            options.

    Returns:
        BeamJobRun. Contains metadata related to the execution status of the
        job.
    """
    direct_runner = runners.DirectRunner()
    pipeline_options = job_options.JobOptions(namespace=namespace)
    pipeline = beam.Pipeline(runner=direct_runner, options=pipeline_options)

    job = registry.get_job_class_by_name(job_name)(pipeline)
    bookkeeping_model = beam_job_services.create_beam_job_run_model(
        job_name, job_args)

    try:
        # Leaving the `with` block is what actually runs the pipeline.
        with pipeline:
            results_sink = job_io.PutResults(bookkeeping_model.id)
            unused_pdone = job.run(*job_args) | results_sink
    except Exception as error:
        bookkeeping_model.latest_job_state = (
            beam_job_models.BeamJobState.FAILED.value)
        # The pipeline could not put its results into storage, so the caught
        # exception is written to storage explicitly in their place.
        failure_result = beam_job_services.create_beam_job_run_result_model(
            bookkeeping_model.id, '', python_utils.UNICODE(error))
        failure_result.put()
    else:
        bookkeeping_model.latest_job_state = (
            beam_job_models.BeamJobState.DONE.value)
    finally:
        # The bookkeeping model is stored regardless of the outcome.
        bookkeeping_model.put()

    return beam_job_services.get_beam_job_run_from_model(bookkeeping_model)
Ejemplo n.º 13
0
def _job_bookkeeping_context(
        job_name: str) -> Iterator[beam_job_models.BeamJobRunModel]:
    """Provides a bookkeeping model for a job and commits failure details if
    an exception occurs while the model is in use.

    An exception caught here is not re-raised: the model is marked as FAILED
    and the exception text is recorded through _put_job_stderr. The model is
    stored on the way out whether or not an exception occurred.

    Args:
        job_name: str. The name of the job.

    Yields:
        BeamJobRunModel. The bookkeeping model used to record execution details.
    """
    bookkeeping_model = beam_job_services.create_beam_job_run_model(job_name)

    try:
        yield bookkeeping_model
    except Exception as error:
        failed_state = beam_job_models.BeamJobState.FAILED.value
        bookkeeping_model.latest_job_state = failed_state
        _put_job_stderr(bookkeeping_model.id, str(error))
    finally:
        bookkeeping_model.put()