Example 1
    def test_keep_looking_for_recoverable_job_even_if_errored_job_exists_on_cluster(
            self):
        """Reattach to a RUNNING job even when an ERROR job with the same ID
        appears earlier in the cluster's job list.

        The lookup must not stop at the first matching job if that job is in
        an irrecoverable state; it must keep scanning for a recoverable one.
        """
        shared_job_id = '{}_{}'.format(TASK_ID, self.UUID)
        running_job = {
            'reference': {'jobId': shared_job_id},
            'status': {'state': 'RUNNING'},
        }
        errored_job = {
            'reference': {'jobId': shared_job_id},
            'status': {'state': 'ERROR'},
        }

        # Accessing .return_value repeatedly yields the same mock, so a local
        # alias for the jobs endpoint is equivalent to the full chain.
        jobs_endpoint = (self.mock_dataproc.projects.return_value
                         .regions.return_value.jobs.return_value)
        # The irrecoverable job is listed first on purpose.
        jobs_endpoint.list.return_value.execute.return_value = {
            'jobs': [errored_job, running_job]}

        _DataProcJob(dataproc_api=self.mock_dataproc,
                     project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                     job=self.JOB_TO_SUBMIT)

        # The RUNNING job was found, so nothing new is submitted.
        jobs_endpoint.submit.assert_not_called()
Example 2
    def test_submit_job_if_no_jobs_running_on_cluster(self):
        """When the cluster reports an empty job list, submit the job once."""
        jobs_endpoint = (self.mock_dataproc.projects.return_value
                         .regions.return_value.jobs.return_value)
        jobs_endpoint.list.return_value.execute.return_value = {'jobs': []}

        _DataProcJob(dataproc_api=self.mock_dataproc,
                     project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                     job=self.JOB_TO_SUBMIT)

        # Nothing to reattach to, so the job must be submitted exactly once.
        jobs_endpoint.submit.assert_called_once_with(
            projectId=GCP_PROJECT_ID_HOOK_UNIT_TEST,
            region=GCP_REGION,
            body=self.JOB_TO_SUBMIT)
    def test_do_not_resubmit_job_if_same_job_running_on_cluster(self):
        """A RUNNING job with the same task ID suppresses resubmission."""
        existing_job = {
            'reference': {'jobId': '{}_{}'.format(TASK_ID, self.UUID)},
            'status': {'state': 'RUNNING'},
        }

        jobs_endpoint = (self.mock_dataproc.projects.return_value
                         .regions.return_value.jobs.return_value)
        jobs_endpoint.list.return_value.execute.return_value = {
            'jobs': [existing_job]}

        _DataProcJob(dataproc_api=self.mock_dataproc,
                     project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                     job=self.JOB_TO_SUBMIT)

        # The matching RUNNING job means no new submission may happen.
        jobs_endpoint.submit.assert_not_called()
Example 4
 def test_raise_error_fallback_job_error_states(self, mock_init):
     # ERROR must be treated as a failure state even when job_error_states
     # lists only other states (here: CANCELLED).
     errored_job = _DataProcJob()  # pylint: disable=no-value-for-parameter
     errored_job.job = {'status': {'state': 'ERROR'}}
     errored_job.job_error_states = ['CANCELLED']
     with self.assertRaises(Exception) as ctx:
         errored_job.raise_error()
     # The offending state should be named in the exception message.
     self.assertIn('ERROR', str(ctx.exception))
    def test_submit_job_if_same_job_errored_on_cluster(self):
        """A previously ERRORed job with the same task ID is resubmitted."""
        failed_job = {
            'reference': {'jobId': '{}_{}'.format(TASK_ID, self.UUID)},
            'status': {'state': 'ERROR'},
        }

        jobs_endpoint = (self.mock_dataproc.projects.return_value
                         .regions.return_value.jobs.return_value)
        jobs_endpoint.list.return_value.execute.return_value = {
            'jobs': [failed_job]}

        _DataProcJob(dataproc_api=self.mock_dataproc,
                     project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                     job=self.JOB_TO_SUBMIT)

        # An errored match is not recoverable, so a retry submission occurs.
        jobs_endpoint.submit.assert_called_once_with(
            projectId=GCP_PROJECT_ID_HOOK_UNIT_TEST,
            region=GCP_REGION,
            body=self.JOB_TO_SUBMIT)
    def test_submit_job_if_different_job_running_on_cluster(self):
        """Running jobs with unrelated task IDs do not block submission."""
        unrelated_job = {
            'reference': {'jobId': 'a-different-job-id_{}'.format(self.UUID)},
            'status': {'state': 'RUNNING'},
        }

        jobs_endpoint = (self.mock_dataproc.projects.return_value
                         .regions.return_value.jobs.return_value)
        jobs_endpoint.list.return_value.execute.return_value = {
            'jobs': [unrelated_job]}

        _DataProcJob(dataproc_api=self.mock_dataproc,
                     project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                     job=self.JOB_TO_SUBMIT)

        # No job ID matched ours, so the job must be submitted once.
        jobs_endpoint.submit.assert_called_once_with(
            projectId=GCP_PROJECT_ID_HOOK_UNIT_TEST,
            region=GCP_REGION,
            body=self.JOB_TO_SUBMIT)
 def test_raise_error_with_state_done(self, mock_init):
     # A DONE job is a success: raise_error() must be a no-op for it.
     job = _DataProcJob()  # pylint: disable=no-value-for-parameter
     job.job = {'status': {'state': 'DONE'}}
     job.job_error_states = None
     try:
         job.raise_error()
     except Exception:  # pylint: disable=broad-except
         # Any exception here means DONE was wrongly treated as an error.
         self.fail("raise_error() should not raise Exception when job=%s" % job.job)