def test_keep_looking_for_recoverable_job_even_if_errored_job_exists_on_cluster(
        self):
    # Keep looking for a job to reattach to even if the first matching job
    # found is in an irrecoverable (ERROR) state. Both mocks deliberately
    # share the same jobId: the error entry comes first in the listing.
    shared_job_id = '{}_{}'.format(TASK_ID, self.UUID)
    errored_job = {
        'reference': {'jobId': shared_job_id},
        'status': {'state': 'ERROR'},
    }
    running_job = {
        'reference': {'jobId': shared_job_id},
        'status': {'state': 'RUNNING'},
    }
    jobs_resource = (self.mock_dataproc.projects.return_value
                     .regions.return_value.jobs.return_value)
    jobs_resource.list.return_value.execute.return_value = {
        'jobs': [errored_job, running_job],
    }

    _DataProcJob(dataproc_api=self.mock_dataproc,
                 project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                 job=self.JOB_TO_SUBMIT)

    # A recoverable (RUNNING) duplicate exists, so no resubmission happens.
    jobs_resource.submit.assert_not_called()
def test_submit_job_if_no_jobs_running_on_cluster(self):
    # With an empty job listing on the cluster, the job must be submitted.
    jobs_resource = (self.mock_dataproc.projects.return_value
                     .regions.return_value.jobs.return_value)
    jobs_resource.list.return_value.execute.return_value = {'jobs': []}

    _DataProcJob(dataproc_api=self.mock_dataproc,
                 project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                 job=self.JOB_TO_SUBMIT)

    jobs_resource.submit.assert_called_once_with(
        projectId=GCP_PROJECT_ID_HOOK_UNIT_TEST,
        region=GCP_REGION,
        body=self.JOB_TO_SUBMIT)
def test_do_not_resubmit_job_if_same_job_running_on_cluster(self):
    # A job with the same task ID is already RUNNING on the cluster,
    # so _DataProcJob must reattach to it instead of resubmitting.
    duplicate_running_job = {
        'reference': {'jobId': '{}_{}'.format(TASK_ID, self.UUID)},
        'status': {'state': 'RUNNING'},
    }
    jobs_resource = (self.mock_dataproc.projects.return_value
                     .regions.return_value.jobs.return_value)
    jobs_resource.list.return_value.execute.return_value = {
        'jobs': [duplicate_running_job],
    }

    _DataProcJob(dataproc_api=self.mock_dataproc,
                 project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                 job=self.JOB_TO_SUBMIT)

    jobs_resource.submit.assert_not_called()
def test_raise_error_fallback_job_error_states(self, mock_init):
    # Even when the configured job_error_states list does not include the
    # job's actual state, raise_error() must still raise for an ERROR job,
    # and the exception message must mention that state.
    job = _DataProcJob()  # pylint: disable=no-value-for-parameter
    job.job = {'status': {'state': 'ERROR'}}
    job.job_error_states = ['CANCELLED']

    with self.assertRaises(Exception) as raised:
        job.raise_error()

    self.assertIn('ERROR', str(raised.exception))
def test_submit_job_if_same_job_errored_on_cluster(self):
    # A job with the same task ID finished in ERROR state on the cluster;
    # it is not recoverable, so the job must be resubmitted for retry.
    duplicate_errored_job = {
        'reference': {'jobId': '{}_{}'.format(TASK_ID, self.UUID)},
        'status': {'state': 'ERROR'},
    }
    jobs_resource = (self.mock_dataproc.projects.return_value
                     .regions.return_value.jobs.return_value)
    jobs_resource.list.return_value.execute.return_value = {
        'jobs': [duplicate_errored_job],
    }

    _DataProcJob(dataproc_api=self.mock_dataproc,
                 project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                 job=self.JOB_TO_SUBMIT)

    jobs_resource.submit.assert_called_once_with(
        projectId=GCP_PROJECT_ID_HOOK_UNIT_TEST,
        region=GCP_REGION,
        body=self.JOB_TO_SUBMIT)
def test_submit_job_if_different_job_running_on_cluster(self):
    # The cluster is running a job, but its task ID differs from the one
    # we are about to submit — so our job must still be submitted.
    unrelated_running_job = {
        'reference': {'jobId': 'a-different-job-id_{}'.format(self.UUID)},
        'status': {'state': 'RUNNING'},
    }
    jobs_resource = (self.mock_dataproc.projects.return_value
                     .regions.return_value.jobs.return_value)
    jobs_resource.list.return_value.execute.return_value = {
        'jobs': [unrelated_running_job],
    }

    _DataProcJob(dataproc_api=self.mock_dataproc,
                 project_id=GCP_PROJECT_ID_HOOK_UNIT_TEST,
                 job=self.JOB_TO_SUBMIT)

    jobs_resource.submit.assert_called_once_with(
        projectId=GCP_PROJECT_ID_HOOK_UNIT_TEST,
        region=GCP_REGION,
        body=self.JOB_TO_SUBMIT)
def test_raise_error_with_state_done(self, mock_init):
    # A successfully finished (DONE) job must not make raise_error() raise.
    job = _DataProcJob()  # pylint: disable=no-value-for-parameter
    job.job = {'status': {'state': 'DONE'}}
    job.job_error_states = None

    try:
        job.raise_error()
        # Pass test
    except Exception:  # pylint: disable=broad-except
        self.fail(
            "raise_error() should not raise Exception when job=%s" % job.job)