def test_that_should_stop_if_max_retry_exceeded(self, create_copy_job, _):
    # given
    retry_count = 5

    # when
    ResultCheck().check(
        ResultCheckRequest(task_name_suffix="task_name_suffix",
                           copy_job_type_id="backups",
                           job_reference=BigQueryJobReference(
                               project_id="target_project_id",
                               job_id="job_id",
                               location='EU'),
                           retry_count=retry_count,
                           post_copy_action_request=None))
    retry_count += 1
    ResultCheck().check(
        ResultCheckRequest(task_name_suffix="task_name_suffix",
                           copy_job_type_id="backups",
                           job_reference=BigQueryJobReference(
                               project_id="target_project_id",
                               job_id="job_id",
                               location='EU'),
                           retry_count=retry_count,
                           post_copy_action_request=None))

    # then: only the first check re-triggers a copy job; the incremented
    # retry count exceeds the maximum, so the second check stops retrying.
    create_copy_job.assert_called_once()
def test_that_should_re_trigger_copy_job_task_with_proper_create_and_write_dispositions_if_retry_error_occurs(
        self, create_copy_job, _):
    # given
    retry_count = 0
    post_copy_action_request = \
        PostCopyActionRequest(url="/my/url", data={"key1": "value1"})
    create_disposition = "CREATE_NEVER"
    write_disposition = "WRITE_TRUNCATE"

    # when
    ResultCheck().check(
        ResultCheckRequest(
            task_name_suffix="task_name_suffix",
            copy_job_type_id="backups",
            job_reference=BigQueryJobReference(
                project_id="target_project_id",
                job_id="job_id",
                location='EU'),
            retry_count=retry_count,
            post_copy_action_request=post_copy_action_request))

    # then
    copy_job_result = CopyJobResult(JobResultExample.DONE_WITH_RETRY_ERRORS)
    copy_job_request = CopyJobRequest(
        task_name_suffix=None,
        copy_job_type_id="backups",
        source_big_query_table=copy_job_result.source_bq_table,
        target_big_query_table=copy_job_result.target_bq_table,
        create_disposition=create_disposition,
        write_disposition=write_disposition,
        retry_count=retry_count + 1,
        post_copy_action_request=post_copy_action_request)
    create_copy_job.assert_called_once_with(copy_job_request)
def test_copy_job_result_check_creation(self):
    # given
    result_check_request = ResultCheckRequest(
        task_name_suffix='task-name-suffix',
        copy_job_type_id='backups',
        job_reference=BigQueryJobReference(project_id="project_abc",
                                           job_id="job123",
                                           location='EU'),
        retry_count=2,
        post_copy_action_request=PostCopyActionRequest(
            url="/my/url", data={"key1": "value1"})
    )

    # when
    TaskCreator.create_copy_job_result_check(result_check_request)

    # then
    expected_queue_name = 'backups-result-check'
    executed_tasks = self.taskqueue_stub.get_filtered_tasks(
        queue_names=expected_queue_name
    )
    self.assertEqual(len(executed_tasks), 1,
                     "Should create one task in queue")
    executed_task = executed_tasks[0]
    self.assertEqual(json.dumps(result_check_request, cls=RequestEncoder),
                     executed_task.extract_params()['resultCheckRequest'])
    self.assertEqual('POST', executed_task.method)
    self.assertEqual(executed_task.url, '/tasks/copy_job_async/result_check')
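# A hypothetical sketch of the webapp handler on the receiving end of the
# task queued above. Only the 'resultCheckRequest' parameter name and the
# /tasks/copy_job_async/result_check URL come from the test; the handler
# class, the webapp2 usage, and the elided imports of ResultCheck and
# ResultCheckRequest are assumptions for illustration.
import json

import webapp2


class ResultCheckHandler(webapp2.RequestHandler):
    def post(self):
        # Decode the JSON payload produced by RequestEncoder and rebuild the
        # request object via ResultCheckRequest.from_json (shown below).
        request_json = json.loads(self.request.get('resultCheckRequest'))
        ResultCheck().check(ResultCheckRequest.from_json(request_json))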
def insert_job(self, project_id, body):
    response = self.service.jobs().insert(projectId=project_id,
                                          body=body).execute()
    logging.info('Insert job response: %s', json.dumps(response))
    return BigQueryJobReference(
        project_id=response['jobReference']['projectId'],
        job_id=response['jobReference']['jobId'],
        location=response['jobReference']['location'])
def create_example_result_check_request(project_id, job_id, location,
                                        retry_count,
                                        post_copy_action_request):
    return ResultCheckRequest(
        task_name_suffix=None,
        copy_job_type_id=None,
        job_reference=BigQueryJobReference(project_id=project_id,
                                           job_id=job_id,
                                           location=location),
        retry_count=retry_count,
        post_copy_action_request=post_copy_action_request)
def __schedule(source_big_query_table, target_big_query_table, job_id,
               create_disposition, write_disposition):
    logging.info("Scheduling job ID: %s", job_id)
    target_project_id = target_big_query_table.get_project_id()
    job_data = {
        "jobReference": {
            "jobId": job_id,
            "projectId": target_project_id
        },
        "configuration": {
            "copy": {
                "sourceTable": {
                    "projectId": source_big_query_table.get_project_id(),
                    "datasetId": source_big_query_table.get_dataset_id(),
                    "tableId": source_big_query_table.get_table_id(),
                },
                "destinationTable": {
                    "projectId": target_project_id,
                    "datasetId": target_big_query_table.get_dataset_id(),
                    "tableId": target_big_query_table.get_table_id(),
                },
                "createDisposition": create_disposition,
                "writeDisposition": write_disposition
            }
        }
    }
    try:
        job_reference = BigQuery().insert_job(target_project_id, job_data)
        logging.info("Successfully inserted: %s", job_reference)
        return job_reference
    except HttpError as bq_error:
        copy_job_error = BigQueryJobError(bq_error,
                                          source_big_query_table,
                                          target_big_query_table)
        if copy_job_error.is_deadline_exceeded():
            # The insert may have succeeded server-side despite the timeout;
            # look the job up instead of re-inserting it.
            job_json = CopyJobService.__get_job(job_id, target_project_id,
                                                copy_job_error.location)
            return CopyJobService.__to_bq_job_reference(job_json)
        elif copy_job_error.should_be_retried():
            # Retryable error (e.g. the job turned out to already exist after
            # a retried insert, as in the 503 -> 409 regression test): return
            # a reference under the same job ID for the result check.
            logging.warning(copy_job_error)
            return BigQueryJobReference(
                project_id=target_project_id,
                job_id=job_id,
                location=BigQueryTableMetadata.get_table_by_big_query_table(
                    source_big_query_table).get_location())
        else:
            logging.exception(copy_job_error)
            return copy_job_error
    except Exception as error:
        logging.error("%s Exception thrown during Copy Job creation: %s",
                      type(error), error)
        raise
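# A hedged sketch (not the actual implementation) of how a caller such as
# run_copy_job_request might branch on __schedule's return value, inferred
# from the tests: an unhandled HttpError yields a BigQueryJobError and ends in
# a post-copy action carrying an error job_json, while a successful
# BigQueryJobReference ends in a result-check task. The attribute access on
# CopyJobRequest and the create_job_json_with_error helper are assumptions.
def _dispatch_schedule_result(result, request):
    if isinstance(result, BigQueryJobError):
        TaskCreator.create_post_copy_action(
            copy_job_type_id=request.copy_job_type_id,
            post_copy_action_request=request.post_copy_action_request,
            job_json=result.create_job_json_with_error())  # hypothetical
    else:
        TaskCreator.create_copy_job_result_check(
            ResultCheckRequest(
                task_name_suffix=request.task_name_suffix,
                copy_job_type_id=request.copy_job_type_id,
                job_reference=result,
                retry_count=request.retry_count,
                post_copy_action_request=request.post_copy_action_request))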
def test_copy_job_result_check_task_should_not_be_created_when_retry_smaller_than_0(
        self):
    with self.assertRaises(AssertionError):
        TaskCreator.create_copy_job_result_check(
            ResultCheckRequest(
                task_name_suffix=None,
                copy_job_type_id='backups',
                job_reference=BigQueryJobReference(project_id="project_abc",
                                                   job_id="job123",
                                                   location='EU'),
                retry_count=-1
            )
        )
def create_example_result_check_request(self):
    retry_count = 0
    post_copy_action_request = \
        PostCopyActionRequest(url="/my/url", data={"key1": "value1"})
    return ResultCheckRequest(
        task_name_suffix="task_name_suffix",
        copy_job_type_id="backups",
        job_reference=BigQueryJobReference(project_id="target_project_id",
                                           job_id="job_id",
                                           location='EU'),
        retry_count=retry_count,
        post_copy_action_request=post_copy_action_request)
def from_json(cls, json):
    from src.commons.big_query.big_query_job_reference import \
        BigQueryJobReference
    from src.commons.big_query.copy_job_async.post_copy_action_request import \
        PostCopyActionRequest

    job_reference = BigQueryJobReference.from_json(json["job_reference"])
    post_copy_action_request = PostCopyActionRequest.from_json(
        json["post_copy_action_request"])
    return ResultCheckRequest(
        task_name_suffix=json["task_name_suffix"],
        copy_job_type_id=json["copy_job_type_id"],
        job_reference=job_reference,
        retry_count=json["retry_count"],
        post_copy_action_request=post_copy_action_request)
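# A minimal round-trip sketch for from_json: test_copy_job_result_check_creation
# serializes the request with json.dumps(..., cls=RequestEncoder), and from_json
# parses the same key names back. The equality assert assumes ResultCheckRequest
# defines __eq__, as its use with assert_called_once_with in the tests suggests.
serialized = json.dumps(result_check_request, cls=RequestEncoder)
restored = ResultCheckRequest.from_json(json.loads(serialized))
assert restored == result_check_request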
def test_create_copy_job_result_check_throws_error_on_unknown_queue(self):
    # when
    with self.assertRaises(UnknownQueueError) as error:
        TaskCreator.create_copy_job_result_check(ResultCheckRequest(
            task_name_suffix=None,
            copy_job_type_id="unknown-copying",
            job_reference=BigQueryJobReference(project_id="project_abc",
                                               job_id="job123",
                                               location='EU'),
            retry_count=0,
            post_copy_action_request=PostCopyActionRequest(
                '/my/post/copy/url', {'mypayload': 'mypayload_value'}))
        )

    # then
    self.assertEqual(
        error.exception.message,
        "There is no queue 'unknown-copying-result-check'. "
        "Please add it to your queue.yaml definition.")
def test_that_after_successful_job_no_post_action_is_created(
        self, create_post_copy_action, _):
    # given
    post_copy_action_request = None

    # when
    ResultCheck().check(
        ResultCheckRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id="backups",
            job_reference=BigQueryJobReference(
                project_id="target_project_id",
                job_id="job_id",
                location='EU'),
            retry_count=0,
            post_copy_action_request=post_copy_action_request))

    # then
    create_post_copy_action.assert_not_called()
def __to_bq_job_reference(job_json):
    job_reference = job_json["jobReference"]
    return BigQueryJobReference(job_reference["projectId"],
                                job_reference["jobId"],
                                job_reference["location"])
def __get_job(job_id, project_id, location):
    job_reference = BigQueryJobReference(project_id=project_id,
                                         job_id=job_id,
                                         location=location)
    return BigQuery().get_job(job_reference)
class TestCopyJobService(unittest.TestCase):
    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        ndb.get_context().clear_cache()
        patch('googleapiclient.discovery.build').start()
        patch(
            'oauth2client.client.GoogleCredentials.get_application_default') \
            .start()
        self._create_http = patch.object(BigQuery, '_create_http').start()
        self.example_source_bq_table = BigQueryTable('source_project_id_1',
                                                     'source_dataset_id_1',
                                                     'source_table_id_1')
        self.example_target_bq_table = BigQueryTable('target_project_id_1',
                                                     'target_dataset_id_1',
                                                     'target_table_id_1')

    def tearDown(self):
        patch.stopall()
        self.testbed.deactivate()

    @patch.object(BigQuery, 'insert_job',
                  return_value=BigQueryJobReference(project_id='test_project',
                                                    job_id='job123',
                                                    location='EU'))
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_post_copy_action_request_is_passed(
            self, create_copy_job_result_check, _):
        # given
        post_copy_action_request = \
            PostCopyActionRequest(url='/my/url', data={'key1': 'value1'})

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition="CREATE_IF_NEEDED",
                           write_disposition="WRITE_EMPTY",
                           retry_count=0,
                           post_copy_action_request=post_copy_action_request))

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(project_id='test_project',
                                                   job_id='job123',
                                                   location='EU'),
                retry_count=0,
                post_copy_action_request=post_copy_action_request))

    @patch.object(BigQuery, 'insert_job',
                  return_value=BigQueryJobReference(project_id='test_project',
                                                    job_id='job123',
                                                    location='EU'))
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_create_and_write_disposition_are_passed_to_result_check(
            self, create_copy_job_result_check, _):
        # given
        create_disposition = "SOME_CREATE_DISPOSITION"
        write_disposition = "SOME_WRITE_DISPOSITION"

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition=create_disposition,
                           write_disposition=write_disposition,
                           retry_count=0,
                           post_copy_action_request=None))

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(task_name_suffix='task_name_suffix',
                               copy_job_type_id='test-process',
                               job_reference=BigQueryJobReference(
                                   project_id='test_project',
                                   job_id='job123',
                                   location='EU'),
                               retry_count=0,
                               post_copy_action_request=None))

    @patch.object(BigQuery, 'insert_job')
    @patch('time.sleep', side_effect=lambda _: None)
    def test_that_copy_table_should_throw_error_after_exception_not_being_http_error_thrown_on_copy_job_creation(
            self, _, insert_job):
        # given
        error_message = 'test exception'
        insert_job.side_effect = Exception(error_message)
        request = CopyJobRequest(
            task_name_suffix=None,
            copy_job_type_id=None,
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY")

        # when
        with self.assertRaises(Exception) as context:
            CopyJobService().run_copy_job_request(request)

        # then
        self.assertTrue(error_message in str(context.exception))

    @patch.object(BigQuery, 'insert_job')
    @patch('time.sleep', side_effect=lambda _: None)
    def test_that_copy_table_should_throw_unhandled_errors(
            self, _, insert_job):
        # given
        exception = HttpError(Mock(status=500), 'internal error')
        exception._get_reason = Mock(return_value='internal error')
        insert_job.side_effect = exception
        request = CopyJobRequest(
            task_name_suffix=None,
            copy_job_type_id=None,
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY")

        # when
        with self.assertRaises(HttpError) as context:
            CopyJobService().run_copy_job_request(request)

        # then
        self.assertEqual(context.exception, exception)

    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_post_copy_action')
    def test_that_copy_table_should_create_correct_post_copy_action_if_404_http_error_thrown_on_copy_job_creation(
            self, create_post_copy_action, insert_job):
        # given
        error = HttpError(Mock(status=404), 'not found')
        error._get_reason = Mock(return_value='not found')
        insert_job.side_effect = error
        post_copy_action_request = PostCopyActionRequest(
            url='/my/url', data={'key1': 'value1'})
        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=post_copy_action_request)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_post_copy_action.assert_called_once_with(
            copy_job_type_id='test-process',
            post_copy_action_request=post_copy_action_request,
            job_json={
                'status': {
                    'state': 'DONE',
                    'errors': [{
                        'reason': 'Invalid',
                        'message': (
                            "404 while creating Copy Job from {} to {}".format(
                                self.example_source_bq_table,
                                self.example_target_bq_table))
                    }]
                },
                'configuration': {
                    'copy': {
                        'sourceTable': {
                            'projectId':
                                self.example_source_bq_table.get_project_id(),
                            'tableId':
                                self.example_source_bq_table.get_table_id(),
                            'datasetId':
                                self.example_source_bq_table.get_dataset_id()
                        },
                        'destinationTable': {
                            'projectId':
                                self.example_target_bq_table.get_project_id(),
                            'tableId':
                                self.example_target_bq_table.get_table_id(),
                            'datasetId':
                                self.example_target_bq_table.get_dataset_id()
                        }
                    }
                }
            })

    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_post_copy_action')
    def test_that_copy_table_should_create_correct_post_copy_action_if_access_denied_http_error_thrown_on_copy_job_creation(
            self, create_post_copy_action, insert_job):
        # given
        http_error_content = "{\"error\": " \
                             " {\"errors\": [" \
                             " {\"reason\": \"Access Denied\"," \
                             " \"message\": \"Access Denied\"," \
                             " \"location\": \"US\"" \
                             " }]," \
                             " \"code\": 403," \
                             " \"message\": \"Access Denied\"}}"
        insert_job.side_effect = HttpError(Mock(status=403),
                                           http_error_content)
        post_copy_action_request = PostCopyActionRequest(
            url='/my/url', data={'key1': 'value1'})
        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=post_copy_action_request)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_post_copy_action.assert_called_once_with(
            copy_job_type_id='test-process',
            post_copy_action_request=post_copy_action_request,
            job_json={
                'status': {
                    'state': 'DONE',
                    'errors': [{
                        'reason': 'Invalid',
                        'message': (
                            "Access Denied while creating Copy Job from {} to {}"
                            .format(self.example_source_bq_table,
                                    self.example_target_bq_table))
                    }]
                },
                'configuration': {
                    'copy': {
                        'sourceTable': {
                            'projectId':
                                self.example_source_bq_table.get_project_id(),
                            'tableId':
                                self.example_source_bq_table.get_table_id(),
                            'datasetId':
                                self.example_source_bq_table.get_dataset_id()
                        },
                        'destinationTable': {
                            'projectId':
                                self.example_target_bq_table.get_project_id(),
                            'tableId':
                                self.example_target_bq_table.get_table_id(),
                            'datasetId':
                                self.example_target_bq_table.get_dataset_id()
                        }
                    }
                }
            })

    @patch.object(BigQuery, 'get_job')
    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_copy_table_will_try_to_wait_if_deadline_exceeded(
            self, create_copy_job_result_check, insert_job, get_job):
        # given
        http_error_content = "{\"error\": " \
                             " {\"errors\": [" \
                             " {\"reason\": \"Deadline exceeded\"," \
                             " \"message\": \"Deadline exceeded\"," \
                             " \"location\": \"US\"" \
                             " }]," \
                             " \"code\": 500," \
                             " \"message\": \"Deadline exceeded\"}}"
        successful_job_json = {
            'status': {
                'state': 'DONE'
            },
            'jobReference': {
                'projectId': self.example_target_bq_table.get_project_id(),
                'location': 'EU',
                'jobId': 'job123',
            },
            'configuration': {
                'copy': {
                    'sourceTable': {
                        'projectId':
                            self.example_source_bq_table.get_project_id(),
                        'tableId':
                            self.example_source_bq_table.get_table_id(),
                        'datasetId':
                            self.example_source_bq_table.get_dataset_id()
                    },
                    'destinationTable': {
                        'projectId':
                            self.example_target_bq_table.get_project_id(),
                        'tableId':
                            self.example_target_bq_table.get_table_id(),
                        'datasetId':
                            self.example_target_bq_table.get_dataset_id()
                    }
                }
            }
        }
        insert_job.side_effect = HttpError(Mock(status=500),
                                           http_error_content)
        get_job.return_value = successful_job_json
        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=None)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(
                    project_id=self.example_target_bq_table.get_project_id(),
                    job_id='job123',
                    location='EU'),
                retry_count=0,
                post_copy_action_request=None))

    @patch(
        'src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata')
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    @patch.object(CopyJobService, '_create_random_job_id',
                  return_value='random_job_123')
    @patch.object(BigQuery, 'insert_job', side_effect=[
        HttpError(Mock(status=503), 'internal error'),
        HttpError(Mock(status=409), 'job exists')
    ])
    @patch('time.sleep', side_effect=lambda _: None)
    def test_bug_regression_job_already_exists_after_internal_error(
            self, _, insert_job, _create_random_job_id,
            create_copy_job_result_check, table_metadata):
        # given
        post_copy_action_request = \
            PostCopyActionRequest(url='/my/url', data={'key1': 'value1'})
        table_metadata._BigQueryTableMetadata__get_table_or_partition \
            .return_value.get_location.return_value = 'EU'

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition="CREATE_IF_NEEDED",
                           write_disposition="WRITE_EMPTY",
                           retry_count=0,
                           post_copy_action_request=post_copy_action_request))

        # then
        self.assertEqual(insert_job.call_count, 2)
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(
                    project_id='target_project_id_1',
                    job_id='random_job_123',
                    location='EU'),
                retry_count=0,
                post_copy_action_request=post_copy_action_request))