import datetime
import json
import logging

import googleapiclient.discovery
from googleapiclient.errors import Error
from oauth2client.client import GoogleCredentials
from retry import retry

# Project-local imports; exact module paths are assumptions based on the
# package layout referenced by the tests below.
from src.commons.big_query.big_query_table import BigQueryTable
from src.commons.error_reporting import ErrorReporting


class DataStreamer(object):

    def __init__(self, project_id, dataset_id, table_id):
        self.big_query_table = BigQueryTable(project_id, dataset_id, table_id)
        self.service = googleapiclient.discovery.build(
            'bigquery',
            'v2',
            credentials=self._create_credentials(),
            http=self._create_http()
        )

    @staticmethod
    def _create_credentials():
        return GoogleCredentials.get_application_default()

    @staticmethod
    def _create_http():
        return None

    def stream_stats(self, rows):
        insert_all_data = {
            'rows': [{'json': data} for data in rows]
        }
        logging.info("Streaming data to table %s", self.big_query_table)
        insert_all_response = self._stream_metadata(insert_all_data)
        if 'insertErrors' in insert_all_response:
            logging.debug("Sent json: \n%s", json.dumps(insert_all_data))
            error_message = "Error during streaming metadata to BigQuery: \n{}" \
                .format(json.dumps(insert_all_response['insertErrors']))
            logging.error(error_message)
            ErrorReporting().report(error_message)
        else:
            logging.debug("Stats have been sent successfully to %s table",
                          self.big_query_table)

    @retry(Error, tries=2, delay=2, backoff=2)
    def _stream_metadata(self, insert_all_data):
        # Stream into today's partition of the ingestion-time partitioned
        # table ('<table_id>$<YYYYMMDD>').
        partition = datetime.datetime.now().strftime("%Y%m%d")
        return self.service.tabledata().insertAll(
            projectId=self.big_query_table.get_project_id(),
            datasetId=self.big_query_table.get_dataset_id(),
            tableId='{}${}'.format(self.big_query_table.get_table_id(),
                                   partition),
            body=insert_all_data).execute(num_retries=3)
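
# ---------------------------------------------------------------------------
# Usage sketch (an addition, not part of the original module): how
# DataStreamer might be driven. The project/dataset/table names and row
# fields below are placeholders; running this issues a real
# `tabledata.insertAll` call against BigQuery using application-default
# credentials, so each row dict must match the target table's schema.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    streamer = DataStreamer('example-project', 'example_dataset',
                            'example_table')
    streamer.stream_stats([
        {'snapshot_time': '2018-01-01 00:00:00', 'table_count': 42},
        {'snapshot_time': '2018-01-02 00:00:00', 'table_count': 43},
    ])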
import unittest

from google.appengine.ext import ndb
from google.appengine.ext import testbed
from googleapiclient.errors import HttpError
from mock import Mock, patch

# Project-local imports; exact module paths are assumptions based on the
# src.commons.big_query package patched in the tests below.
from src.commons.big_query.big_query import BigQuery
from src.commons.big_query.big_query_job_reference import BigQueryJobReference
from src.commons.big_query.big_query_table import BigQueryTable
from src.commons.big_query.copy_job_async.copy_job_request import \
    CopyJobRequest
from src.commons.big_query.copy_job_async.copy_job_service import \
    CopyJobService
from src.commons.big_query.copy_job_async.post_copy_action_request import \
    PostCopyActionRequest
from src.commons.big_query.copy_job_async.result_check_request import \
    ResultCheckRequest
from src.commons.big_query.copy_job_async.task_creator import TaskCreator


class TestCopyJobService(unittest.TestCase):

    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        ndb.get_context().clear_cache()
        patch('googleapiclient.discovery.build').start()
        patch('oauth2client.client.GoogleCredentials.get_application_default') \
            .start()
        self._create_http = patch.object(BigQuery, '_create_http').start()
        self.example_source_bq_table = BigQueryTable('source_project_id_1',
                                                     'source_dataset_id_1',
                                                     'source_table_id_1')
        self.example_target_bq_table = BigQueryTable('target_project_id_1',
                                                     'target_dataset_id_1',
                                                     'target_table_id_1')

    def tearDown(self):
        patch.stopall()
        self.testbed.deactivate()

    @patch.object(BigQuery, 'insert_job',
                  return_value=BigQueryJobReference(project_id='test_project',
                                                    job_id='job123',
                                                    location='EU'))
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_post_copy_action_request_is_passed(
            self, create_copy_job_result_check, _):
        # given
        post_copy_action_request = \
            PostCopyActionRequest(url='/my/url', data={'key1': 'value1'})

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition="CREATE_IF_NEEDED",
                           write_disposition="WRITE_EMPTY",
                           retry_count=0,
                           post_copy_action_request=post_copy_action_request))

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(project_id='test_project',
                                                   job_id='job123',
                                                   location='EU'),
                retry_count=0,
                post_copy_action_request=post_copy_action_request))

    @patch.object(BigQuery, 'insert_job',
                  return_value=BigQueryJobReference(project_id='test_project',
                                                    job_id='job123',
                                                    location='EU'))
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_create_and_write_disposition_are_passed_to_result_check(
            self, create_copy_job_result_check, _):
        # given
        create_disposition = "SOME_CREATE_DISPOSITION"
        write_disposition = "SOME_WRITE_DISPOSITION"

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition=create_disposition,
                           write_disposition=write_disposition,
                           retry_count=0,
                           post_copy_action_request=None))

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(task_name_suffix='task_name_suffix',
                               copy_job_type_id='test-process',
                               job_reference=BigQueryJobReference(
                                   project_id='test_project',
                                   job_id='job123',
                                   location='EU'),
                               retry_count=0,
                               post_copy_action_request=None))

    @patch.object(BigQuery, 'insert_job')
    @patch('time.sleep', side_effect=lambda _: None)
    def test_that_copy_table_should_throw_error_after_exception_not_being_http_error_thrown_on_copy_job_creation(
            self, _, insert_job):
        # given
        error_message = 'test exception'
        insert_job.side_effect = Exception(error_message)
        request = CopyJobRequest(
            task_name_suffix=None,
            copy_job_type_id=None,
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY")

        # when
        with self.assertRaises(Exception) as context:
            CopyJobService().run_copy_job_request(request)

        # then
        self.assertTrue(error_message in str(context.exception))

    @patch.object(BigQuery, 'insert_job')
    @patch('time.sleep', side_effect=lambda _: None)
    def test_that_copy_table_should_throw_unhandled_errors(
            self, _, insert_job):
        # given
        exception = HttpError(Mock(status=500), 'internal error')
        exception._get_reason = Mock(return_value='internal error')
        insert_job.side_effect = exception
        request = CopyJobRequest(
            task_name_suffix=None,
            copy_job_type_id=None,
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY")

        # when
        with self.assertRaises(HttpError) as context:
            CopyJobService().run_copy_job_request(request)

        # then
        self.assertEqual(context.exception, exception)

    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_post_copy_action')
    def test_that_copy_table_should_create_correct_post_copy_action_if_404_http_error_thrown_on_copy_job_creation(
            self, create_post_copy_action, insert_job):
        # given
        error = HttpError(Mock(status=404), 'not found')
        error._get_reason = Mock(return_value='not found')
        insert_job.side_effect = error
        post_copy_action_request = PostCopyActionRequest(
            url='/my/url', data={'key1': 'value1'})
        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=post_copy_action_request)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_post_copy_action.assert_called_once_with(
            copy_job_type_id='test-process',
            post_copy_action_request=post_copy_action_request,
            job_json={
                'status': {
                    'state': 'DONE',
                    'errors': [{
                        'reason': 'Invalid',
                        'message': (
                            "404 while creating Copy Job from {} to {}".format(
                                self.example_source_bq_table,
                                self.example_target_bq_table))
                    }]
                },
                'configuration': {
                    'copy': {
                        'sourceTable': {
                            'projectId':
                                self.example_source_bq_table.get_project_id(),
                            'tableId':
                                self.example_source_bq_table.get_table_id(),
                            'datasetId':
                                self.example_source_bq_table.get_dataset_id()
                        },
                        'destinationTable': {
                            'projectId':
                                self.example_target_bq_table.get_project_id(),
                            'tableId':
                                self.example_target_bq_table.get_table_id(),
                            'datasetId':
                                self.example_target_bq_table.get_dataset_id()
                        }
                    }
                }
            })

    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_post_copy_action')
    def test_that_copy_table_should_create_correct_post_copy_action_if_access_denied_http_error_thrown_on_copy_job_creation(
            self, create_post_copy_action, insert_job):
        # given
        http_error_content = "{\"error\": " \
                             " {\"errors\": [" \
                             "  {\"reason\": \"Access Denied\"," \
                             "   \"message\": \"Access Denied\"," \
                             "   \"location\": \"US\"" \
                             "  }]," \
                             "  \"code\": 403," \
                             "  \"message\": \"Access Denied\"}}"
        insert_job.side_effect = HttpError(Mock(status=403),
                                           http_error_content)
        post_copy_action_request = PostCopyActionRequest(
            url='/my/url', data={'key1': 'value1'})
        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=post_copy_action_request)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_post_copy_action.assert_called_once_with(
            copy_job_type_id='test-process',
            post_copy_action_request=post_copy_action_request,
            job_json={
                'status': {
                    'state': 'DONE',
                    'errors': [{
                        'reason': 'Invalid',
                        'message': (
                            "Access Denied while creating Copy Job from {} to {}"
                            .format(self.example_source_bq_table,
                                    self.example_target_bq_table))
                    }]
                },
                'configuration': {
                    'copy': {
                        'sourceTable': {
                            'projectId':
                                self.example_source_bq_table.get_project_id(),
                            'tableId':
                                self.example_source_bq_table.get_table_id(),
                            'datasetId':
                                self.example_source_bq_table.get_dataset_id()
                        },
                        'destinationTable': {
                            'projectId':
                                self.example_target_bq_table.get_project_id(),
                            'tableId':
                                self.example_target_bq_table.get_table_id(),
                            'datasetId':
                                self.example_target_bq_table.get_dataset_id()
                        }
                    }
                }
            })

    @patch.object(BigQuery, 'get_job')
    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_copy_table_will_try_to_wait_if_deadline_exceeded(
            self, create_copy_job_result_check, insert_job, get_job):
        # given
        http_error_content = "{\"error\": " \
                             " {\"errors\": [" \
                             "  {\"reason\": \"Deadline exceeded\"," \
                             "   \"message\": \"Deadline exceeded\"," \
                             "   \"location\": \"US\"" \
                             "  }]," \
                             "  \"code\": 500," \
                             "  \"message\": \"Deadline exceeded\"}}"
        successful_job_json = {
            'status': {
                'state': 'DONE'
            },
            'jobReference': {
                'projectId': self.example_target_bq_table.get_project_id(),
                'location': 'EU',
                'jobId': 'job123',
            },
            'configuration': {
                'copy': {
                    'sourceTable': {
                        'projectId':
                            self.example_source_bq_table.get_project_id(),
                        'tableId':
                            self.example_source_bq_table.get_table_id(),
                        'datasetId':
                            self.example_source_bq_table.get_dataset_id()
                    },
                    'destinationTable': {
                        'projectId':
                            self.example_target_bq_table.get_project_id(),
                        'tableId':
                            self.example_target_bq_table.get_table_id(),
                        'datasetId':
                            self.example_target_bq_table.get_dataset_id()
                    }
                }
            }
        }
        insert_job.side_effect = HttpError(Mock(status=500),
                                           http_error_content)
        get_job.return_value = successful_job_json
        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=None)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(
                    project_id=self.example_target_bq_table.get_project_id(),
                    job_id='job123',
                    location='EU'),
                retry_count=0,
                post_copy_action_request=None))

    @patch(
        'src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata')
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    @patch.object(CopyJobService, '_create_random_job_id',
                  return_value='random_job_123')
    @patch.object(BigQuery, 'insert_job', side_effect=[
        HttpError(Mock(status=503), 'internal error'),
        HttpError(Mock(status=409), 'job exists')
    ])
    @patch('time.sleep', side_effect=lambda _: None)
    def test_bug_regression_job_already_exists_after_internal_error(
            self, _, insert_job, _create_random_job_id,
            create_copy_job_result_check, table_metadata):
        # given
        post_copy_action_request = \
            PostCopyActionRequest(url='/my/url', data={'key1': 'value1'})
        table_metadata._BigQueryTableMetadata__get_table_or_partition \
            .return_value.get_location.return_value = 'EU'

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition="CREATE_IF_NEEDED",
                           write_disposition="WRITE_EMPTY",
                           retry_count=0,
                           post_copy_action_request=post_copy_action_request))

        # then
        self.assertEqual(insert_job.call_count, 2)
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(
                    project_id='target_project_id_1',
                    job_id='random_job_123',
                    location='EU'),
                retry_count=0,
                post_copy_action_request=post_copy_action_request))