def test_copy_job_creation(self, _):
    # given
    copy_job_request = CopyJobRequest(
        task_name_suffix='task-name-suffix',
        copy_job_type_id="backups",
        source_big_query_table=BigQueryTable('source_project',
                                             'source_dataset',
                                             'source_table'),
        target_big_query_table=BigQueryTable('target_project',
                                             'target_dataset',
                                             'target_table'),
        create_disposition="CREATE_IF_NEEDED",
        write_disposition="WRITE_EMPTY",
        post_copy_action_request=PostCopyActionRequest(
            url="/my/url", data={"key1": "value1"})
    )

    # when
    TaskCreator.create_copy_job(copy_job_request=copy_job_request)

    # then
    expected_queue_name = 'backups-copy-job'
    executed_tasks = self.taskqueue_stub.get_filtered_tasks(
        queue_names=expected_queue_name
    )

    self.assertEqual(len(executed_tasks), 1,
                     "Should create one task in queue")
    executed_task = executed_tasks[0]
    self.assertEqual(json.dumps(copy_job_request, cls=RequestEncoder),
                     executed_task.extract_params()['copyJobRequest'])
    self.assertEqual('POST', executed_task.method)
    self.assertEqual('task_name', executed_task.name)
    self.assertEqual(executed_task.url, '/tasks/copy_job_async/copy_job')
def setUp(self):
    self.testbed = testbed.Testbed()
    self.testbed.activate()
    ndb.get_context().clear_cache()
    patch('googleapiclient.discovery.build').start()
    patch('oauth2client.client.GoogleCredentials.get_application_default') \
        .start()
    self._create_http = patch.object(BigQuery, '_create_http').start()
    self.example_source_bq_table = BigQueryTable('source_project_id_1',
                                                 'source_dataset_id_1',
                                                 'source_table_id_1')
    self.example_target_bq_table = BigQueryTable('target_project_id_1',
                                                 'target_dataset_id_1',
                                                 'target_table_id_1')
def test_should_disable_partition_expiration_if_backup_table_has_it(
        self, disable_partition_expiration, _, _1, _2, _3, _4, _5):
    # given
    table_entity = Table(project_id="source_project_id",
                         dataset_id="source_dataset_id",
                         table_id="source_table_id",
                         partition_id="123")
    table_entity.put()
    source_bq_table = TableReference.from_table_entity(
        table_entity).create_big_query_table()
    destination_bq_table = BigQueryTable("target_project_id",
                                         "target_dataset_id",
                                         "target_table_id")
    data = {
        "sourceBqTable": source_bq_table,
        "targetBqTable": destination_bq_table
    }
    payload = json.dumps({
        "data": data,
        "jobJson": JobResultExample.DONE
    }, cls=RequestEncoder)

    # when
    response = self.under_test.post(
        '/callback/backup-created/project/dataset/table', params=payload)

    # then
    self.assertEqual(response.status_int, 200)
    disable_partition_expiration.assert_called_once()
def __init__(self, project_id, dataset_id, table_id):
    self.big_query_table = BigQueryTable(project_id, dataset_id, table_id)
    # Build the BigQuery API client used for all subsequent service calls.
    self.service = googleapiclient.discovery.build(
        'bigquery',
        'v2',
        credentials=self._create_credentials(),
        http=self._create_http()
    )
def test_source_table_reference(self):
    # given
    copy_job_result = CopyJobResult(JobResultExample.DONE)

    # when
    source_bq_table = copy_job_result.source_bq_table

    # then
    self.assertEqual(
        source_bq_table,
        BigQueryTable(project_id="source_project_id",
                      dataset_id="source_dataset_id",
                      table_id="source_table_id$123"))
def test_target_table_reference(self):
    # given
    copy_job_result = CopyJobResult(JobResultExample.DONE)

    # when
    target_bq_table = copy_job_result.target_bq_table

    # then
    self.assertEqual(
        target_bq_table,
        BigQueryTable(project_id="target_project_id",
                      dataset_id="target_dataset_id",
                      table_id="target_table_id"))
def test_that_async_copy_job_is_called_with_correct_parameters_when_creating_new_backup(  # pylint: disable=C0301
        self, async_copy):
    # given
    table_to_backup = Table(project_id="src_project",
                            dataset_id="src_dataset",
                            table_id="src_table",
                            partition_id="20180416")
    source_bq_table = BigQueryTable("src_project", "src_dataset",
                                    "src_table$20180416")
    destination_bq_table = BigQueryTable(
        "bkup_storage_project",
        "2018_16_US_src_project",
        "20180416_000000_src_project_src_dataset_src_table_partition_20180416"
    )  # pylint: disable=C0301
    under_test = BackupCreator(datetime.datetime.utcnow())

    # when
    under_test.create_backup(table_to_backup, BigQueryTableMetadata({}))

    # then
    async_copy.assert_called_with(source_bq_table, destination_bq_table)
def test_happy_path(self, copy_table_mock):
    # given
    source_big_query_table = BigQueryTable("source_project_id",
                                           "source_dataset_id",
                                           "source_table_id")
    target_big_query_table = BigQueryTable("target_project_id",
                                           "target_dataset_id",
                                           "target_table_id")
    post_copy_action_request = PostCopyActionRequest(
        url="/my/url", data={"key1": "value1"})
    url = '/tasks/copy_job_async/copy_job'

    # when
    self.under_test.post(
        url=url,
        params={
            "copyJobRequest": json.dumps(
                CopyJobRequest(
                    task_name_suffix=None,
                    copy_job_type_id=None,
                    source_big_query_table=source_big_query_table,
                    target_big_query_table=target_big_query_table,
                    create_disposition="CREATE_IF_NEEDED",
                    write_disposition="WRITE_EMPTY",
                    retry_count=0,
                    post_copy_action_request=post_copy_action_request),
                cls=RequestEncoder)
        })

    # then
    copy_table_mock.assert_called_with(
        CopyJobRequest(task_name_suffix=None,
                       copy_job_type_id=None,
                       source_big_query_table=source_big_query_table,
                       target_big_query_table=target_big_query_table,
                       create_disposition="CREATE_IF_NEEDED",
                       write_disposition="WRITE_EMPTY",
                       retry_count=0,
                       post_copy_action_request=post_copy_action_request))
def create_backup(self, source_table_entity, bq_table_metadata):
    logging.info(
        'Scheduling copy job for backup, request correlation id:'
        ' %s', request_correlation_id.get())

    # Destination coordinates: the configured backup project, a dataset id
    # derived from the backup time, table location and source project, and
    # a generated backup table id.
    target_project_id = configuration.backup_project_id
    target_dataset_id = DatasetIdCreator.create(
        self.now, bq_table_metadata.get_location(),
        source_table_entity.project_id)
    target_table_id = self.__create_table_id(source_table_entity)

    # For partitioned tables the source table id includes the partition
    # decorator (e.g. "table$20180416").
    source_table_id_with_partition_id = BigQueryTableMetadata \
        .get_table_id_with_partition_id(source_table_entity.table_id,
                                        source_table_entity.partition_id)

    source_bq_table = BigQueryTable(source_table_entity.project_id,
                                    source_table_entity.dataset_id,
                                    source_table_id_with_partition_id)
    destination_bq_table = BigQueryTable(target_project_id,
                                         target_dataset_id,
                                         target_table_id)

    self.__copy_table_async(source_bq_table, destination_bq_table)
def test_return_none_if_calculated_name_is_too_long(self):
    # given
    task_name_suffix = "x" * 501
    copy_job_request = CopyJobRequest(
        task_name_suffix=task_name_suffix,
        copy_job_type_id="unknown-copying",
        source_big_query_table=BigQueryTable('source_project',
                                             'source_dataset',
                                             'source_table'),
        target_big_query_table=BigQueryTable('target_project',
                                             'target_dataset',
                                             'target_table'),
        create_disposition="CREATE_IF_NEEDED",
        write_disposition="WRITE_EMPTY",
        retry_count=0)

    # when
    copy_job_task_name = CopyJobTaskName(copy_job_request).create()

    # then
    self.assertIsNone(copy_job_task_name)
def test_copy_job_creation_throws_error_on_unknown_queue(self, _):
    # when
    with self.assertRaises(UnknownQueueError) as error:
        TaskCreator.create_copy_job(
            copy_job_request=CopyJobRequest(
                task_name_suffix=None,
                copy_job_type_id="unknown-copying",
                source_big_query_table=BigQueryTable('source_project',
                                                     'source_dataset',
                                                     'source_table'),
                target_big_query_table=BigQueryTable('target_project',
                                                     'target_dataset',
                                                     'target_table'),
                create_disposition="CREATE_IF_NEEDED",
                write_disposition="WRITE_EMPTY"
            )
        )

    self.assertEqual(
        error.exception.message,
        "There is no queue 'unknown-copying-copy-job'. Please add "
        "it to your queue.yaml definition.")
def test_creating_task_name(self):
    # given
    copy_job_request = CopyJobRequest(
        task_name_suffix='task_name_suffix',
        copy_job_type_id="unknown-copying",
        source_big_query_table=BigQueryTable('source_project',
                                             'source_dataset',
                                             'source_table'),
        target_big_query_table=BigQueryTable('target_project',
                                             'target_dataset',
                                             'target_table'),
        create_disposition="CREATE_IF_NEEDED",
        write_disposition="WRITE_EMPTY",
        retry_count=0)

    # when
    copy_job_task_name = CopyJobTaskName(copy_job_request).create()

    # then
    self.assertEqual(
        copy_job_task_name,
        '2017-12-06_source_project_source_dataset_source_table_0_task_name_suffix'
    )
def test_should_create_datastore_backup_entity(self, _create_http, _):
    # given
    _create_http.return_value = HttpMockSequence([
        ({'status': '200'},
         content('tests/json_samples/bigquery_v2_test_schema.json')),
        ({'status': '200'},
         content('tests/json_samples/table_get/'
                 'bigquery_partitioned_table_get.json'))
    ])
    table_entity = Table(project_id="source_project_id",
                         dataset_id="source_dataset_id",
                         table_id="source_table_id",
                         partition_id="123")
    table_entity.put()
    source_bq_table = TableReference.from_table_entity(
        table_entity).create_big_query_table()
    destination_bq_table = BigQueryTable("target_project_id",
                                         "target_dataset_id",
                                         "target_table_id")
    data = {
        "sourceBqTable": source_bq_table,
        "targetBqTable": destination_bq_table
    }
    payload = json.dumps({
        "data": data,
        "jobJson": JobResultExample.DONE
    }, cls=RequestEncoder)
    copy_job_result = CopyJobResult(json.loads(payload).get('jobJson'))

    # when
    response = self.under_test.post(
        '/callback/backup-created/project/dataset/table', params=payload)
    backup = table_entity.last_backup

    # then
    self.assertEqual(response.status_int, 200)
    self.assertEqual(backup.dataset_id, "target_dataset_id")
    self.assertEqual(backup.table_id, "target_table_id")
    self.assertTrue(isinstance(backup.created, datetime))
    self.assertEqual(backup.created, copy_job_result.end_time)
    self.assertTrue(isinstance(backup.last_modified, datetime))
    self.assertEqual(backup.last_modified, copy_job_result.start_time)
def create_table(self, projectId, datasetId, body):
    table = BigQueryTable(projectId, datasetId,
                          body.get("tableReference").get("tableId"))
    logging.info("Creating table %s", table)
    logging.info("BODY: %s", json.dumps(body))
    try:
        self.service.tables().insert(projectId=projectId,
                                     datasetId=datasetId,
                                     body=body).execute()
    except HttpError as error:
        # 409 Conflict means the table already exists; treat it as success.
        if error.resp.status == 409:
            logging.info('Table already exists %s', table)
        else:
            raise
def test_should_not_create_backups_entity_if_backup_table_doesnt_exist(
        self, _create_http, error_reporting, _):
    # given
    _create_http.return_value = HttpMockSequence([
        ({'status': '200'},
         content('tests/json_samples/bigquery_v2_test_schema.json')),
        ({'status': '404'},  # Table not found
         content('tests/json_samples/table_get/'
                 'bigquery_partitioned_table_get.json'))
    ])
    table_entity = Table(project_id="source_project_id",
                         dataset_id="source_dataset_id",
                         table_id="source_table_id",
                         partition_id="123")
    table_entity.put()
    source_bq_table = TableReference.from_table_entity(
        table_entity).create_big_query_table()
    destination_bq_table = BigQueryTable("target_project_id",
                                         "target_dataset_id",
                                         "target_table_id")
    data = {
        "sourceBqTable": source_bq_table,
        "targetBqTable": destination_bq_table
    }
    payload = json.dumps({
        "data": data,
        "jobJson": JobResultExample.DONE
    }, cls=RequestEncoder)

    # when
    response = self.under_test.post(
        '/callback/backup-created/project/dataset/table', params=payload)
    backup = table_entity.last_backup

    # then
    self.assertEqual(response.status_int, 200)
    self.assertIsNone(backup)
    error_reporting.assert_called_once()
def get_table(self, project_id, dataset_id, table_id, log_table=True):
    logging.info("getting table %s",
                 BigQueryTable(project_id, dataset_id, table_id))
    try:
        table = self.service.tables().get(
            projectId=project_id,
            datasetId=dataset_id,
            tableId=table_id).execute(num_retries=3)
        if log_table and table:
            self.__log_table(table)
        return table
    except HttpError as ex:
        # 404 means the table does not exist; return None instead of raising.
        if ex.resp.status == 404:
            logging.info("Table '%s' Not Found",
                         TableReference(project_id, dataset_id, table_id))
            return None
        raise ex
def target_bq_table(self):
    return BigQueryTable(self.target_project_id,
                         self.target_dataset_id,
                         self.target_table_id)
def source_bq_table(self):
    return BigQueryTable(self.source_project_id,
                         self.source_dataset_id,
                         self.source_table_id)
def create_example_target_bq_table(self):
    return BigQueryTable("target_project_id_1",
                         "target_dataset_id_1",
                         "target_table_id_1")
def create_example_source_bq_table(self):
    return BigQueryTable("source_project_id_1",
                         "source_dataset_id_1",
                         "source_table_id_1")
def create_big_query_table(self):
    return BigQueryTable(self.project_id,
                         self.dataset_id,
                         self.get_table_id_with_partition_id())