def test_get_datasets_list(self):
    """get_datasets_list should return the 'datasets' list from the API response."""
    expected_result = {'datasets': [
        {
            "kind": "bigquery#dataset",
            "location": "US",
            "id": "your-project:dataset_2_test",
            "datasetReference": {
                "projectId": "your-project",
                "datasetId": "dataset_2_test"
            }
        },
        {
            "kind": "bigquery#dataset",
            "location": "US",
            "id": "your-project:dataset_1_test",
            "datasetReference": {
                "projectId": "your-project",
                "datasetId": "dataset_1_test"
            }
        }
    ]}
    # Fixed: the original had a stray empty-string literal ('') concatenated
    # onto the project id.
    project_id = "project_test"
    mocked = mock.Mock()
    # Patch the service's datasets() resource so list().execute() returns our
    # canned response.
    with mock.patch.object(
            hook.BigQueryBaseCursor(mocked, project_id).service,
            'datasets') as MockService:
        MockService.return_value.list(
            projectId=project_id).execute.return_value = expected_result
        result = hook.BigQueryBaseCursor(
            mocked, "test_create_empty_dataset").get_datasets_list(
            project_id=project_id)
        self.assertEqual(result, expected_result['datasets'])
def test_insert_all_succeed(self):
    """insert_all should send the expected tabledata().insertAll request body."""
    project_id = 'bq-project'
    dataset_id = 'bq_dataset'
    table_id = 'bq_table'
    rows = [{"json": {"a_key": "a_value_0"}}]
    expected_body = {
        "rows": rows,
        "ignoreUnknownValues": False,
        "kind": "bigquery#tableDataInsertAllRequest",
        "skipInvalidRows": False,
    }
    mock_service = mock.Mock()
    insert_method = mock_service.tabledata.return_value.insertAll
    # Simulate a successful insertAll response.
    insert_method.return_value.execute.return_value = {
        "kind": "bigquery#tableDataInsertAllResponse"
    }
    cursor = hook.BigQueryBaseCursor(mock_service, 'project_id')
    cursor.insert_all(project_id, dataset_id, table_id, rows)
    insert_method.assert_called_once_with(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id,
        body=expected_body
    )
def test_patch_dataset(self):
    """patch_dataset should call datasets().patch with the resource as body."""
    dataset_resource = {
        "access": [
            {
                "role": "WRITER",
                "groupByEmail": "*****@*****.**"
            }
        ]
    }
    dataset_id = "test_dataset"
    project_id = "project_test"
    mock_service = mock.Mock()
    patch_method = mock_service.datasets.return_value.patch
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.patch_dataset(
        dataset_id=dataset_id,
        project_id=project_id,
        dataset_resource=dataset_resource
    )
    patch_method.assert_called_once_with(
        projectId=project_id,
        datasetId=dataset_id,
        body=dataset_resource
    )
def test_update_dataset(self):
    """update_dataset should call datasets().update with the resource as body."""
    dataset_resource = {
        "kind": "bigquery#dataset",
        "location": "US",
        "id": "your-project:dataset_2_test",
        "datasetReference": {
            "projectId": "your-project",
            "datasetId": "dataset_2_test"
        }
    }
    dataset_id = "test_dataset"
    project_id = "project_test"
    mock_service = mock.Mock()
    update_method = mock_service.datasets.return_value.update
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.update_dataset(
        dataset_id=dataset_id,
        project_id=project_id,
        dataset_resource=dataset_resource
    )
    update_method.assert_called_once_with(
        projectId=project_id,
        datasetId=dataset_id,
        body=dataset_resource
    )
def test_patch_table_with_kms(self):
    """patch_table should forward the CMEK config as encryptionConfiguration."""
    project_id = 'bq-project'
    dataset_id = 'bq_dataset'
    table_id = 'bq_table'
    encryption_configuration = {
        "kms_key_name": "projects/p/locations/l/keyRings/k/cryptoKeys/c"
    }
    mock_service = mock.Mock()
    patch_method = mock_service.tables.return_value.patch
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.patch_table(
        dataset_id=dataset_id,
        table_id=table_id,
        project_id=project_id,
        encryption_configuration=encryption_configuration
    )
    expected_body = {"encryptionConfiguration": encryption_configuration}
    patch_method.assert_called_once_with(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id,
        body=expected_body
    )
def test_create_empty_table_with_kms(self):
    """create_empty_table should include the CMEK config in the insert body."""
    project_id = "bq-project"
    dataset_id = "bq_dataset"
    table_id = "bq_table"
    schema_fields = [
        {"name": "id", "type": "STRING", "mode": "REQUIRED"}
    ]
    encryption_configuration = {
        "kms_key_name": "projects/p/locations/l/keyRings/k/cryptoKeys/c"
    }
    mock_service = mock.Mock()
    insert_method = mock_service.tables.return_value.insert
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.create_empty_table(
        project_id=project_id,
        dataset_id=dataset_id,
        table_id=table_id,
        schema_fields=schema_fields,
        encryption_configuration=encryption_configuration,
    )
    expected_body = {
        "tableReference": {"tableId": table_id},
        "schema": {"fields": schema_fields},
        "encryptionConfiguration": encryption_configuration,
    }
    insert_method.assert_called_once_with(
        projectId=project_id,
        datasetId=dataset_id,
        body=expected_body
    )
def test_run_with_configuration_location(self):
    """run_with_configuration should poll jobs().get with the job's location."""
    project_id = 'bq-project'
    running_job_id = 'job_vjdi28vskdui2onru23'
    location = 'asia-east1'
    mock_service = mock.Mock()
    get_method = mock_service.jobs.return_value.get
    # The inserted job reports a location in its jobReference ...
    mock_service.jobs.return_value.insert.return_value.execute.return_value = {
        'jobReference': {
            'jobId': running_job_id,
            'location': location
        }
    }
    # ... and the first status poll already reports DONE.
    get_method.return_value.execute.return_value = {
        'status': {
            'state': 'DONE'
        }
    }
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.running_job_id = running_job_id
    cursor.run_with_configuration({})
    get_method.assert_called_once_with(
        projectId=project_id,
        jobId=running_job_id,
        location=location
    )
def test_api_resource_configs_duplication_warning(self):
    """Setting use_legacy_sql both directly and via api_resource_configs raises."""
    with self.assertRaises(ValueError):
        cursor = hook.BigQueryBaseCursor(mock.Mock(), "project_id")
        cursor.run_query(
            'query',
            use_legacy_sql=True,
            api_resource_configs={'query': {'useLegacySql': False}})
def test_api_resource_configs(self, run_with_config):
    """api_resource_configs values should pass through into the query config."""
    for flag in [True, False]:
        cursor = hook.BigQueryBaseCursor(mock.Mock(), "project_id")
        cursor.run_query(
            'query',
            api_resource_configs={'query': {'useQueryCache': flag}})
        args, kwargs = run_with_config.call_args
        # The user-supplied value is passed through unchanged, and the
        # default useLegacySql stays True.
        self.assertIs(args[0]['query']['useQueryCache'], flag)
        self.assertIs(args[0]['query']['useLegacySql'], True)
def test_run_query_sql_dialect_legacy_with_query_params_fails(
        self, mock_run_with_configuration):
    """Legacy SQL does not support query parameters, so run_query must raise."""
    cursor = hook.BigQueryBaseCursor(mock.Mock(), "project_id")
    params = [{
        'name': "param_name",
        'parameterType': {'type': "STRING"},
        'parameterValue': {'value': "param_value"}
    }]
    with self.assertRaises(ValueError):
        cursor.run_query('query', use_legacy_sql=True, query_params=params)
def test_invalid_schema_update_options(self):
    """run_load must reject unknown schema_update_options values."""
    with self.assertRaises(Exception) as context:
        hook.BigQueryBaseCursor("test", "test").run_load(
            "test.test",
            "test_schema.json",
            ["test_data.json"],
            schema_update_options=["THIS IS NOT VALID"]
        )
    # The invalid option should be echoed back in the error message.
    self.assertIn("THIS IS NOT VALID", str(context.exception))
def test_create_empty_dataset_duplicates_call_err(self):
    """Conflicting project ids (argument vs dataset_reference) must raise."""
    with self.assertRaises(ValueError):
        cursor = hook.BigQueryBaseCursor(
            mock.Mock(), "test_create_empty_dataset")
        cursor.create_empty_dataset(
            dataset_id="",
            project_id="project_test",
            dataset_reference={
                "datasetReference": {
                    "datasetId": "test_dataset",
                    "projectId": "project_test2"}})
def test_invalid_source_format(self):
    """run_load must reject an unknown source_format value."""
    with self.assertRaises(Exception) as context:
        hook.BigQueryBaseCursor("test", "test").run_load(
            "test.test",
            "test_schema.json",
            ["test_data.json"],
            source_format="json"
        )
    # We passed 'json', which is not a valid source format; the (uppercased)
    # value should appear in the error string.
    self.assertIn("JSON", str(context.exception))
def test_run_query_sql_dialect_legacy_with_query_params(self, run_with_config):
    """Standard SQL with query parameters should run with useLegacySql False."""
    cursor = hook.BigQueryBaseCursor(mock.Mock(), "project_id")
    params = [{
        'name': "param_name",
        'parameterType': {'type': "STRING"},
        'parameterValue': {'value': "param_value"}
    }]
    cursor.run_query('query', use_legacy_sql=False, query_params=params)
    args, kwargs = run_with_config.call_args
    self.assertIs(args[0]['query']['useLegacySql'], False)
def test_invalid_schema_update_and_write_disposition(self):
    """schema_update_options with WRITE_EMPTY disposition must be rejected."""
    with self.assertRaises(Exception) as context:
        hook.BigQueryBaseCursor("test", "test").run_load(
            "test.test",
            "test_schema.json",
            ["test_data.json"],
            schema_update_options=['ALLOW_FIELD_ADDITION'],
            write_disposition='WRITE_EMPTY'
        )
    self.assertIn("schema_update_options is only", str(context.exception))
def test_run_query_default(self, mocked_rwc):
    """By default, run_query should not set timePartitioning in the config."""
    project_id = 12345

    def check_config(config):
        # Runs as the side effect of the mocked run_with_configuration.
        self.assertIsNone(config['query'].get('timePartitioning'))

    mocked_rwc.side_effect = check_config
    bq_hook = hook.BigQueryBaseCursor(mock.Mock(), project_id)
    bq_hook.run_query(sql='select 1')
    assert mocked_rwc.call_count == 1
def test_location_propagates_properly(self, run_with_config):
    """A location passed to run_query should be stored on the cursor."""
    with mock.patch.object(hook.BigQueryHook, 'get_service'):
        bq_hook = hook.BigQueryHook(location=None)
        self.assertIsNone(bq_hook.location)
        bq_cursor = hook.BigQueryBaseCursor(
            mock.Mock(), 'test-project', location=None)
        self.assertIsNone(bq_cursor.location)
        bq_cursor.run_query(sql='select 1', location='US')
        assert run_with_config.call_count == 1
        # The per-call location should now be sticky on the cursor.
        self.assertEqual(bq_cursor.location, 'US')
def test_delete_dataset(self):
    """delete_dataset should call datasets().delete with deleteContents."""
    project_id = 'bq-project'
    dataset_id = 'bq_dataset'
    delete_contents = True
    mock_service = mock.Mock()
    delete_method = mock_service.datasets.return_value.delete
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.delete_dataset(project_id, dataset_id, delete_contents)
    delete_method.assert_called_once_with(
        projectId=project_id,
        datasetId=dataset_id,
        deleteContents=delete_contents)
def test_create_empty_table_on_exception(self):
    """An HttpError from tables().insert should propagate out of create_empty_table."""
    project_id = 'bq-project'
    dataset_id = 'bq_dataset'
    table_id = 'bq_table'
    mock_service = mock.Mock()
    insert_method = mock_service.tables.return_value.insert
    insert_method.return_value.execute.side_effect = HttpError(
        resp={'status': '400'}, content=b'Bad request')
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    with self.assertRaises(Exception):
        cursor.create_empty_table(project_id, dataset_id, table_id)
def test_invalid_source_format(self):
    """create_external_table must reject an unknown source_format value."""
    with self.assertRaises(Exception) as context:
        hook.BigQueryBaseCursor("test", "test").create_external_table(
            external_project_dataset_table='test.test',
            schema_fields='test_schema.json',
            source_uris=['test_data.json'],
            source_format='json'
        )
    # Fixed comment: we passed 'json' in (the original comment wrongly said
    # 'csv'); since it's not valid, make sure it's present (uppercased) in
    # the error string.
    self.assertIn("JSON", str(context.exception))
def test_run_query_with_kms(self, run_with_config):
    """run_query should put the CMEK config under destinationEncryptionConfiguration."""
    encryption_configuration = {
        "kms_key_name": "projects/p/locations/l/keyRings/k/cryptoKeys/c"
    }
    cursor = hook.BigQueryBaseCursor(mock.Mock(), "project_id")
    cursor.run_query(
        sql='query',
        encryption_configuration=encryption_configuration
    )
    args, kwargs = run_with_config.call_args
    self.assertIs(
        args[0]['query']['destinationEncryptionConfiguration'],
        encryption_configuration
    )
def test_patch_table(self):
    """patch_table should translate every supported kwarg into the patch body."""
    project_id = 'bq-project'
    dataset_id = 'bq_dataset'
    table_id = 'bq_table'
    description_patched = 'Test description.'
    expiration_time_patched = 2524608000000
    friendly_name_patched = 'Test friendly name.'
    labels_patched = {'label1': 'test1', 'label2': 'test2'}
    schema_patched = [
        {'name': 'id', 'type': 'STRING', 'mode': 'REQUIRED'},
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'balance', 'type': 'FLOAT', 'mode': 'NULLABLE'},
        {'name': 'new_field', 'type': 'STRING', 'mode': 'NULLABLE'}
    ]
    time_partitioning_patched = {'expirationMs': 10000000}
    require_partition_filter_patched = True
    mock_service = mock.Mock()
    patch_method = mock_service.tables.return_value.patch
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.patch_table(
        dataset_id, table_id, project_id,
        description=description_patched,
        expiration_time=expiration_time_patched,
        friendly_name=friendly_name_patched,
        labels=labels_patched,
        schema=schema_patched,
        time_partitioning=time_partitioning_patched,
        require_partition_filter=require_partition_filter_patched
    )
    expected_body = {
        "description": description_patched,
        "expirationTime": expiration_time_patched,
        "friendlyName": friendly_name_patched,
        "labels": labels_patched,
        "schema": {"fields": schema_patched},
        "timePartitioning": time_partitioning_patched,
        "requirePartitionFilter": require_partition_filter_patched
    }
    patch_method.assert_called_once_with(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id,
        body=expected_body
    )
def test_run_load_default(self, mocked_rwc):
    """By default, run_load should not set timePartitioning in the config."""
    project_id = 12345

    def check_config(config):
        # Runs as the side effect of the mocked run_with_configuration.
        self.assertIsNone(config['load'].get('timePartitioning'))

    mocked_rwc.side_effect = check_config
    bq_hook = hook.BigQueryBaseCursor(mock.Mock(), project_id)
    bq_hook.run_load(
        destination_project_dataset_table='my_dataset.my_table',
        schema_fields=[],
        source_uris=[],
    )
    assert mocked_rwc.call_count == 1
def test_run_copy_with_kms(self, run_with_config):
    """run_copy should put the CMEK config under destinationEncryptionConfiguration."""
    encryption_configuration = {
        "kms_key_name": "projects/p/locations/l/keyRings/k/cryptoKeys/c"
    }
    cursor = hook.BigQueryBaseCursor(mock.Mock(), "project_id")
    cursor.run_copy(
        source_project_dataset_tables='p.d.st',
        destination_project_dataset_table='p.d.dt',
        encryption_configuration=encryption_configuration
    )
    args, kwargs = run_with_config.call_args
    self.assertIs(
        args[0]['copy']['destinationEncryptionConfiguration'],
        encryption_configuration
    )
def test_run_load_with_kms(self, run_with_config):
    """run_load should put the CMEK config under destinationEncryptionConfiguration."""
    encryption_configuration = {
        "kms_key_name": "projects/p/locations/l/keyRings/k/cryptoKeys/c"
    }
    cursor = hook.BigQueryBaseCursor(mock.Mock(), "project_id")
    cursor.run_load(
        destination_project_dataset_table='p.d.dt',
        source_uris=['abc.csv'],
        autodetect=True,
        encryption_configuration=encryption_configuration
    )
    args, kwargs = run_with_config.call_args
    self.assertIs(
        args[0]['load']['destinationEncryptionConfiguration'],
        encryption_configuration
    )
def test_cancel_queries(self):
    """cancel_query should issue jobs().cancel for the tracked running job."""
    project_id = 12345
    running_job_id = 3
    mock_jobs = mock.Mock()
    mock_jobs.cancel = mock.Mock(side_effect=mock_job_cancel)
    mock_service = mock.Mock()
    mock_service.jobs = mock.Mock(return_value=mock_jobs)
    bq_hook = hook.BigQueryBaseCursor(mock_service, project_id)
    bq_hook.running_job_id = running_job_id
    # Stub out polling so cancel_query does not wait on a real job.
    bq_hook.poll_job_complete = mock.Mock(side_effect=mock_poll_job_complete)
    bq_hook.cancel_query()
    mock_jobs.cancel.assert_called_once_with(
        projectId=project_id, jobId=running_job_id)
def test_run_query_with_arg(self, mocked_rwc):
    """Labels passed to run_query should land in the job configuration."""
    project_id = 12345

    def check_config(config):
        # Runs as the side effect of the mocked run_with_configuration.
        self.assertEqual(
            config['labels'], {'label1': 'test1', 'label2': 'test2'}
        )

    mocked_rwc.side_effect = check_config
    bq_hook = hook.BigQueryBaseCursor(mock.Mock(), project_id)
    bq_hook.run_query(
        sql='select 1',
        destination_dataset_table='my_dataset.my_table',
        labels={'label1': 'test1', 'label2': 'test2'}
    )
    assert mocked_rwc.call_count == 1
def test_create_view_fails_on_exception(self):
    """A malformed view definition should surface the API's HttpError."""
    project_id = 'bq-project'
    dataset_id = 'bq_dataset'
    table_id = 'bq_table_view'
    # Deliberately malformed: no 'query' key in the view definition.
    view = {
        'incorrect_key': 'SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*`',
        "useLegacySql": False
    }
    mock_service = mock.Mock()
    insert_method = mock_service.tables.return_value.insert
    insert_method.return_value.execute.side_effect = HttpError(
        resp={'status': '400'}, content=b'Query is required for views')
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    with self.assertRaises(Exception):
        cursor.create_empty_table(project_id, dataset_id, table_id, view=view)
def test_create_empty_dataset_with_location(self):
    """create_empty_dataset should include the location in the insert body."""
    project_id = 'bq-project'
    dataset_id = 'bq_dataset'
    location = 'EU'
    mock_service = mock.Mock()
    insert_method = mock_service.datasets.return_value.insert
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.create_empty_dataset(
        project_id=project_id,
        dataset_id=dataset_id,
        location=location)
    expected_body = {
        "location": "EU",
        "datasetReference": {
            "datasetId": "bq_dataset",
            "projectId": "bq-project"
        }
    }
    insert_method.assert_called_once_with(
        projectId=project_id, body=expected_body)
def test_create_empty_table_with_extras_succeed(self):
    """create_empty_table should map partitioning and clustering into the body."""
    project_id = 'bq-project'
    dataset_id = 'bq_dataset'
    table_id = 'bq_table'
    schema_fields = [
        {'name': 'id', 'type': 'STRING', 'mode': 'REQUIRED'},
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'created', 'type': 'DATE', 'mode': 'REQUIRED'},
    ]
    time_partitioning = {"field": "created", "type": "DAY"}
    cluster_fields = ['name']
    mock_service = mock.Mock()
    insert_method = mock_service.tables.return_value.insert
    cursor = hook.BigQueryBaseCursor(mock_service, project_id)
    cursor.create_empty_table(
        project_id=project_id,
        dataset_id=dataset_id,
        table_id=table_id,
        schema_fields=schema_fields,
        time_partitioning=time_partitioning,
        cluster_fields=cluster_fields
    )
    expected_body = {
        'tableReference': {'tableId': table_id},
        'schema': {'fields': schema_fields},
        'timePartitioning': time_partitioning,
        'clustering': {'fields': cluster_fields}
    }
    insert_method.assert_called_once_with(
        projectId=project_id,
        datasetId=dataset_id,
        body=expected_body
    )