def test_temporary_dataset_is_unique(self, patched_time_sleep):
    """Expect a RuntimeError when the dataset lookup already returns a dataset.

    ``patched_time_sleep`` is not used by the test body; presumably it is
    injected by a patch decorator outside this view -- confirm at the
    definition site.
    """
    existing_reference = bigquery.DatasetReference(
        projectId='project_id', datasetId='dataset_id')

    # The stubbed client reports a dataset for every Get call.
    fake_client = mock.Mock()
    fake_client.datasets.Get.return_value = bigquery.Dataset(
        datasetReference=existing_reference)
    bq_wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(fake_client)

    self.assertRaises(
        RuntimeError,
        bq_wrapper.create_temporary_dataset,
        'project_id',
        'location')

    # The failure must come after the wrapper actually queried the client.
    self.assertTrue(fake_client.datasets.Get.called)
def test_get_or_create_dataset_created(self):
    """A 404 from the dataset lookup makes the wrapper return the inserted dataset."""
    not_found = HttpError(response={'status': '404'}, url='', content='')
    inserted_dataset = bigquery.Dataset(
        datasetReference=bigquery.DatasetReference(
            projectId='project_id', datasetId='dataset_id'))

    # Get fails with "not found"; Insert hands back the dataset built above.
    fake_client = mock.Mock()
    fake_client.datasets.Get.side_effect = not_found
    fake_client.datasets.Insert.return_value = inserted_dataset

    bq_wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(fake_client)
    result = bq_wrapper.get_or_create_dataset('project_id', 'dataset_id')

    self.assertEqual(result.datasetReference.datasetId, 'dataset_id')
def get_or_create_dataset(self, project_id, dataset_id, location=None):
    """Return the dataset ``project_id.dataset_id``, creating it if it is missing.

    The dataset is first looked up through ``self.client.datasets.Get``. If
    that lookup fails with an HTTP 404, a new dataset is inserted and the
    Insert response is returned instead.

    Args:
      project_id: Project that owns (or will own) the dataset.
      dataset_id: Identifier of the dataset to fetch or create.
      location: Optional location assigned to the dataset when it has to be
        created. Ignored when the dataset already exists. When None, no
        location is set on the insert request.

    Returns:
      The dataset returned by the Get call, or the response of the Insert
      call when the dataset had to be created.

    Raises:
      HttpError: If the lookup fails with any status other than 404, or if
        the Insert call itself fails.
    """
    lookup_request = bigquery.BigqueryDatasetsGetRequest(
        projectId=project_id, datasetId=dataset_id)
    try:
        return self.client.datasets.Get(lookup_request)
    except HttpError as exn:
        # Only "not found" means the dataset should be created; every other
        # failure is propagated unchanged to the caller.
        if exn.status_code != 404:
            raise

    dataset = bigquery.Dataset(
        datasetReference=bigquery.DatasetReference(
            projectId=project_id, datasetId=dataset_id))
    if location is not None:
        dataset.location = location

    insert_request = bigquery.BigqueryDatasetsInsertRequest(
        projectId=project_id, dataset=dataset)
    # The response is a bigquery.Dataset instance.
    return self.client.datasets.Insert(insert_request)
def test_raise_error_if_dataset_not_exists(self):
    """Check the helper for a found dataset, a 404 lookup, and a 401 lookup."""
    fake_client = mock.Mock()
    fake_client.datasets.Get.return_value = bigquery.Dataset(
        datasetReference=bigquery.DatasetReference(
            projectId='project', datasetId='dataset'))

    # Lookup succeeds: the helper must return without raising.
    bigquery_util.raise_error_if_dataset_not_exists(
        fake_client, 'project', 'dataset')

    # 404 is expected to surface as ValueError; 401 must propagate as the
    # original HttpError.
    failure_cases = (
        ('404', ValueError),
        ('401', exceptions.HttpError),
    )
    for status, expected_error in failure_cases:
        fake_client.datasets.Get.side_effect = exceptions.HttpError(
            response={'status': status}, url='', content='')
        with self.assertRaises(expected_error):
            bigquery_util.raise_error_if_dataset_not_exists(
                fake_client, 'project', 'dataset')