def _load_table_for_dml(self, rows, dataset_name, table_name):
    import csv
    from google.cloud._testing import _NamedTemporaryFile

    dataset = Config.CLIENT.dataset(dataset_name)
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    greeting = bigquery.SchemaField('greeting', 'STRING', mode='NULLABLE')
    table = dataset.table(table_name, schema=[greeting])
    table.create()
    self.to_delete.insert(0, table)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(('Greeting',))
            writer.writerows(rows)

        with open(temp.name, 'rb') as csv_read:
            job = table.upload_from_file(
                csv_read,
                source_format='CSV',
                skip_leading_rows=1,
                create_disposition='CREATE_NEVER',
                write_disposition='WRITE_EMPTY',
            )

    # Retry until done; _job_done is the module-level "job finished"
    # predicate (see the inline definitions in the tests below).
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    self._fetch_single_page(table)
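# Several names used in these tests (Config, retry_403, RetryInstanceState,
# _make_dataset_name, unique_resource_id, _job_done, and the csv / operator /
# os / bigquery imports) are defined at module level in the surrounding test
# files rather than inside the functions shown here.  Below is a minimal,
# hedged sketch of two of those assumed module-level pieces, inferred only
# from how the tests use them; it is illustrative, not the project's actual
# test preamble.

import csv
import operator

from google.cloud import bigquery


class Config(object):
    """Shared test state; CLIENT is assumed to be set to a bigquery.Client()
    instance during test-suite setup (e.g. in setUpModule)."""
    CLIENT = None


def _job_done(instance):
    # Predicate handed to RetryInstanceState(_job_done, max_tries=8): a load
    # job is considered finished once its state reports 'done'
    # (case-insensitive), matching the inline definitions in the tests below.
    return instance.state.lower() == 'done'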
def test_load_table_from_local_file_then_dump_table(self):
    import csv
    from google.cloud._testing import _NamedTemporaryFile

    ROWS = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ]
    TABLE_NAME = 'test_table'

    dataset = Config.CLIENT.dataset(
        _make_dataset_name('load_local_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    table.create()
    self.to_delete.insert(0, table)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(ROWS)

        with open(temp.name, 'rb') as csv_read:
            job = table.upload_from_file(
                csv_read,
                source_format='CSV',
                skip_leading_rows=1,
                create_disposition='CREATE_NEVER',
                write_disposition='WRITE_EMPTY',
            )

    def _job_done(instance):
        return instance.state.lower() == 'done'

    # Retry until done.
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    self.assertEqual(job.output_rows, len(ROWS))

    rows = self._fetch_single_page(table)
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age),
                     sorted(ROWS, key=by_age))
def test_success(self):
    from google.cloud.environment_vars import CREDENTIALS
    from google.cloud._testing import _Monkey
    from google.cloud._testing import _NamedTemporaryFile

    project_id = 'test-project-id'
    payload = '{"%s":"%s"}' % ('project_id', project_id)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as creds_file:
            creds_file.write(payload)

        environ = {CREDENTIALS: temp.name}
        with _Monkey(os, getenv=environ.get):
            result = self._callFUT()

    self.assertEqual(result, project_id)
def _write_csv_to_storage(bucket_name, blob_name, header_row, data_rows):
    import csv
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient

    storage_client = StorageClient()

    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = storage_client.create_bucket(bucket_name)

    blob = bucket.blob(blob_name)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(header_row)
            writer.writerows(data_rows)

        with open(temp.name, 'rb') as csv_read:
            blob.upload_from_file(csv_read, content_type='text/csv')

    return bucket, blob
def test_nix_missing_project_key(self):
    from google.cloud import _helpers as MUT
    from google.cloud._testing import _Monkey
    from google.cloud._testing import _NamedTemporaryFile

    with _NamedTemporaryFile() as temp:
        config_value = '[%s]' % (MUT._GCLOUD_CONFIG_SECTION,)
        with open(temp.name, 'w') as config_file:
            config_file.write(config_value)

        def mock_get_path():
            return temp.name

        with _Monkey(os, name='not-nt'):
            with _Monkey(MUT, _get_nix_config_path=mock_get_path,
                         _USER_ROOT='not-None'):
                result = self._callFUT()

    self.assertEqual(result, None)
def test_windows(self):
    from google.cloud import _helpers as MUT
    from google.cloud._testing import _Monkey
    from google.cloud._testing import _NamedTemporaryFile

    project_id = 'test-project-id'

    with _NamedTemporaryFile() as temp:
        config_value = self.CONFIG_TEMPLATE % (
            MUT._GCLOUD_CONFIG_SECTION,
            MUT._GCLOUD_CONFIG_KEY,
            project_id)
        with open(temp.name, 'w') as config_file:
            config_file.write(config_value)

        def mock_get_path():
            return temp.name

        with _Monkey(os, name='nt'):
            with _Monkey(MUT, _get_windows_config_path=mock_get_path,
                         _USER_ROOT=None):
                result = self._callFUT()

    self.assertEqual(result, project_id)
def test_load_table_from_storage_then_dump_table(self):
    import csv
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient

    local_id = unique_resource_id()
    BUCKET_NAME = 'bq_load_test' + local_id
    BLOB_NAME = 'person_ages.csv'
    GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
    ROWS = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ]
    TABLE_NAME = 'test_table'

    s_client = StorageClient()

    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = s_client.create_bucket(BUCKET_NAME)
    self.to_delete.append(bucket)

    blob = bucket.blob(BLOB_NAME)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(ROWS)

        with open(temp.name, 'rb') as csv_read:
            blob.upload_from_file(csv_read, content_type='text/csv')

    self.to_delete.insert(0, blob)

    dataset = Config.CLIENT.dataset(
        _make_dataset_name('load_gcs_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    table.create()
    self.to_delete.insert(0, table)

    job = Config.CLIENT.load_table_from_storage(
        'bq_load_storage_test_' + local_id, table, GS_URL)
    job.create_disposition = 'CREATE_NEVER'
    job.skip_leading_rows = 1
    job.source_format = 'CSV'
    job.write_disposition = 'WRITE_EMPTY'

    job.begin()

    def _job_done(instance):
        return instance.state in ('DONE', 'done')

    # Allow for 90 seconds of "warm up" before rows visible.  See:
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    rows = self._fetch_single_page(table)
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age),
                     sorted(ROWS, key=by_age))
def test_load_table_from_storage_w_autodetect_schema(self):
    import csv
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient
    from google.cloud.bigquery import SchemaField

    local_id = unique_resource_id()
    bucket_name = 'bq_load_test' + local_id
    blob_name = 'person_ages.csv'
    gs_url = 'gs://{}/{}'.format(bucket_name, blob_name)
    rows = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ] * 100  # BigQuery internally uses the first 100 rows to detect schema
    table_name = 'test_table'

    storage_client = StorageClient()

    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = storage_client.create_bucket(bucket_name)
    self.to_delete.append(bucket)

    blob = bucket.blob(blob_name)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(rows)

        with open(temp.name, 'rb') as csv_read:
            blob.upload_from_file(csv_read, content_type='text/csv')

    self.to_delete.insert(0, blob)

    dataset = Config.CLIENT.dataset(
        _make_dataset_name('load_gcs_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    table = dataset.table(table_name)
    self.to_delete.insert(0, table)

    job = Config.CLIENT.load_table_from_storage(
        'bq_load_storage_test_' + local_id, table, gs_url)
    job.autodetect = True

    job.begin()

    # Allow for 90 seconds of "warm up" before rows visible.  See:
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
    # _job_done is the module-level "job finished" predicate.
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    table.reload()
    field_name = SchemaField(u'Full_Name', u'string', u'NULLABLE', None, ())
    field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ())
    self.assertEqual(table.schema, [field_name, field_age])

    actual_rows = self._fetch_single_page(table)
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(actual_rows, key=by_age),
                     sorted(rows, key=by_age))