def test_list_tables(self):
    dataset = Config.CLIENT.dataset(DATASET_NAME)
    self.assertFalse(dataset.exists())
    dataset.create()
    self.to_delete.append(dataset)

    # Retrieve tables before any are created for the dataset.
    all_tables, token = dataset.list_tables()
    self.assertEqual(all_tables, [])
    self.assertEqual(token, None)

    # Insert some tables to be listed.
    tables_to_create = [
        'new%d' % (1000 * time.time(),),
        'newer%d' % (1000 * time.time(),),
        'newest%d' % (1000 * time.time(),),
    ]
    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    for table_name in tables_to_create:
        table = dataset.table(table_name, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

    # Retrieve the tables.
    all_tables, token = dataset.list_tables()
    self.assertTrue(token is None)
    created = [table for table in all_tables
               if (table.name in tables_to_create and
                   table.dataset_name == DATASET_NAME)]
    self.assertEqual(len(created), len(tables_to_create))
def test_update_table(self):
    dataset = Config.CLIENT.dataset(DATASET_NAME)
    self.assertFalse(dataset.exists())
    dataset.create()
    self.to_delete.append(dataset)
    TABLE_NAME = 'test_table'
    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    self.assertFalse(table.exists())
    table.create()
    self.to_delete.insert(0, table)
    self.assertTrue(table.exists())
    voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE')
    schema = table.schema
    schema.append(voter)
    table.schema = schema
    table.update()
    self.assertEqual(len(table.schema), len(schema))
    for found, expected in zip(table.schema, schema):
        self.assertEqual(found.name, expected.name)
        self.assertEqual(found.field_type, expected.field_type)
        self.assertEqual(found.mode, expected.mode)
def test_update_table(self):
    dataset = Config.CLIENT.dataset(DATASET_NAME)
    self.assertFalse(dataset.exists())

    # We need to wait to stay within the rate limits.
    # The alternative outcome is a 403 Forbidden response from upstream.
    # See: https://cloud.google.com/bigquery/quota-policy
    @Retry(Forbidden, tries=2, delay=30)
    def create_dataset():
        dataset.create()

    create_dataset()
    self.to_delete.append(dataset)
    TABLE_NAME = 'test_table'
    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    self.assertFalse(table.exists())
    table.create()
    self.to_delete.insert(0, table)
    self.assertTrue(table.exists())
    voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE')
    schema = table.schema
    schema.append(voter)
    table.schema = schema
    table.update()
    self.assertEqual(len(table.schema), len(schema))
    for found, expected in zip(table.schema, schema):
        self.assertEqual(found.name, expected.name)
        self.assertEqual(found.field_type, expected.field_type)
        self.assertEqual(found.mode, expected.mode)
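# The `Retry` decorator used above is not defined in these snippets.  Below is
# a minimal sketch, assuming it simply re-invokes the wrapped callable when the
# named exception is raised, sleeping `delay` seconds between attempts; the
# real helper in the test suite may differ.
import functools
import time


class Retry(object):
    """Retry the decorated callable on `exception`, up to `tries` attempts."""

    def __init__(self, exception, tries=2, delay=30):
        self.exception = exception
        self.tries = tries
        self.delay = delay

    def __call__(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            remaining = self.tries
            while True:
                try:
                    return func(*args, **kwargs)
                except self.exception:
                    remaining -= 1
                    if remaining <= 0:
                        raise
                    time.sleep(self.delay)
        return wrapper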
def test_create_table(self):
    dataset = Config.CLIENT.dataset(DATASET_NAME)
    self.assertFalse(dataset.exists())
    dataset.create()
    self.to_delete.append(dataset)
    TABLE_NAME = 'test_table'
    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    self.assertFalse(table.exists())
    table.create()
    self.to_delete.insert(0, table)
    self.assertTrue(table.exists())
    self.assertEqual(table.name, TABLE_NAME)
def test_load_table_then_dump_table(self):
    import datetime
    from gcloud._helpers import UTC

    NOW_SECONDS = 1448911495.484366
    NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(
        tzinfo=UTC)
    ROWS = [
        ('Phred Phlyntstone', 32, NOW),
        ('Bharney Rhubble', 33, NOW + datetime.timedelta(seconds=10)),
        ('Wylma Phlyntstone', 29, NOW + datetime.timedelta(seconds=20)),
        ('Bhettye Rhubble', 27, None),
    ]
    ROW_IDS = range(len(ROWS))
    dataset = Config.CLIENT.dataset(DATASET_NAME)
    self.assertFalse(dataset.exists())
    dataset.create()
    self.to_delete.append(dataset)
    TABLE_NAME = 'test_table'
    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    now = bigquery.SchemaField('now', 'TIMESTAMP')
    table = dataset.table(TABLE_NAME, schema=[full_name, age, now])
    self.assertFalse(table.exists())
    table.create()
    self.to_delete.insert(0, table)
    self.assertTrue(table.exists())

    errors = table.insert_data(ROWS, ROW_IDS)
    self.assertEqual(len(errors), 0)

    rows = ()
    counter = 9
    # Allow for 90 seconds of "warm up" before rows visible.  See:
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    while len(rows) == 0 and counter > 0:
        counter -= 1
        rows, _, _ = table.fetch_data()
        if len(rows) == 0:
            time.sleep(10)

    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
def test_patch_table(self):
    dataset = CLIENT.dataset(DATASET_NAME)
    self.assertFalse(dataset.exists())
    dataset.create()
    self.to_delete.append(dataset)
    TABLE_NAME = 'test_table'
    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    self.assertFalse(table.exists())
    table.create()
    self.to_delete.insert(0, table)
    self.assertTrue(table.exists())
    self.assertEqual(table.friendly_name, None)
    self.assertEqual(table.description, None)
    table.patch(friendly_name='Friendly', description='Description')
    self.assertEqual(table.friendly_name, 'Friendly')
    self.assertEqual(table.description, 'Description')
def test_load_table_then_dump_table(self):
    ROWS = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ]
    ROW_IDS = range(len(ROWS))
    DATASET_NAME = 'system_tests'
    dataset = CLIENT.dataset(DATASET_NAME)
    self.assertFalse(dataset.exists())
    dataset.create()
    self.to_delete.append(dataset)
    TABLE_NAME = 'test_table'
    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    self.assertFalse(table.exists())
    table.create()
    self.to_delete.insert(0, table)
    self.assertTrue(table.exists())

    errors = table.insert_data(ROWS, ROW_IDS)
    self.assertEqual(len(errors), 0)

    rows = ()
    counter = 9
    # Allow for 90 seconds of "warm up" before rows visible.  See:
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    while len(rows) == 0 and counter > 0:
        counter -= 1
        rows, _, _ = table.fetch_data()
        if len(rows) == 0:
            time.sleep(10)

    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
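# The fetch-and-sleep loop appears verbatim in both load-and-dump tests above.
# Here is a hypothetical helper that factors out the pattern; the name
# `_fetch_rows_with_retry` is illustrative, not part of the library.
import time


def _fetch_rows_with_retry(table, attempts=9, delay=10):
    """Poll table.fetch_data() until rows are visible or attempts run out.

    Streaming inserts can take up to ~90 seconds to become visible.  See:
    https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    """
    rows = ()
    while len(rows) == 0 and attempts > 0:
        attempts -= 1
        rows, _, _ = table.fetch_data()
        if len(rows) == 0:
            time.sleep(delay)
    return rows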
def test_delete_table(capsys):
    # Create a table to delete.
    bigquery_client = bigquery.Client()
    dataset = bigquery_client.dataset(DATASET_ID)
    table = dataset.table('test_delete_table')

    if not table.exists():
        table.schema = [bigquery.SchemaField('id', 'INTEGER')]
        table.create()

    snippets.delete_table(DATASET_ID, table.name)

    assert not table.exists()
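# The `snippets.delete_table` helper exercised by the test above is not shown
# here.  A plausible sketch, assuming it mirrors the shape of the
# `create_table` snippet below; the real sample may differ.
from google.cloud import bigquery


def delete_table(dataset_name, table_name, project=None):
    """Deletes a table in the given dataset.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    table.delete()

    print('Deleted table {} in dataset {}.'.format(table_name, dataset_name))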
def create_table(dataset_name, table_name, project=None):
    """Creates a simple table in the given dataset.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)

    if not dataset.exists():
        print('Dataset {} does not exist.'.format(dataset_name))
        return

    table = dataset.table(table_name)

    # Set the table schema.
    table.schema = (
        bigquery.SchemaField('Name', 'STRING'),
        bigquery.SchemaField('Age', 'INTEGER'),
        bigquery.SchemaField('Weight', 'FLOAT'),
    )

    table.create()

    print('Created table {} in dataset {}.'.format(table_name, dataset_name))
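# Driven from a script, the snippet above might be invoked as follows; the
# project, dataset, and table names are placeholders.
if __name__ == '__main__':
    create_table('my_dataset', 'my_table', project='my-project')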
def test_load_table_from_storage_then_dump_table(self):
    import csv
    import tempfile
    from gcloud.storage import Client as StorageClient

    TIMESTAMP = 1000 * time.time()
    BUCKET_NAME = 'bq_load_test_%d' % (TIMESTAMP,)
    BLOB_NAME = 'person_ages.csv'
    GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
    ROWS = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ]
    TABLE_NAME = 'test_table'

    s_client = StorageClient()

    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = s_client.create_bucket(BUCKET_NAME)
    self.to_delete.append(bucket)

    blob = bucket.blob(BLOB_NAME)

    with tempfile.TemporaryFile(mode='w+') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(('Full Name', 'Age'))
        writer.writerows(ROWS)
        blob.upload_from_file(
            csv_file, rewind=True, content_type='text/csv')

    self.to_delete.insert(0, blob)

    dataset = Config.CLIENT.dataset(DATASET_NAME)
    dataset.create()
    self.to_delete.append(dataset)

    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    table.create()
    self.to_delete.insert(0, table)

    job = Config.CLIENT.load_table_from_storage(
        'bq_load_storage_test_%d' % (TIMESTAMP,), table, GS_URL)
    job.create_disposition = 'CREATE_NEVER'
    job.skip_leading_rows = 1
    job.source_format = 'CSV'
    job.write_disposition = 'WRITE_EMPTY'

    job.begin()

    counter = 9  # Allow for 90 seconds of lag.
    while job.state not in ('DONE', 'done') and counter > 0:
        counter -= 1
        job.reload()
        if job.state not in ('DONE', 'done'):
            time.sleep(10)

    self.assertTrue(job.state in ('DONE', 'done'))

    rows, _, _ = table.fetch_data()
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
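# The job-polling loop at the end of the test above is another recurring
# pattern.  A hypothetical helper that captures it; `_wait_for_job` is an
# illustrative name, not a library API.
import time


def _wait_for_job(job, attempts=9, delay=10):
    """Reload `job` until it reports DONE or the attempts run out."""
    while job.state not in ('DONE', 'done') and attempts > 0:
        attempts -= 1
        job.reload()
        if job.state not in ('DONE', 'done'):
            time.sleep(delay)
    return job.state in ('DONE', 'done')

# With this helper, the test's polling section reduces to:
#     self.assertTrue(_wait_for_job(job))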
import time

from gcloud import bigquery as bq
# from google.cloud import bigquery as bq
from gcloud.exceptions import Conflict
from oauth2client.client import GoogleCredentials

# Configuration
BILLING_PROJECT_ID = 'onyx-cumulus-196507'
DATASET_NAME = 'bigquery123'
TABLE_NAME = 'airport'
BUCKET_NAME = 'satish123'
FILE = 'airport.csv'
SOURCE = 'https://storage.cloud.google.com/{}/{}'.format(BUCKET_NAME, FILE)

SCHEMA = [
    bq.SchemaField('name', 'STRING', mode='REQUIRED'),
    bq.SchemaField('country', 'STRING', mode='REQUIRED'),
    bq.SchemaField('area_code', 'STRING', mode='REQUIRED'),
    bq.SchemaField('origin', 'STRING', mode='REQUIRED'),
]

# CREDENTIALS = GoogleCredentials.get_application_default()
client = bq.Client(project=BILLING_PROJECT_ID)


# Dataset
# Check if the dataset exists.
def create_datasets(name):
    dataset = client.dataset(name)
    try:
        # Assumed completion of the truncated snippet: create the dataset,
        # tolerating the case where it already exists.
        dataset.create()
        print('Dataset {} created.'.format(name))
    except Conflict:
        print('Dataset {} already exists.'.format(name))