def _set_up_table(self, row_count):
    from google.cloud.spanner import KeySet

    def _row_data(max_index):
        for index in range(max_index):
            yield [
                index,
                'First%09d' % (index,),
                'Last%09d' % (index,),
                '*****@*****.**' % (index,),
            ]

    keyset = KeySet(all_=True)
    retry = RetryInstanceState(_has_all_ddl)
    retry(self._db.reload)()

    session = self._db.session()
    session.create()
    self.to_delete.append(session)

    with session.transaction() as transaction:
        transaction.delete(self.TABLE, keyset)
        transaction.insert(self.TABLE, self.COLUMNS, _row_data(row_count))

    return session, keyset, transaction.committed

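# NOTE: `_has_all_ddl`, the predicate handed to `RetryInstanceState` here and
# in the other Spanner tests, is defined elsewhere in the test module.  A
# minimal sketch of what it might look like, assuming the shared database is
# created from a module-level `DDL_STATEMENTS` list (that name is an
# assumption, not shown in this excerpt):
def _has_all_ddl(database):
    # The database is ready once it reports every DDL statement we submitted.
    return len(database.ddl_statements) == len(DDL_STATEMENTS)
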
def test_job_cancel(self):
    DATASET_NAME = _make_dataset_name('job_cancel')
    JOB_NAME = 'fetch_' + DATASET_NAME
    TABLE_NAME = 'test_table'
    QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME)

    dataset = Config.CLIENT.dataset(DATASET_NAME)
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    table.create()
    self.to_delete.insert(0, table)

    job = Config.CLIENT.run_async_query(JOB_NAME, QUERY)
    job.begin()
    job.cancel()

    def _job_done(instance):
        return instance.state in ('DONE', 'done')

    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

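# NOTE: `retry_403` and `_make_dataset_name`, used by the BigQuery tests here
# and below, are module-level helpers defined outside this excerpt.  A rough
# sketch of plausible definitions (assumptions, not the verbatim helpers,
# including the `test_utils` import paths): `retry_403` re-invokes a call
# while it raises 403 Forbidden (e.g. dataset creation hitting rate limits),
# and `_make_dataset_name` derives a unique dataset ID for each test run.
from google.cloud.exceptions import Forbidden
from test_utils.retry import RetryErrors
from test_utils.system import unique_resource_id

retry_403 = RetryErrors(Forbidden)


def _make_dataset_name(prefix):
    # Default delimiter of unique_resource_id() is dataset-ID friendly.
    return '%s%s' % (prefix, unique_resource_id())
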
def test_transaction_read_and_insert_or_update_then_commit(self):
    from google.cloud.spanner import KeySet
    keyset = KeySet(all_=True)

    retry = RetryInstanceState(_has_all_ddl)
    retry(self._db.reload)()

    session = self._db.session()
    session.create()
    self.to_delete.append(session)

    with session.batch() as batch:
        batch.delete(self.TABLE, keyset)

    with session.transaction() as transaction:
        rows = list(transaction.read(self.TABLE, self.COLUMNS, keyset))
        self.assertEqual(rows, [])

        transaction.insert_or_update(
            self.TABLE, self.COLUMNS, self.ROW_DATA)

        # Inserted rows can't be read until after commit.
        rows = list(transaction.read(self.TABLE, self.COLUMNS, keyset))
        self.assertEqual(rows, [])

    rows = list(session.read(self.TABLE, self.COLUMNS, keyset))
    self._check_row_data(rows)

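# NOTE: `_check_row_data` is a helper on the test class, defined outside this
# excerpt.  A minimal sketch, assuming `self.ROW_DATA` holds the expected rows
# in insertion order (an assumption based on how it is used above):
def _check_row_data(self, row_data):
    # Same number of rows, and each row's column values match the fixture.
    self.assertEqual(len(row_data), len(self.ROW_DATA))
    for found, expected in zip(row_data, self.ROW_DATA):
        self.assertEqual(list(found), list(expected))
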
def test_fetch_delete_subscription_w_deleted_topic(self):
    from google.cloud.iterator import MethodIterator
    TO_DELETE = 'delete-me' + unique_resource_id('-')
    ORPHANED = 'orphaned' + unique_resource_id('-')
    topic = Config.CLIENT.topic(TO_DELETE)
    topic.create()
    subscription = topic.subscription(ORPHANED)
    subscription.create()
    topic.delete()

    def _fetch():
        return list(MethodIterator(Config.CLIENT.list_subscriptions))

    def _found_orphan(result):
        names = [subscription.name for subscription in result]
        return ORPHANED in names

    retry_until_found_orphan = RetryResult(_found_orphan)
    all_subs = retry_until_found_orphan(_fetch)()

    created = [
        subscription for subscription in all_subs
        if subscription.name == ORPHANED
    ]
    self.assertEqual(len(created), 1)
    orphaned = created[0]

    def _no_topic(instance):
        return instance.topic is None

    retry_until_no_topic = RetryInstanceState(_no_topic)
    retry_until_no_topic(orphaned.reload)()

    self.assertIsNone(orphaned.topic)
    orphaned.delete()

def test_load_table_from_local_file_then_dump_table(self):
    import csv
    import operator
    import tempfile

    ROWS = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ]
    TABLE_NAME = 'test_table'

    dataset = Config.CLIENT.dataset(
        _make_dataset_name('load_local_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    table.create()
    self.to_delete.insert(0, table)

    with tempfile.NamedTemporaryFile(mode='w+') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(('Full Name', 'Age'))
        writer.writerows(ROWS)
        csv_file.flush()

        with open(csv_file.name, 'rb') as csv_read:
            job = table.upload_from_file(
                csv_read,
                source_format='CSV',
                skip_leading_rows=1,
                create_disposition='CREATE_NEVER',
                write_disposition='WRITE_EMPTY',
            )

    def _job_done(instance):
        return instance.state.lower() == 'done'

    # Retry until done.
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    self.assertTrue(_job_done(job))
    self.assertEqual(job.output_rows, len(ROWS))

    rows, _, _ = table.fetch_data()
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))

def test_batch_insert_or_update_then_query(self):
    retry = RetryInstanceState(_has_all_ddl)
    retry(self._db.reload)()

    session = self._db.session()
    session.create()
    self.to_delete.append(session)

    with session.batch() as batch:
        batch.insert_or_update(self.TABLE, self.COLUMNS, self.ROW_DATA)

    snapshot = session.snapshot(read_timestamp=batch.committed)
    rows = list(snapshot.execute_sql(self.SQL))
    self._check_row_data(rows)

def test_message_pull_mode_e2e(self):
    import operator
    TOPIC_NAME = 'message-e2e' + unique_resource_id('-')
    topic = Config.CLIENT.topic(TOPIC_NAME, timestamp_messages=True)
    self.assertFalse(topic.exists())
    topic.create()
    self.to_delete.append(topic)

    SUBSCRIPTION_NAME = 'subscribing-now' + unique_resource_id('-')
    subscription = topic.subscription(SUBSCRIPTION_NAME)
    self.assertFalse(subscription.exists())
    subscription.create()
    self.to_delete.append(subscription)

    MESSAGE_1 = b'MESSAGE ONE'
    MESSAGE_2 = b'MESSAGE TWO'
    EXTRA_1 = 'EXTRA 1'
    EXTRA_2 = 'EXTRA 2'
    topic.publish(MESSAGE_1, extra=EXTRA_1)
    topic.publish(MESSAGE_2, extra=EXTRA_2)

    class Hoover(object):

        def __init__(self):
            self.received = []

        def done(self, *dummy):
            return len(self.received) == 2

        def suction(self):
            with subscription.auto_ack(max_messages=2) as ack:
                self.received.extend(ack.values())

    hoover = Hoover()
    retry = RetryInstanceState(hoover.done)
    retry(hoover.suction)()

    message1, message2 = sorted(hoover.received,
                                key=operator.attrgetter('timestamp'))
    self.assertEqual(message1.data, MESSAGE_1)
    self.assertEqual(message1.attributes['extra'], EXTRA_1)
    self.assertIsNotNone(message1.service_timestamp)

    self.assertEqual(message2.data, MESSAGE_2)
    self.assertEqual(message2.attributes['extra'], EXTRA_2)
    self.assertIsNotNone(message2.service_timestamp)

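# NOTE: the `retry = RetryInstanceState(predicate)` / `retry(bound_method)()`
# idiom used throughout these tests comes from the shared retry helpers.  The
# sketch below is illustrative only (the class name, delays, and the failure
# behaviour are assumptions, not the real implementation): it re-invokes the
# wrapped bound method, rechecking a predicate against that method's instance
# between attempts with exponential backoff.  This is why `Hoover.done` above
# accepts `*dummy` -- the predicate is called with the polled instance.
import time


class _RetryInstanceStateSketch(object):
    """Illustrative stand-in for the shared ``RetryInstanceState`` helper."""

    def __init__(self, instance_predicate, max_tries=4, delay=1, backoff=2):
        self.instance_predicate = instance_predicate
        self.max_tries = max_tries
        self.delay = delay
        self.backoff = backoff

    def __call__(self, to_wrap):
        instance = to_wrap.__self__  # the object whose state we poll

        def wrapped(*args, **kwargs):
            delay = self.delay
            for attempt in range(self.max_tries):
                result = to_wrap(*args, **kwargs)
                if self.instance_predicate(instance):
                    return result
                if attempt + 1 < self.max_tries:
                    time.sleep(delay)
                    delay *= self.backoff
            raise AssertionError('Predicate never became true.')

        return wrapped
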
def test_batch_insert_then_read(self):
    from google.cloud.spanner import KeySet
    keyset = KeySet(all_=True)

    retry = RetryInstanceState(_has_all_ddl)
    retry(self._db.reload)()

    session = self._db.session()
    session.create()
    self.to_delete.append(session)

    batch = session.batch()
    batch.delete(self.TABLE, keyset)
    batch.insert(self.TABLE, self.COLUMNS, self.ROW_DATA)
    batch.commit()

    snapshot = session.snapshot(read_timestamp=batch.committed)
    rows = list(snapshot.read(self.TABLE, self.COLUMNS, keyset))
    self._check_row_data(rows)

def test_load_table_from_storage_then_dump_table(self):
    import csv
    import operator
    import tempfile
    from google.cloud.storage import Client as StorageClient

    local_id = unique_resource_id()
    BUCKET_NAME = 'bq_load_test' + local_id
    BLOB_NAME = 'person_ages.csv'
    GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
    ROWS = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ]
    TABLE_NAME = 'test_table'

    s_client = StorageClient()

    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = s_client.create_bucket(BUCKET_NAME)
    self.to_delete.append(bucket)

    blob = bucket.blob(BLOB_NAME)
    with tempfile.TemporaryFile(mode='w+') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(('Full Name', 'Age'))
        writer.writerows(ROWS)
        blob.upload_from_file(
            csv_file, rewind=True, content_type='text/csv')

    self.to_delete.insert(0, blob)

    dataset = Config.CLIENT.dataset(
        _make_dataset_name('load_gcs_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    table.create()
    self.to_delete.insert(0, table)

    job = Config.CLIENT.load_table_from_storage(
        'bq_load_storage_test_' + local_id, table, GS_URL)
    job.create_disposition = 'CREATE_NEVER'
    job.skip_leading_rows = 1
    job.source_format = 'CSV'
    job.write_disposition = 'WRITE_EMPTY'
    job.begin()

    def _job_done(instance):
        return instance.state in ('DONE', 'done')

    # Allow for 90 seconds of "warm up" before rows visible.  See:
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    rows, _, _ = table.fetch_data()
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))