def test_batch_insert_then_read_all_datatypes(self):
    """Round-trip rows covering every supported datatype via one batch."""
    # Block until the database reports that all DDL has been applied.
    ddl_retry = RetryInstanceState(_has_all_ddl)
    ddl_retry(self._db.reload)()

    sess = self._db.session()
    sess.create()
    self.to_delete.append(sess)

    # Empty the table, then write the all-datatypes fixture in one batch.
    with sess.batch() as batch:
        batch.delete(self.ALL_TYPES_TABLE, self.ALL)
        batch.insert(
            self.ALL_TYPES_TABLE,
            self.ALL_TYPES_COLUMNS,
            self.ALL_TYPES_ROWDATA,
        )

    # Read at the batch commit timestamp so the write is guaranteed visible.
    snapshot = sess.snapshot(read_timestamp=batch.committed)
    rows = list(
        snapshot.read(self.ALL_TYPES_TABLE, self.ALL_TYPES_COLUMNS, self.ALL))
    self._check_row_data(rows, expected=self.ALL_TYPES_ROWDATA)
def test_batch_insert_then_read(self):
    """Insert via an explicitly committed batch, then read the rows back."""
    from google.cloud.spanner import KeySet

    all_keys = KeySet(all_=True)
    ddl_retry = RetryInstanceState(_has_all_ddl)
    ddl_retry(self._db.reload)()

    sess = self._db.session()
    sess.create()
    self.to_delete.append(sess)

    # Deliberately exercises the explicit commit() path instead of the
    # `with sess.batch()` context-manager form used by sibling tests.
    batch = sess.batch()
    batch.delete(self.TABLE, all_keys)
    batch.insert(self.TABLE, self.COLUMNS, self.ROW_DATA)
    batch.commit()

    snapshot = sess.snapshot(read_timestamp=batch.committed)
    rows = list(snapshot.read(self.TABLE, self.COLUMNS, all_keys))
    self._check_row_data(rows)
def _set_up_table(self, row_count, db=None):
    """Populate ``self.TABLE`` with ``row_count`` rows in one transaction.

    Args:
        row_count: number of rows produced by ``self._row_data``.
        db: database to populate; defaults to ``self._db``.

    Returns:
        ``(session, committed)`` — the session used and the commit timestamp.
    """
    target_db = self._db if db is None else db

    ddl_retry = RetryInstanceState(_has_all_ddl)
    ddl_retry(target_db.reload)()

    session = target_db.session()
    session.create()
    self.to_delete.append(session)

    def _populate(transaction, test):
        # Clear any residue from earlier runs, then insert fresh rows.
        transaction.delete(test.TABLE, test.ALL)
        transaction.insert(
            test.TABLE, test.COLUMNS, test._row_data(row_count))

    committed = session.run_in_transaction(_populate, test=self)
    return session, committed
def test_db_run_in_transaction_then_snapshot_execute_sql(self):
    """Write via ``Database.run_in_transaction``, read back via SQL snapshot."""
    retry = RetryInstanceState(_has_all_ddl)
    retry(self._db.reload)()

    # Start from an empty table.
    with self._db.batch() as batch:
        batch.delete(self.TABLE, self.ALL)

    def _unit_of_work(transaction, test):
        # Consistency fix: use the passed ``test`` handle throughout
        # (was ``self.ALL`` via closure; identical behavior since
        # ``test is self``, but the mixed access was confusing).
        rows = list(transaction.read(test.TABLE, test.COLUMNS, test.ALL))
        test.assertEqual(rows, [])
        transaction.insert_or_update(
            test.TABLE, test.COLUMNS, test.ROW_DATA)

    self._db.run_in_transaction(_unit_of_work, test=self)

    # A fresh snapshot taken after commit must see the inserted rows.
    with self._db.snapshot() as after:
        rows = list(after.execute_sql(self.SQL))
    self._check_row_data(rows)
def _transaction_concurrency_helper(self, unit_of_work, pkey):
    """Run ``unit_of_work`` from several threads, each in its own session,
    then verify the counter row advanced exactly once per thread."""
    INITIAL_VALUE = 123
    NUM_THREADS = 3  # conforms to equivalent Java systest.

    ddl_retry = RetryInstanceState(_has_all_ddl)
    ddl_retry(self._db.reload)()

    session = self._db.session()
    session.create()
    self.to_delete.append(session)

    # Seed the counter row.
    with session.batch() as batch:
        batch.insert_or_update(
            COUNTERS_TABLE, COUNTERS_COLUMNS, [[pkey, INITIAL_VALUE]])

    # We don't want to run the threads' transactions in the current
    # session, which would fail -- give each worker its own session.
    workers = []
    for _ in range(NUM_THREADS):
        txn_session = self._db.session()
        txn_session.create()
        self.to_delete.append(txn_session)
        workers.append(
            threading.Thread(
                target=txn_session.run_in_transaction,
                args=(unit_of_work, pkey)))

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Every transaction must have applied exactly once.
    keyset = KeySet(keys=[(pkey,)])
    rows = list(session.read(COUNTERS_TABLE, COUNTERS_COLUMNS, keyset))
    self.assertEqual(len(rows), 1)
    _, value = rows[0]
    self.assertEqual(value, INITIAL_VALUE + len(workers))
def _set_up_table(self, row_count):
    """Populate ``self.TABLE`` with ``row_count`` generated rows.

    Returns:
        ``(session, keyset, committed)`` — the session used, an all-keys
        ``KeySet``, and the commit timestamp of the populating transaction.
    """
    from google.cloud.spanner import KeySet

    def _row_data(max_index):
        # Deterministic fixture rows: [id, first, last, email].
        # Fixed two broken format strings: 'Last09%d' had lost its
        # zero-pad specifier (should mirror 'First%09d'), and the email
        # literal had no '%' conversion at all, so applying '% (index,)'
        # raised "TypeError: not all arguments converted" on the first
        # yielded row.
        for index in range(max_index):
            yield [
                index,
                'First%09d' % (index,),
                'Last%09d' % (index,),
                'test-%09d@example.com' % (index,),
            ]

    keyset = KeySet(all_=True)
    retry = RetryInstanceState(_has_all_ddl)
    retry(self._db.reload)()

    session = self._db.session()
    session.create()
    self.to_delete.append(session)

    with session.transaction() as transaction:
        transaction.delete(self.TABLE, keyset)
        transaction.insert(self.TABLE, self.COLUMNS, _row_data(row_count))

    return session, keyset, transaction.committed
def test_batch_insert_then_read_string_array_of_string(self):
    """Round-trip rows whose last column is an array of (nullable) strings."""
    TABLE = 'string_plus_array_of_string'
    COLUMNS = ['id', 'name', 'tags']
    # Covers NULL row, populated array, empty array, and NULL-in-array.
    ROWDATA = [
        (0, None, None),
        (1, 'phred', ['yabba', 'dabba', 'do']),
        (2, 'bharney', []),
        (3, 'wylma', ['oh', None, 'phred']),
    ]

    ddl_retry = RetryInstanceState(_has_all_ddl)
    ddl_retry(self._db.reload)()

    sess = self._db.session()
    sess.create()
    self.to_delete.append(sess)

    with sess.batch() as batch:
        batch.delete(TABLE, self.ALL)
        batch.insert(TABLE, COLUMNS, ROWDATA)

    # Read at the commit timestamp and compare against the fixture.
    snapshot = sess.snapshot(read_timestamp=batch.committed)
    rows = list(snapshot.read(TABLE, COLUMNS, self.ALL))
    self._check_row_data(rows, expected=ROWDATA)
def test_db_run_in_transaction_twice_4181(self):
    """Re-running an identical insert surfaces ALREADY_EXISTS (issue 4181)."""
    retry = RetryInstanceState(_has_all_ddl)
    retry(self._db.reload)()

    with self._db.batch() as batch:
        batch.delete(COUNTERS_TABLE, self.ALL)

    def _insert_row(transaction, name):
        transaction.insert(COUNTERS_TABLE, COUNTERS_COLUMNS, [[name, 0]])

    self._db.run_in_transaction(_insert_row, name='id_1')

    # The second identical insert must fail: the retry machinery gives
    # up with a RetryError whose cause carries ALREADY_EXISTS.
    with self.assertRaises(errors.RetryError) as caught:
        self._db.run_in_transaction(_insert_row, name='id_1')
    self.assertEqual(
        exc_to_code(caught.exception.cause), StatusCode.ALREADY_EXISTS)

    # A distinct key still succeeds after the failed attempt.
    self._db.run_in_transaction(_insert_row, name='id_2')

    with self._db.snapshot() as after:
        rows = list(after.read(COUNTERS_TABLE, COUNTERS_COLUMNS, self.ALL))
    self.assertEqual(len(rows), 2)
def test_transaction_read_and_insert_or_update_then_commit(self):
    """A transaction's own pending writes are invisible until commit."""
    ddl_retry = RetryInstanceState(_has_all_ddl)
    ddl_retry(self._db.reload)()

    sess = self._db.session()
    sess.create()
    self.to_delete.append(sess)

    with sess.batch() as batch:
        batch.delete(self.TABLE, self.ALL)

    with sess.transaction() as transaction:
        # The table was just emptied, so the first read sees nothing.
        self.assertEqual(
            list(transaction.read(self.TABLE, self.COLUMNS, self.ALL)), [])
        transaction.insert_or_update(
            self.TABLE, self.COLUMNS, self.ROW_DATA)
        # Inserted rows can't be read until after commit.
        self.assertEqual(
            list(transaction.read(self.TABLE, self.COLUMNS, self.ALL)), [])

    # After the context manager commits, the rows are visible.
    rows = list(sess.read(self.TABLE, self.COLUMNS, self.ALL))
    self._check_row_data(rows)
def test_job_cancel(self):
    """Start an async query job, cancel it, and poll until it settles."""
    DATASET_NAME = _make_dataset_name('job_cancel')
    JOB_NAME = 'fetch_' + DATASET_NAME
    TABLE_NAME = 'test_table'
    QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME)

    dataset = Config.CLIENT.dataset(DATASET_NAME)
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    schema = [
        bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
    ]
    table = dataset.table(TABLE_NAME, schema=schema)
    table.create()
    # Front of the cleanup list so the table goes before its dataset.
    self.to_delete.insert(0, table)

    job = Config.CLIENT.run_async_query(JOB_NAME, QUERY)
    job.begin()
    job.cancel()

    # NOTE(review): `_job_done` is not defined in this function; it is
    # presumably a module-level predicate -- confirm it exists in the
    # full file.  Poll with backoff until the job reaches a done state.
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()
def test_load_table_from_storage_then_dump_table(self):
    """Upload a CSV to GCS, load it into a BigQuery table, read rows back."""
    import csv
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient

    local_id = unique_resource_id()
    BUCKET_NAME = 'bq_load_test' + local_id
    BLOB_NAME = 'person_ages.csv'
    GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
    ROWS = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ]
    TABLE_NAME = 'test_table'

    s_client = StorageClient()
    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = s_client.create_bucket(BUCKET_NAME)
    self.to_delete.append(bucket)

    blob = bucket.blob(BLOB_NAME)
    # Write the fixture rows (with a header line) to a local temp CSV,
    # then upload that file to the bucket.
    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(ROWS)
        with open(temp.name, 'rb') as csv_read:
            blob.upload_from_file(csv_read, content_type='text/csv')
    # Inserted at index 0 -- presumably so the blob is cleaned up before
    # its parent bucket; confirm tearDown walks to_delete front-to-back.
    self.to_delete.insert(0, blob)

    dataset = Config.CLIENT.dataset(
        _make_dataset_name('load_gcs_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    full_name = bigquery.SchemaField('full_name', 'STRING',
                                     mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    table.create()
    self.to_delete.insert(0, table)

    # Configure the load job: table must pre-exist, skip the CSV header,
    # and refuse to load into a non-empty table.
    job = Config.CLIENT.load_table_from_storage(
        'bq_load_storage_test_' + local_id, table, GS_URL)
    job.create_disposition = 'CREATE_NEVER'
    job.skip_leading_rows = 1
    job.source_format = 'CSV'
    job.write_disposition = 'WRITE_EMPTY'
    job.begin()

    def _job_done(instance):
        # Predicate for RetryInstanceState: load job reached a done state.
        return instance.state in ('DONE', 'done')

    # Allow for 90 seconds of "warm up" before rows visible.  See:
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    # Compare loaded rows to the fixture, order-insensitively (sort by age).
    rows = self._fetch_single_page(table)
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age),
                     sorted(ROWS, key=by_age))
def test_load_table_from_storage_w_autodetect_schema(self):
    """Load a CSV with schema autodetection; verify detected schema and rows."""
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient
    from google.cloud.bigquery import SchemaField

    local_id = unique_resource_id()
    bucket_name = 'bq_load_test' + local_id
    blob_name = 'person_ages.csv'
    gs_url = 'gs://{}/{}'.format(bucket_name, blob_name)
    # BigQuery internally uses the first 100 rows to detect schema
    rows = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ] * 100
    table_name = 'test_table'

    gcs = StorageClient()
    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = gcs.create_bucket(bucket_name)
    self.to_delete.append(bucket)

    # Write the fixture CSV locally, then upload it to the bucket.
    blob = bucket.blob(blob_name)
    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as out:
            sheet = csv.writer(out)
            sheet.writerow(('Full Name', 'Age'))
            sheet.writerows(rows)
        with open(temp.name, 'rb') as source:
            blob.upload_from_file(source, content_type='text/csv')
    self.to_delete.insert(0, blob)

    dataset = Config.CLIENT.dataset(
        _make_dataset_name('load_gcs_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    # No schema supplied here: the job's autodetect must infer it.
    table = dataset.table(table_name)
    self.to_delete.insert(0, table)

    job = Config.CLIENT.load_table_from_storage(
        'bq_load_storage_test_' + local_id, table, gs_url)
    job.autodetect = True
    job.begin()

    # Allow for 90 seconds of "warm up" before rows visible.  See
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
    # NOTE(review): `_job_done` is not defined in this function;
    # presumably a module-level predicate -- confirm in the full file.
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    # The detected schema should name columns from the CSV header.
    table.reload()
    expected_schema = [
        SchemaField(u'Full_Name', u'string', u'NULLABLE', None, ()),
        SchemaField(u'Age', u'integer', u'NULLABLE', None, ()),
    ]
    self.assertEqual(table.schema, expected_schema)

    # Compare loaded rows to the fixture, order-insensitively.
    actual_rows = self._fetch_single_page(table)
    by_age = operator.itemgetter(1)
    self.assertEqual(
        sorted(actual_rows, key=by_age), sorted(rows, key=by_age))
# Shared fixture payload (its consuming tests are not visible in this chunk).
signing_blob_content = b"This time for sure, Rocky!"


def _bad_copy(bad_request):
    """Predicate: pass only exceptions for a failed copyTo."""
    err_msg = bad_request.message
    return err_msg.startswith(
        "No file found in request. (POST") and "copyTo" in err_msg


def _no_event_based_hold(blob):
    # Predicate: the blob's event-based hold has been released.
    return not blob.event_based_hold


# Retry helpers built on the predicates above.
retry_bad_copy = RetryErrors(exceptions.BadRequest,
                             error_predicate=_bad_copy)
retry_no_event_based_hold = RetryInstanceState(_no_event_based_hold)


def unique_name(prefix):
    # Append a unique suffix so concurrent test runs don't collide.
    return prefix + unique_resource_id("-")


def empty_bucket(bucket):
    # Best-effort delete of every blob (all versions); a blob that is
    # already gone (NotFound) is fine.
    for blob in list(bucket.list_blobs(versions=True)):
        try:
            blob.delete()
        except exceptions.NotFound:
            pass


def delete_blob(blob):
    # NOTE(review): this body looks mis-merged -- it duplicates
    # ``_bad_copy`` and references ``bad_request``, which is not a
    # parameter of ``delete_blob`` (would raise NameError if called).
    # Confirm against the original source before relying on it.
    """Predicate: pass only exceptions for a failed copyTo."""
    err_msg = bad_request.message
    return err_msg.startswith(
        "No file found in request. (POST") and "copyTo" in err_msg


# NOTE(review): the definitions below re-declare names already bound
# above (the later binding wins at import time); this looks like an
# artifact of merging two versions of the same helper module.
def _no_event_based_hold(blob):
    # Predicate: the blob's event-based hold has been released.
    return not blob.event_based_hold


def _has_kms_key_name(blob):
    # Predicate: the blob has been assigned a KMS key.
    return blob.kms_key_name is not None


retry_bad_copy = RetryErrors(exceptions.BadRequest,
                             error_predicate=_bad_copy)
retry_no_event_based_hold = RetryInstanceState(_no_event_based_hold)
retry_has_kms_key_name = RetryInstanceState(_has_kms_key_name)


def unique_name(prefix):
    # Append a unique suffix so concurrent test runs don't collide.
    return prefix + unique_resource_id("-")


def empty_bucket(bucket):
    # Best-effort delete of every blob (all versions); a blob that is
    # already gone (NotFound) is fine.
    for blob in list(bucket.list_blobs(versions=True)):
        try:
            blob.delete()
        except exceptions.NotFound:
            pass