def test_scale_bigtable(instance):
    bigtable_client = bigtable.Client(admin=True)

    instance = bigtable_client.instance(BIGTABLE_INSTANCE)
    instance.reload()

    cluster = instance.cluster(BIGTABLE_INSTANCE)

    _nonzero_node_count = RetryInstanceState(
        instance_predicate=lambda c: c.serve_nodes > 0,
        max_tries=10,
    )
    _nonzero_node_count(cluster.reload)()

    original_node_count = cluster.serve_nodes

    scale_bigtable(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, True)

    expected_count = original_node_count + SIZE_CHANGE_STEP
    _scaled_node_count = RetryInstanceState(
        instance_predicate=lambda c: c.serve_nodes == expected_count,
        max_tries=10,
    )
    _scaled_node_count(cluster.reload)()

    scale_bigtable(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, False)

    _restored_node_count = RetryInstanceState(
        instance_predicate=lambda c: c.serve_nodes == original_node_count,
        max_tries=10,
    )
    _restored_node_count(cluster.reload)()
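
All of these snippets lean on the same pattern: RetryInstanceState wraps a bound reload method and keeps calling it until a predicate about the owning object holds. The helper itself is not shown on this page; the sketch below is a minimal, illustrative stand-in reconstructed from the call sites above (the class name SimpleRetryInstanceState and its defaults are assumptions, not the real test_utils implementation, which also supports logging).

import functools
import time


class SimpleRetryInstanceState(object):
    """Minimal sketch of a retry-until-predicate helper (illustrative only)."""

    def __init__(self, instance_predicate, max_tries=4, delay=1, backoff=2):
        self.instance_predicate = instance_predicate
        self.max_tries = max_tries
        self.delay = delay
        self.backoff = backoff

    def __call__(self, to_wrap):
        # ``to_wrap`` is expected to be a bound method such as ``cluster.reload``;
        # the predicate is evaluated against the method's owning instance.
        instance = to_wrap.__self__

        @functools.wraps(to_wrap)
        def wrapped(*args, **kwargs):
            delay = self.delay
            for _ in range(self.max_tries):
                result = to_wrap(*args, **kwargs)
                if self.instance_predicate(instance):
                    return result
                time.sleep(delay)
                delay *= self.backoff
            raise RuntimeError('instance never reached the expected state')

        return wrapped

Under that reading, _nonzero_node_count(cluster.reload)() above keeps reloading the cluster until serve_nodes > 0 or the ten tries are exhausted.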
Example #2
    def test_load_table_from_local_avro_file_then_dump_table(self):
        TABLE_NAME = 'test_table_avro'
        ROWS = [("violet", 400), ("indigo", 445), ("blue", 475),
                ("green", 510), ("yellow", 570), ("orange", 590), ("red", 650)]

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_local_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        table = dataset.table(TABLE_NAME)
        self.to_delete.insert(0, table)

        with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof:
            job = table.upload_from_file(avrof,
                                         source_format='AVRO',
                                         write_disposition='WRITE_TRUNCATE')

        def _job_done(instance):
            return instance.state.lower() == 'done'

        # Retry until done.
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        self.assertEqual(job.output_rows, len(ROWS))

        # Reload table to get the schema before fetching the rows.
        table.reload()
        rows = self._fetch_single_page(table)
        by_wavelength = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_wavelength),
                         sorted(ROWS, key=by_wavelength))
Example #3
    def _set_up_table(self, row_count):
        from google.cloud.spanner import KeySet

        def _row_data(max_index):
            for index in range(max_index):
                yield [
                    index,
                    'First%09d' % (index, ),
                    'Last%09d' % (index, ),
                    '*****@*****.**' % (index, )
                ]

        keyset = KeySet(all_=True)

        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.transaction() as transaction:
            transaction.delete(self.TABLE, keyset)
            transaction.insert(self.TABLE, self.COLUMNS, _row_data(row_count))

        return session, keyset, transaction.committed
    def _load_table_for_dml(self, rows, dataset_name, table_name):
        from google.cloud._testing import _NamedTemporaryFile

        dataset = Config.CLIENT.dataset(dataset_name)
        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        greeting = bigquery.SchemaField('greeting', 'STRING', mode='NULLABLE')
        table = dataset.table(table_name, schema=[greeting])
        table.create()
        self.to_delete.insert(0, table)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Greeting', ))
                writer.writerows(rows)

            with open(temp.name, 'rb') as csv_read:
                job = table.upload_from_file(
                    csv_read,
                    source_format='CSV',
                    skip_leading_rows=1,
                    create_disposition='CREATE_NEVER',
                    write_disposition='WRITE_EMPTY',
                )

        # Retry until done.
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()
        self._fetch_single_page(table)
Example #5
    def test_job_cancel(self):
        DATASET_NAME = _make_dataset_name('job_cancel')
        JOB_NAME = 'fetch_' + DATASET_NAME
        TABLE_NAME = 'test_table'
        QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME)

        dataset = Config.CLIENT.dataset(DATASET_NAME)

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name', 'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        job = Config.CLIENT.run_async_query(JOB_NAME, QUERY)
        job.begin()
        job.cancel()

        def _job_done(instance):
            return instance.state in ('DONE', 'done')

        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()
Example #6
    def test_transaction_read_and_insert_or_update_then_commit(self):
        from google.cloud.spanner import KeySet
        keyset = KeySet(all_=True)

        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.delete(self.TABLE, keyset)

        with session.transaction() as transaction:
            rows = list(transaction.read(self.TABLE, self.COLUMNS, keyset))
            self.assertEqual(rows, [])

            transaction.insert_or_update(
                self.TABLE, self.COLUMNS, self.ROW_DATA)

            # Inserted rows can't be read until after commit.
            rows = list(transaction.read(self.TABLE, self.COLUMNS, keyset))
            self.assertEqual(rows, [])

        rows = list(session.read(self.TABLE, self.COLUMNS, keyset))
        self._check_row_data(rows)
    def test_transaction_read_and_insert_then_rollback(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.delete(self.TABLE, self.ALL)

        transaction = session.transaction()
        transaction.begin()

        rows = list(transaction.read(self.TABLE, self.COLUMNS, self.ALL))
        self.assertEqual(rows, [])

        transaction.insert(self.TABLE, self.COLUMNS, self.ROW_DATA)

        # Inserted rows can't be read until after commit.
        rows = list(transaction.read(self.TABLE, self.COLUMNS, self.ALL))
        self.assertEqual(rows, [])
        transaction.rollback()

        rows = list(session.read(self.TABLE, self.COLUMNS, self.ALL))
        self.assertEqual(rows, [])
    def test_transaction_read_w_abort(self):

        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()

        trigger = _ReadAbortTrigger()

        with session.batch() as batch:
            batch.delete(COUNTERS_TABLE, self.ALL)
            batch.insert(COUNTERS_TABLE, COUNTERS_COLUMNS,
                         [[trigger.KEY1, 0], [trigger.KEY2, 0]])

        provoker = threading.Thread(target=trigger.provoke_abort,
                                    args=(self._db, ))
        handler = threading.Thread(target=trigger.handle_abort,
                                   args=(self._db, ))

        provoker.start()
        trigger.provoker_started.wait()

        handler.start()
        trigger.handler_done.wait()

        provoker.join()
        handler.join()

        rows = list(session.read(COUNTERS_TABLE, COUNTERS_COLUMNS, self.ALL))
        self._check_row_data(rows,
                             expected=[[trigger.KEY1, 1], [trigger.KEY2, 1]])
    def test_db_batch_insert_then_db_snapshot_read(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        with self._db.batch() as batch:
            batch.delete(self.TABLE, self.ALL)
            batch.insert(self.TABLE, self.COLUMNS, self.ROW_DATA)

        with self._db.snapshot(read_timestamp=batch.committed) as snapshot:
            from_snap = list(snapshot.read(self.TABLE, self.COLUMNS, self.ALL))

        self._check_row_data(from_snap)
Example #10
    def test_load_table_from_local_file_then_dump_table(self):
        import csv
        from google.cloud._testing import _NamedTemporaryFile

        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        TABLE_NAME = 'test_table'

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_local_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name', 'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Full Name', 'Age'))
                writer.writerows(ROWS)

            with open(temp.name, 'rb') as csv_read:
                job = table.upload_from_file(
                    csv_read,
                    source_format='CSV',
                    skip_leading_rows=1,
                    create_disposition='CREATE_NEVER',
                    write_disposition='WRITE_EMPTY',
                )

        def _job_done(instance):
            return instance.state.lower() == 'done'

        # Retry until done.
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        self.assertEqual(job.output_rows, len(ROWS))

        rows = self._fetch_single_page(table)
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age),
                         sorted(ROWS, key=by_age))
Example #11
    def test_batch_insert_or_update_then_query(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.insert_or_update(self.TABLE, self.COLUMNS, self.ROW_DATA)

        snapshot = session.snapshot(read_timestamp=batch.committed)
        rows = list(snapshot.execute_sql(self.SQL))
        self._check_row_data(rows)
Example #12
    def _set_up_table(self, row_count):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.transaction() as transaction:
            transaction.delete(self.TABLE, self.ALL)
            transaction.insert(self.TABLE, self.COLUMNS,
                               self._row_data(row_count))

        return session, transaction.committed
Example #13
def test_scale_bigtable(instance):
    bigtable_client = bigtable.Client(admin=True)

    instance = bigtable_client.instance(BIGTABLE_INSTANCE)
    instance.reload()

    cluster = instance.cluster(BIGTABLE_INSTANCE)

    _nonzero_node_count = RetryInstanceState(
        instance_predicate=lambda c: c.serve_nodes > 0,
        max_tries=10,
    )
    _nonzero_node_count(cluster.reload)()

    original_node_count = cluster.serve_nodes

    scale_bigtable(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, True)

    scaled_node_count_predicate = ClusterNodeCountPredicate(
        original_node_count + SIZE_CHANGE_STEP)
    scaled_node_count_predicate.__name__ = "scaled_node_count_predicate"
    _scaled_node_count = RetryInstanceState(
        instance_predicate=scaled_node_count_predicate,
        max_tries=10,
    )
    _scaled_node_count(cluster.reload)()

    scale_bigtable(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, False)

    restored_node_count_predicate = ClusterNodeCountPredicate(
        original_node_count)
    restored_node_count_predicate.__name__ = "restored_node_count_predicate"
    _restored_node_count = RetryInstanceState(
        instance_predicate=restored_node_count_predicate,
        max_tries=10,
    )
    _restored_node_count(cluster.reload)()
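
ClusterNodeCountPredicate is used above but its definition lies outside this excerpt. Judging from how it is constructed with an expected count, given a __name__, and passed as instance_predicate, a minimal sketch (an assumption, not the original class) could be:

class ClusterNodeCountPredicate(object):
    """Illustrative sketch: callable predicate over a reloaded Bigtable cluster."""

    def __init__(self, expected_node_count):
        self.expected_node_count = expected_node_count

    def __call__(self, cluster):
        # RetryInstanceState calls this with the cluster after each reload.
        return cluster.serve_nodes == self.expected_node_count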
Example #14
    def test_db_run_in_transaction_twice(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        with self._db.batch() as batch:
            batch.delete(self.TABLE, self.ALL)

        def _unit_of_work(transaction, test):
            transaction.insert_or_update(test.TABLE, test.COLUMNS,
                                         test.ROW_DATA)

        self._db.run_in_transaction(_unit_of_work, test=self)
        self._db.run_in_transaction(_unit_of_work, test=self)

        rows = list(self._db.execute_sql(self.SQL))
        self._check_row_data(rows)
Example #15
    def test_batch_insert_then_read(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        batch = session.batch()
        batch.delete(self.TABLE, self.ALL)
        batch.insert(self.TABLE, self.COLUMNS, self.ROW_DATA)
        batch.commit()

        snapshot = session.snapshot(read_timestamp=batch.committed)
        rows = list(snapshot.read(self.TABLE, self.COLUMNS, self.ALL))
        self._check_row_data(rows)
Example #16
    def test_message_pull_mode_e2e(self):
        import operator
        TOPIC_NAME = 'message-e2e' + unique_resource_id('-')
        topic = Config.CLIENT.topic(TOPIC_NAME,
                                    timestamp_messages=True)
        self.assertFalse(topic.exists())
        topic.create()
        self.to_delete.append(topic)
        SUBSCRIPTION_NAME = 'subscribing-now' + unique_resource_id('-')
        subscription = topic.subscription(SUBSCRIPTION_NAME)
        self.assertFalse(subscription.exists())
        subscription.create()
        self.to_delete.append(subscription)

        MESSAGE_1 = b'MESSAGE ONE'
        MESSAGE_2 = b'MESSAGE TWO'
        EXTRA_1 = 'EXTRA 1'
        EXTRA_2 = 'EXTRA 2'
        topic.publish(MESSAGE_1, extra=EXTRA_1)
        topic.publish(MESSAGE_2, extra=EXTRA_2)

        class Hoover(object):

            def __init__(self):
                self.received = []

            def done(self, *dummy):
                return len(self.received) == 2

            def suction(self):
                with subscription.auto_ack(max_messages=2) as ack:
                    self.received.extend(ack.values())

        hoover = Hoover()
        retry = RetryInstanceState(hoover.done)
        retry(hoover.suction)()

        message1, message2 = sorted(hoover.received,
                                    key=operator.attrgetter('timestamp'))

        self.assertEqual(message1.data, MESSAGE_1)
        self.assertEqual(message1.attributes['extra'], EXTRA_1)
        self.assertIsNotNone(message1.service_timestamp)

        self.assertEqual(message2.data, MESSAGE_2)
        self.assertEqual(message2.attributes['extra'], EXTRA_2)
        self.assertIsNotNone(message2.service_timestamp)
    def test_transaction_read_and_insert_then_exception(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.delete(self.TABLE, self.ALL)

        with self.assertRaises(CustomException):
            session.run_in_transaction(self._transaction_read_then_raise)

        # Transaction was rolled back.
        rows = list(session.read(self.TABLE, self.COLUMNS, self.ALL))
        self.assertEqual(rows, [])
    def _transaction_concurrency_helper(self, unit_of_work, pkey):
        INITIAL_VALUE = 123
        NUM_THREADS = 3     # conforms to equivalent Java systest.

        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.insert_or_update(
                self.COUNTERS_TABLE,
                self.COUNTERS_COLUMNS,
                [[pkey, INITIAL_VALUE]])

        # We don't want to run the threads' transactions in the current
        # session, which would fail.
        txn_sessions = []

        for _ in range(NUM_THREADS):
            txn_session = self._db.session()
            txn_sessions.append(txn_session)
            txn_session.create()
            self.to_delete.append(txn_session)

        threads = [
            threading.Thread(
                target=txn_session.run_in_transaction,
                args=(unit_of_work, pkey))
            for txn_session in txn_sessions]

        for thread in threads:
            thread.start()

        for thread in threads:
            thread.join()

        keyset = KeySet(keys=[(pkey,)])
        rows = list(session.read(
            self.COUNTERS_TABLE, self.COUNTERS_COLUMNS, keyset))
        self.assertEqual(len(rows), 1)
        _, value = rows[0]
        self.assertEqual(value, INITIAL_VALUE + len(threads))
Example #19
    def test_batch_insert_then_read_all_datatypes(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.delete(self.ALL_TYPES_TABLE, self.ALL)
            batch.insert(self.ALL_TYPES_TABLE, self.ALL_TYPES_COLUMNS,
                         self.ALL_TYPES_ROWDATA)

        snapshot = session.snapshot(read_timestamp=batch.committed)
        rows = list(
            snapshot.read(self.ALL_TYPES_TABLE, self.ALL_TYPES_COLUMNS,
                          self.ALL))
        self._check_row_data(rows, expected=self.ALL_TYPES_ROWDATA)
Example #20
    def test_batch_insert_then_read(self):
        from google.cloud.spanner import KeySet
        keyset = KeySet(all_=True)

        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        batch = session.batch()
        batch.delete(self.TABLE, keyset)
        batch.insert(self.TABLE, self.COLUMNS, self.ROW_DATA)
        batch.commit()

        snapshot = session.snapshot(read_timestamp=batch.committed)
        rows = list(snapshot.read(self.TABLE, self.COLUMNS, keyset))
        self._check_row_data(rows)
    def test_db_run_in_transaction_then_snapshot_execute_sql(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        with self._db.batch() as batch:
            batch.delete(self.TABLE, self.ALL)

        def _unit_of_work(transaction, test):
            rows = list(transaction.read(test.TABLE, test.COLUMNS, self.ALL))
            test.assertEqual(rows, [])

            transaction.insert_or_update(test.TABLE, test.COLUMNS,
                                         test.ROW_DATA)

        self._db.run_in_transaction(_unit_of_work, test=self)

        with self._db.snapshot() as after:
            rows = list(after.execute_sql(self.SQL))
        self._check_row_data(rows)
    def _set_up_table(self, row_count, db=None):

        if db is None:
            db = self._db
            retry = RetryInstanceState(_has_all_ddl)
            retry(db.reload)()

        session = db.session()
        session.create()
        self.to_delete.append(session)

        def _unit_of_work(transaction, test):
            transaction.delete(test.TABLE, test.ALL)
            transaction.insert(test.TABLE, test.COLUMNS,
                               test._row_data(row_count))

        committed = session.run_in_transaction(_unit_of_work, test=self)

        return session, committed
    def test_db_run_in_transaction_twice_4181(self):
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        with self._db.batch() as batch:
            batch.delete(COUNTERS_TABLE, self.ALL)

        def _unit_of_work(transaction, name):
            transaction.insert(COUNTERS_TABLE, COUNTERS_COLUMNS, [[name, 0]])

        self._db.run_in_transaction(_unit_of_work, name='id_1')

        with self.assertRaises(errors.RetryError) as expected:
            self._db.run_in_transaction(_unit_of_work, name='id_1')

        self.assertEqual(exc_to_code(expected.exception.cause),
                         StatusCode.ALREADY_EXISTS)

        self._db.run_in_transaction(_unit_of_work, name='id_2')

        with self._db.snapshot() as after:
            rows = list(after.read(COUNTERS_TABLE, COUNTERS_COLUMNS, self.ALL))
        self.assertEqual(len(rows), 2)
    def test_batch_insert_then_read_string_array_of_string(self):
        TABLE = 'string_plus_array_of_string'
        COLUMNS = ['id', 'name', 'tags']
        ROWDATA = [
            (0, None, None),
            (1, 'phred', ['yabba', 'dabba', 'do']),
            (2, 'bharney', []),
            (3, 'wylma', ['oh', None, 'phred']),
        ]
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.delete(TABLE, self.ALL)
            batch.insert(TABLE, COLUMNS, ROWDATA)

        snapshot = session.snapshot(read_timestamp=batch.committed)
        rows = list(snapshot.read(TABLE, COLUMNS, self.ALL))
        self._check_row_data(rows, expected=ROWDATA)
    """Predicate: pass only exceptions for a failed copyTo."""
    err_msg = bad_request.message
    return err_msg.startswith(
        "No file found in request. (POST") and "copyTo" in err_msg


def _no_event_based_hold(blob):
    return not blob.event_based_hold


def _has_kms_key_name(blob):
    return blob.kms_key_name is not None


retry_bad_copy = RetryErrors(exceptions.BadRequest, error_predicate=_bad_copy)
retry_no_event_based_hold = RetryInstanceState(_no_event_based_hold)
retry_has_kms_key_name = RetryInstanceState(_has_kms_key_name)


def unique_name(prefix):
    return prefix + unique_resource_id("-")


def empty_bucket(bucket):
    for blob in list(bucket.list_blobs(versions=True)):
        try:
            blob.delete()
        except exceptions.NotFound:
            pass
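
The module-level retry objects defined above are applied at call sites elsewhere in the test module. A hedged usage sketch follows; the function name and object names below are placeholders, not code from the original tests.

def copy_when_ready(bucket, blob_name):
    """Illustrative only: ``bucket`` is a ``google.cloud.storage.Bucket``."""
    blob = bucket.blob(blob_name)

    # Reload the blob until its event-based hold is released.
    retry_no_event_based_hold(blob.reload)()

    # Reload until the service reports a KMS key name on the blob.
    retry_has_kms_key_name(blob.reload)()

    # Retry the copy while the backend returns the transient copyTo
    # failure matched by _bad_copy above.
    return retry_bad_copy(bucket.copy_blob)(blob, bucket, blob_name + '-copy')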

Example #26
signing_blob_content = b"This time for sure, Rocky!"


def _bad_copy(bad_request):
    """Predicate: pass only exceptions for a failed copyTo."""
    err_msg = bad_request.message
    return err_msg.startswith(
        "No file found in request. (POST") and "copyTo" in err_msg


def _no_event_based_hold(blob):
    return not blob.event_based_hold


retry_bad_copy = RetryErrors(exceptions.BadRequest, error_predicate=_bad_copy)
retry_no_event_based_hold = RetryInstanceState(_no_event_based_hold)


def unique_name(prefix):
    return prefix + unique_resource_id("-")


def empty_bucket(bucket):
    for blob in list(bucket.list_blobs(versions=True)):
        try:
            blob.delete()
        except exceptions.NotFound:
            pass


def delete_blob(blob):
Example #27
    def test_load_table_from_storage_then_dump_table(self):
        import csv
        from google.cloud._testing import _NamedTemporaryFile
        from google.cloud.storage import Client as StorageClient

        local_id = unique_resource_id()
        BUCKET_NAME = 'bq_load_test' + local_id
        BLOB_NAME = 'person_ages.csv'
        GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        TABLE_NAME = 'test_table'

        s_client = StorageClient()

        # In the **very** rare case the bucket name is reserved, this
        # fails with a ConnectionError.
        bucket = s_client.create_bucket(BUCKET_NAME)
        self.to_delete.append(bucket)

        blob = bucket.blob(BLOB_NAME)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Full Name', 'Age'))
                writer.writerows(ROWS)

            with open(temp.name, 'rb') as csv_read:
                blob.upload_from_file(csv_read, content_type='text/csv')

        self.to_delete.insert(0, blob)

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_gcs_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name', 'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        job = Config.CLIENT.load_table_from_storage(
            'bq_load_storage_test_' + local_id, table, GS_URL)
        job.create_disposition = 'CREATE_NEVER'
        job.skip_leading_rows = 1
        job.source_format = 'CSV'
        job.write_disposition = 'WRITE_EMPTY'

        job.begin()

        def _job_done(instance):
            return instance.state in ('DONE', 'done')

        # Allow for 90 seconds of "warm up" before rows visible.  See:
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        rows = self._fetch_single_page(table)
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age),
                         sorted(ROWS, key=by_age))
    def test_load_table_from_storage_w_autodetect_schema(self):
        from google.cloud._testing import _NamedTemporaryFile
        from google.cloud.storage import Client as StorageClient
        from google.cloud.bigquery import SchemaField

        local_id = unique_resource_id()
        bucket_name = 'bq_load_test' + local_id
        blob_name = 'person_ages.csv'
        gs_url = 'gs://{}/{}'.format(bucket_name, blob_name)
        rows = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ] * 100  # BigQuery internally uses the first 100 rows to detect schema
        table_name = 'test_table'

        storage_client = StorageClient()

        # In the **very** rare case the bucket name is reserved, this
        # fails with a ConnectionError.
        bucket = storage_client.create_bucket(bucket_name)
        self.to_delete.append(bucket)

        blob = bucket.blob(blob_name)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Full Name', 'Age'))
                writer.writerows(rows)

            with open(temp.name, 'rb') as csv_read:
                blob.upload_from_file(csv_read, content_type='text/csv')

        self.to_delete.insert(0, blob)

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_gcs_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        table = dataset.table(table_name)
        self.to_delete.insert(0, table)

        job = Config.CLIENT.load_table_from_storage(
            'bq_load_storage_test_' + local_id, table, gs_url)
        job.autodetect = True

        job.begin()

        # Allow for 90 seconds of "warm up" before rows visible.  See
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        table.reload()
        field_name = SchemaField(u'Full_Name', u'string', u'NULLABLE', None,
                                 ())
        field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ())
        self.assertEqual(table.schema, [field_name, field_age])

        actual_rows = self._fetch_single_page(table)
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(actual_rows, key=by_age),
                         sorted(rows, key=by_age))