Example No. 1
    def _load_table_for_dml(self, rows, dataset_name, table_name):
        import csv
        from google.cloud._testing import _NamedTemporaryFile

        dataset = Config.CLIENT.dataset(dataset_name)
        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        greeting = bigquery.SchemaField('greeting', 'STRING', mode='NULLABLE')
        table = dataset.table(table_name, schema=[greeting])
        table.create()
        self.to_delete.insert(0, table)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Greeting', ))
                writer.writerows(rows)

            with open(temp.name, 'rb') as csv_read:
                job = table.upload_from_file(
                    csv_read,
                    source_format='CSV',
                    skip_leading_rows=1,
                    create_disposition='CREATE_NEVER',
                    write_disposition='WRITE_EMPTY',
                )

        # Retry until done.
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()
        self._fetch_single_page(table)
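
All of these snippets lean on the same write-then-reopen idiom: the CSV is written through `temp.name` in text mode, then the same path is reopened in binary mode for the upload while the temporary file still exists. A minimal sketch of a context manager with that shape, assuming `_NamedTemporaryFile` only needs to expose a `name` attribute and delete the file on exit (the real helper in `google.cloud._testing` may differ):

import collections
import contextlib
import os
import tempfile

_Temp = collections.namedtuple('_Temp', ['name'])

@contextlib.contextmanager
def _named_temporary_file(suffix=''):
    # Create the file closed so it can be re-opened by name (also works on Windows).
    handle, filename = tempfile.mkstemp(suffix=suffix)
    os.close(handle)
    try:
        yield _Temp(name=filename)
    finally:
        os.remove(filename)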
Example No. 2
    def test_load_table_from_local_file_then_dump_table(self):
        import csv
        from google.cloud._testing import _NamedTemporaryFile

        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        TABLE_NAME = 'test_table'

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_local_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name', 'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Full Name', 'Age'))
                writer.writerows(ROWS)

            with open(temp.name, 'rb') as csv_read:
                job = table.upload_from_file(
                    csv_read,
                    source_format='CSV',
                    skip_leading_rows=1,
                    create_disposition='CREATE_NEVER',
                    write_disposition='WRITE_EMPTY',
                )

        def _job_done(instance):
            return instance.state.lower() == 'done'

        # Retry until done.
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        self.assertEqual(job.output_rows, len(ROWS))

        rows = self._fetch_single_page(table)
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age),
                         sorted(ROWS, key=by_age))
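
`RetryInstanceState(_job_done, max_tries=8)` keeps re-invoking `job.reload()` until the predicate reports the job done, backing off between attempts; the comments in the storage-based examples further down put the total wait at 1 + 2 + 4 + ... + 64 = 127 seconds for 8 tries. A rough, simplified stand-in for that polling loop, assuming only that `job.reload()` refreshes `job.state` from the API:

import time

def wait_until_done(job, max_tries=8, initial_delay=1.0):
    # Poll job.reload() with doubling delays until the job reports DONE.
    delay = initial_delay
    for attempt in range(max_tries):
        job.reload()  # refresh job.state from the BigQuery API
        if job.state.lower() == 'done':
            return job
        if attempt < max_tries - 1:
            time.sleep(delay)
            delay *= 2  # exponential backoff: 1, 2, 4, 8, ... seconds
    raise RuntimeError('job did not finish after %d tries' % max_tries)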
Example No. 3
    def test_success(self):
        from google.cloud.environment_vars import CREDENTIALS
        from google.cloud._testing import _Monkey
        from google.cloud._testing import _NamedTemporaryFile

        project_id = 'test-project-id'
        payload = '{"%s":"%s"}' % ('project_id', project_id)
        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as creds_file:
                creds_file.write(payload)

            environ = {CREDENTIALS: temp.name}
            with _Monkey(os, getenv=environ.get):
                result = self._callFUT()

            self.assertEqual(result, project_id)
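
The function under test (`self._callFUT()`) is not part of this listing; from the setup it evidently reads the `project_id` field out of the JSON credentials file named by the environment variable behind `CREDENTIALS`. A hypothetical sketch of such a helper, only to make the test's intent concrete (the real implementation lives in `google.cloud._helpers` and may differ):

import json
import os

from google.cloud.environment_vars import CREDENTIALS

def _get_credentials_project_id():
    # Hypothetical helper: pull 'project_id' out of the credentials JSON file
    # named by the CREDENTIALS environment variable.
    creds_path = os.getenv(CREDENTIALS)
    if creds_path is None:
        return None
    with open(creds_path, 'r') as creds_file:
        return json.load(creds_file).get('project_id')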
Example No. 4
def _write_csv_to_storage(bucket_name, blob_name, header_row, data_rows):
    import csv
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient

    storage_client = StorageClient()

    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = storage_client.create_bucket(bucket_name)

    blob = bucket.blob(blob_name)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(header_row)
            writer.writerows(data_rows)

        with open(temp.name, 'rb') as csv_read:
            blob.upload_from_file(csv_read, content_type='text/csv')

    return bucket, blob
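
A possible usage sketch for the helper above, with hypothetical bucket and blob names (real tests append `unique_resource_id()` to the bucket name to avoid collisions) and explicit cleanup in place of the `to_delete` bookkeeping used by the test classes in the other examples:

header = ('Full Name', 'Age')
rows = [
    ('Phred Phlyntstone', 32),
    ('Bharney Rhubble', 33),
]

bucket, blob = _write_csv_to_storage(
    'bq_load_test_example', 'person_ages.csv', header, rows)
try:
    pass  # run the BigQuery load job against gs://bq_load_test_example/person_ages.csv here
finally:
    blob.delete()
    bucket.delete()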
Example No. 5
    def test_nix_missing_project_key(self):
        from google.cloud import _helpers as MUT
        from google.cloud._testing import _Monkey
        from google.cloud._testing import _NamedTemporaryFile

        with _NamedTemporaryFile() as temp:
            config_value = '[%s]' % (MUT._GCLOUD_CONFIG_SECTION,)
            with open(temp.name, 'w') as config_file:
                config_file.write(config_value)

            def mock_get_path():
                return temp.name

            with _Monkey(os, name='not-nt'):
                with _Monkey(MUT, _get_nix_config_path=mock_get_path,
                             _USER_ROOT='not-None'):
                    result = self._callFUT()

            self.assertEqual(result, None)
Example No. 6
    def test_nix_missing_project_key(self):
        from google.cloud import _helpers as MUT
        from google.cloud._testing import _Monkey
        from google.cloud._testing import _NamedTemporaryFile

        with _NamedTemporaryFile() as temp:
            config_value = '[%s]' % (MUT._GCLOUD_CONFIG_SECTION, )
            with open(temp.name, 'w') as config_file:
                config_file.write(config_value)

            def mock_get_path():
                return temp.name

            with _Monkey(os, name='not-nt'):
                with _Monkey(MUT,
                             _get_nix_config_path=mock_get_path,
                             _USER_ROOT='not-None'):
                    result = self._callFUT()

            self.assertEqual(result, None)
Example No. 7
    def test_windows(self):
        from google.cloud import _helpers as MUT
        from google.cloud._testing import _Monkey
        from google.cloud._testing import _NamedTemporaryFile

        project_id = 'test-project-id'
        with _NamedTemporaryFile() as temp:
            config_value = self.CONFIG_TEMPLATE % (
                MUT._GCLOUD_CONFIG_SECTION, MUT._GCLOUD_CONFIG_KEY, project_id)
            with open(temp.name, 'w') as config_file:
                config_file.write(config_value)

            def mock_get_path():
                return temp.name

            with _Monkey(os, name='nt'):
                with _Monkey(MUT,
                             _get_windows_config_path=mock_get_path,
                             _USER_ROOT=None):
                    result = self._callFUT()

            self.assertEqual(result, project_id)
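
`self.CONFIG_TEMPLATE` is defined outside these snippets; judging from the substitution it expands to a one-section INI file with a single key. A hedged sketch of that shape, and of how such a gcloud-style config would be parsed, assuming the section/key constants in `google.cloud._helpers` name a `[core]` section with a `project` entry as in the gcloud CLI's own config file:

import configparser

# Presumed shape of the template the tests fill in: (section, key, value).
CONFIG_TEMPLATE = '[%s]\n%s = %s\n'

content = CONFIG_TEMPLATE % ('core', 'project', 'test-project-id')

parser = configparser.RawConfigParser()
parser.read_string(content)
print(parser.get('core', 'project'))  # -> 'test-project-id'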
Example No. 8
    def test_windows(self):
        from google.cloud import _helpers as MUT
        from google.cloud._testing import _Monkey
        from google.cloud._testing import _NamedTemporaryFile

        project_id = 'test-project-id'
        with _NamedTemporaryFile() as temp:
            config_value = self.CONFIG_TEMPLATE % (
                MUT._GCLOUD_CONFIG_SECTION,
                MUT._GCLOUD_CONFIG_KEY, project_id)
            with open(temp.name, 'w') as config_file:
                config_file.write(config_value)

            def mock_get_path():
                return temp.name

            with _Monkey(os, name='nt'):
                with _Monkey(MUT, _get_windows_config_path=mock_get_path,
                             _USER_ROOT=None):
                    result = self._callFUT()

            self.assertEqual(result, project_id)
Example No. 9
    def test_load_table_from_storage_then_dump_table(self):
        import csv
        from google.cloud._testing import _NamedTemporaryFile
        from google.cloud.storage import Client as StorageClient

        local_id = unique_resource_id()
        BUCKET_NAME = 'bq_load_test' + local_id
        BLOB_NAME = 'person_ages.csv'
        GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        TABLE_NAME = 'test_table'

        s_client = StorageClient()

        # In the **very** rare case the bucket name is reserved, this
        # fails with a ConnectionError.
        bucket = s_client.create_bucket(BUCKET_NAME)
        self.to_delete.append(bucket)

        blob = bucket.blob(BLOB_NAME)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Full Name', 'Age'))
                writer.writerows(ROWS)

            with open(temp.name, 'rb') as csv_read:
                blob.upload_from_file(csv_read, content_type='text/csv')

        self.to_delete.insert(0, blob)

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_gcs_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name', 'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        job = Config.CLIENT.load_table_from_storage(
            'bq_load_storage_test_' + local_id, table, GS_URL)
        job.create_disposition = 'CREATE_NEVER'
        job.skip_leading_rows = 1
        job.source_format = 'CSV'
        job.write_disposition = 'WRITE_EMPTY'

        job.begin()

        def _job_done(instance):
            return instance.state in ('DONE', 'done')

        # Allow for 90 seconds of "warm up" before rows visible.  See:
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        rows = self._fetch_single_page(table)
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age),
                         sorted(ROWS, key=by_age))
Example No. 10
    def test_load_table_from_storage_w_autodetect_schema(self):
        import csv
        from google.cloud._testing import _NamedTemporaryFile
        from google.cloud.storage import Client as StorageClient
        from google.cloud.bigquery import SchemaField

        local_id = unique_resource_id()
        bucket_name = 'bq_load_test' + local_id
        blob_name = 'person_ages.csv'
        gs_url = 'gs://{}/{}'.format(bucket_name, blob_name)
        rows = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ] * 100  # BigQuery internally uses the first 100 rows to detect schema
        table_name = 'test_table'

        storage_client = StorageClient()

        # In the **very** rare case the bucket name is reserved, this
        # fails with a ConnectionError.
        bucket = storage_client.create_bucket(bucket_name)
        self.to_delete.append(bucket)

        blob = bucket.blob(blob_name)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Full Name', 'Age'))
                writer.writerows(rows)

            with open(temp.name, 'rb') as csv_read:
                blob.upload_from_file(csv_read, content_type='text/csv')

        self.to_delete.insert(0, blob)

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_gcs_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        table = dataset.table(table_name)
        self.to_delete.insert(0, table)

        job = Config.CLIENT.load_table_from_storage(
            'bq_load_storage_test_' + local_id, table, gs_url)
        job.autodetect = True

        job.begin()

        # Allow for 90 seconds of "warm up" before rows visible.  See
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        table.reload()
        field_name = SchemaField(u'Full_Name', u'string', u'NULLABLE', None,
                                 ())
        field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ())
        self.assertEqual(table.schema, [field_name, field_age])

        actual_rows = self._fetch_single_page(table)
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(actual_rows, key=by_age),
                         sorted(rows, key=by_age))
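
`self._fetch_single_page(table)` is shared system-test plumbing that is not reproduced in this listing. Under the table API these examples use, it presumably amounts to pulling the first page from `table.fetch_data()`; a hypothetical standalone version under that assumption:

def _fetch_single_page(table):
    # Hypothetical helper: return only the first page of rows from a table.
    iterator = table.fetch_data()
    page = next(iterator.pages)
    return list(page)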