@property
def schema(self):
    """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema
    for the data.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema
    """
    prop = self._properties.get("schema", {})
    return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])]
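The accessor rebuilds typed SchemaField objects from the raw REST dict cached in _properties. A minimal sketch of the same conversion via the public from_api_repr constructor (the field dict is illustrative):

from google.cloud.bigquery.schema import SchemaField

# REST-style field dict, as it would appear under "schema" -> "fields".
raw_field = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}

field = SchemaField.from_api_repr(raw_field)
assert field.name == "full_name"
assert field.field_type == "STRING"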
Example #2
def _make_field(field_type, mode="NULLABLE", name="testing", fields=()):
    from google.cloud.bigquery.schema import SchemaField

    return SchemaField(name=name, field_type=field_type, mode=mode, fields=fields)
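A usage sketch for this helper (the names are illustrative); nested RECORD fields compose by passing child SchemaField objects via fields:

simple = _make_field("STRING")
assert simple.mode == "NULLABLE"  # default mode from the helper

record = _make_field(
    "RECORD",
    mode="REPEATED",
    name="children",
    fields=(_make_field("STRING", name="child_name"),),
)
assert record.fields[0].name == "child_name"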
Example #3
def _prepare_schema(self):
    return [SchemaField(**row) for row in SCHEMA]
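This helper assumes a module-level SCHEMA constant of keyword dicts; a hypothetical shape that satisfies the comprehension:

# Hypothetical module-level constant; keys match SchemaField's keyword arguments.
SCHEMA = [
    {"name": "full_name", "field_type": "STRING", "mode": "REQUIRED"},
    {"name": "age", "field_type": "INTEGER", "mode": "NULLABLE"},
]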
Example #4
# This software is released under the MIT License.
# https://opensource.org/licenses/MIT

from datetime import datetime, timezone

import pytest
from google.cloud.bigquery.job import QueryJob
from google.cloud.bigquery.schema import SchemaField

from bq_test_kit import BQTestKit
from bq_test_kit.bq_dsl.bq_resources.partitions.ingestion_time import \
    IngestionTime
from bq_test_kit.resource_loaders.package_file_loader import PackageFileLoader

json_schema = [
    SchemaField("f1", field_type="STRING"),
    SchemaField("struct_f2", field_type="RECORD", fields=[
        SchemaField("f2_1", field_type="INT64")
    ]),
    SchemaField("array_f3", field_type="RECORD", mode="REPEATED", fields=[
        SchemaField("f3_1", field_type="DATETIME")
    ]),
]


def test_json_load(bqtk: BQTestKit):
    with bqtk.project("it").dataset("dataset_foo").isolate() as ds:
        with ds.table("table_bar", schema=json_schema).isolate() as t:
            pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.json")
            t.json_loader(from_=pfl).load()
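The fixture file's contents aren't shown here; hypothetically, one newline-delimited JSON row matching json_schema could look like this (field names mirror the schema above):

import json

row = {
    "f1": "hello",
    "struct_f2": {"f2_1": 42},
    "array_f3": [{"f3_1": "2021-01-01T00:00:00"}],
}
print(json.dumps(row))  # one JSON object per line, as BigQuery JSON loads expect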
Example #5
def test_to_api_repr_parameterized(field, api):
    from google.cloud.bigquery.schema import SchemaField

    assert SchemaField(**field).to_api_repr() == api
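The field and api arguments are supplied by pytest parametrization. A plausible standalone pair, hedged because some library versions add extra keys (e.g. "description") to the REST dict:

import pytest
from google.cloud.bigquery.schema import SchemaField


@pytest.mark.parametrize(
    "field,api",
    [
        (
            dict(name="foo", field_type="INTEGER", mode="NULLABLE"),
            {"mode": "NULLABLE", "name": "foo", "type": "INTEGER"},
        ),
    ],
)
def test_to_api_repr_example(field, api):
    # Compare only the keys we specified; exact output varies by version.
    got = SchemaField(**field).to_api_repr()
    assert {k: got[k] for k in api} == api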
Example #6
    def test_create_tables_from_dict_overwrite(self):
        # type: () -> None
        # Create the tables once.
        self.client.create_tables_from_dict(
            {
                'empty_1': [
                    SchemaField('col1', 'INTEGER'),
                    SchemaField('col2', 'STRING')
                ],
                'empty_2':
                [SchemaField('col1', 'FLOAT'),
                 SchemaField('col2', 'INTEGER')]
            },
            replace_existing_tables=True)

        # Create the tables again with different schemas. Make sure the changes take
        # effect, since the tables should have been recreated.
        self.client.create_tables_from_dict(
            {
                'empty_1': [
                    SchemaField('col1_test1', 'INTEGER'),
                    SchemaField('col2_test2', 'STRING')
                ],
                'empty_2': [
                    SchemaField('col1_test1', 'FLOAT'),
                    SchemaField('col2_test2', 'INTEGER')
                ]
            },
            replace_existing_tables=True)
        self.assertEqual([('col1_test1', 'INTEGER', 'NULLABLE'),
                          ('col2_test2', 'STRING', 'NULLABLE')],
                         [(x.name, x.field_type, x.mode)
                          for x in self.client.get_schema(
                              self.default_test_dataset_id, 'empty_1')])
        self.assertEqual([('col1_test1', 'FLOAT', 'NULLABLE'),
                          ('col2_test2', 'INTEGER', 'NULLABLE')],
                         [(x.name, x.field_type, x.mode)
                          for x in self.client.get_schema(
                              self.default_test_dataset_id, 'empty_2')])

        # Try to create one of the tables again; it should raise a RuntimeError.
        with self.assertRaises(RuntimeError):
            self.client.create_tables_from_dict(
                {
                    'empty_1': [
                        SchemaField('col1', 'INTEGER'),
                        SchemaField('col2', 'STRING')
                    ],
                },
                replace_existing_tables=False)

        # Try to create a table not in the dataset. It should work fine.
        self.client.create_tables_from_dict(
            {
                'empty_3': [
                    SchemaField('col1', 'INTEGER'),
                    SchemaField('col2', 'STRING')
                ],
            },
            replace_existing_tables=False)
        self.assertEqual([('col1', 'INTEGER', 'NULLABLE'),
                          ('col2', 'STRING', 'NULLABLE')],
                         [(x.name, x.field_type, x.mode)
                          for x in self.client.get_schema(
                              self.default_test_dataset_id, 'empty_3')])
Example #7
import time

from ddt import data, ddt, unpack
from google.cloud.bigquery.schema import SchemaField

from verily.bigquery_wrapper import bq_test_case

# We use the standard BQ_PATH_DELIMITER throughout the test cases because all the functions in
# mock BQ should take in real BQ paths and handle them correctly.
from verily.bigquery_wrapper.bq_base import BQ_PATH_DELIMITER
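# For example, joining path components with the delimiter yields a standard
# fully qualified table path (assuming the delimiter is '.'):
#     BQ_PATH_DELIMITER.join(['my-project', 'my_dataset', 'my_table'])
#     -> 'my-project.my_dataset.my_table'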

LONG_TABLE_LENGTH = 200000

FOO_BAR_BAZ_INTEGERS_SCHEMA = [
    SchemaField('foo', 'INTEGER'),
    SchemaField('bar', 'INTEGER'),
    SchemaField('baz', 'INTEGER')
]


@ddt
class BQSharedTests(bq_test_case.BQTestCase):
    @classmethod
    def setUpClass(cls, use_mocks=False):
        # type: (bool) -> None
        """Set up class"""
        super(BQSharedTests, cls).setUpClass(use_mocks=use_mocks)

    @classmethod
    def create_mock_tables(cls):
Example #8
def test_to_dataframe_w_tqdm_pending():
    from google.cloud.bigquery import table
    from google.cloud.bigquery.job import QueryJob as target_class
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL

    begun_resource = _make_job_resource(job_type="query")
    schema = [
        SchemaField("name", "STRING", mode="NULLABLE"),
        SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
        {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
        {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
    ]

    connection = _make_connection({})
    client = _make_client(connection=connection)
    job = target_class.from_api_repr(begun_resource, client)

    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = table.RowIterator(client, api_request, path, schema)

    job._properties["statistics"] = {
        "query": {
            "queryPlan": [
                {
                    "name": "S00: Input",
                    "id": "0",
                    "status": "PRNDING"
                },
                {
                    "name": "S01: Output",
                    "id": "1",
                    "status": "COMPLETE"
                },
            ]
        },
    }
    reload_patch = mock.patch("google.cloud.bigquery.job._AsyncJob.reload",
                              autospec=True)
    result_patch = mock.patch(
        "google.cloud.bigquery.job.QueryJob.result",
        side_effect=[concurrent.futures.TimeoutError, row_iterator],
    )

    with result_patch as result_patch_tqdm, reload_patch:
        df = job.to_dataframe(progress_bar_type="tqdm",
                              create_bqstorage_client=False)

    assert result_patch_tqdm.call_count == 2
    assert isinstance(df, pandas.DataFrame)
    assert len(df) == 4  # verify the number of rows
    assert list(df) == ["name", "age"]  # verify the column names
    result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL,
                                         max_results=None)
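The rows fixture above uses BigQuery's REST tabledata representation: each row is {"f": [cells]} and each cell is {"v": value}, matched positionally against the schema. A hypothetical helper (decode_rest_rows is not part of the library) that makes the mapping explicit:

def decode_rest_rows(rows, schema):
    # Pair each positional "v" cell with its schema field's name.
    names = [field.name for field in schema]
    return [dict(zip(names, (cell["v"] for cell in row["f"]))) for row in rows]

# decode_rest_rows(rows, schema)[0] == {"name": "Phred Phlyntstone", "age": "32"}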
Example #9
def test_to_arrow_w_tqdm_w_pending_status():
    from google.cloud.bigquery import table
    from google.cloud.bigquery.job import QueryJob as target_class
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL

    begun_resource = _make_job_resource(job_type="query")
    rows = [
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
        {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
    ]

    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    connection = _make_connection({})
    client = _make_client(connection=connection)
    job = target_class.from_api_repr(begun_resource, client)

    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = table.RowIterator(client, api_request, path, schema)

    job._properties["statistics"] = {
        "query": {
            "queryPlan": [
                {
                    "name": "S00: Input",
                    "id": "0",
                    "status": "PENDING"
                },
                {
                    "name": "S00: Input",
                    "id": "1",
                    "status": "COMPLETE"
                },
            ]
        },
    }
    reload_patch = mock.patch("google.cloud.bigquery.job._AsyncJob.reload",
                              autospec=True)
    result_patch = mock.patch(
        "google.cloud.bigquery.job.QueryJob.result",
        side_effect=[concurrent.futures.TimeoutError, row_iterator],
    )

    with result_patch as result_patch_tqdm, reload_patch:
        tbl = job.to_arrow(progress_bar_type="tqdm",
                           create_bqstorage_client=False)

    assert result_patch_tqdm.call_count == 2
    assert isinstance(tbl, pyarrow.Table)
    assert tbl.num_rows == 2
    result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL,
                                         max_results=None)
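Outside a mocked test, the same call pattern works against a real job; a minimal sketch, assuming application-default credentials and network access:

from google.cloud import bigquery

client = bigquery.Client()  # assumes default credentials are configured
job = client.query("SELECT 'hello' AS greeting")
tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False)
print(tbl.num_rows)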
Example #10
from google.cloud.bigquery.schema import SchemaField


def _get_schema_field(schema_field):
  return SchemaField(
      name=schema_field['name'],
      field_type=schema_field['type'],
      mode=schema_field['mode'])
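A usage sketch (the input dicts are illustrative), converting a JSON-style schema listing into SchemaField objects:

raw_schema = [
    {'name': 'id', 'type': 'INT64', 'mode': 'REQUIRED'},
    {'name': 'label', 'type': 'STRING', 'mode': 'NULLABLE'},
]
fields = [_get_schema_field(f) for f in raw_schema]
assert fields[0].field_type == 'INT64'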