@property
def schema(self):
    """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema
    for the data.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema
    """
    prop = self._properties.get("schema", {})
    return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])]
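# Usage sketch (standalone, not part of the class above): ``SchemaField.from_api_repr``
# converts each REST-style field dict into a ``SchemaField``, nested RECORD fields
# included. The sample payload below is illustrative, not from the source.
from google.cloud.bigquery.schema import SchemaField

api_schema = {
    "fields": [
        {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
        {
            "name": "address",
            "type": "RECORD",
            "mode": "NULLABLE",
            "fields": [{"name": "zip", "type": "STRING", "mode": "NULLABLE"}],
        },
    ]
}

fields = [SchemaField.from_api_repr(field) for field in api_schema.get("fields", [])]
assert fields[0].name == "full_name"
assert fields[1].fields[0].name == "zip"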
def _make_field(field_type, mode="NULLABLE", name="testing", fields=()):
    from google.cloud.bigquery.schema import SchemaField

    return SchemaField(name=name, field_type=field_type, mode=mode, fields=fields)
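# Usage sketch for the helper above (values are arbitrary): compose a
# REPEATED RECORD field out of a simpler leaf field.
leaf = _make_field("TIMESTAMP", name="created_at")
record = _make_field("RECORD", mode="REPEATED", name="events", fields=(leaf,))

assert record.field_type == "RECORD"
assert record.mode == "REPEATED"
assert record.fields == (leaf,)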
def _prepare_schema(self):
    # ``SCHEMA`` is a module-level list of SchemaField keyword-argument dicts
    # defined elsewhere in this test module.
    return [SchemaField(**row) for row in SCHEMA]
# This software is released under the MIT License.
# https://opensource.org/licenses/MIT
from datetime import datetime, timezone

import pytest
from google.cloud.bigquery.job import QueryJob
from google.cloud.bigquery.schema import SchemaField

from bq_test_kit import BQTestKit
from bq_test_kit.bq_dsl.bq_resources.partitions.ingestion_time import \
    IngestionTime
from bq_test_kit.resource_loaders.package_file_loader import PackageFileLoader

json_schema = [
    SchemaField("f1", field_type="STRING"),
    SchemaField("struct_f2", field_type="RECORD", fields=[
        SchemaField("f2_1", field_type="INT64")
    ]),
    SchemaField("array_f3", field_type="RECORD", mode="REPEATED", fields=[
        SchemaField("f3_1", field_type="DATETIME")
    ])
]


def test_json_load(bqtk: BQTestKit):
    with bqtk.project("it").dataset("dataset_foo").isolate() as ds:
        with ds.table("table_bar", schema=json_schema).isolate() as t:
            pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.json")
            t.json_loader(from_=pfl).load()
def test_to_api_repr_parameterized(field, api):
    from google.cloud.bigquery.schema import SchemaField

    assert SchemaField(**field).to_api_repr() == api
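# One plausible (field, api) parameter pair for the test above (illustrative;
# the exact keys emitted by ``to_api_repr()`` can vary across library versions):
from google.cloud.bigquery.schema import SchemaField

field = dict(name="foo", field_type="INTEGER", mode="REQUIRED", description="Foo")
api = {"name": "foo", "type": "INTEGER", "mode": "REQUIRED", "description": "Foo"}
assert SchemaField(**field).to_api_repr() == api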
def test_create_tables_from_dict_overwrite(self):
    # type: () -> None
    # Create the tables once.
    self.client.create_tables_from_dict(
        {
            'empty_1': [
                SchemaField('col1', 'INTEGER'),
                SchemaField('col2', 'STRING')
            ],
            'empty_2': [SchemaField('col1', 'FLOAT'), SchemaField('col2', 'INTEGER')]
        },
        replace_existing_tables=True)

    # Create them again with a different schema. Make sure the changes take,
    # since it should have recreated the tables.
    self.client.create_tables_from_dict(
        {
            'empty_1': [
                SchemaField('col1_test1', 'INTEGER'),
                SchemaField('col2_test2', 'STRING')
            ],
            'empty_2': [
                SchemaField('col1_test1', 'FLOAT'),
                SchemaField('col2_test2', 'INTEGER')
            ]
        },
        replace_existing_tables=True)

    self.assertEqual([('col1_test1', 'INTEGER', 'NULLABLE'),
                      ('col2_test2', 'STRING', 'NULLABLE')],
                     [(x.name, x.field_type, x.mode)
                      for x in self.client.get_schema(
                          self.default_test_dataset_id, 'empty_1')])
    self.assertEqual([('col1_test1', 'FLOAT', 'NULLABLE'),
                      ('col2_test2', 'INTEGER', 'NULLABLE')],
                     [(x.name, x.field_type, x.mode)
                      for x in self.client.get_schema(
                          self.default_test_dataset_id, 'empty_2')])

    # Try to create one of the tables again; it should raise a RuntimeError.
    with self.assertRaises(RuntimeError):
        self.client.create_tables_from_dict(
            {
                'empty_1': [
                    SchemaField('col1', 'INTEGER'),
                    SchemaField('col2', 'STRING')
                ],
            },
            replace_existing_tables=False)

    # Try to create a table not in the dataset. It should work fine.
    self.client.create_tables_from_dict(
        {
            'empty_3': [
                SchemaField('col1', 'INTEGER'),
                SchemaField('col2', 'STRING')
            ],
        },
        replace_existing_tables=False)

    self.assertEqual([('col1', 'INTEGER', 'NULLABLE'),
                      ('col2', 'STRING', 'NULLABLE')],
                     [(x.name, x.field_type, x.mode)
                      for x in self.client.get_schema(
                          self.default_test_dataset_id, 'empty_3')])
import time

from ddt import data, ddt, unpack
from google.cloud.bigquery.schema import SchemaField

from verily.bigquery_wrapper import bq_test_case
# We use the standard BQ_PATH_DELIMITER throughout the test cases because all the functions in
# mock BQ should take in real BQ paths and handle them correctly.
from verily.bigquery_wrapper.bq_base import BQ_PATH_DELIMITER

LONG_TABLE_LENGTH = 200000

FOO_BAR_BAZ_INTEGERS_SCHEMA = [
    SchemaField('foo', 'INTEGER'),
    SchemaField('bar', 'INTEGER'),
    SchemaField('baz', 'INTEGER')
]


@ddt
class BQSharedTests(bq_test_case.BQTestCase):
    @classmethod
    def setUpClass(cls, use_mocks=False):
        # type: () -> None
        """Set up class"""
        super(BQSharedTests, cls).setUpClass(use_mocks=use_mocks)

    @classmethod
    def create_mock_tables(cls):
def test_to_dataframe_w_tqdm_pending():
    import concurrent.futures
    from unittest import mock

    import pandas
    from google.cloud.bigquery import table
    from google.cloud.bigquery.job import QueryJob as target_class
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL

    # _make_job_resource, _make_connection, and _make_client are test helpers
    # defined elsewhere in this test module.
    begun_resource = _make_job_resource(job_type="query")
    schema = [
        SchemaField("name", "STRING", mode="NULLABLE"),
        SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
        {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
        {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
    ]

    connection = _make_connection({})
    client = _make_client(connection=connection)
    job = target_class.from_api_repr(begun_resource, client)

    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = table.RowIterator(client, api_request, path, schema)

    job._properties["statistics"] = {
        "query": {
            "queryPlan": [
                {"name": "S00: Input", "id": "0", "status": "PENDING"},
                {"name": "S01: Output", "id": "1", "status": "COMPLETE"},
            ]
        },
    }
    reload_patch = mock.patch(
        "google.cloud.bigquery.job._AsyncJob.reload", autospec=True
    )
    result_patch = mock.patch(
        "google.cloud.bigquery.job.QueryJob.result",
        side_effect=[concurrent.futures.TimeoutError, row_iterator],
    )

    with result_patch as result_patch_tqdm, reload_patch:
        df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False)

    assert result_patch_tqdm.call_count == 2
    assert isinstance(df, pandas.DataFrame)
    assert len(df) == 4  # verify the number of rows
    assert list(df) == ["name", "age"]  # verify the column names
    result_patch_tqdm.assert_called_with(
        timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
    )
def test_to_arrow_w_tqdm_w_pending_status():
    import concurrent.futures
    from unittest import mock

    import pyarrow
    from google.cloud.bigquery import table
    from google.cloud.bigquery.job import QueryJob as target_class
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL

    # _make_job_resource, _make_connection, and _make_client are test helpers
    # defined elsewhere in this test module.
    begun_resource = _make_job_resource(job_type="query")
    rows = [
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
        {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
    ]
    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    connection = _make_connection({})
    client = _make_client(connection=connection)
    job = target_class.from_api_repr(begun_resource, client)

    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = table.RowIterator(client, api_request, path, schema)

    job._properties["statistics"] = {
        "query": {
            "queryPlan": [
                {"name": "S00: Input", "id": "0", "status": "PENDING"},
                {"name": "S01: Output", "id": "1", "status": "COMPLETE"},
            ]
        },
    }
    reload_patch = mock.patch(
        "google.cloud.bigquery.job._AsyncJob.reload", autospec=True
    )
    result_patch = mock.patch(
        "google.cloud.bigquery.job.QueryJob.result",
        side_effect=[concurrent.futures.TimeoutError, row_iterator],
    )

    with result_patch as result_patch_tqdm, reload_patch:
        tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False)

    assert result_patch_tqdm.call_count == 2
    assert isinstance(tbl, pyarrow.Table)
    assert tbl.num_rows == 2
    result_patch_tqdm.assert_called_with(
        timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
    )
def _get_schema_field(schema_field):
    # Convert one field entry of a JSON-style schema dict into a SchemaField.
    return SchemaField(
        name=schema_field['name'],
        field_type=schema_field['type'],
        mode=schema_field['mode'])
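# Usage sketch for the converter above; the input dict mirrors one field entry
# of a BigQuery JSON schema file (values are illustrative).
raw_field = {"name": "user_id", "type": "STRING", "mode": "REQUIRED"}
field = _get_schema_field(raw_field)
assert (field.name, field.field_type, field.mode) == ("user_id", "STRING", "REQUIRED")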