def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField connection = _make_connection({}) client = _make_client(connection=connection) begun_resource = _make_job_resource(job_type="query") job = target_class.from_api_repr(begun_resource, client) schema = [ SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] rows = [ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) row_iterator = table.RowIterator(client, api_request, path, schema) result_patch = mock.patch( "google.cloud.bigquery.job.QueryJob.result", return_value=row_iterator, ) with result_patch as result_patch_tqdm: tbl = job.to_arrow(create_bqstorage_client=False, max_results=123) result_patch_tqdm.assert_called_once_with(max_results=123) assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2
def test_to_dataframe_w_tqdm(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL begun_resource = _make_job_resource(job_type="query") schema = [ SchemaField("name", "STRING", mode="NULLABLE"), SchemaField("age", "INTEGER", mode="NULLABLE"), ] rows = [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] connection = _make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) row_iterator = table.RowIterator(client, api_request, path, schema) job._properties["statistics"] = { "query": { "queryPlan": [ {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, ] }, } reload_patch = mock.patch( "google.cloud.bigquery.job._AsyncJob.reload", autospec=True ) result_patch = mock.patch( "google.cloud.bigquery.job.QueryJob.result", side_effect=[ concurrent.futures.TimeoutError, concurrent.futures.TimeoutError, row_iterator, ], ) with result_patch as result_patch_tqdm, reload_patch: df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) assert result_patch_tqdm.call_count == 3 assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df), ["name", "age"] # verify the column names result_patch_tqdm.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None )
def test_to_arrow_w_tqdm_w_query_plan(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL begun_resource = _make_job_resource(job_type="query") rows = [ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, ] schema = [ SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] connection = _make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) row_iterator = table.RowIterator(client, api_request, path, schema) job._properties["statistics"] = { "query": { "queryPlan": [ {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, ] }, } reload_patch = mock.patch( "google.cloud.bigquery.job._AsyncJob.reload", autospec=True ) result_patch = mock.patch( "google.cloud.bigquery.job.QueryJob.result", side_effect=[ concurrent.futures.TimeoutError, concurrent.futures.TimeoutError, row_iterator, ], ) with result_patch as result_patch_tqdm, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) assert result_patch_tqdm.call_count == 3 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL)