@mock.patch('time.sleep', return_value=None)
def test_read_from_table_and_job_complete_retry(self, patched_time_sleep):
  client = mock.Mock()
  client.jobs.Insert.return_value = bigquery.Job(
      jobReference=bigquery.JobReference(jobId='somejob'))
  table_rows, schema, expected_rows = self.get_test_rows()
  # Return jobComplete=False on first call to trigger the code path where
  # query needs to handle waiting a bit.
  client.jobs.GetQueryResults.side_effect = [
      bigquery.GetQueryResultsResponse(jobComplete=False),
      bigquery.GetQueryResultsResponse(
          jobComplete=True, rows=table_rows, schema=schema)]
  actual_rows = []
  with beam.io.BigQuerySource('dataset.table').reader(client) as reader:
    for row in reader:
      actual_rows.append(row)
  self.assertEqual(actual_rows, expected_rows)
def test_read_from_table_and_multiple_pages(self):
  client = mock.Mock()
  client.jobs.Insert.return_value = bigquery.Job(
      jobReference=bigquery.JobReference(jobId='somejob'))
  table_rows, schema, expected_rows = self.get_test_rows()
  # Return a pageToken on first call to trigger the code path where
  # query needs to handle multiple pages of results.
  client.jobs.GetQueryResults.side_effect = [
      bigquery.GetQueryResultsResponse(
          jobComplete=True, rows=table_rows, schema=schema,
          pageToken='token'),
      bigquery.GetQueryResultsResponse(
          jobComplete=True, rows=table_rows, schema=schema)]
  actual_rows = []
  with beam.io.BigQuerySource('dataset.table').reader(client) as reader:
    for row in reader:
      actual_rows.append(row)
  # We return expected rows for each of the two pages of results, so we
  # adjust our expectation below accordingly.
  self.assertEqual(actual_rows, expected_rows * 2)
def test_read_from_table(self):
  client = mock.Mock()
  client.jobs.Insert.return_value = bigquery.Job(
      jobReference=bigquery.JobReference(jobId='somejob'))
  table_rows, schema, expected_rows = self.get_test_rows()
  client.jobs.GetQueryResults.return_value = bigquery.GetQueryResultsResponse(
      jobComplete=True, rows=table_rows, schema=schema)
  actual_rows = []
  with beam.io.BigQuerySource('dataset.table').reader(client) as reader:
    for row in reader:
      actual_rows.append(row)
  self.assertEqual(actual_rows, expected_rows)
  self.assertEqual(schema, reader.schema)
def test_read_from_query_unflatten_records(self):
  client = mock.Mock()
  client.jobs.Insert.return_value = bigquery.Job(
      jobReference=bigquery.JobReference(jobId='somejob'))
  table_rows, schema, expected_rows = self.get_test_rows()
  client.jobs.GetQueryResults.return_value = bigquery.GetQueryResultsResponse(
      jobComplete=True, rows=table_rows, schema=schema)
  actual_rows = []
  with beam.io.BigQuerySource(
      query='query', flatten_results=False).reader(client) as reader:
    for row in reader:
      actual_rows.append(row)
  self.assertEqual(actual_rows, expected_rows)
  self.assertEqual(schema, reader.schema)
  self.assertTrue(reader.use_legacy_sql)
  self.assertFalse(reader.flatten_results)
def test_read_from_table_as_tablerows(self):
  client = mock.Mock()
  client.jobs.Insert.return_value = bigquery.Job(
      jobReference=bigquery.JobReference(jobId='somejob'))
  table_rows, schema, _ = self.get_test_rows()
  client.jobs.GetQueryResults.return_value = bigquery.GetQueryResultsResponse(
      jobComplete=True, rows=table_rows, schema=schema)
  actual_rows = []
  # We set the coder to TableRowJsonCoder, which is a signal that
  # the caller wants to see the rows as TableRows.
  with beam.io.BigQuerySource(
      'dataset.table', coder=TableRowJsonCoder).reader(client) as reader:
    for row in reader:
      actual_rows.append(row)
  self.assertEqual(actual_rows, table_rows)
  self.assertEqual(schema, reader.schema)
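# For reference, a minimal sketch of what a get_test_rows() fixture could
# return, assuming a single nullable STRING column named 'name'. This is an
# illustrative stand-in, not the actual helper used by the tests above: each
# element of table_rows is a bigquery.TableRow whose cells hold JSON values,
# schema describes the columns, and expected_rows holds the dictionaries the
# reader is expected to yield once rows are decoded.
def get_test_rows_sketch(self):
  # Assumes Beam's to_json_value helper is importable from this module path.
  from apache_beam.internal.gcp.json_value import to_json_value
  schema = bigquery.TableSchema(fields=[
      bigquery.TableFieldSchema(name='name', type='STRING', mode='NULLABLE')])
  table_rows = [
      bigquery.TableRow(f=[bigquery.TableCell(v=to_json_value('a'))]),
      bigquery.TableRow(f=[bigquery.TableCell(v=to_json_value('b'))])]
  expected_rows = [{'name': 'a'}, {'name': 'b'}]
  return table_rows, schema, expected_rows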