Example #1
0
 def test_read_from_table_and_multiple_pages(self):
     """Reads a table whose query results arrive across two result pages."""
     mock_client = mock.Mock()
     mock_client.jobs.Insert.return_value = bigquery.Job(
         jobReference=bigquery.JobReference(jobId='somejob'))
     table_rows, schema, expected_rows = self.get_test_rows()
     # The first response carries a pageToken, which forces the reader down
     # the multi-page code path; the second (token-less) response ends it.
     first_page = bigquery.GetQueryResultsResponse(
         jobComplete=True, rows=table_rows, schema=schema, pageToken='token')
     last_page = bigquery.GetQueryResultsResponse(
         jobComplete=True, rows=table_rows, schema=schema)
     mock_client.jobs.GetQueryResults.side_effect = [first_page, last_page]
     source = beam.io.BigQuerySource(
         'dataset.table', use_dataflow_native_source=True)
     with source.reader(mock_client) as reader:
         actual_rows = list(reader)
     # Each of the two pages yields the expected rows once, hence the
     # doubled expectation below.
     self.assertEqual(actual_rows, expected_rows * 2)
Example #2
0
 def test_read_from_table_and_job_complete_retry(self, patched_time_sleep):
     """Reads a table when the first poll reports the job as incomplete."""
     mock_client = mock.Mock()
     mock_client.jobs.Insert.return_value = bigquery.Job(
         jobReference=bigquery.JobReference(jobId='somejob'))
     table_rows, schema, expected_rows = self.get_test_rows()
     # jobComplete=False on the first poll exercises the wait-and-retry
     # path; the second poll then delivers the actual results.
     pending = bigquery.GetQueryResultsResponse(jobComplete=False)
     done = bigquery.GetQueryResultsResponse(
         jobComplete=True, rows=table_rows, schema=schema)
     mock_client.jobs.GetQueryResults.side_effect = [pending, done]
     with beam.io.BigQuerySource('dataset.table').reader(
             mock_client) as reader:
         actual_rows = list(reader)
     self.assertEqual(actual_rows, expected_rows)
 def test_read_from_table(self):
     """Reads a full table in a single response; checks rows and schema."""
     mock_client = mock.Mock()
     mock_client.jobs.Insert.return_value = bigquery.Job(
         jobReference=bigquery.JobReference(jobId='somejob'))
     table_rows, schema, expected_rows = self.get_test_rows()
     response = bigquery.GetQueryResultsResponse(
         jobComplete=True, rows=table_rows, schema=schema)
     mock_client.jobs.GetQueryResults.return_value = response
     with beam.io.BigQuerySource('dataset.table').reader(
             mock_client) as reader:
         actual_rows = list(reader)
     self.assertEqual(actual_rows, expected_rows)
     # The reader should also expose the schema it received.
     self.assertEqual(schema, reader.schema)
 def test_read_from_query_unflatten_records(self):
     """Reads via a query with flatten_results=False; checks reader flags."""
     mock_client = mock.Mock()
     mock_client.jobs.Insert.return_value = bigquery.Job(
         jobReference=bigquery.JobReference(jobId='somejob'))
     table_rows, schema, expected_rows = self.get_test_rows()
     mock_client.jobs.GetQueryResults.return_value = (
         bigquery.GetQueryResultsResponse(
             jobComplete=True, rows=table_rows, schema=schema))
     source = beam.io.BigQuerySource(query='query', flatten_results=False)
     with source.reader(mock_client) as reader:
         actual_rows = list(reader)
     self.assertEqual(actual_rows, expected_rows)
     self.assertEqual(schema, reader.schema)
     # The flatten_results argument must be reflected on the reader, and
     # use_legacy_sql is expected to remain enabled.
     self.assertTrue(reader.use_legacy_sql)
     self.assertFalse(reader.flatten_results)
Example #5
0
 def test_read_from_table_as_tablerows(self):
     """Reads a table with TableRowJsonCoder so rows come back as TableRows."""
     mock_client = mock.Mock()
     mock_client.jobs.Insert.return_value = bigquery.Job(
         jobReference=bigquery.JobReference(jobId='somejob'))
     table_rows, schema, _ = self.get_test_rows()
     mock_client.jobs.GetQueryResults.return_value = (
         bigquery.GetQueryResultsResponse(
             jobComplete=True, rows=table_rows, schema=schema))
     # Passing TableRowJsonCoder signals that the caller wants to see the
     # rows as TableRows, so the raw table_rows are the expected output.
     source = beam.io.BigQuerySource('dataset.table', coder=TableRowJsonCoder)
     with source.reader(mock_client) as reader:
         actual_rows = list(reader)
     self.assertEqual(actual_rows, table_rows)
     self.assertEqual(schema, reader.schema)