Example #1
0
 def test_read_from_table(self):
     """Reading a table via BigQuerySource yields the expected rows/schema."""
     mock_client = mock.Mock()
     mock_client.jobs.Insert.return_value = bigquery.Job(
         jobReference=bigquery.JobReference(jobId='somejob'))
     table_rows, schema, expected_rows = self.get_test_rows()
     mock_client.jobs.GetQueryResults.return_value = (
         bigquery.GetQueryResultsResponse(
             jobComplete=True, rows=table_rows, schema=schema))
     source = beam.io.BigQuerySource('dataset.table')
     with source.reader(mock_client) as reader:
         collected = [row for row in reader]
     self.assertEqual(collected, expected_rows)
     self.assertEqual(schema, reader.schema)
Example #2
0
 def test_read_from_query_unflatten_records(self):
     """A query source with flatten_results=False exposes that setting."""
     mock_client = mock.Mock()
     mock_client.jobs.Insert.return_value = bigquery.Job(
         jobReference=bigquery.JobReference(jobId='somejob'))
     table_rows, schema, expected_rows = self.get_test_rows()
     mock_client.jobs.GetQueryResults.return_value = (
         bigquery.GetQueryResultsResponse(
             jobComplete=True, rows=table_rows, schema=schema))
     source = beam.io.BigQuerySource(query='query', flatten_results=False)
     with source.reader(mock_client) as reader:
         collected = list(reader)
     self.assertEqual(collected, expected_rows)
     self.assertEqual(schema, reader.schema)
     # Legacy SQL is the default dialect; flattening was explicitly disabled.
     self.assertTrue(reader.use_legacy_sql)
     self.assertFalse(reader.flatten_results)
Example #3
0
 def test_read_from_table_as_tablerows(self):
   """With coder=TableRowJsonCoder the reader yields raw TableRow objects."""
   mock_client = mock.Mock()
   mock_client.jobs.Insert.return_value = bigquery.Job(
       jobReference=bigquery.JobReference(jobId='somejob'))
   table_rows, schema, _ = self.get_test_rows()
   mock_client.jobs.GetQueryResults.return_value = (
       bigquery.GetQueryResultsResponse(
           jobComplete=True, rows=table_rows, schema=schema))
   # Passing TableRowJsonCoder is the signal that the caller wants the
   # rows handed back as TableRows instead of converted values.
   source = beam.io.BigQuerySource('dataset.table', coder=TableRowJsonCoder)
   with source.reader(mock_client) as reader:
     collected = list(reader)
   self.assertEqual(collected, table_rows)
   self.assertEqual(schema, reader.schema)
Example #4
0
File: bigquery.py  Project: jyucoeng/beam
  def _start_query_job(self, project_id, query, use_legacy_sql, flatten_results,
                       job_id, dry_run=False):
    """Submits a BigQuery query job and returns the server-side job id.

    Results are written to a temporary destination table (allowing large
    results); when dry_run is True the job is only validated, not executed.
    """
    job_reference = bigquery.JobReference(jobId=job_id, projectId=project_id)
    query_config = bigquery.JobConfigurationQuery(
        query=query,
        useLegacySql=use_legacy_sql,
        allowLargeResults=True,
        destinationTable=self._get_temp_table(project_id),
        flattenResults=flatten_results)
    insert_request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id,
        job=bigquery.Job(
            configuration=bigquery.JobConfiguration(
                dryRun=dry_run, query=query_config),
            jobReference=job_reference))

    response = self.client.jobs.Insert(insert_request)
    return response.jobReference.jobId
Example #5
0
 def test_read_from_table_and_job_complete_retry(self, patched_time_sleep):
   """The reader waits and polls again when the job is not yet complete."""
   mock_client = mock.Mock()
   mock_client.jobs.Insert.return_value = bigquery.Job(
       jobReference=bigquery.JobReference(jobId='somejob'))
   table_rows, schema, expected_rows = self.get_test_rows()
   # The first poll reports the job as still running, which exercises the
   # code path where the reader has to sleep and retry before reading rows.
   still_running = bigquery.GetQueryResultsResponse(jobComplete=False)
   finished = bigquery.GetQueryResultsResponse(
       jobComplete=True, rows=table_rows, schema=schema)
   mock_client.jobs.GetQueryResults.side_effect = [still_running, finished]
   with beam.io.BigQuerySource('dataset.table').reader(mock_client) as reader:
     collected = list(reader)
   self.assertEqual(collected, expected_rows)
Example #6
0
 def test_read_from_table_and_multiple_pages(self):
   """The reader follows pageToken and concatenates every result page."""
   mock_client = mock.Mock()
   mock_client.jobs.Insert.return_value = bigquery.Job(
       jobReference=bigquery.JobReference(jobId='somejob'))
   table_rows, schema, expected_rows = self.get_test_rows()
   # A pageToken on the first response forces a second GetQueryResults
   # call, exercising the multi-page code path.
   first_page = bigquery.GetQueryResultsResponse(
       jobComplete=True, rows=table_rows, schema=schema, pageToken='token')
   last_page = bigquery.GetQueryResultsResponse(
       jobComplete=True, rows=table_rows, schema=schema)
   mock_client.jobs.GetQueryResults.side_effect = [first_page, last_page]
   with beam.io.BigQuerySource('dataset.table').reader(mock_client) as reader:
     collected = list(reader)
   # Both pages carry the same rows, so the expectation is doubled.
   self.assertEqual(collected, expected_rows * 2)