def _start_query_job(self, project_id, query, use_legacy_sql, flatten_results, job_id, dry_run=False): reference = bigquery.JobReference(jobId=job_id, projectId=project_id) request = bigquery.BigqueryJobsInsertRequest( projectId=project_id, job=bigquery.Job( configuration=bigquery.JobConfiguration( dryRun=dry_run, query=bigquery.JobConfigurationQuery( query=query, useLegacySql=use_legacy_sql, allowLargeResults=True, destinationTable=self._get_temp_table(project_id), flattenResults=flatten_results)), jobReference=reference)) response = self.client.jobs.Insert(request) return response.jobReference.jobId
def get_query_location(self, project_id, query, use_legacy_sql): """ Get the location of tables referenced in a query. This method returns the location of the first referenced table in the query and depends on the BigQuery service to provide error handling for queries that reference tables in multiple locations. """ reference = bigquery.JobReference(jobId=uuid.uuid4().hex, projectId=project_id) request = bigquery.BigqueryJobsInsertRequest( projectId=project_id, job=bigquery.Job( configuration=bigquery.JobConfiguration( dryRun=True, query=bigquery.JobConfigurationQuery( query=query, useLegacySql=use_legacy_sql, )), jobReference=reference)) response = self.client.jobs.Insert(request) if response.statistics is None: # This behavior is only expected in tests logging.warning( "Unable to get location, missing response.statistics. Query: %s", query) return None referenced_tables = response.statistics.query.referencedTables if referenced_tables: # Guards against both non-empty and non-None table = referenced_tables[0] location = self.get_table_location( table.projectId, table.datasetId, table.tableId) logging.info("Using location %r from table %r referenced by query %s", location, table, query) return location logging.debug("Query %s does not reference any tables.", query) return None
def _start_query_job(self, project_id, query, use_legacy_sql, flatten_results, job_id, dry_run=False, kms_key=None): reference = bigquery.JobReference(jobId=job_id, projectId=project_id) request = bigquery.BigqueryJobsInsertRequest( projectId=project_id, job=bigquery.Job( configuration=bigquery.JobConfiguration( dryRun=dry_run, query=bigquery.JobConfigurationQuery( query=query, useLegacySql=use_legacy_sql, allowLargeResults=not dry_run, destinationTable=self._get_temp_table(project_id) if not dry_run else None, flattenResults=flatten_results, destinationEncryptionConfiguration=bigquery .EncryptionConfiguration(kmsKeyName=kms_key))), jobReference=reference)) response = self.client.jobs.Insert(request) return response