Ejemplo n.º 1
0
  def _start_query_job(self, project_id, query, use_legacy_sql, flatten_results,
                       job_id, dry_run=False):
    reference = bigquery.JobReference(jobId=job_id, projectId=project_id)
    request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id,
        job=bigquery.Job(
            configuration=bigquery.JobConfiguration(
                dryRun=dry_run,
                query=bigquery.JobConfigurationQuery(
                    query=query,
                    useLegacySql=use_legacy_sql,
                    allowLargeResults=True,
                    destinationTable=self._get_temp_table(project_id),
                    flattenResults=flatten_results)),
            jobReference=reference))

    response = self.client.jobs.Insert(request)
    return response.jobReference.jobId
Ejemplo n.º 2
0
  def get_query_location(self, project_id, query, use_legacy_sql):
    """
    Get the location of tables referenced in a query.

    This method returns the location of the first referenced table in the query
    and depends on the BigQuery service to provide error handling for
    queries that reference tables in multiple locations.
    """
    reference = bigquery.JobReference(jobId=uuid.uuid4().hex,
                                      projectId=project_id)
    request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id,
        job=bigquery.Job(
            configuration=bigquery.JobConfiguration(
                dryRun=True,
                query=bigquery.JobConfigurationQuery(
                    query=query,
                    useLegacySql=use_legacy_sql,
                )),
            jobReference=reference))

    response = self.client.jobs.Insert(request)

    if response.statistics is None:
      # This behavior is only expected in tests
      logging.warning(
          "Unable to get location, missing response.statistics. Query: %s",
          query)
      return None

    referenced_tables = response.statistics.query.referencedTables
    if referenced_tables:  # Guards against both non-empty and non-None
      table = referenced_tables[0]
      location = self.get_table_location(
          table.projectId,
          table.datasetId,
          table.tableId)
      logging.info("Using location %r from table %r referenced by query %s",
                   location, table, query)
      return location

    logging.debug("Query %s does not reference any tables.", query)
    return None
Ejemplo n.º 3
0
  def _start_query_job(self, project_id, query, use_legacy_sql, flatten_results,
                       job_id, dry_run=False, kms_key=None):
    reference = bigquery.JobReference(jobId=job_id, projectId=project_id)
    request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id,
        job=bigquery.Job(
            configuration=bigquery.JobConfiguration(
                dryRun=dry_run,
                query=bigquery.JobConfigurationQuery(
                    query=query,
                    useLegacySql=use_legacy_sql,
                    allowLargeResults=not dry_run,
                    destinationTable=self._get_temp_table(project_id) if not
                    dry_run else None,
                    flattenResults=flatten_results,
                    destinationEncryptionConfiguration=bigquery
                    .EncryptionConfiguration(kmsKeyName=kms_key))),
            jobReference=reference))

    response = self.client.jobs.Insert(request)
    return response