Ejemplo n.º 1
0
    def _insert_copy_job(self,
                         project_id,
                         job_id,
                         from_table_reference,
                         to_table_reference,
                         create_disposition=None,
                         write_disposition=None):
        """Submit a BigQuery table-copy job and return its job reference.

        Args:
          project_id: Project id used for both the insert request and the
            job reference.
          job_id: Identifier assigned to the job reference.
          from_table_reference: Table reference passed as the copy source.
          to_table_reference: Table reference passed as the copy destination.
          create_disposition: Optional value forwarded as the copy
            configuration's ``createDisposition``.
          write_disposition: Optional value forwarded as the copy
            configuration's ``writeDisposition``.

        Returns:
          The ``jobReference`` attribute of the insert response.
        """
        job_reference = bigquery.JobReference(
            jobId=job_id, projectId=project_id)
        copy_config = bigquery.JobConfigurationTableCopy(
            destinationTable=to_table_reference,
            sourceTable=from_table_reference,
            createDisposition=create_disposition,
            writeDisposition=write_disposition,
        )
        copy_job = bigquery.Job(
            configuration=bigquery.JobConfiguration(copy=copy_config),
            jobReference=job_reference,
        )
        insert_request = bigquery.BigqueryJobsInsertRequest(
            projectId=project_id, job=copy_job)

        # Both the outgoing request and the service response are logged.
        logging.info("Inserting job request: %s", insert_request)
        insert_response = self.client.jobs.Insert(insert_request)
        logging.info("Response was %s", insert_response)
        return insert_response.jobReference
Ejemplo n.º 2
0
 def _insert_load_job(self,
                      project_id,
                      job_id,
                      table_reference,
                      source_uris,
                      schema=None,
                      write_disposition=None,
                      create_disposition=None,
                      additional_load_parameters=None):
     """Submit a newline-delimited-JSON load job and return its reference.

     Args:
       project_id: Project id used for the insert request and job reference.
       job_id: Identifier assigned to the job reference.
       table_reference: Table reference passed as the load destination.
       source_uris: Value passed as the load configuration's ``sourceUris``.
       schema: Schema forwarded to the load configuration. The sentinel
         string ``'SCHEMA_AUTODETECT'`` is not forwarded; it switches
         ``autodetect`` on and leaves the schema unset instead.
       write_disposition: Optional ``writeDisposition`` for the load.
       create_disposition: Optional ``createDisposition`` for the load.
       additional_load_parameters: Optional dict expanded as extra keyword
         arguments of ``JobConfigurationLoad``; a falsy value means none.

     Returns:
       The ``jobReference`` attribute of the insert response.
     """
     wants_autodetect = schema == 'SCHEMA_AUTODETECT'
     explicit_schema = None if wants_autodetect else schema
     extra_load_kwargs = additional_load_parameters or {}

     job_reference = bigquery.JobReference(jobId=job_id, projectId=project_id)
     load_config = bigquery.JobConfigurationLoad(
         sourceUris=source_uris,
         destinationTable=table_reference,
         schema=explicit_schema,
         writeDisposition=write_disposition,
         createDisposition=create_disposition,
         sourceFormat='NEWLINE_DELIMITED_JSON',
         autodetect=wants_autodetect,
         **extra_load_kwargs)
     load_job = bigquery.Job(
         configuration=bigquery.JobConfiguration(load=load_config),
         jobReference=job_reference,
     )
     insert_request = bigquery.BigqueryJobsInsertRequest(
         projectId=project_id, job=load_job)
     return self.client.jobs.Insert(insert_request).jobReference
Ejemplo n.º 3
0
 def _insert_load_job(self,
                      project_id,
                      job_id,
                      table_reference,
                      source_uris,
                      schema=None,
                      write_disposition=None,
                      create_disposition=None):
   """Submit a newline-delimited-JSON load job and return its reference.

   Args:
     project_id: Project id used for the insert request and job reference.
     job_id: Identifier assigned to the job reference.
     table_reference: Table reference passed as the load destination.
     source_uris: Value passed as the load configuration's ``sourceUris``.
     schema: Optional schema for the load; when it is None the load
       configuration is created with ``autodetect`` enabled.
     write_disposition: Optional ``writeDisposition`` for the load.
     create_disposition: Optional ``createDisposition`` for the load.

   Returns:
     The ``jobReference`` attribute of the insert response.
   """
   job_reference = bigquery.JobReference(jobId=job_id, projectId=project_id)
   load_config = bigquery.JobConfigurationLoad(
       sourceUris=source_uris,
       destinationTable=table_reference,
       schema=schema,
       writeDisposition=write_disposition,
       createDisposition=create_disposition,
       sourceFormat='NEWLINE_DELIMITED_JSON',
       # Autodetection is requested only when no schema was supplied.
       autodetect=schema is None,
   )
   load_job = bigquery.Job(
       configuration=bigquery.JobConfiguration(load=load_config),
       jobReference=job_reference,
   )
   insert_request = bigquery.BigqueryJobsInsertRequest(
       projectId=project_id, job=load_job)
   return self.client.jobs.Insert(insert_request).jobReference
Ejemplo n.º 4
0
  def perform_extract_job(self, destination, job_id, table_reference,
                          destination_format, include_header=True,
                          compression=ExportCompression.NONE):
    """Starts a job to export data from BigQuery.

    The job is inserted into the project named by
    ``table_reference.projectId``.

    Args:
      destination: Value passed as the extract configuration's
        ``destinationUris``.
      job_id: Identifier assigned to the job reference.
      table_reference: Table reference used as the extract source.
      destination_format: Value passed as ``destinationFormat``.
      include_header: Value passed as ``printHeader``; defaults to True.
      compression: Value passed as ``compression``; defaults to
        ``ExportCompression.NONE``.

    Returns:
      bigquery.JobReference with the information about the job that was started.
    """
    source_project = table_reference.projectId
    extract_config = bigquery.JobConfigurationExtract(
        destinationUris=destination,
        sourceTable=table_reference,
        printHeader=include_header,
        destinationFormat=destination_format,
        compression=compression,
    )
    extract_job = bigquery.Job(
        configuration=bigquery.JobConfiguration(extract=extract_config),
        jobReference=bigquery.JobReference(
            jobId=job_id, projectId=source_project),
    )
    insert_request = bigquery.BigqueryJobsInsertRequest(
        projectId=source_project, job=extract_job)
    return self.client.jobs.Insert(insert_request).jobReference
Ejemplo n.º 5
0
  def _start_query_job(self, project_id, query, use_legacy_sql, flatten_results,
                       job_id, dry_run=False):
    """Insert a BigQuery query job and return the id of the inserted job.

    Args:
      project_id: Project id used for the insert request, the job reference
        and the temporary destination table.
      query: Query text to run.
      use_legacy_sql: Value forwarded as ``useLegacySql``.
      flatten_results: Value forwarded as ``flattenResults``.
      job_id: Identifier assigned to the job reference.
      dry_run: When True the job is submitted with ``dryRun`` set and without
        a destination table or ``allowLargeResults``.

    Returns:
      The ``jobId`` of the response's job reference.
    """
    reference = bigquery.JobReference(jobId=job_id, projectId=project_id)

    # A dry run does not execute the query, so it has no results to write.
    # Attaching the temporary table would make the dry run depend on that
    # table's dataset (presumably it may not exist yet -- confirm against
    # _get_temp_table). allowLargeResults is tied to having a destination.
    destination_table = None if dry_run else self._get_temp_table(project_id)

    request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id,
        job=bigquery.Job(
            configuration=bigquery.JobConfiguration(
                dryRun=dry_run,
                query=bigquery.JobConfigurationQuery(
                    query=query,
                    useLegacySql=use_legacy_sql,
                    allowLargeResults=not dry_run,
                    destinationTable=destination_table,
                    flattenResults=flatten_results)),
            jobReference=reference))

    response = self.client.jobs.Insert(request)
    return response.jobReference.jobId
Ejemplo n.º 6
0
  def _insert_load_job(self, project_id, job_id, table_reference, source_uris,
                       schema=None):
    """Insert a BigQuery load job and return the id of the inserted job.

    Args:
      project_id: Project id used for the insert request and job reference.
      job_id: Identifier assigned to the job reference.
      table_reference: Table reference passed as the load destination.
      source_uris: Value passed as the load configuration's ``sourceUris``.
      schema: Optional schema forwarded to the load configuration.

    Returns:
      The ``jobId`` of the response's job reference.
    """
    # NOTE(review): no sourceFormat is set here, so the service default
    # applies -- confirm that is what callers expect.
    reference = bigquery.JobReference(jobId=job_id, projectId=project_id)
    request = bigquery.BigqueryJobsInsertRequest(
        # Use the same project as the job reference; the table reference
        # message exposes ``projectId``, not ``project_id``.
        projectId=project_id,
        job=bigquery.Job(
            configuration=bigquery.JobConfiguration(
                # Message fields are camelCase; snake_case keywords are not
                # fields of JobConfigurationLoad.
                load=bigquery.JobConfigurationLoad(
                    sourceUris=source_uris,
                    destinationTable=table_reference,
                    schema=schema,
                )
            ),
            jobReference=reference,
        )
    )

    response = self.client.jobs.Insert(request)
    return response.jobReference.jobId
Ejemplo n.º 7
0
    def load_table(self, job_id, project_id, table_ref, table_schema, gcs_urls,
                   create_disposition, write_disposition):
        """Insert a newline-delimited-JSON load job and return its job id.

        Args:
          job_id: Identifier assigned to the job reference.
          project_id: Project id used for the insert request and job
            reference.
          table_ref: Table reference passed as the load destination.
          table_schema: Schema forwarded to the load configuration.
          gcs_urls: Value passed as the load configuration's ``sourceUris``.
          create_disposition: Value forwarded as ``createDisposition``.
          write_disposition: Value forwarded as ``writeDisposition``.

        Returns:
          The ``jobId`` of the response's job reference.
        """
        load_config = bq.JobConfigurationLoad(
            createDisposition=create_disposition,
            destinationTable=table_ref,
            schema=table_schema,
            sourceFormat="NEWLINE_DELIMITED_JSON",
            sourceUris=gcs_urls,
            writeDisposition=write_disposition,
        )
        load_job = bq.Job(
            configuration=bq.JobConfiguration(load=load_config),
            jobReference=bq.JobReference(jobId=job_id, projectId=project_id),
        )
        insert_request = bq.BigqueryJobsInsertRequest(
            projectId=project_id, job=load_job)

        insert_response = self.client.jobs.Insert(insert_request)
        return insert_response.jobReference.jobId
Ejemplo n.º 8
0
  def get_query_location(self, project_id, query, use_legacy_sql):
    """
    Get the location of tables referenced in a query.

    The query is submitted as a dry-run job under a freshly generated job id.
    The location of the first table listed in the response statistics is
    looked up with ``get_table_location`` and returned; handling of queries
    that reference tables in multiple locations is left to the BigQuery
    service.

    Returns:
      The location of the first referenced table, or None when the response
      carries no statistics or the query references no tables.
    """
    dry_run_config = bigquery.JobConfiguration(
        dryRun=True,
        query=bigquery.JobConfigurationQuery(
            query=query,
            useLegacySql=use_legacy_sql,
        ))
    dry_run_job = bigquery.Job(
        configuration=dry_run_config,
        jobReference=bigquery.JobReference(
            jobId=uuid.uuid4().hex, projectId=project_id))
    insert_request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id, job=dry_run_job)

    response = self.client.jobs.Insert(insert_request)

    stats = response.statistics
    if stats is None:
      # This behavior is only expected in tests
      logging.warning(
          "Unable to get location, missing response.statistics. Query: %s",
          query)
      return None

    tables = stats.query.referencedTables
    if not tables:  # Covers both None and an empty list.
      logging.debug("Query %s does not reference any tables.", query)
      return None

    first_table = tables[0]
    location = self.get_table_location(
        first_table.projectId,
        first_table.datasetId,
        first_table.tableId)
    logging.info("Using location %r from table %r referenced by query %s",
                 location, first_table, query)
    return location
Ejemplo n.º 9
0
  def _start_query_job(self, project_id, query, use_legacy_sql, flatten_results,
                       job_id, dry_run=False, kms_key=None):
    """Insert a BigQuery query job and return the full insert response.

    Args:
      project_id: Project id used for the insert request, the job reference
        and the temporary destination table.
      query: Query text to run.
      use_legacy_sql: Value forwarded as ``useLegacySql``.
      flatten_results: Value forwarded as ``flattenResults``.
      job_id: Identifier assigned to the job reference.
      dry_run: When True the job is submitted with ``dryRun`` set, without a
        destination table and with ``allowLargeResults`` disabled.
      kms_key: Optional key name placed in the destination encryption
        configuration; the configuration message is built even when this is
        None.

    Returns:
      The response object returned by the jobs insert call.
    """
    job_reference = bigquery.JobReference(jobId=job_id, projectId=project_id)

    # Only a real run gets the temporary destination table.
    if dry_run:
      destination_table = None
    else:
      destination_table = self._get_temp_table(project_id)

    query_config = bigquery.JobConfigurationQuery(
        query=query,
        useLegacySql=use_legacy_sql,
        allowLargeResults=not dry_run,
        destinationTable=destination_table,
        flattenResults=flatten_results,
        destinationEncryptionConfiguration=bigquery.EncryptionConfiguration(
            kmsKeyName=kms_key))
    query_job = bigquery.Job(
        configuration=bigquery.JobConfiguration(
            dryRun=dry_run, query=query_config),
        jobReference=job_reference)
    insert_request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id, job=query_job)

    return self.client.jobs.Insert(insert_request)