    def write_to_table(
            self,
            query,
            dataset=None,
            table=None,
            allow_large_results=None,
            use_query_cache=None,
            priority=None,
            create_disposition=None,
            write_disposition=None,
    ):
        """
        Write query result to table. If dataset or table is not provided,
        BigQuery will write the result to a temporary table.

        Args:
            query: required BigQuery query string
            dataset: optional string id of the dataset
            table: optional string id of the table
            allow_large_results: optional boolean
            use_query_cache: optional boolean
            priority: optional string
                (one of the JOB_PRIORITY_* constants)
            create_disposition: optional string
                (one of the JOB_CREATE_* constants)
            write_disposition: optional string
                (one of the JOB_WRITE_* constants)

        Optional arguments with value None are determined by
        BigQuery as described:
        https://developers.google.com/bigquery/docs/reference/v2/jobs

        Returns:
            dict, a BigQuery job resource

        Raises:
            JobInsertException on http/auth failures or error in result
        """
        configuration = {
            "query": query,
        }

        if dataset and table:
            configuration['destinationTable'] = {
                "projectId": self.project_id,
                "tableId": table,
                "datasetId": dataset
            }

        if allow_large_results is not None:
            configuration['allowLargeResults'] = allow_large_results

        if use_query_cache is not None:
            configuration['useQueryCache'] = use_query_cache

        if priority:
            configuration['priority'] = priority

        if create_disposition:
            configuration['createDisposition'] = create_disposition

        if write_disposition:
            configuration['writeDisposition'] = write_disposition

        body = {
            "configuration": {
                'query': configuration
            }
        }

        logger.info("Creating write to table job %s" % body)
        job_resource = self.bigquery.jobs() \
            .insert(projectId=self.project_id, body=body) \
            .execute()
        self._raise_insert_exception_if_error(job_resource)
        return job_resource
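    # Example usage (a minimal sketch, not part of the library itself):
    # assumes `client` is an authenticated instance of this class and that
    # the dataset "mydataset" and destination table "query_results" are
    # hypothetical names that already exist in the project.
    #
    #   job = client.write_to_table(
    #       "SELECT name, COUNT(*) AS n FROM [mydataset.events] GROUP BY name",
    #       dataset="mydataset",
    #       table="query_results",
    #   )
    #   # `job` is the raw job resource dict returned by jobs().insert().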
    def export_data_to_uris(
            self,
            destination_uris,
            dataset,
            table,
            job=None,
            compression=None,
            destination_format=None,
            print_header=None,
            field_delimiter=None,
    ):
        """
        Export data from a BigQuery table to cloud storage.

        Args:
            destination_uris: required string or list of strings representing
                              the uris on cloud storage of the form:
                              gs://bucket/filename
            dataset: required string id of the dataset
            table: required string id of the table
            job: optional string identifying the job (a unique jobid
                 is automatically generated if not provided)
            compression: optional string
                (one of the JOB_COMPRESSION_* constants)
            destination_format: optional string
                (one of the JOB_DESTINATION_FORMAT_* constants)
            print_header: optional boolean
            field_delimiter: optional string

        Optional arguments with value None are determined by
        BigQuery as described:
        https://developers.google.com/bigquery/docs/reference/v2/jobs

        Returns:
            dict, a BigQuery job resource

        Raises:
            JobInsertException on http/auth failures or error in result
        """
        destination_uris = destination_uris \
            if isinstance(destination_uris, list) else [destination_uris]

        configuration = {
            "sourceTable": {
                "projectId": self.project_id,
                "tableId": table,
                "datasetId": dataset
            },
            "destinationUris": destination_uris,
        }

        if compression:
            configuration['compression'] = compression

        if destination_format:
            configuration['destinationFormat'] = destination_format

        if print_header is not None:
            configuration['printHeader'] = print_header

        if field_delimiter:
            configuration['fieldDelimiter'] = field_delimiter

        if not job:
            hex = self._generate_hex_for_uris(destination_uris)
            job = "{dataset}-{table}-{digest}".format(
                dataset=dataset,
                table=table,
                digest=hex
            )

        body = {
            "configuration": {
                'extract': configuration
            },
            "jobReference": {
                "projectId": self.project_id,
                "jobId": job
            }
        }

        logger.info("Creating export job %s" % body)
        job_resource = self.bigquery.jobs() \
            .insert(projectId=self.project_id, body=body) \
            .execute()
        self._raise_insert_exception_if_error(job_resource)
        return job_resource
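    # Example usage (a minimal sketch, not part of the library itself):
    # assumes `client` is an authenticated instance of this class; the
    # bucket "my-bucket", dataset "mydataset" and table "mytable" are
    # hypothetical names. A wildcard in the uri lets BigQuery shard the
    # export across multiple files.
    #
    #   job = client.export_data_to_uris(
    #       "gs://my-bucket/mytable-export-*.json",
    #       dataset="mydataset",
    #       table="mytable",
    #   )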
    def import_data_from_uris(
            self,
            source_uris,
            dataset,
            table,
            schema=None,
            job=None,
            source_format=None,
            create_disposition=None,
            write_disposition=None,
            encoding=None,
            ignore_unknown_values=None,
            max_bad_records=None,
            allow_jagged_rows=None,
            allow_quoted_newlines=None,
            field_delimiter=None,
            quote=None,
            skip_leading_rows=None,
    ):
        """
        Import data into a BigQuery table from cloud storage.

        Args:
            source_uris: required string or list of strings representing
                         the uris on cloud storage of the form:
                         gs://bucket/filename
            dataset: required string id of the dataset
            table: required string id of the table
            schema: optional list representing the bigquery schema
            job: optional string identifying the job (a unique jobid
                 is automatically generated if not provided)
            source_format: optional string
                (one of the JOB_SOURCE_FORMAT_* constants)
            create_disposition: optional string
                (one of the JOB_CREATE_* constants)
            write_disposition: optional string
                (one of the JOB_WRITE_* constants)
            encoding: optional string
                (one of the JOB_ENCODING_* constants)
            ignore_unknown_values: optional boolean
            max_bad_records: optional int
            allow_jagged_rows: optional boolean for csv only
            allow_quoted_newlines: optional boolean for csv only
            field_delimiter: optional string for csv only
            quote: optional string, the quote character, for csv only
            skip_leading_rows: optional int for csv only

        Optional arguments with value None are determined by
        BigQuery as described:
        https://developers.google.com/bigquery/docs/reference/v2/jobs

        Returns:
            dict, a BigQuery job resource

        Raises:
            JobInsertException on http/auth failures or error in result
        """
        source_uris = source_uris if isinstance(source_uris, list) \
            else [source_uris]

        configuration = {
            "destinationTable": {
                "projectId": self.project_id,
                "tableId": table,
                "datasetId": dataset
            },
            "sourceUris": source_uris,
        }

        if max_bad_records:
            configuration['maxBadRecords'] = max_bad_records

        if ignore_unknown_values:
            configuration['ignoreUnknownValues'] = ignore_unknown_values

        if create_disposition:
            configuration['createDisposition'] = create_disposition

        if write_disposition:
            configuration['writeDisposition'] = write_disposition

        if encoding:
            configuration['encoding'] = encoding

        if schema:
            configuration['schema'] = {'fields': schema}

        if source_format:
            configuration['sourceFormat'] = source_format

        if not job:
            hex = self._generate_hex_for_uris(source_uris)
            job = "{dataset}-{table}-{digest}".format(
                dataset=dataset,
                table=table,
                digest=hex
            )

        if source_format == JOB_SOURCE_FORMAT_CSV:
            if field_delimiter:
                configuration['fieldDelimiter'] = field_delimiter

            if allow_jagged_rows:
                configuration['allowJaggedRows'] = allow_jagged_rows

            if allow_quoted_newlines:
                configuration['allowQuotedNewlines'] = allow_quoted_newlines

            if quote:
                configuration['quote'] = quote

            if skip_leading_rows:
                configuration['skipLeadingRows'] = skip_leading_rows

        elif field_delimiter or allow_jagged_rows \
                or allow_quoted_newlines or quote or skip_leading_rows:
            all_values = dict(field_delimiter=field_delimiter,
                              allow_jagged_rows=allow_jagged_rows,
                              allow_quoted_newlines=allow_quoted_newlines,
                              skip_leading_rows=skip_leading_rows,
                              quote=quote)
            non_null_values = dict((k, v) for k, v in all_values.items() if v)
            raise Exception("Parameters field_delimiter, allow_jagged_rows, "
                            "allow_quoted_newlines, quote and "
                            "skip_leading_rows are only allowed when "
                            "source_format=JOB_SOURCE_FORMAT_CSV: %s"
                            % non_null_values)

        body = {
            "configuration": {
                'load': configuration
            },
            "jobReference": {
                "projectId": self.project_id,
                "jobId": job
            }
        }

        logger.info("Creating load job %s" % body)
        job_resource = self.bigquery.jobs() \
            .insert(projectId=self.project_id, body=body) \
            .execute()
        self._raise_insert_exception_if_error(job_resource)
        return job_resource
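    # Example usage (a minimal sketch, not part of the library itself):
    # assumes `client` is an authenticated instance of this class; the
    # bucket, dataset, table and schema below are hypothetical. The
    # csv-only parameters are accepted here because source_format is
    # JOB_SOURCE_FORMAT_CSV, the constant checked in the method above.
    #
    #   schema = [
    #       {"name": "name", "type": "STRING", "mode": "NULLABLE"},
    #       {"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
    #   ]
    #   job = client.import_data_from_uris(
    #       ["gs://my-bucket/people-*.csv"],
    #       dataset="mydataset",
    #       table="people",
    #       schema=schema,
    #       source_format=JOB_SOURCE_FORMAT_CSV,
    #       skip_leading_rows=1,
    #   )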