class QueryJob(_AsyncJob):
    """Asynchronous job: query tables.

    :type name: string
    :param name: the name of the job

    :type query: string
    :param query: SQL query string

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).

    :type udf_resources: tuple
    :param udf_resources: An iterable of
                          :class:`google.cloud.bigquery.job.UDFResource`
                          (empty by default)
    """
    _JOB_TYPE = 'query'
    _UDF_KEY = 'userDefinedFunctionResources'

    def __init__(self, name, query, client, udf_resources=()):
        super(QueryJob, self).__init__(name, client)
        self.query = query
        self.udf_resources = udf_resources
        self._configuration = _AsyncQueryConfiguration()

    allow_large_results = _TypedProperty('allow_large_results', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.allowLargeResults
    """

    create_disposition = CreateDisposition('create_disposition')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.createDisposition
    """

    default_dataset = _TypedProperty('default_dataset', Dataset)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset
    """

    destination = _TypedProperty('destination', Table)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.destinationTable
    """

    flatten_results = _TypedProperty('flatten_results', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.flattenResults
    """

    priority = QueryPriority('priority')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.priority
    """

    udf_resources = UDFResourcesProperty()

    use_query_cache = _TypedProperty('use_query_cache', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.useQueryCache
    """

    use_legacy_sql = _TypedProperty('use_legacy_sql', bool)
    """See:
    https://cloud.google.com/bigquery/docs/\
    reference/v2/jobs#configuration.query.useLegacySql
    """

    write_disposition = WriteDisposition('write_disposition')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.writeDisposition
    """

    def _destination_table_resource(self):
        """Create a JSON resource for the destination table.

        Helper for :meth:`_populate_config_resource` and
        :meth:`_scrub_local_properties`.
        """
        if self.destination is not None:
            return {
                'projectId': self.destination.project,
                'datasetId': self.destination.dataset_name,
                'tableId': self.destination.name,
            }

    def _populate_config_resource(self, configuration):
        """Helper for _build_resource: copy config properties to resource"""
        if self.allow_large_results is not None:
            configuration['allowLargeResults'] = self.allow_large_results
        if self.create_disposition is not None:
            configuration['createDisposition'] = self.create_disposition
        if self.default_dataset is not None:
            configuration['defaultDataset'] = {
                'projectId': self.default_dataset.project,
                'datasetId': self.default_dataset.name,
            }
        if self.destination is not None:
            table_res = self._destination_table_resource()
            configuration['destinationTable'] = table_res
        if self.flatten_results is not None:
            configuration['flattenResults'] = self.flatten_results
        if self.priority is not None:
            configuration['priority'] = self.priority
        if self.use_query_cache is not None:
            configuration['useQueryCache'] = self.use_query_cache
        if self.use_legacy_sql is not None:
            configuration['useLegacySql'] = self.use_legacy_sql
        if self.write_disposition is not None:
            configuration['writeDisposition'] = self.write_disposition
        if len(self._udf_resources) > 0:
            configuration[self._UDF_KEY] = _build_udf_resources(
                self._udf_resources)

    def _build_resource(self):
        """Generate a resource for :meth:`begin`."""
        resource = {
            'jobReference': {
                'projectId': self.project,
                'jobId': self.name,
            },
            'configuration': {
                self._JOB_TYPE: {
                    'query': self.query,
                },
            },
        }
        configuration = resource['configuration'][self._JOB_TYPE]
        self._populate_config_resource(configuration)
        return resource

    def _scrub_local_properties(self, cleaned):
        """Helper: handle subclass properties in cleaned.

        .. note:

           This method assumes that the project found in the resource matches
           the client's project.
        """
        configuration = cleaned['configuration']['query']

        dest_remote = configuration.get('destinationTable')

        if dest_remote is None:
            if self.destination is not None:
                del self.destination
        else:
            dest_local = self._destination_table_resource()
            if dest_remote != dest_local:
                dataset = self._client.dataset(dest_remote['datasetId'])
                self.destination = dataset.table(dest_remote['tableId'])

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory: construct a job given its API representation

        :type resource: dict
        :param resource: dataset job representation returned from the API

        :type client: :class:`google.cloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the dataset.

        :rtype: :class:`google.cloud.bigquery.job.QueryJob`
        :returns: Job parsed from ``resource``.
        """
        name, config = cls._get_resource_config(resource)
        query = config['query']
        job = cls(name, query, client=client)
        job._set_properties(resource)
        return job
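

# A minimal usage sketch for the ``QueryJob`` class above, assuming an
# authenticated ``google.cloud.bigquery.client.Client`` and that the
# ``_AsyncJob`` base class (defined elsewhere in this module) provides
# ``begin()``, which POSTs the resource from ``_build_resource()``.  The
# job name, dataset, and query text are illustrative, not part of any API.
def _example_query_job(client):
    query = 'SELECT COUNT(*) FROM `my_dataset.my_table`'
    job = QueryJob('my_query_job', query, client)
    job.use_legacy_sql = False     # _TypedProperty rejects non-bool values
    job.use_query_cache = True
    job.begin()                    # assumed inherited from _AsyncJob
    return job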


class ExtractTableToStorageJob(_AsyncJob):
    """Asynchronous job: extract data from a table into Cloud Storage.

    :type name: string
    :param name: the name of the job

    :type source: :class:`google.cloud.bigquery.table.Table`
    :param source: Table from which data is to be extracted.

    :type destination_uris: list of string
    :param destination_uris: URIs describing Cloud Storage blobs into which
                             extracted data will be written, in format
                             ``gs://<bucket_name>/<object_name_or_glob>``.

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).
    """
    _JOB_TYPE = 'extract'

    def __init__(self, name, source, destination_uris, client):
        super(ExtractTableToStorageJob, self).__init__(name, client)
        self.source = source
        self.destination_uris = destination_uris
        self._configuration = _ExtractConfiguration()

    compression = Compression('compression')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.compression
    """

    destination_format = DestinationFormat('destination_format')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.destinationFormat
    """

    field_delimiter = _TypedProperty('field_delimiter', six.string_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.fieldDelimiter
    """

    print_header = _TypedProperty('print_header', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.printHeader
    """

    def _populate_config_resource(self, configuration):
        """Helper for _build_resource: copy config properties to resource"""
        if self.compression is not None:
            configuration['compression'] = self.compression
        if self.destination_format is not None:
            configuration['destinationFormat'] = self.destination_format
        if self.field_delimiter is not None:
            configuration['fieldDelimiter'] = self.field_delimiter
        if self.print_header is not None:
            configuration['printHeader'] = self.print_header

    def _build_resource(self):
        """Generate a resource for :meth:`begin`."""
        source_ref = {
            'projectId': self.source.project,
            'datasetId': self.source.dataset_name,
            'tableId': self.source.name,
        }

        resource = {
            'jobReference': {
                'projectId': self.project,
                'jobId': self.name,
            },
            'configuration': {
                self._JOB_TYPE: {
                    'sourceTable': source_ref,
                    'destinationUris': self.destination_uris,
                },
            },
        }
        configuration = resource['configuration'][self._JOB_TYPE]
        self._populate_config_resource(configuration)

        return resource

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory: construct a job given its API representation

        .. note:

           This method assumes that the project found in the resource matches
           the client's project.

        :type resource: dict
        :param resource: dataset job representation returned from the API

        :type client: :class:`google.cloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the dataset.

        :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob`
        :returns: Job parsed from ``resource``.
        """
        name, config = cls._get_resource_config(resource)
        source_config = config['sourceTable']
        dataset = Dataset(source_config['datasetId'], client)
        source = Table(source_config['tableId'], dataset)
        destination_uris = config['destinationUris']
        job = cls(name, source, destination_uris, client=client)
        job._set_properties(resource)
        return job
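

# Illustrative sketch for ``ExtractTableToStorageJob``: export a table to
# Cloud Storage as gzipped CSV.  The bucket and table names are assumptions,
# and ``begin()`` is again assumed to come from ``_AsyncJob``.
def _example_extract_job(client, source_table):
    destination_uri = 'gs://my-bucket/exports/my_table-*.csv.gz'
    job = ExtractTableToStorageJob('my_extract_job', source_table,
                                   [destination_uri], client)
    job.destination_format = 'CSV'    # validated by DestinationFormat
    job.compression = 'GZIP'          # validated by Compression
    job.begin()
    return job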


class QueryResults(object):
    """Synchronous job: query tables.

    :type query: str
    :param query: SQL query string

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).

    :type udf_resources: tuple
    :param udf_resources: An iterable of
                          :class:`google.cloud.bigquery.job.UDFResource`
                          (empty by default)

    :type query_parameters: tuple
    :param query_parameters: An iterable of
                             :class:`google.cloud.bigquery._helpers.AbstractQueryParameter`
                             (empty by default)
    """

    _UDF_KEY = 'userDefinedFunctionResources'
    _QUERY_PARAMETERS_KEY = 'queryParameters'

    def __init__(self, query, client, udf_resources=(), query_parameters=()):
        self._client = client
        self._properties = {}
        self.query = query
        self._configuration = _SyncQueryConfiguration()
        self.udf_resources = udf_resources
        self.query_parameters = query_parameters
        self._job = None

    @classmethod
    def from_query_job(cls, job):
        """Factory: construct from an existing job.

        :type job: :class:`~google.cloud.bigquery.job.QueryJob`
        :param job: existing job

        :rtype: :class:`QueryResults`
        :returns: the instance, bound to the job
        """
        instance = cls(job.query, job._client, job.udf_resources)
        instance._job = job
        job_ref = instance._properties.setdefault('jobReference', {})
        job_ref['jobId'] = job.name
        if job.default_dataset is not None:
            instance.default_dataset = job.default_dataset
        if job.use_query_cache is not None:
            instance.use_query_cache = job.use_query_cache
        if job.use_legacy_sql is not None:
            instance.use_legacy_sql = job.use_legacy_sql
        return instance

    @property
    def project(self):
        """Project bound to the job.

        :rtype: str
        :returns: the project (derived from the client).
        """
        return self._client.project

    def _require_client(self, client):
        """Check client or verify over-ride.

        :type client: :class:`~google.cloud.bigquery.client.Client` or
                      ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current dataset.

        :rtype: :class:`google.cloud.bigquery.client.Client`
        :returns: The client passed in or the currently bound client.
        """
        if client is None:
            client = self._client
        return client

    @property
    def cache_hit(self):
        """Query results served from cache.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#cacheHit

        :rtype: bool or ``NoneType``
        :returns: True if the query results were served from cache (None
                  until set by the server).
        """
        return self._properties.get('cacheHit')

    @property
    def complete(self):
        """Server completed query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobComplete

        :rtype: bool or ``NoneType``
        :returns: True if the query completed on the server (None until set
                  by the server).
        """
        return self._properties.get('jobComplete')

    @property
    def errors(self):
        """Errors generated by the query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#errors

        :rtype: list of mapping, or ``NoneType``
        :returns: Mappings describing errors generated on the server (None
                  until set by the server).
        """
        return self._properties.get('errors')

    @property
    def name(self):
        """Job name, generated by the back-end.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobReference

        :rtype: str, or ``NoneType``
        :returns: the job name (None until set by the server).
        """
        return self._properties.get('jobReference', {}).get('jobId')

    @property
    def job(self):
        """Job instance used to run the query.

        :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType``
        :returns: Job instance used to run the query (None until
                  ``jobReference`` property is set by the server).
        """
        if self._job is None:
            job_ref = self._properties.get('jobReference')
            if job_ref is not None:
                self._job = QueryJob(job_ref['jobId'], self.query,
                                     self._client)

        return self._job

    @property
    def page_token(self):
        """Token for fetching next batch of results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#pageToken

        :rtype: str, or ``NoneType``
        :returns: Token generated on the server (None until set by the
                  server).
        """
        return self._properties.get('pageToken')

    @property
    def total_rows(self):
        """Total number of rows returned by the query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalRows

        :rtype: int, or ``NoneType``
        :returns: Count generated on the server (None until set by the
                  server).
        """
        return self._properties.get('totalRows')

    @property
    def total_bytes_processed(self):
        """Total number of bytes processed by the query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalBytesProcessed

        :rtype: int, or ``NoneType``
        :returns: Count generated on the server (None until set by the
                  server).
        """
        return self._properties.get('totalBytesProcessed')

    @property
    def rows(self):
        """Query results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#rows

        :rtype: list of tuples of row values, or ``NoneType``
        :returns: rows of query results (None until set by the server).
        """
        return _rows_from_json(self._properties.get('rows', ()), self.schema)

    @property
    def schema(self):
        """Schema for query results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#schema

        :rtype: list of :class:`SchemaField`, or ``NoneType``
        :returns: fields describing the schema (None until set by the
                  server).
""" return _parse_schema_resource(self._properties.get('schema', {})) default_dataset = _TypedProperty('default_dataset', Dataset) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#defaultDataset """ dry_run = _TypedProperty('dry_run', bool) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#dryRun """ max_results = _TypedProperty('max_results', six.integer_types) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#maxResults """ preserve_nulls = _TypedProperty('preserve_nulls', bool) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#preserveNulls """ query_parameters = QueryParametersProperty() timeout_ms = _TypedProperty('timeout_ms', six.integer_types) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#timeoutMs """ udf_resources = UDFResourcesProperty() use_query_cache = _TypedProperty('use_query_cache', bool) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#useQueryCache """ use_legacy_sql = _TypedProperty('use_legacy_sql', bool) """See: https://cloud.google.com/bigquery/docs/\ reference/v2/jobs/query#useLegacySql """ def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` :type api_response: httplib2.Response :param api_response: response returned from an API call """ self._properties.clear() self._properties.update(api_response) def _build_resource(self): """Generate a resource for :meth:`begin`.""" resource = {'query': self.query} if self.default_dataset is not None: resource['defaultDataset'] = { 'projectId': self.project, 'datasetId': self.default_dataset.name, } if self.max_results is not None: resource['maxResults'] = self.max_results if self.preserve_nulls is not None: resource['preserveNulls'] = self.preserve_nulls if self.timeout_ms is not None: resource['timeoutMs'] = self.timeout_ms if self.use_query_cache is not None: resource['useQueryCache'] = self.use_query_cache if self.use_legacy_sql is not None: resource['useLegacySql'] = self.use_legacy_sql if self.dry_run is not None: resource['dryRun'] = self.dry_run if len(self._udf_resources) > 0: resource[self._UDF_KEY] = [{ udf_resource.udf_type: udf_resource.value } for udf_resource in self._udf_resources] if len(self._query_parameters) > 0: resource[self._QUERY_PARAMETERS_KEY] = [ query_parameter.to_api_repr() for query_parameter in self._query_parameters ] if self._query_parameters[0].name is None: resource['parameterMode'] = 'POSITIONAL' else: resource['parameterMode'] = 'NAMED' return resource def run(self, client=None): """API call: run the query via a POST request See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. """ if self._job is not None: raise ValueError("Query job is already running.") client = self._require_client(client) path = '/projects/%s/queries' % (self.project, ) api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) def fetch_data(self, max_results=None, page_token=None, start_index=None, timeout_ms=None, client=None): """API call: fetch a page of query result data via a GET request See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults :type max_results: int :param max_results: (Optional) maximum number of rows to return. 

        :type page_token: str
        :param page_token: (Optional) token representing a cursor into the
                           table's rows.

        :type start_index: int
        :param start_index: (Optional) zero-based index of starting row

        :type timeout_ms: int
        :param timeout_ms: (Optional) timeout, in milliseconds, to wait for
                           query to complete

        :type client: :class:`~google.cloud.bigquery.client.Client` or
                      ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current dataset.

        :rtype: tuple
        :returns: ``(row_data, total_rows, page_token)``, where ``row_data``
                  is a list of tuples, one per result row, containing only
                  the values; ``total_rows`` is a count of the total number
                  of rows in the table; and ``page_token`` is an opaque
                  string which can be used to fetch the next batch of rows
                  (``None`` if no further batches can be fetched).
        :raises: ValueError if the query has not yet been executed.
        """
        if self.name is None:
            raise ValueError("Query not yet executed: call 'run()'")

        client = self._require_client(client)
        params = {}

        if max_results is not None:
            params['maxResults'] = max_results

        if page_token is not None:
            params['pageToken'] = page_token

        if start_index is not None:
            params['startIndex'] = start_index

        if timeout_ms is not None:
            params['timeoutMs'] = timeout_ms

        path = '/projects/%s/queries/%s' % (self.project, self.name)
        response = client._connection.api_request(method='GET',
                                                  path=path,
                                                  query_params=params)
        self._set_properties(response)

        total_rows = response.get('totalRows')
        if total_rows is not None:
            total_rows = int(total_rows)
        page_token = response.get('pageToken')
        rows_data = _rows_from_json(response.get('rows', ()), self.schema)

        return rows_data, total_rows, page_token
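

# Sketch of the synchronous flow implemented by ``QueryResults`` above:
# ``run()`` POSTs to the ``jobs.query`` endpoint, then ``fetch_data()``
# pages through ``jobs.getQueryResults`` until the page token runs out.
# The query text and page size are illustrative.
def _example_sync_query(client):
    results = QueryResults('SELECT name FROM `my_dataset.my_table`', client)
    results.use_legacy_sql = False
    results.timeout_ms = 10000     # wait up to 10s for the query to finish
    results.run()

    rows = []
    token = None
    while True:
        page, total_rows, token = results.fetch_data(max_results=100,
                                                     page_token=token)
        rows.extend(page)
        if token is None:          # no further batches to fetch
            break
    return rows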


class LoadTableFromStorageJob(_AsyncJob):
    """Asynchronous job for loading data into a table from Cloud Storage.

    :type name: string
    :param name: the name of the job

    :type destination: :class:`google.cloud.bigquery.table.Table`
    :param destination: Table into which data is to be loaded.

    :type source_uris: sequence of string
    :param source_uris: URIs of one or more data files to be loaded, in
                        format ``gs://<bucket_name>/<object_name_or_glob>``.

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).

    :type schema: list of :class:`google.cloud.bigquery.table.SchemaField`
    :param schema: The job's schema
    """

    _schema = None
    _JOB_TYPE = 'load'

    def __init__(self, name, destination, source_uris, client, schema=()):
        super(LoadTableFromStorageJob, self).__init__(name, client)
        self.destination = destination
        self.source_uris = source_uris
        # Let the @property do validation.
        self.schema = schema
        self._configuration = _LoadConfiguration()

    @property
    def schema(self):
        """Table's schema.

        :rtype: list of :class:`SchemaField`
        :returns: fields describing the schema
        """
        return list(self._schema)

    @schema.setter
    def schema(self, value):
        """Update table's schema

        :type value: list of :class:`SchemaField`
        :param value: fields describing the schema

        :raises: TypeError if 'value' is not a sequence, or ValueError if
                 any item in the sequence is not a SchemaField
        """
        if not all(isinstance(field, SchemaField) for field in value):
            raise ValueError('Schema items must be fields')
        self._schema = tuple(value)

    @property
    def input_file_bytes(self):
        """Count of bytes loaded from source files.

        :rtype: integer, or ``NoneType``
        :returns: the count (None until set from the server).
        """
        statistics = self._properties.get('statistics')
        if statistics is not None:
            return int(statistics['load']['inputFileBytes'])

    @property
    def input_files(self):
        """Count of source files.

        :rtype: integer, or ``NoneType``
        :returns: the count (None until set from the server).
        """
        statistics = self._properties.get('statistics')
        if statistics is not None:
            return int(statistics['load']['inputFiles'])

    @property
    def output_bytes(self):
        """Count of bytes saved to destination table.

        :rtype: integer, or ``NoneType``
        :returns: the count (None until set from the server).
        """
        statistics = self._properties.get('statistics')
        if statistics is not None:
            return int(statistics['load']['outputBytes'])

    @property
    def output_rows(self):
        """Count of rows saved to destination table.

        :rtype: integer, or ``NoneType``
        :returns: the count (None until set from the server).
""" statistics = self._properties.get('statistics') if statistics is not None: return int(statistics['load']['outputRows']) allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowJaggedRows """ allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowQuotedNewlines """ create_disposition = CreateDisposition('create_disposition') """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.createDisposition """ encoding = Encoding('encoding') """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding """ field_delimiter = _TypedProperty('field_delimiter', six.string_types) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.fieldDelimiter """ ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.ignoreUnknownValues """ max_bad_records = _TypedProperty('max_bad_records', six.integer_types) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.maxBadRecords """ quote_character = _TypedProperty('quote_character', six.string_types) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.quote """ skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.skipLeadingRows """ source_format = SourceFormat('source_format') """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.sourceFormat """ write_disposition = WriteDisposition('write_disposition') """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.writeDisposition """ def _populate_config_resource(self, configuration): """Helper for _build_resource: copy config properties to resource""" if self.allow_jagged_rows is not None: configuration['allowJaggedRows'] = self.allow_jagged_rows if self.allow_quoted_newlines is not None: configuration['allowQuotedNewlines'] = self.allow_quoted_newlines if self.create_disposition is not None: configuration['createDisposition'] = self.create_disposition if self.encoding is not None: configuration['encoding'] = self.encoding if self.field_delimiter is not None: configuration['fieldDelimiter'] = self.field_delimiter if self.ignore_unknown_values is not None: configuration['ignoreUnknownValues'] = self.ignore_unknown_values if self.max_bad_records is not None: configuration['maxBadRecords'] = self.max_bad_records if self.quote_character is not None: configuration['quote'] = self.quote_character if self.skip_leading_rows is not None: configuration['skipLeadingRows'] = self.skip_leading_rows if self.source_format is not None: configuration['sourceFormat'] = self.source_format if self.write_disposition is not None: configuration['writeDisposition'] = self.write_disposition def _build_resource(self): """Generate a resource for :meth:`begin`.""" resource = { 'jobReference': { 'projectId': self.project, 'jobId': self.name, }, 'configuration': { self._JOB_TYPE: { 'sourceUris': self.source_uris, 'destinationTable': { 'projectId': self.destination.project, 'datasetId': self.destination.dataset_name, 'tableId': self.destination.name, }, }, }, } configuration = resource['configuration'][self._JOB_TYPE] 
        self._populate_config_resource(configuration)

        if len(self.schema) > 0:
            configuration['schema'] = {
                'fields': _build_schema_resource(self.schema)}

        return resource

    def _scrub_local_properties(self, cleaned):
        """Helper: handle subclass properties in cleaned."""
        schema = cleaned.pop('schema', {'fields': ()})
        self.schema = _parse_schema_resource(schema)

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory: construct a job given its API representation

        .. note:

           This method assumes that the project found in the resource matches
           the client's project.

        :type resource: dict
        :param resource: dataset job representation returned from the API

        :type client: :class:`google.cloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the dataset.

        :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`
        :returns: Job parsed from ``resource``.
        """
        name, config = cls._get_resource_config(resource)
        dest_config = config['destinationTable']
        dataset = Dataset(dest_config['datasetId'], client)
        destination = Table(dest_config['tableId'], dataset)
        source_urls = config.get('sourceUris', ())
        job = cls(name, destination, source_urls, client=client)
        job._set_properties(resource)
        return job
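

# Sketch for ``LoadTableFromStorageJob``: load CSV files from Cloud Storage
# into a table, with a schema validated by the ``schema`` property above.
# Bucket, file, and field names are assumptions; ``begin()`` is assumed
# from ``_AsyncJob`` as in the other sketches.
def _example_load_job(client, destination_table):
    schema = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
    ]
    job = LoadTableFromStorageJob('my_load_job', destination_table,
                                  ['gs://my-bucket/people-*.csv'], client,
                                  schema=schema)
    job.source_format = 'CSV'     # validated by SourceFormat
    job.skip_leading_rows = 1     # skip the CSV header row
    job.begin()
    return job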


class QueryJob(_AsyncJob):
    """Asynchronous job: query tables.

    :type name: str
    :param name: the name of the job

    :type query: str
    :param query: SQL query string

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).

    :type udf_resources: tuple
    :param udf_resources: An iterable of
                          :class:`google.cloud.bigquery._helpers.UDFResource`
                          (empty by default)

    :type query_parameters: tuple
    :param query_parameters: An iterable of
                             :class:`google.cloud.bigquery._helpers.AbstractQueryParameter`
                             (empty by default)
    """
    _JOB_TYPE = 'query'
    _UDF_KEY = 'userDefinedFunctionResources'
    _QUERY_PARAMETERS_KEY = 'queryParameters'

    def __init__(self, name, query, client, udf_resources=(),
                 query_parameters=()):
        super(QueryJob, self).__init__(name, client)
        self.query = query
        self.udf_resources = udf_resources
        self.query_parameters = query_parameters
        self._configuration = _AsyncQueryConfiguration()
        self._query_results = None

    allow_large_results = _TypedProperty('allow_large_results', bool)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults
    """

    create_disposition = CreateDisposition('create_disposition')
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition
    """

    default_dataset = _TypedProperty('default_dataset', Dataset)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset
    """

    destination = _TypedProperty('destination', Table)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable
    """

    flatten_results = _TypedProperty('flatten_results', bool)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults
    """

    priority = QueryPriority('priority')
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority
    """

    query_parameters = QueryParametersProperty()

    udf_resources = UDFResourcesProperty()

    use_query_cache = _TypedProperty('use_query_cache', bool)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache
    """

    use_legacy_sql = _TypedProperty('use_legacy_sql', bool)
    """See
    https://cloud.google.com/bigquery/docs/\
    reference/v2/jobs#configuration.query.useLegacySql
    """

    dry_run = _TypedProperty('dry_run', bool)
    """See
    https://cloud.google.com/bigquery/docs/\
    reference/rest/v2/jobs#configuration.dryRun
    """

    write_disposition = WriteDisposition('write_disposition')
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition
    """

    maximum_billing_tier = _TypedProperty('maximum_billing_tier', int)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier
    """

    maximum_bytes_billed = _TypedProperty('maximum_bytes_billed', int)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled
    """

    def _destination_table_resource(self):
        """Create a JSON resource for the destination table.

        Helper for :meth:`_populate_config_resource` and
        :meth:`_scrub_local_properties`.
        """
        if self.destination is not None:
            return {
                'projectId': self.destination.project,
                'datasetId': self.destination.dataset_name,
                'tableId': self.destination.name,
            }

    def _populate_config_resource_booleans(self, configuration):
        """Helper for _populate_config_resource."""
        if self.allow_large_results is not None:
            configuration['allowLargeResults'] = self.allow_large_results
        if self.flatten_results is not None:
            configuration['flattenResults'] = self.flatten_results
        if self.use_query_cache is not None:
            configuration['useQueryCache'] = self.use_query_cache
        if self.use_legacy_sql is not None:
            configuration['useLegacySql'] = self.use_legacy_sql

    def _populate_config_resource(self, configuration):
        """Helper for _build_resource: copy config properties to resource"""
        self._populate_config_resource_booleans(configuration)

        if self.create_disposition is not None:
            configuration['createDisposition'] = self.create_disposition
        if self.default_dataset is not None:
            configuration['defaultDataset'] = {
                'projectId': self.default_dataset.project,
                'datasetId': self.default_dataset.name,
            }
        if self.destination is not None:
            table_res = self._destination_table_resource()
            configuration['destinationTable'] = table_res
        if self.priority is not None:
            configuration['priority'] = self.priority
        if self.write_disposition is not None:
            configuration['writeDisposition'] = self.write_disposition
        if self.maximum_billing_tier is not None:
            configuration['maximumBillingTier'] = self.maximum_billing_tier
        if self.maximum_bytes_billed is not None:
            configuration['maximumBytesBilled'] = self.maximum_bytes_billed
        if len(self._udf_resources) > 0:
            configuration[self._UDF_KEY] = [
                {udf_resource.udf_type: udf_resource.value}
                for udf_resource in self._udf_resources
            ]
        if len(self._query_parameters) > 0:
            configuration[self._QUERY_PARAMETERS_KEY] = [
                query_parameter.to_api_repr()
                for query_parameter in self._query_parameters
            ]
            # Parameters are either all positional (unnamed) or all named;
            # inspect the first one to pick the mode.
            if self._query_parameters[0].name is None:
                configuration['parameterMode'] = 'POSITIONAL'
            else:
                configuration['parameterMode'] = 'NAMED'

    def _build_resource(self):
        """Generate a resource for :meth:`begin`."""
        resource = {
            'jobReference': {
                'projectId': self.project,
                'jobId': self.name,
            },
            'configuration': {
                self._JOB_TYPE: {
                    'query': self.query,
                },
            },
        }

        # ``dryRun`` lives at the configuration level, not inside the
        # ``query`` sub-configuration.
        if self.dry_run is not None:
            resource['configuration']['dryRun'] = self.dry_run

        configuration = resource['configuration'][self._JOB_TYPE]
        self._populate_config_resource(configuration)

        return resource

    def _scrub_local_properties(self, cleaned):
        """Helper: handle subclass properties in cleaned.

        .. note:

           This method assumes that the project found in the resource matches
           the client's project.
        """
        configuration = cleaned['configuration']['query']
        self.query = configuration['query']

        dest_remote = configuration.get('destinationTable')

        if dest_remote is None:
            if self.destination is not None:
                del self.destination
        else:
            dest_local = self._destination_table_resource()
            if dest_remote != dest_local:
                dataset = self._client.dataset(dest_remote['datasetId'])
                self.destination = dataset.table(dest_remote['tableId'])

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory: construct a job given its API representation

        :type resource: dict
        :param resource: dataset job representation returned from the API

        :type client: :class:`google.cloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the dataset.

        :rtype: :class:`google.cloud.bigquery.job.QueryJob`
        :returns: Job parsed from ``resource``.
        """
        name, config = cls._get_resource_config(resource)
        query = config['query']
        job = cls(name, query, client=client)
        job._set_properties(resource)
        return job

    def query_results(self):
        """Construct a QueryResults instance, bound to this job.

        :rtype: :class:`~google.cloud.bigquery.query.QueryResults`
        :returns: results instance
        """
        if not self._query_results:
            self._query_results = self._client._get_query_results(self.name)
        return self._query_results

    def done(self):
        """Refresh the job and check if it is complete.

        :rtype: bool
        :returns: True if the job is complete, False otherwise.
        """
        # Do not refresh if the state is already done, as the job will not
        # change once complete.
        if self.state != _DONE_STATE:
            self._query_results = self._client._get_query_results(self.name)

            # Only reload the job once we know the query is complete.
            # This will ensure that fields such as the destination table are
            # correctly populated.
            if self._query_results.complete:
                self.reload()

        return self.state == _DONE_STATE

    def result(self, timeout=None):
        """Start the job and wait for it to complete and get the result.

        :type timeout: int
        :param timeout: How long to wait for job to complete before raising
            a :class:`TimeoutError`.

        :rtype: :class:`~google.api.core.page_iterator.Iterator`
        :returns: Iterator of row data :class:`tuple`s. During each page,
            the iterator will have the ``total_rows`` attribute set, which
            counts the total number of rows **in the result set** (this is
            distinct from the total number of rows in the current page:
            ``iterator.page.num_items``).
        :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the
            job failed or :class:`TimeoutError` if the job did not complete
            in the given timeout.
        """
        super(QueryJob, self).result(timeout=timeout)
        # Return an iterator instead of returning the job.
        return self.query_results().fetch_data()
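

# Sketch of the blocking flow added by ``result()`` on the second
# ``QueryJob`` class above: begin the job, wait for completion, then read
# rows.  Per ``result()``, the return value is
# ``query_results().fetch_data()``; whether that is a row iterator or a
# ``(rows, total_rows, token)`` tuple depends on the ``QueryResults``
# version in use, so the exact shape here is an assumption.
def _example_async_query(client):
    job = QueryJob('my_async_query_job',
                   'SELECT COUNT(*) FROM `my_dataset.my_table`', client)
    job.use_legacy_sql = False
    job.begin()                    # assumed inherited from _AsyncJob
    rows = job.result(timeout=60)  # raises on failure or timeout
    return rows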