Example #1
class QueryJob(_AsyncJob):
    """Asynchronous job: query tables.

    :type name: string
    :param name: the name of the job

    :type query: string
    :param query: SQL query string

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).

    :type udf_resources: tuple
    :param udf_resources: An iterable of
                        :class:`google.cloud.bigquery.job.UDFResource`
                        (empty by default)
    """
    _JOB_TYPE = 'query'
    _UDF_KEY = 'userDefinedFunctionResources'

    def __init__(self, name, query, client, udf_resources=()):
        super(QueryJob, self).__init__(name, client)
        self.query = query
        self.udf_resources = udf_resources
        self._configuration = _AsyncQueryConfiguration()

    allow_large_results = _TypedProperty('allow_large_results', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.allowLargeResults
    """

    create_disposition = CreateDisposition('create_disposition')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.createDisposition
    """

    default_dataset = _TypedProperty('default_dataset', Dataset)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset
    """

    destination = _TypedProperty('destination', Table)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.destinationTable
    """

    flatten_results = _TypedProperty('flatten_results', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.flattenResults
    """

    priority = QueryPriority('priority')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.priority
    """

    udf_resources = UDFResourcesProperty()

    use_query_cache = _TypedProperty('use_query_cache', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.useQueryCache
    """

    use_legacy_sql = _TypedProperty('use_legacy_sql', bool)
    """See:
    https://cloud.google.com/bigquery/docs/\
    reference/v2/jobs#configuration.query.useLegacySql
    """

    write_disposition = WriteDisposition('write_disposition')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.writeDisposition
    """

    def _destination_table_resource(self):
        """Create a JSON resource for the destination table.

        Helper for :meth:`_populate_config_resource` and
        :meth:`_scrub_local_properties`
        """
        if self.destination is not None:
            return {
                'projectId': self.destination.project,
                'datasetId': self.destination.dataset_name,
                'tableId': self.destination.name,
            }

    def _populate_config_resource(self, configuration):
        """Helper for _build_resource: copy config properties to resource"""
        if self.allow_large_results is not None:
            configuration['allowLargeResults'] = self.allow_large_results
        if self.create_disposition is not None:
            configuration['createDisposition'] = self.create_disposition
        if self.default_dataset is not None:
            configuration['defaultDataset'] = {
                'projectId': self.default_dataset.project,
                'datasetId': self.default_dataset.name,
            }
        if self.destination is not None:
            table_res = self._destination_table_resource()
            configuration['destinationTable'] = table_res
        if self.flatten_results is not None:
            configuration['flattenResults'] = self.flatten_results
        if self.priority is not None:
            configuration['priority'] = self.priority
        if self.use_query_cache is not None:
            configuration['useQueryCache'] = self.use_query_cache
        if self.use_legacy_sql is not None:
            configuration['useLegacySql'] = self.use_legacy_sql
        if self.write_disposition is not None:
            configuration['writeDisposition'] = self.write_disposition
        if len(self._udf_resources) > 0:
            configuration[self._UDF_KEY] = _build_udf_resources(
                self._udf_resources)

    def _build_resource(self):
        """Generate a resource for :meth:`begin`."""

        resource = {
            'jobReference': {
                'projectId': self.project,
                'jobId': self.name,
            },
            'configuration': {
                self._JOB_TYPE: {
                    'query': self.query,
                },
            },
        }
        configuration = resource['configuration'][self._JOB_TYPE]
        self._populate_config_resource(configuration)

        return resource

    def _scrub_local_properties(self, cleaned):
        """Helper:  handle subclass properties in cleaned.

        .. note::

           This method assumes that the project found in the resource matches
           the client's project.
        """
        configuration = cleaned['configuration']['query']
        dest_remote = configuration.get('destinationTable')

        if dest_remote is None:
            if self.destination is not None:
                del self.destination
        else:
            dest_local = self._destination_table_resource()
            if dest_remote != dest_local:
                dataset = self._client.dataset(dest_remote['datasetId'])
                self.destination = dataset.table(dest_remote['tableId'])

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory:  construct a job given its API representation

        :type resource: dict
        :param resource: dataset job representation returned from the API

        :type client: :class:`google.cloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the dataset.

        :rtype: :class:`google.cloud.bigquery.job.QueryJob`
        :returns: Job parsed from ``resource``.
        """
        name, config = cls._get_resource_config(resource)
        query = config['query']
        job = cls(name, query, client=client)
        job._set_properties(resource)
        return job
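
A minimal usage sketch for this class, assuming a configured client and
that the ``_AsyncJob`` base class supplies the ``begin()``, ``reload()``,
and ``state`` members referenced above; the project, job name, and query
are placeholders:

import time

from google.cloud import bigquery

client = bigquery.Client(project='my-project')  # placeholder project
job = QueryJob('my-query-job', 'SELECT 1', client)
job.use_query_cache = True   # _TypedProperty rejects non-bool values
job.begin()                  # POSTs the resource from _build_resource()

while True:                  # poll until the server marks the job done
    job.reload()
    if job.state == 'DONE':
        break
    time.sleep(1)
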
Example #2
class ExtractTableToStorageJob(_AsyncJob):
    """Asynchronous job: extract data from a table into Cloud Storage.

    :type name: string
    :param name: the name of the job

    :type source: :class:`google.cloud.bigquery.table.Table`
    :param source: Table from which data is to be extracted.

    :type destination_uris: list of string
    :param destination_uris: URIs describing Cloud Storage blobs into which
                             extracted data will be written, in format
                             ``gs://<bucket_name>/<object_name_or_glob>``.

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).
    """
    _JOB_TYPE = 'extract'

    def __init__(self, name, source, destination_uris, client):
        super(ExtractTableToStorageJob, self).__init__(name, client)
        self.source = source
        self.destination_uris = destination_uris
        self._configuration = _ExtractConfiguration()

    compression = Compression('compression')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.compression
    """

    destination_format = DestinationFormat('destination_format')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.destinationFormat
    """

    field_delimiter = _TypedProperty('field_delimiter', six.string_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.fieldDelimiter
    """

    print_header = _TypedProperty('print_header', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.printHeader
    """

    def _populate_config_resource(self, configuration):
        """Helper for _build_resource: copy config properties to resource"""
        if self.compression is not None:
            configuration['compression'] = self.compression
        if self.destination_format is not None:
            configuration['destinationFormat'] = self.destination_format
        if self.field_delimiter is not None:
            configuration['fieldDelimiter'] = self.field_delimiter
        if self.print_header is not None:
            configuration['printHeader'] = self.print_header

    def _build_resource(self):
        """Generate a resource for :meth:`begin`."""

        source_ref = {
            'projectId': self.source.project,
            'datasetId': self.source.dataset_name,
            'tableId': self.source.name,
        }

        resource = {
            'jobReference': {
                'projectId': self.project,
                'jobId': self.name,
            },
            'configuration': {
                self._JOB_TYPE: {
                    'sourceTable': source_ref,
                    'destinationUris': self.destination_uris,
                },
            },
        }
        configuration = resource['configuration'][self._JOB_TYPE]
        self._populate_config_resource(configuration)

        return resource

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory:  construct a job given its API representation

        .. note::

           This method assumes that the project found in the resource matches
           the client's project.

        :type resource: dict
        :param resource: dataset job representation returned from the API

        :type client: :class:`google.cloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the dataset.

        :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob`
        :returns: Job parsed from ``resource``.
        """
        name, config = cls._get_resource_config(resource)
        source_config = config['sourceTable']
        dataset = Dataset(source_config['datasetId'], client)
        source = Table(source_config['tableId'], dataset)
        destination_uris = config['destinationUris']
        job = cls(name, source, destination_uris, client=client)
        job._set_properties(resource)
        return job
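
A minimal usage sketch, assuming a configured client whose ``dataset()``
and ``table()`` factories behave as in ``from_api_repr`` above; bucket,
dataset, and table names are placeholders:

from google.cloud import bigquery

client = bigquery.Client(project='my-project')
source = client.dataset('my_dataset').table('my_table')
job = ExtractTableToStorageJob(
    'my-extract-job', source, ['gs://my-bucket/export-*.csv'], client)
job.destination_format = 'CSV'   # validated by the DestinationFormat enum
job.print_header = True          # _TypedProperty enforces bool
job.begin()                      # POSTs the resource from _build_resource()
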
Example #3
class QueryResults(object):
    """Synchronous job: query tables.

    :type query: str
    :param query: SQL query string

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).

    :type udf_resources: tuple
    :param udf_resources: An iterable of
                        :class:`google.cloud.bigquery.job.UDFResource`
                        (empty by default)

    :type query_parameters: tuple
    :param query_parameters:
        An iterable of
        :class:`google.cloud.bigquery._helpers.AbstractQueryParameter`
        (empty by default)
    """

    _UDF_KEY = 'userDefinedFunctionResources'
    _QUERY_PARAMETERS_KEY = 'queryParameters'

    def __init__(self, query, client, udf_resources=(), query_parameters=()):
        self._client = client
        self._properties = {}
        self.query = query
        self._configuration = _SyncQueryConfiguration()
        self.udf_resources = udf_resources
        self.query_parameters = query_parameters
        self._job = None

    @classmethod
    def from_query_job(cls, job):
        """Factory: construct from an existing job.

        :type job: :class:`~google.cloud.bigquery.job.QueryJob`
        :param job: existing job

        :rtype: :class:`QueryResults`
        :returns: the instance, bound to the job
        """
        instance = cls(job.query, job._client, job.udf_resources)
        instance._job = job
        job_ref = instance._properties.setdefault('jobReference', {})
        job_ref['jobId'] = job.name
        if job.default_dataset is not None:
            instance.default_dataset = job.default_dataset
        if job.use_query_cache is not None:
            instance.use_query_cache = job.use_query_cache
        if job.use_legacy_sql is not None:
            instance.use_legacy_sql = job.use_legacy_sql
        return instance

    @property
    def project(self):
        """Project bound to the job.

        :rtype: str
        :returns: the project (derived from the client).
        """
        return self._client.project

    def _require_client(self, client):
        """Check client or verify over-ride.

        :type client: :class:`~google.cloud.bigquery.client.Client` or
                      ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current instance.

        :rtype: :class:`google.cloud.bigquery.client.Client`
        :returns: The client passed in or the currently bound client.
        """
        if client is None:
            client = self._client
        return client

    @property
    def cache_hit(self):
        """Query results served from cache.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#cacheHit

        :rtype: bool or ``NoneType``
        :returns: True if the query results were served from cache (None
                  until set by the server).
        """
        return self._properties.get('cacheHit')

    @property
    def complete(self):
        """Server completed query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobComplete

        :rtype: bool or ``NoneType``
        :returns: True if the query completed on the server (None
                  until set by the server).
        """
        return self._properties.get('jobComplete')

    @property
    def errors(self):
        """Errors generated by the query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#errors

        :rtype: list of mapping, or ``NoneType``
        :returns: Mappings describing errors generated on the server (None
                  until set by the server).
        """
        return self._properties.get('errors')

    @property
    def name(self):
        """Job name, generated by the back-end.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobReference

        :rtype: str, or ``NoneType``
        :returns: the job name (None until set by the server).
        """
        return self._properties.get('jobReference', {}).get('jobId')

    @property
    def job(self):
        """Job instance used to run the query.

        :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType``
        :returns: Job instance used to run the query (None until
                  ``jobReference`` property is set by the server).
        """
        if self._job is None:
            job_ref = self._properties.get('jobReference')
            if job_ref is not None:
                self._job = QueryJob(job_ref['jobId'], self.query,
                                     self._client)
        return self._job

    @property
    def page_token(self):
        """Token for fetching next bach of results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#pageToken

        :rtype: str, or ``NoneType``
        :returns: Token generated on the server (None until set by the server).
        """
        return self._properties.get('pageToken')

    @property
    def total_rows(self):
        """Total number of rows returned by the query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalRows

        :rtype: int, or ``NoneType``
        :returns: Count generated on the server (None until set by the server).
        """
        return self._properties.get('totalRows')

    @property
    def total_bytes_processed(self):
        """Total number of bytes processed by the query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalBytesProcessed

        :rtype: int, or ``NoneType``
        :returns: Count generated on the server (None until set by the server).
        """
        return self._properties.get('totalBytesProcessed')

    @property
    def rows(self):
        """Query results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#rows

        :rtype: list of tuples of row values
        :returns: row data (an empty list until rows are set by the server).
        """
        return _rows_from_json(self._properties.get('rows', ()), self.schema)

    @property
    def schema(self):
        """Schema for query results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#schema

        :rtype: list of :class:`SchemaField`, or ``NoneType``
        :returns: fields describing the schema (None until set by the server).
        """
        return _parse_schema_resource(self._properties.get('schema', {}))

    default_dataset = _TypedProperty('default_dataset', Dataset)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#defaultDataset
    """

    dry_run = _TypedProperty('dry_run', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#dryRun
    """

    max_results = _TypedProperty('max_results', six.integer_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#maxResults
    """

    preserve_nulls = _TypedProperty('preserve_nulls', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#preserveNulls
    """

    query_parameters = QueryParametersProperty()

    timeout_ms = _TypedProperty('timeout_ms', six.integer_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#timeoutMs
    """

    udf_resources = UDFResourcesProperty()

    use_query_cache = _TypedProperty('use_query_cache', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#useQueryCache
    """

    use_legacy_sql = _TypedProperty('use_legacy_sql', bool)
    """See:
    https://cloud.google.com/bigquery/docs/\
    reference/v2/jobs/query#useLegacySql
    """

    def _set_properties(self, api_response):
        """Update properties from resource in body of ``api_response``

        :type api_response: dict
        :param api_response: JSON body of the response returned from an API
                             call
        """
        self._properties.clear()
        self._properties.update(api_response)

    def _build_resource(self):
        """Generate a resource for :meth:`begin`."""
        resource = {'query': self.query}

        if self.default_dataset is not None:
            resource['defaultDataset'] = {
                'projectId': self.project,
                'datasetId': self.default_dataset.name,
            }

        if self.max_results is not None:
            resource['maxResults'] = self.max_results

        if self.preserve_nulls is not None:
            resource['preserveNulls'] = self.preserve_nulls

        if self.timeout_ms is not None:
            resource['timeoutMs'] = self.timeout_ms

        if self.use_query_cache is not None:
            resource['useQueryCache'] = self.use_query_cache

        if self.use_legacy_sql is not None:
            resource['useLegacySql'] = self.use_legacy_sql

        if self.dry_run is not None:
            resource['dryRun'] = self.dry_run

        if len(self._udf_resources) > 0:
            resource[self._UDF_KEY] = [{
                udf_resource.udf_type:
                udf_resource.value
            } for udf_resource in self._udf_resources]
        if len(self._query_parameters) > 0:
            resource[self._QUERY_PARAMETERS_KEY] = [
                query_parameter.to_api_repr()
                for query_parameter in self._query_parameters
            ]
            if self._query_parameters[0].name is None:
                resource['parameterMode'] = 'POSITIONAL'
            else:
                resource['parameterMode'] = 'NAMED'

        return resource

    def run(self, client=None):
        """API call:  run the query via a POST request

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query

        :type client: :class:`~google.cloud.bigquery.client.Client` or
                      ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current instance.
        """
        if self._job is not None:
            raise ValueError("Query job is already running.")

        client = self._require_client(client)
        path = '/projects/%s/queries' % (self.project, )
        api_response = client._connection.api_request(
            method='POST', path=path, data=self._build_resource())
        self._set_properties(api_response)

    def fetch_data(self,
                   max_results=None,
                   page_token=None,
                   start_index=None,
                   timeout_ms=None,
                   client=None):
        """API call:  fetch a page of query result data via a GET request

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults

        :type max_results: int
        :param max_results: (Optional) maximum number of rows to return.

        :type page_token: str
        :param page_token:
            (Optional) token representing a cursor into the table's rows.

        :type start_index: int
        :param start_index: (Optional) zero-based index of starting row

        :type timeout_ms: int
        :param timeout_ms:
            (Optional) timeout, in milliseconds, to wait for query to complete

        :type client: :class:`~google.cloud.bigquery.client.Client` or
                      ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current instance.

        :rtype: tuple
        :returns: ``(row_data, total_rows, page_token)``, where ``row_data``
                  is a list of tuples, one per result row, containing only
                  the values;  ``total_rows`` is a count of the total number
                  of rows in the table;  and ``page_token`` is an opaque
                  string which can be used to fetch the next batch of rows
                  (``None`` if no further batches can be fetched).
        :raises: ValueError if the query has not yet been executed.
        """
        if self.name is None:
            raise ValueError("Query not yet executed:  call 'run()'")

        client = self._require_client(client)
        params = {}

        if max_results is not None:
            params['maxResults'] = max_results

        if page_token is not None:
            params['pageToken'] = page_token

        if start_index is not None:
            params['startIndex'] = start_index

        if timeout_ms is not None:
            params['timeoutMs'] = timeout_ms

        path = '/projects/%s/queries/%s' % (self.project, self.name)
        response = client._connection.api_request(method='GET',
                                                  path=path,
                                                  query_params=params)
        self._set_properties(response)

        total_rows = response.get('totalRows')
        if total_rows is not None:
            total_rows = int(total_rows)
        page_token = response.get('pageToken')
        rows_data = _rows_from_json(response.get('rows', ()), self.schema)

        return rows_data, total_rows, page_token
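
A minimal usage sketch of the synchronous flow implemented by ``run()``
and ``fetch_data()``; the project, dataset, and query are placeholders,
and the sketch assumes the query finishes within the requested timeout:

from google.cloud import bigquery

client = bigquery.Client(project='my-project')
query = QueryResults('SELECT name FROM my_dataset.people', client)
query.timeout_ms = 10000   # wait up to ten seconds for completion
query.run()                # POST to /projects/my-project/queries
if query.complete:         # False if the server timed out first
    rows, total_rows, page_token = query.fetch_data()
    for row in rows:
        print(row)
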
Example #4
class LoadTableFromStorageJob(_AsyncJob):
    """Asynchronous job for loading data into a table from CloudStorage.

    :type name: string
    :param name: the name of the job

    :type destination: :class:`google.cloud.bigquery.table.Table`
    :param destination: Table into which data is to be loaded.

    :type source_uris: sequence of string
    :param source_uris: URIs of one or more data files to be loaded, in
                        format ``gs://<bucket_name>/<object_name_or_glob>``.

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).

    :type schema: list of :class:`google.cloud.bigquery.table.SchemaField`
    :param schema: The job's schema
    """

    _schema = None
    _JOB_TYPE = 'load'

    def __init__(self, name, destination, source_uris, client, schema=()):
        super(LoadTableFromStorageJob, self).__init__(name, client)
        self.destination = destination
        self.source_uris = source_uris
        # Let the @property do validation.
        self.schema = schema
        self._configuration = _LoadConfiguration()

    @property
    def schema(self):
        """Table's schema.

        :rtype: list of :class:`SchemaField`
        :returns: fields describing the schema
        """
        return list(self._schema)

    @schema.setter
    def schema(self, value):
        """Update table's schema

        :type value: list of :class:`SchemaField`
        :param value: fields describing the schema

        :raises: TypeError if 'value' is not a sequence, or ValueError if
                 any item in the sequence is not a SchemaField
        """
        if not all(isinstance(field, SchemaField) for field in value):
            raise ValueError('Schema items must be fields')
        self._schema = tuple(value)

    @property
    def input_file_bytes(self):
        """Count of bytes loaded from source files.

        :rtype: integer, or ``NoneType``
        :returns: the count (None until set from the server).
        """
        statistics = self._properties.get('statistics')
        if statistics is not None:
            return int(statistics['load']['inputFileBytes'])

    @property
    def input_files(self):
        """Count of source files.

        :rtype: integer, or ``NoneType``
        :returns: the count (None until set from the server).
        """
        statistics = self._properties.get('statistics')
        if statistics is not None:
            return int(statistics['load']['inputFiles'])

    @property
    def output_bytes(self):
        """Count of bytes saved to destination table.

        :rtype: integer, or ``NoneType``
        :returns: the count (None until set from the server).
        """
        statistics = self._properties.get('statistics')
        if statistics is not None:
            return int(statistics['load']['outputBytes'])

    @property
    def output_rows(self):
        """Count of rows saved to destination table.

        :rtype: integer, or ``NoneType``
        :returns: the count (None until set from the server).
        """
        statistics = self._properties.get('statistics')
        if statistics is not None:
            return int(statistics['load']['outputRows'])

    allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowJaggedRows
    """

    allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowQuotedNewlines
    """

    create_disposition = CreateDisposition('create_disposition')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.createDisposition
    """

    encoding = Encoding('encoding')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
    """

    field_delimiter = _TypedProperty('field_delimiter', six.string_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.fieldDelimiter
    """

    ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.ignoreUnknownValues
    """

    max_bad_records = _TypedProperty('max_bad_records', six.integer_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.maxBadRecords
    """

    quote_character = _TypedProperty('quote_character', six.string_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.quote
    """

    skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.skipLeadingRows
    """

    source_format = SourceFormat('source_format')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.sourceFormat
    """

    write_disposition = WriteDisposition('write_disposition')
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.writeDisposition
    """

    def _populate_config_resource(self, configuration):
        """Helper for _build_resource: copy config properties to resource"""
        if self.allow_jagged_rows is not None:
            configuration['allowJaggedRows'] = self.allow_jagged_rows
        if self.allow_quoted_newlines is not None:
            configuration['allowQuotedNewlines'] = self.allow_quoted_newlines
        if self.create_disposition is not None:
            configuration['createDisposition'] = self.create_disposition
        if self.encoding is not None:
            configuration['encoding'] = self.encoding
        if self.field_delimiter is not None:
            configuration['fieldDelimiter'] = self.field_delimiter
        if self.ignore_unknown_values is not None:
            configuration['ignoreUnknownValues'] = self.ignore_unknown_values
        if self.max_bad_records is not None:
            configuration['maxBadRecords'] = self.max_bad_records
        if self.quote_character is not None:
            configuration['quote'] = self.quote_character
        if self.skip_leading_rows is not None:
            configuration['skipLeadingRows'] = self.skip_leading_rows
        if self.source_format is not None:
            configuration['sourceFormat'] = self.source_format
        if self.write_disposition is not None:
            configuration['writeDisposition'] = self.write_disposition

    def _build_resource(self):
        """Generate a resource for :meth:`begin`."""
        resource = {
            'jobReference': {
                'projectId': self.project,
                'jobId': self.name,
            },
            'configuration': {
                self._JOB_TYPE: {
                    'sourceUris': self.source_uris,
                    'destinationTable': {
                        'projectId': self.destination.project,
                        'datasetId': self.destination.dataset_name,
                        'tableId': self.destination.name,
                    },
                },
            },
        }
        configuration = resource['configuration'][self._JOB_TYPE]
        self._populate_config_resource(configuration)

        if len(self.schema) > 0:
            configuration['schema'] = {
                'fields': _build_schema_resource(self.schema)
            }

        return resource

    def _scrub_local_properties(self, cleaned):
        """Helper:  handle subclass properties in cleaned."""
        schema = cleaned.pop('schema', {'fields': ()})
        self.schema = _parse_schema_resource(schema)

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory:  construct a job given its API representation

        .. note::

           This method assumes that the project found in the resource matches
           the client's project.

        :type resource: dict
        :param resource: dataset job representation returned from the API

        :type client: :class:`google.cloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the dataset.

        :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`
        :returns: Job parsed from ``resource``.
        """
        name, config = cls._get_resource_config(resource)
        dest_config = config['destinationTable']
        dataset = Dataset(dest_config['datasetId'], client)
        destination = Table(dest_config['tableId'], dataset)
        source_urls = config.get('sourceUris', ())
        job = cls(name, destination, source_urls, client=client)
        job._set_properties(resource)
        return job
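
A minimal usage sketch, assuming a configured client and the
``google.cloud.bigquery.table.SchemaField`` class named in the docstring;
all bucket, dataset, and table names are placeholders:

from google.cloud import bigquery
from google.cloud.bigquery.table import SchemaField

client = bigquery.Client(project='my-project')
destination = client.dataset('my_dataset').table('people')
job = LoadTableFromStorageJob(
    'my-load-job', destination, ['gs://my-bucket/people-*.csv'], client,
    schema=[SchemaField('name', 'STRING'), SchemaField('age', 'INTEGER')])
job.source_format = 'CSV'    # validated by the SourceFormat enum property
job.skip_leading_rows = 1    # skip the CSV header row
job.begin()                  # POSTs the resource from _build_resource()
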
Example #5
class QueryJob(_AsyncJob):
    """Asynchronous job: query tables.

    :type name: str
    :param name: the name of the job

    :type query: str
    :param query: SQL query string

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).

    :type udf_resources: tuple
    :param udf_resources: An iterable of
                        :class:`google.cloud.bigquery._helpers.UDFResource`
                        (empty by default)

    :type query_parameters: tuple
    :param query_parameters:
        An iterable of
        :class:`google.cloud.bigquery._helpers.AbstractQueryParameter`
        (empty by default)
    """
    _JOB_TYPE = 'query'
    _UDF_KEY = 'userDefinedFunctionResources'
    _QUERY_PARAMETERS_KEY = 'queryParameters'

    def __init__(self,
                 name,
                 query,
                 client,
                 udf_resources=(),
                 query_parameters=()):
        super(QueryJob, self).__init__(name, client)
        self.query = query
        self.udf_resources = udf_resources
        self.query_parameters = query_parameters
        self._configuration = _AsyncQueryConfiguration()
        self._query_results = None

    allow_large_results = _TypedProperty('allow_large_results', bool)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults
    """

    create_disposition = CreateDisposition('create_disposition')
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition
    """

    default_dataset = _TypedProperty('default_dataset', Dataset)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset
    """

    destination = _TypedProperty('destination', Table)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable
    """

    flatten_results = _TypedProperty('flatten_results', bool)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults
    """

    priority = QueryPriority('priority')
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority
    """

    query_parameters = QueryParametersProperty()

    udf_resources = UDFResourcesProperty()

    use_query_cache = _TypedProperty('use_query_cache', bool)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache
    """

    use_legacy_sql = _TypedProperty('use_legacy_sql', bool)
    """See
    https://cloud.google.com/bigquery/docs/\
    reference/rest/v2/jobs#configuration.query.useLegacySql
    """

    dry_run = _TypedProperty('dry_run', bool)
    """See
    https://cloud.google.com/bigquery/docs/\
    reference/rest/v2/jobs#configuration.dryRun
    """

    write_disposition = WriteDisposition('write_disposition')
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition
    """

    maximum_billing_tier = _TypedProperty('maximum_billing_tier', int)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier
    """

    maximum_bytes_billed = _TypedProperty('maximum_bytes_billed', int)
    """See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled
    """

    def _destination_table_resource(self):
        """Create a JSON resource for the destination table.

        Helper for :meth:`_populate_config_resource` and
        :meth:`_scrub_local_properties`
        """
        if self.destination is not None:
            return {
                'projectId': self.destination.project,
                'datasetId': self.destination.dataset_name,
                'tableId': self.destination.name,
            }

    def _populate_config_resource_booleans(self, configuration):
        """Helper for _populate_config_resource."""
        if self.allow_large_results is not None:
            configuration['allowLargeResults'] = self.allow_large_results
        if self.flatten_results is not None:
            configuration['flattenResults'] = self.flatten_results
        if self.use_query_cache is not None:
            configuration['useQueryCache'] = self.use_query_cache
        if self.use_legacy_sql is not None:
            configuration['useLegacySql'] = self.use_legacy_sql

    def _populate_config_resource(self, configuration):
        """Helper for _build_resource: copy config properties to resource"""
        self._populate_config_resource_booleans(configuration)

        if self.create_disposition is not None:
            configuration['createDisposition'] = self.create_disposition
        if self.default_dataset is not None:
            configuration['defaultDataset'] = {
                'projectId': self.default_dataset.project,
                'datasetId': self.default_dataset.name,
            }
        if self.destination is not None:
            table_res = self._destination_table_resource()
            configuration['destinationTable'] = table_res
        if self.priority is not None:
            configuration['priority'] = self.priority
        if self.write_disposition is not None:
            configuration['writeDisposition'] = self.write_disposition
        if self.maximum_billing_tier is not None:
            configuration['maximumBillingTier'] = self.maximum_billing_tier
        if self.maximum_bytes_billed is not None:
            configuration['maximumBytesBilled'] = self.maximum_bytes_billed
        if len(self._udf_resources) > 0:
            configuration[self._UDF_KEY] = [{
                udf_resource.udf_type:
                udf_resource.value
            } for udf_resource in self._udf_resources]
        if len(self._query_parameters) > 0:
            configuration[self._QUERY_PARAMETERS_KEY] = [
                query_parameter.to_api_repr()
                for query_parameter in self._query_parameters
            ]
            if self._query_parameters[0].name is None:
                configuration['parameterMode'] = 'POSITIONAL'
            else:
                configuration['parameterMode'] = 'NAMED'

    def _build_resource(self):
        """Generate a resource for :meth:`begin`."""

        resource = {
            'jobReference': {
                'projectId': self.project,
                'jobId': self.name,
            },
            'configuration': {
                self._JOB_TYPE: {
                    'query': self.query,
                },
            },
        }

        if self.dry_run is not None:
            resource['configuration']['dryRun'] = self.dry_run

        configuration = resource['configuration'][self._JOB_TYPE]
        self._populate_config_resource(configuration)

        return resource

    def _scrub_local_properties(self, cleaned):
        """Helper:  handle subclass properties in cleaned.

        .. note::

           This method assumes that the project found in the resource matches
           the client's project.
        """
        configuration = cleaned['configuration']['query']

        self.query = configuration['query']
        dest_remote = configuration.get('destinationTable')

        if dest_remote is None:
            if self.destination is not None:
                del self.destination
        else:
            dest_local = self._destination_table_resource()
            if dest_remote != dest_local:
                dataset = self._client.dataset(dest_remote['datasetId'])
                self.destination = dataset.table(dest_remote['tableId'])

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory:  construct a job given its API representation

        :type resource: dict
        :param resource: dataset job representation returned from the API

        :type client: :class:`google.cloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the dataset.

        :rtype: :class:`google.cloud.bigquery.job.QueryJob`
        :returns: Job parsed from ``resource``.
        """
        name, config = cls._get_resource_config(resource)
        query = config['query']
        job = cls(name, query, client=client)
        job._set_properties(resource)
        return job

    def query_results(self):
        """Construct a QueryResults instance, bound to this job.

        :rtype: :class:`~google.cloud.bigquery.query.QueryResults`
        :returns: results instance
        """
        if not self._query_results:
            self._query_results = self._client._get_query_results(self.name)
        return self._query_results

    def done(self):
        """Refresh the job and checks if it is complete.

        :rtype: bool
        :returns: True if the job is complete, False otherwise.
        """
        # Do not refresh if the state is already done, as the job will not
        # change once complete.
        if self.state != _DONE_STATE:
            self._query_results = self._client._get_query_results(self.name)

            # Only reload the job once we know the query is complete.
            # This will ensure that fields such as the destination table are
            # correctly populated.
            if self._query_results.complete:
                self.reload()

        return self.state == _DONE_STATE

    def result(self, timeout=None):
        """Start the job and wait for it to complete and get the result.

        :type timeout: int
        :param timeout:
            How long to wait for job to complete before raising a
            :class:`TimeoutError`.

        :rtype: :class:`~google.api.core.page_iterator.Iterator`
        :returns:
            Iterator of row data :class:`tuple`s. During each page, the
            iterator will have the ``total_rows`` attribute set, which counts
            the total number of rows **in the result set** (this is distinct
            from the total number of rows in the current page:
            ``iterator.page.num_items``).

        :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job
            failed or  :class:`TimeoutError` if the job did not complete in the
            given timeout.
        """
        super(QueryJob, self).result(timeout=timeout)
        # Return an iterator instead of returning the job.
        return self.query_results().fetch_data()
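
A minimal usage sketch of the blocking flow this newer version adds: per
the ``result()`` docstring, the call starts the job, waits for completion
(raising ``TimeoutError`` if ``timeout`` elapses first), and returns a row
iterator. Project, job name, and query are placeholders:

from google.cloud import bigquery

client = bigquery.Client(project='my-project')
job = QueryJob('my-query-job', 'SELECT 17 AS answer', client)
job.use_legacy_sql = False
iterator = job.result(timeout=60)
for row in iterator:
    print(row)
print(iterator.total_rows)   # size of the full result set, per the docstring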