Example #1
0
    def get_query_results(self, query, use_legacy_sql=False, max_wait_secs=None):
        # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
        """Returns a list of rows, each of which is a tuple of values.

        Args:
            query: A string with a complete SQL query.
            use_legacy_sql: Whether to use legacy SQL
            max_wait_secs: The maximum number of seconds to wait for the query to complete. If not
                set, the class default will be used.

        Returns:
            A list of tuples of values.
        """
        config = QueryJobConfig()
        if self.maximum_billing_tier:
            config.maximum_billing_tier = self.maximum_billing_tier

        config.use_legacy_sql = use_legacy_sql

        query_job = self.gclient.query(query, job_config=config,
                                       retry=self.default_retry_for_api_calls)
        # The above retry is for errors encountered in executing the jobs. The below retry is
        # for errors encountered in polling to see whether the job is done.
        # NOTE(review): `_retry` is a private attribute of QueryJob; this may break across
        # google-cloud-bigquery versions — confirm against the pinned client version.
        query_job._retry = self.default_retry_for_async_jobs

        rows = self._wait_for_job(query_job, query,
                                  max_wait_secs=max_wait_secs or self.max_wait_secs)
        if query_job.errors:
            logging.warning('Errors in get_query_results: {}'.format(query_job.errors))
        return [x.values() for x in list(rows)]
 def select_insert(self,
                   source_table_id,
                   destination_table_id,
                   query_field,
                   prefix='    ',
                   fg='yellow'):
     """Selects `query_field` from a source table into a destination table.

     Runs `SELECT {query_field} FROM {dataset}.{source_table_id}` and writes
     the result into `destination_table_id`, truncating any existing contents.

     Args:
         source_table_id: Table to read from (within this dataset).
         destination_table_id: Table to (over)write with the query result.
         query_field: The field expression to select.
         prefix: Prefix string passed through to `echo` output.
         fg: Foreground color passed through to `echo` output.

     Raises:
         RuntimeError: If the query job finishes with errors or in an
             unexpected state.
     """
     query = 'SELECT {query_field} FROM {dataset_id}.{source_table_id}'.format(
         query_field=query_field,
         dataset_id=self._dataset_ref.dataset_id,
         source_table_id=source_table_id)
     destination_table = self.dataset.table(destination_table_id)
     job_config = QueryJobConfig()
     job_config.use_legacy_sql = False
     job_config.use_query_cache = False
     job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
     job_config.destination = destination_table
     job = self._client.query(query, job_config)
     echo('Inserting... {0}'.format(job.job_id),
          prefix=prefix,
          fg=fg,
          no_color=self.no_color)
     echo('  {0}'.format(job.query),
          prefix=prefix,
          fg=fg,
          no_color=self.no_color)
     # result() blocks until the job completes, so the job should be DONE here.
     # Use an explicit raise instead of `assert`, which is stripped under -O.
     job.result()
     if job.state != 'DONE':
         raise RuntimeError(
             'Query job {0} ended in unexpected state {1}'.format(
                 job.job_id, job.state))
     error_result = job.error_result
     if error_result:
         raise RuntimeError(job.errors)
Example #3
0
    def execute(self,
                query,
                destination_table,
                write_disposition="WRITE_TRUNCATE",
                allow_large_results=True):
        """Runs a standard-SQL query, optionally writing to a destination table.

        :param query: query (resolved to SQL text via ``self.__get_query``)
        :param destination_table: target table
        :param write_disposition:  default is to replace existing table. To append: WRITE_APPEND
        :param allow_large_results: default to True
        :return:
        :raises Exception: if the submitted job reports errors.
        """
        query_configuration = QueryJobConfig()
        query_configuration.use_legacy_sql = False
        if destination_table:
            ref = TableReferenceBuilder(destination_table, self._dataset,
                                        self._project)
            query_configuration.write_disposition = write_disposition
            query_configuration.default_dataset = ref.dataset_reference
            query_configuration.destination = ref.table_reference
            query_configuration.allow_large_results = allow_large_results

        sql_query = self.__get_query(query)
        if not self._quiet:
            print("-- #### {}\n{}\n".format(destination_table or "",
                                            sql_query))

        # NOTE(review): `.errors` is inspected right after submission, before the
        # job has necessarily completed — verify whether a `.result()` call is
        # intended here to surface errors from the finished job.
        self._query_job = bigquery.Client(project=self._project).query(
            sql_query, job_config=query_configuration)
        if self._query_job.errors:
            raise Exception(self._query_job.errors)
Example #4
0
    def get_query_results(self,
                          query,
                          use_legacy_sql=False,
                          max_wait_secs=None):
        # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
        """Returns a list of rows, each of which is a tuple of values.

        Args:
            query: A string with a complete SQL query.
            use_legacy_sql: Whether to use legacy SQL
            max_wait_secs: The maximum number of seconds to wait for the query to complete. If not
                set, the class default will be used.

        Returns:
            A list of tuples of values.
        """
        config = QueryJobConfig()
        if self.maximum_billing_tier:
            config.maximum_billing_tier = self.maximum_billing_tier

        config.use_legacy_sql = use_legacy_sql

        query_job = self._run_async_query(query, job_config=config)

        rows = self._wait_for_job(query_job,
                                  query,
                                  max_wait_secs=max_wait_secs
                                  or self.max_wait_secs)
        # Surface (but do not raise on) any errors the finished job reported.
        if query_job.errors:
            logging.warning('Errors in get_query_results: {}'.format(
                query_job.errors))
        return [x.values() for x in list(rows)]
Example #5
0
def execute_sync_query(project_id, query_str, bq_client=None):
    """Runs a standard-SQL query synchronously and returns its rows as a list.

    Args:
        project_id: GCP project to bill the query to (used only when
            `bq_client` is not supplied).
        query_str: The SQL query text (standard SQL, cache disabled).
        bq_client: Optional pre-built BigQuery client; created from
            `project_id` when None.

    Returns:
        A list of result rows.
    """
    if bq_client is None:
        bq_client = bigquery.Client(project_id)
    config = QueryJobConfig()
    config.use_legacy_sql = False
    config.use_query_cache = False
    query_job = bq_client.query(query_str, job_config=config, location="EU")

    # Iterating a QueryJob waits for completion and yields the result rows.
    return list(query_job)
Example #6
0
    def create_table_from_query(
        self,
        query,  # type: str
        table_path,  # type: str
        write_disposition='WRITE_EMPTY',  # type: Optional[str]
        use_legacy_sql=False,  # type: Optional[bool]
        max_wait_secs=None,  # type: Optional[int]
        expected_schema=None  # type: Optional[List[SchemaField]]
    ):
        # type: (...) -> Any
        """Creates a table in BigQuery from a specified query.

        Args:
          query: The query to run.
          table_path: The path to the table (in the client's project) to write
              the results to.
          write_disposition: Specifies behavior if table already exists. See options here:
              https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs under
              configuration.query.writeDisposition
          use_legacy_sql: Whether the query is written in standard or legacy sql.
          max_wait_secs: Seconds to wait for the query before timing out. If not
                set, the class default will be used.
          expected_schema: The expected schema of the resulting table; unused in this implementation

        Returns:
          The result of waiting for the query job (whatever `_wait_for_job`
          returns).

        Raises:
          ValueError: If `write_disposition` is not one of the allowed values.
        """

        if write_disposition not in [
                'WRITE_TRUNCATE', 'WRITE_APPEND', 'WRITE_EMPTY'
        ]:
            raise ValueError(
                'write_disposition must be one of WRITE_TRUNCATE, '
                'WRITE_APPEND, or WRITE_EMPTY')

        config = QueryJobConfig()
        if self.maximum_billing_tier:
            config.maximum_billing_tier = self.maximum_billing_tier
        config.use_legacy_sql = use_legacy_sql
        config.write_disposition = write_disposition
        config.allow_large_results = True

        config.destination = self.get_table_reference_from_path(table_path)

        query_job = self._run_async_query(query, job_config=config)

        return self._wait_for_job(query_job,
                                  query,
                                  max_wait_secs=max_wait_secs
                                  or self.max_wait_secs)
Example #7
0
    def execute(self, query, tbl_ref=None, append=False, preview=True):
        """Runs (or previews) a standard-SQL query, optionally into a table.

        Args:
            query: Query to resolve via ``self.__get_query`` and execute.
            tbl_ref: Optional table reference; when set, results are written
                to it (truncating unless `append` is True).
            append: Append to the destination instead of truncating it.
            preview: When True, only print the SQL without executing it.

        Raises:
            Exception: If the submitted job reports errors.
        """
        sql_query = self.__get_query(query)
        if tbl_ref:
            print(f"-- ## {tbl_ref}")
        banner = "-- preview: \n" if preview else ""
        print(f"{banner}{sql_query}")
        if preview:
            return

        config = QueryJobConfig()
        config.use_legacy_sql = False
        if tbl_ref:
            config.write_disposition = "WRITE_APPEND" if append else "WRITE_TRUNCATE"
            config.default_dataset = tbl_ref.dataset_ref
            config.destination = tbl_ref.table_ref
            config.allow_large_results = True

        project = tbl_ref.project if tbl_ref else None
        job = self.connect(project).query(sql_query, job_config=config)
        if job.errors:
            raise Exception(job.errors)
Example #8
0
    def create_table_from_query(self,
                                query,  # type: str
                                table_path,  # type: str
                                write_disposition='WRITE_EMPTY',  # type: Optional[str]
                                use_legacy_sql=False,  # type: Optional[bool]
                                max_wait_secs=None,  # type: Optional[int]
                                expected_schema=None  # type: Optional[List[SchemaField]]
                                ):
        # type: (...) -> Any
        """Creates a table in BigQuery from a specified query.

        Args:
          query: The query to run.
          table_path: The path to the table (in the client's project) to write
              the results to.
          write_disposition: One of 'WRITE_TRUNCATE', 'WRITE_APPEND',
              'WRITE_EMPTY'. Default is WRITE_EMPTY.
          use_legacy_sql: Whether the query is written in standard or legacy sql.
          max_wait_secs: Seconds to wait for the query before timing out. If not
                set, the class default will be used.
          expected_schema: The expected schema of the resulting table; unused in this implementation

        Returns:
          The result rows of the completed query job.

        Raises:
          ValueError: If `write_disposition` is not one of the allowed values.
        """

        if write_disposition not in ['WRITE_TRUNCATE', 'WRITE_APPEND', 'WRITE_EMPTY']:
            raise ValueError('write_disposition must be one of WRITE_TRUNCATE, '
                             'WRITE_APPEND, or WRITE_EMPTY')

        config = QueryJobConfig()
        if self.maximum_billing_tier:
            config.maximum_billing_tier = self.maximum_billing_tier
        config.use_legacy_sql = use_legacy_sql
        config.write_disposition = write_disposition
        config.allow_large_results = True

        config.destination = self.get_table_reference_from_path(table_path)

        query_job = self.gclient.query(query, job_config=config, retry=self.default_retry)

        # result() blocks until the job completes (or the timeout elapses).
        return query_job.result(timeout=max_wait_secs or self.max_wait_secs)
Example #9
0
    def get_query_results(self, query, use_legacy_sql=False, max_wait_secs=None):
        # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
        """Returns a list of rows, each of which is a tuple of values.

        Args:
            query: A string with a complete SQL query.
            use_legacy_sql: Whether to use legacy SQL
            max_wait_secs: The maximum number of seconds to wait for the query to complete. If not
                set, the class default will be used.

        Returns:
            A list of tuples of values.
        """
        config = QueryJobConfig()
        if self.maximum_billing_tier:
            config.maximum_billing_tier = self.maximum_billing_tier

        config.use_legacy_sql = use_legacy_sql

        query_job = self.gclient.query(query, job_config=config, retry=self.default_retry)

        # result() blocks until the job completes (or the timeout elapses).
        rows = query_job.result(retry=self.default_retry,
                                timeout=max_wait_secs or self.max_wait_secs)
        return [x.values() for x in list(rows)]
Example #10
0
def create_config():
    """Builds a QueryJobConfig that uses standard (non-legacy) SQL."""
    job_config = QueryJobConfig()
    job_config.use_legacy_sql = False
    return job_config