def get_query_results(self, query, use_legacy_sql=False, max_wait_secs=None):
    # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
    """Returns a list of rows, each of which is a tuple of values.

    Args:
        query: A string with a complete SQL query.
        use_legacy_sql: Whether to use legacy SQL.
        max_wait_secs: The maximum number of seconds to wait for the query to complete. If not
            set, the class default will be used.

    Returns:
        A list of tuples of values.
    """
    config = QueryJobConfig()
    if self.maximum_billing_tier:
        config.maximum_billing_tier = self.maximum_billing_tier
    config.use_legacy_sql = use_legacy_sql

    query_job = self.gclient.query(query, job_config=config,
                                   retry=self.default_retry_for_api_calls)
    # The above retry is for errors encountered in executing the jobs. The below retry is
    # for errors encountered in polling to see whether the job is done.
    query_job._retry = self.default_retry_for_async_jobs

    rows = self._wait_for_job(query_job, query,
                              max_wait_secs=max_wait_secs or self.max_wait_secs)
    if query_job.errors:
        logging.warning('Errors in get_query_results: {}'.format(query_job.errors))
    return [x.values() for x in list(rows)]
def run_query_job(
    querytext: str,
    temp_table: str = None,
    query_job_config: QueryJobConfig = QueryJobConfig()
) -> QueryJob:
    """
    Set up and run a query job.

    Arguments:
        querytext {str} -- The querytext for the job.

    Keyword Arguments:
        temp_table {str} -- A temporary table in which to materialize results. The results will
            be streamed from this table when done. This is required for all large queries, and
            strongly recommended. (default: {None})
        query_job_config {QueryJobConfig} -- A QueryJobConfig to start from.
            (default: {QueryJobConfig()})

    Returns:
        QueryJob -- The resulting job.
    """
    LOG.debug("Running query: %s", querytext)

    client = get_bq_client()
    if temp_table:
        query_job_config.destination = temp_table
        query_job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    return client.query(query=querytext, job_config=query_job_config)
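A minimal usage sketch for run_query_job above, assuming get_bq_client() is configured elsewhere; the project, dataset, and table names are placeholders, and passing the destination as a "project.dataset.table" string relies on recent google-cloud-bigquery versions accepting string table IDs for QueryJobConfig.destination.

from google.cloud.bigquery import QueryJobConfig

# Hypothetical call; all identifiers below are placeholders.
job = run_query_job(
    querytext="SELECT word, word_count FROM `bigquery-public-data.samples.shakespeare` LIMIT 10",
    temp_table="my-project.temp_dataset.scratch_results",  # assumed writable scratch table
    query_job_config=QueryJobConfig(use_query_cache=False),
)
rows = job.result()  # blocks until the job finishes, then streams from the destination table
for row in rows:
    print(row.word, row.word_count)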
def get_query_results(self, query, use_legacy_sql=False, max_wait_secs=None):
    # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
    """Returns a list of rows, each of which is a tuple of values.

    Args:
        query: A string with a complete SQL query.
        use_legacy_sql: Whether to use legacy SQL.
        max_wait_secs: The maximum number of seconds to wait for the query to complete. If not
            set, the class default will be used.

    Returns:
        A list of tuples of values.
    """
    config = QueryJobConfig()
    if self.maximum_billing_tier:
        config.maximum_billing_tier = self.maximum_billing_tier
    config.use_legacy_sql = use_legacy_sql
    query_job = self._run_async_query(query, job_config=config)

    rows = self._wait_for_job(query_job, query,
                              max_wait_secs=max_wait_secs or self.max_wait_secs)
    if query_job.errors:
        logging.warning('Errors in get_query_results: {}'.format(
            query_job.errors))
    return [x.values() for x in list(rows)]
def load_query_result_to_table(dest_table, query, part_col_name=None, clustering_fields=None):
    bq_client = get_bigquery_client()
    qjc = None
    print(query)
    if bq_table_exists(dest_table):
        table = bq_client.get_table(dest_table)
        qjc = QueryJobConfig(
            destination=dest_table,
            write_disposition="WRITE_TRUNCATE",
            create_disposition="CREATE_IF_NEEDED",
            time_partitioning=table.time_partitioning,
            range_partitioning=table.range_partitioning,
            clustering_fields=table.clustering_fields,
        )
        job = bq_client.query(query, job_config=qjc)
        job.result()
    else:
        import time

        temp_table_name = f"load_query_result_to_table__{str(int(time.time()))}"
        bq_client.query(
            f"CREATE OR REPLACE TABLE temp_1d.{temp_table_name} AS {query}"
        ).result()
        if part_col_name:
            schema = bq_client.get_table(f"temp_1d.{temp_table_name}").schema
            partition_type = [
                f for f in schema if f.name.lower() == part_col_name.lower()
            ][0].field_type
            if partition_type == "DATE":
                qjc = QueryJobConfig(
                    destination=dest_table,
                    write_disposition="WRITE_TRUNCATE",
                    create_disposition="CREATE_IF_NEEDED",
                    time_partitioning=TimePartitioning(field=part_col_name),
                    clustering_fields=clustering_fields,
                )
            elif partition_type == "INTEGER":
                qjc = QueryJobConfig(
                    destination=dest_table,
                    write_disposition="WRITE_TRUNCATE",
                    create_disposition="CREATE_IF_NEEDED",
                    range_partitioning=RangePartitioning(
                        PartitionRange(start=200001, end=209912, interval=1),
                        field=part_col_name),
                    clustering_fields=clustering_fields,
                )
            else:
                print(partition_type)
                raise Exception(
                    f"Partition column [{part_col_name}] is neither DATE nor INTEGER type."
                )
        job = bq_client.query(f"SELECT * FROM temp_1d.{temp_table_name}", job_config=qjc)
        job.result()
def dry_run(self, query: str) -> List[SqlColumn]:
    client = self._get_client()
    logging.info(f"DataWarehouse.dry_run")

    config = QueryJobConfig()
    config.dry_run = True

    query_job = client.query(query, config)
    result = query_job.result()
    return DataWarehouse._translate_columns(result.schema)
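For comparison, a standalone dry-run sketch against the raw client rather than the DataWarehouse wrapper above: with dry_run=True, BigQuery validates the query and reports total_bytes_processed without executing it. The project ID and query are placeholders.

from google.cloud import bigquery

# Minimal dry-run sketch; "my-project" and the query text are placeholders.
client = bigquery.Client(project="my-project")
config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
job = client.query(
    "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` WHERE state = 'TX'",
    job_config=config,
)
# A dry-run job never executes; it only reports validation results and a cost estimate.
print(f"Estimated bytes processed: {job.total_bytes_processed}")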
def execute_sync_query(project_id, query_str, bq_client=None):
    if bq_client is None:
        bq_client = bigquery.Client(project_id)

    config = QueryJobConfig()
    config.use_legacy_sql = False
    config.use_query_cache = False

    query_job = bq_client.query(query_str, job_config=config, location="EU")

    result = []
    for row in query_job:
        result.append(row)
    return result
def bq_query_input_solid(context, sql_queries: List[str]) -> List[DataFrame]:
    query_job_config = _preprocess_config(context.solid_config.get('query_job_config', {}))

    results = []
    for sql_query in sql_queries:
        cfg = QueryJobConfig(**query_job_config) if query_job_config else None
        context.log.info(
            'executing query %s with config: %s'
            % (sql_query, cfg.to_api_repr() if cfg else '(no config provided)')
        )
        results.append(context.resources.bigquery.query(sql_query, job_config=cfg).to_dataframe())

    return results
def write_to_table_with_query(self, write_disposition):
    # type: (str) -> None
    """Query all rows from source table, write to destination table w/ requested disposition.

    Args:
        write_disposition: Whether to require the destination table to be empty, to append to
            it, or to overwrite (truncate) it.
    """
    job_config = QueryJobConfig()
    job_config.destination = self.destination_table.reference
    job_config.write_disposition = write_disposition
    self.bq_client.query(
        'SELECT * FROM `{}.{}.{}`'.format(self.source_table.project,
                                          self.source_table.dataset_id,
                                          self.source_table.table_id),
        job_config)
def _execute_query_custom(self, query):
    client = self.connection()
    con = google.cloud.bigquery.dbapi.connect(client=client)
    cursor = Cursor(con)
    try:
        cursor.execute(
            query,
            job_config=QueryJobConfig(script_options=ScriptOptions(
                statement_timeout_ms=120000)))
    except Exception as e:
        cursor.close()
        con.close()
        raise e
    con.commit()

    try:
        result = cursor.fetchall()
    except AttributeError:
        result = None
    except:
        raise

    cursor.close()
    con.close()

    query_create_table = re.search("(?i)(?<=((create table ))).*(?= as)", query)
    if result:
        return [dict(r) for r in result]
    elif query_create_table:
        return {'execute_query': query_create_table}
    else:
        empty_list = []
        return empty_list
def do_extract(fq_dataset,
               max_tables,
               query_project,
               fq_destination_table,
               fq_sample_mapping_table,
               cohort_sample_names_file,
               sample_map_outfile,
               ttl,
               number_of_partitions,
               probes_per_partition,
               extract_genotype_counts_only):
    try:
        global client
        client = bigquery.Client(project=query_project,
                                 default_query_job_config=QueryJobConfig(
                                     priority="INTERACTIVE", use_query_cache=False))

        global RAW_ARRAY_TABLE_COUNT
        RAW_ARRAY_TABLE_COUNT = max_tables
        print(f"Using {RAW_ARRAY_TABLE_COUNT} tables in {fq_dataset}...")

        cohort = get_all_samples(fq_sample_mapping_table,
                                 cohort_sample_names_file,
                                 sample_map_outfile)
        print(f"Discovered {len(cohort)} samples in {fq_sample_mapping_table}...")

        populate_extract_table(fq_dataset, cohort, fq_destination_table, ttl,
                               number_of_partitions, probes_per_partition,
                               extract_genotype_counts_only)

        print(f"\nFinal cohort extract written to {fq_destination_table}\n")
    except Exception as err:
        print(f"Unexpected error! {err}")
        raise
    finally:
        dump_job_stats()
def test_execute_w_query_dry_run(self):
    from google.cloud.bigquery.job import QueryJobConfig
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery import dbapi

    connection = dbapi.connect(
        self._mock_client(
            rows=[("hello", "world", 1), ("howdy", "y'all", 2)],
            schema=[
                SchemaField("a", "STRING", mode="NULLABLE"),
                SchemaField("b", "STRING", mode="REQUIRED"),
                SchemaField("c", "INTEGER", mode="NULLABLE"),
            ],
            dry_run_job=True,
            total_bytes_processed=12345,
        )
    )
    cursor = connection.cursor()

    cursor.execute(
        "SELECT a, b, c FROM hello_world WHERE d > 3;",
        job_config=QueryJobConfig(dry_run=True),
    )

    self.assertEqual(cursor.rowcount, 0)
    self.assertIsNone(cursor.description)
    rows = cursor.fetchall()
    self.assertEqual(list(rows), [])
def _compute_fn(context, _):
    query_job_config = _preprocess_config(context.solid_config.get('query_job_config', {}))

    # Retrieve results as pandas DataFrames
    results = []
    for sql_query in sql_queries:
        # We need to construct a new QueryJobConfig for each query.
        # See: https://bit.ly/2VjD6sl
        cfg = QueryJobConfig(**query_job_config) if query_job_config else None
        context.log.info(
            'executing query %s with config: %s'
            % (sql_query, cfg.to_api_repr() if cfg else '(no config provided)')
        )
        results.append(context.resources.bq.query(sql_query, job_config=cfg).to_dataframe())

    yield Result(results)
def execute(self, query, destination_table, write_disposition="WRITE_TRUNCATE",
            allow_large_results=True):
    """
    :param query: query file path
    :param destination_table: target table
    :param write_disposition: default is to replace existing table. To append: WRITE_APPEND
    :param allow_large_results: default to True
    :return:
    """
    query_configuration = QueryJobConfig()
    query_configuration.use_legacy_sql = False
    if destination_table:
        ref = TableReferenceBuilder(destination_table, self._dataset, self._project)
        query_configuration.write_disposition = write_disposition
        query_configuration.default_dataset = ref.dataset_reference
        query_configuration.destination = ref.table_reference
        query_configuration.allow_large_results = allow_large_results

    sql_query = self.__get_query(query)
    if not self._quiet:
        print("-- #### {}\n{}\n".format(destination_table or "", sql_query))

    self._query_job = bigquery.Client(project=self._project).query(
        sql_query, job_config=query_configuration)
    if self._query_job.errors:
        raise Exception(self._query_job.errors)
def _solid(context):  # pylint: disable=unused-argument
    query_job_config = _preprocess_config(context.solid_config.get("query_job_config", {}))

    # Retrieve results as pandas DataFrames
    results = []
    for sql_query in sql_queries:
        # We need to construct a new QueryJobConfig for each query.
        # See: https://bit.ly/2VjD6sl
        cfg = QueryJobConfig(**query_job_config) if query_job_config else None
        context.log.info(
            "executing query %s with config: %s"
            % (sql_query, cfg.to_api_repr() if cfg else "(no config provided)")
        )
        results.append(
            context.resources.bigquery.query(sql_query, job_config=cfg).to_dataframe()
        )

    return results
def start_async_job(self, query, dest_path=None):
    # type: (str, Optional[str]) -> QueryJob
    """
    Makes a QueryJob for the given query to be written to the dest_path, and starts it,
    returning the job.

    Args:
        query: The query string to run.
        dest_path: String of the path to the destination table. It's None if the query is a
            Data Definition Language (DDL) statement (CREATE/ALTER/DROP tables), because the
            destination table is already specified in a DDL query.

    Returns:
        A QueryJob instance for the job.

    Raises:
        ValueError: If dest_path is specified for a DDL query, or dest_path is missing for a
            non-DDL query.
    """
    is_ddl_query = any(query.strip().upper().startswith(ddl_op)
                       for ddl_op in ['CREATE', 'ALTER', 'DROP'])

    # Make an asynchronous job and start it.
    config = QueryJobConfig()
    if dest_path:
        if is_ddl_query:
            raise ValueError(
                'Cannot specify destination path for the DDL query below:\n ' + query)
        # allow_large_results requires destination to be specified
        config.allow_large_results = True
        config.destination = (
            self.get_table_reference_from_path(dest_path))
    elif not is_ddl_query:
        raise ValueError('Destination table is not specified, '
                         'but the query below is not a DDL statement:\n ' + query)

    return self._run_async_query(query, config)
def _configure_gcp_client(self, query_job_config):
    """Configure GCP client."""
    logger.info('Storing BigQuery Auth Credentials')
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = SETTINGS.bigquery_credentials_filepath

    logger.info('Creating new query job configuration')
    if query_job_config:
        self.query_job_config = query_job_config
    else:
        self.query_job_config = QueryJobConfig(use_legacy_sql=False, use_query_cache=True)

    self.client = Client(default_query_job_config=self.query_job_config)
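A small sketch of how the default_query_job_config set in _configure_gcp_client above is expected to behave: a per-call QueryJobConfig is merged over the client-level defaults, so an individual query only needs to override what differs. The project ID and query below are placeholders.

from google.cloud.bigquery import Client, QueryJobConfig

# Client-level defaults apply to every query issued through this client.
client = Client(
    project="my-project",  # placeholder
    default_query_job_config=QueryJobConfig(use_legacy_sql=False, use_query_cache=True),
)

# A per-query config is merged over the defaults; here only the cache setting is overridden.
job = client.query(
    "SELECT 1 AS x",
    job_config=QueryJobConfig(use_query_cache=False),
)
print(list(job.result()))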
def test_insert_rows(self):
    # type: () -> None
    dataset_ref = DatasetReference('my_project', 'my_dataset')
    dataset = Dataset(dataset_ref)

    table1_ref = TableReference(dataset_ref, 'table1')
    schema = [
        SchemaField(name="a", field_type='INT64'),
        SchemaField(name="b", field_type='FLOAT64'),
    ]
    table = Table(table1_ref, schema)

    self.bq_client.create_dataset(dataset)
    self.bq_client.create_table(table)

    # Insert two rows, check that they landed
    self.assertFalse(
        self.bq_client.insert_rows(table, [{'a': 1, 'b': 2.5}, {'a': 3, 'b': 4.25}]))

    self.assertRowsExpected(
        self.bq_client.query('SELECT * FROM `my_project.my_dataset.table1`',
                             QueryJobConfig()),
        [[1, 2.5], [3, 4.25]])

    # Insert two more rows, check that all four rows are now present.
    self.assertFalse(
        self.bq_client.insert_rows(table, [{'a': 5, 'b': 6.5}, {'a': 7, 'b': 8.25}]))

    self.assertRowsExpected(
        self.bq_client.query('SELECT * FROM `my_project.my_dataset.table1`',
                             QueryJobConfig()),
        [[1, 2.5], [3, 4.25], [5, 6.5], [7, 8.25]])
def select_into(self, query: str, output_dataset: str, output_table: str) -> bool:
    logging.info(f"DataWarehouse.select_into -> {output_dataset}.{output_table} ...")
    client = self._get_client()

    config = QueryJobConfig()
    config.allow_large_results = True
    config.destination = f"{self.config.gcp_project}.{output_dataset}.{output_table}"
    config.create_disposition = CreateDisposition.CREATE_IF_NEEDED
    config.write_disposition = WriteDisposition.WRITE_TRUNCATE

    query_job = client.query(query, config)

    # Execute the thing and check the result
    try:
        result = query_job.result()
        mb = int(query_job.total_bytes_processed / (1024 * 1024))
        logging.info(
            f"DataWarehouse.select_into -> {output_dataset}.{output_table}: {query_job.state} (processed {mb}mb)"
        )
        return True
    except:
        logging.error(
            f"DataWarehouse.select_into -> Exception: \n\t{query_job.error_result.message}"
        )
        return False
def select_insert(self, source_table_id, destination_table_id, query_field,
                  prefix=' ', fg='yellow'):
    query = 'SELECT {query_field} FROM {dataset_id}.{source_table_id}'.format(
        query_field=query_field,
        dataset_id=self._dataset_ref.dataset_id,
        source_table_id=source_table_id)
    destination_table = self.dataset.table(destination_table_id)

    job_config = QueryJobConfig()
    job_config.use_legacy_sql = False
    job_config.use_query_cache = False
    job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    job_config.destination = destination_table

    job = self._client.query(query, job_config)
    echo('Inserting... {0}'.format(job.job_id),
         prefix=prefix, fg=fg, no_color=self.no_color)
    echo(' {0}'.format(job.query),
         prefix=prefix, fg=fg, no_color=self.no_color)

    job.result()
    assert job.state == 'DONE'

    error_result = job.error_result
    if error_result:
        raise RuntimeError(job.errors)
def __init__(
    self,
    *,
    from_: Union[BaseResourceLoader, str],
    bqtk_config: BQTestKitConfig,
    location: Optional[str] = None,
    bq_client: Client,
    job_config: QueryJobConfig = None,
    project: Project = None,
    interpolators: List[BaseInterpolator] = None,
    global_dict: Dict[str, Any] = None,
    temp_tables: List[Tuple[BaseDataLiteralTransformer, TableResources]] = None,
    temp_technical_column_prefix: str = DEFAULT_TECHNICAL_COLUMN_PREFIX
) -> None:
    """Constructor of BQQueryTemplate

    Args:
        from_ (Union[BaseResourceLoader, str]): query to load from.
        bqtk_config (BQTestKitConfig): config used across the query DSL.
        bq_client (Client): instance of bigquery client to use across the DSL.
        location (Optional[str], optional): force location for dataset.
            Defaults extracted from bqtk_config.
        job_config (QueryJobConfig, optional): Configure job. Defaults to QueryJobConfig().
        project (Project, optional): project in which this query should be run.
            Allows usage of relative table names. Defaults to None.
        interpolators (List[BaseInterpolator], optional): list of interpolators to use before
            running the query. Defaults to None.
        global_dict (Dict[str, Any], optional): global dictionary to mix with the local
            interpolator's dictionary. Defaults to None.
        temp_tables (List[Tuple[BaseDataLiteralTransformer, TableResources]]): list of all
            tables to create as temp tables with a data literal. Defaults to None.
        temp_technical_column_prefix (str): prefix used when renaming partition columns which
            are invalid in BigQuery. Defaults to
            bq_test_kit.constants.DEFAULT_TECHNICAL_COLUMN_PREFIX.
    """
    self.from_ = from_
    self._bq_client = bq_client
    self.job_config = job_config if job_config else QueryJobConfig()
    self.location = location if location else bqtk_config.get_default_location()
    self.project = project
    self.interpolators = interpolators if interpolators else []
    self.bqtk_config = bqtk_config
    self.global_dict = global_dict if global_dict else {}
    self.temp_tables = ([
        self._to_temp_tables_with_schema_field(temp_table)
        for temp_table in temp_tables
    ] if temp_tables else [])
    self.temp_technical_column_prefix = temp_technical_column_prefix
def test_write_query_result_write_disposition_append(self):
    # type: () -> None
    # You can write into destination_table with WRITE_APPEND
    self.bq_client.create_table(self.destination_table)
    self.write_to_table_with_query('WRITE_APPEND')

    # And you can do it again
    self.write_to_table_with_query('WRITE_APPEND')

    self.assertRowsExpected(
        self.bq_client.query(
            'SELECT * FROM `my_project.my_dataset.destination_table`',
            QueryJobConfig()),
        [[1, 2.5], [3, 4.25], [1, 2.5], [3, 4.25]])
def test_write_query_result_write_disposition_empty(self):
    # type: () -> None
    # You can write into destination_table with WRITE_EMPTY because it's empty.
    # Note we do not create the table first; WRITE_EMPTY creates the table.
    self.write_to_table_with_query('WRITE_EMPTY')

    # ... but you can't do that again, because now it's not empty.
    with self.assertRaisesRegexp(ValueError, 'trying to overwrite nonempty table'):
        self.write_to_table_with_query('WRITE_EMPTY')

    self.assertRowsExpected(
        self.bq_client.query('SELECT * FROM `my_project.my_dataset.destination_table`',
                             QueryJobConfig()),
        [[1, 2.5], [3, 4.25]])
def test_write_query_result_write_disposition_truncate(self):
    # type: () -> None
    self.bq_client.create_table(self.destination_table)

    # Stick a row into destination table
    self.assertFalse(self.bq_client.insert_rows(self.destination_table, [{'a': 5, 'b': 6}]))

    # Overwrite destination_table with the data from source_table with WRITE_TRUNCATE
    self.write_to_table_with_query('WRITE_TRUNCATE')

    self.assertRowsExpected(
        self.bq_client.query('SELECT * FROM `my_project.my_dataset.destination_table`',
                             QueryJobConfig()),
        [[1, 2.5], [3, 4.25]])
def materialize_view(  # pylint: disable=too-many-arguments, too-many-locals
        client: bigquery.Client,
        source_view_name: str,
        destination_table_name: str,
        project: str,
        source_dataset: str,
        destination_dataset: str) -> MaterializeViewResult:
    query = get_select_all_from_query(source_view_name, project=project,
                                      dataset=source_dataset)
    LOGGER.info("materializing view: %s.%s -> %s.%s",
                source_dataset, source_view_name,
                destination_dataset, destination_table_name)
    LOGGER.debug("materialize_view: %s=%s", destination_table_name, [query])
    start = time.perf_counter()

    dataset_ref = client.dataset(destination_dataset)
    destination_table_ref = dataset_ref.table(destination_table_name)

    job_config = QueryJobConfig()
    job_config.destination = destination_table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    query_job = client.query(query, job_config=job_config)
    # getting the result will make sure that the query ran successfully
    result: bigquery.table.RowIterator = query_job.result()

    duration = time.perf_counter() - start
    total_bytes_processed = query_job.total_bytes_processed
    LOGGER.info(
        'materialized view: %s.%s, total rows: %s, %s bytes processed, took: %.3fs',
        source_dataset, source_view_name, result.total_rows,
        total_bytes_processed, duration)
    if LOGGER.isEnabledFor(logging.DEBUG):
        sample_result = list(islice(result, 3))
        LOGGER.debug("sample_result: %s", sample_result)
    return MaterializeViewResult(
        total_bytes_processed=total_bytes_processed,
        total_rows=result.total_rows)
def get_query_results(self, query, use_legacy_sql=False, max_wait_secs=None):
    # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
    """Returns a list of rows, each of which is a tuple of values.

    Args:
        query: A string with a complete SQL query.
        use_legacy_sql: Whether to use legacy SQL.
        max_wait_secs: The maximum number of seconds to wait for the query to complete. If not
            set, the class default will be used.

    Returns:
        A list of tuples of values.
    """
    config = QueryJobConfig()
    if self.maximum_billing_tier:
        config.maximum_billing_tier = self.maximum_billing_tier
    config.use_legacy_sql = use_legacy_sql

    query_job = self.gclient.query(query, job_config=config, retry=self.default_retry)

    rows = query_job.result(retry=self.default_retry,
                            timeout=max_wait_secs or self.max_wait_secs)
    return [x.values() for x in list(rows)]
def bq_to_df(query, spark_session=None):
    import time

    temp_table_name = f"bq_to_df__{str(int(time.time()))}"
    temp_dataset = "temp_1d"
    jc = QueryJobConfig(
        create_disposition="CREATE_IF_NEEDED",
        write_disposition="WRITE_TRUNCATE",
        destination=f"sktaic-datahub.{temp_dataset}.{temp_table_name}",
    )
    bq_client = get_bigquery_client()
    job = bq_client.query(query, job_config=jc)
    job.result()
    return _bq_table_to_df(temp_dataset, temp_table_name, "*", spark_session=spark_session)
def explore_visits_by_hour(context):
    query_job_config = QueryJobConfig(
        destination='%s.aggregations.explore_visits_per_hour' % PROJECT_ID,
        create_disposition='CREATE_IF_NEEDED',
        write_disposition='WRITE_TRUNCATE',
    )

    sql = '''
        SELECT FORMAT_DATETIME("%F %H:00:00",
                               DATETIME(TIMESTAMP_SECONDS(CAST(timestamp AS INT64)))) AS ts,
               COUNT(1) AS num_visits
        FROM events.events
        WHERE url = '/explore'
        GROUP BY ts
        ORDER BY ts ASC
    '''

    context.resources.bigquery.query(sql, job_config=query_job_config)
def create_table_from_query(
        self,
        query,  # type: str
        table_path,  # type: str
        write_disposition='WRITE_EMPTY',  # type: Optional[str]
        use_legacy_sql=False,  # type: Optional[bool]
        max_wait_secs=None,  # type: Optional[int]
        expected_schema=None  # type: Optional[List[SchemaField]]
):
    # type: (...) -> None
    """Creates a table in BigQuery from a specified query.

    Args:
        query: The query to run.
        table_path: The path to the table (in the client's project) to write the results to.
        write_disposition: Specifies behavior if table already exists. See options here:
            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs under
            configuration.query.writeDisposition
        use_legacy_sql: Whether the query is written in standard or legacy sql.
        max_wait_secs: Seconds to wait for the query before timing out. If not set, the class
            default will be used.
        expected_schema: The expected schema of the resulting table; unused in this
            implementation
    """
    if write_disposition not in ['WRITE_TRUNCATE', 'WRITE_APPEND', 'WRITE_EMPTY']:
        raise ValueError('write_disposition must be one of WRITE_TRUNCATE, '
                         'WRITE_APPEND, or WRITE_EMPTY')

    config = QueryJobConfig()
    if self.maximum_billing_tier:
        config.maximum_billing_tier = self.maximum_billing_tier
    config.use_legacy_sql = use_legacy_sql
    config.write_disposition = write_disposition
    config.allow_large_results = True
    config.destination = self.get_table_reference_from_path(table_path)

    query_job = self._run_async_query(query, job_config=config)
    return self._wait_for_job(query_job, query,
                              max_wait_secs=max_wait_secs or self.max_wait_secs)
def do_extract(fq_pet_vet_dataset,
               max_tables,
               fq_cohort_sample_names,
               query_project,
               fq_temp_table_dataset,
               fq_destination_dataset,
               destination_table,
               min_variant_samples,
               fq_sample_mapping_table):
    try:
        global client
        client = bigquery.Client(
            project=query_project,
            default_query_job_config=QueryJobConfig(
                labels={"id": f"test_cohort_export_{output_table_prefix}"},
                priority="INTERACTIVE",
                use_query_cache=False))

        ## TODO -- provide a cmdline arg to override this (so we can simulate smaller datasets)
        global PET_VET_TABLE_COUNT
        PET_VET_TABLE_COUNT = max_tables
        print(f"Using {PET_VET_TABLE_COUNT} PET tables in {fq_pet_vet_dataset}...")

        cohort = get_all_samples(fq_cohort_sample_names, fq_sample_mapping_table)
        print(f"Discovered {len(cohort)} samples in {fq_cohort_sample_names}...")

        make_new_vet_union_all(fq_pet_vet_dataset, fq_temp_table_dataset, cohort)
        create_position_table(fq_temp_table_dataset, min_variant_samples)
        make_new_pet_union_all(fq_pet_vet_dataset, fq_temp_table_dataset, cohort)
        populate_final_extract_table(fq_temp_table_dataset, fq_destination_dataset,
                                     destination_table, fq_sample_mapping_table)
    except Exception as err:
        print(err)

    dump_job_stats()
    print(
        f"\nFinal cohort extract written to {fq_destination_dataset}.{destination_table}\n"
    )
def _execute(self):
    client = self._get_client()
    job_id = self._get_job_id(with_unique_suffix=True)
    destination_table = self._get_full_table_name()
    query = self._params['query'].strip()
    location = self._params['bq_dataset_location']

    try:
        job = client.get_job(job_id)
    except exceptions.NotFound:
        if self._params['overwrite']:
            write_disposition = 'WRITE_TRUNCATE'
        else:
            write_disposition = 'WRITE_APPEND'
        job_config = QueryJobConfig(destination=destination_table,
                                    write_disposition=write_disposition)
        job = client.query(query=query,
                           job_id=job_id,
                           location=location,
                           job_config=job_config)

    self._wait(job)