def _get_columns_by_file(
        state_code: str,
        project_id: str) -> Dict[str, List[RawTableColumnInfo]]:
    """Creates a list of RawTableColumnInfo for each raw file in a given state.

    Queries the BigQuery INFORMATION_SCHEMA of the state's raw data dataset
    and groups the returned columns by table (raw file) name, skipping the
    infrastructure-managed metadata columns.

    Args:
        state_code: State identifier; lowercased to derive the
            `<state_code>_raw_data` dataset name.
        project_id: The GCP project containing the dataset.

    Returns:
        Mapping of raw file (table) name to its columns, ordered by each
        column's ordinal position within the table.
    """
    columns_by_file: Dict[str, List[RawTableColumnInfo]] = {}

    raw_data_dataset = f"{state_code.lower()}_raw_data"

    query_string = f"""
SELECT
 * EXCEPT(is_generated, generation_expression, is_stored, is_updatable)
FROM
 `{project_id}.{raw_data_dataset}.INFORMATION_SCHEMA.COLUMNS`
ORDER BY
  table_name ASC, ordinal_position ASC
"""

    bq_client = BigQueryClientImpl()
    query_job = bq_client.run_query_async(query_string)

    # Columns added by the ingest infrastructure, not part of the raw file.
    metadata_columns = {"file_id", "update_datetime"}
    for row in query_job:
        column_name = row["column_name"]
        if column_name in metadata_columns:
            continue

        is_datetime = row["data_type"].upper() == "DATETIME"

        # "TKTK" is a placeholder description meant to be filled in manually.
        column_info = RawTableColumnInfo(name=column_name,
                                         is_datetime=is_datetime,
                                         description="TKTK")
        columns_by_file.setdefault(row["table_name"], []).append(column_info)

    return columns_by_file


def compare_raw_data_between_projects(
    region_code: str,
    source_project_id: str = environment.GCP_PROJECT_STAGING,
    comparison_project_id: str = environment.GCP_PROJECT_PRODUCTION,
) -> List[str]:
    """Compares the raw data between staging and production for a given region.

    For each documented raw file with primary keys that exists in the source
    project, runs a comparison query and logs which tables in the comparison
    project are missing entirely or are missing rows for particular update
    datetimes.

    Args:
        region_code: The region whose raw data should be compared.
        source_project_id: Project treated as the source of truth.
        comparison_project_id: Project checked for missing data.

    Returns:
        The file tags whose data is partially or entirely missing from the
        comparison project.
    """
    logging.info(
        "**** Ensuring all raw data for [%s] in [%s] also exists in [%s] ****",
        region_code.upper(),
        source_project_id,
        comparison_project_id,
    )

    raw_file_config = DirectIngestRegionRawFileConfig(region_code)

    bq_client = BigQueryClientImpl(project_id=source_project_id)
    dataset_id = DirectIngestRawFileImportManager.raw_tables_dataset_for_region(
        region_code)
    source_dataset = bq_client.dataset_ref_for_id(dataset_id)

    # Kick off all comparison queries asynchronously before collecting any
    # results so the jobs run concurrently.
    query_jobs: Dict[str, bigquery.QueryJob] = {}
    for file_tag, file_config in raw_file_config.raw_file_configs.items():
        # Skip files that cannot be meaningfully compared: absent from the
        # source project, undocumented, or lacking primary keys.
        if (not bq_client.table_exists(source_dataset, file_tag)
                or file_config.is_undocumented
                or not file_config.primary_key_cols):
            continue

        columns = ", ".join(
            [column.name for column in file_config.available_columns])

        query_job = bq_client.run_query_async(
            query_str=COMPARISON_TEMPLATE.format(
                source_project_id=source_project_id,
                comparison_project_id=comparison_project_id,
                raw_data_dataset_id=dataset_id,
                raw_data_table_id=file_tag,
                columns=columns,
            ))
        query_jobs[file_tag] = query_job

    # Width used to left-justify file tags in log output, capped at 30.
    # `default=0` guards against a region with no configured raw files,
    # where a bare max() would raise ValueError.
    table_column_width = min(
        max((len(tag) for tag in raw_file_config.raw_file_configs), default=0),
        30)

    failed_tables: List[str] = []
    for file_tag in sorted(raw_file_config.raw_file_tags):
        justified_name = file_tag.ljust(table_column_width)

        if file_tag not in query_jobs:
            # This file did not exist in the project that is the source of truth.
            continue

        query_job = query_jobs[file_tag]
        try:
            rows = query_job.result()
        except exceptions.NotFound:
            # The table does not exist at all in the comparison project.
            logging.warning(
                "%s | Missing table %s.%s.%s",
                justified_name,
                comparison_project_id,
                dataset_id,
                file_tag,
            )
            failed_tables.append(file_tag)
            continue

        # Each result row holds (update_datetime, number of missing rows).
        counts: List[Tuple[datetime.datetime,
                           int]] = [row.values() for row in rows]

        if counts:
            logging.warning(
                "%s | Missing data in the %s table",
                justified_name,
                comparison_project_id,
            )
            for update_datetime, num_missing in counts:
                logging.warning("\t%ss: %d", update_datetime.isoformat(),
                                num_missing)
            failed_tables.append(file_tag)
        else:
            logging.info(
                "%s | %s contains all of the data from %s",
                justified_name,
                comparison_project_id,
                source_project_id,
            )

    return failed_tables