def _get_columns_by_file(
    state_code: str, project_id: str
) -> Dict[str, List[RawTableColumnInfo]]:
    """Creates a list of RawTableColumnInfo for each raw file in a given state.

    Queries the state's `<state_code>_raw_data` dataset via INFORMATION_SCHEMA
    and groups the discovered columns by table name. The 'file_id' and
    'update_datetime' columns are skipped (presumably import-time metadata
    rather than raw source columns — confirm against the ingest pipeline).
    Every description is filled with the "TKTK" placeholder for a human to
    replace later.
    """
    dataset_id = f"{state_code.lower()}_raw_data"
    # EXCEPT(...) drops generated-column metadata fields we never read.
    info_schema_query = f"""
SELECT * EXCEPT(is_generated, generation_expression, is_stored, is_updatable)
FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.COLUMNS`
ORDER BY table_name ASC, ordinal_position ASC
"""
    client = BigQueryClientImpl()
    result_rows = client.run_query_async(info_schema_query)

    columns_by_file: Dict[str, List[RawTableColumnInfo]] = {}
    for result_row in result_rows:
        col_name = result_row["column_name"]
        if col_name in {"file_id", "update_datetime"}:
            continue
        columns_by_file.setdefault(result_row["table_name"], []).append(
            RawTableColumnInfo(
                name=col_name,
                is_datetime=result_row["data_type"].upper() == "DATETIME",
                description="TKTK",
            )
        )
    return columns_by_file
def compare_raw_data_between_projects(
    region_code: str,
    source_project_id: str = environment.GCP_PROJECT_STAGING,
    comparison_project_id: str = environment.GCP_PROJECT_PRODUCTION,
) -> List[str]:
    """Compares the raw data between staging and production for a given region.

    For each raw file config that has a table in the source project, is
    documented, and has primary keys, kicks off a comparison query and then
    logs whether any source rows are missing from the comparison project.

    Args:
        region_code: The region whose raw data should be compared.
        source_project_id: Project treated as the source of truth.
        comparison_project_id: Project checked for missing data.

    Returns:
        File tags whose tables are missing or incomplete in the comparison
        project.
    """
    logging.info(
        "**** Ensuring all raw data for [%s] in [%s] also exists in [%s] ****",
        region_code.upper(),
        source_project_id,
        comparison_project_id,
    )

    raw_file_config = DirectIngestRegionRawFileConfig(region_code)

    bq_client = BigQueryClientImpl(project_id=source_project_id)
    dataset_id = DirectIngestRawFileImportManager.raw_tables_dataset_for_region(
        region_code
    )
    source_dataset = bq_client.dataset_ref_for_id(dataset_id)

    # Launch all comparison queries asynchronously before collecting results.
    query_jobs: Dict[str, bigquery.QueryJob] = {}
    for file_tag, file_config in raw_file_config.raw_file_configs.items():
        # Skip tables that cannot be compared: absent from the source project,
        # explicitly undocumented, or lacking primary keys to join on.
        if (
            not bq_client.table_exists(source_dataset, file_tag)
            or file_config.is_undocumented
            or not file_config.primary_key_cols
        ):
            continue

        columns = ", ".join(
            column.name for column in file_config.available_columns
        )

        query_job = bq_client.run_query_async(
            query_str=COMPARISON_TEMPLATE.format(
                source_project_id=source_project_id,
                comparison_project_id=comparison_project_id,
                raw_data_dataset_id=dataset_id,
                raw_data_table_id=file_tag,
                columns=columns,
            )
        )
        query_jobs[file_tag] = query_job

    # Pad file tags so the per-table log lines align, capped at 30 chars.
    table_column_width = min(
        max(len(tag) for tag in raw_file_config.raw_file_configs), 30
    )

    failed_tables: List[str] = []
    for file_tag in sorted(raw_file_config.raw_file_tags):
        justified_name = file_tag.ljust(table_column_width)

        if file_tag not in query_jobs:
            # This file did not exist in the project that is the source of truth.
            continue

        query_job = query_jobs[file_tag]
        try:
            rows = query_job.result()
        except exceptions.NotFound:
            logging.warning(
                "%s | Missing table %s.%s.%s",
                justified_name,
                comparison_project_id,
                dataset_id,
                file_tag,
            )
            failed_tables.append(file_tag)
            continue

        # Each result row is (update_datetime, num_missing). Wrap in tuple()
        # so the elements actually match the declared annotation instead of
        # being BigQuery Row value sequences.
        counts: List[Tuple[datetime.datetime, int]] = [
            tuple(row.values()) for row in rows
        ]

        if counts:
            logging.warning(
                "%s | Missing data in the %s table",
                justified_name,
                comparison_project_id,
            )
            for update_datetime, num_missing in counts:
                # Fixed stray literal 's' in the old "\t%ss: %d" placeholder.
                logging.warning(
                    "\t%s: %d", update_datetime.isoformat(), num_missing
                )
            failed_tables.append(file_tag)
        else:
            logging.info(
                "%s | %s contains all of the data from %s",
                justified_name,
                comparison_project_id,
                source_project_id,
            )

    return failed_tables