Example #1
# BigQueryClient, CloudSqlToBQConfig, wait_for_table_load, and metadata are
# defined elsewhere in the surrounding codebase.
import logging


def load_table_from_gcs_and_wait(
    big_query_client: BigQueryClient,
    table_name: str,
    cloud_sql_to_bq_config: CloudSqlToBQConfig,
    destination_table_id: str,
) -> None:
    """Loads a table from CSV data in GCS to BigQuery.

    Given a table name and a destination_table_id, retrieve the export URI and schema from cloud_sql_to_bq_config,
    then load the table into the destination_table_id.

    This starts the load job and waits until it completes.

    Tables are created if they do not exist, and overwritten if they do exist.

    Because we are using bigquery.WriteDisposition.WRITE_TRUNCATE, the table's
    data will be completely wiped and overwritten with the contents of the CSV.

    Args:
        big_query_client: A BigQueryClient.
        table_name: Table to import. Table must be defined in the base schema.
        cloud_sql_to_bq_config: Export config class for a specific SchemaType.
        destination_table_id: Name of the destination table to load into.

    Returns:
        None.

    Raises:
        ValueError: If the table load fails.
    """
    uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)

    logging.info("GCS URI [%s] in project [%s]", uri, metadata.project_id())

    bq_schema = cloud_sql_to_bq_config.get_bq_schema_for_table(table_name)
    dataset_ref = cloud_sql_to_bq_config.get_dataset_ref(big_query_client)

    load_job = big_query_client.load_table_from_cloud_storage_async(
        source_uri=uri,
        destination_dataset_ref=dataset_ref,
        destination_table_id=destination_table_id,
        destination_table_schema=bq_schema,
    )

    table_load_success = wait_for_table_load(big_query_client, load_job)

    if not table_load_success:
        raise ValueError(
            f"Copy from cloud storage to temp table failed. Skipping refresh for BQ table [{table_name}]"
        )
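
For context, load_table_from_cloud_storage_async is a wrapper that is not shown
here. A minimal sketch of the underlying call it is assumed to delegate to,
using the google-cloud-bigquery library directly (the bucket, destination, and
schema below are hypothetical):

# A sketch, not the actual BigQueryClient implementation.
from google.cloud import bigquery

def start_csv_load_sketch(client: bigquery.Client) -> bigquery.LoadJob:
    # WRITE_TRUNCATE wipes any existing rows before the load, matching the
    # docstring above; CREATE_IF_NEEDED (the default) creates missing tables.
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        schema=[bigquery.SchemaField("person_id", "INTEGER")],  # hypothetical
    )
    return client.load_table_from_uri(
        "gs://my-export-bucket/my_table.csv",  # hypothetical export URI
        "my-project.my_dataset.my_table",      # hypothetical destination
        job_config=job_config,
    )

The returned LoadJob only starts the work; calling load_job.result() (as
wait_for_table_load is assumed to do) blocks until it finishes.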
Example #2
# TEMP_TABLE_NAME is a module-level format string used to name the temp table;
# load_table_from_gcs_and_wait is the function from Example #1, and the other
# helpers come from the surrounding codebase.
import logging


def refresh_bq_table_from_gcs_export_synchronous(
    big_query_client: BigQueryClient,
    table_name: str,
    cloud_sql_to_bq_config: CloudSqlToBQConfig,
) -> None:
    """Loads data from Cloud SQL export and rows excluded from the SQL export from the current BQ table
    into a target BQ table. If target BQ table does not exist, it is created.

    The steps are:
    1. Load data from GCS to a temp table and wait.
    2. Load rows from the stale BQ table that were excluded from the SQL export
        into the temp table and wait. If the stale BQ table does not exist,
        create it. If the temp table has schema fields missing from the stale
        BQ table, add the missing fields to the BQ table query.
    3. Load data from the temp table to the final BQ table, overwriting all
        existing data and adding any missing fields to the destination table.
    4. Delete the temporary table.

    Waits until each BigQuery load is completed.

    Args:
        big_query_client: A BigQueryClient.
        table_name: Name of the table to refresh from the temp table. Table
            must be defined in the metadata_base class for its corresponding
            SchemaType.
        cloud_sql_to_bq_config: The config class for the given SchemaType.
    Returns:
        None.

    Raises:
        ValueError: If table creation or any table load fails.
    """
    temp_table_name = TEMP_TABLE_NAME.format(table_name=table_name)
    # Load GCS exported CSVs to temp table
    load_table_from_gcs_and_wait(
        big_query_client,
        table_name,
        cloud_sql_to_bq_config,
        destination_table_id=temp_table_name,
    )

    # Load rows excluded from CloudSQL export to temp table if table exists.
    # If table does not exist, create BQ destination table.
    dataset_ref = cloud_sql_to_bq_config.get_dataset_ref(big_query_client)

    if big_query_client.table_exists(dataset_ref=dataset_ref, table_id=table_name):
        load_rows_excluded_from_refresh_into_temp_table_and_wait(
            big_query_client,
            table_name,
            cloud_sql_to_bq_config,
            destination_table_id=temp_table_name,
        )
    else:
        logging.info(
            "Destination table [%s.%s] does not exist! Creating table from schema.",
            cloud_sql_to_bq_config.dataset_id,
            table_name,
        )

        create_table_success = big_query_client.create_table_with_schema(
            dataset_id=cloud_sql_to_bq_config.dataset_id,
            table_id=table_name,
            schema_fields=cloud_sql_to_bq_config.get_bq_schema_for_table(table_name),
        )

        if not create_table_success:
            raise ValueError(
                f"Failed to create table [{table_name}. Skipping table refresh from GCS."
            )

    logging.info("Loading BQ Table [%s] from temp table [%s]", table_name,
                 temp_table_name)

    load_job = big_query_client.load_table_from_table_async(
        source_dataset_id=cloud_sql_to_bq_config.dataset_id,
        source_table_id=temp_table_name,
        destination_dataset_id=cloud_sql_to_bq_config.dataset_id,
        destination_table_id=table_name,
    )

    table_load_success = wait_for_table_load(big_query_client, load_job)

    if not table_load_success:
        raise ValueError(
            f"Failed to load BigQuery table [{table_name}] from temp table [{temp_table_name}]."
        )

    delete_temp_table_if_exists(big_query_client, temp_table_name,
                                cloud_sql_to_bq_config)
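
Both examples block on wait_for_table_load, which is not shown here. A minimal
sketch of what it might look like, assuming it simply wraps LoadJob.result()
and converts failures into a boolean (the log messages are illustrative):

import logging

from google.cloud import bigquery
from google.cloud.exceptions import GoogleCloudError

def wait_for_table_load(big_query_client: BigQueryClient,
                        load_job: bigquery.job.LoadJob) -> bool:
    """Blocks until the load job finishes; returns True on success."""
    # big_query_client is unused in this sketch but kept to match the
    # call sites in the examples above.
    try:
        # result() raises if the job finished with errors.
        load_job.result()
        logging.info("Load job [%s] completed.", load_job.job_id)
        return True
    except (GoogleCloudError, ValueError):
        logging.exception("Load job [%s] failed.", load_job.job_id)
        return False

Returning a bool rather than raising lets each caller decide how to surface the
failure, which is why both examples raise their own ValueError with a
table-specific message.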