Example #1
def export_table(table_name: str, cloud_sql_to_bq_config: CloudSqlToBQConfig) -> bool:
    """Export a Cloud SQL table to a CSV file on GCS.

    Given a table name, retrieve the export query and export URI from
    cloud_sql_to_bq_config, then execute the export operation and wait until it
    completes.

    Args:
        table_name: Table to export.
        cloud_sql_to_bq_config: The export config class for the table's SchemaType.
    Returns:
        True if the operation succeeded without errors, False if not.
    """
    schema_type = cloud_sql_to_bq_config.schema_type
    export_query = cloud_sql_to_bq_config.get_table_export_query(table_name)
    export_uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)

    export_context = create_export_context(schema_type, export_uri, export_query)

    project_id = metadata.project_id()
    instance_id = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(schema_type)
    export_request = (
        sqladmin_client()
        .instances()
        .export(project=project_id, instance=instance_id, body=export_context)
    )

    logging.info("GCS URI [%s] in project [%s]", export_uri, project_id)
    logging.info("Starting export: [%s]", str(export_request.to_json()))

    try:
        response = export_request.execute()
    except googleapiclient.errors.HttpError:
        logging.exception("Failed to export table [%s]", table_name)
        return False

    # We need to block until the operation is done because
    # the Cloud SQL API only supports one operation at a time.
    operation_id = response["name"]
    logging.info(
        "Waiting for export operation [%s] to complete for table [%s] "
        "in database [%s] in project [%s]",
        operation_id,
        table_name,
        instance_id,
        project_id,
    )
    operation_success = wait_until_operation_finished(operation_id)

    return operation_success
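
The helper wait_until_operation_finished is referenced above but not shown. Below is a minimal sketch of what such a helper could look like, assuming the same metadata.project_id() and sqladmin_client() helpers from the surrounding module and the standard Cloud SQL Admin API operations().get() call; the polling interval, field handling, and log wording are assumptions, not the project's actual implementation.

import logging
import time

def wait_until_operation_finished(operation_id: str) -> bool:
    """Polls the Cloud SQL Admin API until the given operation finishes.

    Sketch only: returns True if the operation reaches DONE without errors.
    """
    project_id = metadata.project_id()
    while True:
        # Fetch the current state of the export operation.
        operation = (
            sqladmin_client()
            .operations()
            .get(project=project_id, operation=operation_id)
            .execute()
        )
        status = operation.get("status")
        if status == "DONE":
            # A DONE operation can still have failed; check the error block.
            errors = operation.get("error", {}).get("errors", [])
            if errors:
                logging.error(
                    "Operation [%s] finished with errors: %s", operation_id, errors
                )
                return False
            return True
        logging.info(
            "Operation [%s] has status [%s]; polling again", operation_id, status
        )
        time.sleep(5)  # Assumed polling interval.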
Example #2
def load_table_from_gcs_and_wait(
    big_query_client: BigQueryClient,
    table_name: str,
    cloud_sql_to_bq_config: CloudSqlToBQConfig,
    destination_table_id: str,
) -> None:
    """Loads a table from CSV data in GCS to BigQuery.

    Given a table name and a destination_table_id, retrieve the export URI and
    schema from cloud_sql_to_bq_config, then load the table into the
    destination_table_id.

    This starts the job and waits until it completes.

    Tables are created if they do not exist, and overwritten if they do exist.

    Because we are using bigquery.WriteDisposition.WRITE_TRUNCATE, the table's
    data will be completely wiped and overwritten with the contents of the CSV.

    Args:
        big_query_client: A BigQueryClient.
        table_name: Table to import. Table must be defined in the base schema.
        cloud_sql_to_bq_config: Export config class for a specific SchemaType.
        destination_table_id: Name of the BigQuery table to load the data into.
    Returns:
        None if the table load succeeds.
    Raises:
        ValueError: If the table load fails.
    """
    uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)

    logging.info("GCS URI [%s] in project [%s]", uri, metadata.project_id())

    bq_schema = cloud_sql_to_bq_config.get_bq_schema_for_table(table_name)
    dataset_ref = cloud_sql_to_bq_config.get_dataset_ref(big_query_client)

    load_job = big_query_client.load_table_from_cloud_storage_async(
        source_uri=uri,
        destination_dataset_ref=dataset_ref,
        destination_table_id=destination_table_id,
        destination_table_schema=bq_schema,
    )

    table_load_success = wait_for_table_load(big_query_client, load_job)

    if not table_load_success:
        raise ValueError(
            f"Copy from cloud storage to temp table failed. Skipping refresh for BQ table [{table_name}]"
        )
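
The helper wait_for_table_load is likewise not shown. A minimal sketch follows, assuming load_job is a google.cloud.bigquery LoadJob whose result() call can be blocked on, and that returning False when the job raises is the desired contract; the exception type and log messages are assumptions rather than the project's actual code.

import logging

from google.api_core import exceptions as api_exceptions
from google.cloud import bigquery

def wait_for_table_load(
    big_query_client: BigQueryClient,  # Unused in this sketch; kept to match the call site.
    load_job: bigquery.job.LoadJob,
) -> bool:
    """Blocks until the load job completes; returns True on success."""
    try:
        # result() blocks until the job finishes and re-raises any job error.
        load_job.result()
        logging.info(
            "Load job [%s] into [%s] completed.", load_job.job_id, load_job.destination
        )
        return True
    except api_exceptions.GoogleAPIError as e:
        logging.error("Load job [%s] failed: %s", load_job.job_id, e)
        return False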