def export_table(table_name: str, cloud_sql_to_bq_config: CloudSqlToBQConfig) -> bool:
    """Export a Cloud SQL table to a CSV file on GCS.

    Given a table name, retrieve the export query and export URI from
    cloud_sql_to_bq_config, then execute the export operation and wait until it
    completes.

    Args:
        table_name: Table to export.
        cloud_sql_to_bq_config: The export config class for the table's SchemaType.
    Returns:
        True if operation succeeded without errors, False if not.
    """
    schema_type = cloud_sql_to_bq_config.schema_type
    export_query = cloud_sql_to_bq_config.get_table_export_query(table_name)
    export_uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)

    export_context = create_export_context(schema_type, export_uri, export_query)

    project_id = metadata.project_id()
    instance_id = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(schema_type)
    export_request = (
        sqladmin_client()
        .instances()
        .export(project=project_id, instance=instance_id, body=export_context)
    )

    logging.info("GCS URI [%s] in project [%s]", export_uri, project_id)
    logging.info("Starting export: [%s]", str(export_request.to_json()))

    try:
        response = export_request.execute()
    except googleapiclient.errors.HttpError:
        logging.exception("Failed to export table [%s]", table_name)
        return False

    # We need to block until the operation is done because
    # the Cloud SQL API only supports one operation at a time.
    operation_id = response["name"]
    logging.info(
        "Waiting for export operation [%s] to complete for table [%s] "
        "in database [%s] in project [%s]",
        operation_id,
        table_name,
        instance_id,
        project_id,
    )
    operation_success = wait_until_operation_finished(operation_id)
    return operation_success
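
# The wait_until_operation_finished helper called above is defined elsewhere.
# As a rough illustration only: a poller like it could be built on the Cloud
# SQL Admin API's operations().get endpoint, sleeping between polls until the
# operation reports a DONE status. The function name, polling interval, and
# error handling below are assumptions for the sketch, not the module's actual
# implementation.
def _wait_until_operation_finished_sketch(operation_id: str) -> bool:
    """Illustrative sketch: blocks until the Cloud SQL operation completes."""
    import time  # local import so the sketch stays self-contained

    operation_in_progress = True
    operation_success = False
    while operation_in_progress:
        operation = (
            sqladmin_client()
            .operations()
            .get(project=metadata.project_id(), operation=operation_id)
            .execute()
        )
        if operation["status"] == "DONE":
            operation_in_progress = False
            if "error" in operation:
                # Surface each error the operation reports before returning.
                for error in operation["error"].get("errors", []):
                    logging.error(
                        "Operation [%s] finished with error: %s",
                        operation_id,
                        error.get("message"),
                    )
            else:
                operation_success = True
        else:
            # Still PENDING/RUNNING; wait before polling again.
            time.sleep(3)
    return operation_success
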
def load_table_from_gcs_and_wait(
    big_query_client: BigQueryClient,
    table_name: str,
    cloud_sql_to_bq_config: CloudSqlToBQConfig,
    destination_table_id: str,
) -> None:
    """Loads a table from CSV data in GCS to BigQuery.

    Given a table name and a destination_table_id, retrieve the export URI and
    schema from cloud_sql_to_bq_config, then load the table into the
    destination_table_id. This starts the load job asynchronously and then
    waits until it completes.

    Tables are created if they do not exist, and overwritten if they do exist.
    Because we are using bigquery.WriteDisposition.WRITE_TRUNCATE, the table's
    data will be completely wiped and overwritten with the contents of the CSV.

    Args:
        big_query_client: A BigQueryClient.
        table_name: Table to import. Table must be defined in the base schema.
        cloud_sql_to_bq_config: Export config class for a specific SchemaType.
        destination_table_id: Name of the BigQuery table to load the data into.
    Raises:
        ValueError: If the table load does not complete successfully.
    """
    uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)
    logging.info("GCS URI [%s] in project [%s]", uri, metadata.project_id())

    bq_schema = cloud_sql_to_bq_config.get_bq_schema_for_table(table_name)
    dataset_ref = cloud_sql_to_bq_config.get_dataset_ref(big_query_client)

    load_job = big_query_client.load_table_from_cloud_storage_async(
        source_uri=uri,
        destination_dataset_ref=dataset_ref,
        destination_table_id=destination_table_id,
        destination_table_schema=bq_schema,
    )

    table_load_success = wait_for_table_load(big_query_client, load_job)

    if not table_load_success:
        raise ValueError(
            f"Copy from cloud storage to temp table failed. Skipping refresh for "
            f"BQ table [{table_name}]"
        )
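
# A sketch of how the two steps above might be chained for one table: export
# the Cloud SQL table to a CSV in GCS, then load that CSV into BigQuery. The
# function name is hypothetical, and using table_name as the destination table
# is an assumption for illustration; the module's real refresh orchestration
# may sequence and parametrize these steps differently.
def _refresh_bq_table_sketch(
    big_query_client: BigQueryClient,
    table_name: str,
    cloud_sql_to_bq_config: CloudSqlToBQConfig,
) -> None:
    """Illustrative sketch: export one table to GCS, then load it into BQ."""
    # Step 1: dump the Cloud SQL table to GCS; stop here if the export fails.
    if not export_table(table_name, cloud_sql_to_bq_config):
        raise RuntimeError(f"Export failed for table [{table_name}]")

    # Step 2: load the exported CSV into BigQuery, overwriting any existing
    # contents of the destination table (WRITE_TRUNCATE semantics).
    load_table_from_gcs_and_wait(
        big_query_client,
        table_name,
        cloud_sql_to_bq_config,
        destination_table_id=table_name,
    )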