Example #1
def export_table(table_name: str, cloud_sql_to_bq_config: CloudSqlToBQConfig) -> bool:
    """Export a Cloud SQL table to a CSV file on GCS.

    Given a table name, retrieve the export query and export URI from
    cloud_sql_to_bq_config, then execute the export operation and wait until it
    completes.

    Args:
        table_name: Table to export.
        cloud_sql_to_bq_config: The export config class for the table's SchemaType.
    Returns:
        True if operation succeeded without errors, False if not.
    """
    schema_type = cloud_sql_to_bq_config.schema_type
    export_query = cloud_sql_to_bq_config.get_table_export_query(table_name)
    export_uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)

    export_context = create_export_context(schema_type, export_uri, export_query)

    project_id = metadata.project_id()
    instance_id = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(schema_type)
    export_request = (
        sqladmin_client()
        .instances()
        .export(project=project_id, instance=instance_id, body=export_context)
    )

    logging.info("GCS URI [%s] in project [%s]", export_uri, project_id)
    logging.info("Starting export: [%s]", str(export_request.to_json()))

    try:
        response = export_request.execute()
    except googleapiclient.errors.HttpError:
        logging.exception("Failed to export table [%s]", table_name)
        return False

    # We need to block until the operation is done because
    # the Cloud SQL API only supports one operation at a time.
    operation_id = response["name"]
    logging.info(
        "Waiting for export operation [%s] to complete for table [%s] "
        "in database [%s] in project [%s]",
        operation_id,
        table_name,
        instance_id,
        project_id,
    )
    operation_success = wait_until_operation_finished(operation_id)

    return operation_success
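A hedged usage sketch: a hypothetical caller that exports every table in a config and reports which exports failed. export_table and CloudSqlToBQConfig are the names defined above; get_tables_to_export is an assumed helper, not shown in these examples.

def export_all_tables(cloud_sql_to_bq_config: CloudSqlToBQConfig) -> None:
    # Hypothetical driver; get_tables_to_export() is an assumed config method.
    failed_tables = [
        table for table in cloud_sql_to_bq_config.get_tables_to_export()
        if not export_table(table, cloud_sql_to_bq_config)
    ]
    if failed_tables:
        raise RuntimeError(
            "Export failed for tables: {}".format(", ".join(failed_tables)))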
Example #2
def _throw_if_error(project_id: str, operation_id: str,
                    operation_type: str) -> None:
    operation = sqladmin_client().operations().get(
        project=project_id, operation=operation_id).execute()

    if 'error' in operation:
        errors = operation['error'].get('errors', [])
        error_messages = [
            'code: {}\n message: {}'.format(error['code'], error['message'])
            for error in errors
        ]
        raise RuntimeError('Backup {} operation finished with '
                           '{} errors:\n{}'.format(operation_type,
                                                   str(len(errors)),
                                                   '\n'.join(error_messages)))
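For reference, a sketch of the operation payload shape this helper reacts to; the nesting follows the Cloud SQL Admin API operation resource, but the concrete values here are made up. Run through the logic above, a payload like this would raise a RuntimeError reporting one error.

# Made-up example of a failed operation as returned by operations().get().
failed_operation = {
    'status': 'DONE',
    'error': {
        'errors': [
            {'kind': 'sql#operationError',
             'code': 'OPERATION_FAILED',    # example value
             'message': 'Backup failed.'},  # example value
        ]
    },
}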
Example #3
def _throw_if_error(project_id: str, operation_id: str,
                    operation_type: str) -> None:
    operation = (sqladmin_client().operations().get(
        project=project_id, operation=operation_id).execute())

    if "error" in operation:
        errors = operation["error"].get("errors", [])
        error_messages = [
            "code: {}\n message: {}".format(error["code"], error["message"])
            for error in errors
        ]
        raise RuntimeError("Backup {} operation finished with "
                           "{} errors:\n{}".format(operation_type,
                                                   str(len(errors)),
                                                   "\n".join(error_messages)))
Example #4
def _await_operation(project_id: str, operation_id: str) -> None:
    done = False
    while not done:

        operation = sqladmin_client().operations().get(
            project=project_id, operation=operation_id).execute()
        current_status = operation['status']

        if current_status in {'PENDING', 'RUNNING', 'UNKNOWN'}:
            time.sleep(_SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
        elif current_status == 'DONE':
            done = True
        else:
            raise RuntimeError(
                'Unrecognized operation status: {}'.format(current_status))
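A minimal sketch of a deadline-bounded variant, for callers that cannot afford to poll forever; the timeout parameter and TimeoutError behavior are assumptions, not part of the original helper.

def _await_operation_with_timeout(project_id: str, operation_id: str,
                                  timeout_seconds: float = 600.0) -> None:
    # Hypothetical extension of _await_operation: give up after a deadline.
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        operation = sqladmin_client().operations().get(
            project=project_id, operation=operation_id).execute()
        if operation['status'] == 'DONE':
            return
        time.sleep(_SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
    raise TimeoutError(
        'Operation {} did not finish within {} seconds'.format(
            operation_id, timeout_seconds))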
Example #5
def _await_operation(project_id: str, operation_id: str) -> None:
    done = False
    while not done:

        operation = (sqladmin_client().operations().get(
            project=project_id, operation=operation_id).execute())
        current_status = operation["status"]

        if current_status in {"PENDING", "RUNNING", "UNKNOWN"}:
            time.sleep(_SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
        elif current_status == "DONE":
            done = True
        else:
            raise RuntimeError(
                "Unrecognized operation status: {}".format(current_status))
Example #6
def export_table(schema_type: SchemaType, table_name: str, export_query: str) \
        -> bool:
    """Export a Cloud SQL table to a CSV file on GCS.

    Given a table name and export_query, retrieve the export URI from
    export_config, then execute the export operation and wait until it
    completes.

    Args:
        schema_type: The schema, either SchemaType.JAILS or
            SchemaType.STATE, where this table lives.
        table_name: Table to export.
        export_query: Corresponding query for the table.
    Returns:
        True if operation succeeded without errors, False if not.
    """

    export_uri = export_config.gcs_export_uri(table_name)
    export_context = create_export_context(schema_type, export_uri,
                                           export_query)

    project_id = metadata.project_id()
    instance_id = \
        SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(schema_type)
    export_request = sqladmin_client().instances().export(project=project_id,
                                                          instance=instance_id,
                                                          body=export_context)

    logging.info("Starting export: [%s]", str(export_request.to_json()))
    try:
        response = export_request.execute()
    except googleapiclient.errors.HttpError:
        logging.exception("Failed to export table [%s]", table_name)
        return False

    # We need to block until the operation is done because
    # the Cloud SQL API only supports one operation at a time.
    operation_id = response['name']
    logging.info(
        "Waiting for export operation [%s] to complete for table [%s] "
        "in database [%s] in project [%s]", operation_id, table_name,
        instance_id, project_id)
    operation_success = wait_until_operation_finished(operation_id)

    return operation_success
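create_export_context is not shown in these examples, but per the Cloud SQL Admin API instances.export schema, the request body it builds for a CSV export looks roughly like this; the URI, database, and query values are placeholders.

# Sketch of an instances.export request body (sql#exportContext schema).
export_context = {
    "exportContext": {
        "kind": "sql#exportContext",
        "fileType": "CSV",
        "uri": "gs://my-bucket/my_table.csv",  # placeholder
        "databases": ["my_database"],          # placeholder
        "csvExportOptions": {"selectQuery": "SELECT * FROM my_table"},
    }
}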
Example #7
def wait_until_operation_finished(operation_id: str) -> bool:
    """Monitor a Cloud SQL operation's progress and wait until it completes.

    We must wait until completion because only one Cloud SQL operation can run
    at a time.

    Args:
        operation_id: Cloud SQL Operation ID.
    Returns:
        True if operation succeeded without errors, False if not.

    See here for details:
    https://cloud.google.com/sql/docs/postgres/admin-api/v1beta4/operations/get
    """
    operation_in_progress = True
    operation_success = False

    while operation_in_progress:
        get_operation = sqladmin_client().operations().get(
            project=metadata.project_id(), operation=operation_id)
        operation = get_operation.execute()
        operation_status = operation['status']

        if operation_status in {'PENDING', 'RUNNING', 'UNKNOWN'}:
            time.sleep(SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
        elif operation_status == 'DONE':
            operation_in_progress = False
        else:
            # Fail fast on an unexpected status instead of spinning forever.
            raise RuntimeError(
                'Unrecognized operation status: {}'.format(operation_status))

        logging.debug("Operation [%s] status: [%s]",
                      operation_id, operation_status)

    if 'error' in operation:
        errors = operation['error'].get('errors', [])
        for error in errors:
            logging.error(
                "Operation %s finished with error: %s, %s\n%s",
                operation_id,
                error.get('kind'),
                error.get('code'),
                error.get('message'))
    else:
        logging.info("Operation [%s] succeeded.", operation_id)
        operation_success = True

    return operation_success
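A pytest-style sketch of how the success path could be exercised with mocks; the module name export_module and the patch targets are assumptions about where these functions live.

import export_module  # hypothetical module containing the function above
from unittest import mock

def test_wait_until_operation_finished_success():
    # Stub the client chain so the first poll returns a terminal DONE
    # status with no 'error' key, which should count as success.
    fake_client = mock.MagicMock()
    fake_client.operations.return_value.get.return_value.execute.return_value = {
        'status': 'DONE',
    }
    with mock.patch('export_module.sqladmin_client', return_value=fake_client), \
         mock.patch('export_module.metadata') as fake_metadata:
        fake_metadata.project_id.return_value = 'my-project'
        assert export_module.wait_until_operation_finished('op-123') is True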
Example #8
def get_operation_with_retries(operation_id: str) -> Dict[str, Any]:
    num_retries = 3
    while num_retries > 0:
        # We need to guard here for possible 404 HttpErrors if the operation hasn't started yet
        try:
            get_operation = sqladmin_client().operations().get(
                project=metadata.project_id(), operation=operation_id)
            return get_operation.execute()
        except googleapiclient.errors.HttpError as error:
            # If we get a 404 HttpError, wait a few seconds and then retry getting the operation instance.
            if error.resp.status == HTTPStatus.NOT_FOUND and num_retries > 0:
                logging.debug(
                    "HttpError when requesting operation_id [%s]. Retrying request: %s",
                    operation_id, num_retries)
                time.sleep(SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
                num_retries -= 1
            else:
                raise
    raise ValueError("Operation not set, request for the operation failed.")
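This retrying getter is a drop-in replacement for the bare operations().get(...).execute() call inside a polling loop like wait_until_operation_finished above; a short sketch, assuming operation_id came from a prior execute() response.

# Sketch: poll via the retrying getter, shielded from transient 404s that
# can occur before the operation is registered.
operation = get_operation_with_retries(operation_id)
while operation['status'] in {'PENDING', 'RUNNING', 'UNKNOWN'}:
    time.sleep(SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
    operation = get_operation_with_retries(operation_id)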
Example #9
def update_long_term_backups_for_cloudsql_instance(project_id: str,
                                                   instance_id: str) -> None:
    """Create a new manual backup for the given sqlalchemy instance
    and delete manual backups for that instance that are older than
    _MAX_BACKUP_AGE_DAYS.
    """

    logging.info('Creating request for backup insert operation on [%s]',
                 instance_id)
    insert_request = sqladmin_client().backupRuns().insert(
        project=project_id, instance=instance_id, body={})

    logging.info('Beginning backup insert operation on [%s]', instance_id)
    insert_operation = insert_request.execute()
    _await_operation(project_id, insert_operation['name'])
    _throw_if_error(project_id, insert_operation['name'], 'insert')
    logging.info('Backup insert operation on [%s] completed', instance_id)

    logging.info('Creating request for backup list operation on [%s]',
                 instance_id)
    list_request = sqladmin_client().backupRuns().list(project=project_id,
                                                       instance=instance_id)

    logging.info('Beginning backup list request')
    list_result = list_request.execute()
    backup_runs = list_result['items']
    manual_backup_runs = [
        backup_run for backup_run in backup_runs
        if backup_run['type'] == 'ON_DEMAND'
    ]
    logging.info(
        'Backup list request for [%s] completed with [%s] total backup'
        ' runs and [%s] manual backup runs', instance_id,
        str(len(backup_runs)), str(len(manual_backup_runs)))

    # startTime is a string with format yyyy-mm-dd, so sorting it as a
    # string will give the same result as converting it to a date and then
    # sorting by date
    manual_backup_runs.sort(key=lambda backup_run: backup_run['startTime'])

    six_months_ago_datetime = \
        datetime.datetime.utcnow() - datetime.timedelta(
            days=_MAX_BACKUP_AGE_DAYS)
    six_months_ago_date_str = six_months_ago_datetime.date().isoformat()

    for backup_run in manual_backup_runs:
        backup_start_date_str = backup_run['startTime']
        if backup_start_date_str > six_months_ago_date_str:
            break

        backup_id = backup_run['id']

        logging.info(
            'Creating request for backup delete operation for backup '
            '[%s] of [%s]', backup_id, instance_id)
        delete_request = sqladmin_client().backupRuns().delete(
            project=project_id, instance=instance_id, id=backup_id)

        logging.info(
            'Beginning backup delete operation for backup [%s] of [%s]',
            backup_id, instance_id)
        delete_operation = delete_request.execute()
        _await_operation(project_id, delete_operation['name'])
        _throw_if_error(project_id, delete_operation['name'], 'delete')
        logging.info(
            'Backup delete operation completed for backup [%s] of [%s]',
            backup_id, instance_id)
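One caveat: backupRuns().list() responses can be paginated, and the code above reads only the first page of items. A hedged sketch of exhaustive listing with the standard google-api-python-client list_next helper:

# Sketch: collect backup runs across all pages, not just the first.
backup_runs = []
request = sqladmin_client().backupRuns().list(project=project_id,
                                              instance=instance_id)
while request is not None:
    response = request.execute()
    backup_runs.extend(response.get('items', []))
    request = sqladmin_client().backupRuns().list_next(request, response)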
Example #10
def update_long_term_backups_for_cloudsql_instance(project_id: str,
                                                   instance_id: str) -> None:
    """Create a new manual backup for the given sqlalchemy instance
    and delete manual backups for that instance that are older than
    _MAX_BACKUP_AGE_DAYS.
    """

    logging.info("Creating request for backup insert operation on [%s]",
                 instance_id)
    insert_request = (sqladmin_client().backupRuns().insert(
        project=project_id, instance=instance_id, body={}))

    logging.info("Beginning backup insert operation on [%s]", instance_id)
    insert_operation = insert_request.execute()
    _await_operation(project_id, insert_operation["name"])
    _throw_if_error(project_id, insert_operation["name"], "insert")
    logging.info("Backup insert operation on [%s] completed", instance_id)

    logging.info("Creating request for backup list operation on [%s]",
                 instance_id)
    list_request = (sqladmin_client().backupRuns().list(project=project_id,
                                                        instance=instance_id))

    logging.info("Beginning backup list request")
    list_result = list_request.execute()
    backup_runs = list_result["items"]
    manual_backup_runs = [
        backup_run for backup_run in backup_runs
        if backup_run["type"] == "ON_DEMAND"
    ]
    logging.info(
        "Backup list request for [%s] completed with [%s] total backup"
        " runs and [%s] manual backup runs",
        instance_id,
        str(len(backup_runs)),
        str(len(manual_backup_runs)),
    )

    # startTime is a string with format yyyy-mm-dd, so sorting it as a
    # string will give the same result as converting it to a date and then
    # sorting by date
    manual_backup_runs.sort(key=lambda backup_run: backup_run["startTime"])

    six_months_ago_datetime = datetime.datetime.now(
        tz=pytz.UTC) - datetime.timedelta(days=_MAX_BACKUP_AGE_DAYS)
    six_months_ago_date_str = six_months_ago_datetime.date().isoformat()

    for backup_run in manual_backup_runs:
        backup_start_date_str = backup_run["startTime"]
        if backup_start_date_str > six_months_ago_date_str:
            break

        backup_id = backup_run["id"]

        logging.info(
            "Creating request for backup delete operation for backup "
            "[%s] of [%s]",
            backup_id,
            instance_id,
        )
        delete_request = (sqladmin_client().backupRuns().delete(
            project=project_id, instance=instance_id, id=backup_id))

        logging.info(
            "Beginning backup delete operation for backup [%s] of [%s]",
            backup_id,
            instance_id,
        )
        delete_operation = delete_request.execute()
        _await_operation(project_id, delete_operation["name"])
        _throw_if_error(project_id, delete_operation["name"], "delete")
        logging.info(
            "Backup delete operation completed for backup [%s] of [%s]",
            backup_id,
            instance_id,
        )
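Finally, a hypothetical entry point tying the pieces together; the instance ids here are made-up placeholders, not names from the examples above.

def main() -> None:
    # Hypothetical driver; real instance ids would come from configuration.
    project_id = metadata.project_id()
    for instance_id in ("jails-instance", "state-instance"):  # placeholders
        update_long_term_backups_for_cloudsql_instance(project_id, instance_id)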