Beispiel #1
0
def main(config, max_backup_age=0, max_backup_count=0):
    backups_to_purge = list()
    monitoring = Monitoring(config=config.monitoring)

    try:
        logging.info('Starting purge')
        storage = Storage(config=config.storage)
        # Get all backups for the local node
        logging.info('Listing backups for {}'.format(config.storage.fqdn))
        backup_index = storage.list_backup_index_blobs()
        backups = list(storage.list_node_backups(fqdn=config.storage.fqdn, backup_index_blobs=backup_index))
        # list all backups to purge based on date conditions
        backups_to_purge += backups_to_purge_by_age(backups, max_backup_age)
        # list all backups to purge based on count conditions
        backups_to_purge += backups_to_purge_by_count(backups, max_backup_count)
        # purge all candidate backups
        purge_backups(storage, backups_to_purge)

        logging.debug('Emitting metrics')
        tags = ['medusa-node-backup', 'purge-error', 'PURGE-ERROR']
        monitoring.send(tags, 0)
    except Exception as e:
        traceback.print_exc()
        tags = ['medusa-node-backup', 'purge-error', 'PURGE-ERROR']
        monitoring.send(tags, 1)
        logging.error('This error happened during the purge: {}'.format(str(e)))
        sys.exit(1)
Beispiel #2
0
def report_latest(config, push_metrics):
    MAX_RETRIES = 3
    SLEEP_TIME = 15
    retry = 0

    monitoring = Monitoring(config=config.monitoring)

    for retry in range(MAX_RETRIES):
        try:
            logging.debug('Trying to report about existing backups ({}/{})...'.format(
                retry + 1,
                MAX_RETRIES
            ))
            storage = Storage(config=config.storage)
            fqdn = config.storage.fqdn
            backup_index = storage.list_backup_index_blobs()
            check_node_backup(config, storage, fqdn, push_metrics, monitoring)
            check_complete_cluster_backup(storage, push_metrics, monitoring, backup_index)
            check_latest_cluster_backup(storage, push_metrics, monitoring, backup_index)
            break
        except Exception as e:
            if (retry + 1) < MAX_RETRIES:
                logging.debug('Report attempt {} failed, waiting {} seconds to retry'.format(
                    retry + 1,
                    SLEEP_TIME
                ))
                time.sleep(SLEEP_TIME)
                continue
            else:
                logging.error('This error happened during the check: {}'.format(e), exc_info=True)
                if push_metrics:
                    # Set latest known complete backup to ~ 10 years ago to attract the attention
                    # of the operator on the broken monitoring.
                    logging.info("Sending a big value to 'seconds-since-backup' metric to trigger alerts.")
                    long_time_flag_value = 315365400
                    tags = ['medusa-cluster-backup', 'seconds-since-backup', 'TRACKING-ERROR']
                    monitoring.send(tags, long_time_flag_value)