    def test_existing_backup(self):
        """ Test creating a backup that already exists. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'full'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'

        # When
        backup_db.create_backup(backup_time, backup_type, s3_key)

        # Then
        self.assertRaises(ValueError, backup_db.create_backup, backup_time,
                          backup_type, s3_key)

    def test_bad_backup_type(self):
        """ Test creating a backup with a bad backup type. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'badtype'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'

        # Then
        self.assertRaises(ValueError, backup_db.create_backup, backup_time,
                          backup_type, s3_key)

    def test_bad_dependency(self):
        """ Test creating a backup with a bad dependency. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'full'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'
        dependency = '20200425-201838'

        # Then
        self.assertRaises(ValueError, backup_db.create_backup, backup_time,
                          backup_type, s3_key, dependency)

    def test_delete_backup(self):
        """ Test deleting a backup from backup_db. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'full'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'

        backup_db.create_backup(backup_time, backup_type, s3_key)
        backup_db.get_backup(backup_time)

        # When
        backup_db.delete_backup(backup_time)

        # Then
        self.assertRaises(KeyError, backup_db.get_backup, backup_time)

    def test_create_backup_db(self):
        """ Test that the backup.db file is properly uploaded and downloaded. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'full'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'

        # When
        backup_db.create_backup(backup_time, backup_type, s3_key)

        # Then
        backup_db_new = BackupDB(self.bucket, self.filesystem)
        self.assertEqual(backup_db.get_backup(backup_time),
                         backup_db_new.get_backup(backup_time))

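# The tests above assume the enclosing TestCase's setUp provides self.bucket
# (a boto3 Bucket resource) and self.filesystem (a ZFS filesystem name).
# A minimal sketch of such a fixture is given below as a comment; the bucket
# name, credentials, and filesystem are placeholders, and the real setUp
# defined elsewhere in the suite may build these differently.
#
#     def setUp(self):
#         s3 = boto3.resource('s3',
#                             aws_access_key_id='<access key>',
#                             aws_secret_access_key='<secret key>')
#         self.bucket = s3.Bucket('zfs-uploader-test')
#         self.filesystem = 'tank/test'
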
class ZFSjob:
    """ ZFS backup job. """
    @property
    def bucket(self):
        """ S3 bucket. """
        return self._bucket

    @property
    def region(self):
        """ S3 region. """
        return self._region

    @property
    def endpoint(self):
        """ S3 endpoint. """
        return self._endpoint

    @property
    def access_key(self):
        """ S3 access key. """
        return self._access_key

    @property
    def secret_key(self):
        """ S3 secret key. """
        return self._secret_key

    @property
    def filesystem(self):
        """ ZFS filesystem. """
        return self._filesystem

    @property
    def s3(self):
        """ S3 resource. """
        return self._s3

    @property
    def cron(self):
        """ Cron schedule. """
        return self._cron

    @property
    def max_snapshots(self):
        """ Maximum number of snapshots. """
        return self._max_snapshots

    @property
    def max_backups(self):
        """ Maximum number of full and incremental backups. """
        return self._max_backups

    @property
    def max_incremental_backups_per_full(self):
        """ Maximum number of incremental backups per full backup. """
        return self._max_incremental_backups_per_full

    @property
    def storage_class(self):
        """ S3 storage class. """
        return self._storage_class

    @property
    def max_multipart_parts(self):
        """ Maximum number of parts to use in a multipart S3 upload. """
        return self._max_multipart_parts

    @property
    def backup_db(self):
        """ BackupDB """
        return self._backup_db

    @property
    def snapshot_db(self):
        """ SnapshotDB """
        return self._snapshot_db

    def __init__(self, bucket_name, access_key, secret_key, filesystem,
                 region=None, cron=None, max_snapshots=None, max_backups=None,
                 max_incremental_backups_per_full=None, storage_class=None,
                 endpoint=None, max_multipart_parts=None):
        """ Create ZFSjob object.

        Parameters
        ----------
        bucket_name : str
            S3 bucket name.
        access_key : str
            S3 access key.
        secret_key : str
            S3 secret key.
        filesystem : str
            ZFS filesystem.
        region : str, default: us-east-1
            S3 region.
        endpoint : str, optional
            S3 endpoint for alternative services.
        cron : str, optional
            Cron schedule. Example: `* 0 * * *`
        max_snapshots : int, optional
            Maximum number of snapshots.
        max_backups : int, optional
            Maximum number of full and incremental backups.
        max_incremental_backups_per_full : int, optional
            Maximum number of incremental backups per full backup.
        storage_class : str, default: STANDARD
            S3 storage class.
        max_multipart_parts : int, default: 10000
            Maximum number of parts to use in a multipart S3 upload.
""" self._bucket_name = bucket_name self._region = region or 'us-east-1' self._access_key = access_key self._secret_key = secret_key self._filesystem = filesystem self._endpoint = endpoint self._s3 = boto3.resource(service_name='s3', region_name=self._region, aws_access_key_id=self._access_key, aws_secret_access_key=self._secret_key, endpoint_url=endpoint) self._bucket = self._s3.Bucket(self._bucket_name) self._backup_db = BackupDB(self._bucket, self._filesystem) self._snapshot_db = SnapshotDB(self._filesystem) self._cron = cron self._max_snapshots = max_snapshots self._max_backups = max_backups self._max_incremental_backups_per_full = max_incremental_backups_per_full # noqa self._storage_class = storage_class or 'STANDARD' self._max_multipart_parts = max_multipart_parts or 10000 self._logger = logging.getLogger(__name__) if max_snapshots and not max_snapshots >= 0: self._logger.error(f'filesystem={self._filesystem} ' 'msg="max_snapshots must be greater than or ' 'equal to 0."') sys.exit(1) if max_backups and not max_backups >= 1: self._logger.error(f'filesystem={self._filesystem} ' 'msg="max_backups must be greater ' 'than or equal to 1."') sys.exit(1) if max_incremental_backups_per_full and not max_incremental_backups_per_full >= 0: # noqa self._logger.error(f'filesystem={self._filesystem} ' 'msg="max_incremental_backups_per_full must be ' 'greater than or equal to 0."') sys.exit(1) def start(self): """ Start ZFS backup job. """ self._logger.info(f'filesystem={self._filesystem} msg="Starting job."') backups_inc = self._backup_db.get_backups(backup_type='inc') backups_full = self._backup_db.get_backups(backup_type='full') # find most recent full backup backup = backups_full[-1] if backups_full else None # if no full backup exists if backup is None: self._backup_full() # if we don't want incremental backups elif self._max_incremental_backups_per_full == 0: self._backup_full() # if we want incremental backups and multiple full backups elif self._max_incremental_backups_per_full: backup_time = backup.backup_time dependants = [True if b.dependency == backup_time else False for b in backups_inc] if sum(dependants) >= self._max_incremental_backups_per_full: self._backup_full() else: self._backup_incremental(backup_time) # if we want incremental backups and not multiple full backups else: self._backup_incremental(backup.backup_time) if self._max_snapshots or self._max_snapshots == 0: self._limit_snapshots() if self._max_backups or self._max_backups == 0: self._limit_backups() self._logger.info(f'filesystem={self._filesystem} msg="Finished job."') def restore(self, backup_time=None, filesystem=None): """ Restore from backup. Defaults to most recent backup if backup_time is not specified. WARNING: If restoring to a file system that already exists, snapshots and data that were written after the backup will be destroyed. Parameters ---------- backup_time : str, optional Backup time in %Y%m%d_%H%M%S format. filesystem : str, optional File system to restore to. Defaults to the file system that the backup was taken from. 
""" self._snapshot_db.refresh() snapshots = self._snapshot_db.get_snapshot_names() if backup_time: backup = self._backup_db.get_backup(backup_time) else: backups = self._backup_db.get_backups() if backups is None: raise RestoreError('No backups exist.') else: backup = backups[-1] backup_time = backup.backup_time backup_type = backup.backup_type s3_key = backup.s3_key if filesystem: out = create_filesystem(filesystem) if out.returncode: raise ZFSError(out.stderr) if backup_type == 'full': if backup_time in snapshots and filesystem is None: self._logger.info(f'filesystem={self.filesystem} ' f'snapshot_name={backup_time} ' f's3_key={s3_key} ' 'msg="Snapshot already exists."') else: self._restore_snapshot(backup, filesystem) elif backup_type == 'inc': # restore full backup first backup_full = self._backup_db.get_backup(backup.dependency) if backup_full.backup_time in snapshots and filesystem is None: self._logger.info(f'filesystem={self.filesystem} ' f'snapshot_name={backup_full.backup_time} ' f's3_key={backup_full.s3_key} ' 'msg="Snapshot already exists."') else: self._restore_snapshot(backup_full, filesystem) if backup_time in snapshots and filesystem is None: self._logger.info(f'filesystem={self.filesystem} ' f'snapshot_name={backup_time} ' f's3_key={s3_key} ' 'msg="Snapshot already exists."') else: self._restore_snapshot(backup, filesystem) def _backup_full(self): """ Create snapshot and upload full backup. """ snapshot = self._snapshot_db.create_snapshot() backup_time = snapshot.name filesystem = snapshot.filesystem send_size = int(get_snapshot_send_size(filesystem, backup_time)) transfer_config = _get_transfer_config(send_size, self._max_multipart_parts) s3_key = f'{filesystem}/{backup_time}.full' self._logger.info(f'filesystem={filesystem} ' f'snapshot_name={backup_time} ' f's3_key={s3_key} ' 'msg="Starting full backup."') with open_snapshot_stream(filesystem, backup_time, 'r') as f: transfer_callback = TransferCallback(self._logger, send_size, filesystem, backup_time, s3_key) self._bucket.upload_fileobj(f.stdout, s3_key, Callback=transfer_callback.callback, Config=transfer_config, ExtraArgs={ 'StorageClass': self._storage_class }) stderr = f.stderr.read().decode('utf-8') if f.returncode: raise ZFSError(stderr) backup_size = self._check_backup(s3_key) self._backup_db.create_backup(backup_time, 'full', s3_key, dependency=None, backup_size=backup_size) self._logger.info(f'filesystem={filesystem} ' f'snapshot_name={backup_time} ' f's3_key={s3_key} ' 'msg="Finished full backup."') def _backup_incremental(self, backup_time_full): """ Create snapshot and upload incremental backup. Parameters ---------- backup_time_full : str Backup time in %Y%m%d_%H%M%S format. 
""" snapshot = self._snapshot_db.create_snapshot() backup_time = snapshot.name filesystem = snapshot.filesystem send_size = int(get_snapshot_send_size_inc(filesystem, backup_time_full, backup_time)) transfer_config = _get_transfer_config(send_size, self._max_multipart_parts) s3_key = f'{filesystem}/{backup_time}.inc' self._logger.info(f'filesystem={filesystem} ' f'snapshot_name={backup_time} ' f's3_key={s3_key} ' 'msg="Starting incremental backup."') with open_snapshot_stream_inc( filesystem, backup_time_full, backup_time) as f: transfer_callback = TransferCallback(self._logger, send_size, filesystem, backup_time, s3_key) self._bucket.upload_fileobj( f.stdout, s3_key, Callback=transfer_callback.callback, Config=transfer_config, ExtraArgs={ 'StorageClass': self._storage_class }) stderr = f.stderr.read().decode('utf-8') if f.returncode: raise ZFSError(stderr) backup_size = self._check_backup(s3_key) self._backup_db.create_backup(backup_time, 'inc', s3_key, backup_time_full, backup_size) self._logger.info(f'filesystem={filesystem} ' f'snapshot_name={backup_time} ' f's3_key={s3_key} ' 'msg="Finished incremental backup."') def _restore_snapshot(self, backup, filesystem=None): """ Restore snapshot from backup. Parameters ---------- backup : Backup filesystem : str, optional File system to restore to. Defaults to the file system that the backup was taken from. """ backup_time = backup.backup_time backup_size = backup.backup_size filesystem = filesystem or backup.filesystem s3_key = backup.s3_key transfer_config = TransferConfig(max_concurrency=S3_MAX_CONCURRENCY) self._logger.info(f'filesystem={filesystem} ' f'snapshot_name={backup_time} ' f's3_key={s3_key} ' 'msg="Restoring snapshot."') backup_object = self._s3.Object(self._bucket_name, s3_key) with open_snapshot_stream(filesystem, backup_time, 'w') as f: transfer_callback = TransferCallback(self._logger, backup_size, filesystem, backup_time, s3_key) try: backup_object.download_fileobj( f.stdin, Callback=transfer_callback.callback, Config=transfer_config) except BrokenPipeError: pass stderr = f.stderr.read().decode('utf-8') if f.returncode: raise ZFSError(stderr) self._snapshot_db.refresh() def _limit_snapshots(self): """ Limit number of snapshots. We only remove snapshots that were used for incremental backups. Keeping snapshots that were used for full backups allow us to restore without having to download the full backup. """ backup_times_full = self._backup_db.get_backup_times('full') results = self._snapshot_db.get_snapshots() if len(results) > self._max_snapshots: self._logger.info(f'filesystem={self._filesystem} ' 'msg="Snapshot limit achieved."') while len(results) > self._max_snapshots: snapshot = results.pop(0) backup_time = snapshot.name if backup_time not in backup_times_full: self._logger.info(f'filesystem={self._filesystem} ' f'snapshot_name={snapshot.name} ' 'msg="Deleting snapshot."') self._snapshot_db.delete_snapshot(snapshot.name) def _check_backup(self, s3_key): """ Check if S3 object exists and returns object size. Parameters ---------- s3_key : str Returns ------- int """ # load() will fail if object does not exist backup_object = self._s3.Object(self._bucket_name, s3_key) backup_object.load() if backup_object.content_length == 0: raise BackupError('Backup upload failed.') return backup_object.content_length def _delete_backup(self, backup): """ Delete backup. 
        Parameters
        ----------
        backup : Backup

        """
        backup_time = backup.backup_time
        s3_key = backup.s3_key

        self._logger.info(f's3_key={s3_key} '
                          'msg="Deleting backup."')
        backup_object = self._s3.Object(self._bucket_name, s3_key)
        backup_object.delete()

        self._backup_db.delete_backup(backup_time)

    def _limit_backups(self):
        """ Limit number of incremental and full backups.

        Only backups with no dependants are removed.
        """
        backups = self._backup_db.get_backups()

        if len(backups) > self._max_backups:
            self._logger.info(f'filesystem={self._filesystem} '
                              'msg="Backup limit achieved."')

            count = 0
            while len(backups) > self._max_backups:
                backup = backups[count]
                backup_time = backup.backup_time
                backup_type = backup.backup_type
                s3_key = backup.s3_key

                if backup_type == "inc":
                    self._delete_backup(backup)
                    backups.pop(count)
                elif backup_type == "full":
                    dependants = any([True if b.dependency == backup_time
                                      else False for b in backups])
                    if dependants:
                        self._logger.info(f's3_key={s3_key} '
                                          'msg="Backup has dependants. Not '
                                          'deleting."')
                    else:
                        self._delete_backup(backup)
                        backups.pop(count)

                count += 1
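
# A minimal usage sketch, not part of the module itself: the bucket name,
# credentials, and ZFS filesystem below are placeholders and must be replaced
# with real values before this would run end to end against S3 and ZFS.
if __name__ == '__main__':
    job = ZFSjob('my-backup-bucket',    # placeholder bucket name
                 '<access key>',        # placeholder S3 access key
                 '<secret key>',        # placeholder S3 secret key
                 'tank/data',           # placeholder ZFS filesystem
                 max_snapshots=7,
                 max_backups=7,
                 max_incremental_backups_per_full=6)
    # snapshot the filesystem and upload a full or incremental backup
    job.start()
    # job.restore()  # restore the most recent backup (see WARNING in restore())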