    def test_existing_backup(self):
        """ Test that creating an existing backup raises ValueError. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'full'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'

        # When
        backup_db.create_backup(backup_time, backup_type, s3_key)

        # Then
        self.assertRaises(ValueError, backup_db.create_backup, backup_time,
                          backup_type, s3_key)

    def test_bad_backup_type(self):
        """ Test that a bad backup type raises ValueError. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'badtype'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'

        # Then
        self.assertRaises(ValueError, backup_db.create_backup, backup_time,
                          backup_type, s3_key)

    def test_bad_dependency(self):
        """ Test creating a backup with a bad dependency. """

        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'full'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'
        dependency = '20200425-201838'

        # Then
        self.assertRaises(ValueError, backup_db.create_backup, backup_time,
                          backup_type, s3_key, dependency)

    def test_delete_backup(self):
        """ Test deleting a backup from the backup database. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'full'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'

        backup_db.create_backup(backup_time, backup_type, s3_key)
        backup_db.get_backup(backup_time)

        # When
        backup_db.delete_backup(backup_time)

        # Then
        self.assertRaises(KeyError, backup_db.get_backup, backup_time)

    def test_create_backup_db(self):
        """ Test that backup.db is properly uploaded and downloaded. """
        # Given
        backup_db = BackupDB(self.bucket, self.filesystem)
        backup_time = '20210425_201838'
        backup_type = 'full'
        s3_key = f'{self.filesystem}/{backup_time}.{backup_type}'

        # When
        backup_db.create_backup(backup_time, backup_type, s3_key)

        # Then
        backup_db_new = BackupDB(self.bucket, self.filesystem)

        self.assertEqual(backup_db.get_backup(backup_time),
                         backup_db_new.get_backup(backup_time))
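# A minimal setUp sketch for the tests above, not from the original source.
# It assumes the fixtures `self.bucket` and `self.filesystem` come from a
# moto-mocked S3 bucket (moto < 5 API) and a placeholder ZFS filesystem name;
# the BackupDB import path is a guess.
import unittest

import boto3
from moto import mock_s3  # in moto >= 5 this is `mock_aws`

from zfs_uploader.backup_db import BackupDB  # assumed import path


class BackupDBTests(unittest.TestCase):
    def setUp(self):
        self._mock_s3 = mock_s3()
        self._mock_s3.start()
        s3 = boto3.resource('s3', region_name='us-east-1')
        s3.create_bucket(Bucket='test-bucket')
        self.bucket = s3.Bucket('test-bucket')
        self.filesystem = 'tank/data'

    def tearDown(self):
        self._mock_s3.stop()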
Example #6
class ZFSjob:
    """ ZFS backup job. """
    @property
    def bucket(self):
        """ S3 bucket. """
        return self._bucket

    @property
    def region(self):
        """ S3 region. """
        return self._region

    @property
    def endpoint(self):
        """ S3 Endpoint. """
        return self._endpoint

    @property
    def access_key(self):
        """ S3 access key. """
        return self._access_key

    @property
    def secret_key(self):
        """ S3 secret key. """
        return self._secret_key

    @property
    def filesystem(self):
        """ ZFS filesystem. """
        return self._filesystem

    @property
    def s3(self):
        """ S3 resource. """
        return self._s3

    @property
    def cron(self):
        """ Cron schedule. """
        return self._cron

    @property
    def max_snapshots(self):
        """ Maximum number of snapshots. """
        return self._max_snapshots

    @property
    def max_backups(self):
        """ Maximum number of full and incremental backups. """
        return self._max_backups

    @property
    def max_incremental_backups_per_full(self):
        """ Maximum number of incremental backups per full backup. """
        return self._max_incremental_backups_per_full

    @property
    def storage_class(self):
        """ S3 storage class. """
        return self._storage_class

    @property
    def max_multipart_parts(self):
        """ Maximum number of parts to use in a multipart S3 upload. """
        return self._max_multipart_parts

    @property
    def backup_db(self):
        """ BackupDB """
        return self._backup_db

    @property
    def snapshot_db(self):
        """ SnapshotDB """
        return self._snapshot_db

    def __init__(self, bucket_name, access_key, secret_key, filesystem,
                 region=None, cron=None, max_snapshots=None, max_backups=None,
                 max_incremental_backups_per_full=None, storage_class=None,
                 endpoint=None, max_multipart_parts=None):
        """ Create ZFSjob object.

        Parameters
        ----------
        bucket_name : str
            S3 bucket name.
        access_key : str
            S3 access key.
        secret_key : str
            S3 secret key.
        filesystem : str
            ZFS filesystem.
        region : str, default: us-east-1
            S3 region.
        endpoint : str, optional
            S3 endpoint, for use with S3-compatible services.
        cron : str, optional
            Cron schedule. Example: `* 0 * * *`
        max_snapshots : int, optional
            Maximum number of snapshots.
        max_backups : int, optional
            Maximum number of full and incremental backups.
        max_incremental_backups_per_full : int, optional
            Maximum number of incremental backups per full backup.
        storage_class : str, default: STANDARD
            S3 storage class.
        max_multipart_parts : int, default: 10000
            Maximum number of parts to use in a multipart S3 upload.

        """
        self._bucket_name = bucket_name
        self._region = region or 'us-east-1'
        self._access_key = access_key
        self._secret_key = secret_key
        self._filesystem = filesystem
        self._endpoint = endpoint

        self._s3 = boto3.resource(service_name='s3',
                                  region_name=self._region,
                                  aws_access_key_id=self._access_key,
                                  aws_secret_access_key=self._secret_key,
                                  endpoint_url=endpoint)
        self._bucket = self._s3.Bucket(self._bucket_name)
        self._backup_db = BackupDB(self._bucket, self._filesystem)
        self._snapshot_db = SnapshotDB(self._filesystem)
        self._cron = cron
        self._max_snapshots = max_snapshots
        self._max_backups = max_backups
        self._max_incremental_backups_per_full = max_incremental_backups_per_full # noqa
        self._storage_class = storage_class or 'STANDARD'
        self._max_multipart_parts = max_multipart_parts or 10000
        self._logger = logging.getLogger(__name__)

        if max_snapshots is not None and max_snapshots < 0:
            self._logger.error(f'filesystem={self._filesystem} '
                               'msg="max_snapshots must be greater than or '
                               'equal to 0."')
            sys.exit(1)

        if max_backups is not None and max_backups < 1:
            self._logger.error(f'filesystem={self._filesystem} '
                               'msg="max_backups must be greater '
                               'than or equal to 1."')
            sys.exit(1)

        if (max_incremental_backups_per_full is not None
                and max_incremental_backups_per_full < 0):
            self._logger.error(f'filesystem={self._filesystem} '
                               'msg="max_incremental_backups_per_full must be '
                               'greater than or equal to 0."')
            sys.exit(1)

    def start(self):
        """ Start ZFS backup job. """
        self._logger.info(f'filesystem={self._filesystem} msg="Starting job."')
        backups_inc = self._backup_db.get_backups(backup_type='inc')
        backups_full = self._backup_db.get_backups(backup_type='full')

        # find most recent full backup
        backup = backups_full[-1] if backups_full else None

        # if no full backup exists
        if backup is None:
            self._backup_full()

        # if we don't want incremental backups
        elif self._max_incremental_backups_per_full == 0:
            self._backup_full()

        # if we want incremental backups and multiple full backups
        elif self._max_incremental_backups_per_full:
            backup_time = backup.backup_time

            dependants = [b.dependency == backup_time for b in backups_inc]

            if sum(dependants) >= self._max_incremental_backups_per_full:
                self._backup_full()
            else:
                self._backup_incremental(backup_time)

        # if we want incremental backups and not multiple full backups
        else:
            self._backup_incremental(backup.backup_time)

        if self._max_snapshots is not None:
            self._limit_snapshots()
        if self._max_backups is not None:
            self._limit_backups()

        self._logger.info(f'filesystem={self._filesystem} msg="Finished job."')

    def restore(self, backup_time=None, filesystem=None):
        """ Restore from backup.

        Defaults to most recent backup if backup_time is not specified.

        WARNING: If restoring to a file system that already exists, snapshots
        and data that were written after the backup will be destroyed.

        Parameters
        ----------
        backup_time : str, optional
            Backup time in %Y%m%d_%H%M%S format.

        filesystem : str, optional
            File system to restore to. Defaults to the file system that the
            backup was taken from.
        """
        self._snapshot_db.refresh()
        snapshots = self._snapshot_db.get_snapshot_names()

        if backup_time:
            backup = self._backup_db.get_backup(backup_time)
        else:
            backups = self._backup_db.get_backups()
            if not backups:
                raise RestoreError('No backups exist.')
            else:
                backup = backups[-1]

        backup_time = backup.backup_time
        backup_type = backup.backup_type
        s3_key = backup.s3_key

        if filesystem:
            out = create_filesystem(filesystem)
            if out.returncode:
                raise ZFSError(out.stderr)

        if backup_type == 'full':
            if backup_time in snapshots and filesystem is None:
                self._logger.info(f'filesystem={self.filesystem} '
                                  f'snapshot_name={backup_time} '
                                  f's3_key={s3_key} '
                                  'msg="Snapshot already exists."')
            else:
                self._restore_snapshot(backup, filesystem)

        elif backup_type == 'inc':
            # restore full backup first
            backup_full = self._backup_db.get_backup(backup.dependency)

            if backup_full.backup_time in snapshots and filesystem is None:
                self._logger.info(f'filesystem={self.filesystem} '
                                  f'snapshot_name={backup_full.backup_time} '
                                  f's3_key={backup_full.s3_key} '
                                  'msg="Snapshot already exists."')
            else:
                self._restore_snapshot(backup_full, filesystem)

            if backup_time in snapshots and filesystem is None:
                self._logger.info(f'filesystem={self.filesystem} '
                                  f'snapshot_name={backup_time} '
                                  f's3_key={s3_key} '
                                  'msg="Snapshot already exists."')
            else:
                self._restore_snapshot(backup, filesystem)

    def _backup_full(self):
        """ Create snapshot and upload full backup. """
        snapshot = self._snapshot_db.create_snapshot()
        backup_time = snapshot.name
        filesystem = snapshot.filesystem

        send_size = int(get_snapshot_send_size(filesystem, backup_time))
        transfer_config = _get_transfer_config(send_size,
                                               self._max_multipart_parts)

        s3_key = f'{filesystem}/{backup_time}.full'
        self._logger.info(f'filesystem={filesystem} '
                          f'snapshot_name={backup_time} '
                          f's3_key={s3_key} '
                          'msg="Starting full backup."')

        with open_snapshot_stream(filesystem, backup_time, 'r') as f:
            transfer_callback = TransferCallback(self._logger, send_size,
                                                 filesystem, backup_time,
                                                 s3_key)
            self._bucket.upload_fileobj(f.stdout,
                                        s3_key,
                                        Callback=transfer_callback.callback,
                                        Config=transfer_config,
                                        ExtraArgs={
                                            'StorageClass': self._storage_class
                                        })
            stderr = f.stderr.read().decode('utf-8')
        if f.returncode:
            raise ZFSError(stderr)

        backup_size = self._check_backup(s3_key)
        self._backup_db.create_backup(backup_time, 'full', s3_key,
                                      dependency=None, backup_size=backup_size)
        self._logger.info(f'filesystem={filesystem} '
                          f'snapshot_name={backup_time} '
                          f's3_key={s3_key} '
                          'msg="Finished full backup."')

    def _backup_incremental(self, backup_time_full):
        """ Create snapshot and upload incremental backup.

        Parameters
        ----------
        backup_time_full : str
            Backup time in %Y%m%d_%H%M%S format.

        """
        snapshot = self._snapshot_db.create_snapshot()
        backup_time = snapshot.name
        filesystem = snapshot.filesystem

        send_size = int(get_snapshot_send_size_inc(filesystem,
                                                   backup_time_full,
                                                   backup_time))
        transfer_config = _get_transfer_config(send_size,
                                               self._max_multipart_parts)

        s3_key = f'{filesystem}/{backup_time}.inc'
        self._logger.info(f'filesystem={filesystem} '
                          f'snapshot_name={backup_time} '
                          f's3_key={s3_key} '
                          'msg="Starting incremental backup."')

        with open_snapshot_stream_inc(
                filesystem, backup_time_full, backup_time) as f:
            transfer_callback = TransferCallback(self._logger, send_size,
                                                 filesystem, backup_time,
                                                 s3_key)
            self._bucket.upload_fileobj(
                f.stdout,
                s3_key,
                Callback=transfer_callback.callback,
                Config=transfer_config,
                ExtraArgs={
                    'StorageClass': self._storage_class
                })
            stderr = f.stderr.read().decode('utf-8')
        if f.returncode:
            raise ZFSError(stderr)

        backup_size = self._check_backup(s3_key)
        self._backup_db.create_backup(backup_time, 'inc', s3_key,
                                      backup_time_full, backup_size)
        self._logger.info(f'filesystem={filesystem} '
                          f'snapshot_name={backup_time} '
                          f's3_key={s3_key} '
                          'msg="Finished incremental backup."')

    def _restore_snapshot(self, backup, filesystem=None):
        """ Restore snapshot from backup.

        Parameters
        ----------
        backup : Backup

        filesystem : str, optional
            File system to restore to. Defaults to the file system that the
            backup was taken from.
        """
        backup_time = backup.backup_time
        backup_size = backup.backup_size
        filesystem = filesystem or backup.filesystem
        s3_key = backup.s3_key

        transfer_config = TransferConfig(max_concurrency=S3_MAX_CONCURRENCY)

        self._logger.info(f'filesystem={filesystem} '
                          f'snapshot_name={backup_time} '
                          f's3_key={s3_key} '
                          'msg="Restoring snapshot."')
        backup_object = self._s3.Object(self._bucket_name, s3_key)

        with open_snapshot_stream(filesystem, backup_time, 'w') as f:
            transfer_callback = TransferCallback(self._logger, backup_size,
                                                 filesystem, backup_time,
                                                 s3_key)
            try:
                backup_object.download_fileobj(
                    f.stdin,
                    Callback=transfer_callback.callback,
                    Config=transfer_config)
            except BrokenPipeError:
                pass
            stderr = f.stderr.read().decode('utf-8')
        if f.returncode:
            raise ZFSError(stderr)

        self._snapshot_db.refresh()

    def _limit_snapshots(self):
        """ Limit number of snapshots.

        We only remove snapshots that were used for incremental backups.
        Keeping snapshots that were used for full backups allows us to
        restore without having to download the full backup.
        """
        backup_times_full = self._backup_db.get_backup_times('full')
        results = self._snapshot_db.get_snapshots()

        if len(results) > self._max_snapshots:
            self._logger.info(f'filesystem={self._filesystem} '
                              'msg="Snapshot limit achieved."')

        while len(results) > self._max_snapshots:
            snapshot = results.pop(0)
            backup_time = snapshot.name

            if backup_time not in backup_times_full:
                self._logger.info(f'filesystem={self._filesystem} '
                                  f'snapshot_name={snapshot.name} '
                                  'msg="Deleting snapshot."')
                self._snapshot_db.delete_snapshot(snapshot.name)

    def _check_backup(self, s3_key):
        """ Check if S3 object exists and returns object size.

        Parameters
        ----------
        s3_key : str

        Returns
        -------
        int

        """
        # load() will fail if object does not exist
        backup_object = self._s3.Object(self._bucket_name, s3_key)
        backup_object.load()
        if backup_object.content_length == 0:
            raise BackupError('Backup upload failed.')

        return backup_object.content_length

    def _delete_backup(self, backup):
        """ Delete backup.

        Parameters
        ----------
        backup : Backup

        """
        backup_time = backup.backup_time
        s3_key = backup.s3_key

        self._logger.info(f's3_key={s3_key} '
                          'msg="Deleting backup."')
        backup_object = self._s3.Object(self._bucket_name, s3_key)
        backup_object.delete()
        self._backup_db.delete_backup(backup_time)

    def _limit_backups(self):
        """ Limit number of incremental and full backups.

        Only backups with no dependants are removed.
        """
        backups = self._backup_db.get_backups()

        if len(backups) > self._max_backups:
            self._logger.info(f'filesystem={self._filesystem} '
                              'msg="Backup limit achieved."')

        count = 0
        while len(backups) > self._max_backups and count < len(backups):
            backup = backups[count]
            backup_time = backup.backup_time
            backup_type = backup.backup_type
            s3_key = backup.s3_key
            deleted = False

            if backup_type == "inc":
                self._delete_backup(backup)
                backups.pop(count)
                deleted = True

            elif backup_type == "full":
                dependants = any(b.dependency == backup_time
                                 for b in backups)
                if dependants:
                    self._logger.info(f's3_key={s3_key} '
                                      'msg="Backup has dependants. Not '
                                      'deleting."')
                else:
                    self._delete_backup(backup)
                    backups.pop(count)
                    deleted = True

            # Only advance past backups we keep; a pop() shifts the next
            # backup into the current index.
            if not deleted:
                count += 1
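# A minimal usage sketch, not from the original source. The import path,
# credentials, bucket name, and filesystem below are placeholders.
from zfs_uploader.job import ZFSjob  # assumed import path

job = ZFSjob(bucket_name='my-zfs-backups',
             access_key='AKIA...',
             secret_key='...',
             filesystem='tank/data',
             region='us-east-1',
             max_snapshots=7,
             max_backups=10,
             max_incremental_backups_per_full=6)

job.start()  # snapshot and upload a full or incremental backup
# job.restore()                        # restore from the most recent backup
# job.restore(backup_time='20210425_201838', filesystem='tank/restored')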