Example #1
    def rm(self,
           version_uid,
           force=True,
           disallow_rm_when_younger_than_days=0):
        if not self.locking.lock(version_uid):
            raise LockError('Version {} is locked.'.format(version_uid))
        version = self.meta_backend.get_version(version_uid)
        if version.protected:
            raise ValueError(
                'Version {} is protected. Will not delete.'.format(
                    version_uid))
        if not force:
            # check if disallow_rm_when_younger_than_days allows deletion
            age_days = (datetime.datetime.utcnow() - version.date).days
            if disallow_rm_when_younger_than_days > age_days:
                raise LockError(
                    'Version {} is too young. Will not delete.'.format(
                        version_uid))

        num_blocks = self.meta_backend.rm_version(version_uid)
        logger.info('Removed backup version {} with {} blocks.'.format(
            version_uid,
            num_blocks,
        ))
        self.locking.unlock(version_uid)
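
The age check in Example #1 counts whole days only. A small, hypothetical illustration of the rule (the values are invented for the sketch, not taken from the project):

    import datetime

    # A version created 3 days ago counts as "too young" when deletions are
    # disallowed for anything younger than 7 days; rm() would raise LockError
    # here unless force=True is passed.
    version_date = datetime.datetime.utcnow() - datetime.timedelta(days=3)
    age_days = (datetime.datetime.utcnow() - version_date).days  # -> 3
    disallow_rm_when_younger_than_days = 7
    assert disallow_rm_when_younger_than_days > age_days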
Example #2
 def set_version_invalid(self, uid):
     version = self.get_version(uid)
     version.valid = 0
     self.session.commit()
     logger.info('Marked version invalid (UID {})'.format(
         uid,
         ))
Example #3
File: sql.py Project: wamdam/backy2
 def set_version_invalid(self, uid):
     version = self.get_version(uid)
     version.valid = 0
     self.session.commit()
     logger.info('Marked version invalid (UID {})'.format(
         uid,
         ))
Example #4
 def cleanup_fast(self, dt=3600):
     """ Delete unreferenced blob UIDs """
     if not self.locking.lock('backy-cleanup-fast'):
         raise LockError('Another backy cleanup is running.')
     delete_candidates = self.meta_backend.get_delete_candidates(dt=dt)
     try:
         for candidates in grouper(100, delete_candidates):
             # A batch size of 100 works smoothly here, at about 10-30s per
             # batch. Above 70s there's a timeout and the job is re-sent.
             # Maybe we should either learn the best batch size here or make
             # it configurable...
             logger.debug('Cleanup: Removing UIDs {}'.format(', '.join(candidates)))
             try:
                 self.data_backend.rm_many(candidates)
             except FileNotFoundError:
                 continue
     except:
         logger.error('Error during cleanup. Reverting metadata changes.')
         self.meta_backend.revert_delete_candidates(delete_candidates)
         self.locking.unlock('backy-cleanup-fast')
         raise
     else:
         self.meta_backend.remove_delete_candidates(delete_candidates)
     logger.info('Cleanup: Removed {} blobs'.format(len(delete_candidates)))
     self.locking.unlock('backy-cleanup-fast')
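
The grouper() helper used in Example #4 is not shown in these snippets. A minimal batching helper with the same call order (batch size first, then the iterable) could look like the following sketch, assuming it yields plain lists and never pads the last batch:

    import itertools

    def grouper(n, iterable):
        # Yield successive lists of up to n items from iterable.
        it = iter(iterable)
        while True:
            batch = list(itertools.islice(it, n))
            if not batch:
                return
            yield batch

With such a helper, grouper(100, delete_candidates) hands rm_many() at most 100 UIDs per call, which is what the batch-size comment in the code refers to.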
Example #5
File: sql.py Project: wamdam/backy2
    def get_delete_candidates(self, dt=3600):
        _stat_i = 0
        _stat_remove_from_delete_candidates = 0
        _stat_delete_candidates = 0
        while True:
            delete_candidates = self.session.query(
                DeletedBlock
            ).filter(
                DeletedBlock.time < (inttime() - dt)
            ).limit(250).all()  # http://stackoverflow.com/questions/7389759/memory-efficient-built-in-sqlalchemy-iterator-generator
            if not delete_candidates:
                break

            _remove_from_delete_candidate_uids = set()
            _delete_candidates = set()
            for candidate in delete_candidates:
                _stat_i += 1
                if _stat_i%1000 == 0:
                    logger.info("Cleanup-fast: {} false positives, {} data deletions.".format(
                        _stat_remove_from_delete_candidates,
                        _stat_delete_candidates,
                        ))

                block = self.session.query(
                    Block
                ).filter(
                    Block.uid == candidate.uid
                ).limit(1).scalar()
                if block:
                    _remove_from_delete_candidate_uids.add(candidate.uid)
                    _stat_remove_from_delete_candidates += 1
                else:
                    _delete_candidates.add(candidate.uid)
                    _stat_delete_candidates += 1

            if _remove_from_delete_candidate_uids:
                logger.debug("Cleanup-fast: Removing {} false positive delete candidates".format(len(_remove_from_delete_candidate_uids)))
                self.session.query(
                    DeletedBlock
                ).filter(
                    DeletedBlock.uid.in_(_remove_from_delete_candidate_uids)
                ).delete(synchronize_session=False)

            if _delete_candidates:
                logger.debug("Cleanup-fast: Sending {} delete candidates for final deletion".format(len(_delete_candidates)))
                self.session.query(
                    DeletedBlock
                ).filter(
                    DeletedBlock.uid.in_(_delete_candidates)
                ).delete(synchronize_session=False)
                yield(_delete_candidates)

        logger.info("Cleanup-fast: Cleanup finished. {} false positives, {} data deletions.".format(
            _stat_remove_from_delete_candidates,
            _stat_delete_candidates,
            ))
Example #6
    def get_delete_candidates(self, dt=3600):
        _stat_i = 0
        _stat_remove_from_delete_candidates = 0
        _stat_delete_candidates = 0
        while True:
            delete_candidates = self.session.query(DeletedBlock).filter(
                DeletedBlock.time < (inttime() - dt)
            ).limit(250).all(
            )  # http://stackoverflow.com/questions/7389759/memory-efficient-built-in-sqlalchemy-iterator-generator
            if not delete_candidates:
                break

            _remove_from_delete_candidate_uids = set()
            _delete_candidates = set()
            for candidate in delete_candidates:
                _stat_i += 1
                if _stat_i % 1000 == 0:
                    logger.info(
                        "Cleanup-fast: {} false positives, {} data deletions.".
                        format(
                            _stat_remove_from_delete_candidates,
                            _stat_delete_candidates,
                        ))

                block = self.session.query(Block).filter(
                    Block.uid == candidate.uid).limit(1).scalar()
                if block:
                    _remove_from_delete_candidate_uids.add(candidate.uid)
                    _stat_remove_from_delete_candidates += 1
                else:
                    _delete_candidates.add(candidate.uid)
                    _stat_delete_candidates += 1

            if _remove_from_delete_candidate_uids:
                logger.debug(
                    "Cleanup-fast: Removing {} false positive delete candidates"
                    .format(len(_remove_from_delete_candidate_uids)))
                self.session.query(DeletedBlock).filter(
                    DeletedBlock.uid.in_(_remove_from_delete_candidate_uids)
                ).delete(synchronize_session=False)

            if _delete_candidates:
                logger.debug(
                    "Cleanup-fast: Sending {} delete candidates for final deletion"
                    .format(len(_delete_candidates)))
                self.session.query(DeletedBlock).filter(
                    DeletedBlock.uid.in_(_delete_candidates)).delete(
                        synchronize_session=False)
                yield (_delete_candidates)

        logger.info(
            "Cleanup-fast: Cleanup finished. {} false positives, {} data deletions."
            .format(
                _stat_remove_from_delete_candidates,
                _stat_delete_candidates,
            ))
Example #7
 def rm(self, version_uid):
     if not self.locking.lock(version_uid):
         raise LockError('Version {} is locked.'.format(version_uid))
     self.meta_backend.get_version(version_uid)  # just to raise if not exists
     num_blocks = self.meta_backend.rm_version(version_uid)
     logger.info('Removed backup version {} with {} blocks.'.format(
         version_uid,
         num_blocks,
         ))
     self.locking.unlock(version_uid)
Example #8
    def restore(self, version_uid, target, sparse=False, force=False):
        if not self.locking.lock(version_uid):
            raise LockError('Version {} is locked.'.format(version_uid))

        version = self.meta_backend.get_version(version_uid)  # raise if version not exists
        notify(self.process_name, 'Restoring Version {}. Getting blocks.'.format(version_uid))
        blocks = self.meta_backend.get_blocks_by_version(version_uid)

        io = self.get_io_by_source(target)
        io.open_w(target, version.size_bytes, force)

        read_jobs = 0
        for i, block in enumerate(blocks):
            if block.uid:
                self.data_backend.read(block.deref())  # adds a read job
                read_jobs += 1
            elif not sparse:
                io.write(block, b'\0'*block.size)
                logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                    block.id,
                    block.size,
                    ))
            else:
                logger.debug('Ignored sparse block {}.'.format(
                    block.id,
                    ))
            notify(self.process_name, 'Restoring Version {} to {} PREPARING AND SPARSE BLOCKS ({:.1f}%)'.format(version_uid, target, (i + 1) / len(blocks) * 100))

        done_jobs = 0
        _log_every_jobs = read_jobs // 200 + 1  # about every half percent
        for i in range(read_jobs):
            block, offset, length, data = self.data_backend.read_get()
            assert len(data) == block.size
            data_checksum = self.hash_function(data).hexdigest()
            io.write(block, data)
            if data_checksum != block.checksum:
                logger.error('Checksum mismatch during restore for block '
                    '{} (is: {} should-be: {}, block-valid: {}). Block '
                    'restored is invalid. Continuing.'.format(
                        block.id,
                        data_checksum,
                        block.checksum,
                        block.valid,
                        ))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            else:
                logger.debug('Restored block {} successfully ({} bytes).'.format(
                    block.id,
                    block.size,
                    ))

            notify(self.process_name, 'Restoring Version {} to {} ({:.1f}%)'.format(version_uid, target, (i + 1) / read_jobs * 100))
            if i % _log_every_jobs == 0 or i + 1 == read_jobs:
                logger.info('Restored {}/{} blocks ({:.1f}%)'.format(i + 1, read_jobs, (i + 1) / read_jobs * 100))
        self.locking.unlock(version_uid)
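
The restore loop in Example #8 re-hashes every restored block and compares the result against the checksum stored in the metadata before deciding whether to mark blocks invalid. Reduced to a standalone helper (hypothetical name; backy2 configures hash_function elsewhere, sha512 is only a plausible default):

    import hashlib

    def block_is_intact(data, expected_checksum, hash_function=hashlib.sha512):
        # Recompute the checksum of the restored data and compare it with the
        # checksum recorded by the metadata backend.
        return hash_function(data).hexdigest() == expected_checksum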
Example #9
 def cleanup_delete_candidates(self, dt=3600):
     # Delete false positives:
     logger.info("Deleting false positives...")
     self.session.query(DeletedBlock.uid).filter(
         DeletedBlock.uid.in_(
             self.session.query(Block.uid).distinct(Block.uid).filter(
                 Block.uid.isnot(None)).subquery())).filter(
                     DeletedBlock.time < (inttime() - dt)).delete(
                         synchronize_session=False)
     logger.info("Deleting false positives: done. Now deleting blocks.")
     self.session.commit()
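
The nested query in Example #9 is easier to read as the SQL it roughly corresponds to. The following is an approximation, not the statement the engine actually emits, and the table and column names are assumed from the model classes:

    from sqlalchemy import text

    # Delete "false positive" delete candidates: deleted-block records whose
    # UID is still referenced by some block, provided they are older than dt.
    stmt = text("""
        DELETE FROM deleted_blocks
        WHERE uid IN (SELECT DISTINCT uid FROM blocks WHERE uid IS NOT NULL)
          AND time < :cutoff
    """)
    # session.execute(stmt, {'cutoff': inttime() - dt}); session.commit()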
Example #10
    def cleanup_fast(self, dt=3600):
        """ Delete unreferenced blob UIDs """
        if not self.locking.lock('backy-cleanup-fast'):
            raise LockError('Another backy cleanup is running.')

        for uid_list in self.meta_backend.get_delete_candidates():
            logger.debug('Cleanup-fast: Deleting UIDs from data backend: {}'.format(uid_list))
            no_del_uids = []
            no_del_uids = self.data_backend.rm_many(uid_list)
            if no_del_uids:
                logger.info('Cleanup-fast: Unable to delete these UIDs from data backend: {}'.format(uid_list))
        self.locking.unlock('backy-cleanup-fast')
Example #11
    def cleanup_fast(self, dt=3600):
        """ Delete unreferenced blob UIDs """
        if not self.locking.lock('backy-cleanup-fast'):
            raise LockError('Another backy cleanup is running.')

        for uid_list in self.meta_backend.get_delete_candidates(dt):
            logger.debug('Cleanup-fast: Deleting UIDs from data backend: {}'.format(uid_list))
            no_del_uids = []
            no_del_uids = self.data_backend.rm_many(uid_list)
            if no_del_uids:
                logger.info('Cleanup-fast: Unable to delete these UIDs from data backend: {}'.format(uid_list))
        self.locking.unlock('backy-cleanup-fast')
Example #12
File: sql.py Project: wamdam/backy2
 def set_blocks_invalid(self, uid, checksum):
     _affected_version_uids = self.session.query(distinct(Block.version_uid)).filter_by(uid=uid, checksum=checksum).all()
     affected_version_uids = [v[0] for v in _affected_version_uids]
     self.session.query(Block).filter_by(uid=uid, checksum=checksum).update({'valid': 0}, synchronize_session='fetch')
     self.session.commit()
     logger.info('Marked block invalid (UID {}, Checksum {}). Affected versions: {}'.format(
         uid,
         checksum,
         ', '.join(affected_version_uids)
         ))
     for version_uid in affected_version_uids:
         self.set_version_invalid(version_uid)
     return affected_version_uids
Example #13
 def set_blocks_invalid(self, uid, checksum):
     _affected_version_uids = self.session.query(distinct(
         Block.version_uid)).filter_by(uid=uid, checksum=checksum).all()
     affected_version_uids = [v[0] for v in _affected_version_uids]
     self.session.query(Block).filter_by(uid=uid, checksum=checksum).update(
         {'valid': 0}, synchronize_session='fetch')
     self.session.commit()
     logger.info(
         'Marked block invalid (UID {}, Checksum {}). Affected versions: {}'.
         format(uid, checksum, ', '.join(affected_version_uids)))
     for version_uid in affected_version_uids:
         self.set_version_invalid(version_uid)
     return affected_version_uids
Example #14
    def rm(self, version_uid, force=True, disallow_rm_when_younger_than_days=0):
        if not self.locking.lock(version_uid):
            raise LockError('Version {} is locked.'.format(version_uid))
        version = self.meta_backend.get_version(version_uid)
        if not force:
            # check if disallow_rm_when_younger_than_days allows deletion
            age_days = (datetime.datetime.now() - version.date).days
            if disallow_rm_when_younger_than_days > age_days:
                raise LockError('Version {} is too young. Will not delete.'.format(version_uid))

        num_blocks = self.meta_backend.rm_version(version_uid)
        logger.info('Removed backup version {} with {} blocks.'.format(
            version_uid,
            num_blocks,
            ))
        self.locking.unlock(version_uid)
Example #15
 def copy_version(self, from_version_uid, version_name, snapshot_name=''):
     """ Copy version to a new uid and name, snapshot_name"""
     old_version = self.get_version(from_version_uid)
     new_version_uid = self.set_version(version_name, snapshot_name, old_version.size, old_version.size_bytes, old_version.valid)
     old_blocks = self.get_blocks_by_version(from_version_uid)
     logger.info('Copying version...')
     for i, block in enumerate(old_blocks.yield_per(1000)):
         self.set_block(
                 i,
                 new_version_uid,
                 block.uid,
                 block.checksum,
                 block.size,
                 block.valid,
                 _commit=False,
                 )
     self._commit()
     logger.info('Done copying version...')
     return new_version_uid
Example #16
 def cleanup_full(self, prefix=None):
     """ Delete unreferenced blob UIDs starting with <prefix> """
     # In this mode, we compare all existing uids in the data and meta backends.
     # Make sure no other backy will start.
     if not self.locking.lock('backy'):
         self.locking.unlock('backy')
         raise LockError('Other backy instances are running.')
     # Make sure no other backy is running.
     if len(find_other_procs(self.process_name)) > 1:
         raise LockError('Other backy instances are running.')
     active_blob_uids = set(self.data_backend.get_all_blob_uids(prefix))
     active_block_uids = set(self.meta_backend.get_all_block_uids(prefix))
     delete_candidates = active_blob_uids.difference(active_block_uids)
     for delete_candidate in delete_candidates:
         logger.debug('Cleanup: Removing UID {}'.format(delete_candidate))
         try:
             self.data_backend.rm(delete_candidate)
         except FileNotFoundError:
             continue
     logger.info('Cleanup: Removed {} blobs'.format(len(delete_candidates)))
     self.locking.unlock('backy')
Example #17
 def cleanup_full(self, prefix=None):
     """ Delete unreferenced blob UIDs starting with <prefix> """
     # In this mode, we compare all existing uids in the data and meta backends.
     # Make sure no other backy will start.
     if not self.locking.lock('backy'):
         self.locking.unlock('backy')
         raise LockError('Other backy instances are running.')
     # Make sure no other backy is running.
     if len(find_other_procs(self.process_name)) > 1:
         raise LockError('Other backy instances are running.')
     active_blob_uids = set(self.data_backend.get_all_blob_uids(prefix))
     active_block_uids = set(self.meta_backend.get_all_block_uids(prefix))
     delete_candidates = active_blob_uids.difference(active_block_uids)
     for delete_candidate in delete_candidates:
         logger.debug('Cleanup: Removing UID {}'.format(delete_candidate))
         try:
             self.data_backend.rm(delete_candidate)
         except FileNotFoundError:
             continue
     logger.info('Cleanup: Removed {} blobs'.format(len(delete_candidates)))
     self.locking.unlock('backy')
Example #18
File: nbd.py Project: kalkin/backy2
    def fixate(self, cow_version_uid):
        # save blocks into version
        logger.info('Fixating version {} with {} blocks (PLEASE WAIT)'.format(
            cow_version_uid, len(self.cow[cow_version_uid].items())))
        for block_id, block_uid in self.cow[cow_version_uid].items():
            logger.debug('Fixating block {} uid {}'.format(
                block_id, block_uid))
            data = self._read(block_uid)
            checksum = self.hash_function(data).hexdigest()
            if not self.backy.data_backend._SUPPORTS_PARTIAL_WRITES:
                # dump changed data
                new_uid = self.backy.data_backend.save(data, _sync=True)
                logger.debug(
                    'Stored block {} with local uid {} to uid {}'.format(
                        block_id, block_uid, new_uid))
                block_uid = new_uid

            self.backy.meta_backend.set_block(block_id,
                                              cow_version_uid,
                                              block_uid,
                                              checksum,
                                              len(data),
                                              valid=1,
                                              _commit=False)
        self.backy.meta_backend.set_version_valid(cow_version_uid)
        self.backy.meta_backend._commit()
        logger.info('Fixation done. Deleting temporary data (PLEASE WAIT)')
        # TODO: Delete COW blocks and also those from block_cache
        if self.backy.data_backend._SUPPORTS_PARTIAL_WRITES:
            for block_uid in self.block_cache:
                # TODO if this block is in the current version (and in no other?)
                # rm this block from cache
                # rm block uid from self.block_cache
                pass
            for block_id, block_uid in self.cow[cow_version_uid].items():
                # TODO: rm block_uid from cache
                pass
        else:
            # backends that support partial writes will be written to directly.
            # So there's no need to cleanup.
            pass
        del (self.cow[cow_version_uid])
        logger.info('Finished.')
Example #19
    def fixate(self, cow_version_uid):
        # save blocks into version
        logger.info('Fixating version {} with {} blocks (PLEASE WAIT)'.format(
            cow_version_uid,
            len(self.cow[cow_version_uid].items())
            ))
        for block_id, block_uid in self.cow[cow_version_uid].items():
            logger.debug('Fixating block {} uid {}'.format(block_id, block_uid))
            data = self._read(block_uid)
            checksum = self.hash_function(data).hexdigest()
            if not self.backy.data_backend._SUPPORTS_PARTIAL_WRITES:
                # dump changed data
                new_uid = self.backy.data_backend.save(data, _sync=True)
                logger.debug('Stored block {} with local uid {} to uid {}'.format(block_id, block_uid, new_uid))
                block_uid = new_uid

            self.backy.meta_backend.set_block(block_id, cow_version_uid, block_uid, checksum, len(data), valid=1, _commit=False)
        self.backy.meta_backend.set_version_valid(cow_version_uid)
        self.backy.meta_backend._commit()
        logger.info('Fixation done. Deleting temporary data (PLEASE WAIT)')
        # TODO: Delete COW blocks and also those from block_cache
        if self.backy.data_backend._SUPPORTS_PARTIAL_WRITES:
            for block_uid in self.block_cache:
                # TODO if this block is in the current version (and in no other?)
                # rm this block from cache
                # rm block uid from self.block_cache
                pass
            for block_id, block_uid in self.cow[cow_version_uid].items():
                # TODO: rm block_uid from cache
                pass
        else:
            # backends that support partial writes will be written to directly.
            # So there's no need to cleanup.
            pass
        del(self.cow[cow_version_uid])
        logger.info('Finished.')
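
Examples #18 and #19 iterate over self.cow[cow_version_uid].items() and self.block_cache, so the copy-on-write bookkeeping is presumably a per-version mapping of block ids to the block UIDs written during the NBD session. A sketch of that assumed shape (ids and uids invented for illustration, not taken from the project's source):

    # Assumed structure, inferred from the loops in fixate().
    cow = {
        'v-cow-42': {        # cow_version_uid
            0: 'uid-a1b2',   # block_id -> block_uid written while serving NBD
            7: 'uid-c3d4',
        },
    }
    block_cache = {'uid-a1b2', 'uid-c3d4'}  # uids cached locally for re-reads

    for block_id, block_uid in cow['v-cow-42'].items():
        pass  # fixate() re-reads each uid, hashes the data and registers the block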
Example #20
    def backup(self, name, source, hints, from_version):
        """ Create a backup from source.
        If hints are given, they must be tuples of (offset, length, exists)
        where offset and length are integers and exists is a boolean. Only the
        data covered by the hints will then be backed up.
        Otherwise, the backup reads the whole source and checks whether the
        checksums match the target.
        """
        stats = {
                'version_size_bytes': 0,
                'version_size_blocks': 0,
                'bytes_read': 0,
                'blocks_read': 0,
                'bytes_written': 0,
                'blocks_written': 0,
                'bytes_found_dedup': 0,
                'blocks_found_dedup': 0,
                'bytes_sparse': 0,
                'blocks_sparse': 0,
                'start_time': time.time(),
            }
        self.reader.open(source)
        source_size = self.reader.size()

        size = math.ceil(source_size / self.block_size)
        stats['version_size_bytes'] = source_size
        stats['version_size_blocks'] = size

        # Sanity check: check hints for validity, i.e. too high offsets, ...
        if hints:
            max_offset = max([h[0]+h[1] for h in hints])
            if max_offset > source_size:
                raise ValueError('Hints have higher offsets than source file.')

        if hints:
            sparse_blocks = blocks_from_hints([hint for hint in hints if not hint[2]], self.block_size)
            read_blocks = blocks_from_hints([hint for hint in hints if hint[2]], self.block_size)
        else:
            sparse_blocks = []
            read_blocks = range(size)
        sparse_blocks = set(sparse_blocks)
        read_blocks = set(read_blocks)

        try:
            version_uid = self._prepare_version(name, source_size, from_version)
        except RuntimeError as e:
            logger.error(str(e))
            logger.error('Backy exiting.')
            # TODO: Don't exit here, exit in Commands
            exit(4)
        except LockError as e:
            logger.error(str(e))
            logger.error('Backy exiting.')
            # TODO: Don't exit here, exit in Commands
            exit(99)
        if not self.locking.lock(version_uid):
            logger.error('Version {} is locked.'.format(version_uid))
            logger.error('Backy exiting.')
            # TODO: Don't exit here, exit in Commands
            exit(99)

        blocks = self.meta_backend.get_blocks_by_version(version_uid)

        if from_version and hints:
            # SANITY CHECK:
            # Check some blocks outside of hints if they are the same in the
            # from_version backup and in the current backup. If they
            # don't, either hints are wrong (e.g. from a wrong snapshot diff)
            # or source doesn't match. In any case, the resulting backup won't
            # be good.
            logger.info('Starting sanity check with 1% of the blocks. Reading...')
            ignore_blocks = list(set(range(size)) - read_blocks - sparse_blocks)
            random.shuffle(ignore_blocks)
            num_check_blocks = max(10, len(ignore_blocks) // 100)  # 1%, but at least 10
            check_block_ids = ignore_blocks[:num_check_blocks]
            num_reading = 0
            for block in blocks:
                if block.id in check_block_ids and block.uid:  # no uid = sparse block in backup. Can't check.
                    self.reader.read(block)
                    num_reading += 1
            for i in range(num_reading):
                # this is source file data
                source_block, source_data, source_data_checksum = self.reader.get()
                # check metadata checksum with the newly read one
                if source_block.checksum != source_data_checksum:
                    logger.error("Source and backup don't match in regions outside of the hints.")
                    logger.error("Looks like the hints don't match or the source is different.")
                    logger.error("Found wrong source data at block {}: offset {} with max. length {}".format(
                        source_block.id,
                        source_block.id * self.block_size,
                        self.block_size
                        ))
                    # remove version
                    self.meta_backend.rm_version(version_uid)
                    sys.exit(5)
            logger.info('Finished sanity check. Checked {} blocks.'.format(num_reading))

        read_jobs = 0
        for block in blocks:
            if block.id in read_blocks or not block.valid:
                self.reader.read(block)  # adds a read job.
                read_jobs += 1
            elif block.id in sparse_blocks:
                # This "elif" is very important: if the block is in read_blocks
                # AND sparse_blocks, it *must* be read.
                self.meta_backend.set_block(block.id, version_uid, None, None, block.size, valid=1, _commit=False)
                stats['blocks_sparse'] += 1
                stats['bytes_sparse'] += block.size
                logger.debug('Skipping block (sparse) {}'.format(block.id))
            else:
                #self.meta_backend.set_block(block.id, version_uid, block.uid, block.checksum, block.size, valid=1, _commit=False)
                logger.debug('Keeping block {}'.format(block.id))

        # now use the readers and write
        done_jobs = 0
        for i in range(read_jobs):
            block, data, data_checksum = self.reader.get()

            stats['blocks_read'] += 1
            stats['bytes_read'] += len(data)

            # dedup
            existing_block = self.meta_backend.get_block_by_checksum(data_checksum)
            if existing_block and existing_block.size == len(data):
                self.meta_backend.set_block(block.id, version_uid, existing_block.uid, data_checksum, len(data), valid=1, _commit=False)
                stats['blocks_found_dedup'] += 1
                stats['bytes_found_dedup'] += len(data)
                logger.debug('Found existing block for id {} with uid {}'.format(
                        block.id, existing_block.uid))
            else:
                block_uid = self.data_backend.save(data)
                self.meta_backend.set_block(block.id, version_uid, block_uid, data_checksum, len(data), valid=1, _commit=False)
                stats['blocks_written'] += 1
                stats['bytes_written'] += len(data)
                logger.debug('Wrote block {} (checksum {}...)'.format(block.id, data_checksum[:16]))
            done_jobs += 1

        self.reader.close()  # wait for all readers
        self.data_backend.close()  # wait for all writers
        if read_jobs != done_jobs:
            logger.error('backy broke somewhere. Backup is invalid.')
            sys.exit(3)

        self.meta_backend.set_version_valid(version_uid)
        self.meta_backend.set_stats(
            version_uid=version_uid,
            version_name=name,
            version_size_bytes=stats['version_size_bytes'],
            version_size_blocks=stats['version_size_blocks'],
            bytes_read=stats['bytes_read'],
            blocks_read=stats['blocks_read'],
            bytes_written=stats['bytes_written'],
            blocks_written=stats['blocks_written'],
            bytes_found_dedup=stats['bytes_found_dedup'],
            blocks_found_dedup=stats['blocks_found_dedup'],
            bytes_sparse=stats['bytes_sparse'],
            blocks_sparse=stats['blocks_sparse'],
            duration_seconds=int(time.time() - stats['start_time']),
            )
        logger.info('New version: {}'.format(version_uid))
        self.locking.unlock(version_uid)
        return version_uid
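
The docstring in Example #20 describes hints as (offset, length, exists) tuples that blocks_from_hints() turns into block ids. The helper itself is not shown here; under the assumption that it simply expands each byte range to the blocks it touches, a minimal sketch would be:

    def blocks_from_hints(hints, block_size):
        # Map (offset, length, exists) hints to the set of block ids they cover.
        # The callers above already filter on the 'exists' flag, so it is
        # ignored here.
        block_ids = set()
        for offset, length, _exists in hints:
            if length <= 0:
                continue
            first = offset // block_size
            last = (offset + length - 1) // block_size
            block_ids.update(range(first, last + 1))
        return block_ids

    # blocks_from_hints([(0, 4096, True), (1048576, 8192, True)], 65536)
    # -> {0, 16}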
Example #21
 def nbd(self, version_uid, bind_address, bind_port, read_only):
     from backy2.enterprise.nbdserver import Server as NbdServer
     from backy2.enterprise.nbd import BackyStore
     backy = self.backy()
     config_NBD = self.Config(section='NBD')
     config_DEFAULTS = self.Config(section='DEFAULTS')
     hash_function = getattr(hashlib,
                             config_DEFAULTS.get('hash_function', 'sha512'))
     store = BackyStore(
         backy,
         cachedir=config_NBD.get('cachedir'),
         hash_function=hash_function,
     )
     addr = (bind_address, bind_port)
     server = NbdServer(addr, store, read_only)
     logger.info("Starting to serve nbd on %s:%s" % (addr[0], addr[1]))
     logger.info("You may now start")
     logger.info("  nbd-client -l %s -p %s" % (addr[0], addr[1]))
     logger.info("and then get the backup via")
     logger.info("  modprobe nbd")
     logger.info("  nbd-client -N <version> %s -p %s /dev/nbd0" %
                 (addr[0], addr[1]))
     server.serve_forever()
Example #22
    def backup(self,
               name,
               snapshot_name,
               source,
               hints,
               from_version,
               tag=None,
               expire=None):
        """ Create a backup from source.
        If hints are given, they must be tuples of (offset, length, exists)
        where offset and length are integers and exists is a boolean. Only the
        data covered by the hints will then be backed up.
        Otherwise, the backup reads the whole source and checks whether the
        checksums match the target.
        """
        stats = {
            'version_size_bytes': 0,
            'version_size_blocks': 0,
            'bytes_read': 0,
            'blocks_read': 0,
            'bytes_written': 0,
            'blocks_written': 0,
            'bytes_found_dedup': 0,
            'blocks_found_dedup': 0,
            'bytes_sparse': 0,
            'blocks_sparse': 0,
            'start_time': time.time(),
        }
        io = self.get_io_by_source(source)
        io.open_r(source)
        source_size = io.size()

        size = math.ceil(source_size / self.block_size)
        stats['version_size_bytes'] = source_size
        stats['version_size_blocks'] = size

        # Sanity check: check hints for validity, i.e. too high offsets, ...
        if hints is not None and len(hints) > 0:
            max_offset = max([h[0] + h[1] for h in hints])
            if max_offset > source_size:
                raise ValueError('Hints have higher offsets than source file.')

        if hints is not None:
            sparse_blocks = blocks_from_hints(
                [hint for hint in hints if not hint[2]], self.block_size)
            read_blocks = blocks_from_hints(
                [hint for hint in hints if hint[2]], self.block_size)
        else:
            sparse_blocks = []
            read_blocks = range(size)
        sparse_blocks = set(sparse_blocks)
        read_blocks = set(read_blocks)

        try:
            version_uid = self._prepare_version(name, snapshot_name,
                                                source_size, from_version)
        except RuntimeError as e:
            logger.error(str(e))
            logger.error('Backy exiting.')
            # TODO: Don't exit here, exit in Commands
            exit(4)
        except LockError as e:
            logger.error(str(e))
            logger.error('Backy exiting.')
            # TODO: Don't exit here, exit in Commands
            exit(99)
        if not self.locking.lock(version_uid):
            logger.error('Version {} is locked.'.format(version_uid))
            logger.error('Backy exiting.')
            # TODO: Don't exit here, exit in Commands
            exit(99)

        blocks = self.meta_backend.get_blocks_by_version(version_uid)

        if from_version and hints:
            # SANITY CHECK:
            # Check some blocks outside of hints if they are the same in the
            # from_version backup and in the current backup. If they
            # don't, either hints are wrong (e.g. from a wrong snapshot diff)
            # or source doesn't match. In any case, the resulting backup won't
            # be good.
            logger.info(
                'Starting sanity check with 1% of the blocks. Reading...')
            ignore_blocks = list(
                set(range(size)) - read_blocks - sparse_blocks)
            random.shuffle(ignore_blocks)
            num_check_blocks = 10
            # 50% from the start
            check_block_ids = ignore_blocks[:num_check_blocks // 2]
            # and 50% from random locations
            check_block_ids = set(
                check_block_ids +
                random.sample(ignore_blocks, num_check_blocks // 2))
            num_reading = 0
            for block in blocks:
                if block.id in check_block_ids and block.uid:  # no uid = sparse block in backup. Can't check.
                    io.read(block)
                    num_reading += 1
            for i in range(num_reading):
                # this is source file data
                source_block, source_data, source_data_checksum = io.get()
                # check metadata checksum with the newly read one
                if source_block.checksum != source_data_checksum:
                    logger.error(
                        "Source and backup don't match in regions outside of the hints."
                    )
                    logger.error(
                        "Looks like the hints don't match or the source is different."
                    )
                    logger.error(
                        "Found wrong source data at block {}: offset {} with max. length {}"
                        .format(source_block.id,
                                source_block.id * self.block_size,
                                self.block_size))
                    # remove version
                    self.meta_backend.rm_version(version_uid)
                    sys.exit(5)
            logger.info('Finished sanity check. Checked {} blocks {}.'.format(
                num_reading, check_block_ids))

        read_jobs = 0
        for block in blocks:
            if block.id in read_blocks or not block.valid:
                io.read(block.deref())  # adds a read job.
                read_jobs += 1
            elif block.id in sparse_blocks:
                # This "elif" is very important: if the block is in read_blocks
                # AND sparse_blocks, it *must* be read.
                self.meta_backend.set_block(block.id,
                                            version_uid,
                                            None,
                                            None,
                                            block.size,
                                            valid=1,
                                            _commit=False)
                stats['blocks_sparse'] += 1
                stats['bytes_sparse'] += block.size
                logger.debug('Skipping block (sparse) {}'.format(block.id))
            else:
                #self.meta_backend.set_block(block.id, version_uid, block.uid, block.checksum, block.size, valid=1, _commit=False)
                logger.debug('Keeping block {}'.format(block.id))

        # now use the readers and write
        done_jobs = 0
        _log_every_jobs = read_jobs // 200 + 1  # about every half percent
        for i in range(read_jobs):
            block, data, data_checksum = io.get()

            stats['blocks_read'] += 1
            stats['bytes_read'] += len(data)

            # dedup
            existing_block = self.meta_backend.get_block_by_checksum(
                data_checksum)
            if data == b'\0' * block.size:
                # if the block is only \0, set it as a sparse block.
                stats['blocks_sparse'] += 1
                stats['bytes_sparse'] += block.size
                logger.debug('Skipping block (detected sparse) {}'.format(
                    block.id))
                self.meta_backend.set_block(block.id,
                                            version_uid,
                                            None,
                                            None,
                                            block.size,
                                            valid=1,
                                            _commit=False)
            elif existing_block and existing_block.size == len(data):
                self.meta_backend.set_block(block.id,
                                            version_uid,
                                            existing_block.uid,
                                            data_checksum,
                                            len(data),
                                            valid=1,
                                            _commit=False)
                stats['blocks_found_dedup'] += 1
                stats['bytes_found_dedup'] += len(data)
                logger.debug(
                    'Found existing block for id {} with uid {}'.format(
                        block.id, existing_block.uid))
            else:
                block_uid = self.data_backend.save(data)
                self.meta_backend.set_block(block.id,
                                            version_uid,
                                            block_uid,
                                            data_checksum,
                                            len(data),
                                            valid=1,
                                            _commit=False)
                stats['blocks_written'] += 1
                stats['bytes_written'] += len(data)
                logger.debug('Wrote block {} (checksum {}...)'.format(
                    block.id, data_checksum[:16]))
            done_jobs += 1
            notify(
                self.process_name,
                'Backup Version {} from {} ({:.1f}%)'.format(
                    version_uid, source, (i + 1) / read_jobs * 100))
            if i % _log_every_jobs == 0 or i + 1 == read_jobs:
                logger.info('Backed up {}/{} blocks ({:.1f}%)'.format(
                    i + 1, read_jobs, (i + 1) / read_jobs * 100))

        io.close()  # wait for all readers
        # self.data_backend.close()  # wait for all writers
        if read_jobs != done_jobs:
            logger.error('backy broke somewhere. Backup is invalid.')
            sys.exit(3)

        self.meta_backend.set_version_valid(version_uid)

        if tag is not None:
            if isinstance(tag, list):
                tags = tag
            else:
                tags = []
                tags.append(tag)
        else:
            tags = self._generate_auto_tags(name)
        for tag in tags:
            self.meta_backend.add_tag(version_uid, tag)

        self.meta_backend.set_stats(
            version_uid=version_uid,
            version_name=name,
            version_size_bytes=stats['version_size_bytes'],
            version_size_blocks=stats['version_size_blocks'],
            bytes_read=stats['bytes_read'],
            blocks_read=stats['blocks_read'],
            bytes_written=stats['bytes_written'],
            blocks_written=stats['blocks_written'],
            bytes_found_dedup=stats['bytes_found_dedup'],
            blocks_found_dedup=stats['blocks_found_dedup'],
            bytes_sparse=stats['bytes_sparse'],
            blocks_sparse=stats['blocks_sparse'],
            duration_seconds=int(time.time() - stats['start_time']),
        )
        logger.info('New version: {} (Tags: [{}])'.format(
            version_uid, ','.join(tags)))
        self.locking.unlock(version_uid)

        if expire:
            self.meta_backend.expire_version(version_uid, expire)

        return version_uid
Example #23
def main():
    parser = argparse.ArgumentParser(
        description='Backup and restore for block devices.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '-v', '--verbose', action='store_true', help='verbose output')
    parser.add_argument(
        '-m', '--machine-output', action='store_true', default=False)
    parser.add_argument(
        '-V', '--version', action='store_true', help='Show version')

    subparsers = parser.add_subparsers()

    # BACKUP
    p = subparsers.add_parser(
        'backup',
        help="Perform a backup.")
    p.add_argument(
        'source',
        help='Source file')
    p.add_argument(
        'name',
        help='Backup name')
    p.add_argument('-r', '--rbd', default=None, help='Hints as rbd json format')
    p.add_argument('-f', '--from-version', default=None, help='Use this version-uid as base')
    p.set_defaults(func='backup')

    # RESTORE
    p = subparsers.add_parser(
        'restore',
        help="Restore a given backup with level to a given target.")
    p.add_argument('-s', '--sparse', action='store_true', help='Write restore file sparse (does not work with legacy devices)')
    p.add_argument('version_uid')
    p.add_argument('target')
    p.set_defaults(func='restore')

    # RM
    p = subparsers.add_parser(
        'rm',
        help="Remove a given backup version. This only removes metadata; you will have to run cleanup afterwards.")
    p.add_argument('version_uid')
    p.set_defaults(func='rm')

    # SCRUB
    p = subparsers.add_parser(
        'scrub',
        help="Scrub a given backup and check for consistency.")
    p.add_argument('-s', '--source', default=None,
        help="Source, optional. If given, check if source matches backup in addition to checksum tests.")
    p.add_argument('-p', '--percentile', default=100,
        help="Only check PERCENTILE percent of the blocks (value 0..100). Default: 100")
    p.add_argument('version_uid')
    p.set_defaults(func='scrub')

    # Export
    p = subparsers.add_parser(
        'export',
        help="Export the metadata of a backup uid into a file.")
    p.add_argument('version_uid')
    p.add_argument('filename', help="Export into this filename ('-' is for stdout)")
    p.set_defaults(func='export')

    # Import
    p = subparsers.add_parser(
        'import',
        help="Import the metadata of a backup from a file.")
    p.add_argument('filename', help="Read from this file ('-' is for stdin)")
    p.set_defaults(func='import_')

    # CLEANUP
    p = subparsers.add_parser(
        'cleanup',
        help="Clean unreferenced blobs.")
    p.add_argument(
        '-f', '--full', action='store_true', default=False,
        help='Do a full cleanup. This will read the full metadata from the data backend (i.e. backup storage) '
             'and compare it to the metadata in the meta backend. Unused data will then be deleted. '
             'This is a slow but complete process. A full cleanup must not be run in parallel with ANY other backy '
             'jobs.')
    p.add_argument(
        '-p', '--prefix', default=None,
        help='If you perform a full cleanup, you may add --prefix to only cleanup block uids starting '
             'with this prefix. This is for iterative cleanups. Example: '
             'cleanup --full --prefix=a')
    p.set_defaults(func='cleanup')

    # LS
    p = subparsers.add_parser(
        'ls',
        help="List existing backups.")
    p.add_argument('version_uid', nargs='?', default=None, help='Show verbose blocks for this version')
    p.set_defaults(func='ls')

    # STATS
    p = subparsers.add_parser(
        'stats',
        help="Show statistics")
    p.add_argument('version_uid', nargs='?', default=None, help='Show statistics for this version')
    p.add_argument('-l', '--limit', default=None,
            help="Limit output to this number (default: unlimited)")
    p.set_defaults(func='stats')

    # diff-meta
    p = subparsers.add_parser(
        'diff-meta',
        help="Output a diff between two versions")
    p.add_argument('version_uid1', help='Left version')
    p.add_argument('version_uid2', help='Right version')
    p.set_defaults(func='diff_meta')

    # NBD
    p = subparsers.add_parser(
        'nbd',
        help="Start an nbd server")
    p.add_argument('version_uid', nargs='?', default=None, help='Start an nbd server for this version')
    p.add_argument('-a', '--bind-address', default='127.0.0.1',
            help="Bind to this ip address (default: 127.0.0.1)")
    p.add_argument('-p', '--bind-port', default=10809,
            help="Bind to this port (default: 10809)")
    p.add_argument(
        '-r', '--read-only', action='store_true', default=False,
        help='Read only if set, otherwise a copy on write backup is created.')
    p.set_defaults(func='nbd')

    args = parser.parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)

    if not hasattr(args, 'func'):
        parser.print_usage()
        sys.exit(1)

    if args.verbose:
        console_level = logging.DEBUG
    #elif args.func == 'scheduler':
        #console_level = logging.INFO
    else:
        console_level = logging.INFO

    Config = partial(_Config, conf_name='backy')
    config = Config(section='DEFAULTS')
    init_logging(config.get('logfile'), console_level)

    commands = Commands(args.machine_output, Config)
    func = getattr(commands, args.func)

    # Pass over to function
    func_args = dict(args._get_kwargs())
    del func_args['func']
    del func_args['verbose']
    del func_args['version']
    del func_args['machine_output']

    try:
        logger.debug('backup.{0}(**{1!r})'.format(args.func, func_args))
        func(**func_args)
        logger.info('Backy complete.\n')
        sys.exit(0)
    except Exception as e:
        logger.error('Unexpected exception')
        logger.exception(e)
        logger.info('Backy failed.\n')
        sys.exit(100)
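
main() in Example #23 does not store function objects in argparse; it stores the command name via set_defaults(func='...') and resolves it later with getattr() on a Commands instance. Stripped to its essentials (toy Commands class, not the project's), the dispatch pattern works like this:

    import argparse

    class Commands:
        def ls(self, version_uid):
            print('would list', version_uid)

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()
    p = subparsers.add_parser('ls')
    p.add_argument('version_uid', nargs='?', default=None)
    p.set_defaults(func='ls')

    args = parser.parse_args(['ls', 'V1'])
    func_args = dict(args._get_kwargs())  # {'func': 'ls', 'version_uid': 'V1'}
    del func_args['func']
    getattr(Commands(), args.func)(**func_args)  # -> Commands().ls(version_uid='V1')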
Example #24
 def nbd(self, version_uid, bind_address, bind_port, read_only):
     try:
         from backy2.enterprise.nbdserver import Server as NbdServer
         from backy2.enterprise.nbd import BackyStore
     except ImportError:
         logger.error('NBD is available in the Enterprise Version only.')
         sys.exit(21)
     backy = self.backy()
     config_NBD = self.Config(section='NBD')
     config_DEFAULTS = self.Config(section='DEFAULTS')
     hash_function = getattr(hashlib, config_DEFAULTS.get('hash_function', 'sha512'))
     store = BackyStore(
             backy, cachedir=config_NBD.get('cachedir'),
             hash_function=hash_function,
             )
     addr = (bind_address, bind_port)
     server = NbdServer(addr, store, read_only)
     logger.info("Starting to serve nbd on %s:%s" % (addr[0], addr[1]))
     logger.info("You may now start")
     logger.info("  nbd-client -l %s -p %s" % (addr[0], addr[1]))
     logger.info("and then get the backup via")
     logger.info("  modprobe nbd")
     logger.info("  nbd-client -N <version> %s -p %s /dev/nbd0" % (addr[0], addr[1]))
     server.serve_forever()
Example #25
    def backup(self, name, snapshot_name, source, hints, from_version, tag=None, expire=None, continue_version=None):
        """ Create a backup from source.
        If hints are given, they must be tuples of (offset, length, exists)
        where offset and length are integers and exists is a boolean. Only the
        data covered by the hints will then be backed up.
        Otherwise, the backup reads the whole source and checks whether the
        checksums match the target.
        If continue_version is given, this version will be continued, i.e.
        existing blocks will not be read again.
        """
        stats = {
                'version_size_bytes': 0,
                'version_size_blocks': 0,
                'bytes_checked': 0,
                'blocks_checked': 0,
                'bytes_read': 0,
                'blocks_read': 0,
                'bytes_written': 0,
                'blocks_written': 0,
                'bytes_found_dedup': 0,
                'blocks_found_dedup': 0,
                'bytes_sparse': 0,
                'blocks_sparse': 0,
                'bytes_throughput': 0,
                'blocks_throughput': 0,
                'start_time': time.time(),
            }
        io = self.get_io_by_source(source)
        io.open_r(source)
        source_size = io.size()

        size = math.ceil(source_size / self.block_size)
        stats['version_size_bytes'] = source_size
        stats['version_size_blocks'] = size

        # Sanity check: check hints for validity, i.e. too high offsets, ...
        if hints is not None and len(hints) > 0:
            max_offset = max([h[0]+h[1] for h in hints])
            if max_offset > source_size:
                raise ValueError('Hints have higher offsets than source file.')

        # Find out which blocks to read
        if hints is not None:
            sparse_blocks = blocks_from_hints([hint for hint in hints if not hint[2]], self.block_size)
            read_blocks = blocks_from_hints([hint for hint in hints if hint[2]], self.block_size)
        else:
            sparse_blocks = []
            read_blocks = range(size)
        sparse_blocks = set(sparse_blocks)
        read_blocks = set(read_blocks)

        # Validity check
        if from_version:
            old_version = self.meta_backend.get_version(from_version)  # raise if not exists
            if not old_version.valid:
                raise RuntimeError('You cannot base on an invalid version.')

        existing_block_ids = set()
        if continue_version:
            version_uid = continue_version
            _v = self.meta_backend.get_version(version_uid)  # raise if version does not exist
            if _v.size_bytes != source_size:
                raise ValueError('Version to continue backup from has a different size than the source. Cannot continue.')
            if _v.valid:
                raise ValueError('You cannot continue a valid version.')
            # reduce read_blocks and sparse_blocks by existing blocks
            existing_block_ids = set(self.meta_backend.get_block_ids_by_version(version_uid))
            read_blocks = read_blocks - existing_block_ids
            sparse_blocks = sparse_blocks - existing_block_ids
        else:
            # Create new version
            version_uid = self.meta_backend.set_version(name, snapshot_name, size, source_size, 0)  # initially marked invalid
            if not self.locking.lock(version_uid):
                raise LockError('Version {} is locked.'.format(version_uid))

        # Sanity check:
        # Check some blocks outside of hints if they are the same in the
        # from_version backup and in the current backup. If they
        # don't, either hints are wrong (e.g. from a wrong snapshot diff)
        # or source doesn't match. In any case, the resulting backup won't
        # be good.
        check_block_ids = set()
        if from_version and hints:
            ignore_blocks = list(set(range(size)) - read_blocks - sparse_blocks)
            random.shuffle(ignore_blocks)
            num_check_blocks = 10
            # 50% from the start
            check_block_ids = ignore_blocks[:num_check_blocks//2]
            # and 50% from random locations
            check_block_ids = set(check_block_ids + random.sample(ignore_blocks, num_check_blocks//2))

        # Find blocks to base on
        if from_version:
            # Make sure we're based on a valid version.
            old_blocks = iter(self.meta_backend.get_blocks_by_version(from_version).yield_per(1000))
        else:
            old_blocks = iter([])

        # Create read jobs
        _log_every_jobs = size // 200 + 1  # about every half percent
        _log_jobs_counter = 0
        t1 = time.time()
        t_last_run = 0
        for block_id in range(size):
            _log_jobs_counter -= 1
            # Create a block, either based on an old one (from_version) or a fresh one
            _have_old_block = False
            try:
                old_block = next(old_blocks)
            except StopIteration:  # No old block found, we create a fresh one
                block_uid = None
                checksum = None
                block_size = self.block_size
                valid = 1
            else:  # Old block found, maybe base on that one
                assert old_block.id == block_id
                block_uid = old_block.uid
                checksum = old_block.checksum
                block_size = old_block.size
                valid = old_block.valid
                _have_old_block = True
            # the last block can differ in size, so let's check
            _offset = block_id * self.block_size
            new_block_size = min(self.block_size, source_size - _offset)
            if new_block_size != block_size:
                # last block changed, so set back all info
                block_size = new_block_size
                block_uid = None
                checksum = None
                valid = 1
                _have_old_block = False

            # Build list of blocks to be read or skipped
            # Read (read_blocks, check_block_ids or block is invalid) or not?
            if block_id in read_blocks:
                logger.debug('Block {}: Reading'.format(block_id))
                io.read(block_id, read=True)
            elif block_id in check_block_ids and _have_old_block and checksum:
                logger.debug('Block {}: Reading / checking'.format(block_id))
                io.read(block_id, read=True, metadata={'check': True, 'checksum': checksum, 'block_size': block_size})
            elif not valid:
                logger.debug('Block {}: Reading because not valid'.format(block_id))
                io.read(block_id, read=True)
                assert _have_old_block
            elif block_id in sparse_blocks:
                logger.debug('Block {}: Sparse'.format(block_id))
                # Sparse blocks have uid and checksum None.
                io.read(block_id, read=False, metadata={'block_uid': None, 'checksum': None, 'block_size': block_size})
            elif block_id in existing_block_ids:
                logger.debug('Block {}: Exists in continued version'.format(block_id))
                io.read(block_id, read=False, metadata={'skip': True})
            else:
                logger.debug('Block {}: Fresh empty or existing'.format(block_id))
                io.read(block_id, read=False, metadata={'block_uid': block_uid, 'checksum': checksum, 'block_size': block_size})

            # log and process output
            if time.time() - t_last_run >= 1:
                t_last_run = time.time()
                t2 = time.time()
                dt = t2-t1
                logger.debug(io.thread_status() + " " + self.data_backend.thread_status())

                io_queue_status = io.queue_status()
                db_queue_status = self.data_backend.queue_status()
                _status = status(
                    'Backing up (1/2: Prep) {}'.format(source),
                    0,
                    0,
                    (block_id + 1) / size * 100,
                    0.0,
                    round(size / (block_id+1) * dt - dt),
                    )
                notify(self.process_name, _status)
                if _log_jobs_counter <= 0:
                    _log_jobs_counter = _log_every_jobs
                    logger.info(_status)


        # now use the readers and write
        _log_every_jobs = size // 200 + 1  # about every half percent
        _log_jobs_counter = 0
        t1 = time.time()
        t_last_run = 0

        _written_blocks_queue = queue.Queue()  # contains ONLY blocks that have been written to the data backend.

        # consume the read jobs
        for i in range(size):
            _log_jobs_counter -= 1
            block_id, data, data_checksum, metadata = io.get()

            if data:
                block_size = len(data)
                stats['blocks_read'] += 1
                stats['bytes_read'] += block_size
                stats['blocks_throughput'] += 1
                stats['bytes_throughput'] += block_size

                existing_block = None
                if self.dedup:
                    existing_block = self.meta_backend.get_block_by_checksum(data_checksum)

                if data == b'\0' * block_size:
                    block_uid = None
                    data_checksum = None
                    _written_blocks_queue.put((block_id, version_uid, block_uid, data_checksum, block_size))
                elif existing_block and existing_block.size == block_size:
                    block_uid = existing_block.uid
                    _written_blocks_queue.put((block_id, version_uid, block_uid, data_checksum, block_size))
                else:
                    try:
                        # This is the whole reason for _written_blocks_queue. We must first write the block to
                        # the backup data store before we write it to the database. Otherwise we can't continue
                        # reliably.
                        def callback(local_block_id, local_version_uid, local_data_checksum, local_block_size):
                            def f(_block_uid):
                                _written_blocks_queue.put((local_block_id, local_version_uid, _block_uid, local_data_checksum, local_block_size))
                            return f
                        block_uid = self.data_backend.save(data, callback=callback(block_id, version_uid, data_checksum, block_size))  # this will re-raise an exception from a worker thread
                    except Exception:
                        break  # stop submitting work; the error is reported below via last_exception.
                    stats['blocks_written'] += 1
                    stats['bytes_written'] += block_size

                if metadata and 'check' in metadata:
                    # Perform sanity check
                    if metadata['checksum'] != data_checksum or metadata['block_size'] != block_size:
                        logger.error("Source and backup don't match in regions outside of the hints.")
                        logger.error("Looks like the hints don't match or the source is different.")
                        logger.error("Found wrong source data at block {}: offset {} with max. length {}".format(
                            block_id,
                            block_id * self.block_size,
                            block_size
                            ))
                        # remove version
                        self.meta_backend.rm_version(version_uid)
                        sys.exit(5)
                    stats['blocks_checked'] += 1
                    stats['bytes_checked'] += block_size
            else:
                # No data means this block comes from the previous version or is empty per the hints,
                # so only metadata is stored, unless it is a skipped block from a continued version.
                if 'skip' not in metadata:
                    block_uid = metadata['block_uid']
                    data_checksum = metadata['checksum']
                    block_size = metadata['block_size']
                    _written_blocks_queue.put((block_id, version_uid, block_uid, data_checksum, block_size))
                    if metadata['block_uid'] is None:
                        stats['blocks_sparse'] += 1
                        stats['bytes_sparse'] += block_size
                    else:
                        stats['blocks_found_dedup'] += 1
                        stats['bytes_found_dedup'] += block_size

            # Set the blocks from the _written_blocks_queue
            while True:
                try:
                    q_block_id, q_version_uid, q_block_uid, q_data_checksum, q_block_size = _written_blocks_queue.get(block=False)
                except queue.Empty:
                    break
                else:
                    self.meta_backend.set_block(q_block_id, q_version_uid, q_block_uid, q_data_checksum, q_block_size, valid=1, _commit=True, _upsert=False)

            # log and process output
            if time.time() - t_last_run >= 1:
                t_last_run = time.time()
                t2 = time.time()
                dt = t2-t1
                logger.debug(io.thread_status() + " " + self.data_backend.thread_status())

                io_queue_status = io.queue_status()
                db_queue_status = self.data_backend.queue_status()
                _status = status(
                    'Backing up (2/2: Data) {}'.format(source),
                    io_queue_status['rq_filled']*100,
                    db_queue_status['wq_filled']*100,
                    (i + 1) / size * 100,
                    stats['bytes_throughput'] / dt,
                    round(size / (i+1) * dt - dt),
                    )
                notify(self.process_name, _status)
                if _log_jobs_counter <= 0:
                    _log_jobs_counter = _log_every_jobs
                    logger.info(_status)

        # check if there are any exceptions left
        if self.data_backend.last_exception:
            logger.error("Exception during saving to the data backend: {}".format(str(self.data_backend.last_exception)))
        else:
            io.close()  # wait for all readers
            self.data_backend.close()  # wait for all writers

        # Set the rest of the blocks from the _written_blocks_queue
        while True:
            try:
                q_block_id, q_version_uid, q_block_uid, q_data_checksum, q_block_size = _written_blocks_queue.get(block=False)
            except queue.Empty:
                break
            else:
                self.meta_backend.set_block(q_block_id, q_version_uid, q_block_uid, q_data_checksum, q_block_size, valid=1, _commit=True, _upsert=False)

        tags = []
        if tag is not None:
            if isinstance(tag, list):
                tags = tag
            else:
                tags.append(tag)
        else:
            if not continue_version:
                tags = self._generate_auto_tags(name)
        for tag in tags:
            self.meta_backend.add_tag(version_uid, tag)

        if expire:
            self.meta_backend.expire_version(version_uid, expire)

        self.meta_backend.set_stats(
            version_uid=version_uid,
            version_name=name,
            version_size_bytes=stats['version_size_bytes'],
            version_size_blocks=stats['version_size_blocks'],
            bytes_read=stats['bytes_read'],
            blocks_read=stats['blocks_read'],
            bytes_written=stats['bytes_written'],
            blocks_written=stats['blocks_written'],
            bytes_found_dedup=stats['bytes_found_dedup'],
            blocks_found_dedup=stats['blocks_found_dedup'],
            bytes_sparse=stats['bytes_sparse'],
            blocks_sparse=stats['blocks_sparse'],
            duration_seconds=int(time.time() - stats['start_time']),
            )

        if self.data_backend.last_exception:
            logger.info('New invalid version: {} (Tags: [{}])'.format(version_uid, ','.join(tags)))
        else:
            self.meta_backend.set_version_valid(version_uid)
            logger.info('New version: {} (Tags: [{}])'.format(version_uid, ','.join(tags)))

        self.meta_backend._commit()
        self.locking.unlock(version_uid)

        if self.data_backend.last_exception:
            sys.exit(6)  # i.e. kill all the remaining workers

        return version_uid
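
The loop above commits a block's metadata row only after the data backend has confirmed the write, by routing every confirmation through _written_blocks_queue. Below is a minimal, hypothetical sketch of that write-then-record ordering; save_async and make_callback are made-up stand-ins for backy2's data backend, not its real API.

import queue
import threading
import time

written = queue.Queue()  # (block_id, block_uid) pairs confirmed durable by the storage layer

def save_async(block_id, data, callback):
    # Pretend data-backend save: write in a worker thread, then fire the callback.
    def worker():
        time.sleep(0.01)                         # simulated upload latency
        callback('uid-{}'.format(block_id))      # only now is the block durable
    threading.Thread(target=worker).start()

def make_callback(block_id):
    # Bind the loop variable explicitly so the worker thread reports the right block.
    def cb(block_uid):
        written.put((block_id, block_uid))
    return cb

for block_id in range(5):
    save_async(block_id, b'\x00' * 4096, make_callback(block_id))

confirmed = 0
while confirmed < 5:  # drain confirmations; only now would the metadata be committed
    block_id, block_uid = written.get()
    print('metadata may now be committed for block', block_id, '->', block_uid)
    confirmed += 1

Committing metadata only for confirmed blocks is what makes a later continued backup safe: every row in the database is guaranteed to have its data present in the backend.
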
Example #26
0
    def restore(self, version_uid, target, sparse=False, force=False, continue_from=0):
        # See if the version is locked, i.e. currently in backup
        if not self.locking.lock(version_uid):
            raise LockError('Version {} is locked.'.format(version_uid))
        self.locking.unlock(version_uid)  # no need to keep it locked

        stats = {
                'bytes_read': 0,
                'blocks_read': 0,
                'bytes_written': 0,
                'blocks_written': 0,
                'bytes_throughput': 0,
                'blocks_throughput': 0,
                'bytes_sparse': 0,
                'blocks_sparse': 0,
            }

        version = self.meta_backend.get_version(version_uid)  # raises if the version does not exist
        if continue_from:
            notify(self.process_name, 'Restoring Version {} from block id {}'.format(version_uid, continue_from))
        else:
            notify(self.process_name, 'Restoring Version {}'.format(version_uid))
        blocks = self.meta_backend.get_blocks_by_version(version_uid)
        num_blocks = blocks.count()

        io = self.get_io_by_source(target)
        io.open_w(target, version.size_bytes, force)

        read_jobs = 0
        _log_every_jobs = num_blocks // 200 + 1  # about every half percent
        _log_jobs_counter = 0
        t1 = time.time()
        t_last_run = 0
        for i, block in enumerate(blocks.yield_per(1000)):
            if block.id < continue_from:
                continue
            _log_jobs_counter -= 1
            if block.uid:
                self.data_backend.read(block.deref())  # adds a read job
                read_jobs += 1
            elif not sparse:
                io.write(block, b'\0'*block.size)
                stats['blocks_written'] += 1
                stats['bytes_written'] += block.size
                stats['blocks_throughput'] += 1
                stats['bytes_throughput'] += block.size
                logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                    block.id,
                    block.size,
                    ))
            else:
                stats['blocks_sparse'] += 1
                stats['bytes_sparse'] += block.size
                logger.debug('Ignored sparse block {}.'.format(
                    block.id,
                    ))

            if time.time() - t_last_run >= 1:
                t_last_run = time.time()
                t2 = time.time()
                dt = t2-t1
                logger.debug(io.thread_status() + " " + self.data_backend.thread_status())

                io_queue_status = io.queue_status()
                db_queue_status = self.data_backend.queue_status()
                _status = status(
                    'Restore phase 1/2 (sparse) to {}'.format(target),
                    db_queue_status['rq_filled']*100,
                    io_queue_status['wq_filled']*100,
                    (i + 1) / num_blocks * 100,
                    stats['bytes_throughput'] / dt,
                    round(num_blocks / (i+1) * dt - dt),
                    )
                notify(self.process_name, _status)
                if _log_jobs_counter <= 0:
                    _log_jobs_counter = _log_every_jobs
                    logger.info(_status)

        stats = {
                'bytes_read': 0,
                'blocks_read': 0,
                'bytes_written': 0,
                'blocks_written': 0,
                'bytes_sparse': 0,
                'blocks_sparse': 0,
                'bytes_throughput': 0,
                'blocks_throughput': 0,
            }
        _log_every_jobs = read_jobs // 200 + 1  # about every half percent
        _log_jobs_counter = 0
        t1 = time.time()
        t_last_run = 0
        min_sequential_block_id = MinSequential(continue_from)  # for finding the minimum block-ID until which we have restored ALL blocks
        for i in range(read_jobs):
            _log_jobs_counter -= 1
            try:
                while True:
                    try:
                        block, offset, length, data = self.data_backend.read_get(timeout=.1)
                    except queue.Empty:  # timeout occurred
                        continue
                    else:
                        break
            except Exception as e:
                # TODO (restore): write information for continue
                logger.error("Exception during reading from the data backend: {}".format(str(e)))
                sys.exit(6)
            assert len(data) == block.size
            stats['blocks_read'] += 1
            stats['bytes_read'] += block.size

            data_checksum = self.hash_function(data).hexdigest()
            def callback(local_block_id, local_block_size):
                # Bind id and size explicitly; the writer thread may fire after `block` has moved on.
                def f():
                    min_sequential_block_id.put(local_block_id)
                    stats['blocks_written'] += 1
                    stats['bytes_written'] += local_block_size
                    stats['blocks_throughput'] += 1
                    stats['bytes_throughput'] += local_block_size
                return f
            io.write(block, data, callback(block.id, block.size))

            if data_checksum != block.checksum:
                logger.error('Checksum mismatch during restore for block '
                    '{} (is: {} should-be: {}, block-valid: {}). Block '
                    'restored is invalid. Continuing.'.format(
                        block.id,
                        data_checksum,
                        block.checksum,
                        block.valid,
                        ))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            else:
                logger.debug('Restored block {} successfully ({} bytes).'.format(
                    block.id,
                    block.size,
                    ))

            if time.time() - t_last_run >= 1:
                t_last_run = time.time()
                t2 = time.time()
                dt = t2-t1
                logger.debug(io.thread_status() + " " + self.data_backend.thread_status())

                io_queue_status = io.queue_status()
                db_queue_status = self.data_backend.queue_status()
                _status = status(
                    'Restore phase 2/2 (data) to {}'.format(target),
                    db_queue_status['rq_filled']*100,
                    io_queue_status['wq_filled']*100,
                    (i + 1) / read_jobs * 100,
                    stats['bytes_throughput'] / dt,
                    round(read_jobs / (i+1) * dt - dt),
                    'Last ID: {}'.format(min_sequential_block_id.get()),
                    )
                notify(self.process_name, _status)
                if _log_jobs_counter <= 0:
                    _log_jobs_counter = _log_every_jobs
                    logger.info(_status)


        self.locking.unlock(version_uid)
        io.close()
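
The 'Last ID' shown in the phase 2 status comes from MinSequential, which is not part of this snippet. Assuming it only needs put() for completed block ids and get() for the highest id up to which the restore is contiguous (the point a failed restore could be continued from), a plausible minimal version could look like the sketch below; it is hypothetical, not backy2's actual class.

import threading

class MinSequential:
    # Track the highest block id N such that every id from `start` up to N has been reported.
    def __init__(self, start=0):
        self._next = start            # next id we are still waiting for
        self._seen = set()            # ids reported out of order
        self._lock = threading.Lock()

    def put(self, block_id):
        with self._lock:
            self._seen.add(block_id)
            while self._next in self._seen:
                self._seen.remove(self._next)
                self._next += 1

    def get(self):
        with self._lock:
            return self._next - 1     # last id of the contiguous completed range

m = MinSequential(0)
for i in (0, 2, 1, 4):
    m.put(i)
print(m.get())  # -> 2; block 3 is still outstanding, so 4 does not extend the range yet
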
Example #27
0
def main():
    parser = argparse.ArgumentParser(
        description='Backup and restore for block devices.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '-v', '--verbose', action='store_true', help='verbose output')
    parser.add_argument(
        '-m', '--machine-output', action='store_true', default=False)
    parser.add_argument(
        '-V', '--version', action='store_true', help='Show version')

    subparsers = parser.add_subparsers()

    # BACKUP
    p = subparsers.add_parser(
        'backup',
        help="Perform a backup.")
    p.add_argument(
        'source',
        help='Source (url-like, e.g. file:///dev/sda or rbd://pool/imagename@snapshot)')
    p.add_argument(
        'name',
        help='Backup name')
    p.add_argument('-r', '--rbd', default=None, help='Hints as rbd json format')
    p.add_argument('-f', '--from-version', default=None, help='Use this version-uid as base')
    p.set_defaults(func='backup')

    # RESTORE
    p = subparsers.add_parser(
        'restore',
        help="Restore a given backup to a given target.")
    p.add_argument('-s', '--sparse', action='store_true', help='Faster. Restore '
        'only existing blocks (works only with file- and rbd-restore, not with lvm)')
    p.add_argument('-f', '--force', action='store_true', help='Force overwrite of existing files/devices/images')
    p.add_argument('version_uid')
    p.add_argument('target',
        help='Target (url-like, e.g. file:///dev/sda or rbd://pool/imagename)')
    p.set_defaults(func='restore')

    # PROTECT
    p = subparsers.add_parser(
        'protect',
        help="Protect a backup version. Protected versions cannot be removed.")
    p.add_argument('version_uid')
    p.set_defaults(func='protect')

    # UNPROTECT
    p = subparsers.add_parser(
        'unprotect',
        help="Unprotect a backup version. Unprotected versions can be removed.")
    p.add_argument('version_uid')
    p.set_defaults(func='unprotect')

    # RM
    p = subparsers.add_parser(
        'rm',
        help="Remove a given backup version. This will only remove meta data and you will have to cleanup after this.")
    p.add_argument('-f', '--force', action='store_true', help="Force removal of version, even if it's younger than the configured disallow_rm_when_younger_than_days.")
    p.add_argument('version_uid')
    p.set_defaults(func='rm')

    # SCRUB
    p = subparsers.add_parser(
        'scrub',
        help="Scrub a given backup and check for consistency.")
    p.add_argument('-s', '--source', default=None,
        help="Source, optional. If given, check if source matches backup in addition to checksum tests. url-like format as in backup.")
    p.add_argument('-p', '--percentile', default=100,
        help="Only check PERCENTILE percent of the blocks (value 0..100). Default: 100")
    p.add_argument('version_uid')
    p.set_defaults(func='scrub')

    # Export
    p = subparsers.add_parser(
        'export',
        help="Export the metadata of a backup uid into a file.")
    p.add_argument('version_uid')
    p.add_argument('filename', help="Export into this filename ('-' is for stdout)")
    p.set_defaults(func='export')

    # Import
    p = subparsers.add_parser(
        'import',
        help="Import the metadata of a backup from a file.")
    p.add_argument('filename', help="Read from this file ('-' is for stdin)")
    p.set_defaults(func='import_')

    # CLEANUP
    p = subparsers.add_parser(
        'cleanup',
        help="Clean unreferenced blobs.")
    p.add_argument(
        '-f', '--full', action='store_true', default=False,
        help='Do a full cleanup. This will read the full metadata from the data backend (i.e. backup storage) '
             'and compare it to the metadata in the meta backend. Unused data will then be deleted. '
             'This is a slow but complete process. A full cleanup must not be run in parallel with ANY other backy '
             'jobs.')
    p.add_argument(
        '-p', '--prefix', default=None,
        help='If you perform a full cleanup, you may add --prefix to only clean up block uids starting '
             'with this prefix. This is for iterative cleanups. Example: '
             'cleanup --full --prefix=a')
    p.set_defaults(func='cleanup')

    # LS
    p = subparsers.add_parser(
        'ls',
        help="List existing backups.")
    p.add_argument('version_uid', nargs='?', default=None, help='Show verbose blocks for this version')
    p.set_defaults(func='ls')

    # STATS
    p = subparsers.add_parser(
        'stats',
        help="Show statistics")
    p.add_argument('version_uid', nargs='?', default=None, help='Show statistics for this version')
    p.add_argument('-l', '--limit', default=None,
            help="Limit output to this number (default: unlimited)")
    p.set_defaults(func='stats')

    # diff-meta
    p = subparsers.add_parser(
        'diff-meta',
        help="Output a diff between two versions")
    p.add_argument('version_uid1', help='Left version')
    p.add_argument('version_uid2', help='Right version')
    p.set_defaults(func='diff_meta')

    # NBD
    p = subparsers.add_parser(
        'nbd',
        help="Start an nbd server")
    p.add_argument('version_uid', nargs='?', default=None, help='Start an nbd server for this version')
    p.add_argument('-a', '--bind-address', default='127.0.0.1',
            help="Bind to this ip address (default: 127.0.0.1)")
    p.add_argument('-p', '--bind-port', default=10809,
            help="Bind to this port (default: 10809)")
    p.add_argument(
        '-r', '--read-only', action='store_true', default=False,
        help='Read only if set, otherwise a copy on write backup is created.')
    p.set_defaults(func='nbd')

    args = parser.parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)

    if not hasattr(args, 'func'):
        parser.print_usage()
        sys.exit(1)

    if args.verbose:
        console_level = logging.DEBUG
    #elif args.func == 'scheduler':
        #console_level = logging.INFO
    else:
        console_level = logging.INFO

    Config = partial(_Config, conf_name='backy')
    config = Config(section='DEFAULTS')
    init_logging(config.get('logfile'), console_level)

    commands = Commands(args.machine_output, Config)
    func = getattr(commands, args.func)

    # Pass over to function
    func_args = dict(args._get_kwargs())
    del func_args['func']
    del func_args['verbose']
    del func_args['version']
    del func_args['machine_output']

    try:
        logger.debug('backup.{0}(**{1!r})'.format(args.func, func_args))
        func(**func_args)
        logger.info('Backy complete.\n')
        sys.exit(0)
    except Exception as e:
        logger.error('Unexpected exception')
        logger.exception(e)
        logger.info('Backy failed.\n')
        sys.exit(100)
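
The tail of main() resolves the selected Commands method by the name stored via set_defaults(func=...) and calls it with the remaining parsed arguments as keywords. Below is a self-contained sketch of that getattr dispatch; the Commands class here is a toy stand-in (the real one also takes machine_output and a Config factory).

import argparse

class Commands:
    def ls(self, version_uid):
        print('would list', version_uid)

parser = argparse.ArgumentParser()
sub = parser.add_subparsers()
p = sub.add_parser('ls')
p.add_argument('version_uid', nargs='?', default=None)
p.set_defaults(func='ls')

args = parser.parse_args(['ls', 'v1'])
func = getattr(Commands(), args.func)   # resolve the method named by the subcommand

func_args = dict(args._get_kwargs())    # namespace -> keyword arguments
del func_args['func']                   # drop everything that is not a command parameter
func(**func_args)                       # prints: would list v1
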
Example #28
0
def main():
    parser = argparse.ArgumentParser(
        description='Backup and restore for block devices.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='verbose output')
    parser.add_argument('-d',
                        '--debug',
                        action='store_true',
                        help='debug output')
    parser.add_argument('-m',
                        '--machine-output',
                        action='store_true',
                        default=False)
    parser.add_argument('-s',
                        '--skip-header',
                        action='store_true',
                        default=False)
    parser.add_argument('-r',
                        '--human-readable',
                        action='store_true',
                        default=False)
    parser.add_argument('-V',
                        '--version',
                        action='store_true',
                        help='Show version')
    parser.add_argument('-c', '--configfile', default=None, type=str)

    subparsers = parser.add_subparsers()

    # INITDB
    p = subparsers.add_parser(
        'initdb',
        help=
        "Initialize the database by populating tables. This will not delete tables or data if they exist."
    )
    p.set_defaults(func='initdb')

    # BACKUP
    p = subparsers.add_parser('backup', help="Perform a backup.")
    p.add_argument(
        'source',
        help=
        'Source (url-like, e.g. file:///dev/sda or rbd://pool/imagename@snapshot)'
    )
    p.add_argument('name', help='Backup name (e.g. the hostname)')
    p.add_argument('-s',
                   '--snapshot-name',
                   default='',
                   help='Snapshot name (e.g. the name of the rbd snapshot)')
    p.add_argument('-r',
                   '--rbd',
                   default=None,
                   help='Hints as rbd json format')
    p.add_argument('-f',
                   '--from-version',
                   default=None,
                   help='Use this version-uid as base')
    p.add_argument('-c',
                   '--continue-version',
                   default=None,
                   help='Continue backup on this version-uid')
    p.add_argument(
        '-t',
        '--tag',
        default=None,
        help=
        'Use a specific tag (or multiple comma-separated tags) for the target backup version-uid'
    )
    p.add_argument(
        '-e',
        '--expire',
        default='',
        help='Expiration date (yyyy-mm-dd or "yyyy-mm-dd HH:MM:SS") (optional)'
    )
    p.set_defaults(func='backup')

    # RESTORE
    p = subparsers.add_parser('restore',
                              help="Restore a given backup to a given target.")
    p.add_argument(
        '-s',
        '--sparse',
        action='store_true',
        help='Faster. Restore '
        'only existing blocks (works only with file- and rbd-restore, not with lvm)'
    )
    p.add_argument('-f',
                   '--force',
                   action='store_true',
                   help='Force overwrite of existing files/devices/images')
    p.add_argument(
        '-c',
        '--continue-from',
        default=0,
        help=
        'Continue from this block (only use this for partially failed restores!)'
    )
    p.add_argument('version_uid')
    p.add_argument(
        'target',
        help='Target (url-like, e.g. file:///dev/sda or rbd://pool/imagename)')
    p.set_defaults(func='restore')

    # PROTECT
    p = subparsers.add_parser(
        'protect',
        help="Protect a backup version. Protected versions cannot be removed.")
    p.add_argument('version_uid')
    p.set_defaults(func='protect')

    # UNPROTECT
    p = subparsers.add_parser(
        'unprotect',
        help="Unprotect a backup version. Unprotected versions can be removed."
    )
    p.add_argument('version_uid')
    p.set_defaults(func='unprotect')

    # RM
    p = subparsers.add_parser(
        'rm',
        help=
        "Remove a given backup version. This will only remove meta data and you will have to cleanup after this."
    )
    p.add_argument(
        '-f',
        '--force',
        action='store_true',
        help=
        "Force removal of version, even if it's younger than the configured disallow_rm_when_younger_than_days."
    )
    p.add_argument('version_uid')
    p.set_defaults(func='rm')

    # SCRUB
    p = subparsers.add_parser(
        'scrub', help="Scrub a given backup and check for consistency.")
    p.add_argument(
        '-s',
        '--source',
        default=None,
        help=
        "Source, optional. If given, check if source matches backup in addition to checksum tests. url-like format as in backup."
    )
    p.add_argument(
        '-p',
        '--percentile',
        default=100,
        help=
        "Only check PERCENTILE percent of the blocks (value 0..100). Default: 100"
    )
    p.add_argument('version_uid')
    p.set_defaults(func='scrub')

    # Export
    p = subparsers.add_parser(
        'export', help="Export the metadata of a backup uid into a file.")
    p.add_argument('version_uid')
    p.add_argument('filename',
                   help="Export into this filename ('-' is for stdout)")
    p.set_defaults(func='export')

    # Import
    p = subparsers.add_parser(
        'import', help="Import the metadata of a backup from a file.")
    p.add_argument('filename', help="Read from this file ('-' is for stdin)")
    p.set_defaults(func='import_')

    # CLEANUP
    p = subparsers.add_parser('cleanup', help="Clean unreferenced blobs.")
    p.add_argument(
        '-f',
        '--full',
        action='store_true',
        default=False,
        help=
        'Do a full cleanup. This will read the full metadata from the data backend (i.e. backup storage) '
        'and compare it to the metadata in the meta backend. Unused data will then be deleted. '
        'This is a slow but complete process. A full cleanup must not be run in parallel with ANY other backy '
        'jobs.')
    p.add_argument(
        '-p',
        '--prefix',
        default=None,
        help=
        'If you perform a full cleanup, you may add --prefix to only clean up block uids starting '
        'with this prefix. This is for iterative cleanups. Example: '
        'cleanup --full --prefix=a')
    p.add_argument(
        '--dangerous-force',
        action='store_true',
        default=False,
        help='Seriously, do not use this outside of testing and development.')
    p.set_defaults(func='cleanup')

    # LS
    p = subparsers.add_parser('ls', help="List existing backups.")
    p.add_argument('name',
                   nargs='?',
                   default=None,
                   help='Show versions for this name only')
    p.add_argument('-s',
                   '--snapshot-name',
                   default=None,
                   help="Limit output to this snapshot name")
    p.add_argument('-t',
                   '--tag',
                   default=None,
                   help="Limit output to this tag")
    p.add_argument('-e',
                   '--expired',
                   action='store_true',
                   default=False,
                   help="Only list expired versions (expired < now)")
    p.add_argument(
        '-f',
        '--fields',
        default=
        "date,name,snapshot_name,size,size_bytes,uid,valid,protected,tags,expire",
        help=
        "Show these fields (comma separated). Available: date,name,snapshot_name,size,size_bytes,uid,valid,protected,tags,expire"
    )

    p.set_defaults(func='ls')

    # STATS
    p = subparsers.add_parser('stats', help="Show statistics")
    p.add_argument('version_uid',
                   nargs='?',
                   default=None,
                   help='Show statistics for this version')
    p.add_argument(
        '-f',
        '--fields',
        default=
        "date,uid,name,size bytes,size blocks,bytes read,blocks read,bytes written,blocks written,bytes dedup,blocks dedup,bytes sparse,blocks sparse,duration (s)",
        help=
        "Show these fields (comma separated). Available: date,uid,name,size bytes,size blocks,bytes read,blocks read,bytes written,blocks written,bytes dedup,blocks dedup,bytes sparse,blocks sparse,duration (s)"
    )
    p.add_argument('-l',
                   '--limit',
                   default=None,
                   help="Limit output to this number (default: unlimited)")
    p.set_defaults(func='stats')

    # diff-meta
    p = subparsers.add_parser('diff-meta',
                              help="Output a diff between two versions")
    p.add_argument('version_uid1', help='Left version')
    p.add_argument('version_uid2', help='Right version')
    p.set_defaults(func='diff_meta')

    # disk usage
    p = subparsers.add_parser(
        'du', help="Get disk usage for a version or for all versions")
    p.add_argument('version_uid',
                   nargs='?',
                   default=None,
                   help='Show disk usage for this version')
    p.add_argument(
        '-f',
        '--fields',
        default=
        "Real,Null,Dedup Own,Dedup Others,Individual,Est. Space,Est. Space freed",
        help=
        "Show these fields (comma separated). Available: Real,Null,Dedup Own,Dedup Others,Individual,Est. Space,Est. Space freed"
    )
    p.set_defaults(func='du')

    # FUSE
    p = subparsers.add_parser('fuse', help="Fuse mount backy backups")
    p.add_argument('mount', help='Mountpoint')
    p.set_defaults(func='fuse')

    # Re-Keying
    p = subparsers.add_parser(
        'rekey',
        help=
        "Re-Key all blocks in backy2 with a new key in the config. This will NOT encrypt unencrypted blocks or re-encrypt existing blocks."
    )
    p.add_argument('oldkey', help='The old key as it was found in the config')
    p.set_defaults(func='rekey')

    # Migrate encryption
    p = subparsers.add_parser(
        'migrate-encryption',
        help=
        "Create a new version with blocks migrated/encrypted to the latest encryption version."
    )
    p.add_argument('version_uid', help='The version uid to migrate')
    p.set_defaults(func='migrate_encryption')

    # ADD TAG
    p = subparsers.add_parser(
        'add-tag',
        help=
        "Add a named tag (or many comma-separated tags) to a backup version.")
    p.add_argument('version_uid')
    p.add_argument('name')
    p.set_defaults(func='add_tag')

    # REMOVE TAG
    p = subparsers.add_parser(
        'remove-tag',
        help=
        "Remove a named tag (or many comma-separated tags) from a backup version."
    )
    p.add_argument('version_uid')
    p.add_argument('name')
    p.set_defaults(func='remove_tag')

    # EXPIRE
    p = subparsers.add_parser(
        'expire',
        help=
        """Set expiration date for a backup version. Date format is yyyy-mm-dd or "yyyy-mm-dd HH:MM:SS" (e.g. 2020-01-23). HINT: Create with 'date +"%%Y-%%m-%%d" -d "today + 7 days"'"""
    )
    p.add_argument('version_uid')
    p.add_argument('expire')
    p.set_defaults(func='expire')

    # DUE
    p = subparsers.add_parser(
        'due',
        help=
        """Based on the schedulers in the config file, calculate the due backups including tags."""
    )
    p.add_argument(
        'name',
        nargs='?',
        default=None,
        help=
        'Show due backups for this version name (optional, if not given, show due backups for all names).'
    )
    p.add_argument(
        '-s',
        '--schedulers',
        default="daily,weekly,monthly",
        help=
        "Use these schedulers as defined in backy.cfg (default: daily,weekly,monthly)"
    )
    p.add_argument(
        '-f',
        '--fields',
        default="name,schedulers,expire_date,due_since",
        help=
        "Show these fields (comma separated). Available: name,schedulers,expire_date,due_since"
    )
    p.set_defaults(func='due')

    # SLA
    p = subparsers.add_parser(
        'sla',
        help=
        """Based on the schedulers in the config file, calculate the information about SLA."""
    )
    p.add_argument(
        'name',
        nargs='?',
        default=None,
        help=
        'Show SLA breaches for this version name (optional, if not given, show SLA breaches for all names).'
    )
    p.add_argument(
        '-s',
        '--schedulers',
        default="daily,weekly,monthly",
        help=
        "Use these schedulers as defined in backy.cfg (default: daily,weekly,monthly)"
    )
    p.add_argument(
        '-f',
        '--fields',
        default="name,breach",
        help="Show these fields (comma separated). Available: name,breach")
    p.set_defaults(func='sla')

    args = parser.parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)

    if not hasattr(args, 'func'):
        parser.print_usage()
        sys.exit(1)

    if args.verbose:
        console_level = logging.DEBUG
    #elif args.func == 'scheduler':
    #console_level = logging.INFO
    else:
        console_level = logging.INFO

    if args.debug:
        debug = True
    else:
        debug = False

    if args.configfile is not None and args.configfile != '':
        try:
            cfg = open(args.configfile, 'r', encoding='utf-8').read()
        except FileNotFoundError:
            logger.error('File not found: {}'.format(args.configfile))
            sys.exit(1)
        Config = partial(_Config, cfg=cfg)
    else:
        Config = partial(_Config, conf_name='backy')
    config = Config(section='DEFAULTS')

    # logging ERROR only when machine output is selected
    if args.machine_output:
        init_logging(config.get('logfile'), logging.ERROR, debug)
    else:
        init_logging(config.get('logfile'), console_level, debug)

    commands = Commands(args.machine_output, args.skip_header,
                        args.human_readable, Config)
    func = getattr(commands, args.func)

    # Pass over to function
    func_args = dict(args._get_kwargs())
    del func_args['configfile']
    del func_args['func']
    del func_args['verbose']
    del func_args['debug']
    del func_args['version']
    del func_args['machine_output']
    del func_args['skip_header']
    del func_args['human_readable']

    try:
        logger.debug('backup.{0}(**{1!r})'.format(args.func, func_args))
        func(**func_args)
        logger.info('Backy complete.\n')
        sys.exit(0)
    except Exception as e:
        if args.debug:
            logger.error('Unexpected exception')
            logger.exception(e)
        else:
            logger.error(e)
        logger.error('Backy failed.\n')
        sys.exit(100)
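
Note how the configuration object is not built directly: functools.partial pre-binds either the raw file contents (-c/--configfile) or the installed config name, and the resulting Config factory is passed to Commands so each command can construct its own section later. A hedged sketch of that pattern follows, with a toy _Config whose interface is assumed, not taken from backy2.

from functools import partial

class _Config:
    # Toy config: accepts either raw config text (cfg) or a system config name (conf_name).
    def __init__(self, section, cfg=None, conf_name=None):
        self.section = section
        self.source = cfg if cfg is not None else 'system config "{}"'.format(conf_name)

    def get(self, key, default=None):
        return default                # sketch only; a real class would parse self.source

cfg_text = '[DEFAULTS]\nlogfile = /tmp/backy.log\n'
Config = partial(_Config, cfg=cfg_text)   # pre-bind the config source ...
config = Config(section='DEFAULTS')       # ... so callers only choose the section
print(config.get('logfile', '/var/log/backy.log'))
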
Example #29
0
    def scrub(self, version_uid, source=None, percentile=100):
        """ Return a boolean state: True if everything was ok, False if there
        were errors.
        """
        if not self.locking.lock(version_uid):
            raise LockError('Version {} is locked.'.format(version_uid))
        self.locking.unlock(version_uid)  # No need to keep it locked.

        stats = {
                'source_bytes_read': 0,
                'source_blocks_read': 0,
                'bytes_read': 0,
                'blocks_read': 0,
            }

        version = self.meta_backend.get_version(version_uid)  # raises if the version does not exist
        blocks = self.meta_backend.get_blocks_by_version(version_uid)

        # check if the backup is at least complete
        if blocks.count() != version.size:
            logger.error("Version is incomplete.")
            self.meta_backend.set_version_invalid(version_uid)
            return False  # incomplete version; treat as a failed scrub

        if source:
            io = self.get_io_by_source(source)
            io.open_r(source)

        state = True

        notify(self.process_name, 'Preparing Scrub of version {}'.format(version_uid))
        # prepare
        read_jobs = 0
        for block in blocks.yield_per(1000):
            if block.uid:
                if percentile < 100 and random.randint(1, 100) > percentile:
                    logger.debug('Scrub of block {} (UID {}) skipped (percentile is {}).'.format(
                        block.id,
                        block.uid,
                        percentile,
                        ))
                else:
                    self.data_backend.read(block.deref())  # async queue
                    read_jobs += 1
            else:
                logger.debug('Scrub of block {} (UID {}) skipped (sparse).'.format(
                    block.id,
                    block.uid,
                    ))

        # and read
        _log_every_jobs = read_jobs // 200 + 1  # about every half percent
        _log_jobs_counter = 0
        t1 = time.time()
        t_last_run = 0
        for i in range(read_jobs):
            _log_jobs_counter -= 1
            try:
                while True:
                    try:
                        block, offset, length, data = self.data_backend.read_get(timeout=1)
                    except queue.Empty:  # timeout occurred
                        continue
                    else:
                        break
            except Exception as e:
                # log e
                logger.error("Exception during reading from the data backend: {}".format(str(e)))
                # raise  # use if you want to debug.
                # exit with error
                sys.exit(6)
            if data is None:
                logger.error('Blob not found: {}'.format(str(block)))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                state = False
                continue
            stats['blocks_read'] += 1
            stats['bytes_read'] += len(data)
            if len(data) != block.size:
                logger.error('Blob has wrong size: {} is: {} should be: {}'.format(
                    block.uid,
                    len(data),
                    block.size,
                    ))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                state = False
                continue
            data_checksum = self.hash_function(data).hexdigest()
            if data_checksum != block.checksum:
                logger.error('Checksum mismatch during scrub for block '
                    '{} (UID {}) (is: {} should-be: {}).'.format(
                        block.id,
                        block.uid,
                        data_checksum,
                        block.checksum,
                        ))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                state = False
                continue

            if source:
                source_data = io.read(block.id, sync=True)  # TODO: This is still sync, but how could we do better (easily)?
                stats['source_blocks_read'] += 1
                stats['source_bytes_read'] += len(source_data)
                if source_data != data:
                    logger.error('Source data has changed for block {} '
                        '(UID {}) (is: {} should-be: {}). NOT setting '
                        'this block invalid, because the source looks '
                        'wrong.'.format(
                            block.id,
                            block.uid,
                            self.hash_function(source_data).hexdigest(),
                            data_checksum,
                            ))
                    state = False
                    # We are not setting the block invalid here because
                    # when the block is there AND the checksum is good,
                    # then the source is invalid.
            logger.debug('Scrub of block {} (UID {}) ok.'.format(
                block.id,
                block.uid,
                ))

            if time.time() - t_last_run >= 1:
                # TODO: Log source io status
                t_last_run = time.time()
                t2 = time.time()
                dt = t2-t1
                logger.debug(self.data_backend.thread_status())

                db_queue_status = self.data_backend.queue_status()
                _status = status(
                    'Scrubbing {} ({})'.format(version.name, version_uid),
                    db_queue_status['rq_filled']*100,
                    0,
                    (i + 1) / read_jobs * 100,
                    stats['bytes_read'] / dt,
                    round(read_jobs / (i+1) * dt - dt),
                    )
                notify(self.process_name, _status)
                if _log_jobs_counter <= 0:
                    _log_jobs_counter = _log_every_jobs
                    logger.info(_status)

        if state:
            self.meta_backend.set_version_valid(version_uid)
            logger.info('Marked version valid: {}'.format(version_uid))
        else:
            # version is set invalid by set_blocks_invalid.
            logger.error('Marked version invalid because it has errors: {}'.format(version_uid))
        if source:
            io.close()  # wait for all io

        notify(self.process_name)
        return state
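
The --percentile option works by skipping each non-sparse block with probability (100 - percentile) percent, so on average only about percentile percent of the blocks are fetched and checksummed. A small illustrative sketch of that sampling decision follows; blocks_to_scrub is a made-up helper, not part of backy2.

import random

def blocks_to_scrub(block_ids, percentile=100):
    # Yield roughly `percentile` percent of the given block ids, mirroring scrub's sampling.
    for block_id in block_ids:
        if percentile < 100 and random.randint(1, 100) > percentile:
            continue                  # skipped by the percentile sample
        yield block_id

sampled = list(blocks_to_scrub(range(10000), percentile=20))
print(len(sampled))                   # roughly 2000; varies from run to run
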