def __init__(self, config):
    """Connect to S3 (boto) and make sure the configured bucket exists."""
    # Collect connection parameters from the configuration.
    conn_kwargs = {
        'aws_access_key_id': config.get('aws_access_key_id'),
        'aws_secret_access_key': config.get('aws_secret_access_key'),
        'host': config.get('host'),
        'port': config.getint('port'),
        'is_secure': config.getboolean('is_secure'),
        'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
    }
    bucket_name = config.get('bucket_name', 'backy2')
    self.conn = boto.connect_s3(**conn_kwargs)
    # create our bucket
    try:
        self.bucket = self.conn.create_bucket(bucket_name)
    except boto.exception.S3CreateError:
        # bucket already exists
        pass
    except OSError as e:
        # e.g. no route to host
        logger.error('Fatal error, dying: {}'.format(e))
        print('Fatal error: {}'.format(e))
        exit(12)
def _writer(self, id_):
    """ A threaded background writer """
    # Pulls (uid, data) tuples off the write queue until a None sentinel
    # (or a previously recorded fatal error) tells the thread to stop.
    while True:
        entry = self._write_queue.get()
        if entry is None or self.fatal_error:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, data = entry
        # bandwidth throttling: consume() yields the seconds to wait
        time.sleep(self.write_throttling.consume(len(data)))
        t1 = time.time()
        key = self.bucket.new_key(uid)
        try:
            r = key.set_contents_from_string(data)
        except (
                OSError,
                boto.exception.BotoServerError,
                boto.exception.S3ResponseError,
                ) as e:
            # OSError happens when the S3 host is gone (i.e. network died,
            # host down, ...). boto tries hard to recover, however after
            # several attempts it will give up and raise.
            # BotoServerError happens, when there is no server.
            # S3ResponseError sometimes happens, when the cluster is about
            # to shutdown. Hard to reproduce because the writer must write
            # in exactly this moment.
            # We let the backup job die here fataly.
            self.fatal_error = e
            logger.error('Fatal error, dying: {}'.format(e))
            #exit('Fatal error: {}'.format(e)) # this only raises SystemExit
            os._exit(11)
        t2 = time.time()
        # relies on set_contents_from_string returning the byte count
        assert r == len(data)
        self._write_queue.task_done()
        logger.debug('Writer {} wrote data async. uid {} in {:.2f}s (Queue size is {})'.format(id_, uid, t2-t1, self._write_queue.qsize()))
def open_r(self, io_name):
    """Open an rbd image (optionally at a snapshot) for reading and start reader threads."""
    self.mode = 'r'
    self.io_name = io_name
    # io_name has the form rbd://pool/imagename@snapshotname or rbd://pool/imagename
    parsed = re.match('^rbd://([^/]+)/([^@]+)@?(.+)?$', io_name)
    if not parsed:
        raise RuntimeError('Not a valid io name: {} . Need pool/imagename or pool/imagename@snapshotname'.format(io_name))
    self.pool_name, self.image_name, self.snapshot_name = parsed.groups()
    # try opening it and quit if that's not possible.
    try:
        ioctx = self.cluster.open_ioctx(self.pool_name)
    except rados.ObjectNotFound:
        logger.error('Pool not found: {}'.format(self.pool_name))
        exit('Error opening backup source.')
    try:
        rbd.Image(ioctx, self.image_name, self.snapshot_name, read_only=True)
    except rbd.ImageNotFound:
        logger.error('Image/Snapshot not found: {}@{}'.format(self.image_name, self.snapshot_name))
        exit('Error opening backup source.')
    # spawn the daemonized background reader threads
    for idx in range(self.simultaneous_reads):
        worker = threading.Thread(target=self._reader, args=(idx,))
        worker.daemon = True
        worker.start()
        self._reader_threads.append(worker)
def read_raw(self, block_uid):
    """Fetch a block's raw bytes from S3, retrying indefinitely on timeouts.

    Raises FileNotFoundError when the uid has no key in the bucket.
    """
    key = self.bucket.get_key(block_uid)
    if not key:
        raise FileNotFoundError('UID {} not found.'.format(block_uid))
    while True:
        try:
            data = key.get_contents_as_string()
        except socket.timeout:
            logger.error('Timeout while fetching from s3, trying again.')
            continue
        except OSError as e:
            # TODO: This is new and currently untested code. I'm not sure
            # why this happens in favour of socket.timeout and also if it
            # might be better to abort the whole restore/backup/scrub if
            # this happens, because I can't tell if the s3 lib is able to
            # recover from this situation and continue or not. We will see
            # this in the logs next time s3 is generating timeouts.
            logger.error(
                'Timeout while fetching from s3 - error is "{}", trying again.'
                .format(str(e)))
            continue
        break
    # read throttling: consume() yields the seconds to sleep
    time.sleep(self.read_throttling.consume(len(data)))
    return data
def open_w(self, io_name, size=None, force=False):
    """Open a local file as restore target and start the writer threads.

    `size` is the version's size in bytes; it is used to create a missing
    target and to validate an existing one when `force` is given.
    """
    self.mode = 'w'
    match = re.match('^file://(.+)$', io_name)
    if not match:
        raise RuntimeError(
            'Not a valid io name: {} . Need a file path, e.g. file:///somepath/file'
            .format(io_name))
    self.io_name = match.groups()[0]

    if not os.path.exists(self.io_name):
        # create the file at the requested size
        with open(self.io_name, 'wb') as f:
            f.seek(size - 1)
            f.write(b'\0')
    elif not force:
        logger.error('Target already exists: {}'.format(io_name))
        exit(
            'Error opening restore target. You must force the restore.'
        )
    elif self.size() < size:
        logger.error(
            'Target size is too small. Has {}b, need {}b.'.format(
                self.size(), size))
        exit('Error opening restore target.')

    # spawn the daemonized background writer threads
    for idx in range(self.simultaneous_writes):
        worker = threading.Thread(target=self._writer, args=(idx, ))
        worker.daemon = True
        worker.start()
        self._writer_threads.append(worker)
        self.writer_thread_status[idx] = STATUS_NOTHING
def save(self, data):
    """Synchronously store `data` under a fresh uid in the bucket and return the uid."""
    uid = self._uid()
    started = time.time()
    key = self.bucket.new_key(uid)
    try:
        written = key.set_contents_from_string(data)
    except (
            OSError,
            boto.exception.BotoServerError,
            boto.exception.S3ResponseError,
            ) as e:
        # OSError happens when the S3 host is gone (i.e. network died,
        # host down, ...). boto tries hard to recover, however after
        # several attempts it will give up and raise.
        # BotoServerError happens, when there is no server.
        # S3ResponseError sometimes happens, when the cluster is about
        # to shutdown. Hard to reproduce because the writer must write
        # in exactly this moment.
        # We let the backup job die here fataly.
        logger.error('Fatal error, dying: {}'.format(e))
        # exit() would only raise SystemExit, so hard-kill the process
        os._exit(13)
    elapsed = time.time() - started
    # relies on set_contents_from_string returning the byte count
    assert written == len(data)
    logger.debug('Wrote data uid {} in {:.2f}s'.format(uid, elapsed))
    return uid
def cleanup_fast(self, dt=3600):
    """ Delete unreferenced blob UIDs """
    # Serialize cleanups via a global lock; two concurrent runs would fight
    # over the same delete candidates.
    if not self.locking.lock('backy-cleanup-fast'):
        raise LockError('Another backy cleanup is running.')
    # UIDs unreferenced for at least `dt` seconds (per the meta backend).
    delete_candidates = self.meta_backend.get_delete_candidates(dt=dt)
    try:
        for candidates in grouper(100, delete_candidates):
            # 100 is the number that works here smoothly within about 10-30s
            # per batch. With more than 70s there's a timeout and the job
            # is re-sent. Maybe we must either learn the best amount here
            # or we make this configurable...
            logger.debug('Cleanup: Removing UIDs {}'.format(', '.join(candidates)))
            try:
                self.data_backend.rm_many(candidates)
            except FileNotFoundError:
                # batch already gone in the data backend; nothing to do
                continue
    except:
        # NOTE(review): bare except is deliberate here -- any failure must
        # roll back the metadata changes and release the lock before
        # re-raising.
        logger.error('Error during cleanup. Reverting metadata changes.')
        self.meta_backend.revert_delete_candidates(delete_candidates)
        self.locking.unlock('backy-cleanup-fast')
        raise
    else:
        # Only commit the candidate removal after all batches succeeded.
        self.meta_backend.remove_delete_candidates(delete_candidates)
        logger.info('Cleanup: Removed {} blobs'.format(len(delete_candidates)))
    self.locking.unlock('backy-cleanup-fast')
def open_w(self, io_name, size=None, force=False):
    """Open a local file as restore target, creating it when absent.

    `size` is the version's size in bytes.
    """
    self.mode = 'w'
    m = re.match('^file://(.+)$', io_name)
    if not m:
        raise RuntimeError(
            'Not a valid io name: {} . Need a file path, e.g. file:///somepath/file'
            .format(io_name))
    self.io_name = m.groups()[0]

    if not os.path.exists(self.io_name):
        # create the file at the requested size
        with open(self.io_name, 'wb') as f:
            f.seek(size - 1)
            f.write(b'\0')
    else:
        if not force:
            logger.error('Target already exists: {}'.format(io_name))
            exit(
                'Error opening restore target. You must force the restore.'
            )
        if self.size() < size:
            logger.error(
                'Target size is too small. Has {}b, need {}b.'.format(
                    self.size(), size))
            exit('Error opening restore target.')
def backup(self, name, snapshot_name, source, rbd, from_version, tag=None, expire=None, continue_version=None):
    """CLI entry point: run a backup, printing the new version uid in machine mode.

    rbd: optional path to an 'rbd diff' output file used as block hints.
    tag: optional comma-separated tag list.
    expire: optional expiry date string (parse errors exit with code 1).
    """
    expire_date = None
    if expire:
        try:
            expire_date = parse_expire_date(expire)
        except ValueError as e:
            logger.error(str(e))
            exit(1)
    backy = self.backy()
    hints = None
    if rbd:
        # BUGFIX: the old code iterated over fileinput...readline(), i.e.
        # over the CHARACTERS of the first line only, silently truncating
        # the diff. Join all lines of the input instead.
        data = ''.join(fileinput.input(rbd))
        hints = hints_from_rbd_diff(data)
    if tag:
        # parse the comma-separated tag list with the csv module
        tags = [t.strip() for t in list(csv.reader(StringIO(tag)))[0]]
    else:
        tags = None
    version_uid = backy.backup(name, snapshot_name, source, hints, from_version, tags, expire_date, continue_version)
    if self.machine_output:
        print(version_uid)
    backy.close()
def read_raw(self, block_uid, _bucket=None):
    """Fetch a block from S3 (boto3 resource API); retries forever on timeouts.

    Raises FileNotFoundError when the key does not exist.
    """
    if not _bucket:
        _bucket = self.bucket
    while True:
        obj = _bucket.Object(block_uid)
        try:
            data_dict = obj.get()
            data = data_dict['Body'].read()
        except ClientError as e:
            if e.response['Error']['Code'] == 'NoSuchKey' or e.response['Error']['Code'] == '404':
                # BUGFIX: this raise referenced the undefined name `key`,
                # which turned a missing block into a NameError.
                raise FileNotFoundError('Key {} not found.'.format(block_uid)) from None
            else:
                raise
        except socket.timeout:
            logger.error('Timeout while fetching from s3, trying again.')
        except OSError as e:
            # TODO: This is new and currently untested code. I'm not sure
            # why this happens in favour of socket.timeout and also if it
            # might be better to abort the whole restore/backup/scrub if
            # this happens, because I can't tell if the s3 lib is able to
            # recover from this situation and continue or not. We will see
            # this in the logs next time s3 is generating timeouts.
            logger.error('Timeout while fetching from s3 - error is "{}", trying again.'.format(str(e)))
        else:
            break
    time.sleep(self.read_throttling.consume(len(data)))  # TODO: Need throttling in thread statistics!
    return data
def __init__(self, config):
    """Initialize the S3 data backend: throttling, connection, bucket, queues and worker threads."""
    # configuration
    aws_access_key_id = config.get('aws_access_key_id')
    aws_secret_access_key = config.get('aws_secret_access_key')
    host = config.get('host')
    port = config.getint('port')
    is_secure = config.getboolean('is_secure')
    bucket_name = config.get('bucket_name', 'backy2')
    simultaneous_writes = config.getint('simultaneous_writes', 1)
    simultaneous_reads = config.getint('simultaneous_reads', 1)
    calling_format = boto.s3.connection.OrdinaryCallingFormat()
    bandwidth_read = config.getint('bandwidth_read', 0)
    bandwidth_write = config.getint('bandwidth_write', 0)
    self.read_throttling = TokenBucket()
    self.read_throttling.set_rate(bandwidth_read)  # 0 disables throttling
    self.write_throttling = TokenBucket()
    self.write_throttling.set_rate(
        bandwidth_write)  # 0 disables throttling
    self.conn = boto.connect_s3(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        host=host,
        port=port,
        is_secure=is_secure,
        calling_format=calling_format)
    # create our bucket
    try:
        self.bucket = self.conn.create_bucket(bucket_name)
    except boto.exception.S3CreateError:
        # exists... fall back to opening the existing bucket
        self.bucket = self.conn.get_bucket(bucket_name)
        pass  # NOTE(review): redundant after the assignment above
    except OSError as e:
        # no route to host
        self.fatal_error = e
        logger.error('Fatal error, dying: {}'.format(e))
        print('Fatal error: {}'.format(e))
        exit(10)
    # queue sizes scale with the number of workers
    self.write_queue_length = simultaneous_writes + self.WRITE_QUEUE_LENGTH
    self.read_queue_length = simultaneous_reads + self.READ_QUEUE_LENGTH
    self._write_queue = queue.Queue(self.write_queue_length)
    self._read_queue = queue.Queue()
    self._read_data_queue = queue.Queue(self.read_queue_length)
    self._writer_threads = []
    self._reader_threads = []
    # spawn the daemonized background worker threads
    for i in range(simultaneous_writes):
        _writer_thread = threading.Thread(target=self._writer, args=(i, ))
        _writer_thread.daemon = True
        _writer_thread.start()
        self._writer_threads.append(_writer_thread)
    for i in range(simultaneous_reads):
        _reader_thread = threading.Thread(target=self._reader, args=(i, ))
        _reader_thread.daemon = True
        _reader_thread.start()
        self._reader_threads.append(_reader_thread)
def rm_many(self, uids):
    """Delete many uids from the data backend.

    Per-object deletion errors are logged, not raised. Note that
    remove_objects returns a lazy iterator of deletion errors; it must be
    consumed for the deletes to actually happen.

    NOTE(review): the previous docstring promised a list of undeletable
    uids as return value, but nothing was ever returned; the docstring has
    been corrected instead of changing the (long-standing) behavior. The
    former `except ResponseError: raise` was a no-op and was removed.
    """
    for del_err in self.client.remove_objects(self.bucket_name, uids):
        logger.error("S3 Object Deletion Error: {}".format(del_err))
def __init__(self, config):
    """Initialize the S3 data backend: throttling, connection, bucket, queues and worker threads."""
    # -- configuration -------------------------------------------------
    aws_access_key_id = config.get('aws_access_key_id')
    aws_secret_access_key = config.get('aws_secret_access_key')
    host = config.get('host')
    port = config.getint('port')
    is_secure = config.getboolean('is_secure')
    bucket_name = config.get('bucket_name', 'backy2')
    simultaneous_writes = config.getint('simultaneous_writes', 1)
    simultaneous_reads = config.getint('simultaneous_reads', 1)
    bandwidth_read = config.getint('bandwidth_read', 0)
    bandwidth_write = config.getint('bandwidth_write', 0)

    # -- bandwidth throttling (a rate of 0 disables throttling) --------
    self.read_throttling = TokenBucket()
    self.read_throttling.set_rate(bandwidth_read)
    self.write_throttling = TokenBucket()
    self.write_throttling.set_rate(bandwidth_write)

    # -- connection and bucket -----------------------------------------
    self.conn = boto.connect_s3(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        host=host,
        port=port,
        is_secure=is_secure,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )
    try:
        self.bucket = self.conn.create_bucket(bucket_name)
    except boto.exception.S3CreateError:
        # bucket already exists
        pass
    except OSError as e:
        # e.g. no route to host
        self.fatal_error = e
        logger.error('Fatal error, dying: {}'.format(e))
        print('Fatal error: {}'.format(e))
        exit(10)

    # -- queues and worker threads -------------------------------------
    self.write_queue_length = simultaneous_writes + self.WRITE_QUEUE_LENGTH
    self.read_queue_length = simultaneous_reads + self.READ_QUEUE_LENGTH
    self._write_queue = queue.Queue(self.write_queue_length)
    self._read_queue = queue.Queue()
    self._read_data_queue = queue.Queue(self.read_queue_length)
    self._writer_threads = []
    self._reader_threads = []
    for thread_no in range(simultaneous_writes):
        worker = threading.Thread(target=self._writer, args=(thread_no,))
        worker.daemon = True
        worker.start()
        self._writer_threads.append(worker)
    for thread_no in range(simultaneous_reads):
        worker = threading.Thread(target=self._reader, args=(thread_no,))
        worker.daemon = True
        worker.start()
        self._reader_threads.append(worker)
def __init__(self, config):
    """Initialize the Azure blob data backend: throttling, connection, container, queues and worker threads."""
    azure_access_key_id = config.get('azure_access_key_id')
    azure_secret_access_key = config.get('azure_secret_access_key')
    container_name = config.get('bucket_name', 'backy2')
    simultaneous_writes = config.getint('simultaneous_writes', 1)
    simultaneous_reads = config.getint('simultaneous_reads', 1)
    bandwidth_read = config.getint('bandwidth_read', 0)
    bandwidth_write = config.getint('bandwidth_write', 0)
    self.read_throttling = TokenBucket()
    self.read_throttling.set_rate(bandwidth_read)  # 0 disables throttling
    self.write_throttling = TokenBucket()
    self.write_throttling.set_rate(bandwidth_write)  # 0 disables throttling
    self.container_name = container_name
    self.conn = BlockBlobService(
        account_name=azure_access_key_id,
        account_key=azure_secret_access_key
    )
    # create our container (create_container succeeds when it already exists)
    try:
        self.conn.create_container(container_name)
    except Exception as e:
        # CLEANUP: was `except (OSError, Exception)` -- the tuple was
        # redundant because Exception already subsumes OSError (e.g. no
        # route to host). Dead commented-out boto code removed.
        # NOTE(review): this also swallows auth/config errors; confirm
        # that dying with exit(10) is intended for those too.
        self.fatal_error = e
        logger.error('Fatal error, dying: {}'.format(e))
        print('Fatal error: {}'.format(e))
        exit(10)
    # queue sizes scale with the number of workers
    self.write_queue_length = simultaneous_writes + self.WRITE_QUEUE_LENGTH
    self.read_queue_length = simultaneous_reads + self.READ_QUEUE_LENGTH
    self._write_queue = queue.Queue(self.write_queue_length)
    self._read_queue = queue.Queue()
    self._read_data_queue = queue.Queue(self.read_queue_length)
    self._writer_threads = []
    self._reader_threads = []
    # spawn the daemonized background worker threads
    for i in range(simultaneous_writes):
        _writer_thread = threading.Thread(target=self._writer, args=(i,))
        _writer_thread.daemon = True
        _writer_thread.start()
        self._writer_threads.append(_writer_thread)
    for i in range(simultaneous_reads):
        _reader_thread = threading.Thread(target=self._reader, args=(i,))
        _reader_thread.daemon = True
        _reader_thread.start()
        self._reader_threads.append(_reader_thread)
def restore(self, version_uid, target, sparse=False, force=False):
    """Restore a version to `target` using the async data backend read queue.

    sparse: skip writing zero blocks (target must already be zeroed).
    force: allow writing into an existing target (passed to io.open_w).
    """
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    version = self.meta_backend.get_version(version_uid)  # raise if version not exists
    notify(self.process_name, 'Restoring Version {}. Getting blocks.'.format(version_uid))
    blocks = self.meta_backend.get_blocks_by_version(version_uid)
    io = self.get_io_by_source(target)
    io.open_w(target, version.size_bytes, force)
    # Phase 1: enqueue read jobs for data-bearing blocks; write sparse
    # blocks directly.
    read_jobs = 0
    for i, block in enumerate(blocks):
        if block.uid:
            self.data_backend.read(block.deref())  # adds a read job
            read_jobs += 1
        elif not sparse:
            io.write(block, b'\0'*block.size)
            logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                block.id,
                block.size,
                ))
        else:
            logger.debug('Ignored sparse block {}.'.format(
                block.id,
                ))
        notify(self.process_name, 'Restoring Version {} to {} PREPARING AND SPARSE BLOCKS ({:.1f}%)'.format(version_uid, target, (i + 1) / len(blocks) * 100))
    # Phase 2: collect the read results (in completion order) and write them.
    done_jobs = 0  # NOTE(review): never used below
    _log_every_jobs = read_jobs // 200 + 1  # about every half percent
    for i in range(read_jobs):
        block, offset, length, data = self.data_backend.read_get()
        assert len(data) == block.size
        data_checksum = self.hash_function(data).hexdigest()
        # the block is written even on checksum mismatch; it is flagged
        # invalid in the metadata below
        io.write(block, data)
        if data_checksum != block.checksum:
            logger.error('Checksum mismatch during restore for block '
                '{} (is: {} should-be: {}, block-valid: {}). Block '
                'restored is invalid. Continuing.'.format(
                    block.id,
                    data_checksum,
                    block.checksum,
                    block.valid,
                    ))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
        else:
            logger.debug('Restored block {} successfully ({} bytes).'.format(
                block.id,
                block.size,
                ))
        notify(self.process_name, 'Restoring Version {} to {} ({:.1f}%)'.format(version_uid, target, (i + 1) / read_jobs * 100))
        if i % _log_every_jobs == 0 or i + 1 == read_jobs:
            logger.info('Restored {}/{} blocks ({:.1f}%)'.format(i + 1, read_jobs, (i + 1) / read_jobs * 100))
    self.locking.unlock(version_uid)
def rm(self, uid):
    """Delete the object stored under `uid`; a missing key is logged, not raised."""
    obj = self.bucket.Object(uid)
    try:
        obj.load()  # existence check: raises ClientError when missing
    except ClientError as e:
        code = e.response['Error']['Code']
        if code == 'NoSuchKey' or code == '404':
            logger.error('Unable to remove block: key {} not found.'.format(uid))
        else:
            raise
    else:
        obj.delete()
def open(self):
    """Open the metadata backend: run DB migrations and create a session.

    Returns self so the call can be chained.
    """
    try:
        self.migrate_db(self.engine)
    except Exception:
        # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Typically this is
        # sqlalchemy.exc.OperationalError when the schema is missing.
        logger.error('Invalid database ({}). Please run initdb first.'.format(self.engine.url))
        sys.exit(1)  # TODO: Return something (or raise)
        #raise RuntimeError('Invalid database')
    Session = sessionmaker(bind=self.engine)
    self.session = Session()
    self._flush_block_counter = 0
    return self
def read_raw(self, block_uid):
    """Fetch a block's raw bytes from S3; retry forever on socket timeouts.

    Raises FileNotFoundError when the uid has no key in the bucket.
    """
    key = self.bucket.get_key(block_uid)
    if not key:
        raise FileNotFoundError('UID {} not found.'.format(block_uid))
    while True:
        try:
            data = key.get_contents_as_string()
        except socket.timeout:
            logger.error('Timeout while fetching from s3, trying again.')
            continue
        break
    # read throttling: consume() yields the seconds to sleep
    time.sleep(self.read_throttling.consume(len(data)))
    return data
def expire(self, version_uid, expire):
    """CLI entry point: set (or clear, with an empty string) a version's expire date."""
    if not expire:  # empty string clears the expire date
        expire_date = None
    else:
        try:
            expire_date = parse_expire_date(expire)
        except ValueError as e:
            logger.error(str(e))
            exit(1)
    backy = self.backy()
    try:
        backy.expire_version(version_uid, expire_date)
    except Exception:
        # BUGFIX: was a bare `except:` (also catching SystemExit etc.) and
        # used the deprecated logger.warn alias.
        logger.warning('Unable to expire version.')
    finally:
        # BUGFIX: close() was skipped when expire_version raised; always
        # release the backend now.
        backy.close()
def import_(self, filename='-'):
    """Import backup metadata, either from stdin ('-') or from a file."""
    backy = self.backy()
    try:
        try:
            if filename == '-':
                backy.import_(sys.stdin)
            else:
                with open(filename, 'r') as source:
                    backy.import_(source)
        except KeyError as e:
            logger.error(str(e))
            exit(22)
        except ValueError as e:
            logger.error(str(e))
            exit(23)
    finally:
        # always release the backend, even when exiting with an error
        backy.close()
def import_(self, filename='-'):
    """Import backup metadata from a file, or from stdin when filename is '-'."""
    backy = self.backy()
    try:
        if filename == '-':
            backy.import_(sys.stdin)
            return
        with open(filename, 'r') as f:
            backy.import_(f)
    except KeyError as e:
        logger.error(str(e))
        exit(22)
    except ValueError as e:
        logger.error(str(e))
        exit(23)
    finally:
        backy.close()
def read_raw(self, block_uid):
    """Fetch a blob's bytes from Azure, retrying until a read succeeds."""
    while True:
        try:
            blob = self.conn.get_blob_to_bytes(
                container_name=self.container_name,
                blob_name=block_uid,
                validate_content=True,
            )
            data = blob.content
        except Exception as e:
            # CLEANUP: was `except (OSError, Exception)` -- the tuple was
            # redundant since Exception subsumes OSError.
            # TODO: Check what is the exact exception throwed here to show if has error
            # NOTE(review): a permanently missing blob loops forever here;
            # consider re-raising not-found errors instead of retrying.
            logger.error('Timeout while fetching from azure - error is "{}"'.format(str(e)))
        else:
            break
    # read throttling: consume() yields the seconds to sleep
    time.sleep(self.read_throttling.consume(len(data)))
    return data
def restore(self, version_uid, target, sparse=False):
    """Restore a version directly into the local file `target`.

    sparse: skip writing zero blocks (leaves whatever was in the file).
    """
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    version = self.meta_backend.get_version(version_uid)  # raise if version not exists
    blocks = self.meta_backend.get_blocks_by_version(version_uid)
    with open(target, 'wb') as f:
        for block in blocks:
            # position by block id; blocks are block_size-sized, except
            # possibly the last one
            f.seek(block.id * self.block_size)
            if block.uid:
                data = self.data_backend.read(block.uid)
                assert len(data) == block.size
                data_checksum = self.hash_function(data).hexdigest()
                written = f.write(data)
                assert written == len(data)
                if data_checksum != block.checksum:
                    logger.error('Checksum mismatch during restore for block '
                        '{} (is: {} should-be: {}, block-valid: {}). Block '
                        'restored is invalid. Continuing.'.format(
                            block.id,
                            data_checksum,
                            block.checksum,
                            block.valid,
                            ))
                    self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                else:
                    logger.debug('Restored block {} successfully ({} bytes).'.format(
                        block.id,
                        block.size,
                        ))
            elif not sparse:
                f.write(b'\0'*block.size)
                logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                    block.id,
                    block.size,
                    ))
            else:
                logger.debug('Ignored sparse block {}.'.format(
                    block.id,
                    ))
        if f.tell() != version.size_bytes:
            # write last byte with \0, because this can only happen when
            # the last block was left over in sparse mode.
            last_block = blocks[-1]
            f.seek(last_block.id * self.block_size + last_block.size - 1)
            f.write(b'\0')
    self.locking.unlock(version_uid)
def restore(self, version_uid, target, sparse=False, force=False):
    """Restore a version to `target` using synchronous data backend reads.

    sparse: skip writing zero blocks (target must already be zeroed).
    force: allow writing into an existing target (passed to io.open_w).
    """
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    version = self.meta_backend.get_version(version_uid)  # raise if version not exists
    blocks = self.meta_backend.get_blocks_by_version(version_uid)
    io = self.get_io_by_source(target)
    io.open_w(target, version.size_bytes, force)
    for block in blocks:
        if block.uid:
            data = self.data_backend.read(block, sync=True)
            assert len(data) == block.size
            data_checksum = self.hash_function(data).hexdigest()
            # the block is written even on checksum mismatch; it is
            # flagged invalid in the metadata below
            io.write(block, data)
            if data_checksum != block.checksum:
                logger.error('Checksum mismatch during restore for block '
                    '{} (is: {} should-be: {}, block-valid: {}). Block '
                    'restored is invalid. Continuing.'.format(
                        block.id,
                        data_checksum,
                        block.checksum,
                        block.valid,
                        ))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            else:
                logger.debug('Restored block {} successfully ({} bytes).'.format(
                    block.id,
                    block.size,
                    ))
        elif not sparse:
            io.write(block, b'\0'*block.size)
            logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                block.id,
                block.size,
                ))
        else:
            logger.debug('Ignored sparse block {}.'.format(
                block.id,
                ))
    self.locking.unlock(version_uid)
def open_w(self, io_name, size=None, force=False):
    """Open a local file as restore target.

    io_name: 'file://<path>'.
    size: the version's size in bytes (used to create a missing target and
          to validate an existing one).
    force: allow writing into an existing target.
    """
    self.mode = 'w'
    _s = re.match('^file://(.+)$', io_name)
    if not _s:
        raise RuntimeError('Not a valid io name: {} . Need a file path, e.g. file:///somepath/file'.format(io_name))
    self.io_name = _s.groups()[0]
    if os.path.exists(self.io_name):
        if not force:
            logger.error('Target already exists: {}'.format(io_name))
            exit('Error opening restore target. You must force the restore.')
        else:
            # BUGFIX: the comparison was inverted (`size < self.size()`),
            # rejecting targets LARGER than the version and accepting
            # too-small ones. The error message and the sibling file-io
            # implementations agree that a target smaller than the version
            # is the error case.
            if self.size() < size:
                logger.error('Target size is too small. Has {}b, need {}b.'.format(self.size(), size))
                exit('Error opening restore target.')
    else:
        # create the file at the version's full size
        with open(self.io_name, 'wb') as f:
            f.seek(size - 1)
            f.write(b'\0')
def open_w(self, io_name, size=None, force=False):
    """ size is bytes """
    self.mode = 'w'
    # io_name has the form rbd://pool/imagename (no snapshot for targets)
    self.io_name = io_name
    img_name = re.match('^rbd://([^/]+)/([^@]+)$', io_name)
    if not img_name:
        raise RuntimeError('Not a valid io name: {} . Need pool/imagename'.format(io_name))
    self.pool_name, self.image_name = img_name.groups()
    # try opening it and quit if that's not possible.
    try:
        ioctx = self.cluster.open_ioctx(self.pool_name)
    except rados.ObjectNotFound:
        logger.error('Pool not found: {}'.format(self.pool_name))
        exit('Error opening backup source.')
    try:
        rbd.Image(ioctx, self.image_name)
    except rbd.ImageNotFound:
        # target image does not exist yet: create it at the version's size
        rbd.RBD().create(ioctx, self.image_name, size, old_format=False, features=self.new_image_features)
    else:
        if not force:
            logger.error('Image already exists: {}'.format(self.image_name))
            exit('Error opening restore target.')
        else:
            # BUGFIX: the comparison was inverted (`size < self.size()`);
            # the error case is a target SMALLER than the version, as the
            # message below states.
            if self.size() < size:
                logger.error('Target size is too small. Has {}b, need {}b.'.format(self.size(), size))
                exit('Error opening restore target.')
def _writer(self, id_):
    """ A threaded background writer """
    # Pulls (uid, data) tuples off the write queue until a None sentinel
    # (or a previously recorded fatal error) tells the thread to stop.
    while True:
        entry = self._write_queue.get()
        if entry is None or self.fatal_error:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, data = entry
        # bandwidth throttling: consume() yields the seconds to wait
        time.sleep(self.write_throttling.consume(len(data)))
        t1 = time.time()
        try:
            payload = data
            if not isinstance(payload, bytes):
                payload = payload.encode("utf-8")
            # CLEANUP: the old code wrapped the bytes in a BytesIO only to
            # call .getvalue() on it immediately -- a pointless copy.
            # Dead commented-out create_blob_from_text code removed.
            self.conn.create_blob_from_bytes(
                container_name=self.container_name,
                blob_name=uid,
                blob=payload,
                validate_content=True,
            )
        except Exception as e:
            # CLEANUP: was `except (OSError, Exception)` -- the tuple was
            # redundant since Exception subsumes OSError.
            # We let the backup job die here fataly.
            self.fatal_error = e
            logger.error('Fatal error, dying: {}'.format(e))
            print('Error on Write File', e)
            #exit('Fatal error: {}'.format(e)) # this only raises SystemExit
            os._exit(11)
        t2 = time.time()
        self._write_queue.task_done()
        logger.debug('Writer {} wrote data async. uid {} in {:.2f}s (Queue size is {})'.format(id_, uid, t2-t1, self._write_queue.qsize()))
def _writer(self, id_):
    """Background writer thread: drains the write queue into the S3 bucket."""
    while True:
        entry = self._write_queue.get()
        # A None sentinel (or a previously recorded fatal error) shuts the
        # thread down.
        if entry is None or self.fatal_error:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, data = entry
        # bandwidth throttling
        time.sleep(self.write_throttling.consume(len(data)))
        started = time.time()
        key = self.bucket.new_key(uid)
        try:
            written = key.set_contents_from_string(data)
        except (
                OSError,
                boto.exception.BotoServerError,
                boto.exception.S3ResponseError,
                ) as e:
            # OSError: the S3 host is gone (network died, host down, ...);
            # boto retries internally but eventually gives up and raises.
            # BotoServerError: there is no server.
            # S3ResponseError: may occur while the cluster shuts down.
            # We let the backup job die here fataly.
            self.fatal_error = e
            logger.error('Fatal error, dying: {}'.format(e))
            #exit('Fatal error: {}'.format(e)) # this only raises SystemExit
            os._exit(11)
        elapsed = time.time() - started
        # relies on set_contents_from_string returning the byte count
        assert written == len(data)
        self._write_queue.task_done()
        logger.debug(
            'Writer {} wrote data async. uid {} in {:.2f}s (Queue size is {})'
            .format(id_, uid, elapsed, self._write_queue.qsize()))
def nbd(self, version_uid, bind_address, bind_port, read_only):
    """Serve backup versions over NBD (Enterprise-only feature)."""
    try:
        from backy2.enterprise.nbdserver import Server as NbdServer
        from backy2.enterprise.nbd import BackyStore
    except ImportError:
        logger.error('NBD is available in the Enterprise Version only.')
        sys.exit(21)
    backy = self.backy()
    nbd_cfg = self.Config(section='NBD')
    defaults_cfg = self.Config(section='DEFAULTS')
    digest = getattr(hashlib, defaults_cfg.get('hash_function', 'sha512'))
    backy_store = BackyStore(
        backy,
        cachedir=nbd_cfg.get('cachedir'),
        hash_function=digest,
    )
    listen_addr = (bind_address, bind_port)
    nbd_server = NbdServer(listen_addr, backy_store, read_only)
    # usage hints for the operator
    logger.info("Starting to serve nbd on %s:%s" % (listen_addr[0], listen_addr[1]))
    logger.info("You may now start")
    logger.info(" nbd-client -l %s -p %s" % (listen_addr[0], listen_addr[1]))
    logger.info("and then get the backup via")
    logger.info(" modprobe nbd")
    logger.info(" nbd-client -N <version> %s -p %s /dev/nbd0" % (listen_addr[0], listen_addr[1]))
    nbd_server.serve_forever()
def open_w(self, io_name, size=None, force=False):
    """ size is bytes """
    self.mode = 'w'
    # io_name has the form rbd://pool/imagename (no snapshot for targets)
    self.io_name = io_name
    img_name = re.match('^rbd://([^/]+)/([^@]+)$', io_name)
    if not img_name:
        raise RuntimeError(
            'Not a valid io name: {} . Need pool/imagename'.format(
                io_name))
    self.pool_name, self.image_name = img_name.groups()
    # try opening it and quit if that's not possible.
    try:
        ioctx = self.cluster.open_ioctx(self.pool_name)
    except rados.ObjectNotFound:
        logger.error('Pool not found: {}'.format(self.pool_name))
        exit('Error opening backup source.')
    try:
        rbd.Image(ioctx, self.image_name)
    except rbd.ImageNotFound:
        # target image does not exist yet: create it at the version's size
        rbd.RBD().create(ioctx, self.image_name, size, old_format=False, features=self.new_image_features)
    else:
        if not force:
            logger.error('Image already exists: {}'.format(
                self.image_name))
            exit('Error opening restore target.')
        else:
            # BUGFIX: the comparison was inverted (`size < self.size()`);
            # the error case is a target SMALLER than the version, as the
            # message below states.
            if self.size() < size:
                logger.error(
                    'Target size is too small. Has {}b, need {}b.'.format(
                        self.size(), size))
                exit('Error opening restore target.')
    # spawn the daemonized background writer threads
    for i in range(self.simultaneous_writes):
        _writer_thread = threading.Thread(target=self._writer, args=(i, ))
        _writer_thread.daemon = True
        _writer_thread.start()
        self._writer_threads.append(_writer_thread)
        self.writer_thread_status[i] = STATUS_NOTHING
    # open the target image for the writer threads
    ioctx = self.cluster.open_ioctx(self.pool_name)
    self._write_rbd = rbd.Image(ioctx, self.image_name)
def scrub(self, version_uid, source=None, percentile=100):
    """ Returns a boolean (state). If False, there were errors, if True all was ok """
    # Async scrub: reads every (sampled) block back from the data backend
    # and verifies length and checksum; with `source` given it also
    # compares against the original backup source.
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    self.locking.unlock(version_uid)  # No need to keep it locked.
    stats = {
        'source_bytes_read': 0,
        'source_blocks_read': 0,
        'bytes_read': 0,
        'blocks_read': 0,
    }
    version = self.meta_backend.get_version(version_uid)  # raise if version not exists
    blocks = self.meta_backend.get_blocks_by_version(version_uid)
    # check if the backup is at least complete
    if blocks.count() != version.size:
        logger.error("Version is incomplete.")
        self.meta_backend.set_version_invalid(version_uid)
        return
    if source:
        io = self.get_io_by_source(source)
        io.open_r(source)
    state = True
    notify(self.process_name, 'Preparing Scrub of version {}'.format(version_uid))
    # prepare: enqueue async read jobs for the blocks selected by the
    # percentile sampling
    read_jobs = 0
    for block in blocks.yield_per(1000):
        if block.uid:
            if percentile < 100 and random.randint(1, 100) > percentile:
                logger.debug('Scrub of block {} (UID {}) skipped (percentile is {}).'.format(
                    block.id,
                    block.uid,
                    percentile,
                    ))
            else:
                self.data_backend.read(block.deref())  # async queue
                read_jobs += 1
        else:
            logger.debug('Scrub of block {} (UID {}) skipped (sparse).'.format(
                block.id,
                block.uid,
                ))
    # and read
    _log_every_jobs = read_jobs // 200 + 1  # about every half percent
    _log_jobs_counter = 0
    t1 = time.time()
    t_last_run = 0
    for i in range(read_jobs):
        _log_jobs_counter -= 1
        try:
            # poll with a 1s timeout so the loop stays responsive
            while True:
                try:
                    block, offset, length, data = self.data_backend.read_get(timeout=1)
                except queue.Empty:
                    # timeout occured
                    continue
                else:
                    break
        except Exception as e:
            # log e
            logger.error("Exception during reading from the data backend: {}".format(str(e)))
            # raise  # use if you want to debug.
            # exit with error
            sys.exit(6)
        if data is None:
            logger.error('Blob not found: {}'.format(str(block)))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            state = False
            continue
        stats['blocks_read'] += 1
        stats['bytes_read'] += len(data)
        if len(data) != block.size:
            logger.error('Blob has wrong size: {} is: {} should be: {}'.format(
                block.uid,
                len(data),
                block.size,
                ))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            state = False
            continue
        data_checksum = self.hash_function(data).hexdigest()
        if data_checksum != block.checksum:
            logger.error('Checksum mismatch during scrub for block '
                '{} (UID {}) (is: {} should-be: {}).'.format(
                    block.id,
                    block.uid,
                    data_checksum,
                    block.checksum,
                    ))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            state = False
            continue
        if source:
            source_data = io.read(block.id, sync=True)  # TODO: This is still sync, but how could we do better (easily)?
            stats['source_blocks_read'] += 1
            stats['source_bytes_read'] += len(source_data)
            if source_data != data:
                logger.error('Source data has changed for block {} '
                    '(UID {}) (is: {} should-be: {}). NOT setting '
                    'this block invalid, because the source looks '
                    'wrong.'.format(
                        block.id,
                        block.uid,
                        self.hash_function(source_data).hexdigest(),
                        data_checksum,
                        ))
                state = False
                # We are not setting the block invalid here because
                # when the block is there AND the checksum is good,
                # then the source is invalid.
        logger.debug('Scrub of block {} (UID {}) ok.'.format(
            block.id,
            block.uid,
            ))
        # emit progress/status at most once per second
        if time.time() - t_last_run >= 1:
            # TODO: Log source io status
            t_last_run = time.time()
            t2 = time.time()
            dt = t2-t1
            logger.debug(self.data_backend.thread_status())
            db_queue_status = self.data_backend.queue_status()
            _status = status(
                'Scrubbing {} ({})'.format(version.name, version_uid),
                db_queue_status['rq_filled']*100,
                0,
                (i + 1) / read_jobs * 100,
                stats['bytes_read'] / dt,
                round(read_jobs / (i+1) * dt - dt),
            )
            notify(self.process_name, _status)
            if _log_jobs_counter <= 0:
                _log_jobs_counter = _log_every_jobs
                logger.info(_status)
    if state == True:
        self.meta_backend.set_version_valid(version_uid)
        logger.info('Marked version valid: {}'.format(version_uid))
    else:
        # version is set invalid by set_blocks_invalid.
        logger.error('Marked version invalid because it has errors: {}'.format(version_uid))
    if source:
        io.close()  # wait for all io
    notify(self.process_name)
    return state
def scrub(self, version_uid, source=None, percentile=100):
    """ Scrub a version: read every stored block and verify size and checksum.

    If source is given, additionally compare each block's payload against the
    original source data. If percentile < 100, only roughly that percentage of
    blocks is checked (randomly chosen).

    Returns a boolean (state). If False, there were errors, if True
    all was ok.

    Raises LockError if the version is currently locked by another job.

    Fix over the previous revision: the version lock is now released in a
    ``finally`` block, so an unexpected exception (e.g. from the data backend)
    no longer leaves the version permanently locked.
    """
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    try:
        self.meta_backend.get_version(version_uid)  # raise if version not exists
        blocks = self.meta_backend.get_blocks_by_version(version_uid)
        if source:
            self.reader.open(source)

        state = True
        for block in blocks:
            if block.uid:
                # Percentile sampling: randomly skip blocks when percentile < 100.
                if percentile < 100 and random.randint(1, 100) > percentile:
                    logger.debug('Scrub of block {} (UID {}) skipped (percentile is {}).'.format(
                        block.id,
                        block.uid,
                        percentile,
                        ))
                    continue
                try:
                    data = self.data_backend.read(block.uid)
                except FileNotFoundError as e:
                    logger.error('Blob not found: {}'.format(str(e)))
                    self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                    state = False
                    continue
                # Size check before the (more expensive) checksum.
                if len(data) != block.size:
                    logger.error('Blob has wrong size: {} is: {} should be: {}'.format(
                        block.uid,
                        len(data),
                        block.size,
                        ))
                    self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                    state = False
                    continue
                data_checksum = self.hash_function(data).hexdigest()
                if data_checksum != block.checksum:
                    logger.error('Checksum mismatch during scrub for block '
                        '{} (UID {}) (is: {} should-be: {}).'.format(
                            block.id,
                            block.uid,
                            data_checksum,
                            block.checksum,
                            ))
                    self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                    state = False
                    continue
                if source:
                    # TODO: This is still sync; compare backup block against source.
                    source_data = self.reader.read(block, sync=True)
                    if source_data != data:
                        logger.error('Source data has changed for block {} '
                            '(UID {}) (is: {} should-be: {}). NOT setting '
                            'this block invalid, because the source looks '
                            'wrong.'.format(
                                block.id,
                                block.uid,
                                self.hash_function(source_data).hexdigest(),
                                data_checksum,
                                ))
                        state = False
                        # We are not setting the block invalid here because
                        # when the block is there AND the checksum is good,
                        # then the source is invalid.
                logger.debug('Scrub of block {} (UID {}) ok.'.format(
                    block.id,
                    block.uid,
                    ))
            else:
                # Sparse blocks have no uid, so there is nothing to verify.
                logger.debug('Scrub of block {} (UID {}) skipped (sparse).'.format(
                    block.id,
                    block.uid,
                    ))
        if state:
            self.meta_backend.set_version_valid(version_uid)
        else:
            # version is set invalid by set_blocks_invalid.
            logger.error('Marked version invalid because it has errors: {}'.format(version_uid))
        if source:
            self.reader.close()  # wait for all readers
    finally:
        # Always release the lock, even when an unexpected exception occurred.
        self.locking.unlock(version_uid)
    return state
def scrub(self, version_uid, source=None, percentile=100):
    """ Scrub a version using the asynchronous data-backend read queue.

    All block reads are first enqueued (``data_backend.read``), then their
    results are consumed via ``data_backend.read_get`` and verified for size
    and checksum. If source is given, each block is also compared against the
    source data. If percentile < 100, only roughly that percentage of blocks
    is checked (randomly sampled).

    Returns a boolean (state). If False, there were errors, if True
    all was ok.

    Raises LockError if the version is currently locked by another job.
    NOTE(review): the lock is not released if an exception escapes before the
    final unlock — confirm whether a stale lock is intended on crash.
    """
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    self.meta_backend.get_version(version_uid)  # raise if version not exists
    blocks = self.meta_backend.get_blocks_by_version(version_uid)
    if source:
        io = self.get_io_by_source(source)
        io.open_r(source)

    state = True
    notify(self.process_name, 'Preparing Scrub of version {}'.format(version_uid))
    # prepare: enqueue one async read job per non-sparse (and non-skipped) block
    read_jobs = 0
    for block in blocks:
        if block.uid:
            if percentile < 100 and random.randint(1, 100) > percentile:
                logger.debug('Scrub of block {} (UID {}) skipped (percentile is {}).'.format(
                    block.id,
                    block.uid,
                    percentile,
                    ))
            else:
                self.data_backend.read(block.deref())  # async queue
                read_jobs += 1
        else:
            # Sparse blocks have no uid; nothing to verify.
            logger.debug('Scrub of block {} (UID {}) skipped (sparse).'.format(
                block.id,
                block.uid,
                ))
    # and read: consume exactly as many results as jobs were enqueued.
    # Results may arrive in any order; offset/length are unused here.
    for i in range(read_jobs):
        block, offset, length, data = self.data_backend.read_get()
        if data is None:
            # The backend signals a missing blob with data=None.
            logger.error('Blob not found: {}'.format(str(block)))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            state = False
            continue
        if len(data) != block.size:
            logger.error('Blob has wrong size: {} is: {} should be: {}'.format(
                block.uid,
                len(data),
                block.size,
                ))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            state = False
            continue
        data_checksum = self.hash_function(data).hexdigest()
        if data_checksum != block.checksum:
            logger.error('Checksum mismatch during scrub for block '
                '{} (UID {}) (is: {} should-be: {}).'.format(
                    block.id,
                    block.uid,
                    data_checksum,
                    block.checksum,
                    ))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            state = False
            continue

        if source:
            # Compare against the source; read synchronously.
            source_data = io.read(block, sync=True)
            if source_data != data:
                logger.error('Source data has changed for block {} '
                    '(UID {}) (is: {} should-be: {}). NOT setting '
                    'this block invalid, because the source looks '
                    'wrong.'.format(
                        block.id,
                        block.uid,
                        self.hash_function(source_data).hexdigest(),
                        data_checksum,
                        ))
                state = False
                # We are not setting the block invalid here because
                # when the block is there AND the checksum is good,
                # then the source is invalid.
        logger.debug('Scrub of block {} (UID {}) ok.'.format(
            block.id,
            block.uid,
            ))
        notify(self.process_name, 'Scrubbing Version {} ({:.1f}%)'.format(
            version_uid, (i + 1) / read_jobs * 100))
    if state == True:
        self.meta_backend.set_version_valid(version_uid)
    else:
        # version is set invalid by set_blocks_invalid.
        logger.error('Marked version invalid because it has errors: {}'.format(version_uid))
    if source:
        io.close()  # wait for all io
    self.locking.unlock(version_uid)
    notify(self.process_name)
    return state
def backup(self, name, snapshot_name, source, hints, from_version, tag=None, expire=None):
    """ Create a backup from source.

    If hints are given, they must be tuples of (offset, length, exists)
    where offset and length are integers and exists is a boolean. Then, only
    data within hints will be backed up.
    Otherwise, the backup reads source and looks if checksums match with
    the target.

    Returns the new version's uid.

    Fix over the previous revision: the sanity-check sampling clamps the
    sample size to ``len(ignore_blocks)`` — ``random.sample`` raises
    ValueError when asked for more elements than the population holds, which
    happened for small images or hints covering nearly all blocks.
    """
    stats = {
        'version_size_bytes': 0,
        'version_size_blocks': 0,
        'bytes_read': 0,
        'blocks_read': 0,
        'bytes_written': 0,
        'blocks_written': 0,
        'bytes_found_dedup': 0,
        'blocks_found_dedup': 0,
        'bytes_sparse': 0,
        'blocks_sparse': 0,
        'start_time': time.time(),
    }
    io = self.get_io_by_source(source)
    io.open_r(source)
    source_size = io.size()

    size = math.ceil(source_size / self.block_size)
    stats['version_size_bytes'] = source_size
    stats['version_size_blocks'] = size

    # Sanity check: check hints for validity, i.e. too high offsets, ...
    if hints is not None and len(hints) > 0:
        max_offset = max([h[0] + h[1] for h in hints])
        if max_offset > source_size:
            raise ValueError('Hints have higher offsets than source file.')

    # Partition the hints into sparse regions and regions that must be read.
    if hints is not None:
        sparse_blocks = blocks_from_hints(
            [hint for hint in hints if not hint[2]], self.block_size)
        read_blocks = blocks_from_hints(
            [hint for hint in hints if hint[2]], self.block_size)
    else:
        sparse_blocks = []
        read_blocks = range(size)
    sparse_blocks = set(sparse_blocks)
    read_blocks = set(read_blocks)

    try:
        version_uid = self._prepare_version(name, snapshot_name, source_size, from_version)
    except RuntimeError as e:
        logger.error(str(e))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(4)
    except LockError as e:
        logger.error(str(e))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(99)
    if not self.locking.lock(version_uid):
        logger.error('Version {} is locked.'.format(version_uid))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(99)
    blocks = self.meta_backend.get_blocks_by_version(version_uid)

    if from_version and hints:
        # SANITY CHECK:
        # Check some blocks outside of hints if they are the same in the
        # from_version backup and in the current backup. If they
        # don't, either hints are wrong (e.g. from a wrong snapshot diff)
        # or source doesn't match. In any case, the resulting backup won't
        # be good.
        logger.info('Starting sanity check with 1% of the blocks. Reading...')
        ignore_blocks = list(set(range(size)) - read_blocks - sparse_blocks)
        random.shuffle(ignore_blocks)
        num_check_blocks = 10
        # 50% from the start
        check_block_ids = ignore_blocks[:num_check_blocks // 2]
        # and 50% from random locations.
        # Clamp the sample size: random.sample raises ValueError when the
        # population is smaller than the requested sample.
        check_block_ids = set(
            check_block_ids
            + random.sample(ignore_blocks,
                            min(len(ignore_blocks), num_check_blocks // 2)))
        num_reading = 0
        for block in blocks:
            if block.id in check_block_ids and block.uid:  # no uid = sparse block in backup. Can't check.
                io.read(block)
                num_reading += 1
        for i in range(num_reading):
            # this is source file data
            source_block, source_data, source_data_checksum = io.get()
            # check metadata checksum with the newly read one
            if source_block.checksum != source_data_checksum:
                logger.error("Source and backup don't match in regions outside of the hints.")
                logger.error("Looks like the hints don't match or the source is different.")
                logger.error("Found wrong source data at block {}: offset {} with max. length {}".format(
                    source_block.id,
                    source_block.id * self.block_size,
                    self.block_size))
                # remove version
                self.meta_backend.rm_version(version_uid)
                sys.exit(5)
        logger.info('Finished sanity check. Checked {} blocks {}.'.format(
            num_reading, check_block_ids))

    # Enqueue read jobs for all blocks that need fresh data.
    read_jobs = 0
    for block in blocks:
        if block.id in read_blocks or not block.valid:
            io.read(block.deref())  # adds a read job.
            read_jobs += 1
        elif block.id in sparse_blocks:
            # This "elif" is very important. Because if the block is in read_blocks
            # AND sparse_blocks, it *must* be read.
            self.meta_backend.set_block(block.id, version_uid, None, None, block.size, valid=1, _commit=False)
            stats['blocks_sparse'] += 1
            stats['bytes_sparse'] += block.size
            logger.debug('Skipping block (sparse) {}'.format(block.id))
        else:
            #self.meta_backend.set_block(block.id, version_uid, block.uid, block.checksum, block.size, valid=1, _commit=False)
            logger.debug('Keeping block {}'.format(block.id))

    # now use the readers and write
    done_jobs = 0
    _log_every_jobs = read_jobs // 200 + 1  # about every half percent
    for i in range(read_jobs):
        block, data, data_checksum = io.get()
        stats['blocks_read'] += 1
        stats['bytes_read'] += len(data)

        # dedup
        existing_block = self.meta_backend.get_block_by_checksum(data_checksum)
        if data == b'\0' * block.size:
            # if the block is only \0, set it as a sparse block.
            stats['blocks_sparse'] += 1
            stats['bytes_sparse'] += block.size
            logger.debug('Skipping block (detected sparse) {}'.format(block.id))
            self.meta_backend.set_block(block.id, version_uid, None, None, block.size, valid=1, _commit=False)
        elif existing_block and existing_block.size == len(data):
            # Reference the already-stored block instead of writing a new one.
            self.meta_backend.set_block(block.id, version_uid, existing_block.uid, data_checksum, len(data), valid=1, _commit=False)
            stats['blocks_found_dedup'] += 1
            stats['bytes_found_dedup'] += len(data)
            logger.debug('Found existing block for id {} with uid {})'.format(
                block.id, existing_block.uid))
        else:
            block_uid = self.data_backend.save(data)
            self.meta_backend.set_block(block.id, version_uid, block_uid, data_checksum, len(data), valid=1, _commit=False)
            stats['blocks_written'] += 1
            stats['bytes_written'] += len(data)
            logger.debug('Wrote block {} (checksum {}...)'.format(
                block.id, data_checksum[:16]))
        done_jobs += 1
        notify(self.process_name, 'Backup Version {} from {} ({:.1f}%)'.format(
            version_uid, source, (i + 1) / read_jobs * 100))
        if i % _log_every_jobs == 0 or i + 1 == read_jobs:
            logger.info('Backed up {}/{} blocks ({:.1f}%)'.format(
                i + 1, read_jobs, (i + 1) / read_jobs * 100))

    io.close()  # wait for all readers
    # self.data_backend.close()  # wait for all writers
    if read_jobs != done_jobs:
        logger.error('backy broke somewhere. Backup is invalid.')
        sys.exit(3)
    self.meta_backend.set_version_valid(version_uid)
    # Resolve tags: explicit tag(s) win; otherwise generate automatic ones.
    if tag is not None:
        if isinstance(tag, list):
            tags = tag
        else:
            tags = []
            tags.append(tag)
    else:
        tags = self._generate_auto_tags(name)
    for tag in tags:
        self.meta_backend.add_tag(version_uid, tag)

    self.meta_backend.set_stats(
        version_uid=version_uid,
        version_name=name,
        version_size_bytes=stats['version_size_bytes'],
        version_size_blocks=stats['version_size_blocks'],
        bytes_read=stats['bytes_read'],
        blocks_read=stats['blocks_read'],
        bytes_written=stats['bytes_written'],
        blocks_written=stats['blocks_written'],
        bytes_found_dedup=stats['bytes_found_dedup'],
        blocks_found_dedup=stats['blocks_found_dedup'],
        bytes_sparse=stats['bytes_sparse'],
        blocks_sparse=stats['blocks_sparse'],
        duration_seconds=int(time.time() - stats['start_time']),
    )
    logger.info('New version: {} (Tags: [{}])'.format(version_uid, ','.join(tags)))
    self.locking.unlock(version_uid)
    if expire:
        self.meta_backend.expire_version(version_uid, expire)
    return version_uid
def backup(self, name, source, hints, from_version):
    """ Create a backup from source.
    If hints are given, they must be tuples of (offset, length, exists)
    where offset and length are integers and exists is a boolean. Then, only
    data within hints will be backed up.
    Otherwise, the backup reads source and looks if checksums match with
    the target.

    Returns the new version's uid.
    NOTE(review): on the early exit() paths below the version lock is never
    released — confirm whether a stale lock is intended on failure.
    """
    # Counters reported to set_stats at the end of the run.
    stats = {
        'version_size_bytes': 0,
        'version_size_blocks': 0,
        'bytes_read': 0,
        'blocks_read': 0,
        'bytes_written': 0,
        'blocks_written': 0,
        'bytes_found_dedup': 0,
        'blocks_found_dedup': 0,
        'bytes_sparse': 0,
        'blocks_sparse': 0,
        'start_time': time.time(),
    }
    self.reader.open(source)
    source_size = self.reader.size()

    # Number of blocks in this backup (last block may be smaller).
    size = math.ceil(source_size / self.block_size)
    stats['version_size_bytes'] = source_size
    stats['version_size_blocks'] = size

    # Sanity check: check hints for validity, i.e. too high offsets, ...
    if hints:
        max_offset = max([h[0]+h[1] for h in hints])
        if max_offset > source_size:
            raise ValueError('Hints have higher offsets than source file.')

    # Partition hinted regions: exists=False -> sparse, exists=True -> read.
    if hints:
        sparse_blocks = blocks_from_hints([hint for hint in hints if not hint[2]], self.block_size)
        read_blocks = blocks_from_hints([hint for hint in hints if hint[2]], self.block_size)
    else:
        # No hints: nothing is sparse, everything must be read.
        sparse_blocks = []
        read_blocks = range(size)
    sparse_blocks = set(sparse_blocks)
    read_blocks = set(read_blocks)

    try:
        version_uid = self._prepare_version(name, source_size, from_version)
    except RuntimeError as e:
        logger.error(str(e))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(4)
    except LockError as e:
        logger.error(str(e))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(99)
    if not self.locking.lock(version_uid):
        logger.error('Version {} is locked.'.format(version_uid))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(99)
    blocks = self.meta_backend.get_blocks_by_version(version_uid)

    if from_version and hints:
        # SANITY CHECK:
        # Check some blocks outside of hints if they are the same in the
        # from_version backup and in the current backup. If they
        # don't, either hints are wrong (e.g. from a wrong snapshot diff)
        # or source doesn't match. In any case, the resulting backup won't
        # be good.
        logger.info('Starting sanity check with 1% of the blocks. Reading...')
        ignore_blocks = list(set(range(size)) - read_blocks - sparse_blocks)
        random.shuffle(ignore_blocks)
        num_check_blocks = max(10, len(ignore_blocks) // 100)  # 1%, but at least 10
        check_block_ids = ignore_blocks[:num_check_blocks]
        num_reading = 0
        for block in blocks:
            if block.id in check_block_ids and block.uid:  # no uid = sparse block in backup. Can't check.
                self.reader.read(block)
                num_reading += 1
        for i in range(num_reading):
            # this is source file data
            source_block, source_data, source_data_checksum = self.reader.get()
            # check metadata checksum with the newly read one
            if source_block.checksum != source_data_checksum:
                logger.error("Source and backup don't match in regions outside of the hints.")
                logger.error("Looks like the hints don't match or the source is different.")
                logger.error("Found wrong source data at block {}: offset {} with max. length {}".format(
                    source_block.id,
                    source_block.id * self.block_size,
                    self.block_size
                    ))
                # remove version
                self.meta_backend.rm_version(version_uid)
                sys.exit(5)
        logger.info('Finished sanity check. Checked {} blocks.'.format(num_reading))

    # Enqueue asynchronous read jobs for all blocks that need fresh data.
    read_jobs = 0
    for block in blocks:
        if block.id in read_blocks or not block.valid:
            self.reader.read(block)  # adds a read job.
            read_jobs += 1
        elif block.id in sparse_blocks:
            # This "elif" is very important. Because if the block is in read_blocks
            # AND sparse_blocks, it *must* be read.
            self.meta_backend.set_block(block.id, version_uid, None, None, block.size, valid=1, _commit=False)
            stats['blocks_sparse'] += 1
            stats['bytes_sparse'] += block.size
            logger.debug('Skipping block (sparse) {}'.format(block.id))
        else:
            #self.meta_backend.set_block(block.id, version_uid, block.uid, block.checksum, block.size, valid=1, _commit=False)
            logger.debug('Keeping block {}'.format(block.id))

    # now use the readers and write
    done_jobs = 0
    for i in range(read_jobs):
        block, data, data_checksum = self.reader.get()
        stats['blocks_read'] += 1
        stats['bytes_read'] += len(data)

        # dedup: reuse an existing stored block with the same checksum/size.
        existing_block = self.meta_backend.get_block_by_checksum(data_checksum)
        if existing_block and existing_block.size == len(data):
            self.meta_backend.set_block(block.id, version_uid, existing_block.uid, data_checksum, len(data), valid=1, _commit=False)
            stats['blocks_found_dedup'] += 1
            stats['bytes_found_dedup'] += len(data)
            logger.debug('Found existing block for id {} with uid {})'.format
                    (block.id, existing_block.uid))
        else:
            block_uid = self.data_backend.save(data)
            self.meta_backend.set_block(block.id, version_uid, block_uid, data_checksum, len(data), valid=1, _commit=False)
            stats['blocks_written'] += 1
            stats['bytes_written'] += len(data)
            logger.debug('Wrote block {} (checksum {}...)'.format(block.id, data_checksum[:16]))
        done_jobs += 1

    self.reader.close()  # wait for all readers
    self.data_backend.close()  # wait for all writers
    # If fewer results arrived than jobs were queued, something went wrong.
    if read_jobs != done_jobs:
        logger.error('backy broke somewhere. Backup is invalid.')
        sys.exit(3)
    self.meta_backend.set_version_valid(version_uid)
    self.meta_backend.set_stats(
        version_uid=version_uid,
        version_name=name,
        version_size_bytes=stats['version_size_bytes'],
        version_size_blocks=stats['version_size_blocks'],
        bytes_read=stats['bytes_read'],
        blocks_read=stats['blocks_read'],
        bytes_written=stats['bytes_written'],
        blocks_written=stats['blocks_written'],
        bytes_found_dedup=stats['bytes_found_dedup'],
        blocks_found_dedup=stats['blocks_found_dedup'],
        bytes_sparse=stats['bytes_sparse'],
        blocks_sparse=stats['blocks_sparse'],
        duration_seconds=int(time.time() - stats['start_time']),
    )
    logger.info('New version: {}'.format(version_uid))
    self.locking.unlock(version_uid)
    return version_uid
def main():
    """ CLI entry point: build the argument parser, dispatch to Commands.

    Fix over the previous revision: ``--percentile`` and ``--bind-port`` now
    carry ``type=int``. argparse passes user-supplied option values as ``str``
    unless a type is given, so e.g. ``scrub -p 50`` previously reached
    ``percentile < 100`` as a string and raised TypeError in Python 3, and
    ``nbd -p ...`` handed a string port to the server.
    """
    parser = argparse.ArgumentParser(
        description='Backup and restore for block devices.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '-v', '--verbose', action='store_true', help='verbose output')
    parser.add_argument(
        '-m', '--machine-output', action='store_true', default=False)
    parser.add_argument(
        '-V', '--version', action='store_true', help='Show version')

    subparsers = parser.add_subparsers()

    # BACKUP
    p = subparsers.add_parser(
        'backup',
        help="Perform a backup.")
    p.add_argument(
        'source',
        help='Source file')
    p.add_argument(
        'name',
        help='Backup name')
    p.add_argument('-r', '--rbd', default=None, help='Hints as rbd json format')
    p.add_argument('-f', '--from-version', default=None, help='Use this version-uid as base')
    p.set_defaults(func='backup')

    # RESTORE
    p = subparsers.add_parser(
        'restore',
        help="Restore a given backup with level to a given target.")
    p.add_argument('-s', '--sparse', action='store_true', help='Write restore file sparse (does not work with legacy devices)')
    p.add_argument('version_uid')
    p.add_argument('target')
    p.set_defaults(func='restore')

    # RM
    p = subparsers.add_parser(
        'rm',
        help="Remove a given backup version. This will only remove meta data and you will have to cleanup after this.")
    p.add_argument('version_uid')
    p.set_defaults(func='rm')

    # SCRUB
    p = subparsers.add_parser(
        'scrub',
        help="Scrub a given backup and check for consistency.")
    p.add_argument('-s', '--source', default=None,
        help="Source, optional. If given, check if source matches backup in addition to checksum tests.")
    # type=int: scrub() compares percentile numerically.
    p.add_argument('-p', '--percentile', default=100, type=int,
        help="Only check PERCENTILE percent of the blocks (value 0..100). Default: 100")
    p.add_argument('version_uid')
    p.set_defaults(func='scrub')

    # Export
    p = subparsers.add_parser(
        'export',
        help="Export the metadata of a backup uid into a file.")
    p.add_argument('version_uid')
    p.add_argument('filename', help="Export into this filename ('-' is for stdout)")
    p.set_defaults(func='export')

    # Import
    p = subparsers.add_parser(
        'import',
        help="Import the metadata of a backup from a file.")
    p.add_argument('filename', help="Read from this file ('-' is for stdin)")
    p.set_defaults(func='import_')

    # CLEANUP
    p = subparsers.add_parser(
        'cleanup',
        help="Clean unreferenced blobs.")
    p.add_argument(
        '-f', '--full', action='store_true', default=False,
        help='Do a full cleanup. This will read the full metadata from the data backend (i.e. backup storage) '
             'and compare it to the metadata in the meta backend. Unused data will then be deleted. '
             'This is a slow, but complete process. A full cleanup must not be run parallel to ANY other backy '
             'jobs.')
    p.add_argument(
        '-p', '--prefix', default=None,
        help='If you perform a full cleanup, you may add --prefix to only cleanup block uids starting '
             'with this prefix. This is for iterative cleanups. Example: '
             'cleanup --full --prefix=a')
    p.set_defaults(func='cleanup')

    # LS
    p = subparsers.add_parser(
        'ls',
        help="List existing backups.")
    p.add_argument('version_uid', nargs='?', default=None, help='Show verbose blocks for this version')
    p.set_defaults(func='ls')

    # STATS
    p = subparsers.add_parser(
        'stats',
        help="Show statistics")
    p.add_argument('version_uid', nargs='?', default=None, help='Show statistics for this version')
    # NOTE(review): --limit is left untyped (str); downstream may call int() —
    # confirm before adding type=int here.
    p.add_argument('-l', '--limit', default=None,
        help="Limit output to this number (default: unlimited)")
    p.set_defaults(func='stats')

    # diff-meta
    p = subparsers.add_parser(
        'diff-meta',
        help="Output a diff between two versions")
    p.add_argument('version_uid1', help='Left version')
    p.add_argument('version_uid2', help='Right version')
    p.set_defaults(func='diff_meta')

    # NBD
    p = subparsers.add_parser(
        'nbd',
        help="Start an nbd server")
    p.add_argument('version_uid', nargs='?', default=None, help='Start an nbd server for this version')
    p.add_argument('-a', '--bind-address', default='127.0.0.1',
        help="Bind to this ip address (default: 127.0.0.1)")
    # type=int: a TCP port must be numeric for the server to bind.
    p.add_argument('-p', '--bind-port', default=10809, type=int,
        help="Bind to this port (default: 10809)")
    p.add_argument(
        '-r', '--read-only', action='store_true', default=False,
        help='Read only if set, otherwise a copy on write backup is created.')
    p.set_defaults(func='nbd')

    args = parser.parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)

    # No subcommand selected.
    if not hasattr(args, 'func'):
        parser.print_usage()
        sys.exit(1)

    if args.verbose:
        console_level = logging.DEBUG
    #elif args.func == 'scheduler':
        #console_level = logging.INFO
    else:
        console_level = logging.INFO

    Config = partial(_Config, conf_name='backy')
    config = Config(section='DEFAULTS')
    init_logging(config.get('logfile'), console_level)

    commands = Commands(args.machine_output, Config)
    func = getattr(commands, args.func)

    # Pass over to function: forward only the subcommand's own arguments.
    func_args = dict(args._get_kwargs())
    del func_args['func']
    del func_args['verbose']
    del func_args['version']
    del func_args['machine_output']

    try:
        logger.debug('backup.{0}(**{1!r})'.format(args.func, func_args))
        func(**func_args)
        logger.info('Backy complete.\n')
        sys.exit(0)
    except Exception as e:
        logger.error('Unexpected exception')
        logger.exception(e)
        logger.info('Backy failed.\n')
        sys.exit(100)
def backup(self, name, snapshot_name, source, hints, from_version, tag=None, expire=None, continue_version=None):
    """ Create a backup from source.
    If hints are given, they must be tuples of (offset, length, exists)
    where offset and length are integers and exists is a boolean. Then, only
    data within hints will be backed up.
    Otherwise, the backup reads source and looks if checksums match with
    the target.
    If continue_version is given, this version will be continued, i.e.
    existing blocks will not be read again.

    Returns the version uid (new or continued).
    Raises LockError if the version is locked, ValueError for invalid hints
    or a non-continuable version, RuntimeError when basing on an invalid
    version.
    """
    # Counters reported to set_stats at the end; *_throughput also feeds
    # the periodic status line.
    stats = {
        'version_size_bytes': 0,
        'version_size_blocks': 0,
        'bytes_checked': 0,
        'blocks_checked': 0,
        'bytes_read': 0,
        'blocks_read': 0,
        'bytes_written': 0,
        'blocks_written': 0,
        'bytes_found_dedup': 0,
        'blocks_found_dedup': 0,
        'bytes_sparse': 0,
        'blocks_sparse': 0,
        'bytes_throughput': 0,
        'blocks_throughput': 0,
        'start_time': time.time(),
    }
    io = self.get_io_by_source(source)
    io.open_r(source)
    source_size = io.size()

    # Number of blocks in this backup (last block may be smaller).
    size = math.ceil(source_size / self.block_size)
    stats['version_size_bytes'] = source_size
    stats['version_size_blocks'] = size

    # Sanity check: check hints for validity, i.e. too high offsets, ...
    if hints is not None and len(hints) > 0:
        max_offset = max([h[0]+h[1] for h in hints])
        if max_offset > source_size:
            raise ValueError('Hints have higher offsets than source file.')

    # Find out which blocks to read
    if hints is not None:
        sparse_blocks = blocks_from_hints([hint for hint in hints if not hint[2]], self.block_size)
        read_blocks = blocks_from_hints([hint for hint in hints if hint[2]], self.block_size)
    else:
        sparse_blocks = []
        read_blocks = range(size)
    sparse_blocks = set(sparse_blocks)
    read_blocks = set(read_blocks)

    # Validity check
    if from_version:
        old_version = self.meta_backend.get_version(from_version)  # raise if not exists
        if not old_version.valid:
            raise RuntimeError('You cannot base on an invalid version.')

    existing_block_ids = set()
    if continue_version:
        # Resume an interrupted (invalid) backup of the same source size.
        version_uid = continue_version
        _v = self.meta_backend.get_version(version_uid)  # raise if version does not exist
        if _v.size_bytes != source_size:
            raise ValueError('Version to continue backup from has a different size than the source. Cannot continue.')
        if _v.valid:
            raise ValueError('You cannot continue a valid version.')
        # reduce read_blocks and sparse_blocks by existing blocks
        existing_block_ids = set(self.meta_backend.get_block_ids_by_version(version_uid))
        read_blocks = read_blocks - existing_block_ids
        sparse_blocks = sparse_blocks - existing_block_ids
    else:
        # Create new version
        version_uid = self.meta_backend.set_version(name, snapshot_name, size, source_size, 0)  # initially marked invalid
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))

    # Sanity check:
    # Check some blocks outside of hints if they are the same in the
    # from_version backup and in the current backup. If they
    # don't, either hints are wrong (e.g. from a wrong snapshot diff)
    # or source doesn't match. In any case, the resulting backup won't
    # be good.
    check_block_ids = set()
    if from_version and hints:
        ignore_blocks = list(set(range(size)) - read_blocks - sparse_blocks)
        random.shuffle(ignore_blocks)
        num_check_blocks = 10
        # 50% from the start
        check_block_ids = ignore_blocks[:num_check_blocks//2]
        # and 50% from random locations
        # NOTE(review): random.sample raises ValueError if ignore_blocks has
        # fewer than num_check_blocks//2 entries — confirm small-image case.
        check_block_ids = set(check_block_ids + random.sample(ignore_blocks, num_check_blocks//2))

    # Find blocks to base on
    if from_version:
        # Make sure we're based on a valid version.
        old_blocks = iter(self.meta_backend.get_blocks_by_version(from_version).yield_per(1000))
    else:
        old_blocks = iter([])

    # Create read jobs (phase 1/2): decide per block whether to read it
    # from source or to carry metadata over, and enqueue accordingly.
    _log_every_jobs = size // 200 + 1  # about every half percent
    _log_jobs_counter = 0
    t1 = time.time()
    t_last_run = 0
    for block_id in range(size):
        _log_jobs_counter -= 1
        # Create a block, either based on an old one (from_version) or a fresh one
        _have_old_block = False
        try:
            old_block = next(old_blocks)
        except StopIteration:
            # No old block found, we create a fresh one
            block_uid = None
            checksum = None
            block_size = self.block_size
            valid = 1
        else:
            # Old block found, maybe base on that one
            assert old_block.id == block_id
            block_uid = old_block.uid
            checksum = old_block.checksum
            block_size = old_block.size
            valid = old_block.valid
            _have_old_block = True
        # the last block can differ in size, so let's check
        _offset = block_id * self.block_size
        new_block_size = min(self.block_size, source_size - _offset)
        if new_block_size != block_size:
            # last block changed, so set back all info
            block_size = new_block_size
            block_uid = None
            checksum = None
            valid = 1
            _have_old_block = False

        # Build list of blocks to be read or skipped
        # Read (read_blocks, check_block_ids or block is invalid) or not?
        if block_id in read_blocks:
            logger.debug('Block {}: Reading'.format(block_id))
            io.read(block_id, read=True)
        elif block_id in check_block_ids and _have_old_block and checksum:
            # Sampled sanity-check block: read and verify against old metadata.
            logger.debug('Block {}: Reading / checking'.format(block_id))
            io.read(block_id, read=True, metadata={'check': True, 'checksum': checksum, 'block_size': block_size})
        elif not valid:
            logger.debug('Block {}: Reading because not valid'.format(block_id))
            io.read(block_id, read=True)
            assert _have_old_block
        elif block_id in sparse_blocks:
            logger.debug('Block {}: Sparse'.format(block_id))
            # Sparse blocks have uid and checksum None.
            io.read(block_id, read=False, metadata={'block_uid': None, 'checksum': None, 'block_size': block_size})
        elif block_id in existing_block_ids:
            # Already stored by the version we are continuing; skip entirely.
            logger.debug('Block {}: Exists in continued version'.format(block_id))
            io.read(block_id, read=False, metadata={'skip': True})
        else:
            # Carry over old metadata (or fresh empty block) without reading.
            logger.debug('Block {}: Fresh empty or existing'.format(block_id))
            io.read(block_id, read=False, metadata={'block_uid': block_uid, 'checksum': checksum, 'block_size': block_size})

        # log and process output (at most once per second)
        if time.time() - t_last_run >= 1:
            t_last_run = time.time()
            t2 = time.time()
            dt = t2-t1
            logger.debug(io.thread_status() + " " + self.data_backend.thread_status())
            io_queue_status = io.queue_status()
            db_queue_status = self.data_backend.queue_status()
            _status = status(
                'Backing up (1/2: Prep) {}'.format(source),
                0,
                0,
                (block_id + 1) / size * 100,
                0.0,
                round(size / (block_id+1) * dt - dt),
            )
            notify(self.process_name, _status)
            if _log_jobs_counter <= 0:
                _log_jobs_counter = _log_every_jobs
                logger.info(_status)

    # now use the readers and write (phase 2/2)
    _log_every_jobs = size // 200 + 1  # about every half percent
    _log_jobs_counter = 0
    t1 = time.time()
    t_last_run = 0
    _written_blocks_queue = queue.Queue()  # contains ONLY blocks that have been written to the data backend.
    # consume the read jobs
    for i in range(size):
        _log_jobs_counter -= 1
        block_id, data, data_checksum, metadata = io.get()

        if data:
            # A block that was actually read from source.
            block_size = len(data)
            stats['blocks_read'] += 1
            stats['bytes_read'] += block_size
            stats['blocks_throughput'] += 1
            stats['bytes_throughput'] += block_size
            existing_block = None
            if self.dedup:
                existing_block = self.meta_backend.get_block_by_checksum(data_checksum)
            if data == b'\0' * block_size:
                # All-zero payload: store as a sparse block (no uid/checksum).
                block_uid = None
                data_checksum = None
                _written_blocks_queue.put((block_id, version_uid, block_uid, data_checksum, block_size))
            elif existing_block and existing_block.size == block_size:
                # Dedup hit: reference the already-stored block.
                block_uid = existing_block.uid
                _written_blocks_queue.put((block_id, version_uid, block_uid, data_checksum, block_size))
            else:
                try:
                    # This is the whole reason for _written_blocks_queue. We must first write the block to
                    # the backup data store before we write it to the database. Otherwise we can't continue
                    # reliably.
                    def callback(local_block_id, local_version_uid, local_data_checksum, local_block_size):
                        # Bind the current loop values so the async writer's
                        # completion callback enqueues the right block.
                        def f(_block_uid):
                            _written_blocks_queue.put((local_block_id, local_version_uid, _block_uid, local_data_checksum, local_block_size))
                        return f
                    block_uid = self.data_backend.save(data, callback=callback(block_id, version_uid, data_checksum, block_size))  # this will re-raise an exception from a worker thread
                except Exception as e:
                    break  # close anything as always.
                stats['blocks_written'] += 1
                stats['bytes_written'] += block_size

            if metadata and 'check' in metadata:
                # Perform sanity check
                if not metadata['checksum'] == data_checksum or not metadata['block_size'] == block_size:
                    logger.error("Source and backup don't match in regions outside of the hints.")
                    logger.error("Looks like the hints don't match or the source is different.")
                    logger.error("Found wrong source data at block {}: offset {} with max. length {}".format(
                        block_id,
                        block_id * self.block_size,
                        block_size
                    ))
                    # remove version
                    self.meta_backend.rm_version(version_uid)
                    sys.exit(5)
                stats['blocks_checked'] += 1
                stats['bytes_checked'] += block_size
        else:
            # No data means that this block is from the previous version or is empty as of the hints, so just store metadata.
            # Except it's a skipped block from a continued version.
            if not 'skip' in metadata:
                block_uid = metadata['block_uid']
                data_checksum = metadata['checksum']
                block_size = metadata['block_size']
                _written_blocks_queue.put((block_id, version_uid, block_uid, data_checksum, block_size))
                if metadata['block_uid'] is None:
                    stats['blocks_sparse'] += 1
                    stats['bytes_sparse'] += block_size
                else:
                    stats['blocks_found_dedup'] += 1
                    stats['bytes_found_dedup'] += block_size

        # Set the blocks from the _written_blocks_queue
        # (drain without blocking; writer threads keep filling it).
        while True:
            try:
                q_block_id, q_version_uid, q_block_uid, q_data_checksum, q_block_size = _written_blocks_queue.get(block=False)
            except queue.Empty:
                break
            else:
                self.meta_backend.set_block(q_block_id, q_version_uid, q_block_uid, q_data_checksum, q_block_size, valid=1, _commit=True, _upsert=False)

        # log and process output (at most once per second)
        if time.time() - t_last_run >= 1:
            t_last_run = time.time()
            t2 = time.time()
            dt = t2-t1
            logger.debug(io.thread_status() + " " + self.data_backend.thread_status())
            io_queue_status = io.queue_status()
            db_queue_status = self.data_backend.queue_status()
            _status = status(
                'Backing up (2/2: Data) {}'.format(source),
                io_queue_status['rq_filled']*100,
                db_queue_status['wq_filled']*100,
                (i + 1) / size * 100,
                stats['bytes_throughput'] / dt,
                round(size / (i+1) * dt - dt),
            )
            notify(self.process_name, _status)
            if _log_jobs_counter <= 0:
                _log_jobs_counter = _log_every_jobs
                logger.info(_status)

    # check if there are any exceptions left
    if self.data_backend.last_exception:
        logger.error("Exception during saving to the data backend: {}".format(str(self.data_backend.last_exception)))
    else:
        io.close()  # wait for all readers
        self.data_backend.close()  # wait for all writers
    # Set the rest of the blocks from the _written_blocks_queue
    # (also on failure, so a continued backup knows what was stored).
    while True:
        try:
            q_block_id, q_version_uid, q_block_uid, q_data_checksum, q_block_size = _written_blocks_queue.get(block=False)
        except queue.Empty:
            break
        else:
            self.meta_backend.set_block(q_block_id, q_version_uid, q_block_uid, q_data_checksum, q_block_size, valid=1, _commit=True, _upsert=False)

    # Resolve tags: explicit tag(s) win; auto tags only for fresh versions.
    tags = []
    if tag is not None:
        if isinstance(tag, list):
            tags = tag
        else:
            tags.append(tag)
    else:
        if not continue_version:
            tags = self._generate_auto_tags(name)
    for tag in tags:
        self.meta_backend.add_tag(version_uid, tag)
    if expire:
        self.meta_backend.expire_version(version_uid, expire)

    self.meta_backend.set_stats(
        version_uid=version_uid,
        version_name=name,
        version_size_bytes=stats['version_size_bytes'],
        version_size_blocks=stats['version_size_blocks'],
        bytes_read=stats['bytes_read'],
        blocks_read=stats['blocks_read'],
        bytes_written=stats['bytes_written'],
        blocks_written=stats['blocks_written'],
        bytes_found_dedup=stats['bytes_found_dedup'],
        blocks_found_dedup=stats['blocks_found_dedup'],
        bytes_sparse=stats['bytes_sparse'],
        blocks_sparse=stats['blocks_sparse'],
        duration_seconds=int(time.time() - stats['start_time']),
    )
    if self.data_backend.last_exception:
        # A writer failed: leave the version invalid for later continuation.
        logger.info('New invalid version: {} (Tags: [{}])'.format(version_uid, ','.join(tags)))
    else:
        self.meta_backend.set_version_valid(version_uid)
        logger.info('New version: {} (Tags: [{}])'.format(version_uid, ','.join(tags)))
    self.meta_backend._commit()
    self.locking.unlock(version_uid)
    if self.data_backend.last_exception:
        sys.exit(6)  # i.e. kill all the remaining workers
    return version_uid
def main():
    """Command-line entry point (legacy variant).

    Builds the argparse command tree (backup/restore/protect/…/nbd),
    dispatches to the matching method on ``Commands`` and exits with
    0 on success, 100 on any unexpected exception.

    NOTE(review): a second ``def main()`` appears later in this module and
    rebinds the name, so this definition is shadowed at import time —
    confirm whether it is intentionally kept for reference.
    """
    parser = argparse.ArgumentParser(
        description='Backup and restore for block devices.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Global flags, valid for every subcommand.
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='verbose output')
    parser.add_argument(
        '-m', '--machine-output', action='store_true', default=False)
    parser.add_argument(
        '-V', '--version', action='store_true', help='Show version')

    subparsers = parser.add_subparsers()

    # Each subparser stores its handler NAME (not a callable) via
    # set_defaults(func=...); the name is resolved on Commands below.

    # BACKUP
    p = subparsers.add_parser(
        'backup',
        help="Perform a backup.")
    p.add_argument(
        'source',
        help='Source (url-like, e.g. file:///dev/sda or rbd://pool/imagename@snapshot)')
    p.add_argument(
        'name',
        help='Backup name')
    p.add_argument('-r', '--rbd', default=None, help='Hints as rbd json format')
    p.add_argument('-f', '--from-version', default=None, help='Use this version-uid as base')
    p.set_defaults(func='backup')

    # RESTORE
    p = subparsers.add_parser(
        'restore',
        help="Restore a given backup to a given target.")
    p.add_argument('-s', '--sparse', action='store_true', help='Faster. Restore '
        'only existing blocks (works only with file- and rbd-restore, not with lvm)')
    p.add_argument('-f', '--force', action='store_true', help='Force overwrite of existing files/devices/images')
    p.add_argument('version_uid')
    p.add_argument('target',
        help='Source (url-like, e.g. file:///dev/sda or rbd://pool/imagename)')
    p.set_defaults(func='restore')

    # PROTECT
    p = subparsers.add_parser(
        'protect',
        help="Protect a backup version. Protected versions cannot be removed.")
    p.add_argument('version_uid')
    p.set_defaults(func='protect')

    # UNPROTECT
    p = subparsers.add_parser(
        'unprotect',
        help="Unprotect a backup version. Unprotected versions can be removed.")
    p.add_argument('version_uid')
    p.set_defaults(func='unprotect')

    # RM
    p = subparsers.add_parser(
        'rm',
        help="Remove a given backup version. This will only remove meta data and you will have to cleanup after this.")
    p.add_argument('-f', '--force', action='store_true', help="Force removal of version, even if it's younger than the configured disallow_rm_when_younger_than_days.")
    p.add_argument('version_uid')
    p.set_defaults(func='rm')

    # SCRUB
    p = subparsers.add_parser(
        'scrub',
        help="Scrub a given backup and check for consistency.")
    p.add_argument('-s', '--source', default=None,
        help="Source, optional. If given, check if source matches backup in addition to checksum tests. url-like format as in backup.")
    p.add_argument('-p', '--percentile', default=100,
        help="Only check PERCENTILE percent of the blocks (value 0..100). Default: 100")
    p.add_argument('version_uid')
    p.set_defaults(func='scrub')

    # Export
    p = subparsers.add_parser(
        'export',
        help="Export the metadata of a backup uid into a file.")
    p.add_argument('version_uid')
    p.add_argument('filename', help="Export into this filename ('-' is for stdout)")
    p.set_defaults(func='export')

    # Import
    p = subparsers.add_parser(
        'import',
        help="Import the metadata of a backup from a file.")
    p.add_argument('filename', help="Read from this file ('-' is for stdin)")
    # 'import' is a Python keyword, hence the handler name 'import_'.
    p.set_defaults(func='import_')

    # CLEANUP
    p = subparsers.add_parser(
        'cleanup',
        help="Clean unreferenced blobs.")
    p.add_argument(
        '-f', '--full', action='store_true', default=False,
        help='Do a full cleanup. This will read the full metadata from the data backend (i.e. backup storage) '
             'and compare it to the metadata in the meta backend. Unused data will then be deleted. '
             'This is a slow, but complete process. A full cleanup must not be run parallel to ANY other backy '
             'jobs.')
    p.add_argument(
        '-p', '--prefix', default=None,
        help='If you perform a full cleanup, you may add --prefix to only cleanup block uids starting '
             'with this prefix. This is for iterative cleanups. Example: '
             'cleanup --full --prefix=a')
    p.set_defaults(func='cleanup')

    # LS
    p = subparsers.add_parser(
        'ls',
        help="List existing backups.")
    p.add_argument('version_uid', nargs='?', default=None, help='Show verbose blocks for this version')
    p.set_defaults(func='ls')

    # STATS
    p = subparsers.add_parser(
        'stats',
        help="Show statistics")
    p.add_argument('version_uid', nargs='?', default=None, help='Show statistics for this version')
    p.add_argument('-l', '--limit', default=None,
        help="Limit output to this number (default: unlimited)")
    p.set_defaults(func='stats')

    # diff-meta
    p = subparsers.add_parser(
        'diff-meta',
        help="Output a diff between two versions")
    p.add_argument('version_uid1', help='Left version')
    p.add_argument('version_uid2', help='Right version')
    p.set_defaults(func='diff_meta')

    # NBD
    p = subparsers.add_parser(
        'nbd',
        help="Start an nbd server")
    p.add_argument('version_uid', nargs='?', default=None, help='Start an nbd server for this version')
    p.add_argument('-a', '--bind-address', default='127.0.0.1',
        help="Bind to this ip address (default: 127.0.0.1)")
    p.add_argument('-p', '--bind-port', default=10809,
        help="Bind to this port (default: 10809)")
    p.add_argument(
        '-r', '--read-only', action='store_true', default=False,
        help='Read only if set, otherwise a copy on write backup is created.')
    p.set_defaults(func='nbd')

    args = parser.parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)

    # No subcommand given: argparse leaves 'func' unset.
    if not hasattr(args, 'func'):
        parser.print_usage()
        sys.exit(1)

    if args.verbose:
        console_level = logging.DEBUG
    #elif args.func == 'scheduler':
        #console_level = logging.INFO
    else:
        console_level = logging.INFO

    # Config is a factory pre-bound to the 'backy' configuration name.
    Config = partial(_Config, conf_name='backy')
    config = Config(section='DEFAULTS')
    init_logging(config.get('logfile'), console_level)

    commands = Commands(args.machine_output, Config)
    func = getattr(commands, args.func)

    # Pass over to function: forward all parsed CLI options except the
    # global ones that the handler does not accept.
    func_args = dict(args._get_kwargs())
    del func_args['func']
    del func_args['verbose']
    del func_args['version']
    del func_args['machine_output']

    try:
        logger.debug('backup.{0}(**{1!r})'.format(args.func, func_args))
        func(**func_args)
        logger.info('Backy complete.\n')
        sys.exit(0)
    except Exception as e:
        logger.error('Unexpected exception')
        logger.exception(e)
        logger.info('Backy failed.\n')
        sys.exit(100)
def main():
    """Command-line entry point.

    Builds the argparse command tree (initdb/backup/restore/…/sla),
    loads the configuration (from ``--configfile`` if given, otherwise the
    conventional 'backy' config), initializes logging and dispatches to the
    matching method on ``Commands``. Exits 0 on success, 1 on usage/config
    errors, 100 on any unexpected exception.
    """
    parser = argparse.ArgumentParser(
        description='Backup and restore for block devices.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Global flags, valid for every subcommand.
    parser.add_argument('-v', '--verbose', action='store_true', help='verbose output')
    parser.add_argument('-d', '--debug', action='store_true', help='debug output')
    parser.add_argument('-m', '--machine-output', action='store_true', default=False)
    parser.add_argument('-s', '--skip-header', action='store_true', default=False)
    parser.add_argument('-r', '--human-readable', action='store_true', default=False)
    parser.add_argument('-V', '--version', action='store_true', help='Show version')
    parser.add_argument('-c', '--configfile', default=None, type=str)

    subparsers = parser.add_subparsers()

    # Each subparser stores its handler NAME (not a callable) via
    # set_defaults(func=...); the name is resolved on Commands below.

    # INITDB
    p = subparsers.add_parser(
        'initdb',
        help="Initialize the database by populating tables. This will not delete tables or data if they exist.")
    p.set_defaults(func='initdb')

    # BACKUP
    p = subparsers.add_parser('backup', help="Perform a backup.")
    p.add_argument(
        'source',
        help='Source (url-like, e.g. file:///dev/sda or rbd://pool/imagename@snapshot)')
    p.add_argument('name', help='Backup name (e.g. the hostname)')
    p.add_argument('-s', '--snapshot-name', default='',
        help='Snapshot name (e.g. the name of the rbd snapshot)')
    p.add_argument('-r', '--rbd', default=None, help='Hints as rbd json format')
    p.add_argument('-f', '--from-version', default=None, help='Use this version-uid as base')
    p.add_argument('-c', '--continue-version', default=None, help='Continue backup on this version-uid')
    p.add_argument(
        '-t', '--tag', default=None,
        help='Use a specific tag (or multiple comma-separated tags) for the target backup version-uid')
    p.add_argument(
        '-e', '--expire', default='',
        help='Expiration date (yyyy-mm-dd or "yyyy-mm-dd HH-MM-SS") (optional)')
    p.set_defaults(func='backup')

    # RESTORE
    p = subparsers.add_parser('restore', help="Restore a given backup to a given target.")
    p.add_argument('-s', '--sparse', action='store_true', help='Faster. Restore '
        'only existing blocks (works only with file- and rbd-restore, not with lvm)')
    p.add_argument('-f', '--force', action='store_true', help='Force overwrite of existing files/devices/images')
    p.add_argument(
        '-c', '--continue-from', default=0,
        help='Continue from this block (only use this for partially failed restores!)')
    p.add_argument('version_uid')
    p.add_argument(
        'target',
        help='Source (url-like, e.g. file:///dev/sda or rbd://pool/imagename)')
    p.set_defaults(func='restore')

    # PROTECT
    p = subparsers.add_parser(
        'protect',
        help="Protect a backup version. Protected versions cannot be removed.")
    p.add_argument('version_uid')
    p.set_defaults(func='protect')

    # UNPROTECT
    p = subparsers.add_parser(
        'unprotect',
        help="Unprotect a backup version. Unprotected versions can be removed.")
    p.add_argument('version_uid')
    p.set_defaults(func='unprotect')

    # RM
    p = subparsers.add_parser(
        'rm',
        help="Remove a given backup version. This will only remove meta data and you will have to cleanup after this.")
    p.add_argument(
        '-f', '--force', action='store_true',
        help="Force removal of version, even if it's younger than the configured disallow_rm_when_younger_than_days.")
    p.add_argument('version_uid')
    p.set_defaults(func='rm')

    # SCRUB
    p = subparsers.add_parser(
        'scrub',
        help="Scrub a given backup and check for consistency.")
    p.add_argument(
        '-s', '--source', default=None,
        help="Source, optional. If given, check if source matches backup in addition to checksum tests. url-like format as in backup.")
    p.add_argument(
        '-p', '--percentile', default=100,
        help="Only check PERCENTILE percent of the blocks (value 0..100). Default: 100")
    p.add_argument('version_uid')
    p.set_defaults(func='scrub')

    # Export
    p = subparsers.add_parser(
        'export',
        help="Export the metadata of a backup uid into a file.")
    p.add_argument('version_uid')
    p.add_argument('filename', help="Export into this filename ('-' is for stdout)")
    p.set_defaults(func='export')

    # Import
    p = subparsers.add_parser(
        'import',
        help="Import the metadata of a backup from a file.")
    p.add_argument('filename', help="Read from this file ('-' is for stdin)")
    # 'import' is a Python keyword, hence the handler name 'import_'.
    p.set_defaults(func='import_')

    # CLEANUP
    p = subparsers.add_parser('cleanup', help="Clean unreferenced blobs.")
    p.add_argument(
        '-f', '--full', action='store_true', default=False,
        help='Do a full cleanup. This will read the full metadata from the data backend (i.e. backup storage) '
             'and compare it to the metadata in the meta backend. Unused data will then be deleted. '
             'This is a slow, but complete process. A full cleanup must not be run parallel to ANY other backy '
             'jobs.')
    p.add_argument(
        '-p', '--prefix', default=None,
        help='If you perform a full cleanup, you may add --prefix to only cleanup block uids starting '
             'with this prefix. This is for iterative cleanups. Example: '
             'cleanup --full --prefix=a')
    p.add_argument(
        '--dangerous-force', action='store_true', default=False,
        help='Seriously, do not use this outside of testing and development.')
    p.set_defaults(func='cleanup')

    # LS
    p = subparsers.add_parser('ls', help="List existing backups.")
    p.add_argument('name', nargs='?', default=None, help='Show versions for this name only')
    p.add_argument('-s', '--snapshot-name', default=None, help="Limit output to this snapshot name")
    p.add_argument('-t', '--tag', default=None, help="Limit output to this tag")
    p.add_argument('-e', '--expired', action='store_true', default=False,
        help="Only list expired versions (expired < now)")
    p.add_argument(
        '-f', '--fields',
        default="date,name,snapshot_name,size,size_bytes,uid,valid,protected,tags,expire",
        help="Show these fields (comma separated). Available: date,name,snapshot_name,size,size_bytes,uid,valid,protected,tags,expire")
    p.set_defaults(func='ls')

    # STATS
    p = subparsers.add_parser('stats', help="Show statistics")
    p.add_argument('version_uid', nargs='?', default=None, help='Show statistics for this version')
    p.add_argument(
        '-f', '--fields',
        default="date,uid,name,size bytes,size blocks,bytes read,blocks read,bytes written,blocks written,bytes dedup,blocks dedup,bytes sparse,blocks sparse,duration (s)",
        help="Show these fields (comma separated). Available: date,uid,name,size bytes,size blocks,bytes read,blocks read,bytes written,blocks written,bytes dedup,blocks dedup,bytes sparse,blocks sparse,duration (s)")
    p.add_argument('-l', '--limit', default=None,
        help="Limit output to this number (default: unlimited)")
    p.set_defaults(func='stats')

    # diff-meta
    p = subparsers.add_parser('diff-meta', help="Output a diff between two versions")
    p.add_argument('version_uid1', help='Left version')
    p.add_argument('version_uid2', help='Right version')
    p.set_defaults(func='diff_meta')

    # disk usage
    p = subparsers.add_parser(
        'du',
        help="Get disk usage for a version or for all versions")
    p.add_argument('version_uid', nargs='?', default=None, help='Show disk usage for this version')
    p.add_argument(
        '-f', '--fields',
        default="Real,Null,Dedup Own,Dedup Others,Individual,Est. Space,Est. Space freed",
        help="Show these fields (comma separated). Available: Real,Null,Dedup Own,Dedup Others,Individual,Est. Space,Est. Space freed)")
    p.set_defaults(func='du')

    # FUSE
    p = subparsers.add_parser('fuse', help="Fuse mount backy backups")
    p.add_argument('mount', help='Mountpoint')
    p.set_defaults(func='fuse')

    # Re-Keying
    p = subparsers.add_parser(
        'rekey',
        help="Re-Key all blocks in backy2 with a new key in the config. This will NOT encrypt unencrypted blocks or recrypt existing blocks.")
    p.add_argument('oldkey', help='The old key as it was found in the config')
    p.set_defaults(func='rekey')

    # Migrate encryption
    p = subparsers.add_parser(
        'migrate-encryption',
        help="Create a new version with blocks migrated/encrypted to the latest encryption version.")
    p.add_argument('version_uid', help='The version uid to migrate')
    p.set_defaults(func='migrate_encryption')

    # ADD TAG
    p = subparsers.add_parser(
        'add-tag',
        help="Add a named tag (or many comma-separated tags) to a backup version.")
    p.add_argument('version_uid')
    p.add_argument('name')
    p.set_defaults(func='add_tag')

    # REMOVE TAG
    p = subparsers.add_parser(
        'remove-tag',
        help="Remove a named tag (or many comma-separated tags) from a backup version.")
    p.add_argument('version_uid')
    p.add_argument('name')
    p.set_defaults(func='remove_tag')

    # EXPIRE
    p = subparsers.add_parser(
        'expire',
        help="""Set expiration date for a backup version. Date format is yyyy-mm-dd or "yyyy-mm-dd HH:MM:SS" (e.g. 2020-01-23). HINT: Create with 'date +"%%Y-%%m-%%d" -d "today + 7 days"'""")
    p.add_argument('version_uid')
    p.add_argument('expire')
    p.set_defaults(func='expire')

    # DUE
    p = subparsers.add_parser(
        'due',
        help="""Based on the schedulers in the config file, calculate the due backups including tags.""")
    p.add_argument(
        'name', nargs='?', default=None,
        help='Show due backups for this version name (optional, if not given, show due backups for all names).')
    p.add_argument(
        '-s', '--schedulers', default="daily,weekly,monthly",
        help="Use these schedulers as defined in backy.cfg (default: daily,weekly,monthly)")
    p.add_argument(
        '-f', '--fields', default="name,schedulers,expire_date,due_since",
        help="Show these fields (comma separated). Available: name,schedulers,expire_date")
    p.set_defaults(func='due')

    # SLA
    p = subparsers.add_parser(
        'sla',
        help="""Based on the schedulers in the config file, calculate the information about SLA.""")
    p.add_argument(
        'name', nargs='?', default=None,
        help='Show SLA breaches for this version name (optional, if not given, show SLA breaches for all names).')
    p.add_argument(
        '-s', '--schedulers', default="daily,weekly,monthly",
        help="Use these schedulers as defined in backy.cfg (default: daily,weekly,monthly)")
    p.add_argument(
        '-f', '--fields', default="name,breach",
        help="Show these fields (comma separated). Available: name,breach")
    p.set_defaults(func='sla')

    args = parser.parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)

    # No subcommand given: argparse leaves 'func' unset.
    if not hasattr(args, 'func'):
        parser.print_usage()
        sys.exit(1)

    if args.verbose:
        console_level = logging.DEBUG
    else:
        console_level = logging.INFO

    # store_true already yields a bool; no need for an if/else ladder.
    debug = args.debug

    if args.configfile is not None and args.configfile != '':
        try:
            # Use a context manager so the config file handle is closed
            # deterministically (the original leaked the handle).
            with open(args.configfile, 'r', encoding='utf-8') as cfg_file:
                cfg = cfg_file.read()
        except FileNotFoundError:
            logger.error('File not found: {}'.format(args.configfile))
            sys.exit(1)
        Config = partial(_Config, cfg=cfg)
    else:
        Config = partial(_Config, conf_name='backy')
    config = Config(section='DEFAULTS')

    # logging ERROR only when machine output is selected
    if args.machine_output:
        init_logging(config.get('logfile'), logging.ERROR, debug)
    else:
        init_logging(config.get('logfile'), console_level, debug)

    commands = Commands(args.machine_output, args.skip_header, args.human_readable, Config)
    func = getattr(commands, args.func)

    # Pass over to function: forward all parsed CLI options except the
    # global ones that the handler does not accept.
    func_args = dict(args._get_kwargs())
    del func_args['configfile']
    del func_args['func']
    del func_args['verbose']
    del func_args['debug']
    del func_args['version']
    del func_args['machine_output']
    del func_args['skip_header']
    del func_args['human_readable']

    try:
        logger.debug('backup.{0}(**{1!r})'.format(args.func, func_args))
        func(**func_args)
        logger.info('Backy complete.\n')
        sys.exit(0)
    except Exception as e:
        # Full traceback only in debug mode; otherwise a terse message.
        if args.debug:
            logger.error('Unexpected exception')
            logger.exception(e)
        else:
            logger.error(e)
        logger.error('Backy failed.\n')
        sys.exit(100)
def restore(self, version_uid, target, sparse=False, force=False, continue_from=0):
    """Restore a backup version to a target.

    Runs in two phases. Phase 1 iterates the version's block metadata:
    blocks without data are written as zeroes (or skipped with ``sparse``)
    and a read job is queued for every block that has data. Phase 2 drains
    the read results, verifies each block's checksum, and writes the data
    to the target via the io layer.

    :param version_uid: uid of the version to restore (raises via
        ``get_version`` if it does not exist).
    :param target: url-like target, e.g. file:///dev/sda or rbd://pool/imagename.
    :param sparse: if True, do not write zero blocks (file/rbd targets only).
    :param force: force overwrite of existing files/devices/images.
    :param continue_from: skip all blocks with id below this value
        (for continuing a partially failed restore).
    :raises LockError: if the version is currently locked (e.g. in backup).
    """
    # See if the version is locked, i.e. currently in backup
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    self.locking.unlock(version_uid)  # no need to keep it locked

    stats = {
        'bytes_read': 0,
        'blocks_read': 0,
        'bytes_written': 0,
        'blocks_written': 0,
        'bytes_throughput': 0,
        'blocks_throughput': 0,
        'bytes_sparse': 0,
        'blocks_sparse': 0,
    }
    version = self.meta_backend.get_version(version_uid)  # raise if version does not exist
    if continue_from:
        # FIX: the original format string had only one placeholder, so
        # continue_from was silently dropped from the notification.
        notify(self.process_name, 'Restoring Version {} from block id {}'.format(version_uid, continue_from))
    else:
        notify(self.process_name, 'Restoring Version {}'.format(version_uid))
    blocks = self.meta_backend.get_blocks_by_version(version_uid)
    # Count once; the original issued this (potentially expensive) query twice.
    num_blocks = blocks.count()

    io = self.get_io_by_source(target)
    io.open_w(target, version.size_bytes, force)

    read_jobs = 0
    _log_every_jobs = num_blocks // 200 + 1  # about every half percent
    _log_jobs_counter = 0
    t1 = time.time()
    t_last_run = 0
    # --- Phase 1: queue reads for data blocks, write sparse/zero blocks ---
    for i, block in enumerate(blocks.yield_per(1000)):
        if block.id < continue_from:
            continue
        _log_jobs_counter -= 1
        if block.uid:
            self.data_backend.read(block.deref())  # adds a read job
            read_jobs += 1
        elif not sparse:
            io.write(block, b'\0'*block.size)
            stats['blocks_written'] += 1
            stats['bytes_written'] += block.size
            stats['blocks_throughput'] += 1
            stats['bytes_throughput'] += block.size
            logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                block.id,
                block.size,
                ))
        else:
            stats['blocks_sparse'] += 1
            stats['bytes_sparse'] += block.size
            logger.debug('Ignored sparse block {}.'.format(
                block.id,
                ))

        # Periodic (at most once per second) progress logging/notification.
        if time.time() - t_last_run >= 1:
            t_last_run = time.time()
            t2 = time.time()
            dt = t2-t1
            logger.debug(io.thread_status() + " " + self.data_backend.thread_status())
            io_queue_status = io.queue_status()
            db_queue_status = self.data_backend.queue_status()
            _status = status(
                'Restore phase 1/2 (sparse) to {}'.format(target),
                db_queue_status['rq_filled']*100,
                io_queue_status['wq_filled']*100,
                (i + 1) / num_blocks * 100,
                stats['bytes_throughput'] / dt,
                round(num_blocks / (i+1) * dt - dt),
                )
            notify(self.process_name, _status)
            if _log_jobs_counter <= 0:
                _log_jobs_counter = _log_every_jobs
                logger.info(_status)

    # --- Phase 2: collect read results, verify checksums, write data ---
    stats = {
        'bytes_read': 0,
        'blocks_read': 0,
        'bytes_written': 0,
        'blocks_written': 0,
        'bytes_sparse': 0,
        'blocks_sparse': 0,
        'bytes_throughput': 0,
        'blocks_throughput': 0,
    }
    _log_every_jobs = read_jobs // 200 + 1  # about every half percent
    _log_jobs_counter = 0
    t1 = time.time()
    t_last_run = 0
    min_sequential_block_id = MinSequential(continue_from)  # for finding the minimum block-ID until which we have restored ALL blocks
    for i in range(read_jobs):
        _log_jobs_counter -= 1
        try:
            # Poll with a short timeout so a dead backend cannot block forever.
            while True:
                try:
                    block, offset, length, data = self.data_backend.read_get(timeout=.1)
                except queue.Empty:  # timeout occured
                    continue
                else:
                    break
        except Exception as e:
            # TODO (restore): write information for continue
            logger.error("Exception during reading from the data backend: {}".format(str(e)))
            sys.exit(6)

        assert len(data) == block.size
        stats['blocks_read'] += 1
        stats['bytes_read'] += block.size
        data_checksum = self.hash_function(data).hexdigest()

        def callback(local_block_id, local_block_size):
            # Bind id AND size now: the async writer may run this after the
            # loop variable 'block' has advanced (late-binding closure fix;
            # the original read block.size at call time).
            def f():
                min_sequential_block_id.put(local_block_id)
                stats['blocks_written'] += 1
                stats['bytes_written'] += local_block_size
                stats['blocks_throughput'] += 1
                stats['bytes_throughput'] += local_block_size
            return f
        io.write(block, data, callback(block.id, block.size))

        if data_checksum != block.checksum:
            logger.error('Checksum mismatch during restore for block '
                '{} (is: {} should-be: {}, block-valid: {}). Block '
                'restored is invalid. Continuing.'.format(
                    block.id,
                    data_checksum,
                    block.checksum,
                    block.valid,
                    ))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
        else:
            logger.debug('Restored block {} successfully ({} bytes).'.format(
                block.id,
                block.size,
                ))

        # Periodic (at most once per second) progress logging/notification.
        if time.time() - t_last_run >= 1:
            t_last_run = time.time()
            t2 = time.time()
            dt = t2-t1
            logger.debug(io.thread_status() + " " + self.data_backend.thread_status())
            io_queue_status = io.queue_status()
            db_queue_status = self.data_backend.queue_status()
            _status = status(
                'Restore phase 2/2 (data) to {}'.format(target),
                db_queue_status['rq_filled']*100,
                io_queue_status['wq_filled']*100,
                (i + 1) / read_jobs * 100,
                stats['bytes_throughput'] / dt,
                round(read_jobs / (i+1) * dt - dt),
                'Last ID: {}'.format(min_sequential_block_id.get()),
                )
            notify(self.process_name, _status)
            if _log_jobs_counter <= 0:
                _log_jobs_counter = _log_every_jobs
                logger.info(_status)

    self.locking.unlock(version_uid)
    io.close()