def _writer(self, id_):
    """ A threaded background writer """
    while True:
        entry = self._write_queue.get()
        if entry is None or self.fatal_error:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, data = entry
        time.sleep(self.write_throttling.consume(len(data)))
        t1 = time.time()
        key = self.bucket.new_key(uid)
        try:
            r = key.set_contents_from_string(data)
        except (
                OSError,
                boto.exception.BotoServerError,
                boto.exception.S3ResponseError,
                ) as e:
            # OSError happens when the S3 host is gone (i.e. network died,
            # host down, ...). boto tries hard to recover, however after
            # several attempts it will give up and raise.
            # BotoServerError happens when there is no server.
            # S3ResponseError sometimes happens when the cluster is about
            # to shut down. Hard to reproduce because the writer must write
            # in exactly this moment.
            # We let the backup job die here fatally.
            self.fatal_error = e
            logger.error('Fatal error, dying: {}'.format(e))
            #exit('Fatal error: {}'.format(e))  # this only raises SystemExit
            os._exit(11)
        t2 = time.time()
        assert r == len(data)
        self._write_queue.task_done()
        logger.debug('Writer {} wrote data async. uid {} in {:.2f}s (Queue size is {})'.format(
            id_, uid, t2 - t1, self._write_queue.qsize()))

def _writer(self, id_):
    """ A threaded background writer """
    #bucket = None
    client = None
    while True:
        self.writer_thread_status[id_] = STATUS_QUEUE
        entry = self._write_queue.get()
        self.writer_thread_status[id_] = STATUS_NOTHING
        if entry is None or self.last_exception:
            logger.debug("Writer {} finishing.".format(id_))
            break
        if client is None:
            client = self._get_client()
        uid, enc_envkey, enc_version, enc_nonce, data, callback = entry
        self.writer_thread_status[id_] = STATUS_THROTTLING
        time.sleep(self.write_throttling.consume(len(data)))
        self.writer_thread_status[id_] = STATUS_NOTHING
        try:
            self.writer_thread_status[id_] = STATUS_WRITING
            client.put_object(self.bucket_name, uid, io.BytesIO(data), len(data))
            #client.upload_fileobj(io.BytesIO(data), Key=uid, Bucket=self._bucket_name)
            self.writer_thread_status[id_] = STATUS_NOTHING
            #if random.random() > 0.9:
            #    raise ValueError("This is a test")
        except Exception as e:
            self.last_exception = e
        else:
            if callback:
                callback(uid, enc_envkey, enc_version, enc_nonce)
        self._write_queue.task_done()

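# Note on throttling: the writers above sleep for whatever
# self.write_throttling.consume(len(data)) returns. The throttling class itself is
# not part of this section; below is a minimal token-bucket sketch of such an
# interface. The class name and attributes are assumptions for illustration only,
# not the project's actual implementation.
import time

class _TokenBucketSketch:
    """Illustrative rate limiter: consume(size) returns seconds the caller should sleep."""

    def __init__(self, bytes_per_second):
        self.rate = bytes_per_second  # 0 or None disables throttling
        self._allowance = bytes_per_second
        self._last = time.time()

    def consume(self, size):
        if not self.rate:
            return 0.0
        now = time.time()
        # refill the bucket for the elapsed time, capped at one second's worth
        self._allowance = min(self.rate, self._allowance + (now - self._last) * self.rate)
        self._last = now
        self._allowance -= size
        if self._allowance < 0:
            # bucket overdrawn: tell the caller how long to sleep to amortize it
            return -self._allowance / self.rate
        return 0.0
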
def _reader(self, id_):
    """ A threaded background reader """
    client = None
    while True:
        block = self._read_queue.get()  # contains block
        if block is None or self.last_exception:
            logger.debug("Reader {} finishing.".format(id_))
            break
        if client is None:
            client = self._get_client()
        t1 = time.time()
        try:
            self.reader_thread_status[id_] = STATUS_READING
            data = self.read_raw(block, client)
            self.reader_thread_status[id_] = STATUS_NOTHING
        #except FileNotFoundError:
        except Exception as e:
            self.last_exception = e
        else:
            self._read_data_queue.put((block, data))
        t2 = time.time()
        self._read_queue.task_done()
        logger.debug('Reader {} read data async. uid {} in {:.2f}s (Queue size is {})'.format(
            id_, block.uid, t2 - t1, self._read_queue.qsize()))

def rm_many(self, uids):
    """ Deletes many uids from the data backend and returns a list
    of uids that couldn't be deleted.
    """
    # "The request contains a list of up to 1000 keys that you want to delete."
    no_deletes = []
    for chunk in chunks(uids, 1000):
        logger.debug("About to delete {} objects from the backend.".format(len(chunk)))
        objects = [{'Key': uid} for uid in chunk]
        response = self.bucket.delete_objects(
            Delete={
                'Objects': objects,
                #'Quiet': True|False
            },
            RequestPayer='requester',
        )
        # {'Deleted': [{'Key': 'a04ab9bcc0BK6vATCi95Bwb4Djriiy5B'},
        deleted_objects = [d['Key'] for d in response['Deleted']]
        not_found_objects = set(chunk) - set(deleted_objects)
        no_deletes.extend(not_found_objects)
        logger.debug("Deleted {} keys, {} were not found.".format(
            len(deleted_objects), len(not_found_objects)))
    return no_deletes

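# rm_many() above relies on a chunks() helper to respect the 1000-key limit of the
# S3 DeleteObjects call. The helper itself is not included in this section; a minimal
# sketch, assuming it simply slices a list into fixed-size pieces:
def chunks_sketch(sequence, n):
    """Yield successive lists of at most n items from sequence."""
    for i in range(0, len(sequence), n):
        yield sequence[i:i + n]
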
def set_block(self, id, version_uid, block_uid, checksum, size, valid,
              _commit=True, _upsert=True):
    """ Upsert a block (or insert only when _upsert is False -
    this is only a performance improvement)
    """
    valid = 1 if valid else 0
    block = None
    if _upsert:
        block = self.session.query(Block).filter_by(
            id=id, version_uid=version_uid).first()
    if block:
        block.uid = block_uid
        block.checksum = checksum
        block.size = size
        block.valid = valid
        block.date = datetime.datetime.now()
    else:
        block = Block(
            id=id,
            version_uid=version_uid,
            uid=block_uid,
            checksum=checksum,
            size=size,
            valid=valid,
        )
        self.session.add(block)
    self._flush_block_counter += 1
    if self._flush_block_counter % self.FLUSH_EVERY_N_BLOCKS == 0:
        t1 = time.time()
        self.session.flush()  # saves some ram
        t2 = time.time()
        logger.debug('Flushed meta backend in {:.2f}s'.format(t2 - t1))
    if _commit:
        self.session.commit()

def _writer(self, id_=0):
    """ A threaded background writer """
    while True:
        entry = self._write_queue.get()
        if entry is None:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, data = entry
        path = os.path.join(self.path, self._path(uid))
        filename = self._filename(uid)
        time.sleep(self.write_throttling.consume(len(data)))
        t1 = time.time()
        try:
            with open(filename, 'wb') as f:
                r = f.write(data)
        except FileNotFoundError:
            makedirs(path)
            with open(filename, 'wb') as f:
                r = f.write(data)
        t2 = time.time()
        assert r == len(data)
        self._write_queue.task_done()
        logger.debug('Writer {} wrote data async. uid {} in {:.2f}s (Queue size is {})'.format(
            id_, uid, t2 - t1, self._write_queue.qsize()))

def _writer(self, id_):
    """ A threaded background writer """
    while True:
        entry = self._write_queue.get()
        if entry is None or self.last_exception:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, data, callback = entry
        self.writer_thread_status[id_] = STATUS_THROTTLING
        time.sleep(self.write_throttling.consume(len(data)))
        self.writer_thread_status[id_] = STATUS_NOTHING
        t1 = time.time()
        try:
            # storing data to key uid
            self.writer_thread_status[id_] = STATUS_WRITING
            #time.sleep(.1)
            self.writer_thread_status[id_] = STATUS_NOTHING
        except Exception as e:
            self.last_exception = e
        else:
            t2 = time.time()
            # assert r == len(data)
            if callback:
                callback(uid)
        self._write_queue.task_done()

def set_block(self, id, version_uid, block_uid, checksum, size, valid,
              enc_envkey=b'', enc_version=0, enc_nonce=None, _commit=True):
    """ insert a block """
    valid = 1 if valid else 0
    if enc_envkey:
        enc_envkey = binascii.hexlify(enc_envkey).decode('ascii')
    if enc_nonce:
        enc_nonce = binascii.hexlify(enc_nonce).decode('ascii')
    block = Block(
        id=id,
        version_uid=version_uid,
        date=datetime.datetime.utcnow(),  # as func.now creates timezone stamps...
        uid=block_uid,
        checksum=checksum,
        size=size,
        valid=valid,
        enc_envkey=enc_envkey,
        enc_version=enc_version,
        enc_nonce=enc_nonce,
    )
    self.session.add(block)
    self._flush_block_counter += 1
    if self._flush_block_counter % self.FLUSH_EVERY_N_BLOCKS == 0:
        t1 = time.time()
        self.session.flush()  # saves some ram
        t2 = time.time()
        logger.debug('Flushed meta backend in {:.2f}s'.format(t2 - t1))
    if _commit:
        self.session.commit()
    return block

def save(self, data):
    uid = self._uid()
    t1 = time.time()
    key = self.bucket.new_key(uid)
    try:
        r = key.set_contents_from_string(data)
    except (
            OSError,
            boto.exception.BotoServerError,
            boto.exception.S3ResponseError,
            ) as e:
        # OSError happens when the S3 host is gone (i.e. network died,
        # host down, ...). boto tries hard to recover, however after
        # several attempts it will give up and raise.
        # BotoServerError happens when there is no server.
        # S3ResponseError sometimes happens when the cluster is about
        # to shut down. Hard to reproduce because the writer must write
        # in exactly this moment.
        # We let the backup job die here fatally.
        logger.error('Fatal error, dying: {}'.format(e))
        #exit('Fatal error: {}'.format(e))  # this only raises SystemExit
        os._exit(13)
    t2 = time.time()
    assert r == len(data)
    logger.debug('Wrote data uid {} in {:.2f}s'.format(uid, t2 - t1))
    return uid

def write(self, version_uid, offset, data):
    """ Copy on write backup writer """
    dataio = BytesIO(data)
    cow = self.cow[version_uid]
    write_list = self._block_list(version_uid, offset, len(data))
    for block, _offset, length in write_list:
        if block is None:
            logger.warning('Tried to save data beyond device (offset {})'.format(offset))
            continue  # raise? That'd be a write outside the device...
        if block.id in cow:
            # the block is already copied, so update it.
            block_uid = cow[block.id]
            self._update(block_uid, dataio.read(length), _offset)
            logger.debug('Updated cow changed block {} into {})'.format(block.id, block_uid))
        else:
            # read the block from the original, update it and write it back
            write_data = BytesIO(self.backy.data_backend.read_raw(block.uid))
            write_data.seek(_offset)
            write_data.write(dataio.read(length))
            write_data.seek(0)
            block_uid = self._save(write_data.read())
            cow[block.id] = block_uid
            logger.debug('Wrote cow changed block {} into {})'.format(block.id, block_uid))

def expire_version(self, version_uid, expire):
    version = self.get_version(version_uid)
    version.expire = expire
    self.session.commit()
    logger.debug('Set expire for version (UID {})'.format(version_uid))

def _writer(self, id_=0):
    """ A threaded background writer """
    while True:
        entry = self._write_queue.get()
        if entry is None or self.last_exception:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, enc_envkey, enc_version, enc_nonce, data, callback = entry
        path = os.path.join(self.path, self._path(uid))
        filename = self._filename(uid)
        self.writer_thread_status[id_] = STATUS_THROTTLING
        time.sleep(self.write_throttling.consume(len(data)))
        self.writer_thread_status[id_] = STATUS_NOTHING
        t1 = time.time()
        try:
            try:
                self.writer_thread_status[id_] = STATUS_WRITING
                with open(filename, 'wb') as f:
                    r = f.write(data)
                self.writer_thread_status[id_] = STATUS_NOTHING
            except FileNotFoundError:
                self.writer_thread_status[id_] = STATUS_WRITING
                makedirs(path)
                with open(filename, 'wb') as f:
                    r = f.write(data)
                self.writer_thread_status[id_] = STATUS_NOTHING
            assert r == len(data)
        except Exception as e:
            self.last_exception = e
        else:
            t2 = time.time()
            if callback:
                callback(uid, enc_envkey, enc_version, enc_nonce)
        self._write_queue.task_done()

def unprotect_version(self, uid):
    version = self.get_version(uid)
    version.protected = 0
    self.session.commit()
    logger.debug('Marked version unprotected (UID {})'.format(uid))

def set_version_valid(self, uid):
    version = self.get_version(uid)
    version.valid = 1
    self.session.commit()
    logger.debug('Marked version valid (UID {})'.format(uid))

def _reader(self, id_):
    """ self._inqueue contains block_ids to be read.
        self._outqueue contains (block_id, data, data_checksum)
    """
    while True:
        entry = self._inqueue.get()
        if entry is None:
            logger.debug("IO {} finishing.".format(id_))
            self._outqueue.put(None)  # also let the outqueue end
            self._inqueue.task_done()
            break
        block_id, read, metadata = entry
        if not read:
            self._outqueue.put((block_id, None, None, metadata))
        else:
            start_offset = block_id * self.block_size
            end_offset = min(block_id * self.block_size + self.block_size, self._size)
            block_size = end_offset - start_offset
            self.writer_thread_status[id_] = STATUS_READING
            data = generate_block(block_id, block_size)
            self.writer_thread_status[id_] = STATUS_NOTHING
            if not data:
                raise RuntimeError('EOF reached on source when there should be data.')
            data_checksum = self.hash_function(data).hexdigest()
            self._outqueue.put((block_id, data, data_checksum, metadata))
        self._inqueue.task_done()

def _writer(self, id_):
    """ self._write_queue contains a list of (Block, data) to be written. """
    with open(self.io_name, 'rb+') as _write_file:
        while True:
            entry = self._write_queue.get()
            if entry is None:
                logger.debug("IO writer {} finishing.".format(id_))
                self._write_queue.task_done()
                break
            block, data, callback = entry
            offset = block.id * self.block_size
            self.writer_thread_status[id_] = STATUS_SEEKING
            _write_file.seek(offset)
            self.writer_thread_status[id_] = STATUS_WRITING
            written = _write_file.write(data)
            posix_fadvise(_write_file.fileno(), offset, offset + written, os.POSIX_FADV_DONTNEED)
            self.writer_thread_status[id_] = STATUS_NOTHING
            assert written == len(data)
            if callback:
                callback()
            self._write_queue.task_done()

def cleanup_fast(self, dt=3600):
    """ Delete unreferenced blob UIDs """
    if not self.locking.lock('backy-cleanup-fast'):
        raise LockError('Another backy cleanup is running.')
    delete_candidates = self.meta_backend.get_delete_candidates(dt=dt)
    try:
        for candidates in grouper(100, delete_candidates):
            # 100 is the number that works here smoothly within about 10-30s
            # per batch. With more than 70s there's a timeout and the job
            # is re-sent. Maybe we must either learn the best amount here
            # or we make this configurable...
            logger.debug('Cleanup: Removing UIDs {}'.format(', '.join(candidates)))
            try:
                self.data_backend.rm_many(candidates)
            except FileNotFoundError:
                continue
    except:
        logger.error('Error during cleanup. Reverting metadata changes.')
        self.meta_backend.revert_delete_candidates(delete_candidates)
        self.locking.unlock('backy-cleanup-fast')
        raise
    else:
        self.meta_backend.remove_delete_candidates(delete_candidates)
        logger.info('Cleanup: Removed {} blobs'.format(len(delete_candidates)))
    self.locking.unlock('backy-cleanup-fast')

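# cleanup_fast() above batches its delete candidates with grouper(100, ...). That
# helper is not part of this section; a sketch under the assumption that it groups
# any iterable into lists of up to n items (similar to the chunks() sketch earlier,
# but usable on generators as well):
import itertools

def grouper_sketch(n, iterable):
    """Yield lists of up to n items taken from any iterable."""
    it = iter(iterable)
    while True:
        batch = list(itertools.islice(it, n))
        if not batch:
            return
        yield batch
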
def get_delete_candidates(self, dt=3600):
    _stat_i = 0
    _stat_remove_from_delete_candidates = 0
    _stat_delete_candidates = 0
    while True:
        delete_candidates = self.session.query(
            DeletedBlock
        ).filter(
            DeletedBlock.time < (inttime() - dt)
        ).limit(250).all()  # http://stackoverflow.com/questions/7389759/memory-efficient-built-in-sqlalchemy-iterator-generator
        if not delete_candidates:
            break

        _remove_from_delete_candidate_uids = set()
        _delete_candidates = set()
        for candidate in delete_candidates:
            _stat_i += 1
            if _stat_i % 1000 == 0:
                logger.info("Cleanup-fast: {} false positives, {} data deletions.".format(
                    _stat_remove_from_delete_candidates,
                    _stat_delete_candidates,
                ))
            block = self.session.query(
                Block
            ).filter(
                Block.uid == candidate.uid
            ).limit(1).scalar()
            if block:
                _remove_from_delete_candidate_uids.add(candidate.uid)
                _stat_remove_from_delete_candidates += 1
            else:
                _delete_candidates.add(candidate.uid)
                _stat_delete_candidates += 1

        if _remove_from_delete_candidate_uids:
            logger.debug("Cleanup-fast: Removing {} false positive delete candidates".format(
                len(_remove_from_delete_candidate_uids)))
            self.session.query(
                DeletedBlock
            ).filter(
                DeletedBlock.uid.in_(_remove_from_delete_candidate_uids)
            ).delete(synchronize_session=False)

        if _delete_candidates:
            logger.debug("Cleanup-fast: Sending {} delete candidates for final deletion".format(
                len(_delete_candidates)))
            self.session.query(
                DeletedBlock
            ).filter(
                DeletedBlock.uid.in_(_delete_candidates)
            ).delete(synchronize_session=False)
            yield(_delete_candidates)

    logger.info("Cleanup-fast: Cleanup finished. {} false positives, {} data deletions.".format(
        _stat_remove_from_delete_candidates,
        _stat_delete_candidates,
    ))

def read(self, version_uid, offset, length):
    read_list = self._block_list(version_uid, offset, length)
    data = []
    for block, offset, length in read_list:
        logger.debug('Reading block {}:{}:{}'.format(block, offset, length))
        if block is None:
            data.append(b'\0' * length)
        else:
            data.append(self._read(block.uid, offset, length))
    return b''.join(data)

def set_block_enc_envkey(self, block, enc_envkey):
    block.enc_envkey = binascii.hexlify(enc_envkey).decode('ascii')
    self.session.add(block)
    self._flush_block_counter += 1
    if self._flush_block_counter % self.FLUSH_EVERY_N_BLOCKS == 0:
        t1 = time.time()
        self.session.flush()  # saves some ram
        t2 = time.time()
        logger.debug('Flushed meta backend in {:.2f}s'.format(t2 - t1))
    return block

def cleanup_fast(self, dt=3600):
    """ Delete unreferenced blob UIDs """
    if not self.locking.lock('backy-cleanup-fast'):
        raise LockError('Another backy cleanup is running.')
    for uid_list in self.meta_backend.get_delete_candidates(dt):
        logger.debug('Cleanup-fast: Deleting UIDs from data backend: {}'.format(uid_list))
        no_del_uids = self.data_backend.rm_many(uid_list)
        if no_del_uids:
            logger.info('Cleanup-fast: Unable to delete these UIDs from data backend: {}'.format(no_del_uids))
    self.locking.unlock('backy-cleanup-fast')

def cleanup_fast(self, dt=3600):
    """ Delete unreferenced blob UIDs """
    if not self.locking.lock('backy-cleanup-fast'):
        raise LockError('Another backy cleanup is running.')
    for uid_list in self.meta_backend.get_delete_candidates():
        logger.debug('Cleanup-fast: Deleting UIDs from data backend: {}'.format(uid_list))
        no_del_uids = self.data_backend.rm_many(uid_list)
        if no_del_uids:
            logger.info('Cleanup-fast: Unable to delete these UIDs from data backend: {}'.format(no_del_uids))
    self.locking.unlock('backy-cleanup-fast')

def save(self, data):
    uid = self._uid()
    path = os.path.join(self.path, self._path(uid))
    filename = self._filename(uid)
    t1 = time.time()
    try:
        with open(filename, 'wb') as f:
            r = f.write(data)
    except FileNotFoundError:
        makedirs(path)
        with open(filename, 'wb') as f:
            r = f.write(data)
    t2 = time.time()
    assert r == len(data)
    logger.debug('Wrote data uid {} in {:.2f}s'.format(uid, t2 - t1))
    return uid

def _reader(self, id_):
    """ A threaded background reader """
    while True:
        block = self._read_queue.get()  # contains block
        if block is None or self.fatal_error:
            logger.debug("Reader {} finishing.".format(id_))
            break
        t1 = time.time()
        try:
            data = self.read_raw(block.uid)
        except FileNotFoundError:
            self._read_data_queue.put((block, None))  # catch this!
        else:
            self._read_data_queue.put((block, data))
        t2 = time.time()
        self._read_queue.task_done()
        logger.debug('Reader {} read data async. uid {} in {:.2f}s (Queue size is {})'.format(
            id_, block.uid, t2 - t1, self._read_queue.qsize()))

def _writer(self, id_):
    """ self._write_queue contains a list of (Block, data) to be written. """
    while True:
        entry = self._write_queue.get()
        if entry is None:
            logger.debug("IO writer {} finishing.".format(id_))
            break
        block, data, callback = entry
        self.writer_thread_status[id_] = STATUS_WRITING
        # write nothing
        #time.sleep(.1)
        self.writer_thread_status[id_] = STATUS_NOTHING
        if callback:
            callback()
        self._write_queue.task_done()

def fixate(self, cow_version_uid):
    # save blocks into version
    logger.info('Fixating version {} with {} blocks (PLEASE WAIT)'.format(
        cow_version_uid, len(self.cow[cow_version_uid].items())))
    for block_id, block_uid in self.cow[cow_version_uid].items():
        logger.debug('Fixating block {} uid {}'.format(block_id, block_uid))
        data = self._read(block_uid)
        checksum = self.hash_function(data).hexdigest()
        if not self.backy.data_backend._SUPPORTS_PARTIAL_WRITES:
            # dump changed data
            new_uid = self.backy.data_backend.save(data, _sync=True)
            logger.debug('Stored block {} with local uid {} to uid {}'.format(
                block_id, block_uid, new_uid))
            block_uid = new_uid
        self.backy.meta_backend.set_block(block_id, cow_version_uid, block_uid,
                                          checksum, len(data), valid=1, _commit=False)
    self.backy.meta_backend.set_version_valid(cow_version_uid)
    self.backy.meta_backend._commit()
    logger.info('Fixation done. Deleting temporary data (PLEASE WAIT)')
    # TODO: Delete COW blocks and also those from block_cache
    if self.backy.data_backend._SUPPORTS_PARTIAL_WRITES:
        for block_uid in self.block_cache:
            # TODO if this block is in the current version (and in no other?)
            # rm this block from cache
            # rm block uid from self.block_cache
            pass
        for block_id, block_uid in self.cow[cow_version_uid].items():
            # TODO: rm block_uid from cache
            pass
    else:
        # backends that support partial writes will be written to directly.
        # So there's no need to cleanup.
        pass
    del(self.cow[cow_version_uid])
    logger.info('Finished.')

def _writer(self, id_):
    """ self._write_queue contains a list of (Block, data) to be written. """
    while True:
        entry = self._write_queue.get()
        if entry is None:
            logger.debug("IO writer {} finishing.".format(id_))
            break
        block, data, callback = entry
        offset = block.id * self.block_size
        self.writer_thread_status[id_] = STATUS_WRITING
        written = self._write_rbd.write(data, offset, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
        assert written == len(data)
        self.writer_thread_status[id_] = STATUS_NOTHING
        if callback:
            callback()
        self._write_queue.task_done()

def cleanup_full(self, prefix=None):
    """ Delete unreferenced blob UIDs starting with <prefix> """
    # in this mode, we compare all existing uids in data and meta.
    # make sure, no other backy will start
    if not self.locking.lock('backy'):
        self.locking.unlock('backy')
        raise LockError('Other backy instances are running.')
    # make sure, no other backy is running
    if len(find_other_procs(self.process_name)) > 1:
        raise LockError('Other backy instances are running.')
    active_blob_uids = set(self.data_backend.get_all_blob_uids(prefix))
    active_block_uids = set(self.meta_backend.get_all_block_uids(prefix))
    delete_candidates = active_blob_uids.difference(active_block_uids)
    for delete_candidate in delete_candidates:
        logger.debug('Cleanup: Removing UID {}'.format(delete_candidate))
        try:
            self.data_backend.rm(delete_candidate)
        except FileNotFoundError:
            continue
    logger.info('Cleanup: Removed {} blobs'.format(len(delete_candidates)))
    self.locking.unlock('backy')

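# cleanup_full() above refuses to run while other backy processes exist, via
# find_other_procs(self.process_name). That helper is not shown in this section;
# below is a hypothetical sketch using psutil. This is an assumption for
# illustration only -- the real implementation may scan /proc or use another
# mechanism entirely.
import psutil

def find_other_procs_sketch(process_name):
    """Return the processes whose command line mentions process_name."""
    procs = []
    for p in psutil.process_iter(['cmdline']):
        cmdline = ' '.join(p.info['cmdline'] or [])
        if process_name in cmdline:
            procs.append(p)
    return procs
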
def _reader(self, id_):
    """ self._inqueue contains Blocks.
        self._outqueue contains (block, data, data_checksum)
    """
    with open(self.io_name, 'rb') as source_file:
        while True:
            block = self._inqueue.get()
            if block is None:
                logger.debug("IO {} finishing.".format(id_))
                self._outqueue.put(None)  # also let the outqueue end
                break
            offset = block.id * self.block_size
            t1 = time.time()
            source_file.seek(offset)
            t2 = time.time()
            data = source_file.read(self.block_size)
            t3 = time.time()
            # throw away cache
            posix_fadvise(source_file.fileno(), offset, offset + self.block_size, os.POSIX_FADV_DONTNEED)
            if not data:
                raise RuntimeError('EOF reached on source when there should be data.')
            data_checksum = self.hash_function(data).hexdigest()
            if not block.valid:
                logger.debug('IO {} re-read block (because it was invalid) {} (checksum {})'.format(
                    id_, block.id, data_checksum))
            else:
                logger.debug('IO {} read block {} (len {}, checksum {}...) in {:.2f}s (seek in {:.2f}s) '
                             '(Inqueue size: {}, Outqueue size: {})'.format(
                                 id_, block.id, len(data), data_checksum[:16], t3 - t1, t2 - t1,
                                 self._inqueue.qsize(), self._outqueue.qsize()))
            self._outqueue.put((block, data, data_checksum))
            self._inqueue.task_done()

def _reader(self, id_):
    """ self._inqueue contains block_ids to be read.
        self._outqueue contains (block_id, data, data_checksum)
    """
    with open(self.io_name, 'rb') as source_file:
        while True:
            entry = self._inqueue.get()
            if entry is None:
                logger.debug("IO {} finishing.".format(id_))
                self._outqueue.put(None)  # also let the outqueue end
                self._inqueue.task_done()
                break
            block_id, read, metadata = entry
            if not read:
                self._outqueue.put((block_id, None, None, metadata))
            else:
                offset = block_id * self.block_size
                t1 = time.time()
                self.reader_thread_status[id_] = STATUS_SEEKING
                source_file.seek(offset)
                t2 = time.time()
                self.reader_thread_status[id_] = STATUS_READING
                data = source_file.read(self.block_size)
                t3 = time.time()
                # throw away cache
                self.reader_thread_status[id_] = STATUS_FADVISE
                posix_fadvise(source_file.fileno(), offset, offset + self.block_size, os.POSIX_FADV_DONTNEED)
                self.reader_thread_status[id_] = STATUS_NOTHING
                if not data:
                    raise RuntimeError('EOF reached on source when there should be data.')
                data_checksum = self.hash_function(data).hexdigest()
                self._outqueue.put((block_id, data, data_checksum, metadata))
            self._inqueue.task_done()

def _writer(self, id_):
    """ A threaded background writer """
    while True:
        entry = self._write_queue.get()
        if entry is None or self.fatal_error:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, data = entry
        time.sleep(self.write_throttling.consume(len(data)))
        t1 = time.time()
        try:
            # res = self.conn.create_blob_from_text(
            #     container_name=self.container_name,
            #     blob_name=uid,
            #     text=data,
            #     validate_content=True,
            #     encoding='ascii'
            # )
            string_data = data
            if not isinstance(string_data, bytes):
                string_data = string_data.encode("utf-8")
            fp = BytesIO(string_data)
            res = self.conn.create_blob_from_bytes(
                container_name=self.container_name,
                blob_name=uid,
                blob=fp.getvalue(),
                validate_content=True,
            )
        except (OSError, Exception) as e:
            # We let the backup job die here fatally.
            self.fatal_error = e
            logger.error('Fatal error, dying: {}'.format(e))
            print('Error on Write File', e)
            #exit('Fatal error: {}'.format(e))  # this only raises SystemExit
            os._exit(11)
        t2 = time.time()
        self._write_queue.task_done()
        logger.debug('Writer {} wrote data async. uid {} in {:.2f}s (Queue size is {})'.format(
            id_, uid, t2 - t1, self._write_queue.qsize()))

def _writer(self, id_=0):
    """ A threaded background writer """
    while True:
        entry = self._queue.get()
        if entry is None:
            logger.debug("Writer {} finishing.".format(id_))
            break
        uid, data = entry
        path = os.path.join(self.path, self._path(uid))
        filename = self._filename(uid)
        t1 = time.time()
        try:
            with open(filename, 'wb') as f:
                r = f.write(data)
        except FileNotFoundError:
            makedirs(path)
            with open(filename, 'wb') as f:
                r = f.write(data)
        t2 = time.time()
        assert r == len(data)
        self._queue.task_done()
        logger.debug('Writer {} wrote data async. uid {} in {:.2f}s (Queue size is {})'.format(
            id_, uid, t2 - t1, self._queue.qsize()))

def write(self, version_uid, offset, data):
    """ Copy on write backup writer """
    dataio = BytesIO(data)
    cow = self.cow[version_uid]
    write_list = self._block_list(version_uid, offset, len(data))
    for block, _offset, length in write_list:
        if block is None:
            logger.warning('Tried to save data beyond device (offset {})'.format(offset))
            continue  # raise? That'd be a write outside the device...
        if block.id in cow:
            # the block is already copied, so update it.
            block_uid = cow[block.id]
            self._update(block_uid, dataio.read(length), _offset)
            logger.debug('Updated cow changed block {} into {})'.format(block.id, block_uid))
        else:
            # read the block from the original, update it and write it back
            write_data = BytesIO(self.backy.data_backend.read(block.uid))
            write_data.seek(_offset)
            write_data.write(dataio.read(length))
            write_data.seek(0)
            block_uid = self._save(write_data.read())
            cow[block.id] = block_uid
            logger.debug('Wrote cow changed block {} into {})'.format(block.id, block_uid))

def get(self):
    try:
        block = self._read_list.pop()
    except IndexError:
        raise  # pop from an empty list
    offset = block.id * self.block_size
    t1 = time.time()
    self.source_file.seek(offset)
    t2 = time.time()
    data = self.source_file.read(self.block_size)
    t3 = time.time()
    # throw away cache
    posix_fadvise(self.source_file.fileno(), offset, offset + self.block_size, os.POSIX_FADV_DONTNEED)
    if not data:
        raise RuntimeError('EOF reached on source when there should be data.')
    data_checksum = self.hash_function(data).hexdigest()
    if not block.valid:
        logger.debug('Re-read block (because it was invalid) {} (checksum {})'.format(
            block.id, data_checksum))
    else:
        logger.debug('Read block {} (len {}, checksum {}...) in {:.2f}s (seek in {:.2f}s)'.format(
            block.id, len(data), data_checksum[:16], t3 - t1, t2 - t1))
    return block, data, data_checksum

def _reader(self, id_):
    """ A threaded background reader """
    while True:
        block = self._read_queue.get()  # contains block
        if block is None or self.last_exception:
            logger.debug("Reader {} finishing.".format(id_))
            break
        t1 = time.time()
        try:
            self.reader_thread_status[id_] = STATUS_READING
            data = self.read_raw(block.id, block.size)
            self.reader_thread_status[id_] = STATUS_THROTTLING
        except Exception as e:
            self.last_exception = e
        else:
            time.sleep(self.read_throttling.consume(len(data)))
            self.reader_thread_status[id_] = STATUS_NOTHING
            #time.sleep(.5)
            self._read_data_queue.put((block, data))
        t2 = time.time()
        self._read_queue.task_done()
        logger.debug('Reader {} read data async. uid {} in {:.2f}s (Queue size is {})'.format(
            id_, block.uid, t2 - t1, self._read_queue.qsize()))

def _reader(self, id_):
    """ self._inqueue contains block_ids to be read.
        self._outqueue contains (block_id, data, data_checksum)
    """
    ioctx = self.cluster.open_ioctx(self.pool_name)
    with rbd.Image(ioctx, self.image_name, self.snapshot_name, read_only=True) as image:
        while True:
            entry = self._inqueue.get()
            if entry is None:
                logger.debug("IO {} finishing.".format(id_))
                self._outqueue.put(None)  # also let the outqueue end
                self._inqueue.task_done()
                break
            block_id, read, metadata = entry
            if not read:
                self._outqueue.put((block_id, None, None, metadata))
            else:
                offset = block_id * self.block_size
                t1 = time.time()
                self.reader_thread_status[id_] = STATUS_READING
                data = image.read(offset, self.block_size, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
                self.reader_thread_status[id_] = STATUS_NOTHING
                t2 = time.time()
                if not data:
                    raise RuntimeError('EOF reached on source when there should be data.')
                data_checksum = self.hash_function(data).hexdigest()
                self._outqueue.put((block_id, data, data_checksum, metadata))
            self._inqueue.task_done()

def restore(self, version_uid, target, sparse=False, force=False):
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    version = self.meta_backend.get_version(version_uid)  # raise if version not exists
    notify(self.process_name, 'Restoring Version {}. Getting blocks.'.format(version_uid))
    blocks = self.meta_backend.get_blocks_by_version(version_uid)

    io = self.get_io_by_source(target)
    io.open_w(target, version.size_bytes, force)

    read_jobs = 0
    for i, block in enumerate(blocks):
        if block.uid:
            self.data_backend.read(block.deref())  # adds a read job
            read_jobs += 1
        elif not sparse:
            io.write(block, b'\0'*block.size)
            logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                block.id, block.size))
        else:
            logger.debug('Ignored sparse block {}.'.format(block.id))
        notify(self.process_name,
               'Restoring Version {} to {} PREPARING AND SPARSE BLOCKS ({:.1f}%)'.format(
                   version_uid, target, (i + 1) / len(blocks) * 100))

    done_jobs = 0
    _log_every_jobs = read_jobs // 200 + 1  # about every half percent
    for i in range(read_jobs):
        block, offset, length, data = self.data_backend.read_get()
        assert len(data) == block.size
        data_checksum = self.hash_function(data).hexdigest()
        io.write(block, data)
        if data_checksum != block.checksum:
            logger.error('Checksum mismatch during restore for block '
                         '{} (is: {} should-be: {}, block-valid: {}). Block '
                         'restored is invalid. Continuing.'.format(
                             block.id, data_checksum, block.checksum, block.valid))
            self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
        else:
            logger.debug('Restored block {} successfully ({} bytes).'.format(
                block.id, block.size))
        notify(self.process_name, 'Restoring Version {} to {} ({:.1f}%)'.format(
            version_uid, target, (i + 1) / read_jobs * 100))
        if i % _log_every_jobs == 0 or i + 1 == read_jobs:
            logger.info('Restored {}/{} blocks ({:.1f}%)'.format(
                i + 1, read_jobs, (i + 1) / read_jobs * 100))
    self.locking.unlock(version_uid)

def restore(self, version_uid, target, sparse=False):
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    version = self.meta_backend.get_version(version_uid)  # raise if version not exists
    blocks = self.meta_backend.get_blocks_by_version(version_uid)
    with open(target, 'wb') as f:
        for block in blocks:
            f.seek(block.id * self.block_size)
            if block.uid:
                data = self.data_backend.read(block.uid)
                assert len(data) == block.size
                data_checksum = self.hash_function(data).hexdigest()
                written = f.write(data)
                assert written == len(data)
                if data_checksum != block.checksum:
                    logger.error('Checksum mismatch during restore for block '
                                 '{} (is: {} should-be: {}, block-valid: {}). Block '
                                 'restored is invalid. Continuing.'.format(
                                     block.id, data_checksum, block.checksum, block.valid))
                    self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                else:
                    logger.debug('Restored block {} successfully ({} bytes).'.format(
                        block.id, block.size))
            elif not sparse:
                f.write(b'\0'*block.size)
                logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                    block.id, block.size))
            else:
                logger.debug('Ignored sparse block {}.'.format(block.id))

        if f.tell() != version.size_bytes:
            # write last byte with \0, because this can only happen when
            # the last block was left over in sparse mode.
            last_block = blocks[-1]
            f.seek(last_block.id * self.block_size + last_block.size - 1)
            f.write(b'\0')
    self.locking.unlock(version_uid)

def restore(self, version_uid, target, sparse=False, force=False):
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    version = self.meta_backend.get_version(version_uid)  # raise if version not exists
    blocks = self.meta_backend.get_blocks_by_version(version_uid)

    io = self.get_io_by_source(target)
    io.open_w(target, version.size_bytes, force)

    for block in blocks:
        if block.uid:
            data = self.data_backend.read(block, sync=True)
            assert len(data) == block.size
            data_checksum = self.hash_function(data).hexdigest()
            io.write(block, data)
            if data_checksum != block.checksum:
                logger.error('Checksum mismatch during restore for block '
                             '{} (is: {} should-be: {}, block-valid: {}). Block '
                             'restored is invalid. Continuing.'.format(
                                 block.id, data_checksum, block.checksum, block.valid))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
            else:
                logger.debug('Restored block {} successfully ({} bytes).'.format(
                    block.id, block.size))
        elif not sparse:
            io.write(block, b'\0'*block.size)
            logger.debug('Restored sparse block {} successfully ({} bytes).'.format(
                block.id, block.size))
        else:
            logger.debug('Ignored sparse block {}.'.format(block.id))
    self.locking.unlock(version_uid)

def _reader(self, id_):
    """ self._inqueue contains Blocks.
        self._outqueue contains (block, data, data_checksum)
    """
    ioctx = self.cluster.open_ioctx(self.pool_name)
    with rbd.Image(ioctx, self.image_name, self.snapshot_name, read_only=True) as image:
        while True:
            block = self._inqueue.get()
            if block is None:
                logger.debug("IO {} finishing.".format(id_))
                self._outqueue.put(None)  # also let the outqueue end
                break
            offset = block.id * self.block_size
            t1 = time.time()
            data = image.read(offset, self.block_size, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
            t2 = time.time()
            # throw away cache
            if not data:
                raise RuntimeError('EOF reached on source when there should be data.')
            data_checksum = self.hash_function(data).hexdigest()
            if not block.valid:
                logger.debug('IO {} re-read block (because it was invalid) {} (checksum {})'.format(
                    id_, block.id, data_checksum))
            else:
                logger.debug('IO {} read block {} (checksum {}...) in {:.2f}s) '
                             '(Inqueue size: {}, Outqueue size: {})'.format(
                                 id_, block.id, data_checksum[:16], t2 - t1,
                                 self._inqueue.qsize(), self._outqueue.qsize()))
            self._outqueue.put((block, data, data_checksum))
            self._inqueue.task_done()

def _reader(self, id_):
    """ self._inqueue contains Blocks.
        self._outqueue contains (block, data, data_checksum)
    """
    ioctx = self.cluster.open_ioctx(self.pool_name)
    with rbd.Image(ioctx, self.image_name, self.snapshot_name, read_only=True) as image:
        while True:
            block = self._inqueue.get()
            if block is None:
                logger.debug("Reader {} finishing.".format(id_))
                self._outqueue.put(None)  # also let the outqueue end
                break
            offset = block.id * self.block_size
            t1 = time.time()
            data = image.read(offset, self.block_size, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
            t2 = time.time()
            # throw away cache
            if not data:
                raise RuntimeError('EOF reached on source when there should be data.')
            data_checksum = self.hash_function(data).hexdigest()
            if not block.valid:
                logger.debug('Reader {} re-read block (because it was invalid) {} (checksum {})'.format(
                    id_, block.id, data_checksum))
            else:
                logger.debug('Reader {} read block {} (checksum {}...) in {:.2f}s) '
                             '(Inqueue size: {}, Outqueue size: {})'.format(
                                 id_, block.id, data_checksum[:16], t2 - t1,
                                 self._inqueue.qsize(), self._outqueue.qsize()))
            self._outqueue.put((block, data, data_checksum))
            self._inqueue.task_done()

def scrub(self, version_uid, source=None, percentile=100):
    """ Returns a boolean (state). If False, there were errors, if True
    all was ok
    """
    if not self.locking.lock(version_uid):
        raise LockError('Version {} is locked.'.format(version_uid))
    self.meta_backend.get_version(version_uid)  # raise if version not exists
    blocks = self.meta_backend.get_blocks_by_version(version_uid)
    if source:
        self.reader.open(source)

    state = True
    for block in blocks:
        if block.uid:
            if percentile < 100 and random.randint(1, 100) > percentile:
                logger.debug('Scrub of block {} (UID {}) skipped (percentile is {}).'.format(
                    block.id, block.uid, percentile))
                continue
            try:
                data = self.data_backend.read(block.uid)
            except FileNotFoundError as e:
                logger.error('Blob not found: {}'.format(str(e)))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                state = False
                continue
            if len(data) != block.size:
                logger.error('Blob has wrong size: {} is: {} should be: {}'.format(
                    block.uid, len(data), block.size))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                state = False
                continue
            data_checksum = self.hash_function(data).hexdigest()
            if data_checksum != block.checksum:
                logger.error('Checksum mismatch during scrub for block '
                             '{} (UID {}) (is: {} should-be: {}).'.format(
                                 block.id, block.uid, data_checksum, block.checksum))
                self.meta_backend.set_blocks_invalid(block.uid, block.checksum)
                state = False
                continue
            else:
                if source:
                    source_data = self.reader.read(block, sync=True)
                    if source_data != data:
                        logger.error('Source data has changed for block {} '
                                     '(UID {}) (is: {} should-be: {}). NOT setting '
                                     'this block invalid, because the source looks '
                                     'wrong.'.format(
                                         block.id, block.uid,
                                         self.hash_function(source_data).hexdigest(),
                                         data_checksum))
                        state = False
                        # We are not setting the block invalid here because
                        # when the block is there AND the checksum is good,
                        # then the source is invalid.
                logger.debug('Scrub of block {} (UID {}) ok.'.format(block.id, block.uid))
        else:
            logger.debug('Scrub of block {} (UID {}) skipped (sparse).'.format(block.id, block.uid))

    if state == True:
        self.meta_backend.set_version_valid(version_uid)
    else:
        # version is set invalid by set_blocks_invalid.
        logger.error('Marked version invalid because it has errors: {}'.format(version_uid))
    if source:
        self.reader.close()  # wait for all readers
    self.locking.unlock(version_uid)
    return state

def backup(self, name, source, hints, from_version):
    """ Create a backup from source.
    If hints are given, they must be tuples of (offset, length, exists)
    where offset and length are integers and exists is a boolean. Then,
    only data within hints will be backed up.
    Otherwise, the backup reads source and looks if checksums match with
    the target.
    """
    stats = {
        'version_size_bytes': 0,
        'version_size_blocks': 0,
        'bytes_read': 0,
        'blocks_read': 0,
        'bytes_written': 0,
        'blocks_written': 0,
        'bytes_found_dedup': 0,
        'blocks_found_dedup': 0,
        'bytes_sparse': 0,
        'blocks_sparse': 0,
        'start_time': time.time(),
    }
    self.reader.open(source)
    source_size = self.reader.size()

    size = math.ceil(source_size / self.block_size)
    stats['version_size_bytes'] = source_size
    stats['version_size_blocks'] = size

    # Sanity check: check hints for validity, i.e. too high offsets, ...
    if hints:
        max_offset = max([h[0]+h[1] for h in hints])
        if max_offset > source_size:
            raise ValueError('Hints have higher offsets than source file.')

    if hints:
        sparse_blocks = blocks_from_hints([hint for hint in hints if not hint[2]], self.block_size)
        read_blocks = blocks_from_hints([hint for hint in hints if hint[2]], self.block_size)
    else:
        sparse_blocks = []
        read_blocks = range(size)
    sparse_blocks = set(sparse_blocks)
    read_blocks = set(read_blocks)

    try:
        version_uid = self._prepare_version(name, source_size, from_version)
    except RuntimeError as e:
        logger.error(str(e))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(4)
    except LockError as e:
        logger.error(str(e))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(99)
    if not self.locking.lock(version_uid):
        logger.error('Version {} is locked.'.format(version_uid))
        logger.error('Backy exiting.')
        # TODO: Don't exit here, exit in Commands
        exit(99)

    blocks = self.meta_backend.get_blocks_by_version(version_uid)

    if from_version and hints:
        # SANITY CHECK:
        # Check some blocks outside of hints if they are the same in the
        # from_version backup and in the current backup. If they
        # don't, either hints are wrong (e.g. from a wrong snapshot diff)
        # or source doesn't match. In any case, the resulting backup won't
        # be good.
        logger.info('Starting sanity check with 1% of the blocks. Reading...')
        ignore_blocks = list(set(range(size)) - read_blocks - sparse_blocks)
        random.shuffle(ignore_blocks)
        num_check_blocks = max(10, len(ignore_blocks) // 100)  # 1%, but at least 10
        check_block_ids = ignore_blocks[:num_check_blocks]
        num_reading = 0
        for block in blocks:
            if block.id in check_block_ids and block.uid:  # no uid = sparse block in backup. Can't check.
                self.reader.read(block)
                num_reading += 1
        for i in range(num_reading):
            # this is source file data
            source_block, source_data, source_data_checksum = self.reader.get()
            # check metadata checksum with the newly read one
            if source_block.checksum != source_data_checksum:
                logger.error("Source and backup don't match in regions outside of the hints.")
                logger.error("Looks like the hints don't match or the source is different.")
                logger.error("Found wrong source data at block {}: offset {} with max. length {}".format(
                    source_block.id, source_block.id * self.block_size, self.block_size))
                # remove version
                self.meta_backend.rm_version(version_uid)
                sys.exit(5)
        logger.info('Finished sanity check. Checked {} blocks.'.format(num_reading))

    read_jobs = 0
    for block in blocks:
        if block.id in read_blocks or not block.valid:
            self.reader.read(block)  # adds a read job.
            read_jobs += 1
        elif block.id in sparse_blocks:
            # This "elif" is very important. Because if the block is in read_blocks
            # AND sparse_blocks, it *must* be read.
            self.meta_backend.set_block(block.id, version_uid, None, None, block.size, valid=1, _commit=False)
            stats['blocks_sparse'] += 1
            stats['bytes_sparse'] += block.size
            logger.debug('Skipping block (sparse) {}'.format(block.id))
        else:
            #self.meta_backend.set_block(block.id, version_uid, block.uid, block.checksum, block.size, valid=1, _commit=False)
            logger.debug('Keeping block {}'.format(block.id))

    # now use the readers and write
    done_jobs = 0
    for i in range(read_jobs):
        block, data, data_checksum = self.reader.get()
        stats['blocks_read'] += 1
        stats['bytes_read'] += len(data)

        # dedup
        existing_block = self.meta_backend.get_block_by_checksum(data_checksum)
        if existing_block and existing_block.size == len(data):
            self.meta_backend.set_block(block.id, version_uid, existing_block.uid,
                                        data_checksum, len(data), valid=1, _commit=False)
            stats['blocks_found_dedup'] += 1
            stats['bytes_found_dedup'] += len(data)
            logger.debug('Found existing block for id {} with uid {})'.format(
                block.id, existing_block.uid))
        else:
            block_uid = self.data_backend.save(data)
            self.meta_backend.set_block(block.id, version_uid, block_uid,
                                        data_checksum, len(data), valid=1, _commit=False)
            stats['blocks_written'] += 1
            stats['bytes_written'] += len(data)
            logger.debug('Wrote block {} (checksum {}...)'.format(block.id, data_checksum[:16]))
        done_jobs += 1

    self.reader.close()  # wait for all readers
    self.data_backend.close()  # wait for all writers

    if read_jobs != done_jobs:
        logger.error('backy broke somewhere. Backup is invalid.')
        sys.exit(3)

    self.meta_backend.set_version_valid(version_uid)
    self.meta_backend.set_stats(
        version_uid=version_uid,
        version_name=name,
        version_size_bytes=stats['version_size_bytes'],
        version_size_blocks=stats['version_size_blocks'],
        bytes_read=stats['bytes_read'],
        blocks_read=stats['blocks_read'],
        bytes_written=stats['bytes_written'],
        blocks_written=stats['blocks_written'],
        bytes_found_dedup=stats['bytes_found_dedup'],
        blocks_found_dedup=stats['blocks_found_dedup'],
        bytes_sparse=stats['bytes_sparse'],
        blocks_sparse=stats['blocks_sparse'],
        duration_seconds=int(time.time() - stats['start_time']),
    )
    logger.info('New version: {}'.format(version_uid))
    self.locking.unlock(version_uid)
    return version_uid

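# backup() above converts (offset, length, exists) hints into block ids via
# blocks_from_hints(). The helper itself is not shown in this section; a minimal
# sketch, assuming it returns the set of block ids touched by the given extents:
def blocks_from_hints_sketch(hints, block_size):
    """Return the set of block ids covered by (offset, length, exists) hints."""
    block_ids = set()
    for offset, length, _exists in hints:
        if length <= 0:
            continue
        first = offset // block_size
        last = (offset + length - 1) // block_size
        block_ids.update(range(first, last + 1))
    return block_ids
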
def main():
    parser = argparse.ArgumentParser(
        description='Backup and restore for block devices.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '-v', '--verbose', action='store_true', help='verbose output')
    parser.add_argument(
        '-m', '--machine-output', action='store_true', default=False)
    parser.add_argument(
        '-V', '--version', action='store_true', help='Show version')

    subparsers = parser.add_subparsers()

    # BACKUP
    p = subparsers.add_parser(
        'backup',
        help="Perform a backup.")
    p.add_argument(
        'source',
        help='Source file')
    p.add_argument(
        'name',
        help='Backup name')
    p.add_argument('-r', '--rbd', default=None, help='Hints as rbd json format')
    p.add_argument('-f', '--from-version', default=None, help='Use this version-uid as base')
    p.set_defaults(func='backup')

    # RESTORE
    p = subparsers.add_parser(
        'restore',
        help="Restore a given backup with level to a given target.")
    p.add_argument('-s', '--sparse', action='store_true',
                   help='Write restore file sparse (does not work with legacy devices)')
    p.add_argument('version_uid')
    p.add_argument('target')
    p.set_defaults(func='restore')

    # RM
    p = subparsers.add_parser(
        'rm',
        help="Remove a given backup version. This will only remove meta data and you will have to cleanup after this.")
    p.add_argument('version_uid')
    p.set_defaults(func='rm')

    # SCRUB
    p = subparsers.add_parser(
        'scrub',
        help="Scrub a given backup and check for consistency.")
    p.add_argument('-s', '--source', default=None,
                   help="Source, optional. If given, check if source matches backup in addition to checksum tests.")
    p.add_argument('-p', '--percentile', default=100,
                   help="Only check PERCENTILE percent of the blocks (value 0..100). Default: 100")
    p.add_argument('version_uid')
    p.set_defaults(func='scrub')

    # Export
    p = subparsers.add_parser(
        'export',
        help="Export the metadata of a backup uid into a file.")
    p.add_argument('version_uid')
    p.add_argument('filename', help="Export into this filename ('-' is for stdout)")
    p.set_defaults(func='export')

    # Import
    p = subparsers.add_parser(
        'import',
        help="Import the metadata of a backup from a file.")
    p.add_argument('filename', help="Read from this file ('-' is for stdin)")
    p.set_defaults(func='import_')

    # CLEANUP
    p = subparsers.add_parser(
        'cleanup',
        help="Clean unreferenced blobs.")
    p.add_argument(
        '-f', '--full', action='store_true', default=False,
        help='Do a full cleanup. This will read the full metadata from the data backend (i.e. backup storage) '
             'and compare it to the metadata in the meta backend. Unused data will then be deleted. '
             'This is a slow, but complete process. A full cleanup must not be run parallel to ANY other backy '
             'jobs.')
    p.add_argument(
        '-p', '--prefix', default=None,
        help='If you perform a full cleanup, you may add --prefix to only cleanup block uids starting '
             'with this prefix. This is for iterative cleanups. Example: '
             'cleanup --full --prefix=a')
    p.set_defaults(func='cleanup')

    # LS
    p = subparsers.add_parser(
        'ls',
        help="List existing backups.")
    p.add_argument('version_uid', nargs='?', default=None, help='Show verbose blocks for this version')
    p.set_defaults(func='ls')

    # STATS
    p = subparsers.add_parser(
        'stats',
        help="Show statistics")
    p.add_argument('version_uid', nargs='?', default=None, help='Show statistics for this version')
    p.add_argument('-l', '--limit', default=None, help="Limit output to this number (default: unlimited)")
    p.set_defaults(func='stats')

    # diff-meta
    p = subparsers.add_parser(
        'diff-meta',
        help="Output a diff between two versions")
    p.add_argument('version_uid1', help='Left version')
    p.add_argument('version_uid2', help='Right version')
    p.set_defaults(func='diff_meta')

    # NBD
    p = subparsers.add_parser(
        'nbd',
        help="Start an nbd server")
    p.add_argument('version_uid', nargs='?', default=None, help='Start an nbd server for this version')
    p.add_argument('-a', '--bind-address', default='127.0.0.1', help="Bind to this ip address (default: 127.0.0.1)")
    p.add_argument('-p', '--bind-port', default=10809, help="Bind to this port (default: 10809)")
    p.add_argument(
        '-r', '--read-only', action='store_true', default=False,
        help='Read only if set, otherwise a copy on write backup is created.')
    p.set_defaults(func='nbd')

    args = parser.parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)

    if not hasattr(args, 'func'):
        parser.print_usage()
        sys.exit(1)

    if args.verbose:
        console_level = logging.DEBUG
    #elif args.func == 'scheduler':
    #    console_level = logging.INFO
    else:
        console_level = logging.INFO

    Config = partial(_Config, conf_name='backy')
    config = Config(section='DEFAULTS')
    init_logging(config.get('logfile'), console_level)

    commands = Commands(args.machine_output, Config)
    func = getattr(commands, args.func)

    # Pass over to function
    func_args = dict(args._get_kwargs())
    del func_args['func']
    del func_args['verbose']
    del func_args['version']
    del func_args['machine_output']

    try:
        logger.debug('backup.{0}(**{1!r})'.format(args.func, func_args))
        func(**func_args)
        logger.info('Backy complete.\n')
        sys.exit(0)
    except Exception as e:
        logger.error('Unexpected exception')
        logger.exception(e)
        logger.info('Backy failed.\n')
        sys.exit(100)

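# Example invocations of the CLI defined in main() above, derived from the argparse
# setup. Illustrative only; the executable name "backy" is assumed from the config
# section name, and the uids/paths are placeholders:
#
#   backy backup /dev/vg0/lv_data mydata -r hints.json -f <from-version-uid>
#   backy restore <version-uid> /dev/vg0/lv_restore --sparse
#   backy scrub <version-uid> -s /dev/vg0/lv_data -p 50
#   backy cleanup                      # fast cleanup of unreferenced blocks
#   backy cleanup --full --prefix=a    # full metadata/data comparison
#   backy nbd <version-uid> -a 127.0.0.1 -p 10809 -r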