def test_backup_restore(self):
    """ Test if backup and restore work correctly """
    backup_id = common.backup(self.backend, self.backup_dir)
    old_file = os.path.join(self.storage_dir, backup_id)

    # Create a new backup; this should reuse the last metadata set and
    # the stored checksums. The metadata set should be identical.
    with mock.patch('logging.info') as mock_log:
        backup_id = common.backup(self.backend, self.backup_dir)
        mock_log.assert_any_call('Skipped unchanged sub/o\xcc\x88')
    new_file = os.path.join(self.storage_dir, backup_id)
    self.assertEqual(utils.sha256_file(old_file),
                     utils.sha256_file(new_file))

    # Check if data deduplication works: the chunk objects on disk must
    # take up less space than the original data.
    chunks = utils.find_modified_files(self.storage_dir)
    storage_size = 0
    for filename, stat in chunks.items():
        if filename.startswith('c-'):
            storage_size += stat['s']
    self.assertLess(storage_size, self.original_size)

    common.restore(self.backend, self.restore_dir, backup_id)

    # Compare original file content to restored file content
    for fn in ['x', 'sub/y']:
        old_filename = os.path.join(self.backup_dir, fn)
        old_hash = utils.sha256_file(old_filename)
        new_filename = os.path.join(self.restore_dir, fn)
        new_hash = utils.sha256_file(new_filename)
        self.assertEqual(old_hash, new_hash)
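# The test above drives common.restore, which is not shown in this
# section. The sketch below is a hypothetical restore matching the
# b-/o-/c- object naming used by backup() further down; it only assumes
# a backend exposing get(name) and is illustrative, not the project's
# actual implementation.

import json
import os


def restore(backend, dst, backup_id):
    """Sketch: rebuild the tree described by a backup's metadata set.

    Assumes the layout written by backup(): `b-` objects hold JSON
    metadata, `o-` objects hold ';'-joined chunk checksums, and `c-`
    objects hold raw chunk data.
    """
    meta_data = json.loads(backend.get(backup_id))
    for filename, meta in meta_data.items():
        checksum = meta.get('c')
        if checksum is None:
            # Directories and other non-regular entries carry no content
            continue
        target = os.path.join(dst, filename)
        dirname = os.path.dirname(target)
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)
        if checksum.startswith('o-'):
            # Multi-chunk file: fetch the manifest, then each chunk
            chunk_names = ['c-%s' % c
                           for c in backend.get(checksum).split(';')]
        else:
            chunk_names = [checksum]
        with open(target, 'wb') as outfile:
            for name in chunk_names:
                outfile.write(backend.get(name))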
import hashlib
import json
import logging
import os
import random
from datetime import datetime
from stat import S_ISREG
from string import ascii_letters, digits

# `utils` (hashing and file-listing helpers) and `rabin` (the
# content-defined chunker, which yields the chunk sizes for a file)
# are provided by this project.


def backup(backend, src, tag="default"):
    # Try to load the old metadata set from the latest backup
    old_backups = backend.list(prefix="b-*")
    old_meta_data = {}
    if old_backups:
        backup_id = utils.newest_backup_id(old_backups)
        om = backend.get(backup_id)
        try:
            old_meta_data = json.loads(om)
        except ValueError:
            pass

    start_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    path = os.path.expanduser(src)
    files = utils.find_modified_files(path)
    chunk_size = chunk_count = changed_bytes = 0

    for filename, meta in files.items():
        # Assume the file is unchanged if neither mtime nor size changed,
        # and carry over its checksum from the previous backup
        old = old_meta_data.get(unicode(filename, 'utf-8'))
        if old and old['m'] == meta['m'] and old['s'] == meta['s']:
            old_checksum = old.get('c')
            if old_checksum:
                meta['c'] = old_checksum
            logging.info("Skipped unchanged %s" % filename)
            continue

        fullname = os.path.join(path, filename)
        if not S_ISREG(meta['p']):
            # Not a regular file (e.g. a directory); only metadata is kept
            continue

        my_sha256 = hashlib.sha256()
        chunk_checksums = []
        try:
            chunks = rabin(fullname)
        except IOError:
            logging.warning("%s not found, skipping" % fullname)
            continue

        # Read the file chunk by chunk in binary mode; each chunk is
        # stored under a name derived from its checksum, so identical
        # chunks are deduplicated by the backend
        with open(fullname, 'rb') as infile:
            for chunksize in chunks:
                data = infile.read(chunksize)
                my_sha256.update(data)
                chunk_checksum = utils.sha256_string(data)
                name = "c-%s" % chunk_checksum
                chunk_checksums.append(chunk_checksum)
                stored = backend.put(name, data)
                changed_bytes += len(data)
                if stored:
                    chunk_size += len(data)
                    chunk_count += 1

        if len(chunk_checksums) > 1:
            # Multi-chunk file: store a manifest object listing its chunks
            checksum = my_sha256.hexdigest()
            name = "o-%s" % checksum
            backend.put(name, ';'.join(chunk_checksums))
        elif chunk_checksums:
            # Single-chunk file: reference the chunk directly
            name = "c-%s" % chunk_checksums[0]
        else:
            # Defensive: a zero-length file may yield no chunks; store
            # its (empty) content once so restore can fetch it by name
            name = "c-%s" % utils.sha256_string('')
            backend.put(name, '')
        meta['c'] = name
        logging.info(fullname)

    # Write backup summary
    meta_data = json.dumps(files)
    suffix = ''.join(random.choice(ascii_letters + digits)
                     for _ in range(8))
    backup_id = "b-%s-%s-%s" % (tag, start_time, suffix)
    backend.put(backup_id, meta_data)
    logging.info("Finished backup %s. %s bytes changed" % (
        backup_id, changed_bytes))
    logging.info("Stored %s new objects with a total size of %s bytes" % (
        chunk_count, chunk_size))
    return backup_id
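# backup() above only assumes three backend methods: list(prefix),
# get(name), and put(name, data), where put() reports whether the
# object was newly stored (this is what drives the deduplication
# accounting). A minimal sketch of that contract as a flat
# local-directory backend; hypothetical, real backends may differ.

import fnmatch
import os


class LocalDirBackend(object):
    """Sketch: one file per object in a single flat directory."""

    def __init__(self, directory):
        self.directory = directory

    def _path(self, name):
        return os.path.join(self.directory, name)

    def put(self, name, data):
        # Return False for objects that already exist (deduplicated)
        if os.path.exists(self._path(name)):
            return False
        with open(self._path(name), 'wb') as f:
            f.write(data)
        return True

    def get(self, name):
        with open(self._path(name), 'rb') as f:
            return f.read()

    def list(self, prefix="*"):
        # `prefix` is a glob pattern, e.g. "b-*" for backup summaries
        return [n for n in os.listdir(self.directory)
                if fnmatch.fnmatch(n, prefix)]


# Example usage (hypothetical paths):
#     backend = LocalDirBackend('/tmp/backup-store')
#     backup_id = backup(backend, '~/documents')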
def test_find_files(self):
    """ Test if all files in the directory tree are found """
    files = utils.find_modified_files(self.tempdir)
    self.assertIn('sample', files)
    self.assertIn('sub/', files)
    self.assertIn('sub/file', files)
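# For reference, a sketch of the contract this test (and backup())
# relies on from utils.find_modified_files: relative names mapped to
# stat info ('m' mtime, 's' size, 'p' mode), with directories given a
# trailing slash. Despite its name, it lists every entry; backup()
# decides what actually changed. Illustrative, not the project's
# implementation.

import os


def find_modified_files(path):
    """Sketch: walk `path` and collect per-entry stat metadata."""
    files = {}
    for root, dirs, names in os.walk(path):
        # Record directories with a trailing slash, files without one
        for name in dirs:
            full = os.path.join(root, name)
            st = os.lstat(full)
            files[os.path.relpath(full, path) + '/'] = {
                'm': int(st.st_mtime), 's': st.st_size, 'p': st.st_mode}
        for name in names:
            full = os.path.join(root, name)
            st = os.lstat(full)
            # Integer mtimes round-trip cleanly through JSON metadata
            files[os.path.relpath(full, path)] = {
                'm': int(st.st_mtime), 's': st.st_size, 'p': st.st_mode}
    return files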