def unlock_manifest(
    manifest_filename: str,
    private_key_filename: str,
    load: Callable[[str, IOIter], IOIter],
    options: OptionsDict,
) -> Manifest:
    """ Load a manifest into local storage and decrypt it

    :param manifest_filename: the name of the manifest to unlock
    :param private_key_filename: the private key file in PEM format used to encrypt the
        manifest's keypair
    :param load: the _load function from the backup store
    :param options: backup store options
    :returns: the requested Manifest
    """
    local_manifest_filename = path_join(get_scratch_dir(), manifest_filename)
    logger.debug(f'Unlocking manifest at {local_manifest_filename}')

    # First use the private key to read the AES key and nonce used to encrypt the manifest
    key_pair = b''
    if options['use_encryption']:
        key_pair = get_manifest_keypair(manifest_filename, private_key_filename, load)

    # Now use the key and nonce to decrypt the manifest
    with IOIter() as encrypted_local_manifest, \
            IOIter(local_manifest_filename, check_mtime=False) as local_manifest:
        load(manifest_filename, encrypted_local_manifest)
        decrypt_and_unpack(encrypted_local_manifest, local_manifest, key_pair, options)
    return Manifest(local_manifest_filename)

def compute_diff(
    orig_file: IOIter,
    new_file: IOIter,
    diff_file: IOIter,
    discard_diff_percentage: Optional[float] = None,
) -> IOIter:
    """ Given an open original file and a new file, compute the diff between the two

    :param orig_file: an IOIter object whose contents are the "original" data
    :param new_file: an IOIter object whose contents are the "new" data
    :param diff_file: an IOIter object where the diff data will be written
    :param discard_diff_percentage: if set, raise DiffTooLargeException once the diff grows
        beyond this fraction of the original file's size
    :returns: the diff_file IOIter
    """
    total_written = 0
    writer = diff_file.writer()
    next(writer)
    logger.debug2('beginning diff computation')
    for orig_bytes, new_bytes in zip_longest(orig_file.reader(), new_file.reader(), fillvalue=b''):
        diff = bsdiff4.diff(orig_bytes, new_bytes)
        diff_str = str(len(diff)).encode() + SEPARATOR + diff
        total_written += len(diff_str)
        if discard_diff_percentage and total_written > orig_file.size * discard_diff_percentage:
            raise DiffTooLargeException
        writer.send(diff_str)

    return diff_file

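# A minimal sketch of the per-block record format that compute_diff emits: the ASCII
# length of the bsdiff4 payload, then the separator, then the payload, so a reader can
# frame records in a byte stream. The SEPARATOR value here is hypothetical; the real
# constant is defined in backuppy.
import bsdiff4

SEPARATOR = b'|'  # hypothetical value for illustration

def make_record(orig_block: bytes, new_block: bytes) -> bytes:
    payload = bsdiff4.diff(orig_block, new_block)
    return str(len(payload)).encode() + SEPARATOR + payload

record = make_record(b'hello world', b'hello there')
length_str, rest = record.split(SEPARATOR, 1)
assert len(rest) == int(length_str)
assert bsdiff4.patch(b'hello world', rest) == b'hello there'
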
def _check_entry(entry: ManifestEntry, backup_store: BackupStore):
    with IOIter() as orig_file, \
            IOIter() as diff_file, \
            IOIter() as restore_file:
        backup_store.restore_entry(entry, orig_file, diff_file, restore_file)
        sha = compute_sha(restore_file)
        if sha != entry.sha:
            raise MismatchedSHAError(f'SHAs for {entry.abs_file_name} do not match')

def test_save(caplog, mock_backup_store):
    with IOIter('/scratch/foo') as input1, IOIter('/scratch/asdf/bar') as input2:
        mock_backup_store._save(input1, '/foo')
        mock_backup_store._save(input2, '/asdf/bar')

    assert os.path.exists('/fake/path/fake_backup/foo')
    with open('/fake/path/fake_backup/foo', 'r') as f:
        assert f.read() == "i'm a copy of foo"
    assert os.path.exists('/fake/path/fake_backup/asdf/bar')
    with open('/fake/path/fake_backup/asdf/bar', 'r') as f:
        assert f.read() == "i'm a copy of bar"

def test_save(s3_client, mock_backup_store):
    with IOIter('/scratch/foo') as input1, IOIter('/scratch/asdf/bar') as input2:
        mock_backup_store._save(input1, '/foo')
        mock_backup_store._save(input2, '/asdf/bar')

    assert s3_client.get_object(
        Bucket='test_bucket',
        Key='/foo',
    )['Body'].read() == b"i'm a copy of foo"
    assert s3_client.get_object(
        Bucket='test_bucket',
        Key='/asdf/bar',
    )['Body'].read() == b"i'm a copy of bar"

def decrypt_and_unpack(
    input_file: IOIter,
    output_file: IOIter,
    key_pair: Optional[bytes],
    options: OptionsDict,
) -> None:
    """ Read encrypted, compressed data from an open file descriptor, and write the decoded
    data to another file descriptor; verify the HMAC of the encrypted data to ensure integrity

    :param input_file: an IOIter object to read compressed ciphertext from
    :param output_file: an IOIter object to write plaintext data to
    :param key_pair: the AES key, nonce, and HMAC signature used to decrypt and verify the data
    :param options: backup store options
    """
    key, nonce, signature = (
        key_pair[:AES_KEY_SIZE],
        key_pair[AES_KEY_SIZE:AES_KEY_SIZE + AES_BLOCK_SIZE],
        key_pair[AES_KEY_SIZE + AES_BLOCK_SIZE:],
    ) if key_pair else (b'', b'', b'')
    decrypted_data = b''
    decrypt_fn: Callable[[bytes], bytes] = (
        Cipher(AES(key), CTR(nonce), backend=default_backend()).decryptor().update
        if options['use_encryption'] else identity
    )
    decompress_obj = zlib.decompressobj()
    unzip_fn: Callable[[bytes], bytes] = (
        decompress_obj.decompress  # type: ignore
        if options['use_compression'] else identity
    )
    hmac = HMAC(key, SHA256(), default_backend())
    writer = output_file.writer()
    next(writer)
    for encrypted_data in input_file.reader():
        if options['use_encryption']:
            hmac.update(encrypted_data)
        decrypted_data += decrypt_fn(encrypted_data)
        logger.debug2(f'decrypt_fn returned {len(decrypted_data)} bytes')

        block = unzip_fn(decrypted_data)
        logger.debug2(f'unzip_fn returned {len(block)} bytes')
        writer.send(block)
        decrypted_data = decompress_obj.unused_data

    # Decompress and write out the last block
    if decrypted_data:
        block = unzip_fn(decrypted_data)
        logger.debug2(f'unzip_fn returned {len(block)} bytes')
        writer.send(block)

    try:
        if options['use_encryption']:
            hmac.verify(signature)
    except InvalidSignature as e:
        raise BackupCorruptedError("The file's signature did not match the data") from e

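# A sketch of the key_pair byte layout that decrypt_and_unpack slices apart, assuming
# AES_KEY_SIZE = 32 and AES_BLOCK_SIZE = 16 (the real constants are defined in backuppy
# and may differ); the trailing 32 bytes are the HMAC-SHA256 signature that gets appended
# to the key and nonce when a file is saved.
AES_KEY_SIZE = 32    # assumed
AES_BLOCK_SIZE = 16  # assumed

key_pair = bytes(range(32)) + bytes(16) + bytes(32)  # key | nonce | signature
key = key_pair[:AES_KEY_SIZE]
nonce = key_pair[AES_KEY_SIZE:AES_KEY_SIZE + AES_BLOCK_SIZE]
signature = key_pair[AES_KEY_SIZE + AES_BLOCK_SIZE:]
assert (len(key), len(nonce), len(signature)) == (32, 16, 32)
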
def lock_manifest(
    manifest: Manifest,
    private_key_filename: str,
    save: Callable[[IOIter, str], None],
    load: Callable[[str, IOIter], IOIter],
    options: OptionsDict,
) -> None:
    """ Save a manifest from local storage to the backup store

    :param manifest: the manifest object to save
    :param private_key_filename: the private key file in PEM format used to encrypt the
        manifest's keypair
    :param save: the _save function from the backup store
    :param load: the _load function from the backup store, used to verify the saved manifest
    :param options: backup store options
    """
    timestamp = time.time()
    local_manifest_filename = manifest.filename
    logger.debug(f'Locking manifest at {local_manifest_filename}')

    # First generate a new key and nonce to encrypt the manifest
    key_pair = generate_key_pair(options)

    # Next, use that key and nonce to encrypt and save the manifest
    new_manifest_filename = MANIFEST_FILE.format(ts=timestamp)
    with IOIter(local_manifest_filename) as local_manifest, \
            IOIter(local_manifest_filename + '.enc') as encrypted_manifest:
        signature = compress_and_encrypt(local_manifest, encrypted_manifest, key_pair, options)
        save(encrypted_manifest, new_manifest_filename)

    # Finally, save the manifest key/nonce along with its HMAC using the user's private key
    if options['use_encryption']:
        with IOIter(local_manifest_filename + '.key') as new_manifest_key:
            new_manifest_key.fd.write(encrypt_and_sign(key_pair + signature, private_key_filename))
            new_manifest_key.fd.seek(0)
            save(new_manifest_key, MANIFEST_KEY_FILE.format(ts=timestamp))

    try:
        unlock_manifest(new_manifest_filename, private_key_filename, load, options)
    except Exception:
        logger.critical(
            'The saved manifest could not be decrypted! '
            'The contents of the most recent backup are inaccessible!'
        )
        raise

def _restore(
    files_to_restore: List[ManifestEntry],
    destination: str,
    backup_store: BackupStore,
) -> None:
    print('Beginning restore...')
    os.makedirs(destination, exist_ok=True)
    for f in files_to_restore:
        # Strip the leading '/' (and any drive colon) so the file lands under destination
        stripped_abs_file_name = f.abs_file_name.removeprefix('/').replace(':', '')
        restore_file_name = path_join(destination, stripped_abs_file_name)
        with IOIter() as orig_file, \
                IOIter() as diff_file, \
                IOIter(restore_file_name) as restore_file:
            backup_store.restore_entry(f, orig_file, diff_file, restore_file)
    print('Restore complete!\n')

def test_writer_tmp_file(block_size):
    contents = b'asdfhjlkqwerty'
    with IOIter(None, block_size=block_size) as tmp, \
            mock.patch('backuppy.io.TemporaryFile', wraps=TemporaryFile) as mock_tmp_file:
        writer = tmp.writer()
        next(writer)
        writer.send(contents)
        tmp.fd.seek(0)
        assert tmp.fd.read() == contents
        assert mock_tmp_file.call_count == (len(contents) > block_size)

def _load(self, path: str, output_file: IOIter) -> IOIter:
    path = path.replace('\\', '/')
    logger.info(f'Reading s3://{self._bucket}/{path} into {output_file.filename}')
    response = self._client.get_object(Bucket=self._bucket, Key=path)
    writer = output_file.writer()
    next(writer)
    for data in response['Body'].iter_chunks(BLOCK_SIZE):
        writer.send(data)
    return output_file

def compress_and_encrypt(
    input_file: IOIter,
    output_file: IOIter,
    key_pair: Optional[bytes],
    options: OptionsDict,
) -> bytes:
    """ Read data from an open file descriptor, and write the compressed, encrypted data to
    another file descriptor; compute the HMAC of the encrypted data to ensure integrity

    :param input_file: an IOIter object to read plaintext data from
    :param output_file: an IOIter object to write compressed ciphertext to
    :param key_pair: the AES key and nonce used to encrypt the data
    :param options: backup store options
    :returns: the HMAC signature of the encrypted data, or b'' if encryption is disabled
    """
    key, nonce = (key_pair[:AES_KEY_SIZE], key_pair[AES_KEY_SIZE:]) if key_pair else (b'', b'')
    compressobj = zlib.compressobj()
    zip_fn: Callable[[bytes], bytes] = (  # type: ignore
        compressobj.compress if options['use_compression'] else identity
    )
    encrypt_fn: Callable[[bytes], bytes] = (
        Cipher(AES(key), CTR(nonce), backend=default_backend()).encryptor().update
        if options['use_encryption'] else identity
    )
    hmac = HMAC(key, SHA256(), default_backend())

    def last_block() -> Generator[Tuple[bytes, bool], None, None]:
        yield (compressobj.flush(), False) if options['use_compression'] else (b'', False)

    writer = output_file.writer()
    next(writer)
    logger.debug2('starting to compress')
    for block, needs_compression in chain(zip(input_file.reader(), repeat(True)), last_block()):
        if needs_compression:
            block = zip_fn(block)
            logger.debug2(f'zip_fn returned {len(block)} bytes')
        block = encrypt_fn(block)
        logger.debug2(f'encrypt_fn returned {len(block)} bytes')
        if options['use_encryption']:
            hmac.update(block)
        writer.send(block)

    if options['use_encryption']:
        return hmac.finalize()
    else:
        return b''

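# A self-contained sketch of the same pipeline with the `cryptography` primitives used
# one-shot instead of streamed, to make the ordering explicit: compress first, then
# encrypt, then HMAC over the *ciphertext* (encrypt-then-MAC). Key sizes are assumptions;
# the backend argument is omitted since newer `cryptography` releases don't require it.
import os
import zlib

from cryptography.hazmat.primitives.ciphers import Cipher
from cryptography.hazmat.primitives.ciphers.algorithms import AES
from cryptography.hazmat.primitives.ciphers.modes import CTR
from cryptography.hazmat.primitives.hashes import SHA256
from cryptography.hazmat.primitives.hmac import HMAC

key, nonce = os.urandom(32), os.urandom(16)
plaintext = b'some data to back up' * 100

ciphertext = Cipher(AES(key), CTR(nonce)).encryptor().update(zlib.compress(plaintext))
hmac = HMAC(key, SHA256())
hmac.update(ciphertext)
signature = hmac.finalize()

# The decrypt side verifies the signature before trusting the data, then reverses
# the pipeline: decrypt, then decompress.
verifier = HMAC(key, SHA256())
verifier.update(ciphertext)
verifier.verify(signature)  # raises InvalidSignature if the data was tampered with
recovered = zlib.decompress(Cipher(AES(key), CTR(nonce)).decryptor().update(ciphertext))
assert recovered == plaintext
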
def mock_open_streams():
    class MockBytesIO(BytesIO):
        def fileno(self):  # make this work with fstat
            return self

    orig, new, diff = IOIter('/orig'), IOIter('/new'), IOIter('/diff')
    with mock.patch('builtins.open'), \
            mock.patch('backuppy.io.os.open'), \
            mock.patch('backuppy.io.os.fdopen'), \
            mock.patch('backuppy.io.os.stat'), \
            mock.patch('backuppy.io.os.makedirs'), \
            mock.patch('os.fstat') as mock_fstat, \
            orig, new, diff:
        mock_fstat.side_effect = lambda bio: mock.Mock(st_size=len(bio.getvalue()))
        orig.block_size = new.block_size = diff.block_size = 2
        orig._fd = MockBytesIO(b'asdfasdfa')
        new._fd = MockBytesIO()
        diff._fd = MockBytesIO()
        yield orig, new, diff

def get_manifest_keypair(
    manifest_filename: str,
    private_key_filename: str,
    load: Callable[[str, IOIter], IOIter],
) -> bytes:
    ts = manifest_filename.split('.', 1)[1]
    with IOIter() as manifest_key:
        # the key is not large enough to worry about chunked reads, so just do it all at once
        load(MANIFEST_KEY_FILE.format(ts=ts), manifest_key)
        manifest_key.fd.seek(0)
        encrypted_key_pair = manifest_key.fd.read()
        return decrypt_and_verify(encrypted_key_pair, private_key_filename)

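# Illustration of the timestamp parsing above: manifest files carry a trailing timestamp
# (see MANIFEST_FILE.format(ts=timestamp) in lock_manifest, where the ts comes from
# time.time() and so may itself contain a '.'), which is why split('.', 1) keeps
# everything after the *first* dot. The filename below is hypothetical.
manifest_filename = 'manifest.1614556800.123'  # hypothetical name
ts = manifest_filename.split('.', 1)[1]
assert ts == '1614556800.123'
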
def test_tmp_io_iter(fs):
    with mock.patch('backuppy.io.io.BytesIO') as mock_bytes_io, IOIter() as tmp:
        tmp._check_mtime()
        assert mock_bytes_io.call_count == 1
        with pytest.raises(BufferError):
            tmp.uid
        with pytest.raises(BufferError):
            tmp.gid
        with pytest.raises(BufferError):
            tmp.mode
        with pytest.raises(BufferError):
            tmp.mtime

def test_validate_diffs(orig_data, new_data):
    if not orig_data:
        orig_data, new_data = generate_data()
    print(orig_data)
    print(new_data)
    with IOIter() as orig, IOIter() as new, IOIter() as diff, IOIter() as newnew:
        orig_writer = orig.writer()
        next(orig_writer)
        orig_writer.send(orig_data)
        new_writer = new.writer()
        next(new_writer)
        new_writer.send(new_data)

        compute_diff(orig, new, diff)
        apply_diff(orig, diff, newnew)
        new.fd.seek(0)
        newnew.fd.seek(0)
        assert new.fd.read() == newnew.fd.read()

def assert_backup_store_correct():
    latest_manifest = get_latest_manifest()
    manifest_conn = sqlite3.connect(latest_manifest)
    manifest_conn.row_factory = sqlite3.Row
    manifest_cursor = manifest_conn.cursor()
    for path, history in test_file_history.items():
        latest = history[-1]
        manifest_cursor.execute(
            'select * from manifest where abs_file_name=? order by commit_timestamp',
            (os.path.abspath(latest.path),),
        )
        rows = manifest_cursor.fetchall()
        if 'dont_back_me_up' in path:
            assert len(rows) == 0
            continue
        else:
            # Dedupe the history while preserving order, since unchanged versions of a file
            # don't get new manifest rows
            deduped_history = []
            for i in history:
                if i not in deduped_history:
                    deduped_history.append(i)
            assert len(rows) == len(deduped_history)
            for row in rows:
                assert (row['sha'], row['mode']) in [(e.sha, e.mode) for e in deduped_history]

        if latest.backup_path:
            manifest_cursor.execute(
                'select * from base_shas where sha=?',
                (latest.sha,),
            )
            row = manifest_cursor.fetchone()
            with IOIter(latest.backup_path) as n:
                if not row or not row[1]:
                    assert n.fd.read() == latest.contents
                else:
                    orig_file_path = path_join(BACKUP_DIR, row[1][:2], row[1][2:4], row[1][4:])
                    with IOIter(orig_file_path) as o, IOIter() as tmp:
                        apply_diff(o, n, tmp)
                        tmp.fd.seek(0)
                        assert tmp.fd.read() == latest.contents

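# A sketch of the sha-to-path sharding scheme the test above reconstructs by hand
# (BACKUP_DIR / sha[:2] / sha[2:4] / sha[4:]); the real helper is sha_to_path in
# backuppy, and this standalone version only illustrates the on-disk layout.
import os.path

def sha_to_path_sketch(sha: str) -> str:
    return os.path.join(sha[:2], sha[2:4], sha[4:])

assert sha_to_path_sketch('0123456789abcdef') == os.path.join('01', '23', '456789abcdef')
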
def load(
    self,
    src: str,
    dest: IOIter,
    key_pair: Optional[bytes],
) -> IOIter:
    """ Wrapper around the _load function that converts the SHA to a path and does decryption """
    src = sha_to_path(src)
    with IOIter() as encrypted_load_file:
        self._load(src, encrypted_load_file)
        decrypt_and_unpack(encrypted_load_file, dest, key_pair, self.options)
    dest.fd.seek(0)
    return dest

def apply_diff(orig_file: IOIter, diff_file: IOIter, new_file: IOIter) -> None:
    """ Given an original file and a diff file, write out a new file with the diff applied

    :param orig_file: an IOIter object whose contents are the "original" data
    :param diff_file: an IOIter object whose contents are the diff to be applied
    :param new_file: an IOIter object where the new file data will be written
    """

    # The outer loop reads a chunk of data at a time; the inner loop parses
    # the read chunk one step at a time and applies it
    diff = b''
    new_writer = new_file.writer()
    next(new_writer)
    orig_reader = orig_file.reader()
    logger.debug2('applying diff')
    for diff_chunk in diff_file.reader():
        diff += diff_chunk
        while diff:
            # try to parse the next chunk; if we can't, break out of the loop to get more data
            try:
                diff_len_str, remainder = diff.split(SEPARATOR, 1)
            except ValueError:
                break

            diff_len = int(diff_len_str)
            if len(remainder) < diff_len:
                break

            try:
                orig_block = next(orig_reader)
            except StopIteration:
                orig_block = b''
            new_writer.send(bsdiff4.patch(orig_block, remainder[:diff_len]))
            diff = remainder[diff_len:]

    if diff:
        raise DiffParseError(f'Un-parseable diff: {diff}')  # type: ignore

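# A minimal sketch of the chunked framing logic apply_diff uses: a record may be split
# across read chunks, so the parser buffers bytes until a complete record (length,
# separator, payload) is available. SEPARATOR is assumed to be b'|' for illustration.
SEPARATOR = b'|'

def frame_records(chunks):
    buf = b''
    for chunk in chunks:
        buf += chunk
        while buf:
            try:
                length_str, rest = buf.split(SEPARATOR, 1)
            except ValueError:
                break  # no separator yet; need more data
            length = int(length_str)
            if len(rest) < length:
                break  # payload incomplete; need more data
            yield rest[:length]
            buf = rest[length:]
    if buf:
        raise ValueError(f'truncated record: {buf!r}')

# A record split across chunk boundaries is still reassembled correctly:
assert list(frame_records([b'5|he', b'llo3|', b'abc'])) == [b'hello', b'abc']
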
def save(self, src: IOIter, dest: str, key_pair: bytes) -> bytes:
    """ Wrapper around the _save function that converts the SHA to a path and does encryption

    :param src: the file to save
    :param dest: the name of the file to write to in the store
    :param key_pair: an AES key + nonce to use to encrypt the file
    :returns: the HMAC of the saved file
    """
    dest = sha_to_path(dest)
    # We compress and encrypt the file on the local file system, and then pass the encrypted
    # file to the backup store to handle atomically
    filename = path_join(get_scratch_dir(), dest)

    with IOIter(filename) as encrypted_save_file:
        signature = compress_and_encrypt(src, encrypted_save_file, key_pair, self.options)
        self._save(encrypted_save_file, dest)  # test_f1_crash_file_save
    os.remove(filename)
    return signature

def _write_diff(
    self,
    abs_file_name: str,
    new_sha: str,
    curr_entry: ManifestEntry,
    file_obj: IOIter,
    dry_run: bool,
) -> ManifestEntry:
    logger.info(f'Saving a diff for {abs_file_name}')

    entry_data = self._find_existing_entry_data(new_sha)
    # If the current entry is itself a diff, get its base; otherwise, this entry becomes the base
    if entry_data:
        key_pair, base_sha, base_key_pair = entry_data
    elif curr_entry.base_sha:
        key_pair = generate_key_pair(self.options)
        base_sha = curr_entry.base_sha
        base_key_pair = curr_entry.base_key_pair
    else:
        key_pair = generate_key_pair(self.options)
        base_sha = curr_entry.sha
        base_key_pair = curr_entry.key_pair

    # compute a diff between the version we've previously backed up and the new version
    new_entry = ManifestEntry(
        abs_file_name,
        new_sha,
        base_sha,
        file_obj.uid,
        file_obj.gid,
        file_obj.mode,
        key_pair,
        base_key_pair,
    )

    if not entry_data:
        assert base_sha
        with IOIter() as orig_file, IOIter() as diff_file:
            orig_file = self.load(base_sha, orig_file, base_key_pair)

            try:
                fd_diff = compute_diff(
                    orig_file,
                    file_obj,
                    diff_file,
                    self.options['discard_diff_percentage'],
                )
            except DiffTooLargeException:
                logger.info('The computed diff was too large; saving a copy instead.')
                logger.info('(you can configure this threshold with the discard_diff_percentage option)')
                file_obj.fd.seek(0)
                return self._write_copy(abs_file_name, new_sha, file_obj, False, dry_run)

            new_entry.sha = new_sha
            if not dry_run:
                signature = self.save(fd_diff, new_entry.sha, key_pair)
                new_entry.key_pair = key_pair + signature
    return new_entry

def save_if_new(
    self,
    abs_file_name: str,
    *,
    dry_run: bool = False,
    force_copy: bool = False,
) -> Optional[ManifestEntry]:
    """ The main workhorse function; determine if a file has changed, and if so, back it up!

    :param abs_file_name: the name of the file under consideration
    :param dry_run: whether to actually save any data or not
    :param force_copy: make a new copy of the file even if we could compute a diff instead
    """
    curr_entry, new_entry = self.manifest.get_entry(abs_file_name), None
    with IOIter(abs_file_name) as new_file:
        new_sha = compute_sha(new_file)

        # If the file hasn't been backed up before, or if it's been deleted previously, save a
        # new copy; we make a copy here to ensure that the contents don't change while backing
        # the file up, and that we have the correct sha
        if force_copy or not curr_entry or not curr_entry.sha:
            new_entry = self._write_copy(abs_file_name, new_sha, new_file, force_copy, dry_run)

        # If the file has been backed up, check to see if it's changed by comparing shas
        elif new_sha != curr_entry.sha:
            if regex_search_list(abs_file_name, self.options['skip_diff_patterns']):
                new_entry = self._write_copy(abs_file_name, new_sha, new_file, False, dry_run)
            else:
                new_entry = self._write_diff(
                    abs_file_name,
                    new_sha,
                    curr_entry,
                    new_file,
                    dry_run,
                )

        # If the sha is the same but metadata on the file has changed, we just store the
        # updated metadata
        elif (
            new_file.uid != curr_entry.uid
            or new_file.gid != curr_entry.gid
            or new_file.mode != curr_entry.mode
        ):
            logger.info(f'Saving changed metadata for {abs_file_name}')
            new_entry = ManifestEntry(
                abs_file_name,
                curr_entry.sha,
                curr_entry.base_sha,
                new_file.uid,
                new_file.gid,
                new_file.mode,
                curr_entry.key_pair,  # NOTE: this is safe because the data has not changed!
                curr_entry.base_key_pair,
            )

        else:
            # we don't want to flood the log with all the files that haven't changed
            logger.debug(f'{abs_file_name} is up to date!')

    if new_entry and not dry_run:
        self.manifest.insert_or_update(new_entry)
    return new_entry  # test_m2_crash_after_file_save

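# A hypothetical usage sketch of save_if_new, assuming `store` is an initialized
# BackupStore with an open manifest; the wrapper function and its names are
# illustrative only, not part of backuppy's API.
def backup_one_file(store: BackupStore, abs_file_name: str) -> None:
    entry = store.save_if_new(abs_file_name)
    if entry is None:
        logger.debug(f'{abs_file_name} unchanged; nothing was written')
    else:
        logger.info(f'backed up {entry.abs_file_name} as {entry.sha}')
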
def test_load(mock_backup_store):
    with IOIter('/restored_file') as output:
        mock_backup_store._load('/foo', output)
    with open('/restored_file') as f:
        assert f.read() == 'old boring content'

def test_copy(mock_io_iter, foo_contents):
    with mock_io_iter, IOIter('/bar') as copy:
        io_copy(mock_io_iter, copy)
    with open('/bar', 'rb') as f:
        assert f.read() == foo_contents

def mock_io_iter(fs):
    fake_filesystem.set_uid(1000)
    fake_filesystem.set_gid(1000)
    yield IOIter('/foo', block_size=2)

def _load(self, path: str, output_file: IOIter) -> IOIter:
    abs_backup_path = path_join(self.backup_location, path)
    logger.info(f'Reading {path} from {self.backup_location}')
    with IOIter(abs_backup_path) as input_file:
        io_copy(input_file, output_file)
    return output_file