Exemple #1
0
def unlock_manifest(
    manifest_filename: str,
    private_key_filename: str,
    load: Callable[[str, IOIter], IOIter],
    options: OptionsDict,
) -> Manifest:
    """ Fetch a manifest from the backup store, decode it into the scratch directory and open it

    :param manifest_filename: the name of the manifest in the backup store
    :param private_key_filename: the PEM private key file handed to get_manifest_keypair to
        recover the manifest's key pair
    :param load: callable that loads a named object from the backup store into an IOIter
    :param options: backup store options; 'use_encryption' is read here
    :returns: a Manifest built from the decoded local file
    """
    scratch_path = path_join(get_scratch_dir(), manifest_filename)
    logger.debug(f'Unlocking manifest at {scratch_path}')

    # The key pair is only looked up when encryption is enabled; otherwise an empty one is used
    if options['use_encryption']:
        manifest_key_pair = get_manifest_keypair(manifest_filename, private_key_filename, load)
    else:
        manifest_key_pair = b''

    # Pull the stored manifest into a temporary buffer, then decode it into the scratch file
    with IOIter() as downloaded, IOIter(scratch_path, check_mtime=False) as decoded:
        load(manifest_filename, downloaded)
        decrypt_and_unpack(downloaded, decoded, manifest_key_pair, options)

    return Manifest(scratch_path)
Exemple #2
0
def compute_diff(
    orig_file: IOIter,
    new_file: IOIter,
    diff_file: IOIter,
    discard_diff_percentage: Optional[float] = None,
) -> IOIter:
    """ Write a block-by-block binary diff between two open files into a third

    Blocks from both readers are paired up (the shorter side is padded with empty blocks), each
    pair is diffed with bsdiff4, and every diff is written as `<length><SEPARATOR><diff bytes>`.

    :param orig_file: an IOIter object whose contents are the "original" data
    :param new_file: an IOIter object whose contents are the "new" data
    :param diff_file: an IOIter object where the diff data will be written
    :param discard_diff_percentage: when truthy, abort as soon as the number of diff bytes
        produced exceeds orig_file.size multiplied by this value
    :returns: diff_file, after all diff records have been sent to its writer
    :raises DiffTooLargeException: if the discard threshold is exceeded
    """
    bytes_written = 0

    diff_writer = diff_file.writer()
    next(diff_writer)
    logger.debug2('beginning diff computation')

    block_pairs = zip_longest(orig_file.reader(), new_file.reader(), fillvalue=b'')
    for old_block, new_block in block_pairs:
        patch = bsdiff4.diff(old_block, new_block)
        header = str(len(patch)).encode() + SEPARATOR
        record = header + patch
        bytes_written += len(record)

        # The size check happens before the record is written, so an oversized diff stops here
        if discard_diff_percentage:
            if bytes_written > orig_file.size * discard_diff_percentage:
                raise DiffTooLargeException
        diff_writer.send(record)

    return diff_file
Exemple #3
0
def _check_entry(entry: ManifestEntry, backup_store: BackupStore):
    """ Restore one manifest entry into temporary buffers and compare its SHA to the manifest's

    :param entry: the manifest entry to verify
    :param backup_store: the store used to restore the entry
    :raises MismatchedSHAError: if the SHA of the restored data differs from entry.sha
    """
    with IOIter() as orig_file, IOIter() as diff_file, IOIter() as restore_file:
        backup_store.restore_entry(entry, orig_file, diff_file, restore_file)
        restored_sha = compute_sha(restore_file)
        if restored_sha == entry.sha:
            return
        raise MismatchedSHAError(f'SHAs for {entry.abs_file_name} do not match')
def test_save(caplog, mock_backup_store):
    """ Saving two scratch files should leave matching copies under the fake backup directory """
    with IOIter('/scratch/foo') as foo_src, IOIter('/scratch/asdf/bar') as bar_src:
        mock_backup_store._save(foo_src, '/foo')
        mock_backup_store._save(bar_src, '/asdf/bar')

    # Backup location -> text the saved copy is expected to contain
    expected_contents = {
        '/fake/path/fake_backup/foo': "i'm a copy of foo",
        '/fake/path/fake_backup/asdf/bar': "i'm a copy of bar",
    }
    for backup_path, expected in expected_contents.items():
        assert os.path.exists(backup_path)
        with open(backup_path, 'r') as f:
            assert f.read() == expected
def test_save(s3_client, mock_backup_store):
    """ Saving two scratch files should create matching objects in the test bucket """
    with IOIter('/scratch/foo') as foo_src, IOIter('/scratch/asdf/bar') as bar_src:
        mock_backup_store._save(foo_src, '/foo')
        mock_backup_store._save(bar_src, '/asdf/bar')

    def stored_bytes(key):
        # Fetch the full body of one object from the test bucket
        return s3_client.get_object(Bucket='test_bucket', Key=key)['Body'].read()

    assert stored_bytes('/foo') == b"i'm a copy of foo"
    assert stored_bytes('/asdf/bar') == b"i'm a copy of bar"
Exemple #6
0
def decrypt_and_unpack(
    input_file: IOIter,
    output_file: IOIter,
    key_pair: Optional[bytes],
    options: OptionsDict,
) -> None:
    """ Read encrypted, zlib-compressed data from an open file descriptor, and write the decoded
    data to another file descriptor; verify the HMAC of the encrypted data to ensure integrity

    Decoded blocks are sent to output_file as they are produced; the HMAC is only verified once
    the whole input has been processed, so output has already been written if verification fails.

    :param input_file: an IOIter object to read compressed ciphertext from
    :param output_file: an IOIter object to write plaintext data to
    :param key_pair: the AES key, the nonce and the HMAC signature concatenated in that order; if
        empty or None, all three are treated as empty byte strings
    :param options: backup store options; 'use_encryption' and 'use_compression' are read here
    :raises BackupCorruptedError: if encryption is enabled and the HMAC computed over the input
        does not match the signature taken from key_pair
    """
    # Split the key pair into its three parts by the fixed key and block sizes
    key, nonce, signature = (
        key_pair[:AES_KEY_SIZE],
        key_pair[AES_KEY_SIZE:AES_KEY_SIZE + AES_BLOCK_SIZE],
        key_pair[AES_KEY_SIZE + AES_BLOCK_SIZE:]
    ) if key_pair else (b'', b'', b'')
    decrypted_data = b''
    # The cipher is only constructed when encryption is enabled; otherwise data passes through
    decrypt_fn: Callable[[bytes], bytes] = (
        Cipher(AES(key), CTR(nonce), backend=default_backend()).decryptor().update
        if options['use_encryption'] else identity
    )
    decompress_obj = zlib.decompressobj()
    unzip_fn: Callable[[bytes], bytes] = (
        decompress_obj.decompress  # type: ignore
        if options['use_compression'] else identity
    )
    hmac = HMAC(key, SHA256(), default_backend())
    # Prime the writer generator so it is ready to receive blocks via send()
    writer = output_file.writer(); next(writer)
    for encrypted_data in input_file.reader():
        # The HMAC covers the data as read from input_file, i.e. before decryption
        if options['use_encryption']:
            hmac.update(encrypted_data)
        decrypted_data += decrypt_fn(encrypted_data)
        logger.debug2(f'decrypt_fn returned {len(decrypted_data)} bytes')

        block = unzip_fn(decrypted_data)
        logger.debug2(f'unzip_fn returned {len(block)} bytes')
        writer.send(block)
        # Carry over whatever the decompressor reports as unused into the next iteration
        # NOTE(review): per the zlib docs unused_data holds bytes found after the end of the
        # compressed stream, so this is normally empty — confirm the intended use
        decrypted_data = decompress_obj.unused_data

    # Decompress and write out the last block
    if decrypted_data:
        block = unzip_fn(decrypted_data)
        logger.debug2(f'unzip_fn returned {len(block)} bytes')
        writer.send(block)

    # Translate a signature mismatch into the project's corruption error, keeping the cause
    try:
        if options['use_encryption']:
            hmac.verify(signature)
    except InvalidSignature as e:
        raise BackupCorruptedError("The file's signature did not match the data") from e
Exemple #7
0
def lock_manifest(
    manifest: Manifest,
    private_key_filename: str,
    save: Callable[[IOIter, str], None],
    load: Callable[[str, IOIter], IOIter],
    options: OptionsDict,
) -> None:
    """ Encode a local manifest, save it to the backup store, and check that it can be read back

    :param manifest: the manifest object to save
    :param private_key_filename: the PEM private key file handed to encrypt_and_sign when storing
        the manifest's key pair, and to unlock_manifest for the read-back check
    :param save: callable that stores an IOIter under the given name in the backup store
    :param load: callable that loads a named object from the backup store; used for the
        read-back check
    :param options: backup store options; 'use_encryption' is read here
    :raises Exception: whatever unlock_manifest raised if the saved manifest cannot be unlocked
    """

    now = time.time()
    manifest_path = manifest.filename
    logger.debug(f'Locking manifest at {manifest_path}')

    # A fresh key pair is generated for every saved manifest
    fresh_key_pair = generate_key_pair(options)

    # Encode the manifest into a sibling '.enc' file and hand that file to the store
    stored_manifest_name = MANIFEST_FILE.format(ts=now)
    with IOIter(manifest_path) as plain_manifest, \
            IOIter(manifest_path + '.enc') as encoded_manifest:
        signature = compress_and_encrypt(plain_manifest, encoded_manifest, fresh_key_pair, options)
        save(encoded_manifest, stored_manifest_name)

    # With encryption on, the key pair plus the manifest's HMAC is stored under the same timestamp
    if options['use_encryption']:
        with IOIter(manifest_path + '.key') as key_file:
            key_file.fd.write(encrypt_and_sign(fresh_key_pair + signature, private_key_filename))
            key_file.fd.seek(0)
            save(key_file, MANIFEST_KEY_FILE.format(ts=now))

    # Read the manifest back immediately so an unreadable backup is reported right away
    try:
        unlock_manifest(stored_manifest_name, private_key_filename, load, options)
    except Exception:
        logger.critical(
            'The saved manifest could not be decrypted!  '
            'The contents of the most recent backup is inaccessible!')
        raise
Exemple #8
0
def _restore(
    files_to_restore: List[ManifestEntry],
    destination: str,
    backup_store: BackupStore,
) -> None:
    """ Restore every given manifest entry beneath the destination directory

    :param files_to_restore: the manifest entries to restore
    :param destination: directory to restore into; created if it does not exist
    :param backup_store: the store used to restore each entry
    """
    print('Beginning restore...')
    os.makedirs(destination, exist_ok=True)

    for entry in files_to_restore:
        # Drop one leading slash and all colons so the original absolute path nests under
        # the destination directory
        relative_name = entry.abs_file_name.removeprefix('/').replace(':', '')
        target_path = path_join(destination, relative_name)

        with IOIter() as orig_file, IOIter() as diff_file, IOIter(target_path) as restore_file:
            backup_store.restore_entry(entry, orig_file, diff_file, restore_file)

    print('Restore complete!\n')
Exemple #9
0
def test_writer_tmp_file(block_size):
    """ Data sent to a nameless IOIter can be read back, and TemporaryFile is called exactly
    once if and only if the data is longer than the block size """
    payload = b'asdfhjlkqwerty'
    spill_expected = len(payload) > block_size

    with IOIter(None, block_size=block_size) as tmp, \
            mock.patch('backuppy.io.TemporaryFile', wraps=TemporaryFile) as mock_tmp_file:
        sink = tmp.writer()
        next(sink)
        sink.send(payload)

        tmp.fd.seek(0)
        written = tmp.fd.read()
        assert written == payload
        assert mock_tmp_file.call_count == spill_expected
Exemple #10
0
 def _load(self, path: str, output_file: IOIter) -> IOIter:
     """ Stream an object from the configured bucket into output_file

     :param path: the object key; backslashes are converted to forward slashes first
     :param output_file: an IOIter object that receives the object's contents
     :returns: output_file
     """
     s3_key = path.replace('\\', '/')
     logger.info(f'Reading s3://{self._bucket}/{s3_key} into {output_file.filename}')
     s3_object = self._client.get_object(Bucket=self._bucket, Key=s3_key)

     sink = output_file.writer()
     next(sink)
     for chunk in s3_object['Body'].iter_chunks(BLOCK_SIZE):
         sink.send(chunk)
     return output_file
Exemple #11
0
def compress_and_encrypt(
    input_file: IOIter,
    output_file: IOIter,
    key_pair: Optional[bytes],
    options: OptionsDict,
) -> bytes:
    """ Read data from an open file descriptor, and write the compressed, encrypted data to another
    file descriptor; compute the HMAC of the encrypted data to ensure integrity

    :param input_file: an IOIter object to read plaintext data from
    :param output_file: an IOIter object to write compressed ciphertext to
    :param key_pair: the AES key followed by the nonce; if empty or None, both are treated as
        empty byte strings
    :param options: backup store options; 'use_compression' and 'use_encryption' are read here
    :returns: the finalized HMAC of the data written to output_file when encryption is enabled,
        otherwise an empty byte string
    """
    # The first AES_KEY_SIZE bytes are the key; everything after that is the nonce
    key, nonce = (key_pair[:AES_KEY_SIZE], key_pair[AES_KEY_SIZE:]) if key_pair else (b'', b'')
    compressobj = zlib.compressobj()
    zip_fn: Callable[[bytes], bytes] = (  # type: ignore
        compressobj.compress if options['use_compression'] else identity
    )
    # The cipher is only constructed when encryption is enabled; otherwise data passes through
    encrypt_fn: Callable[[bytes], bytes] = (
        Cipher(AES(key), CTR(nonce), backend=default_backend()).encryptor().update
        if options['use_encryption'] else identity
    )
    hmac = HMAC(key, SHA256(), default_backend())

    def last_block() -> Generator[Tuple[bytes, bool], None, None]:
        # Yields the compressor's remaining output as one final block. Being a generator, the
        # flush only runs when the chain below reaches it, i.e. after all input blocks have been
        # consumed. The False flag marks the block as not needing compression again.
        yield (compressobj.flush(), False) if options['use_compression'] else (b'', False)

    # Prime the writer generator so it is ready to receive blocks via send()
    writer = output_file.writer(); next(writer)
    logger.debug2('starting to compress')
    # Every block read from the input is tagged True (needs compression); the final block is not
    for block, needs_compression in chain(zip(input_file.reader(), repeat(True)), last_block()):
        if needs_compression:
            block = zip_fn(block)
        logger.debug2(f'zip_fn returned {len(block)} bytes')
        block = encrypt_fn(block)
        logger.debug2(f'encrypt_fn returned {len(block)} bytes')
        # The HMAC covers the data exactly as it is written out, i.e. after encryption
        if options['use_encryption']:
            hmac.update(block)
        writer.send(block)

    if options['use_encryption']:
        return hmac.finalize()
    else:
        return b''
Exemple #12
0
def mock_open_streams():
    """ Yield three IOIter objects ('/orig', '/new', '/diff') whose file descriptors are in-memory
    buffers, with the file-opening and stat calls they use patched out

    Each IOIter gets a block size of 2; the original holds b'asdfasdfa' and the other two start
    empty.
    """
    class MockBytesIO(BytesIO):
        def fileno(self):  # make this work with fstat
            return self

    def fake_fstat(bio):
        # Report the buffer's current length as the file size
        return mock.Mock(st_size=len(bio.getvalue()))

    orig, new, diff = IOIter('/orig'), IOIter('/new'), IOIter('/diff')
    with mock.patch('builtins.open'), \
            mock.patch('backuppy.io.os.open'), \
            mock.patch('backuppy.io.os.fdopen'), \
            mock.patch('backuppy.io.os.stat'), \
            mock.patch('backuppy.io.os.makedirs'), \
            mock.patch('os.fstat') as mock_fstat, \
            orig, new, diff:
        mock_fstat.side_effect = fake_fstat
        for stream in (orig, new, diff):
            stream.block_size = 2

        # Swap in the in-memory buffers only after the IOIter objects have been entered
        orig._fd = MockBytesIO(b'asdfasdfa')
        new._fd = MockBytesIO()
        diff._fd = MockBytesIO()
        yield orig, new, diff
Exemple #13
0
def get_manifest_keypair(
    manifest_filename: str,
    private_key_filename: str,
    load: Callable[[str, IOIter], IOIter],
) -> bytes:
    """ Load the key file that belongs to a manifest and decrypt its contents

    :param manifest_filename: the manifest's name; everything after its first '.' is used as the
        timestamp that identifies the matching key file
    :param private_key_filename: the private key file handed to decrypt_and_verify
    :param load: callable that loads a named object from the backup store into an IOIter
    :returns: the result of decrypt_and_verify on the loaded key file contents
    """
    name_parts = manifest_filename.split('.', 1)
    manifest_ts = name_parts[1]

    with IOIter() as key_buffer:
        # the key is not large enough to worry about chunked reads, so just do it all at once
        load(MANIFEST_KEY_FILE.format(ts=manifest_ts), key_buffer)
        key_buffer.fd.seek(0)
        wrapped_key_pair = key_buffer.fd.read()

    return decrypt_and_verify(wrapped_key_pair, private_key_filename)
Exemple #14
0
def test_tmp_io_iter(fs):
    """ A nameless IOIter should create one BytesIO buffer and raise BufferError for every
    file-stat attribute """
    with mock.patch('backuppy.io.io.BytesIO') as mock_bytes_io, IOIter() as tmp:
        tmp._check_mtime()
        assert mock_bytes_io.call_count == 1

        for stat_attr in ('uid', 'gid', 'mode', 'mtime'):
            with pytest.raises(BufferError):
                getattr(tmp, stat_attr)
Exemple #15
0
def test_validate_diffs(orig_data, new_data):
    """ Applying the diff computed between two byte strings to the first must reproduce the
    second; random data is generated when no original data is supplied """
    if not orig_data:
        orig_data, new_data = generate_data()

    print(orig_data)
    print(new_data)

    with IOIter() as orig, IOIter() as new, IOIter() as diff, IOIter() as newnew:
        # Fill the "original" buffer
        orig_sink = orig.writer()
        next(orig_sink)
        orig_sink.send(orig_data)

        # Fill the "new" buffer
        new_sink = new.writer()
        next(new_sink)
        new_sink.send(new_data)

        compute_diff(orig, new, diff)
        apply_diff(orig, diff, newnew)

        new.fd.seek(0)
        newnew.fd.seek(0)
        expected = new.fd.read()
        rebuilt = newnew.fd.read()
        assert expected == rebuilt
def assert_backup_store_correct():
    """ Assert that the latest manifest and the backed-up files agree with test_file_history

    For every path in test_file_history:

    * paths containing 'dont_back_me_up' must have no manifest rows;
    * all other paths must have exactly one manifest row per distinct history item, and each
      row's (sha, mode) must match one of those items;
    * if the latest history item has a backup_path, the stored file must equal the latest
      contents, either directly or after applying it as a diff to the base file recorded in
      base_shas.

    The manifest database connection is closed on exit, including when an assertion fails.
    """
    manifest_conn = sqlite3.connect(get_latest_manifest())
    try:
        manifest_conn.row_factory = sqlite3.Row
        manifest_cursor = manifest_conn.cursor()
        for path, history in test_file_history.items():
            latest = history[-1]

            manifest_cursor.execute(
                'select * from manifest where abs_file_name=? order by commit_timestamp',
                (os.path.abspath(latest.path),),
            )
            rows = manifest_cursor.fetchall()
            if 'dont_back_me_up' in path:
                assert len(rows) == 0
                continue

            # Remove repeated history items while keeping their original order; equality is
            # used (not hashing) because the items' hashability is not known here
            deduped_history = []
            for version in history:
                if version not in deduped_history:
                    deduped_history.append(version)
            assert len(rows) == len(deduped_history)

            # Built once per path instead of once per row
            expected_versions = [(e.sha, e.mode) for e in deduped_history]
            for row in rows:
                assert (row['sha'], row['mode']) in expected_versions

            if latest.backup_path:
                manifest_cursor.execute(
                    'select * from base_shas where sha=?',
                    (latest.sha,),
                )
                row = manifest_cursor.fetchone()
                with IOIter(latest.backup_path) as n:
                    if not row or not row[1]:
                        # No base recorded: the stored file is compared to the contents directly
                        assert n.fd.read() == latest.contents
                    else:
                        # A base is recorded: the stored file is applied as a diff to the base
                        # file, whose path is derived from the second column of the row
                        orig_file_path = path_join(BACKUP_DIR, row[1][:2], row[1][2:4], row[1][4:])
                        with IOIter(orig_file_path) as o, IOIter() as tmp:
                            apply_diff(o, n, tmp)
                            tmp.fd.seek(0)
                            assert tmp.fd.read() == latest.contents
    finally:
        manifest_conn.close()
Exemple #17
0
 def load(
     self,
     src: str,
     dest: IOIter,
     key_pair: Optional[bytes],
 ) -> IOIter:
     """ Load an object identified by its SHA from the store and decode it into dest

     :param src: the SHA of the object; converted to a store path with sha_to_path
     :param dest: an IOIter object that receives the decoded data
     :param key_pair: the key pair passed on to decrypt_and_unpack
     :returns: dest, rewound to the beginning
     """
     store_path = sha_to_path(src)
     with IOIter() as raw_download:
         self._load(store_path, raw_download)
         decrypt_and_unpack(raw_download, dest, key_pair, self.options)

     dest.fd.seek(0)
     return dest
Exemple #18
0
def apply_diff(orig_file: IOIter, diff_file: IOIter, new_file: IOIter) -> None:
    """ Rebuild a new file by applying a stream of diff records to an original file

    The diff is a sequence of `<length><SEPARATOR><diff bytes>` records; each complete record is
    patched against the next block read from the original (or an empty block once the original
    is exhausted) and the result is written out.

    :param orig_file: an IOIter object whose contents are the "original" data
    :param diff_file: an IOIter object whose contents are the diff to be applied
    :param new_file: an IOIter object where the new file data will be written
    :raises DiffParseError: if unparsed diff data is left over once the diff file is exhausted
    """

    pending = b''
    out = new_file.writer()
    next(out)
    orig_blocks = orig_file.reader()
    logger.debug2('applying diff')

    for chunk in diff_file.reader():
        pending += chunk

        # Consume as many complete records as the buffered data contains
        while pending:
            length_field, sep, rest = pending.partition(SEPARATOR)
            if not sep:
                # no separator yet, so the length field is incomplete; read more data
                break

            patch_len = int(length_field)
            if len(rest) < patch_len:
                # the record's diff bytes have not all arrived yet; read more data
                break

            base_block = next(orig_blocks, b'')
            out.send(bsdiff4.patch(base_block, rest[:patch_len]))
            pending = rest[patch_len:]

    if pending:
        raise DiffParseError(f'Un-parseable diff: {pending}')  # type: ignore
Exemple #19
0
    def save(self, src: IOIter, dest: str, key_pair: bytes) -> bytes:
        """ Compress and encrypt a file locally, then hand the result to the store's _save

        :param src: the file to save
        :param dest: the SHA naming the file; converted to a store path with sha_to_path
        :param key_pair: an AES key + nonce to use to encrypt the file
        :returns: the signature returned by compress_and_encrypt for the saved file
        """
        store_path = sha_to_path(dest)

        # The encoded copy is staged in the scratch directory so that the backup store receives
        # a complete file to handle atomically
        scratch_path = path_join(get_scratch_dir(), store_path)

        with IOIter(scratch_path) as staged_file:
            hmac_signature = compress_and_encrypt(src, staged_file, key_pair, self.options)
            self._save(staged_file, store_path)  # test_f1_crash_file_save

        # The staged copy is only removed once the save has completed without raising
        os.remove(scratch_path)
        return hmac_signature
Exemple #20
0
    def _write_diff(
        self,
        abs_file_name: str,
        new_sha: str,
        curr_entry: ManifestEntry,
        file_obj: IOIter,
        dry_run: bool,
    ) -> ManifestEntry:
        """ Build a manifest entry for a changed file, saving a diff against its base if needed

        :param abs_file_name: the name of the file being backed up
        :param new_sha: the SHA of the file's current contents
        :param curr_entry: the file's existing manifest entry
        :param file_obj: an open IOIter for the file's current contents
        :param dry_run: if true, the diff is still computed but nothing is saved to the store
        :returns: the new manifest entry; if the diff turns out to be too large, the entry
            returned by _write_copy instead
        """
        logger.info(f'Saving a diff for {abs_file_name}')

        # Look for data already associated with this SHA; when found, its key pair and base
        # information are reused and no diff is computed or saved below
        entry_data = self._find_existing_entry_data(new_sha)
        # If the current entry is itself a diff, get its base; otherwise, this
        # entry becomes the base
        if entry_data:
            key_pair, base_sha, base_key_pair = entry_data
        elif curr_entry.base_sha:
            key_pair = generate_key_pair(self.options)
            base_sha = curr_entry.base_sha
            base_key_pair = curr_entry.base_key_pair
        else:
            key_pair = generate_key_pair(self.options)
            base_sha = curr_entry.sha
            base_key_pair = curr_entry.key_pair

        # compute a diff between the version we've previously backed up and the new version
        new_entry = ManifestEntry(
            abs_file_name,
            new_sha,
            base_sha,
            file_obj.uid,
            file_obj.gid,
            file_obj.mode,
            key_pair,
            base_key_pair,
        )

        if not entry_data:
            assert base_sha
            with IOIter() as orig_file, IOIter() as diff_file:
                # Load the base version from the store so the new contents can be diffed
                # against it
                orig_file = self.load(base_sha, orig_file, base_key_pair)
                try:
                    fd_diff = compute_diff(
                        orig_file,
                        file_obj,
                        diff_file,
                        self.options['discard_diff_percentage'],
                    )
                except DiffTooLargeException:
                    logger.info(
                        'The computed diff was too large; saving a copy instead.'
                    )
                    logger.info(
                        '(you can configure this threshold with the discard_diff_percentage option)'
                    )
                    # Rewind the file before handing it to _write_copy, since the diff
                    # computation has already read from it
                    file_obj.fd.seek(0)
                    return self._write_copy(abs_file_name, new_sha, file_obj,
                                            False, dry_run)

                # NOTE(review): new_sha was already passed to the ManifestEntry constructor
                # above, so this assignment looks redundant — confirm before removing
                new_entry.sha = new_sha
                if not dry_run:
                    # The signature returned by save is appended to the entry's key pair
                    signature = self.save(fd_diff, new_entry.sha, key_pair)
                    new_entry.key_pair = key_pair + signature
        return new_entry
Exemple #21
0
    def save_if_new(
        self,
        abs_file_name: str,
        *,
        dry_run: bool = False,
        force_copy: bool = False,
    ) -> Optional[ManifestEntry]:
        """ The main workhorse function; determine if a file has changed, and if so, back it up!

        :param abs_file_name: the name of the file under consideration
        :param dry_run: whether to actually save any data or not
        :param force_copy: make a new copy of the file even if we could compute a diff instead
        :returns: the new manifest entry for the file, or None if neither its contents nor its
            uid/gid/mode differ from the current entry
        """
        curr_entry, new_entry = self.manifest.get_entry(abs_file_name), None
        with IOIter(abs_file_name) as new_file:
            new_sha = compute_sha(new_file)

            # If the file hasn't been backed up before, or if it's been deleted previously, save a
            # new copy; we make a copy here to ensure that the contents don't change while backing
            # the file up, and that we have the correct sha
            if force_copy or not curr_entry or not curr_entry.sha:
                new_entry = self._write_copy(abs_file_name, new_sha, new_file,
                                             force_copy, dry_run)

            # If the file has been backed up, check to see if it's changed by comparing shas
            elif new_sha != curr_entry.sha:
                # Files matching one of the skip_diff_patterns are saved as full copies;
                # everything else is saved as a diff
                if regex_search_list(abs_file_name,
                                     self.options['skip_diff_patterns']):
                    new_entry = self._write_copy(abs_file_name, new_sha,
                                                 new_file, False, dry_run)
                else:
                    new_entry = self._write_diff(
                        abs_file_name,
                        new_sha,
                        curr_entry,
                        new_file,
                        dry_run,
                    )

            # If the sha is the same but metadata on the file has changed, we just store the updated
            # metadata
            elif (new_file.uid != curr_entry.uid
                  or new_file.gid != curr_entry.gid
                  or new_file.mode != curr_entry.mode):
                logger.info(f'Saving changed metadata for {abs_file_name}')
                new_entry = ManifestEntry(
                    abs_file_name,
                    curr_entry.sha,
                    curr_entry.base_sha,
                    new_file.uid,
                    new_file.gid,
                    new_file.mode,
                    curr_entry.
                    key_pair,  # NOTE: this is safe because the data has not changed!
                    curr_entry.base_key_pair,
                )
            else:
                # we don't want to flood the log with all the files that haven't changed
                logger.debug(f'{abs_file_name} is up to date!')

            # The manifest is only modified outside of dry runs; the entry is returned either way
            if new_entry and not dry_run:
                self.manifest.insert_or_update(new_entry)
            return new_entry  # test_m2_crash_after_file_save
def test_load(mock_backup_store):
    """ Loading '/foo' from the store should write its contents to the output file """
    restored_path = '/restored_file'
    with IOIter(restored_path) as output:
        mock_backup_store._load('/foo', output)

    with open(restored_path) as f:
        restored_text = f.read()
    assert restored_text == 'old boring content'
Exemple #23
0
def test_copy(mock_io_iter, foo_contents):
    """ io_copy should write the source's contents to the destination file """
    source = mock_io_iter
    with source, IOIter('/bar') as duplicate:
        io_copy(source, duplicate)

    with open('/bar', 'rb') as f:
        copied_bytes = f.read()
    assert copied_bytes == foo_contents
Exemple #24
0
def mock_io_iter(fs):
    """ Yield an IOIter for '/foo' with a block size of 2, after setting the fake filesystem's
    uid and gid to 1000 """
    owner_id = 1000
    fake_filesystem.set_uid(owner_id)
    fake_filesystem.set_gid(owner_id)

    io_iter = IOIter('/foo', block_size=2)
    yield io_iter
Exemple #25
0
 def _load(self, path: str, output_file: IOIter) -> IOIter:
     """ Copy a file from the backup location into output_file

     :param path: the file's path relative to the backup location
     :param output_file: an IOIter object that receives the file's contents
     :returns: output_file
     """
     source_path = path_join(self.backup_location, path)
     logger.info(f'Reading {path} from {self.backup_location}')

     with IOIter(source_path) as stored_file:
         io_copy(stored_file, output_file)
     return output_file