def _write_copy(
    self,
    abs_file_name: str,
    new_sha: str,
    file_obj: IOIter,
    force_copy: bool,
    dry_run: bool,
) -> ManifestEntry:
    logger.info(f'Saving a new copy of {abs_file_name}')

    entry_data = None
    if not force_copy:
        entry_data = self._find_existing_entry_data(new_sha)  # test_f3_file_changed_while_saving
    key_pair, base_sha, base_key_pair = entry_data or (
        generate_key_pair(self.options),
        None,
        None,
    )

    new_entry = ManifestEntry(  # test_m2_crash_before_file_save
        abs_file_name,
        new_sha,
        base_sha,
        file_obj.uid,
        file_obj.gid,
        file_obj.mode,
        key_pair,
        base_key_pair,
    )
    if not dry_run and not entry_data:
        signature = self.save(file_obj, new_entry.sha, key_pair)
        new_entry.key_pair = key_pair + signature  # append the HMAC before writing to db
    return new_entry

def mock_search_results():
    return [
        ('/path/1/file1', [
            ManifestEntry('/path/1/file1', 'ab1dedef', None, 1000, 1000, 12345, b'1111', None, 100),
            ManifestEntry('/path/1/file1', 'ab2dede1', None, 1000, 1000, 12345, b'2222', None, 75),
            ManifestEntry('/path/1/file1', 'ab3dede2', None, 1000, 1000, 12345, b'3333', None, 20),
        ]),
        ('/path/2/file2', [
            ManifestEntry('/path/2/file2', '1b1dedef', None, 1000, 1000, 12345, b'4444', None, 105),
            ManifestEntry('/path/2/file2', '1b2dede1', None, 1000, 1000, 12345, b'5555', None, 65),
        ]),
        ('/path/2/file3', [
            ManifestEntry('/path/2/file2', None, None, None, None, None, None, None, 176),
            ManifestEntry('/path/2/file2', 'ffffeeee', None, 1000, 1000, 12345, b'6666', None, 105),
        ]),
        ('/path/2/file4', [
            ManifestEntry('/path/2/file2', '12834567', None, 1000, 1000, 12345, b'7777', None, 105),
        ]),
    ]

def test_delete_entry(mock_manifest):
    mock_manifest.delete_entry(
        ManifestEntry('/foo', '12345678', None, 1000, 2000, 34622, b'1234', None, 50),
    )
    mock_manifest._cursor.execute('''
        select * from manifest where abs_file_name = '/foo'
    ''')
    rows = mock_manifest._cursor.fetchall()
    assert len(rows) == 1

def preexisting_entry():
    return ManifestEntry(
        '/some/other/file',
        '12345678',
        '123123',
        1000,
        1000,
        55555,
        b'lkjhasdf',
        b'12341234',
    )

def current_entry():
    return ManifestEntry(
        '/foo',
        'abcdef123',
        None,
        1000,
        1000,
        12345,
        b'aaaaa2222',
        None,
    )

def mock_manifest_entry_list():
    return [ManifestEntry(
        '/path/0/foo/bar',
        'abcd1234',
        None,
        1000,
        1000,
        35677,
        b'1111',
        None,
    )]

def test_insert_diff_key_pair_for_sha(mock_manifest, mock_stat):
    new_file = '/somebody_new'
    uid, gid, mode = mock_stat.st_uid, mock_stat.st_gid, mock_stat.st_mode
    new_entry = ManifestEntry(new_file, '12345678', None, uid, gid, mode, b'2222', None)
    mock_manifest.insert_or_update(new_entry)
    mock_manifest._cursor.execute('''
        select * from manifest where sha = '12345678'
    ''')
    rows = mock_manifest._cursor.fetchall()
    assert len(rows) == 2
    assert all(r['key_pair'] == b'2222' for r in rows)

def test_insert_duplicate(mock_manifest, mock_stat):
    same_file = '/foo'
    uid, gid, mode = mock_stat.st_uid, mock_stat.st_gid, mock_stat.st_mode
    new_entry = ManifestEntry(same_file, '12345678', None, uid, gid, mode, b'1111', None)
    mock_manifest.insert_or_update(new_entry)
    mock_manifest._cursor.execute('''
        select * from manifest where abs_file_name = '/foo' order by commit_timestamp
    ''')
    rows = mock_manifest._cursor.fetchall()
    assert len(rows) == 2
    assert rows[-1]['abs_file_name'] == same_file
    assert rows[0]['sha'] == '12345679'
    assert rows[-1]['sha'] == '12345678'
    assert rows[-1]['uid'] == 1000
    assert rows[-1]['gid'] == 2000
    assert rows[-1]['mode'] == 34622
    assert rows[-1]['key_pair'] == b'1111'
    assert rows[-1]['commit_timestamp'] == 1000

def test_update(mock_manifest, mock_stat, base_sha, base_key_pair):
    new_file = '/foo'
    uid, gid, mode = mock_stat.st_uid, mock_stat.st_gid, mock_stat.st_mode
    new_entry = ManifestEntry(new_file, 'b33f2', base_sha, uid, gid, mode, b'1111', base_key_pair)
    mock_manifest.insert_or_update(new_entry)
    mock_manifest._cursor.execute('''
        select * from manifest left natural join base_shas
        where abs_file_name = '/foo' order by commit_timestamp
    ''')
    rows = mock_manifest._cursor.fetchall()
    assert len(rows) == 3
    assert rows[-1]['abs_file_name'] == new_file
    assert rows[-1]['sha'] == 'b33f2'
    assert rows[-1]['base_sha'] == base_sha
    assert rows[-1]['uid'] == 1000
    assert rows[-1]['gid'] == 2000
    assert rows[-1]['mode'] == 34622
    assert rows[-1]['key_pair'] == b'1111'
    assert rows[-1]['base_key_pair'] == base_key_pair
    assert rows[-1]['commit_timestamp'] == 1000

def _write_diff(
    self,
    abs_file_name: str,
    new_sha: str,
    curr_entry: ManifestEntry,
    file_obj: IOIter,
    dry_run: bool,
) -> ManifestEntry:
    logger.info(f'Saving a diff for {abs_file_name}')

    entry_data = self._find_existing_entry_data(new_sha)
    # If the current entry is itself a diff, get its base; otherwise, this
    # entry becomes the base
    if entry_data:
        key_pair, base_sha, base_key_pair = entry_data
    elif curr_entry.base_sha:
        key_pair = generate_key_pair(self.options)
        base_sha = curr_entry.base_sha
        base_key_pair = curr_entry.base_key_pair
    else:
        key_pair = generate_key_pair(self.options)
        base_sha = curr_entry.sha
        base_key_pair = curr_entry.key_pair

    # compute a diff between the version we've previously backed up and the new version
    new_entry = ManifestEntry(
        abs_file_name,
        new_sha,
        base_sha,
        file_obj.uid,
        file_obj.gid,
        file_obj.mode,
        key_pair,
        base_key_pair,
    )

    if not entry_data:
        assert base_sha
        with IOIter() as orig_file, IOIter() as diff_file:
            orig_file = self.load(base_sha, orig_file, base_key_pair)
            try:
                fd_diff = compute_diff(
                    orig_file,
                    file_obj,
                    diff_file,
                    self.options['discard_diff_percentage'],
                )
            except DiffTooLargeException:
                logger.info('The computed diff was too large; saving a copy instead.')
                logger.info(
                    '(you can configure this threshold with the discard_diff_percentage option)'
                )
                file_obj.fd.seek(0)
                return self._write_copy(abs_file_name, new_sha, file_obj, False, dry_run)

            new_entry.sha = new_sha
            if not dry_run:
                signature = self.save(fd_diff, new_entry.sha, key_pair)
                new_entry.key_pair = key_pair + signature
    return new_entry

def save_if_new(
    self,
    abs_file_name: str,
    *,
    dry_run: bool = False,
    force_copy: bool = False,
) -> Optional[ManifestEntry]:
    """ The main workhorse function; determine if a file has changed, and if so, back it up!

    :param abs_file_name: the name of the file under consideration
    :param dry_run: whether to actually save any data or not
    :param force_copy: make a new copy of the file even if we could compute a diff instead
    """
    curr_entry, new_entry = self.manifest.get_entry(abs_file_name), None
    with IOIter(abs_file_name) as new_file:
        new_sha = compute_sha(new_file)

        # If the file hasn't been backed up before, or if it's been deleted previously, save a
        # new copy; we make a copy here to ensure that the contents don't change while backing
        # the file up, and that we have the correct sha
        if force_copy or not curr_entry or not curr_entry.sha:
            new_entry = self._write_copy(abs_file_name, new_sha, new_file, force_copy, dry_run)

        # If the file has been backed up, check to see if it's changed by comparing shas
        elif new_sha != curr_entry.sha:
            if regex_search_list(abs_file_name, self.options['skip_diff_patterns']):
                new_entry = self._write_copy(abs_file_name, new_sha, new_file, False, dry_run)
            else:
                new_entry = self._write_diff(
                    abs_file_name,
                    new_sha,
                    curr_entry,
                    new_file,
                    dry_run,
                )

        # If the sha is the same but metadata on the file has changed, we just store the updated
        # metadata
        elif (
            new_file.uid != curr_entry.uid
            or new_file.gid != curr_entry.gid
            or new_file.mode != curr_entry.mode
        ):
            logger.info(f'Saving changed metadata for {abs_file_name}')
            new_entry = ManifestEntry(
                abs_file_name,
                curr_entry.sha,
                curr_entry.base_sha,
                new_file.uid,
                new_file.gid,
                new_file.mode,
                curr_entry.key_pair,  # NOTE: this is safe because the data has not changed!
                curr_entry.base_key_pair,
            )
        else:
            # we don't want to flood the log with all the files that haven't changed
            logger.debug(f'{abs_file_name} is up to date!')

    if new_entry and not dry_run:
        self.manifest.insert_or_update(new_entry)
    return new_entry  # test_m2_crash_after_file_save
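
# Hypothetical usage sketch (not part of the source above): it only illustrates how a backup
# pass might drive save_if_new() over a directory tree. The `store` argument is assumed to be
# an instance of whatever class defines save_if_new/_write_copy/_write_diff, already configured
# with a manifest and options; `backup_directory` and `root` are illustrative names introduced
# here, not names from the original code.
import os


def backup_directory(store, root: str) -> int:
    """ Walk `root` and back up every file that is new or has changed; return the number of
    manifest entries written. """
    saved = 0
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            abs_file_name = os.path.join(dirpath, name)
            # save_if_new returns None when the file is unchanged, so only count real writes
            if store.save_if_new(abs_file_name):
                saved += 1
    return saved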