def test_changes_before_commits(repository_path: Path) -> None:
    """A fresh repository reports every item from CONTENT as a pending change."""
    repo = StateRepository(root_path=repository_path)
    reported = Counter(change.item for change in repo.changes())
    assert reported == Counter(Path(name) for name in CONTENT)
def test_no_changes_after_commit(repository_path: Path) -> None:
    """After committing every pending change, the repository reports none."""
    repo = StateRepository(root_path=repository_path)
    for change in repo.changes():
        change.commit()
    remaining = [change.item for change in repo.changes()]
    assert not remaining
def test_previous_state_before_commits(repository_path: Path) -> None:
    """Before any commit, every change reports a previous_state of None."""
    expected = [Path(x) for x in CONTENT.keys()]
    repository = StateRepository(root_path=repository_path)
    # Fix: compare with `is None`, not `== None` — identity is the correct
    # idiom and avoids surprises from any custom __eq__ on state objects.
    none_previous_state = [
        c.item for c in repository.changes() if c.previous_state is None
    ]
    assert Counter(expected) == Counter(none_previous_state)
def test_changes_after_commit(repository_path: Path, item: str) -> None:
    """Appending to a committed file makes it the only pending change."""
    repo = StateRepository(root_path=repository_path)
    for change in repo.changes():
        change.commit()
    # Touch exactly one file so exactly one change should be reported.
    with repository_path.joinpath(item).open(mode="a") as handle:
        handle.write("new content")
    assert [c.item for c in repo.changes()] == [Path(item)]
def test_detect_single_character_change(tmp_path: Path) -> None:
    """A one-character edit to a committed file is detected as a change."""
    file_path = tmp_path.joinpath("content")
    file_path.write_text("abcdef")
    repository = StateRepository(root_path=tmp_path)
    for c in repository.changes():
        # Bug fix: the original read `c.commit` (no call), which only
        # referenced the bound method — the baseline state was never
        # committed, so the test was not exercising change detection.
        c.commit()
    file_path.write_text("abcdeg")
    changes = [c.item_path for c in repository.changes()]
    assert changes == [file_path]
def test_changes_after_delete_commit(repository_path: Path, to_delete: str) -> None:
    """Committing a deletion change removes it from subsequent change listings."""
    repository = StateRepository(root_path=repository_path)
    for c in repository.changes():
        c.commit()
    to_delete_path = repository_path.joinpath(to_delete)
    to_delete_path.unlink()
    for c in repository.changes():
        # The change should just be a delete.
        # Fix: `is None`, not `== None` (identity comparison idiom).
        assert c.new_state is None
        c.commit()
    # After committing the deletes, there should be no more changes.
    assert list(repository.changes()) == []
def test_delete_after_commit(repository_path: Path, to_delete: str) -> None:
    """Deleting a committed file yields exactly one change describing the delete."""
    repository = StateRepository(root_path=repository_path)
    for c in repository.changes():
        c.commit()
    to_delete_path = repository_path.joinpath(to_delete)
    to_delete_path.unlink()
    changes = list(repository.changes())
    changed_items = [c.item for c in changes]
    # Only one change to represent the deleted item.
    assert [Path(to_delete)] == changed_items
    # The change should represent a delete: a previous state exists and the
    # new state is gone. Fix: use `is (not) None` instead of `==`/`!= None`.
    assert changes[0].previous_state is not None
    assert changes[0].new_state is None
def __has_content(states: StateRepository) -> bool:
    """Return True if the repository tracks at least one content path."""
    # Probe the iterator with a default sentinel instead of catching
    # StopIteration explicitly.
    _sentinel = object()
    return next(states.content_paths(), _sentinel) is not _sentinel
def backup(repository_path: Path, configuration: Configuration) -> None:
    """
    Upload changed items to S3 and delete remote items removed locally.

    Parameters
    ----------
    repository_path
        The file system path to the directory that will be backed up. This
        path is expected to be an existing directory.
    configuration
        This provides a representation of the configuration for the backup
        (e.g. which Amazon S3 Bucket to upload backups to).
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(configuration.s3_bucket)

    states = StateRepository(root_path=repository_path)

    # PEP 8 (E731): define functions with `def` rather than assigning lambdas.
    # Both providers return (path_to_upload, cleanup_callable).
    if configuration.encryption_password:
        def file_provider(file):
            return encryption.encrypted_file(
                source=file, password=configuration.encryption_password)
    else:
        def file_provider(file):
            return (file, lambda: None)

    any_changes = False
    to_delete = []
    for c in states.changes():
        any_changes = True
        if c.item_path.exists():
            # Upload only when there is no previous state or the content
            # hash differs; either way the change is committed afterwards.
            if (not c.previous_state
                    or c.previous_state.hash != c.new_state.hash):
                logging.info(f'Uploading item {c.item.as_posix()}.')
                (to_upload, cleanup) = file_provider(c.item_path)
                try:
                    bucket.upload_file(to_upload.as_posix(),
                                       f"content/{c.item.as_posix()}")
                finally:
                    # Always remove any temporary (e.g. encrypted) file.
                    cleanup()
            else:
                logging.info(
                    f'Content of item {c.item.as_posix()} has not changed, '
                    'skipping upload.')
            c.commit()
        elif c.new_state is None and c.previous_state is not None:
            # Item vanished from the filesystem: queue its remote deletion.
            logging.info(
                f'Item {c.item.as_posix()} is no longer in filesystem. '
                'It will be deleted.')
            to_delete.append(c)

    # Delete in batches of __DELETE_GROUP_SIZE. Slicing already clamps at the
    # end of the list, so no min() bound is needed.
    # Bug fix: `failed` used to be rebuilt inside the batch loop, so failures
    # reported by earlier batches were forgotten before the commit loop ran.
    # It now accumulates keys across all batches.
    failed: set = set()
    for start in range(0, len(to_delete), __DELETE_GROUP_SIZE):
        sublist = to_delete[start:start + __DELETE_GROUP_SIZE]
        keys = [{'Key': f"content/{c.item.as_posix()}"} for c in sublist]
        response = bucket.delete_objects(Delete={
            'Objects': keys,
            'Quiet': True
        })
        if 'Errors' in response:
            # List of objects or items that failed to be deleted. Documentation
            # says the list should contain only the things that encountered an
            # error while deleting.
            failed.update(o['Key'] for o in response['Errors'])

    for c in to_delete:
        # Bug fix: S3 reports failed keys with the "content/" prefix they were
        # submitted under; the old comparison against the bare item path could
        # never match, so failures were silently committed as deleted.
        if f"content/{c.item.as_posix()}" in failed:
            # This means we could not delete the item; keep its state so the
            # delete is retried on the next run.
            # Fix: logging.warn is a deprecated alias of logging.warning.
            logging.warning(f'Could not delete {c.item.as_posix()}')
        else:
            c.commit()

    if not any_changes:
        if __has_content(states):
            # Fix: corrected grammar of the user-facing message.
            print("No changes were detected.")
        else:
            print(
                f"Directory '{repository_path}' contains no files to back up.")