def test_non_intersecting_directories():
    """
    checks that PathUtils.non_intersecting_directories() reduces each input
    list to the expected set of directories.
    """
    cases = [
        # relative paths are all displaced by '.'
        (['.', 'hello', 'world'], ['.']),
        # absolute paths are all displaced by '/'
        (['/', '/hello', '/hello/world'], ['/']),
        # parents displace children
        (
            ['/hello', '/hello/world', 'world', 'world/of/tomorrow'],
            ['/hello', 'world'],
        ),
    ]
    for candidates, survivors in cases:
        result = PathUtils.non_intersecting_directories(to_paths(candidates))
        assert result == set(to_paths(survivors))
async def publish_changed_directories(
    old_commit: Commit,
    new_commit: Commit,
    publisher: TopicPublisher,
    file_extensions: Optional[List[str]] = None,
):
    """
    publishes policy topics for the directories touched between two commits,
    prompting the client to ask for *all* contents of these directories
    (and not just diffs).

    - if both commits are equal, the call is delegated to
      publish_all_directories_in_repo().
    - otherwise the paths affected between `old_commit` and `new_commit` are
      collected (only paths whose suffix is in `file_extensions`, or every
      path when `file_extensions` is empty or None).
    - when no path passes the filter a warning is logged and nothing is
      published.
    - the published data is the hexsha of `new_commit`.
    """
    if new_commit == old_commit:
        return await publish_all_directories_in_repo(
            old_commit,
            new_commit,
            publisher=publisher,
            file_extensions=file_extensions,
        )

    def is_tracked(path: Path) -> bool:
        # no configured extensions means every path is tracked
        return (not file_extensions) or path.suffix in file_extensions

    with DiffViewer(old_commit, new_commit) as viewer:
        changed_paths = list(viewer.affected_paths(is_tracked))
        if not changed_paths:
            logger.warning(
                f"new commits detected but no tracked files were affected: '{old_commit.hexsha}' -> '{new_commit.hexsha}'",
                old_commit=old_commit,
                new_commit=new_commit,
            )
            return

        affected_dirs = PathUtils.intermediate_directories(changed_paths)
        logger.info(
            "Publishing policy update, directories: {directories}",
            directories=[str(d) for d in affected_dirs],
        )
        publisher.publish(
            topics=policy_topics(affected_dirs),
            data=new_commit.hexsha,
        )
def pubsub_topics_from_directories(dirs: List[str]) -> List[str]:
    """
    converts a list of directories on the policy repository that the client
    wants to subscribe to into a list of topics.

    the directories are first passed through
    PathUtils.non_intersecting_directories(), so the client only subscribes
    to non-intersecting directories (directories that are descendants of
    another requested directory are deduplicated).
    """
    requested = list(map(Path, dirs))
    return policy_topics(PathUtils.non_intersecting_directories(requested))
def default_subscribed_policy_directories() -> List[str]:
    """
    returns the configured value of POLICY_SUBSCRIPTION_DIRS as strings,
    after deduplicating intersecting directories via
    PathUtils.non_intersecting_directories().
    """
    configured = list(map(Path, opal_client_config.POLICY_SUBSCRIPTION_DIRS))
    deduplicated = PathUtils.non_intersecting_directories(configured)
    return list(map(str, deduplicated))
def diffed_file_is_under_directories(diff: Diff, directories: Set[Path]) -> bool:
    """
    filter on git diffs: accepts only diffs on files that are located in
    certain directories.

    both sides of the diff (`a_path` and `b_path`) are checked, so if a file
    is renamed/added/removed it is enough that one of its versions was
    located in one of the required directories. a side whose path is None
    is ignored.
    """
    both_sides = (diff.a_path, diff.b_path)
    return any(
        side is not None
        and PathUtils.is_child_of_directories(Path(side), directories)
        for side in both_sides
    )
async def publish_all_directories_in_repo(
    old_commit: Commit,
    new_commit: Commit,
    publisher: TopicPublisher,
    file_extensions: Optional[List[str]] = None,
):
    """
    publishes policy topics matching all relevant directories in tracked repo,
    prompting the client to ask for *all* contents of these directories
    (and not just diffs).

    Args:
        old_commit: part of the signature but not read by this function.
        new_commit: the commit whose files are listed; its hexsha is the
            published data.
        publisher: receives the topics via `publisher.publish()`.
        file_extensions: forwarded to `has_extension` as its `extensions`
            keyword argument to filter the listed files.
    """
    with CommitViewer(new_commit) as viewer:
        # deliberately not named `filter`, which would shadow the builtin
        extension_filter = partial(has_extension, extensions=file_extensions)
        all_paths = list(viewer.files(extension_filter))
        directories = PathUtils.intermediate_directories(all_paths)
        logger.info(
            "Publishing policy update, directories: {directories}",
            directories=[str(d) for d in directories],
        )
        topics = policy_topics(directories)
        publisher.publish(topics=topics, data=new_commit.hexsha)
def test_intermediate_directories():
    """
    checks PathUtils.intermediate_directories() on empty input, on paths
    without a parent, on top level paths, on nested paths and on a mix of
    relative and absolute paths.
    """
    # empty sources returns empty parent list; '/', '.' and '' have no parent
    for parentless in ([], ['/'], ['.'], ['']):
        assert len(PathUtils.intermediate_directories(to_paths(parentless))) == 0

    # top level directories have only one parent
    for top_level, only_parent in (('/some', '/'), ('some', '.')):
        found = PathUtils.intermediate_directories(to_paths([top_level]))
        assert found == to_paths([only_parent])

    # check some examples of nested paths
    nested_cases = [
        ('some/dir/to', ['.', 'some', 'some/dir']),
        ('/another/example', ['/', '/another']),
    ]
    for source, ancestors in nested_cases:
        parents = PathUtils.intermediate_directories(to_paths([source]))
        assert len(parents) == len(ancestors)
        assert len(set(parents) & set(to_paths(ancestors))) == len(ancestors)

    # mix and match
    parents = PathUtils.intermediate_directories(
        to_paths([
            'some',
            '/other',
            'example/of/path',
            'some/may/intersect',
        ]))
    assert len(parents) == 6
    for ancestor in ('.', '/', 'some', 'some/may', 'example', 'example/of'):
        assert Path(ancestor) in parents
def test_filter_children_paths_of_directories():
    """
    checks which of a fixed list of source paths are returned by
    PathUtils.filter_children_paths_of_directories() for different sets of
    parent directories.
    """
    sources = to_paths([
        '/files/for/testing/1.txt',
        '/files/for/testing/2.json',
        '/filtered/out.txt',
        'relative/path.log',
        'relative/subdir/another.log',
    ])

    # each case: (parent directories, source paths expected to be kept)
    cases = [
        # filter paths under .
        (['.'], ['relative/path.log', 'relative/subdir/another.log']),
        # filter paths under /
        (
            ['/'],
            [
                '/files/for/testing/1.txt',
                '/files/for/testing/2.json',
                '/filtered/out.txt',
            ],
        ),
        # filter paths under /files
        (
            ['/files'],
            ['/files/for/testing/1.txt', '/files/for/testing/2.json'],
        ),
        # filter paths under relative/subdir
        (['relative/subdir'], ['relative/subdir/another.log']),
        # filter paths under multiple parents
        (
            ['relative/subdir', '/filtered'],
            ['/filtered/out.txt', 'relative/subdir/another.log'],
        ),
        # parents can intersect
        (
            ['relative/subdir', '.'],
            ['relative/path.log', 'relative/subdir/another.log'],
        ),
    ]
    for parents, kept in cases:
        paths = PathUtils.filter_children_paths_of_directories(
            sources, set(to_paths(parents)))
        assert len(paths) == len(kept)
        assert len(set(paths).intersection(set(to_paths(kept)))) == len(kept)

    # no parents
    without_parents = PathUtils.filter_children_paths_of_directories(
        sources, set())
    assert len(without_parents) == 0

    # no parent match sources
    unmatched = PathUtils.filter_children_paths_of_directories(
        sources, set(to_paths(['not/in/repo'])))
    assert len(unmatched) == 0
def test_is_child_of_directories():
    """
    checks PathUtils.is_child_of_directories() against a table of
    (path, parent directories, expected result) cases.
    """
    cases = [
        # parent directories are the top level (relative) dir
        ('.', ['.'], False),
        ('hello', ['.'], True),
        ('world.txt', ['.'], True),
        ('/world', ['.'], False),
        # parent directories are the top level (absolute) dir
        ('/', ['/'], False),
        ('/hello', ['/'], True),
        ('/world.txt', ['/'], True),
        ('world', ['/'], False),
        # directories can be files (bad input)
        ('/world.txt', ['/hello.txt'], False),
        # some valid input
        ('some/file.txt', ['some'], True),
        ('some/file.txt', ['.'], True),
        ('some/dir/to/file.txt', ['some/dir'], True),
    ]
    for candidate, parents, expected in cases:
        assert PathUtils.is_child_of_directories(
            Path(candidate), set(to_paths(parents))) == expected
def is_under_directories(f: VersionedFile, directories: Set[Path]) -> bool:
    """
    a filter on versioned files: accepts only files whose `path` is reported
    by PathUtils.is_child_of_directories() as being under one of the given
    directories in the repo.
    """
    file_path = f.path
    return PathUtils.is_child_of_directories(file_path, directories)