def detect_path_overlap(paths):
    """
    Find the first path that duplicates or overlaps a previously seen path.

    Two paths overlap when one terminates inside the other (e.g. a/b and
    a/b/c). Paths are examined in iteration order and compared against a trie
    (prefix tree) holding the paths seen so far; the search stops at the first
    conflict.

    NOTE: The logic is copied from pulpcore.app.files.validate_file_paths().

    Args:
        paths (iterable of str): An iterable of strings each representing a
            relative path

    Returns:
        str: the first path which overlaps or duplicates an earlier one, or
            None when no conflict is found
    """
    seen = StringTrie(separator="/")

    for candidate in paths:
        conflicts = (
            # exact duplicate of a path already in the trie
            candidate in seen
            # candidate is 'a/b' and the trie already has 'a/b/c'
            or seen.has_subtrie(candidate)
            # candidate is 'a/b/c' and the trie already has 'a/b'
            or any(True for _step in seen.prefixes(candidate))
        )
        if conflicts:
            return candidate

        # nothing seen so far clashes with this path, so remember it
        seen[candidate] = True

    return None
def validate_file_paths(paths):
    """
    Validate that no path is duplicated and that no two paths overlap.

    Two paths overlap when one terminates inside the other (e.g. a/b and
    a/b/c). Paths are checked in iteration order against a trie (prefix tree)
    of the paths seen so far. Every duplicate and overlap is collected, and a
    single exception describing all of them is raised after the whole iterable
    has been examined.

    Args:
        paths (iterable of str): An iterable of strings each representing a
            relative path

    Raises:
        ValueError: If any path duplicates or overlaps another
    """
    overlap_template = _("The path for file '{path}' overlaps: {conflicts}")

    seen = StringTrie(separator="/")
    duplicates = []
    overlap_messages = []

    def note_overlap(offender, clashing_keys):
        # Format one overlap report and queue it for the final error message.
        overlap_messages.append(
            overlap_template.format(
                path=offender, conflicts=", ".join(clashing_keys)))

    for candidate in paths:
        if candidate in seen:
            # exact duplicate of a path already in the trie
            duplicates.append(candidate)
        elif seen.has_subtrie(candidate):
            # candidate is 'a/b' and the trie already has 'a/b/c'
            note_overlap(
                candidate,
                [key for key, _value in seen.items(prefix=candidate)])
        else:
            # candidate is 'a/b/c' and the trie already has 'a/b'
            shorter_keys = [step.key for step in seen.prefixes(candidate)]
            if shorter_keys:
                note_overlap(candidate, shorter_keys)

        # record the path so that later paths are compared against it
        seen[candidate] = True

    if not (duplicates or overlap_messages):
        return

    duplicates_text = ""
    if duplicates:
        duplicates_text = _("Paths are duplicated: {paths}").format(
            paths=",".join(duplicates))
    # joining an empty list yields "", matching the no-overlap case
    overlaps_text = "\n".join(overlap_messages)

    raise ValueError(
        _("Path errors found. {dups}\n{overlaps}").format(
            dups=duplicates_text, overlaps=overlaps_text))