def filter_submodules(self, repo: pygit2.Repository) -> None:
    """Exclude all git submodules and their contents from being scanned.

    Every submodule path is turned into an anchored regular expression and
    merged (de-duplicated) with the existing exclusions into
    ``self._excluded_paths``.

    :param repo: The repository being scanned
    :raises TartufoException: If listing or looking up a submodule raises
        ``AttributeError``
    """
    patterns: List[Pattern] = []
    self.logger.info("Excluding submodules paths from scan.")
    try:
        for module in repo.listall_submodules():
            submodule = repo.lookup_submodule(module)
            # Escape the path so regex metacharacters in it (".", "+", "(" ...)
            # match literally, and require a path boundary after it so that a
            # submodule at "libs/foo" does not also exclude "libs/foobar".
            patterns.append(
                re.compile(f"^{re.escape(submodule.path)}(?:/|$)")
            )
    except AttributeError as exc:
        raise TartufoException(
            "There was an error while parsing submodules for this repository. "
            "A likely cause is that a file tree was committed in place of a "
            "submodule."
        ) from exc
    # Compiled patterns are hashable, so a set removes duplicate exclusions.
    self._excluded_paths = list(set(self.excluded_paths + patterns))
def detect_changed_files(repo: pygit2.Repository, repo_path: Path) -> Iterator[Path]:
    """Yield every path in ``repo``'s working tree that git reports as changed.

    Status entries whose flags are exactly ``GIT_STATUS_CURRENT`` or
    ``GIT_STATUS_IGNORED`` are skipped. For the remaining entries:

    * a non-directory is yielded as-is;
    * a directory listed as a submodule is yielded itself and then scanned
      recursively as its own repository;
    * any other directory is walked, yielding each non-ignored file and
      sub-directory beneath it (but not the directory itself).

    :param repo: The repository whose status is inspected
    :param repo_path: Path that the entries of ``repo.status()`` are joined onto
    :raises AssertionError: If a changed, non-submodule directory turns out to
        contain nothing that is not ignored
    """
    # Built once as a set: membership is tested for every changed directory.
    submodules = set(repo.listall_submodules())
    unchanged_states = (pygit2.GIT_STATUS_CURRENT, pygit2.GIT_STATUS_IGNORED)

    for file, flags in repo.status().items():
        if flags in unchanged_states:
            continue

        target_path = Path(repo_path, file)
        if not target_path.is_dir():
            yield target_path
            continue

        relative_path = target_path.relative_to(repo_path)

        # NOTE: Special treatment for sub-modules
        # Git always uses "/" as the separator, so compare the POSIX form;
        # str() would produce backslashes on Windows and never match.
        if relative_path.as_posix() in submodules:
            sub_repo = pygit2.Repository(target_path)  # TODO: What if it's no longer a repository?
            # Mark the subrepo itself as modified. It has additional commit
            # metadata that might have changed
            yield target_path
            # Detect any modified files within the sub-repo
            # NOTE: This is faster than plain hashing because it implicitly
            # takes advantage of the tracking git has already done.
            yield from detect_changed_files(sub_repo, target_path)
            continue

        # Not a submodule: just mark all (non-ignored) subfiles as changed
        #
        # NOTE: We do not yield the directory itself because git ignores that.
        # There is no extra metadata to add in that case.
        detected_modification = False
        for dirpath, dirnames, filenames in os.walk(target_path):
            relative_dirpath = Path(dirpath).relative_to(repo_path)
            for name in filenames:
                if not repo.path_is_ignored(Path(relative_dirpath, name).as_posix()):
                    yield Path(dirpath, name)
                    detected_modification = True
            # Iterate over a copy: removing from ``dirnames`` in place is what
            # stops os.walk from descending into ignored directories.
            for sub_dir in list(dirnames):
                if repo.path_is_ignored(Path(relative_dirpath, sub_dir).as_posix()):
                    dirnames.remove(sub_dir)
                else:
                    yield Path(dirpath, sub_dir)
                    detected_modification = True
        if not detected_modification:
            raise AssertionError(f"Unable to find git's claimed modification (flags={flags:04x}): {target_path}")