예제 #1
0
파일: scanner.py 프로젝트: godaddy/tartufo
    def filter_submodules(self, repo: pygit2.Repository) -> None:
        """Exclude all git submodules and their contents from being scanned.

        For every submodule reported by ``repo`` an anchored regular
        expression is added to the excluded paths. The submodule path is
        matched literally (regex metacharacters are escaped) and must be
        followed by ``/`` or the end of the string, so a submodule at ``lib``
        excludes ``lib`` and ``lib/...`` but not ``library/...``.

        :param repo: The repository being scanned
        :raises TartufoException: If an ``AttributeError`` is raised while the
            submodules are being listed or looked up
        """
        self.logger.info("Excluding submodules paths from scan.")
        try:
            patterns: List[Pattern] = [
                # re.escape: a path such as "a.b" or "c++" must not be read as
                # regex syntax. The trailing group stops the pattern from
                # swallowing sibling paths that merely share the prefix.
                re.compile(
                    f"^{re.escape(repo.lookup_submodule(module).path)}(?:/|$)"
                )
                for module in repo.listall_submodules()
            ]
        except AttributeError as exc:
            raise TartufoException(
                "There was an error while parsing submodules for this repository. "
                "A likely cause is that a file tree was committed in place of a "
                "submodule.") from exc
        # The set round-trip drops duplicate patterns; resulting order is
        # therefore not guaranteed.
        self._excluded_paths = list(set(self.excluded_paths + patterns))
예제 #2
0
def detect_changed_files(repo: pygit2.Repository, repo_path: Path) -> Iterator[Path]:
    """Yield the paths of entries that ``repo.status()`` reports as changed.

    Entries whose flags are ``GIT_STATUS_CURRENT`` or ``GIT_STATUS_IGNORED``
    are skipped. Every other entry is joined onto ``repo_path`` and handled
    according to what is found on disk:

    * not a directory: the path is yielded as-is;
    * a directory listed in ``repo.listall_submodules()``: the directory is
      yielded, then the function recurses into a repository opened at it;
    * any other directory: it is walked, and every file and sub-directory
      that ``repo.path_is_ignored`` does not reject is yielded. The
      top-level directory itself is not yielded.

    :param repo: The repository whose status is inspected
    :param repo_path: Directory that the status paths are relative to
        (presumably the root of the working tree -- confirm with callers)
    :raises AssertionError: If a changed, non-submodule directory contains no
        non-ignored file or sub-directory
    """
    # Built once: set membership is O(1), whereas the list would be rescanned
    # for every changed directory.
    submodules = frozenset(repo.listall_submodules())
    for file, flags in repo.status().items():
        if flags in (pygit2.GIT_STATUS_CURRENT, pygit2.GIT_STATUS_IGNORED):
            continue
        target_path = Path(repo_path, file)
        if not target_path.is_dir():
            yield target_path
            continue

        relative_path = target_path.relative_to(repo_path)
        # NOTE: Special treatment for sub-modules.
        # as_posix() keeps "/" separators on every platform, which is how git
        # spells paths; str() would use "\\" on Windows and never match.
        if relative_path.as_posix() in submodules:
            sub_repo = pygit2.Repository(target_path)  # TODO: What if it's no longer a repository?
            # Mark the subrepo itself as modified. It has additional commit metadata
            # that might have changed
            yield target_path
            # Detect any modified files within the sub-repo
            # NOTE: This is faster than plain hashing because it implicitly takes advantage of
            # the tracking git has already done.
            yield from detect_changed_files(sub_repo, target_path)
            continue

        # Not a submodule: just mark all (non-ignored) subfiles as changed
        #
        # NOTE: We do not yield the directory itself because git ignores that.
        # There is no extra metadata to add in that case.
        detected_modification = False
        for dirpath, dirnames, filenames in os.walk(target_path):
            relative_dirpath = Path(dirpath).relative_to(repo_path)
            for name in filenames:
                if not repo.path_is_ignored(Path(relative_dirpath, name).as_posix()):
                    yield Path(dirpath, name)
                    detected_modification = True
            # Iterate over a copy: removing from ``dirnames`` in place is what
            # tells os.walk not to descend into ignored directories.
            for sub_dir in list(dirnames):
                if repo.path_is_ignored(Path(relative_dirpath, sub_dir).as_posix()):
                    dirnames.remove(sub_dir)
                else:
                    yield Path(dirpath, sub_dir)
                    detected_modification = True
        if not detected_modification:
            raise AssertionError(f"Unable to find git's claimed modification (flags={flags:04x}): {target_path}")