예제 #1
0
 def load_repo(self, repo_path: str) -> git.Repo:
     """Open the git repository at ``repo_path``, merging repo-local config first.

     A config file is looked up directly in ``repo_path`` (no directory
     traversal). When one is found and it is not the config already in use
     (``self.global_options.config``), its ``exclude_signatures`` are merged
     into the global options, and the rules files named by ``include_paths``
     and ``exclude_paths`` (taken relative to ``repo_path``) are compiled and
     appended to the scanner's included/excluded path rules.

     :param repo_path: Filesystem path of the repository to load
     :returns: The opened repository
     :raises types.GitLocalException: If ``git.Repo`` raises ``git.GitError``
     """
     repo_config_file: Optional[pathlib.Path]
     repo_config: MutableMapping[str, Any]
     try:
         repo_config_file, repo_config = config.load_config_from_path(
             pathlib.Path(repo_path), traverse=False
         )
     except (FileNotFoundError, types.ConfigException):
         # A missing or unreadable repo-local config simply means there is
         # nothing extra to merge.
         repo_config_file, repo_config = None, {}

     if repo_config_file and repo_config_file != self.global_options.config:
         extra_signatures = repo_config.get("exclude_signatures", None)
         if extra_signatures:
             combined = self.global_options.exclude_signatures + tuple(
                 extra_signatures
             )
             # Round-trip through a set to drop duplicate signatures.
             self.global_options.exclude_signatures = tuple(set(combined))

         # Includes are handled before excludes; each option feeds the
         # matching rule list on the scanner.
         for option, rules_attr in (
             ("include_paths", "included_paths"),
             ("exclude_paths", "excluded_paths"),
         ):
             rules_location = repo_config.get(option, None)
             if not rules_location:
                 continue
             rules_file = pathlib.Path(repo_path, rules_location)
             if not rules_file.exists():
                 continue
             path_rules = getattr(self, rules_attr)
             with rules_file.open() as handle:
                 path_rules += config.compile_path_rules(handle.readlines())
             # Store back on the private attribute behind the accessor.
             setattr(self, "_" + rules_attr, path_rules)

     try:
         return git.Repo(repo_path)
     except git.GitError as exc:
         raise types.GitLocalException(str(exc)) from exc
예제 #2
0
def scan_repo(
    repo_path: str,
    regexes: Optional[Dict[str, Pattern]],
    path_inclusions: List[Pattern],
    path_exclusions: List[Pattern],
    options: Dict[str, config.OptionTypes],
) -> List[Issue]:
    """Scan the repository at ``repo_path``, honoring any repo-local config.

    A ``pyproject.toml`` (or, failing that, a ``tartufo.toml``) in the
    repository root is read unless it is the same file as ``options["config"]``.
    The rules files named by its ``include_paths`` / ``exclude_paths`` settings
    are compiled and appended to ``path_inclusions`` / ``path_exclusions``.

    Note that ``path_inclusions`` and ``path_exclusions`` are extended in
    place.

    :param repo_path: Filesystem path of the repository to scan
    :param regexes: Custom regexes handed through to ``find_strings``
    :param path_inclusions: Compiled rules for paths to include
    :param path_exclusions: Compiled rules for paths to exclude
    :param options: Option mapping; the keys ``config``, ``since_commit``,
        ``max_depth``, ``regex``, ``entropy`` and ``branch`` are read
    :returns: Whatever ``find_strings`` returns for this repository
    """
    # Check the repo for any local configs
    repo_root = pathlib.Path(repo_path)
    config_file = repo_root / "pyproject.toml"
    if not config_file.is_file():
        config_file = repo_root / "tartufo.toml"

    repo_config = {}  # type: Dict[str, config.OptionTypes]
    if config_file.is_file() and str(config_file.resolve()) != str(
            options["config"]):
        toml_file = toml.load(str(config_file))
        repo_config = toml_file.get("tool", {}).get("tartufo", {})

    if repo_config:
        # Accept both "--include-paths" and "include_paths" style keys.
        normalized_config = {
            k.replace("--", "").replace("-", "_"): v
            for k, v in repo_config.items()
        }
        for option, rules in (
            ("include_paths", path_inclusions),
            ("exclude_paths", path_exclusions),
        ):
            extra_paths = cast(str, normalized_config.get(option, None))
            if not extra_paths:
                continue
            # The setting lives in the repository's own config, so a relative
            # path is resolved against the repository root rather than the
            # current working directory. Absolute paths are left untouched by
            # the join.
            file_path = pathlib.Path(repo_root, extra_paths).resolve()
            if file_path.is_file():
                with file_path.open("r", encoding="utf8") as paths_file:
                    rules.extend(
                        config.compile_path_rules(paths_file.readlines()))

    return find_strings(
        repo_path,
        since_commit=cast(str, options["since_commit"]),
        max_depth=cast(int, options["max_depth"]),
        do_regex=cast(bool, options["regex"]),
        do_entropy=cast(bool, options["entropy"]),
        custom_regexes=regexes,
        branch=cast(str, options["branch"]),
        path_inclusions=path_inclusions,
        path_exclusions=path_exclusions,
    )
예제 #3
0
파일: scanner.py 프로젝트: godaddy/tartufo
 def excluded_paths(self) -> List[Pattern]:
     """Return the compiled regexes for paths to leave out of the scan.

     The result is computed once and cached on ``self._excluded_paths``.
     Patterns come from ``self.global_options.exclude_path_patterns`` followed
     by the ``exclude_path_patterns`` entry of ``self.config_data``. Each entry
     is either a dict carrying a ``path-pattern`` key or, in the deprecated
     format, a plain string.

     :raises types.ConfigException: If a dict entry has no ``path-pattern``
         key, or an entry is neither a dict nor a string
     """
     if self._excluded_paths is not None:
         return self._excluded_paths

     self.logger.info("Initializing excluded paths")
     option_entries = tuple(self.global_options.exclude_path_patterns or [])
     config_entries = tuple(self.config_data.get("exclude_path_patterns", []))
     unique_patterns: Set[str] = set()
     saw_legacy_format = False
     for entry in option_entries + config_entries:
         if isinstance(entry, str):
             # Bare strings are the old format: still accepted, but warned about.
             saw_legacy_format = True
             unique_patterns.add(entry)
             continue
         if not isinstance(entry, dict):
             raise types.ConfigException(
                 f"{type(entry).__name__} pattern is illegal in exclude-path-patterns"
             )
         try:
             unique_patterns.add(entry["path-pattern"])
         except KeyError as exc:
             raise types.ConfigException(
                 "Required key path-pattern missing in exclude-path-patterns"
             ) from exc

     if saw_legacy_format:
         warnings.warn(
             "Old format of --exclude-path-patterns option and config file setup exclude-path-patterns "
             "= ['exclusion pattern'] has been deprecated and will be removed in a future version. "
             "Make sure all the exclusions are set up using new pattern i.e. exclude-path-patterns = "
             "[{path-pattern='exclusion pattern',reason='reason for exclusion'}] in the config file",
             DeprecationWarning,
         )
     self._excluded_paths = config.compile_path_rules(unique_patterns)
     return self._excluded_paths
예제 #4
0
 def test_whitespace_lines_are_ignored(self):
     """Only the two real patterns should come back from the mixed input."""
     raw_lines = [
         "# Poetry lock file",
         r"poetry\.lock",
         "",
         "\t\n",
         "# NPM files",
         r"package-lock\.json",
     ]
     compiled = config.compile_path_rules(raw_lines)
     expected = [
         re.compile(regex)
         for regex in (r"poetry\.lock", r"package-lock\.json")
     ]
     self.assertEqual(compiled, expected)
예제 #5
0
    def excluded_paths(self) -> List[Pattern]:
        """Get a list of regexes used to match paths to exclude from the scan.

        The list is built on first access from the open file held in
        ``self.global_options.exclude_paths`` (one rule per line) and cached
        on ``self._excluded_paths``. The file is closed once it has been
        consumed. Without such a file the list is empty.

        :rtype: List[Pattern]
        """
        if self._excluded_paths is None:
            rules_file = self.global_options.exclude_paths
            if rules_file:
                # Close the handle even if reading or compiling the rules
                # fails, so an error here does not leak the open file.
                try:
                    self._excluded_paths = config.compile_path_rules(
                        rules_file.readlines())
                finally:
                    rules_file.close()
            else:
                self._excluded_paths = []
        return self._excluded_paths
예제 #6
0
    def included_paths(self) -> List[Pattern]:
        """Get a list of regexes used as an exclusive list of paths to scan.

        The list is built on first access from the open file held in
        ``self.global_options.include_paths`` (one rule per line) and cached
        on ``self._included_paths``. The file is closed once it has been
        consumed. Without such a file the list is empty.

        :rtype: List[Pattern]
        """
        if self._included_paths is None:
            rules_file = self.global_options.include_paths
            if rules_file:
                # Close the handle even if reading or compiling the rules
                # fails, so an error here does not leak the open file.
                try:
                    self._included_paths = config.compile_path_rules(
                        rules_file.readlines())
                finally:
                    rules_file.close()
            else:
                self._included_paths = []
        return self._included_paths
예제 #7
0
    def excluded_paths(self) -> List[Pattern]:
        """Return the compiled regexes for paths to leave out of the scan.

        Computed once and cached on ``self._excluded_paths``. Patterns are
        taken from ``self.global_options.exclude_path_patterns`` plus, when the
        deprecated ``exclude_paths`` file option is set, every line of that
        file (which is then closed). Duplicates are removed before compiling.

        :rtype: List[Pattern]
        """
        if self._excluded_paths is not None:
            return self._excluded_paths

        self.logger.info("Initializing excluded paths")
        raw_patterns = list(self.global_options.exclude_path_patterns or ())
        legacy_file = self.global_options.exclude_paths
        if legacy_file:
            self.logger.warning(
                "DEPRECATED --exclude-paths, use --exclude-path-patterns"
            )
            raw_patterns.extend(legacy_file.readlines())
            legacy_file.close()

        if raw_patterns:
            self._excluded_paths = config.compile_path_rules(set(raw_patterns))
        else:
            self._excluded_paths = []
        self.logger.debug(
            "Excluded paths was initialized as: %s", self._excluded_paths
        )
        return self._excluded_paths
예제 #8
0
 def test_commented_lines_are_ignored(self):
     """A ``#`` comment line must not turn into a compiled path rule."""
     raw_lines = ["# Poetry lock file", r"poetry\.lock"]
     compiled = config.compile_path_rules(raw_lines)
     expected = [re.compile(r"poetry\.lock")]
     self.assertEqual(compiled, expected)
예제 #9
0
def main(ctx: click.Context, **kwargs: config.OptionTypes) -> None:
    """Find secrets hidden in the depths of git.

    Tartufo will, by default, scan the entire history of a git repository
    for any text which looks like a secret, password, credential, etc. It can
    also be made to work in pre-commit mode, for scanning blobs of text as a
    pre-commit hook.
    """
    # NOTE(review): the docstring above is presumably rendered as the command's
    # --help text by click, so it is kept verbatim; maintainer notes go here.
    #
    # Flow: validate the requested analyses and scan target, build the regex
    # rules and path inclusion/exclusion rules, run either the pre-commit scan
    # or a full repository scan, then report and exit with status 1 when
    # issues were found (0 otherwise).
    if not any((kwargs["entropy"], kwargs["regex"])):
        err("No analysis requested.")
        ctx.exit(1)
    if not any((kwargs["pre_commit"], kwargs["repo_path"], kwargs["git_url"])):
        err("You must specify one of --pre-commit, --repo-path, or git_url.")
        ctx.exit(1)
    if kwargs["regex"]:
        try:
            rules_regexes = config.configure_regexes(
                cast(bool, kwargs["default_regexes"]),
                cast(Tuple[TextIO, ...], kwargs["rules"]),
                cast(Optional[str], kwargs["git_rules_repo"]),
                cast(Tuple[str, ...], kwargs["git_rules_files"]),
            )
        except ValueError as exc:
            err(str(exc))
            ctx.exit(1)
        if not rules_regexes:
            err("Regex checks requested, but no regexes found.")
            ctx.exit(1)
    else:
        rules_regexes = {}

    # read & compile path inclusion/exclusion patterns
    path_inclusions = []  # type: List[Pattern]
    path_exclusions = []  # type: List[Pattern]
    paths_file = cast(TextIO, kwargs["include_paths"])
    if paths_file:
        path_inclusions = config.compile_path_rules(paths_file.readlines())
    paths_file = cast(TextIO, kwargs["exclude_paths"])
    if paths_file:
        path_exclusions = config.compile_path_rules(paths_file.readlines())

    if kwargs["pre_commit"]:
        output = scanner.find_staged(
            cast(str, kwargs["repo_path"]),
            cast(bool, kwargs["json"]),
            cast(bool, kwargs["regex"]),
            cast(bool, kwargs["entropy"]),
            custom_regexes=rules_regexes,
            suppress_output=False,
            path_inclusions=path_inclusions,
            path_exclusions=path_exclusions,
        )
    else:
        remove_repo = False
        if kwargs["git_url"]:
            repo_path = util.clone_git_repo(cast(str, kwargs["git_url"]))
            remove_repo = True
        else:
            repo_path = cast(str, kwargs["repo_path"])

        try:
            output = scanner.scan_repo(repo_path, rules_regexes,
                                       path_inclusions, path_exclusions,
                                       kwargs)
        finally:
            # Remove the clone we created even when the scan raises, so a
            # failed run does not leave the cloned repository behind.
            if remove_repo:
                shutil.rmtree(repo_path, onerror=util.del_rw)

    if kwargs["cleanup"]:
        util.clean_outputs(output)
    else:
        issues_path = output.get("issues_path", None)
        if issues_path:
            print(f"Results have been saved in {issues_path}")

    if output.get("found_issues", False):
        ctx.exit(1)
    ctx.exit(0)