def load_repo(self, repo_path: str) -> git.Repo:
    """Open the git repository at ``repo_path``, applying repo-local config first.

    A configuration is looked up in ``repo_path`` via
    ``config.load_config_from_path``. When one is found and it differs from
    ``self.global_options.config``, three of its settings are merged into
    this scanner:

    * ``exclude_signatures`` is unioned with the globally configured ones;
    * ``include_paths`` / ``exclude_paths`` name files (relative to
      ``repo_path``) whose lines are compiled to path rules and appended to
      the included / excluded path lists.

    :param repo_path: Filesystem path of the local repository
    :returns: The opened repository
    :raises types.GitLocalException: If ``git.Repo`` raises ``git.GitError``
    """
    config_file: Optional[pathlib.Path] = None
    data: MutableMapping[str, Any] = {}
    try:
        (config_file, data) = config.load_config_from_path(
            pathlib.Path(repo_path), traverse=False
        )
    except (FileNotFoundError, types.ConfigException):
        # A missing or invalid repo-local config is not fatal: scan with the
        # global options only.
        config_file = None
    if config_file and config_file != self.global_options.config:
        signatures = data.get("exclude_signatures", None)
        if signatures:
            # Round-trip through a set to drop duplicates.
            self.global_options.exclude_signatures = tuple(
                set(self.global_options.exclude_signatures + tuple(signatures))
            )
        extras_path = data.get("include_paths", None)
        if extras_path:
            extras_file = pathlib.Path(repo_path, extras_path)
            # is_file() rather than exists(): a directory would pass an
            # existence check and then fail on open().
            if extras_file.is_file():
                includes = self.included_paths
                with extras_file.open() as handle:
                    includes += config.compile_path_rules(handle.readlines())
                self._included_paths = includes
        extras_path = data.get("exclude_paths", None)
        if extras_path:
            extras_file = pathlib.Path(repo_path, extras_path)
            if extras_file.is_file():
                excludes = self.excluded_paths
                with extras_file.open() as handle:
                    excludes += config.compile_path_rules(handle.readlines())
                self._excluded_paths = excludes
    try:
        return git.Repo(repo_path)
    except git.GitError as exc:
        raise types.GitLocalException(str(exc)) from exc
def scan_repo(
    repo_path: str,
    regexes: Optional[Dict[str, Pattern]],
    path_inclusions: List[Pattern],
    path_exclusions: List[Pattern],
    options: Dict[str, config.OptionTypes],
) -> List[Issue]:
    """Scan the repository at ``repo_path``, honouring any repo-local config.

    ``pyproject.toml`` is preferred, with ``tartufo.toml`` as the fallback.
    The file is read only when its resolved path differs from
    ``options["config"]``. Its ``[tool.tartufo]`` table may name
    ``include_paths`` / ``exclude_paths`` files; the rules compiled from
    those files are appended in place to ``path_inclusions`` and
    ``path_exclusions`` before ``find_strings`` is called.
    """
    repo_root = pathlib.Path(repo_path)
    candidate = repo_root / "pyproject.toml"
    if not candidate.is_file():
        candidate = repo_root / "tartufo.toml"

    local_settings = {}  # type: Dict[str, config.OptionTypes]
    if candidate.is_file() and str(candidate.resolve()) != str(options["config"]):
        parsed = toml.load(str(candidate))
        local_settings = parsed.get("tool", {}).get("tartufo", {})

    if local_settings:
        # Accept both "--include-paths" and "include_paths" spellings.
        normalized = {}
        for raw_key, value in local_settings.items():
            normalized[raw_key.replace("--", "").replace("-", "_")] = value

        targets = (
            ("include_paths", path_inclusions),
            ("exclude_paths", path_exclusions),
        )
        for option_name, rule_list in targets:
            listed = cast(str, normalized.get(option_name, None))
            if not listed:
                continue
            # A relative path is resolved against the current working
            # directory, not against repo_path.
            rules_path = pathlib.Path(listed).resolve()
            if not rules_path.is_file():
                continue
            with rules_path.open("r", encoding="utf8") as rules_file:
                rule_list.extend(config.compile_path_rules(rules_file.readlines()))

    return find_strings(
        repo_path,
        since_commit=cast(str, options["since_commit"]),
        max_depth=cast(int, options["max_depth"]),
        do_regex=cast(bool, options["regex"]),
        do_entropy=cast(bool, options["entropy"]),
        custom_regexes=regexes,
        branch=cast(str, options["branch"]),
        path_inclusions=path_inclusions,
        path_exclusions=path_exclusions,
    )
def excluded_paths(self) -> List[Pattern]:
    """Return the compiled regexes for paths excluded from the scan.

    The list is built on first access from the ``exclude_path_patterns``
    entries of the global options followed by those of the config data, and
    is cached in ``self._excluded_paths`` afterwards. Each entry is either a
    table carrying a ``path-pattern`` key or, in the deprecated format, a
    bare string.

    :raises types.ConfigException: If a table lacks ``path-pattern`` or an
        entry is neither a table nor a string
    """
    if self._excluded_paths is not None:
        return self._excluded_paths

    self.logger.info("Initializing excluded paths")
    from_options = tuple(self.global_options.exclude_path_patterns or [])
    from_config = tuple(self.config_data.get("exclude_path_patterns", []))

    collected: Set[str] = set()
    saw_legacy_entry = False
    for entry in from_options + from_config:
        if isinstance(entry, str):
            # Bare strings are the old format; accepted, but warned about below.
            saw_legacy_entry = True
            collected.add(entry)
        elif isinstance(entry, dict):
            try:
                collected.add(entry["path-pattern"])
            except KeyError as exc:
                raise types.ConfigException(
                    "Required key path-pattern missing in exclude-path-patterns"
                ) from exc
        else:
            raise types.ConfigException(
                f"{type(entry).__name__} pattern is illegal in exclude-path-patterns"
            )

    if saw_legacy_entry:
        warnings.warn(
            "Old format of --exclude-path-patterns option and config file setup exclude-path-patterns "
            "= ['exclusion pattern'] has been deprecated and will be removed in a future version. "
            "Make sure all the exclusions are set up using new pattern i.e. exclude-path-patterns = "
            "[{path-pattern='exclusion pattern',reason='reason for exclusion'}] in the config file",
            DeprecationWarning,
        )

    self._excluded_paths = config.compile_path_rules(collected)
    return self._excluded_paths
def test_whitespace_lines_are_ignored(self):
    """Empty and whitespace-only lines yield no rules, just like comments."""
    source_lines = [
        "# Poetry lock file",
        r"poetry\.lock",
        "",
        "\t\n",
        "# NPM files",
        r"package-lock\.json",
    ]
    compiled = config.compile_path_rules(source_lines)
    # Only the two real patterns survive, in their original order.
    expected = [re.compile(r"poetry\.lock"), re.compile(r"package-lock\.json")]
    self.assertEqual(compiled, expected)
def excluded_paths(self) -> List[Pattern]:
    """Get a list of regexes used to match paths to exclude from the scan.

    The list is built on first access and cached in ``self._excluded_paths``.
    When ``self.global_options.exclude_paths`` holds an open file, its lines
    are compiled into path rules and the file is closed; otherwise the list
    is empty.

    :rtype: List[Pattern]
    """
    if self._excluded_paths is None:
        rules_file = self.global_options.exclude_paths
        if rules_file:
            # Close the handle even when reading or compiling raises (for
            # example on an invalid pattern), so it is never leaked.
            try:
                self._excluded_paths = config.compile_path_rules(
                    rules_file.readlines()
                )
            finally:
                rules_file.close()
        else:
            self._excluded_paths = []
    return self._excluded_paths
def included_paths(self) -> List[Pattern]:
    """Get a list of regexes used as an exclusive list of paths to scan.

    The list is built on first access and cached in ``self._included_paths``.
    When ``self.global_options.include_paths`` holds an open file, its lines
    are compiled into path rules and the file is closed; otherwise the list
    is empty.

    :rtype: List[Pattern]
    """
    if self._included_paths is None:
        rules_file = self.global_options.include_paths
        if rules_file:
            # Close the handle even when reading or compiling raises (for
            # example on an invalid pattern), so it is never leaked.
            try:
                self._included_paths = config.compile_path_rules(
                    rules_file.readlines()
                )
            finally:
                rules_file.close()
        else:
            self._included_paths = []
    return self._included_paths
def excluded_paths(self) -> List[Pattern]:
    """Get a list of regexes used to match paths to exclude from the scan.

    Patterns come from ``exclude_path_patterns`` plus, for the deprecated
    ``exclude_paths`` option, the lines of that file. Duplicates are dropped
    before compilation, and the result is cached in ``self._excluded_paths``.

    :rtype: List[Pattern]
    """
    if self._excluded_paths is not None:
        return self._excluded_paths

    self.logger.info("Initializing excluded paths")
    options = self.global_options
    collected = list(options.exclude_path_patterns or ())
    if options.exclude_paths:
        self.logger.warning(
            "DEPRECATED --exclude-paths, use --exclude-path-patterns"
        )
        collected.extend(options.exclude_paths.readlines())
        options.exclude_paths.close()

    if collected:
        self._excluded_paths = config.compile_path_rules(set(collected))
    else:
        self._excluded_paths = []
    self.logger.debug(
        "Excluded paths was initialized as: %s", self._excluded_paths
    )
    return self._excluded_paths
def test_commented_lines_are_ignored(self):
    """A line starting with ``#`` does not produce a rule."""
    source_lines = ["# Poetry lock file", r"poetry\.lock"]
    compiled = config.compile_path_rules(source_lines)
    expected = [re.compile(r"poetry\.lock")]
    self.assertEqual(compiled, expected)
def main(ctx: click.Context, **kwargs: config.OptionTypes) -> None:
    """Find secrets hidden in the depths of git.

    Tartufo will, by default, scan the entire history of a git repository
    for any text which looks like a secret, password, credential, etc. It can also
    be made to work in pre-commit mode, for scanning blobs of text as a pre-commit
    hook.
    """
    # NOTE(review): the docstring above is presumably rendered as the CLI help
    # text, so it is kept verbatim and the implementation notes live in
    # comments instead.
    #
    # Flow: validate the option combination, build the regex rule set and the
    # path inclusion/exclusion rules, run either the staged-changes scan or
    # the full repository scan, then report and exit with status 1 when
    # issues were found and 0 otherwise.
    if not any((kwargs["entropy"], kwargs["regex"])):
        err("No analysis requested.")
        ctx.exit(1)
    if not any((kwargs["pre_commit"], kwargs["repo_path"], kwargs["git_url"])):
        err("You must specify one of --pre-commit, --repo-path, or git_url.")
        ctx.exit(1)
    if kwargs["regex"]:
        try:
            rules_regexes = config.configure_regexes(
                cast(bool, kwargs["default_regexes"]),
                cast(Tuple[TextIO, ...], kwargs["rules"]),
                cast(Optional[str], kwargs["git_rules_repo"]),
                cast(Tuple[str, ...], kwargs["git_rules_files"]),
            )
        except ValueError as exc:
            err(str(exc))
            ctx.exit(1)
        if not rules_regexes:
            err("Regex checks requested, but no regexes found.")
            ctx.exit(1)
    else:
        rules_regexes = {}

    # read & compile path inclusion/exclusion patterns
    path_inclusions = []  # type: List[Pattern]
    path_exclusions = []  # type: List[Pattern]
    paths_file = cast(TextIO, kwargs["include_paths"])
    if paths_file:
        path_inclusions = config.compile_path_rules(paths_file.readlines())
    paths_file = cast(TextIO, kwargs["exclude_paths"])
    if paths_file:
        path_exclusions = config.compile_path_rules(paths_file.readlines())

    if kwargs["pre_commit"]:
        output = scanner.find_staged(
            cast(str, kwargs["repo_path"]),
            cast(bool, kwargs["json"]),
            cast(bool, kwargs["regex"]),
            cast(bool, kwargs["entropy"]),
            custom_regexes=rules_regexes,
            suppress_output=False,
            path_inclusions=path_inclusions,
            path_exclusions=path_exclusions,
        )
    else:
        remove_repo = False
        if kwargs["git_url"]:
            repo_path = util.clone_git_repo(cast(str, kwargs["git_url"]))
            remove_repo = True
        else:
            repo_path = cast(str, kwargs["repo_path"])

        try:
            output = scanner.scan_repo(
                repo_path, rules_regexes, path_inclusions, path_exclusions, kwargs
            )
        finally:
            # Remove the clone we created even when the scan raises, so a
            # failed run does not leave the checkout behind. A user-supplied
            # --repo-path is never deleted.
            if remove_repo:
                shutil.rmtree(repo_path, onerror=util.del_rw)

    if kwargs["cleanup"]:
        util.clean_outputs(output)
    else:
        issues_path = output.get("issues_path", None)
        if issues_path:
            print("Results have been saved in {}".format(issues_path))

    if output.get("found_issues", False):
        ctx.exit(1)
    ctx.exit(0)