def test_tartufo_clones_git_repo_into_temp_dir(
    self, mock_mkdtemp: mock.MagicMock, mock_clone: mock.MagicMock
):
    """Cloning without an explicit target uses a freshly created temp dir."""
    repo_url = "https://github.com/godaddy/tartufo.git"
    mock_mkdtemp.return_value = "/foo"
    util.clone_git_repo(repo_url)
    mock_clone.assert_called_once_with(repo_url, "/foo")
def test_clone_git_repo_clones_into_target_dir(
    self, mock_temp: mock.MagicMock, mock_clone: mock.MagicMock
):
    """An explicit target directory is used verbatim; no temp dir is made."""
    repo_url = "https://github.com/godaddy/tartufo.git"
    util.clone_git_repo(repo_url, Path("/foo/tartufo.git"))
    mock_temp.assert_not_called()
    mock_clone.assert_called_once_with(repo_url, "/foo/tartufo.git")
def test_clone_git_repo_raises_explicit_exception_on_clone_fail(
    self, mock_clone: mock.MagicMock
):
    """A GitCommandError during clone surfaces as a GitRemoteException."""
    clone_failure = git.GitCommandError(
        command="git clone foo.git", status=42, stderr="Error cloning repo!"
    )
    mock_clone.side_effect = clone_failure
    with self.assertRaisesRegex(
        types.GitRemoteException, "stderr: 'Error cloning repo!'"
    ):
        util.clone_git_repo("https://github.com/godaddy/tartufo.git")
def main(
    ctx: click.Context,
    options: types.GlobalOptions,
    git_url: str,
    since_commit: Optional[str],
    max_depth: int,
    branch: Optional[str],
    work_dir: Optional[str],
) -> Tuple[str, Optional[GitRepoScanner]]:
    """Automatically clone and scan a remote git repository."""
    git_settings = types.GitOptions(
        since_commit=since_commit, max_depth=max_depth, branch=branch, fetch=False
    )
    clone_target: Optional[Path] = None
    if work_dir:
        # Clone into a sub-directory of the working directory so that cleanup
        # below never deletes the working directory itself.
        clone_target = Path(work_dir) / urlparse(git_url).path.split("/")[-1]
        clone_target.mkdir(parents=True)
    repo_scanner = None
    try:
        clone_target = util.clone_git_repo(git_url, clone_target)
        repo_scanner = GitRepoScanner(options, git_settings, str(clone_target))
        repo_scanner.scan()
    except types.GitException as exc:
        util.fail(f"Error cloning remote repo: {exc}", ctx)
    except types.TartufoException as exc:
        util.fail(str(exc), ctx)
    finally:
        # Always remove the clone, whether or not the scan succeeded.
        if clone_target and clone_target.exists():
            rmtree(str(clone_target), onerror=util.del_rw)
    return (git_url, repo_scanner)
def test_return_correct_commit_hash(self):
    """FIXME: Split this test out into multiple smaller tests w/o real clone
    FIXME: Also, this test will continue to grow slower the more times we commit

    Necessary:
      * Make sure all commits are checked (done)
      * Make sure all branches are checked
      * Make sure `diff_worker` flags bad diffs
      * Make sure all bad diffs are returned
    """
    # Start at commit d15627104d07846ac2914a976e8e347a663bbd9b, which is
    # immediately followed by a secret inserting commit:
    # https://github.com/dxa4481/truffleHog/commit/9ed54617547cfca783e0f81f8dc5c927e3d1e345
    start_commit = "d15627104d07846ac2914a976e8e347a663bbd9b"
    secret_commit = "9ed54617547cfca783e0f81f8dc5c927e3d1e345"
    expected_message = "OH no a secret"
    # We have to clone tartufo mostly because TravisCI only does a shallow clone
    clone_path = util.clone_git_repo("https://github.com/godaddy/tartufo.git")
    try:
        found = scanner.find_strings(
            clone_path,
            since_commit=start_commit,
        )
        matches = [issue for issue in found if issue.commit_hash == secret_commit]
        self.assertEqual(1, len(matches))
        self.assertEqual(secret_commit, matches[0].commit_hash)
        # Additionally, we cross-validate the commit comment matches the expected comment
        self.assertEqual(expected_message, matches[0].commit_message.strip())
    finally:
        shutil.rmtree(clone_path)
def configure_regexes(
    include_default: bool = True,
    rules_files: Optional[Iterable[TextIO]] = None,
    rules_repo: Optional[str] = None,
    rules_repo_files: Optional[Iterable[str]] = None,
) -> Dict[str, Rule]:
    """Build a set of regular expressions to be used during a regex scan.

    :param include_default: Whether to include the built-in set of regexes
    :param rules_files: A list of files to load rules from
    :param rules_repo: A separate git repository to load rules from
    :param rules_repo_files: A set of patterns used to find files in the rules repo
    :raises ValueError: If the same rule name is defined more than once
    """
    if include_default:
        rules = copy.copy(DEFAULT_REGEXES)
    else:
        rules = {}
    if rules_files:
        all_files: List[TextIO] = list(rules_files)
    else:
        all_files = []
    cloned_repo = False
    repo_path = None
    # Track the file handles opened by this function so they can be closed;
    # previously these leaked. Caller-supplied files remain the caller's
    # responsibility to close.
    opened_handles: List[TextIO] = []
    try:
        if rules_repo:
            repo_path = pathlib.Path(rules_repo)
            try:
                if not repo_path.is_dir():
                    cloned_repo = True
            except OSError:
                # If a git URL is passed in, Windows will raise an OSError on `is_dir()`
                cloned_repo = True
            finally:
                if cloned_repo:
                    repo_path = pathlib.Path(util.clone_git_repo(rules_repo))
            if not rules_repo_files:
                rules_repo_files = ("*.json",)
            for repo_file in rules_repo_files:
                opened_handles.extend(
                    path.open("r") for path in repo_path.glob(repo_file)
                )
            all_files.extend(opened_handles)
        for rules_file in all_files:
            loaded = load_rules_from_file(rules_file)
            # Duplicate rule names are a configuration error
            dupes = set(loaded.keys()).intersection(rules.keys())
            if dupes:
                raise ValueError(
                    "Rule(s) were defined multiple times: {}".format(dupes)
                )
            rules.update(loaded)
    finally:
        for handle in opened_handles:
            handle.close()
        if cloned_repo:
            shutil.rmtree(repo_path, onerror=util.del_rw)  # type: ignore
    return rules
def test_clone_git_repo_returns_path_to_clone(
    self, mock_mkdtemp: mock.MagicMock, mock_clone: mock.MagicMock
):
    """Both the clone path and the origin remote's name are returned."""
    fake_remote = mock.MagicMock()
    fake_remote.name = "origin"
    fake_repo = mock.MagicMock()
    fake_repo.remotes = [fake_remote]
    mock_clone.return_value = fake_repo
    mock_mkdtemp.return_value = "/foo"
    result_path, result_origin = util.clone_git_repo(
        "https://github.com/godaddy/tartufo.git"
    )
    self.assertEqual(result_path, Path("/foo"))
    self.assertEqual(result_origin, "origin")
def test_return_correct_commit_hash(self):
    """FIXME: Split this test out into multiple smaller tests w/o real clone
    FIXME: Also, this test will continue to grow slower the more times we commit

    Necessary:
      * Make sure all commits are checked (done)
      * Make sure all branches are checked
      * Make sure `diff_worker` flags bad diffs
      * Make sure all bad diffs are returned
    """
    # Start at commit d15627104d07846ac2914a976e8e347a663bbd9b, which is
    # immediately followed by a secret inserting commit:
    # https://github.com/dxa4481/truffleHog/commit/9ed54617547cfca783e0f81f8dc5c927e3d1e345
    start_commit = "d15627104d07846ac2914a976e8e347a663bbd9b"
    secret_commit = "9ed54617547cfca783e0f81f8dc5c927e3d1e345"
    expected_message = "OH no a secret"
    captured = six.StringIO()
    real_stdout = sys.stdout
    # Redirect STDOUT so the JSON emitted by the scan can be inspected
    sys.stdout = captured
    try:
        # We have to clone tartufo mostly because TravisCI only does a shallow clone
        clone_path = util.clone_git_repo("https://github.com/godaddy/tartufo.git")
        try:
            scanner.find_strings(
                str(clone_path),
                since_commit=start_commit,
                print_json=True,
                suppress_output=False,
            )
        finally:
            shutil.rmtree(clone_path)
    finally:
        sys.stdout = real_stdout
    parsed = [
        json.loads(line)
        for line in captured.getvalue().split("\n")
        if line.strip()
    ]
    matches = [item for item in parsed if item["commit_hash"] == secret_commit]
    self.assertEqual(1, len(matches))
    self.assertEqual(secret_commit, matches[0]["commit_hash"])
    # Additionally, we cross-validate the commit comment matches the expected comment
    self.assertEqual(expected_message, matches[0]["commit_message"].strip())
def configure_regexes(
    include_default: bool = True,
    rules_files: Optional[Iterable[TextIO]] = None,
    rules_repo: Optional[str] = None,
    rules_repo_files: Optional[Iterable[str]] = None,
) -> Dict[str, Pattern]:
    """Build the dict of rule regexes to be used during a regex scan.

    :param include_default: Whether to include the built-in set of regexes
    :param rules_files: A list of files to load rules from
    :param rules_repo: A separate git repository to load rules from
    :param rules_repo_files: A set of patterns used to find files in the rules repo
    :raises ValueError: If the same rule name is defined more than once
    """
    if include_default:
        rules = copy.copy(DEFAULT_REGEXES)
    else:
        rules = {}
    if rules_files:
        all_files = list(rules_files)  # type: List[IO[Any]]
    else:
        all_files = []
    cloned_repo = False
    repo_path = None
    try:
        if rules_repo:
            repo_path = pathlib.Path(rules_repo)
            if not repo_path.is_dir():
                # BUG FIX: flag the clone so the temporary checkout is
                # actually removed in the `finally` below; previously
                # `cloned_repo` was never set and the clone leaked on disk.
                cloned_repo = True
                repo_path = pathlib.Path(util.clone_git_repo(rules_repo))
            if not rules_repo_files:
                rules_repo_files = ("*.json",)
            for repo_file in rules_repo_files:
                all_files.extend(
                    [path.open("r") for path in repo_path.glob(repo_file)]
                )
        # BUG FIX: iterate `all_files` (caller-supplied files plus the files
        # discovered in the rules repo), not just `rules_files`; previously
        # repo rule files were opened but never loaded.
        for rules_file in all_files:
            loaded = load_rules_from_file(rules_file)
            dupes = set(loaded.keys()).intersection(rules.keys())
            if dupes:
                raise ValueError(
                    "Rule(s) were defined multiple times: {}".format(dupes)
                )
            rules.update(loaded)
    finally:
        if cloned_repo:
            shutil.rmtree(repo_path)  # type: ignore
    return rules
def main(
    ctx: click.Context,
    options: types.GlobalOptions,
    git_url: str,
    since_commit: Optional[str],
    max_depth: int,
    branch: Optional[str],
    work_dir: Optional[str],
    include_submodules: bool,
) -> GitRepoScanner:
    """Automatically clone and scan a remote git repository."""
    git_settings = types.GitOptions(
        since_commit=since_commit,
        max_depth=max_depth,
        branch=None,
        include_submodules=include_submodules,
    )
    clone_target: Optional[Path] = None
    if work_dir:
        # Clone into a sub-directory of the working directory so that cleanup
        # below never deletes the working directory itself.
        clone_target = Path(work_dir) / urlparse(git_url).path.split("/")[-1]
        clone_target.mkdir(parents=True)
    repo_scanner = None
    try:
        clone_target, origin = util.clone_git_repo(git_url, clone_target)
        if branch:
            # The branch must be qualified with the remote name for the scan.
            git_settings.branch = f"{origin}/{branch}"
        repo_scanner = GitRepoScanner(options, git_settings, str(clone_target))
        util.process_issues(git_url, repo_scanner, options)
    except types.GitException as exc:
        util.fail(f"Error cloning remote repo: {exc}", ctx)
    except types.TartufoException as exc:
        util.fail(str(exc), ctx)
    finally:
        # Always remove the clone, whether or not the scan succeeded.
        if clone_target and clone_target.exists():
            rmtree(str(clone_target), onerror=util.del_rw)
    return repo_scanner  # type: ignore
def test_clone_git_repo_returns_path_to_clone(self, mock_mkdtemp):
    """The returned path is the directory created by mkdtemp."""
    cloned_to = util.clone_git_repo("https://github.com/godaddy/tartufo.git")
    self.assertEqual(cloned_to, mock_mkdtemp.return_value)
def test_tartufo_clones_git_repo_into_temp_dir(self, mock_mkdtemp, mock_clone):
    """The repo is cloned into the directory created by mkdtemp."""
    repo_url = "https://github.com/godaddy/tartufo.git"
    util.clone_git_repo(repo_url)
    mock_clone.assert_called_once_with(repo_url, mock_mkdtemp.return_value)
def test_clone_git_repo_returns_path_to_clone(self, mock_mkdtemp: mock.MagicMock):
    """The clone location comes back as a Path built from the temp dir."""
    mock_mkdtemp.return_value = "/foo"
    cloned_to = util.clone_git_repo("https://github.com/godaddy/tartufo.git")
    self.assertEqual(cloned_to, Path("/foo"))
def main(ctx: click.Context, **kwargs: config.OptionTypes) -> None:
    """Find secrets hidden in the depths of git.

    Tartufo will, by default, scan the entire history of a git repository for
    any text which looks like a secret, password, credential, etc. It can also
    be made to work in pre-commit mode, for scanning blobs of text as a
    pre-commit hook.
    """
    # At least one analysis mode (entropy and/or regex) must be requested.
    if not any((kwargs["entropy"], kwargs["regex"])):
        err("No analysis requested.")
        ctx.exit(1)
    # Exactly one scan target must be specified.
    if not any((kwargs["pre_commit"], kwargs["repo_path"], kwargs["git_url"])):
        err("You must specify one of --pre-commit, --repo-path, or git_url.")
        ctx.exit(1)
    if kwargs["regex"]:
        try:
            # Assemble the regex rules from defaults, rule files, and/or a
            # separate rules repository.
            rules_regexes = config.configure_regexes(
                cast(bool, kwargs["default_regexes"]),
                cast(Tuple[TextIO, ...], kwargs["rules"]),
                cast(Optional[str], kwargs["git_rules_repo"]),
                cast(Tuple[str, ...], kwargs["git_rules_files"]),
            )
        except ValueError as exc:
            # Raised on duplicate rule definitions
            err(str(exc))
            ctx.exit(1)
        if not rules_regexes:
            err("Regex checks requested, but no regexes found.")
            ctx.exit(1)
    else:
        rules_regexes = {}
    # read & compile path inclusion/exclusion patterns
    path_inclusions = []  # type: List[Pattern]
    path_exclusions = []  # type: List[Pattern]
    paths_file = cast(TextIO, kwargs["include_paths"])
    if paths_file:
        path_inclusions = config.compile_path_rules(paths_file.readlines())
    paths_file = cast(TextIO, kwargs["exclude_paths"])
    if paths_file:
        path_exclusions = config.compile_path_rules(paths_file.readlines())
    if kwargs["pre_commit"]:
        # Pre-commit mode: scan only the staged changes of the local repo.
        output = scanner.find_staged(
            cast(str, kwargs["repo_path"]),
            cast(bool, kwargs["json"]),
            cast(bool, kwargs["regex"]),
            cast(bool, kwargs["entropy"]),
            custom_regexes=rules_regexes,
            suppress_output=False,
            path_inclusions=path_inclusions,
            path_exclusions=path_exclusions,
        )
    else:
        # Full-history mode: scan either a local path or a freshly cloned
        # remote repository (removed again after the scan).
        remove_repo = False
        if kwargs["git_url"]:
            repo_path = util.clone_git_repo(cast(str, kwargs["git_url"]))
            remove_repo = True
        else:
            repo_path = cast(str, kwargs["repo_path"])
        output = scanner.scan_repo(
            repo_path, rules_regexes, path_inclusions, path_exclusions, kwargs
        )
        if remove_repo:
            shutil.rmtree(repo_path, onerror=util.del_rw)
    if kwargs["cleanup"]:
        util.clean_outputs(output)
    else:
        issues_path = output.get("issues_path", None)
        if issues_path:
            print("Results have been saved in {}".format(issues_path))
    # Exit non-zero when any issue was found, so CI/pre-commit hooks fail.
    if output.get("found_issues", False):
        ctx.exit(1)
    ctx.exit(0)
def configure_regexes(
    include_default: bool = True,
    rules_files: Optional[Iterable[TextIO]] = None,
    rule_patterns: Optional[Iterable[Dict[str, str]]] = None,
    rules_repo: Optional[str] = None,
    rules_repo_files: Optional[Iterable[str]] = None,
) -> Set[Rule]:
    """Build a set of regular expressions to be used during a regex scan.

    :param include_default: Whether to include the built-in set of regexes
    :param rules_files: A list of files to load rules from
    :param rule_patterns: A set of previously-collected rules
    :param rules_repo: A separate git repository to load rules from
    :param rules_repo_files: A set of patterns used to find files in the rules repo
    :raises ConfigException: If a rule-pattern is missing a required field
    :returns: Set of `Rule` objects to be used for regex scans
    """
    if include_default:
        with DEFAULT_PATTERN_FILE.open() as handle:
            rules = load_rules_from_file(handle)
    else:
        rules = set()
    if rule_patterns:
        try:
            for pattern in rule_patterns:
                rule = Rule(
                    name=pattern["reason"],
                    pattern=re.compile(pattern["pattern"]),
                    path_pattern=re.compile(pattern.get("path-pattern", "")),
                    re_match_type=MatchType.Search,
                    re_match_scope=None,
                )
                rules.add(rule)
        except KeyError as exc:
            raise ConfigException(
                f"Invalid rule-pattern; both reason and pattern are required fields. Rule: {pattern}"
            ) from exc
    if rules_files:
        # BUG FIX: the adjacent literals previously produced a doubled space
        # ("config  option") in the rendered warning message.
        warnings.warn(
            "Storing rules in a separate file has been deprecated and will be removed "
            "in a future release. You should be using the 'rule-patterns' config "
            "option instead.",
            DeprecationWarning,
        )
        all_files: List[TextIO] = list(rules_files)
    else:
        all_files = []
    cloned_repo = False
    repo_path = None
    # Track the file handles opened by this function so they can be closed;
    # previously these leaked. Caller-supplied files remain the caller's
    # responsibility to close.
    opened_handles: List[TextIO] = []
    try:
        if rules_repo:
            repo_path = pathlib.Path(rules_repo)
            try:
                if not repo_path.is_dir():
                    cloned_repo = True
            except OSError:  # pragma: no cover
                # If a git URL is passed in, Windows will raise an OSError on `is_dir()`
                cloned_repo = True
            finally:
                if cloned_repo:
                    repo_path, _ = util.clone_git_repo(rules_repo)
            if not rules_repo_files:
                rules_repo_files = ("*.json",)
            for repo_file in rules_repo_files:
                opened_handles.extend(
                    path.open("r") for path in repo_path.glob(repo_file)
                )
            all_files.extend(opened_handles)
        for rules_file in all_files:
            rules.update(load_rules_from_file(rules_file))
    finally:
        for handle in opened_handles:
            handle.close()
        if cloned_repo:
            shutil.rmtree(repo_path, onerror=util.del_rw)  # type: ignore
    return rules