Ejemplo n.º 1
0
 def test_tartufo_clones_git_repo_into_temp_dir(
     self, mock_mkdtemp: mock.MagicMock, mock_clone: mock.MagicMock
 ):
     """Cloning with no target dir should clone into the mkdtemp directory."""
     url = "https://github.com/godaddy/tartufo.git"
     mock_mkdtemp.return_value = "/foo"
     util.clone_git_repo(url)
     mock_clone.assert_called_once_with(url, "/foo")
Ejemplo n.º 2
0
 def test_clone_git_repo_clones_into_target_dir(
     self, mock_temp: mock.MagicMock, mock_clone: mock.MagicMock
 ):
     """An explicit target dir should be used; no temp dir gets created."""
     target = Path("/foo/tartufo.git")
     util.clone_git_repo("https://github.com/godaddy/tartufo.git", target)
     mock_temp.assert_not_called()
     mock_clone.assert_called_once_with(
         "https://github.com/godaddy/tartufo.git", "/foo/tartufo.git"
     )
Ejemplo n.º 3
0
 def test_clone_git_repo_raises_explicit_exception_on_clone_fail(
     self, mock_clone: mock.MagicMock
 ):
     """A git clone failure should surface as a GitRemoteException."""
     mock_clone.side_effect = git.GitCommandError(
         command="git clone foo.git", status=42, stderr="Error cloning repo!"
     )
     with self.assertRaisesRegex(
         types.GitRemoteException, "stderr: 'Error cloning repo!'"
     ):
         util.clone_git_repo("https://github.com/godaddy/tartufo.git")
Ejemplo n.º 4
0
def main(
    ctx: click.Context,
    options: types.GlobalOptions,
    git_url: str,
    since_commit: Optional[str],
    max_depth: int,
    branch: Optional[str],
    work_dir: Optional[str],
) -> Tuple[str, Optional[GitRepoScanner]]:
    """Automatically clone and scan a remote git repository.

    The clone is always deleted afterwards, whether or not the scan
    succeeded; returns the scanned URL plus the scanner (``None`` when
    cloning or scanning failed).
    """
    git_options = types.GitOptions(
        since_commit=since_commit, max_depth=max_depth, branch=branch, fetch=False
    )
    clone_path: Optional[Path] = None
    if work_dir:
        # Clone into a sub-directory of the working directory so that the
        #   cleanup below never deletes the working directory itself
        repo_name = urlparse(git_url).path.split("/")[-1]
        clone_path = Path(work_dir) / repo_name
        clone_path.mkdir(parents=True)
    repo_scanner = None
    try:
        clone_path = util.clone_git_repo(git_url, clone_path)
        repo_scanner = GitRepoScanner(options, git_options, str(clone_path))
        repo_scanner.scan()
    except types.GitException as exc:
        util.fail(f"Error cloning remote repo: {exc}", ctx)
    except types.TartufoException as exc:
        util.fail(str(exc), ctx)
    finally:
        # Remove the clone on both the success and failure paths
        if clone_path and clone_path.exists():
            rmtree(str(clone_path), onerror=util.del_rw)
    return (git_url, repo_scanner)
Ejemplo n.º 5
0
    def test_return_correct_commit_hash(self):
        """FIXME: Split this test out into multiple smaller tests w/o real clone
        FIXME: Also, this test will continue to grow slower the more times we commit

        Necessary:
            * Make sure all commits are checked (done)
            * Make sure all branches are checked
            * Make sure `diff_worker` flags bad diffs
            * Make sure all bad diffs are returned
        """
        # Start scanning at d15627..., which is immediately followed by a
        # commit that inserts a secret:
        # https://github.com/dxa4481/truffleHog/commit/9ed54617547cfca783e0f81f8dc5c927e3d1e345
        since_commit = "d15627104d07846ac2914a976e8e347a663bbd9b"
        secret_commit = "9ed54617547cfca783e0f81f8dc5c927e3d1e345"
        expected_message = "OH no a secret"
        # A real clone is required, mostly because TravisCI only performs
        # shallow clones
        repo_path = util.clone_git_repo(
            "https://github.com/godaddy/tartufo.git")
        try:
            issues = scanner.find_strings(
                repo_path,
                since_commit=since_commit,
            )
            matches = [
                issue for issue in issues
                if issue.commit_hash == secret_commit
            ]
            self.assertEqual(1, len(matches))
            self.assertEqual(secret_commit, matches[0].commit_hash)
            # Cross-validate that the commit message matches the known comment
            self.assertEqual(expected_message,
                             matches[0].commit_message.strip())
        finally:
            shutil.rmtree(repo_path)
Ejemplo n.º 6
0
def configure_regexes(
    include_default: bool = True,
    rules_files: Optional[Iterable[TextIO]] = None,
    rules_repo: Optional[str] = None,
    rules_repo_files: Optional[Iterable[str]] = None,
) -> Dict[str, Rule]:
    """Build a set of regular expressions to be used during a regex scan.

    :param include_default: Whether to include the built-in set of regexes
    :param rules_files: A list of files to load rules from
    :param rules_repo: A separate git repository to load rules from
    :param rules_repo_files: A set of patterns used to find files in the rules repo
    """
    rules = copy.copy(DEFAULT_REGEXES) if include_default else {}

    all_files: List[TextIO] = list(rules_files) if rules_files else []
    try:
        cloned_repo = False
        repo_path = None
        if rules_repo:
            repo_path = pathlib.Path(rules_repo)
            try:
                # A local directory is used in place; anything else is cloned
                cloned_repo = not repo_path.is_dir()
            except OSError:
                # If a git URL is passed in, Windows will raise an OSError on `is_dir()`
                cloned_repo = True
            finally:
                if cloned_repo:
                    repo_path = pathlib.Path(util.clone_git_repo(rules_repo))
            for repo_file in rules_repo_files or ("*.json", ):
                all_files.extend(
                    path.open("r") for path in repo_path.glob(repo_file)
                )
        for rules_file in all_files:
            loaded = load_rules_from_file(rules_file)
            dupes = set(loaded.keys()).intersection(rules.keys())
            if dupes:
                raise ValueError(
                    "Rule(s) were defined multiple times: {}".format(dupes)
                )
            rules.update(loaded)
    finally:
        # Never leave a cloned rules repo behind on disk
        if cloned_repo:
            shutil.rmtree(repo_path, onerror=util.del_rw)  # type: ignore

    return rules
Ejemplo n.º 7
0
 def test_clone_git_repo_returns_path_to_clone(
     self, mock_mkdtemp: mock.MagicMock, mock_clone: mock.MagicMock
 ):
     """The clone helper should return both the clone path and origin name."""
     mock_mkdtemp.return_value = "/foo"
     fake_remote = mock.MagicMock()
     fake_remote.name = "origin"
     fake_repo = mock.MagicMock()
     fake_repo.remotes = [fake_remote]
     mock_clone.return_value = fake_repo
     repo_path, repo_origin = util.clone_git_repo(
         "https://github.com/godaddy/tartufo.git")
     self.assertEqual(repo_path, Path("/foo"))
     self.assertEqual(repo_origin, "origin")
Ejemplo n.º 8
0
    def test_return_correct_commit_hash(self):
        """FIXME: Split this test out into multiple smaller tests w/o real clone
        FIXME: Also, this test will continue to grow slower the more times we commit

        Necessary:
            * Make sure all commits are checked (done)
            * Make sure all branches are checked
            * Make sure `diff_worker` flags bad diffs
            * Make sure all bad diffs are returned
        """
        # d15627... is the commit immediately before a secret-inserting commit:
        # https://github.com/dxa4481/truffleHog/commit/9ed54617547cfca783e0f81f8dc5c927e3d1e345
        since_commit = "d15627104d07846ac2914a976e8e347a663bbd9b"
        commit_w_secret = "9ed54617547cfca783e0f81f8dc5c927e3d1e345"
        xcheck_commit_w_scrt_comment = "OH no a secret"

        captured = six.StringIO()
        real_stdout = sys.stdout

        # Capture everything the scan prints on STDOUT, then restore it
        sys.stdout = captured
        try:
            # A real clone is needed, mostly because TravisCI only does a shallow clone
            repo_path = util.clone_git_repo("https://github.com/godaddy/tartufo.git")
            try:
                scanner.find_strings(
                    str(repo_path),
                    since_commit=since_commit,
                    print_json=True,
                    suppress_output=False,
                )
            finally:
                shutil.rmtree(repo_path)
        finally:
            sys.stdout = real_stdout

        raw_lines = captured.getvalue().split("\n")
        results = [json.loads(line) for line in raw_lines if line.strip()]
        filtered_results = [
            result for result in results if result["commit_hash"] == commit_w_secret
        ]
        self.assertEqual(1, len(filtered_results))
        self.assertEqual(commit_w_secret, filtered_results[0]["commit_hash"])
        # The commit message must also match the known secret-commit comment
        self.assertEqual(
            xcheck_commit_w_scrt_comment,
            filtered_results[0]["commit_message"].strip(),
        )
Ejemplo n.º 9
0
def configure_regexes(
    include_default: bool = True,
    rules_files: Optional[Iterable[TextIO]] = None,
    rules_repo: Optional[str] = None,
    rules_repo_files: Optional[Iterable[str]] = None,
) -> Dict[str, Pattern]:
    """Build the dict of regular expressions to be used during a regex scan.

    :param include_default: Whether to include the built-in set of regexes
    :param rules_files: A list of files to load rules from
    :param rules_repo: A separate git repository to load rules from
    :param rules_repo_files: Glob patterns used to find rule files in the repo
    :raises ValueError: If any rule is defined more than once
    """
    if include_default:
        rules = copy.copy(DEFAULT_REGEXES)
    else:
        rules = {}

    if rules_files:
        all_files = list(rules_files)  # type: List[IO[Any]]
    else:
        all_files = []
    try:
        cloned_repo = False
        repo_path = None
        if rules_repo:
            repo_path = pathlib.Path(rules_repo)
            if not repo_path.is_dir():
                # FIX: remember that we cloned, so the clone is cleaned up
                # in the `finally` block below instead of being leaked
                cloned_repo = True
                repo_path = pathlib.Path(util.clone_git_repo(rules_repo))
            if not rules_repo_files:
                rules_repo_files = ("*.json", )
            for repo_file in rules_repo_files:
                all_files.extend(
                    [path.open("r") for path in repo_path.glob(repo_file)])
        # FIX: load from ALL collected files (including those gathered from
        # the rules repo), not just the explicitly-passed rules_files
        if all_files:
            for rules_file in all_files:
                loaded = load_rules_from_file(rules_file)
                dupes = set(loaded.keys()).intersection(rules.keys())
                if dupes:
                    raise ValueError(
                        "Rule(s) were defined multiple times: {}".format(
                            dupes))
                rules.update(loaded)
    finally:
        if cloned_repo:
            shutil.rmtree(repo_path)  # type: ignore

    return rules
Ejemplo n.º 10
0
def main(
    ctx: click.Context,
    options: types.GlobalOptions,
    git_url: str,
    since_commit: Optional[str],
    max_depth: int,
    branch: Optional[str],
    work_dir: Optional[str],
    include_submodules: bool,
) -> GitRepoScanner:
    """Automatically clone and scan a remote git repository.

    The clone is always removed afterwards; on clone/scan failure the
    command is aborted via ``util.fail``.
    """
    git_options = types.GitOptions(
        since_commit=since_commit,
        max_depth=max_depth,
        branch=None,  # filled in below once the clone's origin name is known
        include_submodules=include_submodules,
    )
    clone_path: Optional[Path] = None
    if work_dir:
        # Clone into a sub-directory of the working directory so that the
        #   cleanup below never deletes the working directory itself
        repo_name = urlparse(git_url).path.split("/")[-1]
        clone_path = Path(work_dir) / repo_name
        clone_path.mkdir(parents=True)
    scanner = None
    try:
        clone_path, origin = util.clone_git_repo(git_url, clone_path)
        if branch:
            # Qualify the branch with the clone's remote name
            git_options.branch = f"{origin}/{branch}"
        scanner = GitRepoScanner(options, git_options, str(clone_path))
        util.process_issues(git_url, scanner, options)
    except types.GitException as exc:
        util.fail(f"Error cloning remote repo: {exc}", ctx)
    except types.TartufoException as exc:
        util.fail(str(exc), ctx)
    finally:
        # Remove the clone on both the success and failure paths
        if clone_path and clone_path.exists():
            rmtree(str(clone_path), onerror=util.del_rw)
    return scanner  # type: ignore
Ejemplo n.º 11
0
 def test_clone_git_repo_returns_path_to_clone(self, mock_mkdtemp):
     """The clone helper should hand back the temp dir it cloned into."""
     cloned_path = util.clone_git_repo("https://github.com/godaddy/tartufo.git")
     self.assertEqual(cloned_path, mock_mkdtemp.return_value)
Ejemplo n.º 12
0
 def test_tartufo_clones_git_repo_into_temp_dir(self, mock_mkdtemp, mock_clone):
     """Cloning with no target should land in the mkdtemp directory."""
     url = "https://github.com/godaddy/tartufo.git"
     util.clone_git_repo(url)
     mock_clone.assert_called_once_with(url, mock_mkdtemp.return_value)
Ejemplo n.º 13
0
 def test_clone_git_repo_returns_path_to_clone(
     self, mock_mkdtemp: mock.MagicMock
 ):
     """The returned clone location should be the temp dir as a Path."""
     mock_mkdtemp.return_value = "/foo"
     result = util.clone_git_repo("https://github.com/godaddy/tartufo.git")
     self.assertEqual(result, Path("/foo"))
Ejemplo n.º 14
0
def main(ctx: click.Context, **kwargs: config.OptionTypes) -> None:
    """Find secrets hidden in the depths of git.

    Tartufo will, by default, scan the entire history of a git repository
    for any text which looks like a secret, password, credential, etc. It can
    also be made to work in pre-commit mode, for scanning blobs of text as a
    pre-commit hook.

    :param ctx: The click command context, used for exit codes
    :param kwargs: All CLI options, supplied by click
    """
    # Fail fast: at least one analysis type (entropy/regex) is required
    if not any((kwargs["entropy"], kwargs["regex"])):
        err("No analysis requested.")
        ctx.exit(1)
    # ... and at least one scan target must be specified
    if not any((kwargs["pre_commit"], kwargs["repo_path"], kwargs["git_url"])):
        err("You must specify one of --pre-commit, --repo-path, or git_url.")
        ctx.exit(1)
    if kwargs["regex"]:
        try:
            # Gather regex rules from the built-in defaults, any rule files,
            # and/or a separate rules repository
            rules_regexes = config.configure_regexes(
                cast(bool, kwargs["default_regexes"]),
                cast(Tuple[TextIO, ...], kwargs["rules"]),
                cast(Optional[str], kwargs["git_rules_repo"]),
                cast(Tuple[str, ...], kwargs["git_rules_files"]),
            )
        except ValueError as exc:
            # configure_regexes raises ValueError for duplicate rule names
            err(str(exc))
            ctx.exit(1)
        if not rules_regexes:
            err("Regex checks requested, but no regexes found.")
            ctx.exit(1)
    else:
        rules_regexes = {}

    # read & compile path inclusion/exclusion patterns
    path_inclusions = []  # type: List[Pattern]
    path_exclusions = []  # type: List[Pattern]
    paths_file = cast(TextIO, kwargs["include_paths"])
    if paths_file:
        path_inclusions = config.compile_path_rules(paths_file.readlines())
    paths_file = cast(TextIO, kwargs["exclude_paths"])
    if paths_file:
        path_exclusions = config.compile_path_rules(paths_file.readlines())

    if kwargs["pre_commit"]:
        # Pre-commit mode: scan only the currently staged changes
        output = scanner.find_staged(
            cast(str, kwargs["repo_path"]),
            cast(bool, kwargs["json"]),
            cast(bool, kwargs["regex"]),
            cast(bool, kwargs["entropy"]),
            custom_regexes=rules_regexes,
            suppress_output=False,
            path_inclusions=path_inclusions,
            path_exclusions=path_exclusions,
        )
    else:
        # Repository mode: either clone a remote repo to a temp location
        # (and remove it afterwards), or scan a local path in place
        remove_repo = False
        if kwargs["git_url"]:
            repo_path = util.clone_git_repo(cast(str, kwargs["git_url"]))
            remove_repo = True
        else:
            repo_path = cast(str, kwargs["repo_path"])

        output = scanner.scan_repo(repo_path, rules_regexes, path_inclusions,
                                   path_exclusions, kwargs)

        if remove_repo:
            # del_rw handles read-only files (e.g. git objects on Windows)
            shutil.rmtree(repo_path, onerror=util.del_rw)

    if kwargs["cleanup"]:
        util.clean_outputs(output)
    else:
        issues_path = output.get("issues_path", None)
        if issues_path:
            print("Results have been saved in {}".format(issues_path))

    # Exit code 1 signals that secrets were found; 0 means a clean scan
    if output.get("found_issues", False):
        ctx.exit(1)
    ctx.exit(0)
Ejemplo n.º 15
0
def configure_regexes(
    include_default: bool = True,
    rules_files: Optional[Iterable[TextIO]] = None,
    rule_patterns: Optional[Iterable[Dict[str, str]]] = None,
    rules_repo: Optional[str] = None,
    rules_repo_files: Optional[Iterable[str]] = None,
) -> Set[Rule]:
    """Build a set of regular expressions to be used during a regex scan.

    :param include_default: Whether to include the built-in set of regexes
    :param rules_files: A list of files to load rules from
    :param rule_patterns: A set of previously-collected rules
    :param rules_repo: A separate git repository to load rules from
    :param rules_repo_files: A set of patterns used to find files in the rules repo
    :returns: Set of `Rule` objects to be used for regex scans
    """

    # Seed the rule set with the built-in patterns, when requested
    if include_default:
        with DEFAULT_PATTERN_FILE.open() as handle:
            rules = load_rules_from_file(handle)
    else:
        rules = set()

    if rule_patterns:
        try:
            for pattern in rule_patterns:
                rules.add(
                    Rule(
                        name=pattern["reason"],
                        pattern=re.compile(pattern["pattern"]),
                        path_pattern=re.compile(pattern.get("path-pattern", "")),
                        re_match_type=MatchType.Search,
                        re_match_scope=None,
                    )
                )
        except KeyError as exc:
            raise ConfigException(
                f"Invalid rule-pattern; both reason and pattern are required fields. Rule: {pattern}"
            ) from exc

    if rules_files:
        warnings.warn(
            "Storing rules in a separate file has been deprecated and will be removed "
            "in a future release. You should be using the 'rule-patterns' config "
            " option instead.",
            DeprecationWarning,
        )
        all_files: List[TextIO] = list(rules_files)
    else:
        all_files = []
    try:
        cloned_repo = False
        repo_path = None
        if rules_repo:
            repo_path = pathlib.Path(rules_repo)
            try:
                # A local directory is used in place; anything else is cloned
                cloned_repo = not repo_path.is_dir()
            except OSError:  # pragma: no cover
                # If a git URL is passed in, Windows will raise an OSError on `is_dir()`
                cloned_repo = True
            finally:
                if cloned_repo:
                    repo_path, _ = util.clone_git_repo(rules_repo)
            for repo_file in rules_repo_files or ("*.json", ):
                all_files.extend(
                    path.open("r") for path in repo_path.glob(repo_file)
                )
        for rules_file in all_files:
            rules.update(load_rules_from_file(rules_file))
    finally:
        # Never leave a cloned rules repo behind on disk
        if cloned_repo:
            shutil.rmtree(repo_path, onerror=util.del_rw)  # type: ignore

    return rules