def test_return_correct_commit_hash(self):
    """Check that a scan limited by ``since_commit`` reports the secret commit.

    The scan starts at commit 202564cf776b402800a4aab8bb14fa4624888475, which
    is immediately followed by a secret inserting commit:
    https://github.com/dxa4481/truffleHog/commit/d15627104d07846ac2914a976e8e347a663bbd9b
    """
    since_commit = '202564cf776b402800a4aab8bb14fa4624888475'
    commit_w_secret = 'd15627104d07846ac2914a976e8e347a663bbd9b'
    cross_validating_commit_w_secret_comment = 'Oh no a secret file'

    # Pick the in-memory buffer type according to the running Python major
    # version (text buffer on 3.x, byte buffer otherwise).
    if sys.version_info >= (3,):
        tmp_stdout = io.StringIO()
    else:
        tmp_stdout = io.BytesIO()
    bak_stdout = sys.stdout

    # Redirect STDOUT, run scan and re-establish STDOUT
    sys.stdout = tmp_stdout
    try:
        truffleHog.find_strings("https://github.com/dxa4481/truffleHog.git",
                                since_commit=since_commit,
                                printJson=True,
                                surpress_output=False)
    finally:
        # Always restore the real stream, even if the scan raises.
        sys.stdout = bak_stdout

    # Every non-blank captured line is parsed as a JSON document.
    results = [json.loads(line)
               for line in tmp_stdout.getvalue().split('\n')
               if line.strip()]
    filtered_results = [
        result for result in results
        if result['commitHash'] == commit_w_secret
        and result['branch'] == 'origin/master'
    ]

    self.assertEqual(1, len(filtered_results))
    self.assertEqual(commit_w_secret, filtered_results[0]['commitHash'])
    # Additionally, we cross-validate the commit comment matches the expected comment
    self.assertEqual(cross_validating_commit_w_secret_comment,
                     filtered_results[0]['commit'].strip())
def test_return_correct_commit_hash(self):
    """Scan from ``since_commit`` and expect exactly one hit for the secret commit.

    The scan starts at commit d15627104d07846ac2914a976e8e347a663bbd9b, which
    is immediately followed by a secret inserting commit:
    https://github.com/dxa4481/truffleHog/commit/9ed54617547cfca783e0f81f8dc5c927e3d1e345
    """
    since_commit = 'd15627104d07846ac2914a976e8e347a663bbd9b'  # nosec
    commit_w_secret = '9ed54617547cfca783e0f81f8dc5c927e3d1e345'  # nosec
    expected_comment = 'OH no a secret'  # nosec

    # Buffer class depends on the running Python major version.
    buffer_cls = io.StringIO if sys.version_info >= (3, ) else io.BytesIO
    captured = buffer_cls()
    original_stdout = sys.stdout

    # Swap STDOUT for the buffer while scanning; put it back afterwards.
    sys.stdout = captured
    try:
        truffleHog.find_strings(
            "https://github.com/dxa4481/truffleHog.git",
            since_commit=since_commit,
            print_json=True,
            surpress_output=False)
    finally:
        sys.stdout = original_stdout

    # Parse every non-blank output line as JSON, then keep the entries
    # that belong to the commit under test.
    parsed = []
    for raw_line in captured.getvalue().split('\n'):
        if bool(raw_line.strip()):
            parsed.append(json.loads(raw_line))
    matching = [entry for entry in parsed
                if entry['commitHash'] == commit_w_secret]

    self.assertEqual(1, len(matching))
    first_match = matching[0]
    self.assertEqual(commit_w_secret, first_match['commitHash'])
    # The commit comment is cross-checked against the expected comment too.
    self.assertEqual(expected_comment, first_match['commit'].strip())
def truffle(git_url, do_regex, custom_regex):
    """Scan ``git_url`` with truffleHog and return the issues keyed by index.

    :param git_url: repository location handed to ``truffleHog.find_strings``.
    :param do_regex: when truthy, ``do_regex=True`` is added to the scan
        arguments; otherwise that argument is left out entirely.
    :param custom_regex: accepted but not used by this function.
    :return: dict mapping ``'issue0'``, ``'issue1'``, ... to the JSON object
        decoded from the first line of each file listed under
        ``'foundIssues'`` in the scan result.
    """
    # Arguments shared by both variants of the scan call.
    scan_options = dict(git_url=git_url,
                        since_commit=None,
                        max_depth=1000000,
                        printJson=True,
                        do_entropy=True,
                        surpress_output=True)
    if do_regex:
        scan_options['do_regex'] = True
    strings_found = truffleHog.find_strings(**scan_options)

    found = {}
    for index, issue_path in enumerate(strings_found['foundIssues']):
        with open(issue_path, 'r', encoding='utf-8') as handle:
            # Only the first (right-stripped) line of the file is decoded.
            first_line = [line.rstrip() for line in handle][0]
            found['issue%s' % index] = json.loads(first_line, strict=False)
    return found
def get_org_repos(orgname, page):
    """Scan every non-private repository listed for ``orgname``.

    Pages of ``https://api.github.com/users/<orgname>/repos`` are requested
    one after another, starting at ``page``, until a page with an empty
    (falsy) JSON body comes back. Each listed repository whose ``private``
    field is falsy is announced on stdout and scanned with
    ``truffleHog.find_strings`` (regex checks on, entropy checks off).

    :param orgname: account name inserted into the API URL.
    :param page: number of the first result page to request.
    :return: ``None``.
    :raises requests.HTTPError: if the API answers with an error status.
    """
    # Iterate rather than recurse, so the number of pages is not bounded
    # by the interpreter's recursion limit.
    while True:
        response = requests.get(
            url='https://api.github.com/users/' + orgname
                + '/repos?page={}'.format(page))
        # Fail loudly on an HTTP error status instead of trying to iterate
        # whatever body came back with it.
        response.raise_for_status()
        repos = response.json()
        if not repos:
            return None
        for item in repos:
            if not item['private']:
                print('searching ' + item["html_url"])
                truffleHog.find_strings(item["html_url"],
                                        do_regex=True,
                                        do_entropy=False,
                                        max_depth=100000)
        page += 1
def test_remove_only_temp_repos(self):
    """A repository scanned through a ``file://`` URL must still exist afterwards."""
    remote_url = "https://github.com/dxa4481/truffleHog.git"

    # Step 1: clone the remote repository.
    clone_path, was_created = truffleHog.clone_git_repo(remote_url)
    under_tmp = re.search(r'^/tmp/', clone_path)
    self.assertTrue(under_tmp)
    self.assertTrue(was_created)

    # Step 2: use that clone as a local repo, without cloning, to find strings.
    local_url = 'file://' + clone_path
    truffleHog.find_strings(local_url)
    still_there = os.path.exists(clone_path)
    self.assertTrue(still_there)
def getMatches(conf, path):
    """Run TruffleHog against ``path`` and return the de-duplicated issues.

    :param conf: configuration mapping. ``conf['flags']`` is expanded into
        the scan options. Optional ``'regexRules'`` (rule name -> pattern)
        and ``'sincecommit'`` entries are used when present and truthy.
    :param path: value passed on as ``git_url``.
    :return: list of ``issueJson(issue)`` values, one per distinct issue, in
        the order they were first seen.
    """
    # Build the options we are going to pass to truffleHog.
    # This should probably go in its own function.
    regex_rules = conf['regexRules'] if 'regexRules' in conf else None
    if regex_rules:
        # Compile into a fresh dict so the caller's configuration is not
        # rewritten in place.
        custom_regexes = {rule: re.compile(regex_rules[rule])
                          for rule in regex_rules}
        kwargs = buildkwargs(git_url=path,
                             custom_regexes=custom_regexes,
                             **conf['flags'])
    else:
        # Also taken when 'regexRules' is absent; previously that case left
        # the options empty, so the scan was started without a git_url.
        kwargs = buildkwargs(git_url=path, **conf['flags'])

    since_commit = conf['sincecommit'] if 'sincecommit' in conf else None
    if since_commit:
        kwargs = buildkwargs(since_commit=since_commit, **kwargs)

    printDebug(
        conf,
        "Calling TruffleHog with the following options\n {}".format(kwargs))
    output = TH.find_strings(**kwargs)
    printDebug(conf, "found issues: {}".format(len(output['foundIssues'])))

    matches = []
    for issue_file in output['foundIssues']:
        # Context manager closes each issue file as soon as it is parsed.
        with open(issue_file) as handle:
            issue = json.load(handle)
        issue_json = issueJson(issue)
        if issue_json not in matches:
            matches.append(issue_json)

    printDebug(conf, "Found {} Matches".format(len(matches)))
    return matches
def get_org_repos(orgname, page):
    """Scan the non-fork repositories listed for ``orgname`` and print findings.

    Pages of ``https://api.github.com/users/<orgname>/repos`` are requested
    one after another, starting at ``page``, until a page with an empty
    (falsy) JSON body comes back. A listed repository is scanned when its
    ``fork`` field is falsy and the module-level ``reached`` flag is truthy.
    Every issue file reported by the scan is loaded, extended with GitHub
    links, has ``diff`` and ``printDiff`` cut to 200 characters, and is
    printed as indented JSON.

    :param orgname: account name inserted into the API URL.
    :param page: number of the first result page to request.
    :return: ``None``.
    :raises requests.HTTPError: if the API answers with an error status.
    """
    # Iterate rather than recurse, so the number of pages is not bounded
    # by the interpreter's recursion limit.
    while True:
        response = requests.get(
            url='https://api.github.com/users/' + orgname
                + '/repos?page={}'.format(page))
        # Fail loudly on an HTTP error status instead of trying to iterate
        # whatever body came back with it.
        response.raise_for_status()
        repos = response.json()
        if not repos:
            return None
        for item in repos:
            # `reached` and `rules` are read from the enclosing module.
            if not item['fork'] and reached:
                print('searching ' + item["html_url"])
                results = truffleHog.find_strings(item["html_url"],
                                                  do_regex=True,
                                                  custom_regexes=rules,
                                                  do_entropy=False,
                                                  max_depth=100000)
                for issue in results["foundIssues"]:
                    # Context manager closes the issue file once it is read.
                    with open(issue) as handle:
                        d = loads(handle.read())
                    d['github_url'] = "{}/blob/{}/{}".format(
                        item["html_url"], d['commitHash'], d['path'])
                    d['github_commit_url'] = "{}/commit/{}".format(
                        item["html_url"], d['commitHash'])
                    # Keep the printed report short.
                    d['diff'] = d['diff'][0:200]
                    d['printDiff'] = d['printDiff'][0:200]
                    print(dumps(d, indent=4))
        page += 1
def search_bitbucket(count, git_urls, username, secret, args, json_repos, total_rep):
    """Clone one Bitbucket repository and write its scan results to JSON files.

    The repository at ``git_urls[count]`` is cloned through
    ``clone_git_repo`` using a URL that embeds ``username`` and ``secret``.
    It is then scanned with ``truffleHog.find_strings`` and
    ``truffleHog.searchSensitiveFilesInRepo``. Truthy results are written via
    ``write_json_to_file`` to ``<count>-code-<slug>.json`` and
    ``<count>-fs-<slug>.json`` respectively.

    :param count: index into ``git_urls``; also used in log lines and file names.
    :param git_urls: sequence of repository URLs.
    :param username: user name inserted into the clone URL.
    :param secret: password/token inserted into the clone URL.
    :param args: object providing ``do_entropy``, ``not_clone``,
        ``since_commit``, ``max_depth`` and ``do_regex`` attributes.
    :param json_repos: passed through unchanged to the helpers.
    :param total_rep: total number of repositories, used in log lines only.
    :return: ``None``. A ``GitCommandError`` is logged, not raised.
    """
    git_url = git_urls[count]
    # Slug = last URL segment, minus a trailing ".git" when there is one
    # (a blind [:-4] mangled URLs that do not end in ".git").
    git_slug = git_url.split('/')[-1]
    if git_slug.endswith('.git'):
        git_slug = git_slug[:-4]
    try:
        # NOTE(review): the credentials are inserted verbatim; characters such
        # as '@' or '/' in them are not percent-encoded here — confirm callers
        # pass URL-safe values.
        git_auth_url = git_url.replace('https://', 'https://' + username + ':' + secret + '@')
        do_entropy = truffleHog.str2bool(args.do_entropy)
        project_path = clone_git_repo(git_auth_url, git_url, json_repos, count, args.not_clone)
        logger.info('Rep #{0}. Starting to verify Bitbucket repo #{1} from {2} {3}'.format(count, count, total_rep, git_slug))

        # Search sensitive data using regexChecks.regexes_txt in folder: truffleHog
        found_leaks = truffleHog.find_strings(project_path, git_url, json_repos, args.since_commit,
                                              args.max_depth, args.do_regex, do_entropy)
        found_leaks_file = str(count) + "-code-" + git_slug + ".json"
        if found_leaks:
            write_json_to_file(found_leaks_file, 'checks', found_leaks, False)

        # Search sensitive data using regexChecks.regexes_fs in folder: truffleHog
        found_fs_leaks = truffleHog.searchSensitiveFilesInRepo(project_path, git_url, json_repos)
        fs_file = str(count) + "-fs-" + git_slug + ".json"
        if found_fs_leaks:
            write_json_to_file(fs_file, 'checks', found_fs_leaks, False)

        logger.info('Rep #{0}. Successfully Verified Bitbucket repo #{1} from {2} {3}\n'.format(count, count, total_rep, git_slug))
        print("Repo#", count, ". Slug. ", git_slug)
    except GitCommandError as exception:
        logger.info('Rep #{0}. Exception in parsing repo {1}. Details are - {2}'.format(count, git_slug, str(exception)))
def run(self, params={}):
    """Scan a git repository with truffleHog and return the reported issues.

    Reads ``git_url``, ``do_entropy``, ``do_regex``, ``since_commit``,
    ``max_depth`` and ``custom_regexes`` from ``params``. Each issue file
    listed under ``"foundIssues"`` in the scan result is decoded from its
    first line and extended with a ``"url"`` entry pointing at the commit.

    :param params: input mapping; it is only read, never modified (which
        also keeps the shared ``{}`` default safe).
    :return: ``{"issues": [...]}`` on success; ``None`` after logging an
        error if the scan or the result handling raises.
    :raises re.error: if a custom pattern does not compile (compilation
        happens before the guarded section).
    """
    git_url = params.get("git_url")
    do_entropy = params.get("do_entropy")
    do_regex = params.get("do_regex")
    since_commit = params.get("since_commit")
    max_depth = params.get("max_depth")

    # Compile into a new dict so the caller's ``custom_regexes`` mapping keeps
    # its original pattern strings.
    raw_regexes = params.get("custom_regexes")
    custom_regex = {}
    if raw_regexes is not None:
        custom_regex = {key: re.compile(raw_regexes[key]) for key in raw_regexes}

    try:
        scan = truffleHog.find_strings(
            git_url,
            printJson=True,
            do_entropy=do_entropy,
            do_regex=do_regex,
            since_commit=since_commit,
            custom_regexes=custom_regex,
            max_depth=max_depth,
            surpress_output=True,
        )
        # Strip only a trailing ".git" (optionally followed by "/"); an
        # unanchored pattern would also eat ".git" inside e.g. ".github".
        repo_url = re.sub(r"\.git/?$", "", git_url)

        issues = []
        for issue_path in scan["foundIssues"]:
            with open(issue_path, "r") as issue_file:
                # Only the first (right-stripped) line of the file is decoded.
                data = json.loads(
                    [line.rstrip() for line in issue_file][0], strict=False)
            commit_url = repo_url + "/commit/" + data["commitHash"]
            data.update({"url": str(commit_url)})
            issues.append(data)
        return {"issues": issues}
    except Exception as error:
        self.logger.error(
            "Please enter the correct variables for the input")
        # Report the actual cause as well, so failures can be diagnosed.
        self.logger.error("Scan failed: %s", error)
        return None
def run(self, params={}):
    """Scan a git repository with truffleHog and return the reported issues.

    Reads ``git_url``, ``do_entropy``, ``do_regex``, ``since_commit``,
    ``max_depth`` and ``custom_regexes`` from ``params``. Each issue file
    listed under ``'foundIssues'`` in the scan result is decoded from its
    first line and extended with a ``'url'`` entry pointing at the commit.

    :param params: input mapping; it is only read, never modified (which
        also keeps the shared ``{}`` default safe).
    :return: ``{'issues': [...]}`` on success; ``None`` after logging an
        error if the scan or the result handling raises.
    :raises re.error: if a custom pattern does not compile (compilation
        happens before the guarded section).
    """
    git_url = params.get('git_url')
    do_entropy = params.get('do_entropy')
    do_regex = params.get('do_regex')
    since_commit = params.get('since_commit')
    max_depth = params.get('max_depth')

    # Compile into a new dict so the caller's ``custom_regexes`` mapping keeps
    # its original pattern strings.
    raw_regexes = params.get('custom_regexes')
    custom_regex = {}
    if raw_regexes is not None:
        custom_regex = {key: re.compile(raw_regexes[key]) for key in raw_regexes}

    try:
        scan = truffleHog.find_strings(git_url,
                                       printJson=True,
                                       do_entropy=do_entropy,
                                       do_regex=do_regex,
                                       since_commit=since_commit,
                                       custom_regexes=custom_regex,
                                       max_depth=max_depth,
                                       surpress_output=True)
        # Strip only a trailing '.git' (optionally followed by '/'); an
        # unanchored pattern would also eat '.git' inside e.g. '.github'.
        repo_url = re.sub(r'\.git/?$', '', git_url)

        issues = []
        for issue_path in scan['foundIssues']:
            with open(issue_path, 'r') as issue_file:
                # Only the first (right-stripped) line of the file is decoded.
                data = json.loads(
                    [line.rstrip() for line in issue_file][0], strict=False)
            commit_url = repo_url + '/commit/' + data['commitHash']
            data.update({'url': str(commit_url)})
            issues.append(data)
        return {'issues': issues}
    except Exception as error:
        self.logger.error(
            'Please enter the correct variables for the input')
        # Report the actual cause as well, so failures can be diagnosed.
        self.logger.error('Scan failed: %s', error)
        return None
def checkRepositories(git_urls):
    """Scan each repository URL for RSA private keys and record what is found.

    For every URL the repository is scanned with
    ``truffleHog.find_strings(..., do_regex=True)``. Each issue whose
    ``reason`` is ``'RSA private key'`` is counted, its commit link and path
    are appended to ``keys/key_paths.txt``, and the key extracted from the
    issue's ``diff`` is printed and appended to
    ``keys/<last URL segment>_private_RSA_<n>.txt``.

    Any ``Exception`` raised while handling one URL is reported on stdout and
    the loop moves on to the next URL. ``KeyboardInterrupt`` and
    ``SystemExit`` are not intercepted, so a running scan can be aborted.

    :param git_urls: sequence of repository URLs.
    :return: ``None``.
    """
    for index, git_url in enumerate(git_urls):
        try:
            print(f'\n{110*"-"}\n\nChecking {git_url} ({index + 1}/{len(git_urls)})\n')
            repoStrings = truffleHog.find_strings(git_url, do_regex=True)
            keysFound = 0
            for issue in repoStrings['foundIssues']:
                with open(issue) as json_data:
                    finding = load(json_data)
                if finding['reason'] != 'RSA private key':
                    continue
                keysFound += 1

                # Appending key path to key_paths.txt
                with open('keys/key_paths.txt', 'a') as file:
                    file.write(f'Commit link: {git_url}/commit/{finding["commitHash"]}\nPath: {finding["path"]}\n\n')

                print(f'\nRSA private key found in {git_url + ("/" if git_url[-1] != "/" else "") + finding["path"]} on branch {finding["branch"]}\nCommit hash: {finding["commitHash"]}\nDate:{finding["date"]}\n')

                # Take the text between the BEGIN/END markers, drop its first
                # and last line, and clean up what is left.
                rsaKey = finding['diff'].split('-----BEGIN RSA PRIVATE KEY-----')[1].split('-----END RSA PRIVATE KEY-----')[0]
                rsaKey = cleanRSA(rsaKey.split('\n')[1:-1])
                rsaKey = ['-----BEGIN RSA PRIVATE KEY-----'] + rsaKey + ['-----END RSA PRIVATE KEY-----']

                # Printing the key to the console
                for line in rsaKey:
                    print(line)

                # Saving the key to a file
                repositoryName = git_url.split('/')[-1]
                with open(f'keys/{repositoryName}_private_RSA_{keysFound}.txt', 'a') as file:
                    file.write('\n'.join(rsaKey))

            if not keysFound:
                print('\nNo private RSA keys found.\n')
        except Exception:
            # Deliberately best-effort per URL, but narrower than a bare
            # ``except`` so interpreter-exit signals still propagate.
            print(f'\n{110*"-"}\n\n\nCouldn\'t seatch {git_url}.\nMake sure the url is correct and that you have the required permissions.\n')
def test_branch(self, rmtree_mock, repo_const_mock, clone_git_repo):
    """Scanning with ``branch`` set fetches that branch from ``origin`` exactly once."""
    # Whatever the mocked repo constructor returns is the object under watch.
    fake_repo = MagicMock()
    repo_const_mock.return_value = fake_repo

    truffleHog.find_strings("test_repo", branch="testbranch")

    fetch_mock = fake_repo.remotes.origin.fetch
    fetch_mock.assert_called_once_with("testbranch")
def execute_find_secrets_request(request: FindSecretsRequest) -> List[Secret]:
    """
    Executes the search for secrets with the given request

    :param FindSecretsRequest request:
        request object containing the path to the git repository and other
        configurations for the search
    :return: list of secret objects that represent the secrets found by the
        search
    :raises TrufflehogApiError: if cloning the remote repository or running
        the search fails; the original exception is attached as the cause
    """
    path = request.path
    repo_config = request.repo_config
    search_config = request.search_config
    if not repo_config:
        repo_config = RepoConfig()
    if not search_config:
        search_config = SearchConfig()

    token_key = repo_config.access_token_env_key
    token_exists = token_key and token_key in os.environ

    repo = None
    repo_path = path
    if is_git_dir(path + os.path.sep + ".git"):
        # If repo is local and env key for access token is present, display warning
        if token_exists:
            warnings.warn(
                "Warning: local repository path provided with an access token - "
                "Token will be ignored")
    else:
        # If repo is remote, append access token to path from its env key
        git_url = path
        if token_exists:
            git_url = _append_env_access_token_to_path(path, token_key)

        # We pre-clone the repo to fix a bug that causes truffleHog to crash
        # on Windows machines when run on remote repositories.
        temp_dir = None
        try:
            temp_dir = tempfile.mkdtemp()
            repo = Repo.clone_from(git_url, temp_dir)
        except Exception as e:
            # Only remove a directory that was actually created here; never
            # hand the caller-supplied path to the delete helper.
            if temp_dir is not None:
                _delete_tempdir(temp_dir)
            raise TrufflehogApiError(e) from e
        repo_path = temp_dir

    do_regex = search_config.regexes
    secrets = None
    try:
        output = truffleHog.find_strings(
            git_url=None,
            since_commit=repo_config.since_commit,
            max_depth=search_config.max_depth,
            do_regex=do_regex,
            do_entropy=search_config.entropy_checks_enabled,
            custom_regexes=search_config.regexes,
            branch=repo_config.branch,
            repo_path=repo_path,
            path_inclusions=search_config.include_search_paths,
            path_exclusions=search_config.exclude_search_paths)
        secrets = _convert_default_output_to_secrets(output)
        _clean_up(output)
    except Exception as e:
        raise TrufflehogApiError(e) from e
    finally:
        # Delete our clone of the remote repo (if it exists), whether or not
        # the search succeeded.
        if repo is not None:
            # truffleHog doesn't do this, which causes a bug on Windows
            repo.close()
            _delete_tempdir(repo_path)
    return secrets
def test_unicode_expection(self):
    """Scanning the ``tst`` repository must not raise ``UnicodeEncodeError``."""
    repo_url = "https://github.com/dxa4481/tst.git"
    try:
        truffleHog.find_strings(repo_url)
    except UnicodeEncodeError:
        # Turn the encoding error into an ordinary test failure.
        self.fail("Unicode print error")
def test_repo_path(self, rmtree_mock, repo_const_mock, clone_git_repo):
    """With ``repo_path`` given, neither the rmtree mock nor the clone mock is called."""
    truffleHog.find_strings("test_repo", repo_path="test/path/")

    for untouched_mock in (rmtree_mock, clone_git_repo):
        untouched_mock.assert_not_called()
def test_find_strings(self):
    """Scanning a local ``file://`` repository must not raise ``UnicodeEncodeError``."""
    # NOTE(review): this path points into one developer's home directory;
    # presumably the test only works on that machine — confirm and replace.
    local_repo_url = "file:///Users/xavier/Development/Projects/truffleHog"
    try:
        truffleHog.find_strings(local_repo_url, False)
    except UnicodeEncodeError:
        # Turn the encoding error into an ordinary test failure.
        self.fail("Unicode print error")