def test_saves_to_baseline():
    """A scan against an existing baseline keeps its settings but refreshes results."""
    # Start from an empty baseline that carries customized plugin settings.
    # The engine should honour those settings, while the (empty) results get
    # replaced by whatever the new scan finds.
    with transient_settings({
        'plugins_used': [
            {
                'name': 'Base64HighEntropyString',
                'limit': 4.5,
            },
        ],
    }):
        empty_collection = SecretsCollection()
        old_secrets = baseline.format_for_output(empty_collection)

        with mock_printer(main_module) as printer, tempfile.NamedTemporaryFile() as f:
            baseline.save_to_file(old_secrets, f.name)
            f.seek(0)

            # We also test setting the root directory through this test.
            main_module.main(['scan', 'test_data', '--baseline', f.name])
            f.seek(0)

            new_secrets = json.loads(f.read())

            # The results must have been replaced by the scan...
            assert not empty_collection.exactly_equals(baseline.load(new_secrets, f.name))

            # ...while the customized plugin settings survived.
            assert new_secrets['plugins_used'] == [
                {
                    'name': 'Base64HighEntropyString',
                    'limit': 4.5,
                },
            ]
            assert not printer.message
def get_secrets_from_baseline(baseline, filter_func=lambda secret: True):
    """Return a new SecretsCollection containing only the secrets that pass filter_func.

    :type baseline: SecretsCollection
    :param baseline: SecretsCollection of baseline results. This will be
        updated accordingly (by reference)

    :type filter_func: callable
    :param filter_func: predicate applied to each secret. If not supplied,
        return all secrets.

    :rtype: SecretsCollection
    :returns: SecretsCollection of non-audited results
    """
    # BUGFIX: this used to check `isinstance(filter_func, types.FunctionType)`,
    # which silently returned the unfiltered baseline for perfectly valid
    # callables (bound methods, functools.partial objects, builtins).
    # `callable()` covers all of them; plain functions and lambdas still pass.
    if not callable(filter_func):
        return baseline

    new_secrets = SecretsCollection()
    for filename in baseline.data:
        # The __hash__ method of PotentialSecret makes this work
        filtered_results = {
            secret: secret
            for secret in baseline.data[filename]
            if filter_func(secret)
        }

        if filtered_results:
            new_secrets.data[filename] = filtered_results

    return new_secrets
def secrets_collection_factory(secrets=None, plugins=(), exclude_regex=''):  # pragma: no cover
    """Build a SecretsCollection pre-populated for test cases.

    :type secrets: list(dict)
    :param secrets: list of params to pass to add_secret.
        E.g. [ {'secret': 'blah'}, ]
    :type plugins: tuple
    :type exclude_regex: str
    :rtype: SecretsCollection
    """
    collection = SecretsCollection(plugins, exclude_regex)
    if plugins:
        collection.plugins = plugins

    # `secrets or []` handles both the None default and an empty list.
    for kwargs in (secrets or []):
        _add_secret(collection, **kwargs)

    return collection
def test_ensure_file_transformers_are_used(printer):
    """
    Constructs a situation where `detect-secrets scan` must lean on special
    file transformers to find a secret that plain line-by-line reading would
    miss. If the audit flow then locates that same secret, we can infer that
    audit also knows how to apply file transformers.
    """
    with transient_settings({
        'plugins_used': [
            {
                'name': 'Base64HighEntropyString'
            },
        ],
    }):
        secrets = SecretsCollection()
        secrets.scan_file('test_data/config.env')
        assert bool(secrets)

        with open('test_data/config.env') as f:
            stripped_lines = [line.rstrip() for line in f]

        # Audit should find the secret (i.e. never report "not found").
        with mock.patch('detect_secrets.audit.io.print_secret_not_found') as m:
            run_logic(secrets, 'y')
            assert not m.called

        found_line = list(secrets['test_data/config.env'])[0].line_number
        assert stripped_lines[found_line - 1] in printer.message
def test_main_scan_repo_scan_success_secrets_found(self, mock_file, mock_scan, mock_log):
    # Simulate the tracked-repo metadata file that would be read off disk.
    mock_file.return_value = {
        'sha': 'does_not_matter',
        'repo': 'repo_name',
        'plugins': {
            'base64_limit': 3,
        },
        'cron': '* * * * *',
        'baseline_file': '.secrets.baseline',
    }
    # A non-empty collection, so the scan "finds" secrets.
    mock_secret_collection = SecretsCollection()
    mock_secret_collection.data['junk'] = 'data'
    mock_scan.return_value = mock_secret_collection

    with mock.patch('detect_secrets_server.usage.ExternalHook') as hook, \
            mock.patch('detect_secrets_server.repos.base_tracked_repo.BaseTrackedRepo.update') as update, \
            mock.patch('detect_secrets.core.secrets_collection.SecretsCollection.json') as secrets_json:
        assert main([
            '--scan-repo',
            'will-be-mocked',
            '--output-hook',
            'examples/standalone_hook.py',
        ]) == 0

        # Secrets found: repo state must NOT advance, the external hook must
        # be alerted exactly once, and the results serialized exactly once.
        assert update.call_count == 0
        assert hook().alert.call_count == 1
        assert secrets_json.call_count == 1
def test_merge():
    """Merging an older collection should carry its audit labels onto matching secrets."""
    old_secrets = SecretsCollection()
    old_secrets.scan_file('test_data/each_secret.py')
    assert len(list(old_secrets)) >= 3    # otherwise, this test won't work.

    # Label the first three secrets in distinct ways.
    for position, (_, secret) in enumerate(old_secrets):
        if position == 0:
            secret.is_secret = False
        elif position == 1:
            secret.is_secret = True
        elif position == 2:
            secret.is_verified = True

    new_secrets = SecretsCollection()
    new_secrets.scan_file('test_data/each_secret.py')
    list(new_secrets)[-1][1].is_secret = True

    new_secrets.merge(old_secrets)

    # The labels from old_secrets should now appear on the merged results.
    for position, (_, secret) in enumerate(new_secrets):
        if position == 0:
            assert secret.is_secret is False
            assert secret.is_verified is False
        elif position == 1:
            assert secret.is_secret is True
            assert secret.is_verified is False
        elif position == 2:
            assert secret.is_secret is True
            assert secret.is_verified is True
def initialize(plugins, exclude_regex=None, rootdir='.'):
    """Scans the entire codebase for high entropy strings, and returns a
    SecretsCollection object.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_regex: str|None
    :type rootdir: str

    :rtype: SecretsCollection
    """
    output = SecretsCollection(plugins, exclude_regex)

    if os.path.isfile(rootdir):
        # This option allows for much easier adhoc usage.
        git_files = [rootdir]
    else:
        git_files = _get_git_tracked_files(rootdir)

    if not git_files:
        return output

    if exclude_regex:
        pattern = re.compile(exclude_regex, re.IGNORECASE)
        git_files = [
            filename
            for filename in git_files
            if not pattern.search(filename)
        ]

    for filename in git_files:
        output.scan_file(filename)

    return output
def secrets_collection_factory(secrets=None, plugins=(), exclude_files_regex=None):
    """Build a SecretsCollection pre-populated for test cases.

    :type secrets: list(dict)
    :param secrets: list of params to pass to add_secret.
        E.g. [ {'secret': 'blah'}, ]
    :type plugins: tuple
    :type exclude_files_regex: str|None
    :rtype: SecretsCollection
    """
    collection = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
    )

    if plugins:
        # We don't want to incur network calls during test cases
        for plugin in plugins:
            plugin.should_verify = False
        collection.plugins = plugins

    # `secrets or []` handles both the None default and an empty list.
    for kwargs in (secrets or []):
        _add_secret(collection, **kwargs)

    return collection
def test_baseline_filters_out_known_secrets():
    """A baseline covering all found secrets lets the commit through; a
    partial baseline must still block the commit.
    """
    secrets = SecretsCollection()
    secrets.scan_file('test_data/each_secret.py')

    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        # This succeeds, because all the secrets are known.
        assert_commit_succeeds([
            'test_data/each_secret.py',
            '--baseline',
            f.name,
        ])

    # Remove one arbitrary secret, so that it won't be the full set.
    secrets.data['test_data/each_secret.py'].pop()

    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        # Test that it isn't the case that a baseline is provided, and everything passes.
        # NOTE: removed a leftover commented-out `pdb.set_trace()` debug line here.
        assert_commit_blocked([
            'test_data/each_secret.py',
            '--baseline',
            f.name,
        ])
def modified_baseline(self):
    """Yield a fresh scan of self.FILENAME with every secret's line number shifted down by one."""
    collection = SecretsCollection()
    collection.scan_file(self.FILENAME)

    for _, found_secret in collection:
        found_secret.line_number += 1

    yield collection
def test_load_baseline_without_any_valid_fields(self, mock_log):
    """A baseline dict with no recognized fields should raise, and log an error."""
    bad_payload = json.dumps({
        'junk': 'dictionary',
    })
    with pytest.raises(IOError):
        SecretsCollection.load_baseline_from_string(bad_payload)

    assert mock_log.error_messages == 'Incorrectly formatted baseline!\n'
def test_load_baseline_with_invalid_input(self, mock_log):
    """An invalid baseline JSON payload should raise IOError and log the failure."""
    bad_payload = json.dumps({
        'junk': 'dictionary',
    })
    with pytest.raises(IOError):
        SecretsCollection.load_baseline_from_string(bad_payload)

    assert mock_log.getLogger().error.called
def test_no_modifications(base_state, scanned_results):
    """Trimming against identical scan results must leave the baseline untouched."""
    secrets = SecretsCollection.load_from_baseline({'results': base_state})
    scanned = SecretsCollection.load_from_baseline({'results': scanned_results})

    secrets.trim(scanned)

    assert secrets.json() == base_state
def test_load_baseline_without_exclude(self, mock_log):
    """A baseline missing its exclude regex should raise, and log an error."""
    payload = json.dumps({
        'plugins_used': (),
        'results': {},
    })
    with pytest.raises(IOError):
        SecretsCollection.load_baseline_from_string(payload)

    assert mock_log.error_messages == 'Incorrectly formatted baseline!\n'
def test_file_no_longer_exists(printer, mock_user_decision):
    """Comparing collections over disjoint files should never prompt the user."""
    secretsA = SecretsCollection()
    secretsA['fileB'].add(potential_secret_factory('a'))

    secretsB = SecretsCollection()
    secretsB['fileA'].add(potential_secret_factory('a'))

    run_logic(secretsA, secretsB)

    assert not mock_user_decision.called
def test_basic(file_content):
    """Scanning a temp file containing one secret should yield exactly one result."""
    with tempfile.NamedTemporaryFile() as f:
        f.write(file_content.encode())
        f.seek(0)    # flush the write so scan_file sees the content

        collection = SecretsCollection()
        collection.scan_file(f.name)

        assert len(list(collection)) == 1
def test_load_baseline_with_invalid_input(self, mock_log):
    """Invalid baseline payloads should raise IOError, with an error message logged."""
    with pytest.raises(IOError):
        SecretsCollection.load_baseline_from_string(
            json.dumps({'junk': 'dictionary'}),
        )

    assert mock_log.error_messages == 'Incorrectly formatted baseline!\n'
def test_bool():
    """A SecretsCollection is truthy exactly when it holds secrets."""
    collection = SecretsCollection()
    assert not collection

    collection.scan_file('test_data/each_secret.py')
    assert collection

    # Emptying the per-file results makes the whole collection falsy again.
    collection['test_data/each_secret.py'].clear()
    assert not collection
def initialize(
    path,
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    should_scan_all_files=False,
):
    """Scans the entire codebase for secrets, and returns a
    SecretsCollection object.

    :type path: list
    :param path: files and/or directories to scan.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :type exclude_lines_regex: str|None

    :type should_scan_all_files: bool
    :param should_scan_all_files: if True, walk directories directly instead
        of only scanning git-tracked files.

    :rtype: SecretsCollection
    """
    output = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    files_to_scan = []
    for element in path:
        if os.path.isdir(element):
            if should_scan_all_files:
                files_to_scan.extend(_get_files_recursively(element))
            else:
                files = _get_git_tracked_files(element)
                if files:
                    files_to_scan.extend(files)
        elif os.path.isfile(element):
            files_to_scan.append(element)
        else:
            # Lazy %-style args (logging best practice) instead of building
            # the message with string concatenation; rendered output is identical.
            log.error('detect-secrets: %s: No such file or directory', element)

    if not files_to_scan:
        return output

    if exclude_files_regex:
        exclude_files_regex = re.compile(exclude_files_regex, re.IGNORECASE)
        # List comprehension instead of filter(lambda ...) for readability.
        files_to_scan = [
            file
            for file in files_to_scan
            if not exclude_files_regex.search(file)
        ]

    for file in files_to_scan:
        output.scan_file(file)

    return output
def test_deleted_secret_file():
    """Trimming only discards a file's secrets when that file is in the filelist."""
    collection = SecretsCollection()
    collection.scan_file('test_data/each_secret.py')

    # Without a filelist, nothing is treated as deleted: secrets survive.
    collection.trim(SecretsCollection())
    assert collection

    # With the file listed, an empty scan result wipes its secrets.
    collection.trim(SecretsCollection(), filelist=['test_data/each_secret.py'])
    assert not collection
def test_strict_equality():
    """`==` ignores line numbers, while `exactly_equals` takes them into account."""
    first = potential_secret_factory()
    secretsA = SecretsCollection()
    secretsA[first.filename].add(first)

    second = potential_secret_factory(line_number=2)
    secretsB = SecretsCollection()
    secretsB[second.filename].add(second)

    assert secretsA == secretsB
    assert not secretsA.exactly_equals(secretsB)
def find_secrets_in_files(args, plugins):
    """Scan every staged filename for secrets, skipping the baseline file itself.

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(plugins)

    # Don't scan the baseline file
    baseline_filename = args.baseline[0]
    for filename in args.filenames:
        if filename == baseline_filename:
            continue
        collection.scan_file(filename)

    return collection
def scan(self, exclude_files_regex=None, exclude_lines_regex=None, scan_head=False):
    """Fetches latest changes, and scans the git diff between last_commit_hash
    and HEAD.

    :raises: subprocess.CalledProcessError

    :type exclude_files_regex: str|None
    :param exclude_files_regex: A regex matching filenames to skip over.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: A regex matching lines to skip over.

    :type scan_head: bool
    :param scan_head: if True, diff against the empty tree (i.e. scan all of
        HEAD) instead of only the changes since last_commit_hash.

    :rtype: SecretsCollection
    :returns: secrets found.
    """
    self.storage.fetch_new_changes()

    default_plugins = initialize_plugins.from_parser_builder(
        self.plugin_config,
        exclude_lines_regex=exclude_lines_regex,
    )
    # TODO Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
    secrets = SecretsCollection(
        plugins=default_plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )
    scan_from_this_commit = git.get_empty_tree_commit_hash() if scan_head else self.last_commit_hash
    try:
        diff_name_only = self.storage.get_diff_name_only(scan_from_this_commit)

        # do a per-file diff + scan so we don't get a OOM if the commit-diff is too large
        for filename in diff_name_only:
            file_diff = self.storage.get_diff(scan_from_this_commit, filename)
            secrets.scan_diff(
                file_diff,
                baseline_filename=self.baseline_filename,
                last_commit_hash=scan_from_this_commit,
                repo_name=self.name,
            )
    except subprocess.CalledProcessError:
        # A failed diff usually means our recorded commit hash is stale;
        # resync repo state and return what we have so far (likely empty).
        self.update()
        return secrets

    if self.baseline_filename:
        # Filter out secrets that are already whitelisted in the repo's baseline.
        baseline = self.storage.get_baseline_file(self.baseline_filename)
        if baseline:
            baseline_collection = SecretsCollection.load_baseline_from_string(baseline)
            secrets = get_secrets_not_in_baseline(secrets, baseline_collection)

    return secrets
def find_secrets_in_files(args):
    """Scan every staged filename for secrets, skipping the baseline file itself.

    :rtype: SecretsCollection
    """
    plugins = initialize.from_parser_builder(args.plugins)
    collection = SecretsCollection(plugins)

    for filename in args.filenames:
        # Obviously, don't detect the baseline file
        if filename != args.baseline[0]:
            collection.scan_file(filename)

    return collection
def test_load_baseline_from_file_fails_early_on_bad_filename(
    self,
    mock_log,
):
    # If the file can't even be decoded, we should bail out *before*
    # attempting to parse its contents as a baseline.
    with mock.patch.object(SecretsCollection, 'load_baseline_from_string') as \
            mock_load_baseline_from_string, \
            mock_open('will_throw_error') as mock_file:
        mock_file().read.side_effect = MockUnicodeDecodeError

        with pytest.raises(UnicodeDecodeError):
            SecretsCollection.load_baseline_from_file('does_not_matter')

    # Parsing was never reached, and the decode failure was logged.
    assert not mock_load_baseline_from_string.called
    assert mock_log.getLogger().error.called
def main(argv: Optional[List[str]] = None) -> int:
    """Pre-commit hook entry point.

    Returns 0 on success, 1 when new secrets are found (or arguments are
    invalid), and 3 when the baseline file was updated and must be re-added.
    """
    try:
        args = parse_args(argv)
    except ValueError:
        return 1

    if args.verbose:   # pragma: no cover
        log.set_debug_level(args.verbose)

    # Find all secrets in files to be committed
    secrets = SecretsCollection()
    for filename in args.filenames:
        secrets.scan_file(filename)

    new_secrets = secrets
    if args.baseline:
        # Only secrets *not* already present in the baseline count as new.
        new_secrets = secrets - args.baseline

    if new_secrets:
        pretty_print_diagnostics(new_secrets)
        return 1

    if not args.baseline:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets.
    is_modified = should_update_baseline(
        args.baseline,
        scanned_results=secrets,
        filelist=args.filenames,
        baseline_version=args.baseline_version,
    )
    if is_modified:
        if args.baseline_version != VERSION:
            # The on-disk baseline uses an older schema: upgrade it before saving.
            with open(args.baseline_filename) as f:
                old_baseline = json.loads(f.read())

            # Override the results, because this has been updated in `should_update_baseline`.
            old_baseline['results'] = args.baseline.json()

            args.baseline = baseline.upgrade(old_baseline)

        baseline.save_to_file(args.baseline, filename=args.baseline_filename)
        print(
            'The baseline file was updated.\n'
            'Probably to keep line numbers of secrets up-to-date.\n'
            'Please `git add {}`, thank you.\n\n'.format(
                args.baseline_filename),
        )
        # Exit code 3 tells the caller the baseline changed (commit must retry).
        return 3

    return 0
def get_baseline_file(self, formatter=baseline.format_for_output):
    """Yield a temp file containing an old-format (v0.0.1) baseline for self.FILENAME."""
    collection = SecretsCollection()
    collection.scan_file(self.FILENAME)

    with tempfile.NamedTemporaryFile() as f:
        with mock.patch('detect_secrets.core.baseline.VERSION', '0.0.1'):
            data = formatter(collection)

        # Simulating old version
        data['plugins_used'][0]['base64_limit'] = data['plugins_used'][0].pop('limit')
        baseline.save_to_file(data, f.name)

        yield f
def test_maintains_labels():
    """Trimming against fresh scan results must not clobber audit labels."""
    labelled_secrets = SecretsCollection()
    labelled_secrets.scan_file('test_data/each_secret.py')

    # Label only the first secret found.
    for _, secret in labelled_secrets:
        secret.is_secret = True
        break

    secrets = SecretsCollection()
    secrets.scan_file('test_data/each_secret.py')
    labelled_secrets.trim(scanned_results=secrets)

    # Generator expression: `any([...])` built a throwaway list and defeated
    # any()'s short-circuiting.
    assert any(secret.is_secret for _, secret in labelled_secrets)
def test_basic(configure_plugins):
    """Subtracting a filtered baseline leaves only the filtered-out secrets."""
    with transient_settings({**configure_plugins, 'filters_used': []}):
        secrets = SecretsCollection()
        secrets.scan_file('test_data/each_secret.py')

    # This baseline will have less secrets, since it filtered out some.
    filtered_settings = {
        **configure_plugins,
        'filters_used': [
            {
                'path': 'detect_secrets.filters.regex.should_exclude_line',
                'pattern': [
                    'EXAMPLE',
                ],
            },
        ],
    }
    with transient_settings(filtered_settings):
        baseline = SecretsCollection()
        baseline.scan_file('test_data/each_secret.py')

    # This tests the != operator for same file, different number of secrets.
    # It's hidden in a different test, but I didn't want to set up the boilerplate
    # again.
    assert secrets != baseline

    result = secrets - baseline
    assert len(result['test_data/each_secret.py']) == 2
    assert len(secrets['test_data/each_secret.py']) == 4
def test_file_based_success_yaml():
    """The YAML transformer should surface secrets at their correct line numbers."""
    get_settings().configure_plugins([
        {
            'name': 'HexHighEntropyString',
            'limit': 3.0,
        },
    ])

    collection = SecretsCollection()
    collection.scan_file('test_data/config.yaml')

    # The second line of each secret's str() is its location.
    locations = [str(secret).splitlines()[1] for _, secret in collection]
    assert locations == [
        'Location: test_data/config.yaml:3',
        'Location: test_data/config.yaml:5',
    ]
def test_disable_filter(parser):
    """Disabling the UUID heuristic filter should let UUID-valued secrets through."""
    with tempfile.NamedTemporaryFile() as f:
        f.write(f'secret = "{uuid.uuid4()}"'.encode())

        # First, make sure that we actually catch it.
        f.seek(0)
        with transient_settings({
            'plugins_used': [{
                'name': 'KeywordDetector',
            }],
        }):
            collection = SecretsCollection()
            collection.scan_file(f.name)
            assert not collection

        f.seek(0)
        with default_settings():
            parser.parse_args([
                'scan',
                '--disable-filter',
                'detect_secrets.filters.heuristic.is_potential_uuid',

                # invalid filter
                '--disable-filter',
                'blah',
            ])

            collection = SecretsCollection()
            collection.scan_file(f.name)
            assert collection
def get_baseline(baseline_filename):
    """Load and validate a baseline file, returning its SecretsCollection.

    :raises: IOError
    :raises: ValueError
    """
    if not baseline_filename:
        return

    raise_exception_if_baseline_file_is_not_up_to_date(baseline_filename)

    baseline_string = _get_baseline_string_from_file(baseline_filename)
    baseline_version = json.loads(baseline_string).get('version')
    try:
        raise_exception_if_baseline_version_is_outdated(baseline_version)
    except ValueError:
        log.error(
            'The supplied baseline may be incompatible with the current\n'
            'version of detect-secrets. Please recreate your baseline to\n'
            'avoid potential mis-configurations.\n\n'
            'Current Version: %s\n'
            'Baseline Version: %s',
            VERSION,
            baseline_version or '0.0.0',
        )
        raise

    return SecretsCollection.load_baseline_from_string(baseline_string)
def test_load_baseline_from_string(self, mock_gmtime):
    """
    We use load_baseline_from_string as a proxy to testing
    _load_baseline_from_dict, because it's the most direct entry into the
    private function.
    """
    original = self.get_baseline_dict(mock_gmtime)

    round_tripped = SecretsCollection.load_baseline_from_string(
        json.dumps(original),
    ).format_for_baseline_output()

    self.assert_loaded_collection_is_original_collection(original, round_tripped)
def get_secrets_not_in_baseline(results, baseline):
    """
    :type results: SecretsCollection
    :param results: SecretsCollection of current results

    :type baseline: SecretsCollection
    :param baseline: SecretsCollection of baseline results.
        This will be updated accordingly (by reference)

    :rtype: SecretsCollection
    :returns: SecretsCollection of new results (filtering out baseline)
    """
    regex = re.compile(baseline.exclude_regex, re.IGNORECASE) if baseline.exclude_regex else None

    new_secrets = SecretsCollection()
    for filename in results.data:
        if regex and regex.search(filename):
            continue

        if filename not in baseline.data:
            # We don't have a previous record of this file, so obviously
            # everything is new.
            new_secrets.data[filename] = results.data[filename]
            continue

        # The __hash__ method of PotentialSecret lets us test membership
        # directly against the baseline's per-file results.
        unknown_secrets = {
            secret: secret
            for secret in results.data[filename]
            if secret not in baseline.data[filename]
        }
        if unknown_secrets:
            new_secrets.data[filename] = unknown_secrets

    return new_secrets