def _load_baseline_from_dict(cls, data):
    """Initializes a SecretsCollection object from dictionary.

    :type data: dict
    :param data: properly formatted dictionary to load SecretsCollection from.

    :rtype: SecretsCollection
    :raises: IOError
    """
    result = SecretsCollection()

    # A serialized baseline must always carry both of these keys.
    if 'results' not in data or 'exclude_regex' not in data:
        raise IOError

    for filename, items in data['results'].items():
        file_map = {}
        for item in items:
            # Plaintext secrets are never persisted; build a placeholder
            # and patch in the stored hash afterwards.
            placeholder = PotentialSecret(
                item['type'],
                filename,
                item['line_number'],
                'will be replaced',
            )
            placeholder.secret_hash = item['hashed_secret']
            file_map[placeholder] = placeholder
        result.data[filename] = file_map

    result.exclude_regex = data['exclude_regex']
    return result
def test_scan_with_baseline(self, mock_subprocess_obj, mock_load_from_string, mock_apply): repo = mock_tracked_repo() # Setup secrets secretA = PotentialSecret('type', 'filenameA', 1, 'blah') secretB = PotentialSecret('type', 'filenameA', 2, 'curry') original_secrets = SecretsCollection() original_secrets.data['filenameA'] = { secretA: secretA, secretB: secretB, } baseline_secrets = SecretsCollection() baseline_secrets.data['filenameA'] = { secretA: secretA, } # Easier than mocking load_from_diff. mock_apply.side_effect = lambda orig, base: \ get_secrets_not_in_baseline(original_secrets, baseline_secrets) mock_subprocess_obj.side_effect = mock_subprocess((SubprocessMock( expected_input='git show', mocked_output=b'will be mocked', ), )) secrets = repo.scan() assert len(secrets.data) == 1 assert secrets.data['filenameA'][secretB] == secretB
def get_secret(self, filename, secret, type_=None):
    """Checks to see whether a secret is found in the collection.

    :type filename: str
    :param filename: the file to search in.

    :type secret: str
    :param secret: secret hash of secret to search for.

    :type type_: str
    :param type_: type of secret, if known.

    :rtype: PotentialSecret|None
    """
    file_secrets = self.data.get(filename)
    if file_secrets is None:
        return None

    if not type_:
        # Without the type we cannot reconstruct the dict key, so we have
        # to scan every stored secret and compare hashes.
        for candidate in file_secrets:
            if candidate.secret_hash == secret:
                return candidate
        return None

    # Known type => build an equivalent key and do a direct dict lookup.
    probe = PotentialSecret(type_, filename, 0, 'will be overriden')
    probe.secret_hash = secret
    return file_secrets.get(probe)
def get_secret(self, filename, secret, type_=None):
    """Checks to see whether a secret is found in the collection.

    :type filename: str
    :param filename: the file to search in.

    :type secret: str
    :param secret: secret hash of secret to search for.

    :type type_: str
    :param type_: type of secret, if known.

    :rtype: PotentialSecret|None
    """
    file_secrets = self.data.get(filename)
    if file_secrets is None:
        return None

    if not type_:
        # Without the type we cannot reconstruct the dict key, so we must
        # iterate through the stored secrets and compare hashes.
        for candidate in file_secrets:
            if candidate.secret_hash == secret:
                return candidate
        return None

    # Known type => build an equivalent key and do a direct dict lookup.
    probe = PotentialSecret(type_, filename, secret='will be overriden')
    probe.secret_hash = secret
    return file_secrets.get(probe)
def load_baseline_from_dict(cls, data):
    """Initializes a SecretsCollection object from dictionary.

    :type data: dict
    :param data: properly formatted dictionary to load SecretsCollection from.

    :rtype: SecretsCollection
    :raises: IOError
    """
    result = SecretsCollection()

    # These two keys are mandatory in every baseline version.
    if not all(key in data for key in (
        'plugins_used',
        'results',
    )):
        raise IOError

    # In v0.12.0 `exclude_regex` got replaced by `exclude`
    if not any(key in data for key in (
        'exclude',
        'exclude_regex',
    )):
        raise IOError

    if 'exclude_regex' in data:
        # Legacy (pre-v0.12.0) baseline: only a file-exclusion regex.
        result.exclude_files = data['exclude_regex']
    else:
        result.exclude_files = data['exclude']['files']
        result.exclude_lines = data['exclude']['lines']

    plugins = []
    for plugin in data['plugins_used']:
        # NOTE: pop() mutates the plugin dict so the remaining entries can
        # be forwarded verbatim as keyword args to the plugin constructor.
        plugin_classname = plugin.pop('name')
        plugins.append(
            initialize.from_plugin_classname(
                plugin_classname,
                exclude_lines_regex=result.exclude_lines,
                should_verify_secrets=False,
                **plugin),
        )
    result.plugins = tuple(plugins)

    for filename in data['results']:
        result.data[filename] = {}
        for item in data['results'][filename]:
            # Plaintext secrets are never stored in a baseline; create a
            # placeholder and patch in the persisted hash.
            secret = PotentialSecret(
                item['type'],
                filename,
                secret='will be replaced',
                lineno=item['line_number'],
                is_secret=item.get('is_secret'),
            )
            secret.secret_hash = item['hashed_secret']
            result.data[filename][secret] = secret

    # Baselines written before versioning was introduced get '0.0.0'.
    result.version = (
        data['version']
        if 'version' in data
        else '0.0.0'
    )

    return result
def test_verify_invalid(self, mock_box, mock_jwt):
    """An exception raised by the Box client verifies as a false secret."""
    mock_box.side_effect = Exception('oops')

    context = '''"clientID": "{}", "clientSecret": "{}", "publicKeyID": "{}", "privateKey": "{}", "passphrase": "{}", "enterpriseID": "{}" '''.format(
        BOX_CLIENT_ID,
        BOX_CLIENT_SECRET,
        BOX_PUBLIC_KEY_ID,
        BOX_PRIVATE_KEY,
        BOX_PASSPHRASE,
        BOX_ENTERPRISE_ID,
    )

    candidate = PotentialSecret('test box', 'test filename', BOX_CLIENT_SECRET)
    verdict = BoxDetector().verify(BOX_CLIENT_SECRET, context, candidate)

    assert verdict == VerifiedResult.VERIFIED_FALSE
    mock_box.assert_called()
def test_verify_keep_trying_until_found_something(self):
    """Verification keeps trying candidates until one of them verifies.

    The patched verifier returns False on its first call (the fake
    'TEST'*10 secret) and True afterwards (the real EXAMPLE_SECRET).
    """
    data = {'count': 0}

    def counter(*args, **kwargs):
        # False on call 0, truthy on every subsequent call.
        output = data['count']
        data['count'] += 1
        return bool(output)

    with mock.patch(
        'detect_secrets.plugins.aws.verify_aws_secret_access_key',
        counter,
    ):
        potential_secret = PotentialSecret('test aws', 'test filename', self.example_key)
        # NOTE(review): the [1:-1] slice strips the leading and trailing
        # newline from the dedented block — assumes the literal starts and
        # ends with a newline.
        assert AWSKeyDetector().verify(
            self.example_key,
            textwrap.dedent("""
                false_secret = {}
                real_secret = {}
            """)[1:-1].format(
                'TEST' * 10,
                EXAMPLE_SECRET,
            ),
            potential_secret,
        ) == VerifiedResult.VERIFIED_TRUE

        # The verified secret is recorded as an additional factor.
        assert potential_secret.other_factors[
            'secret_access_key'] == EXAMPLE_SECRET
def _create_baseline():
    """Serializes a minimal two-plugin baseline fixture to a JSON string."""
    base64_secret = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5'
    baseline = {
        'generated_at': 'does_not_matter',
        'exclude_regex': '',
        'plugins_used': [
            {'name': 'HexHighEntropyString', 'hex_limit': 3},
            {'name': 'PrivateKeyDetector'},
        ],
        'results': {
            'test_data/files/file_with_secrets.py': [{
                'type': 'Base64 High Entropy String',
                'line_number': 3,
                'hashed_secret': PotentialSecret.hash_secret(base64_secret),
            }],
        },
        'version': VERSION,
    }
    return json.dumps(baseline, indent=2, sort_keys=True)
def test_verify_no_other_factors(self):
    """With only a password in context, verification stays inconclusive."""
    candidate = PotentialSecret('test db2', 'test filename', DB2_PASSWORD)
    context = 'password={}'.format(DB2_PASSWORD)

    verdict = Db2Detector().verify(DB2_PASSWORD, context, candidate)

    assert verdict == VerifiedResult.UNVERIFIED
def _create_baseline():
    """Builds a small baseline dict with one finding and returns it as pretty JSON."""
    base64_secret = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5'

    # Single entry for the lone secret in the fixture file.
    finding = {
        'type': 'Base64 High Entropy String',
        'line_number': 3,
        'hashed_secret': PotentialSecret.hash_secret(base64_secret),
    }

    return json.dumps(
        {
            'generated_at': 'does_not_matter',
            'exclude_regex': '',
            'plugins_used': [
                {'name': 'HexHighEntropyString', 'hex_limit': 3},
                {'name': 'PrivateKeyDetector'},
            ],
            'results': {
                'test_data/files/file_with_secrets.py': [finding],
            },
            'version': VERSION,
        },
        indent=2,
        sort_keys=True,
    )
def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1):
    """This is only marginally better than creating PotentialSecret objects
    directly, because of the default values.

    :type type_: str
    :type filename: str
    :type secret: str
    :type lineno: int
    :rtype: PotentialSecret
    """
    return PotentialSecret(type_, filename, secret, lineno)
def _load_baseline_from_dict(cls, data):
    """Initializes a SecretsCollection object from dictionary.

    :type data: dict
    :param data: properly formatted dictionary to load SecretsCollection from.

    :rtype: SecretsCollection
    :raises: IOError
    """
    result = SecretsCollection()

    # All four top-level keys are mandatory in a serialized baseline.
    if not all(key in data for key in (
        'exclude_regex',
        'plugins_used',
        'results',
        'version',
    )):
        raise IOError

    result.exclude_regex = data['exclude_regex']

    plugins = []
    for plugin in data['plugins_used']:
        # pop() removes `name` so the remaining entries can be forwarded
        # verbatim as keyword arguments to the plugin constructor.
        plugin_classname = plugin.pop('name')
        plugins.append(initialize.from_plugin_classname(
            plugin_classname,
            **plugin
        ))
    result.plugins = tuple(plugins)

    for filename in data['results']:
        result.data[filename] = {}
        for item in data['results'][filename]:
            # Plaintext secrets are never stored in a baseline; create a
            # placeholder and patch in the persisted hash.
            secret = PotentialSecret(
                item['type'],
                filename,
                item['line_number'],
                'will be replaced',
            )
            secret.secret_hash = item['hashed_secret']
            result.data[filename][secret] = secret

    result.version = data['version']

    return result
def test_rolled_creds(self):
    """Same line, different secret"""
    new_findings = secrets_collection_factory([{'secret': 'secret_new'}])
    baseline = secrets_collection_factory([{'secret': 'secret'}])
    backup_baseline = baseline.data.copy()

    results = get_secrets_not_in_baseline(new_findings, baseline)

    # Only the rolled credential should survive the baseline filter.
    assert len(results.data['filename']) == 1

    expected = PotentialSecret('type', 'filename', 1, 'secret_new')
    assert (
        results.data['filename'][expected].secret_hash
        == PotentialSecret.hash_secret('secret_new')
    )
    # The baseline itself must not be mutated.
    assert baseline.data == backup_baseline
def analyze(self, f, filename, output_raw=False, output_verified_false=False):
    # We're not testing the plugin's ability to analyze secrets, so
    # it doesn't matter what we return
    fake = PotentialSecret('mock fixed value type', filename, 'asdf', 1)
    return {fake: fake}
def _load_baseline_from_dict(cls, data):
    """Initializes a SecretsCollection object from dictionary.

    :type data: dict
    :param data: properly formatted dictionary to load SecretsCollection from.

    :rtype: SecretsCollection
    :raises: IOError
    """
    result = SecretsCollection()

    # All four top-level keys are mandatory in a serialized baseline.
    if not all(key in data for key in (
        'exclude_regex',
        'plugins_used',
        'results',
        'version',
    )):
        raise IOError

    result.exclude_regex = data['exclude_regex']

    plugins = []
    for plugin in data['plugins_used']:
        # pop() removes `name` so the remaining entries can be forwarded
        # verbatim as keyword arguments to the plugin constructor.
        plugin_classname = plugin.pop('name')
        plugins.append(
            initialize.from_plugin_classname(plugin_classname, **plugin))
    result.plugins = tuple(plugins)

    for filename in data['results']:
        result.data[filename] = {}
        for item in data['results'][filename]:
            # Plaintext secrets are never stored in a baseline; create a
            # placeholder and patch in the persisted hash.
            secret = PotentialSecret(
                item['type'],
                filename,
                item['line_number'],
                secret='will be replaced',
                is_secret=item.get('is_secret'),
            )
            secret.secret_hash = item['hashed_secret']
            result.data[filename][secret] = secret

    result.version = data['version']

    return result
def analyze(self, f, filename, output_raw=False, output_verified_false=False):
    """Treats the stripped file content as a single 'Password' secret on line 2."""
    content = f.read().strip()
    found = PotentialSecret('Password', filename, content, 2)
    return {found: found}
def test_new_secret_line_old_file(self):
    """Same file, new line with potential secret"""
    new_findings = secrets_collection_factory([{'secret': 'secret1', 'lineno': 1}])
    baseline = secrets_collection_factory([{'secret': 'secret2', 'lineno': 2}])
    backup_baseline = baseline.data.copy()

    results = get_secrets_not_in_baseline(new_findings, baseline)

    # The new finding on line 1 is not covered by the baseline entry.
    assert len(results.data['filename']) == 1

    expected = PotentialSecret('type', 'filename', 1, 'secret1')
    assert (
        results.data['filename'][expected].secret_hash
        == PotentialSecret.hash_secret('secret1')
    )
    # The baseline itself must not be mutated.
    assert baseline.data == backup_baseline
def get_secret():
    """Generates a random secret, used for testing.

    :rtype: dict
    """
    value = random.randint(0, 500)
    return {
        'hashed_secret': PotentialSecret.hash_secret(str(value)),
        'line_number': value,
        'type': 'Test Type',
    }
def get_secret():
    """Generates a random secret, used for testing.

    :rtype: dict
    """
    # The same random number doubles as the line number and the
    # plaintext that gets hashed.
    number = random.randint(0, 500)
    payload = {'type': 'Test Type'}
    payload['line_number'] = number
    payload['hashed_secret'] = PotentialSecret.hash_secret(str(number))
    return payload
def test_verify_unverified_secret_no_match(self, mock_hmac_verify):
    """A context without an access key id never reaches the HMAC check."""
    mock_hmac_verify.side_effect = requests.exceptions.RequestException('oops')
    candidate = PotentialSecret('test', 'test filename', SECRET_ACCESS_KEY)

    verdict = IbmCosHmacDetector().verify(
        SECRET_ACCESS_KEY,
        'something={}'.format(ACCESS_KEY_ID),
        candidate,
    )

    assert verdict == VerifiedResult.UNVERIFIED
    mock_hmac_verify.assert_not_called()
def test_verify_valid_secret(self, mock_hmac_verify):
    """A matching access key id in context leads to a VERIFIED_TRUE result."""
    mock_hmac_verify.return_value = True
    candidate = PotentialSecret('test', 'test filename', SECRET_ACCESS_KEY)

    verdict = IbmCosHmacDetector().verify(
        SECRET_ACCESS_KEY,
        'access_key_id={}'.format(ACCESS_KEY_ID),
        candidate,
    )

    assert verdict == VerifiedResult.VERIFIED_TRUE
    mock_hmac_verify.assert_called_with(ACCESS_KEY_ID, SECRET_ACCESS_KEY)
def test_verify_valid_secret(self):
    """A 200 from the SoftLayer account endpoint verifies the token."""
    responses.add(
        responses.GET,
        'https://api.softlayer.com/rest/v3/SoftLayer_Account.json',
        json={'id': 1},
        status=200,
    )

    candidate = PotentialSecret('test softlayer', 'test filename', SL_TOKEN)
    verdict = SoftlayerDetector().verify(
        SL_TOKEN,
        'softlayer_username={}'.format(SL_USERNAME),
        candidate,
    )

    assert verdict == VerifiedResult.VERIFIED_TRUE
    # The username used during verification is recorded as another factor.
    assert candidate.other_factors['username'] == SL_USERNAME
def analyze_string(self, string, line_num, filename):
    """Flags the line if it contains any blacklisted substring.

    :type string: str
    :param string: the line to scan.

    :type line_num: int
    :param line_num: line number of `string` within `filename`.

    :type filename: str

    :rtype: dict of {PotentialSecret: PotentialSecret}
    """
    output = {}
    if any(line in string for line in BLACKLIST):
        secret = PotentialSecret(
            self.secret_type,
            filename,
            line_num,
            string,
        )
        # Bug fix: key the result on the PotentialSecret itself (as every
        # other plugin's analyze_string does), not on the filename —
        # keying by filename collapses distinct findings and breaks the
        # {secret: secret} contract the collection expects.
        output[secret] = secret
    return output
def test_analyze_standard_positives(self, file_content):
    """Each positive fixture yields exactly one keyword-detected secret."""
    detector = KeywordDetector()
    secrets = detector.analyze(mock_file_object(file_content), 'mock_filename')

    assert len(secrets) == 1

    expected_hash = PotentialSecret.hash_secret('{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
    for found in secrets:
        assert found.filename == 'mock_filename'
        assert found.secret_hash == expected_hash
def test_verify_invalid_secret(self):
    """A verifier that rejects the key leads to VERIFIED_FALSE."""
    with mock.patch(
        'detect_secrets.plugins.aws.verify_aws_secret_access_key',
        return_value=False,
    ) as mock_verify:
        candidate = PotentialSecret('test aws', 'test filename', self.example_key)
        verdict = AWSKeyDetector().verify(
            self.example_key,
            '={}'.format(EXAMPLE_SECRET),
            candidate,
        )

    assert verdict == VerifiedResult.VERIFIED_FALSE
    mock_verify.assert_called_with(self.example_key, EXAMPLE_SECRET)
def analyze_string_content(self, string, line_num, filename):
    """Searches string for custom pattern, and captures all high entropy
    strings that match self.regex, with a limit defined as self.entropy_limit.
    """
    # Sequential strings (e.g. 'abcdef...') look high-entropy but are noise.
    candidates = (
        result
        for result in self.secret_generator(string)
        if not self._is_sequential_string(result)
    )
    return {
        secret: secret
        for secret in (
            PotentialSecret(self.secret_type, filename, candidate, line_num)
            for candidate in candidates
        )
    }
def analyze_string(self, string, line_num, filename):
    """Maps every identifier the generator finds on this line to a PotentialSecret."""
    found = [
        PotentialSecret(
            self.secret_type,
            filename,
            identifier,
            line_num,
        )
        for identifier in self.secret_generator(string)
    ]
    return {secret: secret for secret in found}
def test_analyze_quotes_required_positives(self, file_content, file_extension):
    """Quote-requiring file types still yield exactly one detected secret."""
    mock_filename = 'mock_filename{}'.format(file_extension)
    output = KeywordDetector().analyze(mock_file_object(file_content), mock_filename)

    assert len(output) == 1

    expected_hash = PotentialSecret.hash_secret('{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
    for found in output:
        assert found.filename == mock_filename
        assert found.secret_hash == expected_hash
def test_verify_invalid_connect_returns_none(self, mock_db2_connect):
    """A None connection object means the credentials were rejected."""
    mock_db2_connect.return_value = None
    candidate = PotentialSecret('test db2', 'test filename', DB2_PASSWORD)

    context = '''user={}, password={}, database={}, host={}, port={}'''.format(
        DB2_USER, DB2_PASSWORD, DB2_DATABASE, DB2_HOSTNAME, DB2_PORT,
    )
    verdict = Db2Detector().verify(DB2_PASSWORD, context, candidate)

    assert verdict == VerifiedResult.VERIFIED_FALSE
    mock_db2_connect.assert_called_with(DB2_CONN_STRING, '', '')
def test_verify_times_out(self, mock_db2_connect):
    """A connection exception (e.g. timeout) leaves the secret unverified."""
    mock_db2_connect.side_effect = Exception('Timeout')
    candidate = PotentialSecret('test db2', 'test filename', DB2_PASSWORD)

    context = '''user={}, password={}, database={}, host={}, port={}'''.format(
        DB2_USER, DB2_PASSWORD, DB2_DATABASE, DB2_HOSTNAME, DB2_PORT,
    )
    verdict = Db2Detector().verify(DB2_PASSWORD, context, candidate)

    assert verdict == VerifiedResult.UNVERIFIED
    mock_db2_connect.assert_called_with(DB2_CONN_STRING, '', '')
def analyze_string(self, string, line_num, filename):
    """Searches string for custom pattern, and captures all high entropy
    strings that match self.regex, with a limit defined as self.entropy_limit.
    """
    # Lines matching the whitelist are explicitly marked as non-secrets.
    if WHITELIST_REGEX.search(string):
        return {}

    return {
        secret: secret
        for secret in (
            PotentialSecret(self.secret_type, filename, line_num, result)
            for result in self.secret_generator(string)
        )
    }
def test_verify_db2_url_key(self, mock_db2_connect):
    """A JDBC URL in context supplies every factor needed for verification."""
    mock_db2_connect.return_value = MagicMock()
    candidate = PotentialSecret('test db2', 'test filename', DB2_PASSWORD)

    context = '''jdbc:db2://{}:{}/{}:user={};password={}; '''.format(
        DB2_HOSTNAME, DB2_PORT, DB2_DATABASE, DB2_USER, DB2_PASSWORD,
    )
    verdict = Db2Detector().verify(DB2_PASSWORD, context, candidate)

    assert verdict == VerifiedResult.VERIFIED_TRUE
    mock_db2_connect.assert_called_with(DB2_CONN_STRING, '', '')

    # Every factor parsed from the URL is recorded on the secret.
    assert candidate.other_factors['database'] == DB2_DATABASE
    assert candidate.other_factors['hostname'] == DB2_HOSTNAME
    assert candidate.other_factors['port'] == DB2_PORT
    assert candidate.other_factors['username'] == DB2_USER
def analyze_string_content(self, string, line_num, filename):
    """Collects every identifier the generator yields for this file's type."""
    filetype = determine_file_type(filename)

    results = {}
    for identifier in self.secret_generator(string, filetype=filetype):
        found = PotentialSecret(
            self.secret_type,
            filename,
            identifier,
            line_num,
        )
        results[found] = found
    return results
def analyze_string(self, string, line_num, filename):
    """Skips whitelisted lines; otherwise maps each identifier found in the
    lower-cased line to a PotentialSecret.
    """
    if WHITELIST_REGEX.search(string):
        return {}

    # Matching is case-insensitive: the generator sees the lowered line.
    lowered = string.lower()
    return {
        secret: secret
        for secret in (
            PotentialSecret(
                self.secret_type,
                filename,
                identifier,
                line_num,
            )
            for identifier in self.secret_generator(lowered)
        )
    }
def get_baseline_dict(self, gmtime):
    # They are all the same secret, so they should all have the same secret hash.
    secret_hash = PotentialSecret.hash_secret('secret')

    def entry(type_, line_number):
        # One serialized finding, sharing the common hash.
        return {
            'type': type_,
            'line_number': line_number,
            'hashed_secret': secret_hash,
        }

    return {
        'generated_at': strftime('%Y-%m-%dT%H:%M:%SZ', gmtime),
        'exclude_regex': '',
        'plugins_used': [
            {'name': 'HexHighEntropyString', 'hex_limit': 3},
            {'name': 'PrivateKeyDetector'},
        ],
        'results': {
            # Line numbers should be sorted, for better readability
            'fileA': [entry('B', 2), entry('A', 3)],
            'fileB': [entry('C', 1)],
        },
        'version': VERSION,
    }
def test_rolled_creds(self):
    """Same line, different secret"""
    new_findings = secrets_collection_factory([
        {'secret': 'secret_new'},
    ])
    baseline = secrets_collection_factory([
        {'secret': 'secret'},
    ])
    backup = baseline.data.copy()

    filtered = get_secrets_not_in_baseline(new_findings, baseline)

    # Only the rolled credential survives the baseline filter.
    assert len(filtered.data['filename']) == 1

    key = PotentialSecret('type', 'filename', 1, 'secret_new')
    expected_hash = PotentialSecret.hash_secret('secret_new')
    assert filtered.data['filename'][key].secret_hash == expected_hash

    # The baseline itself must not be mutated.
    assert baseline.data == backup
def test_new_secret_line_old_file(self):
    """Same file, new line with potential secret"""
    new_findings = secrets_collection_factory([
        {'secret': 'secret1', 'lineno': 1},
    ])
    baseline = secrets_collection_factory([
        {'secret': 'secret2', 'lineno': 2},
    ])
    backup = baseline.data.copy()

    filtered = get_secrets_not_in_baseline(new_findings, baseline)

    # The new finding on line 1 is not covered by the baseline entry.
    assert len(filtered.data['filename']) == 1

    key = PotentialSecret('type', 'filename', 1, 'secret1')
    expected_hash = PotentialSecret.hash_secret('secret1')
    assert filtered.data['filename'][key].secret_hash == expected_hash

    # The baseline itself must not be mutated.
    assert baseline.data == backup