def test_build_automaton():
    """Build an automaton from a mocked word list; check its hash and matches."""
    mocked_file_contents = """ foam\n """

    with mock_open(
        data=mocked_file_contents,
        namespace='detect_secrets.util.open',
    ):
        # `detect_secrets.util.open` is patched above, so the path given
        # here is only a placeholder.
        built_automaton, actual_hash = util.build_automaton(
            word_list='will_be_mocked.txt',
        )

        expected_hash = hashlib.sha1('foam'.encode('utf-8')).hexdigest()
        assert actual_hash == expected_hash

        # A secret containing a listed word is found...
        assert filters.is_found_with_aho_corasick(
            secret='foam_roller',
            automaton=built_automaton,
        )
        # ...while one without any listed word is not.
        assert not filters.is_found_with_aho_corasick(
            secret='no_words_in_word_list',
            automaton=built_automaton,
        )
def get_project_secrets(project_results_dir: str, project_name: str) -> dict:
    """Run a `scan` over one project directory and return its result.

    :param project_results_dir: directory that contains the project.
    :param project_name: name of the project inside `project_results_dir`.
    :returns: the value produced by `_perform_scan` for that project.
    """
    scan_target = f"{project_results_dir}/{project_name}"
    args = parse_args(["scan", scan_target])

    if args.word_list_file:
        automaton, word_list_hash = build_automaton(args.word_list_file)
    else:
        automaton, word_list_hash = None, None

    # Plugins are *always* rescanned with fresh settings, because
    # we want to get the latest updates.
    plugins = initialize.from_parser_builder(
        plugins_dict=args.plugins,
        custom_plugin_paths=args.custom_plugin_paths,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
    )

    return _perform_scan(args, plugins, automaton, word_list_hash)
def main(argv=None):
    """Scan the given files and compare the findings against the baseline.

    :param argv: command-line arguments, handed to `parse_args` unchanged.

    :rtype: int
    :returns:
        1 if the baseline cannot be loaded or secrets that are not in the
        baseline were found;
        3 if the baseline file was rewritten;
        0 otherwise.
    """
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    try:
        # Validate the baseline up front, before doing any further
        # computation.
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        # Error logs handled within logic.
        return 1

    automaton = None
    if args.word_list_file:
        # Only the automaton is needed here; the hash is not used.
        automaton, _word_list_hash = build_automaton(args.word_list_file)

    plugins = initialize.from_parser_builder(
        args.plugins,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
    )

    # Merge plugins from baseline
    if baseline_collection:
        plugins = initialize.merge_plugins_from_baseline(
            baseline_collection.plugins,
            args,
            automaton,
        )
        baseline_collection.plugins = plugins

    results = find_secrets_in_files(args, plugins)
    if baseline_collection:
        # Keep the unfiltered scan; it is needed for trimming the baseline.
        original_results = results
        results = get_secrets_not_in_baseline(results, baseline_collection)

    if len(results.data) > 0:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    baseline_modified = trim_baseline_of_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    )

    if VERSION != baseline_collection.version:
        baseline_collection.version = VERSION
        baseline_modified = True

    if not baseline_modified:
        return 0

    write_baseline_to_file(
        filename=args.baseline[0],
        data=baseline_collection.format_for_baseline_output(),
    )
    log.error(
        'The baseline file was updated.\n'
        'Probably to keep line numbers of secrets up-to-date.\n'
        'Please `git add {}`, thank you.\n\n'.format(args.baseline[0]),
    )
    return 3
def main(argv=None):
    """Console entry point dispatching the `scan` and `audit` actions.

    :type argv: list|None
    :param argv: command-line arguments without the program name. When
        omitted, `sys.argv[1:]` is read at call time.

    :rtype: int
    :returns: 1 when an audit comparison is requested without exactly two
        files; 0 in every other case.
    """
    if len(sys.argv) == 1:  # pragma: no cover
        sys.argv.append('--help')

    # The default must be resolved here rather than in the signature: a
    # `sys.argv[1:]` default is evaluated once at definition time, so it
    # would never see the `--help` fallback appended above.
    if argv is None:
        argv = sys.argv[1:]

    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    if args.action == 'scan':
        automaton = None
        word_list_hash = None
        if args.word_list_file:
            automaton, word_list_hash = build_automaton(args.word_list_file)

        # Plugins are *always* rescanned with fresh settings, because
        # we want to get the latest updates.
        plugins = initialize.from_parser_builder(
            plugins_dict=args.plugins,
            custom_plugin_paths=args.custom_plugin_paths,
            exclude_lines_regex=args.exclude_lines,
            automaton=automaton,
            should_verify_secrets=not args.no_verify,
        )

        if args.string:
            line = args.string
            if isinstance(args.string, bool):
                # A bare flag (no inline value): take the first line of stdin.
                line = sys.stdin.read().splitlines()[0]

            _scan_string(line, plugins)
        else:
            baseline_dict = _perform_scan(
                args,
                plugins,
                automaton,
                word_list_hash,
            )

            if args.import_filename:
                write_baseline_to_file(
                    filename=args.import_filename[0],
                    data=baseline_dict,
                )
            else:
                print(baseline.format_baseline_for_output(baseline_dict))

    elif args.action == 'audit':
        if not args.diff and not args.display_results:
            audit.audit_baseline(args.filename[0])
            return 0

        if args.display_results:
            audit.print_audit_results(args.filename[0])
            return 0

        # Only the diff mode remains, and it needs two baselines.
        if len(args.filename) != 2:
            print(
                'Must specify two files to compare!',
                file=sys.stderr,
            )
            return 1

        try:
            audit.compare_baselines(args.filename[0], args.filename[1])
        except audit.RedundantComparisonError:
            print(
                'No difference, because it\'s the same file!',
                file=sys.stderr,
            )

    return 0
def load_baseline_from_dict(cls, data, plugin_filenames=None):
    """Initializes a SecretsCollection object from dictionary.

    :type data: dict
    :param data: properly formatted dictionary to load SecretsCollection from.
        It is left unmodified.

    :type plugin_filenames: tuple
    :param plugin_filenames: the plugin filenames.

    :rtype: SecretsCollection
    :raises: IOError
    """
    result = SecretsCollection()

    if not all(key in data for key in ('plugins_used', 'results')):
        raise IOError

    # In v0.12.0 `exclude_regex` got replaced by `exclude`
    if not any(key in data for key in ('exclude', 'exclude_regex')):
        raise IOError

    if 'exclude_regex' in data:
        result.exclude_files = data['exclude_regex']
    else:
        result.exclude_files = data['exclude']['files']
        result.exclude_lines = data['exclude']['lines']

    # In v0.12.7 the `--word-list` option got added
    automaton = None
    if 'word_list' in data:
        result.word_list_file = data['word_list']['file']
        result.word_list_hash = data['word_list']['hash']

        if result.word_list_file:
            # Always ignore the given `data['word_list']['hash']`
            # The difference will show whenever the word list changes
            automaton, result.word_list_hash = build_automaton(
                result.word_list_file,
            )

    plugins = []
    for plugin in data['plugins_used']:
        # Pop `name` from a copy: popping from the entry itself would strip
        # the key from the caller's dict and break a second load of `data`.
        plugin_options = dict(plugin)
        plugin_classname = plugin_options.pop('name')

        initialized_plugin = initialize.from_plugin_classname(
            plugin_classname,
            exclude_lines_regex=result.exclude_lines,
            automaton=automaton,
            should_verify_secrets=True,
            plugin_filenames=plugin_filenames,
            **plugin_options
        )
        # Plugins that could not be initialized are skipped.
        if initialized_plugin is not None:
            plugins.append(initialized_plugin)
    result.plugins = tuple(plugins)

    for filename in data['results']:
        result.data[filename] = {}

        for item in data['results'][filename]:
            secret = PotentialSecret(
                item['type'],
                filename,
                secret='will be replaced',
                lineno=item['line_number'],
                is_secret=item.get('is_secret'),
                output_raw=result.output_raw,
                is_verified=item['is_verified'],
                verified_result=item.get('verified_result'),
            )
            # The baseline only stores the hash, so overwrite the hash
            # derived from the placeholder secret above.
            secret.secret_hash = item['hashed_secret']
            result.data[filename][secret] = secret

    # Baselines without a `version` key are treated as version 0.0.0.
    result.version = data.get('version', '0.0.0')

    return result
def load_baseline_from_dict(cls, data):
    """Initializes a SecretsCollection object from dictionary.

    :type data: dict
    :param data: properly formatted dictionary to load SecretsCollection from.
        It is left unmodified.

    :rtype: SecretsCollection
    :raises: IOError
    """
    result = SecretsCollection()

    if not all(key in data for key in ('plugins_used', 'results')):
        raise IOError

    # In v0.12.0 `exclude_regex` got replaced by `exclude`
    if not any(key in data for key in ('exclude', 'exclude_regex')):
        raise IOError

    if 'exclude_regex' in data:
        result.exclude_files = data['exclude_regex']
    else:
        result.exclude_files = data['exclude']['files']
        result.exclude_lines = data['exclude']['lines']

    # In v0.12.7 the `--word-list` option got added
    automaton = None
    if 'word_list' in data:
        result.word_list_file = data['word_list']['file']
        result.word_list_hash = data['word_list']['hash']

        if result.word_list_file:
            # Always ignore the existing `data['word_list']['hash']`
            # The difference will show whenever the word list changes
            automaton, result.word_list_hash = build_automaton(
                result.word_list_file,
            )

    # In v0.14.0 the `--custom-plugins` option got added
    result.custom_plugin_paths = tuple(data.get('custom_plugin_paths', ()))

    plugins = []
    for plugin in data['plugins_used']:
        # Pop `name` from a copy: popping from the entry itself would strip
        # the key from the caller's dict and break a second load of `data`.
        plugin_options = dict(plugin)
        plugin_classname = plugin_options.pop('name')

        plugins.append(
            initialize.from_plugin_classname(
                plugin_classname=plugin_classname,
                custom_plugin_paths=result.custom_plugin_paths,
                exclude_lines_regex=result.exclude_lines,
                automaton=automaton,
                should_verify_secrets=False,
                **plugin_options
            ),
        )
    result.plugins = tuple(plugins)

    for filename in data['results']:
        result.data[filename] = {}

        for item in data['results'][filename]:
            secret = PotentialSecret(
                item['type'],
                filename,
                secret='will be replaced',
                lineno=item['line_number'],
                is_secret=item.get('is_secret'),
            )
            # The baseline only stores the hash, so overwrite the hash
            # derived from the placeholder secret above.
            secret.secret_hash = item['hashed_secret']
            result.data[filename][secret] = secret

    # Baselines without a `version` key are treated as version 0.0.0.
    result.version = data.get('version', '0.0.0')

    return result