def _perform_scan(args, plugins, automaton, word_list_hash):
    """
    Run a scan and return the resulting baseline, merged with the
    previously imported baseline when one exists.

    Note that `args` is mutated in place: settings missing from the CLI
    are filled in from the imported baseline.

    :param args: output of `argparse.ArgumentParser.parse_args`
    :param plugins: tuple of initialized plugins

    :type automaton: ahocorasick.Automaton|None
    :param automaton: optional automaton for ignoring certain words.

    :type word_list_hash: str|None
    :param word_list_hash: optional iterated sha1 hash of the words in
        the word list.

    :rtype: dict
    """
    previous = _get_existing_baseline(args.import_filename)

    if previous:
        # The plugin merge must happen before `args` is back-filled below,
        # so that it observes the arguments exactly as given on the CLI.
        plugins = initialize.merge_plugins_from_baseline(
            _get_plugins_from_baseline(previous),
            args,
            automaton=automaton,
        )

        # CLI arguments win; the imported baseline only supplies values
        # for options that were left unset.
        if not args.exclude_files:
            args.exclude_files = _get_exclude_files(previous)

        if not args.exclude_lines:
            if previous.get('exclude'):
                args.exclude_lines = previous['exclude']['lines']

        if not args.word_list_file:
            if previous.get('word_list'):
                args.word_list_file = previous['word_list']['file']

        if not args.custom_plugin_paths:
            if previous.get('custom_plugin_paths'):
                args.custom_plugin_paths = previous['custom_plugin_paths']

    # When a baseline file was named, fold that knowledge into the
    # exclude_files regex.
    if args.import_filename:
        _add_baseline_to_exclude_files(args)

    scan_collection = baseline.initialize(
        path=args.path,
        plugins=plugins,
        custom_plugin_paths=args.custom_plugin_paths,
        exclude_files_regex=args.exclude_files,
        exclude_lines_regex=args.exclude_lines,
        word_list_file=args.word_list_file,
        word_list_hash=word_list_hash,
        should_scan_all_files=args.all_files,
    )
    fresh_output = scan_collection.format_for_baseline_output()

    if not previous:
        return fresh_output

    return baseline.merge_baseline(
        previous,
        fresh_output,
    )


def main(argv=None):
    """
    Scan the given files for secrets and compare them against the baseline.

    :param argv: argument list handed to `parse_args`.

    :rtype: int
    :returns: exit status:
        0 -- no new secrets were found and the baseline was not rewritten
             (or no baseline is in use);
        1 -- loading the baseline raised IOError/ValueError, or secrets
             that are not in the baseline were found;
        3 -- no new secrets, but the baseline file was rewritten.
    """
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    # Validate the baseline up front, before doing any expensive work.
    try:
        tracked = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        # Error logs handled within logic.
        return 1

    automaton, word_list_hash = (
        build_automaton(args.word_list_file)
        if args.word_list_file
        else (None, None)
    )

    plugins = initialize.from_parser_builder(
        args.plugins,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
    )

    # Merge plugins from baseline, and record the merged set back on it.
    if tracked:
        plugins = initialize.merge_plugins_from_baseline(
            tracked.plugins,
            args,
            automaton,
        )
        tracked.plugins = plugins

    scanned = find_secrets_in_files(args, plugins)
    unseen = (
        get_secrets_not_in_baseline(
            scanned,
            tracked,
        )
        if tracked
        else scanned
    )

    if len(unseen.data) > 0:
        pretty_print_diagnostics(unseen)
        return 1

    if not tracked:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets.
    # The trim is given the full scan results, not the filtered ones.
    needs_rewrite = trim_baseline_of_removed_secrets(
        scanned,
        tracked,
        args.filenames,
    )

    if VERSION != tracked.version:
        tracked.version = VERSION
        needs_rewrite = True

    if not needs_rewrite:
        return 0

    write_baseline_to_file(
        filename=args.baseline[0],
        data=tracked.format_for_baseline_output(),
    )

    log.error(
        'The baseline file was updated.\n'
        'Probably to keep line numbers of secrets up-to-date.\n'
        'Please `git add {}`, thank you.\n\n'.format(args.baseline[0]),
    )
    return 3