def scan_repo(args):
    """Scan a single tracked repository for secrets.

    :type args: argparse.Namespace
    :returns: 0 on success, 1 when the tracked repo metadata file is missing.
    """
    s3_config = getattr(args, 's3_config', None)
    try:
        repo_class = tracked_repo_factory(args.local, bool(s3_config))
        repo = repo_class.load_from_file(
            args.repo,
            args.root_dir,
            s3_config=s3_config,
        )
    except FileNotFoundError:
        log.error('Unable to find repo: %s', args.repo)
        return 1

    secrets = repo.scan(
        exclude_files_regex=args.exclude_files,
        exclude_lines_regex=args.exclude_lines,
    )

    found_secrets = len(secrets.data) > 0

    # Alert whenever something was found, or unconditionally if configured.
    if found_secrets or args.always_run_output_hook:
        _alert_on_secrets_found(repo, secrets.json(), args.output_hook)

    # Only advance the tracked state on a clean (and non-dry-run) scan,
    # unless the caller explicitly asked to always update it.
    if args.always_update_state or (not found_secrets and not args.dry_run):
        _update_tracked_repo(repo)

    return 0
def _git(directory, *args, **kwargs):
    """Run a `git` command against the given git directory.

    :type directory: str
    :param directory: path passed to `git --git-dir`.

    Keyword arguments:
        should_strip_output (bool): when False, return the raw (unstripped)
            output. This is to fix
            https://github.com/matiasb/python-unidiff/issues/54

    :returns: decoded command output, or None for the known "empty repo
        during initialization" failure (so the repo metadata can still be
        written to /tracked).
    :raises: subprocess.CalledProcessError for any unrecognized git failure.
    """
    try:
        output = subprocess.check_output(
            ['git', '--git-dir', directory] + list(args),
            stderr=subprocess.STDOUT,
        ).decode('utf-8', errors='ignore')

        # This is to fix https://github.com/matiasb/python-unidiff/issues/54
        if not kwargs.get('should_strip_output', True):
            return output
        return output.strip()
    except subprocess.CalledProcessError as e:
        # Decode with errors='ignore' for consistency with the success path,
        # so a stray non-UTF-8 byte in git's error output cannot itself raise
        # UnicodeDecodeError and mask the real failure.
        error_message = e.output.decode('utf-8', errors='ignore')

        # This happens during scanning and means it's an empty repo. Bail out
        # of the scan process and log the error.
        if re.match(r"fatal: couldn't find remote ref (None|HEAD)", error_message):
            # `directory` is the best/only output without drastic rewrites;
            # the hashed path correlates to the repo.
            log.error("Empty repository cannot be scanned: %s", directory)
            # TODO: This won't work if scan loops through repos, but works
            # since it's a single scan currently.
            sys.exit(1)

        # This happens during initialization and means it's an empty repo.
        # Returning None allows the repo metadata to be written to /tracked.
        elif re.match(
            r"fatal: ambiguous argument 'HEAD': unknown revision or path not in the working tree.",
            error_message,
        ):
            return None
        else:
            raise
def scan_diff(
    self,
    diff,
    baseline_filename='',
    last_commit_hash='',
    repo_name='',
):
    """Scan an incremental diff and add any findings to self.data.

    For optimization purposes, our scanning strategy focuses on looking
    at incremental differences, rather than re-scanning the codebase
    every time.

    :type diff: str
    :param diff: diff string. Eg. The output of `git diff <fileA> <fileB>`

    :type baseline_filename: str
    :param baseline_filename: if there are any baseline secrets, then the
        baseline file will have hashes in them. By specifying it, we can
        skip this clear exception.

    :type last_commit_hash: str
    :param last_commit_hash: used for logging only -- the last commit hash
        we saved

    :type repo_name: str
    :param repo_name: used for logging only -- the name of the repo
    """
    try:
        patch_set = PatchSet.from_string(diff)
    except UnidiffParseError:  # pragma: no cover
        log.error({
            'alert': 'UnidiffParseError',
            'hash': last_commit_hash,
            'repo_name': repo_name,
        })
        raise

    # Compile the exclusion pattern once, outside the per-file loop.
    exclusion_pattern = (
        re.compile(self.exclude_regex, re.IGNORECASE)
        if self.exclude_regex
        else None
    )

    for patch_file in patch_set:
        filename = patch_file.path

        # Skip excluded files, and the baseline file itself.
        if exclusion_pattern and exclusion_pattern.search(filename):
            continue
        if filename == baseline_filename:
            continue

        for results, plugin in self._results_accumulator(filename):
            results.update(
                self._extract_secrets_from_patch(patch_file, plugin, filename),
            )
def scan_diff(
    self,
    diff,
    baseline_filename='',
    last_commit_hash='',
    repo_name='',
):
    """Add secrets found in an incremental diff to self.data.

    For optimization purposes, our scanning strategy focuses on looking
    at incremental differences, rather than re-scanning the codebase
    every time.

    :type diff: str
    :param diff: diff string. e.g. The output of `git diff <fileA> <fileB>`

    :type baseline_filename: str
    :param baseline_filename: if there are any baseline secrets, then the
        baseline file will have hashes in them. By specifying it, we can
        skip this clear exception.

    :type last_commit_hash: str
    :param last_commit_hash: used for logging only -- the last commit hash
        we saved

    :type repo_name: str
    :param repo_name: used for logging only -- the name of the repo
    """
    try:
        patch_set = PatchSet.from_string(diff)
    except UnidiffParseError:  # pragma: no cover
        alert = {
            'alert': 'UnidiffParseError',
            'hash': last_commit_hash,
            'repo_name': repo_name,
        }
        log.error(alert)
        raise

    file_exclusion = None
    if self.exclude_regex:
        file_exclusion = re.compile(self.exclude_regex, re.IGNORECASE)

    for patch_file in patch_set:
        filename = patch_file.path

        # The baseline file and any excluded files are skipped outright.
        if filename == baseline_filename:
            continue
        if file_exclusion is not None and file_exclusion.search(filename):
            continue

        for results, plugin in self._results_accumulator(filename):
            found = self._extract_secrets_from_patch(
                patch_file,
                plugin,
                filename,
            )
            results.update(found)
def open_config_file(config_file):
    """Read and parse a YAML configuration file.

    :type config_file: str
    :param config_file: path of the config file to parse.
    :returns: parsed YAML content.
    :raises: IOError when the file cannot be opened (logged first).
    """
    try:
        with codecs.open(config_file) as f:
            parsed = yaml.safe_load(f)
    except IOError:
        log.error('Unable to open config file: %s', config_file)
        raise

    return parsed
def _should_discard_tracked_repo_in_config(tracked_repo):
    """Return True when a tracked repo entry fails validation.

    We log the error, rather than hard failing, because we don't want
    to hard fail if one out of many repositories are bad.

    :type tracked_repo: dict
    """
    # Local repos are validated as filesystem paths; everything else
    # must look like a git URL.
    validator = (
        is_valid_file
        if tracked_repo.get('is_local_repo', False)
        else is_git_url
    )

    try:
        validator(tracked_repo['repo'])
    except argparse.ArgumentTypeError as e:
        log.error(str(e))
        return True

    return False
def load_baseline_from_string(cls, string):
    """Create a SecretsCollection from a JSON baseline string.

    :type string: str
    :param string: string to load SecretsCollection from.

    :rtype: SecretsCollection
    :raises: IOError
    """
    try:
        baseline_dict = json.loads(string)
        return cls._load_baseline_from_dict(baseline_dict)
    except (IOError, ValueError):
        log.error('Incorrectly formatted baseline!')
        raise
def _get_baseline_string_from_file(filename): # pragma: no cover """Breaking this function up for mockability.""" try: with open(filename) as f: return f.read() except IOError: log.error( 'Unable to open baseline file: {}\n' 'Please create it via\n' ' `detect-secrets scan > {}`\n' .format(filename, filename), ) raise
def load_baseline_from_string(cls, string):
    """Deserialize a JSON string into a SecretsCollection.

    :type string: str
    :param string: string to load SecretsCollection from.

    :rtype: SecretsCollection
    :raises: IOError
    """
    try:
        parsed = json.loads(string)
        return cls.load_baseline_from_dict(parsed)
    except (IOError, ValueError):
        log.error('Incorrectly formatted baseline!')
        raise
def _alert_on_secrets_found(repo, secrets, output_hook):
    """Log the finding and forward the secrets payload to the output hook.

    :type repo: detect_secrets_server.repos.base_tracked_repo.BaseTrackedRepo

    :type secrets: dict
    :param secrets: output of detect_secrets.core.secrets_collection.SecretsCollection.json()

    :type output_hook: detect_secrets_server.hooks.base.BaseHook
    """
    # NOTE(review): logged at ERROR level so findings stand out even under
    # quiet log configurations -- confirm this severity is intentional.
    log.error('Secrets found in %s', repo.name)

    # Presumably enriches `secrets` with git-blame author information before
    # alerting -- verify against _set_authors_for_found_secrets.
    _set_authors_for_found_secrets(repo, secrets)

    output_hook.alert(repo.name, secrets)
def from_plugin_classname(
    plugin_classname,
    custom_plugin_paths,
    exclude_lines_regex=None,
    automaton=None,
    should_verify_secrets=False,
    **kwargs
):
    """Initializes a plugin class, given a classname and kwargs.

    :type plugin_classname: str
    :param plugin_classname: subclass of BasePlugin.

    :type custom_plugin_paths: Tuple[str]
    :param custom_plugin_paths: possibly empty tuple of paths that have
        custom plugins.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: optional regex for ignored lines.

    :type automaton: ahocorasick.Automaton|None
    :param automaton: optional automaton for ignoring English-words.

    :type should_verify_secrets: bool

    :raises: TypeError when the plugin is unknown or cannot be constructed.
    """
    available_plugins = import_plugins(custom_plugin_paths)
    if plugin_classname not in available_plugins:
        log.error('Error: No such `{}` plugin to initialize.'.format(
            plugin_classname))
        log.error('Chances are you should run `pre-commit autoupdate`.')
        log.error(
            'This error can occur when using a baseline that was made by '
            'a newer detect-secrets version than the one running.',
        )
        log.error(
            'It can also occur if the baseline has custom plugin paths, '
            'but the `--custom-plugins` option was not passed.',
        )
        raise TypeError

    klass = available_plugins[plugin_classname]
    try:
        return klass(
            exclude_lines_regex=exclude_lines_regex,
            automaton=automaton,
            should_verify=should_verify_secrets,
            **kwargs
        )
    except TypeError:
        log.error('Unable to initialize plugin!')
        raise
def get_diff(self, from_sha):
    """Return the git diff from `from_sha` to the repo's current state.

    :type from_sha: str
    :param from_sha: commit hash to diff from.
    :raises: subprocess.CalledProcessError when the hash cannot be diffed
        (debugging output is logged first).
    """
    try:
        return git.get_diff(self._repo_location, from_sha)
    except subprocess.CalledProcessError:
        # This sometimes complains, if the hash does not exist.
        # There could be a variety of reasons for this, including:
        #   - some sort of rewrite of git history
        #   - this scanner being run on an out-of-date repo
        #
        # To prevent from any further alerting on this, we are going to
        # update the last_commit_hash, to prevent re-alerting on old
        # secrets.
        # NOTE(review): the hash update described above happens in the
        # caller, not here -- this method only logs and re-raises.
        #
        # TODO: Fix this to be more robust.
        log.error(self._construct_debugging_output(from_sha), )
        raise
def load_baseline_from_string(cls, string, plugin_filenames=None):
    """Build a SecretsCollection from a serialized JSON baseline.

    :type string: str
    :param string: string to load SecretsCollection from.

    :type plugin_filenames: tuple
    :param plugin_filenames: list of plugins to import

    :rtype: SecretsCollection
    :raises: IOError
    """
    try:
        parsed = json.loads(string)
        return cls.load_baseline_from_dict(
            parsed,
            plugin_filenames=plugin_filenames,
        )
    except (IOError, ValueError):
        log.error('Incorrectly formatted baseline!')
        raise
def scan_repo(args):
    """Scan a tracked repository for secrets.

    :type args: argparse.Namespace
    :returns: 0 on success, 1 when the tracked repo metadata cannot be found.
    """
    s3_config = getattr(args, 's3_config', None)
    try:
        repo = tracked_repo_factory(
            args.local,
            bool(s3_config),
        ).load_from_file(
            args.repo,
            args.root_dir,
            s3_config=s3_config,
        )
    except FileNotFoundError:
        log.error('Unable to find repo: %s', args.repo)
        return 1

    # If last_commit_hash is empty, re-clone and see if there's an initial
    # commit hash to pick up.
    if repo.last_commit_hash is None:
        _clone_and_save_repo(repo)

    secrets = repo.scan(
        exclude_files_regex=args.exclude_files,
        exclude_lines_regex=args.exclude_lines,
        scan_head=args.scan_head,
    )

    if secrets.data or args.always_run_output_hook:
        _alert_on_secrets_found(repo, secrets.json(), args.output_hook)

    # State only advances on a clean, non-dry-run, non-HEAD scan -- unless
    # explicitly forced.
    should_update_state = args.always_update_state or (
        not secrets.data
        and not args.dry_run
        and not args.scan_head
    )
    if should_update_state:
        _update_tracked_repo(repo)

    return 0
def from_plugin_classname(
    plugin_classname,
    exclude_lines_regex=None,
    automaton=None,
    should_verify_secrets=False,
    **kwargs
):
    """Initializes a plugin class, given a classname and kwargs.

    :type plugin_classname: str
    :param plugin_classname: subclass of BasePlugin.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: optional regex for ignored lines.

    :type automaton: ahocorasick.Automaton|None
    :param automaton: optional automaton for ignoring English-words.

    :type should_verify_secrets: bool

    :raises: TypeError when the plugin is unknown or cannot be constructed.
    """
    registry = import_plugins()
    if plugin_classname not in registry:
        log.error('Error: No such `{}` plugin to initialize.'.format(
            plugin_classname))
        log.error('Chances are you should run `pre-commit autoupdate`.')
        log.error(
            'This error occurs when using a baseline that was made by '
            'a newer detect-secrets version than the one running.',
        )
        raise TypeError

    klass = registry[plugin_classname]
    try:
        return klass(
            exclude_lines_regex=exclude_lines_regex,
            automaton=automaton,
            should_verify=should_verify_secrets,
            **kwargs
        )
    except TypeError:
        log.warning('Unable to initialize plugin!')
        raise
def _merge_plugin_sensitivity(default_sensitivity, plugin_overrides):
    """Overlay a repo entry's plugin settings on the default sensitivity.

    :type default_sensitivity: SensitivityValues
    :type plugin_overrides: dict
    :rtype: SensitivityValues
    """
    plugin_dict = default_sensitivity._asdict()

    # Use the SensitivityValues constructor to normalize the override values.
    # NOTE(review): since _asdict() yields every field, update() overwrites
    # all of them -- defaults only survive if the constructor preserves them
    # for unspecified plugins. Confirm against SensitivityValues' defaults.
    entry_sensitivity = SensitivityValues(**plugin_overrides)
    plugin_dict.update(entry_sensitivity._asdict())

    return SensitivityValues(**plugin_dict)


def _repo_config_with_baseline(repo_config, baseline_file):
    """Return a copy of repo_config pointing at a repo-specific baseline.

    :type repo_config: RepoConfig
    :type baseline_file: str
    :rtype: RepoConfig
    """
    return RepoConfig(
        base_tmp_dir=repo_config.base_tmp_dir,
        exclude_regex=repo_config.exclude_regex,
        baseline=baseline_file,
    )


def initialize_repos_from_repo_yaml(
        repo_yaml,
        plugin_sensitivity,
        repo_config,
        s3_config=None,
):
    """For expected yaml file format, see `repos.yaml.sample`

    :type repo_yaml: string
    :param repo_yaml: filename of config file to read and parse

    :type plugin_sensitivity: SensitivityValues

    :type repo_config: RepoConfig

    :type s3_config: S3Config

    :return: list of TrackedRepos
    :raises: IOError
    """
    data = open_config_file(repo_yaml)

    output = []
    if data.get('tracked') is None:
        return output

    for entry in data['tracked']:
        # Per-repo plugin settings override the global sensitivity.
        sensitivity = plugin_sensitivity
        if entry.get('plugins'):
            sensitivity = _merge_plugin_sensitivity(
                plugin_sensitivity,
                entry['plugins'],
            )
        entry['plugin_sensitivity'] = sensitivity

        # Per-repo baseline files get their own RepoConfig copy.
        config = repo_config
        if 'baseline_file' in entry:
            config = _repo_config_with_baseline(
                repo_config,
                entry['baseline_file'],
            )
        entry['repo_config'] = config

        # Skip (rather than fail) s3-backed repos when no s3 config is given,
        # so one bad entry doesn't abort the rest.
        if entry.get('s3_backed') and s3_config is None:
            log.error(
                ('Unable to load s3 config for %s. Make sure to specify '
                 '--s3-config-file for s3_backed repos!'),
                entry.get('repo'),
            )
            continue
        entry['s3_config'] = s3_config

        # After setting up all arguments, create respective object.
        repo = tracked_repo_factory(
            entry.get('is_local_repo', False),
            entry.get('s3_backed', False),
        )
        output.append(repo(**entry))

    return output
def main(argv=None):
    """
    Expected Usage:
      1. Initialize TrackedRepos from config.yaml, and save to crontab.
      2. Each cron command will run and scan git diff from previous commit saved, to now.
      3. If something is found, alert.

    :return: shell error code
    """
    # No arguments supplied: show help instead of silently doing nothing.
    if len(sys.argv) == 1:  # pragma: no cover
        sys.argv.append('-h')

    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    # Shared configuration parsed once for all three modes below.
    plugin_sensitivity = parse_sensitivity_values(args)
    repo_config = parse_repo_config(args)
    s3_config = parse_s3_config(args)

    if args.initialize:
        # initialize sets up the local file storage for tracking
        try:
            tracked_repos = initialize_repos_from_repo_yaml(
                args.initialize,
                plugin_sensitivity,
                repo_config,
                s3_config,
            )
        except IOError:
            # Error handled in initialize_repos_from_repo_yaml
            return 1

        # Only repos that saved successfully get a crontab entry.
        cron_repos = [repo for repo in tracked_repos if repo.save()]
        if not cron_repos:
            return 0

        # Emit crontab lines to stdout, one per tracked repo.
        print('# detect-secrets scanner')
        for repo in cron_repos:
            print('{} {}'.format(
                repo.cron(),
                args.output_hook_command,
            ))

    elif args.add_repo:
        add_repo(
            args.add_repo[0],
            plugin_sensitivity,
            is_local_repo=args.local,
            s3_config=s3_config,
            repo_config=repo_config,
        )

    elif args.scan_repo:
        repo_name = args.scan_repo[0]
        repo = tracked_repo_factory(args.local, bool(s3_config)) \
            .load_from_file(repo_name, repo_config, s3_config)
        if not repo:
            return 1

        secrets = repo.scan()
        if not secrets:
            return 1

        if len(secrets.data) > 0:
            log.error('SCAN COMPLETE - We found secrets in: %s', repo.name)

            secrets = secrets.json()
            set_authors_for_found_secrets(secrets, repo)

            alert = {
                'alert': 'Secrets found',
                'repo_name': repo.name,
                'secrets': secrets,
            }
            log.error(alert)
            args.output_hook.alert(repo.name, secrets)
        else:
            log.info('SCAN COMPLETE - STATUS: clean for %s', repo.name)

            # Save records, since the latest scan indicates that the most recent commit is clean
            repo.update()
            repo.save(OverrideLevel.ALWAYS)

    return 0
def main(argv=None):
    """CLI entry point: dispatches to the `scan` or `audit` action.

    :type argv: list|None
    :param argv: arguments to parse; defaults to sys.argv via parse_args.
    :return: shell error code (0 on success, 1 on usage error)
    """
    # No arguments supplied: show help instead of silently doing nothing.
    if len(sys.argv) == 1:  # pragma: no cover
        sys.argv.append('-h')

    # Removed leftover debug statement: log.error('here it is')

    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    if args.action == 'scan':
        automaton = None
        word_list_hash = None
        if args.word_list_file:
            automaton, word_list_hash = build_automaton(args.word_list_file)

        # Plugins are *always* rescanned with fresh settings, because
        # we want to get the latest updates.
        plugins = initialize.from_parser_builder(
            args.plugins,
            exclude_lines_regex=args.exclude_lines,
            automaton=automaton,
            should_verify_secrets=not args.no_verify,
        )
        if args.string:
            line = args.string

            if isinstance(args.string, bool):
                # `--string` given as a bare flag: take the payload from
                # stdin instead.
                # NOTE(review): raises IndexError on empty stdin -- confirm
                # whether that is acceptable for this CLI.
                line = sys.stdin.read().splitlines()[0]

            _scan_string(line, plugins)

        else:
            baseline_dict = _perform_scan(
                args,
                plugins,
                automaton,
                word_list_hash,
            )

            if args.import_filename:
                write_baseline_to_file(
                    filename=args.import_filename[0],
                    data=baseline_dict,
                )
            else:
                print(
                    baseline.format_baseline_for_output(
                        baseline_dict,
                    ),
                )

    elif args.action == 'audit':
        if not args.diff and not args.display_results:
            audit.audit_baseline(args.filename[0])
            return 0

        if args.display_results:
            audit.print_audit_results(args.filename[0])
            return 0

        # Diff mode requires exactly two baseline files.
        if len(args.filename) != 2:
            print(
                'Must specify two files to compare!',
                file=sys.stderr,
            )
            return 1

        try:
            audit.compare_baselines(args.filename[0], args.filename[1])
        except audit.RedundantComparisonError:
            print(
                'No difference, because it\'s the same file!',
                file=sys.stderr,
            )

    return 0