def excluded_signatures(self) -> Tuple[str, ...]:
    """Return the signatures of findings to be left out of the scan results.

    The value is built on first use from the ``exclude_signatures`` entries
    of ``self.global_options`` followed by those of ``self.config_data``,
    and is then cached in ``self._excluded_signatures``. Entries may be
    tables carrying a ``signature`` key, or (deprecated) plain strings.

    :raises types.ConfigException: If a table entry has no ``signature`` key,
      or an entry is neither a table nor a string
    :returns: The signatures to be excluded from scan results
    """
    if self._excluded_signatures is not None:
        return self._excluded_signatures

    from_options = tuple(self.global_options.exclude_signatures or [])
    from_config = tuple(self.config_data.get("exclude_signatures", []))

    collected: Set[str] = set()
    saw_legacy_entry = False
    for entry in from_options + from_config:
        if isinstance(entry, str):
            # Bare strings are the old format: still honoured, but warned about.
            saw_legacy_entry = True
            collected.add(entry)
            continue
        if not isinstance(entry, dict):
            raise types.ConfigException(
                f"{type(entry).__name__} signature is illegal in exclude-signatures"
            )
        try:
            value = entry["signature"]
        except KeyError as exc:
            raise types.ConfigException(
                "Required key signature missing in exclude-signatures"
            ) from exc
        collected.add(value)

    if saw_legacy_entry:
        warnings.warn(
            "Configuring exclude-signatures as string has been deprecated and support for this format will "
            "be removed in the future. Please make sure to update your exclude-signatures configuration to "
            "an array of tables. For example: exclude-signatures = [{signature='signature', reason='The "
            "reason of excluding the signature'}]",
            DeprecationWarning,
        )

    self._excluded_signatures = tuple(collected)
    return self._excluded_signatures
def scan(self) -> List[Issue]:
    """Run every requested scan over the target data and collect the issues.

    Each chunk from ``self.chunks`` is passed to the regex scan and/or the
    entropy scan, depending on what ``self.global_options`` enables. The
    resulting list is also stored on ``self._issues``.

    :raises types.ConfigException: If neither scan type was requested, or
      regex scanning was requested but no regexes are available
    :returns: All issues found, in the order they were produced
    """
    options = self.global_options
    if not (options.entropy or options.regex):
        raise types.ConfigException("No analysis requested.")
    if options.regex and not self.rules_regexes:
        raise types.ConfigException(
            "Regex checks requested, but no regexes found.")

    found: List[Issue] = []
    for chunk in self.chunks:
        # Regex goes first so that a bad rule configuration fails fast.
        if options.regex and self.rules_regexes:
            found.extend(self.scan_regex(chunk))
        if options.entropy:
            found.extend(self.scan_entropy(chunk))

    self._issues = found
    return found
def excluded_paths(self) -> List[Pattern]:
    """Return the compiled regexes matching paths to exclude from the scan.

    The value is built on first use from the ``exclude_path_patterns``
    entries of ``self.global_options`` followed by those of
    ``self.config_data``, compiled with ``config.compile_path_rules``, and
    cached in ``self._excluded_paths``. Entries may be tables carrying a
    ``path-pattern`` key, or (deprecated) plain strings.

    :raises types.ConfigException: If a table entry has no ``path-pattern``
      key, or an entry is neither a table nor a string
    """
    if self._excluded_paths is not None:
        return self._excluded_paths

    self.logger.info("Initializing excluded paths")
    from_options = tuple(self.global_options.exclude_path_patterns or [])
    from_config = tuple(self.config_data.get("exclude_path_patterns", []))

    raw_patterns: Set[str] = set()
    saw_legacy_entry = False
    for entry in from_options + from_config:
        if isinstance(entry, str):
            # Bare strings are the old format: still honoured, but warned about.
            saw_legacy_entry = True
            raw_patterns.add(entry)
            continue
        if not isinstance(entry, dict):
            raise types.ConfigException(
                f"{type(entry).__name__} pattern is illegal in exclude-path-patterns"
            )
        try:
            value = entry["path-pattern"]
        except KeyError as exc:
            raise types.ConfigException(
                "Required key path-pattern missing in exclude-path-patterns"
            ) from exc
        raw_patterns.add(value)

    if saw_legacy_entry:
        warnings.warn(
            "Old format of --exclude-path-patterns option and config file setup exclude-path-patterns "
            "= ['exclusion pattern'] has been deprecated and will be removed in a future version. "
            "Make sure all the exclusions are set up using new pattern i.e. exclude-path-patterns = "
            "[{path-pattern='exclusion pattern',reason='reason for exclusion'}] in the config file",
            DeprecationWarning,
        )

    self._excluded_paths = config.compile_path_rules(raw_patterns)
    return self._excluded_paths
def scan(self) -> Generator[Issue, None, None]:
    """Run every requested scan over the target data, yielding issues lazily.

    Each chunk from ``self.chunks`` is passed to the regex scan and/or the
    entropy scan, depending on what ``self.global_options`` enables. Every
    issue is appended to ``self._issues`` before it is yielded.

    The whole scan runs while holding ``self._scan_lock``. Concurrent callers
    therefore block until the first scan has finished; once
    ``self._completed`` is set they simply replay the cached issues instead
    of scanning again.

    :raises types.ConfigException: If neither scan type was requested, or
      regex scanning was requested but no regexes are available
    """

    def remember(found):
        # Cache each issue before handing it to the consumer.
        for item in found:
            self._issues.append(item)
            yield item

    # There is no written Python memory model to lean on here. Correctness
    # under threads assumes that the store to _completed at the end of the
    # critical section can never be observed by another thread as happening
    # after the implicit release of _scan_lock.
    with self._scan_lock:
        if self._completed:
            yield from self._issues
            return

        if not (self.global_options.entropy or self.global_options.regex):
            self.logger.error("No analysis requested.")
            raise types.ConfigException("No analysis requested.")
        if self.global_options.regex and not self.rules_regexes:
            self.logger.error(
                "Regex checks requested, but no regexes found.")
            raise types.ConfigException(
                "Regex checks requested, but no regexes found.")

        self.logger.info("Starting scan...")
        self._issues = []

        for chunk in self.chunks:
            # Regex goes first so that a bad rule configuration fails fast.
            if self.global_options.regex and self.rules_regexes:
                yield from remember(self.scan_regex(chunk))
            if self.global_options.entropy:
                yield from remember(self.scan_entropy(chunk))

        self._completed = True
        self.logger.info("Found %d issues.", len(self._issues))
def test_file_error_is_raised_if_non_specified_config_file_cant_be_read(
    self, mock_load: mock.MagicMock
):
    """An unreadable, auto-discovered config file surfaces as ``click.FileError``.

    With the working directory switched to ``self.data_dir`` and no explicit
    config file given (empty string), ``mock_load`` is made to raise
    ``types.ConfigException``. ``config.read_pyproject_toml`` is expected to
    raise a ``click.FileError`` matching the same message whose ``filename``
    is ``tartufo.toml`` inside the data directory.

    :param mock_load: Mock whose ``side_effect`` is set to the failure;
      presumably injected by a patch decorator outside this block
    """
    # Capture the absolute working directory. A bare ``pathlib.Path()`` is
    # the relative path ".", so changing "back" to it would be a no-op and
    # leave every later test running inside the data directory.
    original_dir = pathlib.Path.cwd()
    os.chdir(str(self.data_dir))
    try:
        mock_load.side_effect = types.ConfigException("Bad TOML!")
        with self.assertRaisesRegex(click.FileError, "Bad TOML!") as exc:
            config.read_pyproject_toml(self.ctx, self.param, "")
        self.assertEqual(exc.exception.filename,
                         str(self.data_dir / "tartufo.toml"))
    finally:
        # Restore the working directory even when an assertion above fails.
        os.chdir(str(original_dir))
def rules_regexes(self) -> Dict[str, Rule]:
    """Return the regular-expression rules to scan the code for.

    The rules are built on first use by ``config.configure_regexes`` from the
    regex-related settings in ``self.global_options`` and are then cached in
    ``self._rules_regexes``.

    :raises types.ConfigException: If there was a problem compiling the rules
    """
    if self._rules_regexes is not None:
        return self._rules_regexes

    try:
        options = self.global_options
        compiled = config.configure_regexes(
            options.default_regexes,
            options.rules,
            options.git_rules_repo,
            options.git_rules_files,
        )
    except (ValueError, re.error) as exc:
        raise types.ConfigException(str(exc)) from exc

    self._rules_regexes = compiled
    return self._rules_regexes
def rules_regexes(self) -> Set[Rule]:
    """Return the set of regular-expression rules to scan the code for.

    The rules are built on first use by ``config.configure_regexes`` from the
    regex-related settings in ``self.global_options`` and are then cached in
    ``self._rules_regexes``. Initialisation is logged; failures are logged
    with their traceback before being re-raised.

    :raises types.ConfigException: If there was a problem compiling the rules
    """
    if self._rules_regexes is not None:
        return self._rules_regexes

    self.logger.info("Initializing regex rules")
    try:
        options = self.global_options
        self._rules_regexes = config.configure_regexes(
            include_default=options.default_regexes,
            rules_files=options.rules,
            rule_patterns=options.rule_patterns,
            rules_repo=options.git_rules_repo,
            rules_repo_files=options.git_rules_files,
        )
    except (ValueError, re.error) as exc:
        self.logger.exception("Error loading regex rules", exc_info=exc)
        raise types.ConfigException(str(exc)) from exc

    self.logger.debug("Regex rules were initialized as: %s",
                      self._rules_regexes)
    return self._rules_regexes
def load_config_from_path(
    config_path: pathlib.Path, filename: Optional[str] = None, traverse: bool = True
) -> Tuple[pathlib.Path, MutableMapping[str, Any]]:
    """Scan a path for a configuration file, and return its contents.

    All key names are normalized to remove leading "-"/"--" and replace "-"
    with "_". For example, "--repo-path" becomes "repo_path".

    In addition to checking the specified path, if ``traverse`` is ``True``,
    this will traverse up through the directory structure, looking for a
    configuration file in parent directories. For example, given this
    directory structure:

    ::

      working_dir/
      |- tartufo.toml
      |- group1/
      |  |- project1/
      |  |  |- tartufo.toml
      |  |- project2/
      |- group2/
         |- tartufo.toml
         |- project1/
         |- project2/
            |- tartufo.toml

    The following ``config_path`` values will load the configuration files at
    the corresponding paths:

    ============================ ====
    config_path                  file
    ---------------------------- ----
    working_dir/group1/project1/ working_dir/group1/project1/tartufo.toml
    working_dir/group1/project2/ working_dir/tartufo.toml
    working_dir/group2/project1/ working_dir/group2/tartufo.toml
    working_dir/group2/project2/ working_dir/group2/project2/tartufo.toml
    ============================ ====

    A candidate file only counts as "the" configuration file when it holds a
    non-empty ``[tool.tartufo]`` table; otherwise the next candidate in the
    same directory is tried before moving on.

    :param config_path: The path to search for configuration files
    :param filename: A specific filename to look for. By default, this will
      look for both ``tartufo.toml`` and then ``pyproject.toml``.
    :param traverse: Whether to keep searching parent directories when no
      configuration is found in ``config_path``
    :raises FileNotFoundError: If no config file was found
    :raises types.ConfigException: If a config file was found, but could not
      be read
    :returns: A tuple consisting of the config file that was discovered, and
      the contents of that file loaded in as TOML data
    """
    config: MutableMapping[str, Any] = {}
    full_path: Optional[pathlib.Path] = None
    config_filenames = [filename] if filename else ["tartufo.toml", "pyproject.toml"]

    for possibility in config_filenames:
        full_path = config_path / possibility
        if not full_path.exists():
            continue
        try:
            toml_file = toml.load(full_path)
        except (toml.TomlDecodeError, OSError) as exc:
            raise types.ConfigException(
                f"Error reading configuration file: {exc}"
            ) from exc
        config = toml_file.get("tool", {}).get("tartufo", {})
        if config:
            # Stop only on a file that really carries tartufo settings, so a
            # file without a [tool.tartufo] table does not hide the next
            # candidate in the same directory.
            break

    if not config and traverse and config_path.parent != config_path:
        # Nothing usable here; retry one directory up (stops at the root,
        # where a path is its own parent).
        return load_config_from_path(config_path.parent, filename, traverse)
    if not config:
        raise FileNotFoundError(f"Could not find config file in {config_path}.")

    # Strip only *leading* dashes, then turn the remaining ones into
    # underscores: "--repo-path" and "-repo-path" both become "repo_path".
    normalized = {k.lstrip("-").replace("-", "_"): v for k, v in config.items()}
    return (full_path, normalized)  # type: ignore