def test_rule_patterns_without_defaults(self):
    """Rules built from rule_patterns alone must not include the default rule set."""
    patterns = [
        {
            "reason": "RSA private key 2",
            "pattern": "-----BEGIN EC PRIVATE KEY-----",
        },
        {
            "reason": "Complex Rule",
            "pattern": "complex-rule",
            "path-pattern": "/tmp/[a-z0-9A-Z]+\\.(py|js|json)",
        },
    ]
    # A missing "path-pattern" is expected to compile to the empty pattern.
    expected = {
        Rule(
            name="RSA private key 2",
            pattern=re.compile("-----BEGIN EC PRIVATE KEY-----"),
            path_pattern=re.compile(""),
            re_match_type=MatchType.Search,
            re_match_scope=None,
        ),
        Rule(
            name="Complex Rule",
            pattern=re.compile("complex-rule"),
            path_pattern=re.compile("/tmp/[a-z0-9A-Z]+\\.(py|js|json)"),
            re_match_type=MatchType.Search,
            re_match_scope=None,
        ),
    }
    actual = config.configure_regexes(rule_patterns=patterns, include_default=False)
    self.assertEqual(actual, expected)
def test_configure_regexes_rules_files_with_defaults(self):
    """Rules loaded from a rules file should be merged with the default rule set."""
    rules_path = pathlib.Path(__file__).parent / "data" / "testRules.json"
    # Use a context manager so the file handle is closed even if an
    # assertion fails (the previous version opened it and never closed it).
    with rules_path.open() as rules_file:
        with config.DEFAULT_PATTERN_FILE.open() as handle:
            expected_regexes = config.load_rules_from_file(handle)
        expected_regexes.add(
            Rule(
                name="RSA private key 2",
                pattern=re.compile("-----BEGIN EC PRIVATE KEY-----"),
                path_pattern=None,
                re_match_type=MatchType.Match,
                re_match_scope=None,
            ))
        expected_regexes.add(
            Rule(
                name="Complex Rule",
                pattern=re.compile("complex-rule"),
                path_pattern=re.compile("/tmp/[a-z0-9A-Z]+\\.(py|js|json)"),
                re_match_type=MatchType.Match,
                re_match_scope=None,
            ))
        actual_regexes = config.configure_regexes(
            include_default=True, rules_files=(rules_file,))
    self.assertEqual(
        expected_regexes,
        actual_regexes,
        f"The regexes dictionary should match the test rules (expected: {expected_regexes}, actual: {actual_regexes})",
    )
def test_configure_regexes_rules_files_with_defaults(self):
    """Rules loaded from a rules file should be merged with the default regexes.

    NOTE(review): another test method with this exact name exists in this
    module; whichever definition appears later in the class shadows the
    earlier one, so one of them never runs — consider renaming.
    """
    rules_path = pathlib.Path(__file__).parent / "data" / "testRules.json"
    # Use a context manager so the file handle is closed even if an
    # assertion fails (the previous version opened it and never closed it).
    with rules_path.open() as rules_file:
        # Shallow copy is sufficient: we only replace top-level entries.
        expected_regexes = copy.copy(config.DEFAULT_REGEXES)
        expected_regexes["RSA private key 2"] = Rule(
            name="RSA private key 2",
            pattern=re.compile("-----BEGIN EC PRIVATE KEY-----"),
            path_pattern=None,
        )
        expected_regexes["Complex Rule"] = Rule(
            name="Complex Rule",
            pattern=re.compile("complex-rule"),
            path_pattern=re.compile("/tmp/[a-z0-9A-Z]+\\.(py|js|json)"),
        )
        actual_regexes = config.configure_regexes(
            include_default=True, rules_files=(rules_file,)
        )
    self.assertEqual(
        expected_regexes,
        actual_regexes,
        "The regexes dictionary should match the test rules "
        "(expected: {}, actual: {})".format(expected_regexes, actual_regexes),
    )
def test_configure_regexes_includes_rules_from_rules_repo(self):
    """Rules should be loadable from files found in a separate rules repo."""
    rules_path = pathlib.Path(__file__).parent / "data"
    expected_regexes = {
        Rule(
            name="RSA private key 2",
            pattern=re.compile("-----BEGIN EC PRIVATE KEY-----"),
            path_pattern=None,
            re_match_type=MatchType.Match,
            re_match_scope=None,
        ),
        Rule(
            name="Complex Rule",
            pattern=re.compile("complex-rule"),
            path_pattern=re.compile("/tmp/[a-z0-9A-Z]+\\.(py|js|json)"),
            re_match_type=MatchType.Match,
            re_match_scope=None,
        ),
    }
    actual_regexes = config.configure_regexes(
        include_default=False,
        rules_repo=str(rules_path),
        rules_repo_files=["testRules.json"],
    )
    self.assertEqual(
        expected_regexes,
        actual_regexes,
        f"The regexes dictionary should match the test rules (expected: {expected_regexes}, actual: {actual_regexes})",
    )
def load_rules_from_file(rules_file: TextIO) -> Dict[str, Rule]:
    """Load a set of JSON rules from a file and return them as compiled patterns.

    A rule may be either a plain regex string, or an object with a "pattern"
    key and an optional "path_pattern" key.

    :param rules_file: An open file handle containing a JSON dictionary of regexes
    :raises ValueError: If the rules contain invalid JSON
    """
    rules: Dict[str, Rule] = {}
    try:
        new_rules = json.load(rules_file)
    except json.JSONDecodeError as exc:
        raise ValueError(
            "Error loading rules from file: {}".format(rules_file.name)
        ) from exc
    for rule_name, rule_definition in new_rules.items():
        # json.load only ever produces dict for JSON objects, so a plain
        # isinstance(dict) check is exact here.  The previous code used
        # collections.Mapping, an alias that was removed in Python 3.10
        # (the surviving spelling is collections.abc.Mapping), which made
        # this function raise AttributeError on modern interpreters.
        if isinstance(rule_definition, dict):
            path_pattern = rule_definition.get("path_pattern", None)
            rule = Rule(
                name=rule_name,
                pattern=re.compile(rule_definition["pattern"]),
                path_pattern=re.compile(path_pattern) if path_pattern else None,
            )
        else:
            # Bare string rule: the value itself is the pattern.
            rule = Rule(
                name=rule_name, pattern=re.compile(rule_definition), path_pattern=None
            )
        rules[rule_name] = rule
    return rules
def load_rules_from_file(rules_file: TextIO) -> Set[Rule]:
    """Load a set of JSON rules from a file and return them as compiled patterns.

    :param rules_file: An open file handle containing a JSON dictionary of regexes
    :raises ValueError: If the rules contain invalid JSON
    """
    loaded: Set[Rule] = set()
    try:
        raw_rules = json.load(rules_file)
    except json.JSONDecodeError as exc:
        raise ValueError(
            f"Error loading rules from file: {rules_file.name}") from exc
    for name, definition in raw_rules.items():
        try:
            # Object-style rule: carries a "pattern" plus an optional
            # "path_pattern"; a falsy path falls back to EMPTY_PATTERN.
            raw_path = definition.get("path_pattern", None)
            new_rule = Rule(
                name=name,
                pattern=re.compile(definition["pattern"]),
                path_pattern=re.compile(raw_path) if raw_path else EMPTY_PATTERN,
                re_match_type=MatchType.Match,
                re_match_scope=None,
            )
        except AttributeError:
            # Plain-string rule: `.get` is unavailable, so the definition
            # itself is treated as the pattern.
            new_rule = Rule(
                name=name,
                pattern=re.compile(definition),
                path_pattern=None,
                re_match_type=MatchType.Match,
                re_match_scope=None,
            )
        loaded.add(new_rule)
    return loaded
def test_echo_result_echos_exclusions_verbose(self, mock_time, mock_click, mock_scanner):
    """Verbose mode should echo excluded paths, signatures, and entropy patterns."""
    mock_time.now.return_value.isoformat.return_value = "now:now:now"
    options = generate_options(
        GlobalOptions,
        quiet=False,
        verbose=1,
    )
    # A clean scan: no issues found, but exclusions were configured.
    mock_scanner.issues = []
    mock_scanner.issue_count = 0
    mock_scanner.excluded_paths = [
        re.compile("package-lock.json"),
        re.compile("poetry.lock"),
    ]
    mock_scanner.excluded_signatures = [
        "fffffffffffff",
        "ooooooooooooo",
    ]
    # NOTE(review): the trailing comma wraps each Rule in a 1-tuple; the
    # expected output below is formatted from the same tuples, so the test
    # is self-consistent, but the wrapping may be unintentional — confirm.
    rule_1 = (Rule(
        name="Rule-1",
        pattern="aaaa",
        path_pattern="bbbb",
        re_match_type=MatchType.Search,
        re_match_scope=Scope.Line,
    ), )
    # NOTE(review): this second rule is also named "Rule-1" — looks like a
    # copy/paste slip ("Rule-2"?); harmless here because the expected text
    # is built from the same object.
    rule_2 = (Rule(
        name="Rule-1",
        pattern="cccc",
        path_pattern="dddd",
        re_match_type=MatchType.Search,
        re_match_scope=Scope.Line,
    ), )
    mock_scanner.excluded_entropy = [rule_1, rule_2]
    util.echo_result(options, mock_scanner, "", "")
    # The echo calls must occur in exactly this order (any_order=False).
    mock_click.echo.assert_has_calls(
        (
            mock.call(
                "Time: now:now:now\nAll clear. No secrets detected."),
            mock.call("\nExcluded paths:"),
            mock.call(
                "re.compile('package-lock.json')\nre.compile('poetry.lock')"
            ),
            mock.call("\nExcluded signatures:"),
            mock.call("fffffffffffff\nooooooooooooo"),
            mock.call("\nExcluded entropy patterns:"),
            mock.call(f"{rule_1}\n{rule_2}"),
        ),
        any_order=False,
    )
def convert_regexes_to_rules(regexes: Dict[str, Pattern]) -> Dict[str, Rule]:
    """Wrap each named compiled pattern in a path-less Rule, keyed by its name."""
    return {
        name: Rule(name=name, pattern=pattern, path_pattern=None)
        for name, pattern in regexes.items()
    }
def test_all_regex_rules_are_checked(self):
    """Every rule whose path pattern matches must have its regex evaluated."""
    # Rule with no path restriction: its regex always runs.
    pathless_pattern = mock.MagicMock()
    pathless_pattern.findall.return_value = []
    # Rule whose path pattern matches the chunk's path: its regex runs too.
    pathed_pattern = mock.MagicMock()
    pathed_pattern.findall.return_value = []
    matching_path = mock.MagicMock()
    matching_path.match = mock.MagicMock(return_value=["/file/path"])
    # Rule whose path pattern does NOT match: its regex must never run.
    skipped_pattern = mock.MagicMock()
    non_matching_path = mock.MagicMock()
    non_matching_path.match = mock.MagicMock(return_value=[])
    scanner = TestScanner(self.options)
    scanner._rules_regexes = {  # pylint: disable=protected-access
        Rule(
            name="foo",
            pattern=pathless_pattern,
            path_pattern=None,
            re_match_type=MatchType.Match,
            re_match_scope=None,
        ),
        Rule(
            name="bar",
            pattern=pathed_pattern,
            path_pattern=matching_path,
            re_match_type=MatchType.Match,
            re_match_scope=None,
        ),
        Rule(
            name="not-found",
            pattern=skipped_pattern,
            path_pattern=non_matching_path,
            re_match_type=MatchType.Match,
            re_match_scope=None,
        ),
    }
    chunk = types.Chunk("foo", "/file/path", {})
    list(scanner.scan_regex(chunk))
    pathless_pattern.findall.assert_called_once_with("foo")
    pathed_pattern.findall.assert_called_once_with("foo")
    matching_path.match.assert_called_once_with("/file/path")
    non_matching_path.match.assert_called_once_with("/file/path")
    skipped_pattern.assert_not_called()
def test_issue_is_not_created_if_signature_is_excluded(
    self, mock_signature: mock.MagicMock
):
    """A match whose signature is excluded must not produce an issue."""
    mock_signature.return_value = True
    scanner = TestScanner(self.options)
    scanner._rules_regexes = {  # pylint: disable=protected-access
        "foo": Rule(name=None, pattern=re.compile("foo"), path_pattern=None)
    }
    found = scanner.scan_regex(types.Chunk("foo", "bar"))
    mock_signature.assert_called_once_with("foo", "bar")
    self.assertEqual(found, [])
def test_path_is_used(self):
    """Each compiled rule keeps the path-pattern it was declared with."""
    specs = [
        {"path-pattern": r"src/.*", "pattern": r"^[a-zA-Z0-9]{26}$"},
        {"pattern": r"^[a-zA-Z0-9]test$"},
        {"path-pattern": r"src/.*", "pattern": r"^[a-zA-Z0-9]{26}::test$"},
    ]
    # A spec without "path-pattern" compiles to the empty path pattern.
    expected = [
        Rule(
            None,
            re.compile(r"^[a-zA-Z0-9]{26}$"),
            re.compile(r"src/.*"),
            re_match_type=MatchType.Search,
            re_match_scope=Scope.Line,
        ),
        Rule(
            None,
            re.compile(r"^[a-zA-Z0-9]test$"),
            re.compile(r""),
            re_match_type=MatchType.Search,
            re_match_scope=Scope.Line,
        ),
        Rule(
            None,
            re.compile(r"^[a-zA-Z0-9]{26}::test$"),
            re.compile(r"src/.*"),
            re_match_type=MatchType.Search,
            re_match_scope=Scope.Line,
        ),
    ]
    self.assertCountEqual(config.compile_rules(specs), expected)
def test_issue_is_returned_if_signature_is_not_excluded(
    self, mock_signature: mock.MagicMock
):
    """A match whose signature is not excluded must be reported as an issue."""
    mock_signature.return_value = False
    scanner = TestScanner(self.options)
    scanner._rules_regexes = {  # pylint: disable=protected-access
        "foo": Rule(name=None, pattern=re.compile("foo"), path_pattern=None)
    }
    found = scanner.scan_regex(types.Chunk("foo", "bar"))
    mock_signature.assert_called_once_with("foo", "bar")
    self.assertEqual(len(found), 1)
    issue = found[0]
    self.assertEqual(issue.issue_detail, "foo")
    self.assertEqual(issue.issue_type, types.IssueType.RegEx)
    self.assertEqual(issue.matched_string, "foo")
def test_match_can_contain_delimiter(self):
    """A '::' inside the pattern itself must survive compilation intact."""
    compiled = config.compile_rules([{"pattern": r"^[a-zA-Z0-9]::test$"}])
    expected_rule = Rule(
        None,
        re.compile(r"^[a-zA-Z0-9]::test$"),
        re.compile(r""),
        re_match_type=MatchType.Search,
        re_match_scope=Scope.Line,
    )
    self.assertEqual(compiled, [expected_rule])
def compile_rules(patterns: Iterable[Dict[str, str]]) -> List[Rule]:
    """Take a list of regex string with paths and compile them into a List of Rule.

    :param patterns: The list of patterns to be compiled
    :raises ConfigException: If a match-type/scope value is invalid or a
        required "pattern" key is missing
    :return: List of Rule objects
    """
    compiled: List[Rule] = []
    for entry in patterns:
        # Validate the optional match-type, defaulting to "search".
        raw_match_type = entry.get("match-type", MatchType.Search.value)
        try:
            match_type = MatchType(raw_match_type)
        except ValueError as exc:
            raise ConfigException(
                f"Invalid value for match-type: {entry.get('match-type')}"
            ) from exc
        # Validate the optional scope, defaulting to "line".
        raw_scope = entry.get("scope", Scope.Line.value)
        try:
            scope = Scope(raw_scope)
        except ValueError as exc:
            raise ConfigException(
                f"Invalid value for scope: {entry.get('scope')}") from exc
        try:
            compiled.append(
                Rule(
                    name=entry.get("reason", None),  # type: ignore[union-attr]
                    pattern=re.compile(entry["pattern"]),  # type: ignore[index]
                    path_pattern=re.compile(
                        entry.get("path-pattern", "")),  # type: ignore[union-attr]
                    re_match_type=match_type,
                    re_match_scope=scope,
                ))
        except KeyError as exc:
            # Only "pattern" is mandatory; its absence lands here.
            raise ConfigException(
                f"Invalid exclude-entropy-patterns: {patterns}") from exc
    return compiled
def convert_regexes_to_rules(regexes: Dict[str, Pattern]) -> Dict[str, Rule]:
    """Build a name -> Rule mapping, one path-less Rule per compiled pattern."""
    rules: Dict[str, Rule] = {}
    for name, pattern in regexes.items():
        rules[name] = Rule(name=name, pattern=pattern, path_pattern=None)
    return rules
def configure_regexes(
    include_default: bool = True,
    rules_files: Optional[Iterable[TextIO]] = None,
    rule_patterns: Optional[Iterable[Dict[str, str]]] = None,
    rules_repo: Optional[str] = None,
    rules_repo_files: Optional[Iterable[str]] = None,
) -> Set[Rule]:
    """Build a set of regular expressions to be used during a regex scan.

    :param include_default: Whether to include the built-in set of regexes
    :param rules_files: A list of files to load rules from (deprecated)
    :param rule_patterns: A set of previously-collected rules
    :param rules_repo: A separate git repository to load rules from
    :param rules_repo_files: A set of patterns used to find files in the rules repo
    :raises ConfigException: If a rule pattern lacks a "reason" or "pattern" key
    :returns: Set of `Rule` objects to be used for regex scans
    """
    if include_default:
        with DEFAULT_PATTERN_FILE.open() as handle:
            rules = load_rules_from_file(handle)
    else:
        rules = set()

    if rule_patterns:
        try:
            for pattern in rule_patterns:
                rule = Rule(
                    name=pattern["reason"],
                    pattern=re.compile(pattern["pattern"]),
                    path_pattern=re.compile(pattern.get("path-pattern", "")),
                    re_match_type=MatchType.Search,
                    re_match_scope=None,
                )
                rules.add(rule)
        except KeyError as exc:
            raise ConfigException(
                f"Invalid rule-pattern; both reason and pattern are required fields. Rule: {pattern}"
            ) from exc

    if rules_files:
        # FIX: the previous message concatenated "...config " + " option",
        # producing a doubled space in the emitted warning text.
        warnings.warn(
            "Storing rules in a separate file has been deprecated and will be removed "
            "in a future release. You should be using the 'rule-patterns' config "
            "option instead.",
            DeprecationWarning,
        )
        all_files: List[TextIO] = list(rules_files)
    else:
        all_files = []

    try:
        cloned_repo = False
        repo_path = None
        if rules_repo:
            repo_path = pathlib.Path(rules_repo)
            try:
                if not repo_path.is_dir():
                    cloned_repo = True
            except OSError:  # pragma: no cover
                # If a git URL is passed in, Windows will raise an OSError on `is_dir()`
                cloned_repo = True
            finally:
                if cloned_repo:
                    repo_path, _ = util.clone_git_repo(rules_repo)
            if not rules_repo_files:
                rules_repo_files = ("*.json", )
            # NOTE(review): handles opened here (and caller-provided ones in
            # all_files) are read but never explicitly closed — consider
            # closing them after load_rules_from_file; confirm callers don't
            # rely on them staying open.
            for repo_file in rules_repo_files:
                all_files.extend(
                    [path.open("r") for path in repo_path.glob(repo_file)])
        if all_files:
            for rules_file in all_files:
                rules.update(load_rules_from_file(rules_file))
    finally:
        # Remove the temporary clone; del_rw clears read-only bits so
        # rmtree can delete git's read-only object files.
        if cloned_repo:
            shutil.rmtree(repo_path, onerror=util.del_rw)  # type: ignore
    return rules