def test_multiple_configs():
    """Rules loaded from two separate config files are merged into one Config."""
    first_yaml = dedent(
        """
        rules:
        - id: rule1
          pattern: $X == $X
          languages: [python]
          severity: INFO
          message: bad
        """
    )
    second_yaml = dedent(
        """
        rules:
        - id: rule2
          pattern: $X == $Y
          languages: [python]
          severity: INFO
          message: good
        - id: rule3
          pattern: $X < $Y
          languages: [c]
          severity: INFO
          message: doog
        """
    )
    with NamedTemporaryFile() as first_file, NamedTemporaryFile() as second_file:
        first_file.write(first_yaml.encode("utf-8"))
        second_file.write(second_yaml.encode("utf-8"))
        # Flush so the resolver sees the bytes on disk when it opens the paths.
        first_file.flush()
        second_file.flush()

        config, errors = Config.from_config_list([first_file.name, second_file.name])
        assert not errors

        rules = config.get_rules(True)
        assert len(rules) == 3
        # All three rules, across both files, end up in the merged config.
        assert {"rule1", "rule2", "rule3"} == {rule.id for rule in rules}
def create_config_map(semgrep_config_strings: List[str]) -> Dict[str, Rule]:
    """
    Create a mapping of Semgrep config strings to Rule objects.

    This will resolve the config strings into their Rule objects, as well.

    NOTE: this will only use the _first rule_ in the resolved config.
    TODO: support more than the first rule.
    """
    rule_by_config: Dict[str, Rule] = {}
    for raw_config in semgrep_config_strings:
        resolved = resolve_config(raw_config)
        # Validate the resolved config, then pull the first rule out of the
        # first rule-file's rule list.
        first_rule = list(Config._validate(resolved)[0].values())[0][0]
        rule_by_config[raw_config] = first_rule
    return rule_by_config
def test_rules_hash() -> None:
    """Hashing the same rule set twice is stable; different sets hash differently."""
    rule_yaml = dedent(
        """
        rules:
        - id: rule1
          pattern: $X == $X
          languages: [python]
          severity: INFO
          message: bad
        - id: rule2
          pattern: $X == $Y
          languages: [python]
          severity: INFO
          message: good
        - id: rule3
          pattern: $X < $Y
          languages: [c]
          severity: INFO
          message: doog
        """
    )
    # Load rules
    with NamedTemporaryFile() as tmp:
        tmp.write(rule_yaml.encode("utf-8"))
        tmp.flush()
        config, errors = Config.from_config_list([tmp.name])
    assert not errors

    rules = config.get_rules(True)
    assert len(rules) == 3
    rule1, rule2, rule3 = rules

    # Same single-rule input twice -> identical hash.
    metric_manager.set_rules_hash([rule1])
    single_rule_hash = metric_manager._rules_hash
    metric_manager.set_rules_hash([rule1])
    assert single_rule_hash == metric_manager._rules_hash

    # Same three-rule input twice -> identical hash...
    metric_manager.set_rules_hash(rules)
    all_rules_hash = metric_manager._rules_hash
    metric_manager.set_rules_hash(rules)
    assert all_rules_hash == metric_manager._rules_hash

    # ...while a different rule set produces a different hash.
    assert single_rule_hash != all_rules_hash
import logging
import sys

from semgrep.config_resolver import Config

# Use the module name, not the file path, as the logger name — the
# conventional logging.getLogger(__name__) idiom. (The original used
# __file__, which embeds a filesystem path in every log record's name.)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(stream=sys.stderr)
handler.setFormatter(
    logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
)
logger.addHandler(handler)

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    # Add arguments here
    parser.add_argument("--config", "-f")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()
    if args.verbose:
        logger.setLevel(logging.DEBUG)

    logger.info(f"Fetching '{args.config}'")
    # from_config_list returns (config, errors); surface any resolution
    # errors instead of silently discarding them with a bare [0] index.
    config, errors = Config.from_config_list([args.config])
    for error in errors:
        logger.warning(f"Error resolving config: {error}")
    rules = config.get_rules(True)
    for rule in rules:
        logger.debug(f"Hashing '{rule.id}'")
        # Emit CSV: rule id, full content hash.
        print(f"{rule.id},{rule.full_hash}")
def test_timings(snapshot) -> None:
    """Per-rule and per-file profiling data is aggregated into metric_manager."""
    rule_yaml = dedent(
        """
        rules:
        - id: rule1
          pattern: $X == $X
          languages: [python]
          severity: INFO
          message: bad
        - id: rule2
          pattern: $X == $Y
          languages: [python]
          severity: INFO
          message: good
        - id: rule3
          pattern: $X < $Y
          languages: [c]
          severity: INFO
          message: doog
        """
    )
    # Load rules
    with NamedTemporaryFile() as tmp:
        tmp.write(rule_yaml.encode("utf-8"))
        tmp.flush()
        config, errors = Config.from_config_list([tmp.name])
    assert not errors

    rules = config.get_rules(True)
    assert len(rules) == 3
    rule1, rule2, rule3 = rules

    # Mock Path().stat().st_size
    with patch.object(Path, "stat") as stat_mock:
        size_mock = MagicMock()
        # NOTE: this mock is a little fragile — it assumes st_size is read
        # exactly twice (once in set_run_times, once in set_run_timings),
        # visiting target[0] then target[1] each time, hence [1, 2, 1, 2].
        type(size_mock).st_size = PropertyMock(side_effect=[1, 2, 1, 2])
        stat_mock.return_value = size_mock

        targets = [Path("a"), Path("b")]
        profiling_data = ProfilingData()
        profiling_data.set_run_times(
            rule1,
            targets[0],
            Times(match_time=0.2, run_time=0.4, parse_time=0.3),
        )
        profiling_data.set_run_times(
            rule2,
            targets[1],
            Times(match_time=1.2, run_time=1.4, parse_time=0.2),
        )
        profiling_data.set_rule_parse_time(rule1, 0.05)
        profiling_data.set_rule_parse_time(rule2, 0.04)
        metric_manager.set_run_timings(profiling_data, targets, rules)

        # rule3 never ran, so it reports zero parse time and None run stats.
        assert metric_manager._rule_stats == [
            {
                "ruleHash": "720c14cd416c021bc45d6db0689dd0eb54d1d062bf9f446f85dae0cb5d1438c0",
                "parseTime": 0.05,
                "matchTime": 0.2,
                "runTime": 0.4,
                "bytesScanned": 1,
            },
            {
                "ruleHash": "a5360bb56a3b0a3c33c1bb2b6e7d6465e9a246ccb8940bc05710bc5b35a43e30",
                "parseTime": 0.04,
                "matchTime": 1.2,
                "runTime": 1.4,
                "bytesScanned": 2,
            },
            {
                "ruleHash": "2cc5dbc0cae3a8b6af0d8792079251c4d861b5e16815c1b1cdba676d1c96c5a5",
                "parseTime": 0.0,
                "matchTime": None,
                "runTime": None,
                "bytesScanned": 0,
            },
        ]
        assert metric_manager._file_stats == [
            {
                "size": 1,
                "numTimesScanned": 1,
                "parseTime": 0.3,
                "matchTime": 0.2,
                "runTime": 0.4,
            },
            {
                "size": 2,
                "numTimesScanned": 1,
                "parseTime": 0.2,
                "matchTime": 1.2,
                "runTime": 1.4,
            },
        ]