def _run_rules(
    self, rules: List[Rule], target_manager: TargetManager
) -> Tuple[Dict[Rule, List[RuleMatch]], Dict[Rule, List[Dict[str, Any]]], List[SemgrepError], int, ]:
    """Run every rule through ``self._run_rule``, collecting results per rule.

    Returns a 4-tuple of:
      - findings keyed by rule,
      - debugging steps keyed by rule,
      - all errors, deduplicated,
      - the number of distinct target paths that were scanned.
    """
    findings_by_rule: Dict[Rule, List[RuleMatch]] = {}
    debugging_steps_by_rule: Dict[Rule, List[Dict[str, Any]]] = {}
    all_errors: List[SemgrepError] = []
    # Per-file count of match timeouts.  Once a file reaches
    # self._timeout_threshold it is recorded in max_timeout_files, which is
    # passed to _run_rule so later rules can skip the file entirely.
    file_timeouts: Dict[Path, int] = collections.defaultdict(lambda: 0)
    max_timeout_files: List[Path] = []
    all_targets: Set[Path] = set()
    # cf. for bar_format: https://tqdm.github.io/docs/tqdm/
    with tempfile.TemporaryDirectory() as semgrep_core_ast_cache_dir:
        for rule in progress_bar(
                rules, bar_format="{l_bar}{bar}|{n_fmt}/{total_fmt}"):
            debug_tqdm_write(f"Running rule {rule._raw.get('id')}...")
            rule_matches, debugging_steps, errors, rule_targets = self._run_rule(
                rule, target_manager, semgrep_core_ast_cache_dir,
                max_timeout_files)
            all_targets = all_targets.union(rule_targets)
            findings_by_rule[rule] = rule_matches
            debugging_steps_by_rule[rule] = debugging_steps
            all_errors.extend(errors)
            for err in errors:
                if isinstance(err, MatchTimeoutError):
                    file_timeouts[err.path] += 1
                    # BUGFIX: the threshold condition remains true for every
                    # timeout after the first one that crosses it, so guard
                    # against appending the same path twice.  (The sibling
                    # _run_rules_direct_to_semgrep_core avoids this by using
                    # a set; a guarded list keeps _run_rule's interface
                    # byte-compatible.)
                    if (self._timeout_threshold != 0
                            and file_timeouts[err.path] >=
                            self._timeout_threshold
                            and err.path not in max_timeout_files):
                        max_timeout_files.append(err.path)
    all_errors = dedup_errors(all_errors)
    return findings_by_rule, debugging_steps_by_rule, all_errors, len(
        all_targets)
def _run_rules(
    self, rules: List[Rule], target_manager: TargetManager
) -> Tuple[Dict[Rule, List[RuleMatch]], Dict[Rule, List[Dict[str, Any]]], List[CoreException], ]:
    """Execute each rule via ``self._run_rule`` and gather the results.

    Returns a 3-tuple: findings keyed by rule, debugging steps keyed by
    rule, and the deduplicated list of core exceptions raised along the way.
    """
    findings_by_rule: Dict[Rule, List[RuleMatch]] = {}
    debugging_steps_by_rule: Dict[Rule, List[Dict[str, Any]]] = {}
    all_errors: List[CoreException] = []
    # cf. for bar_format: https://tqdm.github.io/docs/tqdm/
    with tempfile.TemporaryDirectory() as ast_cache_dir:
        rule_iter = progress_bar(
            rules, bar_format="{l_bar}{bar}|{n_fmt}/{total_fmt}")
        for rule in rule_iter:
            debug_tqdm_write(f"Running rule {rule._raw.get('id')}")
            matches, steps, rule_errors = self._run_rule(
                rule, target_manager, ast_cache_dir)
            findings_by_rule[rule] = matches
            debugging_steps_by_rule[rule] = steps
            all_errors.extend(rule_errors)
    return findings_by_rule, debugging_steps_by_rule, dedup_errors(all_errors)
def _run_rules_direct_to_semgrep_core(
    self,
    rules: List[Rule],
    target_manager: TargetManager,
    profiler: ProfileManager,
) -> Tuple[Dict[Rule, List[RuleMatch]], Dict[Rule, List[Any]], List[SemgrepError], Set[Path], ProfilingData, ]:
    """Run rules by invoking the external semgrep-core binary once per
    (rule, language) pair, bypassing ``self._run_rule``.

    Returns a 5-tuple of:
      - findings keyed by rule,
      - debugging steps keyed by rule (always an empty dict here),
      - SemgrepErrors parsed out of semgrep-core's JSON output,
      - the set of all target paths actually scanned,
      - timing data (populated only when the output contains "time").
    """
    logger.debug(f"Passing whole rules directly to semgrep_core")
    outputs: Dict[Rule, List[RuleMatch]] = collections.defaultdict(list)
    errors: List[SemgrepError] = []
    all_targets: Set[Path] = set()
    # Per-file count of match timeouts.  Once a file reaches
    # self._timeout_threshold it goes into max_timeout_files and is
    # filtered out of the target list for every subsequent invocation.
    file_timeouts: Dict[Path, int] = collections.defaultdict(lambda: 0)
    max_timeout_files: Set[Path] = set()
    profiling_data: ProfilingData = ProfilingData()
    # cf. for bar_format: https://tqdm.github.io/docs/tqdm/
    with tempfile.TemporaryDirectory() as semgrep_core_ast_cache_dir:
        for rule in progress_bar(
                rules, bar_format="{l_bar}{bar}|{n_fmt}/{total_fmt}"):
            for language in rule.languages:
                debug_tqdm_write(f"Running rule {rule.id}...")
                # The rule (as YAML) and the target list are handed to
                # semgrep-core via named temp files.  NOTE(review):
                # equiv_file is created but never referenced in the
                # command below -- presumably reserved for equivalences
                # support; confirm before removing.
                with tempfile.NamedTemporaryFile(
                    "w", suffix=".yaml"
                ) as rule_file, tempfile.NamedTemporaryFile(
                    "w") as target_file, tempfile.NamedTemporaryFile(
                    "w") as equiv_file:
                    targets = self.get_files_for_language(
                        language, rule, target_manager)
                    # Skip files that already exceeded the timeout threshold.
                    targets = [
                        target for target in targets
                        if target not in max_timeout_files
                    ]
                    # opti: no need to call semgrep-core if no target files
                    if not targets:
                        continue
                    all_targets = all_targets.union(targets)
                    target_file.write("\n".join(
                        map(lambda p: str(p), targets)))
                    # Flush so semgrep-core sees the content when it opens
                    # the file by name.
                    target_file.flush()
                    yaml = YAML()
                    yaml.dump({"rules": [rule._raw]}, rule_file)
                    rule_file.flush()
                    cmd = [SEMGREP_PATH] + [
                        "-lang",
                        language.value,
                        "-json",
                        "-config",
                        rule_file.name,
                        "-j",
                        str(self._jobs),
                        "-target_file",
                        target_file.name,
                        "-use_parsing_cache",
                        semgrep_core_ast_cache_dir,
                        "-timeout",
                        str(self._timeout),
                        "-max_memory",
                        str(self._max_memory),
                        "-json_time",
                    ]
                    if self._optimizations != "none":
                        cmd.append("-fast")
                    stderr: Optional[int] = subprocess.PIPE
                    if is_debug():
                        cmd += ["-debug"]
                        # In debug mode, let semgrep-core's stderr pass
                        # through instead of capturing it.
                        stderr = None
                    core_run = sub_run(
                        cmd, stdout=subprocess.PIPE, stderr=stderr)
                    output_json = self._extract_core_output(rule, core_run)
                    if "time" in output_json:
                        self._add_match_times(
                            rule, profiling_data, output_json["time"])
                # end with tempfile.NamedTemporaryFile(...) -- parsing of
                # the captured output happens after the temp files close;
                # output_json is only bound when targets were non-empty
                # (otherwise the `continue` above skipped this iteration).
                pattern_matches = [
                    PatternMatch(match) for match in output_json["matches"]
                ]
                findings = create_output(rule, pattern_matches)
                findings = dedup_output(findings)
                outputs[rule].extend(findings)
                parsed_errors = [
                    CoreException.from_json(
                        e, language.value,
                        rule.id).into_semgrep_error()
                    for e in output_json["errors"]
                ]
                for err in parsed_errors:
                    if isinstance(err, MatchTimeoutError):
                        file_timeouts[err.path] += 1
                        if (self._timeout_threshold != 0
                                and file_timeouts[err.path] >=
                                self._timeout_threshold):
                            max_timeout_files.add(err.path)
                errors.extend(parsed_errors)
            # end for language
        # end for rule
    return outputs, {}, errors, all_targets, profiling_data