def main(
    output_handler: OutputHandler,
    target: List[str],
    pattern: str,
    lang: str,
    configs: List[str],
    no_rewrite_rule_ids: bool = False,
    jobs: int = 1,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    strict: bool = False,
    autofix: bool = False,
    dryrun: bool = False,
    disable_nosem: bool = False,
    dangerously_allow_arbitrary_code_execution_from_rules: bool = False,
    no_git_ignore: bool = False,
    timeout: int = 0,
    max_memory: int = 0,
    timeout_threshold: int = 0,
    skip_unknown_extensions: bool = False,
    testing: bool = False,
) -> None:
    """Load the configs, run their rules over the targets, and report results.

    Steps, in order:

    1. Resolve configs via ``get_config(pattern, lang, configs)`` and hand any
       loading errors to ``output_handler``.
    2. When no ``pattern`` was given, log a summary, abort if no config is
       valid, and notify the user of the upcoming work.
    3. Build a ``TargetManager`` and invoke ``CoreRunner`` on the rules.
    4. Unless ``disable_nosem`` is set, drop every match for which
       ``rule_match_nosem`` returns a truthy value.
    5. Pass the results to ``output_handler`` and, if ``autofix`` is set,
       call ``apply_fixes``.

    Args:
        output_handler: Receives config errors, runner errors and the results.
        target: Passed to ``TargetManager`` as ``targets``.
        pattern: Forwarded to ``get_config``; when falsy, the config summary
            and the "no valid configuration" check are performed.
        lang: Forwarded to ``get_config``.
        configs: Forwarded to ``get_config``.
        no_rewrite_rule_ids: Forwarded to ``configs_obj.get_rules``.
        jobs: Forwarded to ``CoreRunner``.
        include: Passed to ``TargetManager`` as ``includes`` and to
            ``notify_user_of_work``; ``None`` is treated as an empty list.
        exclude: Passed to ``TargetManager`` as ``excludes`` and to
            ``notify_user_of_work``; ``None`` is treated as an empty list.
        strict: Turns config loading errors into a ``SemgrepError``; also
            forwarded to ``rule_match_nosem``.
        autofix: When true, ``apply_fixes`` is called on the final matches.
        dryrun: Forwarded to ``apply_fixes``.
        disable_nosem: When true, the ``rule_match_nosem`` filter is skipped.
        dangerously_allow_arbitrary_code_execution_from_rules: Forwarded to
            ``CoreRunner`` as ``allow_exec``.
        no_git_ignore: Negated and passed to ``TargetManager`` as
            ``respect_git_ignore``.
        timeout: Forwarded to ``CoreRunner``.
        max_memory: Forwarded to ``CoreRunner``.
        timeout_threshold: Forwarded to ``CoreRunner``.
        skip_unknown_extensions: Forwarded to ``TargetManager``.
        testing: Forwarded to ``CoreRunner``.

    Raises:
        SemgrepError: With ``MISSING_CONFIG_EXIT_CODE`` when ``strict`` is set
            and configs failed to load, or when no ``pattern`` was given and
            no valid config exists.
    """
    include = [] if include is None else include
    exclude = [] if exclude is None else exclude

    configs_obj, errors = get_config(pattern, lang, configs)
    all_rules = configs_obj.get_rules(no_rewrite_rule_ids)
    output_handler.handle_semgrep_errors(errors)

    if strict and errors:
        raise SemgrepError(
            f"run with --strict and there were {len(errors)} errors loading configs",
            code=MISSING_CONFIG_EXIT_CODE,
        )

    if not pattern:
        # Assemble the pieces of the one-line debug summary.
        plural = "" if len(configs_obj.valid) <= 1 else "s"

        if len(configs_obj.valid) == 1:
            config_id_if_single = list(configs_obj.valid.keys())[0]
        else:
            config_id_if_single = ""

        invalid_msg = ""
        if len(errors) > 0:
            invalid_msg = f"({len(errors)} config files were invalid)"

        logger.debug(
            f"running {len(all_rules)} rules from {len(configs_obj.valid)} config{plural} {config_id_if_single} {invalid_msg}"
        )

        if not len(configs_obj.valid):
            raise SemgrepError(
                f"no valid configuration file found ({len(errors)} configs were invalid)",
                code=MISSING_CONFIG_EXIT_CODE,
            )

        notify_user_of_work(all_rules, include, exclude)

    target_manager = TargetManager(
        includes=include,
        excludes=exclude,
        targets=target,
        respect_git_ignore=not no_git_ignore,
        output_handler=output_handler,
        skip_unknown_extensions=skip_unknown_extensions,
    )

    # actually invoke semgrep
    core_runner = CoreRunner(
        allow_exec=dangerously_allow_arbitrary_code_execution_from_rules,
        jobs=jobs,
        timeout=timeout,
        max_memory=max_memory,
        timeout_threshold=timeout_threshold,
        testing=testing,
    )
    (
        rule_matches_by_rule,
        debug_steps_by_rule,
        semgrep_errors,
        stats_line,
    ) = core_runner.invoke_semgrep(target_manager, all_rules)

    output_handler.handle_semgrep_errors(semgrep_errors)

    if not disable_nosem:
        # Keep only the matches that rule_match_nosem does not flag.
        kept_by_rule = {}
        for rule, rule_matches in rule_matches_by_rule.items():
            kept = []
            for candidate in rule_matches:
                if rule_match_nosem(candidate, strict):
                    continue
                kept.append(candidate)
            kept_by_rule[rule] = kept
        rule_matches_by_rule = kept_by_rule

    output_handler.handle_semgrep_core_output(
        rule_matches_by_rule, debug_steps_by_rule, stats_line
    )

    if autofix:
        apply_fixes(rule_matches_by_rule, dryrun)
def main(
    output_handler: OutputHandler,
    target: List[str],
    pattern: str,
    lang: str,
    config: str,
    no_rewrite_rule_ids: bool = False,
    jobs: int = 1,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    strict: bool = False,
    autofix: bool = False,
    dryrun: bool = False,
    disable_nosem: bool = False,
    dangerously_allow_arbitrary_code_execution_from_rules: bool = False,
    no_git_ignore: bool = False,
) -> None:
    """Load the config, run its rules over the targets, and report results.

    Steps, in order:

    1. Resolve configs via ``get_config(pattern, lang, config)`` and hand any
       loading errors to ``output_handler``.
    2. Unless ``no_rewrite_rule_ids`` is set, pass the valid configs through
       ``rename_rule_ids``; then flatten them into a rule list.
    3. When no ``pattern`` was given, log a summary, abort if no config is
       valid, and only then notify the user of the upcoming work.
    4. Build a ``TargetManager`` and invoke ``CoreRunner`` on the rules.
    5. Unless ``disable_nosem`` is set, drop every match for which
       ``rule_match_nosem`` returns a truthy value.
    6. Pass the results to ``output_handler`` and, if ``autofix`` is set,
       call ``apply_fixes``.

    Args:
        output_handler: Receives config errors, runner errors and the results.
        target: Passed to ``TargetManager`` as ``targets``.
        pattern: Forwarded to ``get_config``; when falsy, the config summary
            and the "no valid configuration" check are performed.
        lang: Forwarded to ``get_config``.
        config: Forwarded to ``get_config``.
        no_rewrite_rule_ids: When true, ``rename_rule_ids`` is skipped.
        jobs: Forwarded to ``CoreRunner``.
        include: Passed to ``TargetManager`` as ``includes`` and to
            ``notify_user_of_work``; ``None`` is treated as an empty list.
        exclude: Passed to ``TargetManager`` as ``excludes`` and to
            ``notify_user_of_work``; ``None`` is treated as an empty list.
        strict: Turns config loading errors into a ``SemgrepError``; also
            forwarded to ``rule_match_nosem``.
        autofix: When true, ``apply_fixes`` is called on the final matches.
        dryrun: Forwarded to ``apply_fixes``.
        disable_nosem: When true, the ``rule_match_nosem`` filter is skipped.
        dangerously_allow_arbitrary_code_execution_from_rules: Forwarded to
            ``CoreRunner`` as ``allow_exec``.
        no_git_ignore: Negated and passed to ``TargetManager`` as
            ``respect_git_ignore``.

    Raises:
        SemgrepError: With ``MISSING_CONFIG_EXIT_CODE`` when ``strict`` is set
            and configs failed to load, or when no ``pattern`` was given and
            no valid config exists.
    """
    if include is None:
        include = []
    if exclude is None:
        exclude = []

    valid_configs, config_errors = get_config(pattern, lang, config)
    output_handler.handle_semgrep_errors(config_errors)

    if config_errors and strict:
        raise SemgrepError(
            f"run with --strict and there were {len(config_errors)} errors loading configs",
            code=MISSING_CONFIG_EXIT_CODE,
        )

    if not no_rewrite_rule_ids:
        # re-write the configs to have the hierarchical rule ids
        valid_configs = rename_rule_ids(valid_configs)

    # extract just the rules from valid configs
    all_rules = flatten_configs(valid_configs)

    if not pattern:
        plural = "s" if len(valid_configs) > 1 else ""
        config_id_if_single = (
            next(iter(valid_configs)) if len(valid_configs) == 1 else ""
        )
        invalid_msg = (
            f"({len(config_errors)} config files were invalid)"
            if config_errors
            else ""
        )
        debug_print(
            f"running {len(all_rules)} rules from {len(valid_configs)} config{plural} {config_id_if_single} {invalid_msg}"
        )

        # Validate before notifying: do not announce work that is about to
        # be aborted because there is nothing valid to run.
        if not valid_configs:
            raise SemgrepError(
                f"no valid configuration file found ({len(config_errors)} configs were invalid)",
                code=MISSING_CONFIG_EXIT_CODE,
            )

        notify_user_of_work(all_rules, include, exclude)

    respect_git_ignore = not no_git_ignore
    target_manager = TargetManager(
        includes=include,
        excludes=exclude,
        targets=target,
        respect_git_ignore=respect_git_ignore,
    )

    # actually invoke semgrep
    rule_matches_by_rule, debug_steps_by_rule, semgrep_core_errors = CoreRunner(
        allow_exec=dangerously_allow_arbitrary_code_execution_from_rules,
        jobs=jobs,
    ).invoke_semgrep(target_manager, all_rules)

    # The runner's errors are converted before being handed to the handler.
    semgrep_errors = [e.into_semgrep_error() for e in semgrep_core_errors]
    output_handler.handle_semgrep_errors(semgrep_errors)

    if not disable_nosem:
        # Keep only the matches that rule_match_nosem does not flag.
        rule_matches_by_rule = {
            rule: [
                rule_match
                for rule_match in rule_matches
                if not rule_match_nosem(rule_match, strict)
            ]
            for rule, rule_matches in rule_matches_by_rule.items()
        }

    output_handler.handle_semgrep_core_output(rule_matches_by_rule, debug_steps_by_rule)

    if autofix:
        apply_fixes(rule_matches_by_rule, dryrun)