def _build_text_output(
    rule_matches: Sequence[RuleMatch],
    color_output: bool,
    per_finding_max_lines_limit: Optional[int],
    per_line_max_chars_limit: Optional[int],
) -> Iterator[str]:
    """
    Yield the text report for `rule_matches`, one output line at a time.

    Matches are processed in `(path, id)` order. For each match this yields,
    in order:

    * a file header (preceded by an empty line for every file but the first)
      whenever the path differs from the previous match's path;
    * a `severity:... rule:<id>: <message>` line, unless the id is empty,
      equals `CLI_RULE_ID`, or the message repeats the previous one in the
      same file;
    * an `autofix:` line when the match carries `fix` or `fix_regex`;
    * whatever `TextFormatter._finding_to_line` yields for the match.

    :param rule_matches: findings to render
    :param color_output: forwarded to `TextFormatter._finding_to_line`
    :param per_finding_max_lines_limit: forwarded to `_finding_to_line`
    :param per_line_max_chars_limit: forwarded to `_finding_to_line`
    """
    ordered = sorted(rule_matches, key=lambda match: (match.path, match.id))
    final_index = len(ordered) - 1
    previous_path = None
    previous_message = None

    for position, match in enumerate(ordered):
        path = match.path
        rule_id = match.id
        message = match.message
        fix = match.fix

        # Entering a new file: print its header and forget the last message
        # so the rule line is always shown again.
        if previous_path is None or previous_path != path:
            if previous_path is not None:
                yield ""
            yield with_color("green", str(path))
            previous_message = None

        # don't display the rule line if the check is empty
        if (
            rule_id
            and rule_id != CLI_RULE_ID
            and (previous_message is None or previous_message != message)
        ):
            severity = match.severity
            severity_label = f"severity:{severity.value.lower()} "
            if severity == RuleSeverity.WARNING:
                severity_color = "yellow"
            elif severity == RuleSeverity.ERROR:
                severity_color = "red"
            else:
                # any other severity is printed uncolored
                severity_color = None
            if severity_color is not None:
                severity_label = with_color(severity_color, severity_label)
            rule_label = with_color("yellow", f"rule:{rule_id}: {message}")
            yield f"{severity_label}{rule_label}"

        previous_path = path
        previous_message = message

        if fix:
            autofix_label = with_color("blue", "autofix:")
            yield f"{autofix_label} {fix}"
        elif match.fix_regex:
            fix_regex = match.fix_regex
            autofix_label = with_color("blue", "autofix:")
            regex = fix_regex.get("regex")
            replacement = fix_regex.get("replacement")
            count = fix_regex.get("count", "g")
            yield f"{autofix_label} s/{regex}/{replacement}/{count}"

        # The separator flag passed down is true only when the next match
        # (in sorted order) lives in the same file as this one.
        upcoming = ordered[position + 1] if position < final_index else None
        if upcoming:
            same_file_follows = upcoming.path == match.path
        else:
            same_file_follows = False

        yield from TextFormatter._finding_to_line(
            match,
            color_output,
            per_finding_max_lines_limit,
            per_line_max_chars_limit,
            same_file_follows,
        )
def _format_line_number(span: Span, line_number: Optional[int]) -> str:
    """
    Build the colored gutter that precedes a line of source in an error snippet.

    The gutter is the line number (or nothing, when `line_number` is None)
    left-justified to the width reported by
    `ErrorWithSpan._line_number_width(span)`, followed by `"| "`.

    :param span: the span being printed; determines the gutter width
    :param line_number: number to show, or None for a blank gutter
    :return: the gutter text wrapped by `with_color(Fore.LIGHTBLUE_EX, ...)`
    """
    # NOTE(review): the previous comment here said "line numbers are 0 indexed";
    # nothing in this function depends on that -- confirm against callers.
    width = ErrorWithSpan._line_number_width(span)

    if line_number is None:
        label = ""
    else:
        label = str(line_number)
        # the number must be strictly narrower than the gutter
        assert len(label) < width

    return with_color(Fore.LIGHTBLUE_EX, label.ljust(width) + "| ")
def handle_semgrep_timeout_errors(self, errors: Dict[Path, List[str]]) -> None:
    """
    Log one red warning per path listing the rules that timed out on it.

    Marks the handler as having produced output, then for every path in
    `errors` logs the (sorted) rule names. When the number of timeouts on a
    path equals `self.settings.timeout_threshold`, the message also says
    Semgrep stopped running rules on that path. If any path has more than
    five timeouts while no threshold is configured, a final hint about
    `--timeout-threshold` is logged.

    :param errors: maps each path to the rule names that timed out on it;
        each list is sorted in place
    """
    self.has_output = True
    show_threshold_hint = False

    for path, rule_ids in errors.items():
        timeout_count = len(rule_ids)
        rule_ids.sort()
        joined_rules = ", ".join(rule_ids)
        message = f"Warning: {timeout_count} timeout error(s) in {path} when running the following rules: [{joined_rules}]"

        if timeout_count == self.settings.timeout_threshold:
            message += f"\nSemgrep stopped running rules on {path} after {timeout_count} timeout error(s). See `--timeout-threshold` for more info."

        # Once the hint has been requested there is nothing more to decide.
        if not show_threshold_hint:
            show_threshold_hint = (
                timeout_count > 5 and not self.settings.timeout_threshold
            )

        logger.error(with_color(colorama.Fore.RED, message))

    if show_threshold_hint:
        hint = "You can use the `--timeout-threshold` flag to set a number of timeouts after which a file will be skipped."
        logger.error(with_color(colorama.Fore.RED, hint))
def __str__(self) -> str:
    """
    Render this error as a colored, multi-line report.

    Layout: a `semgrep <level>: <short_msg>` header, one code snippet per
    span (joined by newlines), an optional `= help: ...` line, and finally
    `long_msg` in red.
    """
    title = with_color("red", "semgrep " + self.level.name.lower())
    header = f"{title}: {self.short_msg}"

    rendered_spans = []
    for span in self.spans:
        block = []
        # Spans from the temp file get no location line.
        if span.file != "semgrep temp file":
            block.append(f" --> {span.file}:{span.start.line}")

        # all the lines of code in the file this comes from
        source: List[str] = SourceTracker.source(span.source_hash)

        # 1. leading context: `context_start` up to the line before `start`
        if span.context_start:
            block.extend(
                self._format_code_segment(
                    span.context_start, span.start.previous_line(), source, span
                )
            )

        # 2. the focus of the span: `start` to `end`
        block.extend(self._format_code_segment(span.start, span.end, source, span))

        # 3. a caret underline, only drawn for single-line spans
        if span.start.line == span.end.line:
            carets = with_color("red", (span.end.col - span.start.col) * "^")
            gutter = self._format_line_number(span, None)
            block.append(gutter + " " * (span.start.col - 1) + carets)

        # 4. trailing context: the line after `end` up to `context_end`
        if span.context_end:
            block.extend(
                self._format_code_segment(
                    span.end.next_line(), span.context_end, source, span
                )
            )

        rendered_spans.append("\n".join(block))

    snippet_text = "\n".join(rendered_spans)

    help_line = (
        f"= {with_color('cyan', 'help', bold=True)}: {self.help}" if self.help else ""
    )

    # TODO remove this when temp files are no longer in error messages
    snippet_section = f"{snippet_text}\n" if snippet_text != "" else ""

    footer = with_color("red", self.long_msg or "")
    return f"{header}\n{snippet_section}{help_line}\n{footer}\n"
def _color_line(
    line: str,
    line_number: int,
    start_line: int,
    start_col: int,
    end_line: int,
    end_col: int,
) -> str:
    """
    Wrap the matched part of `line` in `with_color("bright_black", ...)`.

    On the first line of the match the highlight begins at `start_col`; on
    later lines it begins at the start of the line. On the last line of the
    match (or beyond) it ends at `end_col`, inclusive; on earlier lines it
    runs to the end of the line.

    :param line: text of the source line
    :param line_number: number of `line` within the file
    :param start_line: line on which the match starts
    :param start_col: column at which the match starts
    :param end_line: line on which the match ends
    :param end_col: column at which the match ends
    :return: `line` with the highlighted slice replaced by its colored form
    """
    # Columns are shifted down by one to get string indices (presumably they
    # are 1-based) and clamped so they never go negative.
    if line_number > start_line:
        first = 0
    else:
        first = max(start_col - 1, 0)

    if line_number >= end_line:
        last = max(end_col - 1, 0)
    else:
        # past the end of the string, so the slice covers the whole remainder
        last = len(line) + 1

    stop = last + 1  # want the color to include the end_col
    highlighted = with_color("bright_black", line[first:stop])
    return line[:first] + highlighted + line[stop:]
def _finding_to_line(
    rule_match: RuleMatch,
    color_output: bool,
    per_finding_max_lines_limit: Optional[int],
    per_line_max_chars_limit: Optional[int],
    show_separator: bool,
) -> Iterator[str]:
    """
    Yield the printable source lines of one finding, then any trailing notices.

    Nothing is yielded when `rule_match.path` is falsy. Otherwise the lines
    come from `rule_match.extra["fixed_lines"]` when that is present and
    non-empty, else from `rule_match.lines`.

    :param rule_match: the finding to print
    :param color_output: when true, each line is passed through
        `TextFormatter._color_line` and its line number is colored green
    :param per_finding_max_lines_limit: if truthy, at most this many lines
        are printed and the number of hidden lines is reported
    :param per_line_max_chars_limit: if truthy, lines longer than this are
        shortened and marked with `ELLIPSIS_STRING`
    :param show_separator: when true (and no "hid N lines" banner is
        printed), `BREAK_LINE` is yielded after the finding
    """
    # NOTE(review): the indentation of this function was reconstructed from a
    # whitespace-collapsed source -- confirm the nesting against the original.
    path = rule_match.path
    start_line = rule_match.start.line
    end_line = rule_match.end.line
    start_col = rule_match.start.col
    end_col = rule_match.end.col
    # number of lines hidden by the per-finding limit (may be <= 0 if nothing was cut)
    trimmed = 0
    # becomes True as soon as one line is shortened by the per-line limit
    stripped = False
    if path:
        lines = rule_match.extra.get("fixed_lines") or rule_match.lines
        if per_finding_max_lines_limit:
            trimmed = len(lines) - per_finding_max_lines_limit
            lines = lines[:per_finding_max_lines_limit]

        for i, line in enumerate(lines):
            line = line.rstrip()
            line_number = ""
            if start_line:
                if color_output:
                    line = TextFormatter._color_line(
                        line,
                        start_line + i,
                        start_line,
                        start_col,
                        end_line,
                        end_col,
                    )
                    line_number = with_color("green", f"{start_line + i}")
                else:
                    line_number = f"{start_line + i}"

                # The length test runs after coloring, so whatever
                # `_color_line` added to the line counts toward the limit.
                if (
                    per_line_max_chars_limit
                    and len(line) > per_line_max_chars_limit
                ):
                    stripped = True
                    is_first_line = i == 0
                    if is_first_line:
                        # First line: keep a window that starts at the match
                        # column rather than at the start of the line.
                        line = (
                            line[
                                start_col - 1 : start_col - 1 + per_line_max_chars_limit
                            ]
                            + ELLIPSIS_STRING
                        )
                        # text was cut off on the left as well
                        if start_col > 1:
                            line = ELLIPSIS_STRING + line
                    else:
                        line = line[:per_line_max_chars_limit] + ELLIPSIS_STRING
                    # while stripping a string, the ANSI code for resetting color might also get stripped.
                    line = line + colorama.Style.RESET_ALL

            # Prefix with "<line number>:" only when a line number was produced.
            yield f"{line_number}:{line}" if line_number else f"{line}"

        if stripped:
            yield f"[Shortened a long line from output, adjust with {MAX_CHARS_FLAG_NAME}]"
        trimmed_str = (
            f" [hid {trimmed} additional lines, adjust with {MAX_LINES_FLAG_NAME}] "
        )
        # With a limit of exactly 1 neither the banner nor the separator is printed.
        if per_finding_max_lines_limit != 1:
            if trimmed > 0:
                yield trimmed_str.center(BREAK_LINE_WIDTH, BREAK_LINE_CHAR)
            elif show_separator:
                yield BREAK_LINE
def __str__(self) -> str:
    """Return the lexical-error warning for this rule and path, colored red."""
    return with_color(
        Fore.RED,
        f"Warning: Semgrep encountered a lexical error when running {self.rule_id} on {self.path}. "
        "Please ensure this is valid code.",
    )
def __str__(self) -> str:
    """Return the out-of-memory warning for this rule and path, colored red."""
    return with_color(
        Fore.RED,
        f"Warning: Semgrep exceeded memory when running {self.rule_id} on {self.path}. "
        "See `--max-memory` for more info.",
    )
def __str__(self) -> str:
    """Return the too-many-matches warning for this rule and path, colored red."""
    warning = (
        "Warning: Semgrep exceeded number of matches when running "
        f"{self.rule_id} on {self.path}."
    )
    return with_color(Fore.RED, warning)
def cli(
    autofix: bool,
    config: Optional[Tuple[str, ...]],
    dangerously_allow_arbitrary_code_execution_from_rules: bool,
    debug: bool,
    debugging_json: bool,
    dryrun: bool,
    dump_ast: bool,
    emacs: bool,
    enable_metrics: bool,
    enable_nosem: bool,
    enable_version_check: bool,
    error_on_findings: bool,
    exclude: Optional[Tuple[str, ...]],
    force_color: bool,
    generate_config: bool,
    include: Optional[Tuple[str, ...]],
    jobs: int,
    json: bool,
    json_stats: bool,
    json_time: bool,
    junit_xml: bool,
    lang: Optional[str],
    max_chars_per_line: int,
    max_lines_per_finding: int,
    max_memory: int,
    max_target_bytes: int,
    optimizations: str,
    output: Optional[str],
    pattern: Optional[str],
    quiet: bool,
    replacement: Optional[str],
    rewrite_rule_ids: bool,
    sarif: bool,
    save_test_output_tar: bool,
    scan_unknown_extensions: bool,
    severity: Optional[Tuple[str, ...]],
    strict: bool,
    synthesize_patterns: str,
    target: Tuple[str, ...],
    test: bool,
    test_ignore_todo: bool,
    time: bool,
    timeout: int,
    timeout_threshold: int,
    use_git_ignore: bool,
    validate: bool,
    verbose: bool,
    version: bool,
    vim: bool,
) -> None:
    """
    Semgrep CLI. Searches TARGET paths for matches to rules or patterns. Defaults to searching entire current working directory.

    For more information about Semgrep, go to https://semgrep.dev.
    """
    # NOTE(review): the docstring above is presumably surfaced as the command's
    # help text, so it is left verbatim. The indentation of this function was
    # reconstructed from a whitespace-collapsed source -- confirm the nesting.
    #
    # Overview of the flow below:
    #   1. `version` short-circuits: print, optionally version-check, return.
    #   2. Heavy imports, flag validation and warnings.
    #   3. Pick the output format and build `OutputSettings`.
    #   4. Dispatch on the mode flags (test / dump_ast / synthesize_patterns /
    #      validate / generate_config / normal scan).
    #   5. Optionally run the version check.

    # Fast path: nothing below this block runs when only the version is requested.
    if version:
        print(__VERSION__)
        if enable_version_check:
            from semgrep.version import version_check

            version_check()
        return

    # To keep version runtime fast, we defer non-version imports until here
    import semgrep.semgrep_main
    import semgrep.test
    import semgrep.config_resolver
    from semgrep.constants import OutputFormat
    from semgrep.constants import DEFAULT_CONFIG_FILE
    from semgrep.dump_ast import dump_parsed_ast
    from semgrep.error import SemgrepError
    from semgrep.metric_manager import metric_manager
    from semgrep.output import managed_output
    from semgrep.output import OutputSettings
    from semgrep.synthesize_patterns import synthesize
    from semgrep.target_manager import optional_stdin_target

    # With no explicit targets, scan the current directory.
    target_sequence: Sequence[str] = list(target) if target else [os.curdir]

    if enable_metrics:
        metric_manager.enable()
    else:
        metric_manager.disable()

    # Using both filters is allowed; the user is only warned about the overlap.
    if include and exclude:
        logger.warning(
            with_color(
                "yellow",
                "Paths that match both --include and --exclude will be skipped by Semgrep.",
            ))

    # A pattern given without a language is rejected.
    if pattern is not None and lang is None:
        abort("-e/--pattern and -l/--lang must both be specified")

    if dangerously_allow_arbitrary_code_execution_from_rules:
        logger.warning(
            "The '--dangerously-allow-arbitrary-code-execution-from-rules' flag is now deprecated and does nothing. It will be removed in the future."
        )

    # Either timing flag turns on timing output.
    output_time = time or json_time

    # set the flags
    semgrep.util.set_flags(verbose=verbose, debug=debug, quiet=quiet, force_color=force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker()
    except SemgrepError as e:
        # log with traceback, then propagate the same error to the caller
        logger.exception(str(e))
        raise e

    # Format precedence when several flags are set:
    # JSON (json / json_time / debugging_json) > JUnit XML > SARIF > Emacs > Vim;
    # plain text when none is set.
    output_format = OutputFormat.TEXT
    if json or json_time or debugging_json:
        output_format = OutputFormat.JSON
    elif junit_xml:
        output_format = OutputFormat.JUNIT_XML
    elif sarif:
        output_format = OutputFormat.SARIF
    elif emacs:
        output_format = OutputFormat.EMACS
    elif vim:
        output_format = OutputFormat.VIM

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=output,
        error_on_findings=error_on_findings,
        strict=strict,
        debug=debugging_json,
        verbose_errors=verbose,
        timeout_threshold=timeout_threshold,
        json_stats=json_stats,
        output_time=output_time,
        output_per_finding_max_lines_limit=max_lines_per_finding,
        output_per_line_max_chars_limit=max_chars_per_line,
    )

    if test:
        # the test code (which isn't a "test" per se but is actually machinery to evaluate semgrep performance)
        # uses managed_output internally
        # NOTE(review): there is no return after this call, so if `test_main`
        # returns, execution continues into the `with` block below --
        # presumably `test_main` exits the process; confirm.
        semgrep.test.test_main(
            target=target_sequence,
            config=config,
            test_ignore_todo=test_ignore_todo,
            strict=strict,
            json=json,
            save_test_output_tar=save_test_output_tar,
            optimizations=optimizations,
        )

    # The 'optional_stdin_target' context manager must remain before
    # 'managed_output'. Output depends on file contents so we cannot have
    # already deleted the temporary stdin file.
    with optional_stdin_target(
            target_sequence) as target_sequence, managed_output(
                output_settings) as output_handler:
        # The modes below are mutually exclusive; the first flag set wins.
        if dump_ast:
            dump_parsed_ast(json, __validate_lang("--dump_ast", lang), pattern, target_sequence)
        elif synthesize_patterns:
            synthesize(
                __validate_lang("--synthesize-patterns", lang),
                synthesize_patterns,
                target_sequence,
            )
        elif validate:
            # Resolve the configuration, report counts, and fail if any
            # configuration errors were found.
            configs, config_errors = semgrep.config_resolver.get_config(
                pattern, lang, config or [])
            valid_str = "invalid" if config_errors else "valid"
            rule_count = len(configs.get_rules(True))
            logger.info(
                f"Configuration is {valid_str} - found {len(configs.valid)} valid configuration(s), {len(config_errors)} configuration error(s), and {rule_count} rule(s)."
            )
            if config_errors:
                # hand every error to the output handler before raising
                for err in config_errors:
                    output_handler.handle_semgrep_error(err)
                raise SemgrepError(
                    "Please fix the above errors and try again.")
        elif generate_config:
            # Writes to DEFAULT_CONFIG_FILE, truncating any existing file.
            with open(DEFAULT_CONFIG_FILE, "w") as fd:
                semgrep.config_resolver.generate_config(fd, lang, pattern)
        else:
            # Normal scan. Several CLI flags are phrased positively
            # (enable_* / use_* / scan_* / rewrite_*) while `main` takes the
            # negated form, hence the `not ...` arguments.
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=target_sequence,
                pattern=pattern,
                lang=lang,
                configs=(config or []),
                no_rewrite_rule_ids=(not rewrite_rule_ids),
                jobs=jobs,
                include=include,
                exclude=exclude,
                max_target_bytes=max_target_bytes,
                replacement=replacement,
                strict=strict,
                autofix=autofix,
                dryrun=dryrun,
                disable_nosem=(not enable_nosem),
                no_git_ignore=(not use_git_ignore),
                timeout=timeout,
                max_memory=max_memory,
                timeout_threshold=timeout_threshold,
                skip_unknown_extensions=(not scan_unknown_extensions),
                severity=severity,
                optimizations=optimizations,
            )

    # Runs only after the context managers above have exited.
    if enable_version_check:
        from semgrep.version import version_check

        version_check()
def __str__(self) -> str:
    """Return the red-colored message naming the rule, language and unparsable pattern."""
    return with_color(
        Fore.RED,
        f"Rule id: {self.rule_id} contains a pattern that could not be parsed "
        f"as a pattern for language {self.language}: `{self.pattern}`",
    )
def __str__(self) -> str:
    """Return the error message in red immediately followed by the stack trace in white."""
    message_part = with_color("red", self._error_message)
    trace_part = with_color("white", self._stack_trace)
    return message_part + trace_part
def __str__(self) -> str:
    """Return the red-colored fatal-error report: `self.msg` between rules, then a bug-report link."""
    report = (
        "semgrep-core reported a fatal error:\n"
        "-----\n"
        f"{self.msg}-----\n"
        "Please file a bug report at https://github.com/returntocorp/semgrep/issues/new/choose"
    )
    return with_color(Fore.RED, report)
def __str__(self) -> str:
    """Return the yellow-colored semgrep-core error line with a humanized form of `check_id`."""
    # "MatchingError" -> "matching error": put a space before each run of
    # capitals, collapse the whitespace, then lowercase.
    spaced = re.sub("([A-Z]+)", r" \1", self.check_id)
    words = spaced.split()
    error_id = " ".join(words).lower()
    text = f"semgrep-core reported a {error_id}\n --> {self.msg}"
    return with_color(Fore.YELLOW, text)