Exemple #1
0
    def _build_text_output(
        rule_matches: Sequence[RuleMatch],
        color_output: bool,
        per_finding_max_lines_limit: Optional[int],
        per_line_max_chars_limit: Optional[int],
    ) -> Iterator[str]:
        """
        Lazily yield the text report for `rule_matches`, one output line at a time.

        Matches are processed in (path, id) order. For each match, in order:
          * when the path differs from the previous match, a header with the
            path (preceded by an empty line for every file but the first);
          * a severity/rule/message line, skipped when the rule id is empty or
            equals `CLI_RULE_ID`, or when the message repeats the previous
            message within the same file;
          * an `autofix:` line when the match has `fix` or `fix_regex`;
          * the lines produced by `TextFormatter._finding_to_line`, which is
            told whether the next match belongs to the same file.
        """
        ordered = sorted(rule_matches, key=lambda match: (match.path, match.id))
        final_index = len(ordered) - 1
        previous_path = None
        previous_message = None

        for position, finding in enumerate(ordered):
            path = finding.path
            rule_id = finding.id
            message = finding.message
            fix = finding.fix

            # entering a new file: emit its header and forget the last message
            if previous_path is None or previous_path != path:
                if previous_path is not None:
                    yield ""
                yield with_color("green", str(path))
                previous_message = None

            # don't display the rule line if the check is empty
            if (
                rule_id
                and rule_id != CLI_RULE_ID
                and (previous_message is None or previous_message != message)
            ):
                severity = finding.severity
                severity_label = f"severity:{severity.value.lower()} "
                if severity == RuleSeverity.WARNING:
                    severity_label = with_color("yellow", severity_label)
                elif severity == RuleSeverity.ERROR:
                    severity_label = with_color("red", severity_label)
                rule_text = with_color("yellow", f"rule:{rule_id}: {message}")
                yield f"{severity_label}{rule_text}"

            previous_path = path
            previous_message = message

            upcoming = ordered[position + 1] if position != final_index else None

            if fix:
                yield f"{with_color('blue', 'autofix:')} {fix}"
            elif finding.fix_regex:
                fix_regex = finding.fix_regex
                autofix_label = with_color("blue", "autofix:")
                pattern = fix_regex.get("regex")
                replacement = fix_regex.get("replacement")
                count = fix_regex.get("count", "g")
                yield f"{autofix_label} s/{pattern}/{replacement}/{count}"

            # the separator is only requested when the next finding is in the same file
            if upcoming:
                followed_by_same_file = upcoming.path == finding.path
            else:
                followed_by_same_file = False

            yield from TextFormatter._finding_to_line(
                finding,
                color_output,
                per_finding_max_lines_limit,
                per_line_max_chars_limit,
                followed_by_same_file,
            )
Exemple #2
0
    def _format_line_number(span: Span, line_number: Optional[int]) -> str:
        """
        Build the colored gutter that prefixes one snippet line.

        The result is the line number, left-justified and padded to the width
        computed by `ErrorWithSpan._line_number_width(span)`, followed by `| `.
        When `line_number` is None the number column is blank but keeps the
        same width.
        """
        width = ErrorWithSpan._line_number_width(span)
        if line_number is None:
            number_column = "".ljust(width)
        else:
            digits = str(line_number)
            # the computed width must be strictly wider than the number itself
            assert len(digits) < width
            number_column = digits.ljust(width)
        return with_color(Fore.LIGHTBLUE_EX, number_column + "| ")
Exemple #3
0
    def handle_semgrep_timeout_errors(self, errors: Dict[Path,
                                                         List[str]]) -> None:
        """
        Log a warning for every path that produced timeout errors.

        For each path in `errors`, one message is logged through
        `logger.error` (colored red) listing the number of timeouts and the
        affected rules in sorted order. If the number of timeouts equals
        `self.settings.timeout_threshold`, the message also states that
        Semgrep stopped running rules on that path.

        After all paths are reported, a hint about `--timeout-threshold` is
        logged once if any path had more than 5 timeouts while no threshold
        was set.

        Sets `self.has_output` to True. The lists in `errors` are not modified.
        """
        self.has_output = True
        separator = ", "
        print_threshold_hint = False
        for path, rule_ids in errors.items():
            num_errs = len(rule_ids)
            # sorted() instead of list.sort(): report in a stable order without
            # reordering the caller's lists as a side effect
            rule_list = separator.join(sorted(rule_ids))
            error_msg = f"Warning: {num_errs} timeout error(s) in {path} when running the following rules: [{rule_list}]"
            if num_errs == self.settings.timeout_threshold:
                error_msg += f"\nSemgrep stopped running rules on {path} after {num_errs} timeout error(s). See `--timeout-threshold` for more info."
            print_threshold_hint = print_threshold_hint or (
                num_errs > 5 and not self.settings.timeout_threshold)
            logger.error(with_color(colorama.Fore.RED, error_msg))

        if print_threshold_hint:
            logger.error(
                with_color(
                    colorama.Fore.RED,
                    "You can use the `--timeout-threshold` flag to set a number of timeouts after which a file will be skipped.",
                ))
Exemple #4
0
    def __str__(self) -> str:
        """
        Render this error as a colored, multi-line string: a header line, one
        code snippet per span, an optional help line and the long message.
        """
        level_label = with_color('red', 'semgrep ' + self.level.name.lower())
        header = f"{level_label}: {self.short_msg}"

        rendered_spans = []
        for span in self.spans:
            # spans whose file is the "semgrep temp file" get no location hint
            pieces = (
                [f"  --> {span.file}:{span.start.line}"]
                if span.file != "semgrep temp file"
                else []
            )

            # all the lines of code in the file this comes from
            source: List[str] = SourceTracker.source(span.source_hash)

            # Layout of one snippet:
            #   1. leading context, from `context_start` up to the line before `start`
            #   2. the focus of the span, from `start` to `end`
            #   3. for a one-line span, a row of carets placed using the column info
            #   4. trailing context, from the line after `end` to `context_end`
            if span.context_start:
                pieces.extend(
                    self._format_code_segment(
                        span.context_start, span.start.previous_line(), source, span
                    )
                )
            pieces.extend(
                self._format_code_segment(span.start, span.end, source, span)
            )
            # Currently, only span highlighting if it's a one line span
            if span.start.line == span.end.line:
                carets = with_color("red", (span.end.col - span.start.col) * "^")
                gutter = self._format_line_number(span, None)
                padding = " " * (span.start.col - 1)
                pieces.append(gutter + padding + carets)
            if span.context_end:
                pieces.extend(
                    self._format_code_segment(
                        span.end.next_line(), span.context_end, source, span
                    )
                )

            rendered_spans.append("\n".join(pieces))

        snippet_str = "\n".join(rendered_spans)
        help_str = (
            f"= {with_color('cyan', 'help', bold=True)}: {self.help}"
            if self.help
            else ""
        )

        # TODO remove this when temp files are no longer in error messages
        snippet_section = f"{snippet_str}\n" if snippet_str != "" else ""
        return f"{header}\n{snippet_section}{help_str}\n{with_color('red', self.long_msg or '')}\n"
Exemple #5
0
 def _color_line(
     line: str,
     line_number: int,
     start_line: int,
     start_col: int,
     end_line: int,
     end_col: int,
 ) -> str:
     """
     Return `line` with the part covered by the match wrapped in "bright_black".

     On the match's first line (or any earlier line number) coloring begins at
     `start_col`; on later lines it begins at the start of the line. On
     `end_line` or beyond, coloring stops at `end_col` (inclusive); on earlier
     lines it runs to the end of the line. Columns are shifted down by one and
     clamped at zero before slicing.
     """
     if line_number > start_line:
         first = 0
     else:
         # column offset
         first = max(start_col - 1, 0)

     if line_number >= end_line:
         last = max(end_col - 1, 0)
     else:
         # past the end of the line, so the slice covers the whole remainder
         last = len(line) + 1

     # want the color to include the end_col
     stop = last + 1
     before, focus, after = line[:first], line[first:stop], line[stop:]
     return before + with_color("bright_black", focus) + after
Exemple #6
0
    def _finding_to_line(
        rule_match: RuleMatch,
        color_output: bool,
        per_finding_max_lines_limit: Optional[int],
        per_line_max_chars_limit: Optional[int],
        show_separator: bool,
    ) -> Iterator[str]:
        """
        Yield the code lines of one finding, followed by optional notices.

        Nothing is yielded when the match has no path. Otherwise the lines
        come from `extra["fixed_lines"]` if present and non-empty, else from
        `rule_match.lines`, cut to `per_finding_max_lines_limit` lines when
        that limit is set. When the match has a start line, each line is
        prefixed with `<number>:`, colored if `color_output` is set, and
        shortened with `ELLIPSIS_STRING` if it exceeds
        `per_line_max_chars_limit`.

        After the lines: a note if any line was shortened; then, unless the
        line limit is exactly 1, either a banner reporting how many lines were
        hidden or (when `show_separator` is set) `BREAK_LINE`.
        """
        path = rule_match.path
        first_line_no = rule_match.start.line
        last_line_no = rule_match.end.line
        first_col = rule_match.start.col
        last_col = rule_match.end.col
        if not path:
            return

        hidden_count = 0
        any_shortened = False
        shown = rule_match.extra.get("fixed_lines") or rule_match.lines
        if per_finding_max_lines_limit:
            hidden_count = len(shown) - per_finding_max_lines_limit
            shown = shown[:per_finding_max_lines_limit]

        for offset, raw in enumerate(shown):
            text = raw.rstrip()

            # without a start line there is no number, coloring or shortening
            if not first_line_no:
                yield f"{text}"
                continue

            absolute = first_line_no + offset
            if color_output:
                text = TextFormatter._color_line(
                    text,
                    absolute,
                    first_line_no,
                    first_col,
                    last_line_no,
                    last_col,
                )
                label = with_color("green", f"{absolute}")
            else:
                label = f"{absolute}"

            if per_line_max_chars_limit and len(text) > per_line_max_chars_limit:
                any_shortened = True
                if offset == 0:
                    # first line: keep a window that starts at the match column
                    window_start = first_col - 1
                    window_end = window_start + per_line_max_chars_limit
                    text = text[window_start:window_end] + ELLIPSIS_STRING
                    if first_col > 1:
                        text = ELLIPSIS_STRING + text
                else:
                    text = text[:per_line_max_chars_limit] + ELLIPSIS_STRING
                # while stripping a string, the ANSI code for resetting color might also get stripped.
                text = text + colorama.Style.RESET_ALL

            yield f"{label}:{text}" if label else f"{text}"

        if any_shortened:
            yield f"[Shortened a long line from output, adjust with {MAX_CHARS_FLAG_NAME}]"

        # with a one-line limit neither the banner nor the separator is printed
        if per_finding_max_lines_limit == 1:
            return

        if hidden_count > 0:
            banner = f" [hid {hidden_count} additional lines, adjust with {MAX_LINES_FLAG_NAME}] "
            yield banner.center(BREAK_LINE_WIDTH, BREAK_LINE_CHAR)
        elif show_separator:
            yield BREAK_LINE
Exemple #7
0
 def __str__(self) -> str:
     """Return the lexical-error warning for this rule and path, colored red."""
     return with_color(
         Fore.RED,
         f"Warning: Semgrep encountered a lexical error when running {self.rule_id} on {self.path}. Please ensure this is valid code.",
     )
Exemple #8
0
 def __str__(self) -> str:
     """Return the out-of-memory warning for this rule and path, colored red."""
     return with_color(
         Fore.RED,
         f"Warning: Semgrep exceeded memory when running {self.rule_id} on {self.path}. See `--max-memory` for more info.",
     )
Exemple #9
0
 def __str__(self) -> str:
     """Return the too-many-matches warning for this rule and path, colored red."""
     return with_color(
         Fore.RED,
         f"Warning: Semgrep exceeded number of matches when running {self.rule_id} on {self.path}.",
     )
Exemple #10
0
def cli(
    autofix: bool,
    config: Optional[Tuple[str, ...]],
    dangerously_allow_arbitrary_code_execution_from_rules: bool,
    debug: bool,
    debugging_json: bool,
    dryrun: bool,
    dump_ast: bool,
    emacs: bool,
    enable_metrics: bool,
    enable_nosem: bool,
    enable_version_check: bool,
    error_on_findings: bool,
    exclude: Optional[Tuple[str, ...]],
    force_color: bool,
    generate_config: bool,
    include: Optional[Tuple[str, ...]],
    jobs: int,
    json: bool,
    json_stats: bool,
    json_time: bool,
    junit_xml: bool,
    lang: Optional[str],
    max_chars_per_line: int,
    max_lines_per_finding: int,
    max_memory: int,
    max_target_bytes: int,
    optimizations: str,
    output: Optional[str],
    pattern: Optional[str],
    quiet: bool,
    replacement: Optional[str],
    rewrite_rule_ids: bool,
    sarif: bool,
    save_test_output_tar: bool,
    scan_unknown_extensions: bool,
    severity: Optional[Tuple[str, ...]],
    strict: bool,
    synthesize_patterns: str,
    target: Tuple[str, ...],
    test: bool,
    test_ignore_todo: bool,
    time: bool,
    timeout: int,
    timeout_threshold: int,
    use_git_ignore: bool,
    validate: bool,
    verbose: bool,
    version: bool,
    vim: bool,
) -> None:
    """
    Semgrep CLI. Searches TARGET paths for matches to rules or patterns. Defaults to searching entire current working directory.

    For more information about Semgrep, go to https://semgrep.dev.
    """
    # NOTE(review): the docstring above is presumably surfaced as the command's
    # help text by the CLI framework — confirm before rewording it.

    # Fast path: print the version (plus the optional version check) and return
    # before any of the heavier imports below are executed.
    if version:
        print(__VERSION__)
        if enable_version_check:
            from semgrep.version import version_check

            version_check()
        return

    # To keep version runtime fast, we defer non-version imports until here
    import semgrep.semgrep_main
    import semgrep.test
    import semgrep.config_resolver
    from semgrep.constants import OutputFormat
    from semgrep.constants import DEFAULT_CONFIG_FILE
    from semgrep.dump_ast import dump_parsed_ast
    from semgrep.error import SemgrepError
    from semgrep.metric_manager import metric_manager
    from semgrep.output import managed_output
    from semgrep.output import OutputSettings
    from semgrep.synthesize_patterns import synthesize
    from semgrep.target_manager import optional_stdin_target

    # With no explicit targets, scan the current working directory.
    target_sequence: Sequence[str] = list(target) if target else [os.curdir]

    if enable_metrics:
        metric_manager.enable()
    else:
        metric_manager.disable()

    if include and exclude:
        logger.warning(
            with_color(
                "yellow",
                "Paths that match both --include and --exclude will be skipped by Semgrep.",
            ))

    # A pattern given without a language is rejected up front.
    if pattern is not None and lang is None:
        abort("-e/--pattern and -l/--lang must both be specified")

    if dangerously_allow_arbitrary_code_execution_from_rules:
        logger.warning(
            "The '--dangerously-allow-arbitrary-code-execution-from-rules' flag is now deprecated and does nothing. It will be removed in the future."
        )

    # Timing output is enabled by either --time or --json-time.
    output_time = time or json_time

    # set the flags
    semgrep.util.set_flags(verbose=verbose,
                           debug=debug,
                           quiet=quiet,
                           force_color=force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker()
    except SemgrepError as e:
        logger.exception(str(e))
        raise e

    # Pick the output format. Text is the default; when several format flags
    # are set, the first match in this chain wins (JSON variants first).
    output_format = OutputFormat.TEXT
    if json or json_time or debugging_json:
        output_format = OutputFormat.JSON
    elif junit_xml:
        output_format = OutputFormat.JUNIT_XML
    elif sarif:
        output_format = OutputFormat.SARIF
    elif emacs:
        output_format = OutputFormat.EMACS
    elif vim:
        output_format = OutputFormat.VIM

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=output,
        error_on_findings=error_on_findings,
        strict=strict,
        debug=debugging_json,
        verbose_errors=verbose,
        timeout_threshold=timeout_threshold,
        json_stats=json_stats,
        output_time=output_time,
        output_per_finding_max_lines_limit=max_lines_per_finding,
        output_per_line_max_chars_limit=max_chars_per_line,
    )

    # NOTE(review): there is no `return` after test_main, so unless test_main
    # terminates the process, execution continues into the block below — confirm.
    if test:
        # the test code (which isn't a "test" per se but is actually machinery to evaluate semgrep performance)
        # uses managed_output internally
        semgrep.test.test_main(
            target=target_sequence,
            config=config,
            test_ignore_todo=test_ignore_todo,
            strict=strict,
            json=json,
            save_test_output_tar=save_test_output_tar,
            optimizations=optimizations,
        )

    # The 'optional_stdin_target' context manager must remain before
    # 'managed_output'. Output depends on file contents so we cannot have
    # already deleted the temporary stdin file.
    with optional_stdin_target(
            target_sequence) as target_sequence, managed_output(
                output_settings) as output_handler:
        # Exactly one mode runs; the flags are checked in this order:
        # dump AST, synthesize patterns, validate config, generate config,
        # and finally the regular scan.
        if dump_ast:
            dump_parsed_ast(json, __validate_lang("--dump_ast", lang), pattern,
                            target_sequence)
        elif synthesize_patterns:
            synthesize(
                __validate_lang("--synthesize-patterns", lang),
                synthesize_patterns,
                target_sequence,
            )
        elif validate:
            configs, config_errors = semgrep.config_resolver.get_config(
                pattern, lang, config or [])
            valid_str = "invalid" if config_errors else "valid"
            rule_count = len(configs.get_rules(True))
            logger.info(
                f"Configuration is {valid_str} - found {len(configs.valid)} valid configuration(s), {len(config_errors)} configuration error(s), and {rule_count} rule(s)."
            )
            # Report every configuration error, then fail the command.
            if config_errors:
                for err in config_errors:
                    output_handler.handle_semgrep_error(err)
                raise SemgrepError(
                    "Please fix the above errors and try again.")
        elif generate_config:
            with open(DEFAULT_CONFIG_FILE, "w") as fd:
                semgrep.config_resolver.generate_config(fd, lang, pattern)
        else:
            # Regular scan. Several CLI flags are phrased positively
            # (enable_*/use_*/rewrite_*/scan_*) while main() takes the
            # negated form, hence the `not ...` arguments.
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=target_sequence,
                pattern=pattern,
                lang=lang,
                configs=(config or []),
                no_rewrite_rule_ids=(not rewrite_rule_ids),
                jobs=jobs,
                include=include,
                exclude=exclude,
                max_target_bytes=max_target_bytes,
                replacement=replacement,
                strict=strict,
                autofix=autofix,
                dryrun=dryrun,
                disable_nosem=(not enable_nosem),
                no_git_ignore=(not use_git_ignore),
                timeout=timeout,
                max_memory=max_memory,
                timeout_threshold=timeout_threshold,
                skip_unknown_extensions=(not scan_unknown_extensions),
                severity=severity,
                optimizations=optimizations,
            )

    # The version check runs last, after the output context managers have exited.
    if enable_version_check:
        from semgrep.version import version_check

        version_check()
Exemple #11
0
 def __str__(self) -> str:
     """Return the unparseable-pattern message for this rule, colored red."""
     return with_color(
         Fore.RED,
         f"Rule id: {self.rule_id} contains a pattern that could not be parsed as a pattern for language {self.language}: `{self.pattern}`",
     )
Exemple #12
0
 def __str__(self) -> str:
     """Return the error message in red immediately followed by the stack trace in white."""
     message_part = with_color("red", self._error_message)
     trace_part = with_color("white", self._stack_trace)
     return message_part + trace_part
Exemple #13
0
 def __str__(self) -> str:
     """Return the fatal semgrep-core error report, with a bug-report link, colored red."""
     report = f"semgrep-core reported a fatal error:\n-----\n{self.msg}-----\nPlease file a bug report at https://github.com/returntocorp/semgrep/issues/new/choose"
     return with_color(Fore.RED, report)
Exemple #14
0
 def __str__(self) -> str:
     """
     Return a yellow message naming the semgrep-core error kind and its details.

     The kind is derived from `check_id` by putting a space before each run of
     capitals, collapsing whitespace and lower-casing the result.
     """
     # "MatchingError" -> " Matching Error"
     spaced = re.sub("([A-Z]+)", r" \1", self.check_id)
     # " Matching Error" -> "matching error"
     words = spaced.split()
     error_id = " ".join(words).lower()
     text = f"semgrep-core reported a {error_id}\n  --> {self.msg}"
     return with_color(Fore.YELLOW, text)