Example #1
0
    def from_json(  # type: ignore
            cls, json_obj: Dict[str, Any], language: str,
            rule_id: str) -> "CoreException":
        """Build an instance of ``cls`` from one JSON error object.

        Args:
            json_obj: Mapping that must contain the keys ``check_id``,
                ``path``, ``start``, ``end`` and ``extra``. ``start`` and
                ``end`` must each contain ``line`` and ``col``.
            language: Passed through unchanged to the constructor.
            rule_id: Passed through unchanged to the constructor.

        Returns:
            The result of calling ``cls`` with the check id, the path as a
            ``Path``, the start and end ``Position``, the ``extra`` payload,
            ``language`` and ``rule_id``.

        Raises:
            ValueError: If any required key is missing from ``json_obj`` or
                from its ``start``/``end`` entries.
        """
        required_keys = {"check_id", "path", "start", "end", "extra"}
        if not required_keys.issubset(json_obj.keys()):
            raise ValueError(f"cannot parse {json_obj} as {cls.__name__}")

        start = json_obj["start"]
        end = json_obj["end"]
        if any(field not in pos for pos in (start, end)
               for field in ("line", "col")):
            raise ValueError(f"cannot parse {json_obj} as {cls.__name__}")

        start_pos = Position(start["line"], start["col"])
        end_pos = Position(end["line"], end["col"])

        # Semgrep-Core caches TimeoutErrors as FatalErrors
        # Hack to treat these as TimeoutErrors
        check_id = json_obj["check_id"]
        # A FatalError without a "message" entry is simply not a timeout;
        # look the message up tolerantly instead of failing with KeyError.
        if check_id == "FatalError" and "Timeout" in (
                json_obj["extra"].get("message") or ""):
            check_id = "Timeout"

        return cls(
            check_id,
            Path(json_obj["path"]),
            start_pos,
            end_pos,
            json_obj["extra"],
            language,
            rule_id,
        )
Example #2
0
    def from_json(  # type: ignore
            cls, json_obj: Dict[str, Any], language: str) -> "CoreException":
        """Build an instance of ``cls`` from one JSON error object.

        ``json_obj`` must contain ``check_id``, ``path``, ``start``, ``end``
        and ``extra``, and both ``start`` and ``end`` must contain ``line``
        and ``col``; otherwise ``ValueError`` is raised. ``language`` is
        passed through unchanged to the constructor.
        """
        error_text = f"cannot parse {json_obj} as {cls.__name__}"

        required_keys = {"check_id", "path", "start", "end", "extra"}
        if not required_keys.issubset(json_obj.keys()):
            raise ValueError(error_text)

        start, end = json_obj["start"], json_obj["end"]
        # Both endpoints need a line and a column before they can be converted.
        if any(field not in pos for pos in (start, end)
               for field in ("line", "col")):
            raise ValueError(error_text)

        return cls(
            json_obj["check_id"],
            Path(json_obj["path"]),
            Position(start["line"], start["col"]),
            Position(end["line"], end["col"]),
            json_obj["extra"],
            language,
        )
Example #3
0
def test_span_tracking():
    """Check the spans that parse_yaml_preserve_spans attaches to a document.

    Covers the root span, a nested mapping value, a list element, a mapping
    key, and finally that unrolling the result equals plain parse_yaml output.
    """
    data = parse_yaml_preserve_spans(test_yaml, Path("filename"))

    def expected_span(start: Position, end: Position) -> Span:
        # Copy the root span, overriding only its two endpoints.
        return attr.evolve(data.span, start=start, end=end)

    # basic spans
    root_span = expected_span(
        start=Position(line=2, col=1),
        end=Position(line=10, col=1),
    )
    assert data.span == root_span

    # values act like dictionaries
    a_node = data.value["a"]
    assert a_node.span == expected_span(
        start=Position(line=3, col=3),
        end=Position(line=10, col=1),
    )

    # values act like lists
    second_item = data.value["a"].value[1]
    assert second_item.span == expected_span(
        start=Position(line=4, col=5),
        end=Position(line=4, col=6),
    )

    assert data.value["a"].value[1].value == 2

    # spans are also attached to keys
    first_key, _ = list(data.value.items())[0]
    assert first_key.span == expected_span(
        start=Position(line=2, col=1),
        end=Position(line=2, col=2),
    )

    # unrolling is equivalent
    assert data.unroll() == parse_yaml(test_yaml)
Example #4
0
def run_spacegrep(rule_id: str, patterns: List[Pattern], targets: List[Path],
                  timeout: int) -> dict:
    """Run spacegrep once per (pattern, target) pair and collect the output.

    Args:
        rule_id: Recorded on the timeout errors created by this function.
        patterns: Patterns to run; each ``_pattern`` must be a ``str``.
        targets: Paths handed to spacegrep through its ``-d`` option.
        timeout: Value handed to spacegrep through its ``--timeout`` option.

    Returns:
        A dict with two lists, ``"matches"`` and ``"errors"``, accumulated
        over every pattern/target combination.

    Raises:
        NotImplementedError: If a pattern's ``_pattern`` is not a string.
        SemgrepError: If spacegrep exits with a non-zero status other than 3,
            or if its output is not valid JSON or lacks an expected key.
    """
    matches: List[dict] = []
    errors: List[dict] = []
    for pattern in patterns:
        if not isinstance(pattern._pattern, str):
            raise NotImplementedError(
                f"Support for {type(pattern._pattern)} has not been implemented yet."
            )
        pattern_str = pattern._pattern  # TODO: Handle pattern Dict
        for target in targets:
            cmd = [
                SPACEGREP_PATH,
                "--output-format",
                "semgrep",
                "-d",
                str(target),
                pattern_str,
                "--timeout",
                str(timeout),
            ]
            try:
                p = sub_run(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
                # exit code 3 indicates a timeout. See 'spacegrep --help'.
                if p.returncode == 3:
                    errors.append(
                        CoreException(
                            check_id="Timeout",
                            path=target,
                            start=Position(0, 0),
                            end=Position(0, 0),
                            extra={
                                "message": "spacegrep timeout",
                                "line": "",
                            },
                            language="generic",
                            rule_id=rule_id,
                        ).to_dict())
                    continue

                p.check_returncode()
                output_json = _parse_spacegrep_output(p.stdout)
                output_json["matches"] = _patch_id(
                    pattern, output_json.get("matches", []))

                matches.extend(output_json["matches"])
                errors.extend(output_json["errors"])
            except subprocess.CalledProcessError as e:
                # Read stderr from the exception rather than from `p`: `p` is
                # unbound (or left over from the previous target) if the
                # error was raised by `sub_run` itself.
                raw_error = e.stderr
                if isinstance(raw_error, bytes):
                    spacegrep_error_text = raw_error.decode("utf-8",
                                                            errors="replace")
                else:
                    spacegrep_error_text = raw_error or ""
                raise SemgrepError(
                    f"Error running spacegrep on file {target}: Process error: {e}\n\nspacegrep error: {spacegrep_error_text}"
                ) from e
            except json.JSONDecodeError as e:
                raise SemgrepError(
                    f"Could not parse spacegrep output as JSON: JSON error: {e}"
                ) from e
            except KeyError as e:
                raise SemgrepError(
                    f"Invalid JSON output was received from spacegrep: {e}"
                ) from e

    return {
        "matches": matches,
        "errors": errors,
    }
Example #5
0
def run_spacegrep(
    rule_id: str,
    patterns: List[Pattern],
    targets: List[Path],
    timeout: int,
) -> dict:
    """Run spacegrep once per (pattern, target) pair, collecting output and timings.

    Args:
        rule_id: Recorded on the timeout errors created by this function.
        patterns: Patterns to run; each ``_pattern`` must be a ``str``.
        targets: Paths handed to spacegrep through its ``-d`` option.
        timeout: Value handed to spacegrep through its ``--timeout`` option.

    Returns:
        A dict with ``"matches"`` and ``"errors"`` lists plus a ``"time"``
        entry of the form ``{"targets": [...]}``. Each target gets one record
        with ``path``, ``parse_time``, ``match_time`` and ``run_time``, summed
        over all patterns. Targets without recorded timings report ``0.0``.

    Raises:
        NotImplementedError: If a pattern's ``_pattern`` is not a string.
        SemgrepError: If spacegrep exits with a non-zero status other than 3,
            or if its output is not valid JSON or lacks an expected key.
    """
    matches: List[dict] = []
    errors: List[dict] = []
    targets_time: Dict[str, Tuple[float, float, float]] = {}
    no_times = (0.0, 0.0, 0.0)
    for pattern in patterns:
        if not isinstance(pattern._pattern, str):
            raise NotImplementedError(
                f"Support for {type(pattern._pattern)} has not been implemented yet."
            )
        pattern_str = pattern._pattern  # TODO: Handle pattern Dict
        for target in targets:
            cmd = [
                SPACEGREP_PATH,
                "--output-format",
                "semgrep",
                "-d",
                str(target),
                pattern_str,
                "--timeout",
                str(timeout),
                "--time",
            ]

            try:
                p = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                # exit code 3 indicates a timeout. See 'spacegrep --help'.
                if p.returncode == 3:
                    errors.append(
                        CoreException(
                            check_id="Timeout",
                            path=target,
                            start=Position(0, 0),
                            end=Position(0, 0),
                            extra={
                                "message": "spacegrep timeout",
                                "line": "",
                            },
                            language="generic",
                            rule_id=rule_id,
                        ).to_dict()
                    )
                    continue

                p.check_returncode()
                output_json = _parse_spacegrep_output(p.stdout)
                output_json["matches"] = _patch_id(
                    pattern, output_json.get("matches", [])
                )

                matches.extend(output_json["matches"])
                errors.extend(output_json["errors"])

                # aggregate the match times obtained for the different patterns of the rule
                path_s = str(target)
                previous = targets_time.get(path_s, no_times)
                targets_time[path_s] = tuple(  # type: ignore
                    total + new
                    for total, new in zip(previous, _extract_times(output_json))
                )

            except subprocess.CalledProcessError as e:
                # Read stderr from the exception rather than from `p`: `p` is
                # unbound (or left over from the previous target) if the
                # error was raised by `sub_run` itself.
                raw_error = e.stderr
                if isinstance(raw_error, bytes):
                    spacegrep_error_text = raw_error.decode("utf-8", errors="replace")
                else:
                    spacegrep_error_text = raw_error or ""
                raise SemgrepError(
                    f"Error running spacegrep on file {target}: Process error: {e}\n\nspacegrep error: {spacegrep_error_text}"
                ) from e
            except json.JSONDecodeError as e:
                raise SemgrepError(
                    f"Could not parse spacegrep output as JSON: JSON error: {e}"
                ) from e
            except KeyError as e:
                raise SemgrepError(
                    f"Invalid JSON output was received from spacegrep: {e}"
                ) from e

    # One timing record per requested target, in the order given.
    target_list = []
    for path in targets:
        times = targets_time.get(str(path), no_times)
        target_list.append(
            {
                "path": str(path),
                "parse_time": times[0],
                "match_time": times[1],
                "run_time": times[2],
            }
        )
    return {
        "matches": matches,
        "errors": errors,
        "time": {"targets": target_list},
    }