def from_json(  # type: ignore
    cls, json_obj: Dict[str, Any], language: str, rule_id: str
) -> "CoreException":
    """Build an instance of ``cls`` from a JSON-decoded error object.

    Args:
        json_obj: Mapping that must contain the keys ``check_id``, ``path``,
            ``start``, ``end`` and ``extra``. ``start`` and ``end`` must each
            contain ``line`` and ``col``.
        language: Forwarded unchanged to the constructor.
        rule_id: Forwarded unchanged to the constructor.

    Returns:
        The object produced by calling ``cls`` with the check id, the path
        wrapped in ``Path``, the start and end ``Position``, the ``extra``
        payload, ``language`` and ``rule_id``.

    Raises:
        ValueError: If a required top-level key, or ``line``/``col`` inside
            ``start``/``end``, is missing.
    """
    required_keys = {"check_id", "path", "start", "end", "extra"}
    if not required_keys.issubset(json_obj.keys()):
        raise ValueError(f"cannot parse {json_obj} as {cls.__name__}")

    start = json_obj["start"]
    end = json_obj["end"]
    if any(
        field not in position
        for position in (start, end)
        for field in ("line", "col")
    ):
        raise ValueError(f"cannot parse {json_obj} as {cls.__name__}")

    start_pos = Position(start["line"], start["col"])
    end_pos = Position(end["line"], end["col"])

    # Semgrep-Core caches TimeoutErrors as FatalErrors
    # Hack to treat these as TimeoutErrors
    check_id = json_obj["check_id"]
    if check_id == "FatalError":
        # A FatalError without a usable "message" is kept as a FatalError
        # instead of aborting the parse with a KeyError/TypeError.
        try:
            is_timeout = "Timeout" in json_obj["extra"]["message"]
        except (KeyError, TypeError):
            is_timeout = False
        if is_timeout:
            check_id = "Timeout"

    return cls(
        check_id,
        Path(json_obj["path"]),
        start_pos,
        end_pos,
        json_obj["extra"],
        language,
        rule_id,
    )
def from_json(  # type: ignore
    cls, json_obj: Dict[str, Any], language: str
) -> "CoreException":
    """Construct a ``cls`` instance out of a JSON-decoded error object.

    ``json_obj`` has to provide ``check_id``, ``path``, ``start``, ``end``
    and ``extra``; both ``start`` and ``end`` have to provide ``line`` and
    ``col``. ``language`` is handed to the constructor as-is.

    Raises:
        ValueError: When any of the required keys is absent.
    """
    available = json_obj.keys()
    for top_level_key in ("check_id", "path", "start", "end", "extra"):
        if top_level_key not in available:
            raise ValueError(f"cannot parse {json_obj} as {cls.__name__}")

    begin, finish = json_obj["start"], json_obj["end"]
    for endpoint in (begin, finish):
        for coordinate in ("line", "col"):
            if coordinate not in endpoint:
                raise ValueError(f"cannot parse {json_obj} as {cls.__name__}")

    # Positions are built before the remaining fields are read, mirroring the
    # order in which a malformed payload would surface errors.
    begin_pos = Position(begin["line"], begin["col"])
    finish_pos = Position(finish["line"], finish["col"])

    constructor_args = (
        json_obj["check_id"],
        Path(json_obj["path"]),
        begin_pos,
        finish_pos,
        json_obj["extra"],
        language,
    )
    return cls(*constructor_args)
def test_span_tracking():
    """Spans must be attached to the root, nested values, list items and keys."""
    parsed = parse_yaml_preserve_spans(test_yaml, Path("filename"))

    def expected_span(start: Position, end: Position) -> Span:
        # Derive the expectation from the root span so only start/end differ.
        return attr.evolve(parsed.span, start=start, end=end)

    # basic spans
    root_expected = expected_span(
        start=Position(line=2, col=1),
        end=Position(line=10, col=1),
    )
    assert parsed.span == root_expected

    # values act like dictionaries
    a_expected = expected_span(
        start=Position(line=3, col=3),
        end=Position(line=10, col=1),
    )
    assert parsed.value["a"].span == a_expected

    # values act like lists
    item_expected = expected_span(
        start=Position(line=4, col=5),
        end=Position(line=4, col=6),
    )
    assert parsed.value["a"].value[1].span == item_expected
    assert parsed.value["a"].value[1].value == 2

    # spans are also attached to keys
    first_key, _first_value = list(parsed.value.items())[0]
    key_expected = expected_span(
        start=Position(line=2, col=1),
        end=Position(line=2, col=2),
    )
    assert first_key.span == key_expected

    # unrolling is equivalent
    assert parsed.unroll() == parse_yaml(test_yaml)
def run_spacegrep(
    rule_id: str, patterns: List[Pattern], targets: List[Path], timeout: int
) -> dict:
    """Run spacegrep for every (pattern, target) pair and merge the results.

    Args:
        rule_id: Rule identifier recorded on timeout errors.
        patterns: Patterns to run; each ``_pattern`` must be a ``str``.
        targets: Paths passed to spacegrep one at a time via ``-d``.
        timeout: Value passed to spacegrep's ``--timeout`` option.

    Returns:
        A dict with two keys: ``"matches"`` (all matches, after ``_patch_id``)
        and ``"errors"`` (errors reported by spacegrep plus one ``Timeout``
        entry per timed-out invocation).

    Raises:
        NotImplementedError: If a pattern's ``_pattern`` is not a string.
        SemgrepError: If spacegrep exits with a failing status other than the
            timeout code, or its output cannot be parsed / lacks expected keys.
    """
    matches: List[dict] = []
    errors: List[dict] = []
    for pattern in patterns:
        if not isinstance(pattern._pattern, str):
            raise NotImplementedError(
                f"Support for {type(pattern._pattern)} has not been implemented yet."
            )
        pattern_str = pattern._pattern  # TODO: Handle pattern Dict
        for target in targets:
            cmd = [
                SPACEGREP_PATH,
                "--output-format",
                "semgrep",
                "-d",
                str(target),
                pattern_str,
                "--timeout",
                str(timeout),
            ]
            try:
                p = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                # exit code 3 indicates a timeout. See 'spacegrep --help'.
                if p.returncode == 3:
                    err = CoreException(
                        check_id="Timeout",
                        path=target,
                        start=Position(0, 0),
                        end=Position(0, 0),
                        extra={
                            "message": "spacegrep timeout",
                            "line": "",
                        },
                        language="generic",
                        rule_id=rule_id,
                    ).to_dict()
                    errors.append(err)
                else:
                    p.check_returncode()
                    raw_output = p.stdout
                    output_json = _parse_spacegrep_output(raw_output)
                    output_json["matches"] = _patch_id(
                        pattern, output_json.get("matches", [])
                    )
                    matches.extend(output_json["matches"])
                    # A missing "errors" key surfaces as KeyError below.
                    errors.extend(output_json["errors"])
            except subprocess.CalledProcessError as e:
                raw_error = p.stderr
                spacegrep_error_text = raw_error.decode("utf-8", errors="replace")
                # Chain explicitly so the original failure is kept as the cause.
                raise SemgrepError(
                    f"Error running spacegrep on file {target}: Process error: {e}\n\nspacegrep error: {spacegrep_error_text}"
                ) from e
            except json.JSONDecodeError as e:
                raise SemgrepError(
                    f"Could not parse spacegrep output as JSON: JSON error: {e}"
                ) from e
            except KeyError as e:
                raise SemgrepError(
                    f"Invalid JSON output was received from spacegrep: {e}"
                ) from e
    return {
        "matches": matches,
        "errors": errors,
    }
def run_spacegrep(
    rule_id: str,
    patterns: List[Pattern],
    targets: List[Path],
    timeout: int,
) -> dict:
    """Run spacegrep for every (pattern, target) pair and merge the results.

    Args:
        rule_id: Rule identifier recorded on timeout errors.
        patterns: Patterns to run; each ``_pattern`` must be a ``str``.
        targets: Paths passed to spacegrep one at a time via ``-d``.
        timeout: Value passed to spacegrep's ``--timeout`` option.

    Returns:
        A dict with three keys: ``"matches"`` (all matches, after
        ``_patch_id``), ``"errors"`` (errors reported by spacegrep plus one
        ``Timeout`` entry per timed-out invocation) and ``"time"``, whose
        ``"targets"`` list has one entry per target with ``parse_time``,
        ``match_time`` and ``run_time`` summed over all patterns. Targets
        with no recorded timing report ``0.0`` for each.

    Raises:
        NotImplementedError: If a pattern's ``_pattern`` is not a string.
        SemgrepError: If spacegrep exits with a failing status other than the
            timeout code, or its output cannot be parsed / lacks expected keys.
    """
    matches: List[dict] = []
    errors: List[dict] = []
    targets_time: Dict[str, Tuple[float, float, float]] = {}
    for pattern in patterns:
        if not isinstance(pattern._pattern, str):
            raise NotImplementedError(
                f"Support for {type(pattern._pattern)} has not been implemented yet."
            )
        pattern_str = pattern._pattern  # TODO: Handle pattern Dict
        for target in targets:
            cmd = [
                SPACEGREP_PATH,
                "--output-format",
                "semgrep",
                "-d",
                str(target),
                pattern_str,
                "--timeout",
                str(timeout),
                "--time",
            ]
            try:
                p = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                # exit code 3 indicates a timeout. See 'spacegrep --help'.
                if p.returncode == 3:
                    err = CoreException(
                        check_id="Timeout",
                        path=target,
                        start=Position(0, 0),
                        end=Position(0, 0),
                        extra={
                            "message": "spacegrep timeout",
                            "line": "",
                        },
                        language="generic",
                        rule_id=rule_id,
                    ).to_dict()
                    errors.append(err)
                else:
                    p.check_returncode()
                    raw_output = p.stdout
                    output_json = _parse_spacegrep_output(raw_output)
                    output_json["matches"] = _patch_id(
                        pattern, output_json.get("matches", [])
                    )
                    matches.extend(output_json["matches"])
                    # A missing "errors" key surfaces as KeyError below.
                    errors.extend(output_json["errors"])
                    # aggregate the match times obtained for the different patterns of the rule
                    path_s = str(target)
                    previous_times = targets_time.get(path_s, (0.0, 0.0, 0.0))
                    targets_time[path_s] = tuple(  # type: ignore
                        i + j
                        for i, j in zip(previous_times, _extract_times(output_json))
                    )
            except subprocess.CalledProcessError as e:
                raw_error = p.stderr
                spacegrep_error_text = raw_error.decode("utf-8", errors="replace")
                # Chain explicitly so the original failure is kept as the cause.
                raise SemgrepError(
                    f"Error running spacegrep on file {target}: Process error: {e}\n\nspacegrep error: {spacegrep_error_text}"
                ) from e
            except json.JSONDecodeError as e:
                raise SemgrepError(
                    f"Could not parse spacegrep output as JSON: JSON error: {e}"
                ) from e
            except KeyError as e:
                raise SemgrepError(
                    f"Invalid JSON output was received from spacegrep: {e}"
                ) from e

    no_timing = (0.0, 0.0, 0.0)
    target_list = [
        {
            "path": str(path),
            "parse_time": times[0],
            "match_time": times[1],
            "run_time": times[2],
        }
        for path in targets
        for times in (targets_time.get(str(path), no_timing),)
    ]
    timing = {"targets": target_list}
    return {
        "matches": matches,
        "errors": errors,
        "time": timing,
    }