def dump_parsed_ast(
    to_json: bool, language: str, pattern: Optional[str], targets_str: List[str]
) -> None:
    """Invoke semgrep-core to dump the AST of a pattern or of a single file.

    Prints the core binary's output to stdout.

    :param to_json: if True, prepend ``-json`` so semgrep-core emits JSON
    :param language: language flag forwarded to semgrep-core
    :param pattern: when given, dump this pattern's AST instead of a file's
    :param targets_str: candidate target paths; exactly one is required when
        no pattern is supplied
    :raises SemgrepError: if no pattern is given and there is not exactly one
        target, or if semgrep-core exits non-zero
    """
    targets = semgrep.config_resolver.resolve_targets(targets_str)
    # The pattern must live in a real file for semgrep-core to read; keep the
    # temp file open (and thus alive) for the whole invocation.
    with tempfile.NamedTemporaryFile("w") as fout:
        args = []
        if pattern:
            fout.write(pattern)
            fout.flush()  # make sure the pattern hits disk before core reads it
            args = ["-lang", language, "-dump_pattern", fout.name]
        else:
            if len(targets) != 1:
                raise SemgrepError("--dump-ast requires exactly one target file")
            target = targets[0]
            args = ["-lang", language, "-dump_ast", str(target)]
        if to_json:
            args = ["-json"] + args
        cmd = [SEMGREP_PATH] + args
        try:
            output = sub_check_output(cmd)
        except subprocess.CalledProcessError as ex:
            raise SemgrepError(
                f"error invoking semgrep with:\n\t{' '.join(cmd)}\n\t{ex}\n{PLEASE_FILE_ISSUE_TEXT}"
            )
        # core output may contain bytes that aren't valid UTF-8; decode defensively
        print(output.decode(errors="replace"))
def _raise_semgrep_error_from_json(
    self,
    error_json: Dict[str, Any],
    patterns: List[Pattern],
) -> None:
    """
    Translate a semgrep-core JSON error object into the matching exception.

    See format_output_exception in semgrep O'Caml for details on schema.

    :raises SemgrepError: for invalid-language and unrecognized error types
    :raises InvalidPatternError: when semgrep-core rejected one of our patterns
    """
    error_type = error_json["error"]
    if error_type == "invalid language":
        raise SemgrepError(
            f'{error_json["language"]} was accepted by semgrep but rejected by semgrep-core. {PLEASE_FILE_ISSUE_TEXT}'
        )
    elif error_type == "invalid pattern":
        # map core's pattern_id back to the pattern we sent to recover its span
        matching_pattern = next(
            (p for p in patterns if p._id == error_json["pattern_id"]), None
        )
        if matching_pattern is None or matching_pattern.span is None:
            raise SemgrepError(
                f"Pattern id from semgrep-core was missing in pattern spans. {PLEASE_FILE_ISSUE_TEXT}"
            )
        matching_span = matching_pattern.span
        raise InvalidPatternError(
            short_msg=error_type,
            long_msg=f"Pattern could not be parsed as a {error_json['language']} semgrep pattern",
            spans=[matching_span],
            help=None,
        )
    # no special formatting ought to be required for the other types; the semgrep python should be performing
    # validation for them. So if any other type of error occurs, ask the user to file an issue
    else:
        # FIX: corrected "occured" -> "occurred" in the user-facing message
        raise SemgrepError(
            f'an internal error occurred while invoking semgrep-core:\n\t{error_type}: {error_json.get("message", "no message")}\n{PLEASE_FILE_ISSUE_TEXT}'
        )
def get_config(
    pattern: str, lang: str, config: str
) -> Tuple[Dict[str, List[Rule]], List[SemgrepError]]:
    """Build rule configs from an inline pattern or a resolvable config source.

    :returns: a ``(valid_configs, errors)`` pair from validation
    :raises SemgrepError: if a pattern lacks a language, or no config is found
    """
    if pattern:
        # an inline pattern is meaningless without a language to parse it in
        if not lang:
            raise SemgrepError(
                "language must be specified when a pattern is passed")
        # TODO for now we generate a manual config. Might want to just call semgrep -e ... -l ...
        configs = semgrep.config_resolver.manual_config(pattern, lang)
    else:
        # resolve a config: a dict from config_id -> config. Config ids are
        # not well defined at this point.
        try:
            configs = semgrep.config_resolver.resolve_config(config)
        except SemgrepError as e:
            return {}, [e]

    if not configs:
        raise SemgrepError(
            f"No config given and {DEFAULT_CONFIG_FILE} was not found. Try running with --help to debug or if you want to download a default config, try running with --config r2c"
        )

    return validate_configs(configs)
def load_config_from_local_path(
    location: Optional[str] = None,
) -> Dict[str, YamlTree]:
    """
    Return config file(s) as a dictionary object keyed by config id.

    With no ``location``, tries the default config file, then the default
    config folder, returning ``{}`` when neither exists.

    :param location: optional path (relative to the base path) to a config
        file or folder
    :raises SemgrepError: when an explicit location does not exist or is
        neither a file nor a folder
    """
    base_path = get_base_path()
    if location is None:
        default_file = base_path.joinpath(DEFAULT_CONFIG_FILE)
        default_folder = base_path.joinpath(DEFAULT_CONFIG_FOLDER)
        if default_file.exists():
            return parse_config_at_path(default_file)
        elif default_folder.exists():
            return parse_config_folder(default_folder, relative=True)
        else:
            return {}
    else:
        loc = base_path.joinpath(location)
        if loc.exists():
            if loc.is_file():
                return parse_config_at_path(loc)
            elif loc.is_dir():
                return parse_config_folder(loc)
            else:
                raise SemgrepError(
                    f"config location `{loc}` is not a file or folder!")
        else:
            addendum = ""
            if IN_DOCKER:
                # FIX: corrected "arbitary" -> "arbitrary" in user-facing message
                addendum = " (since you are running in docker, you cannot specify arbitrary paths on the host; they must be mounted into the container)"
            raise SemgrepError(
                f"unable to find a config; path `{loc}` does not exist{addendum}"
            )
def generate_config() -> None: import requests # here for faster startup times # defensive coding if Path(DEFAULT_CONFIG_FILE).exists(): raise SemgrepError( f"{DEFAULT_CONFIG_FILE} already exists. Please remove and try again" ) try: r = requests.get(TEMPLATE_YAML_URL, timeout=10) r.raise_for_status() template_str = r.text except Exception as e: debug_print(str(e)) print_stderr( f"There was a problem downloading the latest template config. Using fallback template" ) template_str = """rules: - id: eqeq-is-bad pattern: $X == $X message: "$X == $X is a useless equality check" languages: [python] severity: ERROR""" try: with open(DEFAULT_CONFIG_FILE, "w") as template: template.write(template_str) print_stderr( f"Template config successfully written to {DEFAULT_CONFIG_FILE}" ) except Exception as e: raise SemgrepError(str(e))
def _evaluate_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Recursively evaluate a boolean rule expression, narrowing ranges_left.

    Compound operators (OR/AND families) recurse into children with a *copy*
    of the current range set and intersect the results back in; leaf
    operators delegate to ``_evaluate_single_expression``. One debugging
    step is appended per compound operator evaluated.

    :returns: the set of ranges still satisfying the expression
    :raises SemgrepError: when an operator's child constraints are violated
    :raises UnknownOperatorError: for unrecognized compound operators
    """
    if expression.operator in OPERATORS_WITH_CHILDREN:
        if expression.children is None:
            raise SemgrepError(
                f"operator '{expression.operator}' must have child operators")
        # recurse on the nested expressions
        if expression.operator == OPERATORS.AND_EITHER:
            # remove anything that does not equal one of these ranges
            evaluated_ranges = [
                _evaluate_expression(
                    expr,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    flags=flags,
                )
                for expr in expression.children
            ]
            ranges_left.intersection_update(flatten(evaluated_ranges))
        elif expression.operator == OPERATORS.AND_ALL:
            # chain intersection eagerly; intersect for every AND'ed child
            for expr in expression.children:
                remainining_ranges = _evaluate_expression(
                    expr,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    flags=flags,
                )
                ranges_left.intersection_update(remainining_ranges)
        else:
            raise UnknownOperatorError(
                f"unknown operator {expression.operator}")
        logger.debug(f"after filter `{expression.operator}`: {ranges_left}")
        steps_for_debugging.append({
            "filter": f"{pattern_name_for_operator(expression.operator)}",
            "pattern_id": None,
            "ranges": list(ranges_left),
        })
    else:
        # leaf operator: must not carry children
        if expression.children is not None:
            raise SemgrepError(
                f"operator '{expression.operator}' must not have child operators"
            )
        ranges_left = _evaluate_single_expression(
            expression,
            pattern_ids_to_pattern_matches,
            ranges_left,
            steps_for_debugging,
            flags=flags,
        )
    return ranges_left
def _evaluate_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[DebuggingStep],
    allow_exec: bool,
) -> Set[Range]:
    """Recursively evaluate a boolean rule expression, narrowing ranges_left.

    Compound operators recurse into children with a *copy* of the current
    range set and intersect the results back in; leaf operators delegate to
    ``_evaluate_single_expression``. Debugging info is recorded via
    ``add_debugging_info`` before returning.

    :param allow_exec: forwarded down to leaf evaluation (controls whether
        pattern-where-python expressions may be exec'd)
    :returns: the set of ranges still satisfying the expression
    :raises SemgrepError: when an operator's child constraints are violated
    :raises UnknownOperatorError: for unrecognized compound operators
    """
    if expression.operator in OPERATORS_WITH_CHILDREN:
        if expression.children is None:
            raise SemgrepError(
                f"operator '{expression.operator}' must have child operators"
            )
        # recurse on the nested expressions
        if expression.operator == OPERATORS.AND_EITHER:
            # remove anything that does not equal one of these ranges
            evaluated_ranges = [
                _evaluate_expression(
                    expr,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    allow_exec=allow_exec,
                )
                for expr in expression.children
            ]
            ranges_left.intersection_update(flatten(evaluated_ranges))
        elif expression.operator == OPERATORS.AND_ALL:
            # chain intersection eagerly; intersect for every AND'ed child
            for expr in expression.children:
                remainining_ranges = _evaluate_expression(
                    expr,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    allow_exec=allow_exec,
                )
                ranges_left.intersection_update(remainining_ranges)
        else:
            raise UnknownOperatorError(f"unknown operator {expression.operator}")
    else:
        # leaf operator: must not carry children
        if expression.children is not None:
            raise SemgrepError(
                f"operator '{expression.operator}' must not have child operators"
            )
        ranges_left = _evaluate_single_expression(
            expression,
            pattern_ids_to_pattern_matches,
            ranges_left,
            allow_exec=allow_exec,
        )
    add_debugging_info(
        expression,
        ranges_left,
        pattern_ids_to_pattern_matches,
        steps_for_debugging,
    )
    return ranges_left
def get_config(
    pattern: str, lang: str, config_strs: List[str]
) -> Tuple[semgrep.config_resolver.Config, List[SemgrepError]]:
    """Resolve a Config from an inline pattern or a list of config strings.

    :returns: a ``(config, errors)`` pair
    :raises SemgrepError: if a pattern lacks a language, or no config resolves
    """
    if not pattern:
        # a config is a dict from config_id -> config; config ids are not
        # well defined at this point
        resolved, resolution_errors = semgrep.config_resolver.Config.from_config_list(
            config_strs
        )
    else:
        # an inline pattern is meaningless without a language to parse it in
        if not lang:
            raise SemgrepError(
                "language must be specified when a pattern is passed")
        # TODO for now we generate a manual config. Might want to just call semgrep -e ... -l ...
        resolved, resolution_errors = semgrep.config_resolver.Config.from_pattern_lang(
            pattern, lang
        )

    if not resolved:
        raise SemgrepError(
            f"No config given and {DEFAULT_CONFIG_FILE} was not found. Try running with --help to debug or if you want to download a default config, try running with --config r2c"
        )
    return resolved, resolution_errors
def apply_fixes(rule_matches_by_rule: RuleMatchMap, dryrun: bool = False) -> None:
    """
    Modify files in place for all files with findings from rules with an
    autofix configuration.

    :param rule_matches_by_rule: mapping of rule -> list of its matches
    :param dryrun: when True, no files are written; the would-be fixed lines
        are recorded on each match's ``extra["fixed_lines"]`` instead
    :raises SemgrepError: on invalid fix-regex configuration or when a file
        cannot be modified
    """
    modified_files: Set[Path] = set()
    for _, rule_matches in rule_matches_by_rule.items():
        for rule_match in rule_matches:
            fix = rule_match.fix
            fix_regex = rule_match.fix_regex
            filepath = rule_match.path
            if fix:
                # plain string replacement fix
                try:
                    fixobj = _basic_fix(rule_match, fix)
                except Exception as e:
                    raise SemgrepError(
                        f"unable to modify file {filepath}: {e}")
            elif fix_regex:
                # regex-based fix: requires 'regex' and 'replacement',
                # optional integer 'count' (0 = replace all)
                regex = fix_regex.get("regex")
                replacement = fix_regex.get("replacement")
                count = fix_regex.get("count", 0)
                if not regex or not replacement:
                    raise SemgrepError(
                        "'regex' and 'replacement' values required when using 'fix-regex'"
                    )
                try:
                    count = int(count)
                except ValueError:
                    raise SemgrepError(
                        "optional 'count' value must be an integer when using 'fix-regex'"
                    )
                try:
                    fixobj = _regex_replace(rule_match, regex, replacement, count)
                except Exception as e:
                    raise SemgrepError(
                        f"unable to use regex to modify file {filepath} with fix '{fix}': {e}"
                    )
            else:
                # match has no autofix configured; nothing to do
                continue
            # endif
            if not dryrun:
                _write_contents(rule_match.path, fixobj.fixed_contents)
                modified_files.add(filepath)
            else:
                rule_match.extra[
                    "fixed_lines"] = fixobj.fixed_lines  # Monkey patch in fixed lines
    num_modified = len(modified_files)
    if len(modified_files):
        logger.info(
            f"successfully modified {num_modified} file{'s' if num_modified > 1 else ''}."
        )
    else:
        logger.info(f"no files modified.")
def adjust_for_docker() -> None:
    """Chdir into the mounted source directory when running inside Docker.

    Validates that the expected code volume is mounted (and that the legacy
    mount point is not in use) before changing the working directory so that
    all subsequent relative paths resolve against the user's code.

    :raises SemgrepError: if the old volume path is present or no code volume
        is mounted
    """
    # change into this folder so that all paths are relative to it
    if IN_DOCKER and not IN_GH_ACTION:
        if OLD_SRC_DIRECTORY.exists():
            raise SemgrepError(
                f"Detected Docker environment using old code volume, please use '{SRC_DIRECTORY}' instead of '{OLD_SRC_DIRECTORY}'"
            )
        if not SRC_DIRECTORY.exists():
            raise SemgrepError(
                f"Detected Docker environment without a code volume, please include '-v \"${{PWD}}:{SRC_DIRECTORY}\"'"
            )
        # FIX: removed the redundant `if SRC_DIRECTORY.exists()` re-check —
        # the guard above already raised when the directory is missing.
        os.chdir(SRC_DIRECTORY)
def run_spacegrep(patterns: List[Pattern], targets: List[Path]) -> dict:
    """Run spacegrep once per (pattern, target) pair and merge the results.

    :returns: dict with "matches" and "errors" lists aggregated over all runs
    :raises NotImplementedError: for non-string pattern payloads
    :raises SemgrepError: on process failure or malformed spacegrep output
    """
    matches: List[dict] = []
    errors: List[dict] = []
    for pattern in patterns:
        if not isinstance(pattern._pattern, str):
            raise NotImplementedError(
                f"Support for {type(pattern._pattern)} has not been implemented yet."
            )
        pattern_str = pattern._pattern  # TODO: Handle pattern Dict
        for target in targets:
            cmd = [
                SPACEGREP_PATH,
                "--output-format",
                "semgrep",
                "-d",
                str(target),
                pattern_str,
            ]
            try:
                p = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                p.check_returncode()
                raw_output = p.stdout
                output_json = _parse_spacegrep_output(raw_output)
                # rewrite match ids so they refer back to the originating pattern
                output_json["matches"] = _patch_id(
                    pattern, output_json.get("matches", []))
                matches.extend(output_json["matches"])
                errors.extend(output_json["errors"])
            except subprocess.CalledProcessError as e:
                # surface spacegrep's own stderr alongside the process error
                raw_error = p.stderr
                spacegrep_error_text = raw_error.decode("utf-8")
                raise SemgrepError(
                    f"Error running spacegrep on file {target}: Process error: {e}\n\nspacegrep error: {spacegrep_error_text}"
                )
            except json.JSONDecodeError as e:
                raise SemgrepError(
                    f"Could not parse spacegrep output as JSON: JSON error: {e}"
                )
            except KeyError as e:
                raise SemgrepError(
                    f"Invalid JSON output was received from spacegrep: {e}")
    return {
        "matches": matches,
        "errors": errors,
    }
def parse_config_string(
    config_id: str, contents: str, filename: Optional[str]
) -> Dict[str, YamlTree]:
    """Parse a YAML config string into ``{config_id: YamlTree}``.

    :param config_id: key under which the parsed tree is returned
    :param contents: raw YAML text
    :param filename: original file name used for span reporting (may be None)
    :raises SemgrepError: with UNPARSEABLE_YAML_EXIT_CODE on empty or
        invalid YAML input
    """
    if not contents:
        # FIX: plain string literal instead of a placeholder-free f-string (F541)
        raise SemgrepError(
            "Empty configuration file (unknown)",
            code=UNPARSEABLE_YAML_EXIT_CODE,
        )
    try:
        data = parse_yaml_preserve_spans(contents, filename)
        return {config_id: data}
    except YAMLError as se:
        # FIX: chain the original YAML error so the parse cause isn't lost
        raise SemgrepError(
            f"Invalid YAML file {config_id}:\n{indent(str(se))}",
            code=UNPARSEABLE_YAML_EXIT_CODE,
        ) from se
def _run_core_command(
    self,
    patterns_json: List[Any],
    patterns: List[Pattern],
    targets: List[Path],
    language: Language,
    rule: Rule,
    rules_file_flag: str,
    cache_dir: str,
) -> dict:
    """Invoke semgrep-core for one rule and return its parsed JSON output.

    :param patterns_json: serialized patterns, dumped as a YAML rules payload
    :param patterns: the original Pattern objects (for error reporting)
    :param rules_file_flag: core CLI flag naming the rules-file argument
    :raises SemgrepError: when core exits non-zero without a JSON "error"
        payload (JSON errors are translated by _raise_semgrep_error_from_json)
    """
    # Three temp files kept alive for the whole run: the YAML rules payload,
    # the newline-separated target list, and (optionally) rule equivalences.
    with tempfile.NamedTemporaryFile(
            "w") as pattern_file, tempfile.NamedTemporaryFile(
            "w") as target_file, tempfile.NamedTemporaryFile(
            "w") as equiv_file:
        yaml = YAML()
        yaml.dump({"rules": patterns_json}, pattern_file)
        pattern_file.flush()
        target_file.write("\n".join(str(t) for t in targets))
        target_file.flush()
        cmd = [SEMGREP_PATH] + [
            "-lang",
            language,
            rules_file_flag,
            pattern_file.name,
            "-j",
            str(self._jobs),
            "-target_file",
            target_file.name,
            "-use_parsing_cache",
            cache_dir,
            "-timeout",
            str(self._timeout),
            "-max_memory",
            str(self._max_memory),
        ]
        equivalences = rule.equivalences
        if equivalences:
            self._write_equivalences_file(equiv_file, equivalences)
            cmd += ["-equivalences", equiv_file.name]
        core_run = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        logger.debug(core_run.stderr.decode("utf-8", "replace"))
        if core_run.returncode != 0:
            # a non-zero exit may still carry a structured JSON error on stdout
            output_json = self._parse_core_output(core_run.stdout)
            if "error" in output_json:
                self._raise_semgrep_error_from_json(output_json, patterns)
            else:
                raise SemgrepError(
                    f"unexpected json output while invoking semgrep-core:\n{PLEASE_FILE_ISSUE_TEXT}"
                )
        output_json = self._parse_core_output(core_run.stdout)
        return output_json
def get_config(
    pattern: str, lang: str, config_strs: List[str]
) -> Tuple[Config, List[SemgrepError]]:
    """Resolve a Config from an inline pattern or a list of config strings.

    :returns: a ``(config, errors)`` pair
    :raises SemgrepError: if a pattern lacks a language, or no config resolves
    """
    if not pattern:
        resolved, resolution_errors = Config.from_config_list(config_strs)
    else:
        # an inline pattern requires an explicit language
        if not lang:
            raise SemgrepError("language must be specified when a pattern is passed")
        resolved, resolution_errors = Config.from_pattern_lang(pattern, lang)

    if not resolved:
        raise SemgrepError(
            f"No config given and {DEFAULT_CONFIG_FILE} was not found. Try running with --help to debug or if you want to download a default config, try running with --config r2c"
        )
    return resolved, resolution_errors
def _fail(
    self,
    reason: str,
    rule: Rule,
    core_run: subprocess.CompletedProcess,
    returncode: int,
    semgrep_output: str,
    semgrep_error_output: str,
) -> None:
    """Raise a SemgrepError describing a semgrep-core failure.

    The message embeds the failing rule, the exact command line, and the
    captured stdout/stderr of the core process.
    """
    # Once we require python >= 3.8, switch to using shlex.join instead
    # for proper quoting of the command line.
    shell_command = " ".join(core_run.args)
    details = [
        f"semgrep-core failed: {reason}\n",
        f"rule ID: '{rule.id}'\n",
        f"semgrep-core exit code: {returncode}\n",
        f"semgrep-core command: {shell_command}\n",
        "unexpected non-json output while invoking semgrep-core:\n",
        "--- semgrep-core stdout ---\n",
        semgrep_output,
        "--- end semgrep-core stdout ---\n",
        "--- semgrep-core stderr ---\n",
        semgrep_error_output,
        "--- end semgrep-core stderr ---\n",
        PLEASE_FILE_ISSUE_TEXT,
    ]
    raise SemgrepError("".join(details))
def _where_python_statement_matches(where_expression: str,
                                    metavars: Dict[str, Any]) -> bool:
    """Evaluate a where-python expression against matched metavariables.

    The last line of the (possibly multi-line) expression is its result; the
    metavariable contents are exposed to the expression as ``vars``. Any
    evaluation error is logged and leaves the result as False.

    :raises SemgrepError: if the expression does not produce a bool
    """
    # TODO: filter out obvious dangerous things here
    result = False
    local_vars = {k: v["abstract_content"] for k, v in metavars.items()}
    RETURN_VAR = "semgrep_pattern_return"
    try:
        cleaned_where_expression = where_expression.strip()
        lines = cleaned_where_expression.split("\n")
        # rebind the final line so exec() leaves the result under RETURN_VAR
        new_last_line = f"{RETURN_VAR} = {lines[-1]}"
        lines[-1] = new_last_line
        to_eval = "\n".join(lines)
        scope = {"vars": local_vars}
        # fmt: off
        exec( to_eval, scope )   # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        result = scope[RETURN_VAR]  # type: ignore
    except KeyError as ex:
        logger.error(
            f"could not find metavariable {ex} while evaluating where-python expression '{where_expression}', consider case where metavariable is missing"
        )
    except Exception as ex:
        logger.error(
            f"received error '{repr(ex)}' while evaluating where-python expression '{where_expression}'"
        )
    if not isinstance(result, bool):
        raise SemgrepError(
            f"where-python expression '{where_expression}' needs boolean output but got {result}"
        )
    return result
def _where_python_statement_matches(where_expression: str,
                                    metavars: Dict[str, Any]) -> bool:
    """Evaluate a where-python expression against matched metavariables.

    The last line of the (possibly multi-line) expression is its result; the
    metavariable contents are exposed to the expression as ``vars``. Any
    evaluation error is reported on stderr and leaves the result as False.

    :raises SemgrepError: if the expression does not produce a bool
    """
    # TODO: filter out obvious dangerous things here
    result = False
    local_vars = {k: v["abstract_content"] for k, v in metavars.items()}
    RETURN_VAR = "semgrep_pattern_return"
    try:
        cleaned_where_expression = where_expression.strip()
        lines = cleaned_where_expression.split("\n")
        # rebind the final line so exec() leaves the result under RETURN_VAR
        new_last_line = f"{RETURN_VAR} = {lines[-1]}"
        lines[-1] = new_last_line
        to_eval = "\n".join(lines)
        scope = {"vars": local_vars}
        # fmt: off
        exec( to_eval, scope )   # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        result = scope[RETURN_VAR]  # type: ignore
    except Exception as ex:
        print_stderr(
            f"error evaluating a where-python expression: `{where_expression}`: {ex}"
        )
    if not isinstance(result, bool):
        raise SemgrepError(
            f"python where expression needs boolean output but got: {result} for {where_expression}"
        )
    return result
def into_semgrep_error(self) -> SemgrepError:
    """Convert this core error entry into the matching SemgrepError subclass.

    Known check ids map directly onto dedicated error classes; anything else
    is treated as a parse error in the target file, with a span built from
    the file's contents.
    """
    direct = {
        "Timeout": MatchTimeoutError,
        "OutOfMemory": OutOfMemoryError,
        "LexicalError": LexicalError,
    }
    error_cls = direct.get(self._check_id)
    if error_cls is not None:
        return error_cls(self._path, self._rule_id)

    # fall back to a parse error; reading the source may itself fail
    try:
        with open(self._path, errors="replace") as f:
            file_hash = SourceTracker.add_source(f.read())
    except IOError as e:
        return SemgrepError(f"Could not open '{self._path}': {e}")

    error_span = Span(
        start=self._start,
        end=self._end,
        source_hash=file_hash,
        file=str(self._path),
    )
    return SourceParseError(
        short_msg="parse error",
        long_msg=f"Could not parse {self._path.name} as {self._language}",
        spans=[error_span],
        help="If the code appears to be valid, this may be a semgrep bug.",
    )
def close(self) -> None:
    """
    Close the output handler. This will write any output that hasn't been
    written so far.

    It returns the exit code of the program.
    """
    if self.has_output:
        output = self.build_output(self.settings.output_destination is None
                                   and self.stdout.isatty())
        if output:
            print(output, file=self.stdout)
        if self.stats_line:
            logger.info(self.stats_line)
        if self.settings.output_destination:
            self.save_output(self.settings.output_destination, output)
    final_error = None
    error_stats = None
    if self.final_error:
        final_error = self.final_error
    elif self.rule_matches and self.settings.error_on_findings:
        # This exception won't be visible to the user, we're just
        # using this to return a specific error code
        final_error = SemgrepError("", code=FINDINGS_EXIT_CODE)
    elif self.semgrep_structured_errors:
        # make a simplifying assumption that # errors = # files failed
        # it's a quite a bit of work to simplify further because errors may or may not have path, span, etc.
        error_stats = (
            f"{len(self.semgrep_structured_errors)} files could not be analyzed"
        )
        final_error = self.semgrep_structured_errors[-1]
    self.final_raise(final_error, error_stats)
def _sarif_notification_from_error(error: SemgrepError) -> Dict[str, Any]:
    """Render a SemgrepError as a SARIF notification object.

    The notification text uses the most specific message field available:
    ``message``, then ``long_msg``, then ``short_msg`` (empty as a last resort).
    """
    error_dict = error.to_dict()
    level_by_name = {
        Level.ERROR.name.lower(): "error",
        Level.WARN.name.lower(): "warning",
    }
    text = next(
        (
            error_dict[key]
            for key in ("message", "long_msg")
            if error_dict.get(key) is not None
        ),
        error_dict.get("short_msg", ""),
    )
    return {
        "descriptor": {"id": error_dict["type"]},
        "message": {"text": text},
        "level": level_by_name[error_dict["level"]],
    }
def handle_regex_patterns(
    self,
    outputs: List[PatternMatch],
    patterns_regex: List[Any],
    targets: List[Path],
) -> None:
    """Run pure-regex patterns over targets, extending ``outputs`` in place.

    Matching is parallelized across a process pool, except under test where
    nested pools are not allowed.

    :raises SemgrepError: if any pattern fails to compile as a Python regex
    """
    patterns_json = [pattern.to_json() for pattern in patterns_regex]
    try:
        # compile each regex once, up front, keeping its pattern id alongside
        patterns_re = [(pattern["id"], re.compile(pattern["pattern"]))
                       for pattern in patterns_json]
    except re.error as err:
        raise SemgrepError(f"invalid regular expression specified: {err}")
    if self._testing:
        # Testing functionality runs in a multiprocessing.Pool. We cannot run
        # a Pool inside a Pool, so we have to avoid multiprocessing when testing.
        # https://stackoverflow.com/questions/6974695/python-process-pool-non-daemonic
        matches = [
            get_re_matches(patterns_re, target) for target in targets
        ]
    else:
        re_fn = functools.partial(get_re_matches, patterns_re)
        with multiprocessing.Pool(self._jobs) as pool:
            matches = pool.map(re_fn, targets)
    # flatten the per-file match lists into the shared outputs list
    outputs.extend(single_match for file_matches in matches
                   for single_match in file_matches)
def compare_where_python(where_expression: str,
                         pattern_match: PatternMatch) -> bool:
    """Evaluate a where-python expression against a single pattern match.

    The last line of the (possibly multi-line) expression is its result; the
    match's metavariable values are exposed to the expression as ``vars``.
    Any evaluation error is logged and leaves the result as False.

    :raises SemgrepError: if the expression does not produce a bool
    """
    result = False
    return_var = "semgrep_pattern_return"
    lines = where_expression.strip().split("\n")
    # rebind the final line so exec() leaves the result under return_var
    to_eval = "\n".join(lines[:-1] + [f"{return_var} = {lines[-1]}"])
    local_vars = {
        metavar: pattern_match.get_metavariable_value(metavar)
        for metavar in pattern_match.metavariables
    }
    scope = {"vars": local_vars}
    try:
        # fmt: off
        exec( to_eval, scope )   # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        result = scope[return_var]  # type: ignore
    except KeyError as ex:
        logger.error(
            f"could not find metavariable {ex} while evaluating where-python expression '{where_expression}', consider case where metavariable is missing"
        )
    except Exception as ex:
        logger.error(
            f"received error '{repr(ex)}' while evaluating where-python expression '{where_expression}'"
        )
    if not isinstance(result, bool):
        raise SemgrepError(
            f"where-python expression '{where_expression}' needs boolean output but got {result}"
        )
    return result
def _where_python_statement_matches(where_expression: str, metavars: Dict[str, Any]) -> bool: # TODO: filter out obvious dangerous things here output_var = None # HACK: we're executing arbitrary Python in the where-python, # be careful my friend vars = {k: v["abstract_content"] for k, v in metavars.items()} RETURN_VAR = "semgrep_pattern_return" try: cleaned_where_expression = where_expression.strip() lines = cleaned_where_expression.split("\n") new_last_line = f"{RETURN_VAR} = {lines[-1]}" lines[-1] = new_last_line to_eval = "\n".join(lines) scope = {"vars": vars} # fmt: off exec( to_eval, scope ) # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected # fmt: on output_var = scope[RETURN_VAR] except Exception as ex: print_stderr( f"error evaluating a where-python expression: `{where_expression}`: {ex}" ) if type(output_var) != type(True): raise SemgrepError( f"python where expression needs boolean output but got: {output_var} for {where_expression}" ) return output_var == True
def close(self) -> None:
    """
    Close the output handler. This will write any output that hasn't been
    written so far.

    It returns the exit code of the program.
    """
    if self.has_output:
        output = self.build_output(self.settings.output_destination is None
                                   and self.stdout.isatty())
        if output:
            print(output, file=self.stdout)
        if self.settings.output_destination:
            self.save_output(self.settings.output_destination, output)
    final_error = None
    if self.final_error:
        final_error = self.final_error
    elif self.rule_matches and self.settings.error_on_findings:
        # This exception won't be visible to the user, we're just
        # using this to return a specific error code
        final_error = SemgrepError("", code=FINDINGS_EXIT_CODE)
    elif self.semgrep_structured_errors:
        final_error = self.semgrep_structured_errors[-1]
    self.final_raise(final_error)
def _raise_semgrep_error_from_json(
    self,
    error_json: Dict[str, Any],
    patterns: List[Pattern],
    rule: Rule,
) -> None:
    """
    Translate a semgrep-core JSON error object into the matching exception.

    See format_output_exception in semgrep O'Caml for details on schema.

    :raises SemgrepError: for invalid-language, invalid-regexp, and
        unrecognized error types
    :raises InvalidPatternErrorNoSpan: for invalid patterns when running
        with all optimizations (no span information available)
    :raises InvalidPatternError: for invalid patterns otherwise
    """
    error_type = error_json["error"]
    if error_type == "invalid language":
        raise SemgrepError(
            f'{error_json["language"]} was accepted by semgrep but rejected by semgrep-core. {PLEASE_FILE_ISSUE_TEXT}'
        )
    elif error_type == "invalid regexp in rule":
        raise SemgrepError(
            f'Invalid regexp in rule: {error_json["message"]}')
    elif error_type == "invalid pattern":
        if self._optimizations == "all":
            # optimized runs don't track pattern spans, so raise the span-free variant
            raise InvalidPatternErrorNoSpan(
                rule_id=error_json.get("pattern_id", "<no rule_id>"),
                pattern=error_json.get("pattern", "<no pattern>"),
                language=error_json.get("language", "<no language>"),
            )
        else:
            # map core's pattern_id back to the pattern we sent to recover its span
            matching_pattern = next(
                (p for p in patterns if p._id == error_json["pattern_id"]),
                None)
            if matching_pattern is None or matching_pattern.span is None:
                raise SemgrepError(
                    f"Pattern id from semgrep-core was missing in pattern spans. {PLEASE_FILE_ISSUE_TEXT}"
                )
            matching_span = matching_pattern.span
            raise InvalidPatternError(
                short_msg=error_type,
                long_msg=
                f"Pattern could not be parsed as a {error_json['language']} semgrep pattern",
                spans=[matching_span],
                help=None,
            )
    # no special formatting ought to be required for the other types; the semgrep python should be performing
    # validation for them. So if any other type of error occurs, ask the user to file an issue
    else:
        raise SemgrepError(
            f"an internal error occured while invoking semgrep-core while running rule '{rule.id}'. Consider skipping this rule and reporting this issue.\n\t{error_type}: {error_json.get('message', 'no message')}\n{PLEASE_FILE_ISSUE_TEXT}"
        )
def post_output(cls, output_url: str, output: str) -> None:
    """POST the rendered output to the given URL with a 10-second timeout.

    :raises SemgrepError: when the request times out
    """
    import requests  # here for faster startup times

    logger.info(f"posting to {output_url}...")
    try:
        response = requests.post(output_url, data=output, timeout=10)
        logger.debug(
            f"posted to {output_url} and got status_code:{response.status_code}"
        )
    except requests.exceptions.Timeout:
        raise SemgrepError(f"posting output to {output_url} timed out")
def _parse_core_output(self, core_run_out: bytes) -> Dict[str, Any]: # see if semgrep output a JSON error that we can decode semgrep_output = core_run_out.decode("utf-8", "replace") try: return cast(Dict[str, Any], json.loads(semgrep_output)) except ValueError: raise SemgrepError( f"unexpected non-json output while invoking semgrep-core:\n{semgrep_output}\n\n{PLEASE_FILE_ISSUE_TEXT}" )
def synthesize(language: str, code_to_synthesize: str,
               targets_str: Sequence[str]) -> None:
    """Ask semgrep-core to synthesize patterns for a snippet in one target file.

    Prints semgrep-core's output to stdout.

    :param language: NOTE(review): not referenced in this body — presumably
        kept for interface symmetry; confirm before relying on it
    :param code_to_synthesize: the code snippet to synthesize patterns for
    :param targets_str: candidate target paths; exactly one is required
    :raises SemgrepError: when there isn't exactly one target or core fails
    """
    targets = semgrep.config_resolver.resolve_targets(targets_str)
    if len(targets) != 1:
        raise SemgrepError(
            "--synthesize-patterns requires exactly one target file")
    target = targets[0]
    args = ["-synthesize_patterns", code_to_synthesize, str(target)]
    cmd = [SemgrepCore.path()] + args
    try:
        output = sub_check_output(cmd)
    except subprocess.CalledProcessError as ex:
        raise SemgrepError(
            f"error invoking semgrep with:\n\t{' '.join(cmd)}\n\t{ex}\n{PLEASE_FILE_ISSUE_TEXT}"
        )
    # core output may contain bytes that aren't valid UTF-8; decode defensively
    print(output.decode(errors="replace"))
def _raise_semgrep_error_from_json(
    self,
    error_json: Dict[str, Any],
    rule: Rule,
) -> None:
    """
    Translate a semgrep-core JSON error object into the matching exception.

    See format_output_exception in semgrep O'Caml for details on schema.

    :raises SemgrepError: for invalid-language, invalid-regexp, and
        unrecognized error types
    :raises InvalidPatternError: for invalid patterns, with a span built
        from core's reported range
    """
    error_type = error_json["error"]
    if error_type == "invalid language":
        raise SemgrepError(
            f'{error_json["language"]} was accepted by semgrep but rejected by semgrep-core. {PLEASE_FILE_ISSUE_TEXT}'
        )
    elif error_type == "invalid regexp in rule":
        raise SemgrepError(
            f'Invalid regexp in rule: {error_json["message"]}')
    elif error_type == "invalid pattern":
        # build a span from the range core reports; line/col/path default to
        # empty values when core omits them
        range = error_json["range"]
        s = error_json.get("pattern", "<no pattern>")
        matching_span = Span.from_string_token(
            s=s,
            line=range.get("line", 0),
            col=range.get("col", 0),
            path=range.get("path", []),
            filename="semgrep temp file",
        )
        if error_json["message"] == "Parsing.Parse_error":
            long_msg = f"Pattern `{s.strip()}` could not be parsed as a {error_json['language']} semgrep pattern"
        else:
            long_msg = f"Error parsing {error_json['language']} pattern: {error_json['message']}"
        raise InvalidPatternError(
            short_msg=error_type,
            long_msg=long_msg,
            spans=[matching_span],
            help=None,
        )
    # no special formatting ought to be required for the other types; the semgrep python should be performing
    # validation for them. So if any other type of error occurs, ask the user to file an issue
    else:
        raise SemgrepError(
            f"an internal error occured while invoking semgrep-core while running rule '{rule.id}'. Consider skipping this rule and reporting this issue.\n\t{error_type}: {error_json.get('message', 'no message')}\n{PLEASE_FILE_ISSUE_TEXT}"
        )
def load_config_from_local_path(location: str) -> Dict[str, YamlTree]:
    """
    Return config file(s) as dictionary object

    :param location: path (relative to the base path) to a config file or folder
    :raises SemgrepError: when the path does not exist or is neither a file
        nor a folder
    """
    loc = get_base_path().joinpath(location)
    if not loc.exists():
        addendum = ""
        if IN_DOCKER:
            addendum = " (since you are running in docker, you cannot specify arbitrary paths on the host; they must be mounted into the container)"
        raise SemgrepError(
            f"unable to find a config; path `{loc}` does not exist{addendum}")
    if loc.is_file():
        return parse_config_at_path(loc)
    if loc.is_dir():
        return parse_config_folder(loc)
    raise SemgrepError(
        f"config location `{loc}` is not a file or folder!")