def _where_python_statement_matches(
    where_expression: str, metavars: Dict[str, Any]
) -> bool:
    # TODO: filter out obvious dangerous things here
    result = False

    local_vars = {k: v["abstract_content"] for k, v in metavars.items()}
    RETURN_VAR = "semgrep_pattern_return"
    try:
        cleaned_where_expression = where_expression.strip()
        lines = cleaned_where_expression.split("\n")
        new_last_line = f"{RETURN_VAR} = {lines[-1]}"
        lines[-1] = new_last_line
        to_eval = "\n".join(lines)
        scope = {"vars": local_vars}
        # fmt: off
        exec(to_eval, scope)  # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        result = scope[RETURN_VAR]  # type: ignore
    except Exception as ex:
        print_stderr(
            f"error evaluating a where-python expression: `{where_expression}`: {ex}"
        )

    if not isinstance(result, bool):
        raise SemgrepError(
            f"python where expression needs boolean output but got: {result} for {where_expression}"
        )
    return result
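

# Illustration (hypothetical helper, not part of the original module): shows the
# metavariable payload shape the function above expects ("abstract_content" is the
# only key it reads) and the `vars[...]` convention available to the where-python
# expression via the exec scope.
def _example_where_python_match() -> bool:  # pragma: no cover - illustration only
    return _where_python_statement_matches(
        "vars['$X'] == vars['$Y']",
        {"$X": {"abstract_content": "foo"}, "$Y": {"abstract_content": "foo"}},
    )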
def generate_config() -> None:
    import requests  # here for faster startup times

    # defensive coding
    if Path(DEFAULT_CONFIG_FILE).exists():
        raise SemgrepError(
            f"{DEFAULT_CONFIG_FILE} already exists. Please remove and try again"
        )
    try:
        r = requests.get(TEMPLATE_YAML_URL, timeout=10)
        r.raise_for_status()
        template_str = r.text
    except Exception as e:
        debug_print(str(e))
        print_stderr(
            "There was a problem downloading the latest template config. Using fallback template"
        )
        template_str = """rules:
  - id: eqeq-is-bad
    pattern: $X == $X
    message: "$X == $X is a useless equality check"
    languages: [python]
    severity: ERROR"""
    try:
        with open(DEFAULT_CONFIG_FILE, "w") as template:
            template.write(template_str)
        print_stderr(f"Template config successfully written to {DEFAULT_CONFIG_FILE}")
    except Exception as e:
        raise SemgrepError(str(e))
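

# Usage note (sketch): cli() below wires this up to the `-g/--generate-config`
# flag, so the end-to-end flow is roughly the following (hypothetical) session:
#
#   $ semgrep --generate-config        # writes DEFAULT_CONFIG_FILE
#   $ semgrep --config <that file>     # runs the starter rule(s)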
def apply_fixes(
    rule_matches_by_rule: Dict[Rule, List[RuleMatch]], dryrun: bool = False
) -> None:
    """
    Modify files in place for all files with findings from rules with an autofix configuration
    """
    modified_files: Set[Path] = set()

    for _, rule_matches in rule_matches_by_rule.items():
        for rule_match in rule_matches:
            fix = rule_match.fix
            fix_regex = rule_match.fix_regex
            filepath = rule_match.path
            if fix:
                try:
                    fixobj = _basic_fix(rule_match, fix)
                except Exception as e:
                    raise SemgrepError(f"unable to modify file {filepath}: {e}")
            elif fix_regex:
                regex = fix_regex.get("regex")
                replacement = fix_regex.get("replacement")
                count = fix_regex.get("count", 0)
                if not regex or not replacement:
                    raise SemgrepError(
                        "'regex' and 'replacement' values required when using 'fix-regex'"
                    )
                try:
                    count = int(count)
                except ValueError:
                    raise SemgrepError(
                        "optional 'count' value must be an integer when using 'fix-regex'"
                    )
                try:
                    fixobj = _regex_replace(rule_match, regex, replacement, count)
                except Exception as e:
                    raise SemgrepError(
                        f"unable to use regex to modify file {filepath} with fix '{fix}': {e}"
                    )
            else:
                continue
            # endif
            if not dryrun:
                _write_contents(rule_match.path, fixobj.fixed_contents)
                modified_files.add(filepath)
            else:
                rule_match.extra["fixed_lines"] = fixobj.fixed_lines  # Monkey patch in fixed lines

    num_modified = len(modified_files)
    if len(modified_files):
        print_stderr(
            f"successfully modified {num_modified} file{'s' if num_modified > 1 else ''}."
        )
    else:
        print_stderr("no files modified.")
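

# Illustration (hypothetical values): the shape of the `fix_regex` mapping read
# above. 'regex' and 'replacement' are required; 'count' is optional and must
# parse as an integer (the default 0 presumably means "replace all occurrences",
# as with re.sub, but _regex_replace is defined elsewhere).
_EXAMPLE_FIX_REGEX = {
    "regex": r"==\s*None",
    "replacement": "is None",
    "count": 0,
}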
def handle_semgrep_error(self, error: SemgrepError) -> None:
    """
    Reports generic exceptions that extend SemgrepError
    """
    self.has_output = True
    if error not in self.error_set:
        self.semgrep_structured_errors.append(error)
        self.error_set.add(error)
        print_stderr(str(error))
def post_output(cls, output_url: str, output: str) -> None:
    import requests  # here for faster startup times

    print_stderr(f"posting to {output_url}...")
    try:
        r = requests.post(output_url, data=output, timeout=10)
        debug_print(f"posted to {output_url} and got status_code:{r.status_code}")
    except requests.exceptions.Timeout:
        raise SemgrepError(f"posting output to {output_url} timed out")
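

# Usage sketch (hypothetical endpoint; the enclosing class is not shown here):
# posts the rendered output string as the request body, logs the response status
# code via debug_print, and raises SemgrepError only when the request times out.
#
#   post_output("https://example.com/semgrep-results", json_output_string)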
def download_config(config_url: str) -> Dict[str, YamlTree]:
    import requests  # here for faster startup times

    DOWNLOADING_MESSAGE = "downloading config..."
    debug_print(f"trying to download from {config_url}")
    print_stderr(
        f"using config from {nice_semgrep_url(config_url)}. Visit https://semgrep.live/registry to see all public rules."
    )
    print_stderr(DOWNLOADING_MESSAGE, end="\r")
    headers = {"User-Agent": SEMGREP_USER_AGENT}

    try:
        r = requests.get(config_url, stream=True, headers=headers, timeout=10)
        if r.status_code == requests.codes.ok:
            content_type = r.headers.get("Content-Type")
            yaml_types = [
                "text/plain",
                "application/x-yaml",
                "text/x-yaml",
                "text/yaml",
                "text/vnd.yaml",
            ]
            if content_type and any(ct in content_type for ct in yaml_types):
                return parse_config_string(
                    "remote-url",
                    r.content.decode("utf-8"),
                    filename=f"{config_url[:20]}...",
                )
            else:
                raise SemgrepError(
                    f"unknown content-type: {content_type} returned by config url: {config_url}. Can not parse"
                )
        else:
            raise SemgrepError(
                f"bad status code: {r.status_code} returned by config url: {config_url}"
            )
    except Exception as e:
        raise SemgrepError(f"Failed to download config from {config_url}: {str(e)}")
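

# Usage sketch (hypothetical URL): any response whose Content-Type matches one of
# the yaml_types above is handed to parse_config_string; everything else, including
# non-200 responses and download failures, surfaces as a SemgrepError.
#
#   configs = download_config("https://semgrep.live/c/r/example.rule-id")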
def cli() -> None:
    parser = argparse.ArgumentParser(
        description=f"semgrep CLI. For more information about semgrep, go to {SEMGREP_URL}",
        prog="semgrep",
    )

    # input
    parser.add_argument(
        "target",
        nargs="*",
        default=[os.curdir],
        help=(
            "Search these files or directories. Defaults to entire current "
            "working directory. Implied argument if piping to semgrep."
        ),
    )

    # config options
    config = parser.add_argument_group("config")
    config_ex = config.add_mutually_exclusive_group()
    config_ex.add_argument(
        "-g",
        "--generate-config",
        action="store_true",
        help=f"Generate starter configuration file, {DEFAULT_CONFIG_FILE}",
    )
    config_ex.add_argument(
        "-f",
        "--config",
        help=(
            "YAML configuration file, directory of YAML files ending in "
            ".yml|.yaml, URL of a configuration file, or semgrep registry entry "
            "name. See README for information on configuration file format."
        ),
    )
    config_ex.add_argument(
        "-e",
        "--pattern",
        help="Code search pattern. See README for information on pattern features.",
    )
    config.add_argument(
        "-l",
        "--lang",
        help=(
            "Parse pattern and all files in specified language. Must be used "
            "with -e/--pattern."
        ),
    )
    config.add_argument(
        "--validate",
        action="store_true",
        help="Validate configuration file(s). No search is performed.",
    )
    config.add_argument(
        "--strict",
        action="store_true",
        help="Only invoke semgrep if configuration file(s) are valid.",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="Skip any file or directory that matches this pattern; --exclude='*.py' will ignore"
        " the following: foo.py, src/foo.py, foo.py/bar.sh. --exclude='tests' will ignore tests/foo.py"
        " as well as a/b/tests/c/foo.py. Can add multiple times. Overrides includes.",
    )
    parser.add_argument(
        "--include",
        action="append",
        default=[],
        help="Scan only files or directories that match this pattern; --include='*.jsx' will scan"
        " the following: foo.jsx, src/foo.jsx, foo.jsx/bar.sh. --include='src' will scan src/foo.py"
        " as well as a/b/src/c/foo.py. Can add multiple times.",
    )
    parser.add_argument(
        "--no-git-ignore",
        action="store_true",
        help="Scan all files even those ignored by a project's gitignore(s)",
    )
    config.add_argument(
        RCE_RULE_FLAG,
        action="store_true",
        help=(
            "WARNING: allow rules to run arbitrary code. ONLY ENABLE IF YOU "
            "TRUST THE SOURCE OF ALL RULES IN YOUR CONFIGURATION."
        ),
    )
    config.add_argument(
        "--precommit",
        action="store_true",
        help=argparse.SUPPRESS,
    )
    config.add_argument(
        "-j",
        "--jobs",
        action="store",
        type=int,
        default=CPU_COUNT,
        help=(
            "Number of subprocesses to use to run checks in parallel. Defaults "
            "to the number of CPUs on the system."
        ),
    )

    # output options
    output = parser.add_argument_group("output")
    output.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help=(
            "Do not print anything to stdout. Search results can still be "
            "saved to an output file specified by -o/--output. Exit code "
            "provides success status."
        ),
    )
    output.add_argument(
        "--no-rewrite-rule-ids",
        action="store_true",
        help=(
            "Do not rewrite rule ids when they appear in nested sub-directories "
            "(by default, rule 'foo' in test/rules.yaml will be renamed "
            "'test.foo')."
        ),
    )
    output.add_argument(
        "-o",
        "--output",
        help=(
            "Save search results to a file or post to URL. "
            "Default is to print to stdout."
        ),
    )
    output.add_argument(
        "--json", action="store_true", help="Output results in JSON format."
    )
    output.add_argument(
        "--debugging-json",
        action="store_true",
        help="Output JSON with extra debugging information.",
    )
    output.add_argument(
        "--sarif", action="store_true", help="Output results in SARIF format."
    )
    output.add_argument("--test", action="store_true", help="Run test suite.")
    parser.add_argument(
        "--test-ignore-todo",
        action="store_true",
        help="Ignore rules marked as '#todoruleid:' in test files.",
    )
    output.add_argument(
        "--dump-ast",
        action="store_true",
        help=(
            "Show AST of the input file or passed expression and then exit "
            "(can use --json)."
        ),
    )
    output.add_argument(
        "--synthesize-patterns",
        help=argparse.SUPPRESS,
    )
    output.add_argument(
        "--error",
        action="store_true",
        help="Exit 1 if there are findings. Useful for CI and scripts.",
    )
    output.add_argument(
        "-a",
        "--autofix",
        action="store_true",
        help=(
            "Apply the autofix patches. WARNING: data loss can occur with this "
            "flag. Make sure your files are stored in a version control system."
        ),
    )
    output.add_argument(
        "--dryrun",
        action="store_true",
        help=(
            "Do autofixes, but don't write them to a file. "
            "This will print the changes to the console. "
            "This lets you see the changes before you commit to them. "
            "Only works with the --autofix flag. Otherwise does nothing."
        ),
    )
    output.add_argument(
        "--disable-nosem",
        action="store_true",
        help=(
            "Disable the effect of 'nosem'. This will report findings on lines "
            "containing a 'nosem' comment at the end."
        ),
    )

    # logging options
    logging = parser.add_argument_group("logging")
    logging.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help=(
            "Set the logging level to verbose. E.g. statements about which "
            "files are being processed will be printed."
        ),
    )
    parser.add_argument(
        "--version", action="store_true", help="Show the version and exit."
    )
    parser.add_argument(
        "--force-color",
        action="store_true",
        help="Always include ANSI color in the output, even if not writing to a TTY",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="Disable checking for latest version.",
    )

    ### Parse and validate
    args = parser.parse_args()
    if args.version:
        print(__VERSION__)
        return

    if args.pattern and not args.lang:
        parser.error("-e/--pattern and -l/--lang must both be specified")

    if args.dump_ast and not args.lang:
        parser.error("--dump-ast and -l/--lang must both be specified")

    # set the flags
    semgrep.util.set_flags(args.verbose, args.quiet, args.force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker(args.precommit)
    except SemgrepError as e:
        print_stderr(str(e))
        raise e

    output_format = OutputFormat.TEXT
    if args.json:
        output_format = OutputFormat.JSON
    elif args.debugging_json:
        output_format = OutputFormat.JSON_DEBUG
    elif args.sarif:
        output_format = OutputFormat.SARIF

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=args.output,
        quiet=args.quiet,
        error_on_findings=args.error,
        strict=args.strict,
    )

    if not args.disable_version_check:
        if not is_running_latest():
            print_stderr(
                "A new version of Semgrep is available. Please see https://github.com/returntocorp/semgrep#upgrading for more information."
            )

    if args.test:
        # the test code (which isn't a "test" per se but is actually machinery
        # to evaluate semgrep performance) uses managed_output internally
        semgrep.test.test_main(args)

    with managed_output(output_settings) as output_handler:
        if args.dump_ast:
            dump_parsed_ast(args.json, args.lang, args.pattern, args.target)
        elif args.synthesize_patterns:
            synthesize_patterns(args.lang, args.synthesize_patterns, args.target)
        elif args.validate:
            configs, config_errors = semgrep.semgrep_main.get_config(
                args.pattern, args.lang, args.config
            )
            valid_str = "invalid" if config_errors else "valid"
            print_stderr(
                f"Configuration is {valid_str} - found {len(configs)} valid configuration(s) and {len(config_errors)} configuration error(s)."
            )
            if config_errors:
                for error in config_errors:
                    output_handler.handle_semgrep_error(error)
                raise SemgrepError("Please fix the above errors and try again.")
        elif args.generate_config:
            semgrep.config_resolver.generate_config()
        else:
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=args.target,
                pattern=args.pattern,
                lang=args.lang,
                config=args.config,
                no_rewrite_rule_ids=args.no_rewrite_rule_ids,
                jobs=args.jobs,
                include=args.include,
                exclude=args.exclude,
                strict=args.strict,
                autofix=args.autofix,
                dryrun=args.dryrun,
                disable_nosem=args.disable_nosem,
                dangerously_allow_arbitrary_code_execution_from_rules=args.dangerously_allow_arbitrary_code_execution_from_rules,
                no_git_ignore=args.no_git_ignore,
            )
def notify_user_of_work(
    all_rules: List[Rule],
    include: List[str],
    exclude: List[str],
    verbose: bool = False,
) -> None:
    """
    Notify user of what semgrep is about to do, including:
    - number of rules
    - which rules? <- not yet, too cluttered
    - which dirs are excluded, etc.
    """
    if include:
        print_stderr("including files:")
        for inc in include:
            print_stderr(f"- {inc}")
    if exclude:
        print_stderr("excluding files:")
        for exc in exclude:
            print_stderr(f"- {exc}")
    print_stderr(f"running {len(all_rules)} rules...")
    if verbose:
        print_stderr("rules:")
        for rule in all_rules:
            print_stderr(f"- {rule.id}")