Beispiel #1
0
def _where_python_statement_matches(where_expression: str,
                                    metavars: Dict[str, Any]) -> bool:
    # TODO: filter out obvious dangerous things here
    result = False

    local_vars = {k: v["abstract_content"] for k, v in metavars.items()}
    RETURN_VAR = "semgrep_pattern_return"
    try:
        cleaned_where_expression = where_expression.strip()
        lines = cleaned_where_expression.split("\n")
        new_last_line = f"{RETURN_VAR} = {lines[-1]}"
        lines[-1] = new_last_line
        to_eval = "\n".join(lines)
        scope = {"vars": local_vars}
        # fmt: off
        exec(
            to_eval, scope
        )  # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        result = scope[RETURN_VAR]  # type: ignore
    except Exception as ex:
        print_stderr(
            f"error evaluating a where-python expression: `{where_expression}`: {ex}"
        )

    if not isinstance(result, bool):
        raise SemgrepError(
            f"python where expression needs boolean output but got: {result} for {where_expression}"
        )
    return result
Beispiel #2
0
def generate_config() -> None:
    """
    Write a starter rule configuration to DEFAULT_CONFIG_FILE.

    The template is fetched from TEMPLATE_YAML_URL. If the download fails for
    any reason, the failure is logged and a built-in fallback rule is written
    instead.

    Raises:
        SemgrepError: if DEFAULT_CONFIG_FILE already exists, or if writing the
            file fails (the underlying exception is chained as the cause).
    """
    import requests  # here for faster startup times

    # defensive coding
    if Path(DEFAULT_CONFIG_FILE).exists():
        raise SemgrepError(
            f"{DEFAULT_CONFIG_FILE} already exists. Please remove and try again"
        )
    try:
        r = requests.get(TEMPLATE_YAML_URL, timeout=10)
        r.raise_for_status()
        template_str = r.text
    except Exception as e:
        # Deliberate best effort: any download problem falls back to the
        # embedded template rather than aborting.
        debug_print(str(e))
        print_stderr(
            "There was a problem downloading the latest template config. Using fallback template"
        )
        template_str = """rules:
  - id: eqeq-is-bad
    pattern: $X == $X
    message: "$X == $X is a useless equality check"
    languages: [python]
    severity: ERROR"""
    try:
        with open(DEFAULT_CONFIG_FILE, "w") as template:
            template.write(template_str)
        # Report success only once the file has been flushed and closed.
        print_stderr(
            f"Template config successfully written to {DEFAULT_CONFIG_FILE}"
        )
    except Exception as e:
        raise SemgrepError(str(e)) from e
Beispiel #3
0
def _where_python_statement_matches(where_expression: str,
                                    metavars: Dict[str, Any]) -> bool:
    # TODO: filter out obvious dangerous things here
    output_var = None

    # HACK: we're executing arbitrary Python in the where-python,
    # be careful my friend
    vars = {k: v["abstract_content"] for k, v in metavars.items()}
    RETURN_VAR = "semgrep_pattern_return"
    try:
        cleaned_where_expression = where_expression.strip()
        lines = cleaned_where_expression.split("\n")
        new_last_line = f"{RETURN_VAR} = {lines[-1]}"
        lines[-1] = new_last_line
        to_eval = "\n".join(lines)
        scope = {"vars": vars}
        # fmt: off
        exec(
            to_eval, scope
        )  # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        output_var = scope[RETURN_VAR]
    except Exception as ex:
        print_stderr(
            f"error evaluating a where-python expression: `{where_expression}`: {ex}"
        )

    if type(output_var) != type(True):
        raise SemgrepError(
            f"python where expression needs boolean output but got: {output_var} for {where_expression}"
        )
    return output_var == True
Beispiel #4
0
def apply_fixes(
    rule_matches_by_rule: Dict[Rule, List[RuleMatch]], dryrun: bool = False
) -> None:
    """
    Modify files in place for all files with findings from rules with an
    autofix configuration.

    For each rule match, a plain ``fix`` takes priority over ``fix_regex``;
    matches with neither are skipped. With ``dryrun`` set, nothing is written
    and the fixed lines are attached to ``rule_match.extra["fixed_lines"]``
    instead. A summary of how many files were modified is printed to stderr.

    Raises:
        SemgrepError: if a fix cannot be computed, if a ``fix_regex`` is
            missing ``regex``/``replacement``, or if its ``count`` cannot be
            converted to an integer.
    """
    modified_files: Set[Path] = set()

    for rule_matches in rule_matches_by_rule.values():
        for rule_match in rule_matches:
            fix = rule_match.fix
            fix_regex = rule_match.fix_regex
            filepath = rule_match.path
            if fix:
                try:
                    fixobj = _basic_fix(rule_match, fix)
                except Exception as e:
                    raise SemgrepError(
                        f"unable to modify file {filepath}: {e}"
                    ) from e
            elif fix_regex:
                regex = fix_regex.get("regex")
                replacement = fix_regex.get("replacement")
                count = fix_regex.get("count", 0)
                # NOTE(review): an empty-string replacement is rejected here
                # as well -- confirm that this is intended.
                if not regex or not replacement:
                    raise SemgrepError(
                        "'regex' and 'replacement' values required when using 'fix-regex'"
                    )
                try:
                    count = int(count)
                except (TypeError, ValueError) as e:
                    # int() raises TypeError for values such as None or a
                    # list, and ValueError for non-numeric strings.
                    raise SemgrepError(
                        "optional 'count' value must be an integer when using 'fix-regex'"
                    ) from e
                try:
                    fixobj = _regex_replace(rule_match, regex, replacement, count)
                except Exception as e:
                    # `fix` is always falsy in this branch, so report the
                    # regex fix configuration that was actually used.
                    raise SemgrepError(
                        f"unable to use regex to modify file {filepath} with fix '{fix_regex}': {e}"
                    ) from e
            else:
                continue

            if dryrun:
                # Monkey patch in fixed lines
                rule_match.extra["fixed_lines"] = fixobj.fixed_lines
            else:
                _write_contents(filepath, fixobj.fixed_contents)
                modified_files.add(filepath)

    num_modified = len(modified_files)
    if num_modified:
        print_stderr(
            f"successfully modified {num_modified} file{'s' if num_modified > 1 else ''}."
        )
    else:
        print_stderr("no files modified.")
Beispiel #5
0
 def handle_semgrep_error(self, error: SemgrepError) -> None:
     """
     Record and print a SemgrepError the first time it is seen.

     Always marks the handler as having output. An error that is already in
     ``self.error_set`` is not recorded or printed again.
     """
     self.has_output = True
     if error in self.error_set:
         return

     self.semgrep_structured_errors.append(error)
     self.error_set.add(error)
     print_stderr(str(error))
Beispiel #6
0
    def post_output(cls, output_url: str, output: str) -> None:
        """
        POST ``output`` as the request body to ``output_url``.

        Progress is announced on stderr and the response status code is
        written to the debug log. The request uses a 10 second timeout.

        Raises:
            SemgrepError: if the request times out.
        """
        import requests  # here for faster startup times

        print_stderr(f"posting to {output_url}...")
        try:
            response = requests.post(output_url, data=output, timeout=10)
            status = response.status_code
            debug_print(f"posted to {output_url} and got status_code:{status}")
        except requests.exceptions.Timeout:
            raise SemgrepError(f"posting output to {output_url} timed out")
Beispiel #7
0
def download_config(config_url: str) -> Dict[str, YamlTree]:
    """
    Download a rule configuration from ``config_url`` and parse it.

    The response must have an OK status and a Content-Type containing one of
    the accepted YAML/plain-text types; its body is decoded as UTF-8 and
    handed to ``parse_config_string``.

    Raises:
        SemgrepError: on any failure inside the download-and-parse step
            (request error, bad status code, unknown content type, or an
            error raised while parsing). The original exception is chained
            as the cause and its text is included in the message.
    """
    import requests  # here for faster startup times

    DOWNLOADING_MESSAGE = "downloading config..."
    debug_print(f"trying to download from {config_url}")
    print_stderr(
        f"using config from {nice_semgrep_url(config_url)}. Visit https://semgrep.live/registry to see all public rules."
    )
    print_stderr(DOWNLOADING_MESSAGE, end="\r")
    headers = {"User-Agent": SEMGREP_USER_AGENT}

    try:
        r = requests.get(config_url, stream=True, headers=headers, timeout=10)
        if r.status_code != requests.codes.ok:
            raise SemgrepError(
                f"bad status code: {r.status_code} returned by config url: {config_url}"
            )

        content_type = r.headers.get("Content-Type")
        yaml_types = [
            "text/plain",
            "application/x-yaml",
            "text/x-yaml",
            "text/yaml",
            "text/vnd.yaml",
        ]
        # Substring match, so parameters such as "; charset=utf-8" are tolerated.
        if not content_type or not any(ct in content_type for ct in yaml_types):
            raise SemgrepError(
                f"unknown content-type: {content_type} returned by config url: {config_url}. Can not parse"
            )

        return parse_config_string(
            "remote-url",
            r.content.decode("utf-8"),
            filename=f"{config_url[:20]}...",
        )
    except Exception as e:
        # The SemgrepErrors raised above are intentionally caught here too,
        # so every failure carries the same "Failed to download" prefix.
        raise SemgrepError(
            f"Failed to download config from {config_url}: {str(e)}"
        ) from e
Beispiel #8
0
def cli() -> None:
    """
    Entry point for the semgrep command line.

    Builds the argument parser, parses and validates the arguments, sets the
    global output flags, and then dispatches to exactly one action: dumping
    an AST, synthesizing patterns, validating configuration, generating a
    starter config, or running the main search.

    Raises:
        SemgrepError: re-raised from the docker adjustment step, and raised
            directly when --validate finds configuration errors.
    """
    parser = argparse.ArgumentParser(
        description=f"semgrep CLI. For more information about semgrep, go to {SEMGREP_URL}",
        prog="semgrep",
    )

    # input
    parser.add_argument(
        "target",
        nargs="*",
        default=[os.curdir],
        help=(
            "Search these files or directories. Defaults to entire current "
            "working directory. Implied argument if piping to semgrep."
        ),
    )

    # config options
    config = parser.add_argument_group("config")
    # -g, -f and -e cannot be combined with one another
    config_ex = config.add_mutually_exclusive_group()
    config_ex.add_argument(
        "-g",
        "--generate-config",
        action="store_true",
        help=f"Generate starter configuration file, {DEFAULT_CONFIG_FILE}",
    )

    config_ex.add_argument(
        "-f",
        "--config",
        help=(
            "YAML configuration file, directory of YAML files ending in "
            ".yml|.yaml, URL of a configuration file, or semgrep registry entry "
            "name. See README for information on configuration file format."
        ),
    )

    config_ex.add_argument(
        "-e",
        "--pattern",
        help="Code search pattern. See README for information on pattern features.",
    )
    config.add_argument(
        "-l",
        "--lang",
        help=(
            "Parse pattern and all files in specified language. Must be used "
            "with -e/--pattern."
        ),
    )
    config.add_argument(
        "--validate",
        action="store_true",
        help="Validate configuration file(s). No search is performed.",
    )
    config.add_argument(
        "--strict",
        action="store_true",
        help="Only invoke semgrep if configuration files(s) are valid.",
    )

    # file filtering options; both may be given multiple times (action="append")
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="Skip any file or directory that matches this pattern; --exclude='*.py' will ignore"
        " the following: foo.py, src/foo.py, foo.py/bar.sh. --exclude='tests' will ignore tests/foo.py"
        " as well as a/b/tests/c/foo.py. Can add multiple times. Overrides includes.",
    )
    parser.add_argument(
        "--include",
        action="append",
        default=[],
        help="Scan only files or directories that match this pattern; --include='*.jsx' will scan"
        " the following: foo.jsx, src/foo.jsx, foo.jsx/bar.sh. --include='src' will scan src/foo.py"
        " as well as a/b/src/c/foo.py. Can add multiple times.",
    )
    parser.add_argument(
        "--no-git-ignore",
        action="store_true",
        help="Scan all files even those ignored by a projects gitignore(s)",
    )

    # The flag string comes from the RCE_RULE_FLAG constant; its parsed value
    # is read below as args.dangerously_allow_arbitrary_code_execution_from_rules,
    # so the constant presumably spells that option name -- verify at its definition.
    config.add_argument(
        RCE_RULE_FLAG,
        action="store_true",
        help=(
            "WARNING: allow rules to run arbitrary code. ONLY ENABLE IF YOU "
            "TRUST THE SOURCE OF ALL RULES IN YOUR CONFIGURATION."
        ),
    )

    # hidden from --help output (help=argparse.SUPPRESS)
    config.add_argument(
        "--precommit", action="store_true", help=argparse.SUPPRESS,
    )
    config.add_argument(
        "-j",
        "--jobs",
        action="store",
        type=int,
        default=CPU_COUNT,
        help=(
            "Number of subprocesses to use to run checks in parallel. Defaults "
            "to the number of CPUs on the system."
        ),
    )

    # output options
    output = parser.add_argument_group("output")

    output.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help=(
            "Do not print anything to stdout. Search results can still be "
            "saved to an output file specified by -o/--output. Exit code "
            "provides success status."
        ),
    )

    output.add_argument(
        "--no-rewrite-rule-ids",
        action="store_true",
        help=(
            "Do not rewrite rule ids when they appear in nested sub-directories "
            "(by default, rule 'foo' in test/rules.yaml will be renamed "
            "'test.foo')."
        ),
    )

    output.add_argument(
        "-o",
        "--output",
        help=(
            "Save search results to a file or post to URL. "
            "Default is to print to stdout."
        ),
    )
    output.add_argument(
        "--json", action="store_true", help="Output results in JSON format."
    )
    output.add_argument(
        "--debugging-json",
        action="store_true",
        help="Output JSON with extra debugging information.",
    )
    output.add_argument(
        "--sarif", action="store_true", help="Output results in SARIF format."
    )
    output.add_argument("--test", action="store_true", help="Run test suite.")
    parser.add_argument(
        "--test-ignore-todo",
        action="store_true",
        help="Ignore rules marked as '#todoruleid:' in test files.",
    )
    output.add_argument(
        "--dump-ast",
        action="store_true",
        help=(
            "Show AST of the input file or passed expression and then exit "
            "(can use --json)."
        ),
    )
    # hidden from --help output (help=argparse.SUPPRESS)
    output.add_argument(
        "--synthesize-patterns", help=argparse.SUPPRESS,
    )
    output.add_argument(
        "--error",
        action="store_true",
        help="Exit 1 if there are findings. Useful for CI and scripts.",
    )

    output.add_argument(
        "-a",
        "--autofix",
        action="store_true",
        help=(
            "Apply the autofix patches. WARNING: data loss can occur with this "
            "flag. Make sure your files are stored in a version control system."
        ),
    )
    output.add_argument(
        "--dryrun",
        action="store_true",
        help=(
            "Do autofixes, but don't write them to a file. "
            "This will print the changes to the console. "
            "This lets you see the changes before you commit to them. "
            "Only works with the --autofix flag. Otherwise does nothing."
        ),
    )
    output.add_argument(
        "--disable-nosem",
        action="store_true",
        help=(
            "Disable the effect of 'nosem'. This will report findings on lines "
            "containing a 'nosem' comment at the end."
        ),
    )

    # logging options
    # NOTE: within this function the local name `logging` refers to this
    # argument group, not to any module of the same name.
    logging = parser.add_argument_group("logging")

    logging.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help=(
            "Set the logging level to verbose. E.g. statements about which "
            "files are being processed will be printed."
        ),
    )

    parser.add_argument(
        "--version", action="store_true", help="Show the version and exit."
    )

    parser.add_argument(
        "--force-color",
        action="store_true",
        help="Always include ANSI color in the output, even if not writing to a TTY",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="Disable checking for latest version.",
    )

    ### Parse and validate
    args = parser.parse_args()
    # --version short-circuits everything else
    if args.version:
        print(__VERSION__)
        return

    # parser.error() prints the message and exits the process
    if args.pattern and not args.lang:
        parser.error("-e/--pattern and -l/--lang must both be specified")

    if args.dump_ast and not args.lang:
        parser.error("--dump-ast and -l/--lang must both be specified")

    # set the flags
    semgrep.util.set_flags(args.verbose, args.quiet, args.force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker(args.precommit)
    except SemgrepError as e:
        # print the error for the user, then let it propagate to the caller
        print_stderr(str(e))
        raise e

    # Output format precedence when several flags are given:
    # --json, then --debugging-json, then --sarif; plain text otherwise.
    output_format = OutputFormat.TEXT
    if args.json:
        output_format = OutputFormat.JSON
    elif args.debugging_json:
        output_format = OutputFormat.JSON_DEBUG
    elif args.sarif:
        output_format = OutputFormat.SARIF

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=args.output,
        quiet=args.quiet,
        error_on_findings=args.error,
        strict=args.strict,
    )

    if not args.disable_version_check:
        if not is_running_latest():
            print_stderr(
                "A new version of Semgrep is available. Please see https://github.com/returntocorp/semgrep#upgrading for more information."
            )

    if args.test:
        # the test code (which isn't a "test" per se but is actually machinery to evaluate semgrep performance)
        # uses managed_output internally
        # NOTE(review): there is no return after this call, so control would
        # fall through to the dispatch below unless test_main exits the
        # process itself -- confirm.
        semgrep.test.test_main(args)

    # Exactly one action runs; the branches are checked in this order.
    with managed_output(output_settings) as output_handler:
        if args.dump_ast:
            dump_parsed_ast(args.json, args.lang, args.pattern, args.target)
        elif args.synthesize_patterns:
            synthesize_patterns(args.lang, args.synthesize_patterns, args.target)
        elif args.validate:
            configs, config_errors = semgrep.semgrep_main.get_config(
                args.pattern, args.lang, args.config
            )
            valid_str = "invalid" if config_errors else "valid"
            print_stderr(
                f"Configuration is {valid_str} - found {len(configs)} valid configuration(s) and {len(config_errors)} configuration error(s)."
            )
            if config_errors:
                # report every configuration error before failing
                for error in config_errors:
                    output_handler.handle_semgrep_error(error)
                raise SemgrepError("Please fix the above errors and try again.")
        elif args.generate_config:
            semgrep.config_resolver.generate_config()
        else:
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=args.target,
                pattern=args.pattern,
                lang=args.lang,
                config=args.config,
                no_rewrite_rule_ids=args.no_rewrite_rule_ids,
                jobs=args.jobs,
                include=args.include,
                exclude=args.exclude,
                strict=args.strict,
                autofix=args.autofix,
                dryrun=args.dryrun,
                disable_nosem=args.disable_nosem,
                dangerously_allow_arbitrary_code_execution_from_rules=args.dangerously_allow_arbitrary_code_execution_from_rules,
                no_git_ignore=args.no_git_ignore,
            )
Beispiel #9
0
def notify_user_of_work(
    all_rules: List[Rule],
    include: List[str],
    exclude: List[str],
    verbose: bool = False,
) -> None:
    """
    Print a summary of the upcoming run to stderr:
    - the include patterns, if any
    - the exclude patterns, if any
    - how many rules will run
    - the id of every rule, when verbose is set
    """
    sections = (
        (f"including files:", include),
        (f"excluding files:", exclude),
    )
    for header, patterns in sections:
        if not patterns:
            continue
        print_stderr(header)
        for pattern in patterns:
            print_stderr(f"- {pattern}")

    print_stderr(f"running {len(all_rules)} rules...")
    if not verbose:
        return

    print_stderr("rules:")
    for rule in all_rules:
        print_stderr(f"- {rule.id}")