コード例 #1
0
ファイル: sgrep.py プロジェクト: nyu-ninjacs/code-scanner
 def __init__(self, ruleset):
     """Store the ruleset and build the JSON output settings used for scans.

     Args:
         ruleset: Kept unchanged on ``self.ruleset``.

     ``OutputSettings`` is first built without ``json_time``. If that
     call is rejected with a ``TypeError`` it is retried with
     ``json_time=False``.
     """
     self.ruleset = ruleset
     # presumably (verbose, quiet, force_color) -- TODO confirm against util.set_flags
     util.set_flags(False, True, False)

     # Keyword arguments common to both construction attempts.
     shared_settings = dict(
         output_format=OutputFormat.JSON,
         output_destination=None,
         error_on_findings=False,
         verbose_errors=False,
         strict=False,
         timeout_threshold=3,
         json_stats=False,
         output_per_finding_max_lines_limit=None,
     )
     try:
         self.setting = OutputSettings(**shared_settings)
     except TypeError:
         # Only a signature mismatch should trigger the retry; a bare
         # ``except`` here would also hide unrelated failures and
         # KeyboardInterrupt.
         # NOTE(review): presumably some OutputSettings versions require
         # ``json_time`` -- confirm against the installed semgrep.
         self.setting = OutputSettings(json_time=False, **shared_settings)
コード例 #2
0
ファイル: semgrep_main.py プロジェクト: rmallof/semgrep
def invoke_semgrep(config: Path, targets: List[Path], **kwargs: Any) -> Any:
    """
    Run semgrep with 'config' against 'targets' and return the decoded JSON output.

    Any extra keyword argument is forwarded to 'main' unchanged.
    """
    captured = StringIO()
    settings = OutputSettings(
        output_format=OutputFormat.JSON,
        output_destination=None,
        error_on_findings=False,
        verbose_errors=False,
        strict=False,
        json_stats=False,
        output_per_finding_max_lines_limit=None,
    )
    # The handler writes into the in-memory buffer instead of real stdout.
    handler = OutputHandler(settings, stdout=captured)

    target_strings = [str(path) for path in targets]
    main(
        output_handler=handler,
        target=target_strings,
        pattern="",
        lang="",
        configs=[str(config)],
        **kwargs,
    )
    handler.close()

    raw_output = captured.getvalue()
    return json.loads(raw_output)
コード例 #3
0
ファイル: test_error.py プロジェクト: wisdomSir/semgrep
def test_raise_semgrep_error_from_json_unknown_error():
    """An unknown core error must raise a SemgrepError that names the rule.

    Builds a single rule from inline YAML, feeds an "unknown exception"
    payload to ``CoreRunner._raise_semgrep_error_from_json`` and checks that
    the raised error's text contains the rule id.
    """
    test_rule_id = "test_rule_id"
    rule_yaml_text = io.StringIO(f"""
    rules:
    - id: {test_rule_id}
      pattern: $X == $X
      severity: INFO
      languages: [python]
      message: blah
    """)
    rule_dict = yaml.load(rule_yaml_text).get("rules")[0]
    rule: Rule = Rule.from_json(rule_dict)

    core_runner = CoreRunner(
        allow_exec=False,
        output_settings=OutputSettings(OutputFormat.TEXT),
        jobs=1,
        timeout=0,
        max_memory=0,
        timeout_threshold=0,
        report_time=False,
    )

    patterns: List[Pattern] = list(core_runner._flatten_rule_patterns([rule]))

    output_json: Dict[str, Any] = {
        "error": "unknown exception",
        "message": "End_of_file",
    }
    with pytest.raises(SemgrepError) as excinfo:
        core_runner._raise_semgrep_error_from_json(output_json, patterns, rule)
    # This check has to live after the ``with`` block: the call above raises,
    # so any statement following it inside the block would never execute.
    assert test_rule_id in str(excinfo.value)
コード例 #4
0
def invoke_semgrep(
    config: Path,
    targets: List[Path],
    output_settings: Optional[OutputSettings] = None,
    **kwargs: Any,
) -> Union[Dict[str, Any], str]:
    """
    Run Semgrep with 'config' over 'targets' and hand back what it printed.

    The captured output is decoded with 'json.loads' when the output format
    reports itself as JSON; otherwise the raw text is returned. When no
    'output_settings' is given, JSON output settings are used. Remaining
    'kwargs' are forwarded to 'main'.
    """
    if output_settings is None:
        output_settings = OutputSettings(output_format=OutputFormat.JSON)

    buffer = StringIO()
    handler = OutputHandler(output_settings, stdout=buffer)
    target_strings = [str(path) for path in targets]
    main(
        output_handler=handler,
        target=target_strings,
        pattern="",
        lang="",
        configs=[str(config)],
        **kwargs,
    )
    handler.close()

    result: Union[Dict[str, Any], str]
    if output_settings.output_format.is_json():
        result = json.loads(buffer.getvalue())
    else:
        result = buffer.getvalue()
    return result
コード例 #5
0
def test_ignore_git_dir(tmp_path, monkeypatch):
    """
    A generic-language scan targeting a .git directory must yield no files
    """
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    tracked_file = git_dir / "bar"
    tracked_file.touch()

    monkeypatch.chdir(tmp_path)
    generic = Language("generic")
    settings = OutputSettings(
        output_format=OutputFormat.TEXT,
        output_destination=None,
        error_on_findings=False,
        verbose_errors=False,
        strict=False,
        json_stats=False,
        output_time=False,
        output_per_finding_max_lines_limit=None,
        output_per_line_max_chars_limit=None,
    )
    handler = OutputHandler(settings)

    # The .git directory itself is the only scan target.
    manager = TargetManager([], [], 0, [git_dir], True, handler, False)
    found = manager.get_files(generic, [], [])
    assert [] == found
コード例 #6
0
def invoke_semgrep(paths, scan_rules, **kwargs):
    """Run Semgrep on the given paths and return its decoded JSON output.

    Returns None without scanning when running on Windows.
    """
    if platform.system() == 'Windows':
        return None
    from semgrep import semgrep_main, util
    from semgrep.constants import OutputFormat
    from semgrep.output import OutputHandler, OutputSettings
    try:
        workers = multiprocessing.cpu_count()
    except NotImplementedError:
        # Fall back to a single job when the CPU count is unavailable.
        workers = 1
    util.set_flags(False, True, False)  # Verbose, Quiet, Force_color

    captured = StringIO()
    settings = OutputSettings(
        output_format=OutputFormat.JSON,
        output_destination=None,
        error_on_findings=False,
        strict=False,
    )
    # Semgrep's output goes to the in-memory buffer rather than stdout.
    handler = OutputHandler(settings, stdout=captured)
    posix_targets = [pt.as_posix() for pt in paths]
    semgrep_main.main(
        output_handler=handler,
        target=posix_targets,
        jobs=workers,
        pattern=None,
        lang=None,
        config=scan_rules,
        **kwargs,
    )
    handler.close()
    raw_output = captured.getvalue()
    return json.loads(raw_output)
コード例 #7
0
ファイル: helpers.py プロジェクト: ajinabraham/libsast
def invoke_semgrep(paths, scan_rules, **kwargs):
    """Run Semgrep on the given paths and return its decoded JSON output.

    Returns None without scanning when running on Windows.
    """
    if platform.system() == 'Windows':
        return None
    from semgrep import semgrep_main
    from semgrep.state import get_state
    from semgrep.constants import OutputFormat
    from semgrep.output import OutputHandler, OutputSettings
    try:
        workers = multiprocessing.cpu_count()
    except NotImplementedError:
        # Fall back to a single job when the CPU count is unavailable.
        workers = 1

    # Configure Semgrep's terminal state for quiet, colourless output and
    # stop the 'semgrep' logger from propagating to its parent loggers.
    terminal = get_state().terminal
    terminal.configure(
        verbose=False,
        debug=False,
        quiet=True,
        force_color=False,
    )
    semgrep_logger = logging.getLogger('semgrep')
    semgrep_logger.propagate = False

    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.JSON,
            output_destination=None,
            output_per_finding_max_lines_limit=None,
            output_per_line_max_chars_limit=None,
            error_on_findings=False,
            verbose_errors=False,
            strict=False,
            timeout_threshold=3,
        ),
    )
    posix_targets = [pt.as_posix() for pt in paths]
    outcome = semgrep_main.main(
        output_handler=handler,
        target=posix_targets,
        jobs=workers,
        pattern=None,
        lang=None,
        configs=[scan_rules],
        timeout=5,
        timeout_threshold=3,
        **kwargs,
    )
    # main() is expected to return exactly eight values; only the first,
    # a mapping whose values are collections of matches, is used.
    matches_by_rule, _, _, _, _, _, _, _ = outcome

    # Flatten the per-rule groups into one list for the handler.
    flattened = []
    for rule_matches in matches_by_rule.values():
        flattened.extend(rule_matches)
    handler.rule_matches = flattened
    return json.loads(handler._build_output())
コード例 #8
0
ファイル: test_target_manager.py プロジェクト: isp1r0/semgrep
def test_explicit_path(tmp_path, monkeypatch):
    """Check how explicitly passed targets interact with excludes and file extensions."""
    foo = tmp_path / "foo"
    foo.mkdir()
    # Fixture files: two Go files, one file without extension, two Python files.
    for file_name in ("a.go", "b.go", "noext", "a.py", "b.py"):
        (foo / file_name).touch()
    foo_noext = foo / "noext"

    monkeypatch.chdir(tmp_path)

    # Results are compared against the path relative to the working directory.
    foo_a = (foo / "a.py").relative_to(tmp_path)
    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.TEXT,
            output_destination=None,
            error_on_findings=False,
            strict=False,
        ))

    python_language = Language("python")

    def python_files(excludes, targets, skip_unknown):
        """Build a TargetManager for these arguments and list its python files."""
        manager = TargetManager([], excludes, targets, False, handler,
                                skip_unknown)
        return manager.get_files(python_language, [], [])

    # An explicitly passed python file is included
    assert foo_a in python_files([], ["foo/a.py"], False)

    # An excluded python file reached only through "." is dropped ...
    assert foo_a not in python_files(["foo/a.py"], ["."], False)
    # ... but it is kept when it is also passed explicitly
    assert foo_a in python_files(["foo/a.py"], [".", "foo/a.py"], False)

    # An explicitly passed .go file is ignored when requesting python
    assert (python_files([], ["foo/a.go"], False) == [])

    # An explicit file with unknown extension is included when the last
    # TargetManager argument (skip_unknown_extensions) is False
    included = set(python_files([], ["foo/noext"], False))
    assert cmp_path_sets(
        included,
        {foo_noext},
    )

    # The same file is left out when skip_unknown_extensions is True
    skipped = set(python_files([], ["foo/noext"], True))
    assert cmp_path_sets(
        skipped,
        set(),
    )
コード例 #9
0
ファイル: cli.py プロジェクト: petermulller/semgrep
def cli() -> None:
    """Parse the semgrep command line and dispatch to the requested action.

    Builds the argparse parser, validates option combinations, derives the
    output format and ``OutputSettings`` from the flags, then runs exactly
    one of: AST dump, pattern synthesis, config validation, config
    generation, or a regular scan via ``semgrep.semgrep_main.main``.

    Raises:
        SemgrepError: re-raised from ``adjust_for_docker``, or raised after
            reporting errors when ``--validate`` finds configuration errors.
    """
    parser = argparse.ArgumentParser(
        description=
        f"semgrep CLI. For more information about semgrep, go to {SEMGREP_URL}",
        prog="semgrep",
    )

    # input
    parser.add_argument(
        "target",
        nargs="*",
        default=[os.curdir],
        help=("Search these files or directories. Defaults to entire current "
              "working directory. Implied argument if piping to semgrep."),
    )

    # config options
    config = parser.add_argument_group("config")
    # -g, -f and -e are mutually exclusive
    config_ex = config.add_mutually_exclusive_group()
    config_ex.add_argument(
        "-g",
        "--generate-config",
        action="store_true",
        help=f"Generate starter configuration file, {DEFAULT_CONFIG_FILE}",
    )

    config_ex.add_argument(
        "-f",
        "--config",
        help=
        ("YAML configuration file, directory of YAML files ending in "
         ".yml|.yaml, URL of a configuration file, or semgrep registry entry "
         "name. See README for information on configuration file format."),
    )

    config_ex.add_argument(
        "-e",
        "--pattern",
        help=
        "Code search pattern. See README for information on pattern features.",
    )
    config.add_argument(
        "-l",
        "--lang",
        help=(
            "Parse pattern and all files in specified language. Must be used "
            "with -e/--pattern."),
    )
    config.add_argument(
        "--validate",
        action="store_true",
        help="Validate configuration file(s). No search is performed.",
    )
    config.add_argument(
        "--strict",
        action="store_true",
        help="Only invoke semgrep if configuration files(s) are valid.",
    )

    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help=
        "Skip any file or directory that matches this pattern; --exclude='*.py' will ignore"
        " the following: foo.py, src/foo.py, foo.py/bar.sh. --exclude='tests' will ignore tests/foo.py"
        " as well as a/b/tests/c/foo.py. Can add multiple times. Overrides includes.",
    )
    parser.add_argument(
        "--include",
        action="append",
        default=[],
        help=
        "Scan only files or directories that match this pattern; --include='*.jsx' will scan"
        " the following: foo.jsx, src/foo.jsx, foo.jsx/bar.sh. --include='src' will scan src/foo.py"
        " as well as a/b/src/c/foo.py. Can add multiple times.",
    )
    parser.add_argument(
        "--no-git-ignore",
        action="store_true",
        help="Scan all files even those ignored by a projects gitignore(s)",
    )

    config.add_argument(
        RCE_RULE_FLAG,
        action="store_true",
        help=("WARNING: allow rules to run arbitrary code. ONLY ENABLE IF YOU "
              "TRUST THE SOURCE OF ALL RULES IN YOUR CONFIGURATION."),
    )

    config.add_argument(
        "-j",
        "--jobs",
        action="store",
        type=int,
        default=CPU_COUNT,
        help=(
            "Number of subprocesses to use to run checks in parallel. Defaults "
            "to the number of CPUs on the system."),
    )

    config.add_argument(
        "--timeout",
        type=int,
        default=0,
        help=
        ("Maximum time to spend running a rule on a single file in seconds. If set to 0 will not have time limit. Defaults to 0."
         ),
    )

    config.add_argument(
        "--max-memory",
        type=int,
        default=0,
        help=
        ("Maximum memory to use running a rule on a single file in MB. If set to 0 will not have memory limit. Defaults to 0."
         ),
    )

    config.add_argument(
        "--timeout-threshold",
        type=int,
        default=0,
        help=
        ("Maximum number of rules that can timeout on a file before the file is skipped. If set to 0 will not have limit. Defaults to 0."
         ),
    )

    # output options
    output = parser.add_argument_group("output")

    output.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help=("Do not print anything to stdout. Search results can still be "
              "saved to an output file specified by -o/--output. Exit code "
              "provides success status."),
    )

    output.add_argument(
        "--no-rewrite-rule-ids",
        action="store_true",
        help=
        ("Do not rewrite rule ids when they appear in nested sub-directories "
         "(by default, rule 'foo' in test/rules.yaml will be renamed "
         "'test.foo')."),
    )

    output.add_argument(
        "-o",
        "--output",
        help=("Save search results to a file or post to URL. "
              "Default is to print to stdout."),
    )
    output.add_argument("--json",
                        action="store_true",
                        help="Output results in JSON format.")
    output.add_argument(
        "--debugging-json",
        action="store_true",
        help="Output JSON with extra debugging information.",
    )
    output.add_argument("--sarif",
                        action="store_true",
                        help="Output results in SARIF format.")
    output.add_argument("--test", action="store_true", help="Run test suite.")
    # Registered on the top-level parser, not on the "output" group
    parser.add_argument(
        "--test-ignore-todo",
        action="store_true",
        help="Ignore rules marked as '#todoruleid:' in test files.",
    )
    output.add_argument(
        "--dump-ast",
        action="store_true",
        help=("Show AST of the input file or passed expression and then exit "
              "(can use --json)."),
    )
    # Hidden from --help output
    output.add_argument("--synthesize-patterns", help=argparse.SUPPRESS)
    output.add_argument(
        "--error",
        action="store_true",
        help="Exit 1 if there are findings. Useful for CI and scripts.",
    )

    output.add_argument(
        "-a",
        "--autofix",
        action="store_true",
        help=
        ("Apply the autofix patches. WARNING: data loss can occur with this "
         "flag. Make sure your files are stored in a version control system."),
    )
    output.add_argument(
        "--dryrun",
        action="store_true",
        help=("Do autofixes, but don't write them to a file. "
              "This will print the changes to the console. "
              "This lets you see the changes before you commit to them. "
              "Only works with the --autofix flag. Otherwise does nothing."),
    )
    output.add_argument(
        "--disable-nosem",
        action="store_true",
        help=(
            "Disable the effect of 'nosem'. This will report findings on lines "
            "containing a 'nosem' comment at the end."),
    )

    # logging options
    logging_ = parser.add_argument_group("logging")

    logging_.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help=("Set the logging level to verbose. E.g. statements about which "
              "files are being processed will be printed."),
    )

    parser.add_argument("--version",
                        action="store_true",
                        help="Show the version and exit.")

    parser.add_argument(
        "--force-color",
        action="store_true",
        help=
        "Always include ANSI color in the output, even if not writing to a TTY",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="Disable checking for latest version.",
    )

    ### Parse and validate
    args = parser.parse_args()
    if args.version:
        print(__VERSION__)
        return

    if args.pattern and not args.lang:
        parser.error("-e/--pattern and -l/--lang must both be specified")

    if args.dump_ast and not args.lang:
        parser.error("--dump-ast and -l/--lang must both be specified")

    # set the flags
    semgrep.util.set_flags(args.verbose, args.quiet, args.force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker()
    except SemgrepError as e:
        # Log the error with its traceback, then let it propagate
        logger.exception(str(e))
        raise e

    # Format precedence when several flags are given: --json, then
    # --debugging-json, then --sarif; plain text otherwise.
    output_format = OutputFormat.TEXT
    if args.json:
        output_format = OutputFormat.JSON
    elif args.debugging_json:
        output_format = OutputFormat.JSON_DEBUG
    elif args.sarif:
        output_format = OutputFormat.SARIF

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=args.output,
        error_on_findings=args.error,
        strict=args.strict,
        timeout_threshold=args.timeout_threshold,
    )

    if not args.disable_version_check:
        if not is_running_latest():
            logger.warning(
                "A new version of Semgrep is available. Please see https://github.com/returntocorp/semgrep#upgrading for more information."
            )

    if args.test:
        # the test code (which isn't a "test" per se but is actually machinery to evaluate semgrep performance)
        # uses managed_output internally
        # NOTE(review): nothing here returns afterwards, so control continues
        # into the block below unless test_main exits the process -- confirm.
        semgrep.test.test_main(args)

    # Exactly one action runs, chosen in this order of precedence
    with managed_output(output_settings) as output_handler:
        if args.dump_ast:
            dump_parsed_ast(args.json, args.lang, args.pattern, args.target)
        elif args.synthesize_patterns:
            synthesize_patterns(args.lang, args.synthesize_patterns,
                                args.target)
        elif args.validate:
            configs, config_errors = semgrep.semgrep_main.get_config(
                args.pattern, args.lang, args.config)
            valid_str = "invalid" if config_errors else "valid"
            logger.info(
                f"Configuration is {valid_str} - found {len(configs)} valid configuration(s) and {len(config_errors)} configuration error(s)."
            )
            if config_errors:
                # Report every error through the handler before failing
                for error in config_errors:
                    output_handler.handle_semgrep_error(error)
                raise SemgrepError(
                    "Please fix the above errors and try again.")
        elif args.generate_config:
            semgrep.config_resolver.generate_config()
        else:
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=args.target,
                pattern=args.pattern,
                lang=args.lang,
                config=args.config,
                no_rewrite_rule_ids=args.no_rewrite_rule_ids,
                jobs=args.jobs,
                include=args.include,
                exclude=args.exclude,
                strict=args.strict,
                autofix=args.autofix,
                dryrun=args.dryrun,
                disable_nosem=args.disable_nosem,
                dangerously_allow_arbitrary_code_execution_from_rules=args.
                dangerously_allow_arbitrary_code_execution_from_rules,
                no_git_ignore=args.no_git_ignore,
                timeout=args.timeout,
                max_memory=args.max_memory,
                timeout_threshold=args.timeout_threshold,
            )
コード例 #10
0
def cli() -> None:
    """Parse the semgrep command line and dispatch to the requested action.

    Builds the argparse parser, validates option combinations, derives the
    output format and ``OutputSettings`` from the flags, then runs exactly
    one of: AST dump, pattern synthesis, config validation, config
    generation, or a regular scan via ``semgrep.semgrep_main.main``.

    Raises:
        SemgrepError: re-raised from ``adjust_for_docker``, or raised after
            reporting errors when ``--validate`` finds configuration errors.
    """
    parser = argparse.ArgumentParser(
        description=
        f"semgrep CLI. For more information about semgrep, go to {SEMGREP_URL}",
        prog="semgrep",
    )

    # input
    parser.add_argument(
        "target",
        nargs="*",
        default=[os.curdir],
        help=("Search these files or directories. Defaults to entire current "
              "working directory. Implied argument if piping to semgrep."),
    )

    # config options
    config = parser.add_argument_group("config")
    # --config and --pattern are mutually exclusive
    config_ex = config.add_mutually_exclusive_group()
    config_ex.add_argument(
        "-f",  # for backwards compatibility
        "-c",
        "--config",
        action="append",
        default=[],
        help=
        ("YAML configuration file, directory of YAML files ending in "
         ".yml|.yaml, URL of a configuration file, or semgrep registry entry "
         "name. See https://semgrep.dev/docs/writing-rules/rule-syntax for information on configuration file format."
         ),
    )
    config_ex.add_argument(
        "-e",
        "--pattern",
        help=
        "Code search pattern. See https://semgrep.dev/docs/writing-rules/pattern-syntax for information on pattern features.",
    )
    # The "x" file mode opens the destination for exclusive creation
    config.add_argument(
        "-g",
        "--generate-config",
        action="store",
        nargs="?",
        const=DEFAULT_CONFIG_FILE,
        type=argparse.FileType("x"),
        help=
        f"Generate starter configuration file. Defaults to {DEFAULT_CONFIG_FILE}.",
    )
    config.add_argument(
        "-l",
        "--lang",
        help=(
            "Parse pattern and all files in specified language. Must be used "
            "with -e/--pattern."),
    )
    config.add_argument(
        "--validate",
        action="store_true",
        help="Validate configuration file(s). No search is performed.",
    )
    config.add_argument(
        "--strict",
        action="store_true",
        help="Only invoke semgrep if configuration files(s) are valid.",
    )

    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help=
        "Skip any file or directory that matches this pattern; --exclude='*.py' will ignore"
        " the following: foo.py, src/foo.py, foo.py/bar.sh. --exclude='tests' will ignore tests/foo.py"
        " as well as a/b/tests/c/foo.py. Can add multiple times. Overrides includes.",
    )
    parser.add_argument(
        "--include",
        action="append",
        default=[],
        help="Filter files or directories by path. The argument is a"
        " glob-style pattern such as 'foo.*' that must match the path."
        " This is an extra filter in addition to other applicable filters."
        " For example, specifying the language with '-l javascript' might"
        " preselect files 'src/foo.jsx' and 'lib/bar.js'. Specifying one of"
        " '--include=src', '--include=*.jsx', or '--include=src/foo.*'"
        " will restrict the selection to the single file 'src/foo.jsx'."
        " A choice of multiple '--include' patterns can be specified."
        " For example, '--include=foo.* --include=bar.*' will select"
        " both 'src/foo.jsx' and 'lib/bar.js'."
        " Glob-style patterns follow the syntax supported by python,"
        " which is documented at https://docs.python.org/3/library/glob.html",
    )
    parser.add_argument(
        "--no-git-ignore",
        action="store_true",
        help="Don't skip files ignored by git."
        " Scanning starts from the root folder specified on the semgrep"
        " command line."
        " Normally, if the scanning root is within a git repository, "
        " only the tracked files and the new files"
        " would be scanned. Git submodules and git-ignored files would"
        " normally be skipped."
        " This option will disable git-aware filtering."
        " Setting this flag does nothing if the scanning root is not"
        " in a git repository.",
    )
    parser.add_argument(
        "--skip-unknown-extensions",
        action="store_true",
        help=
        "Scan only known file extensions, even if unrecognized ones are explicitly targeted.",
    )

    config.add_argument(
        RCE_RULE_FLAG,
        action="store_true",
        help=("WARNING: allow rules to run arbitrary code. ONLY ENABLE IF YOU "
              "TRUST THE SOURCE OF ALL RULES IN YOUR CONFIGURATION."),
    )

    config.add_argument(
        "-j",
        "--jobs",
        action="store",
        type=int,
        default=CPU_COUNT,
        help=(
            "Number of subprocesses to use to run checks in parallel. Defaults "
            "to the number of CPUs on the system."),
    )

    config.add_argument(
        "--timeout",
        type=int,
        default=DEFAULT_TIMEOUT,
        help=
        ("Maximum time to spend running a rule on a single file in seconds. If set to 0 will not have time limit. Defaults to {} s."
         .format(DEFAULT_TIMEOUT)),
    )

    config.add_argument(
        "--max-memory",
        type=int,
        default=0,
        help=
        ("Maximum memory to use running a rule on a single file in MB. If set to 0 will not have memory limit. Defaults to 0."
         ),
    )

    config.add_argument(
        "--timeout-threshold",
        type=int,
        default=0,
        help=
        ("Maximum number of rules that can timeout on a file before the file is skipped. If set to 0 will not have limit. Defaults to 0."
         ),
    )

    # The first literal ends with a space so the two adjacent literals do
    # not run together as "...are run.Can add..." in the rendered help.
    config.add_argument(
        "--severity",
        action="append",
        default=[],
        help=
        ("Report findings only from rules matching the supplied severity level. By default all applicable rules are run. "
         "Can add multiple times. Each should be one of INFO, WARNING, or ERROR."
         ),
    )

    # output options
    output = parser.add_argument_group("output")

    output.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help=
        ("Do not print any logging messages to stderr. Finding output will still be sent to stdout. Exit code "
         "provides success status."),
    )

    output.add_argument(
        "--no-rewrite-rule-ids",
        action="store_true",
        help=
        ("Do not rewrite rule ids when they appear in nested sub-directories "
         "(by default, rule 'foo' in test/rules.yaml will be renamed "
         "'test.foo')."),
    )

    output.add_argument(
        "-o",
        "--output",
        help=("Save search results to a file or post to URL. "
              "Default is to print to stdout."),
    )
    output.add_argument("--json",
                        action="store_true",
                        help="Output results in JSON format.")
    output.add_argument(
        "--save-test-output-tar",
        action="store_true",
        help=
        ("Store json output as a tarball that will be uploaded as a Github artifact."
         ),
    )
    output.add_argument(
        "--json-stats",
        action="store_true",
        help=argparse.
        SUPPRESS,  # this flag is experimental and users should not yet rely on the output being stable
        # help="Include statistical information about performance in JSON output (experimental).",
    )
    output.add_argument(
        "--debugging-json",
        action="store_true",
        help="Output JSON with extra debugging information (experimental).",
    )
    output.add_argument("--junit-xml",
                        action="store_true",
                        help="Output results in JUnit XML format.")
    output.add_argument("--sarif",
                        action="store_true",
                        help="Output results in SARIF format.")
    output.add_argument(
        "--emacs",
        action="store_true",
        help="Output results in Emacs single-line format.",
    )
    output.add_argument("--test", action="store_true", help="Run test suite.")
    # Registered on the top-level parser, not on the "output" group
    parser.add_argument(
        "--test-ignore-todo",
        action="store_true",
        help="Ignore rules marked as '#todoruleid:' in test files.",
    )
    output.add_argument(
        "--dump-ast",
        action="store_true",
        help=("Show AST of the input file or passed expression and then exit "
              "(can use --json)."),
    )
    # Hidden from --help output
    output.add_argument("--synthesize-patterns", help=argparse.SUPPRESS)
    output.add_argument(
        "--error",
        action="store_true",
        help="Exit 1 if there are findings. Useful for CI and scripts.",
    )

    output.add_argument(
        "-a",
        "--autofix",
        action="store_true",
        help=
        ("Apply the autofix patches. WARNING: data loss can occur with this "
         "flag. Make sure your files are stored in a version control system."),
    )
    output.add_argument(
        "--dryrun",
        action="store_true",
        help=("Do autofixes, but don't write them to a file. "
              "This will print the changes to the console. "
              "This lets you see the changes before you commit to them. "
              "Only works with the --autofix flag. Otherwise does nothing."),
    )
    output.add_argument(
        "--disable-nosem",
        action="store_true",
        help=(
            "Disable the effect of 'nosem'. This will report findings on lines "
            "containing a 'nosem' comment at the end."),
    )

    output.add_argument(
        MAX_LINES_FLAG_NAME,
        type=int,
        default=DEFAULT_MAX_LINES_PER_FINDING,
        help=
        ("Maximum number of lines of code that will be shown for each match before trimming (set to 0 for unlimited)."
         ),
    )

    # logging options
    logging_ = parser.add_argument_group("logging")

    logging_.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help=
        ("Show more details about what rules are running, which files failed to parse, etc."
         ),
    )

    output.add_argument(
        "--debug",
        action="store_true",
        help="Set the logging level to DEBUG",
    )

    parser.add_argument("--version",
                        action="store_true",
                        help="Show the version and exit.")

    parser.add_argument(
        "--force-color",
        action="store_true",
        help=
        "Always include ANSI color in the output, even if not writing to a TTY",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="Disable checking for latest version.",
    )

    ### Parse and validate
    args = parser.parse_args()
    if args.version:
        print(__VERSION__)
        return

    if args.pattern and not args.lang:
        parser.error("-e/--pattern and -l/--lang must both be specified")

    if args.dump_ast and not args.lang:
        parser.error("--dump-ast and -l/--lang must both be specified")

    # set the flags
    semgrep.util.set_flags(args.debug, args.quiet, args.force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker()
    except SemgrepError as e:
        # Log the error with its traceback, then let it propagate
        logger.exception(str(e))
        raise e

    # Format precedence when several flags are given: --json,
    # --debugging-json, --junit-xml, --sarif, --emacs; plain text otherwise.
    output_format = OutputFormat.TEXT
    if args.json:
        output_format = OutputFormat.JSON
    elif args.debugging_json:
        output_format = OutputFormat.JSON_DEBUG
    elif args.junit_xml:
        output_format = OutputFormat.JUNIT_XML
    elif args.sarif:
        output_format = OutputFormat.SARIF
    elif args.emacs:
        output_format = OutputFormat.EMACS

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=args.output,
        error_on_findings=args.error,
        strict=args.strict,
        verbose_errors=args.verbose,
        timeout_threshold=args.timeout_threshold,
        json_stats=args.json_stats,
        output_per_finding_max_lines_limit=args.max_lines_per_finding,
    )

    if not args.disable_version_check:
        if not is_running_latest():
            logger.warning(
                "A new version of Semgrep is available. Please see https://github.com/returntocorp/semgrep#upgrading for more information."
            )

    if args.test:
        # the test code (which isn't a "test" per se but is actually machinery to evaluate semgrep performance)
        # uses managed_output internally
        # NOTE(review): nothing here returns afterwards, so control continues
        # into the block below unless test_main exits the process -- confirm.
        semgrep.test.test_main(args)

    # The 'optional_stdin_target' context manager must remain before
    # 'managed_output'. Output depends on file contents so we cannot have
    # already deleted the temporary stdin file.
    with optional_stdin_target(args.target) as target, managed_output(
            output_settings) as output_handler:
        # Exactly one action runs, chosen in this order of precedence
        if args.dump_ast:
            dump_parsed_ast(args.json, args.lang, args.pattern, target)
        elif args.synthesize_patterns:
            synthesize_patterns(args.lang, args.synthesize_patterns, target)
        elif args.validate:
            configs, config_errors = semgrep.semgrep_main.get_config(
                args.pattern, args.lang, args.config)
            valid_str = "invalid" if config_errors else "valid"
            rule_count = len(configs.get_rules(True))
            logger.info(
                f"Configuration is {valid_str} - found {len(configs.valid)} valid configuration(s), {len(config_errors)} configuration error(s), and {rule_count} rule(s)."
            )
            if config_errors:
                # Report every error through the handler before failing
                for error in config_errors:
                    output_handler.handle_semgrep_error(error)
                raise SemgrepError(
                    "Please fix the above errors and try again.")
        elif args.generate_config:
            semgrep.config_resolver.generate_config(args.generate_config,
                                                    args.lang, args.pattern)
        else:
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=target,
                pattern=args.pattern,
                lang=args.lang,
                configs=args.config,
                no_rewrite_rule_ids=args.no_rewrite_rule_ids,
                jobs=args.jobs,
                include=args.include,
                exclude=args.exclude,
                strict=args.strict,
                autofix=args.autofix,
                dryrun=args.dryrun,
                disable_nosem=args.disable_nosem,
                dangerously_allow_arbitrary_code_execution_from_rules=args.
                dangerously_allow_arbitrary_code_execution_from_rules,
                no_git_ignore=args.no_git_ignore,
                timeout=args.timeout,
                max_memory=args.max_memory,
                timeout_threshold=args.timeout_threshold,
                skip_unknown_extensions=args.skip_unknown_extensions,
                severity=args.severity,
            )
コード例 #11
0
ファイル: cli.py プロジェクト: zythosec/semgrep
def cli(
    autofix: bool,
    config: Optional[Tuple[str, ...]],
    dangerously_allow_arbitrary_code_execution_from_rules: bool,
    debug: bool,
    debugging_json: bool,
    dryrun: bool,
    dump_ast: bool,
    emacs: bool,
    enable_metrics: bool,
    enable_nosem: bool,
    enable_version_check: bool,
    error_on_findings: bool,
    exclude: Optional[Tuple[str, ...]],
    force_color: bool,
    generate_config: bool,
    include: Optional[Tuple[str, ...]],
    jobs: int,
    json: bool,
    json_stats: bool,
    json_time: bool,
    junit_xml: bool,
    lang: Optional[str],
    max_chars_per_line: int,
    max_lines_per_finding: int,
    max_memory: int,
    max_target_bytes: int,
    optimizations: str,
    output: Optional[str],
    pattern: Optional[str],
    quiet: bool,
    replacement: Optional[str],
    rewrite_rule_ids: bool,
    sarif: bool,
    save_test_output_tar: bool,
    scan_unknown_extensions: bool,
    severity: Optional[Tuple[str, ...]],
    strict: bool,
    synthesize_patterns: str,
    target: Tuple[str, ...],
    test: bool,
    test_ignore_todo: bool,
    time: bool,
    timeout: int,
    timeout_threshold: int,
    use_git_ignore: bool,
    validate: bool,
    verbose: bool,
    version: bool,
    vim: bool,
) -> None:
    """
    Semgrep CLI. Searches TARGET paths for matches to rules or patterns. Defaults to searching entire current working directory.

    For more information about Semgrep, go to https://semgrep.dev.
    """
    # NOTE(review): the docstring above is kept verbatim on purpose; it is
    # presumably surfaced to users as help text by the CLI framework (the
    # decorators are not visible from here) -- confirm before rewording it.
    #
    # Flow: --version short-circuits; otherwise the flags are validated, the
    # output settings are assembled, exactly one action (AST dump, pattern
    # synthesis, config validation, config generation, or a scan) runs inside
    # the output context, and the optional version check happens last.

    # Fast path: print the version (plus the optional update check) and stop.
    if version:
        print(__VERSION__)
        if enable_version_check:
            from semgrep.version import version_check

            version_check()
        return

    # Everything below is imported lazily so the --version path above does
    # not pay for loading the rest of the package.
    import semgrep.semgrep_main
    import semgrep.test
    import semgrep.config_resolver
    from semgrep.constants import OutputFormat
    from semgrep.constants import DEFAULT_CONFIG_FILE
    from semgrep.dump_ast import dump_parsed_ast
    from semgrep.error import SemgrepError
    from semgrep.metric_manager import metric_manager
    from semgrep.output import managed_output
    from semgrep.output import OutputSettings
    from semgrep.synthesize_patterns import synthesize
    from semgrep.target_manager import optional_stdin_target

    # With no explicit targets, fall back to the current directory.
    requested_targets: Sequence[str] = [os.curdir]
    if target:
        requested_targets = list(target)

    # An absent config is handed to the resolver and the scanner as an empty
    # list; the test machinery below still receives the raw value.
    config_sources = config or []

    # Switch metrics on or off according to the flag.
    toggle_metrics = metric_manager.enable if enable_metrics else metric_manager.disable
    toggle_metrics()

    if include and exclude:
        overlap_notice = with_color(
            "yellow",
            "Paths that match both --include and --exclude will be skipped by Semgrep.",
        )
        logger.warning(overlap_notice)

    # A pattern given without a language is rejected.
    if lang is None and pattern is not None:
        abort("-e/--pattern and -l/--lang must both be specified")

    if dangerously_allow_arbitrary_code_execution_from_rules:
        logger.warning(
            "The '--dangerously-allow-arbitrary-code-execution-from-rules' flag is now deprecated and does nothing. It will be removed in the future."
        )

    # Propagate the verbosity/colour switches before anything else runs.
    semgrep.util.set_flags(
        verbose=verbose,
        debug=debug,
        quiet=quiet,
        force_color=force_color,
    )

    # Adjust the working directory when running under docker; log the
    # failure and propagate it unchanged.
    try:
        semgrep.config_resolver.adjust_for_docker()
    except SemgrepError as docker_err:
        logger.exception(str(docker_err))
        raise docker_err

    # The first requested format in this priority order wins; plain text is
    # used when no format flag is set.
    format_by_flag = (
        (json or json_time or debugging_json, OutputFormat.JSON),
        (junit_xml, OutputFormat.JUNIT_XML),
        (sarif, OutputFormat.SARIF),
        (emacs, OutputFormat.EMACS),
        (vim, OutputFormat.VIM),
    )
    output_format = next(
        (fmt for requested, fmt in format_by_flag if requested),
        OutputFormat.TEXT,
    )

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=output,
        error_on_findings=error_on_findings,
        strict=strict,
        debug=debugging_json,
        verbose_errors=verbose,
        timeout_threshold=timeout_threshold,
        json_stats=json_stats,
        # Timing is reported when either --time or --json-time is given.
        output_time=(time or json_time),
        output_per_finding_max_lines_limit=max_lines_per_finding,
        output_per_line_max_chars_limit=max_chars_per_line,
    )

    if test:
        # Not a unit test: this is the machinery that evaluates semgrep
        # against annotated targets. It manages its own output internally.
        semgrep.test.test_main(
            target=requested_targets,
            config=config,
            test_ignore_todo=test_ignore_todo,
            strict=strict,
            json=json,
            save_test_output_tar=save_test_output_tar,
            optimizations=optimizations,
        )

    # Keep the stdin-target context OUTSIDE the output context: output is
    # rendered from file contents, so the temporary stdin file must still
    # exist when the output context exits.
    with optional_stdin_target(requested_targets) as scan_targets:
        with managed_output(output_settings) as output_handler:
            if dump_ast:
                ast_lang = __validate_lang("--dump_ast", lang)
                dump_parsed_ast(json, ast_lang, pattern, scan_targets)
            elif synthesize_patterns:
                synthesis_lang = __validate_lang("--synthesize-patterns", lang)
                synthesize(synthesis_lang, synthesize_patterns, scan_targets)
            elif validate:
                configs, config_errors = semgrep.config_resolver.get_config(
                    pattern, lang, config_sources
                )
                valid_str = "invalid" if config_errors else "valid"
                rule_count = len(configs.get_rules(True))
                logger.info(
                    f"Configuration is {valid_str} - found {len(configs.valid)} valid configuration(s), {len(config_errors)} configuration error(s), and {rule_count} rule(s)."
                )
                if config_errors:
                    # Report every problem first, then fail once.
                    for problem in config_errors:
                        output_handler.handle_semgrep_error(problem)
                    raise SemgrepError("Please fix the above errors and try again.")
            elif generate_config:
                with open(DEFAULT_CONFIG_FILE, "w") as config_file:
                    semgrep.config_resolver.generate_config(config_file, lang, pattern)
            else:
                # Default action: run a scan. Several CLI switches are
                # positive ("enable"/"use"/"scan") while the engine takes
                # the negated form, hence the `not`s below.
                semgrep.semgrep_main.main(
                    output_handler=output_handler,
                    target=scan_targets,
                    pattern=pattern,
                    lang=lang,
                    configs=config_sources,
                    no_rewrite_rule_ids=(not rewrite_rule_ids),
                    jobs=jobs,
                    include=include,
                    exclude=exclude,
                    max_target_bytes=max_target_bytes,
                    replacement=replacement,
                    strict=strict,
                    autofix=autofix,
                    dryrun=dryrun,
                    disable_nosem=(not enable_nosem),
                    no_git_ignore=(not use_git_ignore),
                    timeout=timeout,
                    max_memory=max_memory,
                    timeout_threshold=timeout_threshold,
                    skip_unknown_extensions=(not scan_unknown_extensions),
                    severity=severity,
                    optimizations=optimizations,
                )

    # Run the update check last, after the selected action has finished.
    if enable_version_check:
        from semgrep.version import version_check

        version_check()