def invoke_semgrep(
    config: Path,
    targets: List[Path],
    output_settings: Optional[OutputSettings] = None,
    **kwargs: Any,
) -> Union[Dict[str, Any], str]:
    """
    Run Semgrep with 'config' against 'targets' and hand back what it printed.

    The output is captured in memory. It is parsed with 'json.loads' when the
    output format reports itself as JSON, and returned as raw text otherwise.
    If 'output_settings' is omitted, JSON output is requested.

    Extra 'kwargs' are forwarded to 'main' as-is, so they override whatever
    defaults 'main' would otherwise use.
    """
    settings = output_settings
    if settings is None:
        settings = OutputSettings(output_format=OutputFormat.JSON)

    # Collect Semgrep's stdout in a buffer instead of the real stream.
    captured = StringIO()
    handler = OutputHandler(settings, stdout=captured)

    target_strings = [str(target) for target in targets]
    main(
        output_handler=handler,
        target=target_strings,
        pattern="",
        lang="",
        configs=[str(config)],
        **kwargs,
    )
    handler.close()

    if settings.output_format.is_json():
        return json.loads(captured.getvalue())
    return captured.getvalue()
def invoke_semgrep(config: Path, targets: List[Path], **kwargs: Any) -> Any:
    """
    Run Semgrep with 'config' against 'targets' and return the parsed JSON output.

    Extra 'kwargs' are forwarded to 'main' unchanged, overriding its defaults.
    """
    json_settings = OutputSettings(
        output_format=OutputFormat.JSON,
        output_destination=None,
        error_on_findings=False,
        verbose_errors=False,
        strict=False,
        json_stats=False,
        output_per_finding_max_lines_limit=None,
    )

    # Semgrep writes into this buffer rather than the real stdout.
    captured = StringIO()
    handler = OutputHandler(json_settings, stdout=captured)

    target_strings = [str(target) for target in targets]
    main(
        output_handler=handler,
        target=target_strings,
        pattern="",
        lang="",
        configs=[str(config)],
        **kwargs,
    )
    handler.close()

    raw_json = captured.getvalue()
    return json.loads(raw_json)
def test_ignore_git_dir(tmp_path, monkeypatch):
    """
    A '.git' directory passed as a target yields no files for the generic language
    """
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "bar").touch()

    monkeypatch.chdir(tmp_path)
    language = Language("generic")

    text_settings = OutputSettings(
        output_format=OutputFormat.TEXT,
        output_destination=None,
        error_on_findings=False,
        verbose_errors=False,
        strict=False,
        json_stats=False,
        output_time=False,
        output_per_finding_max_lines_limit=None,
        output_per_line_max_chars_limit=None,
    )
    defaulthandler = OutputHandler(text_settings)

    target_manager = TargetManager([], [], 0, [git_dir], True, defaulthandler, False)
    collected = target_manager.get_files(language, [], [])

    # The only file on disk lives under .git, so nothing should be collected.
    assert [] == collected
def invoke_semgrep(paths, scan_rules, **kwargs):
    """Run Semgrep over ``paths`` with ``scan_rules`` and return the parsed JSON.

    Returns ``None`` without importing Semgrep when running on Windows.
    Extra keyword arguments are forwarded to ``semgrep_main.main``.
    """
    if platform.system() == 'Windows':
        return None

    # Imported lazily so the Windows early-return never touches Semgrep.
    from semgrep import semgrep_main, util
    from semgrep.constants import OutputFormat
    from semgrep.output import OutputHandler, OutputSettings

    try:
        jobs = multiprocessing.cpu_count()
    except NotImplementedError:
        # Fall back to a single job when the CPU count is unavailable.
        jobs = 1

    # Positional flags, in order: verbose, quiet, force_color.
    util.set_flags(False, True, False)

    captured = StringIO()
    json_settings = OutputSettings(
        output_format=OutputFormat.JSON,
        output_destination=None,
        error_on_findings=False,
        strict=False,
    )
    handler = OutputHandler(json_settings, stdout=captured)

    posix_targets = [path.as_posix() for path in paths]
    semgrep_main.main(
        output_handler=handler,
        target=posix_targets,
        jobs=jobs,
        pattern=None,
        lang=None,
        config=scan_rules,
        **kwargs,
    )
    handler.close()

    raw_json = captured.getvalue()
    return json.loads(raw_json)
def invoke_semgrep(paths, scan_rules, **kwargs):
    """Run Semgrep over ``paths`` with ``scan_rules`` and return the parsed JSON.

    Returns ``None`` without importing Semgrep when running on Windows.
    Extra keyword arguments are forwarded to ``semgrep_main.main``.
    """
    if platform.system() == 'Windows':
        return None

    # Imported lazily so the Windows early-return never touches Semgrep.
    from semgrep import semgrep_main
    from semgrep.state import get_state
    from semgrep.constants import OutputFormat
    from semgrep.output import OutputHandler, OutputSettings

    try:
        jobs = multiprocessing.cpu_count()
    except NotImplementedError:
        # Fall back to a single job when the CPU count is unavailable.
        jobs = 1

    # Quiet Semgrep's terminal output and keep its logger from propagating.
    terminal = get_state().terminal
    terminal.configure(
        verbose=False,
        debug=False,
        quiet=True,
        force_color=False,
    )
    semgrep_logger = logging.getLogger('semgrep')
    semgrep_logger.propagate = False

    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.JSON,
            output_destination=None,
            output_per_finding_max_lines_limit=None,
            output_per_line_max_chars_limit=None,
            error_on_findings=False,
            verbose_errors=False,
            strict=False,
            timeout_threshold=3,
        ),
    )

    posix_targets = [path.as_posix() for path in paths]
    scan_result = semgrep_main.main(
        output_handler=handler,
        target=posix_targets,
        jobs=jobs,
        pattern=None,
        lang=None,
        configs=[scan_rules],
        timeout=5,
        timeout_threshold=3,
        **kwargs,
    )
    # The result is unpacked as exactly eight items; only the first is used.
    matches_by_rule, _, _, _, _, _, _, _ = scan_result

    # Flatten the per-rule match lists into one list on the handler.
    all_matches = []
    for rule_matches in matches_by_rule.values():
        all_matches.extend(rule_matches)
    handler.rule_matches = all_matches

    return json.loads(handler._build_output())
def test_explicit_path(tmp_path, monkeypatch):
    """
    Check how explicitly passed target paths are treated when collecting python files
    """
    foo = tmp_path / "foo"
    foo.mkdir()
    for go_name in ("a.go", "b.go"):
        (foo / go_name).touch()
    foo_noext = foo / "noext"
    foo_noext.touch()
    foo_a = foo / "a.py"
    foo_a.touch()
    (foo / "b.py").touch()

    monkeypatch.chdir(tmp_path)
    # Compare against the path relative to the new working directory.
    foo_a = foo_a.relative_to(tmp_path)

    defaulthandler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.TEXT,
            output_destination=None,
            error_on_findings=False,
            strict=False,
        )
    )
    python_language = Language("python")

    def python_files(excludes, targets, skip_unknown_extensions=False):
        # Build a fresh TargetManager per query and list its python files.
        manager = TargetManager(
            [], excludes, targets, False, defaulthandler, skip_unknown_extensions
        )
        return manager.get_files(python_language, [], [])

    # An explicitly passed python file is included.
    assert foo_a in python_files([], ["foo/a.py"])

    # An excluded file is dropped when only its parent directory is scanned...
    assert foo_a not in python_files(["foo/a.py"], ["."])
    # ...but is included when it is also passed explicitly.
    assert foo_a in python_files(["foo/a.py"], [".", "foo/a.py"])

    # An explicitly passed .go file is ignored when requesting python.
    assert python_files([], ["foo/a.go"]) == []

    # With skip_unknown_extensions=False an explicit file with no extension is kept.
    assert cmp_path_sets(
        set(python_files([], ["foo/noext"])),
        {foo_noext},
    )

    # With skip_unknown_extensions=True the same file is left out.
    assert cmp_path_sets(
        set(python_files([], ["foo/noext"], True)),
        set(),
    )
def Scan(self, filepath):
    """Run Semgrep on ``filepath`` with ``self.ruleset`` and return ``self.format(filepath)``.

    A fresh capture buffer and output handler are stored on the instance as
    ``self.io_capture`` and ``self.output`` before the scan runs.
    """
    capture = StringIO()
    self.io_capture = capture
    self.output = OutputHandler(self.setting, stdout=capture)

    scan_options = dict(
        output_handler=self.output,
        target=[filepath],
        jobs=1,
        pattern=None,
        lang=None,
        configs=[self.ruleset],
        timeout=5,
        timeout_threshold=3,
    )
    semgrep_main.main(**scan_options)

    # Close the handler before formatting the result.
    self.output.close()
    return self.format(filepath)