def __init__(self, raw: YamlTree[YamlMap]) -> None:
    """
    Build the rule from its parsed YAML tree.

    Stores the tree and its unrolled dict, reads the optional `paths`
    include/exclude lists and the rule languages, then builds the pattern
    expression together with the rule mode.

    Raises InvalidRuleSchemaError if `paths` is present but is not a mapping.
    """
    self._yaml = raw
    self._raw: Dict[str, Any] = raw.unroll_dict()

    # For tracking errors from semgrep-core
    self._pattern_spans: Dict[PatternId, Span] = {}

    # With no `paths:` key the rule has no include/exclude globs.
    paths_tree: Optional[YamlTree] = self._yaml.value.get("paths")
    path_dict: Dict[str, Any] = {}
    if paths_tree is not None:
        paths = paths_tree.value
        paths_span = paths_tree.span
        if not isinstance(paths, YamlMap):
            path_key = self._yaml.value.key_tree("paths").span
            # Only a list value gets a targeted hint; anything else gets none.
            help_str: Optional[str] = (
                "remove the `-` to convert the list into a mapping"
                if isinstance(paths, list)
                else None
            )
            raise InvalidRuleSchemaError(
                short_msg="invalid paths",
                long_msg=f"the `paths:` targeting rules must be an object with at least one of {ALLOWED_GLOB_TYPES}",
                spans=[path_key.extend_to(paths_span)],
                help=help_str,
            )
        path_dict = paths_tree.unroll_dict()

    self._includes = path_dict.get("include", [])
    self._excludes = path_dict.get("exclude", [])
    self._languages = [Language(lang) for lang in self._raw["languages"]]

    # check taint/search mode
    self._expression, self._mode = self._build_search_patterns_for_mode(
        self._yaml
    )
def test_ignore_git_dir(tmp_path, monkeypatch):
    """
    A `.git` directory passed as a target yields no files for the
    generic language
    """
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "bar").touch()

    monkeypatch.chdir(tmp_path)
    generic_language = Language("generic")

    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.TEXT,
            output_destination=None,
            error_on_findings=False,
            verbose_errors=False,
            strict=False,
            json_stats=False,
            output_time=False,
            output_per_finding_max_lines_limit=None,
            output_per_line_max_chars_limit=None,
        )
    )

    manager = TargetManager([], [], 0, [git_dir], True, handler, False)
    found = manager.get_files(generic_language, [], [])
    assert found == []
def test_explicit_path(tmp_path, monkeypatch):
    """
    Check how explicitly passed target files are treated by get_files
    when python files are requested
    """
    foo = tmp_path / "foo"
    foo.mkdir()
    for file_name in ("a.go", "b.go", "noext", "a.py", "b.py"):
        (foo / file_name).touch()
    foo_noext = foo / "noext"

    monkeypatch.chdir(tmp_path)

    # get_files results are compared against the path relative to the cwd
    foo_a = (foo / "a.py").relative_to(tmp_path)

    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.TEXT,
            output_destination=None,
            error_on_findings=False,
            strict=False,
        )
    )
    python_language = Language("python")

    def python_files(excludes, targets, skip_unknown_extensions):
        # Same TargetManager call shape for every case below; only the
        # second, third and last positional arguments vary.
        manager = TargetManager(
            [], excludes, targets, False, handler, skip_unknown_extensions
        )
        return manager.get_files(python_language, [], [])

    # Should include explicitly passed python file
    assert foo_a in python_files([], ["foo/a.py"], False)

    # An excluded python file is dropped when only its directory is scanned ...
    assert foo_a not in python_files(["foo/a.py"], ["."], False)
    # ... but is included when it is also passed explicitly
    assert foo_a in python_files(["foo/a.py"], [".", "foo/a.py"], False)

    # Should ignore explicitly passed .go file when requesting python
    assert python_files([], ["foo/a.go"], False) == []

    # Should include explicitly passed file with unknown extension if skip_unknown_extensions=False
    assert cmp_path_sets(
        set(python_files([], ["foo/noext"], False)),
        {foo_noext},
    )

    # Should not include explicitly passed file with unknown extension if skip_unknown_extensions=True
    assert cmp_path_sets(
        set(python_files([], ["foo/noext"], True)),
        set(),
    )
def test_skip_symlink(tmp_path, monkeypatch):
    """
    Symlinks are left out of expanded targets, both when found inside a
    directory and when passed directly
    """
    foo = tmp_path / "foo"
    foo.mkdir()
    real_file = foo / "a.py"
    real_file.touch()
    link = foo / "link.py"
    link.symlink_to(real_file)

    monkeypatch.chdir(tmp_path)
    python_language = Language("python")

    # Expanding the directory only yields the regular file
    from_directory = TargetManager.expand_targets([foo], python_language, False)
    assert cmp_path_sets(from_directory, {real_file})

    # Expanding the link itself yields nothing
    from_link = TargetManager.expand_targets([link], python_language, False)
    assert cmp_path_sets(from_link, set())
def __init__(self, raw: YamlTree[YamlMap]) -> None:
    """
    Build the rule from its parsed YAML tree.

    Stores the tree and its unrolled dict, validates and reads the optional
    `paths` include/exclude lists, reads the rule languages and builds the
    boolean pattern expression.

    Raises InvalidRuleSchemaError if `paths` is not a mapping, if one of its
    keys is not in ALLOWED_GLOB_TYPES, or if one of its values is not a list.
    """
    self._yaml = raw
    self._raw: Dict[str, Any] = raw.unroll_dict()

    # For tracking errors from semgrep-core
    self._pattern_spans: Dict[PatternId, Span] = {}

    # With no `paths:` key the rule has no include/exclude globs.
    paths_tree: Optional[YamlTree] = self._yaml.value.get("paths")
    path_dict: Dict[str, Any] = {}
    if paths_tree is not None:
        paths = paths_tree.value
        paths_span = paths_tree.span
        if not isinstance(paths, YamlMap):
            path_key = self._yaml.value.key_tree("paths").span
            # Only a list value gets a targeted hint; anything else gets none.
            help_str: Optional[str] = (
                "remove the `-` to convert the list into a mapping"
                if isinstance(paths, list)
                else None
            )
            raise InvalidRuleSchemaError(
                short_msg="invalid paths",
                long_msg=f"the `paths:` targeting rules must be an object with at least one of {ALLOWED_GLOB_TYPES}",
                spans=[path_key.extend_to(paths_span)],
                help=help_str,
            )

        # Each entry must be `<allowed glob type>: [ ... ]`.
        for glob_key, glob_value in paths.items():
            if glob_key.value not in ALLOWED_GLOB_TYPES:
                raise InvalidRuleSchemaError(
                    short_msg="invalid targeting rules",
                    long_msg=f"the `paths:` targeting rules must each be one of {ALLOWED_GLOB_TYPES}",
                    spans=[glob_key.span.with_context(before=1, after=1)],
                )
            if not isinstance(glob_value.value, list):
                raise InvalidRuleSchemaError(
                    short_msg="invalid target value",
                    long_msg="the `paths:` targeting rule values must be lists",
                    spans=[glob_value.span],
                )
        path_dict = paths_tree.unroll_dict()

    self._includes = path_dict.get("include", [])
    self._excludes = path_dict.get("exclude", [])
    self._languages = [Language(lang) for lang in self._raw["languages"]]
    self._expression = self._build_boolean_expression(self._yaml)
def test_delete_git(tmp_path, monkeypatch):
    """
    Check that deleted files are not included in expanded targets
    """
    foo = tmp_path / "foo.py"
    bar = tmp_path / "bar.py"
    for created in (foo, bar):
        created.touch()

    monkeypatch.chdir(tmp_path)

    # Commit foo.py only; bar.py stays untracked.
    for git_args in (
        ["init"],
        ["add", foo],
        ["commit", "-m", "first commit"],
    ):
        subprocess.run(["git", *git_args])

    # Remove the committed file from disk without telling git.
    foo.unlink()
    subprocess.run(["git", "status"])

    expanded = TargetManager.expand_targets([Path(".")], Language("python"), True)
    assert cmp_path_sets(expanded, {bar})
def test_expand_targets_not_git(tmp_path, monkeypatch):
    """
    Check that directory expansion works with relative paths, absolute paths, paths with ..
    """
    # Layout: foo/{a.go,b.go,py,a.py,b.py}, bar/{a.py,b.py}, foo/bar/{a.py,b.py}
    foo = tmp_path / "foo"
    foo.mkdir()
    for other_name in ("a.go", "b.go", "py"):
        (foo / other_name).touch()
    foo_a = foo / "a.py"
    foo_b = foo / "b.py"
    foo_a.touch()
    foo_b.touch()

    bar = tmp_path / "bar"
    bar.mkdir()
    bar_a = bar / "a.py"
    bar_b = bar / "b.py"
    bar_a.touch()
    bar_b.touch()

    foo_bar = foo / "bar"
    foo_bar.mkdir()
    foo_bar_a = foo_bar / "a.py"
    foo_bar_b = foo_bar / "b.py"
    foo_bar_a.touch()
    foo_bar_b.touch()

    in_foo_bar = {foo_bar_a, foo_bar_b}
    in_foo = {foo_a, foo_b}.union(in_foo_bar)
    in_bar = {bar_a, bar_b}
    in_all = in_foo.union(in_bar)

    python_language = Language("python")

    def expand(target):
        return TargetManager.expand_targets([target], python_language, False)

    # Targets as seen from the top-level directory. The case tables are built
    # after chdir because Path.resolve() depends on the current directory.
    monkeypatch.chdir(tmp_path)
    for target, expected in (
        (Path("."), in_all),
        (Path("bar"), in_bar),
        (Path("foo"), in_foo),
        (Path("foo").resolve(), in_foo),
        (Path("foo/bar"), in_foo_bar),
        (Path("foo/bar").resolve(), in_foo_bar),
    ):
        assert cmp_path_sets(expand(target), expected)

    # Same targets as seen from inside foo/
    monkeypatch.chdir(foo)
    for target, expected in (
        (Path("."), in_foo),
        (Path("./foo"), set()),
        (Path("bar"), in_foo_bar),
        (Path("bar"), in_foo_bar),
        (Path(".."), in_all),
        (Path("../bar"), in_bar),
        (Path("../foo/bar"), in_foo_bar),
    ):
        assert cmp_path_sets(expand(target), expected)
def test_expand_targets_git(tmp_path, monkeypatch):
    """
    Test TargetManager with visible_to_git_only flag on in a git repository
    with nested .gitignores
    """
    # Layout: foo/{a.go,b.go,py,a.py,b.py}, bar/{a.py,b.py}, foo/bar/{a.py,b.py}
    foo = tmp_path / "foo"
    foo.mkdir()
    foo_a_go = foo / "a.go"
    foo_a_go.touch()
    (foo / "b.go").touch()
    (foo / "py").touch()
    foo_a = foo / "a.py"
    foo_b = foo / "b.py"
    foo_a.touch()
    foo_b.touch()

    bar = tmp_path / "bar"
    bar.mkdir()
    bar_a = bar / "a.py"
    bar_b = bar / "b.py"
    bar_a.touch()
    bar_b.touch()

    foo_bar = foo / "bar"
    foo_bar.mkdir()
    foo_bar_a = foo_bar / "a.py"
    foo_bar_b = foo_bar / "b.py"
    foo_bar_a.touch()
    foo_bar_b.touch()

    # Track only a subset of the files; the rest stay untracked.
    monkeypatch.chdir(tmp_path)
    subprocess.run(["git", "init"])
    for tracked in (foo_a, foo_bar_a, foo_bar_b, foo_a_go):
        subprocess.run(["git", "add", tracked])
    subprocess.run(["git", "commit", "-m", "first"])

    python_language = Language("python")

    def expand(target):
        return TargetManager.expand_targets([target], python_language, True)

    def check_expansions(in_all, in_foo, in_bar, in_foo_bar):
        # Targets as seen from the top-level directory. The case tables are
        # built after chdir because Path.resolve() depends on the current
        # directory.
        monkeypatch.chdir(tmp_path)
        for target, expected in (
            (Path("."), in_all),
            (Path("bar"), in_bar),
            (Path("foo"), in_foo),
            (Path("foo").resolve(), in_foo),
            (Path("foo/bar"), in_foo_bar),
            (Path("foo/bar").resolve(), in_foo_bar),
        ):
            assert cmp_path_sets(expand(target), expected)

        # Same targets as seen from inside foo/
        monkeypatch.chdir(foo)
        for target, expected in (
            (Path("."), in_foo),
            (Path("./foo"), set()),
            (Path("bar"), in_foo_bar),
            (Path("bar"), in_foo_bar),
            (Path(".."), in_all),
            (Path("../bar"), in_bar),
            (Path("../foo/bar"), in_foo_bar),
        ):
            assert cmp_path_sets(expand(target), expected)

    # Check that all files are visible without a .gitignore
    visible_foo_bar = {foo_bar_a, foo_bar_b}
    visible_foo = {foo_a, foo_b}.union(visible_foo_bar)
    visible_bar = {bar_a, bar_b}
    check_expansions(
        in_all=visible_foo.union(visible_bar),
        in_foo=visible_foo,
        in_bar=visible_bar,
        in_foo_bar=visible_foo_bar,
    )

    # Add bar/, foo/bar/a.py, foo/b.py to gitignores
    monkeypatch.chdir(tmp_path)
    (tmp_path / ".gitignore").write_text("bar/\nfoo/bar/a.py")
    (tmp_path / "foo" / ".gitignore").write_text("b.py")

    # Reflect what should now be visible given gitignores
    # foo/bar/a.py is gitignored but is already tracked
    visible_foo_bar = {foo_bar_a, foo_bar_b}
    # foo/b.py is gitignored with a nested gitignore
    visible_foo = {foo_a}.union(visible_foo_bar)
    # bar/ is gitignored
    visible_bar = set()
    check_expansions(
        in_all=visible_foo.union(visible_bar),
        in_foo=visible_foo,
        in_bar=visible_bar,
        in_foo_bar=visible_foo_bar,
    )
# This is used to determine the set of files with known extensions,
# i.e. those for which we have a proper parser.
ALL_EXTENSIONS = (
    PYTHON_EXTENSIONS
    + JAVASCRIPT_EXTENSIONS
    + TYPESCRIPT_EXTENSIONS
    + JAVA_EXTENSIONS
    + C_EXTENSIONS
    + GO_EXTENSIONS
    + RUBY_EXTENSIONS
    + ML_EXTENSIONS
    + JSON_EXTENSIONS
    + RUST_EXTENSIONS
    + KOTLIN_EXTENSIONS
    + YAML_EXTENSIONS
)

# This is used to select the files suitable for spacegrep, which is
# all of them. It is spacegrep itself that will detect and ignore binary
# files.
GENERIC_EXTENSIONS = [FileExtension("")]

# Identifier spellings grouped per language (e.g. both "golang" and "go").
PYTHON_LANGUAGES = [
    Language(name) for name in ("python", "python2", "python3", "py")
]
JAVASCRIPT_LANGUAGES = [Language(name) for name in ("javascript", "js")]
TYPESCRIPT_LANGUAGES = [Language(name) for name in ("typescript", "ts")]
JAVA_LANGUAGES = [Language("java")]
C_LANGUAGES = [Language("c")]
GO_LANGUAGES = [Language(name) for name in ("golang", "go")]
RUBY_LANGUAGES = [Language(name) for name in ("ruby", "rb")]
PHP_LANGUAGES = [Language("php")]
LUA_LANGUAGES = [Language("lua")]
CSHARP_LANGUAGES = [Language(name) for name in ("csharp", "cs", "C#")]
RUST_LANGUAGES = [Language(name) for name in ("rust", "Rust", "rs")]
KOTLIN_LANGUAGES = [Language(name) for name in ("kotlin", "Kotlin", "kt")]