def filtered_files(self, lang: Language) -> Set[Path]:
    """
    Return the set of files to consider for language LANG.

    The result is the union of two groups:

    * Files found by expanding every directory in TARGET for LANG, kept
      only if they pass the INCLUDES filter and then the EXCLUDES filter.
    * Files named explicitly in TARGET. These bypass includes and
      excludes, but an explicit file is dropped when it is not an
      existing regular file, or when it has a known extension that
      belongs to a language other than LANG.

    The result is cached in ``self._filtered_targets`` keyed by LANG, so
    later calls with the same language return the stored set.
    """
    if lang in self._filtered_targets:
        return self._filtered_targets[lang]

    targets = self.resolve_targets(self._targets)
    explicit_files, directories = partition_set(lambda p: not p.is_dir(), targets)

    # Resolve the language's extensions once instead of once per file;
    # tuple() keeps this safe even if lang_to_exts returns a one-shot iterator.
    lang_exts = tuple(lang_to_exts(lang))

    def has_extension(path, extensions):
        return any(path.match(f"*.{ext}") for ext in extensions)

    # Remove explicit files with known extensions of other languages and
    # remove non-existent files. `is_file` must be *called*: the bare bound
    # method is always truthy, which silently let missing paths through.
    explicit_files = {
        f
        for f in explicit_files
        if f.is_file()
        and (has_extension(f, lang_exts) or not has_extension(f, ALL_EXTENSIONS))
    }

    targets = self.expand_targets(directories, lang, self._respect_git_ignore)
    targets = self.filter_includes(targets, self._includes)
    targets = self.filter_excludes(targets, self._excludes)

    self._filtered_targets[lang] = targets.union(explicit_files)
    return self._filtered_targets[lang]
def filtered_files(self, lang: Language) -> FrozenSet[Path]:
    """
    Compute (and cache per language) the files to consider for LANG.

    Directory targets are expanded for LANG and then narrowed, in order,
    by the INCLUDES patterns, the EXCLUDES patterns plus ``.git``, and
    the maximum target size.

    Files named explicitly in TARGET skip all of those filters. An
    explicit file is added when its extension belongs to LANG, or, unless
    ``skip_unknown_extensions`` is set, when its extension is not a known
    extension at all. An explicit file with a known extension of a
    different language is therefore left out.

    Explicit paths that are not existing files are reported through the
    output handler as a ``FilesNotFoundError``; processing then continues.
    """
    if lang in self._filtered_targets:
        return self._filtered_targets[lang]

    def is_not_directory(path):
        return not path.is_dir()

    def is_existing_file(path):
        return path.is_file()

    def matches_any(path, extensions):
        # Extensions are appended to "*" as-is when building the pattern.
        return any(path.match(f"*{ext}") for ext in extensions)

    resolved = self.resolve_targets(self.targets)
    named_paths, directories = partition_set(is_not_directory, resolved)

    # Error on non-existent files
    explicit_files, missing = partition_set(is_existing_file, named_paths)
    if missing:
        not_found = FilesNotFoundError(tuple(missing))
        self.output_handler.handle_semgrep_error(not_found)

    # Directory-derived candidates go through the full filter pipeline.
    found = self.expand_targets(directories, lang, self.respect_git_ignore)
    found = self.filter_includes(found, self.includes)
    found = self.filter_excludes(found, self.excludes + [".git"])
    found = self.filter_by_size(found, self.max_target_bytes)

    # Explicit files whose extension belongs to this language are always kept.
    same_language = frozenset(
        path for path in explicit_files if matches_any(path, lang_to_exts(lang))
    )
    found = found.union(same_language)

    # Explicit files with an unrecognised extension are kept unless the
    # caller asked to skip them.
    if not self.skip_unknown_extensions:
        unrecognised = frozenset(
            path for path in explicit_files if not matches_any(path, ALL_EXTENSIONS)
        )
        found = found.union(unrecognised)

    self._filtered_targets[lang] = found
    return self._filtered_targets[lang]