Beispiel #1
0
    def filtered_files(self, lang: Language) -> Set[Path]:
        """
        Return the set of files to scan for language LANG.

        Every resolved target that is a directory is expanded through
        ``expand_targets`` (honouring ``self._respect_git_ignore``); the
        expanded files are then narrowed by the INCLUDES patterns and by the
        EXCLUDES patterns.

        Every resolved target that is not a directory is treated as an
        explicitly requested file. Explicit files bypass INCLUDES and
        EXCLUDES, but are kept only when they:

        * exist as regular files, and
        * either carry an extension belonging to LANG, or carry an extension
          that is not listed in ALL_EXTENSIONS (i.e. an unknown extension).

        An explicit file whose extension is known but belongs to a different
        language is therefore dropped.

        The result is cached per language in ``self._filtered_targets``, so
        repeated calls with the same LANG return the cached set.
        """
        if lang in self._filtered_targets:
            return self._filtered_targets[lang]

        targets = self.resolve_targets(self._targets)
        explicit_files, directories = partition_set(lambda p: not p.is_dir(),
                                                    targets)

        def keep_explicit_file(path: Path) -> bool:
            # `is_file` must be *called*: the bare bound method is always
            # truthy, which silently let non-existent paths through.
            if not path.is_file():
                return False
            if any(path.match(f"*.{ext}") for ext in lang_to_exts(lang)):
                return True
            # Unknown extensions are kept; known extensions that belong to
            # another language are dropped.
            return not any(path.match(f"*.{ext}") for ext in ALL_EXTENSIONS)

        explicit_files = {f for f in explicit_files if keep_explicit_file(f)}

        targets = self.expand_targets(directories, lang,
                                      self._respect_git_ignore)
        targets = self.filter_includes(targets, self._includes)
        targets = self.filter_excludes(targets, self._excludes)

        # Explicit files are merged in after filtering so that they bypass
        # the include/exclude patterns.
        self._filtered_targets[lang] = targets.union(explicit_files)
        return self._filtered_targets[lang]
Beispiel #2
0
    def filtered_files(self, lang: Language) -> FrozenSet[Path]:
        """
        Compute (and cache) the files to scan for language LANG.

        Directories among the resolved targets are expanded with
        ``expand_targets`` and the resulting files are narrowed, in order, by
        the INCLUDES patterns, by the EXCLUDES patterns plus ``.git`` (so
        anything under a ``.git`` directory is dropped), and by
        ``filter_by_size`` using ``self.max_target_bytes``.

        Non-directory targets are explicit files. Those that are not existing
        regular files are reported through the output handler as a
        ``FilesNotFoundError``. Existing explicit files bypass the
        include/exclude/size filters: they are added when their extension
        matches LANG, and - unless ``self.skip_unknown_extensions`` is set -
        also when their extension matches nothing in ALL_EXTENSIONS. An
        explicit file with a known extension of another language is left out.

        Results are memoised per language in ``self._filtered_targets``.
        """
        cache = self._filtered_targets
        if lang in cache:
            return cache[lang]

        resolved = self.resolve_targets(self.targets)
        plain_paths, dirs = partition_set(lambda path: not path.is_dir(),
                                          resolved)

        # Split explicit paths into those that exist and those that do not;
        # the missing ones are reported before any expansion happens.
        existing, missing = partition_set(lambda path: path.is_file(),
                                          plain_paths)
        if missing:
            not_found = FilesNotFoundError(tuple(missing))
            self.output_handler.handle_semgrep_error(not_found)

        discovered = self.expand_targets(dirs, lang, self.respect_git_ignore)
        discovered = self.filter_includes(discovered, self.includes)
        discovered = self.filter_excludes(discovered,
                                          self.excludes + [".git"])
        discovered = self.filter_by_size(discovered, self.max_target_bytes)

        def has_extension_from(path, extensions):
            # True when the path ends with any of the given extensions.
            return any(path.match(f"*{ext}") for ext in extensions)

        # Explicit files whose extension belongs to the requested language.
        for_this_lang = frozenset(
            path for path in existing
            if has_extension_from(path, lang_to_exts(lang)))
        selected = discovered.union(for_this_lang)

        # Explicit files with an unrecognised extension are scanned as well,
        # unless the caller asked to skip them.
        if not self.skip_unknown_extensions:
            unrecognised = frozenset(
                path for path in existing
                if not has_extension_from(path, ALL_EXTENSIONS))
            selected = selected.union(unrecognised)

        cache[lang] = selected
        return cache[lang]