Exemple #1
0
def test__config__find_sqlfluffignore_in_same_directory():
    """Ignore files beside the sql file and in the working path are both found."""
    # Relative locations of the ignore files we expect to be discovered:
    # one next to the sql file itself, one in the working path.
    expected_locations = (
        "test/fixtures/linter/sqlfluffignore/path_b/.sqlfluffignore",
        "test/fixtures/linter/sqlfluffignore/.sqlfluffignore",
    )
    expected = {os.path.abspath(location) for location in expected_locations}

    found = ConfigLoader.find_ignore_config_files(
        path="test/fixtures/linter/sqlfluffignore/path_b/query_b.sql",
        working_path="test/fixtures/linter/sqlfluffignore/",
    )

    # The loader is compared against a set of absolute paths.
    assert found == expected
Exemple #2
0
    def paths_from_path(
            self,
            path: str,
            ignore_file_name: str = ".sqlfluffignore",
            ignore_non_existent_files: bool = False,
            ignore_files: bool = True,
            working_path: str = os.getcwd(),
    ) -> List[str]:
        """Return a sorted list of sql file paths from a potentially ambiguous path.

        ``path`` may point at a single file or at a directory. Directories are
        expanded with ``os.walk``; an exact file is wrapped in a one-element
        structure shaped like ``os.walk`` output so both cases share one loop.

        Ignore files (named ``ignore_file_name``) are handled here as well.
        Besides the ones met while walking, the candidates returned by
        ``ConfigLoader.find_ignore_config_files(path, working_path, ...)`` are
        appended to the walk so that ignore files living in parent directories
        of ``path`` are taken into account too.

        Args:
            path: File or directory to collect sql files from.
            ignore_file_name: Base name of the ignore files to honour.
            ignore_non_existent_files: If True, a missing ``path`` yields an
                empty list instead of raising.
            ignore_files: If False, ignore files are not parsed and nothing is
                filtered out of the result.
            working_path: Forwarded to ``ConfigLoader.find_ignore_config_files``.
                NOTE: the default is ``os.getcwd()`` evaluated once, when the
                function is defined, not on every call.

        Returns:
            Sorted list of matching file paths. When ``ignore_files`` is True
            the paths are normalised with ``os.path.normpath`` and
            de-duplicated; otherwise they are returned as joined by the walk.

        Raises:
            OSError: If ``path`` does not exist and
                ``ignore_non_existent_files`` is False.
        """
        if not os.path.exists(path):
            if ignore_non_existent_files:
                return []
            raise OSError("Specified path does not exist")

        # Files referred to exactly are also ignored if
        # matched, but we warn the users when that happens
        is_exact_file = os.path.isfile(path)

        path_walk: WalkableType
        if is_exact_file:
            # When the exact file to lint is passed, we
            # fill path_walk with an input that follows
            # the structure of `os.walk`:
            #   (root, directories, files)
            path_walk = [
                (os.path.dirname(path), None, [os.path.basename(path)])
            ]
        else:
            path_walk = list(os.walk(path))

        ignore_file_paths = ConfigLoader.find_ignore_config_files(
            path=path,
            working_path=working_path,
            ignore_file_name=ignore_file_name)
        # Add paths that could contain "ignore files" to the path_walk list.
        # Each entry lists a single file, since we only have one
        # "ignore file name".
        path_walk += [
            (
                os.path.dirname(ignore_file_path),
                None,
                [os.path.basename(ignore_file_path)],
            ) for ignore_file_path in ignore_file_paths
        ]

        # The configured extensions do not change while walking, so resolve
        # them once. Keeping them in a tuple lets a single `endswith` call
        # test all of them, which also guarantees a file is buffered at most
        # once even if several configured extensions match it.
        sql_file_exts = tuple(
            self.config.get("sql_file_exts", default=".sql").lower().split(","))

        # If it's a directory then expand the path!
        buffer = []
        # Maps the directory holding an ignore file to its parsed spec.
        ignores = {}
        for dirpath, _, filenames in path_walk:
            for fname in filenames:
                fpath = os.path.join(dirpath, fname)
                # Handle potential .sqlfluffignore files
                if ignore_files and fname == ignore_file_name:
                    with open(fpath) as fh:
                        ignores[dirpath] = pathspec.PathSpec.from_lines(
                            "gitwildmatch", fh)
                    # We don't need to process the ignore file any further
                    continue

                # We won't purge files *here* because there's an edge case
                # that the ignore file is processed after the sql file.

                # Is it a sql file? (case-insensitive extension match)
                if fname.lower().endswith(sql_file_exts):
                    buffer.append(fpath)

        if not ignore_files:
            return sorted(buffer)

        # Check the buffer for ignore items and normalise the rest.
        # It's a set, so we can do natural deduplication.
        filtered_buffer = set()

        for fpath in buffer:
            abs_fpath = os.path.abspath(fpath)
            for ignore_base, ignore_spec in ignores.items():
                abs_ignore_base = os.path.abspath(ignore_base)
                # An ignore file only applies to files below its own
                # directory; patterns are matched against the path relative
                # to that directory.
                if abs_fpath.startswith(
                        abs_ignore_base + os.sep
                ) and ignore_spec.match_file(
                        os.path.relpath(abs_fpath, abs_ignore_base)):
                    # This file is ignored, skip it.
                    if is_exact_file:
                        linter_logger.warning(
                            "Exact file path %s was given but "
                            "it was ignored by a %s pattern in %s, "
                            "re-run with `--disregard-sqlfluffignores` to "
                            "skip %s",
                            path,
                            ignore_file_name,
                            ignore_base,
                            ignore_file_name,
                        )
                    break
            else:
                # No ignore spec matched: keep the file.
                npath = os.path.normpath(fpath)
                # For debugging, log if we already have the file.
                if npath in filtered_buffer:
                    linter_logger.debug(  # pragma: no cover
                        "Developer Warning: Path crawler attempted to "
                        "requeue the same file twice. %s is already in "
                        "filtered buffer.",
                        npath,
                    )
                filtered_buffer.add(npath)

        # Return a sorted list
        return sorted(filtered_buffer)
Exemple #3
0
    def paths_from_path(
            self,
            path: str,
            ignore_file_name: str = ".sqlfluffignore",
            ignore_non_existent_files: bool = False,
            ignore_files: bool = True,
            working_path: str = os.getcwd(),
    ) -> List[str]:
        """Return a sorted list of sql file paths from a potentially ambiguous path.

        ``path`` may point at a single file or at a directory.

        For a directory, the tree is expanded with ``os.walk`` and any ignore
        file (named ``ignore_file_name``) met during the walk is applied.

        For an exact file, the walk is replaced by a hand-built structure
        shaped like ``os.walk`` output, extended with the ignore files
        returned by ``ConfigLoader.find_ignore_config_files(path,
        working_path, ...)`` so that ignore files in parent directories of
        the file are considered as well.

        Args:
            path: File or directory to collect sql files from.
            ignore_file_name: Base name of the ignore files to honour.
            ignore_non_existent_files: If True, a missing ``path`` yields an
                empty list instead of raising.
            ignore_files: If False, ignore files are not parsed and nothing is
                filtered out of the result.
            working_path: Forwarded to ``ConfigLoader.find_ignore_config_files``
                (only used when ``path`` is an exact file).
                NOTE: the default is ``os.getcwd()`` evaluated once, when the
                function is defined, not on every call.

        Returns:
            Sorted list of matching file paths. When ``ignore_files`` is True
            the kept paths are normalised with ``os.path.normpath``.

        Raises:
            OSError: If ``path`` does not exist and
                ``ignore_non_existent_files`` is False.
        """
        if not os.path.exists(path):
            if ignore_non_existent_files:
                return []
            # IOError is an alias of OSError on Python 3.
            raise OSError("Specified path does not exist")

        # Files referred to exactly are also ignored if
        # matched, but we warn the users when that happens
        is_exact_file = os.path.isfile(path)

        path_walk: WalkableType
        if is_exact_file:
            # When the exact file to lint is passed, we
            # fill path_walk with an input that follows
            # the structure of `os.walk`:
            #   (root, directories, files)
            ignore_file_paths = ConfigLoader.find_ignore_config_files(
                path=path,
                working_path=working_path,
                ignore_file_name=ignore_file_name)
            # Add paths that could contain "ignore files" to the walk.
            # Each entry lists a single file, since we only have one
            # "ignore file name".
            path_walk = [
                (os.path.dirname(path), None, [os.path.basename(path)])
            ] + [
                (
                    os.path.dirname(ignore_file_path),
                    None,
                    [os.path.basename(ignore_file_path)],
                ) for ignore_file_path in ignore_file_paths
            ]
        else:
            path_walk = os.walk(path)

        # The configured extensions do not change while walking, so resolve
        # them once. Keeping them in a tuple lets a single `endswith` call
        # test all of them, which also guarantees a file is buffered at most
        # once even if several configured extensions match it.
        sql_file_exts = tuple(
            self.config.get("sql_file_exts", default=".sql").split(","))

        # If it's a directory then expand the path!
        buffer = []
        # Absolute paths of everything matched by any ignore file.
        ignore_set = set()
        for dirpath, _, filenames in path_walk:
            for fname in filenames:
                fpath = os.path.join(dirpath, fname)
                # Handle potential .sqlfluffignore files
                if ignore_files and fname == ignore_file_name:
                    with open(fpath) as fh:
                        spec = pathspec.PathSpec.from_lines("gitwildmatch", fh)
                    # Matches come back relative to the directory that holds
                    # the ignore file, so re-anchor them before storing.
                    for match in spec.match_tree(dirpath):
                        ignore_set.add(
                            os.path.abspath(os.path.join(dirpath, match)))
                    # We don't need to process the ignore file any further
                    continue

                # We won't purge files *here* because there's an edge case
                # that the ignore file is processed after the sql file.

                # Is it a sql file?
                if fname.endswith(sql_file_exts):
                    buffer.append(fpath)

        if not ignore_files:
            return sorted(buffer)

        # Check the buffer for ignore items and normalise the rest.
        filtered_buffer = []

        for fpath in buffer:
            if os.path.abspath(fpath) not in ignore_set:
                filtered_buffer.append(os.path.normpath(fpath))
            elif is_exact_file:
                linter_logger.warning(
                    "Exact file path %s was given but "
                    "it was ignored by a %s pattern, "
                    "re-run with `--disregard-sqlfluffignores` to "
                    "skip %s",
                    path,
                    ignore_file_name,
                    ignore_file_name,
                )

        # Return a sorted list
        return sorted(filtered_buffer)