def _slice_template(cls, in_str: str) -> Iterator[RawFileSlice]:
    """Slice template in jinja.

    NB: Starts and ends of blocks are not distinguished.
    """
    env = cls._get_jinja_env()
    str_buff = ""
    idx = 0
    # We decide the "kind" of element we're dealing with
    # using its _closing_ tag rather than its opening
    # tag. The types here map back to similar types of
    # sections in the python slicer.
    block_types = {
        "variable_end": "templated",
        "block_end": "block",
        "comment_end": "comment",
        # Raw tags should behave like blocks. Note that
        # raw_end and raw_begin are whole tags rather
        # than blocks and comments where we get partial
        # tags.
        "raw_end": "block",
        "raw_begin": "block",
    }
    # https://jinja.palletsprojects.com/en/2.11.x/api/#jinja2.Environment.lex
    for _, elem_type, raw in env.lex(cls._preprocess_template(in_str)):
        if elem_type == "data":
            yield RawFileSlice(raw, "literal", idx)
            idx += len(raw)
            continue
        str_buff += raw
        # raw_end and raw_begin behave a little differently in
        # that the whole tag shows up in one go rather than getting
        # parts of the tag at a time.
        if elem_type.endswith("_end") or elem_type == "raw_begin":
            block_type = block_types[elem_type]
            block_subtype = None
            # Handle starts and ends of blocks
            if block_type == "block":
                # Trim off the brackets and then the whitespace
                m_open = cls.re_open_tag.search(str_buff)
                m_close = cls.re_close_tag.search(str_buff)
                trimmed_content = ""
                if m_open and m_close:
                    trimmed_content = str_buff[
                        len(m_open.group(0)):-len(m_close.group(0))]
                if trimmed_content.startswith("end"):
                    block_type = "block_end"
                elif trimmed_content.startswith("el"):
                    # else, elif
                    block_type = "block_mid"
                else:
                    block_type = "block_start"
                    if trimmed_content.split()[0] == "for":
                        block_subtype = "loop"
            yield RawFileSlice(str_buff, block_type, idx, block_subtype)
            idx += len(str_buff)
            str_buff = ""
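For context, a minimal standalone sketch (not sqlfluff code) of the lex() token stream the slicer above consumes. It uses only jinja2's public Environment.lex(), whereas the method itself relies on sqlfluff internals such as _get_jinja_env and re_open_tag. The sample template is made up.

# Illustrative only: "data" tokens are literals; tag contents arrive in
# several pieces, and the closing token type (variable_end / block_end /
# comment_end) identifies the kind of section, which is why block_types is
# keyed on the *_end names.
from jinja2 import Environment

sample = "SELECT {{ col }} FROM tbl {% if flag %}WHERE flag{% endif %}"
for lineno, token_type, value in Environment().lex(sample):
    print(lineno, token_type, repr(value))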
def _split_invariants(
    raw_sliced: List[RawFileSlice],
    literals: List[str],
    raw_occurances: Dict[str, List[int]],
    templated_occurances: Dict[str, List[int]],
    templated_str: str,
) -> Iterator[IntermediateFileSlice]:
    """Split a sliced file on its invariant literals."""
    # Calculate invariants
    invariants = [
        literal
        for literal in literals
        if len(raw_occurances[literal]) == 1
        and len(templated_occurances[literal]) == 1
    ]
    # Set up some buffers
    buffer: List[RawFileSlice] = []
    idx: Optional[int] = None
    templ_idx = 0
    # Loop through
    for raw, token_type, raw_pos in raw_sliced:
        if raw in invariants:
            if buffer:
                yield IntermediateFileSlice(
                    "compound",
                    slice(idx, raw_pos),
                    slice(templ_idx, templated_occurances[raw][0]),
                    buffer,
                )
            buffer = []
            idx = None
            yield IntermediateFileSlice(
                "invariant",
                slice(raw_pos, raw_pos + len(raw)),
                slice(
                    templated_occurances[raw][0],
                    templated_occurances[raw][0] + len(raw),
                ),
                [RawFileSlice(raw, token_type, templated_occurances[raw][0])],
            )
            templ_idx = templated_occurances[raw][0] + len(raw)
        else:
            buffer.append(RawFileSlice(raw, token_type, raw_pos))
            if idx is None:
                idx = raw_pos
    # If we have a final buffer, yield it
    if buffer:
        yield IntermediateFileSlice(
            "compound",
            slice((idx or 0), (idx or 0) + sum(len(slc.raw) for slc in buffer)),
            slice(templ_idx, len(templated_str)),
            buffer,
        )
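To illustrate the invariant idea above, a small hedged sketch: a literal is "invariant" when it occurs exactly once in both the raw and templated occurrence maps, so it can anchor the alignment between the two strings. The occurrence dicts below are made-up inputs in the same shape the function expects.

# Hypothetical occurrence maps: literal text -> positions where it was found.
raw_occurances = {"SELECT ": [0], "x": [7, 21], "\nFROM tbl": [25]}
templated_occurances = {"SELECT ": [0], "x": [7], "\nFROM tbl": [12]}
literals = ["SELECT ", "x", "\nFROM tbl"]

# Invariants: literals appearing exactly once on both sides.
invariants = [
    lit
    for lit in literals
    if len(raw_occurances[lit]) == 1 and len(templated_occurances[lit]) == 1
]
print(invariants)  # ['SELECT ', '\nFROM tbl'] -- "x" repeats in the raw string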
def test__templated_file_source_only_slices():
    """Test TemplatedFile.source_only_slices."""
    file = TemplatedFile(
        source_str=" Dummy String again ",  # NB: has length 20
        raw_sliced=[
            RawFileSlice("a" * 10, "literal", 0),
            RawFileSlice("b" * 7, "comment", 10),
            RawFileSlice("a" * 10, "literal", 17),
        ],
    )
    assert file.source_only_slices() == [RawFileSlice("b" * 7, "comment", 10)]
def _slice_template(cls, in_str: str) -> Iterator[RawFileSlice]:
    """Slice a templated python string into token tuples.

    This uses Formatter() as per:
    https://docs.python.org/3/library/string.html#string.Formatter
    """
    fmt = Formatter()
    in_idx = 0
    for literal_text, field_name, format_spec, conversion in fmt.parse(in_str):
        if literal_text:
            escape_chars = cls._sorted_occurrence_tuples(
                cls._substring_occurrences(literal_text, ["}", "{"])
            )
            idx = 0
            while escape_chars:
                first_char = escape_chars.pop()
                # Is there a literal first?
                if first_char[1] > idx:
                    yield RawFileSlice(
                        literal_text[idx : first_char[1]], "literal", in_idx
                    )
                    in_idx += first_char[1] - idx
                # Add the escaped
                idx = first_char[1] + len(first_char[0])
                # We double them here to make the raw
                yield RawFileSlice(
                    literal_text[first_char[1] : idx] * 2, "escaped", in_idx
                )
                # Will always be 2 in this case.
                # This is because ALL escape sequences in the python formatter
                # are two characters which reduce to one.
                in_idx += 2
            # Deal with last one (if present)
            if literal_text[idx:]:
                yield RawFileSlice(literal_text[idx:], "literal", in_idx)
                in_idx += len(literal_text) - idx
        # Deal with fields
        if field_name:
            constructed_token = "{{{field_name}{conv}{spec}}}".format(
                field_name=field_name,
                conv=f"!{conversion}" if conversion else "",
                spec=f":{format_spec}" if format_spec else "",
            )
            yield RawFileSlice(constructed_token, "templated", in_idx)
            in_idx += len(constructed_token)
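As a reference point for the Formatter-based slicer above, a small standalone sketch (not sqlfluff code) of what the standard library's string.Formatter().parse() yields for a Python-format template; the four-tuples it returns are exactly what the loop above consumes. The sample string is made up.

from string import Formatter

fmt = Formatter()
sample = "SELECT {col!r:>10} FROM {{escaped}} tbl"
for literal_text, field_name, format_spec, conversion in fmt.parse(sample):
    # Doubled braces are already reduced to single ones in literal_text;
    # field_name/format_spec/conversion are None for purely literal chunks.
    print(repr(literal_text), field_name, format_spec, conversion)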
def has_template_conflicts(self, templated_file: TemplatedFile) -> bool:
    """Does this fix conflict with (i.e. touch) templated code?"""
    # Goal: Find the raw slices touched by the fix. Two cases, based on
    # edit type:
    # 1. "delete", "replace": Raw slices touching the anchor segment. If
    #    ANY are templated, discard the fix.
    # 2. "create_before", "create_after": Raw slices encompassing the two
    #    character positions surrounding the insertion point (**NOT** the
    #    whole anchor segment, because we're not *touching* the anchor
    #    segment, we're inserting **RELATIVE** to it). If ALL are templated,
    #    discard the fix.
    assert self.anchor.pos_marker
    anchor_slice = self.anchor.pos_marker.templated_slice
    templated_slices = [anchor_slice]
    check_fn = any

    if self.edit_type == "create_before":
        # Consider the first position of the anchor segment and the
        # position just before it.
        templated_slices = [
            slice(anchor_slice.start, anchor_slice.start + 1),
            slice(anchor_slice.start - 1, anchor_slice.start),
        ]
        check_fn = all
    elif self.edit_type == "create_after":
        # Consider the last position of the anchor segment and the
        # character just after it.
        templated_slices = [
            slice(anchor_slice.stop - 1, anchor_slice.stop),
            slice(anchor_slice.stop, anchor_slice.stop + 1),
        ]
        check_fn = all
    # TRICKY: For creations at the end of the file, there won't be an
    # existing slice. In this case, the function adds file_end_slice to the
    # result, as a sort of placeholder or sentinel value. We pass a literal
    # slice for "file_end_slice" so that later in this function, the LintFix
    # is interpreted as literal code. Otherwise, it could be interpreted as
    # a fix to *templated* code and incorrectly discarded.
    fix_slices = self._raw_slices_from_templated_slices(
        templated_file,
        templated_slices,
        file_end_slice=RawFileSlice("", "literal", -1),
    )

    # We have the fix slices. Now check for conflicts.
    result = check_fn(fs.slice_type == "templated" for fs in fix_slices)
    if result or not self.source:
        return result

    # Fix slices were okay. Now check template safety of the "source" field.
    templated_slices = [
        cast(PositionMarker, source.pos_marker).templated_slice
        for source in self.source
    ]
    raw_slices = self._raw_slices_from_templated_slices(
        templated_file, templated_slices)
    return any(fs.slice_type == "templated" for fs in raw_slices)
def get_fix_slices(
    self, templated_file: TemplatedFile, within_only: bool
) -> Set[RawFileSlice]:
    """Returns slices touched by the fix."""
    # Goal: Find the raw slices touched by the fix. Two cases, based on
    # edit type:
    # 1. "delete", "replace": Raw slices touching the anchor segment.
    # 2. "create_before", "create_after": Raw slices encompassing the two
    #    character positions surrounding the insertion point (**NOT** the
    #    whole anchor segment, because we're not *touching* the anchor
    #    segment, we're inserting **RELATIVE** to it).
    assert self.anchor.pos_marker
    anchor_slice = self.anchor.pos_marker.templated_slice
    templated_slices = [anchor_slice]

    # If "within_only" is set for a "create_*" fix, the slice should only
    # include the area of code "within" the area of insertion, not the other
    # side.
    adjust_boundary = 1 if not within_only else 0
    if self.edit_type == "create_before":
        # Consider the first position of the anchor segment and the
        # position just before it.
        templated_slices = [
            slice(anchor_slice.start - 1, anchor_slice.start + adjust_boundary),
        ]
    elif self.edit_type == "create_after":
        # Consider the last position of the anchor segment and the
        # character just after it.
        templated_slices = [
            slice(anchor_slice.stop - adjust_boundary, anchor_slice.stop + 1),
        ]
    elif (
        self.edit_type == "replace"
        and self.anchor.pos_marker.source_slice.stop
        == self.anchor.pos_marker.source_slice.start
    ):
        # We're editing something with zero size in the source. This means
        # it likely _didn't exist_ in the source and so can be edited safely.
        # We return an empty set because this edit doesn't touch anything
        # in the source.
        return set()

    # TRICKY: For creations at the end of the file, there won't be an
    # existing slice. In this case, the function adds file_end_slice to the
    # result, as a sort of placeholder or sentinel value. We pass a literal
    # slice for "file_end_slice" so that later in this function, the LintFix
    # is interpreted as literal code. Otherwise, it could be interpreted as
    # a fix to *templated* code and incorrectly discarded.
    return self._raw_slices_from_templated_slices(
        templated_file,
        templated_slices,
        file_end_slice=RawFileSlice("", "literal", -1),
    )
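A hedged sketch of the boundary arithmetic described in the comments above, using plain slice objects and made-up positions. It is not the sqlfluff implementation, just the "create_before"/"create_after" calculation in isolation.

# Suppose the anchor segment occupies templated positions 10..15.
anchor_slice = slice(10, 15)

def insertion_slices(edit_type: str, anchor: slice, within_only: bool):
    """Illustrative boundary calculation for create_before/create_after."""
    adjust_boundary = 1 if not within_only else 0
    if edit_type == "create_before":
        # The position just before the anchor, plus (optionally) its first char.
        return [slice(anchor.start - 1, anchor.start + adjust_boundary)]
    if edit_type == "create_after":
        # The anchor's last char (optionally), plus the position just after it.
        return [slice(anchor.stop - adjust_boundary, anchor.stop + 1)]
    return [anchor]

print(insertion_slices("create_before", anchor_slice, within_only=False))
# [slice(9, 11, None)]
print(insertion_slices("create_after", anchor_slice, within_only=True))
# [slice(15, 16, None)]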
def track_literal(self, raw: str, block_idx: int) -> None:
    """Set up tracking for a Jinja literal."""
    self.raw_sliced.append(
        RawFileSlice(
            raw,
            "literal",
            self.idx_raw,
            None,
            block_idx,
        ))
    # Replace literal text with a unique ID.
    self.raw_slice_info[self.raw_sliced[-1]] = self.slice_info_for_literal(
        len(raw), "")
    self.idx_raw += len(raw)
def test__templated_file_templated_slice_to_source_slice(
        in_slice, out_slice, is_literal, file_slices, raw_slices):
    """Test TemplatedFile.templated_slice_to_source_slice."""
    file = TemplatedFile(
        source_str="Dummy String",
        sliced_file=file_slices,
        raw_sliced=[
            rs if isinstance(rs, RawFileSlice) else RawFileSlice(*rs)
            for rs in raw_slices
        ],
        fname="test",
    )
    source_slice = file.templated_slice_to_source_slice(in_slice)
    literal_test = file.is_source_slice_literal(source_slice)
    assert (is_literal, source_slice) == (literal_test, out_slice)
def handle_left_whitespace_stripping(self, token: str, block_idx: int) -> None:
    """If block open uses whitespace stripping, record it.

    When a "begin" tag (whether block, comment, or data) uses whitespace
    stripping
    (https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control),
    the Jinja lex() function handles this by discarding adjacent whitespace
    from 'raw_str'. For more insight, see the tokeniter() function in this
    file:
    https://github.com/pallets/jinja/blob/main/src/jinja2/lexer.py

    We want to detect and correct for this in order to:
    - Correctly update "idx" (if this is wrong, that's a potential DISASTER
      because lint fixes use this info to update the source file, and
      incorrect values often result in CORRUPTING the user's file so it's
      no longer valid SQL. :-O)
    - Guarantee that the slices we return fully "cover" the contents of
      'in_str'.

    We detect skipped characters by looking ahead in in_str for the token
    just returned from lex(). The token text will either be at the current
    'idx_raw' position (if whitespace stripping did not occur) OR it'll be
    farther along in 'raw_str', but we're GUARANTEED that lex() only skips
    over WHITESPACE; nothing else.
    """
    # Find the token returned. Did lex() skip over any characters?
    num_chars_skipped = self.raw_str.index(token, self.idx_raw) - self.idx_raw
    if not num_chars_skipped:
        return

    # Yes. It skipped over some characters. Compute a string
    # containing the skipped characters.
    skipped_str = self.raw_str[self.idx_raw:self.idx_raw + num_chars_skipped]

    # Sanity check: Verify that Jinja only skips over
    # WHITESPACE, never anything else.
    if not skipped_str.isspace():  # pragma: no cover
        templater_logger.warning(
            "Jinja lex() skipped non-whitespace: %s", skipped_str)
    # Treat the skipped whitespace as a literal.
    self.raw_sliced.append(
        RawFileSlice(skipped_str, "literal", self.idx_raw, None, block_idx))
    self.raw_slice_info[self.raw_sliced[-1]] = self.slice_info_for_literal(0)
    self.idx_raw += num_chars_skipped
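A small standalone sketch (made-up strings, not sqlfluff code) of the skipped-whitespace detection the docstring describes: look up the lexed token in the raw string at or after the current index, and treat any gap as whitespace that Jinja stripped.

raw_str = "SELECT 1\n   {%- if flag %}"   # "{%-" strips the preceding whitespace
idx_raw = 8                                # where the tracker "should" be
token = "{%-"                              # token just returned by lex()

num_chars_skipped = raw_str.index(token, idx_raw) - idx_raw
skipped_str = raw_str[idx_raw:idx_raw + num_chars_skipped]
assert skipped_str.isspace()               # lex() only ever skips whitespace
print(num_chars_skipped, repr(skipped_str))  # 4 '\n   '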
def _split_invariants(
    cls,
    raw_sliced: List[RawFileSlice],
    literals: List[str],
    raw_occurrences: Dict[str, List[int]],
    templated_occurrences: Dict[str, List[int]],
    templated_str: str,
) -> Iterator[IntermediateFileSlice]:
    """Split a sliced file on its invariant literals.

    We prioritise the _longest_ invariants first as they are more likely
    to be the anchors.
    """
    # Calculate invariants
    invariants = [
        literal
        for literal in literals
        if len(raw_occurrences[literal]) == 1
        and len(templated_occurrences[literal]) == 1
    ]
    # Work through the invariants and make sure they appear
    # in order.
    for linv in sorted(invariants, key=len, reverse=True):
        # Any invariants which have templated positions, relative
        # to source positions, which aren't in order, should be
        # ignored.

        # Is this one still relevant?
        if linv not in invariants:
            continue
        source_pos, templ_pos = raw_occurrences[linv], templated_occurrences[linv]
        # Copy the list before iterating because we're going to edit it.
        for tinv in invariants.copy():
            if tinv != linv:
                src_dir = source_pos > raw_occurrences[tinv]
                tmp_dir = templ_pos > templated_occurrences[tinv]
                # If it's not in the same direction in the source and
                # template, remove it.
                if src_dir != tmp_dir:
                    templater_logger.debug(
                        " Invariant found out of order: %r", tinv
                    )
                    invariants.remove(tinv)

    # Set up some buffers
    buffer: List[RawFileSlice] = []
    idx: Optional[int] = None
    templ_idx = 0
    # Loop through
    for raw, token_type, raw_pos, _ in raw_sliced:
        if raw in invariants:
            if buffer:
                yield IntermediateFileSlice(
                    "compound",
                    slice(idx, raw_pos),
                    slice(templ_idx, templated_occurrences[raw][0]),
                    buffer,
                )
            buffer = []
            idx = None
            yield IntermediateFileSlice(
                "invariant",
                slice(raw_pos, raw_pos + len(raw)),
                slice(
                    templated_occurrences[raw][0],
                    templated_occurrences[raw][0] + len(raw),
                ),
                [RawFileSlice(raw, token_type, templated_occurrences[raw][0])],
            )
            templ_idx = templated_occurrences[raw][0] + len(raw)
        else:
            buffer.append(RawFileSlice(raw, token_type, raw_pos))
            if idx is None:
                idx = raw_pos
    # If we have a final buffer, yield it
    if buffer:
        yield IntermediateFileSlice(
            "compound",
            slice((idx or 0), (idx or 0) + sum(len(slc.raw) for slc in buffer)),
            slice(templ_idx, len(templated_str)),
            buffer,
        )
instr = "SELECT * FROM {{blah}}" outstr, _ = t.process(in_str=instr, fname="test") assert instr == str(outstr) SIMPLE_SOURCE_STR = "01234\n6789{{foo}}fo\nbarss" SIMPLE_TEMPLATED_STR = "01234\n6789x\nfo\nbarfss" SIMPLE_SLICED_FILE = [ TemplatedFileSlice(*args) for args in [ ("literal", slice(0, 10, None), slice(0, 10, None)), ("templated", slice(10, 17, None), slice(10, 12, None)), ("literal", slice(17, 25, None), slice(12, 20, None)), ] ] SIMPLE_RAW_SLICED_FILE = [ RawFileSlice(*args) for args in [ ("x" * 10, "literal", 0), ("x" * 7, "templated", 10), ("x" * 8, "literal", 17), ] ] COMPLEX_SLICED_FILE = [ TemplatedFileSlice(*args) for args in [ ("literal", slice(0, 13, None), slice(0, 13, None)), ("comment", slice(13, 29, None), slice(13, 13, None)), ("literal", slice(29, 44, None), slice(13, 28, None)), ("block_start", slice(44, 68, None), slice(28, 28, None)), ("literal", slice(68, 81, None), slice(28, 41, None)), ("templated", slice(81, 86, None), slice(41, 42, None)), ("literal", slice(86, 110, None), slice(42, 66, None)),
    t = PythonTemplater(override_context=dict(noblah="foo"))
    instr = PYTHON_STRING
    with pytest.raises(SQLTemplaterError):
        t.process(in_str=instr)


@pytest.mark.parametrize(
    "int_slice,templated_str,head_test,tail_test,int_test",
    [
        # Test Invariant
        (
            IntermediateFileSlice(
                "compound",
                slice(0, 5),
                slice(0, 5),
                [RawFileSlice("{{i}}", "templated", 0)],
            ),
            "foo",
            [],
            [],
            IntermediateFileSlice(
                "compound",
                slice(0, 5),
                slice(0, 5),
                [RawFileSlice("{{i}}", "templated", 0)],
            ),
        ),
        # Test Complete Trimming
        (
            IntermediateFileSlice(
                "compound",
def process(
    self, *, in_str: str, fname: str, config=None, formatter=None
) -> Tuple[Optional[TemplatedFile], list]:
    """Process a string and return a TemplatedFile.

    Note that the arguments are enforced as keywords
    because Templaters can have differences in their `process`
    method signature. A Templater that only supports reading
    from a file would need the following signature:
        process(*, fname, in_str=None, config=None)
    (arguments are swapped)

    Args:
        in_str (:obj:`str`): The input string.
        fname (:obj:`str`, optional): The filename of this string. This is
            mostly for loading config files at runtime.
        config (:obj:`FluffConfig`): A specific config to use for this
            templating operation. Only necessary for some templaters.
        formatter (:obj:`CallbackFormatter`): Optional object for output.
    """
    context = self.get_context(config)
    template_slices = []
    raw_slices = []
    last_pos_raw, last_pos_templated = 0, 0
    out_str = ""

    regex = context["__bind_param_regex"]
    # when the param has no name, use a 1-based index
    param_counter = 1
    for found_param in regex.finditer(in_str):
        span = found_param.span()
        if "param_name" not in found_param.groupdict():
            param_name = str(param_counter)
            param_counter += 1
        else:
            param_name = found_param["param_name"]
        last_literal_length = span[0] - last_pos_raw
        try:
            replacement = str(context[param_name])
        except KeyError as err:
            # TODO: Add a url here so people can get more help.
            raise SQLTemplaterError(
                "Failure in placeholder templating: {}. Have you configured your "
                "variables?".format(err))
        # add the literal to the slices
        template_slices.append(
            TemplatedFileSlice(
                slice_type="literal",
                source_slice=slice(last_pos_raw, span[0], None),
                templated_slice=slice(
                    last_pos_templated,
                    last_pos_templated + last_literal_length,
                    None,
                ),
            ))
        raw_slices.append(
            RawFileSlice(
                raw=in_str[last_pos_raw:span[0]],
                slice_type="literal",
                source_idx=last_pos_raw,
            ))
        out_str += in_str[last_pos_raw:span[0]]
        # add the current replaced element
        start_template_pos = last_pos_templated + last_literal_length
        template_slices.append(
            TemplatedFileSlice(
                slice_type="templated",
                source_slice=slice(span[0], span[1], None),
                templated_slice=slice(
                    start_template_pos,
                    start_template_pos + len(replacement),
                    None),
            ))
        raw_slices.append(
            RawFileSlice(
                raw=in_str[span[0]:span[1]],
                slice_type="templated",
                source_idx=span[0],
            ))
        out_str += replacement
        # update the indexes
        last_pos_raw = span[1]
        last_pos_templated = start_template_pos + len(replacement)
    # add the last literal, if any
    if len(in_str) > last_pos_raw:
        template_slices.append(
            TemplatedFileSlice(
                slice_type="literal",
                source_slice=slice(last_pos_raw, len(in_str), None),
                templated_slice=slice(
                    last_pos_templated,
                    last_pos_templated + (len(in_str) - last_pos_raw),
                    None,
                ),
            ))
        raw_slices.append(
            RawFileSlice(
                raw=in_str[last_pos_raw:],
                slice_type="literal",
                source_idx=last_pos_raw,
            ))
        out_str += in_str[last_pos_raw:]
    return (
        TemplatedFile(
            # original string
            source_str=in_str,
            # string after all replacements
            templated_str=out_str,
            # filename
            fname=fname,
            # list of TemplatedFileSlice
            sliced_file=template_slices,
            # list of RawFileSlice, same size
            raw_sliced=raw_slices,
        ),
        [],  # violations, always empty
    )
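To make the bind-parameter walk above concrete, a hedged sketch with a made-up regex and context (the real pattern comes from the templater's "__bind_param_regex" config): finditer() yields the spans that split the input into alternating literal and templated slices.

import re

in_str = "SELECT * FROM tbl WHERE id = :user_id AND name = :name"
regex = re.compile(r":(?P<param_name>\w+)")      # assumed colon-style placeholders
context = {"user_id": 42, "name": "'bob'"}

out_str, last_pos = "", 0
for found_param in regex.finditer(in_str):
    span = found_param.span()
    out_str += in_str[last_pos:span[0]]                  # literal slice
    out_str += str(context[found_param["param_name"]])   # templated slice
    last_pos = span[1]
out_str += in_str[last_pos:]                             # trailing literal, if any
print(out_str)  # SELECT * FROM tbl WHERE id = 42 AND name = 'bob'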
def _slice_template(self) -> List[RawFileSlice]:
    """Slice template in jinja.

    NB: Starts and ends of blocks are not distinguished.
    """
    str_buff = ""
    idx = 0
    # We decide the "kind" of element we're dealing with
    # using its _closing_ tag rather than its opening
    # tag. The types here map back to similar types of
    # sections in the python slicer.
    block_types = {
        "variable_end": "templated",
        "block_end": "block",
        "comment_end": "comment",
        # Raw tags should behave like blocks. Note that
        # raw_end and raw_begin are whole tags rather
        # than blocks and comments where we get partial
        # tags.
        "raw_end": "block",
        "raw_begin": "block",
    }
    # https://jinja.palletsprojects.com/en/2.11.x/api/#jinja2.Environment.lex
    stack = []
    result = []
    set_idx = None
    unique_alternate_id: Optional[str]
    alternate_code: Optional[str]
    for _, elem_type, raw in self.env.lex(self.raw_str):
        # Replace literal text with a unique ID.
        if elem_type == "data":
            if set_idx is None:
                unique_alternate_id = self.next_slice_id()
                alternate_code = f"\0{unique_alternate_id}_{len(raw)}"
            else:
                unique_alternate_id = self.next_slice_id()
                alternate_code = f"\0set{unique_alternate_id}_{len(raw)}"
            result.append(RawFileSlice(
                raw,
                "literal",
                idx,
            ))
            self.raw_slice_info[result[-1]] = RawSliceInfo(
                unique_alternate_id, alternate_code, [])
            idx += len(raw)
            continue
        str_buff += raw

        if elem_type.endswith("_begin"):
            # When a "begin" tag (whether block, comment, or data) uses
            # whitespace stripping (
            # https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control
            # ), the Jinja lex() function handles this by discarding adjacent
            # whitespace from in_str. For more insight, see the tokeniter()
            # function in this file:
            # https://github.com/pallets/jinja/blob/main/src/jinja2/lexer.py
            # We want to detect and correct for this in order to:
            # - Correctly update "idx" (if this is wrong, that's a
            #   potential DISASTER because lint fixes use this info to
            #   update the source file, and incorrect values often result in
            #   CORRUPTING the user's file so it's no longer valid SQL. :-O)
            # - Guarantee that the slices we return fully "cover" the
            #   contents of in_str.
            #
            # We detect skipped characters by looking ahead in in_str for
            # the token just returned from lex(). The token text will either
            # be at the current 'idx' position (if whitespace stripping did
            # not occur) OR it'll be farther along in in_str, but we're
            # GUARANTEED that lex() only skips over WHITESPACE; nothing else.

            # Find the token returned. Did lex() skip over any characters?
            num_chars_skipped = self.raw_str.index(raw, idx) - idx
            if num_chars_skipped:
                # Yes. It skipped over some characters. Compute a string
                # containing the skipped characters.
                skipped_str = self.raw_str[idx:idx + num_chars_skipped]

                # Sanity check: Verify that Jinja only skips over
                # WHITESPACE, never anything else.
                if not skipped_str.isspace():  # pragma: no cover
                    templater_logger.warning(
                        "Jinja lex() skipped non-whitespace: %s", skipped_str)
                # Treat the skipped whitespace as a literal.
                result.append(RawFileSlice(skipped_str, "literal", idx))
                self.raw_slice_info[result[-1]] = RawSliceInfo("", "", [])
                idx += num_chars_skipped

        # raw_end and raw_begin behave a little differently in
        # that the whole tag shows up in one go rather than getting
        # parts of the tag at a time.
        unique_alternate_id = None
        alternate_code = None
        trimmed_content = ""
        if elem_type.endswith("_end") or elem_type == "raw_begin":
            block_type = block_types[elem_type]
            block_subtype = None
            # Handle starts and ends of blocks
            if block_type in ("block", "templated"):
                # Trim off the brackets and then the whitespace
                m_open = self.re_open_tag.search(str_buff)
                m_close = self.re_close_tag.search(str_buff)
                if m_open and m_close:
                    trimmed_content = str_buff[
                        len(m_open.group(0)):-len(m_close.group(0))]
            # :TRICKY: Syntactically, the Jinja {% include %} directive looks
            # like a block, but its behavior is basically syntactic sugar for
            # {{ open("somefile").read() }}. Thus, treat it as templated code.
            if block_type == "block" and trimmed_content.startswith("include "):
                block_type = "templated"
            if block_type == "block":
                if trimmed_content.startswith("end"):
                    block_type = "block_end"
                elif trimmed_content.startswith("el"):
                    # else, elif
                    block_type = "block_mid"
                else:
                    block_type = "block_start"
                    if trimmed_content.split()[0] == "for":
                        block_subtype = "loop"
            else:
                # For "templated", evaluate the content in case of side
                # effects, but return a unique slice ID.
                if trimmed_content:
                    assert m_open and m_close
                    unique_id = self.next_slice_id()
                    unique_alternate_id = unique_id
                    prefix = "set" if set_idx is not None else ""
                    open_ = m_open.group(1)
                    close_ = m_close.group(1)
                    alternate_code = (
                        f"\0{prefix}{unique_alternate_id} {open_} "
                        f"{trimmed_content} {close_}")
            if block_type == "block_start" and trimmed_content.split()[0] in (
                "macro",
                "set",
            ):
                # Jinja supports two forms of {% set %}:
                # - {% set variable = value %}
                # - {% set variable %}value{% endset %}
                # https://jinja.palletsprojects.com/en/2.10.x/templates/#block-assignments
                # When the second format is used, set the variable 'set_idx'
                # to a non-None value. This info is used elsewhere, as
                # literals inside a {% set %} block require special handling
                # during the trace.
                trimmed_content_parts = trimmed_content.split(maxsplit=2)
                if len(trimmed_content_parts) <= 2 or not trimmed_content_parts[
                        2].startswith("="):
                    set_idx = len(result)
            elif block_type == "block_end" and set_idx is not None:
                # Exiting a {% set %} block. Clear the indicator variable.
                set_idx = None
            m = regex.search(r"\s+$", raw, regex.MULTILINE | regex.DOTALL)
            if raw.startswith("-") and m:
                # Right whitespace was stripped. Split off the trailing
                # whitespace into a separate slice. The desired behavior is
                # to behave similarly as the left stripping case above.
                # Note that the stakes are a bit different, because lex()
                # hasn't *omitted* any characters from the strings it
                # returns, it has simply grouped them differently than we
                # want.
                trailing_chars = len(m.group(0))
                if block_type.startswith("block_"):
                    alternate_code = self._remove_block_whitespace_control(
                        str_buff[:-trailing_chars])
                result.append(
                    RawFileSlice(
                        str_buff[:-trailing_chars],
                        block_type,
                        idx,
                        block_subtype,
                    ))
                self.raw_slice_info[result[-1]] = RawSliceInfo(
                    unique_alternate_id, alternate_code, [])
                block_idx = len(result) - 1
                idx += len(str_buff) - trailing_chars
                result.append(
                    RawFileSlice(
                        str_buff[-trailing_chars:],
                        "literal",
                        idx,
                    ))
                self.raw_slice_info[result[-1]] = RawSliceInfo("", "", [])
                idx += trailing_chars
            else:
                if block_type.startswith("block_"):
                    alternate_code = self._remove_block_whitespace_control(
                        str_buff)
                result.append(
                    RawFileSlice(
                        str_buff,
                        block_type,
                        idx,
                        block_subtype,
                    ))
                self.raw_slice_info[result[-1]] = RawSliceInfo(
                    unique_alternate_id, alternate_code, [])
                block_idx = len(result) - 1
                idx += len(str_buff)
            if block_type == "block_start" and trimmed_content.split()[0] in (
                "for",
                "if",
            ):
                stack.append(block_idx)
            elif block_type == "block_mid":
                # Record potential forward jump over this block.
                self.raw_slice_info[
                    result[stack[-1]]].next_slice_indices.append(block_idx)
                stack.pop()
                stack.append(block_idx)
            elif block_type == "block_end" and trimmed_content.split()[0] in (
                "endfor",
                "endif",
            ):
                # Record potential forward jump over this block.
                self.raw_slice_info[
                    result[stack[-1]]].next_slice_indices.append(block_idx)
                if result[stack[-1]].slice_subtype == "loop":
                    # Record potential backward jump to the loop beginning.
                    self.raw_slice_info[
                        result[block_idx]].next_slice_indices.append(
                            stack[-1] + 1)
                stack.pop()
            str_buff = ""
    return result
def analyze(self, make_template: Callable[[str], Template]) -> JinjaTracer:
    """Slice template in jinja."""
    # str_buff and str_parts are two ways we keep track of tokens received
    # from Jinja. str_buff concatenates them together, while str_parts
    # accumulates the individual strings. We generally prefer using
    # str_parts. That's because Jinja doesn't just split on whitespace, so
    # by keeping tokens as Jinja returns them, the code is more robust.
    # Consider the following:
    #   {% set col= "col1" %}
    # Note there's no space after col. Jinja splits this up for us. If we
    # simply concatenated the parts together and later split on whitespace,
    # we'd need some ugly, fragile logic to handle various whitespace
    # possibilities:
    #   {% set col= "col1" %}
    #   {% set col = "col1" %}
    #   {% set col ="col1" %}
    # By using str_parts and letting Jinja handle this, it just works.
    str_buff = ""
    str_parts = []

    # https://jinja.palletsprojects.com/en/2.11.x/api/#jinja2.Environment.lex
    block_idx = 0
    last_elem_type = None
    for _, elem_type, raw in self.env.lex(self.raw_str):
        if last_elem_type == "block_end" or elem_type == "block_start":
            block_idx += 1
        last_elem_type = elem_type

        if elem_type == "data":
            self.track_literal(raw, block_idx)
            continue
        str_buff += raw
        str_parts.append(raw)

        if elem_type.endswith("_begin"):
            self.handle_left_whitespace_stripping(raw, block_idx)

        raw_slice_info: RawSliceInfo = self.make_raw_slice_info(None, None)
        tag_contents = []
        # raw_end and raw_begin behave a little differently in
        # that the whole tag shows up in one go rather than getting
        # parts of the tag at a time.
        m_open = None
        m_close = None
        if elem_type.endswith("_end") or elem_type == "raw_begin":
            block_type = self.block_types[elem_type]
            block_subtype = None
            # Handle starts and ends of blocks
            if block_type in ("block", "templated"):
                m_open = self.re_open_tag.search(str_parts[0])
                m_close = self.re_close_tag.search(str_parts[-1])
                if m_open and m_close:
                    tag_contents = self.extract_tag_contents(
                        str_parts, m_close, m_open, str_buff)

                if block_type == "block" and tag_contents:
                    block_type, block_subtype = self.extract_block_type(
                        tag_contents[0], block_subtype)
                if block_type == "templated" and tag_contents:
                    assert m_open and m_close
                    raw_slice_info = self.track_templated(
                        m_open, m_close, tag_contents)
            raw_slice_info_temp = self.update_inside_set_call_macro_or_block(
                block_type, tag_contents, m_open, m_close, tag_contents)
            if raw_slice_info_temp:
                raw_slice_info = raw_slice_info_temp
            m_strip_right = regex.search(
                r"\s+$", raw, regex.MULTILINE | regex.DOTALL)
            if (
                elem_type.endswith("_end")
                and raw.startswith("-")
                and m_strip_right
            ):
                # Right whitespace was stripped after closing block. Split
                # off the trailing whitespace into a separate slice. The
                # desired behavior is to behave similarly as the left
                # stripping case. Note that the stakes are a bit lower here,
                # because lex() hasn't *omitted* any characters from the
                # strings it returns, it has simply grouped them differently
                # than we want.
                trailing_chars = len(m_strip_right.group(0))
                self.raw_sliced.append(
                    RawFileSlice(
                        str_buff[:-trailing_chars],
                        block_type,
                        self.idx_raw,
                        block_subtype,
                        block_idx,
                    ))
                self.raw_slice_info[self.raw_sliced[-1]] = raw_slice_info
                slice_idx = len(self.raw_sliced) - 1
                self.idx_raw += len(str_buff) - trailing_chars
                self.raw_sliced.append(
                    RawFileSlice(
                        str_buff[-trailing_chars:],
                        "literal",
                        self.idx_raw,
                        None,
                        block_idx,
                    ))
                self.raw_slice_info[
                    self.raw_sliced[-1]] = self.slice_info_for_literal(0)
                self.idx_raw += trailing_chars
            else:
                self.raw_sliced.append(
                    RawFileSlice(
                        str_buff,
                        block_type,
                        self.idx_raw,
                        block_subtype,
                        block_idx,
                    ))
                self.raw_slice_info[self.raw_sliced[-1]] = raw_slice_info
                slice_idx = len(self.raw_sliced) - 1
                self.idx_raw += len(str_buff)
            if block_type.startswith("block"):
                self.track_block_start(block_type, tag_contents[0])
                self.track_block_end(block_type, tag_contents[0])
                self.update_next_slice_indices(
                    slice_idx, block_type, tag_contents[0])
            str_buff = ""
            str_parts = []
    return JinjaTracer(
        self.raw_str,
        self.raw_sliced,
        self.raw_slice_info,
        self.sliced_file,
        make_template,
    )
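As a footnote to the str_parts discussion at the top of analyze(): a standalone sketch (jinja2 public API only, made-up tag strings) showing that Jinja's lexer already splits tag contents into tokens regardless of spacing, which is why keeping the parts as Jinja returns them avoids fragile whitespace handling.

from jinja2 import Environment

env = Environment()
variants = (
    '{% set col= "col1" %}',
    '{% set col = "col1" %}',
    '{% set col ="col1" %}',
)
for variant in variants:
    tokens = [value for _, token_type, value in env.lex(variant)]
    # Regardless of spacing, "set", "col", "=" and the string arrive as
    # separate lexer tokens between block_begin and block_end.
    print(tokens)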
"""Tests for the raw_file_slices module.""" import pytest from sqlfluff.utils.functional import raw_file_slices from sqlfluff.core.templaters.base import RawFileSlice rs_templated_abc = RawFileSlice("{{abc}}", "templated", 0) rs_templated_def = RawFileSlice("{{def}}", "templated", 0) rs_literal_abc = RawFileSlice("abc", "literal", 0) @pytest.mark.parametrize( ["input", "expected"], [ [ raw_file_slices.RawFileSlices(rs_templated_abc, templated_file=None), True, ], [ raw_file_slices.RawFileSlices(rs_templated_def, templated_file=None), False, ], [ raw_file_slices.RawFileSlices( rs_templated_abc, rs_templated_def, templated_file=None), False, ], ], )