Example #1
    def _check_for_wrapped(
        cls,
        slices: List[TemplatedFileSlice],
        templated_str: str,
        unwrap_wrapped: bool = True,
    ) -> Tuple[List[TemplatedFileSlice], str]:
        """Identify a wrapped query (e.g. dbt test) and handle it.

        If unwrap_wrapped is true, we trim the wrapping from the templated file.
        If unwrap_wrapped is false, we add a slice at start and end.
        """
        if not slices:
            # If there are no slices, return
            return slices, templated_str
        first_slice = slices[0]
        last_slice = slices[-1]

        if unwrap_wrapped:
            # If we're unwrapping, there is no need to edit the slices, but we do need to trim
            # the templated string. We should expect that the template will need to be re-sliced
            # but we should assume that the function calling this one will deal with that
            # eventuality.
            return (
                slices,
                templated_str[
                    first_slice.templated_slice.start : last_slice.templated_slice.stop
                ],
            )

        if (
            first_slice.source_slice.start == 0
            and first_slice.templated_slice.start != 0
        ):
            # This means that there is text at the start of the templated file which doesn't exist
            # in the raw file. Handle this by adding a templated slice (though it's not really templated)
            # between 0 and 0 in the raw, and 0 and the current first slice start index in the templated.
            slices.insert(
                0,
                TemplatedFileSlice(
                    "templated",
                    slice(0, 0),
                    slice(0, first_slice.templated_slice.start),
                ),
            )
        if last_slice.templated_slice.stop != len(templated_str):
            #  This means that there is text at the end of the templated file which doesn't exist
            #  in the raw file. Handle this by adding a templated slice beginning and ending at the
            #  end of the raw, and the current last slice stop and file end in the templated.
            slices.append(
                TemplatedFileSlice(
                    "templated",
                    slice(last_slice.source_slice.stop, last_slice.source_slice.stop),
                    slice(last_slice.templated_slice.stop, len(templated_str)),
                )
            )
        return slices, templated_str
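
The trimming behaviour is easiest to see on a toy input. The sketch below is not sqlfluff code: it uses a namedtuple stand-in with the same three fields as TemplatedFileSlice and reproduces only the unwrap_wrapped=True branch, where the slices are left alone and the templated string is cut down to the span they cover.

from collections import namedtuple

# Stand-in with the same fields as sqlfluff's TemplatedFileSlice (assumed order).
TemplatedFileSlice = namedtuple(
    "TemplatedFileSlice", ["slice_type", "source_slice", "templated_slice"]
)

# Raw file "SELECT 1" rendered inside a dbt-test style wrapper.
templated_str = "select * from (SELECT 1) dbt_internal_test"
slices = [TemplatedFileSlice("literal", slice(0, 8), slice(15, 23))]

# unwrap_wrapped=True: keep the slices, trim the templated string to the
# span covered by the first and last slice.
first, last = slices[0], slices[-1]
trimmed = templated_str[first.templated_slice.start : last.templated_slice.stop]
print(trimmed)  # -> "SELECT 1"
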
Example #2
 def coalesce(self):
     """Coalesce this whole slice into a single one. Brutally."""
     return TemplatedFileSlice(
         PythonTemplater._coalesce_types(self.slice_buffer),
         self.source_slice,
         self.templated_slice,
     )
Example #3
 def try_simple(self):
     """Try to turn this intermediate slice into a simple slice."""
     # Yield anything simple
     if len(self.slice_buffer) == 1:
         return TemplatedFileSlice(
             self.slice_buffer[0].slice_type,
             self.source_slice,
             self.templated_slice,
         )
     else:
         raise ValueError("IntermediateFileSlice is not simple!")
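
Taken together, coalesce() and try_simple() decide how an IntermediateFileSlice collapses: a single buffered sub-slice keeps its own type, anything else is squashed to one dominant type. The stand-alone sketch below mimics that decision; the dominance rule is an assumption for illustration, since PythonTemplater._coalesce_types itself is not shown above.

from collections import namedtuple

RawFileSlice = namedtuple("RawFileSlice", ["raw", "slice_type", "source_idx"])

def summarise(slice_buffer):
    """One buffered sub-slice is 'simple'; otherwise coalesce to a dominant type."""
    if len(slice_buffer) == 1:
        return slice_buffer[0].slice_type  # the try_simple() case
    # Assumed dominance rule: any templated content makes the whole span templated.
    types = {s.slice_type for s in slice_buffer}
    return "templated" if "templated" in types else "literal"

print(summarise([RawFileSlice("foo", "literal", 0)]))        # -> literal
print(summarise([RawFileSlice("foo", "literal", 0),
                 RawFileSlice("{{ x }}", "templated", 3)]))  # -> templated
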
Example #4
File: tracer.py  Project: sti0/sqlfluff
 def record_trace(self,
                  target_slice_length,
                  slice_idx=None,
                  slice_type=None):
     """Add the specified (default: current) location to the trace."""
     if slice_idx is None:
         slice_idx = self.program_counter
     if slice_type is None:
         slice_type = self.raw_sliced[slice_idx].slice_type
     self.sliced_file.append(
         TemplatedFileSlice(
             slice_type,
             slice(
                 self.raw_sliced[slice_idx].source_idx,
                  self.raw_sliced[slice_idx + 1].source_idx
                  if slice_idx + 1 < len(self.raw_sliced)
                  else len(self.raw_str),
             ),
             slice(self.source_idx, self.source_idx + target_slice_length),
         ))
     if slice_type in ("literal", "templated"):
         self.source_idx += target_slice_length
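
A rough stand-alone sketch of the same bookkeeping (namedtuple stand-ins rather than the real JinjaTracer): only slice types that emit output advance the templated position, so block markers end up with zero-length templated slices. Note that in record_trace() above, self.source_idx plays the role of this templated position.

from collections import namedtuple

RawFileSlice = namedtuple("RawFileSlice", ["raw", "slice_type", "source_idx"])
TemplatedFileSlice = namedtuple(
    "TemplatedFileSlice", ["slice_type", "source_slice", "templated_slice"]
)

raw_str = "a{% if x %}b{% endif %}"
raw_sliced = [
    RawFileSlice("a", "literal", 0),
    RawFileSlice("{% if x %}", "block_start", 1),
    RawFileSlice("b", "literal", 11),
    RawFileSlice("{% endif %}", "block_end", 12),
]

sliced_file, templated_idx = [], 0
for idx, rs in enumerate(raw_sliced):
    source_stop = (
        raw_sliced[idx + 1].source_idx if idx + 1 < len(raw_sliced) else len(raw_str)
    )
    # Block markers produce no output, so their templated slice is zero length.
    out_len = len(rs.raw) if rs.slice_type in ("literal", "templated") else 0
    sliced_file.append(
        TemplatedFileSlice(
            rs.slice_type,
            slice(rs.source_idx, source_stop),
            slice(templated_idx, templated_idx + out_len),
        )
    )
    templated_idx += out_len

for s in sliced_file:
    print(s)
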
Example #5
    def _split_uniques_coalesce_rest(
        cls,
        split_file: List[IntermediateFileSlice],
        raw_occurrences: Dict[str, List[int]],
        templ_occurrences: Dict[str, List[int]],
        templated_str: str,
    ) -> Iterator[TemplatedFileSlice]:
        """Within each of the compound sections split on unique literals.

        For everything else we coalesce to the dominant type.

        Returns:
            Iterable of the type of segment, the slice in the raw file
                and the slice in the templated file.

        """
        # A buffer to capture tail segments
        tail_buffer: List[TemplatedFileSlice] = []

        templater_logger.debug("    _split_uniques_coalesce_rest: %s", split_file)

        for int_file_slice in split_file:
            # Yield anything from the tail buffer
            if tail_buffer:
                templater_logger.debug(
                    "        Yielding Tail Buffer [start]: %s", tail_buffer
                )
                yield from tail_buffer
                tail_buffer = []

            # Check whether we're handling a zero length slice.
            if (
                int_file_slice.templated_slice.stop
                - int_file_slice.templated_slice.start
                == 0
            ):
                point_combo = int_file_slice.coalesce()
                templater_logger.debug(
                    "        Yielding Point Combination: %s", point_combo
                )
                yield point_combo
                continue

            # Yield anything simple
            try:
                simple_elem = int_file_slice.try_simple()
                templater_logger.debug("        Yielding Simple: %s", simple_elem)
                yield simple_elem
                continue
            except ValueError:
                pass

            # Trim ends and overwrite the current working copy.
            head_buffer, int_file_slice, tail_buffer = int_file_slice.trim_ends(
                templated_str=templated_str
            )
            if head_buffer:
                yield from head_buffer
            # Have we consumed the whole thing?
            if not int_file_slice.slice_buffer:
                continue

            # Try to yield simply again (post trim)
            try:
                simple_elem = int_file_slice.try_simple()
                templater_logger.debug("        Yielding Simple: %s", simple_elem)
                yield simple_elem
                continue
            except ValueError:
                pass

            templater_logger.debug("        Intermediate Slice: %s", int_file_slice)
            # Generate the coalesced version in case we need it
            coalesced = int_file_slice.coalesce()

            # Look for anchors
            raw_occs = cls._filter_occurrences(
                int_file_slice.source_slice, raw_occurrences
            )
            templ_occs = cls._filter_occurrences(
                int_file_slice.templated_slice, templ_occurrences
            )
            # Do we have any uniques to split on?
            # NB: We use `get` on the templated occurrences, because it's possible
            # that because of an if statement, something is in the source, but
            # not in the templated at all. In that case, we shouldn't use it.
            one_way_uniques = [
                key
                for key in raw_occs.keys()
                if len(raw_occs[key]) == 1 and len(templ_occs.get(key, [])) >= 1
            ]
            two_way_uniques = [
                key for key in one_way_uniques if len(templ_occs[key]) == 1
            ]
            # if we don't have anything to anchor on, then just return (coalescing types)
            if not raw_occs or not templ_occs or not one_way_uniques:
                templater_logger.debug(
                    "        No Anchors or Uniques. Yielding Whole: %s", coalesced
                )
                yield coalesced
                continue

            # Deal with the inner segment itself.
            templater_logger.debug(
                "        Intermediate Slice [post trim]: %s: %r",
                int_file_slice,
                templated_str[int_file_slice.templated_slice],
            )
            templater_logger.debug("        One Way Uniques: %s", one_way_uniques)
            templater_logger.debug("        Two Way Uniques: %s", two_way_uniques)

            # Hang onto the starting position, which we'll advance as we go.
            starts = (
                int_file_slice.source_slice.start,
                int_file_slice.templated_slice.start,
            )

            # Deal with two way uniques first, because they are easier.
            # If we do find any we use recursion, because we'll want to do
            # all of the above checks again.
            if two_way_uniques:
                # Yield the uniques and coalesce anything between.
                bookmark_idx = 0
                for idx, raw_slice in enumerate(int_file_slice.slice_buffer):
                    pos = 0
                    unq: Optional[str] = None
                    # Does this element contain one of our uniques? If so, where?
                    for unique in two_way_uniques:
                        if unique in raw_slice.raw:
                            pos = raw_slice.raw.index(unique)
                            unq = unique

                    if unq:
                        # Yes it does. Handle it.

                        # Get the position of the unique section.
                        unique_position = (
                            raw_occs[unq][0],
                            templ_occs[unq][0],
                        )
                        templater_logger.debug(
                            "            Handling Unique: %r, %s, %s, %r",
                            unq,
                            pos,
                            unique_position,
                            raw_slice,
                        )

                        # Handle full slices up to this one
                        if idx > bookmark_idx:
                            # Recurse to deal with any loops separately
                            yield from cls._split_uniques_coalesce_rest(
                                [
                                    IntermediateFileSlice(
                                        "compound",
                                        # slice up to this unique
                                        slice(starts[0], unique_position[0] - pos),
                                        slice(starts[1], unique_position[1] - pos),
                                        int_file_slice.slice_buffer[bookmark_idx:idx],
                                    )
                                ],
                                raw_occs,
                                templ_occs,
                                templated_str,
                            )

                        # Handle any potential partial slice if we're part way through this one.
                        if pos > 0:
                            yield TemplatedFileSlice(
                                raw_slice.slice_type,
                                slice(unique_position[0] - pos, unique_position[0]),
                                slice(unique_position[1] - pos, unique_position[1]),
                            )

                        # Handle the unique itself and update the bookmark
                        starts = (
                            unique_position[0] + len(unq),
                            unique_position[1] + len(unq),
                        )
                        yield TemplatedFileSlice(
                            raw_slice.slice_type,
                            slice(unique_position[0], starts[0]),
                            slice(unique_position[1], starts[1]),
                        )
                        # Move the bookmark after this position
                        bookmark_idx = idx + 1

                        # Handle any remnant after the unique.
                        if raw_slice.raw[pos + len(unq) :]:
                            remnant_length = len(raw_slice.raw) - (len(unq) + pos)
                            _starts = starts
                            starts = (
                                starts[0] + remnant_length,
                                starts[1] + remnant_length,
                            )
                            yield TemplatedFileSlice(
                                raw_slice.slice_type,
                                slice(_starts[0], starts[0]),
                                slice(_starts[1], starts[1]),
                            )

                if bookmark_idx == 0:  # pragma: no cover
                    # This is a SAFETY VALVE. In Theory we should never be here
                    # and if we are it implies an error elsewhere. This clause
                    # should stop any potential infinite recursion in its tracks
                    # by simply classifying the whole of the current block as
                    # templated and just stopping here.
                    # Bugs triggering this eventuality have been observed in 0.4.0.
                    templater_logger.info(
                        "        Safety Value Info: %s, %r",
                        two_way_uniques,
                        templated_str[int_file_slice.templated_slice],
                    )
                    templater_logger.warning(
                        "        Python templater safety value unexpectedly triggered. "
                        "Please report your raw and compiled query on github for debugging."
                    )
                    # NOTE: If a bug is reported here, this will incorrectly
                    # classify more of the query as "templated" than it should.
                    yield coalesced
                    continue

                # At the end of the loop deal with any remaining slices.
                # The above "Safety Valve"TM should keep us safe from infinite
                # recursion.
                if len(int_file_slice.slice_buffer) > bookmark_idx:
                    # Recurse to deal with any loops separately
                    yield from cls._split_uniques_coalesce_rest(
                        [
                            IntermediateFileSlice(
                                "compound",
                                # Slicing is easy here, we have no choice
                                slice(starts[0], int_file_slice.source_slice.stop),
                                slice(starts[1], int_file_slice.templated_slice.stop),
                                # Calculate the subsection to deal with.
                                int_file_slice.slice_buffer[
                                    bookmark_idx : len(int_file_slice.slice_buffer)
                                ],
                            )
                        ],
                        raw_occs,
                        templ_occs,
                        templated_str,
                    )
                # We continue here because the buffer should be exhausted,
                # and if there's more to do we'll do it in the recursion.
                continue

            # If we get here, then there ARE uniques, but they are only ONE WAY.
            # This means loops. Loops are tricky.
            # We're very unlikely to get here (impossible?) with just python
            # formatting, but this class is also the base for the jinja templater
            # (and others?) so it may be used there.
            # One way uniques give us landmarks to try and estimate what to do with them.
            owu_templ_tuples = cls._sorted_occurrence_tuples(
                {key: templ_occs[key] for key in one_way_uniques}
            )

            templater_logger.debug(
                "        Handling One Way Uniques: %s", owu_templ_tuples
            )

            # Hang onto our *ending* position too from here.
            stops = (
                int_file_slice.source_slice.stop,
                int_file_slice.templated_slice.stop,
            )

            # OWU in this context refers to "One Way Unique"
            this_owu_idx: Optional[int] = None
            last_owu_idx: Optional[int] = None
            # Iterate through occurrence tuples of the one-way uniques.
            for raw, template_idx in owu_templ_tuples:
                raw_idx = raw_occs[raw][0]
                raw_len = len(raw)

                # Find the index of this owu in the slice_buffer, store the previous
                last_owu_idx = this_owu_idx
                try:
                    this_owu_idx = next(
                        idx
                        for idx, slc in enumerate(int_file_slice.slice_buffer)
                        if slc.raw == raw
                    )
                except StopIteration:
                    # This can happen if the unique was detected, but was introduced
                    # by a templater step. This is a false positive. Skip and move on.
                    templater_logger.info(
                        "One Way Unique %r not found in slice buffer. Skipping...", raw
                    )
                    continue

                templater_logger.debug(
                    "        Handling OWU: %r @%s (raw @%s) [this_owu_idx: %s, last_owu_dx: %s]",
                    raw,
                    template_idx,
                    raw_idx,
                    this_owu_idx,
                    last_owu_idx,
                )

                if template_idx > starts[1]:
                    # Yield the bit before this literal. We yield it
                    # all as a tuple, because if we could do any better
                    # we would have done it by now.

                    # Can we identify a meaningful portion of the patch
                    # to recurse a split?
                    sub_section: Optional[List[RawFileSlice]] = None
                    # If it's the start, the slicing is easy
                    if (
                        starts[1] == int_file_slice.templated_slice.stop
                    ):  # pragma: no cover TODO?
                        sub_section = int_file_slice.slice_buffer[:this_owu_idx]
                    # If we are AFTER the previous in the template, then it's
                    # also easy. [assuming it's not the same owu]
                    elif raw_idx > starts[0] and last_owu_idx != this_owu_idx:
                        if last_owu_idx:
                            sub_section = int_file_slice.slice_buffer[
                                last_owu_idx + 1 : this_owu_idx
                            ]
                        else:
                            sub_section = int_file_slice.slice_buffer[:this_owu_idx]

                    # If we succeeded in one of the above, we can also recurse
                    # and be more intelligent with the other sections.
                    if sub_section:
                        # If we identified a sub section above, we can recurse
                        # into it with well defined source and templated bounds.
                        templater_logger.debug(
                            "        Attempting Subsplit [pre]: %s, %r",
                            sub_section,
                            templated_str[slice(starts[1], template_idx)],
                        )
                        yield from cls._split_uniques_coalesce_rest(
                            [
                                IntermediateFileSlice(
                                    "compound",
                                    # Slicing is easy here, we have no choice
                                    slice(starts[0], raw_idx),
                                    slice(starts[1], template_idx),
                                    sub_section,
                                )
                            ],
                            raw_occs,
                            templ_occs,
                            templated_str,
                        )
                    # Otherwise, it's the tricky case.
                    else:
                        # In this case we've found a literal, coming AFTER another
                        # in the templated version, but BEFORE (or the same) in the
                        # raw version. This only happens during loops, but it means
                        # that identifying exactly what the intervening bit refers
                        # to is a bit arbitrary. In this case we're going to OVER
                        # estimate and refer to the whole loop segment.

                        # TODO: Maybe this should make two chunks instead, one
                        # working backward, and one working forward. But that's
                        # a job for another day.

                        # First find where we are starting this remainder
                        # in the template (as an index in the buffer).
                        # Any segments *after* cur_idx are involved.
                        if last_owu_idx is None or last_owu_idx + 1 >= len(
                            int_file_slice.slice_buffer
                        ):
                            cur_idx = 0  # pragma: no cover
                        else:
                            cur_idx = last_owu_idx + 1

                        # We need to know how many block_ends are after this.
                        block_ends = sum(
                            slc[1] == "block_end"
                            for slc in int_file_slice.slice_buffer[cur_idx:]
                        )
                        # We can allow up to this number of preceding block starts
                        block_start_indices = [
                            idx
                            for idx, slc in enumerate(
                                int_file_slice.slice_buffer[:cur_idx]
                            )
                            if slc[1] == "block_start"
                        ]

                        # Trim anything which we're not allowed to use.
                        if len(block_start_indices) > block_ends:
                            offset = block_start_indices[-1 - block_ends] + 1
                            elem_sub_buffer = int_file_slice.slice_buffer[offset:]
                            cur_idx -= offset
                        else:
                            elem_sub_buffer = int_file_slice.slice_buffer

                        # We also need to know whether any of the *starting*
                        # segments (i.e. those before cur_idx) are involved.
                        include_start = raw_idx > elem_sub_buffer[0][2]

                        # The ending point of this slice, is already decided.
                        end_point = elem_sub_buffer[-1].end_source_idx()

                        # If the unique sits after the start of the sub buffer, the
                        # earlier segments are involved, so include the beginning.
                        if include_start:
                            start_point = elem_sub_buffer[0].source_idx
                        # Otherwise we've looped round; start from cur_idx onward.
                        else:
                            start_point = elem_sub_buffer[cur_idx].source_idx

                        tricky = TemplatedFileSlice(
                            "templated",
                            slice(start_point, end_point),
                            slice(starts[1], template_idx),
                        )

                        templater_logger.debug(
                            "        Yielding Tricky Case : %s",
                            tricky,
                        )

                        yield tricky

                # Yield the literal
                owu_literal_slice = TemplatedFileSlice(
                    "literal",
                    slice(raw_idx, raw_idx + raw_len),
                    slice(template_idx, template_idx + raw_len),
                )
                templater_logger.debug(
                    "    Yielding Unique: %r, %s",
                    raw,
                    owu_literal_slice,
                )
                yield owu_literal_slice
                # Update our bookmark
                starts = (
                    raw_idx + raw_len,
                    template_idx + raw_len,
                )

            if starts[1] < stops[1] and last_owu_idx is not None:
                # Yield the end bit
                templater_logger.debug("        Attempting Subsplit [post].")
                yield from cls._split_uniques_coalesce_rest(
                    [
                        IntermediateFileSlice(
                            "compound",
                            # Slicing is easy here, we have no choice
                            slice(raw_idx + raw_len, stops[0]),
                            slice(starts[1], stops[1]),
                            int_file_slice.slice_buffer[last_owu_idx + 1 :],
                        )
                    ],
                    raw_occs,
                    templ_occs,
                    templated_str,
                )

        # Yield anything from the tail buffer
        if tail_buffer:
            templater_logger.debug(
                "        Yielding Tail Buffer [end]: %s", tail_buffer
            )
            yield from tail_buffer
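
The heart of the method is anchoring on literals that occur exactly once in the raw file and exactly once in the templated output ("two way uniques"). The toy snippet below illustrates only that anchor-finding step with plain dicts and strings; it is not the sqlfluff implementation.

def occurrences(literals, text):
    """Map each literal to the character indices where it occurs in text."""
    return {
        lit: [i for i in range(len(text)) if text.startswith(lit, i)]
        for lit in literals
    }

raw_str = "SELECT {{ col }} FROM {{ tbl }} WHERE x = 1"
templated_str = "SELECT a FROM my_table WHERE x = 1"
literals = ["SELECT ", " FROM ", " WHERE x = 1"]

raw_occs = occurrences(literals, raw_str)
templ_occs = occurrences(literals, templated_str)

# One way uniques: exactly once in the raw file, at least once when templated.
one_way = [
    k for k in raw_occs
    if len(raw_occs[k]) == 1 and len(templ_occs.get(k, [])) >= 1
]
# Two way uniques: additionally exactly once in the templated output.
two_way = [k for k in one_way if len(templ_occs[k]) == 1]
print(two_way)  # all three literals are unambiguous anchors here
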
Example #6
    def _trim_end(
        self, templated_str: str, target_end: str = "head"
    ) -> Tuple["IntermediateFileSlice", List[TemplatedFileSlice]]:
        """Trim the ends of a intermediate segment."""
        target_idx = 0 if target_end == "head" else -1
        terminator_types = (
            ("block_start",) if target_end == "head" else ("block_end",)
        )
        main_source_slice = self.source_slice
        main_templated_slice = self.templated_slice
        slice_buffer = self.slice_buffer

        end_buffer = []

        # Yield any leading literals, comments or blocks.
        while len(slice_buffer) > 0 and slice_buffer[target_idx].slice_type in (
            "literal",
            "block_start",
            "block_end",
            "comment",
        ):
            focus = slice_buffer[target_idx]
            templater_logger.debug("            %s Focus: %s", target_end, focus)
            # Is it a zero length item?
            if focus.slice_type in ("block_start", "block_end", "comment"):
                # Only add the length in the source space.
                templated_len = 0
            else:
                # Assume it's a literal, check the literal actually matches.
                templated_len = len(focus.raw)
                if target_end == "head":
                    check_slice = slice(
                        main_templated_slice.start,
                        main_templated_slice.start + templated_len,
                    )
                else:
                    check_slice = slice(
                        main_templated_slice.stop - templated_len,
                        main_templated_slice.stop,
                    )

                if templated_str[check_slice] != focus.raw:
                    # It doesn't match, we can't use it. break
                    templater_logger.debug("                Nope")
                    break

            # If it does match, set up the new slices
            if target_end == "head":
                division = (
                    main_source_slice.start + len(focus.raw),
                    main_templated_slice.start + templated_len,
                )
                new_slice = TemplatedFileSlice(
                    focus.slice_type,
                    slice(main_source_slice.start, division[0]),
                    slice(main_templated_slice.start, division[1]),
                )
                end_buffer.append(new_slice)
                main_source_slice = slice(division[0], main_source_slice.stop)
                main_templated_slice = slice(division[1], main_templated_slice.stop)
            else:
                division = (
                    main_source_slice.stop - len(focus.raw),
                    main_templated_slice.stop - templated_len,
                )
                new_slice = TemplatedFileSlice(
                    focus.slice_type,
                    slice(division[0], main_source_slice.stop),
                    slice(division[1], main_templated_slice.stop),
                )
                end_buffer.insert(0, new_slice)
                main_source_slice = slice(main_source_slice.start, division[0])
                main_templated_slice = slice(main_templated_slice.start, division[1])

            slice_buffer.pop(target_idx)
            if focus.slice_type in terminator_types:
                break
        # Return a new Intermediate slice and the buffer.
        # NB: Don't check size of slice buffer here. We can do that later.
        new_intermediate = self.__class__(
            "compound", main_source_slice, main_templated_slice, slice_buffer
        )
        return new_intermediate, end_buffer
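
The key invariant while trimming is that a literal may only be peeled off an end if the templated string really contains it at that position; zero-length types (block markers, comments) are always safe. A hypothetical helper sketching just the head-side check:

from collections import namedtuple

RawFileSlice = namedtuple("RawFileSlice", ["raw", "slice_type", "source_idx"])

def can_trim_head(slice_buffer, templated_str, templated_start):
    """Can the first buffered slice safely be split off the head?"""
    focus = slice_buffer[0]
    if focus.slice_type in ("block_start", "block_end", "comment"):
        return True  # zero length in the templated output, always safe
    # For literals, the templated text must actually match the raw text.
    return templated_str.startswith(focus.raw, templated_start)

buf = [RawFileSlice("SELECT ", "literal", 0)]
print(can_trim_head(buf, "SELECT 1 FROM tbl", 0))       # -> True
print(can_trim_head(buf, "WITH cte AS (SELECT 1)", 0))  # -> False
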
Example #7
File: base_test.py  Project: sti0/sqlfluff
    """Test iter_indices_of_newlines."""
    assert list(iter_indices_of_newlines(raw_str)) == positions


def test__templater_raw():
    """Test the raw templater."""
    t = RawTemplater()
    instr = "SELECT * FROM {{blah}}"
    outstr, _ = t.process(in_str=instr, fname="test")
    assert instr == str(outstr)


SIMPLE_SOURCE_STR = "01234\n6789{{foo}}fo\nbarss"
SIMPLE_TEMPLATED_STR = "01234\n6789x\nfo\nbarss"
SIMPLE_SLICED_FILE = [
    TemplatedFileSlice(*args) for args in [
        ("literal", slice(0, 10, None), slice(0, 10, None)),
        ("templated", slice(10, 17, None), slice(10, 12, None)),
        ("literal", slice(17, 25, None), slice(12, 20, None)),
    ]
]
SIMPLE_RAW_SLICED_FILE = [
    RawFileSlice(*args) for args in [
        ("x" * 10, "literal", 0),
        ("x" * 7, "templated", 10),
        ("x" * 8, "literal", 17),
    ]
]

COMPLEX_SLICED_FILE = [
    TemplatedFileSlice(*args) for args in [
Example #8
         "compound",
         slice(0, 5),
         slice(0, 5),
         [RawFileSlice("{{i}}", "templated", 0)],
     ),
 ),
 # Test Complete Trimming
 (
     IntermediateFileSlice(
         "compound",
         slice(0, 3),
         slice(0, 3),
         [RawFileSlice("foo", "literal", 0)],
     ),
     "foo",
     [TemplatedFileSlice("literal", slice(0, 3), slice(0, 3))],
     [],
     IntermediateFileSlice(
         "compound",
         slice(3, 3),
         slice(3, 3),
         [],
     ),
 ),
 # Test Basic Trimming.
 (
     IntermediateFileSlice(
         "compound",
         slice(0, 11),
         slice(0, 7),
         [
Example #9
    This is part of fix_string().
    """
    with caplog.at_level(logging.DEBUG, logger="sqlfluff.linter"):
        result = LintedFile._slice_source_file_using_patches(
            source_patches, source_only_slices, raw_source_string)
    assert result == expected_result


templated_file_1 = TemplatedFile.from_string("abc")
templated_file_2 = TemplatedFile(
    "{# blah #}{{ foo }}bc",
    "<testing>",
    "abc",
    [
        TemplatedFileSlice("comment", slice(0, 10), slice(0, 0)),
        TemplatedFileSlice("templated", slice(10, 19), slice(0, 1)),
        TemplatedFileSlice("literal", slice(19, 21), slice(1, 3)),
    ],
    [
        RawFileSlice("{# blah #}", "comment", 0),
        RawFileSlice("{{ foo }}", "templated", 10),
        RawFileSlice("bc", "literal", 19),
    ],
)
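
Fixtures like templated_file_2 have to be internally consistent: templated slices should tile the templated string end to end, and literal slices should carry identical text in source and templated space. A quick stand-alone check over the same triples (no sqlfluff API calls):

source_str = "{# blah #}{{ foo }}bc"
templated_str = "abc"
# The same (slice_type, source_slice, templated_slice) triples as templated_file_2.
slices = [
    ("comment", slice(0, 10), slice(0, 0)),
    ("templated", slice(10, 19), slice(0, 1)),
    ("literal", slice(19, 21), slice(1, 3)),
]

# Templated slices tile the templated string end to end.
assert slices[0][2].start == 0 and slices[-1][2].stop == len(templated_str)
for prev, nxt in zip(slices, slices[1:]):
    assert prev[2].stop == nxt[2].start

# Literal slices carry the same text in source and templated space.
for slice_type, src, tpl in slices:
    if slice_type == "literal":
        assert source_str[src] == templated_str[tpl]
print("fixture is internally consistent")
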


@pytest.mark.parametrize(
    "tree,templated_file,expected_result",
    [
        # Trivial example
Example #10
    def process(self,
                *,
                in_str: str,
                fname: str,
                config=None,
                formatter=None) -> Tuple[Optional[TemplatedFile], list]:
        """Process a string and return a TemplatedFile.

        Note that the arguments are enforced as keywords
        because Templaters can have differences in their
        `process` method signature.
        A Templater that only supports reading from a file
        would need the following signature:
            process(*, fname, in_str=None, config=None)
        (arguments are swapped)

        Args:
            in_str (:obj:`str`): The input string.
            fname (:obj:`str`, optional): The filename of this string. This is
                mostly for loading config files at runtime.
            config (:obj:`FluffConfig`): A specific config to use for this
                templating operation. Only necessary for some templaters.
            formatter (:obj:`CallbackFormatter`): Optional object for output.

        """
        context = self.get_context(config)
        template_slices = []
        raw_slices = []
        last_pos_raw, last_pos_templated = 0, 0
        out_str = ""

        regex = context["__bind_param_regex"]
        # when the param has no name, use a 1-based index
        param_counter = 1
        for found_param in regex.finditer(in_str):
            span = found_param.span()
            if "param_name" not in found_param.groupdict():
                param_name = str(param_counter)
                param_counter += 1
            else:
                param_name = found_param["param_name"]
            last_literal_length = span[0] - last_pos_raw
            try:
                replacement = str(context[param_name])
            except KeyError as err:
                # TODO: Add a url here so people can get more help.
                raise SQLTemplaterError(
                    "Failure in placeholder templating: {}. Have you configured your "
                    "variables?".format(err))
            # add the literal to the slices
            template_slices.append(
                TemplatedFileSlice(
                    slice_type="literal",
                    source_slice=slice(last_pos_raw, span[0], None),
                    templated_slice=slice(
                        last_pos_templated,
                        last_pos_templated + last_literal_length,
                        None,
                    ),
                ))
            raw_slices.append(
                RawFileSlice(
                    raw=in_str[last_pos_raw:span[0]],
                    slice_type="literal",
                    source_idx=last_pos_raw,
                ))
            out_str += in_str[last_pos_raw:span[0]]
            # add the current replaced element
            start_template_pos = last_pos_templated + last_literal_length
            template_slices.append(
                TemplatedFileSlice(
                    slice_type="templated",
                    source_slice=slice(span[0], span[1], None),
                    templated_slice=slice(
                        start_template_pos,
                        start_template_pos + len(replacement), None),
                ))
            raw_slices.append(
                RawFileSlice(
                    raw=in_str[span[0]:span[1]],
                    slice_type="templated",
                    source_idx=span[0],
                ))
            out_str += replacement
            # update the indexes
            last_pos_raw = span[1]
            last_pos_templated = start_template_pos + len(replacement)
        # add the last literal, if any
        if len(in_str) > last_pos_raw:
            template_slices.append(
                TemplatedFileSlice(
                    slice_type="literal",
                    source_slice=slice(last_pos_raw, len(in_str), None),
                    templated_slice=slice(
                        last_pos_templated,
                        last_pos_templated + (len(in_str) - last_pos_raw),
                        None,
                    ),
                ))
            raw_slices.append(
                RawFileSlice(
                    raw=in_str[last_pos_raw:],
                    slice_type="literal",
                    source_idx=last_pos_raw,
                ))
            out_str += in_str[last_pos_raw:]
        return (
            TemplatedFile(
                # original string
                source_str=in_str,
                # string after all replacements
                templated_str=out_str,
                # filename
                fname=fname,
                # list of TemplatedFileSlice
                sliced_file=template_slices,
                # list of RawFileSlice, same size
                raw_sliced=raw_slices,
            ),
            [],  # violations, always empty
        )
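
The slicing loop is easier to follow in a cut-down, stand-alone form. The colon-style bind parameter regex and the context below are assumptions for illustration; in the templater itself the pattern comes from the templating context as __bind_param_regex.

import re

# Assumed colon-style bind parameters (e.g. ":user_id") and a toy context.
bind_param_regex = re.compile(r":(?P<param_name>\w+)")
context = {"user_id": "42"}

in_str = "SELECT * FROM users WHERE id = :user_id"
out_str, slices = "", []
last_raw, last_tpl = 0, 0

for match in bind_param_regex.finditer(in_str):
    start, stop = match.span()
    replacement = str(context[match["param_name"]])
    # The literal up to the parameter maps 1:1 between source and output.
    lit_len = start - last_raw
    slices.append(("literal", slice(last_raw, start), slice(last_tpl, last_tpl + lit_len)))
    out_str += in_str[last_raw:start]
    # The parameter itself becomes a "templated" slice of the replacement's length.
    tpl_start = last_tpl + lit_len
    slices.append(("templated", slice(start, stop), slice(tpl_start, tpl_start + len(replacement))))
    out_str += replacement
    last_raw, last_tpl = stop, tpl_start + len(replacement)

# Any trailing literal after the last parameter.
if last_raw < len(in_str):
    slices.append(("literal", slice(last_raw, len(in_str)),
                   slice(last_tpl, last_tpl + len(in_str) - last_raw)))
    out_str += in_str[last_raw:]

print(out_str)  # -> SELECT * FROM users WHERE id = 42
print(slices)
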
Example #11
    def _unsafe_process(self, fname, in_str=None, config=None):
        original_file_path = os.path.relpath(fname, start=os.getcwd())

        # Below, we monkeypatch Environment.from_string() to intercept when dbt
        # compiles (i.e. runs Jinja) to expand the "node" corresponding to fname.
        # We do this to capture the Jinja context at the time of compilation, i.e.:
        # - Jinja Environment object
        # - Jinja "globals" dictionary
        #
        # This info is captured by the "make_template()" function, which in
        # turn is used by our parent class' (JinjaTemplater) slice_file()
        # function.
        old_from_string = Environment.from_string
        try:
            make_template = None

            def from_string(*args, **kwargs):
                """Replaces (via monkeypatch) the jinja2.Environment function."""
                nonlocal make_template
                # Is it processing the node corresponding to fname?
                globals = kwargs.get("globals")
                if globals:
                    model = globals.get("model")
                    if model:
                        if model.get("original_file_path") == original_file_path:
                            # Yes. Capture the important arguments and create
                            # a make_template() function.
                            env = args[0]
                            globals = (
                                args[2] if len(args) >= 3 else kwargs["globals"]
                            )

                            def make_template(in_str):
                                env.add_extension(SnapshotExtension)
                                return env.from_string(in_str, globals=globals)

                return old_from_string(*args, **kwargs)

        finally:
            # Install the monkeypatched function so that compile_node() below
            # triggers it; it is restored to old_from_string once compilation
            # is complete.
            Environment.from_string = from_string

        node = self._find_node(fname, config)

        save_ephemeral_nodes = dict(
            (k, v) for k, v in self.dbt_manifest.nodes.items()
            if v.config.materialized == "ephemeral"
            and not getattr(v, "compiled", False))
        with self.connection():
            node = self.dbt_compiler.compile_node(
                node=node,
                manifest=self.dbt_manifest,
            )

            Environment.from_string = old_from_string

            if hasattr(node, "injected_sql"):
                # If injected SQL is present, it contains a better picture
                # of what will actually hit the database (e.g. with tests).
                # However it's not always present.
                compiled_sql = node.injected_sql
            else:
                compiled_sql = node.compiled_sql

            if not compiled_sql:  # pragma: no cover
                raise SQLTemplaterError(
                    "dbt templater compilation failed silently, check your "
                    "configuration by running `dbt compile` directly.")

            with open(fname) as source_dbt_model:
                source_dbt_sql = source_dbt_model.read()

            n_trailing_newlines = len(source_dbt_sql) - len(
                source_dbt_sql.rstrip("\n"))

            templater_logger.debug(
                "    Trailing newline count in source dbt model: %r",
                n_trailing_newlines,
            )
            templater_logger.debug("    Raw SQL before compile: %r",
                                   source_dbt_sql)
            templater_logger.debug("    Node raw SQL: %r", node.raw_sql)
            templater_logger.debug("    Node compiled SQL: %r", compiled_sql)

            # When using dbt-templater, trailing newlines are ALWAYS REMOVED during
            # compiling. Unless fixed (like below), this will cause:
            #    1. L009 linting errors when running "sqlfluff lint foo_bar.sql"
            #       since the linter will use the compiled code with the newlines
            #       removed.
            #    2. "No newline at end of file" warnings in Git/GitHub since
            #       sqlfluff uses the compiled SQL to write fixes back to the
            #       source SQL in the dbt model.
            # The solution is:
            #    1. Check for trailing newlines before compiling by looking at the
            #       raw SQL in the source dbt file, store the count of trailing
            #       newlines.
            #    2. Append the count from #1 above to the node.raw_sql and
            #       compiled_sql objects, both of which have had the trailing
            #       newlines removed by the dbt-templater.
            node.raw_sql = node.raw_sql + "\n" * n_trailing_newlines
            compiled_sql = compiled_sql + "\n" * n_trailing_newlines

            raw_sliced, sliced_file, templated_sql = self.slice_file(
                source_dbt_sql,
                compiled_sql,
                config=config,
                make_template=make_template,
            )
        # :HACK: If calling compile_node() compiled any ephemeral nodes,
        # restore them to their earlier state. This prevents a runtime error
        # in the dbt "_inject_ctes_into_sql()" function that occurs with
        # 2nd-level ephemeral model dependencies (e.g. A -> B -> C, where
        # both B and C are ephemeral). Perhaps there is a better way to do
        # this, but this seems good enough for now.
        for k, v in save_ephemeral_nodes.items():
            if getattr(self.dbt_manifest.nodes[k], "compiled", False):
                self.dbt_manifest.nodes[k] = v

        if make_template and n_trailing_newlines:
            # Update templated_sql as we updated the other strings above. Update
            # sliced_file to reflect the mapping of the added character(s) back
            # to the raw SQL.
            templated_sql = templated_sql + "\n" * n_trailing_newlines
            sliced_file.append(
                TemplatedFileSlice(
                    slice_type="literal",
                    source_slice=slice(
                        len(source_dbt_sql) - n_trailing_newlines,
                        len(source_dbt_sql)),
                    templated_slice=slice(
                        len(templated_sql) - n_trailing_newlines,
                        len(templated_sql)),
                ))
        return (
            TemplatedFile(
                source_str=source_dbt_sql,
                templated_str=templated_sql,
                fname=fname,
                sliced_file=sliced_file,
                raw_sliced=raw_sliced,
            ),
            # No violations returned in this way.
            [],
        )
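
The trailing newline handling at the end is worth isolating. A minimal stand-alone sketch with toy strings, assuming only that the restored newlines are mapped back to the end of the source file as a literal slice:

source_dbt_sql = "select 1 from {{ ref('a') }}\n\n"
compiled_sql = "select 1 from analytics.a"  # dbt strips the trailing newlines

n_trailing_newlines = len(source_dbt_sql) - len(source_dbt_sql.rstrip("\n"))
templated_sql = compiled_sql + "\n" * n_trailing_newlines

# Map the restored newlines back to the end of the raw SQL as a literal slice.
newline_slice = (
    "literal",
    slice(len(source_dbt_sql) - n_trailing_newlines, len(source_dbt_sql)),
    slice(len(templated_sql) - n_trailing_newlines, len(templated_sql)),
)
print(n_trailing_newlines)  # -> 2
print(newline_slice)
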