예제 #1
0
    def generate_test_segments_func(elems):
        """Build a rough tuple of segments from raw strings, for tests.

        Every string in ``elems`` becomes one segment whose class is chosen
        from its content. The markers ``"<indent>"`` and ``"<dedent>"`` are
        turned into ``Indent`` / ``Dedent`` meta segments placed at the
        current offset; they do not advance that offset. The templated file
        is created from the plain concatenation of ``elems``.

        Not robust; intended as a testing convenience only.
        """

        def pick_segment(text):
            """Return ``(segment class, extra keyword arguments)`` for ``text``."""
            chars = set(text)
            if chars <= {" ", "\t"}:
                return WhitespaceSegment, {}
            if chars <= {"\n"}:
                return NewlineSegment, {}
            if text == "(":
                return SymbolSegment, {"name": "bracket_open"}
            if text == ")":
                return SymbolSegment, {"name": "bracket_close"}
            if text.startswith("--"):
                return CommentSegment, {"name": "inline_comment"}
            if text.startswith('"'):
                return CodeSegment, {"name": "double_quote"}
            if text.startswith("'"):
                return CodeSegment, {"name": "single_quote"}
            return CodeSegment, {}

        segments = []
        templated = TemplatedFile.from_string("".join(elems))
        offset = 0

        for text in elems:
            # Meta markers sit at a single point and consume no characters.
            if text in ("<indent>", "<dedent>"):
                meta_cls = Indent if text == "<indent>" else Dedent
                point = PositionMarker.from_point(offset, offset, templated)
                segments.append(meta_cls(pos_marker=point))
                continue

            seg_cls, extra = pick_segment(text)

            # Source and templated positions are the same span here.
            end = offset + len(text)
            span = slice(offset, end)
            marker = PositionMarker(span, span, templated)
            segments.append(seg_cls(raw=text, pos_marker=marker, **extra))
            offset = end

        return tuple(segments)
예제 #2
0
    def generate_test_segments_func(elems):
        """Build a rough tuple of raw segments from strings, for tests.

        Each string in ``elems`` is wrapped in a class produced by
        ``RawSegment.make`` according to its content. The markers
        ``"<indent>"`` and ``"<dedent>"`` become ``Indent`` / ``Dedent``
        instances instead. Every item is positioned by advancing a fresh
        ``FilePositionMarker`` over the text of the non-marker elements
        seen so far.

        Not robust; intended as a testing convenience only.
        """

        def segment_class_for(text):
            """Create the segment class matching the content of ``text``."""
            chars = set(text)
            if chars <= {" ", "\t"}:
                return RawSegment.make(" ",
                                       name="whitespace",
                                       type="whitespace")
            if chars <= {"\n"}:
                return RawSegment.make("\n", name="newline", type="newline")
            if text == "(":
                return RawSegment.make("(", name="bracket_open", _is_code=True)
            if text == ")":
                return RawSegment.make(")", name="bracket_close", _is_code=True)
            if text.startswith("--"):
                return RawSegment.make("--", name="inline_comment")
            if text.startswith('"'):
                return RawSegment.make('"', name="double_quote", _is_code=True)
            if text.startswith("'"):
                return RawSegment.make("'", name="single_quote", _is_code=True)
            return RawSegment.make("", _is_code=True)

        segments = []
        consumed = ""

        for text in elems:
            # Markers are positioned like anything else but add no text.
            if text in ("<indent>", "<dedent>"):
                meta_cls = Indent if text == "<indent>" else Dedent
                segments.append(
                    meta_cls(FilePositionMarker().advance_by(consumed)))
                continue

            seg_cls = segment_class_for(text)
            segments.append(
                seg_cls(text, FilePositionMarker().advance_by(consumed)))
            consumed = consumed + text

        # Callers get an immutable tuple, never the working list.
        return tuple(segments)
예제 #3
0
    def elements_to_segments(
            self, elements: List[TemplateElement],
            templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
        """Turn lexed elements into segments, adding template placeholders.

        For every element the source slice is derived from its templated
        slice. Each source-only slice that begins exactly where the remaining
        source slice begins is peeled off the front and emitted as a
        ``TemplateSegment`` placeholder, with a conditional ``Dedent`` before
        it and/or a conditional ``Indent`` after it depending on the slice
        type. The element's own segment is appended afterwards.

        Args:
            elements: Lexed elements, each providing ``template_slice`` and
                ``to_segment``.
            templated_file: Provides the templated-to-source slice mapping
                and the source-only slices.

        Returns:
            The generated segments, in order, as a tuple.
        """
        out: List[RawSegment] = []

        lexer_logger.info("Elements to Segments.")
        # Source-only slices are what the placeholders are created from.
        so_slices = templated_file.source_only_slices()
        lexer_logger.info("Source-only slices: %s", so_slices)

        for element in elements:
            remaining = templated_file.templated_slice_to_source_slice(
                element.template_slice)

            # The source slice computed above still includes any source-only
            # slices, so check each of them against its current start.
            for so_slice in so_slices:
                # Starts later in the source: stop, later ones won't match.
                if so_slice.source_idx > remaining.start:
                    break
                # Starts earlier: not relevant to this element.
                if so_slice.source_idx != remaining.start:
                    continue

                lexer_logger.debug(
                    "Found templated section! %s, %s, %s",
                    so_slice.source_slice(),
                    so_slice.slice_type,
                    element.template_slice.start,
                )
                # The placeholder takes the front of the source slice; the
                # rest stays with the element (or the next source-only slice).
                placeholder = slice(remaining.start,
                                    so_slice.end_source_idx())
                remaining = slice(so_slice.end_source_idx(), remaining.stop)

                # TODO: Readjust this to remove .when once ProtoSegment is in.

                # Block ends and mids close the preceding block: dedent first.
                if so_slice.slice_type in ("block_end", "block_mid"):
                    dedent_cls = Dedent.when(template_blocks_indent=True)
                    out.append(
                        dedent_cls(pos_marker=PositionMarker.from_point(
                            placeholder.start,
                            element.template_slice.start,
                            templated_file,
                        )))

                # The placeholder itself is always emitted; it is zero-width
                # in the templated file.
                zero_width = slice(element.template_slice.start,
                                   element.template_slice.start)
                out.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(placeholder, zero_width,
                                                  templated_file),
                        source_str=so_slice.raw,
                        block_type=so_slice.slice_type,
                    ))

                # Block starts and mids open a new block: indent afterwards.
                if so_slice.slice_type in ("block_start", "block_mid"):
                    indent_cls = Indent.when(template_blocks_indent=True)
                    out.append(
                        indent_cls(pos_marker=PositionMarker.from_point(
                            placeholder.stop,
                            element.template_slice.start,
                            templated_file,
                        )))

            # Finally the element's own segment, on what is left of the slice.
            own_marker = PositionMarker(remaining, element.template_slice,
                                        templated_file)
            out.append(element.to_segment(pos_marker=own_marker))

        return tuple(out)
예제 #4
0
파일: lexer.py 프로젝트: sreev/sqlfluff
    def enrich_segments(
        segment_buff: Tuple[BaseSegment, ...], templated_file: TemplatedFile
    ) -> Tuple[BaseSegment, ...]:
        """Enrich the segments using the templated file.

        We use the mapping in the template to provide positions
        in the source file. Each incoming segment has its ``pos_marker``
        replaced in place by an ``EnrichedFilePositionMarker``, and
        placeholder/meta segments are inserted for source-only slices.

        Args:
            segment_buff: The segments to enrich; their ``pos_marker.char_pos``
                and ``raw`` are used to build the templated slice.
            templated_file: Provides the templated-to-source mapping, the
                source-only slices, line/position lookup and literal checks.

        Returns:
            A new tuple holding the (mutated) input segments interleaved with
            any newly created ``TemplateSegment``, ``Dedent`` and ``Indent``
            segments.
        """
        # Make a new buffer to hold the enriched segments.
        # We need a new buffer to hold the new meta segments
        # introduced.
        new_segment_buff = []
        # Get the templated slices to re-insert tokens for them
        source_only_slices = templated_file.source_only_slices()

        lexer_logger.info(
            "Enriching Segments. Source-only slices: %s", source_only_slices
        )

        for segment in segment_buff:
            # The templated slice runs from the segment's character position
            # for the length of its raw string.
            templated_slice = slice(
                segment.pos_marker.char_pos,
                segment.pos_marker.char_pos + len(segment.raw),
            )
            source_slice = templated_file.templated_slice_to_source_slice(
                templated_slice
            )

            # At this stage, templated slices will be INCLUDED in the source slice,
            # so we should consider whether we've captured any. If we have then
            # we need to re-evaluate whether it's a literal or not.

            for source_only_slice in source_only_slices:
                # A slice starting later in the source ends the search.
                if source_only_slice.source_idx > source_slice.start:
                    break
                elif source_only_slice.source_idx == source_slice.start:
                    lexer_logger.debug(
                        "Found templated section! %s, %s, %s",
                        source_only_slice.source_slice(),
                        source_only_slice.slice_type,
                        templated_slice.start,
                    )
                    # Adjust the source slice accordingly: it now starts
                    # where the source-only slice ends, so a directly
                    # following source-only slice can match on the next pass.
                    source_slice = slice(
                        source_only_slice.end_source_idx(), source_slice.stop
                    )

                    # Add segments as appropriate. All of them reuse the
                    # segment's current (not yet enriched) position marker.
                    # If it's a block end or block mid, add a dedent.
                    if source_only_slice.slice_type in ("block_end", "block_mid"):
                        new_segment_buff.append(
                            Dedent.when(template_blocks_indent=True)(
                                pos_marker=segment.pos_marker
                            )
                        )
                    # Always add a placeholder
                    new_segment_buff.append(
                        TemplateSegment(
                            pos_marker=segment.pos_marker,
                            source_str=source_only_slice.raw,
                            block_type=source_only_slice.slice_type,
                        )
                    )
                    # If it's a block start or block mid, add an indent.
                    if source_only_slice.slice_type in ("block_start", "block_mid"):
                        new_segment_buff.append(
                            Indent.when(template_blocks_indent=True)(
                                pos_marker=segment.pos_marker
                            )
                        )

            # Line and position in the source file for the (possibly
            # adjusted) start of the source slice.
            source_line, source_pos = templated_file.get_line_pos_of_char_pos(
                source_slice.start
            )

            # Recalculate is_literal
            is_literal = templated_file.is_source_slice_literal(source_slice)

            # Replace the marker on the segment itself; the templated
            # position fields are carried over unchanged from the old marker.
            segment.pos_marker = EnrichedFilePositionMarker(
                statement_index=segment.pos_marker.statement_index,
                line_no=segment.pos_marker.line_no,
                line_pos=segment.pos_marker.line_pos,
                char_pos=segment.pos_marker.char_pos,
                templated_slice=templated_slice,
                source_slice=source_slice,
                is_literal=is_literal,
                source_pos_marker=FilePositionMarker(
                    segment.pos_marker.statement_index,
                    source_line,
                    source_pos,
                    source_slice.start,
                ),
            )
            new_segment_buff.append(segment)

        # Log every resulting segment. The meta/placeholder segments carry
        # the old marker, hence the getattr defaults.
        lexer_logger.debug("Enriched Segments:")
        for seg in new_segment_buff:
            lexer_logger.debug(
                "\tTmp: %s\tSrc: %s\tSeg: %s",
                getattr(seg.pos_marker, "templated_slice", None),
                getattr(seg.pos_marker, "source_slice", None),
                seg,
            )

        return tuple(new_segment_buff)
예제 #5
0
    def elements_to_segments(
            self, elements: List[TemplateElement],
            templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
        """Convert a tuple of lexed elements into a tuple of segments.

        Besides one segment per element, a single ``TemplateSegment``
        placeholder (optionally preceded by a ``Dedent`` and/or followed by
        an ``Indent``) is inserted before an element whenever source-only
        slices start inside that element's source slice.

        Args:
            elements: The lexed elements; each provides ``template_slice``
                and ``to_segment``.
            templated_file: Supplies the templated and source strings, the
                templated-to-source slice mapping and the source-only slices.

        Returns:
            All generated segments, in order, as a tuple.
        """
        # Working buffer to build up segments
        segment_buffer: List[RawSegment] = []

        lexer_logger.info("Elements to Segments.")
        # Get the templated slices to re-insert tokens for them
        source_only_slices = templated_file.source_only_slices()
        lexer_logger.info("Source-only slices: %s", source_only_slices)
        # stash_source_slice remembers the source slice as first calculated
        # for the current element (before any trimming below), so that it
        # can become last_source_slice on the next iteration.
        stash_source_slice, last_source_slice = None, None

        # Now work out source slices, and add in template placeholders.
        for idx, element in enumerate(elements):
            # Calculate Source Slice
            if idx != 0:
                last_source_slice = stash_source_slice
            source_slice = templated_file.templated_slice_to_source_slice(
                element.template_slice)
            stash_source_slice = source_slice
            # Output the slice as we lex.
            lexer_logger.debug(
                "  %s, %s, %s, %r",
                idx,
                element,
                source_slice,
                templated_file.templated_str[element.template_slice],
            )

            # The calculated source slice will include any source only slices.
            # We should consider all of them in turn to see whether we can
            # insert them.
            so_slices = []
            # Only look for source only slices if we've got a new source slice to
            # avoid unnecessary duplication.
            if last_source_slice != source_slice:
                for source_only_slice in source_only_slices:
                    # If it's later in the source, stop looking. Any later
                    # ones *also* won't match.
                    if source_only_slice.source_idx >= source_slice.stop:
                        break
                    # Collect every source-only slice starting within
                    # [source_slice.start, source_slice.stop).
                    elif source_only_slice.source_idx >= source_slice.start:
                        so_slices.append(source_only_slice)

            if so_slices:
                lexer_logger.debug("    Collected Source Only Slices")
                for so_slice in so_slices:
                    lexer_logger.debug("       %s", so_slice)

                # Calculate some things which will be useful
                templ_str = templated_file.templated_str[
                    element.template_slice]
                source_str = templated_file.source_str[source_slice]

                # For reasons which aren't entirely clear right now, if there is
                # an included literal, it will always be at the end. Let's see if it's
                # there.
                if source_str.endswith(templ_str):
                    existing_len = len(templ_str)
                else:
                    existing_len = 0

                # Calculate slices: the placeholder covers the source slice
                # minus the trailing literal; the element keeps the literal.
                placeholder_slice = slice(source_slice.start,
                                          source_slice.stop - existing_len)
                # NOTE(review): when existing_len is 0 this is
                # source_str[:-0], i.e. source_str[:0], which is always "".
                # In that case the fallback below supplies the placeholder
                # text - confirm that this is the intended behaviour.
                placeholder_str = source_str[:-existing_len]
                source_slice = slice(source_slice.stop - existing_len,
                                     source_slice.stop)
                # If it doesn't manage to extract a placeholder string from the source
                # just concatenate the source only strings. There is almost always
                # only one of them.
                if not placeholder_str:
                    placeholder_str = "".join(s.raw for s in so_slices)
                lexer_logger.debug(
                    "    Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                    existing_len,
                    placeholder_slice,
                    source_slice,
                    placeholder_str,
                    templ_str,
                )

                # Calculate potential indent/dedent.
                # block_slices and block_balance are only used in the debug
                # output below; lead_dedent, trail_indent and add_indents
                # decide which meta segments get added.
                block_slices = sum(
                    s.slice_type.startswith("block_") for s in so_slices)
                block_balance = sum(s.slice_type == "block_start"
                                    for s in so_slices) - sum(
                                        s.slice_type == "block_end"
                                        for s in so_slices)
                # A leading dedent depends on the first collected slice, a
                # trailing indent on the last one.
                lead_dedent = so_slices[0].slice_type in ("block_end",
                                                          "block_mid")
                trail_indent = so_slices[-1].slice_type in ("block_start",
                                                            "block_mid")
                add_indents = self.config.get("template_blocks_indent",
                                              "indentation")
                lexer_logger.debug(
                    "    Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, Add: %s",
                    block_slices,
                    block_balance,
                    lead_dedent,
                    trail_indent,
                    add_indents,
                )

                # Add a dedent if appropriate.
                if lead_dedent and add_indents:
                    lexer_logger.debug("      DEDENT")
                    segment_buffer.append(
                        Dedent(pos_marker=PositionMarker.from_point(
                            placeholder_slice.start,
                            element.template_slice.start,
                            templated_file,
                        )))

                # Always add a placeholder. It is zero-width in the templated
                # file and typed "compound" when it stands for more than one
                # source-only slice.
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            placeholder_slice,
                            slice(
                                element.template_slice.start,
                                element.template_slice.start,
                            ),
                            templated_file,
                        ),
                        source_str=placeholder_str,
                        block_type=so_slices[0].slice_type
                        if len(so_slices) == 1 else "compound",
                    ))
                lexer_logger.debug("      Placholder: %s, %r",
                                   segment_buffer[-1], placeholder_str)

                # Add an indent if appropriate.
                if trail_indent and add_indents:
                    lexer_logger.debug("      INDENT")
                    segment_buffer.append(
                        Indent(pos_marker=PositionMarker.from_point(
                            placeholder_slice.stop,
                            element.template_slice.start,
                            templated_file,
                        )))

            # Add the actual segment. If a placeholder was split off above,
            # source_slice now covers only the trailing literal part.
            segment_buffer.append(
                element.to_segment(pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ), ))

        # Convert to tuple before return
        return tuple(segment_buffer)
예제 #6
0
    def match(self, segments: Tuple["BaseSegment", ...],
              parse_context: ParseContext) -> MatchResult:
        """Match if this is a bracketed sequence, with content that matches one of the elements.

        1. work forwards to find the first bracket.
           If we find something other than whitespace, then fail out.
        2. Once we have the first bracket, we need to bracket count forward to find its partner.
        3. Assuming we find its partner then we try and match what goes between them
           using the match method of Sequence.
           If we match, great. If not, then we return an empty match.
           If we never find its partner then a SQLParseError is raised.

        Args:
            segments: The segments to match against.
            parse_context: The parse context, used to look up the dialect
                brackets and to open deeper match contexts.

        Returns:
            A MatchResult covering the brackets and their content (with
            Indent/Dedent metas inserted inside the brackets) on success,
            otherwise an unmatched result built from ``segments``.

        Raises:
            SQLParseError: If the opening bracket matches but no closing
                bracket is found.
        """
        # Trim ends if allowed.
        # NOTE(review): pre_nc and post_nc from this first trim are never
        # read; both names are rebound further down before use. Confirm that
        # dropping the trimmed segments here is intended.
        if self.allow_gaps:
            pre_nc, seg_buff, post_nc = trim_non_code_segments(segments)
        else:
            seg_buff = segments

        # Rehydrate the bracket segments in question.
        start_bracket, end_bracket = self.get_bracket_from_dialect(
            parse_context)
        # Allow optional override for special bracket-like things
        start_bracket = self.start_bracket or start_bracket
        end_bracket = self.end_bracket or end_bracket

        # Look for the first bracket
        with parse_context.deeper_match() as ctx:
            start_match = start_bracket.match(seg_buff, parse_context=ctx)
        if start_match:
            seg_buff = start_match.unmatched_segments
        else:
            # Can't find the opening bracket. No Match.
            return MatchResult.from_unmatched(segments)

        # Look for the closing bracket
        content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
            segments=seg_buff,
            matchers=[end_bracket],
            parse_context=parse_context,
            start_bracket=start_bracket,
            end_bracket=end_bracket,
            bracket_pairs_set=self.bracket_pairs_set,
        )
        # An opening bracket without a partner is an error, not a non-match.
        if not end_match:
            raise SQLParseError(
                "Couldn't find closing bracket for opening bracket.",
                segment=start_match.matched_segments[0],
            )

        # Match the content now we've confirmed the brackets.

        # First deal with the case of TOTALLY EMPTY BRACKETS e.g. "()"
        if not content_segs:
            # If it's allowed, return a match.
            if not self._elements or all(e.is_optional()
                                         for e in self._elements):
                return MatchResult(
                    start_match.matched_segments + end_match.matched_segments,
                    end_match.unmatched_segments,
                )
            # If not, don't.
            else:
                return MatchResult.from_unmatched(segments)

        # Then trim whitespace and deal with the case of no code content e.g. "(   )"
        if self.allow_gaps:
            pre_nc, content_segs, post_nc = trim_non_code_segments(
                content_segs)
        else:
            pre_nc = ()
            post_nc = ()

        # If we don't have anything left after trimming, act accordingly.
        if not content_segs:
            # Only non-code content: a match if there are no elements, or if
            # all elements are optional and gaps are allowed.
            if not self._elements or (all(e.is_optional()
                                          for e in self._elements)
                                      and self.allow_gaps):
                return MatchResult(
                    start_match.matched_segments + pre_nc + post_nc +
                    end_match.matched_segments,
                    end_match.unmatched_segments,
                )
            else:
                return MatchResult.from_unmatched(segments)

        # Match using super. Sequence will interpret the content of the elements.
        with parse_context.deeper_match() as ctx:
            content_match = super().match(content_segs, parse_context=ctx)

        # We require a complete match for the content (hopefully for obvious reasons)
        if content_match.is_complete():
            # Append some indent and dedent tokens at the start and the end.
            return MatchResult(
                # We need to realign the meta segments so the pos markers are correct.
                BaseSegment._position_segments(
                    (
                        # NB: The nc segments go *outside* the indents.
                        start_match.matched_segments +
                        (Indent(), )  # Add a meta indent here
                        + pre_nc + content_match.matched_segments + post_nc +
                        (Dedent(), )  # Add a meta dedent here
                        + end_match.matched_segments), ),
                end_match.unmatched_segments,
            )
        # No complete match. Fail.
        else:
            return MatchResult.from_unmatched(segments)
예제 #7
0
    def generate_test_segments_func(elems):
        """Build a rough tuple of raw segments from strings, for tests.

        Each string in ``elems`` is wrapped in a class produced by
        ``RawSegment.make`` according to its content. The markers
        ``"<indent>"`` and ``"<dedent>"`` become ``Indent`` / ``Dedent`` meta
        segments placed at the current offset; they do not advance that
        offset. The templated file is created from the plain concatenation
        of ``elems``.

        Not robust; intended as a testing convenience only.
        """

        def segment_class_for(text):
            """Create the segment class matching the content of ``text``."""
            chars = set(text)
            if chars <= {" ", "\t"}:
                return RawSegment.make(" ",
                                       name="whitespace",
                                       type="whitespace",
                                       _is_code=False)
            if chars <= {"\n"}:
                return RawSegment.make("\n",
                                       name="newline",
                                       type="newline",
                                       _is_code=False)
            if text == "(":
                return RawSegment.make("(", name="bracket_open")
            if text == ")":
                return RawSegment.make(")", name="bracket_close")
            if text.startswith("--"):
                return RawSegment.make("--",
                                       name="inline_comment",
                                       _is_code=False)
            if text.startswith('"'):
                return RawSegment.make('"', name="double_quote")
            if text.startswith("'"):
                return RawSegment.make("'", name="single_quote")
            return RawSegment.make("")

        segments = []
        templated = TemplatedFile.from_string("".join(elems))
        offset = 0

        for text in elems:
            # Meta markers sit at a single point and consume no characters.
            if text in ("<indent>", "<dedent>"):
                meta_cls = Indent if text == "<indent>" else Dedent
                point = PositionMarker.from_point(offset, offset, templated)
                segments.append(meta_cls(pos_marker=point))
                continue

            seg_cls = segment_class_for(text)

            # Source and templated positions are the same span here.
            end = offset + len(text)
            span = slice(offset, end)
            marker = PositionMarker(span, span, templated)
            segments.append(seg_cls(text, pos_marker=marker))
            offset = end

        return tuple(segments)
예제 #8
0
    def elements_to_segments(
            self, elements: List[TemplateElement],
            templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
        """Convert a tuple of lexed elements into a tuple of segments."""
        # Working buffer to build up segments
        segment_buffer: List[RawSegment] = []

        lexer_logger.info("Elements to Segments.")
        # Get the templated slices to re-insert tokens for them
        source_only_slices = templated_file.source_only_slices()
        lexer_logger.info("Source-only slices: %s", source_only_slices)
        stash_source_slice, last_source_slice = None, None

        # Now work out source slices, and add in template placeholders.
        for idx, element in enumerate(elements):
            # Calculate Source Slice
            if idx != 0:
                last_source_slice = stash_source_slice
            source_slice = templated_file.templated_slice_to_source_slice(
                element.template_slice)
            stash_source_slice = source_slice
            # Output the slice as we lex.
            lexer_logger.debug(
                "  %s, %s, %s, %r",
                idx,
                element,
                source_slice,
                templated_file.templated_str[element.template_slice],
            )

            # Detect when we've gone backward in the source.
            # NOTE: If it's the _same_ slice then don't insert a marker
            # because we're probably just within a single templated
            # section.
            if (last_source_slice
                    and last_source_slice.stop > source_slice.start
                    and last_source_slice != source_slice):
                # If we have, insert a loop marker to reflect that.
                lexer_logger.debug(
                    "      Backward jump detected. Inserting Loop Marker")
                segment_buffer.append(
                    TemplateLoop(pos_marker=PositionMarker.from_point(
                        last_source_slice.stop,
                        element.template_slice.start,
                        templated_file,
                    )))

            # The calculated source slice will include any source only slices.
            # We should consider all of them in turn to see whether we can
            # insert them.
            so_slices = []
            # Only look for source only slices if we've got a new source slice to
            # avoid unnecessary duplication.
            if last_source_slice != source_slice:
                for source_only_slice in source_only_slices:
                    # If it's later in the source, stop looking. Any later
                    # ones *also* won't match.
                    if source_only_slice.source_idx >= source_slice.stop:
                        break
                    elif source_only_slice.source_idx >= source_slice.start:
                        so_slices.append(source_only_slice)

            if so_slices:
                lexer_logger.debug("    Collected Source Only Slices")
                for so_slice in so_slices:
                    lexer_logger.debug("       %s", so_slice)

                # Calculate some things which will be useful
                templ_str = templated_file.templated_str[
                    element.template_slice]
                source_str = templated_file.source_str[source_slice]

                # For reasons which aren't entirely clear right now, if there is
                # an included literal, it will always be at the end. Let's see if it's
                # there.
                if source_str.endswith(templ_str):
                    existing_len = len(templ_str)
                else:
                    existing_len = 0

                # Calculate slices
                placeholder_slice = slice(source_slice.start,
                                          source_slice.stop - existing_len)
                placeholder_str = source_str[:-existing_len]
                source_slice = slice(source_slice.stop - existing_len,
                                     source_slice.stop)
                # If it doesn't manage to extract a placeholder string from the source
                # just concatenate the source only strings. There is almost always
                # only one of them.
                if not placeholder_str:
                    placeholder_str = "".join(s.raw for s in so_slices)
                # The Jinja templater sometimes returns source-only slices with
                # gaps between. For example, in this section:
                #
                #   {% else %}
                #   JOIN
                #       {{action}}_raw_effect_sizes
                #   USING
                #       ({{ states }})
                #   {% endif %}
                #
                # we might get {% else %} and {% endif %} slices, without the
                # 4 lines between. This indicates those lines were not executed
                # In this case, generate a placeholder where the skipped code is
                # omitted but noted with a brief string, e.g.:
                #
                # "{% else %}... [103 unused template characters] ...{% endif %}".
                #
                # This is more readable -- it would be REALLY confusing for a
                # placeholder to include code that wasn't even executed!!
                if len(so_slices) >= 2:
                    has_gap = False
                    gap_placeholder_parts = []
                    last_slice = None
                    # For each slice...
                    for so_slice in so_slices:
                        # If it's not the first slice, was there a gap?
                        if last_slice:
                            end_last = last_slice.source_idx + len(
                                last_slice.raw)
                            chars_skipped = so_slice.source_idx - end_last
                            if chars_skipped:
                                # Yes, gap between last_slice and so_slice.
                                has_gap = True

                                # Generate a string documenting the gap.
                                if chars_skipped >= 10:
                                    gap_placeholder_parts.append(
                                        f"... [{chars_skipped} unused template "
                                        "characters] ...")
                                else:
                                    gap_placeholder_parts.append("...")
                        # Now add the slice's source.
                        gap_placeholder_parts.append(so_slice.raw)
                        last_slice = so_slice
                    if has_gap:
                        placeholder_str = "".join(gap_placeholder_parts)
                lexer_logger.debug(
                    "    Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                    existing_len,
                    placeholder_slice,
                    source_slice,
                    placeholder_str,
                    templ_str,
                )

                # Calculate potential indent/dedent
                block_slices = sum(
                    s.slice_type.startswith("block_") for s in so_slices)
                block_balance = sum(s.slice_type == "block_start"
                                    for s in so_slices) - sum(
                                        s.slice_type == "block_end"
                                        for s in so_slices)
                lead_dedent = so_slices[0].slice_type in ("block_end",
                                                          "block_mid")
                trail_indent = so_slices[-1].slice_type in ("block_start",
                                                            "block_mid")
                add_indents = self.config.get("template_blocks_indent",
                                              "indentation")
                lexer_logger.debug(
                    "    Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, "
                    "Add: %s",
                    block_slices,
                    block_balance,
                    lead_dedent,
                    trail_indent,
                    add_indents,
                )

                # Add a dedent if appropriate.
                if lead_dedent and add_indents:
                    lexer_logger.debug("      DEDENT")
                    segment_buffer.append(
                        Dedent(pos_marker=PositionMarker.from_point(
                            placeholder_slice.start,
                            element.template_slice.start,
                            templated_file,
                        )))

                # Always add a placeholder
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            placeholder_slice,
                            slice(
                                element.template_slice.start,
                                element.template_slice.start,
                            ),
                            templated_file,
                        ),
                        source_str=placeholder_str,
                        block_type=so_slices[0].slice_type
                        if len(so_slices) == 1 else "compound",
                    ))
                lexer_logger.debug("      Placeholder: %s, %r",
                                   segment_buffer[-1], placeholder_str)

                # Add an indent if appropriate.
                if trail_indent and add_indents:
                    lexer_logger.debug("      INDENT")
                    segment_buffer.append(
                        Indent(
                            is_template=True,
                            pos_marker=PositionMarker.from_point(
                                placeholder_slice.stop,
                                element.template_slice.start,
                                templated_file,
                            ),
                        ))

            # Add the actual segment
            segment_buffer.append(
                element.to_segment(pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ), ))

            # Generate placeholders for any source-only slices that *follow*
            # the last element. This happens, for example, if a Jinja templated
            # file ends with "{% endif %}", and there's no trailing newline.
            if idx == len(elements) - 1:
                so_slices = [
                    so for so in source_only_slices
                    if so.source_idx >= source_slice.stop
                ]
                for so_slice in so_slices:
                    segment_buffer.append(
                        TemplateSegment(
                            pos_marker=PositionMarker(
                                slice(so_slice.source_idx,
                                      so_slice.end_source_idx()),
                                slice(
                                    element.template_slice.stop,
                                    element.template_slice.stop,
                                ),
                                templated_file,
                            ),
                            source_str=so_slice.raw,
                            block_type=so_slice.slice_type,
                        ))

        # Add an end of file marker
        segment_buffer.append(
            EndOfFile(pos_marker=segment_buffer[-1].pos_marker.
                      end_point_marker() if segment_buffer else PositionMarker.
                      from_point(0, 0, templated_file)))

        # Convert to tuple before return
        return tuple(segment_buffer)
예제 #9
0
파일: sequence.py 프로젝트: sti0/sqlfluff
    def match(self, segments: Tuple["BaseSegment", ...],
              parse_context: ParseContext) -> MatchResult:
        """Match a bracketed sequence whose content matches the elements.

        1. Work forwards to find the first bracket. If the first thing we
           find (after optionally trimming non-code segments) is not the
           opening bracket, then fail out with an unmatched result.
        2. Once we have the first bracket, we bracket count forward to find
           its partner.
        3. Assuming we find its partner then we try and match what goes
           between them using the match method of the parent class.
           If that is a complete match, great. If not, then we return an
           unmatched result.
           If we never find its partner then a ``SQLParseError`` is raised.

        If the first segment is already of type "bracketed", its existing
        start/end brackets are reused instead of re-matching them.

        Args:
            segments: The segments to attempt to match against.
            parse_context: The current parse context. It is used to look up
                the dialect's brackets and to descend into deeper matches.

        Returns:
            A ``MatchResult``. On success, the matched part is either the
            bracketed segment itself or (if the dialect says brackets do not
            persist) its child segments, followed by the trailing unmatched
            segments. Otherwise an unmatched result built from ``segments``.

        Raises:
            SQLParseError: If an opening bracket is matched but no closing
                bracket can be found for it.
        """
        # Trim ends if allowed. Only the trimmed core is used below.
        if self.allow_gaps:
            _, seg_buff, _ = trim_non_code_segments(segments)
        else:
            seg_buff = segments  # pragma: no cover TODO?

        # With nothing left to look at there cannot be a bracket. Bail out
        # here rather than hitting an IndexError on ``seg_buff[0]`` below.
        if not seg_buff:
            return MatchResult.from_unmatched(segments)

        # Rehydrate the bracket segments in question.
        # bracket_persists controls whether we make a BracketedSegment or not.
        start_bracket, end_bracket, bracket_persists = self.get_bracket_from_dialect(
            parse_context)
        # Allow optional override for special bracket-like things
        start_bracket = self.start_bracket or start_bracket
        end_bracket = self.end_bracket or end_bracket

        # Are we dealing with a pre-existing BracketSegment?
        if seg_buff[0].is_type("bracketed"):
            seg: BracketedSegment = cast(BracketedSegment, seg_buff[0])
            # Strip the bracket segments off both ends to get the content.
            content_segs = seg.segments[len(seg.start_bracket
                                            ):-len(seg.end_bracket)]
            bracket_segment = seg
            trailing_segments = seg_buff[1:]
        # Otherwise try and match the segments directly.
        else:
            # Look for the first bracket
            with parse_context.deeper_match() as ctx:
                start_match = start_bracket.match(seg_buff, parse_context=ctx)
            if not start_match:
                # Can't find the opening bracket. No Match.
                return MatchResult.from_unmatched(segments)
            seg_buff = start_match.unmatched_segments

            # Look for the closing bracket
            content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
                segments=seg_buff,
                matchers=[end_bracket],
                parse_context=parse_context,
                start_bracket=start_bracket,
                end_bracket=end_bracket,
                bracket_pairs_set=self.bracket_pairs_set,
            )
            if not end_match:  # pragma: no cover
                raise SQLParseError(
                    "Couldn't find closing bracket for opening bracket.",
                    segment=start_match.matched_segments[0],
                )

            # Construct a bracket segment
            bracket_segment = BracketedSegment(
                segments=(start_match.matched_segments + content_segs +
                          end_match.matched_segments),
                start_bracket=start_match.matched_segments,
                end_bracket=end_match.matched_segments,
            )
            trailing_segments = end_match.unmatched_segments

        # Then trim whitespace and deal with the case of non-code content e.g. "(   )"
        if self.allow_gaps:
            pre_segs, content_segs, post_segs = trim_non_code_segments(
                content_segs)
        else:  # pragma: no cover TODO?
            pre_segs = ()
            post_segs = ()

        # If we've got a case of empty brackets check whether that is allowed.
        if not content_segs:
            # Empty brackets are fine if there are no elements at all, or if
            # every element is optional and any non-code inside the brackets
            # is permitted (gaps allowed, or there simply isn't any).
            if not self._elements or (all(e.is_optional()
                                          for e in self._elements) and
                                      (self.allow_gaps or
                                       (not pre_segs and not post_segs))):
                return MatchResult(
                    (bracket_segment, )
                    if bracket_persists else bracket_segment.segments,
                    trailing_segments,
                )
            return MatchResult.from_unmatched(segments)

        # Match the content using super. The parent class will interpret the
        # content of the elements.
        with parse_context.deeper_match() as ctx:
            content_match = super().match(content_segs, parse_context=ctx)

        # We require a complete match for the content (hopefully for obvious reasons)
        if not content_match.is_complete():
            # No complete match. Fail.
            return MatchResult.from_unmatched(segments)

        # Reconstruct the bracket segment post match.
        # We need to realign the meta segments so the pos markers are correct.
        # Have we already got (non-template) indents? This is a presence
        # check, so a hit at position 0 counts just like any other position.
        has_indent = any(
            child.is_meta and cast(MetaSegment, child).indent_val > 0
            and not cast(MetaSegment, child).is_template
            for child in bracket_segment.segments)

        if has_indent:
            # If we've already got indents, don't add more.
            new_segments = (bracket_segment.start_bracket + pre_segs +
                            content_match.all_segments() + post_segs +
                            bracket_segment.end_bracket)
        else:
            # Append some indent and dedent tokens at the start and the end.
            # NB: The nc segments go *inside* the indent/dedent pair, which
            # sits directly against the brackets.
            new_segments = (
                bracket_segment.start_bracket +
                (Indent(), )  # Add a meta indent here
                + pre_segs + content_match.all_segments() + post_segs +
                (Dedent(), )  # Add a meta dedent here
                + bracket_segment.end_bracket)
        bracket_segment.segments = BaseSegment._position_segments(
            new_segments)

        return MatchResult(
            (bracket_segment, )
            if bracket_persists else bracket_segment.segments,
            trailing_segments,
        )