コード例 #1
0
ファイル: conftest.py プロジェクト: zhongjiajie/sqlfluff
    def generate_test_segments_func(elems):
        """Roughly generate test segments.

        This function isn't totally robust, but good enough
        for testing. Use with caution.
        """
        buff = []
        raw_file = "".join(elems)
        templated_file = TemplatedFile.from_string(raw_file)
        idx = 0

        for elem in elems:
            if elem == "<indent>":
                buff.append(
                    Indent(pos_marker=PositionMarker.from_point(
                        idx, idx, templated_file)))
                continue
            elif elem == "<dedent>":
                buff.append(
                    Dedent(pos_marker=PositionMarker.from_point(
                        idx, idx, templated_file)))
                continue

            seg_kwargs = {}

            if set(elem) <= {" ", "\t"}:
                SegClass = WhitespaceSegment
            elif set(elem) <= {"\n"}:
                SegClass = NewlineSegment
            elif elem == "(":
                SegClass = SymbolSegment
                seg_kwargs = {"name": "bracket_open"}
            elif elem == ")":
                SegClass = SymbolSegment
                seg_kwargs = {"name": "bracket_close"}
            elif elem.startswith("--"):
                SegClass = CommentSegment
                seg_kwargs = {"name": "inline_comment"}
            elif elem.startswith('"'):
                SegClass = CodeSegment
                seg_kwargs = {"name": "double_quote"}
            elif elem.startswith("'"):
                SegClass = CodeSegment
                seg_kwargs = {"name": "single_quote"}
            else:
                SegClass = CodeSegment

            # Set a none position marker which we'll realign at the end.
            buff.append(
                SegClass(raw=elem,
                         pos_marker=PositionMarker(
                             slice(idx, idx + len(elem)),
                             slice(idx, idx + len(elem)),
                             templated_file,
                         ),
                         **seg_kwargs))
            idx += len(elem)

        return tuple(buff)
コード例 #2
0
    def elements_to_segments(
            self, elements: List[TemplateElement],
            templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
        """Convert a tuple of lexed elements into a tuple of segments."""
        # Working buffer to build up segments
        segment_buffer: List[RawSegment] = []

        lexer_logger.info("Elements to Segments.")
        # Get the templated slices to re-insert tokens for them
        source_only_slices = templated_file.source_only_slices()
        lexer_logger.info("Source-only slices: %s", source_only_slices)

        # Now work out source slices, and add in template placeholders.
        for element in elements:
            # Calculate Source Slice
            source_slice = templated_file.templated_slice_to_source_slice(
                element.template_slice)
            # The calculated source slice will include any source only slices.
            # We should consider all of them in turn to see whether we can
            # insert them.
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match.
                if source_only_slice.source_idx > source_slice.start:
                    break
                # Is there a templated section within this source slice?
                # If there is then for some reason I can't quite explain,
                # it will always be at the start of the section. This is
                # very convenient beause it means we'll always have the
                # start and end of it in a definite position. This makes
                # slicing and looping much easier.
                elif source_only_slice.source_idx == source_slice.start:
                    lexer_logger.debug(
                        "Found templated section! %s, %s, %s",
                        source_only_slice.source_slice(),
                        source_only_slice.slice_type,
                        element.template_slice.start,
                    )
                    # Calculate a slice for any placeholders
                    placeholder_source_slice = slice(
                        source_slice.start, source_only_slice.end_source_idx())
                    # Adjust the source slice accordingly.
                    source_slice = slice(source_only_slice.end_source_idx(),
                                         source_slice.stop)

                    # TODO: Readjust this to remove .when once ProtoSegment is in.

                    # Add segments as appropriate.
                    # If it's a block end, add a dedent.
                    if source_only_slice.slice_type in ("block_end",
                                                        "block_mid"):
                        segment_buffer.append(
                            Dedent.when(template_blocks_indent=True)(
                                pos_marker=PositionMarker.from_point(
                                    placeholder_source_slice.start,
                                    element.template_slice.start,
                                    templated_file,
                                )))
                    # Always add a placeholder
                    segment_buffer.append(
                        TemplateSegment(
                            pos_marker=PositionMarker(
                                placeholder_source_slice,
                                slice(
                                    element.template_slice.start,
                                    element.template_slice.start,
                                ),
                                templated_file,
                            ),
                            source_str=source_only_slice.raw,
                            block_type=source_only_slice.slice_type,
                        ))
                    # If it's a block end, add a dedent.
                    if source_only_slice.slice_type in ("block_start",
                                                        "block_mid"):
                        segment_buffer.append(
                            Indent.when(template_blocks_indent=True)(
                                pos_marker=PositionMarker.from_point(
                                    placeholder_source_slice.stop,
                                    element.template_slice.start,
                                    templated_file,
                                )))

            # Add the actual segment
            segment_buffer.append(
                element.to_segment(pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ), ))

        # Convert to tuple before return
        return tuple(segment_buffer)
コード例 #3
0
    def elements_to_segments(
            self, elements: List[TemplateElement],
            templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
        """Convert a tuple of lexed elements into a tuple of segments."""
        # Working buffer to build up segments
        segment_buffer: List[RawSegment] = []

        lexer_logger.info("Elements to Segments.")
        # Get the templated slices to re-insert tokens for them
        source_only_slices = templated_file.source_only_slices()
        lexer_logger.info("Source-only slices: %s", source_only_slices)
        stash_source_slice, last_source_slice = None, None

        # Now work out source slices, and add in template placeholders.
        for idx, element in enumerate(elements):
            # Calculate Source Slice
            if idx != 0:
                last_source_slice = stash_source_slice
            source_slice = templated_file.templated_slice_to_source_slice(
                element.template_slice)
            stash_source_slice = source_slice
            # Output the slice as we lex.
            lexer_logger.debug(
                "  %s, %s, %s, %r",
                idx,
                element,
                source_slice,
                templated_file.templated_str[element.template_slice],
            )

            # The calculated source slice will include any source only slices.
            # We should consider all of them in turn to see whether we can
            # insert them.
            so_slices = []
            # Only look for source only slices if we've got a new source slice to
            # avoid unnecessary duplication.
            if last_source_slice != source_slice:
                for source_only_slice in source_only_slices:
                    # If it's later in the source, stop looking. Any later
                    # ones *also* won't match.
                    if source_only_slice.source_idx >= source_slice.stop:
                        break
                    elif source_only_slice.source_idx >= source_slice.start:
                        so_slices.append(source_only_slice)

            if so_slices:
                lexer_logger.debug("    Collected Source Only Slices")
                for so_slice in so_slices:
                    lexer_logger.debug("       %s", so_slice)

                # Calculate some things which will be useful
                templ_str = templated_file.templated_str[
                    element.template_slice]
                source_str = templated_file.source_str[source_slice]

                # For reasons which aren't entirely clear right now, if there is
                # an included literal, it will always be at the end. Let's see if it's
                # there.
                if source_str.endswith(templ_str):
                    existing_len = len(templ_str)
                else:
                    existing_len = 0

                # Calculate slices
                placeholder_slice = slice(source_slice.start,
                                          source_slice.stop - existing_len)
                placeholder_str = source_str[:-existing_len]
                source_slice = slice(source_slice.stop - existing_len,
                                     source_slice.stop)
                # If it doesn't manage to extract a placeholder string from the source
                # just concatenate the source only strings. There is almost always
                # only one of them.
                if not placeholder_str:
                    placeholder_str = "".join(s.raw for s in so_slices)
                lexer_logger.debug(
                    "    Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                    existing_len,
                    placeholder_slice,
                    source_slice,
                    placeholder_str,
                    templ_str,
                )

                # Caluculate potential indent/dedent
                block_slices = sum(
                    s.slice_type.startswith("block_") for s in so_slices)
                block_balance = sum(s.slice_type == "block_start"
                                    for s in so_slices) - sum(
                                        s.slice_type == "block_end"
                                        for s in so_slices)
                lead_dedent = so_slices[0].slice_type in ("block_end",
                                                          "block_mid")
                trail_indent = so_slices[-1].slice_type in ("block_start",
                                                            "block_mid")
                add_indents = self.config.get("template_blocks_indent",
                                              "indentation")
                lexer_logger.debug(
                    "    Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, Add: %s",
                    block_slices,
                    block_balance,
                    lead_dedent,
                    trail_indent,
                    add_indents,
                )

                # Add a dedent if appropriate.
                if lead_dedent and add_indents:
                    lexer_logger.debug("      DEDENT")
                    segment_buffer.append(
                        Dedent(pos_marker=PositionMarker.from_point(
                            placeholder_slice.start,
                            element.template_slice.start,
                            templated_file,
                        )))

                # Always add a placeholder
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            placeholder_slice,
                            slice(
                                element.template_slice.start,
                                element.template_slice.start,
                            ),
                            templated_file,
                        ),
                        source_str=placeholder_str,
                        block_type=so_slices[0].slice_type
                        if len(so_slices) == 1 else "compound",
                    ))
                lexer_logger.debug("      Placholder: %s, %r",
                                   segment_buffer[-1], placeholder_str)

                # Add a dedent if appropriate.
                if trail_indent and add_indents:
                    lexer_logger.debug("      INDENT")
                    segment_buffer.append(
                        Indent(pos_marker=PositionMarker.from_point(
                            placeholder_slice.stop,
                            element.template_slice.start,
                            templated_file,
                        )))

            # Add the actual segment
            segment_buffer.append(
                element.to_segment(pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ), ))

        # Convert to tuple before return
        return tuple(segment_buffer)
コード例 #4
0
ファイル: conftest.py プロジェクト: yaegassy/sqlfluff
    def generate_test_segments_func(elems):
        """Roughly generate test segments.

        This function isn't totally robust, but good enough
        for testing. Use with caution.
        """
        buff = []
        raw_file = "".join(elems)
        templated_file = TemplatedFile.from_string(raw_file)
        idx = 0

        for elem in elems:
            if elem == "<indent>":
                buff.append(
                    Indent(pos_marker=PositionMarker.from_point(
                        idx, idx, templated_file)))
                continue
            elif elem == "<dedent>":
                buff.append(
                    Dedent(pos_marker=PositionMarker.from_point(
                        idx, idx, templated_file)))
                continue

            if set(elem) <= {" ", "\t"}:
                cls = RawSegment.make(" ",
                                      name="whitespace",
                                      type="whitespace",
                                      _is_code=False)
            elif set(elem) <= {"\n"}:
                cls = RawSegment.make("\n",
                                      name="newline",
                                      type="newline",
                                      _is_code=False)
            elif elem == "(":
                cls = RawSegment.make("(", name="bracket_open")
            elif elem == ")":
                cls = RawSegment.make(")", name="bracket_close")
            elif elem.startswith("--"):
                cls = RawSegment.make("--",
                                      name="inline_comment",
                                      _is_code=False)
            elif elem.startswith('"'):
                cls = RawSegment.make('"', name="double_quote")
            elif elem.startswith("'"):
                cls = RawSegment.make("'", name="single_quote")
            else:
                cls = RawSegment.make("")

            # Set a none position marker which we'll realign at the end.
            buff.append(
                cls(
                    elem,
                    pos_marker=PositionMarker(
                        slice(idx, idx + len(elem)),
                        slice(idx, idx + len(elem)),
                        templated_file,
                    ),
                ))
            idx += len(elem)

        return tuple(buff)
コード例 #5
0
    def elements_to_segments(
            self, elements: List[TemplateElement],
            templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
        """Convert a tuple of lexed elements into a tuple of segments."""
        # Working buffer to build up segments
        segment_buffer: List[RawSegment] = []

        lexer_logger.info("Elements to Segments.")
        # Get the templated slices to re-insert tokens for them
        source_only_slices = templated_file.source_only_slices()
        lexer_logger.info("Source-only slices: %s", source_only_slices)
        stash_source_slice, last_source_slice = None, None

        # Now work out source slices, and add in template placeholders.
        for idx, element in enumerate(elements):
            # Calculate Source Slice
            if idx != 0:
                last_source_slice = stash_source_slice
            source_slice = templated_file.templated_slice_to_source_slice(
                element.template_slice)
            stash_source_slice = source_slice
            # Output the slice as we lex.
            lexer_logger.debug(
                "  %s, %s, %s, %r",
                idx,
                element,
                source_slice,
                templated_file.templated_str[element.template_slice],
            )

            # Detect when we've gone backward in the source.
            # NOTE: If it's the _same_ slice then don't insert a marker
            # because we're probably just within a single templated
            # section.
            if (last_source_slice
                    and last_source_slice.stop > source_slice.start
                    and last_source_slice != source_slice):
                # If we have, insert a loop marker to reflect that.
                lexer_logger.debug(
                    "      Backward jump detected. Inserting Loop Marker")
                segment_buffer.append(
                    TemplateLoop(pos_marker=PositionMarker.from_point(
                        last_source_slice.stop,
                        element.template_slice.start,
                        templated_file,
                    )))

            # The calculated source slice will include any source only slices.
            # We should consider all of them in turn to see whether we can
            # insert them.
            so_slices = []
            # Only look for source only slices if we've got a new source slice to
            # avoid unnecessary duplication.
            if last_source_slice != source_slice:
                for source_only_slice in source_only_slices:
                    # If it's later in the source, stop looking. Any later
                    # ones *also* won't match.
                    if source_only_slice.source_idx >= source_slice.stop:
                        break
                    elif source_only_slice.source_idx >= source_slice.start:
                        so_slices.append(source_only_slice)

            if so_slices:
                lexer_logger.debug("    Collected Source Only Slices")
                for so_slice in so_slices:
                    lexer_logger.debug("       %s", so_slice)

                # Calculate some things which will be useful
                templ_str = templated_file.templated_str[
                    element.template_slice]
                source_str = templated_file.source_str[source_slice]

                # For reasons which aren't entirely clear right now, if there is
                # an included literal, it will always be at the end. Let's see if it's
                # there.
                if source_str.endswith(templ_str):
                    existing_len = len(templ_str)
                else:
                    existing_len = 0

                # Calculate slices
                placeholder_slice = slice(source_slice.start,
                                          source_slice.stop - existing_len)
                placeholder_str = source_str[:-existing_len]
                source_slice = slice(source_slice.stop - existing_len,
                                     source_slice.stop)
                # If it doesn't manage to extract a placeholder string from the source
                # just concatenate the source only strings. There is almost always
                # only one of them.
                if not placeholder_str:
                    placeholder_str = "".join(s.raw for s in so_slices)
                # The Jinja templater sometimes returns source-only slices with
                # gaps between. For example, in this section:
                #
                #   {% else %}
                #   JOIN
                #       {{action}}_raw_effect_sizes
                #   USING
                #       ({{ states }})
                #   {% endif %}
                #
                # we might get {% else %} and {% endif %} slices, without the
                # 4 lines between. This indicates those lines were not executed
                # In this case, generate a placeholder where the skipped code is
                # omitted but noted with a brief string, e.g.:
                #
                # "{% else %}... [103 unused template characters] ...{% endif %}".
                #
                # This is more readable -- it would be REALLY confusing for a
                # placeholder to include code that wasn't even executed!!
                if len(so_slices) >= 2:
                    has_gap = False
                    gap_placeholder_parts = []
                    last_slice = None
                    # For each slice...
                    for so_slice in so_slices:
                        # If it's not the first slice, was there a gap?
                        if last_slice:
                            end_last = last_slice.source_idx + len(
                                last_slice.raw)
                            chars_skipped = so_slice.source_idx - end_last
                            if chars_skipped:
                                # Yes, gap between last_slice and so_slice.
                                has_gap = True

                                # Generate a string documenting the gap.
                                if chars_skipped >= 10:
                                    gap_placeholder_parts.append(
                                        f"... [{chars_skipped} unused template "
                                        "characters] ...")
                                else:
                                    gap_placeholder_parts.append("...")
                        # Now add the slice's source.
                        gap_placeholder_parts.append(so_slice.raw)
                        last_slice = so_slice
                    if has_gap:
                        placeholder_str = "".join(gap_placeholder_parts)
                lexer_logger.debug(
                    "    Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                    existing_len,
                    placeholder_slice,
                    source_slice,
                    placeholder_str,
                    templ_str,
                )

                # Calculate potential indent/dedent
                block_slices = sum(
                    s.slice_type.startswith("block_") for s in so_slices)
                block_balance = sum(s.slice_type == "block_start"
                                    for s in so_slices) - sum(
                                        s.slice_type == "block_end"
                                        for s in so_slices)
                lead_dedent = so_slices[0].slice_type in ("block_end",
                                                          "block_mid")
                trail_indent = so_slices[-1].slice_type in ("block_start",
                                                            "block_mid")
                add_indents = self.config.get("template_blocks_indent",
                                              "indentation")
                lexer_logger.debug(
                    "    Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, "
                    "Add: %s",
                    block_slices,
                    block_balance,
                    lead_dedent,
                    trail_indent,
                    add_indents,
                )

                # Add a dedent if appropriate.
                if lead_dedent and add_indents:
                    lexer_logger.debug("      DEDENT")
                    segment_buffer.append(
                        Dedent(pos_marker=PositionMarker.from_point(
                            placeholder_slice.start,
                            element.template_slice.start,
                            templated_file,
                        )))

                # Always add a placeholder
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            placeholder_slice,
                            slice(
                                element.template_slice.start,
                                element.template_slice.start,
                            ),
                            templated_file,
                        ),
                        source_str=placeholder_str,
                        block_type=so_slices[0].slice_type
                        if len(so_slices) == 1 else "compound",
                    ))
                lexer_logger.debug("      Placeholder: %s, %r",
                                   segment_buffer[-1], placeholder_str)

                # Add an indent if appropriate.
                if trail_indent and add_indents:
                    lexer_logger.debug("      INDENT")
                    segment_buffer.append(
                        Indent(
                            is_template=True,
                            pos_marker=PositionMarker.from_point(
                                placeholder_slice.stop,
                                element.template_slice.start,
                                templated_file,
                            ),
                        ))

            # Add the actual segment
            segment_buffer.append(
                element.to_segment(pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ), ))

            # Generate placeholders for any source-only slices that *follow*
            # the last element. This happens, for example, if a Jinja templated
            # file ends with "{% endif %}", and there's no trailing newline.
            if idx == len(elements) - 1:
                so_slices = [
                    so for so in source_only_slices
                    if so.source_idx >= source_slice.stop
                ]
                for so_slice in so_slices:
                    segment_buffer.append(
                        TemplateSegment(
                            pos_marker=PositionMarker(
                                slice(so_slice.source_idx,
                                      so_slice.end_source_idx()),
                                slice(
                                    element.template_slice.stop,
                                    element.template_slice.stop,
                                ),
                                templated_file,
                            ),
                            source_str=so_slice.raw,
                            block_type=so_slice.slice_type,
                        ))

        # Add an end of file marker
        segment_buffer.append(
            EndOfFile(pos_marker=segment_buffer[-1].pos_marker.
                      end_point_marker() if segment_buffer else PositionMarker.
                      from_point(0, 0, templated_file)))

        # Convert to tuple before return
        return tuple(segment_buffer)