Example 1
def test__parser__grammar__base__bracket_sensitive_look_ahead_match(
    bracket_seg_list, fresh_ansi_dialect
):
    """Test the _bracket_sensitive_look_ahead_match method of the BaseGrammar."""
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    # We need a dialect here to do bracket matching
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Basic version, we should find bar first
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [fs, bs], ctx
        )
        assert pre_section == ()
        assert matcher == bs
        # NB the middle element is a match object
        assert match.matched_segments == (bs("bar", bracket_seg_list[0].pos_marker),)

        # Look ahead for foo. We should find the one AFTER the brackets, not
        # the one IN the brackets.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [fs], ctx
        )
        # NB: The bracket segments will have been mutated, so we can't directly compare
        assert len(pre_section) == 8
        assert matcher == fs
        # We shouldn't match the whitespace with the keyword
        assert match.matched_segments == (fs("foo", bracket_seg_list[8].pos_marker),)
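The behaviour under test above is that look-ahead matching treats bracketed spans as opaque: a candidate match inside the brackets must not beat one that appears after the closing bracket. Stripped of sqlfluff's machinery, the scanning rule amounts to something like the following sketch (all names here are illustrative, not sqlfluff's API):

def bracket_sensitive_find(tokens, target, open_b="(", close_b=")"):
    """Find the first `target` token that is not inside brackets.

    Returns the index of the match, or None. Raises on an unmatched
    close bracket, mirroring the failure case tested in Example 7.
    """
    depth = 0
    for idx, tok in enumerate(tokens):
        if tok == open_b:
            depth += 1
        elif tok == close_b:
            if depth == 0:
                raise ValueError("Found unexpected end bracket")
            depth -= 1
        elif tok == target and depth == 0:
            return idx
    return None

# The "foo" inside the brackets is skipped; the one after them wins.
assert bracket_sensitive_find(["bar", " ", "(", "foo", ")", " ", "foo"], "foo") == 6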
Example 2
def test__parser__grammar_oneof_take_longest_match(seg_list):
    """Test that the OneOf grammar takes the longest match."""
    fooRegex = ReSegment.make(r"fo{2}")
    baar = KeywordSegment.make(
        "baar",
    )
    foo = KeywordSegment.make(
        "foo",
    )
    fooBaar = Sequence(
        foo,
        baar,
    )

    # Even if fooRegex comes first, fooBaar
    # is a longer match and should be taken
    g = OneOf(fooRegex, fooBaar)
    with RootParseContext(dialect=None) as ctx:
        assert fooRegex.match(seg_list[2:], parse_context=ctx).matched_segments == (
            fooRegex("foo", seg_list[2].pos_marker),
        )
        assert g.match(seg_list[2:], parse_context=ctx).matched_segments == (
            foo("foo", seg_list[2].pos_marker),
            baar("baar", seg_list[3].pos_marker),
        )
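OneOf here resolves ambiguity by match length rather than declaration order: fooBaar wins even though fooRegex is listed first. The selection rule, reduced to plain Python with hypothetical matcher functions that report how many leading tokens they consume:

def take_longest_match(tokens, matchers):
    """Return the (matcher, length) pair with the longest match.

    Ties go to the earlier matcher, which is the behaviour tested
    in Example 17.
    """
    best = (None, 0)
    for matcher in matchers:
        length = matcher(tokens)
        if length > best[1]:  # strictly greater: the first matcher wins ties
            best = (matcher, length)
    return best

match_foo = lambda toks: 1 if toks[:1] == ["foo"] else 0
match_foo_baar = lambda toks: 2 if toks[:2] == ["foo", "baar"] else 0

# The two-token sequence beats the single-token match.
assert take_longest_match(["foo", "baar"], [match_foo, match_foo_baar])[1] == 2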
Example 3
def test__parser__grammar_oneof_exclude(seg_list):
    """Test the OneOf grammar exclude option."""
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    g = OneOf(bs, exclude=Sequence(bs, fs))
    with RootParseContext(dialect=None) as ctx:
        # Just against the first alone
        assert g.match(seg_list[:1], parse_context=ctx)
        # Now with the bit to exclude included
        assert not g.match(seg_list, parse_context=ctx)
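The exclude option acts as a negative look-ahead: if the exclusion grammar matches at the same point, the alternative is vetoed even though it would otherwise match. A plain-Python sketch with illustrative names:

def match_with_exclude(tokens, matcher, exclude):
    """Match only if the exclusion grammar does not also match here."""
    if exclude(tokens):
        return 0
    return matcher(tokens)

match_bar = lambda toks: 1 if toks[:1] == ["bar"] else 0
match_bar_foo = lambda toks: 2 if toks[:2] == ["bar", "foo"] else 0

assert match_with_exclude(["bar"], match_bar, match_bar_foo) == 1
assert match_with_exclude(["bar", "foo"], match_bar, match_bar_foo) == 0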
Example 4
def test__parser__grammar_startswith_b(
    include_terminator, match_length, seg_list, fresh_ansi_dialect, caplog
):
    """Test the StartsWith grammar with a terminator (included & exluded)."""
    baar = KeywordSegment.make("baar")
    bar = KeywordSegment.make("bar")
    grammar = StartsWith(bar, terminator=baar, include_terminator=include_terminator)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            m = grammar.match(seg_list, parse_context=ctx)
            assert len(m) == match_length
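StartsWith with a terminator matches from the starting element up to the terminator, and include_terminator decides whether the terminator itself is part of the match, which is why the parametrised match_length differs. A rough sketch of that switch (helper names are made up for illustration):

def starts_with(tokens, target, terminator, include_terminator=False):
    """Return the number of tokens matched, or 0 on no match."""
    if not tokens or tokens[0] != target:
        return 0
    for idx in range(1, len(tokens)):
        if tokens[idx] == terminator:
            return idx + 1 if include_terminator else idx
    return len(tokens)

toks = ["bar", " ", "foo", "baar"]
assert starts_with(toks, "bar", "baar") == 3
assert starts_with(toks, "bar", "baar", include_terminator=True) == 4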
Example 5
def test__parser__grammar_oneof_templated(seg_list):
    """Test the OneOf grammar.

    NB: Should behave the same regardless of code_only.

    """
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    g = OneOf(fs, bs)
    with RootParseContext(dialect=None) as ctx:
        # This shouldn't match, but it *ALSO* shouldn't raise an exception.
        # https://github.com/sqlfluff/sqlfluff/issues/780
        assert not g.match(seg_list[5:], parse_context=ctx)
Example 6
def test__parser__grammar_oneof(seg_list, allow_gaps):
    """Test the OneOf grammar.

    NB: Should behave the same regardless of code_only.

    """
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    g = OneOf(fs, bs, allow_gaps=allow_gaps)
    with RootParseContext(dialect=None) as ctx:
        # Check directly
        assert g.match(seg_list, parse_context=ctx).matched_segments == (bs(
            "bar", seg_list[0].pos_marker), )
        # Check with a bit of whitespace
        assert not g.match(seg_list[1:], parse_context=ctx)
Example 7
def test__parser__grammar__base__bracket_fail_with_open_paren_close_square_mismatch(
        generate_test_segments, fresh_ansi_dialect):
    """Test _bracket_sensitive_look_ahead_match failure case.

    Should fail when the type of a close bracket doesn't match the type of the
    corresponding open bracket, but both are "definite" brackets.
    """
    fs = KeywordSegment.make("foo")
    # We need a dialect here to do bracket matching
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # The mismatched close bracket should raise a parse error
        with pytest.raises(SQLParseError) as sql_parse_error:
            BaseGrammar._bracket_sensitive_look_ahead_match(
                generate_test_segments([
                    "select",
                    " ",
                    "*",
                    " ",
                    "from",
                    "(",
                    "foo",
                    "]",  # Bracket types don't match (parens vs square)
                ]),
                [fs],
                ctx,
            )
        assert sql_parse_error.match("Found unexpected end bracket")
Example 8
    def expand(self):
        """Expand any callable references to concrete ones.

        This must be called before using the dialect, but it allows
        more flexible definitions to happen at runtime.

        """
        # Are we already expanded?
        if self.expanded:
            return
        # Expand any callable elements of the dialect.
        for key in self._library:
            if isinstance(self._library[key], SegmentGenerator):
                # If the element is callable, call it passing the current
                # dialect and store the result in its place.
                # Use the .replace() method for its error handling.
                self.replace(**{key: self._library[key].expand(self)})
        # Expand any keyword sets.
        for keyword_set in [
                "unreserved_keywords",
                "reserved_keywords",
        ]:  # e.g. reserved_keywords, (JOIN, ...)
            # Make sure the values are available as KeywordSegments
            for kw in self.sets(keyword_set):
                n = kw.capitalize() + "KeywordSegment"
                if n not in self._library:
                    self._library[n] = KeywordSegment.make(kw.lower())
        self.expanded = True
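The keyword-expansion loop above follows a fixed naming convention: each keyword in a set gains a library entry named with the capitalized keyword plus the suffix KeywordSegment. Just that naming step, in isolation:

def keyword_entry_name(kw: str) -> str:
    """Mirror the naming rule used in the loop above."""
    return kw.capitalize() + "KeywordSegment"

assert keyword_entry_name("select") == "SelectKeywordSegment"
assert keyword_entry_name("JOIN") == "JoinKeywordSegment"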
Example 9
def test__parser__grammar_greedyuntil_bracketed(bracket_seg_list, fresh_ansi_dialect):
    """Test the GreedyUntil grammar with brackets."""
    fs = KeywordSegment.make("foo")
    g = GreedyUntil(fs)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Check that we can make it past the brackets
        assert len(g.match(bracket_seg_list, parse_context=ctx)) == 7
Example 10
def test__parser__grammar_startswith_a(keyword, match_truthy, seg_list,
                                       fresh_ansi_dialect, caplog):
    """Test the StartsWith grammar simply."""
    Keyword = KeywordSegment.make(keyword)
    grammar = StartsWith(Keyword)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            m = grammar.match(seg_list, parse_context=ctx)
            assert bool(m) is match_truthy
Example 11
def make_result_tuple(result_slice, matcher_keywords, seg_list):
    """Make a comparison tuple for test matching."""
    # No result slice means no match.
    if not result_slice:
        return ()

    return tuple(
        KeywordSegment.make(elem.raw)(elem.raw, elem.pos_marker)
        if elem.raw in matcher_keywords
        else elem
        for elem in seg_list[result_slice]
    )
Example 12
def test__parser__grammar_greedyuntil(keyword, seg_list, enforce_ws, slice_len,
                                      fresh_ansi_dialect):
    """Test the GreedyUntil grammar."""
    grammar = GreedyUntil(
        KeywordSegment.make(keyword),
        enforce_whitespace_preceeding_terminator=enforce_ws,
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        assert (grammar.match(
            seg_list,
            parse_context=ctx).matched_segments == seg_list[:slice_len])
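GreedyUntil consumes everything up to, but not including, its terminator; when enforce_whitespace_preceeding_terminator is set (the spelling is sqlfluff's), a terminator only counts if whitespace precedes it. A simplified sketch of that rule with an illustrative helper:

def greedy_until(tokens, terminator, enforce_ws=False):
    """Return how many tokens are consumed before the terminator."""
    for idx, tok in enumerate(tokens):
        if tok == terminator:
            if enforce_ws and (idx == 0 or not tokens[idx - 1].isspace()):
                continue  # terminator lacks preceding whitespace: keep going
            return idx
    return len(tokens)

assert greedy_until(["bar", " ", "foo", " ", "baar"], "baar") == 4
# Without preceding whitespace the terminator is ignored entirely.
assert greedy_until(["bar", "baar"], "baar", enforce_ws=True) == 2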
Example 13
def test__parser__grammar_sequence_nested(seg_list, caplog):
    """Test the Sequence grammar when nested."""
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    bas = KeywordSegment.make("baar")
    g = Sequence(Sequence(bs, fs), bas)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Matching the start of the list shouldn't work
            logging.info("#### TEST 1")
            assert not g.match(seg_list[:2], parse_context=ctx)
            # Matching the whole list should, and the result should be flat
            logging.info("#### TEST 2")
            assert g.match(seg_list, parse_context=ctx).matched_segments == (
                bs("bar", seg_list[0].pos_marker),
                seg_list[1],  # This will be the whitespace segment
                fs("foo", seg_list[2].pos_marker),
                bas("baar", seg_list[3].pos_marker)
                # NB: No whitespace at the end, this shouldn't be consumed.
            )
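The point of this test is that nested Sequence results are flattened: matching Sequence(Sequence(bs, fs), bas) yields one flat tuple of segments rather than a nested structure. The flattening rule itself is simple; a generic sketch:

def flatten(match):
    """Recursively flatten nested match tuples into one flat tuple."""
    out = []
    for part in match:
        if isinstance(part, tuple):
            out.extend(flatten(part))
        else:
            out.append(part)
    return tuple(out)

nested = (("bar", " ", "foo"), "baar")
assert flatten(nested) == ("bar", " ", "foo", "baar")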
Example 14
def test__parser__grammar__base__longest_trimmed_match__adv(seg_list, caplog):
    """Test the _longest_trimmed_match method of the BaseGrammar."""
    bs = KeywordSegment.make("bar")
    fs = KeywordSegment.make("foo")
    matchers = [
        bs,
        fs,
        Sequence(bs, fs),  # This should be the winner.
        OneOf(bs, fs),
        Sequence(bs, fs),  # Another to check we return the first
    ]
    with RootParseContext(dialect=None) as ctx:
        # Run the matchers against the full segment list.
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            match, matcher = BaseGrammar._longest_trimmed_match(seg_list, matchers, ctx)
    # Check we got a match
    assert match
    # Check we got the right one.
    assert matcher is matchers[2]
    # And it matched the first three segments
    assert len(match) == 3
Example 15
def test__parser__grammar_delimited(
    min_delimiters,
    allow_gaps,
    allow_trailing,
    token_list,
    match_len,
    caplog,
    generate_test_segments,
    fresh_ansi_dialect,
):
    """Test the Delimited grammar when not code_only."""
    seg_list = generate_test_segments(token_list)
    g = Delimited(
        KeywordSegment.make("bar"),
        delimiter=KeywordSegment.make(".", name="dot"),
        allow_gaps=allow_gaps,
        allow_trailing=allow_trailing,
        min_delimiters=min_delimiters,
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Matching with whitespace shouldn't match if we need at least one delimiter
            m = g.match(seg_list, parse_context=ctx)
            assert len(m) == match_len
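Delimited matches a repeating content-delimiter-content pattern, with allow_trailing permitting a dangling delimiter and min_delimiters setting a lower bound. A compact sketch of that loop, using plain string tokens and hypothetical names:

def match_delimited(tokens, content, delimiter,
                    min_delimiters=0, allow_trailing=False):
    """Return the number of tokens consumed, or 0 if matching fails."""
    idx = delimiters = 0
    while idx < len(tokens) and tokens[idx] == content:
        idx += 1
        if idx < len(tokens) and tokens[idx] == delimiter:
            idx += 1
            delimiters += 1
        else:
            break
    # Back off a trailing delimiter unless explicitly allowed.
    if delimiters and idx and tokens[idx - 1] == delimiter and not allow_trailing:
        idx -= 1
        delimiters -= 1
    return idx if delimiters >= min_delimiters else 0

assert match_delimited(["bar", ".", "bar"], "bar", ".") == 3
assert match_delimited(["bar", ".", "bar", "."], "bar", ".",
                       allow_trailing=True) == 4
assert match_delimited(["bar"], "bar", ".", min_delimiters=1) == 0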
Example 16
def test__parser__grammar_sequence(seg_list, caplog):
    """Test the Sequence grammar."""
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    g = Sequence(bs, fs)
    gc = Sequence(bs, fs, allow_gaps=False)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Should be able to match the list using the normal matcher
            logging.info("#### TEST 1")
            m = g.match(seg_list, parse_context=ctx)
            assert m
            assert len(m) == 3
            assert m.matched_segments == (
                bs("bar", seg_list[0].pos_marker),
                seg_list[1],  # This will be the whitespace segment
                fs("foo", seg_list[2].pos_marker),
            )
            # Shouldn't match with the allow_gaps=False matcher
            logging.info("#### TEST 2")
            assert not gc.match(seg_list, parse_context=ctx)
            # Shouldn't match even on the normal one if we don't start at the beginning
            logging.info("#### TEST 3")
            assert not g.match(seg_list[1:], parse_context=ctx)
Example 17
def test__parser__grammar_oneof_take_first(seg_list):
    """Test that the OneOf grammar takes first match in case they are of same length."""
    fooRegex = ReSegment.make(r"fo{2}")
    foo = KeywordSegment.make("foo")

    # Both segments would match "foo"
    # so we test that order matters
    g1 = OneOf(fooRegex, foo)
    g2 = OneOf(foo, fooRegex)
    with RootParseContext(dialect=None) as ctx:
        assert g1.match(seg_list[2:],
                        parse_context=ctx).matched_segments == (fooRegex(
                            "foo", seg_list[2].pos_marker), )
        assert g2.match(seg_list[2:],
                        parse_context=ctx).matched_segments == (foo(
                            "foo", seg_list[2].pos_marker), )
Example 18
def test__parser__grammar__base__look_ahead_match(
    seg_list_slice,
    matcher_keywords,
    result_slice,
    winning_matcher,
    pre_match_slice,
    seg_list,
):
    """Test the _look_ahead_match method of the BaseGrammar."""
    # Make the matcher keywords
    matchers = [KeywordSegment.make(keyword) for keyword in matcher_keywords]
    # Fetch the matching keyword from above by index
    winning_matcher = matchers[matcher_keywords.index(winning_matcher)]

    with RootParseContext(dialect=None) as ctx:
        m = BaseGrammar._look_ahead_match(
            seg_list[seg_list_slice],
            matchers,
            ctx,
        )

    # Check structure of the response.
    assert isinstance(m, tuple)
    assert len(m) == 3
    # Unpack
    result_pre_match, result_match, result_matcher = m

    # Check the right matcher won
    assert result_matcher == winning_matcher

    # Make check tuple for the pre-match section
    if pre_match_slice:
        pre_match_slice = seg_list[pre_match_slice]
    else:
        pre_match_slice = ()
    assert result_pre_match == pre_match_slice

    # Make the check tuple
    expected_result = make_result_tuple(
        result_slice=result_slice,
        matcher_keywords=matcher_keywords,
        seg_list=seg_list,
    )
    assert result_match.matched_segments == expected_result
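As the assertions show, _look_ahead_match returns a three-tuple: the unmatched prefix, the match itself, and the matcher that won. Schematically, with string equality standing in for real matching:

def look_ahead_match(tokens, matchers):
    """Return (pre_match, matched, winning_matcher), or an empty match."""
    for idx, tok in enumerate(tokens):
        for matcher in matchers:
            if tok == matcher:  # stand-in for a real match test
                return tuple(tokens[:idx]), (tok,), matcher
    return tuple(tokens), (), None

pre, match, won = look_ahead_match(["x", " ", "bar"], ["foo", "bar"])
assert pre == ("x", " ") and match == ("bar",) and won == "bar"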
Example 19
def test__parser__grammar__base__longest_trimmed_match__basic(
    seg_list, seg_list_slice, matcher_keywords, trim_noncode, result_slice
):
    """Test the _longest_trimmed_match method of the BaseGrammar."""
    # Make the matcher keywords
    matchers = [KeywordSegment.make(keyword) for keyword in matcher_keywords]

    with RootParseContext(dialect=None) as ctx:
        m, _ = BaseGrammar._longest_trimmed_match(
            seg_list[seg_list_slice], matchers, ctx, trim_noncode=trim_noncode
        )

    # Make the check tuple
    expected_result = make_result_tuple(
        result_slice=result_slice,
        matcher_keywords=matcher_keywords,
        seg_list=seg_list,
    )

    assert m.matched_segments == expected_result
Example 20
    def expand(self) -> "Dialect":
        """Expand any callable references to concrete ones.

        This must be called before using the dialect, but it allows
        more flexible definitions to happen at runtime.

        NOTE: This method returns a copy of the current dialect
        so that we don't pollute the original dialect and get
        dependency issues.

        Returns:
            :obj:`Dialect`: a copy of the given dialect but
                with expanded references.
        """
        # Are we already expanded?
        if self.expanded:
            raise ValueError(
                "Attempted to re-expand an already expanded dialect.")

        expanded_copy = self.copy_as(name=self.name)
        # Expand any callable elements of the dialect.
        for key in expanded_copy._library:
            if isinstance(expanded_copy._library[key], SegmentGenerator):
                # If the element is callable, call it passing the current
                # dialect and store the result in its place.
                # Use the .replace() method for its error handling.
                expanded_copy.replace(
                    **{key: expanded_copy._library[key].expand(expanded_copy)})
        # Expand any keyword sets.
        for keyword_set in [
                "unreserved_keywords",
                "reserved_keywords",
        ]:  # e.g. reserved_keywords, (JOIN, ...)
            # Make sure the values are available as KeywordSegments
            for kw in expanded_copy.sets(keyword_set):
                n = kw.capitalize() + "KeywordSegment"
                if n not in expanded_copy._library:
                    expanded_copy._library[n] = KeywordSegment.make(kw.lower())
        expanded_copy.expanded = True
        return expanded_copy
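Unlike the in-place expand() in Example 8, this version copies first and refuses to run twice, so the pristine dialect remains usable as a base for deriving other dialects. A toy illustration of that guard (not sqlfluff's actual copy_as implementation):

import copy

class ToyDialect:
    """Toy stand-in demonstrating the copy-then-expand guard."""

    def __init__(self):
        self.expanded = False
        self._library = {}

    def expand(self) -> "ToyDialect":
        if self.expanded:
            raise ValueError("Attempted to re-expand an already expanded dialect.")
        expanded_copy = copy.deepcopy(self)  # the original stays pristine
        expanded_copy._library["SelectKeywordSegment"] = "SELECT"
        expanded_copy.expanded = True
        return expanded_copy

base = ToyDialect()
ready = base.expand()
assert ready.expanded and not base.expanded
assert "SelectKeywordSegment" not in base._library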
Example 21
def test__parser__core_ephemeral_segment(raw_seg_list):
    """Test the Mystical KeywordSegment."""
    # First make a keyword
    BarKeyword = KeywordSegment.make("bar")

    ephemeral_segment = EphemeralSegment.make(
        match_grammar=BarKeyword, parse_grammar=BarKeyword, name="foobar"
    )

    with RootParseContext(dialect=None) as ctx:
        # Test on a slice containing only the first element
        m = ephemeral_segment.match(raw_seg_list[:1], parse_context=ctx)
        assert m
        # Make sure that it matches as an instance of EphemeralSegment
        elem = m.matched_segments[0]
        assert isinstance(elem, ephemeral_segment)
        # Parse it and make sure we don't get an EphemeralSegment back
        res = elem.parse(ctx)
        assert isinstance(res, tuple)
        elem = res[0]
        assert not isinstance(elem, ephemeral_segment)
        assert isinstance(elem, BarKeyword)
Example 22
def test__parser__core_keyword(raw_seg_list):
    """Test the Mystical KeywordSegment."""
    # First make a keyword
    FooKeyword = KeywordSegment.make("foo")
    # Check it looks as expected
    assert issubclass(FooKeyword, KeywordSegment)
    assert FooKeyword.__name__ == "FOO_KeywordSegment"
    assert FooKeyword._template == "FOO"
    with RootParseContext(dialect=None) as ctx:
        # Match it against a list and check it doesn't match
        assert not FooKeyword.match(raw_seg_list, parse_context=ctx)
        # Match it against the first element and check it doesn't match
        assert not FooKeyword.match(raw_seg_list[0], parse_context=ctx)
        # Match it against the first element as a list and check it doesn't match
        assert not FooKeyword.match([raw_seg_list[0]], parse_context=ctx)
        # Match it against the final element (returns tuple)
        m = FooKeyword.match(raw_seg_list[1], parse_context=ctx)
        assert m
        assert m.matched_segments[0].raw == "foo"
        assert isinstance(m.matched_segments[0], FooKeyword)
        # Match it against the final element as a list
        assert FooKeyword.match([raw_seg_list[1]], parse_context=ctx)
        # Match it against a list slice and check it still works
        assert FooKeyword.match(raw_seg_list[1:], parse_context=ctx)
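As the assertions show, KeywordSegment.make("foo") manufactures a new class whose name and match template derive from the keyword. A rough, sqlfluff-independent sketch of such a class factory (the real implementation differs):

class Keyword:
    """Minimal stand-in for a matchable keyword segment class."""

    _template = None

    def __init__(self, raw, pos_marker=None):
        self.raw = raw
        self.pos_marker = pos_marker

    @classmethod
    def matches(cls, raw):
        return raw.upper() == cls._template

    @classmethod
    def make(cls, keyword):
        # Build a new subclass with a derived name and template.
        name = keyword.upper() + "_KeywordSegment"
        return type(name, (cls,), {"_template": keyword.upper()})

FooKeyword = Keyword.make("foo")
assert FooKeyword.__name__ == "FOO_KeywordSegment"
assert FooKeyword._template == "FOO"
assert FooKeyword.matches("foo") and not FooKeyword.matches("bar")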
Example 23
"""The Test file for The New Parser (Grammar Classes)."""

import logging

from sqlfluff.core.parser import BaseSegment, KeywordSegment, Anything
from sqlfluff.core.parser.context import RootParseContext

BarKeyword = KeywordSegment.make("bar")


class BasicSegment(BaseSegment):
    """A basic segment for testing parse and expand."""

    type = "basic"
    match_grammar = Anything()
    parse_grammar = BarKeyword


def test__parser__parse_match(seg_list):
    """Test match method on a real segment."""
    with RootParseContext(dialect=None) as ctx:
        # This should match and have consumed everything, which should
        # now be part of a BasicSegment.
        m = BasicSegment.match(seg_list[:1], parse_context=ctx)
        assert m
        assert len(m.matched_segments) == 1
        assert isinstance(m.matched_segments[0], BasicSegment)
        assert m.matched_segments[0].segments[0].type == "raw"


def test__parser__parse_parse(seg_list, caplog):
Example 24
    def make_keyword(cls, raw):
        """Make a keyword segment."""
        # For the name of the segment, we force the string to lowercase.
        kws = KeywordSegment.make(raw.lower())
        # At the moment we let the rule dictate *case* here.
        return kws(raw=raw, pos_marker=None)
Example 25
        dict(is_code=True),
    ),
    (
        "double_quote",
        "regex",
        r'([rR]?[bB]?|[bB]?[rR]?)?(\"\"\"((?<!\\)(\\{2})*\\\"|\"{,2}(?!\")|[^\"])*(?<!\\)(\\{2})*\"\"\"|"((?<!\\)(\\{2})*\\"|[^"])*(?<!\\)(\\{2})*")',
        dict(is_code=True),
    ),
])

bigquery_dialect.add(
    DoubleQuotedLiteralSegment=NamedSegment.make("double_quote",
                                                 name="quoted_literal",
                                                 type="literal",
                                                 trim_chars=('"', )),
    StructKeywordSegment=KeywordSegment.make("struct", name="struct"),
)

bigquery_dialect.replace(FunctionContentsExpressionGrammar=OneOf(
    Ref("DatetimeUnitSegment"),
    Sequence(
        Ref("ExpressionSegment"),
        Sequence(OneOf("IGNORE", "RESPECT"), "NULLS", optional=True),
    ),
), )

# Add additional datetime units
# https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#extract
bigquery_dialect.sets("datetime_units").update([
    "MICROSECOND", "DAYOFWEEK", "ISOWEEK", "ISOYEAR", "DATE", "DATETIME",
    "TIME"