def _dialect_specific_segment_parses(dialect, segmentref, raw, caplog):
    """Test that specific segments parse as expected.

    NB: We're testing the PARSE function not the MATCH function
    although this will be a recursive parse and so the match
    function of SUBSECTIONS will be tested if present. The match
    function of the parent will not be tested.
    """
    # Build a config for the requested dialect, lex the raw SQL and
    # resolve the named segment class/matchable from the dialect.
    config = FluffConfig(overrides=dict(dialect=dialect))
    seg_list = lex(raw, config=config)
    Seg = validate_segment(segmentref, config=config)

    # Most segments won't handle the end of file marker. We should strip it.
    if seg_list[-1].is_type("end_of_file"):
        seg_list = seg_list[:-1]

    # This test is different if we're working with RawSegment
    # derivatives or not.
    if isinstance(Seg, Matchable) or issubclass(Seg, RawSegment):
        print("Raw/Parser route...")
        # Raw/Matchable route: call .match() directly and require that
        # the whole input collapses into exactly one matched segment.
        with RootParseContext.from_config(config) as ctx:
            with caplog.at_level(logging.DEBUG):
                parsed = Seg.match(segments=seg_list, parse_context=ctx)
        assert isinstance(parsed, MatchResult)
        assert len(parsed.matched_segments) == 1
        print(parsed)
        # Unwrap the single matched segment for the shared checks below.
        parsed = parsed.matched_segments[0]
        print(parsed)
    else:
        print("Base route...")
        # Construct an unparsed segment
        seg = Seg(seg_list, pos_marker=seg_list[0].pos_marker)
        # Perform the match (THIS IS THE MEAT OF THE TEST)
        with RootParseContext.from_config(config) as ctx:
            with caplog.at_level(logging.DEBUG):
                parsed = seg.parse(parse_context=ctx)
        print(parsed)
        assert isinstance(parsed, Seg)

    # Check we get a good response
    print(parsed)
    print(type(parsed))
    print(type(parsed.raw))
    # Check we're all there: round-trip raw must equal the input.
    assert parsed.raw == raw
    # Check that there's nothing unparsable in the resulting tree.
    typs = parsed.type_set()
    assert "unparsable" not in typs
def test__parser__grammar__base__bracket_fail_with_unexpected_end_bracket(
    generate_test_segments, fresh_ansi_dialect
):
    """Test _bracket_sensitive_look_ahead_match edge case.

    Should fail gracefully and stop matching if we find a trailing unmatched.
    """
    foo_matcher = StringParser("foo", KeywordSegment)
    raw_tokens = [
        "bar",
        "(",  # This bracket pair should be mutated
        ")",
        " ",
        ")",  # This is the unmatched bracket
        " ",
        "foo",
    ]
    # Bracket matching needs a dialect in scope.
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        _, match, _ = BaseGrammar._bracket_sensitive_look_ahead_match(
            generate_test_segments(raw_tokens),
            [foo_matcher],
            ctx,
        )
        # Even though a "foo" sits at the end, we shouldn't match it.
        assert not match
        # The first (balanced) bracket pair should have been mutated
        # into a bracketed segment.
        segs = match.unmatched_segments
        assert segs[1].is_type("bracketed")
        assert segs[1].raw == "()"
        assert len(segs[1].segments) == 2
        # The trailing foo should be left untouched.
        assert segs[5].raw == "foo"
        assert not isinstance(segs[5], KeywordSegment)
def test__parser__grammar__base__bracket_sensitive_look_ahead_match(
    bracket_seg_list, fresh_ansi_dialect
):
    """Test the _bracket_sensitive_look_ahead_match method of the BaseGrammar."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    # Bracket matching needs a dialect in scope.
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Basic case: "bar" sits at the very start, so it should win.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [foo_parser, bar_parser], ctx
        )
        assert pre_section == ()
        assert matcher == bar_parser
        # NB: the middle element is a match object.
        assert match.matched_segments == (
            KeywordSegment("bar", bracket_seg_list[0].pos_marker),
        )

        # Look ahead for "foo": we should find the one AFTER the brackets,
        # not the one INSIDE them.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [foo_parser], ctx
        )
        # NB: The bracket segments will have been mutated, so we can't
        # compare directly. Just confirm a bracketed section is in there.
        assert len(pre_section) == 5
        assert pre_section[2].is_type("bracketed")
        assert len(pre_section[2].segments) == 4
        assert matcher == foo_parser
        # The preceding whitespace shouldn't be part of the keyword match.
        assert match.matched_segments == (
            KeywordSegment("foo", bracket_seg_list[8].pos_marker),
        )
        # Nothing should remain unmatched.
        assert not match.unmatched_segments
def test__parser__grammar_sequence_indent_conditional(seg_list, caplog):
    """Test the Sequence grammar with indents."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    # We will assume the default config has indented_joins = False.
    # We're testing without explicitly setting the `config_type` because
    # that's the assumed way of using the grammar in practice.
    grammar = Sequence(
        Conditional(Indent, indented_joins=False),
        bar_parser,
        Conditional(Indent, indented_joins=True),
        foo_parser,
    )
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            m = grammar.match(seg_list, parse_context=ctx)
        assert m
        # The first conditional (indented_joins=False) yields an Indent...
        assert isinstance(m.matched_segments[0], Indent)
        # ...followed by the first keyword.
        assert isinstance(m.matched_segments[1], KeywordSegment)
        # The whitespace between keywords should survive.
        assert isinstance(m.matched_segments[2], WhitespaceSegment)
        # The second conditional (indented_joins=True) should NOT yield
        # an Indent; the next element is the second keyword directly.
        assert not isinstance(m.matched_segments[3], Indent)
        assert isinstance(m.matched_segments[3], KeywordSegment)
def test__parser__grammar_greedyuntil_bracketed(bracket_seg_list, fresh_ansi_dialect):
    """Test the GreedyUntil grammar with brackets."""
    grammar = GreedyUntil(KeywordSegment.make("foo"))
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # We should be able to match past (i.e. over) the brackets.
        assert len(grammar.match(bracket_seg_list, parse_context=ctx)) == 7
def test__parser__grammar__base__bracket_sensitive_look_ahead_match(
    bracket_seg_list, fresh_ansi_dialect
):
    """Test the _bracket_sensitive_look_ahead_match method of the BaseGrammar."""
    foo_kw = KeywordSegment.make("foo")
    bar_kw = KeywordSegment.make("bar")
    # Bracket matching needs a dialect in scope.
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Basic case: "bar" sits at the very start, so it should win.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [foo_kw, bar_kw], ctx
        )
        assert pre_section == ()
        assert matcher == bar_kw
        # NB: the middle element is a match object.
        assert match.matched_segments == (
            bar_kw("bar", bracket_seg_list[0].pos_marker),
        )

        # Look ahead for "foo": we should find the one AFTER the brackets,
        # not the one INSIDE them.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [foo_kw], ctx
        )
        # NB: The bracket segments will have been mutated, so we can't
        # compare them directly.
        assert len(pre_section) == 8
        assert matcher == foo_kw
        # The preceding whitespace shouldn't be part of the keyword match.
        assert match.matched_segments == (
            foo_kw("foo", bracket_seg_list[8].pos_marker),
        )
def test__parser__grammar_oneof_take_longest_match(seg_list):
    """Test that the OneOf grammar takes the longest match."""
    fooRegex = ReSegment.make(r"fo{2}")
    baar = KeywordSegment.make("baar")
    foo = KeywordSegment.make("foo")
    fooBaar = Sequence(foo, baar)

    # Even if fooRegex comes first, fooBaar is a longer
    # match and should be taken.
    grammar = OneOf(fooRegex, fooBaar)
    with RootParseContext(dialect=None) as ctx:
        # Sanity check: the regex alone matches just "foo".
        assert fooRegex.match(seg_list[2:], parse_context=ctx).matched_segments == (
            fooRegex("foo", seg_list[2].pos_marker),
        )
        # The OneOf should prefer the longer two-segment sequence.
        assert grammar.match(seg_list[2:], parse_context=ctx).matched_segments == (
            foo("foo", seg_list[2].pos_marker),
            baar("baar", seg_list[3].pos_marker),
        )
def test__parser__grammar_sequence(seg_list, caplog):
    """Test the Sequence grammar.

    Covers three cases: a normal match, a failed match when gaps are
    disallowed, and a failed match when not starting at the beginning.
    """
    bs = StringParser("bar", KeywordSegment)
    fs = StringParser("foo", KeywordSegment)
    g = Sequence(bs, fs)
    # If running in the test environment, assert that Sequence recognises this
    if getenv("SQLFLUFF_TESTENV", ""):
        assert g.test_env
    # A second grammar which disallows gaps between elements.
    gc = Sequence(bs, fs, allow_gaps=False)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Should be able to match the list using the normal matcher
            logging.info("#### TEST 1")
            m = g.match(seg_list, parse_context=ctx)
            assert m
            assert len(m) == 3
            assert m.matched_segments == (
                KeywordSegment("bar", seg_list[0].pos_marker),
                seg_list[1],  # This will be the whitespace segment
                KeywordSegment("foo", seg_list[2].pos_marker),
            )
            # Shouldn't with the allow_gaps matcher
            logging.info("#### TEST 2")
            assert not gc.match(seg_list, parse_context=ctx)
            # Shouldn't match even on the normal one if we don't start at
            # the beginning.
            # BUG FIX: this log label previously duplicated "#### TEST 2",
            # which made the debug output ambiguous between the two cases.
            logging.info("#### TEST 3")
            assert not g.match(seg_list[1:], parse_context=ctx)
def test__parser__core_keyword(raw_seg_list):
    """Test the Mystical KeywordSegment."""
    # First make a keyword
    FooKeyword = StringParser("foo", KeywordSegment, type="bar")
    # Check it looks as expected
    assert FooKeyword.template.upper() == "FOO"
    with RootParseContext(dialect=None) as ctx:
        # It shouldn't match the whole list...
        assert not FooKeyword.match(raw_seg_list, parse_context=ctx)
        # ...nor the first element alone...
        assert not FooKeyword.match(raw_seg_list[0], parse_context=ctx)
        # ...nor the first element wrapped in a list.
        assert not FooKeyword.match([raw_seg_list[0]], parse_context=ctx)
        # It SHOULD match the final element (returns tuple).
        m = FooKeyword.match(raw_seg_list[1], parse_context=ctx)
        assert m
        assert m.matched_segments[0].raw == "foo"
        assert isinstance(m.matched_segments[0], KeywordSegment)
        # It should also match the final element as a list...
        assert FooKeyword.match([raw_seg_list[1]], parse_context=ctx)
        # ...and as a slice.
        assert FooKeyword.match(raw_seg_list[1:], parse_context=ctx)
        # Check that the types work right. Importantly that the "bar"
        # type makes it in.
        assert m.matched_segments[0].class_types == {"base", "keyword", "raw", "bar"}
def test__parser__grammar__base__bracket_fail_with_open_paren_close_square_mismatch(
    generate_test_segments, fresh_ansi_dialect
):
    """Test _bracket_sensitive_look_ahead_match failure case.

    Should fail when the type of a close bracket doesn't match the type
    of the corresponding open bracket, but both are "definite" brackets.
    """
    foo_matcher = StringParser("foo", KeywordSegment)
    mismatched_tokens = [
        "select",
        " ",
        "*",
        " ",
        "from",
        "(",
        "foo",
        "]",  # Bracket types don't match (parens vs square)
    ]
    # Bracket matching needs a dialect in scope.
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # The mismatched close bracket should raise a parse error.
        with pytest.raises(SQLParseError) as sql_parse_error:
            BaseGrammar._bracket_sensitive_look_ahead_match(
                generate_test_segments(mismatched_tokens),
                [foo_matcher],
                ctx,
            )
        assert sql_parse_error.match("Found unexpected end bracket")
def test__parser__base_segments_base(raw_seg_list, fresh_ansi_dialect):
    """Test base segments behave as expected."""
    base_seg = DummySegment(raw_seg_list)
    # Check we assume the position correctly: the composite segment should
    # span from the start of the first child to the end of the last child.
    assert (
        base_seg.pos_marker.start_point_marker()
        == raw_seg_list[0].pos_marker.start_point_marker()
    )
    assert (
        base_seg.pos_marker.end_point_marker()
        == raw_seg_list[-1].pos_marker.end_point_marker()
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Expand and given we don't have a grammar we should get the same thing
        assert base_seg.parse(parse_context=ctx) == base_seg
    # Check that we correctly reconstruct the raw
    assert base_seg.raw == "foobar.barfoo"
    # Check tuple
    assert base_seg.to_tuple() == (
        "dummy",
        (raw_seg_list[0].to_tuple(), raw_seg_list[1].to_tuple()),
    )
    # Check Formatting and Stringification
    assert str(base_seg) == repr(base_seg) == "<DummySegment: ([L: 1, P: 1])>"
    # NOTE(review): the expected stringify output below looks
    # whitespace-squeezed (alignment padding may have been lost) —
    # confirm against the actual stringify alignment.
    assert base_seg.stringify(ident=1, tabsize=2) == (
        "[L: 1, P: 1] | dummy:\n"
        "[L: 1, P: 1] | raw: "
        " 'foobar'\n"
        "[L: 1, P: 7] | raw: "
        " '.barfoo'\n"
    )
def test__parser__grammar_noncode(seg_list, fresh_ansi_dialect):
    """Test the NonCodeMatcher."""
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        match = NonCodeMatcher().match(seg_list[1:], parse_context=ctx)
        # The NonCodeMatcher doesn't support the "simple" interface.
        assert NonCodeMatcher().simple(ctx) is None
        # One, and only one, segment should have matched.
        assert len(match) == 1
def test__parser__grammar_startswith_a(
    keyword, match_truthy, seg_list, fresh_ansi_dialect, caplog
):
    """Test the StartsWith grammar simply."""
    grammar = StartsWith(StringParser(keyword, KeywordSegment))
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            match = grammar.match(seg_list, parse_context=ctx)
        # Truthiness of the match should follow the parametrised expectation.
        assert bool(match) is match_truthy
def test__parser__grammar_ref_exclude(generate_test_segments, fresh_ansi_dialect):
    """Test the Ref grammar exclude option."""
    identifier = Ref("NakedIdentifierSegment", exclude=Ref.keyword("ABS"))
    test_segments = generate_test_segments(["ABS", "ABSOLUTE"])
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Assert "ABS" does not match, due to the exclude.
        assert not identifier.match([test_segments[0]], parse_context=ctx)
        # Assert "ABSOLUTE" does match.
        assert identifier.match([test_segments[1]], parse_context=ctx)
def test__parser__parse_match(seg_list):
    """Test match method on a real segment."""
    with RootParseContext(dialect=None) as ctx:
        # This should match and have consumed everything, which should
        # now be part of a BasicSegment.
        match = BasicSegment.match(seg_list[:1], parse_context=ctx)
        assert match
        assert len(match.matched_segments) == 1
        matched = match.matched_segments[0]
        assert isinstance(matched, BasicSegment)
        # The content inside is still a raw (unparsed) segment.
        assert matched.segments[0].type == "raw"
def test__parser__grammar_oneof_exclude(seg_list):
    """Test the OneOf grammar exclude option."""
    foo_kw = KeywordSegment.make("foo")
    bar_kw = KeywordSegment.make("bar")
    grammar = OneOf(bar_kw, exclude=Sequence(bar_kw, foo_kw))
    with RootParseContext(dialect=None) as ctx:
        # Against the first segment alone, it should match.
        assert grammar.match(seg_list[:1], parse_context=ctx)
        # With the excluded sequence present, it shouldn't.
        assert not grammar.match(seg_list, parse_context=ctx)
def test__parser__grammar_greedyuntil(
    keyword, seg_list, enforce_ws, slice_len, fresh_ansi_dialect
):
    """Test the GreedyUntil grammar."""
    grammar = GreedyUntil(
        StringParser(keyword, KeywordSegment),
        enforce_whitespace_preceding_terminator=enforce_ws,
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # We should match everything up to (but not including) the terminator.
        matched = grammar.match(seg_list, parse_context=ctx).matched_segments
        assert matched == seg_list[:slice_len]
def test__parser__grammar_startswith_b(
    include_terminator, match_length, seg_list, fresh_ansi_dialect, caplog
):
    """Test the StartsWith grammar with a terminator (included & excluded)."""
    baar = KeywordSegment.make("baar")
    bar = KeywordSegment.make("bar")
    grammar = StartsWith(bar, terminator=baar, include_terminator=include_terminator)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            match = grammar.match(seg_list, parse_context=ctx)
        # Whether the terminator is included changes the match length.
        assert len(match) == match_length
def parse(
    self, segments: Sequence["BaseSegment"], recurse=True, fname: str = None
) -> Optional["BaseSegment"]:
    """Parse a series of lexed tokens using the current dialect."""
    # Guard clause: nothing to parse.
    if not segments:
        return None
    # Wrap the tokens in a root segment for this file...
    root_segment = self.RootSegment(segments=segments, fname=fname)
    # ...and recursively parse it under a fresh root context.
    with RootParseContext.from_config(config=self.config, recurse=recurse) as ctx:
        return root_segment.parse(parse_context=ctx)
def test__parser__grammar_sequence_indent(seg_list, caplog):
    """Test the Sequence grammar with indents."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = Sequence(Indent, bar_parser, foo_parser)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            m = grammar.match(seg_list, parse_context=ctx)
        assert m
        # The leading Indent should appear in the match...
        assert isinstance(m.matched_segments[0], Indent)
        # ...followed by the first keyword.
        assert isinstance(m.matched_segments[1], KeywordSegment)
def test__parser__grammar_greedyuntil_bracketed(bracket_seg_list, fresh_ansi_dialect):
    """Test the GreedyUntil grammar with brackets."""
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = GreedyUntil(foo_parser)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # We should be able to match past (i.e. over) the brackets.
        match = grammar.match(bracket_seg_list, parse_context=ctx)
        assert len(match) == 4
        # A bracketed segment should have been constructed along the way.
        assert match.matched_segments[2].is_type("bracketed")
        assert match.matched_segments[2].raw == "(foo )"
        # The unmatched remainder is "foo" AND the whitespace before it.
        assert len(match.unmatched_segments) == 2
def test__parser__grammar_oneof_templated(seg_list):
    """Test the OneOf grammar.

    NB: Should behave the same regardless of code_only.
    """
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = OneOf(foo_parser, bar_parser)
    with RootParseContext(dialect=None) as ctx:
        # This shouldn't match, but it *ALSO* shouldn't raise an exception.
        # https://github.com/sqlfluff/sqlfluff/issues/780
        assert not grammar.match(seg_list[5:], parse_context=ctx)
def test__parser__multistringparser__match(generate_test_segments):
    """Test the MultiStringParser matchable."""
    parser = MultiStringParser(["foo", "bar"], KeywordSegment)
    with RootParseContext(dialect=None) as ctx:
        test_segments = generate_test_segments(["foo", "fo"])
        # A full template ("foo") should match...
        assert parser.match(
            test_segments[:1], parse_context=ctx
        ).matched_segments == (
            KeywordSegment("foo", test_segments[0].pos_marker),
        )
        # ...while a partial one ("fo") should not.
        assert (
            parser.match(test_segments[1:], parse_context=ctx).matched_segments
            == tuple()
        )
def test__parser__parse_expand(seg_list):
    """Test expand method on a real segment."""
    with RootParseContext(dialect=None) as ctx:
        # Match the segment, and get the matched segments.
        segments = BasicSegment.match(seg_list[:1], parse_context=ctx).matched_segments
        # This should be a tuple containing a BasicSegment.
        assert isinstance(segments[0], BasicSegment)
        # Now expand those segments, using the base class version
        # (not that it should matter).
        expanded = BasicSegment.expand(segments, parse_context=ctx)
        # We should get an iterable containing a BasicSegment...
        assert isinstance(expanded[0], BasicSegment)
        # ...which now contains a parsed keyword inside.
        assert isinstance(expanded[0].segments[0], KeywordSegment)
def test__parser__grammar_oneof(seg_list, allow_gaps):
    """Test the OneOf grammar.

    NB: Should behave the same regardless of code_only.
    """
    foo_kw = KeywordSegment.make("foo")
    bar_kw = KeywordSegment.make("bar")
    grammar = OneOf(foo_kw, bar_kw, allow_gaps=allow_gaps)
    with RootParseContext(dialect=None) as ctx:
        # Matching from the start should yield the "bar" keyword.
        assert grammar.match(seg_list, parse_context=ctx).matched_segments == (
            bar_kw("bar", seg_list[0].pos_marker),
        )
        # Starting on the whitespace shouldn't match at all.
        assert not grammar.match(seg_list[1:], parse_context=ctx)
def _dialect_specific_segment_not_match(dialect, segmentref, raw, caplog):
    """Test that specific segments do not match.

    NB: We're testing the MATCH function not the PARSE function.
    This is the opposite to the above.
    """
    # Build the dialect config, lex the raw string and resolve the segment.
    config = FluffConfig(overrides=dict(dialect=dialect))
    lexed_segments = lex(raw, config=config)
    Seg = validate_segment(segmentref, config=config)
    with RootParseContext.from_config(config) as ctx:
        with caplog.at_level(logging.DEBUG):
            match = Seg.match(segments=lexed_segments, parse_context=ctx)
    # The whole point: this input should NOT match.
    assert not match
def test__parser__parse_parse(seg_list, caplog):
    """Test parse method on a real segment."""
    with RootParseContext(dialect=None) as ctx:
        # Match the segment, and get the inner segment.
        seg = BasicSegment.match(seg_list[:1], parse_context=ctx).matched_segments[0]
        # At this stage it's an unparsed BasicSegment.
        assert isinstance(seg, BasicSegment)
        # Now parse that segment, with debug logging on because this is
        # where we'll need to debug if things fail.
        with caplog.at_level(logging.DEBUG):
            parsed = seg.parse(parse_context=ctx)
        # It should still be a BasicSegment...
        assert isinstance(parsed, BasicSegment)
        # ...but now with a parsed keyword inside.
        assert isinstance(parsed.segments[0], KeywordSegment)
def test__parser__grammar__base__ephemeral_segment(seg_list):
    """Test the ephemeral features BaseGrammar.

    Normally you can't call .match() on a BaseGrammar, but if things
    are set up right, then it should be possible in the case that
    the ephemeral_name is set.
    """
    grammar = BaseGrammar(ephemeral_name="TestGrammar")
    with RootParseContext(dialect=None) as ctx:
        m = grammar.match(seg_list, ctx)
        # We should get an ephemeral segment back...
        chkpoint = m.matched_segments[0]
        assert isinstance(chkpoint, EphemeralSegment)
        # ...wrapping exactly the original content.
        assert chkpoint.segments == seg_list
def test__parser__grammar_oneof_take_first(seg_list):
    """Test that the OneOf grammar takes first match in case they are of same length."""
    foo_regex = RegexParser(r"fo{2}", KeywordSegment)
    foo = StringParser("foo", KeywordSegment)
    # Both parsers would match "foo", so order should decide the winner.
    g1 = OneOf(foo_regex, foo)
    g2 = OneOf(foo, foo_regex)
    expected = (KeywordSegment("foo", seg_list[2].pos_marker),)
    with RootParseContext(dialect=None) as ctx:
        # Either ordering yields the same (equal-length) match.
        assert g1.match(seg_list[2:], parse_context=ctx).matched_segments == expected
        assert g2.match(seg_list[2:], parse_context=ctx).matched_segments == expected
def test__parser__grammar__base__look_ahead_match(
    seg_list_slice,
    matcher_keywords,
    result_slice,
    winning_matcher,
    pre_match_slice,
    seg_list,
):
    """Test the _look_ahead_match method of the BaseGrammar."""
    # Build one matcher per keyword.
    matchers = [StringParser(kw, KeywordSegment) for kw in matcher_keywords]
    # Resolve the expected winning matcher from its keyword.
    winning_matcher = matchers[matcher_keywords.index(winning_matcher)]

    with RootParseContext(dialect=None) as ctx:
        result = BaseGrammar._look_ahead_match(
            seg_list[seg_list_slice],
            matchers,
            ctx,
        )
        # The response should be a three-tuple.
        assert isinstance(result, tuple)
        assert len(result) == 3
        result_pre_match, result_match, result_matcher = result
        # The right matcher should have won.
        assert result_matcher == winning_matcher
        # Check the pre-match section against the expected slice
        # (empty tuple when no pre-match slice is given).
        expected_pre_match = seg_list[pre_match_slice] if pre_match_slice else ()
        assert result_pre_match == expected_pre_match
        # Check the matched section itself.
        expected_result = make_result_tuple(
            result_slice=result_slice,
            matcher_keywords=matcher_keywords,
            seg_list=seg_list,
        )
        assert result_match.matched_segments == expected_result