def test__parser__grammar_oneof_take_longest_match(seg_list):
    """Test that the OneOf grammar takes the longest match."""
    fooRegex = ReSegment.make(r"fo{2}")
    baar = KeywordSegment.make("baar")
    foo = KeywordSegment.make("foo")
    fooBaar = Sequence(
        foo,
        baar,
    )

    # Even if fooRegex comes first, fooBaar
    # is a longer match and should be taken
    g = OneOf(fooRegex, fooBaar)
    with RootParseContext(dialect=None) as ctx:
        assert fooRegex.match(seg_list[2:], parse_context=ctx).matched_segments == (
            fooRegex("foo", seg_list[2].pos_marker),
        )
        assert g.match(seg_list[2:], parse_context=ctx).matched_segments == (
            foo("foo", seg_list[2].pos_marker),
            baar("baar", seg_list[3].pos_marker),
        )
def test__parser__grammar__base__bracket_sensitive_look_ahead_match(
    bracket_seg_list, fresh_ansi_dialect
):
    """Test the _bracket_sensitive_look_ahead_match method of the BaseGrammar."""
    bs = StringParser("bar", KeywordSegment)
    fs = StringParser("foo", KeywordSegment)
    # We need a dialect here to do bracket matching
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Basic version, we should find bar first
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [fs, bs], ctx
        )
        assert pre_section == ()
        assert matcher == bs
        # NB the middle element is a match object
        assert match.matched_segments == (
            KeywordSegment("bar", bracket_seg_list[0].pos_marker),
        )

        # Look ahead for foo, we should find the one AFTER the brackets, not the
        # one IN the brackets.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [fs], ctx
        )
        # NB: The bracket segments will have been mutated, so we can't directly compare.
        # Make sure we've got a bracketed section in there.
        assert len(pre_section) == 5
        assert pre_section[2].is_type("bracketed")
        assert len(pre_section[2].segments) == 4
        assert matcher == fs
        # We shouldn't match the whitespace with the keyword
        assert match.matched_segments == (
            KeywordSegment("foo", bracket_seg_list[8].pos_marker),
        )
        # Check that the unmatched segments are nothing.
        assert not match.unmatched_segments
def test__parser__grammar_sequence(seg_list, caplog):
    """Test the Sequence grammar."""
    bs = StringParser("bar", KeywordSegment)
    fs = StringParser("foo", KeywordSegment)
    g = Sequence(bs, fs)
    # If running in the test environment, assert that Sequence recognises this
    if getenv("SQLFLUFF_TESTENV", ""):
        assert g.test_env
    gc = Sequence(bs, fs, allow_gaps=False)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Should be able to match the list using the normal matcher
            logging.info("#### TEST 1")
            m = g.match(seg_list, parse_context=ctx)
            assert m
            assert len(m) == 3
            assert m.matched_segments == (
                KeywordSegment("bar", seg_list[0].pos_marker),
                seg_list[1],  # This will be the whitespace segment
                KeywordSegment("foo", seg_list[2].pos_marker),
            )
            # Shouldn't match with the allow_gaps=False matcher
            logging.info("#### TEST 2")
            assert not gc.match(seg_list, parse_context=ctx)
            # Shouldn't match even on the normal one if we don't start at the beginning
            logging.info("#### TEST 3")
            assert not g.match(seg_list[1:], parse_context=ctx)
def _eval(self, segment, raw_stack, **kwargs):
    """Look for UNION keyword not immediately followed by DISTINCT or ALL.

    Note that UNION DISTINCT is valid, so the rule only applies to a bare
    UNION. The function does this by looking for a segment of type
    set_operator which has a UNION but no DISTINCT or ALL.
    """
    if segment.is_type("set_operator"):
        if "UNION" in segment.raw.upper() and not (
            "ALL" in segment.raw.upper() or "DISTINCT" in segment.raw.upper()
        ):
            return LintResult(
                anchor=segment,
                fixes=[
                    LintFix(
                        "edit",
                        segment.segments[0],
                        [
                            KeywordSegment("UNION"),
                            WhitespaceSegment(),
                            KeywordSegment("DISTINCT"),
                        ],
                    )
                ],
            )
    return LintResult()
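# A hedged sketch (not part of the rule above) of the rewrite the fix in
# _eval aims for: the first raw segment of a bare set operator is replaced,
# so a hypothetical query like
#
#   SELECT a FROM t UNION SELECT a FROM u
#
# would become
#
#   SELECT a FROM t UNION DISTINCT SELECT a FROM u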
def test__parser__grammar__base__bracket_sensitive_look_ahead_match(
    bracket_seg_list, fresh_ansi_dialect
):
    """Test the _bracket_sensitive_look_ahead_match method of the BaseGrammar."""
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    # We need a dialect here to do bracket matching
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Basic version, we should find bar first
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [fs, bs], ctx
        )
        assert pre_section == ()
        assert matcher == bs
        # NB the middle element is a match object
        assert match.matched_segments == (bs("bar", bracket_seg_list[0].pos_marker),)

        # Look ahead for foo, we should find the one AFTER the brackets, not the
        # one IN the brackets.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [fs], ctx
        )
        # NB: The bracket segments will have been mutated, so we can't directly compare
        assert len(pre_section) == 8
        assert matcher == fs
        # We shouldn't match the whitespace with the keyword
        assert match.matched_segments == (fs("foo", bracket_seg_list[8].pos_marker),)
def _coalesce_fix_list(
    context: RuleContext,
    coalesce_arg_1: BaseSegment,
    coalesce_arg_2: BaseSegment,
    preceding_not: bool = False,
) -> List[LintFix]:
    """Generate list of fixes to convert CASE statement to COALESCE function."""
    # Build the segments of the replacement COALESCE call.
    edits = [
        KeywordSegment("coalesce"),
        SymbolSegment("(", type="start_bracket"),
        coalesce_arg_1,
        SymbolSegment(",", type="comma"),
        WhitespaceSegment(),
        coalesce_arg_2,
        SymbolSegment(")", type="end_bracket"),
    ]
    if preceding_not:
        not_edits: List[BaseSegment] = [
            KeywordSegment("not"),
            WhitespaceSegment(),
        ]
        edits = not_edits + edits
    fixes = [
        LintFix.replace(
            context.segment,
            edits,
        )
    ]
    return fixes
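# A hedged sketch of what the edit list above concatenates to, with
# hypothetical arguments `col` and `fallback`: the raw output is
#
#   coalesce(col, fallback)
#
# and, when preceding_not is True,
#
#   not coalesce(col, fallback)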
def test__parser__grammar_oneof_exclude(seg_list):
    """Test the OneOf grammar exclude option."""
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    g = OneOf(bs, exclude=Sequence(bs, fs))
    with RootParseContext(dialect=None) as ctx:
        # Just against the first alone
        assert g.match(seg_list[:1], parse_context=ctx)
        # Now with the bit to exclude included
        assert not g.match(seg_list, parse_context=ctx)
def test__parser__grammar_startswith_b(
    include_terminator, match_length, seg_list, fresh_ansi_dialect, caplog
):
    """Test the StartsWith grammar with a terminator (included & excluded)."""
    baar = KeywordSegment.make("baar")
    bar = KeywordSegment.make("bar")
    grammar = StartsWith(bar, terminator=baar, include_terminator=include_terminator)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            m = grammar.match(seg_list, parse_context=ctx)
            assert len(m) == match_length
def _eval(self, context: RuleContext) -> LintResult:
    """Look for UNION keyword not immediately followed by DISTINCT or ALL.

    Note that UNION DISTINCT is valid, so the rule only applies to a bare
    UNION. The function does this by looking for a segment of type
    set_operator which has a UNION but no DISTINCT or ALL.

    Note that only some dialects have the concept of UNION DISTINCT, so the
    rule is only applied to dialects known to support this syntax.
    """
    if context.dialect.name not in [
        "ansi",
        "bigquery",
        "hive",
        "mysql",
        "redshift",
    ]:
        return LintResult()

    if context.segment.is_type("set_operator"):
        # Check the lowercase form first, so that the fix can preserve the
        # case of the original keyword.
        if "union" in context.segment.raw and not (
            "ALL" in context.segment.raw.upper()
            or "DISTINCT" in context.segment.raw.upper()
        ):
            return LintResult(
                anchor=context.segment,
                fixes=[
                    LintFix.replace(
                        context.segment.segments[0],
                        [
                            KeywordSegment("union"),
                            WhitespaceSegment(),
                            KeywordSegment("distinct"),
                        ],
                    )
                ],
            )
        elif "UNION" in context.segment.raw.upper() and not (
            "ALL" in context.segment.raw.upper()
            or "DISTINCT" in context.segment.raw.upper()
        ):
            return LintResult(
                anchor=context.segment,
                fixes=[
                    LintFix.replace(
                        context.segment.segments[0],
                        [
                            KeywordSegment("UNION"),
                            WhitespaceSegment(),
                            KeywordSegment("DISTINCT"),
                        ],
                    )
                ],
            )
    return LintResult()
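# A hedged note on the design choice above: the two branches exist so the
# fix preserves the case of the original keyword, i.e. a bare lowercase
# `union` becomes `union distinct` while `UNION` becomes `UNION DISTINCT`,
# rather than forcing one case onto the whole file.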
def test__parser__grammar_oneof_templated(seg_list):
    """Test the OneOf grammar.

    NB: Should behave the same regardless of code_only.
    """
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    g = OneOf(fs, bs)
    with RootParseContext(dialect=None) as ctx:
        # This shouldn't match, but it *ALSO* shouldn't raise an exception.
        # https://github.com/sqlfluff/sqlfluff/issues/780
        assert not g.match(seg_list[5:], parse_context=ctx)
def _create_base_is_null_sequence(
    is_upper: bool,
    operator_raw: str,
) -> CorrectionListType:
    """Build the correction segments for a comparison against NULL."""
    is_seg = KeywordSegment("IS" if is_upper else "is")
    not_seg = KeywordSegment("NOT" if is_upper else "not")
    if operator_raw == "=":
        return [is_seg]

    return [
        is_seg,
        WhitespaceSegment(),
        not_seg,
    ]
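# A hedged sketch of the corrections this helper spells out, assuming it is
# applied to a comparison against NULL:
#
#   x = NULL   ->  x IS NULL       (operator_raw == "=")
#   x != NULL  ->  x IS NOT NULL   (any other operator)
#
# with is_upper selecting between "IS NOT" and "is not".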
def test__parser__grammar_oneof(seg_list, allow_gaps):
    """Test the OneOf grammar.

    NB: Should behave the same regardless of code_only.
    """
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    g = OneOf(fs, bs, allow_gaps=allow_gaps)
    with RootParseContext(dialect=None) as ctx:
        # Check directly
        assert g.match(seg_list, parse_context=ctx).matched_segments == (
            bs("bar", seg_list[0].pos_marker),
        )
        # Check with a bit of whitespace
        assert not g.match(seg_list[1:], parse_context=ctx)
def test__parser__grammar_oneof_take_first(seg_list):
    """Test that the OneOf grammar takes the first match when lengths are equal."""
    fooRegex = RegexParser(r"fo{2}", KeywordSegment)
    foo = StringParser("foo", KeywordSegment)
    # Both segments would match "foo"
    # so we test that order matters
    g1 = OneOf(fooRegex, foo)
    g2 = OneOf(foo, fooRegex)
    with RootParseContext(dialect=None) as ctx:
        assert g1.match(seg_list[2:], parse_context=ctx).matched_segments == (
            KeywordSegment("foo", seg_list[2].pos_marker),
        )
        assert g2.match(seg_list[2:], parse_context=ctx).matched_segments == (
            KeywordSegment("foo", seg_list[2].pos_marker),
        )
def _eval(self, segment, parent_stack, raw_stack, **kwargs):
    """Implicit aliasing of table/column not allowed. Use explicit `AS` clause.

    We look for the alias segment, and then evaluate its parent and whether
    it contains an AS keyword. This is the _eval function for both L011 and
    L012.

    The use of `raw_stack` is just for working out how much whitespace to add.
    """
    if segment.is_type("alias_expression"):
        if parent_stack[-1].is_type(*self._target_elems):
            if not any(e.name.lower() == "as" for e in segment.segments):
                insert_buff = []
                insert_str = ""
                # Add initial whitespace if we need to...
                if raw_stack[-1].name not in ["whitespace", "newline"]:
                    insert_buff.append(WhitespaceSegment())
                    insert_str += " "
                # Add an AS (Uppercase for now, but could be corrected later)
                insert_buff.append(KeywordSegment("AS"))
                insert_str += "AS"
                # Add a trailing whitespace if we need to
                if segment.segments[0].name not in ["whitespace", "newline"]:
                    insert_buff.append(WhitespaceSegment())
                    insert_str += " "
                return LintResult(
                    anchor=segment,
                    fixes=[LintFix("create", segment.segments[0], insert_buff)],
                )
    return None
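# A hedged sketch of the fix built by insert_buff above, on a hypothetical
# query: an implicit alias
#
#   SELECT col alias FROM tbl
#
# would gain an explicit keyword,
#
#   SELECT col AS alias FROM tbl
#
# with WhitespaceSegments only added where the surrounding segments don't
# already provide a gap.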
def expand(self):
    """Expand any callable references to concrete ones.

    This must be called before using the dialect. But allows more flexible
    definitions to happen at runtime.
    """
    # Are we already expanded?
    if self.expanded:
        return
    # Expand any callable elements of the dialect.
    for key in self._library:
        if isinstance(self._library[key], SegmentGenerator):
            # If the element is callable, call it passing the current
            # dialect and store the result in its place.
            # Use the .replace() method for its error handling.
            self.replace(**{key: self._library[key].expand(self)})
    # Expand any keyword sets.
    for keyword_set in [
        "unreserved_keywords",
        "reserved_keywords",
    ]:  # e.g. reserved_keywords, (JOIN, ...)
        # Make sure the values are available as KeywordSegments
        for kw in self.sets(keyword_set):
            n = kw.capitalize() + "KeywordSegment"
            if n not in self._library:
                self._library[n] = KeywordSegment.make(kw.lower())
    self.expanded = True
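# A minimal, self-contained sketch of the library-name derivation used in the
# keyword loop above (stdlib only, no sqlfluff imports needed):
kw = "select"
assert kw.capitalize() + "KeywordSegment" == "SelectKeywordSegment"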
def test__parser__grammar_greedyuntil_bracketed(bracket_seg_list, fresh_ansi_dialect):
    """Test the GreedyUntil grammar with brackets."""
    fs = KeywordSegment.make("foo")
    g = GreedyUntil(fs)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Check that we can make it past the brackets
        assert len(g.match(bracket_seg_list, parse_context=ctx)) == 7
def test__parser__grammar__base__bracket_fail_with_open_paren_close_square_mismatch(
    generate_test_segments, fresh_ansi_dialect
):
    """Test _bracket_sensitive_look_ahead_match failure case.

    Should fail when the type of a close bracket doesn't match the type of the
    corresponding open bracket, but both are "definite" brackets.
    """
    fs = KeywordSegment.make("foo")
    # We need a dialect here to do bracket matching
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # The mismatched close bracket should raise a parse error
        with pytest.raises(SQLParseError) as sql_parse_error:
            BaseGrammar._bracket_sensitive_look_ahead_match(
                generate_test_segments(
                    [
                        "select",
                        " ",
                        "*",
                        " ",
                        "from",
                        "(",
                        "foo",
                        "]",  # Bracket types don't match (parens vs square)
                    ]
                ),
                [fs],
                ctx,
            )
    assert sql_parse_error.match("Found unexpected end bracket")
def test__parser__grammar_anysetof(generate_test_segments):
    """Test the AnySetOf grammar."""
    token_list = ["bar", " \t ", "foo", " \t ", "bar"]
    seg_list = generate_test_segments(token_list)
    bs = StringParser("bar", KeywordSegment)
    fs = StringParser("foo", KeywordSegment)
    g = AnySetOf(fs, bs)
    with RootParseContext(dialect=None) as ctx:
        # Check directly
        assert g.match(seg_list, parse_context=ctx).matched_segments == (
            KeywordSegment("bar", seg_list[0].pos_marker),
            WhitespaceSegment(" \t ", seg_list[1].pos_marker),
            KeywordSegment("foo", seg_list[2].pos_marker),
        )
        # Check with a bit of whitespace
        assert not g.match(seg_list[1:], parse_context=ctx)
def test__parser__grammar_startswith_a(
    keyword, match_truthy, seg_list, fresh_ansi_dialect, caplog
):
    """Test the StartsWith grammar simply."""
    Keyword = KeywordSegment.make(keyword)
    grammar = StartsWith(Keyword)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            m = grammar.match(seg_list, parse_context=ctx)
            assert bool(m) is match_truthy
def make_result_tuple(result_slice, matcher_keywords, seg_list):
    """Make a comparison tuple for test matching."""
    # No result slice means no match.
    if not result_slice:
        return ()

    return tuple(
        KeywordSegment(elem.raw, pos_marker=elem.pos_marker)
        if elem.raw in matcher_keywords
        else elem
        for elem in seg_list[result_slice]
    )
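# A hedged usage sketch for make_result_tuple: given segments for
# ["bar", " ", "foo"] and matcher_keywords ["foo", "bar"], a result_slice of
# slice(0, 3) yields KeywordSegments for "bar" and "foo", while the
# whitespace segment passes through unchanged.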
def _eval(self, segment, parent_stack, raw_stack, **kwargs):
    """Implicit aliasing of table/column not allowed. Use explicit `AS` clause.

    We look for the alias segment, and then evaluate its parent and whether
    it contains an AS keyword. This is the _eval function for both L011 and
    L012.

    The use of `raw_stack` is just for working out how much whitespace to add.
    """
    fixes = []
    if segment.is_type("alias_expression"):
        if parent_stack[-1].is_type(*self._target_elems):
            if any(e.name.lower() == "as" for e in segment.segments):
                if self.aliasing == "implicit":
                    if segment.segments[0].name.lower() == "as":
                        # Remove the AS as we're using implicit aliasing
                        fixes.append(LintFix("delete", segment.segments[0]))
                        anchor = raw_stack[-1]
                        # Remove whitespace before (if exists) or after (if not)
                        if len(raw_stack) > 0 and raw_stack[-1].type == "whitespace":
                            fixes.append(LintFix("delete", raw_stack[-1]))
                        elif (
                            len(segment.segments) > 1
                            and segment.segments[1].type == "whitespace"
                        ):
                            fixes.append(LintFix("delete", segment.segments[1]))
                        return LintResult(anchor=anchor, fixes=fixes)
            else:
                insert_buff = []
                # Add initial whitespace if we need to...
                if raw_stack[-1].name not in ["whitespace", "newline"]:
                    insert_buff.append(WhitespaceSegment())
                # Add an AS (Uppercase for now, but could be corrected later)
                insert_buff.append(KeywordSegment("AS"))
                # Add a trailing whitespace if we need to
                if segment.segments[0].name not in ["whitespace", "newline"]:
                    insert_buff.append(WhitespaceSegment())
                return LintResult(
                    anchor=segment,
                    fixes=[LintFix("create", segment.segments[0], insert_buff)],
                )
    return None
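# A hedged sketch of the "implicit" direction handled above: with
# self.aliasing == "implicit", an explicit alias
#
#   SELECT col AS alias FROM tbl
#
# is reduced to
#
#   SELECT col alias FROM tbl
#
# by deleting the AS keyword plus one adjacent whitespace segment (the one
# before it if present, otherwise the one after it).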
def test__parser__grammar_sequence_nested(seg_list, caplog):
    """Test the Sequence grammar when nested."""
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    bas = KeywordSegment.make("baar")
    g = Sequence(Sequence(bs, fs), bas)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Matching the start of the list shouldn't work
            logging.info("#### TEST 1")
            assert not g.match(seg_list[:2], parse_context=ctx)
            # Matching the whole list should, and the result should be flat
            logging.info("#### TEST 2")
            assert g.match(seg_list, parse_context=ctx).matched_segments == (
                bs("bar", seg_list[0].pos_marker),
                seg_list[1],  # This will be the whitespace segment
                fs("foo", seg_list[2].pos_marker),
                bas("baar", seg_list[3].pos_marker),
                # NB: No whitespace at the end, this shouldn't be consumed.
            )
def test__parser__grammar_greedyuntil(
    keyword, seg_list, enforce_ws, slice_len, fresh_ansi_dialect
):
    """Test the GreedyUntil grammar."""
    grammar = GreedyUntil(
        KeywordSegment.make(keyword),
        # NB: "preceeding" matches the spelling of the library's keyword
        # argument; "correcting" it here would break the call.
        enforce_whitespace_preceeding_terminator=enforce_ws,
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        assert (
            grammar.match(seg_list, parse_context=ctx).matched_segments
            == seg_list[:slice_len]
        )
def test__parser__multistringparser__match(generate_test_segments):
    """Test the MultiStringParser matchable."""
    parser = MultiStringParser(["foo", "bar"], KeywordSegment)
    with RootParseContext(dialect=None) as ctx:
        # Check directly
        seg_list = generate_test_segments(["foo", "fo"])
        # Matches when it should
        assert parser.match(seg_list[:1], parse_context=ctx).matched_segments == (
            KeywordSegment("foo", seg_list[0].pos_marker),
        )
        # Doesn't match when it shouldn't
        assert parser.match(seg_list[1:], parse_context=ctx).matched_segments == tuple()
def test__parser__grammar__base__longest_trimmed_match__adv(seg_list, caplog):
    """Test the _longest_trimmed_match method of the BaseGrammar."""
    bs = KeywordSegment.make("bar")
    fs = KeywordSegment.make("foo")
    matchers = [
        bs,
        fs,
        Sequence(bs, fs),  # This should be the winner.
        OneOf(bs, fs),
        Sequence(bs, fs),  # Another to check we return the first
    ]
    with RootParseContext(dialect=None) as ctx:
        # Matching the first element of the list
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            match, matcher = BaseGrammar._longest_trimmed_match(seg_list, matchers, ctx)
    # Check we got a match
    assert match
    # Check we got the right one.
    assert matcher is matchers[2]
    # And it matched the first three segments
    assert len(match) == 3
def test__parser__grammar_delimited(
    min_delimiters,
    allow_gaps,
    allow_trailing,
    token_list,
    match_len,
    caplog,
    generate_test_segments,
    fresh_ansi_dialect,
):
    """Test the Delimited grammar when not code_only."""
    seg_list = generate_test_segments(token_list)
    g = Delimited(
        KeywordSegment.make("bar"),
        delimiter=KeywordSegment.make(".", name="dot"),
        allow_gaps=allow_gaps,
        allow_trailing=allow_trailing,
        min_delimiters=min_delimiters,
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Matching with whitespace shouldn't match if we need at least one delimiter
            m = g.match(seg_list, parse_context=ctx)
            assert len(m) == match_len
def test__parser__grammar_sequence(seg_list, caplog):
    """Test the Sequence grammar."""
    fs = KeywordSegment.make("foo")
    bs = KeywordSegment.make("bar")
    g = Sequence(bs, fs)
    gc = Sequence(bs, fs, allow_gaps=False)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Should be able to match the list using the normal matcher
            logging.info("#### TEST 1")
            m = g.match(seg_list, parse_context=ctx)
            assert m
            assert len(m) == 3
            assert m.matched_segments == (
                bs("bar", seg_list[0].pos_marker),
                seg_list[1],  # This will be the whitespace segment
                fs("foo", seg_list[2].pos_marker),
            )
            # Shouldn't match with the allow_gaps=False matcher
            logging.info("#### TEST 2")
            assert not gc.match(seg_list, parse_context=ctx)
            # Shouldn't match even on the normal one if we don't start at the beginning
            logging.info("#### TEST 3")
            assert not g.match(seg_list[1:], parse_context=ctx)
def test__parser__grammar__base__look_ahead_match(
    seg_list_slice,
    matcher_keywords,
    result_slice,
    winning_matcher,
    pre_match_slice,
    seg_list,
):
    """Test the _look_ahead_match method of the BaseGrammar."""
    # Make the matcher keywords
    matchers = [KeywordSegment.make(keyword) for keyword in matcher_keywords]
    # Fetch the matching keyword from above by index
    winning_matcher = matchers[matcher_keywords.index(winning_matcher)]
    with RootParseContext(dialect=None) as ctx:
        m = BaseGrammar._look_ahead_match(
            seg_list[seg_list_slice],
            matchers,
            ctx,
        )
    # Check structure of the response.
    assert isinstance(m, tuple)
    assert len(m) == 3
    # Unpack
    result_pre_match, result_match, result_matcher = m
    # Check the right matcher won
    assert result_matcher == winning_matcher
    # Make check tuple for the pre-match section
    if pre_match_slice:
        pre_match_slice = seg_list[pre_match_slice]
    else:
        pre_match_slice = ()
    assert result_pre_match == pre_match_slice
    # Make the check tuple
    expected_result = make_result_tuple(
        result_slice=result_slice,
        matcher_keywords=matcher_keywords,
        seg_list=seg_list,
    )
    assert result_match.matched_segments == expected_result
def test__parser__grammar__base__longest_trimmed_match__basic(
    seg_list, seg_list_slice, matcher_keywords, trim_noncode, result_slice
):
    """Test the _longest_trimmed_match method of the BaseGrammar."""
    # Make the matcher keywords
    matchers = [KeywordSegment.make(keyword) for keyword in matcher_keywords]
    with RootParseContext(dialect=None) as ctx:
        m, _ = BaseGrammar._longest_trimmed_match(
            seg_list[seg_list_slice], matchers, ctx, trim_noncode=trim_noncode
        )
    # Make the check tuple
    expected_result = make_result_tuple(
        result_slice=result_slice,
        matcher_keywords=matcher_keywords,
        seg_list=seg_list,
    )
    assert m.matched_segments == expected_result
def expand(self) -> "Dialect": """Expand any callable references to concrete ones. This must be called before using the dialect. But allows more flexible definitions to happen at runtime. NOTE: This method returns a copy of the current dialect so that we don't pollute the original dialect and get dependency issues. Returns: :obj:`Dialect`: a copy of the given dialect but with expanded references. """ # Are we already expanded? if self.expanded: raise ValueError( "Attempted to re-expand an already expanded dialect.") expanded_copy = self.copy_as(name=self.name) # Expand any callable elements of the dialect. for key in expanded_copy._library: if isinstance(expanded_copy._library[key], SegmentGenerator): # If the element is callable, call it passing the current # dialect and store the result in its place. # Use the .replace() method for its error handling. expanded_copy.replace( **{key: expanded_copy._library[key].expand(expanded_copy)}) # Expand any keyword sets. for keyword_set in [ "unreserved_keywords", "reserved_keywords", ]: # e.g. reserved_keywords, (JOIN, ...) # Make sure the values are available as KeywordSegments for kw in expanded_copy.sets(keyword_set): n = kw.capitalize() + "KeywordSegment" if n not in expanded_copy._library: expanded_copy._library[n] = KeywordSegment.make(kw.lower()) expanded_copy.expanded = True return expanded_copy