def _parse_tokens(
    tokens: Sequence[BaseSegment], config: FluffConfig, recurse: bool = True
) -> Tuple[Optional[BaseSegment], List[SQLParseError]]:
    """Run the parser over a token sequence, collecting parse errors.

    Returns a tuple of the parsed tree (or ``None`` if parsing raised)
    and a list of any ``SQLParseError`` violations found along the way.
    """
    errors: List[SQLParseError] = []
    try:
        tree: Optional[BaseSegment] = Parser(config=config).parse(
            tokens, recurse=recurse
        )
    except SQLParseError as err:
        # Parsing raised outright: record the error and bail with no tree.
        linter_logger.info("PARSING FAILED! : %s", err)
        errors.append(err)
        return None, errors

    if not tree:
        return tree, errors

    linter_logger.info("\n###\n#\n# {}\n#\n###".format("Parsed Tree:"))
    linter_logger.info("\n" + tree.stringify())

    # Parsing may "succeed" while still leaving unparsable segments behind.
    # No exception was raised for these, but we wrap each in a SQLParseError
    # so they flow through the common violation interface.
    for unparsable in tree.iter_unparsables():
        raw = unparsable.raw
        snippet = raw if len(raw) < 40 else raw[:40] + "..."
        errors.append(
            SQLParseError(
                "Line {0[0]}, Position {0[1]}: Found unparsable section: {1!r}".format(
                    unparsable.pos_marker.working_loc,
                    snippet,
                ),
                segment=unparsable,
            )
        )
        linter_logger.info("Found unparsable segment...")
        linter_logger.info(unparsable.stringify())

    return tree, errors
def parse_noqa(comment: str, line_no: int):
    """Extract ignore mask entries from a comment string.

    Returns ``None`` for non-noqa comments, a ``NoQaDirective`` for valid
    directives, or a ``SQLParseError`` describing a malformed directive.
    """
    # Also trim any whitespace afterward
    if not comment.startswith("noqa"):
        # Not an ignore directive at all.
        return None
    # This is an ignore identifier
    remainder = comment[4:]
    if remainder and not remainder.startswith(":"):
        return SQLParseError(
            "Malformed 'noqa' section. Expected 'noqa: <rule>[,...]",
            line_no=line_no,
        )
    remainder = remainder[1:].strip() if remainder else ""
    if not remainder:
        # Bare "noqa" (or "noqa:") ignores everything on this line.
        return NoQaDirective(line_no, None, None)

    action: Optional[str]
    if "=" in remainder:
        action, rule_part = remainder.split("=", 1)
        if action not in {"disable", "enable"}:  # pragma: no cover
            return SQLParseError(
                "Malformed 'noqa' section. "
                "Expected 'noqa: enable=<rule>[,...] | all' "
                "or 'noqa: disable=<rule>[,...] | all",
                line_no=line_no,
            )
    else:
        action = None
        rule_part = remainder

    # A bare enable/disable without "=" is also malformed.
    if rule_part in {"disable", "enable"}:
        return SQLParseError(
            "Malformed 'noqa' section. "
            "Expected 'noqa: enable=<rule>[,...] | all' "
            "or 'noqa: disable=<rule>[,...] | all",
            line_no=line_no,
        )

    rules: Optional[Tuple[str, ...]]
    rules = (
        tuple(r.strip() for r in rule_part.split(","))
        if rule_part != "all"
        else None
    )
    return NoQaDirective(line_no, rules, action)
def generate_parse_fixture(example):
    """Parse example SQL file, write parse tree to YAML file.

    Args:
        example: A ``(dialect, sqlfile)`` pair naming the fixture to parse.

    Raises:
        SQLParseError: If the parsed tree contains unnamed "base" segments
            or any unparsable sections.
    """
    dialect, sqlfile = example
    tree = parse_example_file(dialect, sqlfile)
    _hash = compute_parse_tree_hash(tree)
    # Remove the .sql file extension. Use splitext rather than a blind
    # `[:-4]` slice so a filename with an unexpected extension isn't mangled.
    root = os.path.splitext(sqlfile)[0]
    path = os.path.join("test", "fixtures", "dialects", dialect, root + ".yml")
    with open(path, "w", newline="\n") as f:
        r = None
        if tree:
            # Check we don't have any base types or unparsable sections
            types = tree.type_set()
            if "base" in types:
                raise SQLParseError(f"Unnamed base section when parsing: {f.name}")
            if "unparsable" in types:
                raise SQLParseError(f"Could not parse: {f.name}")
            # The hash goes first so it sits at the top of the YAML file.
            r = dict(
                [("_hash", _hash)]
                + list(tree.as_record(code_only=True, show_raw=True).items())
            )
            # Header warning readers that the file is generated.
            print(
                "# YML test files are auto-generated from SQL files and should not be "
                "edited by",
                '# hand. To help enforce this, the "hash" field in the file must match '
                "a hash",
                "# computed by SQLFluff when running the tests. Please run",
                "# `python test/generate_parse_fixture_yml.py` to generate them after "
                "adding or",
                "# altering SQL files.",
                file=f,
                sep="\n",
            )
            yaml.dump(r, f, default_flow_style=False, sort_keys=False)
        else:
            # No tree: leave an (empty) file behind so the fixture exists.
            f.write("")
def generate_one_parse_fixture(example: _ParseExample) -> None:
    """Parse example SQL file, write parse tree to YAML file."""
    dialect, sqlfile = example
    tree = parse_example_file(dialect, sqlfile)
    _hash = compute_parse_tree_hash(tree)
    # Remove the .sql file extension
    path = _create_yaml_path(example)
    with open(path, "w", newline="\n") as f:
        if not tree:
            # Nothing parsed: leave an empty fixture file behind.
            f.write("")
            return

        # Check we don't have any base types or unparsable sections
        seg_types = tree.type_set()
        if "base" in seg_types:
            raise SQLParseError(f"Unnamed base section when parsing: {f.name}")
        if "unparsable" in seg_types:
            raise SQLParseError(f"Could not parse: {f.name}")

        records = tree.as_record(code_only=True, show_raw=True)
        assert records, "TypeGuard"
        # The hash goes first so it sits at the top of the YAML file.
        fixture: Optional[Dict[str, Optional[str]]] = {"_hash": _hash, **records}

        # Header warning readers that the file is generated.
        print(
            "# YML test files are auto-generated from SQL files and should not be "
            "edited by",
            '# hand. To help enforce this, the "hash" field in the file must match '
            "a hash",
            "# computed by SQLFluff when running the tests. Please run",
            "# `python test/generate_parse_fixture_yml.py` to generate them after "
            "adding or",
            "# altering SQL files.",
            file=f,
            sep="\n",
        )
        yaml.dump(fixture, f, default_flow_style=False, sort_keys=False)
        return
def check_still_complete(
    segments_in: Tuple["BaseSegment", ...],
    matched_segments: Tuple["BaseSegment", ...],
    unmatched_segments: Tuple["BaseSegment", ...],
) -> bool:
    """Check that the segments in are the same as the segments out.

    Compares the raw text before and after matching; any difference means
    segments were dropped or duplicated by the match.
    """
    before = join_segments_raw(segments_in)
    after = join_segments_raw(matched_segments + unmatched_segments)
    if before != after:  # pragma: no cover
        raise SQLParseError(
            f"Could not parse: {after}",
            segment=unmatched_segments[0],
        )
    return True
def _bracket_sensitive_look_ahead_match(
    cls,
    segments,
    matchers,
    parse_context,
    start_bracket=None,
    end_bracket=None,
    bracket_pairs_set="bracket_pairs",
):
    """Same as `_look_ahead_match` but with bracket counting.

    NB: Given we depend on `_look_ahead_match` we can also utilise
    the same performance optimisations which are implemented there.

    bracket_pairs_set: Allows specific segments to override the available
        bracket pairs. See the definition of "angle_bracket_pairs" in the
        BigQuery dialect for additional context on why this exists.

    Returns:
        `tuple` of (unmatched_segments, match_object, matcher).

    """
    # Type munging
    matchers = list(matchers)
    if isinstance(segments, BaseSegment):
        segments = [segments]

    # Have we been passed an empty list?
    if len(segments) == 0:
        return ((), MatchResult.from_unmatched(segments), None)

    # Get hold of the bracket matchers from the dialect, and append them
    # to the list of matchers. We get them from the relevant set on the
    # dialect. We use zip twice to "unzip" them. We ignore the first
    # argument because that's just the name.
    _, start_bracket_refs, end_bracket_refs = zip(
        *parse_context.dialect.sets(bracket_pairs_set)
    )
    # These are matchables, probably StringParsers.
    start_brackets = [
        parse_context.dialect.ref(seg_ref) for seg_ref in start_bracket_refs
    ]
    end_brackets = [
        parse_context.dialect.ref(seg_ref) for seg_ref in end_bracket_refs
    ]
    # Add any bracket-like things passed as arguments
    if start_bracket:
        start_brackets += [start_bracket]
    if end_bracket:
        end_brackets += [end_bracket]
    bracket_matchers = start_brackets + end_brackets

    # Make some buffers
    seg_buff = segments  # Segments still to be examined.
    pre_seg_buff = ()  # NB: Tuple. Everything consumed so far.
    bracket_stack: List[BracketInfo] = []  # Currently open brackets.

    # Iterate
    while True:
        # Do we have anything left to match on?
        if seg_buff:
            # Yes we have buffer left to work with.
            # Are we already in a bracket stack?
            if bracket_stack:
                # Yes, we're just looking for the closing bracket, or
                # another opening bracket.
                pre, match, matcher = cls._look_ahead_match(
                    seg_buff,
                    bracket_matchers,
                    parse_context=parse_context,
                )
                if match:
                    # NB: We can only consider this as a nested bracket if the start
                    # and end tokens are not the same. If a matcher is both a start and
                    # end token we cannot deepen the bracket stack. In general, quoted
                    # strings are a typical example where the start and end tokens are
                    # the same. Currently, though, quoted strings are handled elsewhere
                    # in the parser, and there are no cases where *this* code has to
                    # handle identical start and end brackets. For now, consider this
                    # a small, speculative investment in a possible future requirement.
                    if matcher in start_brackets and matcher not in end_brackets:
                        # Same procedure as below in finding brackets.
                        bracket_stack.append(
                            BracketInfo(
                                bracket=match.matched_segments[0],
                            )
                        )
                        pre_seg_buff += pre
                        pre_seg_buff += match.matched_segments
                        seg_buff = match.unmatched_segments
                        continue
                    elif matcher in end_brackets:
                        # Found an end bracket. Does its type match that of
                        # the innermost start bracket? E.g. ")" matches "(",
                        # "]" matches "[".
                        # For the start bracket we don't have the matcher
                        # but we can work out the name, so we use that for
                        # the lookup.
                        # NOTE(review): assumes each start bracket matcher's
                        # `.name` equals the `.name` of the segment it matched
                        # earlier — confirm against the dialect definitions.
                        start_index = [
                            bracket.name for bracket in start_brackets
                        ].index(bracket_stack[-1].bracket.name)
                        # For the end index, we can just look for the matcher
                        end_index = end_brackets.index(matcher)
                        bracket_types_match = start_index == end_index
                        if bracket_types_match:
                            # Yes, the types match. So we've found a
                            # matching end bracket. Pop the stack and carry
                            # on.
                            bracket_stack.pop()
                            pre_seg_buff += pre
                            pre_seg_buff += match.matched_segments
                            seg_buff = match.unmatched_segments
                            continue
                        else:
                            # The types don't match. Error.
                            raise SQLParseError(
                                f"Found unexpected end bracket!, was expecting {end_brackets[start_index]}, but got {matcher}",
                                segment=match.matched_segments[0],
                            )
                    else:
                        raise RuntimeError("I don't know how we get here?!")
                else:
                    # No match, we're in a bracket stack. Error.
                    raise SQLParseError(
                        "Couldn't find closing bracket for opening bracket.",
                        segment=bracket_stack[-1].bracket,
                    )
            else:
                # No, we're open to more opening brackets or the thing(s)
                # that we're otherwise looking for.
                pre, match, matcher = cls._look_ahead_match(
                    seg_buff,
                    matchers + bracket_matchers,
                    parse_context=parse_context,
                )
                if match:
                    if matcher in matchers:
                        # It's one of the things we were looking for!
                        # Return.
                        return (pre_seg_buff + pre, match, matcher)
                    elif matcher in start_brackets:
                        # We've found the start of a bracket segment.
                        # NB: It might not *actually* be the bracket itself,
                        # but could be some non-code element preceding it.
                        # That's actually ok.
                        # Add the bracket to the stack.
                        bracket_stack.append(
                            BracketInfo(
                                bracket=match.matched_segments[0],
                            )
                        )
                        # Add the matched elements and anything before it to the
                        # pre segment buffer. Reset the working buffer.
                        pre_seg_buff += pre
                        pre_seg_buff += match.matched_segments
                        seg_buff = match.unmatched_segments
                        continue
                    elif matcher in end_brackets:
                        # We've found an unexpected end bracket! This is likely
                        # because we're matching a section which should have ended.
                        # If we had a match, it would have matched by now, so this
                        # means no match.
                        parse_match_logging(
                            cls.__name__,
                            "_bracket_sensitive_look_ahead_match",
                            "UEXB",
                            parse_context=parse_context,
                            v_level=3,
                            got=matcher,
                        )
                        return ((), MatchResult.from_unmatched(segments), None)
                    else:
                        # This shouldn't happen!?
                        raise NotImplementedError(
                            "This shouldn't happen. Panic in _bracket_sensitive_look_ahead_match."
                        )
                else:
                    # Not in a bracket stack, but no match. This is a happy
                    # unmatched exit.
                    return ((), MatchResult.from_unmatched(segments), None)
        else:
            # No we're at the end:
            # Now check have we closed all our brackets?
            if bracket_stack:
                # No we haven't.
                raise SQLParseError(
                    f"Couldn't find closing bracket for opened brackets: `{bracket_stack}`.",
                    segment=bracket_stack[-1].bracket,
                )

            # We reached the end with no open brackets. This is a friendly
            # unmatched return.
            return ((), MatchResult.from_unmatched(segments), None)
def _bracket_sensitive_look_ahead_match(
    cls, segments, matchers, parse_context, start_bracket=None, end_bracket=None
):
    """Same as `_look_ahead_match` but with bracket counting.

    NB: Given we depend on `_look_ahead_match` we can also utilise
    the same performance optimisations which are implemented there.

    Returns:
        `tuple` of (unmatched_segments, match_object, matcher).

    """
    # Type munging
    matchers = list(matchers)
    if isinstance(segments, BaseSegment):
        segments = [segments]

    # Have we been passed an empty list?
    if len(segments) == 0:
        return ((), MatchResult.from_unmatched(segments), None)

    # Get hold of the bracket matchers from the dialect, and append them
    # to the list of matchers. We get them from the relevant set on the
    # dialect. We use zip twice to "unzip" them. We ignore the first
    # argument because that's just the name.
    _, start_bracket_refs, end_bracket_refs, definitely_bracket = zip(
        *parse_context.dialect.sets("bracket_pairs")
    )
    # These are currently strings which need rehydrating
    start_brackets = [
        parse_context.dialect.ref(seg_ref) for seg_ref in start_bracket_refs
    ]
    end_brackets = [
        parse_context.dialect.ref(seg_ref) for seg_ref in end_bracket_refs
    ]
    # Each pair carries a flag saying whether it is *definitely* a bracket
    # (as opposed to something which might merely look like one, e.g. "<").
    # Both ends of a pair share the same flag.
    start_definite = list(definitely_bracket)
    end_definite = list(definitely_bracket)
    # Add any bracket-like things passed as arguments
    if start_bracket:
        start_brackets += [start_bracket]
        start_definite += [True]
    if end_bracket:
        end_brackets += [end_bracket]
        end_definite += [True]
    bracket_matchers = start_brackets + end_brackets

    # Make some buffers
    seg_buff = segments  # Segments still to be examined.
    pre_seg_buff = ()  # NB: Tuple. Everything consumed so far.
    bracket_stack: List[BracketInfo] = []  # Currently open brackets.

    # Iterate
    while True:
        # Do we have anything left to match on?
        if seg_buff:
            # Yes we have buffer left to work with.
            # Are we already in a bracket stack?
            if bracket_stack:
                # Yes, we're just looking for the closing bracket, or
                # another opening bracket.
                pre, match, matcher = cls._look_ahead_match(
                    seg_buff,
                    bracket_matchers,
                    parse_context=parse_context,
                )
                if match:
                    # NB: We can only consider this as a nested bracket if the start
                    # and end tokens are not the same. If a matcher is both a start and
                    # end token we cannot deepen the bracket stack. In general, quoted
                    # strings are a typical example where the start and end tokens are
                    # the same. Currently, though, quoted strings are handled elsewhere
                    # in the parser, and there are no cases where *this* code has to
                    # handle identical start and end brackets. For now, consider this
                    # a small, speculative investment in a possible future requirement.
                    if matcher in start_brackets and matcher not in end_brackets:
                        # Same procedure as below in finding brackets.
                        bracket_stack.append(
                            BracketInfo(
                                bracket=match.matched_segments[0],
                                is_definite=start_definite[
                                    start_brackets.index(matcher)
                                ],
                            )
                        )
                        pre_seg_buff += pre
                        pre_seg_buff += match.matched_segments
                        seg_buff = match.unmatched_segments
                        continue
                    elif matcher in end_brackets:
                        # Found an end bracket. Does its type match that of
                        # the innermost start bracket (e.g. ")" matches "(",
                        # "]" matches "[".
                        # NOTE(review): this looks up the *class* of the stored
                        # bracket segment in the start matcher list — assumes
                        # start_brackets holds segment classes; confirm against
                        # what `dialect.ref` returns here.
                        start_index = start_brackets.index(
                            type(bracket_stack[-1].bracket)
                        )
                        end_index = end_brackets.index(matcher)
                        bracket_types_match = start_index == end_index
                        if bracket_types_match:
                            # Yes, the types match. So we've found a
                            # matching end bracket. Pop the stack and carry
                            # on.
                            bracket_stack.pop()
                            pre_seg_buff += pre
                            pre_seg_buff += match.matched_segments
                            seg_buff = match.unmatched_segments
                            continue
                        else:
                            # The types don't match. Check whether the end
                            # bracket is a definite bracket.
                            end_is_definite = end_definite[end_index]
                            if not end_is_definite:
                                # The end bracket whose type didn't match
                                # the innermost open bracket is not
                                # definite. Assume it's not a bracket and
                                # carry on.
                                pre_seg_buff += pre
                                pre_seg_buff += match.matched_segments
                                seg_buff = match.unmatched_segments
                            else:
                                # Definite end bracket does not match the
                                # innermost start bracket. Was the innermost
                                # start bracket definite? If yes, error. If
                                # no, assume it was not a bracket.
                                # Can we remove any brackets from the stack
                                # which aren't definites to resolve the issue?
                                for idx in range(len(bracket_stack) - 1, -1, -1):
                                    if not bracket_stack[idx].is_definite:
                                        del bracket_stack[idx]
                                        # We don't change the string buffer,
                                        # we assume that was ok.
                                        break
                                else:
                                    raise SQLParseError(
                                        f"Found unexpected end bracket!, was expecting {end_brackets[start_index]}, but got {matcher}",
                                        segment=match.matched_segments[0],
                                    )
                    else:
                        raise RuntimeError("I don't know how we get here?!")
                else:
                    # No match, we're in a bracket stack. Either this is an error,
                    # OR we were mistaken in our initial identification of the opening
                    # bracket. That's only allowed if `not definitely_bracket`.

                    # Can we remove any brackets from the stack which aren't definites
                    # to resolve the issue?
                    for idx, elem in enumerate(reversed(bracket_stack)):
                        if not elem.is_definite:
                            # BUGFIX: the reversed index `idx` maps to stack
                            # position -(idx + 1). The previous `-idx` deleted
                            # the wrong element — for idx == 0 it removed the
                            # *outermost* bracket (index 0) instead of the
                            # innermost non-definite one.
                            del bracket_stack[-(idx + 1)]
                            # We don't change the string buffer, we assume that was ok.
                            break
                    else:
                        # No we can't. We don't have a match and we're in a
                        # bracket stack.
                        raise SQLParseError(
                            "Couldn't find closing bracket for opening bracket.",
                            segment=bracket_stack[-1].bracket,
                        )
                    # We have attempted a potential solution to the problem.
                    # Loop around.
                    continue
            else:
                # No, we're open to more opening brackets or the thing(s)
                # that we're otherwise looking for.
                pre, match, matcher = cls._look_ahead_match(
                    seg_buff,
                    matchers + bracket_matchers,
                    parse_context=parse_context,
                )
                if match:
                    if matcher in matchers:
                        # It's one of the things we were looking for!
                        # Return.
                        return (pre_seg_buff + pre, match, matcher)
                    elif matcher in start_brackets:
                        # We've found the start of a bracket segment.
                        # NB: It might not *actually* be the bracket itself,
                        # but could be some non-code element preceding it.
                        # That's actually ok.
                        # Add the bracket to the stack.
                        bracket_stack.append(
                            BracketInfo(
                                bracket=match.matched_segments[0],
                                is_definite=start_definite[
                                    start_brackets.index(matcher)
                                ],
                            )
                        )
                        # Add the matched elements and anything before it to the
                        # pre segment buffer. Reset the working buffer.
                        pre_seg_buff += pre
                        pre_seg_buff += match.matched_segments
                        seg_buff = match.unmatched_segments
                        continue
                    elif matcher in end_brackets:
                        # Each bracket carries its "definite" attribute.
                        bracket_is_definite = end_definite[
                            end_brackets.index(matcher)
                        ]
                        if bracket_is_definite:
                            # We've found an unexpected end bracket!
                            raise SQLParseError(
                                f"Found unexpected end bracket!, was expecting one of: {matchers + bracket_matchers}, but got {matcher}",
                                segment=match.matched_segments[0],
                            )
                        # Not definite: treat it as ordinary content.
                        pre_seg_buff += pre
                        pre_seg_buff += match.matched_segments
                        seg_buff = match.unmatched_segments
                        continue
                    else:
                        # This shouldn't happen!?
                        raise NotImplementedError(
                            "This shouldn't happen. Panic in _bracket_sensitive_look_ahead_match."
                        )
                else:
                    # Not in a bracket stack, but no match. This is a happy
                    # unmatched exit.
                    return ((), MatchResult.from_unmatched(segments), None)
        else:
            # No we're at the end:
            # Now check have we closed all our brackets?
            if bracket_stack:
                # No we haven't.
                # Check that the unclosed brackets are definite
                definite_bracket_stack = [
                    b for b in bracket_stack if b.is_definite
                ]
                if definite_bracket_stack:
                    raise SQLParseError(
                        f"Couldn't find closing bracket for opened brackets: `{bracket_stack}`.",
                        segment=bracket_stack[-1].bracket,
                    )

            # We at the end but without a bracket left open. This is a
            # friendly unmatched return.
            return ((), MatchResult.from_unmatched(segments), None)
def parse_noqa(
    comment: str,
    line_no: int,
    rule_codes: List[str],
):
    """Extract ignore mask entries from a comment string.

    Returns ``None`` for non-noqa comments, a ``NoQaDirective`` for valid
    directives, or a ``SQLParseError`` describing a malformed directive.
    """
    # Also trim any whitespace afterward
    # Comment lines can also have noqa e.g.
    # --dafhsdkfwdiruweksdkjdaffldfsdlfjksd -- noqa: L016
    # Therefore extract last possible inline ignore.
    comment = [c.strip() for c in comment.split("--")][-1]
    if not comment.startswith("noqa"):
        # Not an ignore directive at all.
        return None
    # This is an ignore identifier
    remainder = comment[4:]
    if remainder and not remainder.startswith(":"):
        return SQLParseError(
            "Malformed 'noqa' section. Expected 'noqa: <rule>[,...]",
            line_no=line_no,
        )
    remainder = remainder[1:].strip() if remainder else ""
    if not remainder:
        # Bare "noqa" (or "noqa:") ignores everything on this line.
        return NoQaDirective(line_no, None, None)

    action: Optional[str]
    if "=" in remainder:
        action, rule_part = remainder.split("=", 1)
        if action not in {"disable", "enable"}:  # pragma: no cover
            return SQLParseError(
                "Malformed 'noqa' section. "
                "Expected 'noqa: enable=<rule>[,...] | all' "
                "or 'noqa: disable=<rule>[,...] | all",
                line_no=line_no,
            )
    else:
        action = None
        rule_part = remainder

    # A bare enable/disable without "=" is also malformed.
    if rule_part in {"disable", "enable"}:
        return SQLParseError(
            "Malformed 'noqa' section. "
            "Expected 'noqa: enable=<rule>[,...] | all' "
            "or 'noqa: disable=<rule>[,...] | all",
            line_no=line_no,
        )

    rules: Optional[Tuple[str, ...]]
    if rule_part == "all":
        rules = None
    else:
        # Rules can be globs therefore we compare to the rule_set to
        # expand the globs.
        expanded: List[str] = []
        for raw_rule in (r.strip() for r in rule_part.split(",")):
            matched_codes = [
                code
                for code in fnmatch.filter(rule_codes, raw_rule)
                if code not in expanded
            ]
            if matched_codes:
                expanded.extend(matched_codes)
            elif raw_rule not in expanded:
                # We were unable to expand the glob. Therefore assume the
                # user is referencing a special error type (e.g. PRS, LXR,
                # or TMP) and add it verbatim to the ignore list.
                expanded.append(raw_rule)
        rules = tuple(expanded)
    return NoQaDirective(line_no, rules, action)
def match(self, segments: Tuple["BaseSegment", ...], parse_context: ParseContext) -> MatchResult:
    """Match if this is a bracketed sequence, with content that matches one of the elements.

    1. work forwards to find the first bracket.
       If we find something other that whitespace, then fail out.
    2. Once we have the first bracket, we need to bracket count forward to find its
       partner.
    3. Assuming we find its partner then we try and match what goes between them
       using the match method of Sequence.
       If we match, great. If not, then we return an empty match.
       If we never find its partner then we return an empty match but should probably
       log a parsing warning, or error?
    """
    # Trim ends if allowed.
    if self.allow_gaps:
        pre_nc, seg_buff, post_nc = trim_non_code_segments(segments)
    else:
        seg_buff = segments
    # Rehydrate the bracket segments in question.
    start_bracket, end_bracket = self.get_bracket_from_dialect(parse_context)
    # Allow optional override for special bracket-like things
    start_bracket = self.start_bracket or start_bracket
    end_bracket = self.end_bracket or end_bracket
    # Look for the first bracket
    with parse_context.deeper_match() as ctx:
        start_match = start_bracket.match(seg_buff, parse_context=ctx)
    if start_match:
        seg_buff = start_match.unmatched_segments
    else:
        # Can't find the opening bracket. No Match.
        return MatchResult.from_unmatched(segments)
    # Look for the closing bracket
    content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
        segments=seg_buff,
        matchers=[end_bracket],
        parse_context=parse_context,
        start_bracket=start_bracket,
        end_bracket=end_bracket,
        bracket_pairs_set=self.bracket_pairs_set,
    )
    if not end_match:
        raise SQLParseError(
            "Couldn't find closing bracket for opening bracket.",
            segment=start_match.matched_segments[0],
        )
    # Match the content now we've confirmed the brackets.

    # First deal with the case of TOTALLY EMPTY BRACKETS e.g. "()"
    if not content_segs:
        # If it's allowed, return a match.
        if not self._elements or all(e.is_optional() for e in self._elements):
            return MatchResult(
                start_match.matched_segments + end_match.matched_segments,
                end_match.unmatched_segments,
            )
        # If not, don't.
        else:
            return MatchResult.from_unmatched(segments)

    # Then trim whitespace and deal with the case of no code content e.g. "( )"
    if self.allow_gaps:
        pre_nc, content_segs, post_nc = trim_non_code_segments(content_segs)
    else:
        pre_nc = ()
        post_nc = ()

    # If we don't have anything left after trimming, act accordingly.
    if not content_segs:
        if not self._elements or (
            all(e.is_optional() for e in self._elements) and self.allow_gaps
        ):
            return MatchResult(
                start_match.matched_segments
                + pre_nc
                + post_nc
                + end_match.matched_segments,
                end_match.unmatched_segments,
            )
        else:
            return MatchResult.from_unmatched(segments)

    # Match using super. Sequence will interpret the content of the elements.
    with parse_context.deeper_match() as ctx:
        content_match = super().match(content_segs, parse_context=ctx)

    # We require a complete match for the content (hopefully for obvious reasons)
    if content_match.is_complete():
        # Append some indent and dedent tokens at the start and the end.
        return MatchResult(
            # We need to realign the meta segments so the pos markers are correct.
            BaseSegment._position_segments(
                (
                    # NB: The nc segments go *outside* the indents.
                    start_match.matched_segments
                    + (Indent(),)  # Add a meta indent here
                    + pre_nc
                    + content_match.matched_segments
                    + post_nc
                    + (Dedent(),)  # Add a meta indent here
                    + end_match.matched_segments
                ),
            ),
            end_match.unmatched_segments,
        )
    # No complete match. Fail.
    else:
        return MatchResult.from_unmatched(segments)
def match(self, segments: Tuple["BaseSegment", ...], parse_context: ParseContext) -> MatchResult:
    """Match if a bracketed sequence, with content that matches one of the elements.

    1. work forwards to find the first bracket.
       If we find something other that whitespace, then fail out.
    2. Once we have the first bracket, we need to bracket count forward to find its
       partner.
    3. Assuming we find its partner then we try and match what goes between them
       using the match method of Sequence.
       If we match, great. If not, then we return an empty match.
       If we never find its partner then we return an empty match but should probably
       log a parsing warning, or error?
    """
    # Trim ends if allowed.
    if self.allow_gaps:
        pre_nc, seg_buff, post_nc = trim_non_code_segments(segments)
    else:
        seg_buff = segments  # pragma: no cover TODO?

    # Rehydrate the bracket segments in question.
    # bracket_persits controls whether we make a BracketedSegment or not.
    start_bracket, end_bracket, bracket_persists = self.get_bracket_from_dialect(
        parse_context
    )
    # Allow optional override for special bracket-like things
    start_bracket = self.start_bracket or start_bracket
    end_bracket = self.end_bracket or end_bracket

    # Are we dealing with a pre-existing BracketSegment?
    if seg_buff[0].is_type("bracketed"):
        # Reuse the existing bracketed segment: peel out its content
        # (everything between the recorded start and end brackets).
        seg: BracketedSegment = cast(BracketedSegment, seg_buff[0])
        content_segs = seg.segments[len(seg.start_bracket):-len(seg.end_bracket)]
        bracket_segment = seg
        trailing_segments = seg_buff[1:]
    # Otherwise try and match the segments directly.
    else:
        # Look for the first bracket
        with parse_context.deeper_match() as ctx:
            start_match = start_bracket.match(seg_buff, parse_context=ctx)
        if start_match:
            seg_buff = start_match.unmatched_segments
        else:
            # Can't find the opening bracket. No Match.
            return MatchResult.from_unmatched(segments)

        # Look for the closing bracket
        content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
            segments=seg_buff,
            matchers=[end_bracket],
            parse_context=parse_context,
            start_bracket=start_bracket,
            end_bracket=end_bracket,
            bracket_pairs_set=self.bracket_pairs_set,
        )
        if not end_match:  # pragma: no cover
            raise SQLParseError(
                "Couldn't find closing bracket for opening bracket.",
                segment=start_match.matched_segments[0],
            )

        # Construct a bracket segment
        bracket_segment = BracketedSegment(
            segments=(
                start_match.matched_segments
                + content_segs
                + end_match.matched_segments
            ),
            start_bracket=start_match.matched_segments,
            end_bracket=end_match.matched_segments,
        )
        trailing_segments = end_match.unmatched_segments

    # Then trim whitespace and deal with the case of non-code content e.g. "( )"
    if self.allow_gaps:
        pre_segs, content_segs, post_segs = trim_non_code_segments(content_segs)
    else:  # pragma: no cover TODO?
        pre_segs = ()
        post_segs = ()

    # If we've got a case of empty brackets check whether that is allowed.
    if not content_segs:
        if not self._elements or (
            all(e.is_optional() for e in self._elements)
            and (self.allow_gaps or (not pre_segs and not post_segs))
        ):
            return MatchResult(
                (bracket_segment,)
                if bracket_persists
                else bracket_segment.segments,
                trailing_segments,
            )
        else:
            return MatchResult.from_unmatched(segments)

    # Match the content using super. Sequence will interpret the content of the
    # elements.
    with parse_context.deeper_match() as ctx:
        content_match = super().match(content_segs, parse_context=ctx)

    # We require a complete match for the content (hopefully for obvious reasons)
    if content_match.is_complete():
        # Reconstruct the bracket segment post match.
        # We need to realign the meta segments so the pos markers are correct.
        # Have we already got indents?
        meta_idx = None
        for idx, seg in enumerate(bracket_segment.segments):
            if (
                seg.is_meta
                and cast(MetaSegment, seg).indent_val > 0
                and not cast(MetaSegment, seg).is_template
            ):
                meta_idx = idx
                break
        # If we've already got indents, don't add more.
        # NOTE(review): this is a truthiness check, so an indent meta at
        # index 0 would be treated as "no indent found" — presumably
        # unreachable because segments start with the bracket itself;
        # confirm.
        if meta_idx:
            bracket_segment.segments = BaseSegment._position_segments(
                bracket_segment.start_bracket
                + pre_segs
                + content_match.all_segments()
                + post_segs
                + bracket_segment.end_bracket
            )
        # Append some indent and dedent tokens at the start and the end.
        else:
            bracket_segment.segments = BaseSegment._position_segments(
                # NB: The nc segments go *outside* the indents.
                bracket_segment.start_bracket
                + (Indent(),)  # Add a meta indent here
                + pre_segs
                + content_match.all_segments()
                + post_segs
                + (Dedent(),)  # Add a meta indent here
                + bracket_segment.end_bracket
            )
        return MatchResult(
            (bracket_segment,)
            if bracket_persists
            else bracket_segment.segments,
            trailing_segments,
        )
    # No complete match. Fail.
    else:
        return MatchResult.from_unmatched(segments)
def _bracket_sensitive_look_ahead_match(
    cls,
    segments: Tuple[BaseSegment, ...],
    matchers: List[MatchableType],
    parse_context: ParseContext,
    start_bracket: Optional[Matchable] = None,
    end_bracket: Optional[Matchable] = None,
    bracket_pairs_set: str = "bracket_pairs",
) -> Tuple[Tuple[BaseSegment, ...], MatchResult, Optional[MatchableType]]:
    """Same as `_look_ahead_match` but with bracket counting.

    NB: Given we depend on `_look_ahead_match` we can also utilise
    the same performance optimisations which are implemented there.

    bracket_pairs_set: Allows specific segments to override the available
        bracket pairs. See the definition of "angle_bracket_pairs" in the
        BigQuery dialect for additional context on why this exists.

    Returns:
        `tuple` of (unmatched_segments, match_object, matcher).

    """
    # Have we been passed an empty tuple?
    if not segments:
        return ((), MatchResult.from_unmatched(segments), None)

    # Get hold of the bracket matchers from the dialect, and append them
    # to the list of matchers. We get them from the relevant set on the
    # dialect. We use zip twice to "unzip" them. We ignore the first
    # argument because that's just the name.
    _, start_bracket_refs, end_bracket_refs, persists = zip(
        *parse_context.dialect.sets(bracket_pairs_set)
    )
    # These are matchables, probably StringParsers.
    start_brackets = [
        parse_context.dialect.ref(seg_ref) for seg_ref in start_bracket_refs
    ]
    end_brackets = [
        parse_context.dialect.ref(seg_ref) for seg_ref in end_bracket_refs
    ]
    # Add any bracket-like things passed as arguments
    if start_bracket:
        start_brackets += [start_bracket]
    if end_bracket:
        end_brackets += [end_bracket]
    bracket_matchers = start_brackets + end_brackets

    # Make some buffers
    seg_buff: Tuple[BaseSegment, ...] = segments  # Still to be examined.
    pre_seg_buff: Tuple[BaseSegment, ...] = ()  # Consumed so far.
    bracket_stack: List[BracketInfo] = []  # Currently open brackets.

    # Iterate
    while True:
        # Do we have anything left to match on?
        if seg_buff:
            # Yes we have buffer left to work with.
            # Are we already in a bracket stack?
            if bracket_stack:
                # Yes, we're just looking for the closing bracket, or
                # another opening bracket.
                pre, match, matcher = cls._look_ahead_match(
                    seg_buff,
                    bracket_matchers,
                    parse_context=parse_context,
                )
                if match:
                    # NB: We can only consider this as a nested bracket if the start
                    # and end tokens are not the same. If a matcher is both a start
                    # and end token we cannot deepen the bracket stack. In general,
                    # quoted strings are a typical example where the start and end
                    # tokens are the same. Currently, though, quoted strings are
                    # handled elsewhere in the parser, and there are no cases where
                    # *this* code has to handle identical start and end brackets.
                    # For now, consider this a small, speculative investment in a
                    # possible future requirement.
                    if matcher in start_brackets and matcher not in end_brackets:
                        # Add any segments leading up to this to the previous
                        # bracket.
                        bracket_stack[-1].segments += pre
                        # Add a bracket to the stack and add the matches from the
                        # segment.
                        bracket_stack.append(
                            BracketInfo(
                                bracket=match.matched_segments[0],
                                segments=match.matched_segments,
                            )
                        )
                        seg_buff = match.unmatched_segments
                        continue
                    elif matcher in end_brackets:
                        # Found an end bracket. Does its type match that of
                        # the innermost start bracket? E.g. ")" matches "(",
                        # "]" matches "[".
                        # For the start bracket we don't have the matcher
                        # but we can work out the type, so we use that for
                        # the lookup.
                        start_index = [
                            bracket.type for bracket in start_brackets
                        ].index(bracket_stack[-1].bracket.get_type())
                        # For the end index, we can just look for the matcher
                        end_index = end_brackets.index(matcher)
                        bracket_types_match = start_index == end_index
                        if bracket_types_match:
                            # Yes, the types match. So we've found a
                            # matching end bracket. Pop the stack, construct
                            # a bracketed segment and carry
                            # on.

                            # Complete the bracketed info
                            bracket_stack[-1].segments += (
                                pre + match.matched_segments
                            )
                            # Construct a bracketed segment (as a tuple) if allowed.
                            persist_bracket = persists[end_brackets.index(matcher)]
                            if persist_bracket:
                                new_segments: Tuple[BaseSegment, ...] = (
                                    bracket_stack[-1].to_segment(
                                        end_bracket=match.matched_segments
                                    ),
                                )
                            else:
                                new_segments = bracket_stack[-1].segments
                            # Remove the bracket set from the stack
                            bracket_stack.pop()
                            # If we're still in a bracket, add the new segments to
                            # that bracket, otherwise add them to the buffer
                            if bracket_stack:
                                bracket_stack[-1].segments += new_segments
                            else:
                                pre_seg_buff += new_segments
                            seg_buff = match.unmatched_segments
                            continue
                        else:
                            # The types don't match. Error.
                            raise SQLParseError(
                                f"Found unexpected end bracket!, "
                                f"was expecting "
                                f"{end_brackets[start_index]}, "
                                f"but got {matcher}",
                                segment=match.matched_segments[0],
                            )

                    else:  # pragma: no cover
                        raise RuntimeError("I don't know how we get here?!")
                else:  # pragma: no cover
                    # No match, we're in a bracket stack. Error.
                    raise SQLParseError(
                        "Couldn't find closing bracket for opening bracket.",
                        segment=bracket_stack[-1].bracket,
                    )
            else:
                # No, we're open to more opening brackets or the thing(s)
                # that we're otherwise looking for.
                pre, match, matcher = cls._look_ahead_match(
                    seg_buff,
                    matchers + bracket_matchers,
                    parse_context=parse_context,
                )
                if match:
                    if matcher in matchers:
                        # It's one of the things we were looking for!
                        # Return.
                        return (pre_seg_buff + pre, match, matcher)
                    elif matcher in start_brackets:
                        # We've found the start of a bracket segment.
                        # NB: It might not *actually* be the bracket itself,
                        # but could be some non-code element preceding it.
                        # That's actually ok.

                        # Add the bracket to the stack.
                        bracket_stack.append(
                            BracketInfo(
                                bracket=match.matched_segments[0],
                                segments=match.matched_segments,
                            )
                        )
                        # The matched element has already been added to the bracket.
                        # Add anything before it to the pre segment buffer.
                        # Reset the working buffer.
                        pre_seg_buff += pre
                        seg_buff = match.unmatched_segments
                        continue
                    elif matcher in end_brackets:
                        # We've found an unexpected end bracket! This is likely
                        # because we're matching a section which should have ended.
                        # If we had a match, it would have matched by now, so this
                        # means no match.
                        parse_match_logging(
                            cls.__name__,
                            "_bracket_sensitive_look_ahead_match",
                            "UEXB",
                            parse_context=parse_context,
                            v_level=3,
                            got=matcher,
                        )
                        # From here we'll drop out to the happy unmatched exit.
                    else:  # pragma: no cover
                        # This shouldn't happen!?
                        raise NotImplementedError(
                            "This shouldn't happen. Panic in "
                            "_bracket_sensitive_look_ahead_match."
                        )
                # Not in a bracket stack, but no match.
                # From here we'll drop out to the happy unmatched exit.
        else:
            # No we're at the end:
            # Now check have we closed all our brackets?
            if bracket_stack:  # pragma: no cover
                # No we haven't.
                raise SQLParseError(
                    "Couldn't find closing bracket for opened brackets: "
                    f"`{bracket_stack}`.",
                    segment=bracket_stack[-1].bracket,
                )

        # This is the happy unmatched path. This occurs when:
        # - We reached the end with no open brackets.
        # - No match while outside a bracket stack.
        # - We found an unexpected end bracket before matching something
        # interesting. We return with the mutated segments so we can reuse any
        # bracket matching.
        return ((), MatchResult.from_unmatched(pre_seg_buff + seg_buff), None)