def generate_test_segments_func(elems):
    """Roughly generate test segments.

    This function isn't totally robust, but
    good enough for testing. Use with caution.
    """
    buff = []
    raw_file = "".join(elems)
    templated_file = TemplatedFile.from_string(raw_file)
    idx = 0

    for elem in elems:
        if elem == "<indent>":
            buff.append(
                Indent(
                    pos_marker=PositionMarker.from_point(idx, idx, templated_file)
                )
            )
            continue
        elif elem == "<dedent>":
            buff.append(
                Dedent(
                    pos_marker=PositionMarker.from_point(idx, idx, templated_file)
                )
            )
            continue

        seg_kwargs = {}
        if set(elem) <= {" ", "\t"}:
            SegClass = WhitespaceSegment
        elif set(elem) <= {"\n"}:
            SegClass = NewlineSegment
        elif elem == "(":
            SegClass = SymbolSegment
            seg_kwargs = {"name": "bracket_open"}
        elif elem == ")":
            SegClass = SymbolSegment
            seg_kwargs = {"name": "bracket_close"}
        elif elem.startswith("--"):
            SegClass = CommentSegment
            seg_kwargs = {"name": "inline_comment"}
        elif elem.startswith('"'):
            SegClass = CodeSegment
            seg_kwargs = {"name": "double_quote"}
        elif elem.startswith("'"):
            SegClass = CodeSegment
            seg_kwargs = {"name": "single_quote"}
        else:
            SegClass = CodeSegment

        # Set a position marker from the current index, covering the raw
        # length of the element in both source and templated space.
        buff.append(
            SegClass(
                raw=elem,
                pos_marker=PositionMarker(
                    slice(idx, idx + len(elem)),
                    slice(idx, idx + len(elem)),
                    templated_file,
                ),
                **seg_kwargs,
            )
        )
        idx += len(elem)

    return tuple(buff)
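# A minimal standalone sketch (not part of the function above, and independent
# of sqlfluff's classes) showing why the `set(elem) <= {" ", "\t"}` subset
# checks work for classification: a string made entirely of spaces/tabs has a
# character set which is a subset of {" ", "\t"}. Names are illustrative only.
def _classify_demo(elem: str) -> str:
    if set(elem) <= {" ", "\t"}:
        return "whitespace"  # e.g. "  \t "
    elif set(elem) <= {"\n"}:
        return "newline"  # e.g. "\n\n"
    elif elem.startswith("--"):
        return "inline_comment"
    return "code"


assert _classify_demo("  \t ") == "whitespace"
assert _classify_demo("\n") == "newline"
assert _classify_demo("-- a comment") == "inline_comment"
assert _classify_demo("SELECT") == "code"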
def generate_test_segments_func(elems):
    """Roughly generate test segments.

    This function isn't totally robust, but
    good enough for testing. Use with caution.
    """
    buff = []
    raw_buff = ""

    for elem in elems:
        if elem == "<indent>":
            buff.append(Indent(FilePositionMarker().advance_by(raw_buff)))
            continue
        elif elem == "<dedent>":
            buff.append(Dedent(FilePositionMarker().advance_by(raw_buff)))
            continue

        if set(elem) <= {" ", "\t"}:
            cls = RawSegment.make(" ", name="whitespace", type="whitespace")
        elif set(elem) <= {"\n"}:
            cls = RawSegment.make("\n", name="newline", type="newline")
        elif elem == "(":
            cls = RawSegment.make("(", name="bracket_open", _is_code=True)
        elif elem == ")":
            cls = RawSegment.make(")", name="bracket_close", _is_code=True)
        elif elem.startswith("--"):
            cls = RawSegment.make("--", name="inline_comment")
        elif elem.startswith('"'):
            cls = RawSegment.make('"', name="double_quote", _is_code=True)
        elif elem.startswith("'"):
            cls = RawSegment.make("'", name="single_quote", _is_code=True)
        else:
            cls = RawSegment.make("", _is_code=True)

        buff.append(cls(elem, FilePositionMarker().advance_by(raw_buff)))
        raw_buff += elem

    return tuple(buff)  # Make sure we return a tuple
def elements_to_segments(
    self, elements: List[TemplateElement], templated_file: TemplatedFile
) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments."""
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []

    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)

    # Now work out source slices, and add in template placeholders.
    for element in elements:
        # Calculate Source Slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice
        )
        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        for source_only_slice in source_only_slices:
            # If it's later in the source, stop looking. Any later
            # ones *also* won't match.
            if source_only_slice.source_idx > source_slice.start:
                break
            # Is there a templated section within this source slice?
            # If there is then for some reason I can't quite explain,
            # it will always be at the start of the section. This is
            # very convenient because it means we'll always have the
            # start and end of it in a definite position. This makes
            # slicing and looping much easier.
            elif source_only_slice.source_idx == source_slice.start:
                lexer_logger.debug(
                    "Found templated section! %s, %s, %s",
                    source_only_slice.source_slice(),
                    source_only_slice.slice_type,
                    element.template_slice.start,
                )
                # Calculate a slice for any placeholders
                placeholder_source_slice = slice(
                    source_slice.start, source_only_slice.end_source_idx()
                )
                # Adjust the source slice accordingly.
                source_slice = slice(
                    source_only_slice.end_source_idx(), source_slice.stop
                )

                # TODO: Readjust this to remove .when once ProtoSegment is in.
                # Add segments as appropriate.
                # If it's a block end or mid, add a dedent.
                if source_only_slice.slice_type in ("block_end", "block_mid"):
                    segment_buffer.append(
                        Dedent.when(template_blocks_indent=True)(
                            pos_marker=PositionMarker.from_point(
                                placeholder_source_slice.start,
                                element.template_slice.start,
                                templated_file,
                            )
                        )
                    )
                # Always add a placeholder
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            placeholder_source_slice,
                            slice(
                                element.template_slice.start,
                                element.template_slice.start,
                            ),
                            templated_file,
                        ),
                        source_str=source_only_slice.raw,
                        block_type=source_only_slice.slice_type,
                    )
                )
                # If it's a block start or mid, add an indent.
                if source_only_slice.slice_type in ("block_start", "block_mid"):
                    segment_buffer.append(
                        Indent.when(template_blocks_indent=True)(
                            pos_marker=PositionMarker.from_point(
                                placeholder_source_slice.stop,
                                element.template_slice.start,
                                templated_file,
                            )
                        )
                    )

        # Add the actual segment
        segment_buffer.append(
            element.to_segment(
                pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ),
            )
        )

    # Convert to tuple before return
    return tuple(segment_buffer)
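# An illustrative sketch of the slice arithmetic above, using plain slices
# rather than sqlfluff's internal types. If a source-only section (e.g. a
# "{% if x %}" tag) sits at the start of the calculated source slice, the
# placeholder covers it and the remaining source slice is shifted to start
# after it. The numbers below are hypothetical, for demonstration only.
source_slice = slice(10, 30)  # source slice calculated for an element
so_start, so_end = 10, 21     # a source-only slice, e.g. "{% if x %}\n"

if so_start == source_slice.start:
    # Placeholder covers the source-only section...
    placeholder_source_slice = slice(source_slice.start, so_end)
    # ...and the element's own source slice now starts after it.
    source_slice = slice(so_end, source_slice.stop)

assert placeholder_source_slice == slice(10, 21)
assert source_slice == slice(21, 30)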
def enrich_segments(
    segment_buff: Tuple[BaseSegment, ...], templated_file: TemplatedFile
) -> Tuple[BaseSegment, ...]:
    """Enrich the segments using the templated file.

    We use the mapping in the template to provide positions
    in the source file.
    """
    # Make a new buffer to hold the enriched segments.
    # We need a new buffer to hold the new meta segments
    # introduced.
    new_segment_buff = []
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()

    lexer_logger.info(
        "Enriching Segments. Source-only slices: %s", source_only_slices
    )

    for segment in segment_buff:
        templated_slice = slice(
            segment.pos_marker.char_pos,
            segment.pos_marker.char_pos + len(segment.raw),
        )
        source_slice = templated_file.templated_slice_to_source_slice(
            templated_slice
        )
        # At this stage, templated slices will be INCLUDED in the source slice,
        # so we should consider whether we've captured any. If we have then
        # we need to re-evaluate whether it's a literal or not.
        for source_only_slice in source_only_slices:
            if source_only_slice.source_idx > source_slice.start:
                break
            elif source_only_slice.source_idx == source_slice.start:
                lexer_logger.debug(
                    "Found templated section! %s, %s, %s",
                    source_only_slice.source_slice(),
                    source_only_slice.slice_type,
                    templated_slice.start,
                )
                # Adjust the source slice accordingly.
                source_slice = slice(
                    source_only_slice.end_source_idx(), source_slice.stop
                )
                # Add segments as appropriate.
                # If it's a block end or mid, add a dedent.
                if source_only_slice.slice_type in ("block_end", "block_mid"):
                    new_segment_buff.append(
                        Dedent.when(template_blocks_indent=True)(
                            pos_marker=segment.pos_marker
                        )
                    )
                # Always add a placeholder
                new_segment_buff.append(
                    TemplateSegment(
                        pos_marker=segment.pos_marker,
                        source_str=source_only_slice.raw,
                        block_type=source_only_slice.slice_type,
                    )
                )
                # If it's a block start or mid, add an indent.
                if source_only_slice.slice_type in ("block_start", "block_mid"):
                    new_segment_buff.append(
                        Indent.when(template_blocks_indent=True)(
                            pos_marker=segment.pos_marker
                        )
                    )

        source_line, source_pos = templated_file.get_line_pos_of_char_pos(
            source_slice.start
        )

        # Recalculate is_literal
        is_literal = templated_file.is_source_slice_literal(source_slice)

        segment.pos_marker = EnrichedFilePositionMarker(
            statement_index=segment.pos_marker.statement_index,
            line_no=segment.pos_marker.line_no,
            line_pos=segment.pos_marker.line_pos,
            char_pos=segment.pos_marker.char_pos,
            templated_slice=templated_slice,
            source_slice=source_slice,
            is_literal=is_literal,
            source_pos_marker=FilePositionMarker(
                segment.pos_marker.statement_index,
                source_line,
                source_pos,
                source_slice.start,
            ),
        )
        new_segment_buff.append(segment)

    lexer_logger.debug("Enriched Segments:")
    for seg in new_segment_buff:
        lexer_logger.debug(
            "\tTmp: %s\tSrc: %s\tSeg: %s",
            getattr(seg.pos_marker, "templated_slice", None),
            getattr(seg.pos_marker, "source_slice", None),
            seg,
        )

    return tuple(new_segment_buff)
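# A rough standalone sketch of what `get_line_pos_of_char_pos` must do, based
# purely on its usage above (this is an assumption about its semantics, not
# sqlfluff's actual implementation): translate a character offset into a
# 1-indexed line number and position within that line.
def _line_pos_of_char_pos(source_str: str, char_pos: int):
    lines_before = source_str[:char_pos].split("\n")
    line_no = len(lines_before)           # 1-indexed line number
    line_pos = len(lines_before[-1]) + 1  # 1-indexed position within the line
    return line_no, line_pos


# Offset 9 in "SELECT 1\nFROM tbl" is the "F" of FROM: line 2, position 1.
assert _line_pos_of_char_pos("SELECT 1\nFROM tbl", 9) == (2, 1)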
def elements_to_segments(
    self, elements: List[TemplateElement], templated_file: TemplatedFile
) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments."""
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []

    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)

    stash_source_slice, last_source_slice = None, None
    # Now work out source slices, and add in template placeholders.
    for idx, element in enumerate(elements):
        # Calculate Source Slice
        if idx != 0:
            last_source_slice = stash_source_slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice
        )
        stash_source_slice = source_slice
        # Output the slice as we lex.
        lexer_logger.debug(
            "  %s, %s, %s, %r",
            idx,
            element,
            source_slice,
            templated_file.templated_str[element.template_slice],
        )

        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        so_slices = []
        # Only look for source only slices if we've got a new source slice to
        # avoid unnecessary duplication.
        if last_source_slice != source_slice:
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match.
                if source_only_slice.source_idx >= source_slice.stop:
                    break
                elif source_only_slice.source_idx >= source_slice.start:
                    so_slices.append(source_only_slice)

        if so_slices:
            lexer_logger.debug("    Collected Source Only Slices")
            for so_slice in so_slices:
                lexer_logger.debug("       %s", so_slice)

            # Calculate some things which will be useful
            templ_str = templated_file.templated_str[element.template_slice]
            source_str = templated_file.source_str[source_slice]

            # For reasons which aren't entirely clear right now, if there is
            # an included literal, it will always be at the end. Let's see if
            # it's there.
            if source_str.endswith(templ_str):
                existing_len = len(templ_str)
            else:
                existing_len = 0

            # Calculate slices
            placeholder_slice = slice(
                source_slice.start, source_slice.stop - existing_len
            )
            # NOTE: if existing_len is 0 this yields an empty string and the
            # fallback below applies.
            placeholder_str = source_str[:-existing_len]
            source_slice = slice(
                source_slice.stop - existing_len, source_slice.stop
            )
            # If it doesn't manage to extract a placeholder string from the
            # source just concatenate the source only strings. There is almost
            # always only one of them.
            if not placeholder_str:
                placeholder_str = "".join(s.raw for s in so_slices)

            lexer_logger.debug(
                "    Overlap Length: %s. PS: %s, LS: %s, p_str: %r, "
                "templ_str: %r",
                existing_len,
                placeholder_slice,
                source_slice,
                placeholder_str,
                templ_str,
            )

            # Calculate potential indent/dedent
            block_slices = sum(
                s.slice_type.startswith("block_") for s in so_slices
            )
            block_balance = sum(
                s.slice_type == "block_start" for s in so_slices
            ) - sum(s.slice_type == "block_end" for s in so_slices)
            lead_dedent = so_slices[0].slice_type in ("block_end", "block_mid")
            trail_indent = so_slices[-1].slice_type in (
                "block_start",
                "block_mid",
            )
            add_indents = self.config.get(
                "template_blocks_indent", "indentation"
            )

            lexer_logger.debug(
                "    Block Slices: %s. Block Balance: %s. "
                "Lead: %s, Trail: %s, Add: %s",
                block_slices,
                block_balance,
                lead_dedent,
                trail_indent,
                add_indents,
            )

            # Add a dedent if appropriate.
            if lead_dedent and add_indents:
                lexer_logger.debug("      DEDENT")
                segment_buffer.append(
                    Dedent(
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.start,
                            element.template_slice.start,
                            templated_file,
                        )
                    )
                )

            # Always add a placeholder
            segment_buffer.append(
                TemplateSegment(
                    pos_marker=PositionMarker(
                        placeholder_slice,
                        slice(
                            element.template_slice.start,
                            element.template_slice.start,
                        ),
                        templated_file,
                    ),
                    source_str=placeholder_str,
                    block_type=so_slices[0].slice_type
                    if len(so_slices) == 1
                    else "compound",
                )
            )
            lexer_logger.debug(
                "      Placeholder: %s, %r", segment_buffer[-1], placeholder_str
            )

            # Add an indent if appropriate.
            if trail_indent and add_indents:
                lexer_logger.debug("      INDENT")
                segment_buffer.append(
                    Indent(
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.stop,
                            element.template_slice.start,
                            templated_file,
                        )
                    )
                )

        # Add the actual segment
        segment_buffer.append(
            element.to_segment(
                pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ),
            )
        )

    # Convert to tuple before return
    return tuple(segment_buffer)
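# A standalone sketch of the indent/dedent bookkeeping above, using plain
# dicts in place of source-only slice objects (illustrative only). A group of
# collected slices leads with a dedent if it opens with a closing/mid block,
# and trails with an indent if it ends with an opening/mid block.
so_slices = [
    {"slice_type": "block_end"},    # e.g. "{% endfor %}"
    {"slice_type": "block_start"},  # e.g. "{% for y in z %}"
]
block_balance = sum(s["slice_type"] == "block_start" for s in so_slices) - sum(
    s["slice_type"] == "block_end" for s in so_slices
)
lead_dedent = so_slices[0]["slice_type"] in ("block_end", "block_mid")
trail_indent = so_slices[-1]["slice_type"] in ("block_start", "block_mid")

assert block_balance == 0  # one start cancels one end
assert lead_dedent         # opens with a closing block: dedent first
assert trail_indent        # ends with an opening block: indent after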
def match(
    self, segments: Tuple["BaseSegment", ...], parse_context: ParseContext
) -> MatchResult:
    """Match if this is a bracketed sequence, with content that matches one of the elements.

    1. work forwards to find the first bracket.
       If we find something other than whitespace, then fail out.
    2. Once we have the first bracket, we need to bracket count forward to
       find its partner.
    3. Assuming we find its partner then we try and match what goes between
       them using the match method of Sequence.
       If we match, great. If not, then we return an empty match.
       If we never find its partner then we return an empty match but should
       probably log a parsing warning, or error?
    """
    # Trim ends if allowed.
    if self.allow_gaps:
        pre_nc, seg_buff, post_nc = trim_non_code_segments(segments)
    else:
        seg_buff = segments

    # Rehydrate the bracket segments in question.
    start_bracket, end_bracket = self.get_bracket_from_dialect(parse_context)
    # Allow optional override for special bracket-like things
    start_bracket = self.start_bracket or start_bracket
    end_bracket = self.end_bracket or end_bracket

    # Look for the first bracket
    with parse_context.deeper_match() as ctx:
        start_match = start_bracket.match(seg_buff, parse_context=ctx)
    if start_match:
        seg_buff = start_match.unmatched_segments
    else:
        # Can't find the opening bracket. No Match.
        return MatchResult.from_unmatched(segments)

    # Look for the closing bracket
    content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
        segments=seg_buff,
        matchers=[end_bracket],
        parse_context=parse_context,
        start_bracket=start_bracket,
        end_bracket=end_bracket,
        bracket_pairs_set=self.bracket_pairs_set,
    )
    if not end_match:
        raise SQLParseError(
            "Couldn't find closing bracket for opening bracket.",
            segment=start_match.matched_segments[0],
        )

    # Match the content now we've confirmed the brackets.

    # First deal with the case of TOTALLY EMPTY BRACKETS e.g. "()"
    if not content_segs:
        # If it's allowed, return a match.
        if not self._elements or all(e.is_optional() for e in self._elements):
            return MatchResult(
                start_match.matched_segments + end_match.matched_segments,
                end_match.unmatched_segments,
            )
        # If not, don't.
        else:
            return MatchResult.from_unmatched(segments)

    # Then trim whitespace and deal with the case of no code content e.g. "( )"
    if self.allow_gaps:
        pre_nc, content_segs, post_nc = trim_non_code_segments(content_segs)
    else:
        pre_nc = ()
        post_nc = ()

    # If we don't have anything left after trimming, act accordingly.
    if not content_segs:
        if not self._elements or (
            all(e.is_optional() for e in self._elements) and self.allow_gaps
        ):
            return MatchResult(
                start_match.matched_segments
                + pre_nc
                + post_nc
                + end_match.matched_segments,
                end_match.unmatched_segments,
            )
        else:
            return MatchResult.from_unmatched(segments)

    # Match using super. Sequence will interpret the content of the elements.
    with parse_context.deeper_match() as ctx:
        content_match = super().match(content_segs, parse_context=ctx)

    # We require a complete match for the content (hopefully for obvious reasons)
    if content_match.is_complete():
        # Append some indent and dedent tokens at the start and the end.
        return MatchResult(
            # We need to realign the meta segments so the pos markers are correct.
            BaseSegment._position_segments(
                (
                    # NB: The nc segments go *outside* the indents.
                    start_match.matched_segments
                    + (Indent(),)  # Add a meta indent here
                    + pre_nc
                    + content_match.matched_segments
                    + post_nc
                    + (Dedent(),)  # Add a meta dedent here
                    + end_match.matched_segments
                ),
            ),
            end_match.unmatched_segments,
        )
    # No complete match. Fail.
    else:
        return MatchResult.from_unmatched(segments)
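# A self-contained sketch of the "bracket count forward" idea from the
# docstring above, operating on a plain token list rather than segments
# (illustrative only). It returns the index of the partner of the opening
# bracket, tracking nesting depth as it scans forward.
def _find_partner(tokens, open_tok="(", close_tok=")"):
    depth = 0
    for i, tok in enumerate(tokens):
        if tok == open_tok:
            depth += 1
        elif tok == close_tok:
            depth -= 1
            if depth == 0:
                return i
    return None  # unbalanced: no partner found


assert _find_partner(["(", "a", "(", "b", ")", ")", "c"]) == 5  # outer pair
assert _find_partner(["(", "a"]) is None  # unclosed bracket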
def generate_test_segments_func(elems):
    """Roughly generate test segments.

    This function isn't totally robust, but
    good enough for testing. Use with caution.
    """
    buff = []
    raw_file = "".join(elems)
    templated_file = TemplatedFile.from_string(raw_file)
    idx = 0

    for elem in elems:
        if elem == "<indent>":
            buff.append(
                Indent(
                    pos_marker=PositionMarker.from_point(idx, idx, templated_file)
                )
            )
            continue
        elif elem == "<dedent>":
            buff.append(
                Dedent(
                    pos_marker=PositionMarker.from_point(idx, idx, templated_file)
                )
            )
            continue

        if set(elem) <= {" ", "\t"}:
            cls = RawSegment.make(
                " ", name="whitespace", type="whitespace", _is_code=False
            )
        elif set(elem) <= {"\n"}:
            cls = RawSegment.make(
                "\n", name="newline", type="newline", _is_code=False
            )
        elif elem == "(":
            cls = RawSegment.make("(", name="bracket_open")
        elif elem == ")":
            cls = RawSegment.make(")", name="bracket_close")
        elif elem.startswith("--"):
            cls = RawSegment.make("--", name="inline_comment", _is_code=False)
        elif elem.startswith('"'):
            cls = RawSegment.make('"', name="double_quote")
        elif elem.startswith("'"):
            cls = RawSegment.make("'", name="single_quote")
        else:
            cls = RawSegment.make("")

        # Set a position marker from the current index, covering the raw
        # length of the element in both source and templated space.
        buff.append(
            cls(
                elem,
                pos_marker=PositionMarker(
                    slice(idx, idx + len(elem)),
                    slice(idx, idx + len(elem)),
                    templated_file,
                ),
            )
        )
        idx += len(elem)

    return tuple(buff)
def elements_to_segments(
    self, elements: List[TemplateElement], templated_file: TemplatedFile
) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments."""
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []

    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)

    stash_source_slice, last_source_slice = None, None
    # Now work out source slices, and add in template placeholders.
    for idx, element in enumerate(elements):
        # Calculate Source Slice
        if idx != 0:
            last_source_slice = stash_source_slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice
        )
        stash_source_slice = source_slice
        # Output the slice as we lex.
        lexer_logger.debug(
            "  %s, %s, %s, %r",
            idx,
            element,
            source_slice,
            templated_file.templated_str[element.template_slice],
        )

        # Detect when we've gone backward in the source.
        # NOTE: If it's the _same_ slice then don't insert a marker
        # because we're probably just within a single templated
        # section.
        if (
            last_source_slice
            and last_source_slice.stop > source_slice.start
            and last_source_slice != source_slice
        ):
            # If we have, insert a loop marker to reflect that.
            lexer_logger.debug(
                "      Backward jump detected. Inserting Loop Marker"
            )
            segment_buffer.append(
                TemplateLoop(
                    pos_marker=PositionMarker.from_point(
                        last_source_slice.stop,
                        element.template_slice.start,
                        templated_file,
                    )
                )
            )

        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        so_slices = []
        # Only look for source only slices if we've got a new source slice to
        # avoid unnecessary duplication.
        if last_source_slice != source_slice:
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match.
                if source_only_slice.source_idx >= source_slice.stop:
                    break
                elif source_only_slice.source_idx >= source_slice.start:
                    so_slices.append(source_only_slice)

        if so_slices:
            lexer_logger.debug("    Collected Source Only Slices")
            for so_slice in so_slices:
                lexer_logger.debug("       %s", so_slice)

            # Calculate some things which will be useful
            templ_str = templated_file.templated_str[element.template_slice]
            source_str = templated_file.source_str[source_slice]

            # For reasons which aren't entirely clear right now, if there is
            # an included literal, it will always be at the end. Let's see if
            # it's there.
            if source_str.endswith(templ_str):
                existing_len = len(templ_str)
            else:
                existing_len = 0

            # Calculate slices
            placeholder_slice = slice(
                source_slice.start, source_slice.stop - existing_len
            )
            placeholder_str = source_str[:-existing_len]
            source_slice = slice(
                source_slice.stop - existing_len, source_slice.stop
            )
            # If it doesn't manage to extract a placeholder string from the
            # source just concatenate the source only strings. There is almost
            # always only one of them.
            if not placeholder_str:
                placeholder_str = "".join(s.raw for s in so_slices)

            # The Jinja templater sometimes returns source-only slices with
            # gaps between. For example, in this section:
            #
            #   {% else %}
            #   JOIN
            #       {{action}}_raw_effect_sizes
            #   USING
            #       ({{ states }})
            #   {% endif %}
            #
            # we might get {% else %} and {% endif %} slices, without the
            # 4 lines between. This indicates those lines were not executed.
            # In this case, generate a placeholder where the skipped code is
            # omitted but noted with a brief string, e.g.:
            #
            # "{% else %}... [103 unused template characters] ...{% endif %}"
            #
            # This is more readable -- it would be REALLY confusing for a
            # placeholder to include code that wasn't even executed!!
            if len(so_slices) >= 2:
                has_gap = False
                gap_placeholder_parts = []
                last_slice = None
                # For each slice...
                for so_slice in so_slices:
                    # If it's not the first slice, was there a gap?
                    if last_slice:
                        end_last = last_slice.source_idx + len(last_slice.raw)
                        chars_skipped = so_slice.source_idx - end_last
                        if chars_skipped:
                            # Yes, gap between last_slice and so_slice.
                            has_gap = True

                            # Generate a string documenting the gap.
                            if chars_skipped >= 10:
                                gap_placeholder_parts.append(
                                    f"... [{chars_skipped} unused template "
                                    "characters] ..."
                                )
                            else:
                                gap_placeholder_parts.append("...")
                    # Now add the slice's source.
                    gap_placeholder_parts.append(so_slice.raw)
                    last_slice = so_slice
                if has_gap:
                    placeholder_str = "".join(gap_placeholder_parts)

            lexer_logger.debug(
                "    Overlap Length: %s. PS: %s, LS: %s, p_str: %r, "
                "templ_str: %r",
                existing_len,
                placeholder_slice,
                source_slice,
                placeholder_str,
                templ_str,
            )

            # Calculate potential indent/dedent
            block_slices = sum(
                s.slice_type.startswith("block_") for s in so_slices
            )
            block_balance = sum(
                s.slice_type == "block_start" for s in so_slices
            ) - sum(s.slice_type == "block_end" for s in so_slices)
            lead_dedent = so_slices[0].slice_type in ("block_end", "block_mid")
            trail_indent = so_slices[-1].slice_type in (
                "block_start",
                "block_mid",
            )
            add_indents = self.config.get(
                "template_blocks_indent", "indentation"
            )

            lexer_logger.debug(
                "    Block Slices: %s. Block Balance: %s. "
                "Lead: %s, Trail: %s, Add: %s",
                block_slices,
                block_balance,
                lead_dedent,
                trail_indent,
                add_indents,
            )

            # Add a dedent if appropriate.
            if lead_dedent and add_indents:
                lexer_logger.debug("      DEDENT")
                segment_buffer.append(
                    Dedent(
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.start,
                            element.template_slice.start,
                            templated_file,
                        )
                    )
                )

            # Always add a placeholder
            segment_buffer.append(
                TemplateSegment(
                    pos_marker=PositionMarker(
                        placeholder_slice,
                        slice(
                            element.template_slice.start,
                            element.template_slice.start,
                        ),
                        templated_file,
                    ),
                    source_str=placeholder_str,
                    block_type=so_slices[0].slice_type
                    if len(so_slices) == 1
                    else "compound",
                )
            )
            lexer_logger.debug(
                "      Placeholder: %s, %r", segment_buffer[-1], placeholder_str
            )

            # Add an indent if appropriate.
            if trail_indent and add_indents:
                lexer_logger.debug("      INDENT")
                segment_buffer.append(
                    Indent(
                        is_template=True,
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.stop,
                            element.template_slice.start,
                            templated_file,
                        ),
                    )
                )

        # Add the actual segment
        segment_buffer.append(
            element.to_segment(
                pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ),
            )
        )

        # Generate placeholders for any source-only slices that *follow*
        # the last element. This happens, for example, if a Jinja templated
        # file ends with "{% endif %}", and there's no trailing newline.
        if idx == len(elements) - 1:
            so_slices = [
                so
                for so in source_only_slices
                if so.source_idx >= source_slice.stop
            ]
            for so_slice in so_slices:
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            slice(
                                so_slice.source_idx, so_slice.end_source_idx()
                            ),
                            slice(
                                element.template_slice.stop,
                                element.template_slice.stop,
                            ),
                            templated_file,
                        ),
                        source_str=so_slice.raw,
                        block_type=so_slice.slice_type,
                    )
                )

    # Add an end of file marker
    segment_buffer.append(
        EndOfFile(
            pos_marker=segment_buffer[-1].pos_marker.end_point_marker()
            if segment_buffer
            else PositionMarker.from_point(0, 0, templated_file)
        )
    )

    # Convert to tuple before return
    return tuple(segment_buffer)
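# A standalone sketch of the gap-placeholder logic above, using plain tuples
# of (source_idx, raw) in place of source-only slice objects (illustrative
# only). Gaps of 10+ characters are documented with their length; shorter
# gaps become a bare ellipsis.
def _gap_placeholder(so_slices):
    parts, last = [], None
    for source_idx, raw in so_slices:
        if last:
            skipped = source_idx - (last[0] + len(last[1]))
            if skipped >= 10:
                parts.append(f"... [{skipped} unused template characters] ...")
            elif skipped:
                parts.append("...")
        parts.append(raw)
        last = (source_idx, raw)
    return "".join(parts)


# "{% else %}" at offset 0, "{% endif %}" at 113: a 103-character unexecuted gap.
assert (
    _gap_placeholder([(0, "{% else %}"), (113, "{% endif %}")])
    == "{% else %}... [103 unused template characters] ...{% endif %}"
)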
def match(
    self, segments: Tuple["BaseSegment", ...], parse_context: ParseContext
) -> MatchResult:
    """Match if a bracketed sequence, with content that matches one of the elements.

    1. work forwards to find the first bracket.
       If we find something other than whitespace, then fail out.
    2. Once we have the first bracket, we need to bracket count forward to
       find its partner.
    3. Assuming we find its partner then we try and match what goes between
       them using the match method of Sequence.
       If we match, great. If not, then we return an empty match.
       If we never find its partner then we return an empty match but should
       probably log a parsing warning, or error?
    """
    # Trim ends if allowed.
    if self.allow_gaps:
        pre_nc, seg_buff, post_nc = trim_non_code_segments(segments)
    else:
        seg_buff = segments  # pragma: no cover TODO?

    # Rehydrate the bracket segments in question.
    # bracket_persists controls whether we make a BracketedSegment or not.
    start_bracket, end_bracket, bracket_persists = self.get_bracket_from_dialect(
        parse_context
    )
    # Allow optional override for special bracket-like things
    start_bracket = self.start_bracket or start_bracket
    end_bracket = self.end_bracket or end_bracket

    # Are we dealing with a pre-existing BracketSegment?
    if seg_buff[0].is_type("bracketed"):
        seg: BracketedSegment = cast(BracketedSegment, seg_buff[0])
        content_segs = seg.segments[
            len(seg.start_bracket) : -len(seg.end_bracket)
        ]
        bracket_segment = seg
        trailing_segments = seg_buff[1:]
    # Otherwise try and match the segments directly.
    else:
        # Look for the first bracket
        with parse_context.deeper_match() as ctx:
            start_match = start_bracket.match(seg_buff, parse_context=ctx)
        if start_match:
            seg_buff = start_match.unmatched_segments
        else:
            # Can't find the opening bracket. No Match.
            return MatchResult.from_unmatched(segments)

        # Look for the closing bracket
        content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
            segments=seg_buff,
            matchers=[end_bracket],
            parse_context=parse_context,
            start_bracket=start_bracket,
            end_bracket=end_bracket,
            bracket_pairs_set=self.bracket_pairs_set,
        )
        if not end_match:  # pragma: no cover
            raise SQLParseError(
                "Couldn't find closing bracket for opening bracket.",
                segment=start_match.matched_segments[0],
            )

        # Construct a bracket segment
        bracket_segment = BracketedSegment(
            segments=(
                start_match.matched_segments
                + content_segs
                + end_match.matched_segments
            ),
            start_bracket=start_match.matched_segments,
            end_bracket=end_match.matched_segments,
        )
        trailing_segments = end_match.unmatched_segments

    # Then trim whitespace and deal with the case of non-code content e.g. "( )"
    if self.allow_gaps:
        pre_segs, content_segs, post_segs = trim_non_code_segments(content_segs)
    else:  # pragma: no cover TODO?
        pre_segs = ()
        post_segs = ()

    # If we've got a case of empty brackets check whether that is allowed.
    if not content_segs:
        if not self._elements or (
            all(e.is_optional() for e in self._elements)
            and (self.allow_gaps or (not pre_segs and not post_segs))
        ):
            return MatchResult(
                (bracket_segment,)
                if bracket_persists
                else bracket_segment.segments,
                trailing_segments,
            )
        else:
            return MatchResult.from_unmatched(segments)

    # Match the content using super. Sequence will interpret the content of
    # the elements.
    with parse_context.deeper_match() as ctx:
        content_match = super().match(content_segs, parse_context=ctx)

    # We require a complete match for the content (hopefully for obvious reasons)
    if content_match.is_complete():
        # Reconstruct the bracket segment post match.
        # We need to realign the meta segments so the pos markers are correct.
        # Have we already got indents?
        meta_idx = None
        for idx, seg in enumerate(bracket_segment.segments):
            if (
                seg.is_meta
                and cast(MetaSegment, seg).indent_val > 0
                and not cast(MetaSegment, seg).is_template
            ):
                meta_idx = idx
                break
        # If we've already got indents, don't add more.
        # NB: compare against None rather than relying on truthiness, so that
        # an indent found at index 0 would also count.
        if meta_idx is not None:
            bracket_segment.segments = BaseSegment._position_segments(
                bracket_segment.start_bracket
                + pre_segs
                + content_match.all_segments()
                + post_segs
                + bracket_segment.end_bracket
            )
        # Append some indent and dedent tokens at the start and the end.
        else:
            bracket_segment.segments = BaseSegment._position_segments(
                # NB: The nc segments go *outside* the indents.
                bracket_segment.start_bracket
                + (Indent(),)  # Add a meta indent here
                + pre_segs
                + content_match.all_segments()
                + post_segs
                + (Dedent(),)  # Add a meta dedent here
                + bracket_segment.end_bracket
            )
        return MatchResult(
            (bracket_segment,)
            if bracket_persists
            else bracket_segment.segments,
            trailing_segments,
        )
    # No complete match. Fail.
    else:
        return MatchResult.from_unmatched(segments)
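# An illustrative sketch of the "have we already got indents?" scan above,
# using plain stand-in objects rather than sqlfluff's real segment classes
# (names below are hypothetical, for demonstration only).
class _FakeMeta:
    def __init__(self, indent_val, is_template=False):
        self.is_meta = True
        self.indent_val = indent_val
        self.is_template = is_template


class _FakeRaw:
    is_meta = False
    indent_val = 0
    is_template = False


segs = [_FakeRaw(), _FakeMeta(indent_val=1), _FakeRaw()]
meta_idx = None
for idx, seg in enumerate(segs):
    # Look for a positive-indent meta that wasn't introduced by templating.
    if seg.is_meta and seg.indent_val > 0 and not seg.is_template:
        meta_idx = idx
        break

assert meta_idx == 1  # an existing (non-template) indent was found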