def generate_test_segments_func(elems):
    """Roughly generate test segments.

    This function isn't totally robust, but good enough
    for testing. Use with caution.
    """

    def _classify(raw):
        """Map a raw token string to (segment class, extra kwargs)."""
        # NOTE: order matters — the whitespace checks run first, and an
        # empty string (empty set) also classifies as whitespace.
        if set(raw) <= {" ", "\t"}:
            return WhitespaceSegment, {}
        if set(raw) <= {"\n"}:
            return NewlineSegment, {}
        if raw == "(":
            return SymbolSegment, {"name": "bracket_open"}
        if raw == ")":
            return SymbolSegment, {"name": "bracket_close"}
        if raw.startswith("--"):
            return CommentSegment, {"name": "inline_comment"}
        if raw.startswith('"'):
            return CodeSegment, {"name": "double_quote"}
        if raw.startswith("'"):
            return CodeSegment, {"name": "single_quote"}
        return CodeSegment, {}

    templated_file = TemplatedFile.from_string("".join(elems))
    segments = []
    position = 0
    for elem in elems:
        # <indent>/<dedent> are zero-width meta markers: they produce a
        # point position and consume no characters of the raw file.
        if elem in ("<indent>", "<dedent>"):
            meta_cls = Indent if elem == "<indent>" else Dedent
            segments.append(
                meta_cls(pos_marker=PositionMarker.from_point(
                    position, position, templated_file)))
            continue
        seg_class, seg_kwargs = _classify(elem)
        # Source and templated slices are identical here because the
        # "file" is the literal concatenation of the elements.
        span = slice(position, position + len(elem))
        segments.append(
            seg_class(raw=elem,
                      pos_marker=PositionMarker(span, span, templated_file),
                      **seg_kwargs))
        position += len(elem)
    return tuple(segments)
def elements_to_segments( self, elements: List[TemplateElement], templated_file: TemplatedFile) -> Tuple[RawSegment, ...]: """Convert a tuple of lexed elements into a tuple of segments.""" # Working buffer to build up segments segment_buffer: List[RawSegment] = [] lexer_logger.info("Elements to Segments.") # Get the templated slices to re-insert tokens for them source_only_slices = templated_file.source_only_slices() lexer_logger.info("Source-only slices: %s", source_only_slices) # Now work out source slices, and add in template placeholders. for element in elements: # Calculate Source Slice source_slice = templated_file.templated_slice_to_source_slice( element.template_slice) # The calculated source slice will include any source only slices. # We should consider all of them in turn to see whether we can # insert them. for source_only_slice in source_only_slices: # If it's later in the source, stop looking. Any later # ones *also* won't match. if source_only_slice.source_idx > source_slice.start: break # Is there a templated section within this source slice? # If there is then for some reason I can't quite explain, # it will always be at the start of the section. This is # very convenient beause it means we'll always have the # start and end of it in a definite position. This makes # slicing and looping much easier. elif source_only_slice.source_idx == source_slice.start: lexer_logger.debug( "Found templated section! %s, %s, %s", source_only_slice.source_slice(), source_only_slice.slice_type, element.template_slice.start, ) # Calculate a slice for any placeholders placeholder_source_slice = slice( source_slice.start, source_only_slice.end_source_idx()) # Adjust the source slice accordingly. source_slice = slice(source_only_slice.end_source_idx(), source_slice.stop) # TODO: Readjust this to remove .when once ProtoSegment is in. # Add segments as appropriate. # If it's a block end, add a dedent. 
if source_only_slice.slice_type in ("block_end", "block_mid"): segment_buffer.append( Dedent.when(template_blocks_indent=True)( pos_marker=PositionMarker.from_point( placeholder_source_slice.start, element.template_slice.start, templated_file, ))) # Always add a placeholder segment_buffer.append( TemplateSegment( pos_marker=PositionMarker( placeholder_source_slice, slice( element.template_slice.start, element.template_slice.start, ), templated_file, ), source_str=source_only_slice.raw, block_type=source_only_slice.slice_type, )) # If it's a block end, add a dedent. if source_only_slice.slice_type in ("block_start", "block_mid"): segment_buffer.append( Indent.when(template_blocks_indent=True)( pos_marker=PositionMarker.from_point( placeholder_source_slice.stop, element.template_slice.start, templated_file, ))) # Add the actual segment segment_buffer.append( element.to_segment(pos_marker=PositionMarker( source_slice, element.template_slice, templated_file, ), )) # Convert to tuple before return return tuple(segment_buffer)
def elements_to_segments(
        self, elements: List[TemplateElement],
        templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments.

    Args:
        elements: The lexed elements to convert.
        templated_file: The templated file the elements were lexed from,
            used to map templated positions back to source positions.

    Returns:
        A tuple of raw segments, including any placeholder, indent and
        dedent segments inserted for source-only (templated) sections.
    """
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []
    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)
    stash_source_slice, last_source_slice = None, None
    # Now work out source slices, and add in template placeholders.
    for idx, element in enumerate(elements):
        # Calculate Source Slice. We stash the previous iteration's slice
        # so we can tell whether we're still within the same source span.
        if idx != 0:
            last_source_slice = stash_source_slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice)
        stash_source_slice = source_slice
        # Output the slice as we lex.
        lexer_logger.debug(
            " %s, %s, %s, %r",
            idx,
            element,
            source_slice,
            templated_file.templated_str[element.template_slice],
        )
        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        so_slices = []
        # Only look for source only slices if we've got a new source slice to
        # avoid unnecessary duplication.
        if last_source_slice != source_slice:
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match.
                if source_only_slice.source_idx >= source_slice.stop:
                    break
                elif source_only_slice.source_idx >= source_slice.start:
                    so_slices.append(source_only_slice)
        if so_slices:
            lexer_logger.debug(" Collected Source Only Slices")
            for so_slice in so_slices:
                lexer_logger.debug(" %s", so_slice)
            # Calculate some things which will be useful
            templ_str = templated_file.templated_str[element.template_slice]
            source_str = templated_file.source_str[source_slice]
            # For reasons which aren't entirely clear right now, if there is
            # an included literal, it will always be at the end. Let's see
            # if it's there.
            if source_str.endswith(templ_str):
                existing_len = len(templ_str)
            else:
                existing_len = 0
            # Calculate slices
            placeholder_slice = slice(source_slice.start,
                                      source_slice.stop - existing_len)
            # NOTE: when existing_len == 0 this is [:-0], i.e. an *empty*
            # string — the fallback below then fills it in.
            placeholder_str = source_str[:-existing_len]
            source_slice = slice(source_slice.stop - existing_len,
                                 source_slice.stop)
            # If it doesn't manage to extract a placeholder string from the
            # source just concatenate the source only strings. There is
            # almost always only one of them.
            if not placeholder_str:
                placeholder_str = "".join(s.raw for s in so_slices)
            lexer_logger.debug(
                " Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                existing_len,
                placeholder_slice,
                source_slice,
                placeholder_str,
                templ_str,
            )
            # Calculate potential indent/dedent
            block_slices = sum(
                s.slice_type.startswith("block_") for s in so_slices)
            block_balance = sum(s.slice_type == "block_start"
                                for s in so_slices) - sum(
                                    s.slice_type == "block_end"
                                    for s in so_slices)
            lead_dedent = so_slices[0].slice_type in ("block_end",
                                                      "block_mid")
            trail_indent = so_slices[-1].slice_type in ("block_start",
                                                        "block_mid")
            add_indents = self.config.get("template_blocks_indent",
                                          "indentation")
            lexer_logger.debug(
                " Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, Add: %s",
                block_slices,
                block_balance,
                lead_dedent,
                trail_indent,
                add_indents,
            )
            # Add a dedent if appropriate.
            if lead_dedent and add_indents:
                lexer_logger.debug(" DEDENT")
                segment_buffer.append(
                    Dedent(pos_marker=PositionMarker.from_point(
                        placeholder_slice.start,
                        element.template_slice.start,
                        templated_file,
                    )))
            # Always add a placeholder. Its templated slice is zero-length:
            # the source-only section produces no templated output.
            segment_buffer.append(
                TemplateSegment(
                    pos_marker=PositionMarker(
                        placeholder_slice,
                        slice(
                            element.template_slice.start,
                            element.template_slice.start,
                        ),
                        templated_file,
                    ),
                    source_str=placeholder_str,
                    block_type=so_slices[0].slice_type
                    if len(so_slices) == 1 else "compound",
                ))
            lexer_logger.debug(" Placeholder: %s, %r", segment_buffer[-1],
                               placeholder_str)
            # Add an indent if appropriate.
            if trail_indent and add_indents:
                lexer_logger.debug(" INDENT")
                segment_buffer.append(
                    Indent(pos_marker=PositionMarker.from_point(
                        placeholder_slice.stop,
                        element.template_slice.start,
                        templated_file,
                    )))
        # Add the actual segment
        segment_buffer.append(
            element.to_segment(pos_marker=PositionMarker(
                source_slice,
                element.template_slice,
                templated_file,
            ), ))
    # Convert to tuple before return
    return tuple(segment_buffer)
def generate_test_segments_func(elems):
    """Roughly generate test segments.

    This function isn't totally robust, but good enough
    for testing. Use with caution.
    """

    def _segment_class(raw):
        """Build the RawSegment subclass appropriate for this token."""
        # NOTE: order matters — whitespace checks run first, and an empty
        # string (empty set) also classifies as whitespace.
        if set(raw) <= {" ", "\t"}:
            return RawSegment.make(
                " ", name="whitespace", type="whitespace", _is_code=False)
        if set(raw) <= {"\n"}:
            return RawSegment.make(
                "\n", name="newline", type="newline", _is_code=False)
        if raw == "(":
            return RawSegment.make("(", name="bracket_open")
        if raw == ")":
            return RawSegment.make(")", name="bracket_close")
        if raw.startswith("--"):
            return RawSegment.make("--", name="inline_comment",
                                   _is_code=False)
        if raw.startswith('"'):
            return RawSegment.make('"', name="double_quote")
        if raw.startswith("'"):
            return RawSegment.make("'", name="single_quote")
        return RawSegment.make("")

    templated_file = TemplatedFile.from_string("".join(elems))
    segments = []
    position = 0
    for elem in elems:
        # <indent>/<dedent> are zero-width meta markers: they produce a
        # point position and consume no characters of the raw file.
        if elem in ("<indent>", "<dedent>"):
            marker_cls = Indent if elem == "<indent>" else Dedent
            segments.append(
                marker_cls(pos_marker=PositionMarker.from_point(
                    position, position, templated_file)))
            continue
        # Source and templated slices are identical here because the
        # "file" is the literal concatenation of the elements.
        span = slice(position, position + len(elem))
        segments.append(
            _segment_class(elem)(
                elem,
                pos_marker=PositionMarker(span, span, templated_file),
            ))
        position += len(elem)
    return tuple(segments)
def elements_to_segments(
        self, elements: List[TemplateElement],
        templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments.

    Args:
        elements: The lexed elements to convert.
        templated_file: The templated file the elements were lexed from,
            used to map templated positions back to source positions.

    Returns:
        A tuple of raw segments, including loop markers, placeholders,
        indents/dedents for source-only (templated) sections, and a
        trailing end-of-file marker.
    """
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []
    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)
    # Stash the previous iteration's source slice so we can tell whether
    # we're still within the same source span (e.g. inside one templated
    # section) or have moved on.
    stash_source_slice, last_source_slice = None, None
    # Now work out source slices, and add in template placeholders.
    for idx, element in enumerate(elements):
        # Calculate Source Slice
        if idx != 0:
            last_source_slice = stash_source_slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice)
        stash_source_slice = source_slice
        # Output the slice as we lex.
        lexer_logger.debug(
            " %s, %s, %s, %r",
            idx,
            element,
            source_slice,
            templated_file.templated_str[element.template_slice],
        )
        # Detect when we've gone backward in the source.
        # NOTE: If it's the _same_ slice then don't insert a marker
        # because we're probably just within a single templated
        # section.
        if (last_source_slice and last_source_slice.stop > source_slice.start
                and last_source_slice != source_slice):
            # If we have, insert a loop marker to reflect that.
            lexer_logger.debug(
                " Backward jump detected. Inserting Loop Marker")
            segment_buffer.append(
                TemplateLoop(pos_marker=PositionMarker.from_point(
                    last_source_slice.stop,
                    element.template_slice.start,
                    templated_file,
                )))
        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        so_slices = []
        # Only look for source only slices if we've got a new source slice to
        # avoid unnecessary duplication.
        if last_source_slice != source_slice:
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match.
                if source_only_slice.source_idx >= source_slice.stop:
                    break
                elif source_only_slice.source_idx >= source_slice.start:
                    so_slices.append(source_only_slice)
        if so_slices:
            lexer_logger.debug(" Collected Source Only Slices")
            for so_slice in so_slices:
                lexer_logger.debug(" %s", so_slice)
            # Calculate some things which will be useful
            templ_str = templated_file.templated_str[element.template_slice]
            source_str = templated_file.source_str[source_slice]
            # For reasons which aren't entirely clear right now, if there is
            # an included literal, it will always be at the end. Let's see if
            # it's there.
            if source_str.endswith(templ_str):
                existing_len = len(templ_str)
            else:
                existing_len = 0
            # Calculate slices
            placeholder_slice = slice(source_slice.start,
                                      source_slice.stop - existing_len)
            # NOTE: when existing_len == 0 this is [:-0], i.e. an empty
            # string — the fallback a few lines below then fills it in.
            placeholder_str = source_str[:-existing_len]
            source_slice = slice(source_slice.stop - existing_len,
                                 source_slice.stop)
            # If it doesn't manage to extract a placeholder string from the
            # source just concatenate the source only strings. There is
            # almost always only one of them.
            if not placeholder_str:
                placeholder_str = "".join(s.raw for s in so_slices)
            # The Jinja templater sometimes returns source-only slices with
            # gaps between. For example, in this section:
            #
            #   {% else %}
            #   JOIN
            #       {{action}}_raw_effect_sizes
            #   USING
            #       ({{ states }})
            #   {% endif %}
            #
            # we might get {% else %} and {% endif %} slices, without the
            # 4 lines between. This indicates those lines were not executed
            # In this case, generate a placeholder where the skipped code is
            # omitted but noted with a brief string, e.g.:
            #
            # "{% else %}... [103 unused template characters] ...{% endif %}".
            #
            # This is more readable -- it would be REALLY confusing for a
            # placeholder to include code that wasn't even executed!!
            if len(so_slices) >= 2:
                has_gap = False
                gap_placeholder_parts = []
                last_slice = None
                # For each slice...
                for so_slice in so_slices:
                    # If it's not the first slice, was there a gap?
                    if last_slice:
                        end_last = last_slice.source_idx + len(last_slice.raw)
                        chars_skipped = so_slice.source_idx - end_last
                        if chars_skipped:
                            # Yes, gap between last_slice and so_slice.
                            has_gap = True
                            # Generate a string documenting the gap.
                            if chars_skipped >= 10:
                                gap_placeholder_parts.append(
                                    f"... [{chars_skipped} unused template "
                                    "characters] ...")
                            else:
                                gap_placeholder_parts.append("...")
                    # Now add the slice's source.
                    gap_placeholder_parts.append(so_slice.raw)
                    last_slice = so_slice
                if has_gap:
                    placeholder_str = "".join(gap_placeholder_parts)
            lexer_logger.debug(
                " Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                existing_len,
                placeholder_slice,
                source_slice,
                placeholder_str,
                templ_str,
            )
            # Calculate potential indent/dedent
            block_slices = sum(
                s.slice_type.startswith("block_") for s in so_slices)
            block_balance = sum(s.slice_type == "block_start"
                                for s in so_slices) - sum(
                                    s.slice_type == "block_end"
                                    for s in so_slices)
            lead_dedent = so_slices[0].slice_type in ("block_end",
                                                      "block_mid")
            trail_indent = so_slices[-1].slice_type in ("block_start",
                                                        "block_mid")
            add_indents = self.config.get("template_blocks_indent",
                                          "indentation")
            lexer_logger.debug(
                " Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, "
                "Add: %s",
                block_slices,
                block_balance,
                lead_dedent,
                trail_indent,
                add_indents,
            )
            # Add a dedent if appropriate.
            if lead_dedent and add_indents:
                lexer_logger.debug(" DEDENT")
                segment_buffer.append(
                    Dedent(pos_marker=PositionMarker.from_point(
                        placeholder_slice.start,
                        element.template_slice.start,
                        templated_file,
                    )))
            # Always add a placeholder. Its templated slice is zero-length:
            # the source-only section produces no templated output.
            segment_buffer.append(
                TemplateSegment(
                    pos_marker=PositionMarker(
                        placeholder_slice,
                        slice(
                            element.template_slice.start,
                            element.template_slice.start,
                        ),
                        templated_file,
                    ),
                    source_str=placeholder_str,
                    block_type=so_slices[0].slice_type
                    if len(so_slices) == 1 else "compound",
                ))
            lexer_logger.debug(" Placeholder: %s, %r", segment_buffer[-1],
                               placeholder_str)
            # Add an indent if appropriate.
            if trail_indent and add_indents:
                lexer_logger.debug(" INDENT")
                segment_buffer.append(
                    Indent(
                        is_template=True,
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.stop,
                            element.template_slice.start,
                            templated_file,
                        ),
                    ))
        # Add the actual segment
        segment_buffer.append(
            element.to_segment(pos_marker=PositionMarker(
                source_slice,
                element.template_slice,
                templated_file,
            ), ))
        # Generate placeholders for any source-only slices that *follow*
        # the last element. This happens, for example, if a Jinja templated
        # file ends with "{% endif %}", and there's no trailing newline.
        if idx == len(elements) - 1:
            so_slices = [
                so for so in source_only_slices
                if so.source_idx >= source_slice.stop
            ]
            for so_slice in so_slices:
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            slice(so_slice.source_idx,
                                  so_slice.end_source_idx()),
                            slice(
                                element.template_slice.stop,
                                element.template_slice.stop,
                            ),
                            templated_file,
                        ),
                        source_str=so_slice.raw,
                        block_type=so_slice.slice_type,
                    ))
    # Add an end of file marker
    segment_buffer.append(
        EndOfFile(pos_marker=segment_buffer[-1].pos_marker.
                  end_point_marker() if segment_buffer else PositionMarker.
                  from_point(0, 0, templated_file)))
    # Convert to tuple before return
    return tuple(segment_buffer)