def seg_list(generate_test_segments):
    """A preset list of segments for testing.

    Includes a templated segment for completeness.
    """
    # Build the raw segments first, then anchor a templated comment
    # segment at the end of the final raw segment.
    raw_segments = generate_test_segments(
        ["bar", " \t ", "foo", "baar", " \t "]
    )
    template_seg = TemplateSegment(
        pos_marker=raw_segments[-1].get_end_pos_marker(),
        source_str="{# comment #}",
        block_type="comment",
    )
    return raw_segments + (template_seg,)
def elements_to_segments( self, elements: List[TemplateElement], templated_file: TemplatedFile) -> Tuple[RawSegment, ...]: """Convert a tuple of lexed elements into a tuple of segments.""" # Working buffer to build up segments segment_buffer: List[RawSegment] = [] lexer_logger.info("Elements to Segments.") # Get the templated slices to re-insert tokens for them source_only_slices = templated_file.source_only_slices() lexer_logger.info("Source-only slices: %s", source_only_slices) # Now work out source slices, and add in template placeholders. for element in elements: # Calculate Source Slice source_slice = templated_file.templated_slice_to_source_slice( element.template_slice) # The calculated source slice will include any source only slices. # We should consider all of them in turn to see whether we can # insert them. for source_only_slice in source_only_slices: # If it's later in the source, stop looking. Any later # ones *also* won't match. if source_only_slice.source_idx > source_slice.start: break # Is there a templated section within this source slice? # If there is then for some reason I can't quite explain, # it will always be at the start of the section. This is # very convenient beause it means we'll always have the # start and end of it in a definite position. This makes # slicing and looping much easier. elif source_only_slice.source_idx == source_slice.start: lexer_logger.debug( "Found templated section! %s, %s, %s", source_only_slice.source_slice(), source_only_slice.slice_type, element.template_slice.start, ) # Calculate a slice for any placeholders placeholder_source_slice = slice( source_slice.start, source_only_slice.end_source_idx()) # Adjust the source slice accordingly. source_slice = slice(source_only_slice.end_source_idx(), source_slice.stop) # TODO: Readjust this to remove .when once ProtoSegment is in. # Add segments as appropriate. # If it's a block end, add a dedent. 
if source_only_slice.slice_type in ("block_end", "block_mid"): segment_buffer.append( Dedent.when(template_blocks_indent=True)( pos_marker=PositionMarker.from_point( placeholder_source_slice.start, element.template_slice.start, templated_file, ))) # Always add a placeholder segment_buffer.append( TemplateSegment( pos_marker=PositionMarker( placeholder_source_slice, slice( element.template_slice.start, element.template_slice.start, ), templated_file, ), source_str=source_only_slice.raw, block_type=source_only_slice.slice_type, )) # If it's a block end, add a dedent. if source_only_slice.slice_type in ("block_start", "block_mid"): segment_buffer.append( Indent.when(template_blocks_indent=True)( pos_marker=PositionMarker.from_point( placeholder_source_slice.stop, element.template_slice.start, templated_file, ))) # Add the actual segment segment_buffer.append( element.to_segment(pos_marker=PositionMarker( source_slice, element.template_slice, templated_file, ), )) # Convert to tuple before return return tuple(segment_buffer)
def enrich_segments(
    segment_buff: Tuple[BaseSegment, ...], templated_file: TemplatedFile
) -> Tuple[BaseSegment, ...]:
    """Enrich the segments using the templated file.

    We use the mapping in the template to provide positions in the
    source file.

    Each segment's ``pos_marker`` is replaced in place with an
    ``EnrichedFilePositionMarker`` carrying both templated and source
    slices. Placeholder (and Indent/Dedent meta) segments are inserted
    where source-only slices begin at a segment's source position.
    """
    # Make a new buffer to hold the enriched segments.
    # We need a new buffer to hold the new meta segments
    # introduced.
    new_segment_buff = []
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info(
        "Enriching Segments. Source-only slices: %s", source_only_slices
    )
    for segment in segment_buff:
        # The templated position of this segment is derived from the
        # existing (un-enriched) marker: char_pos plus the raw length.
        templated_slice = slice(
            segment.pos_marker.char_pos,
            segment.pos_marker.char_pos + len(segment.raw),
        )
        source_slice = templated_file.templated_slice_to_source_slice(
            templated_slice
        )
        # At this stage, templated slices will be INCLUDED in the source slice,
        # so we should consider whether we've captured any. If we have then
        # we need to re-evaluate whether it's a literal or not.
        # NOTE(review): assumes source_only_slices is ordered by source_idx
        # (the `break` relies on it) — confirm against TemplatedFile.
        for source_only_slice in source_only_slices:
            if source_only_slice.source_idx > source_slice.start:
                break
            elif source_only_slice.source_idx == source_slice.start:
                lexer_logger.debug(
                    "Found templated section! %s, %s, %s",
                    source_only_slice.source_slice(),
                    source_only_slice.slice_type,
                    templated_slice.start,
                )
                # Adjust the source slice accordingly: trim off the
                # source-only portion at the front.
                source_slice = slice(
                    source_only_slice.end_source_idx(), source_slice.stop
                )
                # Add segments as appropriate.
                # If it's a block end (or mid), add a dedent first.
                if source_only_slice.slice_type in ("block_end", "block_mid"):
                    new_segment_buff.append(
                        Dedent.when(template_blocks_indent=True)(
                            pos_marker=segment.pos_marker
                        )
                    )
                # Always add a placeholder for the source-only content.
                new_segment_buff.append(
                    TemplateSegment(
                        pos_marker=segment.pos_marker,
                        source_str=source_only_slice.raw,
                        block_type=source_only_slice.slice_type,
                    )
                )
                # If it's a block start (or mid), add an indent after.
                if source_only_slice.slice_type in ("block_start", "block_mid"):
                    new_segment_buff.append(
                        Indent.when(template_blocks_indent=True)(
                            pos_marker=segment.pos_marker
                        )
                    )
        source_line, source_pos = templated_file.get_line_pos_of_char_pos(
            source_slice.start
        )
        # Recalculate is_literal now that source-only content has been
        # trimmed from the slice.
        is_literal = templated_file.is_source_slice_literal(source_slice)
        # NOTE: mutates the segment in place rather than copying it.
        segment.pos_marker = EnrichedFilePositionMarker(
            statement_index=segment.pos_marker.statement_index,
            line_no=segment.pos_marker.line_no,
            line_pos=segment.pos_marker.line_pos,
            char_pos=segment.pos_marker.char_pos,
            templated_slice=templated_slice,
            source_slice=source_slice,
            is_literal=is_literal,
            source_pos_marker=FilePositionMarker(
                segment.pos_marker.statement_index,
                source_line,
                source_pos,
                source_slice.start,
            ),
        )
        new_segment_buff.append(segment)
    lexer_logger.debug("Enriched Segments:")
    for seg in new_segment_buff:
        lexer_logger.debug(
            "\tTmp: %s\tSrc: %s\tSeg: %s",
            getattr(seg.pos_marker, "templated_slice", None),
            getattr(seg.pos_marker, "source_slice", None),
            seg,
        )
    return tuple(new_segment_buff)
def elements_to_segments(
        self, elements: List[TemplateElement],
        templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments.

    Each element is mapped back onto the source file. Where one or
    more source-only slices (templated tags/comments) fall within an
    element's source region, a placeholder segment is emitted for them
    — plus Indent/Dedent meta segments around block tags when the
    ``template_blocks_indent`` config is set.
    """
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []
    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)
    # Track the previous element's source slice so we can skip
    # duplicate work when consecutive elements map to the same slice.
    stash_source_slice, last_source_slice = None, None
    # Now work out source slices, and add in template placeholders.
    for idx, element in enumerate(elements):
        # Calculate Source Slice
        if idx != 0:
            last_source_slice = stash_source_slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice)
        stash_source_slice = source_slice
        # Output the slice as we lex.
        lexer_logger.debug(
            " %s, %s, %s, %r",
            idx,
            element,
            source_slice,
            templated_file.templated_str[element.template_slice],
        )
        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        so_slices = []
        # Only look for source only slices if we've got a new source slice to
        # avoid unnecessary duplication.
        if last_source_slice != source_slice:
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match. (Assumes the slices are
                # ordered by source_idx.)
                if source_only_slice.source_idx >= source_slice.stop:
                    break
                elif source_only_slice.source_idx >= source_slice.start:
                    so_slices.append(source_only_slice)
        if so_slices:
            lexer_logger.debug(" Collected Source Only Slices")
            for so_slice in so_slices:
                lexer_logger.debug(" %s", so_slice)
            # Calculate some things which will be useful
            templ_str = templated_file.templated_str[element.template_slice]
            source_str = templated_file.source_str[source_slice]
            # For reasons which aren't entirely clear right now, if there is
            # an included literal, it will always be at the end. Let's see
            # if it's there.
            if source_str.endswith(templ_str):
                existing_len = len(templ_str)
            else:
                existing_len = 0
            # Calculate slices
            placeholder_slice = slice(source_slice.start,
                                      source_slice.stop - existing_len)
            # NOTE: when existing_len == 0 this slice is `[:-0]` which is
            # empty — the fallback below then supplies the placeholder.
            placeholder_str = source_str[:-existing_len]
            source_slice = slice(source_slice.stop - existing_len,
                                 source_slice.stop)
            # If it doesn't manage to extract a placeholder string from the
            # source just concatenate the source only strings. There is
            # almost always only one of them.
            if not placeholder_str:
                placeholder_str = "".join(s.raw for s in so_slices)
            lexer_logger.debug(
                " Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                existing_len,
                placeholder_slice,
                source_slice,
                placeholder_str,
                templ_str,
            )
            # Calculate potential indent/dedent. block_slices and
            # block_balance are only used for logging here.
            block_slices = sum(
                s.slice_type.startswith("block_") for s in so_slices)
            block_balance = sum(
                s.slice_type == "block_start" for s in so_slices) - sum(
                    s.slice_type == "block_end" for s in so_slices)
            lead_dedent = so_slices[0].slice_type in ("block_end", "block_mid")
            trail_indent = so_slices[-1].slice_type in ("block_start",
                                                        "block_mid")
            add_indents = self.config.get("template_blocks_indent",
                                          "indentation")
            lexer_logger.debug(
                " Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, Add: %s",
                block_slices,
                block_balance,
                lead_dedent,
                trail_indent,
                add_indents,
            )
            # Add a dedent if appropriate.
            if lead_dedent and add_indents:
                lexer_logger.debug(" DEDENT")
                segment_buffer.append(
                    Dedent(pos_marker=PositionMarker.from_point(
                        placeholder_slice.start,
                        element.template_slice.start,
                        templated_file,
                    )))
            # Always add a placeholder. The templated slice is
            # zero-length: this content is not in the templated output.
            segment_buffer.append(
                TemplateSegment(
                    pos_marker=PositionMarker(
                        placeholder_slice,
                        slice(
                            element.template_slice.start,
                            element.template_slice.start,
                        ),
                        templated_file,
                    ),
                    source_str=placeholder_str,
                    block_type=so_slices[0].slice_type
                    if len(so_slices) == 1 else "compound",
                ))
            # Fixed typo in log message ("Placholder" -> "Placeholder").
            lexer_logger.debug(" Placeholder: %s, %r", segment_buffer[-1],
                               placeholder_str)
            # Add an indent if appropriate.
            if trail_indent and add_indents:
                lexer_logger.debug(" INDENT")
                segment_buffer.append(
                    Indent(pos_marker=PositionMarker.from_point(
                        placeholder_slice.stop,
                        element.template_slice.start,
                        templated_file,
                    )))
        # Add the actual segment
        segment_buffer.append(
            element.to_segment(pos_marker=PositionMarker(
                source_slice,
                element.template_slice,
                templated_file,
            ), ))
    # Convert to tuple before return
    return tuple(segment_buffer)
def elements_to_segments(
        self, elements: List[TemplateElement],
        templated_file: TemplatedFile) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments.

    Maps each element back onto the source file, inserting:
    placeholder segments for source-only slices (templated tags),
    Indent/Dedent meta segments around block tags, TemplateLoop
    markers where the source position jumps backward (loops), trailing
    placeholders for source-only slices after the last element, and a
    final EndOfFile marker.
    """
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []
    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)
    # Track the previous element's source slice so duplicate work is
    # skipped when consecutive elements map to the same slice.
    stash_source_slice, last_source_slice = None, None
    # Now work out source slices, and add in template placeholders.
    for idx, element in enumerate(elements):
        # Calculate Source Slice
        if idx != 0:
            last_source_slice = stash_source_slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice)
        stash_source_slice = source_slice
        # Output the slice as we lex.
        lexer_logger.debug(
            " %s, %s, %s, %r",
            idx,
            element,
            source_slice,
            templated_file.templated_str[element.template_slice],
        )
        # Detect when we've gone backward in the source.
        # NOTE: If it's the _same_ slice then don't insert a marker
        # because we're probably just within a single templated
        # section.
        if (last_source_slice
                and last_source_slice.stop > source_slice.start
                and last_source_slice != source_slice):
            # If we have, insert a loop marker to reflect that.
            lexer_logger.debug(" Backward jump detected. Inserting Loop Marker")
            segment_buffer.append(
                TemplateLoop(pos_marker=PositionMarker.from_point(
                    last_source_slice.stop,
                    element.template_slice.start,
                    templated_file,
                )))
        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        so_slices = []
        # Only look for source only slices if we've got a new source slice to
        # avoid unnecessary duplication.
        if last_source_slice != source_slice:
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match. (Assumes source_idx order.)
                if source_only_slice.source_idx >= source_slice.stop:
                    break
                elif source_only_slice.source_idx >= source_slice.start:
                    so_slices.append(source_only_slice)
        if so_slices:
            lexer_logger.debug(" Collected Source Only Slices")
            for so_slice in so_slices:
                lexer_logger.debug(" %s", so_slice)
            # Calculate some things which will be useful
            templ_str = templated_file.templated_str[element.template_slice]
            source_str = templated_file.source_str[source_slice]
            # For reasons which aren't entirely clear right now, if there is
            # an included literal, it will always be at the end. Let's see
            # if it's there.
            if source_str.endswith(templ_str):
                existing_len = len(templ_str)
            else:
                existing_len = 0
            # Calculate slices
            placeholder_slice = slice(source_slice.start,
                                      source_slice.stop - existing_len)
            # NOTE: when existing_len == 0 this slice is `[:-0]`, i.e.
            # empty — the fallback below then supplies the placeholder.
            placeholder_str = source_str[:-existing_len]
            source_slice = slice(source_slice.stop - existing_len,
                                 source_slice.stop)
            # If it doesn't manage to extract a placeholder string from the
            # source just concatenate the source only strings. There is
            # almost always only one of them.
            if not placeholder_str:
                placeholder_str = "".join(s.raw for s in so_slices)
            # The Jinja templater sometimes returns source-only slices with
            # gaps between. For example, in this section:
            #
            # {% else %}
            # JOIN
            #     {{action}}_raw_effect_sizes
            # USING
            #     ({{ states }})
            # {% endif %}
            #
            # we might get {% else %} and {% endif %} slices, without the
            # 4 lines between. This indicates those lines were not executed
            # In this case, generate a placeholder where the skipped code is
            # omitted but noted with a brief string, e.g.:
            #
            # "{% else %}... [103 unused template characters] ...{% endif %}".
            #
            # This is more readable -- it would be REALLY confusing for a
            # placeholder to include code that wasn't even executed!!
            if len(so_slices) >= 2:
                has_gap = False
                gap_placeholder_parts = []
                last_slice = None
                # For each slice...
                for so_slice in so_slices:
                    # If it's not the first slice, was there a gap?
                    if last_slice:
                        end_last = last_slice.source_idx + len(last_slice.raw)
                        chars_skipped = so_slice.source_idx - end_last
                        if chars_skipped:
                            # Yes, gap between last_slice and so_slice.
                            has_gap = True
                            # Generate a string documenting the gap.
                            if chars_skipped >= 10:
                                gap_placeholder_parts.append(
                                    f"... [{chars_skipped} unused template "
                                    "characters] ...")
                            else:
                                gap_placeholder_parts.append("...")
                    # Now add the slice's source.
                    gap_placeholder_parts.append(so_slice.raw)
                    last_slice = so_slice
                if has_gap:
                    placeholder_str = "".join(gap_placeholder_parts)
            lexer_logger.debug(
                " Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                existing_len,
                placeholder_slice,
                source_slice,
                placeholder_str,
                templ_str,
            )
            # Calculate potential indent/dedent. block_slices and
            # block_balance are only used for logging here.
            block_slices = sum(
                s.slice_type.startswith("block_") for s in so_slices)
            block_balance = sum(
                s.slice_type == "block_start" for s in so_slices) - sum(
                    s.slice_type == "block_end" for s in so_slices)
            lead_dedent = so_slices[0].slice_type in ("block_end", "block_mid")
            trail_indent = so_slices[-1].slice_type in ("block_start",
                                                        "block_mid")
            add_indents = self.config.get("template_blocks_indent",
                                          "indentation")
            lexer_logger.debug(
                " Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, "
                "Add: %s",
                block_slices,
                block_balance,
                lead_dedent,
                trail_indent,
                add_indents,
            )
            # Add a dedent if appropriate.
            if lead_dedent and add_indents:
                lexer_logger.debug(" DEDENT")
                segment_buffer.append(
                    Dedent(pos_marker=PositionMarker.from_point(
                        placeholder_slice.start,
                        element.template_slice.start,
                        templated_file,
                    )))
            # Always add a placeholder. The templated slice is
            # zero-length: this content is not in the templated output.
            segment_buffer.append(
                TemplateSegment(
                    pos_marker=PositionMarker(
                        placeholder_slice,
                        slice(
                            element.template_slice.start,
                            element.template_slice.start,
                        ),
                        templated_file,
                    ),
                    source_str=placeholder_str,
                    block_type=so_slices[0].slice_type
                    if len(so_slices) == 1 else "compound",
                ))
            lexer_logger.debug(" Placeholder: %s, %r", segment_buffer[-1],
                               placeholder_str)
            # Add an indent if appropriate.
            if trail_indent and add_indents:
                lexer_logger.debug(" INDENT")
                segment_buffer.append(
                    Indent(
                        is_template=True,
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.stop,
                            element.template_slice.start,
                            templated_file,
                        ),
                    ))
        # Add the actual segment
        segment_buffer.append(
            element.to_segment(pos_marker=PositionMarker(
                source_slice,
                element.template_slice,
                templated_file,
            ), ))
        # Generate placeholders for any source-only slices that *follow*
        # the last element. This happens, for example, if a Jinja templated
        # file ends with "{% endif %}", and there's no trailing newline.
        if idx == len(elements) - 1:
            so_slices = [
                so for so in source_only_slices
                if so.source_idx >= source_slice.stop
            ]
            for so_slice in so_slices:
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            slice(so_slice.source_idx,
                                  so_slice.end_source_idx()),
                            slice(
                                element.template_slice.stop,
                                element.template_slice.stop,
                            ),
                            templated_file,
                        ),
                        source_str=so_slice.raw,
                        block_type=so_slice.slice_type,
                    ))
    # Add an end of file marker. If no segments were produced at all,
    # anchor it at the origin instead.
    segment_buffer.append(
        EndOfFile(pos_marker=segment_buffer[-1].pos_marker.end_point_marker()
                  if segment_buffer else PositionMarker.from_point(
                      0, 0, templated_file)))
    # Convert to tuple before return
    return tuple(segment_buffer)
PositionMarker(slice(2, 3), slice(2, 3), templated_file_1), "code", ), ]), templated_file_1, [ FixPatch(slice(0, 3), "abz", "literal", slice(0, 3), "abc", "abc") ], ), # More complicated templating example ( BaseSegment([ TemplateSegment( PositionMarker(slice(0, 10), slice(0, 0), templated_file_2), "{# blah #}", "comment", ), RawSegment( "a", PositionMarker(slice(10, 20), slice(0, 1), templated_file_2), "code", ), RawSegment( "b", PositionMarker(slice(19, 20), slice(1, 2), templated_file_2), "code", ), RawSegment(