def _slice_template(cls, in_str: str) -> Iterator[RawFileSlice]:
    """Slice template in jinja.

    NB: Starts and ends of blocks are not distinguished.
    """
    env = cls._get_jinja_env()
    str_buff = ""
    idx = 0
    # We decide the "kind" of element we're dealing with
    # using its _closing_ tag rather than its opening
    # tag. The types here map back to similar types of
    # sections in the python slicer.
    block_types = {
        "variable_end": "templated",
        "block_end": "block",
        "comment_end": "comment",
        # Raw tags should behave like blocks. Note that
        # raw_end and raw_begin are whole tags rather
        # than blocks and comments where we get partial
        # tags.
        "raw_end": "block",
        "raw_begin": "block",
    }
    # https://jinja.palletsprojects.com/en/2.11.x/api/#jinja2.Environment.lex
    for _, elem_type, raw in env.lex(cls._preprocess_template(in_str)):
        if elem_type == "data":
            yield RawFileSlice(raw, "literal", idx)
            idx += len(raw)
            continue
        str_buff += raw
        # raw_end and raw_begin behave a little differently in
        # that the whole tag shows up in one go rather than getting
        # parts of the tag at a time.
        if elem_type.endswith("_end") or elem_type == "raw_begin":
            block_type = block_types[elem_type]
            block_subtype = None
            # Handle starts and ends of blocks
            if block_type == "block":
                # Trim off the brackets and then the whitespace
                m_open = cls.re_open_tag.search(str_buff)
                m_close = cls.re_close_tag.search(str_buff)
                trimmed_content = ""
                if m_open and m_close:
                    trimmed_content = str_buff[
                        len(m_open.group(0)):-len(m_close.group(0))]
                if trimmed_content.startswith("end"):
                    block_type = "block_end"
                elif trimmed_content.startswith("el"):
                    # else, elif
                    block_type = "block_mid"
                else:
                    block_type = "block_start"
                    if trimmed_content.split()[0] == "for":
                        block_subtype = "loop"
            yield RawFileSlice(str_buff, block_type, idx, block_subtype)
            idx += len(str_buff)
            str_buff = ""
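For context, a minimal standalone sketch (not sqlfluff code) of the lex() token stream the slicer above consumes. It uses only jinja2's public Environment.lex(), whereas the method itself relies on sqlfluff internals such as _get_jinja_env and re_open_tag. The sample template is made up.

# Illustrative only: "data" tokens are literals; tag contents arrive in
# several pieces, and the closing token type (variable_end / block_end /
# comment_end) identifies the kind of section, which is why block_types is
# keyed on the *_end names.
from jinja2 import Environment

sample = "SELECT {{ col }} FROM tbl {% if flag %}WHERE flag{% endif %}"
for lineno, token_type, value in Environment().lex(sample):
    print(lineno, token_type, repr(value))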
def _split_invariants(
    raw_sliced: List[RawFileSlice],
    literals: List[str],
    raw_occurances: Dict[str, List[int]],
    templated_occurances: Dict[str, List[int]],
    templated_str: str,
) -> Iterator[IntermediateFileSlice]:
    """Split a sliced file on its invariant literals."""
    # Calculate invariants
    invariants = [
        literal
        for literal in literals
        if len(raw_occurances[literal]) == 1
        and len(templated_occurances[literal]) == 1
    ]
    # Set up some buffers
    buffer: List[RawFileSlice] = []
    idx: Optional[int] = None
    templ_idx = 0
    # Loop through
    for raw, token_type, raw_pos in raw_sliced:
        if raw in invariants:
            if buffer:
                yield IntermediateFileSlice(
                    "compound",
                    slice(idx, raw_pos),
                    slice(templ_idx, templated_occurances[raw][0]),
                    buffer,
                )
            buffer = []
            idx = None
            yield IntermediateFileSlice(
                "invariant",
                slice(raw_pos, raw_pos + len(raw)),
                slice(
                    templated_occurances[raw][0],
                    templated_occurances[raw][0] + len(raw),
                ),
                [RawFileSlice(raw, token_type, templated_occurances[raw][0])],
            )
            templ_idx = templated_occurances[raw][0] + len(raw)
        else:
            buffer.append(RawFileSlice(raw, token_type, raw_pos))
            if idx is None:
                idx = raw_pos
    # If we have a final buffer, yield it
    if buffer:
        yield IntermediateFileSlice(
            "compound",
            slice((idx or 0), (idx or 0) + sum(len(slc.raw) for slc in buffer)),
            slice(templ_idx, len(templated_str)),
            buffer,
        )
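To illustrate the invariant idea above, a small hedged sketch: a literal is "invariant" when it occurs exactly once in both the raw and templated occurrence maps, so it can anchor the alignment between the two strings. The occurrence dicts below are made-up inputs in the same shape the function expects.

# Hypothetical occurrence maps: literal text -> positions where it was found.
raw_occurances = {"SELECT ": [0], "x": [7, 21], "\nFROM tbl": [25]}
templated_occurances = {"SELECT ": [0], "x": [7], "\nFROM tbl": [12]}
literals = ["SELECT ", "x", "\nFROM tbl"]

# Invariants: literals appearing exactly once on both sides.
invariants = [
    lit
    for lit in literals
    if len(raw_occurances[lit]) == 1 and len(templated_occurances[lit]) == 1
]
print(invariants)  # ['SELECT ', '\nFROM tbl'] -- "x" repeats in the raw string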
def test__templated_file_source_only_slices():
    """Test TemplatedFile.source_only_slices."""
    file = TemplatedFile(
        source_str=" Dummy String again ",  # NB: has length 20
        raw_sliced=[
            RawFileSlice("a" * 10, "literal", 0),
            RawFileSlice("b" * 7, "comment", 10),
            RawFileSlice("a" * 10, "literal", 17),
        ],
    )
    assert file.source_only_slices() == [RawFileSlice("b" * 7, "comment", 10)]
def _slice_template(cls, in_str: str) -> Iterator[RawFileSlice]:
    """Slice a templated python string into token tuples.

    This uses Formatter() as per:
    https://docs.python.org/3/library/string.html#string.Formatter
    """
    fmt = Formatter()
    in_idx = 0
    for literal_text, field_name, format_spec, conversion in fmt.parse(in_str):
        if literal_text:
            escape_chars = cls._sorted_occurrence_tuples(
                cls._substring_occurrences(literal_text, ["}", "{"])
            )
            idx = 0
            while escape_chars:
                first_char = escape_chars.pop()
                # Is there a literal first?
                if first_char[1] > idx:
                    yield RawFileSlice(
                        literal_text[idx : first_char[1]], "literal", in_idx
                    )
                    in_idx += first_char[1] - idx
                # Add the escaped
                idx = first_char[1] + len(first_char[0])
                # We double them here to make the raw
                yield RawFileSlice(
                    literal_text[first_char[1] : idx] * 2, "escaped", in_idx
                )
                # Will always be 2 in this case.
                # This is because ALL escape sequences in the python formatter
                # are two characters which reduce to one.
                in_idx += 2
            # Deal with last one (if present)
            if literal_text[idx:]:
                yield RawFileSlice(literal_text[idx:], "literal", in_idx)
                in_idx += len(literal_text) - idx
        # Deal with fields
        if field_name:
            constructed_token = "{{{field_name}{conv}{spec}}}".format(
                field_name=field_name,
                conv=f"!{conversion}" if conversion else "",
                spec=f":{format_spec}" if format_spec else "",
            )
            yield RawFileSlice(constructed_token, "templated", in_idx)
            in_idx += len(constructed_token)
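As a reference point for the Formatter-based slicer above, a small standalone sketch (not sqlfluff code) of what the standard library's string.Formatter().parse() yields for a Python-format template; the four-tuples it returns are exactly what the loop above consumes. The sample string is made up.

from string import Formatter

fmt = Formatter()
sample = "SELECT {col!r:>10} FROM {{escaped}} tbl"
for literal_text, field_name, format_spec, conversion in fmt.parse(sample):
    # Doubled braces are already reduced to single ones in literal_text;
    # field_name/format_spec/conversion are None for purely literal chunks.
    print(repr(literal_text), field_name, format_spec, conversion)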
def has_template_conflicts(self, templated_file: TemplatedFile) -> bool:
    """Does this fix conflict with (i.e. touch) templated code?"""
    # Goal: Find the raw slices touched by the fix. Two cases, based on
    # edit type:
    # 1. "delete", "replace": Raw slices touching the anchor segment. If
    #    ANY are templated, discard the fix.
    # 2. "create_before", "create_after": Raw slices encompassing the two
    #    character positions surrounding the insertion point (**NOT** the
    #    whole anchor segment, because we're not *touching* the anchor
    #    segment, we're inserting **RELATIVE** to it). If ALL are templated,
    #    discard the fix.
    assert self.anchor.pos_marker
    anchor_slice = self.anchor.pos_marker.templated_slice
    templated_slices = [anchor_slice]
    check_fn = any

    if self.edit_type == "create_before":
        # Consider the first position of the anchor segment and the
        # position just before it.
        templated_slices = [
            slice(anchor_slice.start, anchor_slice.start + 1),
            slice(anchor_slice.start - 1, anchor_slice.start),
        ]
        check_fn = all
    elif self.edit_type == "create_after":
        # Consider the last position of the anchor segment and the
        # character just after it.
        templated_slices = [
            slice(anchor_slice.stop - 1, anchor_slice.stop),
            slice(anchor_slice.stop, anchor_slice.stop + 1),
        ]
        check_fn = all
    # TRICKY: For creations at the end of the file, there won't be an
    # existing slice. In this case, the function adds file_end_slice to the
    # result, as a sort of placeholder or sentinel value. We pass a literal
    # slice for "file_end_slice" so that later in this function, the LintFix
    # is interpreted as literal code. Otherwise, it could be interpreted as
    # a fix to *templated* code and incorrectly discarded.
    fix_slices = self._raw_slices_from_templated_slices(
        templated_file,
        templated_slices,
        file_end_slice=RawFileSlice("", "literal", -1),
    )

    # We have the fix slices. Now check for conflicts.
    result = check_fn(fs.slice_type == "templated" for fs in fix_slices)
    if result or not self.source:
        return result

    # Fix slices were okay. Now check template safety of the "source" field.
    templated_slices = [
        cast(PositionMarker, source.pos_marker).templated_slice
        for source in self.source
    ]
    raw_slices = self._raw_slices_from_templated_slices(
        templated_file, templated_slices)
    return any(fs.slice_type == "templated" for fs in raw_slices)
def get_fix_slices(
    self, templated_file: TemplatedFile, within_only: bool
) -> Set[RawFileSlice]:
    """Returns slices touched by the fix."""
    # Goal: Find the raw slices touched by the fix. Two cases, based on
    # edit type:
    # 1. "delete", "replace": Raw slices touching the anchor segment.
    # 2. "create_before", "create_after": Raw slices encompassing the two
    #    character positions surrounding the insertion point (**NOT** the
    #    whole anchor segment, because we're not *touching* the anchor
    #    segment, we're inserting **RELATIVE** to it).
    assert self.anchor.pos_marker
    anchor_slice = self.anchor.pos_marker.templated_slice
    templated_slices = [anchor_slice]

    # If "within_only" is set for a "create_*" fix, the slice should only
    # include the area of code "within" the area of insertion, not the other
    # side.
    adjust_boundary = 1 if not within_only else 0
    if self.edit_type == "create_before":
        # Consider the first position of the anchor segment and the
        # position just before it.
        templated_slices = [
            slice(anchor_slice.start - 1, anchor_slice.start + adjust_boundary),
        ]
    elif self.edit_type == "create_after":
        # Consider the last position of the anchor segment and the
        # character just after it.
        templated_slices = [
            slice(anchor_slice.stop - adjust_boundary, anchor_slice.stop + 1),
        ]
    elif (
        self.edit_type == "replace"
        and self.anchor.pos_marker.source_slice.stop
        == self.anchor.pos_marker.source_slice.start
    ):
        # We're editing something with zero size in the source. This means
        # it likely _didn't exist_ in the source and so can be edited safely.
        # We return an empty set because this edit doesn't touch anything
        # in the source.
        return set()

    # TRICKY: For creations at the end of the file, there won't be an
    # existing slice. In this case, the function adds file_end_slice to the
    # result, as a sort of placeholder or sentinel value. We pass a literal
    # slice for "file_end_slice" so that later in this function, the LintFix
    # is interpreted as literal code. Otherwise, it could be interpreted as
    # a fix to *templated* code and incorrectly discarded.
    return self._raw_slices_from_templated_slices(
        templated_file,
        templated_slices,
        file_end_slice=RawFileSlice("", "literal", -1),
    )
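A hedged sketch of the boundary arithmetic described in the comments above, using plain slice objects and made-up positions. It is not the sqlfluff implementation, just the "create_before"/"create_after" calculation in isolation.

# Suppose the anchor segment occupies templated positions 10..15.
anchor_slice = slice(10, 15)

def insertion_slices(edit_type: str, anchor: slice, within_only: bool):
    """Illustrative boundary calculation for create_before/create_after."""
    adjust_boundary = 1 if not within_only else 0
    if edit_type == "create_before":
        # The position just before the anchor, plus (optionally) its first char.
        return [slice(anchor.start - 1, anchor.start + adjust_boundary)]
    if edit_type == "create_after":
        # The anchor's last char (optionally), plus the position just after it.
        return [slice(anchor.stop - adjust_boundary, anchor.stop + 1)]
    return [anchor]

print(insertion_slices("create_before", anchor_slice, within_only=False))
# [slice(9, 11, None)]
print(insertion_slices("create_after", anchor_slice, within_only=True))
# [slice(15, 16, None)]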
def track_literal(self, raw: str, block_idx: int) -> None:
    """Set up tracking for a Jinja literal."""
    self.raw_sliced.append(
        RawFileSlice(
            raw,
            "literal",
            self.idx_raw,
            None,
            block_idx,
        ))
    # Replace literal text with a unique ID.
    self.raw_slice_info[self.raw_sliced[-1]] = self.slice_info_for_literal(
        len(raw), "")
    self.idx_raw += len(raw)
def test__templated_file_templated_slice_to_source_slice(
        in_slice, out_slice, is_literal, file_slices, raw_slices):
    """Test TemplatedFile.templated_slice_to_source_slice."""
    file = TemplatedFile(
        source_str="Dummy String",
        sliced_file=file_slices,
        raw_sliced=[
            rs if isinstance(rs, RawFileSlice) else RawFileSlice(*rs)
            for rs in raw_slices
        ],
        fname="test",
    )
    source_slice = file.templated_slice_to_source_slice(in_slice)
    literal_test = file.is_source_slice_literal(source_slice)
    assert (is_literal, source_slice) == (literal_test, out_slice)
def handle_left_whitespace_stripping(self, token: str, block_idx: int) -> None:
    """If block open uses whitespace stripping, record it.

    When a "begin" tag (whether block, comment, or data) uses whitespace
    stripping
    (https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control),
    the Jinja lex() function handles this by discarding adjacent whitespace
    from 'raw_str'. For more insight, see the tokeniter() function in this
    file:
    https://github.com/pallets/jinja/blob/main/src/jinja2/lexer.py

    We want to detect and correct for this in order to:
    - Correctly update "idx" (if this is wrong, that's a potential DISASTER
      because lint fixes use this info to update the source file, and
      incorrect values often result in CORRUPTING the user's file so it's
      no longer valid SQL. :-O)
    - Guarantee that the slices we return fully "cover" the contents of
      'in_str'.

    We detect skipped characters by looking ahead in in_str for the token
    just returned from lex(). The token text will either be at the current
    'idx_raw' position (if whitespace stripping did not occur) OR it'll be
    farther along in 'raw_str', but we're GUARANTEED that lex() only skips
    over WHITESPACE; nothing else.
    """
    # Find the token returned. Did lex() skip over any characters?
    num_chars_skipped = self.raw_str.index(token, self.idx_raw) - self.idx_raw
    if not num_chars_skipped:
        return

    # Yes. It skipped over some characters. Compute a string
    # containing the skipped characters.
    skipped_str = self.raw_str[self.idx_raw:self.idx_raw + num_chars_skipped]

    # Sanity check: Verify that Jinja only skips over
    # WHITESPACE, never anything else.
    if not skipped_str.isspace():  # pragma: no cover
        templater_logger.warning(
            "Jinja lex() skipped non-whitespace: %s", skipped_str)
    # Treat the skipped whitespace as a literal.
    self.raw_sliced.append(
        RawFileSlice(skipped_str, "literal", self.idx_raw, None, block_idx))
    self.raw_slice_info[self.raw_sliced[-1]] = self.slice_info_for_literal(0)
    self.idx_raw += num_chars_skipped
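A small standalone sketch (made-up strings, not sqlfluff code) of the skipped-whitespace detection the docstring describes: look up the lexed token in the raw string at or after the current index, and treat any gap as whitespace that Jinja stripped.

raw_str = "SELECT 1\n   {%- if flag %}"   # "{%-" strips the preceding whitespace
idx_raw = 8                                # where the tracker "should" be
token = "{%-"                              # token just returned by lex()

num_chars_skipped = raw_str.index(token, idx_raw) - idx_raw
skipped_str = raw_str[idx_raw:idx_raw + num_chars_skipped]
assert skipped_str.isspace()               # lex() only ever skips whitespace
print(num_chars_skipped, repr(skipped_str))  # 4 '\n   '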
def _split_invariants(
    cls,
    raw_sliced: List[RawFileSlice],
    literals: List[str],
    raw_occurrences: Dict[str, List[int]],
    templated_occurrences: Dict[str, List[int]],
    templated_str: str,
) -> Iterator[IntermediateFileSlice]:
    """Split a sliced file on its invariant literals.

    We prioritise the _longest_ invariants first as they are more likely
    to be the anchors.
    """
    # Calculate invariants
    invariants = [
        literal
        for literal in literals
        if len(raw_occurrences[literal]) == 1
        and len(templated_occurrences[literal]) == 1
    ]
    # Work through the invariants and make sure they appear
    # in order.
    for linv in sorted(invariants, key=len, reverse=True):
        # Any invariants which have templated positions, relative
        # to source positions, which aren't in order, should be
        # ignored.

        # Is this one still relevant?
        if linv not in invariants:
            continue
        source_pos, templ_pos = raw_occurrences[linv], templated_occurrences[linv]
        # Copy the list before iterating because we're going to edit it.
        for tinv in invariants.copy():
            if tinv != linv:
                src_dir = source_pos > raw_occurrences[tinv]
                tmp_dir = templ_pos > templated_occurrences[tinv]
                # If it's not in the same direction in the source and
                # template, remove it.
                if src_dir != tmp_dir:
                    templater_logger.debug(
                        " Invariant found out of order: %r", tinv
                    )
                    invariants.remove(tinv)

    # Set up some buffers
    buffer: List[RawFileSlice] = []
    idx: Optional[int] = None
    templ_idx = 0
    # Loop through
    for raw, token_type, raw_pos, _ in raw_sliced:
        if raw in invariants:
            if buffer:
                yield IntermediateFileSlice(
                    "compound",
                    slice(idx, raw_pos),
                    slice(templ_idx, templated_occurrences[raw][0]),
                    buffer,
                )
            buffer = []
            idx = None
            yield IntermediateFileSlice(
                "invariant",
                slice(raw_pos, raw_pos + len(raw)),
                slice(
                    templated_occurrences[raw][0],
                    templated_occurrences[raw][0] + len(raw),
                ),
                [RawFileSlice(raw, token_type, templated_occurrences[raw][0])],
            )
            templ_idx = templated_occurrences[raw][0] + len(raw)
        else:
            buffer.append(RawFileSlice(raw, token_type, raw_pos))
            if idx is None:
                idx = raw_pos
    # If we have a final buffer, yield it
    if buffer:
        yield IntermediateFileSlice(
            "compound",
            slice((idx or 0), (idx or 0) + sum(len(slc.raw) for slc in buffer)),
            slice(templ_idx, len(templated_str)),
            buffer,
        )
instr = "SELECT * FROM {{blah}}" outstr, _ = t.process(in_str=instr, fname="test") assert instr == str(outstr) SIMPLE_SOURCE_STR = "01234\n6789{{foo}}fo\nbarss" SIMPLE_TEMPLATED_STR = "01234\n6789x\nfo\nbarfss" SIMPLE_SLICED_FILE = [ TemplatedFileSlice(*args) for args in [ ("literal", slice(0, 10, None), slice(0, 10, None)), ("templated", slice(10, 17, None), slice(10, 12, None)), ("literal", slice(17, 25, None), slice(12, 20, None)), ] ] SIMPLE_RAW_SLICED_FILE = [ RawFileSlice(*args) for args in [ ("x" * 10, "literal", 0), ("x" * 7, "templated", 10), ("x" * 8, "literal", 17), ] ] COMPLEX_SLICED_FILE = [ TemplatedFileSlice(*args) for args in [ ("literal", slice(0, 13, None), slice(0, 13, None)), ("comment", slice(13, 29, None), slice(13, 13, None)), ("literal", slice(29, 44, None), slice(13, 28, None)), ("block_start", slice(44, 68, None), slice(28, 28, None)), ("literal", slice(68, 81, None), slice(28, 41, None)), ("templated", slice(81, 86, None), slice(41, 42, None)), ("literal", slice(86, 110, None), slice(42, 66, None)),
    t = PythonTemplater(override_context=dict(noblah="foo"))
    instr = PYTHON_STRING
    with pytest.raises(SQLTemplaterError):
        t.process(in_str=instr)


@pytest.mark.parametrize(
    "int_slice,templated_str,head_test,tail_test,int_test",
    [
        # Test Invariant
        (
            IntermediateFileSlice(
                "compound",
                slice(0, 5),
                slice(0, 5),
                [RawFileSlice("{{i}}", "templated", 0)],
            ),
            "foo",
            [],
            [],
            IntermediateFileSlice(
                "compound",
                slice(0, 5),
                slice(0, 5),
                [RawFileSlice("{{i}}", "templated", 0)],
            ),
        ),
        # Test Complete Trimming
        (
            IntermediateFileSlice(
                "compound",
def process(
    self, *, in_str: str, fname: str, config=None, formatter=None
) -> Tuple[Optional[TemplatedFile], list]:
    """Process a string and return a TemplatedFile.

    Note that the arguments are enforced as keywords
    because Templaters can have differences in their `process`
    method signature. A Templater that only supports reading
    from a file would need the following signature:
        process(*, fname, in_str=None, config=None)
    (arguments are swapped)

    Args:
        in_str (:obj:`str`): The input string.
        fname (:obj:`str`, optional): The filename of this string. This is
            mostly for loading config files at runtime.
        config (:obj:`FluffConfig`): A specific config to use for this
            templating operation. Only necessary for some templaters.
        formatter (:obj:`CallbackFormatter`): Optional object for output.
    """
    context = self.get_context(config)
    template_slices = []
    raw_slices = []
    last_pos_raw, last_pos_templated = 0, 0
    out_str = ""

    regex = context["__bind_param_regex"]
    # when the param has no name, use a 1-based index
    param_counter = 1
    for found_param in regex.finditer(in_str):
        span = found_param.span()
        if "param_name" not in found_param.groupdict():
            param_name = str(param_counter)
            param_counter += 1
        else:
            param_name = found_param["param_name"]
        last_literal_length = span[0] - last_pos_raw
        try:
            replacement = str(context[param_name])
        except KeyError as err:
            # TODO: Add a url here so people can get more help.
            raise SQLTemplaterError(
                "Failure in placeholder templating: {}. Have you configured your "
                "variables?".format(err))
        # add the literal to the slices
        template_slices.append(
            TemplatedFileSlice(
                slice_type="literal",
                source_slice=slice(last_pos_raw, span[0], None),
                templated_slice=slice(
                    last_pos_templated,
                    last_pos_templated + last_literal_length,
                    None,
                ),
            ))
        raw_slices.append(
            RawFileSlice(
                raw=in_str[last_pos_raw:span[0]],
                slice_type="literal",
                source_idx=last_pos_raw,
            ))
        out_str += in_str[last_pos_raw:span[0]]
        # add the current replaced element
        start_template_pos = last_pos_templated + last_literal_length
        template_slices.append(
            TemplatedFileSlice(
                slice_type="templated",
                source_slice=slice(span[0], span[1], None),
                templated_slice=slice(
                    start_template_pos,
                    start_template_pos + len(replacement),
                    None),
            ))
        raw_slices.append(
            RawFileSlice(
                raw=in_str[span[0]:span[1]],
                slice_type="templated",
                source_idx=span[0],
            ))
        out_str += replacement
        # update the indexes
        last_pos_raw = span[1]
        last_pos_templated = start_template_pos + len(replacement)
    # add the last literal, if any
    if len(in_str) > last_pos_raw:
        template_slices.append(
            TemplatedFileSlice(
                slice_type="literal",
                source_slice=slice(last_pos_raw, len(in_str), None),
                templated_slice=slice(
                    last_pos_templated,
                    last_pos_templated + (len(in_str) - last_pos_raw),
                    None,
                ),
            ))
        raw_slices.append(
            RawFileSlice(
                raw=in_str[last_pos_raw:],
                slice_type="literal",
                source_idx=last_pos_raw,
            ))
        out_str += in_str[last_pos_raw:]
    return (
        TemplatedFile(
            # original string
            source_str=in_str,
            # string after all replacements
            templated_str=out_str,
            # filename
            fname=fname,
            # list of TemplatedFileSlice
            sliced_file=template_slices,
            # list of RawFileSlice, same size
            raw_sliced=raw_slices,
        ),
        [],  # violations, always empty
    )
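To make the bind-parameter walk above concrete, a hedged sketch with a made-up regex and context (the real pattern comes from the templater's "__bind_param_regex" config): finditer() yields the spans that split the input into alternating literal and templated slices.

import re

in_str = "SELECT * FROM tbl WHERE id = :user_id AND name = :name"
regex = re.compile(r":(?P<param_name>\w+)")      # assumed colon-style placeholders
context = {"user_id": 42, "name": "'bob'"}

out_str, last_pos = "", 0
for found_param in regex.finditer(in_str):
    span = found_param.span()
    out_str += in_str[last_pos:span[0]]                  # literal slice
    out_str += str(context[found_param["param_name"]])   # templated slice
    last_pos = span[1]
out_str += in_str[last_pos:]                             # trailing literal, if any
print(out_str)  # SELECT * FROM tbl WHERE id = 42 AND name = 'bob'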
def _slice_template(self) -> List[RawFileSlice]:
    """Slice template in jinja.

    NB: Starts and ends of blocks are not distinguished.
    """
    str_buff = ""
    idx = 0
    # We decide the "kind" of element we're dealing with
    # using its _closing_ tag rather than its opening
    # tag. The types here map back to similar types of
    # sections in the python slicer.
    block_types = {
        "variable_end": "templated",
        "block_end": "block",
        "comment_end": "comment",
        # Raw tags should behave like blocks. Note that
        # raw_end and raw_begin are whole tags rather
        # than blocks and comments where we get partial
        # tags.
        "raw_end": "block",
        "raw_begin": "block",
    }
    # https://jinja.palletsprojects.com/en/2.11.x/api/#jinja2.Environment.lex
    stack = []
    result = []
    set_idx = None
    unique_alternate_id: Optional[str]
    alternate_code: Optional[str]
    for _, elem_type, raw in self.env.lex(self.raw_str):
        # Replace literal text with a unique ID.
        if elem_type == "data":
            if set_idx is None:
                unique_alternate_id = self.next_slice_id()
                alternate_code = f"\0{unique_alternate_id}_{len(raw)}"
            else:
                unique_alternate_id = self.next_slice_id()
                alternate_code = f"\0set{unique_alternate_id}_{len(raw)}"
            result.append(RawFileSlice(
                raw,
                "literal",
                idx,
            ))
            self.raw_slice_info[result[-1]] = RawSliceInfo(
                unique_alternate_id, alternate_code, [])
            idx += len(raw)
            continue
        str_buff += raw

        if elem_type.endswith("_begin"):
            # When a "begin" tag (whether block, comment, or data) uses
            # whitespace stripping (
            # https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control
            # ), the Jinja lex() function handles this by discarding adjacent
            # whitespace from in_str. For more insight, see the tokeniter()
            # function in this file:
            # https://github.com/pallets/jinja/blob/main/src/jinja2/lexer.py
            # We want to detect and correct for this in order to:
            # - Correctly update "idx" (if this is wrong, that's a
            #   potential DISASTER because lint fixes use this info to
            #   update the source file, and incorrect values often result in
            #   CORRUPTING the user's file so it's no longer valid SQL. :-O)
            # - Guarantee that the slices we return fully "cover" the
            #   contents of in_str.
            #
            # We detect skipped characters by looking ahead in in_str for
            # the token just returned from lex(). The token text will either
            # be at the current 'idx' position (if whitespace stripping did
            # not occur) OR it'll be farther along in in_str, but we're
            # GUARANTEED that lex() only skips over WHITESPACE; nothing else.

            # Find the token returned. Did lex() skip over any characters?
            num_chars_skipped = self.raw_str.index(raw, idx) - idx
            if num_chars_skipped:
                # Yes. It skipped over some characters. Compute a string
                # containing the skipped characters.
                skipped_str = self.raw_str[idx:idx + num_chars_skipped]

                # Sanity check: Verify that Jinja only skips over
                # WHITESPACE, never anything else.
                if not skipped_str.isspace():  # pragma: no cover
                    templater_logger.warning(
                        "Jinja lex() skipped non-whitespace: %s", skipped_str)
                # Treat the skipped whitespace as a literal.
                result.append(RawFileSlice(skipped_str, "literal", idx))
                self.raw_slice_info[result[-1]] = RawSliceInfo("", "", [])
                idx += num_chars_skipped

        # raw_end and raw_begin behave a little differently in
        # that the whole tag shows up in one go rather than getting
        # parts of the tag at a time.
        unique_alternate_id = None
        alternate_code = None
        trimmed_content = ""
        if elem_type.endswith("_end") or elem_type == "raw_begin":
            block_type = block_types[elem_type]
            block_subtype = None
            # Handle starts and ends of blocks
            if block_type in ("block", "templated"):
                # Trim off the brackets and then the whitespace
                m_open = self.re_open_tag.search(str_buff)
                m_close = self.re_close_tag.search(str_buff)
                if m_open and m_close:
                    trimmed_content = str_buff[
                        len(m_open.group(0)):-len(m_close.group(0))]
            # :TRICKY: Syntactically, the Jinja {% include %} directive looks
            # like a block, but its behavior is basically syntactic sugar for
            # {{ open("somefile").read() }}. Thus, treat it as templated code.
            if block_type == "block" and trimmed_content.startswith("include "):
                block_type = "templated"
            if block_type == "block":
                if trimmed_content.startswith("end"):
                    block_type = "block_end"
                elif trimmed_content.startswith("el"):
                    # else, elif
                    block_type = "block_mid"
                else:
                    block_type = "block_start"
                    if trimmed_content.split()[0] == "for":
                        block_subtype = "loop"
            else:
                # For "templated", evaluate the content in case of side
                # effects, but return a unique slice ID.
                if trimmed_content:
                    assert m_open and m_close
                    unique_id = self.next_slice_id()
                    unique_alternate_id = unique_id
                    prefix = "set" if set_idx is not None else ""
                    open_ = m_open.group(1)
                    close_ = m_close.group(1)
                    alternate_code = (
                        f"\0{prefix}{unique_alternate_id} {open_} "
                        f"{trimmed_content} {close_}")
            if block_type == "block_start" and trimmed_content.split()[0] in (
                "macro",
                "set",
            ):
                # Jinja supports two forms of {% set %}:
                # - {% set variable = value %}
                # - {% set variable %}value{% endset %}
                # https://jinja.palletsprojects.com/en/2.10.x/templates/#block-assignments
                # When the second format is used, set the variable 'set_idx'
                # to a non-None value. This info is used elsewhere, as
                # literals inside a {% set %} block require special handling
                # during the trace.
                trimmed_content_parts = trimmed_content.split(maxsplit=2)
                if len(trimmed_content_parts) <= 2 or not trimmed_content_parts[
                        2].startswith("="):
                    set_idx = len(result)
            elif block_type == "block_end" and set_idx is not None:
                # Exiting a {% set %} block. Clear the indicator variable.
                set_idx = None
            m = regex.search(r"\s+$", raw, regex.MULTILINE | regex.DOTALL)
            if raw.startswith("-") and m:
                # Right whitespace was stripped. Split off the trailing
                # whitespace into a separate slice. The desired behavior is
                # to behave similarly as the left stripping case above.
                # Note that the stakes are a bit different, because lex()
                # hasn't *omitted* any characters from the strings it
                # returns, it has simply grouped them differently than we
                # want.
                trailing_chars = len(m.group(0))
                if block_type.startswith("block_"):
                    alternate_code = self._remove_block_whitespace_control(
                        str_buff[:-trailing_chars])
                result.append(
                    RawFileSlice(
                        str_buff[:-trailing_chars],
                        block_type,
                        idx,
                        block_subtype,
                    ))
                self.raw_slice_info[result[-1]] = RawSliceInfo(
                    unique_alternate_id, alternate_code, [])
                block_idx = len(result) - 1
                idx += len(str_buff) - trailing_chars
                result.append(
                    RawFileSlice(
                        str_buff[-trailing_chars:],
                        "literal",
                        idx,
                    ))
                self.raw_slice_info[result[-1]] = RawSliceInfo("", "", [])
                idx += trailing_chars
            else:
                if block_type.startswith("block_"):
                    alternate_code = self._remove_block_whitespace_control(
                        str_buff)
                result.append(
                    RawFileSlice(
                        str_buff,
                        block_type,
                        idx,
                        block_subtype,
                    ))
                self.raw_slice_info[result[-1]] = RawSliceInfo(
                    unique_alternate_id, alternate_code, [])
                block_idx = len(result) - 1
                idx += len(str_buff)
            if block_type == "block_start" and trimmed_content.split()[0] in (
                "for",
                "if",
            ):
                stack.append(block_idx)
            elif block_type == "block_mid":
                # Record potential forward jump over this block.
                self.raw_slice_info[
                    result[stack[-1]]].next_slice_indices.append(block_idx)
                stack.pop()
                stack.append(block_idx)
            elif block_type == "block_end" and trimmed_content.split()[0] in (
                "endfor",
                "endif",
            ):
                # Record potential forward jump over this block.
                self.raw_slice_info[
                    result[stack[-1]]].next_slice_indices.append(block_idx)
                if result[stack[-1]].slice_subtype == "loop":
                    # Record potential backward jump to the loop beginning.
                    self.raw_slice_info[
                        result[block_idx]].next_slice_indices.append(
                            stack[-1] + 1)
                stack.pop()
            str_buff = ""
    return result
def analyze(self, make_template: Callable[[str], Template]) -> JinjaTracer:
    """Slice template in jinja."""
    # str_buff and str_parts are two ways we keep track of tokens received
    # from Jinja. str_buff concatenates them together, while str_parts
    # accumulates the individual strings. We generally prefer using
    # str_parts. That's because Jinja doesn't just split on whitespace, so
    # by keeping tokens as Jinja returns them, the code is more robust.
    # Consider the following:
    #   {% set col= "col1" %}
    # Note there's no space after col. Jinja splits this up for us. If we
    # simply concatenated the parts together and later split on whitespace,
    # we'd need some ugly, fragile logic to handle various whitespace
    # possibilities:
    #   {% set col= "col1" %}
    #   {% set col = "col1" %}
    #   {% set col ="col1" %}
    # By using str_parts and letting Jinja handle this, it just works.
    str_buff = ""
    str_parts = []

    # https://jinja.palletsprojects.com/en/2.11.x/api/#jinja2.Environment.lex
    block_idx = 0
    last_elem_type = None
    for _, elem_type, raw in self.env.lex(self.raw_str):
        if last_elem_type == "block_end" or elem_type == "block_start":
            block_idx += 1
        last_elem_type = elem_type

        if elem_type == "data":
            self.track_literal(raw, block_idx)
            continue
        str_buff += raw
        str_parts.append(raw)

        if elem_type.endswith("_begin"):
            self.handle_left_whitespace_stripping(raw, block_idx)

        raw_slice_info: RawSliceInfo = self.make_raw_slice_info(None, None)
        tag_contents = []
        # raw_end and raw_begin behave a little differently in
        # that the whole tag shows up in one go rather than getting
        # parts of the tag at a time.
        m_open = None
        m_close = None
        if elem_type.endswith("_end") or elem_type == "raw_begin":
            block_type = self.block_types[elem_type]
            block_subtype = None
            # Handle starts and ends of blocks
            if block_type in ("block", "templated"):
                m_open = self.re_open_tag.search(str_parts[0])
                m_close = self.re_close_tag.search(str_parts[-1])
                if m_open and m_close:
                    tag_contents = self.extract_tag_contents(
                        str_parts, m_close, m_open, str_buff)

                if block_type == "block" and tag_contents:
                    block_type, block_subtype = self.extract_block_type(
                        tag_contents[0], block_subtype)
                if block_type == "templated" and tag_contents:
                    assert m_open and m_close
                    raw_slice_info = self.track_templated(
                        m_open, m_close, tag_contents)
            raw_slice_info_temp = self.update_inside_set_call_macro_or_block(
                block_type, tag_contents, m_open, m_close, tag_contents)
            if raw_slice_info_temp:
                raw_slice_info = raw_slice_info_temp
            m_strip_right = regex.search(
                r"\s+$", raw, regex.MULTILINE | regex.DOTALL)
            if (
                elem_type.endswith("_end")
                and raw.startswith("-")
                and m_strip_right
            ):
                # Right whitespace was stripped after closing block. Split
                # off the trailing whitespace into a separate slice. The
                # desired behavior is to behave similarly as the left
                # stripping case. Note that the stakes are a bit lower here,
                # because lex() hasn't *omitted* any characters from the
                # strings it returns, it has simply grouped them differently
                # than we want.
                trailing_chars = len(m_strip_right.group(0))
                self.raw_sliced.append(
                    RawFileSlice(
                        str_buff[:-trailing_chars],
                        block_type,
                        self.idx_raw,
                        block_subtype,
                        block_idx,
                    ))
                self.raw_slice_info[self.raw_sliced[-1]] = raw_slice_info
                slice_idx = len(self.raw_sliced) - 1
                self.idx_raw += len(str_buff) - trailing_chars
                self.raw_sliced.append(
                    RawFileSlice(
                        str_buff[-trailing_chars:],
                        "literal",
                        self.idx_raw,
                        None,
                        block_idx,
                    ))
                self.raw_slice_info[
                    self.raw_sliced[-1]] = self.slice_info_for_literal(0)
                self.idx_raw += trailing_chars
            else:
                self.raw_sliced.append(
                    RawFileSlice(
                        str_buff,
                        block_type,
                        self.idx_raw,
                        block_subtype,
                        block_idx,
                    ))
                self.raw_slice_info[self.raw_sliced[-1]] = raw_slice_info
                slice_idx = len(self.raw_sliced) - 1
                self.idx_raw += len(str_buff)
            if block_type.startswith("block"):
                self.track_block_start(block_type, tag_contents[0])
                self.track_block_end(block_type, tag_contents[0])
                self.update_next_slice_indices(
                    slice_idx, block_type, tag_contents[0])
            str_buff = ""
            str_parts = []
    return JinjaTracer(
        self.raw_str,
        self.raw_sliced,
        self.raw_slice_info,
        self.sliced_file,
        make_template,
    )
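As a footnote to the str_parts discussion at the top of analyze(): a standalone sketch (jinja2 public API only, made-up tag strings) showing that Jinja's lexer already splits tag contents into tokens regardless of spacing, which is why keeping the parts as Jinja returns them avoids fragile whitespace handling.

from jinja2 import Environment

env = Environment()
variants = (
    '{% set col= "col1" %}',
    '{% set col = "col1" %}',
    '{% set col ="col1" %}',
)
for variant in variants:
    tokens = [value for _, token_type, value in env.lex(variant)]
    # Regardless of spacing, "set", "col", "=" and the string arrive as
    # separate lexer tokens between block_begin and block_end.
    print(tokens)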
"""Tests for the raw_file_slices module.""" import pytest from sqlfluff.utils.functional import raw_file_slices from sqlfluff.core.templaters.base import RawFileSlice rs_templated_abc = RawFileSlice("{{abc}}", "templated", 0) rs_templated_def = RawFileSlice("{{def}}", "templated", 0) rs_literal_abc = RawFileSlice("abc", "literal", 0) @pytest.mark.parametrize( ["input", "expected"], [ [ raw_file_slices.RawFileSlices(rs_templated_abc, templated_file=None), True, ], [ raw_file_slices.RawFileSlices(rs_templated_def, templated_file=None), False, ], [ raw_file_slices.RawFileSlices( rs_templated_abc, rs_templated_def, templated_file=None), False, ], ], )