def get_raw_slices(self, elem: BaseSegment) -> Optional[List[RawFileSlice]]:
    """Fetch the raw file slices spanned by a placeholder segment.

    Returns None when no templated file is available, when ``elem`` is
    not a placeholder, or when no raw slices span its source position.
    """
    if not self.templated_file:  # pragma: no cover
        return None
    if not elem.is_type("placeholder"):
        return None
    assert elem.pos_marker, "TypeGuard"
    spanning = self.templated_file.raw_slices_spanning_source_slice(
        elem.pos_marker.source_slice
    )
    # Normalise an empty result to None for callers.
    if spanning:
        return spanning
    return None
def _get_element_template_info(
    cls, elem: BaseSegment, templated_file: Optional[TemplatedFile]
) -> Optional[str]:
    """Return the slice type of a placeholder element, if it has one.

    Non-placeholder elements yield None. A placeholder with no
    templated file to consult is an error.
    """
    if not elem.is_type("placeholder"):
        return None
    if templated_file is None:
        raise ValueError("Parameter templated_file cannot be: None.")
    assert elem.pos_marker
    spanning_slices = templated_file.raw_slices_spanning_source_slice(
        elem.pos_marker.source_slice
    )
    # The first spanning slice determines the reported type.
    return spanning_slices[0].slice_type if spanning_slices else None
def _segment_length(elem: BaseSegment, tab_space_size: int):
    """Compute the length-relevant raw string for a segment.

    Usually this is simply the segment's raw string, but templated
    whitespace may be a mixture of literal and templated parts, in
    which case only the leading run of *literal* slices contributes.
    Tabs are expanded to ``tab_space_size`` spaces for convenience
    (this also helps hanging indents).
    """
    if elem.is_type("whitespace") and elem.is_templated:
        # Templated case: gather only the leading literal slices,
        # stopping at the first non-literal one.
        assert elem.pos_marker
        templated_file = elem.pos_marker.templated_file
        leading_literals = Segments(
            elem, templated_file=templated_file
        ).raw_slices.select(loop_while=rsp.is_slice_type("literal"))
        raw = "".join(sp.raw_slice(elem, s) for s in leading_literals)
    else:
        # Typical case: no slices or templating to consider.
        raw = elem.raw
    # Convert tabs to spaces for measurement.
    return raw.replace("\t", " " * tab_space_size)
def is_alias_required(cls, from_expression_element: BaseSegment, dialect_name: str) -> bool:
    """Given an alias, is it REQUIRED to be present?

    Aliases are required in SOME, but not all dialects when there's
    a VALUES clause.
    """
    # Search for a table_expression (i.e. VALUES clause) descendant of the
    # FROM expression, potentially nested inside brackets. We allow the
    # bracket nesting because in some dialects (e.g. TSQL) it is actually
    # *required* for SQL Server to parse the statement.
    for seg in from_expression_element.iter_segments(expanding=("bracketed", )):
        if not seg.is_type("table_expression"):
            continue
        # Found a table expression. Does it carry a VALUES clause?
        if seg.get_child("values_clause"):
            # Only some dialects require VALUES clauses to be aliased.
            return dialect_name in cls._dialects_requiring_alias_for_values_clause
        has_nested_select = any(
            sub.is_type("select_statement", "set_expression",
                        "with_compound_statement")
            for sub in seg.iter_segments(expanding=("bracketed", ))
        )
        if has_nested_select:
            # The FROM expression is a derived table, i.e. a nested
            # SELECT. In this case, the alias is required in every
            # dialect we checked (MySQL, Postgres, T-SQL).
            # https://pganalyze.com/docs/log-insights/app-errors/U115
            return True
        # None of the special cases apply, so no alias is required.
        return False
    # This should never happen. Return False just to be safe.
    return False  # pragma: no cover
def match(self, segments, parse_context):
    """Match a specific sequence of elements.

    Iterates the grammar's elements in order, consuming non-code
    (whitespace) between them when ``allow_gaps`` is set, buffering
    meta (indent/dedent) segments, and failing fast as soon as a
    required element cannot be matched.

    Args:
        segments: The segments to match against (a single BaseSegment
            is coerced to a one-element tuple).
        parse_context: The active ParseContext.

    Returns:
        MatchResult: matched segments plus any unmatched leftovers, or
        an unmatched result if a required element fails.
    """
    if isinstance(segments, BaseSegment):
        segments = tuple(segments)
    matched_segments = MatchResult.from_empty()
    unmatched_segments = segments
    # Buffers of uninstantiated meta segments.
    meta_pre_nc = ()
    meta_post_nc = ()
    early_break = False
    for idx, elem in enumerate(self._elements):
        # Check for an early break.
        if early_break:
            break
        while True:
            # Consume non-code if appropriate
            if self.allow_gaps:
                pre_nc, mid_seg, post_nc = trim_non_code_segments(
                    unmatched_segments)
            else:
                pre_nc = ()
                mid_seg = unmatched_segments
                post_nc = ()
            # Is it an indent or dedent?
            if elem.is_meta:
                # Elements with a negative indent value come AFTER
                # the whitespace. Positive or neutral come BEFORE.
                if elem.indent_val < 0:
                    meta_post_nc += (elem(), )
                else:
                    meta_pre_nc += (elem(), )
                break
            # Is it a conditional? If so is it active
            if isinstance(elem, Conditional) and not elem.is_enabled(parse_context):
                # If it's not active, skip it.
                break
            if len(pre_nc + mid_seg + post_nc) == 0:
                # We've run out of segments without matching everything.
                # Do only optional or meta elements remain?
                # FIX: this previously tested `isinstance(elem, Conditional)`,
                # which is constant across the comprehension; each remaining
                # element `e` is what must be checked (the loop below also
                # iterates `e` and handles its Conditionals).
                if all(e.is_optional() or e.is_meta or isinstance(e, Conditional)
                       for e in self._elements[idx:]):
                    # then it's ok, and we can return what we've got so far.
                    # No need to deal with anything left over because we're
                    # at the end, unless it's a meta segment.
                    # We'll add those meta segments after any existing ones.
                    # So they go on the meta_post_nc stack.
                    for e in self._elements[idx:]:
                        # If it's meta, instantiate it.
                        if e.is_meta:
                            meta_post_nc += (e(), )
                        # If it's conditional and it's enabled, match it.
                        if isinstance(e, Conditional) and e.is_enabled(parse_context):
                            meta_match = e.match(tuple(), parse_context)
                            if meta_match:
                                meta_post_nc += meta_match.matched_segments
                    # Early break to exit via the happy match path.
                    early_break = True
                    break
                else:
                    # we've got to the end of the sequence without matching
                    # all required elements.
                    return MatchResult.from_unmatched(segments)
            else:
                # We've already dealt with potential whitespace above, so
                # carry on to matching
                with parse_context.deeper_match() as ctx:
                    elem_match = elem.match(mid_seg, parse_context=ctx)
                if elem_match.has_match():
                    # We're expecting mostly partial matches here, but
                    # complete matches are possible. Don't be greedy
                    # with whitespace!
                    matched_segments += (meta_pre_nc + pre_nc + meta_post_nc +
                                         elem_match.matched_segments)
                    meta_pre_nc = ()
                    meta_post_nc = ()
                    unmatched_segments = elem_match.unmatched_segments + post_nc
                    # Each time we do this, we do a sense check to make sure
                    # we haven't dropped anything. (Because it's happened
                    # before!).
                    check_still_complete(
                        segments,
                        matched_segments.matched_segments,
                        unmatched_segments,
                    )
                    # Break out of the while loop and move to the next element.
                    break
                else:
                    # If we can't match an element, we should ascertain
                    # whether it's required. If so then fine, move on, but
                    # otherwise we should crash out without a match. We have
                    # not matched the sequence.
                    if elem.is_optional():
                        # This will crash us out of the while loop and move
                        # us onto the next matching element
                        break
                    else:
                        return MatchResult.from_unmatched(segments)
    # If we get to here, we've matched all of the elements (or skipped them)
    # but still have some segments left (or perhaps have precisely zero left).
    # In either case, we're golden. Return successfully, with any leftovers as
    # the unmatched elements. Meta all go at the end regardless of any trailing
    # whitespace.
    return MatchResult(
        BaseSegment._position_segments(
            matched_segments.matched_segments + meta_pre_nc + meta_post_nc,
        ),
        unmatched_segments,
    )
def match(self, segments: Tuple["BaseSegment", ...],
          parse_context: ParseContext) -> MatchResult:
    """Match if this is a bracketed sequence, with content that matches one of the elements.

    1. work forwards to find the first bracket.
       If we find something other that whitespace, then fail out.
    2. Once we have the first bracket, we need to bracket count forward to find
       its partner.
    3. Assuming we find its partner then we try and match what goes between them
       using the match method of Sequence.
       If we match, great. If not, then we return an empty match.
       If we never find its partner then we return an empty match but should
       probably log a parsing warning, or error?
    """
    # Trim ends if allowed.
    if self.allow_gaps:
        pre_nc, seg_buff, post_nc = trim_non_code_segments(segments)
    else:
        seg_buff = segments
    # Rehydrate the bracket segments in question.
    start_bracket, end_bracket = self.get_bracket_from_dialect(parse_context)
    # Allow optional override for special bracket-like things
    start_bracket = self.start_bracket or start_bracket
    end_bracket = self.end_bracket or end_bracket
    # Look for the first bracket
    with parse_context.deeper_match() as ctx:
        start_match = start_bracket.match(seg_buff, parse_context=ctx)
    if start_match:
        seg_buff = start_match.unmatched_segments
    else:
        # Can't find the opening bracket. No Match.
        return MatchResult.from_unmatched(segments)
    # Look for the closing bracket, counting nested bracket pairs so we
    # find the *matching* partner rather than the first closer.
    content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
        segments=seg_buff,
        matchers=[end_bracket],
        parse_context=parse_context,
        start_bracket=start_bracket,
        end_bracket=end_bracket,
        bracket_pairs_set=self.bracket_pairs_set,
    )
    if not end_match:
        # An opening bracket with no closing partner is a parse error,
        # not merely a non-match.
        raise SQLParseError(
            "Couldn't find closing bracket for opening bracket.",
            segment=start_match.matched_segments[0],
        )
    # Match the content now we've confirmed the brackets.
    # First deal with the case of TOTALLY EMPTY BRACKETS e.g. "()"
    if not content_segs:
        # If it's allowed (no elements, or all elements optional), return a match.
        if not self._elements or all(e.is_optional() for e in self._elements):
            return MatchResult(
                start_match.matched_segments + end_match.matched_segments,
                end_match.unmatched_segments,
            )
        # If not, don't.
        else:
            return MatchResult.from_unmatched(segments)
    # Then trim whitespace and deal with the case of no code content e.g. "( )"
    if self.allow_gaps:
        pre_nc, content_segs, post_nc = trim_non_code_segments(content_segs)
    else:
        pre_nc = ()
        post_nc = ()
    # If we don't have anything left after trimming, act accordingly.
    if not content_segs:
        if not self._elements or (all(e.is_optional()
                                      for e in self._elements) and self.allow_gaps):
            # Whitespace-only content is a match; keep the trimmed
            # non-code segments between the brackets.
            return MatchResult(
                start_match.matched_segments + pre_nc + post_nc +
                end_match.matched_segments,
                end_match.unmatched_segments,
            )
        else:
            return MatchResult.from_unmatched(segments)
    # Match using super. Sequence will interpret the content of the elements.
    with parse_context.deeper_match() as ctx:
        content_match = super().match(content_segs, parse_context=ctx)
    # We require a complete match for the content (hopefully for obvious reasons)
    if content_match.is_complete():
        # Append some indent and dedent tokens at the start and the end.
        return MatchResult(
            # We need to realign the meta segments so the pos markers are correct.
            BaseSegment._position_segments(
                (
                    # NB: The nc segments go *outside* the indents.
                    start_match.matched_segments
                    + (Indent(), )  # Add a meta indent here
                    + pre_nc
                    + content_match.matched_segments
                    + post_nc
                    + (Dedent(), )  # Add a meta dedent here
                    + end_match.matched_segments
                ),
            ),
            end_match.unmatched_segments,
        )
    # No complete match. Fail.
    else:
        return MatchResult.from_unmatched(segments)
def match(self, segments: Tuple["BaseSegment", ...],
          parse_context: ParseContext) -> MatchResult:
    """Match if a bracketed sequence, with content that matches one of the elements.

    1. work forwards to find the first bracket.
       If we find something other that whitespace, then fail out.
    2. Once we have the first bracket, we need to bracket count forward to find
       its partner.
    3. Assuming we find its partner then we try and match what goes between them
       using the match method of Sequence.
       If we match, great. If not, then we return an empty match.
       If we never find its partner then we return an empty match but should
       probably log a parsing warning, or error?
    """
    # Trim ends if allowed.
    if self.allow_gaps:
        pre_nc, seg_buff, post_nc = trim_non_code_segments(segments)
    else:
        seg_buff = segments  # pragma: no cover TODO?
    # Rehydrate the bracket segments in question.
    # bracket_persists controls whether we make a BracketedSegment or not.
    start_bracket, end_bracket, bracket_persists = self.get_bracket_from_dialect(
        parse_context)
    # Allow optional override for special bracket-like things
    start_bracket = self.start_bracket or start_bracket
    end_bracket = self.end_bracket or end_bracket
    # Are we dealing with a pre-existing BracketSegment?
    if seg_buff[0].is_type("bracketed"):
        # Reuse the existing bracketed segment: extract its content by
        # slicing off the stored start/end bracket segments.
        seg: BracketedSegment = cast(BracketedSegment, seg_buff[0])
        content_segs = seg.segments[len(seg.start_bracket):-len(seg.end_bracket)]
        bracket_segment = seg
        trailing_segments = seg_buff[1:]
    # Otherwise try and match the segments directly.
    else:
        # Look for the first bracket
        with parse_context.deeper_match() as ctx:
            start_match = start_bracket.match(seg_buff, parse_context=ctx)
        if start_match:
            seg_buff = start_match.unmatched_segments
        else:
            # Can't find the opening bracket. No Match.
            return MatchResult.from_unmatched(segments)
        # Look for the closing bracket, bracket-counting so nested pairs
        # are skipped over correctly.
        content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
            segments=seg_buff,
            matchers=[end_bracket],
            parse_context=parse_context,
            start_bracket=start_bracket,
            end_bracket=end_bracket,
            bracket_pairs_set=self.bracket_pairs_set,
        )
        if not end_match:  # pragma: no cover
            # Unbalanced brackets are a parse error, not a non-match.
            raise SQLParseError(
                "Couldn't find closing bracket for opening bracket.",
                segment=start_match.matched_segments[0],
            )
        # Construct a bracket segment
        bracket_segment = BracketedSegment(
            segments=(start_match.matched_segments + content_segs +
                      end_match.matched_segments),
            start_bracket=start_match.matched_segments,
            end_bracket=end_match.matched_segments,
        )
        trailing_segments = end_match.unmatched_segments
    # Then trim whitespace and deal with the case of non-code content e.g. "( )"
    if self.allow_gaps:
        pre_segs, content_segs, post_segs = trim_non_code_segments(content_segs)
    else:  # pragma: no cover TODO?
        pre_segs = ()
        post_segs = ()
    # If we've got a case of empty brackets check whether that is allowed.
    if not content_segs:
        if not self._elements or (all(e.is_optional() for e in self._elements) and
                                  (self.allow_gaps or
                                   (not pre_segs and not post_segs))):
            return MatchResult(
                (bracket_segment, )
                if bracket_persists else bracket_segment.segments,
                trailing_segments,
            )
        else:
            return MatchResult.from_unmatched(segments)
    # Match the content using super. Sequence will interpret the content of the
    # elements.
    with parse_context.deeper_match() as ctx:
        content_match = super().match(content_segs, parse_context=ctx)
    # We require a complete match for the content (hopefully for obvious reasons)
    if content_match.is_complete():
        # Reconstruct the bracket segment post match.
        # We need to realign the meta segments so the pos markers are correct.
        # Have we already got indents?
        meta_idx = None
        for idx, seg in enumerate(bracket_segment.segments):
            if (seg.is_meta and cast(MetaSegment, seg).indent_val > 0
                    and not cast(MetaSegment, seg).is_template):
                meta_idx = idx
                break
        # If we've already got indents, don't add more.
        # NOTE(review): this is a truthiness check, so meta_idx == 0 would be
        # treated as "no indent found". That looks safe here because index 0
        # is the opening bracket (a non-meta segment), so a found indent is
        # always at idx >= 1 — but worth confirming.
        if meta_idx:
            bracket_segment.segments = BaseSegment._position_segments(
                bracket_segment.start_bracket + pre_segs +
                content_match.all_segments() + post_segs +
                bracket_segment.end_bracket)
        # Append some indent and dedent tokens at the start and the end.
        else:
            bracket_segment.segments = BaseSegment._position_segments(
                # NB: The nc segments go *outside* the indents.
                bracket_segment.start_bracket
                + (Indent(), )  # Add a meta indent here
                + pre_segs
                + content_match.all_segments()
                + post_segs
                + (Dedent(), )  # Add a meta dedent here
                + bracket_segment.end_bracket)
        return MatchResult(
            (bracket_segment, )
            if bracket_persists else bracket_segment.segments,
            trailing_segments,
        )
    # No complete match. Fail.
    else:
        return MatchResult.from_unmatched(segments)
[ FixPatch(slice(0, 3), "abz", "literal", slice(0, 3), "abc", "abc") ], ), # Nested literal edit example ( BaseSegment([ RawSegment( "a", PositionMarker(slice(0, 1), slice(0, 1), templated_file_1), "code", ), RawSegment( "b", PositionMarker(slice(1, 2), slice(1, 2), templated_file_1), "code", ), RawSegment( "z", PositionMarker(slice(2, 3), slice(2, 3), templated_file_1), "code", ), ]), templated_file_1, [ FixPatch(slice(0, 3), "abz", "literal", slice(0, 3), "abc", "abc") ], ), # More complicated templating example
def _generate_source_patches(
    cls, tree: BaseSegment, templated_file: TemplatedFile) -> List[FixPatch]:
    """Use the fixed tree to generate source patches.

    Importantly here we deduplicate and sort the patches from their
    position in the templated file into their intended order in the
    source file.

    Args:
        tree: The (already fixed) parse tree to extract patches from.
        templated_file: The templated file the tree was parsed from,
            used to map templated positions back to source positions.

    Returns:
        The filtered patches, sorted by source position.
    """
    # Iterate patches, filtering and translating as we go:
    linter_logger.debug("### Beginning Patch Iteration.")
    filtered_source_patches = []
    dedupe_buffer = []
    # We use enumerate so that we get an index for each patch. This is entirely
    # so when debugging logs we can find a given patch again!
    for idx, patch in enumerate(
            tree.iter_patches(templated_file=templated_file)):
        linter_logger.debug(" %s Yielded patch: %s", idx, patch)
        cls._log_hints(patch, templated_file)
        # Check for duplicates
        if patch.dedupe_tuple() in dedupe_buffer:
            linter_logger.info(
                " - Skipping. Source space Duplicate: %s",
                patch.dedupe_tuple(),
            )
            continue
        # We now evaluate patches in the source-space for whether they overlap
        # or disrupt any templated sections.
        # The intent here is that unless explicitly stated, a fix should never
        # disrupt a templated section.
        # NOTE: We rely here on the patches being generated in order.
        # TODO: Implement a mechanism for doing templated section fixes. Given
        # these patches are currently generated from fixed segments, there will
        # likely need to be an entirely different mechanism
        # Get the affected raw slices.
        local_raw_slices = templated_file.raw_slices_spanning_source_slice(
            patch.source_slice)
        local_type_list = [slc.slice_type for slc in local_raw_slices]
        # Deal with the easy cases of 1) New code at end 2) only literals
        if not local_type_list or set(local_type_list) == {"literal"}:
            linter_logger.info(
                " * Keeping patch on new or literal-only section: %s",
                patch,
            )
            filtered_source_patches.append(patch)
            dedupe_buffer.append(patch.dedupe_tuple())
        # Handle the easy case of an explicit source fix
        elif patch.patch_category == "source":
            linter_logger.info(
                " * Keeping explicit source fix patch: %s",
                patch,
            )
            filtered_source_patches.append(patch)
            dedupe_buffer.append(patch.dedupe_tuple())
        # Is it a zero length patch (i.e. a pure insertion) sitting
        # exactly on a raw slice boundary?
        elif (patch.source_slice.start == patch.source_slice.stop
              and patch.source_slice.start == local_raw_slices[0].source_idx):
            linter_logger.info(
                " * Keeping insertion patch on slice boundary: %s",
                patch,
            )
            filtered_source_patches.append(patch)
            dedupe_buffer.append(patch.dedupe_tuple())
        else:
            # We've got a situation where the ends of our patch need to be
            # more carefully mapped. Likely because we're greedily including
            # a section of source templating with our fix and we need to work
            # around it gracefully.
            # Identify all the places the string appears in the source content.
            positions = list(findall(patch.templated_str, patch.source_str))
            if len(positions) != 1:
                # NOTE: This section is not covered in tests. While we
                # don't have an example of it's use (we should), the
                # code after this relies on there being only one
                # instance found - so the safety check remains.
                linter_logger.debug(  # pragma: no cover
                    " - Skipping edit patch on non-unique templated "
                    "content: %s",
                    patch,
                )
                continue  # pragma: no cover
            # We have a single occurrence of the thing we want to patch. This
            # means we can use its position to place our patch.
            new_source_slice = slice(
                patch.source_slice.start + positions[0],
                patch.source_slice.start + positions[0] +
                len(patch.templated_str),
            )
            linter_logger.debug(
                " * Keeping Tricky Case. Positions: %s, New Slice: %s, "
                "Patch: %s",
                positions,
                new_source_slice,
                patch,
            )
            patch.source_slice = new_source_slice
            filtered_source_patches.append(patch)
            dedupe_buffer.append(patch.dedupe_tuple())
            continue
    # Sort the patches before building up the file.
    return sorted(filtered_source_patches, key=lambda x: x.source_slice.start)