Esempio n. 1
0
    def match(self, segments: Tuple["BaseSegment", ...],
              parse_context: ParseContext) -> MatchResult:
        """Match a bracketed sequence whose content matches this grammar's elements.

        The procedure is:

        1. Try to match the opening bracket at the front of the input (after
           trimming non-code from both ends when ``allow_gaps`` is set). If it
           does not match, there is no match.
        2. Scan forward, bracket-aware, for the partner closing bracket.
        3. Match whatever lies between the brackets using the parent class's
           ``match``. Only a complete match of the content counts.

        Args:
            segments: The segments to match against.
            parse_context: The current parse context.

        Returns:
            A ``MatchResult`` whose matched segments are the brackets and their
            content (with ``Indent``/``Dedent`` metas inserted just inside the
            brackets when there is content), or
            ``MatchResult.from_unmatched(segments)`` when nothing matches.

        Raises:
            SQLParseError: If the opening bracket matches but no closing
                bracket can be found.

        """
        # Only the middle part of the trimmed input is examined when gaps are
        # allowed.
        # NOTE(review): the trimmed-off ends are not carried into the result;
        # presumably callers never pass leading/trailing non-code - confirm.
        if self.allow_gaps:
            _, candidates, _ = trim_non_code_segments(segments)
        else:
            candidates = segments

        # Bracket matchers come from the dialect unless overridden on this
        # grammar (for special bracket-like things).
        dialect_open, dialect_close = self.get_bracket_from_dialect(
            parse_context)
        opener = self.start_bracket or dialect_open
        closer = self.end_bracket or dialect_close

        # Step 1: the opening bracket.
        with parse_context.deeper_match() as ctx:
            open_match = opener.match(candidates, parse_context=ctx)
        if not open_match:
            return MatchResult.from_unmatched(segments)

        # Step 2: the closing bracket.
        inner, close_match, _ = self._bracket_sensitive_look_ahead_match(
            segments=open_match.unmatched_segments,
            matchers=[closer],
            parse_context=parse_context,
            start_bracket=opener,
            end_bracket=closer,
            bracket_pairs_set=self.bracket_pairs_set,
        )
        if not close_match:
            raise SQLParseError(
                "Couldn't find closing bracket for opening bracket.",
                segment=open_match.matched_segments[0],
            )

        # Step 3: the content. Start with literally nothing between the
        # brackets, e.g. "()". That is fine only if nothing is required.
        if not inner:
            if self._elements and not all(e.is_optional()
                                          for e in self._elements):
                return MatchResult.from_unmatched(segments)
            return MatchResult(
                open_match.matched_segments + close_match.matched_segments,
                close_match.unmatched_segments,
            )

        # Peel non-code off both ends of the content when gaps are allowed.
        if self.allow_gaps:
            lead_nc, inner, trail_nc = trim_non_code_segments(inner)
        else:
            lead_nc = trail_nc = ()

        # Nothing but non-code between the brackets, e.g. "(   )".
        if not inner:
            empty_allowed = not self._elements or (
                all(e.is_optional() for e in self._elements)
                and self.allow_gaps)
            if not empty_allowed:
                return MatchResult.from_unmatched(segments)
            return MatchResult(
                open_match.matched_segments + lead_nc + trail_nc +
                close_match.matched_segments,
                close_match.unmatched_segments,
            )

        # Real content: hand it to the parent class to interpret the elements.
        with parse_context.deeper_match() as ctx:
            inner_match = super().match(inner, parse_context=ctx)

        # Anything short of a complete match of the content is a failure.
        if not inner_match.is_complete():
            return MatchResult.from_unmatched(segments)

        # Wrap the content in an indent/dedent pair. The non-code segments sit
        # between the metas and the content.
        assembled = (
            open_match.matched_segments +
            (Indent(), )  # meta indent just after the opening bracket
            + lead_nc + inner_match.matched_segments + trail_nc +
            (Dedent(), )  # meta dedent just before the closing bracket
            + close_match.matched_segments)
        # Re-position so the position markers of the new metas are correct.
        return MatchResult(
            BaseSegment._position_segments(assembled),
            close_match.unmatched_segments,
        )
Esempio n. 2
0
    def match(self, segments, parse_context):
        """Match a specific sequence of elements.

        Each element of ``self._elements`` is tried, in order, against the
        front of the segments not yet consumed. Meta elements are instantiated
        rather than matched, disabled ``Conditional`` elements are skipped and
        optional elements that fail to match are passed over.

        Args:
            segments: The segments to match against.
            parse_context: The current parse context, used to check whether
                conditionals are enabled and passed down to element matches.

        Returns:
            A ``MatchResult`` with the matched segments (re-positioned via
            ``BaseSegment._position_segments``) and any leftover segments, or
            ``MatchResult.from_unmatched(segments)`` if a required element
            cannot be matched.
        """
        # NOTE(review): this iterates the segment rather than wrapping it in a
        # one-element tuple - confirm that is the intent.
        if isinstance(segments, BaseSegment):
            segments = tuple(segments)

        matched_segments = MatchResult.from_empty()
        unmatched_segments = segments

        # Buffers of instantiated meta segments waiting to be placed before
        # (pre) or after (post) the next run of non-code segments.
        meta_pre_nc = ()
        meta_post_nc = ()
        early_break = False

        for idx, elem in enumerate(self._elements):
            # Check for an early break.
            if early_break:
                break

            # Every path through this body ends in `break` or `return`, so it
            # runs once per element; `break` means "move to the next element".
            while True:
                # Consume non-code if appropriate
                if self.allow_gaps:
                    pre_nc, mid_seg, post_nc = trim_non_code_segments(
                        unmatched_segments)
                else:
                    pre_nc = ()
                    mid_seg = unmatched_segments
                    post_nc = ()

                # Is it an indent or dedent?
                if elem.is_meta:
                    # Elements with a negative indent value come AFTER
                    # the whitespace. Positive or neutral come BEFORE.
                    if elem.indent_val < 0:
                        meta_post_nc += (elem(), )
                    else:
                        meta_pre_nc += (elem(), )
                    break

                # Is it a conditional? If so is it active
                if isinstance(
                        elem,
                        Conditional) and not elem.is_enabled(parse_context):
                    # If it's not active, skip it.
                    break

                if len(pre_nc + mid_seg + post_nc) == 0:
                    # We've run out of sequence without matching everything.
                    # Do only optional, meta or conditional elements remain?
                    # Each remaining element `e` must be tested itself: testing
                    # the current `elem` here would let required elements be
                    # skipped whenever `elem` is an enabled Conditional.
                    if all(e.is_optional() or e.is_meta
                           or isinstance(e, Conditional)
                           for e in self._elements[idx:]):
                        # then it's ok, and we can return what we've got so far.
                        # No need to deal with anything left over because we're at the end,
                        # unless it's a meta segment.

                        # We'll add those meta segments after any existing ones. So
                        # they go on the meta_post_nc stack.
                        for e in self._elements[idx:]:
                            # If it's meta, instantiate it.
                            if e.is_meta:
                                meta_post_nc += (e(), )
                            # If it's conditional and it's enabled, match it.
                            if isinstance(e, Conditional) and e.is_enabled(
                                    parse_context):
                                meta_match = e.match(tuple(), parse_context)
                                if meta_match:
                                    meta_post_nc += meta_match.matched_segments

                        # Early break to exit via the happy match path.
                        early_break = True
                        break
                    else:
                        # we've got to the end of the sequence without matching all
                        # required elements.
                        return MatchResult.from_unmatched(segments)
                else:
                    # We've already dealt with potential whitespace above, so carry on to matching
                    with parse_context.deeper_match() as ctx:
                        elem_match = elem.match(mid_seg, parse_context=ctx)

                    if elem_match.has_match():
                        # We're expecting mostly partial matches here, but complete
                        # matches are possible. Don't be greedy with whitespace!
                        matched_segments += (meta_pre_nc + pre_nc +
                                             meta_post_nc +
                                             elem_match.matched_segments)
                        meta_pre_nc = ()
                        meta_post_nc = ()
                        # Trailing non-code goes back on the unmatched pile so
                        # the next element (or the caller) can claim it.
                        unmatched_segments = elem_match.unmatched_segments + post_nc
                        # Each time we do this, we do a sense check to make sure we haven't
                        # dropped anything. (Because it's happened before!).
                        check_still_complete(
                            segments,
                            matched_segments.matched_segments,
                            unmatched_segments,
                        )

                        # Break out of the while loop and move to the next element.
                        break
                    else:
                        # If we can't match an element, we should ascertain whether it's
                        # required. If it's optional then fine, move on, but otherwise
                        # we should crash out without a match. We have not matched
                        # the sequence.
                        if elem.is_optional():
                            # This will crash us out of the while loop and move us
                            # onto the next matching element
                            break
                        else:
                            return MatchResult.from_unmatched(segments)

        # If we get to here, we've matched all of the elements (or skipped them)
        # but still have some segments left (or perhaps have precisely zero left).
        # In either case, we're golden. Return successfully, with any leftovers as
        # the unmatched elements. Meta all go at the end regardless of any trailing
        # whitespace.
        return MatchResult(
            BaseSegment._position_segments(
                matched_segments.matched_segments + meta_pre_nc +
                meta_post_nc, ),
            unmatched_segments,
        )
Esempio n. 3
0
    def match(self, segments: Tuple["BaseSegment", ...],
              parse_context: ParseContext) -> MatchResult:
        """Match if a bracketed sequence, with content that matches one of the elements.

        1. If the first segment is already of type "bracketed", reuse it.
           Otherwise try to match the opening bracket at the front of the
           input; if that fails there is no match.
        2. Once we have the opening bracket, bracket count forward to find its
           partner.
        3. Match what lies between the brackets using the match method of the
           parent class. Only a complete match of the content counts.

        Args:
            segments: The segments to match against.
            parse_context: The current parse context.

        Returns:
            A ``MatchResult``. On success the matched part is either a single
            ``BracketedSegment`` or, when the dialect says brackets of this
            type do not persist, that segment's child segments. On failure it
            is ``MatchResult.from_unmatched(segments)``.

        Raises:
            SQLParseError: If the opening bracket matches but no closing
                bracket can be found.

        """
        # Trim ends if allowed.
        # NOTE(review): the trimmed-off ends are not carried into the result;
        # presumably callers never pass leading/trailing non-code - confirm.
        if self.allow_gaps:
            _, seg_buff, _ = trim_non_code_segments(segments)
        else:
            seg_buff = segments  # pragma: no cover TODO?

        # Rehydrate the bracket segments in question.
        # bracket_persists controls whether we make a BracketedSegment or not.
        start_bracket, end_bracket, bracket_persists = self.get_bracket_from_dialect(
            parse_context)
        # Allow optional override for special bracket-like things
        start_bracket = self.start_bracket or start_bracket
        end_bracket = self.end_bracket or end_bracket

        # With no segments left to inspect (empty input, or nothing but
        # non-code) there cannot be a bracket. Bail out here rather than
        # indexing into an empty buffer below.
        if not seg_buff:
            return MatchResult.from_unmatched(segments)

        # Are we dealing with a pre-existing BracketSegment?
        if seg_buff[0].is_type("bracketed"):
            seg: BracketedSegment = cast(BracketedSegment, seg_buff[0])
            # Slice with an explicit end index: a negative index of "-0" would
            # produce an empty slice if the end bracket were ever empty.
            content_end = len(seg.segments) - len(seg.end_bracket)
            content_segs = seg.segments[len(seg.start_bracket):content_end]
            bracket_segment = seg
            trailing_segments = seg_buff[1:]
        # Otherwise try and match the segments directly.
        else:
            # Look for the first bracket
            with parse_context.deeper_match() as ctx:
                start_match = start_bracket.match(seg_buff, parse_context=ctx)
            if start_match:
                seg_buff = start_match.unmatched_segments
            else:
                # Can't find the opening bracket. No Match.
                return MatchResult.from_unmatched(segments)

            # Look for the closing bracket
            content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
                segments=seg_buff,
                matchers=[end_bracket],
                parse_context=parse_context,
                start_bracket=start_bracket,
                end_bracket=end_bracket,
                bracket_pairs_set=self.bracket_pairs_set,
            )
            if not end_match:  # pragma: no cover
                raise SQLParseError(
                    "Couldn't find closing bracket for opening bracket.",
                    segment=start_match.matched_segments[0],
                )

            # Construct a bracket segment
            bracket_segment = BracketedSegment(
                segments=(start_match.matched_segments + content_segs +
                          end_match.matched_segments),
                start_bracket=start_match.matched_segments,
                end_bracket=end_match.matched_segments,
            )
            trailing_segments = end_match.unmatched_segments

        # Then trim whitespace and deal with the case of non-code content e.g. "(   )"
        if self.allow_gaps:
            pre_segs, content_segs, post_segs = trim_non_code_segments(
                content_segs)
        else:  # pragma: no cover TODO?
            pre_segs = ()
            post_segs = ()

        # If we've got a case of empty brackets check whether that is allowed.
        if not content_segs:
            if not self._elements or (all(e.is_optional()
                                          for e in self._elements) and
                                      (self.allow_gaps or
                                       (not pre_segs and not post_segs))):
                return MatchResult(
                    (bracket_segment, )
                    if bracket_persists else bracket_segment.segments,
                    trailing_segments,
                )
            else:
                return MatchResult.from_unmatched(segments)

        # Match the content using super. Sequence will interpret the content of the
        # elements.
        with parse_context.deeper_match() as ctx:
            content_match = super().match(content_segs, parse_context=ctx)

        # We require a complete match for the content (hopefully for obvious reasons)
        if content_match.is_complete():
            # Reconstruct the bracket segment post match.
            # We need to realign the meta segments so the pos markers are correct.
            # Have we already got indents? Look for the first non-template
            # meta segment with a positive indent value.
            meta_idx = None
            for idx, child in enumerate(bracket_segment.segments):
                if (child.is_meta and cast(MetaSegment, child).indent_val > 0
                        and not cast(MetaSegment, child).is_template):
                    meta_idx = idx
                    break
            # If we've already got indents, don't add more. Compare against
            # None explicitly so an indent found at index 0 still counts.
            if meta_idx is not None:
                bracket_segment.segments = BaseSegment._position_segments(
                    bracket_segment.start_bracket + pre_segs +
                    content_match.all_segments() + post_segs +
                    bracket_segment.end_bracket)
            # Append some indent and dedent tokens at the start and the end.
            else:
                bracket_segment.segments = BaseSegment._position_segments(
                    # NB: The non-code segments sit between the metas and
                    # the content.
                    bracket_segment.start_bracket +
                    (Indent(), )  # Add a meta indent here
                    + pre_segs + content_match.all_segments() + post_segs +
                    (Dedent(), )  # Add a meta dedent here
                    + bracket_segment.end_bracket)
            return MatchResult(
                (bracket_segment, )
                if bracket_persists else bracket_segment.segments,
                trailing_segments,
            )
        # No complete match. Fail.
        else:
            return MatchResult.from_unmatched(segments)