Ejemplo n.º 1
0
    def _parse_tokens(
        tokens: Sequence[BaseSegment], config: FluffConfig, recurse: bool = True
    ) -> Tuple[Optional[BaseSegment], List[SQLParseError]]:
        """Run the parser over ``tokens`` and gather any parse violations.

        Args:
            tokens: Segments handed straight to ``Parser.parse``.
            config: Configuration used to build the ``Parser``.
            recurse: Forwarded unchanged to ``Parser.parse``.

        Returns:
            A ``(tree, violations)`` pair. If the parser raises a
            ``SQLParseError`` the pair is ``(None, [that error])``. Otherwise
            ``tree`` is whatever the parser returned and ``violations`` holds
            one ``SQLParseError`` per unparsable section found in it.
        """
        sql_parser = Parser(config=config)
        errors = []

        try:
            tree: Optional[BaseSegment] = sql_parser.parse(tokens, recurse=recurse)
        except SQLParseError as err:
            # A hard parse failure: log it and hand it back as the sole violation.
            linter_logger.info("PARSING FAILED! : %s", err)
            errors.append(err)
            return None, errors

        # Nothing to inspect when the parser produced no (truthy) tree.
        if not tree:
            return tree, errors

        linter_logger.info("\n###\n#\n# {}\n#\n###".format("Parsed Tree:"))
        linter_logger.info("\n" + tree.stringify())

        # Parsing can succeed overall while still leaving unparsable sections
        # in the tree. Nothing was raised for those, so wrap each one in a
        # SQLParseError to report it through the same interface.
        for bad_segment in tree.iter_unparsables():
            location = bad_segment.pos_marker.working_loc
            raw = bad_segment.raw
            # Keep the message short: truncate long raw text to 40 characters.
            snippet = raw if len(raw) < 40 else raw[:40] + "..."
            message = (
                "Line {0[0]}, Position {0[1]}: Found unparsable section: {1!r}".format(
                    location, snippet
                )
            )
            errors.append(SQLParseError(message, segment=bad_segment))
            linter_logger.info("Found unparsable segment...")
            linter_logger.info(bad_segment.stringify())

        return tree, errors
Ejemplo n.º 2
0
 def parse_noqa(comment: str, line_no: int):
     """Interpret the text of a comment as a ``noqa`` directive, if it is one.

     Args:
         comment: The comment text; it must start with ``noqa`` to count.
         line_no: Line number stored on the returned directive or error.

     Returns:
         ``None`` when ``comment`` does not start with ``noqa``. A
         ``SQLParseError`` (returned, not raised) when the directive is
         malformed. Otherwise ``NoQaDirective(line_no, rules, action)``, where
         ``rules`` is ``None`` for a bare ``noqa`` or for ``all``, and
         ``action`` is ``"enable"``, ``"disable"`` or ``None``.
     """
     if not comment.startswith("noqa"):
         return None

     # Whatever follows the four characters of "noqa".
     tail = comment[4:]
     if not tail:
         # Bare "noqa": a directive with no rules and no action.
         return NoQaDirective(line_no, None, None)
     if not tail.startswith(":"):
         return SQLParseError(
             "Malformed 'noqa' section. Expected 'noqa: <rule>[,...]",
             line_no=line_no,
         )

     # Drop the colon and surrounding whitespace.
     spec = tail[1:].strip()
     if not spec:
         # "noqa:" with nothing after it is treated like a bare "noqa".
         return NoQaDirective(line_no, None, None)

     bad_action_message = (
         "Malformed 'noqa' section. "
         "Expected 'noqa: enable=<rule>[,...] | all' "
         "or 'noqa: disable=<rule>[,...] | all"
     )
     action: Optional[str]
     if "=" in spec:
         # Split on the first "=" only: "<action>=<rules>".
         action, _, rule_part = spec.partition("=")
         if action not in {"disable", "enable"}:  # pragma: no cover
             return SQLParseError(bad_action_message, line_no=line_no)
     else:
         action = None
         rule_part = spec
         # An action keyword without "=<rules>" is not a valid rule list.
         if rule_part in {"disable", "enable"}:
             return SQLParseError(bad_action_message, line_no=line_no)

     rules: Optional[Tuple[str, ...]] = (
         None
         if rule_part == "all"
         else tuple(code.strip() for code in rule_part.split(","))
     )
     return NoQaDirective(line_no, rules, action)
Ejemplo n.º 3
0
def generate_parse_fixture(example):
    """Parse an example SQL file and write its parse tree to a YAML file.

    The output goes to ``test/fixtures/dialects/<dialect>/<name>.yml``, where
    ``<name>`` is ``sqlfile`` with its extension removed. If parsing yields no
    tree, the output file is left empty.

    Args:
        example: A ``(dialect, sqlfile)`` pair.

    Raises:
        SQLParseError: If the parse tree contains a "base" or "unparsable"
            type. The output file has already been truncated at that point.
    """
    dialect, sqlfile = example
    tree = parse_example_file(dialect, sqlfile)
    _hash = compute_parse_tree_hash(tree)
    # Strip the file extension (normally ".sql") without assuming its length.
    root, _ = os.path.splitext(sqlfile)
    path = os.path.join("test", "fixtures", "dialects", dialect, root + ".yml")
    # Pin the encoding so the fixture does not depend on the platform default.
    with open(path, "w", newline="\n", encoding="utf8") as f:
        if not tree:
            # Opening in "w" mode already truncated the file; nothing to write.
            return

        # Check we don't have any base types or unparsable sections
        types = tree.type_set()
        if "base" in types:
            raise SQLParseError(f"Unnamed base section when parsing: {f.name}")
        if "unparsable" in types:
            raise SQLParseError(f"Could not parse: {f.name}")

        # The hash goes first so it appears at the top of the YAML document.
        record = {"_hash": _hash, **tree.as_record(code_only=True, show_raw=True)}
        print(
            "# YML test files are auto-generated from SQL files and should not be "
            "edited by",
            '# hand. To help enforce this, the "hash" field in the file must match '
            "a hash",
            "# computed by SQLFluff when running the tests. Please run",
            "# `python test/generate_parse_fixture_yml.py`  to generate them after "
            "adding or",
            "# altering SQL files.",
            file=f,
            sep="\n",
        )
        yaml.dump(record, f, default_flow_style=False, sort_keys=False)
Ejemplo n.º 4
0
def generate_one_parse_fixture(example: _ParseExample) -> None:
    """Parse an example SQL file and write its parse tree to a YAML file.

    The output path comes from ``_create_yaml_path(example)``. If parsing
    yields no tree, the output file is left empty.

    Args:
        example: A ``(dialect, sqlfile)`` pair.

    Raises:
        SQLParseError: If the parse tree contains a "base" or "unparsable"
            type. The output file has already been truncated at that point.
    """
    dialect, sqlfile = example
    tree = parse_example_file(dialect, sqlfile)
    _hash = compute_parse_tree_hash(tree)
    path = _create_yaml_path(example)
    # Pin the encoding so the fixture does not depend on the platform default.
    with open(path, "w", newline="\n", encoding="utf8") as f:
        if not tree:
            # Opening in "w" mode already truncated the file; nothing to write.
            return

        # Check we don't have any base types or unparsable sections
        types = tree.type_set()
        if "base" in types:
            raise SQLParseError(f"Unnamed base section when parsing: {f.name}")
        if "unparsable" in types:
            raise SQLParseError(f"Could not parse: {f.name}")

        records = tree.as_record(code_only=True, show_raw=True)
        # Narrows the type of `records` for the type checker.
        assert records, "TypeGuard"
        # The hash goes first so it appears at the top of the YAML document.
        r = {"_hash": _hash, **records}
        print(
            "# YML test files are auto-generated from SQL files and should not be "
            "edited by",
            '# hand. To help enforce this, the "hash" field in the file must match '
            "a hash",
            "# computed by SQLFluff when running the tests. Please run",
            "# `python test/generate_parse_fixture_yml.py`  to generate them after "
            "adding or",
            "# altering SQL files.",
            file=f,
            sep="\n",
        )
        yaml.dump(r, f, default_flow_style=False, sort_keys=False)
Ejemplo n.º 5
0
def check_still_complete(
    segments_in: Tuple["BaseSegment", ...],
    matched_segments: Tuple["BaseSegment", ...],
    unmatched_segments: Tuple["BaseSegment", ...],
) -> bool:
    """Check that the segments in are the same as the segments out.

    The raw text of ``segments_in`` is compared with the raw text of
    ``matched_segments + unmatched_segments`` (both via ``join_segments_raw``).

    Args:
        segments_in: The segments before matching.
        matched_segments: The matched part of the result.
        unmatched_segments: The unmatched part of the result; may be empty.

    Returns:
        ``True`` when the two raw strings are equal.

    Raises:
        SQLParseError: When the raw strings differ. The error is anchored on
            the first unmatched segment if there is one.
    """
    initial_str = join_segments_raw(segments_in)
    current_str = join_segments_raw(matched_segments + unmatched_segments)

    if initial_str != current_str:  # pragma: no cover
        message = f"Could not parse: {current_str}"
        # Guard against an empty tuple: indexing it would raise IndexError and
        # hide the real parse error.
        if unmatched_segments:
            raise SQLParseError(message, segment=unmatched_segments[0])
        raise SQLParseError(message)
    return True
Ejemplo n.º 6
0
    def _bracket_sensitive_look_ahead_match(
        cls,
        segments,
        matchers,
        parse_context,
        start_bracket=None,
        end_bracket=None,
        bracket_pairs_set="bracket_pairs",
    ):
        """Same as `_look_ahead_match` but with bracket counting.

        Scans forward through `segments` for the first match of any of
        `matchers`, skipping over the contents of any brackets opened along
        the way. While a bracket is open, only bracket matchers are searched
        for, so a target inside brackets is never returned.

        NB: Given we depend on `_look_ahead_match` we can also utilise
        the same performance optimisations which are implemented there.

        Args:
            segments: The segments to search. A single `BaseSegment` is
                wrapped in a list.
            matchers: The matchers to look for; converted to a list.
            parse_context: Passed to `_look_ahead_match`; its `dialect`
                supplies the bracket pairs.
            start_bracket: Optional extra matcher treated as an opening
                bracket, in addition to those from the dialect.
            end_bracket: Optional extra matcher treated as a closing
                bracket, in addition to those from the dialect.
            bracket_pairs_set: Allows specific segments to override the
                available bracket pairs. See the definition of
                "angle_bracket_pairs" in the BigQuery dialect for additional
                context on why this exists.

        Returns:
            `tuple` of (pre_segments, match_object, matcher). On a match,
            `pre_segments` is everything before the match (including any
            bracketed sections skipped over). When nothing matches the result
            is `((), MatchResult.from_unmatched(segments), None)`.

        Raises:
            SQLParseError: If a closing bracket does not pair with the
                innermost open bracket, or an open bracket is never closed.
            RuntimeError: If a match inside a bracket stack is neither a
                start nor an end bracket.
            NotImplementedError: If a match outside a bracket stack is
                neither a target matcher nor a bracket.

        """
        # Type munging
        matchers = list(matchers)
        if isinstance(segments, BaseSegment):
            segments = [segments]

        # Have we been passed an empty list?
        if len(segments) == 0:
            return ((), MatchResult.from_unmatched(segments), None)

        # Get hold of the bracket matchers from the dialect, and append them
        # to the list of matchers. We get them from the relevant set on the
        # dialect. We use zip with argument unpacking to "unzip" the
        # (name, start_ref, end_ref) entries into three parallel tuples. We
        # ignore the first of those because that's just the name.
        _, start_bracket_refs, end_bracket_refs = zip(
            *parse_context.dialect.sets(bracket_pairs_set)
        )
        # These are matchables, probably StringParsers.
        start_brackets = [
            parse_context.dialect.ref(seg_ref) for seg_ref in start_bracket_refs
        ]
        end_brackets = [
            parse_context.dialect.ref(seg_ref) for seg_ref in end_bracket_refs
        ]
        # Add any bracket-like things passed as arguments
        if start_bracket:
            start_brackets += [start_bracket]
        if end_bracket:
            end_brackets += [end_bracket]
        bracket_matchers = start_brackets + end_brackets

        # Make some buffers.
        # seg_buff: what is still left to search.
        # pre_seg_buff: everything already consumed ahead of a potential match.
        # bracket_stack: currently open brackets, innermost last.
        seg_buff = segments
        pre_seg_buff = ()  # NB: Tuple
        bracket_stack: List[BracketInfo] = []

        # Iterate
        while True:
            # Do we have anything left to match on?
            if seg_buff:
                # Yes we have buffer left to work with.
                # Are we already in a bracket stack?
                if bracket_stack:
                    # Yes, we're just looking for the closing bracket, or
                    # another opening bracket.
                    pre, match, matcher = cls._look_ahead_match(
                        seg_buff,
                        bracket_matchers,
                        parse_context=parse_context,
                    )

                    if match:
                        # NB: We can only consider this as a nested bracket if the start
                        # and end tokens are not the same. If a matcher is both a start and
                        # end token we cannot deepen the bracket stack. In general, quoted
                        # strings are a typical example where the start and end tokens are
                        # the same. Currently, though, quoted strings are handled elsewhere
                        # in the parser, and there are no cases where *this* code has to
                        # handle identical start and end brackets. For now, consider this
                        # a small, speculative investment in a possible future requirement.
                        if matcher in start_brackets and matcher not in end_brackets:
                            # Same procedure as below in finding brackets.
                            bracket_stack.append(
                                BracketInfo(
                                    bracket=match.matched_segments[0],
                                )
                            )
                            pre_seg_buff += pre
                            pre_seg_buff += match.matched_segments
                            seg_buff = match.unmatched_segments
                            continue
                        elif matcher in end_brackets:
                            # Found an end bracket. Does its type match that of
                            # the innermost start bracket? E.g. ")" matches "(",
                            # "]" matches "[".
                            # For the start bracket we don't have the matcher
                            # but we can work out the name, so we use that for
                            # the lookup.
                            start_index = [
                                bracket.name for bracket in start_brackets
                            ].index(bracket_stack[-1].bracket.name)
                            # For the end index, we can just look for the matcher
                            end_index = end_brackets.index(matcher)
                            # Start and end brackets are held in parallel lists,
                            # so a pair shares the same position in both.
                            bracket_types_match = start_index == end_index
                            if bracket_types_match:
                                # Yes, the types match. So we've found a
                                # matching end bracket. Pop the stack and carry
                                # on.
                                bracket_stack.pop()
                                pre_seg_buff += pre
                                pre_seg_buff += match.matched_segments
                                seg_buff = match.unmatched_segments
                                continue
                            else:
                                # The types don't match. Error.
                                raise SQLParseError(
                                    f"Found unexpected end bracket!, was expecting {end_brackets[start_index]}, but got {matcher}",
                                    segment=match.matched_segments[0],
                                )

                        else:
                            raise RuntimeError("I don't know how we get here?!")
                    else:
                        # No match, we're in a bracket stack. Error.
                        raise SQLParseError(
                            "Couldn't find closing bracket for opening bracket.",
                            segment=bracket_stack[-1].bracket,
                        )
                else:
                    # No, we're open to more opening brackets or the thing(s)
                    # that we're otherwise looking for.
                    pre, match, matcher = cls._look_ahead_match(
                        seg_buff,
                        matchers + bracket_matchers,
                        parse_context=parse_context,
                    )

                    if match:
                        if matcher in matchers:
                            # It's one of the things we were looking for!
                            # Return.
                            return (pre_seg_buff + pre, match, matcher)
                        elif matcher in start_brackets:
                            # We've found the start of a bracket segment.
                            # NB: It might not *actually* be the bracket itself,
                            # but could be some non-code element preceding it.
                            # That's actually ok.

                            # Add the bracket to the stack.
                            bracket_stack.append(
                                BracketInfo(
                                    bracket=match.matched_segments[0],
                                )
                            )
                            # Add the matched elements and anything before it to the
                            # pre segment buffer. Reset the working buffer.
                            pre_seg_buff += pre
                            pre_seg_buff += match.matched_segments
                            seg_buff = match.unmatched_segments
                            continue
                        elif matcher in end_brackets:
                            # We've found an unexpected end bracket! This is likely
                            # because we're matching a section which should have ended.
                            # If we had a match, it would have matched by now, so this
                            # means no match.
                            parse_match_logging(
                                cls.__name__,
                                "_bracket_sensitive_look_ahead_match",
                                "UEXB",
                                parse_context=parse_context,
                                v_level=3,
                                got=matcher,
                            )
                            return ((), MatchResult.from_unmatched(segments), None)
                        else:
                            # This shouldn't happen!?
                            raise NotImplementedError(
                                "This shouldn't happen. Panic in _bracket_sensitive_look_ahead_match."
                            )
                    else:
                        # Not in a bracket stack, but no match. This is a happy
                        # unmatched exit.
                        return ((), MatchResult.from_unmatched(segments), None)
            else:
                # No we're at the end:
                # Now check have we closed all our brackets?
                if bracket_stack:
                    # No we haven't.
                    raise SQLParseError(
                        f"Couldn't find closing bracket for opened brackets: `{bracket_stack}`.",
                        segment=bracket_stack[-1].bracket,
                    )

                # We reached the end with no open brackets. This is a friendly
                # unmatched return.
                return ((), MatchResult.from_unmatched(segments), None)
Ejemplo n.º 7
0
    def _bracket_sensitive_look_ahead_match(cls,
                                            segments,
                                            matchers,
                                            parse_context,
                                            start_bracket=None,
                                            end_bracket=None):
        """Same as `_look_ahead_match` but with bracket counting.

        Scans forward through `segments` for the first match of any of
        `matchers`, skipping over the contents of any brackets opened along
        the way. Each bracket pair from the dialect carries a "definite"
        flag. A non-definite bracket may turn out not to be a bracket at
        all, so on a mismatch it is dropped rather than treated as an error.

        NB: Given we depend on `_look_ahead_match` we can also utilise
        the same performance optimisations which are implemented there.

        Args:
            segments: The segments to search. A single `BaseSegment` is
                wrapped in a list.
            matchers: The matchers to look for; converted to a list.
            parse_context: Passed to `_look_ahead_match`; its `dialect`
                supplies the "bracket_pairs" set.
            start_bracket: Optional extra matcher treated as a definite
                opening bracket.
            end_bracket: Optional extra matcher treated as a definite
                closing bracket.

        Returns:
            `tuple` of (pre_segments, match_object, matcher). On a match,
            `pre_segments` is everything before the match (including any
            bracketed sections skipped over). When nothing matches the result
            is `((), MatchResult.from_unmatched(segments), None)`.

        Raises:
            SQLParseError: If a definite closing bracket cannot be paired and
                no non-definite open bracket can be discarded to resolve it,
                if a definite closing bracket appears with no bracket open,
                or if a definite open bracket is never closed.
            RuntimeError: If a match inside a bracket stack is neither a
                start nor an end bracket.
            NotImplementedError: If a match outside a bracket stack is
                neither a target matcher nor a bracket.

        """
        # Type munging
        matchers = list(matchers)
        if isinstance(segments, BaseSegment):
            segments = [segments]

        # Have we been passed an empty list?
        if len(segments) == 0:
            return ((), MatchResult.from_unmatched(segments), None)

        # Get hold of the bracket matchers from the dialect, and append them
        # to the list of matchers. We get them from the relevant set on the
        # dialect. We use zip with argument unpacking to "unzip" the
        # (name, start_ref, end_ref, definite) entries into parallel tuples.
        # We ignore the first of those because that's just the name.
        _, start_bracket_refs, end_bracket_refs, definitely_bracket = zip(
            *parse_context.dialect.sets("bracket_pairs"))
        # These are currently strings which need rehydrating
        start_brackets = [
            parse_context.dialect.ref(seg_ref)
            for seg_ref in start_bracket_refs
        ]
        end_brackets = [
            parse_context.dialect.ref(seg_ref) for seg_ref in end_bracket_refs
        ]
        # Separate copies, because the optional extra start and end brackets
        # are appended independently and the lists must stay parallel to
        # start_brackets / end_brackets respectively.
        start_definite = list(definitely_bracket)
        end_definite = list(definitely_bracket)
        # Add any bracket-like things passed as arguments
        if start_bracket:
            start_brackets += [start_bracket]
            start_definite += [True]
        if end_bracket:
            end_brackets += [end_bracket]
            end_definite += [True]
        bracket_matchers = start_brackets + end_brackets

        # Make some buffers.
        # seg_buff: what is still left to search.
        # pre_seg_buff: everything already consumed ahead of a potential match.
        # bracket_stack: currently open brackets, innermost last.
        seg_buff = segments
        pre_seg_buff = ()  # NB: Tuple
        bracket_stack: List[BracketInfo] = []

        # Iterate
        while True:
            # Do we have anything left to match on?
            if seg_buff:
                # Yes we have buffer left to work with.
                # Are we already in a bracket stack?
                if bracket_stack:
                    # Yes, we're just looking for the closing bracket, or
                    # another opening bracket.
                    pre, match, matcher = cls._look_ahead_match(
                        seg_buff,
                        bracket_matchers,
                        parse_context=parse_context,
                    )

                    if match:
                        # NB: We can only consider this as a nested bracket if the start
                        # and end tokens are not the same. If a matcher is both a start and
                        # end token we cannot deepen the bracket stack. In general, quoted
                        # strings are a typical example where the start and end tokens are
                        # the same. Currently, though, quoted strings are handled elsewhere
                        # in the parser, and there are no cases where *this* code has to
                        # handle identical start and end brackets. For now, consider this
                        # a small, speculative investment in a possible future requirement.
                        if matcher in start_brackets and matcher not in end_brackets:
                            # Same procedure as below in finding brackets.
                            bracket_stack.append(
                                BracketInfo(
                                    bracket=match.matched_segments[0],
                                    is_definite=start_definite[
                                        start_brackets.index(matcher)],
                                ))
                            pre_seg_buff += pre
                            pre_seg_buff += match.matched_segments
                            seg_buff = match.unmatched_segments
                            continue
                        elif matcher in end_brackets:
                            # Found an end bracket. Does its type match that of
                            # the innermost start bracket? E.g. ")" matches "(",
                            # "]" matches "[".
                            start_index = start_brackets.index(
                                type(bracket_stack[-1].bracket))
                            end_index = end_brackets.index(matcher)
                            # Start and end brackets are held in parallel lists,
                            # so a pair shares the same position in both.
                            bracket_types_match = start_index == end_index
                            if bracket_types_match:
                                # Yes, the types match. So we've found a
                                # matching end bracket. Pop the stack and carry
                                # on.
                                bracket_stack.pop()
                                pre_seg_buff += pre
                                pre_seg_buff += match.matched_segments
                                seg_buff = match.unmatched_segments
                                continue
                            else:
                                # The types don't match. Check whether the end
                                # bracket is a definite bracket.
                                end_is_definite = end_definite[end_index]
                                if not end_is_definite:
                                    # The end bracket whose type didn't match
                                    # the innermost open bracket is not
                                    # definite. Assume it's not a bracket and
                                    # carry on.
                                    pre_seg_buff += pre
                                    pre_seg_buff += match.matched_segments
                                    seg_buff = match.unmatched_segments
                                else:
                                    # Definite end bracket does not match the
                                    # innermost start bracket. Can we remove any
                                    # brackets from the stack which aren't
                                    # definites to resolve the issue? Search from
                                    # the innermost bracket outwards.
                                    for idx in range(
                                            len(bracket_stack) - 1, -1, -1):
                                        if not bracket_stack[idx].is_definite:
                                            del bracket_stack[idx]
                                            # We don't change the string buffer, we assume that was ok.
                                            break
                                    else:
                                        raise SQLParseError(
                                            f"Found unexpected end bracket!, was expecting {end_brackets[start_index]}, but got {matcher}",
                                            segment=match.matched_segments[0],
                                        )

                        else:
                            raise RuntimeError(
                                "I don't know how we get here?!")
                    else:
                        # No match, we're in a bracket stack. Either this is an error,
                        # OR we were mistaken in our initial identification of the opening
                        # bracket. That's only allowed if `not definitely_bracket`.

                        # Can we remove any brackets from the stack which aren't definites
                        # to resolve the issue? Search from the innermost bracket outwards.
                        # NB: We delete by real index. Negating an offset taken from
                        # enumerate(reversed(...)) is wrong: the innermost entry has
                        # offset 0 and -0 == 0, which would delete the outermost bracket.
                        for idx in range(len(bracket_stack) - 1, -1, -1):
                            if not bracket_stack[idx].is_definite:
                                del bracket_stack[idx]
                                # We don't change the string buffer, we assume that was ok.
                                break
                        else:
                            # No we can't. We don't have a match and we're in a bracket stack.
                            raise SQLParseError(
                                "Couldn't find closing bracket for opening bracket.",
                                segment=bracket_stack[-1].bracket,
                            )
                        # We have attempted a potential solution to the problem. Loop around.
                        continue
                else:
                    # No, we're open to more opening brackets or the thing(s)
                    # that we're otherwise looking for.
                    pre, match, matcher = cls._look_ahead_match(
                        seg_buff,
                        matchers + bracket_matchers,
                        parse_context=parse_context,
                    )

                    if match:
                        if matcher in matchers:
                            # It's one of the things we were looking for!
                            # Return.
                            return (pre_seg_buff + pre, match, matcher)
                        elif matcher in start_brackets:
                            # We've found the start of a bracket segment.
                            # NB: It might not *actually* be the bracket itself,
                            # but could be some non-code element preceding it.
                            # That's actually ok.

                            # Add the bracket to the stack.
                            bracket_stack.append(
                                BracketInfo(
                                    bracket=match.matched_segments[0],
                                    is_definite=start_definite[
                                        start_brackets.index(matcher)],
                                ))
                            # Add the matched elements and anything before it to the
                            # pre segment buffer. Reset the working buffer.
                            pre_seg_buff += pre
                            pre_seg_buff += match.matched_segments
                            seg_buff = match.unmatched_segments
                            continue
                        elif matcher in end_brackets:
                            # An end bracket with nothing open. Look up its
                            # "definite" flag to decide how to treat it.
                            bracket_is_definite = end_definite[
                                end_brackets.index(matcher)]
                            if bracket_is_definite:
                                # We've found an unexpected end bracket!
                                raise SQLParseError(
                                    f"Found unexpected end bracket!, was expecting one of: {matchers + bracket_matchers}, but got {matcher}",
                                    segment=match.matched_segments[0],
                                )
                            # Not definite: assume it isn't really a bracket,
                            # consume it and keep looking.
                            pre_seg_buff += pre
                            pre_seg_buff += match.matched_segments
                            seg_buff = match.unmatched_segments
                            continue
                        else:
                            # This shouldn't happen!?
                            raise NotImplementedError(
                                "This shouldn't happen. Panic in _bracket_sensitive_look_ahead_match."
                            )
                    else:
                        # Not in a bracket stack, but no match. This is a happy
                        # unmatched exit.
                        return ((), MatchResult.from_unmatched(segments), None)
            else:
                # No we're at the end:
                # Now check have we closed all our brackets?
                if bracket_stack:
                    # No we haven't.
                    # Check that the unclosed brackets are definite
                    definite_bracket_stack = [
                        b for b in bracket_stack if b.is_definite
                    ]
                    if definite_bracket_stack:
                        raise SQLParseError(
                            f"Couldn't find closing bracket for opened brackets: `{bracket_stack}`.",
                            segment=bracket_stack[-1].bracket,
                        )

                # We're at the end with no definite bracket left open. This is
                # a friendly unmatched return.
                return ((), MatchResult.from_unmatched(segments), None)
Ejemplo n.º 8
0
    def parse_noqa(
        comment: str,
        line_no: int,
        rule_codes: List[str],
    ):
        """Interpret the text of a comment as a ``noqa`` directive, if it is one.

        Args:
            comment: The comment text. Only the part after the last ``--``
                is examined.
            line_no: Line number stored on the returned directive or error.
            rule_codes: Known rule codes, used to expand glob patterns in the
                rule list.

        Returns:
            ``None`` when the examined text does not start with ``noqa``. A
            ``SQLParseError`` (returned, not raised) when the directive is
            malformed. Otherwise ``NoQaDirective(line_no, rules, action)``,
            where ``rules`` is ``None`` for a bare ``noqa`` or for ``all``,
            and ``action`` is ``"enable"``, ``"disable"`` or ``None``.
        """
        # A comment line can itself end in an inline directive, e.g.
        #   --dafhsdkfwdiruweksdkjdaffldfsdlfjksd -- noqa: L016
        # so only the last "--"-separated piece is considered.
        comment = comment.split("--")[-1].strip()

        if not comment.startswith("noqa"):
            return None

        # Whatever follows the four characters of "noqa".
        tail = comment[4:]
        if not tail:
            # Bare "noqa": a directive with no rules and no action.
            return NoQaDirective(line_no, None, None)
        if not tail.startswith(":"):
            return SQLParseError(
                "Malformed 'noqa' section. Expected 'noqa: <rule>[,...]",
                line_no=line_no,
            )

        # Drop the colon and surrounding whitespace.
        spec = tail[1:].strip()
        if not spec:
            # "noqa:" with nothing after it is treated like a bare "noqa".
            return NoQaDirective(line_no, None, None)

        bad_action_message = (
            "Malformed 'noqa' section. "
            "Expected 'noqa: enable=<rule>[,...] | all' "
            "or 'noqa: disable=<rule>[,...] | all"
        )
        action: Optional[str]
        if "=" in spec:
            # Split on the first "=" only: "<action>=<rules>".
            action, _, rule_part = spec.partition("=")
            if action not in {"disable", "enable"}:  # pragma: no cover
                return SQLParseError(bad_action_message, line_no=line_no)
        else:
            action = None
            rule_part = spec
            # An action keyword without "=<rules>" is not a valid rule list.
            if rule_part in {"disable", "enable"}:
                return SQLParseError(bad_action_message, line_no=line_no)

        if rule_part == "all":
            return NoQaDirective(line_no, None, action)

        # Each entry may be a glob, so expand it against the known rule codes
        # while keeping first-seen order.
        collected = []
        for pattern in (piece.strip() for piece in rule_part.split(",")):
            fresh = [
                code
                for code in fnmatch.filter(rule_codes, pattern)
                if code not in collected
            ]
            if fresh:
                collected += fresh
            elif pattern not in collected:
                # Nothing new came out of the glob. Assume the user is
                # referencing a special error type (e.g. PRS, LXR, or TMP)
                # and keep the entry verbatim.
                collected.append(pattern)
        rules: Optional[Tuple[str, ...]] = tuple(collected)
        return NoQaDirective(line_no, rules, action)
Ejemplo n.º 9
0
    def match(self, segments: Tuple["BaseSegment", ...],
              parse_context: ParseContext) -> MatchResult:
        """Match a bracket pair whose content matches this grammar's elements.

        The steps are:

        1. Optionally trim non-code segments, then match the opening bracket
           at the front of what remains. No opening bracket means no match.
        2. Look ahead, counting nested brackets, for the closing partner.
        3. Match whatever sits between the brackets using the parent class's
           ``match``. Only a complete match of the content counts.

        Args:
            segments: The segments to match against.
            parse_context: The current parse context, used to resolve the
                dialect's brackets and to track match depth.

        Returns:
            A ``MatchResult`` holding the brackets, the matched content and
            meta ``Indent``/``Dedent`` segments, with everything after the
            closing bracket left unmatched. If there is no match, an
            unmatched result over the original ``segments``.

        Raises:
            SQLParseError: If an opening bracket is found but no closing
                bracket can be found for it.
        """
        # Skip over surrounding non-code segments when gaps are allowed.
        if self.allow_gaps:
            _, remaining, _ = trim_non_code_segments(segments)
        else:
            remaining = segments

        # Brackets come from the dialect unless this grammar overrides them
        # (for special bracket-like things).
        dialect_start, dialect_end = self.get_bracket_from_dialect(
            parse_context)
        start_bracket = self.start_bracket or dialect_start
        end_bracket = self.end_bracket or dialect_end

        # Step 1: the opening bracket.
        with parse_context.deeper_match() as ctx:
            opening = start_bracket.match(remaining, parse_context=ctx)
        if not opening:
            # Can't find the opening bracket. No match.
            return MatchResult.from_unmatched(segments)

        # Step 2: the closing bracket.
        inner, closing, _ = self._bracket_sensitive_look_ahead_match(
            segments=opening.unmatched_segments,
            matchers=[end_bracket],
            parse_context=parse_context,
            start_bracket=start_bracket,
            end_bracket=end_bracket,
            bracket_pairs_set=self.bracket_pairs_set,
        )
        if not closing:
            raise SQLParseError(
                "Couldn't find closing bracket for opening bracket.",
                segment=opening.matched_segments[0],
            )

        # Step 3: the content. Start with totally empty brackets, e.g. "()".
        if not inner:
            if self._elements and not all(e.is_optional()
                                          for e in self._elements):
                # Something mandatory is expected inside.
                return MatchResult.from_unmatched(segments)
            return MatchResult(
                opening.matched_segments + closing.matched_segments,
                closing.unmatched_segments,
            )

        # Next, brackets holding nothing but non-code, e.g. "(   )".
        if self.allow_gaps:
            lead_nc, inner, trail_nc = trim_non_code_segments(inner)
        else:
            lead_nc = trail_nc = ()

        if not inner:
            empty_allowed = not self._elements or (
                all(e.is_optional() for e in self._elements)
                and self.allow_gaps)
            if not empty_allowed:
                return MatchResult.from_unmatched(segments)
            return MatchResult(
                opening.matched_segments + lead_nc + trail_nc +
                closing.matched_segments,
                closing.unmatched_segments,
            )

        # Finally, real content: hand it to the parent class's matcher.
        with parse_context.deeper_match() as ctx:
            content_match = super().match(inner, parse_context=ctx)

        # A partial match of the content is not good enough.
        if not content_match.is_complete():
            return MatchResult.from_unmatched(segments)

        # Wrap the content in meta indent/dedent segments.
        # NB: The non-code segments go *inside* the indent/dedent pair,
        # directly around the matched content.
        bracketed = (
            opening.matched_segments
            + (Indent(), )
            + lead_nc + content_match.matched_segments + trail_nc
            + (Dedent(), )
            + closing.matched_segments
        )
        # Realign the segments so the position markers are correct.
        return MatchResult(
            BaseSegment._position_segments(bracketed),
            closing.unmatched_segments,
        )
Ejemplo n.º 10
0
    def match(self, segments: Tuple["BaseSegment", ...],
              parse_context: ParseContext) -> MatchResult:
        """Match if a bracketed sequence, with content that matches one of the elements.

        1. Work forwards to find the first bracket (or a pre-existing
           bracketed segment). If there is nothing to look at, or the first
           thing found is not an opening bracket, there is no match.
        2. Once we have the first bracket, bracket count forward to find its
           partner.
        3. Match what goes between the brackets using the parent class's
           ``match``. Only a complete match of the content counts.

        Args:
            segments: The segments to match against.
            parse_context: The current parse context, used to resolve the
                dialect's brackets and to track match depth.

        Returns:
            On success, a ``MatchResult`` whose matched part is either the
            bracketed segment itself or its child segments (depending on the
            ``bracket_persists`` flag returned by
            ``get_bracket_from_dialect``), with the trailing segments left
            unmatched. Otherwise an unmatched result over ``segments``.

        Raises:
            SQLParseError: If an opening bracket is found but no closing
                bracket can be found for it.
        """
        # Trim ends if allowed. Only the trimmed middle is needed here.
        if self.allow_gaps:
            _, seg_buff, _ = trim_non_code_segments(segments)
        else:
            seg_buff = segments  # pragma: no cover TODO?

        # Rehydrate the bracket segments in question.
        # bracket_persists controls whether we make a BracketedSegment or not.
        start_bracket, end_bracket, bracket_persists = self.get_bracket_from_dialect(
            parse_context)
        # Allow optional override for special bracket-like things
        start_bracket = self.start_bracket or start_bracket
        end_bracket = self.end_bracket or end_bracket

        # With nothing left to inspect (empty input, or only non-code
        # segments) there cannot be a bracket. Return a plain non-match
        # rather than failing on the index lookup below.
        if not seg_buff:
            return MatchResult.from_unmatched(segments)

        # Are we dealing with a pre-existing BracketSegment?
        if seg_buff[0].is_type("bracketed"):
            seg: BracketedSegment = cast(BracketedSegment, seg_buff[0])
            # The content is everything between the stored start and end
            # bracket segments.
            content_segs = seg.segments[len(seg.start_bracket
                                            ):-len(seg.end_bracket)]
            bracket_segment = seg
            trailing_segments = seg_buff[1:]
        # Otherwise try and match the segments directly.
        else:
            # Look for the first bracket
            with parse_context.deeper_match() as ctx:
                start_match = start_bracket.match(seg_buff, parse_context=ctx)
            if start_match:
                seg_buff = start_match.unmatched_segments
            else:
                # Can't find the opening bracket. No Match.
                return MatchResult.from_unmatched(segments)

            # Look for the closing bracket
            content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
                segments=seg_buff,
                matchers=[end_bracket],
                parse_context=parse_context,
                start_bracket=start_bracket,
                end_bracket=end_bracket,
                bracket_pairs_set=self.bracket_pairs_set,
            )
            if not end_match:  # pragma: no cover
                raise SQLParseError(
                    "Couldn't find closing bracket for opening bracket.",
                    segment=start_match.matched_segments[0],
                )

            # Construct a bracket segment
            bracket_segment = BracketedSegment(
                segments=(start_match.matched_segments + content_segs +
                          end_match.matched_segments),
                start_bracket=start_match.matched_segments,
                end_bracket=end_match.matched_segments,
            )
            trailing_segments = end_match.unmatched_segments

        # Then trim whitespace and deal with the case of non-code content e.g. "(   )"
        if self.allow_gaps:
            pre_segs, content_segs, post_segs = trim_non_code_segments(
                content_segs)
        else:  # pragma: no cover TODO?
            pre_segs = ()
            post_segs = ()

        # If we've got a case of empty brackets check whether that is allowed.
        if not content_segs:
            if not self._elements or (all(e.is_optional()
                                          for e in self._elements) and
                                      (self.allow_gaps or
                                       (not pre_segs and not post_segs))):
                return MatchResult(
                    (bracket_segment, )
                    if bracket_persists else bracket_segment.segments,
                    trailing_segments,
                )
            else:
                return MatchResult.from_unmatched(segments)

        # Match the content using super. Sequence will interpret the content of the
        # elements.
        with parse_context.deeper_match() as ctx:
            content_match = super().match(content_segs, parse_context=ctx)

        # We require a complete match for the content (hopefully for obvious reasons)
        if content_match.is_complete():
            # Reconstruct the bracket segment post match.
            # We need to realign the meta segments so the pos markers are correct.
            # Have we already got indents?
            meta_idx = None
            for idx, seg in enumerate(bracket_segment.segments):
                if (seg.is_meta and cast(MetaSegment, seg).indent_val > 0
                        and not cast(MetaSegment, seg).is_template):
                    meta_idx = idx
                    break
            # If we've already got indents, don't add more.
            # NOTE(review): this is a truthiness test, so an indent found at
            # index 0 would be treated as "not found". Presumably index 0 is
            # always the opening bracket - confirm.
            if meta_idx:
                bracket_segment.segments = BaseSegment._position_segments(
                    bracket_segment.start_bracket + pre_segs +
                    content_match.all_segments() + post_segs +
                    bracket_segment.end_bracket)
            # Append some indent and dedent tokens at the start and the end.
            else:
                bracket_segment.segments = BaseSegment._position_segments(
                    # NB: The non-code segments go *inside* the indent/dedent
                    # pair, directly around the matched content.
                    bracket_segment.start_bracket +
                    (Indent(), )  # Add a meta indent here
                    + pre_segs + content_match.all_segments() + post_segs +
                    (Dedent(), )  # Add a meta dedent here
                    + bracket_segment.end_bracket)
            return MatchResult(
                (bracket_segment, )
                if bracket_persists else bracket_segment.segments,
                trailing_segments,
            )
        # No complete match. Fail.
        else:
            return MatchResult.from_unmatched(segments)
Ejemplo n.º 11
0
    def _bracket_sensitive_look_ahead_match(
        cls,
        segments: Tuple[BaseSegment, ...],
        matchers: List[MatchableType],
        parse_context: ParseContext,
        start_bracket: Optional[Matchable] = None,
        end_bracket: Optional[Matchable] = None,
        bracket_pairs_set: str = "bracket_pairs",
    ) -> Tuple[Tuple[BaseSegment, ...], MatchResult, Optional[MatchableType]]:
        """Same as `_look_ahead_match` but with bracket counting.

        While scanning for one of `matchers`, any opening bracket that is
        found first pushes onto a bracket stack. Until that stack is empty
        again only brackets are searched for, so a matcher that occurs inside
        a bracket pair is not returned as the match.

        NB: Given we depend on `_look_ahead_match` we can also utilise
        the same performance optimisations which are implemented there.

        Args:
            segments: The segments to search through.
            matchers: The things we are looking for.
            parse_context: The parse context. Its dialect provides the
                bracket pairs.
            start_bracket: Optional extra matcher to treat as an opening
                bracket, in addition to those from the dialect.
            end_bracket: Optional extra matcher to treat as a closing
                bracket, in addition to those from the dialect.
            bracket_pairs_set: Allows specific segments to override the
                available bracket pairs. See the definition of
                "angle_bracket_pairs" in the BigQuery dialect for additional
                context on why this exists.

        Returns:
            `tuple` of (unmatched_segments, match_object, matcher).

            When one of `matchers` is found outside any bracket, the first
            element holds the segments leading up to the match (completed
            brackets may have been replaced by a single bracketed segment).
            Otherwise the first element is an empty tuple, the match object
            is an unmatched result over the (possibly rewritten) segments and
            the matcher is `None`.

        Raises:
            SQLParseError: If a closing bracket does not match the type of
                the innermost open bracket, or if a bracket is opened and its
                closing partner cannot be found.

        """
        # Have we been passed an empty tuple?
        if not segments:
            return ((), MatchResult.from_unmatched(segments), None)

        # Get hold of the bracket matchers from the dialect, and append them
        # to the list of matchers. We get them from the relevant set on the
        # dialect, "unzipping" the entries with `zip(*...)`. We ignore the
        # first element of each entry because that's just the name. The last
        # element says whether a completed bracket should be kept as a
        # bracketed segment (see its use further down).
        _, start_bracket_refs, end_bracket_refs, persists = zip(
            *parse_context.dialect.sets(bracket_pairs_set))
        # These are matchables, probably StringParsers.
        start_brackets = [
            parse_context.dialect.ref(seg_ref)
            for seg_ref in start_bracket_refs
        ]
        end_brackets = [
            parse_context.dialect.ref(seg_ref) for seg_ref in end_bracket_refs
        ]
        # Add any bracket-like things passed as arguments
        if start_bracket:
            start_brackets += [start_bracket]
        if end_bracket:
            end_brackets += [end_bracket]
        bracket_matchers = start_brackets + end_brackets

        # Make some buffers:
        # - seg_buff: what is still left to scan.
        # - pre_seg_buff: what has been scanned so far outside any bracket.
        # - bracket_stack: the brackets which are currently open.
        seg_buff: Tuple[BaseSegment, ...] = segments
        pre_seg_buff: Tuple[BaseSegment, ...] = ()
        bracket_stack: List[BracketInfo] = []

        # Iterate. Every branch which makes progress ends in `continue`;
        # anything which falls out of the if/else below takes the unmatched
        # return at the bottom of the loop body.
        while True:
            # Do we have anything left to match on?
            if seg_buff:
                # Yes we have buffer left to work with.
                # Are we already in a bracket stack?
                if bracket_stack:
                    # Yes, we're just looking for the closing bracket, or
                    # another opening bracket.
                    pre, match, matcher = cls._look_ahead_match(
                        seg_buff,
                        bracket_matchers,
                        parse_context=parse_context,
                    )

                    if match:
                        # NB: We can only consider this as a nested bracket if the start
                        # and end tokens are not the same. If a matcher is both a start
                        # and end token we cannot deepen the bracket stack. In general,
                        # quoted strings are a typical example where the start and end
                        # tokens are the same. Currently, though, quoted strings are
                        # handled elsewhere in the parser, and there are no cases where
                        # *this* code has to handle identical start and end brackets.
                        # For now, consider this a small, speculative investment in a
                        # possible future requirement.
                        if matcher in start_brackets and matcher not in end_brackets:
                            # Add any segments leading up to this to the previous
                            # bracket.
                            bracket_stack[-1].segments += pre
                            # Add a bracket to the stack and add the matches from the
                            # segment.
                            bracket_stack.append(
                                BracketInfo(
                                    bracket=match.matched_segments[0],
                                    segments=match.matched_segments,
                                ))
                            seg_buff = match.unmatched_segments
                            continue
                        elif matcher in end_brackets:
                            # Found an end bracket. Does its type match that of
                            # the innermost start bracket? E.g. ")" matches "(",
                            # "]" matches "[".
                            # For the start bracket we don't have the matcher
                            # but we can work out the type, so we use that for
                            # the lookup.
                            start_index = [
                                bracket.type for bracket in start_brackets
                            ].index(bracket_stack[-1].bracket.get_type())
                            # For the end index, we can just look for the matcher
                            end_index = end_brackets.index(matcher)
                            # Start and end brackets are paired by their
                            # position in the two lists.
                            bracket_types_match = start_index == end_index
                            if bracket_types_match:
                                # Yes, the types match. So we've found a
                                # matching end bracket. Pop the stack, construct
                                # a bracketed segment and carry
                                # on.

                                # Complete the bracketed info
                                bracket_stack[-1].segments += (
                                    pre + match.matched_segments)
                                # Construct a bracketed segment (as a tuple) if allowed.
                                persist_bracket = persists[end_brackets.index(
                                    matcher)]
                                if persist_bracket:
                                    new_segments: Tuple[BaseSegment, ...] = (
                                        bracket_stack[-1].to_segment(
                                            end_bracket=match.matched_segments
                                        ), )
                                else:
                                    new_segments = bracket_stack[-1].segments
                                # Remove the bracket set from the stack
                                bracket_stack.pop()
                                # If we're still in a bracket, add the new segments to
                                # that bracket, otherwise add them to the buffer
                                if bracket_stack:
                                    bracket_stack[-1].segments += new_segments
                                else:
                                    pre_seg_buff += new_segments
                                seg_buff = match.unmatched_segments
                                continue
                            else:
                                # The types don't match. Error.
                                raise SQLParseError(
                                    f"Found unexpected end bracket!, "
                                    f"was expecting "
                                    f"{end_brackets[start_index]}, "
                                    f"but got {matcher}",
                                    segment=match.matched_segments[0],
                                )

                        else:  # pragma: no cover
                            raise RuntimeError(
                                "I don't know how we get here?!")
                    else:  # pragma: no cover
                        # No match, we're in a bracket stack. Error.
                        raise SQLParseError(
                            "Couldn't find closing bracket for opening bracket.",
                            segment=bracket_stack[-1].bracket,
                        )
                else:
                    # No, we're open to more opening brackets or the thing(s)
                    # that we're otherwise looking for.
                    pre, match, matcher = cls._look_ahead_match(
                        seg_buff,
                        matchers + bracket_matchers,
                        parse_context=parse_context,
                    )

                    if match:
                        if matcher in matchers:
                            # It's one of the things we were looking for!
                            # Return.
                            return (pre_seg_buff + pre, match, matcher)
                        elif matcher in start_brackets:
                            # We've found the start of a bracket segment.
                            # NB: It might not *actually* be the bracket itself,
                            # but could be some non-code element preceding it.
                            # That's actually ok.

                            # Add the bracket to the stack.
                            bracket_stack.append(
                                BracketInfo(
                                    bracket=match.matched_segments[0],
                                    segments=match.matched_segments,
                                ))
                            # The matched element has already been added to the bracket.
                            # Add anything before it to the pre segment buffer.
                            # Reset the working buffer.
                            pre_seg_buff += pre
                            seg_buff = match.unmatched_segments
                            continue
                        elif matcher in end_brackets:
                            # We've found an unexpected end bracket! This is likely
                            # because we're matching a section which should have ended.
                            # If we had a match, it would have matched by now, so this
                            # means no match.
                            parse_match_logging(
                                cls.__name__,
                                "_bracket_sensitive_look_ahead_match",
                                "UEXB",
                                parse_context=parse_context,
                                v_level=3,
                                got=matcher,
                            )
                            # From here we'll drop out to the happy unmatched exit.
                        else:  # pragma: no cover
                            # This shouldn't happen!?
                            raise NotImplementedError(
                                "This shouldn't happen. Panic in "
                                "_bracket_sensitive_look_ahead_match.")
                    # Not in a bracket stack, but no match.
                    # From here we'll drop out to the happy unmatched exit.
            else:
                # No we're at the end:
                # Now check have we closed all our brackets?
                if bracket_stack:  # pragma: no cover
                    # No we haven't.
                    raise SQLParseError(
                        "Couldn't find closing bracket for opened brackets: "
                        f"`{bracket_stack}`.",
                        segment=bracket_stack[-1].bracket,
                    )

            # This is the happy unmatched path. This occurs when:
            # - We reached the end with no open brackets.
            # - No match while outside a bracket stack.
            # - We found an unexpected end bracket before matching something
            # interesting. We return with the mutated segments so we can reuse any
            # bracket matching.
            return ((), MatchResult.from_unmatched(pre_seg_buff + seg_buff),
                    None)