Example #1
0
    def _ParseCharClass(self):
        # type: () -> List[glob_part_t]
        """Parse the rest of a bracket expression such as [abc] or [!abc].

        The current token is taken to be the opening '[' on entry.  Tokens are
        then pulled with self._Next() until the brackets balance or the input
        ends.

        Returns:
          A one-element list holding a CharClass when the closing bracket is
          found.  Otherwise a list of Literal parts (the opening token followed
          by every token consumed after it); in that case a warning is also
          appended to self.warnings.
        """
        opener = glob_part.Literal(self.token_type, self.token_val)
        depth = 1  # The opening [ was consumed before we got here
        inner = []  # type: List[Tuple[Id_t, str]]

        # NOTE: There is a special rule where []] and [[] are valid globs.  Also
        # [^[] and sometimes [^]], although that one is ambiguous!
        # And [[:space:]] and [[.class.]] have to be taken into account too.
        # This is punted on for now because the rule isn't clear and consistent
        # between shells.

        while True:
            self._Next()
            kind = self.token_type

            if kind == Id.Eol_Tok:
                # Input ended before the brackets balanced, so everything seen so
                # far is handed back as plain literals.
                # TODO: location info
                self.warnings.append(
                    'Malformed character class; treating as literal')
                literals = [opener]  # type: List[glob_part_t]
                literals.extend(
                    [glob_part.Literal(tok_id, text) for tok_id, text in inner])
                return literals

            if kind == Id.Glob_LBracket:
                depth += 1
            elif kind == Id.Glob_RBracket:
                depth -= 1
                if depth == 0:
                    break  # The matching ] is not recorded

            inner.append((kind, self.token_val))

        # NOTE: Both ! and ^ work for negation in globs
        # https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html#Pattern-Matching
        # TODO: Warn about the one that's not recommended?
        negated = (len(inner) > 0 and
                   inner[0][0] in (Id.Glob_Bang, Id.Glob_Caret))
        if negated:
            inner = inner[1:]  # Drop the negation marker itself

        members = [text for _, text in inner]
        return [glob_part.CharClass(negated, members)]
Example #2
0
    def Parse(self):
        # type: () -> Tuple[List[glob_part_t], List[str]]
        """Consume tokens until end of input and build the list of glob parts.

        Returns:
          A tuple (parts, warnings):
            parts: the Operator, CharClass and Literal parts, in input order
            warnings: self.warnings, the list of messages about the syntax
        """
        result = []  # type: List[glob_part_t]

        self._Next()
        while self.token_type != Id.Eol_Tok:
            kind = self.token_type
            text = self.token_val
            #util.log('%s %r', self.token_type, self.token_val)

            if kind == Id.Glob_LBracket:
                # May come back as a CharClass or as a run of Literals
                result.extend(self._ParseCharClass())

            elif kind in (Id.Glob_Star, Id.Glob_QMark):
                result.append(glob_part.Operator(kind))

            else:
                # Glob_{Bang,Caret,CleanLiterals,OtherLiteral,RBracket,
                #       EscapedChar,BadBackslash}
                result.append(glob_part.Literal(kind, text))

                # Two of the literal kinds also deserve a warning.
                # TODO: location info.
                if kind == Id.Glob_RBracket:
                    self.warnings.append('Got unescaped right bracket')
                elif kind == Id.Glob_BadBackslash:
                    self.warnings.append('Got unescaped trailing backslash')

            self._Next()

        return result, self.warnings