def _ParseCharClass(self):
    # type: () -> List[glob_part_t]
    """Parse the rest of a bracket expression; the opening '[' is the current token.

    Returns:
      A one-element list holding a CharClass if the brackets balance before
      the end of input.  Otherwise a list of Literal parts (the opening
      bracket followed by every token consumed so far), in which case a
      warning is also appended to self.warnings.
    """
    opening = glob_part.Literal(self.token_type, self.token_val)
    depth = 1  # The opening [ has already been seen
    seen = []  # type: List[Tuple[Id_t, str]]

    # NOTE: There is a special rule where []] and [[] are valid globs.  Also
    # [^[] and sometimes [^]], although that one is ambiguous!
    # And [[:space:]] and [[.class.]] have to be taken into account too.
    # This is punted on for now because the rule isn't clear and consistent
    # between shells.
    while depth != 0:
        self._Next()
        tok_id = self.token_type

        if tok_id == Id.Eol_Tok:
            # Ran out of input before the brackets balanced.
            # TODO: location info
            self.warnings.append(
                'Malformed character class; treating as literal')
            literals = [opening]  # type: List[glob_part_t]
            literals.extend(
                [glob_part.Literal(lit_id, lit_val) for (lit_id, lit_val) in seen])
            return literals

        if tok_id == Id.Glob_LBracket:
            depth += 1
        elif tok_id == Id.Glob_RBracket:
            depth -= 1

        # The ] that brings the depth back to zero is not recorded.
        if depth != 0:
            seen.append((tok_id, self.token_val))

    # NOTE: Both ! and ^ work for negation in globs
    # https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html#Pattern-Matching
    # TODO: Warn about the one that's not recommended?
    negated = bool(seen) and seen[0][0] in (Id.Glob_Bang, Id.Glob_Caret)

    # A leading negation token is not part of the class contents.
    members = seen[1:] if negated else seen
    return [glob_part.CharClass(negated, [val for _, val in members])]
def Parse(self):
    # type: () -> Tuple[List[glob_part_t], List[str]]
    """Consume tokens until end of input and build the list of glob parts.

    Returns:
      A tuple (parts, warnings):
        parts: Operator parts for * and ?, whatever _ParseCharClass() yields
          for each [, and a Literal for every other token.
        warnings: self.warnings, the accumulated warnings about the syntax.
    """
    result = []  # type: List[glob_part_t]

    self._Next()
    while self.token_type != Id.Eol_Tok:
        tok_id = self.token_type
        tok_val = self.token_val
        #util.log('%s %r', self.token_type, self.token_val)

        if tok_id in (Id.Glob_Star, Id.Glob_QMark):
            result.append(glob_part.Operator(tok_id))

        elif tok_id == Id.Glob_LBracket:
            # May contribute a single CharClass or a run of Literals
            result.extend(self._ParseCharClass())

        else:
            # Glob_{Bang,Caret,CleanLiterals,OtherLiteral,RBracket,EscapedChar,
            # BadBackslash}
            result.append(glob_part.Literal(tok_id, tok_val))

            # Some literals are suspicious enough to warn about.
            # TODO: location info.
            if tok_id == Id.Glob_RBracket:
                self.warnings.append('Got unescaped right bracket')
            if tok_id == Id.Glob_BadBackslash:
                self.warnings.append('Got unescaped trailing backslash')

        self._Next()

    return result, self.warnings