def parse_redirection_input(self, node):
    """Parse a '<' input redirection, optionally prefixed by an fd number.

    node is the word node parsed immediately before the '<', or None when
    there is none.  When '<' follows that word with no whitespace in
    between and the word is a non-negative integer, the word is treated as
    the file descriptor being redirected rather than as part of the
    command line.

    Returns a (redirect, node) tuple: the new 'redirect' node, and either
    the untouched node or None when it was consumed as the fd.

    Raises errors.ParsingError when '<' is not followed by a word or a
    number.
    """
    tt = self.peek_token()
    assert tt == '<'

    fd = None
    start = self.peek.start
    if self.peek.preceding == '' and node is not None:
        assert node.kind == 'word'
        # < seen without preceding whitespace.  So see if the last token
        # is a non-negative integer (0 is stdin, so '0<file' is valid).
        # If it is, assume it's an fd to redirect and pop it, else leave
        # it in as part of the command line.
        try:
            try_num = int(node.word)
        except ValueError:
            try_num = None
        if try_num is not None and try_num >= 0:
            fd = try_num

    redirect_kind = tt
    self.consume(tt)

    tt = self.peek_token()
    if tt not in ('word', 'number'):
        raise errors.ParsingError('syntax: expecting filename after <',
                                  self.source, self.peek.start)

    # remember the redirection target before consume() replaces self.peek
    output = self.peek.t
    self.consume(tt)

    if fd is not None:
        # the preceding word was the fd: the redirect starts where it did
        # and the word no longer belongs to the command line
        start = node.pos[0]
        node = None

    redirect = Node(kind='redirect', input=fd, type=redirect_kind,
                    output=output, pos=(start, self.token.end))
    return redirect, node
def consume(self, tt):
    """Advance the parser by one token, requiring the consumed one to be tt.

    The current lookahead (self.peek) becomes self.token and a fresh
    lookahead is fetched with self.next_token().  The type check happens
    after advancing.

    Raises errors.ParsingError when the consumed token's type differs from
    tt; a type of None is reported as 'EOF'.
    """
    self.token = self.peek
    self.peek = self.next_token()

    found = self.token.type
    if found == tt:
        return

    if found is None:
        found = 'EOF'
    message = 'consume: expected %r (got %s)' % (tt, found)
    raise errors.ParsingError(message, self.source, self.token.start)
def parse_reserved_word(self):
    """Parse a construct introduced by the reserved word at self.peek.

    Only the group command '{ list; }' is handled here: the opening brace
    is consumed, the enclosed list is parsed, and the closing brace is
    consumed.  Returns the tuple ('{', list_node) in that case and None
    for any other word.

    Raises errors.ParsingError when the enclosed list does not end with a
    ';' operator or when the closing '}' is missing.
    """
    if self.peek.t != '{':
        return None

    self.consume('word')
    try:
        body = self.parse_list()
    except ReservedWordError as stop:
        # the exception carries the node that was parsed so far
        body = stop.node

    # the body must be a list whose final part is the ';' operator
    terminated = False
    if body.kind == 'list':
        tail = body.parts[-1]
        terminated = tail.kind == 'operator' and tail.op == ';'
    if not terminated:
        raise errors.ParsingError(
            'syntax: group command must '
            'terminate with a semicolon',
            self.source, body.pos[1])

    self.peek_token()
    closing = self.is_reserved(self.peek)
    if closing != '}':
        raise errors.ParsingError(
            'syntax: group command must terminate '
            'with }', self.source, self.peek.start)

    self.consume('word')
    return '{', body
def parse_command_part(self):
    """Parse one word or number of a command plus any redirections after it.

    A 'word' node is built from the lookahead token, the token is consumed,
    and self.parse_redirections() is given the node so that it may claim it
    (the second value it returns is truthy when it did).

    Returns a list of nodes: just the redirections when the word node was
    claimed, otherwise the word node followed by the redirections.

    Raises errors.ParsingError when the lookahead is neither a word nor a
    number.
    """
    upcoming = self.peek
    node = Node(kind='word', word=upcoming.t,
                pos=(upcoming.start, upcoming.end))

    token_type = self.peek.type
    if token_type not in ('word', 'number'):
        raise errors.ParsingError('syntax: expected word or number',
                                  self.source, self.peek.start)
    self.consume(token_type)

    redirections, usednode = self.parse_redirections(node)
    if usednode:
        return redirections
    return [node] + redirections
def parse_redirection_input_here(self):
    """Parse a '<<' or '<<<' redirection and the word that follows it.

    Returns a 'redirect' node with input=None, type set to the operator
    and output set to the text of the following token.  Its position runs
    from the start of the operator to the end of that token.

    Raises errors.ParsingError when the operator is not followed by a word
    or a number.
    """
    operator = self.peek_token()
    assert operator in ('<<', '<<<')
    self.consume(operator)

    word_type = self.peek_token()
    if word_type not in ('word', 'number'):
        raise errors.ParsingError(
            'syntax: expecting word after %s' % operator,
            self.source, self.peek.start)

    # self.token is still the operator here, so this is where it begins
    start = self.token.start
    target = self.peek.t
    self.consume(word_type)

    return Node(kind='redirect', input=None, type=operator,
                output=target, pos=(start, self.token.end))
def parse_redirections1(self, prevnode):
    """Parse a single '>', '>>', '>&' or '<&' redirection.

    prevnode is the word node parsed just before the operator (may be
    falsy).  When the operator follows it without whitespace and its word
    is an integer >= 0, that word is taken as the fd being redirected.

    Returns a (redirect, prevnode) tuple: the new 'redirect' node, and
    either the untouched prevnode or None when it was used as the fd.

    Raises errors.ParsingError for a missing or invalid redirection target.
    """
    operator = self.peek_token()
    fd = None
    start = self.peek.start

    if self.peek.preceding == '' and prevnode:
        assert prevnode.kind == 'word'
        # operator glued to the previous word: if that word is a
        # non-negative integer, treat it as the fd to redirect, otherwise
        # leave it in as part of the command line
        try:
            candidate = int(prevnode.word)
        except ValueError:
            candidate = None
        if candidate is not None and candidate >= 0:
            fd = candidate

    redirect_kind = operator
    self.consume(operator)
    # decided once, before redirect_kind may be shortened further down
    dup_op = redirect_kind in ('>&', '<&')

    target_type = self.peek_token()

    # >&/<& followed with &
    if dup_op and target_type == '&':
        raise errors.ParsingError(
            'syntax: %s cannot redirect to fd' % redirect_kind,
            self.source, self.peek.start)

    # need word/number/& after >/>>/>&/<&
    if target_type not in ('word', 'number', '&'):
        raise errors.ParsingError('syntax: expecting filename or fd',
                                  self.source, self.peek.start)

    # don't accept 2>&filename
    if dup_op and fd is not None and target_type != 'number':
        raise errors.ParsingError(
            'syntax: fd cannot precede >& redirection',
            self.source, prevnode.pos[0])

    prefix = ''
    if target_type == '&':
        # >>&
        self.consume('&')
        target_type = self.peek_token()
        if target_type != 'number':
            raise errors.ParsingError('syntax: fd expected after &',
                                      self.source, self.peek.start)
        prefix = '&'
    elif dup_op and target_type == 'number':
        # >&n/<&n, change redirect kind to '>' or '<'
        redirect_kind = redirect_kind[0]
        prefix = '&'

    output = prefix + self.peek.t
    self.consume(target_type)

    if fd is not None:
        # the fd word belongs to the redirect, not to the command line
        start = prevnode.pos[0]
        prevnode = None

    redirect = Node(kind='redirect', input=fd, type=redirect_kind,
                    output=output, pos=(start, self.token.end))
    return redirect, prevnode
def next_token(self):
    """Pull the next token from the underlying lexer.

    A ValueError raised by the lexer is re-raised as errors.ParsingError
    carrying the source text and self.lexpos.
    """
    # NOTE(review): as visible here the fetched token is neither stored nor
    # returned, so this method evaluates to None even though consume()
    # assigns its result to self.peek -- confirm whether the rest of this
    # method is missing.
    try:
        fetched = self.lex.get_token()
    except ValueError as err:
        message = str(err)
        raise errors.ParsingError(message, self.source, self.lexpos)
def tokenize(s):
    '''split s into tokens with the (limited) shlex module, yielding a
    tokenstate(start, end, text) for each piece; a minimal bash parser
    could replace shlex in the future

    start and end are offsets into the stripped input. shlex does not
    report them, so they are derived from the position of the string
    stream it reads from

    a token of the form 'x=y' is yielded as 'x' followed by '=y', and
    'x=' as 'x' followed by '='

    raises errors.ParsingError when shlex raises ValueError'''
    s = s.strip()
    stream = StringIO(s)
    lexer = shlex.shlex(stream, posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ''

    tokens = util.peekable(lexer)
    startpos = 0
    try:
        for t in tokens:
            # read the stream offset first: peeking at the next token
            # below moves the underlying string pointer
            consumed = stream.tell()

            # when another token follows, back up one char so the space
            # between args is not included
            endpos = consumed - 1 if tokens.hasnext() else consumed

            # startpos is the previous stream offset, which may still
            # point at the whitespace between arguments; skip over it
            while s[startpos].isspace():
                startpos += 1

            emitted = False
            name, sep, value = t.partition('=')
            if sep:
                name_end = startpos + len(name)
                # was it something like 'x=..'?
                if name:
                    yield tokenstate(startpos, name_end, name)
                    if not value:
                        # it was 'x=': we don't want to lose the =, so
                        # yield it by itself and it will be marked as
                        # unknown by the matcher
                        yield tokenstate(name_end, name_end + 1, '=')
                    emitted = True
                if value:
                    # yield '=y'
                    yield tokenstate(name_end, endpos, '=' + value)
                    emitted = True

            if not emitted:
                # no '=' in current token or it was literally just '='
                yield tokenstate(startpos, endpos, t)

            startpos = consumed
    except ValueError as err:
        raise errors.ParsingError(str(err), s, stream.tell())