def p_subshell(p):
    '''subshell : LEFT_PAREN compound_list RIGHT_PAREN'''
    lparen = ast.node(kind='reservedword', word=p[1], pos=p.lexspan(1))
    rparen = ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3))
    parts = [lparen, p[2], rparen]
    p[0] = ast.node(kind='compound', list=parts, redirects=[],
                    pos=_partsspan(parts))
def p_group_command(p):
    '''group_command : LEFT_CURLY compound_list RIGHT_CURLY'''
    lcurly = ast.node(kind='reservedword', word=p[1], pos=p.lexspan(1))
    rcurly = ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3))
    parts = [lcurly, p[2], rcurly]
    p[0] = ast.node(kind='compound', list=parts, redirects=[],
                    pos=_partsspan(parts))
def p_shell_command(p):
    '''shell_command : for_command
                     | case_command
                     | WHILE compound_list DO compound_list DONE
                     | UNTIL compound_list DO compound_list DONE
                     | select_command
                     | if_command
                     | subshell
                     | group_command
                     | arith_command
                     | cond_command
                     | arith_for_command'''
    if len(p) == 2:
        p[0] = p[1]
    else:
        # while or until
        assert p[2].kind == 'list'

        parts = _makeparts(p)
        kind = parts[0].word
        assert kind in ('while', 'until')
        p[0] = ast.node(kind='compound', redirects=[],
                        list=[ast.node(kind=kind, parts=parts,
                                       pos=_partsspan(parts))],
                        pos=_partsspan(parts))

    assert p[0].kind == 'compound'
def _expandword(parser, tokenword):
    if parser._expansionlimit == -1:
        # we enter this branch in the following conditions:
        # - currently parsing a substitution as a result of an expansion
        # - the previous expansion had limit == 0
        #
        # this means that this node is a descendant of a substitution in an
        # unexpanded word and will be filtered in the limit == 0 condition
        # below
        #
        # (the reason we even expand when limit == 0 is to get quote removal)
        node = ast.node(kind='word', word=tokenword,
                        pos=(tokenword.lexpos, tokenword.endlexpos),
                        parts=[])
        return node
    else:
        quoted = bool(tokenword.flags & flags.word.QUOTED)
        doublequoted = quoted and tokenword.value[0] == '"'

        # TODO set qheredocument
        parts, expandedword = subst._expandwordinternal(
            parser, tokenword, 0, doublequoted, 0, 0)

        # limit reached, don't include substitutions (still expanded to get
        # quote removal though)
        if parser._expansionlimit == 0:
            parts = [node for node in parts
                     if 'substitution' not in node.kind]

        node = ast.node(kind='word', word=expandedword,
                        pos=(tokenword.lexpos, tokenword.endlexpos),
                        parts=parts)
        return node
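# A minimal usage sketch of the expansion-limit behaviour described in the
# comments above. This assumes the module belongs to a bashlex-style package
# whose top-level bashlex.parse entry point accepts an expansionlimit keyword
# (as the _expansionlimit attribute used above suggests); with limit 0 the
# word is still expanded for quote removal, but substitution nodes are
# filtered out of its parts.
def _example_expansionlimit():
    import bashlex

    unlimited = bashlex.parse('echo $(date)')[0]
    limited = bashlex.parse('echo $(date)', expansionlimit=0)[0]

    # the word node for $(date) should carry a commandsubstitution part in
    # the first tree but an empty parts list in the second
    print(unlimited.parts[-1].parts)
    print(limited.parts[-1].parts)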
def p_list0(p):
    '''list0 : list1 NEWLINE newline_list
             | list1 AMPERSAND newline_list
             | list1 SEMICOLON newline_list'''
    parts = p[1]
    if len(parts) > 1 or p.slice[2].ttype != tokenizer.tokentype.NEWLINE:
        parts.append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
    else:
        p[0] = parts[0]
def p_if_command(p):
    '''if_command : IF compound_list THEN compound_list FI
                  | IF compound_list THEN compound_list ELSE compound_list FI
                  | IF compound_list THEN compound_list elif_clause FI'''
    # we currently don't distinguish the various lists that make up the
    # command, since the distinction isn't needed later on. if the need
    # arises we can always add separate nodes for elif/else.
    parts = _makeparts(p)
    p[0] = ast.node(kind='compound', redirects=[],
                    list=[ast.node(kind='if', parts=parts,
                                   pos=_partsspan(parts))],
                    pos=_partsspan(parts))
def p_redirection(p):
    '''redirection : GREATER WORD
                   | LESS WORD
                   | NUMBER GREATER WORD
                   | NUMBER LESS WORD
                   | REDIR_WORD GREATER WORD
                   | REDIR_WORD LESS WORD
                   | GREATER_GREATER WORD
                   | NUMBER GREATER_GREATER WORD
                   | REDIR_WORD GREATER_GREATER WORD
                   | GREATER_BAR WORD
                   | NUMBER GREATER_BAR WORD
                   | REDIR_WORD GREATER_BAR WORD
                   | LESS_GREATER WORD
                   | NUMBER LESS_GREATER WORD
                   | REDIR_WORD LESS_GREATER WORD
                   | LESS_LESS_LESS WORD
                   | NUMBER LESS_LESS_LESS WORD
                   | REDIR_WORD LESS_LESS_LESS WORD
                   | LESS_AND NUMBER
                   | NUMBER LESS_AND NUMBER
                   | REDIR_WORD LESS_AND NUMBER
                   | GREATER_AND NUMBER
                   | NUMBER GREATER_AND NUMBER
                   | REDIR_WORD GREATER_AND NUMBER
                   | LESS_AND WORD
                   | NUMBER LESS_AND WORD
                   | REDIR_WORD LESS_AND WORD
                   | GREATER_AND WORD
                   | NUMBER GREATER_AND WORD
                   | REDIR_WORD GREATER_AND WORD
                   | GREATER_AND DASH
                   | NUMBER GREATER_AND DASH
                   | REDIR_WORD GREATER_AND DASH
                   | LESS_AND DASH
                   | NUMBER LESS_AND DASH
                   | REDIR_WORD LESS_AND DASH
                   | AND_GREATER WORD
                   | AND_GREATER_GREATER WORD'''
    parserobj = p.context
    if len(p) == 3:
        output = p[2]
        if p.slice[2].ttype == tokenizer.tokentype.WORD:
            output = _expandword(parserobj, p.slice[2])
        p[0] = ast.node(kind='redirect', input=None, type=p[1],
                        heredoc=None, output=output,
                        pos=(p.lexpos(1), p.endlexpos(2)))
    else:
        output = p[3]
        if p.slice[3].ttype == tokenizer.tokentype.WORD:
            output = _expandword(parserobj, p.slice[3])
        p[0] = ast.node(kind='redirect', input=p[1], type=p[2],
                        heredoc=None, output=output,
                        pos=(p.lexpos(1), p.endlexpos(3)))
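# Illustrative sketch of what the rule above produces, assuming the
# surrounding module is importable as bashlex with its top-level parse
# helper. '> out.txt' takes the two-symbol branch (input is None), while
# '2>&1' takes the three-symbol branch and carries the fd as input.
def _example_redirection():
    import bashlex

    tree = bashlex.parse('ls > out.txt 2>&1')[0]
    for part in tree.parts:
        if part.kind == 'redirect':
            print(part.type, part.input, part.output)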
def p_case_command(p):
    '''case_command : CASE WORD newline_list IN newline_list ESAC
                    | CASE WORD newline_list IN case_clause_sequence newline_list ESAC
                    | CASE WORD newline_list IN case_clause ESAC'''
    parts = _makeparts(p)
    p[0] = ast.node(kind='compound', redirects=[],
                    list=[ast.node(kind='case', parts=parts,
                                   pos=_partsspan(parts),
                                   lineno=parts[-1].lineno)],
                    pos=_partsspan(parts), lineno=parts[-1].lineno)
def compoundnode(s, *parts, **kwargs):
    redirects = kwargs.pop('redirects', [])
    assert not kwargs
    return ast.node(kind='compound', s=s, list=list(parts),
                    redirects=redirects)
def redirectnode(s, input, type, output, heredoc=None):
    return ast.node(kind='redirect', input=input, type=type, output=output,
                    heredoc=heredoc, s=s)
def listnode(s, *parts):
    for i in range(len(parts)):
        if i % 2 == 0:
            assert parts[i].kind in ('command', 'pipeline', 'compound'), parts[i].kind
        else:
            assert parts[i].kind == 'operator', parts[i].kind
    return ast.node(kind='list', parts=list(parts), s=s)
def _extractcommandsubst(parserobj, string, sindex, sxcommand=False):
    if string[sindex] == '(':
        raise NotImplementedError('arithmetic expansion')
        #return _extractdelimitedstring(parserobj, string, sindex, '$(', '(', '(', sxcommand=True)
    else:
        node, si = _parsedolparen(parserobj, string, sindex)
        si += 1
        return ast.node(kind='commandsubstitution', command=node,
                        pos=(sindex-2, si), lineno=node.lineno), si
def p_list_terminator(p):
    '''list_terminator : NEWLINE
                       | SEMICOLON
                       | EOF'''
    if p[1] == ';':
        p[0] = ast.node(kind='operator', op=';', pos=p.lexspan(1),
                        lineno=p.lineno(1))
def _make_pattern(part_, action_):
    if action_.kind == 'list':
        action_ = action_.parts
    else:
        action_ = [action_]
    return ast.node(kind='pattern', pattern=part_, actions=action_,
                    pos=(part_[0].pos[0], action_[-1].pos[1]),
                    lineno=action_[-1].lineno)
def p_function_def(p):
    '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body
                    | FUNCTION WORD LEFT_PAREN RIGHT_PAREN newline_list function_body
                    | FUNCTION WORD newline_list function_body'''
    parts = _makeparts(p)
    body = parts[-1]
    name = parts[ast.findfirstkind(parts, 'word')]

    p[0] = ast.node(kind='function', name=name, body=body, parts=parts,
                    pos=_partsspan(parts))
def p_pipeline(p):
    '''pipeline : pipeline BAR newline_list pipeline
                | pipeline BAR_AND newline_list pipeline
                | command'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[0].append(ast.node(kind='pipe', pipe=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])
def pipelinenode(s, *parts):
    oldparts = parts
    if parts[0].kind == 'reservedword' and parts[0].word == '!':
        parts = parts[1:]
    for i in range(len(parts)):
        if i % 2 == 0:
            assert parts[i].kind in ('command', 'compound'), parts[i].kind
        else:
            assert parts[i].kind == 'pipe', parts[i].kind
    return ast.node(kind='pipeline', s=s, parts=list(oldparts))
def p_compound_list(p):
    '''compound_list : list
                     | newline_list list1'''
    if len(p) == 2:
        p[0] = p[1]
    else:
        parts = p[2]
        if len(parts) > 1:
            p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
        else:
            p[0] = parts[0]
def p_elif_clause(p):
    '''elif_clause : ELIF compound_list THEN compound_list
                   | ELIF compound_list THEN compound_list ELSE compound_list
                   | ELIF compound_list THEN compound_list elif_clause'''
    parts = []
    for i in range(1, len(p)):
        if isinstance(p[i], ast.node):
            parts.append(p[i])
        else:
            parts.append(ast.node(kind='reservedword', word=p[i],
                                  pos=p.lexspan(i)))
    p[0] = parts
def p_simple_list(p):
    '''simple_list : simple_list1
                   | simple_list1 AMPERSAND
                   | simple_list1 SEMICOLON'''
    tok = p.lexer
    heredoc.gatherheredocuments(tok)

    if len(p) == 3 or len(p[1]) > 1:
        parts = p[1]
        if len(p) == 3:
            parts.append(ast.node(kind='operator', op=p[2],
                                  pos=p.lexspan(2)))
        p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
    else:
        assert len(p[1]) == 1
        p[0] = p[1][0]

    if (len(p) == 2 and
            p.lexer._parserstate & flags.parser.CMDSUBST and
            p.lexer._current_token.nopos() == p.lexer._shell_eof_token):
        # accept the input
        p.accept()
def p_simple_list1(p):
    '''simple_list1 : simple_list1 AND_AND newline_list simple_list1
                    | simple_list1 OR_OR newline_list simple_list1
                    | simple_list1 AMPERSAND simple_list1
                    | simple_list1 SEMICOLON simple_list1
                    | pipeline_command'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[0].append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])
def p_for_command(p):
    '''for_command : FOR WORD newline_list DO compound_list DONE
                   | FOR WORD newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD SEMICOLON newline_list DO compound_list DONE
                   | FOR WORD SEMICOLON newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD newline_list IN word_list list_terminator newline_list DO compound_list DONE
                   | FOR WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD newline_list IN list_terminator newline_list DO compound_list DONE
                   | FOR WORD newline_list IN list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY'''
    parts = _makeparts(p)

    # find the operator node that we might have there due to
    # list_terminator/newline_list and convert it to a reservedword so it's
    # considered part of the for loop
    for i, part in enumerate(parts):
        if part.kind == 'operator' and part.op == ';':
            parts[i] = ast.node(kind='reservedword', word=';', pos=part.pos)
            break  # there can only be one in there...

    p[0] = ast.node(kind='compound', redirects=[],
                    list=[ast.node(kind='for', parts=parts,
                                   pos=_partsspan(parts))],
                    pos=_partsspan(parts))
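# A hedged sketch of the operator-to-reservedword conversion above, assuming
# the bashlex.parse entry point: the ';' before 'do' comes in through
# list_terminator as an operator node, and should show up as a reservedword
# among the for node's parts.
def _example_for_semicolon():
    import bashlex

    tree = bashlex.parse('for x in a b; do echo $x; done')[0]
    fornode_ = tree.list[0]
    print([part.kind for part in fornode_.parts])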
def p_pipeline_command(p):
    '''pipeline_command : pipeline
                        | BANG pipeline_command
                        | timespec pipeline_command
                        | timespec list_terminator
                        | BANG list_terminator'''
    if len(p) == 2:
        if len(p[1]) == 1:
            p[0] = p[1][0]
        else:
            p[0] = ast.node(kind='pipeline', parts=p[1],
                            pos=(p[1][0].pos[0], p[1][-1].pos[1]))
    else:
        # XXX timespec
        node = ast.node(kind='reservedword', word='!', pos=p.lexspan(1))
        if p[2].kind == 'pipeline':
            p[0] = p[2]
            p[0].parts.insert(0, node)
            p[0].pos = (p[0].parts[0].pos[0], p[0].parts[-1].pos[1])
        else:
            p[0] = ast.node(kind='pipeline', parts=[node, p[2]],
                            pos=(node.pos[0], p[2].pos[1]))
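# Sketch of the BANG branch above, again assuming bashlex.parse: the '!' is
# folded into the pipeline as a leading reservedword part rather than
# producing a separate negation node.
def _example_negated_pipeline():
    import bashlex

    tree = bashlex.parse('! true | false')[0]
    assert tree.kind == 'pipeline'
    assert tree.parts[0].kind == 'reservedword'
    assert tree.parts[0].word == '!'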
def _paramexpand(parserobj, string, sindex, lineno=0):
    node = None
    zindex = sindex + 1
    c = string[zindex] if zindex < len(string) else None
    if c and c in '0123456789$#?-!*@':
        # XXX 7685
        node = ast.node(kind='parameter', value=c,
                        pos=(sindex, zindex+1), lineno=lineno)
    elif c == '{':
        # XXX 7863
        # TODO not smart enough, doesn't consider escaping
        zindex = string.find('}', zindex + 1)
        node = ast.node(kind='parameter', value=string[sindex+2:zindex],
                        pos=(sindex, zindex+1), lineno=lineno)
        # TODO
        # return _parameterbraceexpand(string, zindex)
    elif c == '(':
        return _extractcommandsubst(parserobj, string, zindex + 1)
    elif c == '[':
        raise NotImplementedError('arithmetic substitution')
        #return _extractarithmeticsubst(string, zindex + 1)
    else:
        tindex = zindex
        for zindex in range(tindex, len(string) + 1):
            if zindex == len(string):
                break
            if not string[zindex].isalnum() and not string[zindex] == '_':
                break
        temp1 = string[sindex:zindex]
        if temp1:
            return (ast.node(kind='parameter', value=temp1[1:],
                             pos=(sindex, zindex), lineno=lineno), zindex)

    if zindex < len(string):
        zindex += 1

    return node, zindex
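# A small sketch of the main shapes handled above, assuming bashlex.parse:
# special parameters ($?), braced names (${foo}) and plain names ($foo) all
# end up as 'parameter' parts of the enclosing word node.
def _example_paramexpand():
    import bashlex

    for s in ('echo $?', 'echo ${foo}', 'echo $foo'):
        word = bashlex.parse(s)[0].parts[1]
        assert word.parts[0].kind == 'parameter'
        print(word.parts[0].value)  # '?', 'foo', 'foo'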
def p_redirection_heredoc(p):
    '''redirection : LESS_LESS WORD
                   | NUMBER LESS_LESS WORD
                   | REDIR_WORD LESS_LESS WORD
                   | LESS_LESS_MINUS WORD
                   | NUMBER LESS_LESS_MINUS WORD
                   | REDIR_WORD LESS_LESS_MINUS WORD'''
    parserobj = p.context
    assert isinstance(parserobj, _parser)

    output = ast.node(kind='word', word=p[len(p)-1], parts=[],
                      pos=p.lexspan(len(p)-1))
    if len(p) == 3:
        p[0] = ast.node(kind='redirect', input=None, type=p[1],
                        heredoc=None, output=output,
                        pos=(p.lexpos(1), p.endlexpos(2)))
    else:
        p[0] = ast.node(kind='redirect', input=p[1], type=p[2],
                        heredoc=None, output=output,
                        pos=(p.lexpos(1), p.endlexpos(3)))

    if p.slice[len(p)-2].ttype == tokenizer.tokentype.LESS_LESS:
        parserobj.redirstack.append((p[0], False))
    else:
        parserobj.redirstack.append((p[0], True))
def p_list1(p):
    '''list1 : list1 AND_AND newline_list list1
             | list1 OR_OR newline_list list1
             | list1 AMPERSAND newline_list list1
             | list1 SEMICOLON newline_list list1
             | list1 NEWLINE newline_list list1
             | pipeline_command'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        # XXX newline
        p[0].append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])
def p_command(p):
    '''command : simple_command
               | shell_command
               | shell_command redirection_list
               | function_def
               | coproc'''
    if isinstance(p[1], ast.node):
        p[0] = p[1]
        if len(p) == 3:
            assert p[0].kind == 'compound'
            p[0].redirects.extend(p[2])
            assert p[0].pos[0] < p[0].redirects[-1].pos[1]
            p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1])
    else:
        p[0] = ast.node(kind='command', parts=p[1], pos=_partsspan(p[1]))
def makeheredoc(tokenizer, redirnode, lineno, killleading):
    redirword = remove_escape(string_quote_removal(redirnode.output.word))
    #redirword = redirnode.output.word
    document = []

    startpos = tokenizer._shell_input_line_index
    #fullline = self.tok.readline(bool(redirword.output.flags & flags.word.QUOTED))
    fullline = tokenizer.readline(False)
    while fullline:
        if killleading:
            while fullline[0] == '\t':
                fullline = fullline[1:]

        if not fullline:
            continue

        if fullline[:-1] == redirword and fullline[len(redirword)] == '\n':
            document.append(fullline[:-1])
            # document_done
            break

        document.append(fullline)
        #fullline = self.readline(bool(redirnode.flags & flags.word.QUOTED))
        fullline = tokenizer.readline(False)

    if not fullline:
        raise errors.ParsingError(
            "here-document at line %d delimited by end-of-file (wanted %r)" %
            (lineno, redirword),
            tokenizer._shell_input_line,
            tokenizer._shell_input_line_index)

    document = ''.join(document)
    endpos = tokenizer._shell_input_line_index - 1

    assert hasattr(redirnode, 'heredoc')
    num_of_lines = document.count('\n') + 1
    redirnode.heredoc = ast.node(kind='heredoc', value=document,
                                 pos=(startpos, endpos),
                                 lineno=num_of_lines)

    # if the heredoc immediately follows this node, fix its end pos
    if redirnode.pos[1] + 1 == startpos:
        redirnode.pos = (redirnode.pos[0], endpos)

    return document
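# Hedged example of the here-document plumbing above, assuming bashlex.parse:
# the document body is attached to the redirect node as its .heredoc, and the
# redirect's end position is stretched to cover the document when it starts
# right after the command line.
def _example_heredoc():
    import bashlex

    tree = bashlex.parse('cat <<EOF\nhello\nEOF\n')[0]
    redirect = tree.parts[-1]
    assert redirect.kind == 'redirect'
    # the captured document text (delimiter handling follows the loop above)
    print(repr(redirect.heredoc.value))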
def _makeparts(p):
    parts = []
    for i in range(1, len(p)):
        if isinstance(p[i], ast.node):
            parts.append(p[i])
        elif isinstance(p[i], list):
            parts.extend(p[i])
        elif isinstance(p.slice[i], tokenizer.token):
            if p.slice[i].ttype == tokenizer.tokentype.WORD:
                parserobj = p.context
                parts.append(_expandword(parserobj, p.slice[i]))
            else:
                parts.append(ast.node(kind='reservedword', word=p[i],
                                      pos=p.lexspan(i)))
        else:
            pass

    return parts
def p_inputunit(p):
    '''inputunit : simple_list simple_list_terminator
                 | NEWLINE
                 | error NEWLINE
                 | EOF'''
    # XXX
    if p.lexer._parserstate & flags.parser.CMDSUBST:
        p.lexer._parserstate.add(flags.parser.EOFTOKEN)

    if isinstance(p[1], ast.node):
        p[0] = p[1]
        # accept right here in case the input contains more lines that are
        # not part of the current command
        p.accept()
    if p.slice[1].ttype == tokenizer.tokentype.NEWLINE:
        p[0] = ast.node(kind='newline', pos=(p.lexpos(1), p.lexpos(1)),
                        lineno=1)
        p.accept()
def commandnode(s, *parts):
    return ast.node(kind='command', s=s, parts=list(parts))
def functionnode(s, name, body, *parts):
    return ast.node(kind='function', name=name, body=body,
                    parts=list(parts), s=s)
def whilenode(s, *parts):
    return ast.node(kind='while', parts=list(parts), s=s)
def fornode(s, *parts):
    return ast.node(kind='for', parts=list(parts), s=s)
def ifnode(s, *parts):
    return ast.node(kind='if', parts=list(parts), s=s)
def comsubnode(s, command):
    return ast.node(kind='commandsubstitution', s=s, command=command)
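# The constructors above read like test helpers for spelling out expected
# ASTs. A hedged sketch of composing them follows; the wordnode helper is
# hypothetical (not shown in this section) and is defined here only so the
# example is self-contained.
def wordnode(word, s=None):
    # hypothetical helper, mirroring the style of the constructors above
    return ast.node(kind='word', word=word, s=s or word, parts=[])

def _example_expected_ast():
    # expected tree for: echo hi | cat
    return pipelinenode(
        'echo hi | cat',
        commandnode('echo hi', wordnode('echo'), wordnode('hi')),
        ast.node(kind='pipe', pipe='|', s='|'),
        commandnode('cat', wordnode('cat')))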