def p_shell_command(p):
    '''shell_command : for_command | case_command | WHILE compound_list DO compound_list DONE | UNTIL compound_list DO compound_list DONE | select_command | if_command | subshell | group_command | arith_command | cond_command | arith_for_command'''
    # Most alternatives are already-built compound nodes and pass through
    # unchanged; only WHILE/UNTIL are assembled here.
    if len(p) == 2:
        p[0] = p[1]
    else:
        # while or until
        assert p[2].kind == 'list'

        parts = _makeparts(p)
        kind = parts[0].word
        assert kind in ('while', 'until')
        # wrap the while/until node in a compound node so redirections can
        # later be attached to it (see p_command)
        p[0] = bast.node(kind='compound', redirects=[],
                         list=[bast.node(kind=kind, parts=parts,
                                         pos=_partsspan(parts))],
                         pos=_partsspan(parts))

    assert p[0].kind == 'compound'
def _expandword(parser, tokenword):
    # Turn a WORD token into a 'word' ast node, running word expansion on it
    # (subject to the parser's expansion limit) to collect its inner parts.
    if parser._expansionlimit == -1:
        # we enter this branch in the following conditions:
        # - currently parsing a substitution as a result of an expansion
        # - the previous expansion had limit == 0
        #
        # this means that this node is a descendant of a substitution in an
        # unexpanded word and will be filtered in the limit == 0 condition below
        #
        # (the reason we even expand when limit == 0 is to get quote removal)
        node = bast.node(kind='word', word=tokenword,
                         pos=(tokenword.lexpos, tokenword.endlexpos),
                         parts=[])
        return node
    else:
        quoted = bool(tokenword.flags & flags.word.QUOTED)
        # only a leading double quote counts as double-quoted here
        doublequoted = quoted and tokenword.value[0] == '"'

        # TODO set qheredocument
        parts, expandedword = subst._expandwordinternal(parser, tokenword, 0,
                                                        doublequoted, 0, 0)

        # limit reached, don't include substitutions (still expanded to get
        # quote removal though)
        if parser._expansionlimit == 0:
            parts = [node for node in parts if 'substitution' not in node.kind]

        node = bast.node(kind='word', word=expandedword,
                         pos=(tokenword.lexpos, tokenword.endlexpos),
                         parts=parts)
        return node
def p_group_command(p):
    '''group_command : LEFT_CURLY compound_list RIGHT_CURLY'''
    # Represent both braces as reservedword nodes surrounding the inner
    # list, then emit a compound node spanning all three parts.
    children = [
        bast.node(kind='reservedword', word=p[1], pos=p.lexspan(1)),
        p[2],
        bast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)),
    ]
    p[0] = bast.node(kind='compound', list=children, redirects=[],
                     pos=_partsspan(children))
def p_subshell(p):
    '''subshell : LEFT_PAREN compound_list RIGHT_PAREN'''
    # Represent both parentheses as reservedword nodes surrounding the
    # inner list, then emit a compound node spanning all three parts.
    children = [
        bast.node(kind='reservedword', word=p[1], pos=p.lexspan(1)),
        p[2],
        bast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)),
    ]
    p[0] = bast.node(kind='compound', list=children, redirects=[],
                     pos=_partsspan(children))
def p_list0(p):
    '''list0 : list1 NEWLINE newline_list | list1 AMPERSAND newline_list | list1 SEMICOLON newline_list'''
    parts = p[1]
    # a trailing newline on a single-command list carries no meaning, so
    # only wrap in a list node when there are multiple parts or the
    # terminator is a real operator (& or ;)
    if len(parts) > 1 or p.slice[2].ttype != tokenizer.tokentype.NEWLINE:
        parts.append(bast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0] = bast.node(kind='list', parts=parts, pos=_partsspan(parts))
    else:
        p[0] = parts[0]
def p_if_command(p): '''if_command : IF compound_list THEN compound_list FI | IF compound_list THEN compound_list ELSE compound_list FI | IF compound_list THEN compound_list elif_clause FI''' # we currently don't distinguish the various lists that make up the # command, because it's not needed later on. if there will be a need # we can always add different nodes for elif/else. parts = _makeparts(p) p[0] = bast.node(kind='compound', redirects=[], list=[bast.node(kind='if', parts=parts, pos=_partsspan(parts))], pos=_partsspan(parts))
def p_redirection(p):
    '''redirection : GREATER WORD | LESS WORD | NUMBER GREATER WORD | NUMBER LESS WORD | REDIR_WORD GREATER WORD | REDIR_WORD LESS WORD | GREATER_GREATER WORD | NUMBER GREATER_GREATER WORD | REDIR_WORD GREATER_GREATER WORD | GREATER_BAR WORD | NUMBER GREATER_BAR WORD | REDIR_WORD GREATER_BAR WORD | LESS_GREATER WORD | NUMBER LESS_GREATER WORD | REDIR_WORD LESS_GREATER WORD | LESS_LESS_LESS WORD | NUMBER LESS_LESS_LESS WORD | REDIR_WORD LESS_LESS_LESS WORD | LESS_AND NUMBER | NUMBER LESS_AND NUMBER | REDIR_WORD LESS_AND NUMBER | GREATER_AND NUMBER | NUMBER GREATER_AND NUMBER | REDIR_WORD GREATER_AND NUMBER | LESS_AND WORD | NUMBER LESS_AND WORD | REDIR_WORD LESS_AND WORD | GREATER_AND WORD | NUMBER GREATER_AND WORD | REDIR_WORD GREATER_AND WORD | GREATER_AND DASH | NUMBER GREATER_AND DASH | REDIR_WORD GREATER_AND DASH | LESS_AND DASH | NUMBER LESS_AND DASH | REDIR_WORD LESS_AND DASH | AND_GREATER WORD | AND_GREATER_GREATER WORD'''
    parserobj = p.context

    # two symbols: operator + target (e.g. '> file'); three symbols: an
    # explicit input fd/word before the operator (e.g. '2> file')
    if len(p) == 3:
        output = p[2]
        # WORD targets get expanded into proper word nodes
        if p.slice[2].ttype == tokenizer.tokentype.WORD:
            output = _expandword(parserobj, p.slice[2])
        p[0] = bast.node(kind='redirect', input=None, type=p[1],
                         heredoc=None, output=output,
                         pos=(p.lexpos(1), p.endlexpos(2)))
    else:
        output = p[3]
        if p.slice[3].ttype == tokenizer.tokentype.WORD:
            output = _expandword(parserobj, p.slice[3])
        p[0] = bast.node(kind='redirect', input=p[1], type=p[2],
                         heredoc=None, output=output,
                         pos=(p.lexpos(1), p.endlexpos(3)))
def _extractcommandsubst(parserobj, string, sindex, sxcommand=False):
    # Extract a $(...) command substitution starting right after the '('.
    # sindex points past the opening paren; '((' would be arithmetic
    # expansion, which is unsupported.
    if string[sindex] == '(':
        raise NotImplementedError('arithmetic expansion')
        #return _extractdelimitedstring(parserobj, string, sindex, '$(', '(', '(', sxcommand=True)
    else:
        node, si = _parsedolparen(parserobj, string, sindex)
        si += 1
        # pos starts 2 chars back so the node covers the leading '$('
        return bast.node(kind='commandsubstitution', command=node,
                         pos=(sindex - 2, si)), si
def p_function_def(p):
    '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body | FUNCTION WORD LEFT_PAREN RIGHT_PAREN newline_list function_body | FUNCTION WORD newline_list function_body'''
    # The function name is the first word node among the parts; the body is
    # always the last part.
    pieces = _makeparts(p)
    nameindex = bast.findfirstkind(pieces, 'word')
    p[0] = bast.node(kind='function',
                     name=pieces[nameindex],
                     body=pieces[-1],
                     parts=pieces,
                     pos=_partsspan(pieces))
def p_pipeline(p):
    '''pipeline : pipeline BAR newline_list pipeline | pipeline BAR_AND newline_list pipeline | command'''
    # A pipeline is accumulated as a flat list of command nodes with 'pipe'
    # nodes interleaved between them; p_pipeline_command wraps the final list.
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[0].append(bast.node(kind='pipe', pipe=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])
def p_simple_list(p):
    '''simple_list : simple_list1 | simple_list1 AMPERSAND | simple_list1 SEMICOLON'''
    # a top-level list has been reduced; collect any pending heredoc bodies
    # from the tokenizer before finishing it
    tok = p.lexer
    heredoc.gatherheredocuments(tok)

    # wrap in a list node only when there are several parts or a trailing
    # '&'/';' operator; a lone command passes through unchanged
    if len(p) == 3 or len(p[1]) > 1:
        parts = p[1]
        if len(p) == 3:
            parts.append(bast.node(kind='operator', op=p[2],
                                   pos=p.lexspan(2)))
        p[0] = bast.node(kind='list', parts=parts, pos=_partsspan(parts))
    else:
        assert len(p[1]) == 1
        p[0] = p[1][0]

    # when parsing a command substitution, stop as soon as we reach the
    # substitution's eof token
    if (len(p) == 2 and
        p.lexer._parserstate & flags.parser.CMDSUBST and
        p.lexer._current_token.nopos() == p.lexer._shell_eof_token):
        # accept the input
        p.accept()
def p_elif_clause(p):
    '''elif_clause : ELIF compound_list THEN compound_list | ELIF compound_list THEN compound_list ELSE compound_list | ELIF compound_list THEN compound_list elif_clause'''
    # Keep already-built ast nodes as-is; every other symbol (ELIF/THEN/ELSE
    # keywords) is wrapped as a reservedword node. The result is a flat list.
    p[0] = [p[i] if isinstance(p[i], bast.node)
            else bast.node(kind='reservedword', word=p[i], pos=p.lexspan(i))
            for i in range(1, len(p))]
def p_compound_list(p):
    '''compound_list : list | newline_list list1'''
    # A plain list passes straight through.
    if len(p) == 2:
        p[0] = p[1]
        return

    # Otherwise unwrap the list1 result: a single command stays bare, while
    # several commands get wrapped in a list node.
    items = p[2]
    if len(items) == 1:
        p[0] = items[0]
    else:
        p[0] = bast.node(kind='list', parts=items, pos=_partsspan(items))
def p_simple_list1(p):
    '''simple_list1 : simple_list1 AND_AND newline_list simple_list1 | simple_list1 OR_OR newline_list simple_list1 | simple_list1 AMPERSAND simple_list1 | simple_list1 SEMICOLON simple_list1 | pipeline_command'''
    # Accumulate a flat list of command nodes separated by operator nodes
    # (&&, ||, &, ;); p_simple_list decides whether to wrap it.
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[0].append(bast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])
def p_for_command(p):
    '''for_command : FOR WORD newline_list DO compound_list DONE | FOR WORD newline_list LEFT_CURLY compound_list RIGHT_CURLY | FOR WORD SEMICOLON newline_list DO compound_list DONE | FOR WORD SEMICOLON newline_list LEFT_CURLY compound_list RIGHT_CURLY | FOR WORD newline_list IN word_list list_terminator newline_list DO compound_list DONE | FOR WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY | FOR WORD newline_list IN list_terminator newline_list DO compound_list DONE | FOR WORD newline_list IN list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY'''
    parts = _makeparts(p)

    # find the operatornode that we might have there due to
    # list_terminator/newline_list and convert it to a reservedword so its
    # considered as part of the for loop
    for i, part in enumerate(parts):
        if part.kind == 'operator' and part.op == ';':
            parts[i] = bast.node(kind='reservedword', word=';', pos=part.pos)
            break # there could be only one in there...

    # wrap the for node in a compound node so redirections can later be
    # attached to it (see p_command)
    p[0] = bast.node(kind='compound', redirects=[],
                     list=[bast.node(kind='for', parts=parts,
                                     pos=_partsspan(parts))],
                     pos=_partsspan(parts))
def p_pipeline_command(p):
    '''pipeline_command : pipeline | BANG pipeline_command | timespec pipeline_command | timespec list_terminator | BANG list_terminator'''
    if len(p) == 2:
        # a bare pipeline: unwrap a single command, otherwise wrap the
        # accumulated command/pipe list in a pipeline node
        if len(p[1]) == 1:
            p[0] = p[1][0]
        else:
            p[0] = bast.node(kind='pipeline', parts=p[1],
                             pos=(p[1][0].pos[0], p[1][-1].pos[1]))
    else:
        # XXX timespec
        # a '!' prefix: fold the reservedword into an existing pipeline
        # node, or build a new pipeline around the single command
        node = bast.node(kind='reservedword', word='!', pos=p.lexspan(1))
        if p[2].kind == 'pipeline':
            p[0] = p[2]
            p[0].parts.insert(0, node)
            p[0].pos = (p[0].parts[0].pos[0], p[0].parts[-1].pos[1])
        else:
            p[0] = bast.node(kind='pipeline', parts=[node, p[2]],
                             pos=(node.pos[0], p[2].pos[1]))
def p_redirection_heredoc(p):
    '''redirection : LESS_LESS WORD | NUMBER LESS_LESS WORD | REDIR_WORD LESS_LESS WORD | LESS_LESS_MINUS WORD | NUMBER LESS_LESS_MINUS WORD | REDIR_WORD LESS_LESS_MINUS WORD'''
    parserobj = p.context
    assert isinstance(parserobj, _parser)

    # the output is the heredoc delimiter word; the heredoc body itself is
    # filled in later (see the redirstack handling below)
    output = bast.node(kind='word', word=p[len(p) - 1], parts=[],
                       pos=p.lexspan(len(p)-1))
    if len(p) == 3:
        p[0] = bast.node(kind='redirect', input=None, type=p[1],
                         heredoc=None, output=output,
                         pos=(p.lexpos(1), p.endlexpos(2)))
    else:
        p[0] = bast.node(kind='redirect', input=p[1], type=p[2],
                         heredoc=None, output=output,
                         pos=(p.lexpos(1), p.endlexpos(3)))

    # record the pending heredoc; the bool marks whether leading tabs are
    # stripped (<<- vs <<)
    if p.slice[len(p)-2].ttype == tokenizer.tokentype.LESS_LESS:
        parserobj.redirstack.append((p[0], False))
    else:
        parserobj.redirstack.append((p[0], True))
def p_list1(p):
    '''list1 : list1 AND_AND newline_list list1 | list1 OR_OR newline_list list1 | list1 AMPERSAND newline_list list1 | list1 SEMICOLON newline_list list1 | list1 NEWLINE newline_list list1 | pipeline_command'''
    # Accumulate a flat list of command nodes separated by operator nodes.
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        # XXX newline
        p[0].append(bast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])
def _paramexpand(parserobj, string, sindex):
    # Expand the parameter reference starting at string[sindex] (which is
    # '$'). Returns (node-or-None, index past the consumed text).
    node = None
    zindex = sindex + 1
    c = string[zindex] if zindex < len(string) else None
    if c and c in '0123456789$#?-!*@':
        # XXX 7685
        # single-character special parameter, e.g. $?, $1, $#
        node = bast.node(kind='parameter', value=c,
                         pos=(sindex, zindex+1))
    elif c == '{':
        # XXX 7863
        # TODO not start enough, doesn't consider escaping
        zindex = string.find('}', zindex + 1)
        node = bast.node(kind='parameter',
                         value=string[sindex + 2:zindex],
                         pos=(sindex, zindex+1))
        # TODO
        # return _parameterbraceexpand(string, zindex)
    elif c == '(':
        # $(...) command substitution
        return _extractcommandsubst(parserobj, string, zindex + 1)
    elif c == '[':
        raise NotImplementedError('arithmetic substitution')
        #return _extractarithmeticsubst(string, zindex + 1)
    else:
        # plain $name: scan alphanumerics/underscores for the variable name
        tindex = zindex
        for zindex in range(tindex, len(string) + 1):
            if zindex == len(string):
                break
            if not string[zindex].isalnum() and not string[zindex] == '_':
                break
        temp1 = string[sindex:zindex]
        if temp1:
            # strip the leading '$' from the captured value
            return (bast.node(kind='parameter', value=temp1[1:],
                              pos=(sindex, zindex)), zindex)

    if zindex < len(string):
        zindex += 1

    return node, zindex
def p_command(p):
    '''command : simple_command | shell_command | shell_command redirection_list | function_def | coproc'''
    if isinstance(p[1], bast.node):
        p[0] = p[1]
        # attach trailing redirections to the compound node and widen its
        # position to cover them
        if len(p) == 3:
            assert p[0].kind == 'compound'
            p[0].redirects.extend(p[2])
            assert p[0].pos[0] < p[0].redirects[-1].pos[1]
            p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1])
    else:
        # simple_command produces a list of parts; wrap it in a command node
        p[0] = bast.node(kind='command', parts=p[1], pos=_partsspan(p[1]))
def _makeparts(p):
    # Flatten a production's symbols into a list of ast nodes: nodes pass
    # through, lists are spliced in, WORD tokens are expanded, and any other
    # token becomes a reservedword node.
    parts = []
    for i in range(1, len(p)):
        if isinstance(p[i], bast.node):
            parts.append(p[i])
        elif isinstance(p[i], list):
            parts.extend(p[i])
        elif isinstance(p.slice[i], tokenizer.token):
            if p.slice[i].ttype == tokenizer.tokentype.WORD:
                parserobj = p.context
                parts.append(_expandword(parserobj, p.slice[i]))
            else:
                parts.append(bast.node(kind='reservedword', word=p[i],
                                       pos=p.lexspan(i)))
        else:
            # e.g. None from an empty newline_list production
            pass

    return parts
def normalize_command(node, current):
    """Normalize a parsed bash command node into the normalized tree rooted
    at `current`.

    Walks node.parts classifying each child as head command, flag or
    argument, recursing into embedded commands (e.g. find -exec ... ;),
    then post-processes the result: parenthesis grouping, a default path
    for `find`, egrep/fgrep -> grep rewriting, and xargs -I normalization.
    """
    arg_status = None  # determine argument types
    head_commands = []
    unary_logic_ops = []
    binary_logic_ops = []
    unprocessed_unary_logic_ops = []
    unprocessed_binary_logic_ops = []

    def expecting(a_t):
        # True if the current command still expects an argument of type a_t
        # (list-typed slots can be filled repeatedly).
        for arg_type, is_list, filled in arg_status["non-optional"]:
            if not is_list and filled:
                continue
            if arg_type == a_t:
                return True
        for arg_type, is_list, filled in arg_status["optional"]:
            if not is_list and filled:
                continue
            if arg_type == a_t:
                return True
        return False

    def organize_buffer(lparenth, rparenth):
        # Collapse the siblings between '(' and ')' into a single node:
        # the lone child itself, or a BracketNode holding all of them.
        node = lparenth.rsb
        while node != rparenth:
            # if node.kind == "unarylogicop":
            #     adjust_unary_operators(node)
            node = node.rsb
        node = lparenth.rsb
        while node != rparenth:
            # if node.kind == "binarylogicop":
            #     adjust_binary_operators(node)
            node = node.rsb
        node = lparenth.rsb
        if node.rsb == rparenth:
            return lparenth.rsb
        else:
            norm_node = BracketNode()
            while node != rparenth:
                attach_to_tree(node, norm_node)
                node = node.rsb
            return norm_node

    def adjust_unary_operators(node):
        if node.associate == UnaryLogicOpNode.RIGHT:
            # change right sibling to child
            rsb = node.rsb
            if not rsb:
                print("Warning: unary logic operator without a right "
                      "sibling.")
                print(node.parent)
                return
            if rsb.value == "(":
                unprocessed_unary_logic_ops.append(node)
                return
            if rsb.value == ")":
                # TODO: this corner case is not handled very well
                node.associate = UnaryLogicOpNode.LEFT
                unprocessed_unary_logic_ops.append(node)
                return
            make_sibling(node, rsb.rsb)
            node.parent.remove_child(rsb)
            rsb.lsb = None
            rsb.rsb = None
            node.add_child(rsb)
        elif node.associate == UnaryLogicOpNode.LEFT:
            # change left sibling to child
            lsb = node.lsb
            if not lsb:
                print("Warning: unary logic operator without a left "
                      "sibling.")
                print(node.parent)
                return
            if lsb.value == ")":
                unprocessed_unary_logic_ops.append(node)
                return
            if (lsb.kind == "binarylogicop" and lsb.get_num_of_children() < 2) \
                    or lsb.value == "(":
                # TODO: this corner case is not handled very well
                # it is often triggered by the bizarreness of -prune
                return
            make_sibling(lsb.lsb, node)
            node.parent.remove_child(lsb)
            lsb.lsb = None
            lsb.rsb = None
            node.add_child(lsb)
        else:
            raise AttributeError("Cannot decide unary operator "
                                 "assocation: {}".format(node.symbok))

        # resolve single child of binary operators left as the result of
        # parentheses processing
        if node.parent.kind == "bracket" and node.parent.get_num_of_children() == 1:
            node.grandparent.replace_child(node.parent, node)

    def adjust_binary_operators(node):
        # change right sibling to child
        # change left sibling to child
        rsb = node.rsb
        lsb = node.lsb

        if not rsb or not lsb:
            raise AttributeError("Error: binary logic operator must have "
                                 "both left and right siblings.")

        if rsb.value == "(" or lsb.value == ")":
            unprocessed_binary_logic_ops.append(node)
            # sibling is parenthese
            return

        assert(rsb.value != ")")
        assert(lsb.value != "(")

        make_sibling(node, rsb.rsb)
        make_sibling(lsb.lsb, node)
        node.parent.remove_child(rsb)
        node.parent.remove_child(lsb)
        rsb.rsb = None
        lsb.lsb = None

        if lsb.kind == "binarylogicop" and lsb.value == node.value:
            # same operator on the left: merge its children instead of
            # nesting (keeps the tree flat for chains of -and/-or)
            for lsbc in lsb.children:
                make_parent_child(node, lsbc)
            make_parent_child(node, rsb)
            lsbcr = lsb.get_right_child()
            make_sibling(lsbcr, rsb)
        else:
            make_parent_child(node, lsb)
            make_parent_child(node, rsb)
            make_sibling(lsb, rsb)

        # resolve single child of binary operators left as the result of
        # parentheses processing
        if node.parent.kind == "binarylogicop" \
                and node.parent.value == "-and":
            if node.parent.get_num_of_children() == 1:
                node.grandparent.replace_child(node.parent, node)

    def attach_flag(node, attach_point_info):
        attach_point = attach_point_info[0]

        if node.word.startswith("--") \
                or is_unary_logic_op(node, attach_point) \
                or node.word in binary_logic_operators \
                or attach_point.value == "find" \
                or len(node.word) <= 1:
            normalize_flag(node, attach_point)
        else:
            # split flags
            assert(node.word.startswith('-'))
            options = node.word[1:]
            if len(options) == 1 and not options.isdigit():
                normalize_flag(node, attach_point)
            else:
                # flags whose numeric suffix is an inline argument
                # (e.g. grep -A3, head -5, xargs -n1): keep as one flag
                # with the number abstracted away
                if options[-1].isdigit() and \
                        ((attach_point.value == "grep" and options.startswith("A"))
                         or (attach_point.value == "grep" and options.startswith("B"))
                         or (attach_point.value == "grep" and options.startswith("C"))
                         or (attach_point.value == "head" and options.isdigit())
                         or (attach_point.value == "tail" and options.isdigit())
                         or (attach_point.value == "head" and options.startswith("n"))
                         or (attach_point.value == "tail" and options.startswith("n"))
                         or (attach_point.value == "awk" and options.startswith("F"))
                         or (attach_point.value == "xargs" and options.startswith("n"))
                         or (attach_point.value == "xargs" and options.startswith("l"))
                         or (attach_point.value == "xargs" and options.startswith("P"))):
                    node.word = re.sub(_DIGIT_RE, _NUM, node.word)
                    normalize_flag(node, attach_point)
                else:
                    # bundled single-letter flags, e.g. -xvf -> -x -v -f
                    str = options + " splitted into: "
                    for option in options:
                        new_node = copy.deepcopy(node)
                        new_node.word = '-' + option
                        normalize_flag(new_node, attach_point)
                        str += new_node.word + ' '
                    if verbose:
                        print(str)

        head_cmd = attach_point.headcommand.value
        flag = node.word
        arg_type = man_lookup.get_flag_arg_type(head_cmd, flag)
        if arg_type:
            # flag is expecting an argument
            attach_point = attach_point.get_right_child()
            return (attach_point, ["argument"], [arg_type])
        else:
            # flag does not take arguments
            return attach_point_info

    def look_above(attach_point):
        head_cmd = attach_point.headcommand
        return (head_cmd, ["flags", "arguments"], None)

    # Attach point format: (pointer_to_the_attach_point,
    #                       ast_node_type, arg_type)
    attach_point_info = (current, ["headcommand"], [])

    ind = 0
    while ind < len(node.parts):
        attach_point = attach_point_info[0]
        possible_node_kinds = attach_point_info[1]
        possible_arg_types = attach_point_info[2]

        child = node.parts[ind]
        if child.kind == 'word':
            # prioritize processing of logic operators
            if is_unary_logic_op(child, attach_point):
                norm_node = UnaryLogicOpNode(child.word)
                attach_to_tree(norm_node, attach_point)
                unary_logic_ops.append(norm_node)
            elif is_binary_logic_op(child, attach_point):
                norm_node = BinaryLogicOpNode(child.word)
                attach_to_tree(norm_node, attach_point)
                binary_logic_ops.append(norm_node)
            else:
                if child.word == "--" and not attach_point.is_command("awk"):
                    # everything after '--' is an argument
                    attach_point_info = (attach_point_info[0],
                                         ["argument"],
                                         attach_point_info[2])
                    ind += 1
                    continue
                if len(possible_node_kinds) == 1:
                    # no ast_node_kind ambiguation
                    node_kind = possible_node_kinds[0]
                    if node_kind == "headcommand":
                        norm_node = normalize_headcommand(child, attach_point)
                        head_commands.append(norm_node)
                        head_cmd = norm_node.value
                        arg_status = copy.deepcopy(man_lookup.get_arg_types(head_cmd))
                        attach_point_info = \
                            (norm_node, ["flag", "argument"], None)
                    elif node_kind == "argument":
                        if possible_arg_types and "Utility" in possible_arg_types:
                            # embedded command leaded by
                            # ["-exec", "-execdir", "-ok", "-okdir"]
                            new_command_node = bast.node(kind="command", word="",
                                                         parts=[], pos=(-1, -1))
                            new_command_node.parts = []
                            subcommand_added = False
                            # consume parts up to the terminating ';' or '+'
                            for j in xrange(ind, len(node.parts)):
                                if hasattr(node.parts[j], 'word') \
                                        and (node.parts[j].word == ";"
                                             or node.parts[j].word == "+"):
                                    normalize_command(new_command_node, attach_point)
                                    attach_point.value += \
                                        '::' + node.parts[j].word
                                    subcommand_added = True
                                    break
                                else:
                                    new_command_node.parts.\
                                        append(node.parts[j])
                            if not subcommand_added:
                                print("Warning: -exec missing ending ';'")
                                normalize_command(new_command_node, attach_point)
                                attach_point.value += '::' + ";"
                            ind = j
                        else:
                            if possible_arg_types:
                                arg_type = list(possible_arg_types)[0]
                            else:
                                # "--" encountered
                                arg_type = cmd_arg_type_check(child, arg_status)
                            # recurse to main normalization to handle
                            # argument with deep structures
                            normalize(child, attach_point, "argument", arg_type)
                            attach_point_info = look_above(attach_point)
                else:
                    # need to decide ast_node_kind
                    if child.word.startswith("-") and \
                            not ((attach_point.value in ["head", "tail"] and
                                  child.word[1:].isdigit()) or
                                 (attach_point.value in ["chmod"] and
                                  ('r' in child.word or 'x' in child.word or
                                   'w' in child.word or 'X' in child.word or
                                   's' in child.word or 't' in child.word or
                                   'u' in child.word or 'g' in child.word or
                                   'o' in child.word))):
                        # child is a flag
                        attach_point_info = \
                            attach_flag(child, attach_point_info)
                    else:
                        # child is an argument
                        if expecting("Utility"):
                            # embedded command leaded by
                            # ["sh", "csh", "ksh", "tcsh",
                            #  "zsh", "bash", "exec", "xargs"]
                            new_command_node = bast.node(kind="command", word="",
                                                         parts=[], pos=(-1, -1))
                            new_command_node.parts = []
                            for j in xrange(ind, len(node.parts)):
                                new_command_node.parts.append(node.parts[j])
                            normalize_command(new_command_node, attach_point)
                            ind = j
                        else:
                            arg_type = cmd_arg_type_check(child.word, arg_status)
                            # recurse to main normalization to handle argument
                            # with deep structures
                            normalize(child, attach_point, "argument", arg_type)
                            attach_point_info = look_above(attach_point)
        elif child.kind == "assignment":
            normalize(child, attach_point, "assignment")
        elif child.kind == "redirect":
            normalize(child, attach_point, "redirect")

        ind += 1

    # TODO: some commands get parsed with no head command
    # This is usually due to unrecognized utilities e.g. "mp3player".
    if len(head_commands) == 0:
        return

    if len(head_commands) > 1:
        print("Error: multiple headcommands in one command.")
        for hc in head_commands:
            print(hc.symbol)
        sys.exit()

    head_command = head_commands[0]

    # process (embedded) parenthese -- treat as implicit "-and"
    stack = []
    depth = 0

    def pop_stack_content(depth, rparenth, stack_top=None):
        # popping pushed states off the stack
        popped = stack.pop()
        while (popped.value != "("):
            head_command.remove_child(popped)
            popped = stack.pop()
        lparenth = popped
        if not rparenth:
            # unbalanced brackets
            rparenth = ArgumentNode(value=")")
            make_parent_child(stack_top.parent, rparenth)
            make_sibling(stack_top, rparenth)
        new_child = organize_buffer(lparenth, rparenth)
        i = head_command.substitute_parentheses(lparenth, rparenth, new_child)
        depth -= 1
        if depth > 0:
            # embedded parenthese
            stack.append(new_child)
        return depth, i

    i = 0
    while i < head_command.get_num_of_children():
        child = head_command.children[i]
        if child.value == "(":
            stack.append(child)
            depth += 1
        elif child.value == ")":
            assert(depth >= 0)
            # fix imbalanced parentheses: missing '('
            if depth == 0:
                # simply drop the single ')'
                detach_from_tree(child, child.parent)
            else:
                depth, i = pop_stack_content(depth, child)
        else:
            if depth > 0:
                stack.append(child)
            else:
                if child.kind == "unarylogicop":
                    unprocessed_unary_logic_ops.append(child)
                if child.kind == "binarylogicop":
                    unprocessed_binary_logic_ops.append(child)
        i += 1

    # fix imbalanced parentheses: missing ')'
    while (depth > 0):
        depth, _ = pop_stack_content(depth, None, stack[-1])

    assert(len(stack) == 0)
    assert(depth == 0)

    # for ul in unprocessed_unary_logic_ops:
    #     adjust_unary_operators(ul)
    # for bl in unprocessed_binary_logic_ops:
    #     adjust_binary_operators(bl)

    # recover omitted arguments
    if head_command.value == "find":
        arguments = []
        for child in head_command.children:
            if child.is_argument():
                arguments.append(child)
        # find with no path argument: insert the implicit "."
        if head_command.get_num_of_children() > 0 and len(arguments) < 1:
            norm_node = ArgumentNode(value=".", arg_type="File")
            make_sibling(norm_node, head_command.children[0])
            norm_node.parent = head_command
            head_command.children.insert(0, norm_node)

    # "grep" normalization
    if head_command.value == "egrep":
        head_command.value = "grep"
        flag_present = False
        for child in head_command.children:
            if child.is_option() and child.value in ["-E", "--extended-regexp"]:
                flag_present = True
        if not flag_present:
            norm_node = FlagNode(value="-E")
            make_sibling(norm_node, head_command.children[0])
            norm_node.parent = head_command
            head_command.children.insert(0, norm_node)

    if head_command.value == "fgrep":
        head_command.value = "grep"
        flag_present = False
        for child in head_command.children:
            if child.is_option() and child.value in ["-F", "--fixed-strings"]:
                flag_present = True
        if not flag_present:
            norm_node = FlagNode(value="-F")
            make_sibling(norm_node, head_command.children[0])
            norm_node.parent = head_command
            head_command.children.insert(0, norm_node)

    # "xargs" normalization
    has_repl_str = False
    if head_command.value == "xargs":
        for flag in head_command.get_flags():
            if flag.value == "-I":
                has_repl_str = True
                repl_str = flag.get_argument()
                assert(repl_str is not None)
                # canonicalize any replacement token to "{}"
                if repl_str.value != "{}":
                    utility = head_command.get_subcommand()
                    assert(utility is not None)
                    utility.normalize_repl_str(repl_str.value, '{}')
                    repl_str.value = "{}"
                    repl_str.arg_type = "ReservedWord"

        # add a replace str if not present
        if not has_repl_str:
            utility = head_command.get_subcommand()
            assert(utility is not None)
            for i in xrange(head_command.get_num_of_children()):
                if head_command.children[i].is_headcommand():
                    repl_str_flag_node = FlagNode("-I")
                    repl_str_node = ArgumentNode("{}", "ReservedWord")
                    repl_str_node2 = ArgumentNode("{}", "ReservedWord")

                    make_parent_child(repl_str_flag_node, repl_str_node)
                    head_command.children.insert(i, repl_str_flag_node)
                    repl_str_flag_node.parent = head_command
                    repl_str_flag_node.lsb = head_command.children[i-1]
                    head_command.children[i-1].rsb = repl_str_flag_node

                    sub_command = head_command.children[i+1]
                    repl_str_node2.parent = sub_command
                    repl_str_node2.lsb = sub_command.get_right_child()
                    sub_command.children.append(repl_str_node2)
                    break
def _expandwordinternal(parserobj, wordtoken, qheredocument, qdoublequotes,
                        quoted, isexp):
    # Scan wordtoken.value character by character, collecting expansion
    # nodes (process/command substitutions, parameters, tildes) into parts
    # and building the quote-removed string in istring.
    # Returns (parts, expandedword).
    # bash/subst.c L8132
    istring = ''
    parts = []
    tindex = [0]
    sindex = [0]
    string = wordtoken.value

    def nextchar():
        sindex[0] += 1
        if sindex[0] < len(string):
            return string[sindex[0]]

    def peekchar():
        if sindex[0]+1 < len(string):
            return string[sindex[0]+1]

    while True:
        if sindex[0] == len(string):
            break
            # goto finished_with_string
        c = string[sindex[0]]
        if c in '<>':
            if (nextchar() != '(' or qheredocument or qdoublequotes or
                    (wordtoken.flags & set([flags.word.DQUOTE,
                                            flags.word.NOPROCSUB]))):
                # not a process substitution; emit the character as-is
                sindex[0] -= 1

                # goto add_character
                sindex[0] += 1
                istring += c
            else:
                # <(...) or >(...) process substitution
                tindex = sindex[0] + 1

                node, sindex[0] = _extractprocesssubst(parserobj, string, tindex)

                parts.append(bast.node(kind='processsubstitution', command=node,
                                       pos=(tindex - 2, sindex[0])))
                istring += string[tindex - 2:sindex[0]]
                # goto dollar_add_string
        # TODO
        # elif c == '=':
        #     pass
        # elif c == ':':
        #     pass
        elif c == '~':
            if (wordtoken.flags & set([flags.word.NOTILDE, flags.word.DQUOTE]) or
                    (sindex[0] > 0 and not (wordtoken.flags & flags.word.NOTILDE)) or
                    qdoublequotes or qheredocument):
                wordtoken.flags.clear()
                wordtoken.flags.add(flags.word.ITILDE)
                sindex[0] += 1
                istring += c
            else:
                stopatcolon = wordtoken.flags & set([flags.word.ASSIGNRHS,
                                                     flags.word.ASSIGNMENT,
                                                     flags.word.TILDEEXP])
                expand = True
                # scan to the end of the tilde prefix; quoting anywhere in
                # it disables the expansion
                for i in range(sindex[0], len(string)):
                    r = string[i]
                    if r == '/':
                        break
                    if r in "\\'\"":
                        expand = False
                        break
                    if stopatcolon and r == ':':
                        break
                else:
                    # go one past the end if we didn't exit early
                    i += 1

                if i > sindex[0] and expand:
                    node = bast.node(kind='tilde', value=string[sindex[0]:i],
                                     pos=(sindex[0], i))
                    parts.append(node)
                istring += string[sindex[0]:i]
                sindex[0] = i

        elif c == '$' and len(string) > 1:
            tindex = sindex[0]
            node, sindex[0] = _paramexpand(parserobj, string, sindex[0])
            if node:
                parts.append(node)
            istring += string[tindex:sindex[0]]
        elif c == '`':
            tindex = sindex[0]
            # bare instance of ``
            if nextchar() == '`':
                sindex[0] += 1
                istring += '``'
            else:
                x = _stringextract(string, sindex[0], "`")
                if x == -1:
                    raise errors.ParsingError('bad substitution: no closing "`" '
                                              'in %s' % string)
                else:
                    if wordtoken.flags & flags.word.NOCOMSUB:
                        pass
                    else:
                        sindex[0] = x

                        word = string[tindex+1:sindex[0]]
                        command, ttindex = _recursiveparse(parserobj, word, 0)
                        _adjustpositions(command, tindex+1, len(string))
                        ttindex += 1 # ttindex is on the closing char

                        # assert sindex[0] == ttindex
                        # go one past the closing `
                        sindex[0] += 1

                        node = bast.node(kind='commandsubstitution',
                                         command=command,
                                         pos=(tindex, sindex[0]))
                        parts.append(node)
                        istring += string[tindex:sindex[0]]

        elif c == '\\':
            # backslash: keep only the escaped character
            istring += string[sindex[0]+1:sindex[0]+2]
            sindex[0] += 2
        elif c == '"':
            # double quotes are simply dropped (quote removal)
            sindex[0] += 1
            continue

            # 8513
            #if qdoublequotes or qheredocument:
            #    sindex[0] += 1
            #else:
            #    tindex = sindex[0] + 1
            #    parts, sindex[0] = _stringextractdoublequoted(string, sindex[0])
            #    if tindex == 1 and sindex[0] == len(string):
            #        quotedstate = 'wholly'
            #    else:
            #        quotedstate = 'partially'

        elif c == "'":
            # entire string surronded by single quotes, no expansion is
            # going to happen
            if sindex[0] == 0 and string[-1] == "'":
                return [], string[1:-1]

            # check if we're inside double quotes
            if not qdoublequotes:
                # look for the closing ', we know we have one or otherwise
                # this wouldn't tokenize due to unmatched '
                tindex = sindex[0]
                sindex[0] = string.find("'", sindex[0]) + 1

                istring += string[tindex+1:sindex[0]-1]
            else:
                # this is a single quote inside double quotes, add it
                istring += c
                sindex[0] += 1
        else:
            istring += string[sindex[0]:sindex[0]+1]
            sindex[0] += 1

    if parts:
        # shift node positions from word-relative to absolute (token) offsets
        class v(bast.nodevisitor):
            def visitnode(self, node):
                assert node.pos[1] + wordtoken.lexpos <= wordtoken.endlexpos
                node.pos = (node.pos[0] + wordtoken.lexpos,
                            node.pos[1] + wordtoken.lexpos)
        visitor = v()
        for node in parts:
            visitor.visit(node)

    return parts, istring
def p_list_terminator(p):
    '''list_terminator : NEWLINE | SEMICOLON | EOF'''
    # Only an explicit ';' yields an operator node; NEWLINE and EOF leave
    # the production value as None.
    p[0] = (bast.node(kind='operator', op=';', pos=p.lexspan(1))
            if p[1] == ';' else None)