Beispiel #1
0
def p_shell_command(p):
    '''shell_command : for_command
                     | case_command
                     | WHILE compound_list DO compound_list DONE
                     | UNTIL compound_list DO compound_list DONE
                     | select_command
                     | if_command
                     | subshell
                     | group_command
                     | arith_command
                     | cond_command
                     | arith_for_command'''
    # The docstring above is the grammar production; keep it untouched.
    #
    # Single-symbol productions simply forward the value of their only
    # symbol. The five-symbol productions (while/until) are spelled out
    # inline in the grammar, so their compound node is assembled here.
    if len(p) != 2:
        # while or until
        assert p[2].kind == 'list'

        loopparts = _makeparts(p)
        keyword = loopparts[0].word
        assert keyword in ('while', 'until')

        loopnode = bast.node(kind=keyword, parts=loopparts,
                             pos=_partsspan(loopparts))
        p[0] = bast.node(kind='compound',
                         redirects=[],
                         list=[loopnode],
                         pos=_partsspan(loopparts))
    else:
        p[0] = p[1]

    # whichever production matched, the result must be a compound node
    assert p[0].kind == 'compound'
Beispiel #2
0
def _expandword(parser, tokenword):
    """Build a 'word' node for tokenword.

    When parser._expansionlimit is -1 the token is wrapped as-is (the token
    itself becomes the node's word and parts is empty). Otherwise the word is
    run through subst._expandwordinternal; if the limit is 0 the substitution
    parts are dropped but the expanded word is still used.
    """
    if parser._expansionlimit == -1:
        # we enter this branch in the following conditions:
        # - currently parsing a substitution as a result of an expansion
        # - the previous expansion had limit == 0
        #
        # this means that this node is a descendant of a substitution in an
        # unexpanded word and will be filtered in the limit == 0 condition
        # further down
        #
        # (the reason we even expand when limit == 0 is to get quote removal)
        return bast.node(kind='word', word=tokenword,
                         pos=(tokenword.lexpos, tokenword.endlexpos), parts=[])

    isquoted = bool(tokenword.flags & flags.word.QUOTED)
    doublequoted = tokenword.value[0] == '"' if isquoted else False

    # TODO set qheredocument
    expansion = subst._expandwordinternal(parser, tokenword, 0,
                                          doublequoted, 0, 0)
    parts, expandedword = expansion

    # limit reached, don't include substitutions (still expanded to get
    # quote removal though)
    if parser._expansionlimit == 0:
        parts = [part for part in parts if 'substitution' not in part.kind]

    return bast.node(kind='word', word=expandedword,
                     pos=(tokenword.lexpos, tokenword.endlexpos), parts=parts)
Beispiel #3
0
def p_group_command(p):
    '''group_command : LEFT_CURLY compound_list RIGHT_CURLY'''
    # The docstring above is the grammar production; keep it untouched.
    #
    # Both braces become reservedword nodes surrounding the inner list; the
    # three together form the 'list' of a compound node without redirects.
    members = [
        bast.node(kind='reservedword', word=p[1], pos=p.lexspan(1)),
        p[2],
        bast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)),
    ]
    p[0] = bast.node(kind='compound', list=members, redirects=[],
                     pos=_partsspan(members))
Beispiel #4
0
def p_subshell(p):
    '''subshell : LEFT_PAREN compound_list RIGHT_PAREN'''
    # The docstring above is the grammar production; keep it untouched.
    #
    # Both parentheses become reservedword nodes surrounding the inner list;
    # the three together form the 'list' of a compound node without redirects.
    members = [
        bast.node(kind='reservedword', word=p[1], pos=p.lexspan(1)),
        p[2],
        bast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)),
    ]
    p[0] = bast.node(kind='compound', list=members, redirects=[],
                     pos=_partsspan(members))
Beispiel #5
0
def p_list0(p):
    '''list0 : list1 NEWLINE newline_list
             | list1 AMPERSAND newline_list
             | list1 SEMICOLON newline_list'''
    # The docstring above is the grammar production; keep it untouched.
    items = p[1]

    # A single item terminated by a plain newline needs no wrapping list
    # node: the item itself is the result.
    if len(items) <= 1 and p.slice[2].ttype == tokenizer.tokentype.NEWLINE:
        p[0] = items[0]
        return

    # Otherwise the terminator is recorded as a trailing operator and the
    # whole sequence is wrapped in a 'list' node.
    terminator = bast.node(kind='operator', op=p[2], pos=p.lexspan(2))
    items.append(terminator)
    p[0] = bast.node(kind='list', parts=items, pos=_partsspan(items))
Beispiel #6
0
def p_if_command(p):
    '''if_command : IF compound_list THEN compound_list FI
                  | IF compound_list THEN compound_list ELSE compound_list FI
                  | IF compound_list THEN compound_list elif_clause FI'''
    # The docstring above is the grammar production; keep it untouched.
    #
    # All pieces of the statement end up in one flat parts list: the
    # condition/then/else/elif lists are not told apart because nothing
    # downstream needs that yet. Dedicated elif/else nodes can be introduced
    # later if a need shows up.
    clauses = _makeparts(p)
    ifnode = bast.node(kind='if', parts=clauses, pos=_partsspan(clauses))
    p[0] = bast.node(kind='compound',
                     redirects=[],
                     list=[ifnode],
                     pos=_partsspan(clauses))
Beispiel #7
0
def p_redirection(p):
    '''redirection : GREATER WORD
                   | LESS WORD
                   | NUMBER GREATER WORD
                   | NUMBER LESS WORD
                   | REDIR_WORD GREATER WORD
                   | REDIR_WORD LESS WORD
                   | GREATER_GREATER WORD
                   | NUMBER GREATER_GREATER WORD
                   | REDIR_WORD GREATER_GREATER WORD
                   | GREATER_BAR WORD
                   | NUMBER GREATER_BAR WORD
                   | REDIR_WORD GREATER_BAR WORD
                   | LESS_GREATER WORD
                   | NUMBER LESS_GREATER WORD
                   | REDIR_WORD LESS_GREATER WORD
                   | LESS_LESS_LESS WORD
                   | NUMBER LESS_LESS_LESS WORD
                   | REDIR_WORD LESS_LESS_LESS WORD
                   | LESS_AND NUMBER
                   | NUMBER LESS_AND NUMBER
                   | REDIR_WORD LESS_AND NUMBER
                   | GREATER_AND NUMBER
                   | NUMBER GREATER_AND NUMBER
                   | REDIR_WORD GREATER_AND NUMBER
                   | LESS_AND WORD
                   | NUMBER LESS_AND WORD
                   | REDIR_WORD LESS_AND WORD
                   | GREATER_AND WORD
                   | NUMBER GREATER_AND WORD
                   | REDIR_WORD GREATER_AND WORD
                   | GREATER_AND DASH
                   | NUMBER GREATER_AND DASH
                   | REDIR_WORD GREATER_AND DASH
                   | LESS_AND DASH
                   | NUMBER LESS_AND DASH
                   | REDIR_WORD LESS_AND DASH
                   | AND_GREATER WORD
                   | AND_GREATER_GREATER WORD'''
    # The docstring above is the grammar production; keep it untouched.
    parserobj = p.context

    # Two-symbol rules are "<operator> <target>" with no explicit source;
    # three-symbol rules are "<source> <operator> <target>".
    if len(p) == 3:
        source, operator, targetidx = None, p[1], 2
    else:
        source, operator, targetidx = p[1], p[2], 3

    # WORD targets are expanded into word nodes; any other target (number,
    # dash) is stored as the raw production value.
    target = p[targetidx]
    targettoken = p.slice[targetidx]
    if targettoken.ttype == tokenizer.tokentype.WORD:
        target = _expandword(parserobj, targettoken)

    p[0] = bast.node(kind='redirect', input=source, type=operator,
                     heredoc=None, output=target,
                     pos=(p.lexpos(1), p.endlexpos(targetidx)))
Beispiel #8
0
def _extractcommandsubst(parserobj, string, sindex, sxcommand=False):
    """Parse the command substitution whose body starts at string[sindex].

    Returns a (node, index) pair: a 'commandsubstitution' node and the index
    one past the value reported by _parsedolparen. A '(' at sindex raises
    NotImplementedError (arithmetic expansion is not handled here).
    sxcommand is accepted but not used in this function.
    """
    if string[sindex] == '(':
        # would be handled by something like:
        # _extractdelimitedstring(parserobj, string, sindex, '$(', '(', '(', sxcommand=True)
        raise NotImplementedError('arithmetic expansion')

    command, stop = _parsedolparen(parserobj, string, sindex)
    stop += 1
    # the node starts two characters before sindex
    # (presumably to cover the leading "$(" -- confirm against the caller)
    substitution = bast.node(kind='commandsubstitution', command=command,
                             pos=(sindex - 2, stop))
    return substitution, stop
Beispiel #9
0
def p_function_def(p):
    '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body
                    | FUNCTION WORD LEFT_PAREN RIGHT_PAREN newline_list function_body
                    | FUNCTION WORD newline_list function_body'''
    # The docstring above is the grammar production; keep it untouched.
    pieces = _makeparts(p)

    # the name is the first 'word' part, the body is always the last part
    nameidx = bast.findfirstkind(pieces, 'word')
    funcname = pieces[nameidx]
    funcbody = pieces[-1]

    p[0] = bast.node(kind='function', name=funcname, body=funcbody,
                     parts=pieces, pos=_partsspan(pieces))
Beispiel #10
0
def p_pipeline(p):
    '''pipeline : pipeline BAR newline_list pipeline
                | pipeline BAR_AND newline_list pipeline
                | command'''
    # The docstring above is the grammar production; keep it untouched.
    #
    # A pipeline is represented as a flat list: command, pipe, command, ...
    if len(p) == 2:
        # lone command: start a fresh list
        p[0] = [p[1]]
        return

    # left pipeline + pipe node + right pipeline, reusing the left list.
    # The last symbol is addressed as len(p) - 1 on purpose; a negative
    # index is not assumed to mean "last symbol" on the production object.
    lastidx = len(p) - 1
    p[0] = stages = p[1]
    stages.append(bast.node(kind='pipe', pipe=p[2], pos=p.lexspan(2)))
    stages.extend(p[lastidx])
Beispiel #11
0
def p_simple_list(p):
    '''simple_list : simple_list1
                   | simple_list1 AMPERSAND
                   | simple_list1 SEMICOLON'''
    # The docstring above is the grammar production; keep it untouched.
    lexer = p.lexer
    heredoc.gatherheredocuments(lexer)

    items = p[1]
    terminated = len(p) == 3
    if not terminated and len(items) <= 1:
        # exactly one element and no trailing operator: return it bare
        assert len(items) == 1
        p[0] = items[0]
    else:
        # several elements and/or a trailing '&' / ';': wrap in a list node
        if terminated:
            items.append(bast.node(kind='operator', op=p[2],
                                   pos=p.lexspan(2)))
        p[0] = bast.node(kind='list', parts=items, pos=_partsspan(items))

    # While the CMDSUBST parser-state flag is set, reaching the lexer's
    # shell EOF token right after an unterminated list ends the parse.
    if len(p) == 2 and lexer._parserstate & flags.parser.CMDSUBST:
        if lexer._current_token.nopos() == lexer._shell_eof_token:
            # accept the input
            p.accept()
Beispiel #12
0
def p_elif_clause(p):
    '''elif_clause : ELIF compound_list THEN compound_list
                   | ELIF compound_list THEN compound_list ELSE compound_list
                   | ELIF compound_list THEN compound_list elif_clause'''
    # The docstring above is the grammar production; keep it untouched.
    #
    # The result is a flat list of nodes: existing nodes are kept, keyword
    # tokens are wrapped in reservedword nodes.
    parts = []
    for i in range(1, len(p)):
        value = p[i]
        if isinstance(value, bast.node):
            parts.append(value)
        elif isinstance(value, list):
            # A trailing nested elif_clause has already been reduced to a
            # flat list by this very function. Splice it in (the same way
            # _makeparts treats list values) instead of wrapping the whole
            # list in a reservedword node whose 'word' would be a list.
            parts.extend(value)
        else:
            parts.append(bast.node(kind='reservedword', word=value,
                                   pos=p.lexspan(i)))
    p[0] = parts
Beispiel #13
0
def p_compound_list(p):
    '''compound_list : list
                     | newline_list list1'''
    # The docstring above is the grammar production; keep it untouched.
    if len(p) == 2:
        # 'list' already produced the final value
        p[0] = p[1]
        return

    # 'newline_list list1': list1 yields a plain python list of parts.
    # One part is returned bare, several get wrapped in a 'list' node.
    items = p[2]
    if len(items) <= 1:
        p[0] = items[0]
    else:
        p[0] = bast.node(kind='list', parts=items, pos=_partsspan(items))
Beispiel #14
0
def p_simple_list1(p):
    '''simple_list1 : simple_list1 AND_AND newline_list simple_list1
                    | simple_list1 OR_OR newline_list simple_list1
                    | simple_list1 AMPERSAND simple_list1
                    | simple_list1 SEMICOLON simple_list1
                    | pipeline_command'''
    # The docstring above is the grammar production; keep it untouched.
    #
    # The value is a flat python list: element, operator, element, ...
    if len(p) == 2:
        p[0] = [p[1]]
        return

    # Reuse the left-hand list, add the operator, then the right-hand list.
    # The last symbol is addressed as len(p) - 1 on purpose; a negative
    # index is not assumed to mean "last symbol" on the production object.
    lastidx = len(p) - 1
    p[0] = items = p[1]
    items.append(bast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
    items.extend(p[lastidx])
Beispiel #15
0
def p_for_command(p):
    '''for_command : FOR WORD newline_list DO compound_list DONE
                   | FOR WORD newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD SEMICOLON newline_list DO compound_list DONE
                   | FOR WORD SEMICOLON newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD newline_list IN word_list list_terminator newline_list DO compound_list DONE
                   | FOR WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD newline_list IN list_terminator newline_list DO compound_list DONE
                   | FOR WORD newline_list IN list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY'''
    # The docstring above is the grammar production; keep it untouched.
    pieces = _makeparts(p)

    # list_terminator/newline_list may have left a ';' operator node among
    # the parts. Only the first one is rewritten into a reservedword so it
    # counts as part of the for loop itself (at most one is expected).
    semicolon = next((idx for idx, piece in enumerate(pieces)
                      if piece.kind == 'operator' and piece.op == ';'),
                     None)
    if semicolon is not None:
        pieces[semicolon] = bast.node(kind='reservedword', word=';',
                                      pos=pieces[semicolon].pos)

    fornode = bast.node(kind='for', parts=pieces, pos=_partsspan(pieces))
    p[0] = bast.node(kind='compound',
                     redirects=[],
                     list=[fornode],
                     pos=_partsspan(pieces))
Beispiel #16
0
def p_pipeline_command(p):
    '''pipeline_command : pipeline
                        | BANG pipeline_command
                        | timespec pipeline_command
                        | timespec list_terminator
                        | BANG list_terminator'''
    # The docstring above is the grammar production; keep it untouched.
    if len(p) == 2:
        # plain pipeline: a single stage is returned bare, several stages
        # are wrapped in a 'pipeline' node spanning first to last stage
        stages = p[1]
        if len(stages) == 1:
            p[0] = stages[0]
        else:
            p[0] = bast.node(kind='pipeline', parts=stages,
                             pos=(stages[0].pos[0], stages[-1].pos[1]))
        return

    # XXX timespec
    # every two-symbol rule gets a '!' reservedword node as its first part
    bang = bast.node(kind='reservedword', word='!', pos=p.lexspan(1))
    target = p[2]
    if target.kind != 'pipeline':
        p[0] = bast.node(kind='pipeline', parts=[bang, target],
                         pos=(bang.pos[0], target.pos[1]))
        return

    # already a pipeline node: prepend in place and widen its span
    p[0] = target
    target.parts.insert(0, bang)
    target.pos = (target.parts[0].pos[0], target.parts[-1].pos[1])
Beispiel #17
0
def p_redirection_heredoc(p):
    '''redirection : LESS_LESS WORD
                   | NUMBER LESS_LESS WORD
                   | REDIR_WORD LESS_LESS WORD
                   | LESS_LESS_MINUS WORD
                   | NUMBER LESS_LESS_MINUS WORD
                   | REDIR_WORD LESS_LESS_MINUS WORD'''
    # The docstring above is the grammar production; keep it untouched.
    parserobj = p.context
    assert isinstance(parserobj, _parser)

    # In every rule the WORD is the last symbol and the heredoc operator
    # sits right before it; three-symbol rules add an explicit source first.
    wordidx = len(p) - 1
    opidx = len(p) - 2

    # the word is wrapped as-is in a word node with no parts
    output = bast.node(kind='word', word=p[wordidx], parts=[],
                       pos=p.lexspan(wordidx))

    source = None if len(p) == 3 else p[1]
    p[0] = bast.node(kind='redirect', input=source, type=p[opidx],
                     heredoc=None, output=output,
                     pos=(p.lexpos(1), p.endlexpos(wordidx)))

    # Queue the redirect on the parser's redirstack together with a flag
    # that is False for '<<' and True for the other operator ('<<-').
    isplain = p.slice[opidx].ttype == tokenizer.tokentype.LESS_LESS
    parserobj.redirstack.append((p[0], not isplain))
Beispiel #18
0
def p_list1(p):
    '''list1 : list1 AND_AND newline_list list1
             | list1 OR_OR newline_list list1
             | list1 AMPERSAND newline_list list1
             | list1 SEMICOLON newline_list list1
             | list1 NEWLINE newline_list list1
             | pipeline_command'''
    # The docstring above is the grammar production; keep it untouched.
    #
    # The value is a flat python list: element, operator, element, ...
    if len(p) == 2:
        p[0] = [p[1]]
        return

    # Reuse the left-hand list, add the separator as an operator node
    # (XXX newline is recorded as an operator too), then the right-hand list.
    # The last symbol is addressed as len(p) - 1 on purpose; a negative
    # index is not assumed to mean "last symbol" on the production object.
    lastidx = len(p) - 1
    p[0] = items = p[1]
    items.append(bast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
    items.extend(p[lastidx])
Beispiel #19
0
def _paramexpand(parserobj, string, sindex):
    """Parse the expansion that begins at string[sindex].

    The character after sindex selects the form: a digit or one of
    $#?-!*@ gives a one-character parameter, '{' a braced parameter, '('
    is handed to _extractcommandsubst, '[' raises NotImplementedError, and
    anything else is read as a run of alphanumerics/underscores.

    Returns a (node, index) pair; node is None when nothing could be read.
    """
    after = sindex + 1
    lead = string[after] if after < len(string) else None

    # forms that are delegated or unsupported leave right away
    if lead == '(':
        return _extractcommandsubst(parserobj, string, after + 1)
    if lead == '[':
        # would be: _extractarithmeticsubst(string, after + 1)
        raise NotImplementedError('arithmetic substitution')

    node = None
    end = after
    if lead and lead in '0123456789$#?-!*@':
        # XXX 7685
        node = bast.node(kind='parameter', value=lead,
                         pos=(sindex, end + 1))
    elif lead == '{':
        # XXX 7863
        # TODO not smart enough, doesn't consider escaping
        # NOTE: find() yields -1 when there is no closing brace and that
        # value is used unchecked below
        end = string.find('}', end + 1)
        node = bast.node(kind='parameter', value=string[sindex + 2:end],
                         pos=(sindex, end + 1))
        # TODO
        # return _parameterbraceexpand(string, end)
    else:
        # consume the longest run of name characters
        while end < len(string) and (string[end].isalnum() or
                                     string[end] == '_'):
            end += 1
        text = string[sindex:end]
        if text:
            # the first character of the slice is not part of the name
            named = bast.node(kind='parameter', value=text[1:],
                              pos=(sindex, end))
            return (named, end)

    # step past the last consumed character, unless already at the end
    if end < len(string):
        end += 1

    return node, end
Beispiel #20
0
def p_command(p):
    '''command : simple_command
               | shell_command
               | shell_command redirection_list
               | function_def
               | coproc'''
    # The docstring above is the grammar production; keep it untouched.
    head = p[1]

    # a value that is not a node yet is a parts sequence: wrap it in a
    # 'command' node
    if not isinstance(head, bast.node):
        p[0] = bast.node(kind='command', parts=head, pos=_partsspan(head))
        return

    p[0] = head
    if len(p) == 3:
        # trailing redirections are attached to the compound node and its
        # span is stretched to the end of the last one
        assert head.kind == 'compound'
        head.redirects.extend(p[2])
        lastredirect = head.redirects[-1]
        assert head.pos[0] < lastredirect.pos[1]
        head.pos = (head.pos[0], lastredirect.pos[1])
Beispiel #21
0
def _makeparts(p):
    """Flatten the right-hand-side symbols of production p into a node list.

    Nodes are kept, lists are spliced in, WORD tokens are expanded via
    _expandword and any other token becomes a reservedword node. Symbols
    matching none of these are skipped.
    """
    collected = []
    for idx in range(1, len(p)):
        value = p[idx]
        if isinstance(value, bast.node):
            collected.append(value)
            continue
        if isinstance(value, list):
            collected.extend(value)
            continue

        symbol = p.slice[idx]
        if not isinstance(symbol, tokenizer.token):
            # neither node, list nor token: nothing to record
            continue

        if symbol.ttype == tokenizer.tokentype.WORD:
            collected.append(_expandword(p.context, symbol))
        else:
            collected.append(bast.node(kind='reservedword', word=value,
                                       pos=p.lexspan(idx)))

    return collected
    def normalize_command(node, current):
        arg_status = None                       # determine argument types
        head_commands = []
        unary_logic_ops = []
        binary_logic_ops = []
        unprocessed_unary_logic_ops = []
        unprocessed_binary_logic_ops = []

        def expecting(a_t):
            for arg_type, is_list, filled in arg_status["non-optional"]:
                if not is_list and filled:
                    continue
                if arg_type == a_t:
                    return True
            for arg_type, is_list, filled in arg_status["optional"]:
                if not is_list and filled:
                    continue
                if arg_type == a_t:
                    return True
            return False

        def organize_buffer(lparenth, rparenth):
            node = lparenth.rsb
            while node != rparenth:
                # if node.kind == "unarylogicop":
                #     adjust_unary_operators(node)
                node = node.rsb
            node = lparenth.rsb
            while node != rparenth:
                # if node.kind == "binarylogicop":
                #     adjust_binary_operators(node)
                node = node.rsb
            node = lparenth.rsb
            if node.rsb == rparenth:
                return lparenth.rsb
            else:
                norm_node = BracketNode()
                while node != rparenth:
                    attach_to_tree(node, norm_node)
                    node = node.rsb
                return norm_node

        def adjust_unary_operators(node):
            if node.associate == UnaryLogicOpNode.RIGHT:
                # change right sibling to child
                rsb = node.rsb
                if not rsb:
                    print("Warning: unary logic operator without a right "
                          "sibling.")
                    print(node.parent)
                    return
                if rsb.value == "(":
                    unprocessed_unary_logic_ops.append(node)
                    return
                if rsb.value == ")":
                    # TODO: this corner case is not handled very well
                    node.associate = UnaryLogicOpNode.LEFT
                    unprocessed_unary_logic_ops.append(node)
                    return
                make_sibling(node, rsb.rsb)
                node.parent.remove_child(rsb)
                rsb.lsb = None
                rsb.rsb = None
                node.add_child(rsb)
            elif node.associate == UnaryLogicOpNode.LEFT:
                # change left sibling to child
                lsb = node.lsb
                if not lsb:
                    print("Warning: unary logic operator without a left "
                          "sibling.")
                    print(node.parent)
                    return
                if lsb.value == ")":
                    unprocessed_unary_logic_ops.append(node)
                    return
                if (lsb.kind == "binarylogicop" and lsb.get_num_of_children() < 2) \
                        or lsb.value == "(":
                    # TODO: this corner case is not handled very well
                    # it is often triggered by the bizarreness of -prune
                    return
                make_sibling(lsb.lsb, node)
                node.parent.remove_child(lsb)
                lsb.lsb = None
                lsb.rsb = None
                node.add_child(lsb)
            else:
                raise AttributeError("Cannot decide unary operator "
                                     "assocation: {}".format(node.symbok))

            # resolve single child of binary operators left as the result of
            # parentheses processing
            if node.parent.kind == "bracket" and node.parent.get_num_of_children() == 1:
                node.grandparent.replace_child(node.parent, node)

        def adjust_binary_operators(node):
            # change right sibling to Child
            # change left sibling to child
            rsb = node.rsb
            lsb = node.lsb

            if not rsb or not lsb:
                raise AttributeError("Error: binary logic operator must have "
                                     "both left and right siblings.")

            if rsb.value == "(" or lsb.value == ")":
                unprocessed_binary_logic_ops.append(node)
                # sibling is parenthese
                return

            assert(rsb.value != ")")
            assert(lsb.value != "(")

            make_sibling(node, rsb.rsb)
            make_sibling(lsb.lsb, node)
            node.parent.remove_child(rsb)
            node.parent.remove_child(lsb)
            rsb.rsb = None
            lsb.lsb = None

            if lsb.kind == "binarylogicop" and lsb.value == node.value:
                for lsbc in lsb.children:
                    make_parent_child(node, lsbc)
                make_parent_child(node, rsb)
                lsbcr = lsb.get_right_child()
                make_sibling(lsbcr, rsb)
            else:
                make_parent_child(node, lsb)
                make_parent_child(node, rsb)
                make_sibling(lsb, rsb)

            # resolve single child of binary operators left as the result of
            # parentheses processing
            if node.parent.kind == "binarylogicop" \
                    and node.parent.value == "-and":
                if node.parent.get_num_of_children() == 1:
                    node.grandparent.replace_child(node.parent, node)

        def attach_flag(node, attach_point_info):
            attach_point = attach_point_info[0]

            if node.word.startswith("--") \
                or is_unary_logic_op(node, attach_point) \
                or node.word in binary_logic_operators \
                or attach_point.value == "find" \
                or len(node.word) <= 1:
                normalize_flag(node, attach_point)
            else:
                # split flags
                assert(node.word.startswith('-'))
                options = node.word[1:]
                if len(options) == 1 and not options.isdigit():
                    normalize_flag(node, attach_point)
                else:
                    if options[-1].isdigit() and \
                        ((attach_point.value == "grep" and options.startswith("A")) or
                         (attach_point.value == "grep" and options.startswith("B")) or
                         (attach_point.value == "grep" and options.startswith("C")) or
                         (attach_point.value == "head" and options.isdigit()) or
                         (attach_point.value == "tail" and options.isdigit()) or
                         (attach_point.value == "head" and options.startswith("n")) or
                         (attach_point.value == "tail" and options.startswith("n")) or
                         (attach_point.value == "awk" and options.startswith("F")) or
                         (attach_point.value == "xargs" and options.startswith("n")) or
                         (attach_point.value == "xargs" and options.startswith("l")) or
                         (attach_point.value == "xargs" and options.startswith("P"))):
                        node.word = re.sub(_DIGIT_RE, _NUM, node.word)
                        normalize_flag(node, attach_point)
                    else:
                        str = options + " splitted into: "
                        for option in options:
                            new_node = copy.deepcopy(node)
                            new_node.word = '-' + option
                            normalize_flag(new_node, attach_point)
                            str += new_node.word + ' '
                        if verbose:
                            print(str)

            head_cmd = attach_point.headcommand.value
            flag = node.word
            arg_type = man_lookup.get_flag_arg_type(head_cmd, flag)
            if arg_type:
                # flag is expecting an argument
                attach_point = attach_point.get_right_child()
                return (attach_point, ["argument"], [arg_type])
            else:
                # flag does not take arguments
                return attach_point_info

        def look_above(attach_point):
            head_cmd = attach_point.headcommand
            return (head_cmd, ["flags", "arguments"], None)

        # Attach point format: (pointer_to_the_attach_point,
        #                       ast_node_type, arg_type)
        attach_point_info = (current, ["headcommand"], [])

        ind = 0
        while ind < len(node.parts):
            attach_point = attach_point_info[0]
            possible_node_kinds = attach_point_info[1]
            possible_arg_types = attach_point_info[2]

            child = node.parts[ind]
            if child.kind == 'word':
                # prioritize processing of logic operators
                if is_unary_logic_op(child, attach_point):
                    norm_node = UnaryLogicOpNode(child.word)
                    attach_to_tree(norm_node, attach_point)
                    unary_logic_ops.append(norm_node)
                elif is_binary_logic_op(child, attach_point):
                    norm_node = BinaryLogicOpNode(child.word)
                    attach_to_tree(norm_node, attach_point)
                    binary_logic_ops.append(norm_node)
                else:
                    if child.word == "--" and not attach_point.is_command("awk"):
                        attach_point_info = (attach_point_info[0],
                                             ["argument"],
                                             attach_point_info[2])
                        ind += 1
                        continue

                    if len(possible_node_kinds) == 1:
                        # no ast_node_kind ambiguation
                        node_kind = possible_node_kinds[0]
                        if node_kind == "headcommand":
                            norm_node = normalize_headcommand(child,
                                                              attach_point)
                            head_commands.append(norm_node)
                            head_cmd = norm_node.value
                            arg_status = copy.deepcopy(man_lookup.get_arg_types(head_cmd))
                            attach_point_info = \
                                (norm_node, ["flag", "argument"], None)
                        elif node_kind == "argument":
                            if possible_arg_types and "Utility" in possible_arg_types:
                                # embedded command leaded by
                                # ["-exec", "-execdir", "-ok", "-okdir"]
                                new_command_node = bast.node(kind="command",
                                                             word="",
                                                             parts=[],
                                                             pos=(-1,-1))
                                new_command_node.parts = []
                                subcommand_added = False
                                for j in xrange(ind, len(node.parts)):
                                    if hasattr(node.parts[j], 'word') \
                                        and (node.parts[j].word == ";" \
                                        or node.parts[j].word == "+"):
                                        normalize_command(new_command_node,
                                                          attach_point)
                                        attach_point.value += \
                                            '::' + node.parts[j].word
                                        subcommand_added = True
                                        break
                                    else:
                                        new_command_node.parts.\
                                            append(node.parts[j])
                                if not subcommand_added:
                                    print("Warning: -exec missing ending ';'")
                                    normalize_command(new_command_node,
                                                      attach_point)
                                    attach_point.value += '::' + ";"
                                ind = j
                            else:
                                if possible_arg_types:
                                    arg_type = list(possible_arg_types)[0]
                                else:
                                    # "--" encountered
                                    arg_type = cmd_arg_type_check(child,
                                                                  arg_status)
                                # recurse to main normalization to handle
                                # argument with deep structures
                                normalize(child, attach_point, "argument",
                                          arg_type)
                            attach_point_info = look_above(attach_point)
                    else:
                        # need to decide ast_node_kind
                        if child.word.startswith("-") and \
                                not ((attach_point.value in ["head", "tail"]
                                      and child.word[1:].isdigit()) or
                                     (attach_point.value in ["chmod"]
                                      and ('r' in child.word or
                                           'x' in child.word or
                                           'w' in child.word or
                                           'X' in child.word or
                                           's' in child.word or
                                           't' in child.word or
                                           'u' in child.word or
                                           'g' in child.word or
                                           'o' in child.word))):
                            # child is a flag
                            attach_point_info = \
                                attach_flag(child, attach_point_info)
                        else:
                            # child is an argument
                            if expecting("Utility"):
                                # embedded command leaded by
                                # ["sh", "csh", "ksh", "tcsh",
                                #  "zsh", "bash", "exec", "xargs"]
                                new_command_node = bast.node(kind="command",
                                                             word="",
                                                             parts=[],
                                                             pos=(-1,-1))
                                new_command_node.parts = []
                                for j in xrange(ind, len(node.parts)):
                                    new_command_node.parts.append(node.parts[j])
                                normalize_command(new_command_node,
                                                  attach_point)
                                ind = j
                            else:
                                arg_type = cmd_arg_type_check(child.word, arg_status)
                                # recurse to main normalization to handle argument
                                # with deep structures
                                normalize(child, attach_point, "argument", arg_type)
                            attach_point_info = look_above(attach_point)

            elif child.kind == "assignment":
                normalize(child, attach_point, "assignment")
            elif child.kind == "redirect":
                normalize(child, attach_point, "redirect")

            ind += 1

        # TODO: some commands get parsed with no head command
        # This is usually due to unrecognized utilities e.g. "mp3player".
        if len(head_commands) == 0:
            return

        if len(head_commands) > 1:
            print("Error: multiple headcommands in one command.")
            for hc in head_commands:
                print(hc.symbol)
            sys.exit()

        head_command = head_commands[0]

        # process (embedded) parentheses -- treat as implicit "-and"
        stack = []
        depth = 0

        def pop_stack_content(depth, rparenth, stack_top=None):
            """Collapse the innermost open parenthesis group.

            Pops entries off the enclosing ``stack`` until the matching "("
            is reached, detaching every popped entry from ``head_command``.
            When ``rparenth`` is falsy (unbalanced input) a ")" node is
            created and linked next to ``stack_top``.  The group is then
            replaced through ``head_command.substitute_parentheses``.

            Returns the decremented depth together with the index reported
            by ``substitute_parentheses``.
            """
            # unwind until the opening parenthesis surfaces
            while True:
                entry = stack.pop()
                if entry.value == "(":
                    break
                head_command.remove_child(entry)
            lparenth = entry

            if not rparenth:
                # unbalanced brackets: fabricate the missing ")"
                rparenth = ArgumentNode(value=")")
                make_parent_child(stack_top.parent, rparenth)
                make_sibling(stack_top, rparenth)

            new_child = organize_buffer(lparenth, rparenth)
            i = head_command.substitute_parentheses(
                lparenth, rparenth, new_child)

            depth -= 1
            if depth > 0:
                # still inside an outer group: the collapsed node belongs to it
                stack.append(new_child)
            return depth, i

        i = 0
        while i < head_command.get_num_of_children():
            child = head_command.children[i]
            if child.value == "(":
                stack.append(child)
                depth += 1
            elif child.value == ")":
                assert(depth >= 0)
                # fix imbalanced parentheses: missing '('
                if depth == 0:
                    # simply drop the single ')'
                    detach_from_tree(child, child.parent)
                else:
                    depth, i = pop_stack_content(depth, child)
            else:
                if depth > 0:
                    stack.append(child)
                else:
                    if child.kind == "unarylogicop":
                        unprocessed_unary_logic_ops.append(child)
                    if child.kind == "binarylogicop":
                        unprocessed_binary_logic_ops.append(child)
            i += 1

        # fix imbalanced parentheses: missing ')'
        while (depth > 0):
            depth, _ = pop_stack_content(depth, None, stack[-1])

        assert(len(stack) == 0)
        assert(depth == 0)

        # for ul in unprocessed_unary_logic_ops:
        #     adjust_unary_operators(ul)

        # for bl in unprocessed_binary_logic_ops:
        #     adjust_binary_operators(bl)

        # recover omitted arguments
        if head_command.value == "find":
            arguments = []
            for child in head_command.children:
                if child.is_argument():
                    arguments.append(child)
            if head_command.get_num_of_children() > 0 and len(arguments) < 1:
                norm_node = ArgumentNode(value=".", arg_type="File")
                make_sibling(norm_node, head_command.children[0])
                norm_node.parent = head_command
                head_command.children.insert(0, norm_node)

        # "grep" normalization
        if head_command.value == "egrep":
            head_command.value = "grep"
            flag_present = False
            for child in head_command.children:
                if child.is_option() and child.value in ["-E", "--extended-regexp"]:
                    flag_present = True
            if not flag_present:
                norm_node = FlagNode(value="-E")
                make_sibling(norm_node, head_command.children[0])
                norm_node.parent = head_command
                head_command.children.insert(0, norm_node)

        if head_command.value == "fgrep":
            head_command.value = "grep"
            flag_present = False
            for child in head_command.children:
                if child.is_option() and child.value in ["-F", "--fixed-strings"]:
                    flag_present = True
            if not flag_present:
                norm_node = FlagNode(value="-F")
                make_sibling(norm_node, head_command.children[0])
                norm_node.parent = head_command
                head_command.children.insert(0, norm_node)

        # "xargs" normalization
        has_repl_str = False
        if head_command.value == "xargs":
            for flag in head_command.get_flags():
                if flag.value == "-I":
                    has_repl_str = True
                    repl_str = flag.get_argument()
                    assert(repl_str is not None)
                    if repl_str.value != "{}":
                        utility = head_command.get_subcommand()
                        assert(utility is not None)
                        utility.normalize_repl_str(repl_str.value, '{}')
                        repl_str.value = "{}"
                        repl_str.arg_type = "ReservedWord"
            # add a replace str if not present
            if not has_repl_str:
                utility = head_command.get_subcommand()
                assert(utility is not None)
                for i in xrange(head_command.get_num_of_children()):
                    if head_command.children[i].is_headcommand():
                        repl_str_flag_node = FlagNode("-I")
                        repl_str_node = ArgumentNode("{}", "ReservedWord")
                        repl_str_node2 = ArgumentNode("{}", "ReservedWord")
                        make_parent_child(repl_str_flag_node, repl_str_node)

                        head_command.children.insert(i, repl_str_flag_node)
                        repl_str_flag_node.parent = head_command
                        repl_str_flag_node.lsb = head_command.children[i-1]
                        head_command.children[i-1].rsb = repl_str_flag_node

                        sub_command = head_command.children[i+1]
                        repl_str_node2.parent = sub_command
                        repl_str_node2.lsb = sub_command.get_right_child()
                        sub_command.children.append(repl_str_node2)
                        break
Beispiel #23
0
def _expandwordinternal(parserobj, wordtoken, qheredocument, qdoublequotes, quoted, isexp):
    """Scan a word token, collecting expansion nodes and the unquoted text.

    The word's value is walked character by character.  Process
    substitutions, tilde prefixes, ``$`` expansions and backtick command
    substitutions are turned into nodes; quotes and backslashes are removed
    from the accumulated text.

    Args:
        parserobj: forwarded to the helpers that parse nested constructs.
        wordtoken: token to scan.  Its ``value`` is the text, ``flags`` is
            consulted (and rewritten in the tilde branch), and ``lexpos`` /
            ``endlexpos`` are used to shift node positions at the end.
        qheredocument: truthy disables process substitution and tilde
            expansion.
        qdoublequotes: truthy disables process substitution and tilde
            expansion and makes every single quote a literal character.
        quoted: not read by this function.
        isexp: not read by this function.

    Returns:
        ``(parts, istring)``: the list of expansion nodes (positions offset
        by ``wordtoken.lexpos``) and the text after quote removal.

    Raises:
        errors.ParsingError: a backtick has no closing backtick.
    """
    # bash/subst.c L8132
    istring = ''
    parts = []
    tindex = 0
    sindex = [0]
    string = wordtoken.value
    # True while scanning between an opening and a closing '"' of this word;
    # single quotes found there are literal characters, not quoting.
    indoublequotes = False

    def nextchar():
        sindex[0] += 1
        if sindex[0] < len(string):
            return string[sindex[0]]

    # '<' rather than '==': the backslash branch advances by two and may step
    # past the end when the word finishes with a lone backslash.
    while sindex[0] < len(string):
        c = string[sindex[0]]
        if c in '<>':
            if (nextchar() != '(' or qheredocument or qdoublequotes or
                (wordtoken.flags & set([flags.word.DQUOTE, flags.word.NOPROCSUB]))):
                sindex[0] -= 1

                # goto add_character
                sindex[0] += 1
                istring += c
            else:
                # sindex[0] is on the '(', the substitution body starts after it
                tindex = sindex[0] + 1

                node, sindex[0] = _extractprocesssubst(parserobj, string, tindex)

                parts.append(bast.node(kind='processsubstitution', command=node,
                                       pos=(tindex - 2, sindex[0])))
                istring += string[tindex - 2:sindex[0]]
                # goto dollar_add_string
        # TODO
        # elif c == '=':
        #     pass
        # elif c == ':':
        #     pass
        elif c == '~':
            if (wordtoken.flags & set([flags.word.NOTILDE, flags.word.DQUOTE]) or
                (sindex[0] > 0 and not (wordtoken.flags & flags.word.NOTILDE)) or
                qdoublequotes or qheredocument):
                wordtoken.flags.clear()
                wordtoken.flags.add(flags.word.ITILDE)
                sindex[0] += 1
                istring += c
            else:
                stopatcolon = wordtoken.flags & set([flags.word.ASSIGNRHS,
                                                    flags.word.ASSIGNMENT,
                                                    flags.word.TILDEEXP])
                expand = True
                for i in range(sindex[0], len(string)):
                    r = string[i]
                    if r == '/':
                        break
                    if r in "\\'\"":
                        expand = False
                        break
                    if stopatcolon and r == ':':
                        break
                else:
                    # go one past the end if we didn't exit early
                    i += 1

                if i > sindex[0] and expand:
                    node = bast.node(kind='tilde', value=string[sindex[0]:i],
                                     pos=(sindex[0], i))
                    parts.append(node)
                istring += string[sindex[0]:i]
                sindex[0] = i

        elif c == '$' and len(string) > 1:
            tindex = sindex[0]
            node, sindex[0] = _paramexpand(parserobj, string, sindex[0])
            if node:
                parts.append(node)
            istring += string[tindex:sindex[0]]
        elif c == '`':
            tindex = sindex[0]
            # bare instance of ``
            if nextchar() == '`':
                sindex[0] += 1
                istring += '``'
            else:
                x = _stringextract(string, sindex[0], "`")
                if x == -1:
                    raise errors.ParsingError('bad substitution: no closing "`" '
                                              'in %s' % string)
                else:
                    if wordtoken.flags & flags.word.NOCOMSUB:
                        pass
                    else:
                        sindex[0] = x

                        word = string[tindex+1:sindex[0]]
                        command, ttindex = _recursiveparse(parserobj, word, 0)
                        _adjustpositions(command, tindex+1, len(string))
                        ttindex += 1 # ttindex is on the closing char

                        # assert sindex[0] == ttindex
                        # go one past the closing `
                        sindex[0] += 1

                        node = bast.node(kind='commandsubstitution',
                                         command=command,
                                         pos=(tindex, sindex[0]))
                        parts.append(node)
                        istring += string[tindex:sindex[0]]

        elif c == '\\':
            # keep the escaped character (nothing if the backslash is last)
            istring += string[sindex[0]+1:sindex[0]+2]
            sindex[0] += 2
        elif c == '"':
            # the quote itself is removed; remember whether a segment is open
            indoublequotes = not indoublequotes
            sindex[0] += 1
            continue

            # 8513
            #if qdoublequotes or qheredocument:
            #    sindex[0] += 1
            #else:
            #    tindex = sindex[0] + 1
            #    parts, sindex[0] = _stringextractdoublequoted(string, sindex[0])
            #    if tindex == 1 and sindex[0] == len(string):
            #        quotedstate = 'wholly'
            #    else:
            #        quotedstate = 'partially'

        elif c == "'":
            # entire string surrounded by a single pair of single quotes, no
            # expansion is going to happen.  The opening quote must be closed
            # by the very last character: "'a'b'c'" is three segments.
            if sindex[0] == 0 and string.find("'", 1) == len(string) - 1:
                return [], string[1:-1]

            # check if we're inside double quotes
            if qdoublequotes or indoublequotes:
                # this is a single quote inside double quotes, add it
                istring += c
                sindex[0] += 1
            else:
                # look for the closing ' -- start after the opening quote,
                # otherwise find() returns the opening quote itself and the
                # quoted text would be scanned as if it were unquoted
                tindex = sindex[0]
                closing = string.find("'", tindex + 1)
                if closing == -1:
                    # no closing quote: drop the quote and keep scanning
                    sindex[0] += 1
                else:
                    # the text between the quotes is taken verbatim
                    istring += string[tindex+1:closing]
                    sindex[0] = closing + 1
        else:
            istring += string[sindex[0]:sindex[0]+1]
            sindex[0] += 1

    if parts:
        # positions so far are relative to the word; make them relative to
        # the input by adding the word's own start offset
        class v(bast.nodevisitor):
            def visitnode(self, node):
                assert node.pos[1] + wordtoken.lexpos <= wordtoken.endlexpos
                node.pos = (node.pos[0] + wordtoken.lexpos,
                            node.pos[1] + wordtoken.lexpos)
        visitor = v()
        for node in parts:
            visitor.visit(node)

    return parts, istring
Beispiel #24
0
def p_list_terminator(p):
    '''list_terminator : NEWLINE
                       | SEMICOLON
                       | EOF'''
    # NOTE(review): the docstring above looks like a yacc/PLY grammar
    # production and is presumably read by the parser generator -- keep it
    # verbatim.
    # Only a ';' terminator produces a value: an 'operator' node spanning the
    # first symbol. For the other alternatives nothing is assigned to p[0] in
    # the code visible here.
    if p[1] == ';':
        p[0] = bast.node(kind='operator', op=';', pos=p.lexspan(1))