Example #1
    def testShellFuncExecution(self):
        ex = cmd_exec_test.InitExecutor()
        func_node = ast.FuncDef()

        c1 = ast.CompoundWord()
        t1 = ast.token(Id.Lit_Chars, 'f1')
        c1.parts.append(ast.LiteralPart(t1))

        c2 = ast.CompoundWord()
        t2 = ast.token(Id.Lit_Chars, 'f2')
        c2.parts.append(ast.LiteralPart(t2))

        a = ast.ArrayLiteralPart()
        a.words = [c1, c2]
        w = ast.CompoundWord()
        w.parts.append(a)

        # Set global COMPREPLY=(f1 f2)
        pair = ast.assign_pair(ast.LhsName('COMPREPLY'), assign_op_e.Equal, w)
        pair.spids.append(0)  # dummy
        pairs = [pair]
        body_node = ast.Assignment(Id.Assign_None, [], pairs)

        func_node.name = 'myfunc'
        func_node.body = body_node

        a = completion.ShellFuncAction(ex, func_node)
        matches = list(a.Matches([], 0, 'f'))
        self.assertEqual(['f1 ', 'f2 '], matches)
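
The contract this test exercises can be shown without the osh machinery. A
minimal sketch, assuming a hypothetical run_func callback and a plain dict
standing in for shell state (this is the shape of ShellFuncAction, not its
implementation): run the function, read the COMPREPLY array it set, filter by
the prefix being completed, and append a space to each match.

def shell_func_matches(run_func, state, prefix):
    run_func()                             # e.g. sets state['COMPREPLY']
    for candidate in state.get('COMPREPLY', []):
        if candidate.startswith(prefix):
            yield candidate + ' '          # trailing space, as asserted above

state = {}
def myfunc():
    state['COMPREPLY'] = ['f1', 'f2']      # like COMPREPLY=(f1 f2)

print(list(shell_func_matches(myfunc, state, 'f')))  # ['f1 ', 'f2 ']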
Example #2
    def testPipeline2(self):
        Banner('ls | cut -d . -f 1 | head')
        p = process.Pipeline()
        p.Add(_ExtProc(['ls']))
        p.Add(_ExtProc(['cut', '-d', '.', '-f', '1']))
        p.Add(_ExtProc(['head']))

        print(p.Run(_WAITER))

        ex = InitExecutor()

        # Now run each command as a subshell (SubProgramThunk):
        # ls | head | sort --reverse
        w1 = ast.CompoundWord()
        w1.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls')))
        node1 = ast.SimpleCommand()
        node1.words = [w1]

        w2 = ast.CompoundWord()
        w2.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'head')))
        node2 = ast.SimpleCommand()
        node2.words = [w2]

        w3 = ast.CompoundWord()
        w3.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'sort')))
        w4 = ast.CompoundWord()
        w4.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, '--reverse')))
        node3 = ast.SimpleCommand()
        node3.words = [w3, w4]

        p = process.Pipeline()
        p.Add(Process(process.SubProgramThunk(ex, node1)))
        p.Add(Process(process.SubProgramThunk(ex, node2)))
        p.Add(Process(process.SubProgramThunk(ex, node3)))

        print(p.Run(_WAITER))
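
What process.Pipeline sets up can be approximated with the standard library
alone. A rough sketch using subprocess rather than the osh process module;
run_pipeline is a hypothetical helper, and the returned list corresponds to
Run's per-stage exit statuses.

import subprocess

def run_pipeline(argvs):
    procs = []
    prev_stdout = None
    for i, argv in enumerate(argvs):
        last = (i == len(argvs) - 1)
        # Each stage reads the previous stage's stdout; the last stage
        # inherits our stdout.
        p = subprocess.Popen(argv, stdin=prev_stdout,
                             stdout=None if last else subprocess.PIPE)
        if prev_stdout:
            prev_stdout.close()  # so upstream sees SIGPIPE if head exits early
        prev_stdout = p.stdout
        procs.append(p)
    return [p.wait() for p in procs]  # one exit status per stage

print(run_pipeline([['ls'], ['cut', '-d', '.', '-f', '1'], ['head']]))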
Example #3
    def testVarOps(self):
        ev = InitEvaluator()  # initializes x=xxx and y=yyy
        unset_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'unset'))
        part_vals = []
        ev._EvalWordPart(unset_sub, part_vals)
        print(part_vals)

        set_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'x'))
        part_vals = []
        ev._EvalWordPart(set_sub, part_vals)
        print(part_vals)

        # Now add some ops
        part = ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))
        arg_word = ast.CompoundWord([part])
        test_op = ast.StringUnary(Id.VTest_ColonHyphen, arg_word)
        unset_sub.suffix_op = test_op
        set_sub.suffix_op = test_op

        part_vals = []
        ev._EvalWordPart(unset_sub, part_vals)
        print(part_vals)

        part_vals = []
        ev._EvalWordPart(set_sub, part_vals)
        print(part_vals)
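
For reference, the operator under test restated in plain Python; env is a
stand-in for the evaluator's variable scope, not an osh API. ${x:-default}
falls back when x is unset OR empty, which is why the test evaluates both an
unset name and a set one.

def colon_hyphen(env, name, default):
    # ${name:-default}: use default when name is unset or empty
    val = env.get(name)
    return default if not val else val

env = {'x': 'xxx', 'y': 'yyy'}   # matches InitEvaluator above
print(colon_hyphen(env, 'unset', 'default'))  # 'default'
print(colon_hyphen(env, 'x', 'default'))      # 'xxx'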
Example #4
  def _MaybeReadHereDocs(self):
    for h in self.pending_here_docs:
      lines = []
      #log('HERE %r' % h.here_end)
      while True:
        # If op is <<-, strip off all leading tabs (NOT spaces).
        # (in C++, just bump the start?)
        line_id, line = self.line_reader.GetLine()

        #print("LINE %r %r" % (line, h.here_end))
        if not line:  # EOF
          # An unterminated here doc is just a warning in bash.  We make it
          # fatal because we want to be strict, and because it causes problems
          # reporting other errors.
          # Attribute it to the << in <<EOF for now.
          self.AddErrorContext('Unterminated here doc', span_id=h.spids[0])
          return False

        # NOTE: Could do this at runtime to preserve the LST.
        if h.op_id == Id.Redir_DLessDash:
          line = line.lstrip('\t')
        if line.rstrip() == h.here_end:
          break

        lines.append((line_id, line))

      parts = []
      if h.do_expansion:
        # NOTE: We read all lines at once, instead of doing it line-by-line,
        # because of cases like this:
        # cat <<EOF
        # 1 $(echo 2
        # echo 3) 4
        # EOF

        from osh import parse_lib  # Avoid circular import
        w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
        word = w_parser.ReadHereDocBody()
        if not word:
          self.AddErrorContext(
              'Error reading here doc body: %s', w_parser.Error())
          return False
        h.body = word
        h.was_filled = True
      else:
        # Each line is a single span.  TODO: Add span_id to token.
        tokens = [
            ast.token(Id.Lit_Chars, line, const.NO_INTEGER)
            for _, line in lines]
        parts = [ast.LiteralPart(t) for t in tokens]
        h.body = ast.CompoundWord(parts)
        h.was_filled = True

    # No .clear() until Python 3.3.
    del self.pending_here_docs[:]

    return True
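
The core of that loop, separated from the parser for illustration
(read_here_doc is a hypothetical standalone helper, not a method of this
class): collect body lines until the delimiter, strip leading tabs when the
operator was <<-, and treat EOF before the delimiter as fatal.

def read_here_doc(lines, here_end, strip_tabs):
    body = []
    for line in lines:
        if strip_tabs:               # the <<- case
            line = line.lstrip('\t')
        if line.rstrip() == here_end:
            return body
        body.append(line)
    raise RuntimeError('Unterminated here doc')  # fatal, as above

src = ['\thello\n', '\tworld\n', '\tEOF\n']
print(read_here_doc(src, 'EOF', strip_tabs=True))  # ['hello\n', 'world\n']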
Example #5
    def _ReadPatSubVarOp(self, lex_mode):
        """
    Match     = ('/' | '#' | '%') WORD
    VarSub    = ...
              | VarOf '/' Match '/' WORD
    """
        do_all = False
        do_prefix = False
        do_suffix = False

        pat = self._ReadVarOpArg(lex_mode,
                                 eof_type=Id.Lit_Slash,
                                 empty_ok=False)
        if not pat: return None

        if len(pat.parts) == 1:
            ok, s, quoted = word.StaticEval(pat)
            if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
                self._Next(lex_mode)
                self._Peek()
                p = ast.LiteralPart(self.cur_token)
                pat.parts.append(p)

        # Check for other modifiers
        first_part = pat.parts[0]
        if first_part.tag == word_part_e.LiteralPart:
            lit_id = first_part.token.id
            if lit_id == Id.Lit_Slash:  # ${v//pat/repl}: replace all matches
                do_all = True
                pat.parts.pop(0)
            elif lit_id == Id.Lit_Pound:  # ${v/#pat/repl}: anchor at start
                do_prefix = True
                pat.parts.pop(0)
            elif lit_id == Id.Lit_Percent:  # ${v/%pat/repl}: anchor at end
                do_suffix = True
                pat.parts.pop(0)

        #self._Peek()
        if self.token_type == Id.Right_VarSub:
            return ast.PatSub(pat, None, do_all, do_prefix, do_suffix)

        elif self.token_type == Id.Lit_Slash:
            replace = self._ReadVarOpArg(lex_mode)  # do not stop at /
            if not replace: return None

            self._Peek()
            if self.token_type == Id.Right_VarSub:
                return ast.PatSub(pat, replace, do_all, do_prefix, do_suffix)

            else:
                self._BadToken("Expected } after pat sub, got %s",
                               self.cur_token)
                return None

        else:
            self._BadToken("Expected } after pat sub, got %s", self.cur_token)
            return None
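
A simplified model of the four forms this method distinguishes, using literal
(non-glob) patterns for brevity; real shells match the pattern as a glob, so
this sketches the flags, not osh's runtime behavior.

def pat_sub(val, pat, repl, do_all=False, do_prefix=False, do_suffix=False):
    if do_prefix:    # ${v/#pat/repl}: pattern must match at the start
        return repl + val[len(pat):] if val.startswith(pat) else val
    if do_suffix:    # ${v/%pat/repl}: pattern must match at the end
        return val[:-len(pat)] + repl if val.endswith(pat) else val
    # ${v/pat/repl} replaces the first match; ${v//pat/repl} replaces all
    return val.replace(pat, repl, -1 if do_all else 1)

print(pat_sub('aXbXc', 'X', '-'))                # a-bXc
print(pat_sub('aXbXc', 'X', '-', do_all=True))   # a-b-c
print(pat_sub('Xbc', 'X', '-', do_prefix=True))  # -bc
print(pat_sub('abX', 'X', '-', do_suffix=True))  # ab-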
Example #6
  def _MaybeReadHereDocs(self, node):
    here_docs = _GetHereDocsToFill(node)
    #print('')
    #print('--> FILLING', here_docs)
    #print('')
    for h in here_docs:
      lines = []
      #print(h.here_end)
      while True:
        # If op is <<-, strip off all leading tabs (NOT spaces).
        # (in C++, just bump the start?)
        line_id, line = self.line_reader.GetLine()

        #print("LINE %r %r" % (line, h.here_end))
        if not line:  # EOF
          print('WARNING: unterminated here doc', file=sys.stderr)
          break

        if h.op_id == Id.Redir_DLessDash:
          line = line.lstrip('\t')
        if line.rstrip() == h.here_end:
          break

        lines.append((line_id, line))

      parts = []
      if h.do_expansion:
        # NOTE: We read all lines at once, instead of doing it line-by-line,
        # because of cases like this:
        # cat <<EOF
        # 1 $(echo 2
        # echo 3) 4
        # EOF

        # TODO: Move this import
        from osh import parse_lib
        # TODO: Thread arena.  need self.arena
        w_parser = parse_lib.MakeWordParserForHereDoc(lines)
        word = w_parser.ReadHereDocBody()
        if not word:
          self.AddErrorContext(
              'Error reading here doc body: %s', w_parser.Error())
          return False
        h.arg_word = word
        h.was_filled = True
      else:
        # TODO: Add span_id to token
        # Each line is a single span.
        tokens = [ast.token(Id.Lit_Chars, line) for _, line in lines]
        parts = [ast.LiteralPart(t) for t in tokens]
        h.arg_word = ast.CompoundWord(parts)
        h.was_filled = True

    #print('')
    #print('--> FILLED', here_docs)
    #print('')
    return True
Example #7
    def testMultiLine(self):
        w_parser = InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")

        print('--MULTI')
        w = w_parser.ReadWord(LexMode.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]
        self.assertEqual(ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'foo'))]
        self.assertEqual(ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        t = ast.token(Id.Op_Newline, '\n')
        self.assertEqual(ast.TokenWord(t), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]
        self.assertEqual(ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'bar'))]
        self.assertEqual(ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        t = ast.token(Id.Op_Newline, '\n')
        self.assertEqual(ast.TokenWord(t), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        t = ast.token(Id.Eof_Real, '')
        self.assertEqual(ast.TokenWord(t), w)
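
The token sequence asserted above can be mimicked with a toy reader (purely
illustrative; it knows nothing about quoting or the real LexMode machinery):
words come out one at a time, each command line ends in a newline token,
blank lines and comment lines disappear, and an EOF token ends the stream.

def read_words(src):
    for line in src.splitlines():
        line = line.split('#', 1)[0].strip()
        if not line:
            continue  # blank lines and comments are ignored
        for word in line.split():
            yield ('WORD', word)
        yield ('NEWLINE', '\n')
    yield ('EOF', '')

for tok in read_words('ls foo\n\n# comment\n\nls bar\n'):
    print(tok)  # WORD ls, WORD foo, NEWLINE, WORD ls, WORD bar, NEWLINE, EOF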
Example #8
    def testVarOps(self):
        ev = InitEvaluator()  # initializes x=xxx and y=yyy
        unset_sub = ast.BracedVarSub(ast.token(Id.Lit_Chars, 'unset'))
        print(ev.EvalVarSub(unset_sub))

        set_sub = ast.BracedVarSub(ast.token(Id.Lit_Chars, 'x'))
        print(ev.EvalVarSub(set_sub))

        part = ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))
        arg_word = ast.CompoundWord([part])
        test_op = ast.StringUnary(Id.VTest_ColonHyphen, arg_word)
        unset_sub.suffix_op = test_op
        set_sub.suffix_op = test_op

        print(ev.EvalVarSub(unset_sub))
        print(ev.EvalVarSub(set_sub))
Example #9
def TildeDetect(word):
    """Detect tilde expansion.

  If it needs to include a TildeSubPart, return a new word.  Otherwise return
  None.

  NOTE: This algorithm would be simpler if
  1. We could assume some regex for user names.
  2. We didn't need to do brace expansion first, like {~foo,~bar}
  OR
  - If Lit_Slash were special (it is in the VAROP states, but not the OUTER
  state).  We could introduce another lexer mode after you hit Lit_Tilde?

  So we have to scan all LiteralPart instances until one contains a '/'.

  http://unix.stackexchange.com/questions/157426/what-is-the-regex-to-validate-linux-users
  "It is usually recommended to only use usernames that begin with a lower
  case letter or an underscore, followed by lower case letters, digits,
  underscores, or dashes. They can end with a dollar sign. In regular
  expression terms: [a-z_][a-z0-9_-]*[$]?

  On Debian, the only constraints are that usernames must neither start with
  a dash ('-') nor contain a colon (':') or a whitespace (space: ' ', end
  of line: '\n', tabulation: '\t', etc.). Note that using a slash ('/') may
  break the default algorithm for the definition of the user's home
  directory."
  """
    if not word.parts:
        return None
    part0 = word.parts[0]
    if _LiteralPartId(part0) != Id.Lit_Tilde:
        return None

    prefix = ''
    found_slash = False
    # search for the next /
    for i in range(1, len(word.parts)):
        # Not a literal part, and we did NOT find a slash.  So there is no
        # TildeSub applied.  This would be something like ~X$var, ~$var,
        # ~$(echo), etc..  The slash is necessary.
        if word.parts[i].tag != word_part_e.LiteralPart:
            return None
        val = word.parts[i].token.val
        p = val.find('/')

        if p == -1:  # no slash yet
            prefix += val

        else:  # found a slash
            # e.g. for ~foo!bar/baz, split 'foo!bar/baz' into 'foo!bar'
            # and '/baz'
            # NOTE: requires downcast to LiteralPart
            pre, post = val[:p], val[p:]
            prefix += pre
            tilde_part = ast.TildeSubPart(prefix)
            # TODO: Need a span_id here.  Or use different algorithm.
            #print('SPLITTING %s p = %d' % (word.parts[i], p), file=sys.stderr)
            remainder_part = ast.LiteralPart(ast.token(Id.Lit_Chars, post))
            found_slash = True
            break

    w = ast.CompoundWord()
    if found_slash:
        w.parts.append(tilde_part)
        w.parts.append(remainder_part)
        j = i + 1
        while j < len(word.parts):
            w.parts.append(word.parts[j])
            j += 1
    else:
        # The whole thing is a tilde sub, e.g. ~foo or ~foo!bar
        w.parts.append(ast.TildeSubPart(prefix))
    return w
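
The same split is easier to see on a plain string; TildeDetect has to walk
word *parts* instead because the tilde may be glued to substitutions or brace
expansion output. A hypothetical string-only version:

def tilde_detect(s):
    if not s.startswith('~'):
        return None
    p = s.find('/')
    if p == -1:
        return ('TildeSub', s[1:]), None          # whole word, e.g. ~foo
    return ('TildeSub', s[1:p]), ('Literal', s[p:])

print(tilde_detect('~foo/bar'))  # (('TildeSub', 'foo'), ('Literal', '/bar'))
print(tilde_detect('~foo'))      # (('TildeSub', 'foo'), None)
print(tilde_detect('foo'))       # None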
Example #10
    def _ReadCompoundWord(self,
                          eof_type=Id.Undefined_Tok,
                          lex_mode=LexMode.OUTER,
                          empty_ok=True):
        """
    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator
    """
        #print('_ReadCompoundWord', lex_mode)
        word = ast.CompoundWord()

        num_parts = 0
        done = False
        while not done:
            allow_done = empty_ok or num_parts != 0
            self._Peek()
            #print('CW',self.cur_token)
            if allow_done and self.token_type == eof_type:
                done = True  # e.g. for ${}

            # Keywords like "for" are treated like literals
            elif self.token_kind in (Kind.Lit, Kind.KW, Kind.Assign,
                                     Kind.ControlFlow, Kind.BoolUnary,
                                     Kind.BoolBinary):
                if self.token_type == Id.Lit_EscapedChar:
                    part = ast.EscapedLiteralPart(self.cur_token)
                else:
                    part = ast.LiteralPart(self.cur_token)
                    #part.xspans.append(self.cur_token.span_id)

                word.parts.append(part)

                if self.token_type == Id.Lit_VarLike:
                    #print('@', self.lexer.LookAhead())
                    #print('@', self.cursor)
                    #print('@', self.cur_token)

                    t = self.lexer.LookAhead(LexMode.OUTER)
                    if t.id == Id.Op_LParen:
                        self.lexer.PushHint(Id.Op_RParen,
                                            Id.Right_ArrayLiteral)
                        part2 = self._ReadArrayLiteralPart()
                        if not part2:
                            self.AddErrorContext(
                                '_ReadArrayLiteralPart failed')
                            return None  # consistent with other error paths
                        word.parts.append(part2)

            elif self.token_kind == Kind.VSub:
                part = ast.SimpleVarSub(self.cur_token)
                word.parts.append(part)

            elif self.token_kind == Kind.Left:
                #print('_ReadLeftParts')
                part = self._ReadLeftParts()
                if not part:
                    return None
                word.parts.append(part)

            # NOT done yet, will advance below
            elif self.token_kind == Kind.Right:
                # Still part of the word; will be done on the next iter.
                if self.token_type == Id.Right_DoubleQuote:
                    pass
                elif self.token_type == Id.Right_CommandSub:
                    pass
                elif self.token_type == Id.Right_Subshell:
                    # LEXER HACK for (case x in x) ;; esac )
                    assert self.next_lex_mode is None  # Rewind before it's used
                    if self.lexer.MaybeUnreadOne():
                        self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                        self._Next(lex_mode)
                    done = True
                else:
                    done = True

            elif self.token_kind == Kind.Ignored:
                done = True

            else:
                # LEXER HACK for unbalanced case clause.  'case foo in esac' is valid,
                # so to test for ESAC, we can read ) before getting a chance to
                # PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread one
                # token and do it again.

                # We get Id.Op_RParen at top level:      case x in x) ;; esac
                # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
                if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                    assert self.next_lex_mode is None  # Rewind before it's used
                    if self.lexer.MaybeUnreadOne():
                        if self.token_type == Id.Eof_RParen:
                            # Redo translation
                            self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                        self._Next(lex_mode)

                done = True  # anything we don't recognize means we're done

            if not done:
                self._Next(lex_mode)
            num_parts += 1
        return word
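
Both LEXER HACK branches rely on two lexer facilities: a one-token pushback
(MaybeUnreadOne) and a pending retranslation of the next matching token
(PushHint). A toy model of just that interaction, with made-up token values
instead of real Id constants; the real lexer applies hints during token
translation, which this sketch compresses into Read.

class TinyLexer:
    def __init__(self, tokens):
        self.tokens = list(tokens)
        self.pos = 0
        self.hints = {}              # raw token -> retranslated token

    def Read(self):
        tok = self.tokens[self.pos]
        self.pos += 1
        return self.hints.pop(tok, tok)

    def MaybeUnreadOne(self):
        if self.pos == 0:
            return False
        self.pos -= 1                # next Read() sees the same raw token
        return True

    def PushHint(self, raw, translated):
        self.hints[raw] = translated

lx = TinyLexer(['x', ')'])
print(lx.Read())                     # 'x'
print(lx.Read())                     # ')' -- read once, then unread
lx.MaybeUnreadOne()
lx.PushHint(')', 'Right_CasePat')
print(lx.Read())                     # 'Right_CasePat'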
Example #11
    def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False):
        """
    Args:
      eof_type: for stopping at }, Id.Lit_RBrace
      here_doc: Whether we are reading in a here doc context

    Also handles ${foo%%a b c}: treat the argument as double quoted until you
    hit the closing }.
    """
        quoted_part = ast.DoubleQuotedPart()
        left_spid = -1
        right_spid = -1  # gets set later

        if self.cur_token is not None:  # None in here doc case
            left_spid = self.cur_token.span_id

        done = False
        while not done:
            self._Next(LexMode.DQ)
            self._Peek()
            #print(self.cur_token)

            if self.token_type == eof_type:  # e.g. stop at }
                done = True
                continue

            elif self.token_kind == Kind.Lit:
                if self.token_type == Id.Lit_EscapedChar:
                    part = ast.EscapedLiteralPart(self.cur_token)
                else:
                    part = ast.LiteralPart(self.cur_token)
                quoted_part.parts.append(part)

            elif self.token_kind == Kind.Left:
                part = self._ReadDoubleQuotedLeftParts()
                if not part:
                    return None
                quoted_part.parts.append(part)

            elif self.token_kind == Kind.VSub:
                part = ast.SimpleVarSub(self.cur_token)
                quoted_part.parts.append(part)

            elif self.token_kind == Kind.Right:
                assert self.token_type == Id.Right_DoubleQuote
                if here_doc:
                    # Turn Id.Right_DoubleQuote into a literal part
                    quoted_part.parts.append(ast.LiteralPart(self.cur_token))
                else:
                    done = True  # assume Id.Right_DoubleQuote
                    right_spid = self.cur_token.span_id

            elif self.token_kind == Kind.Eof:
                if here_doc:  # here docs will have an EOF in their token stream
                    done = True
                else:
                    self.AddErrorContext(
                        'Unexpected EOF in double-quoted string')
                    return None  # consistent with the other error path above

            else:
                raise AssertionError(self.cur_token)

        quoted_part.spids.extend((left_spid, right_spid))
        return quoted_part
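
Stripped of tokens and span ids, the scanning discipline above looks like
this standalone sketch ($-substitutions and the here-doc mode are elided):
escaped characters become one kind of part, everything else is a literal,
the closing quote ends the region, and EOF before it is an error.

def read_double_quoted(s, i):
    parts = []
    while i < len(s):
        c = s[i]
        if c == '"':
            return parts, i + 1      # done; caller resumes after the quote
        if c == '\\' and i + 1 < len(s):
            parts.append(('Escaped', s[i + 1]))
            i += 2
        else:
            parts.append(('Literal', c))
            i += 1
    raise RuntimeError('Unexpected EOF in double-quoted string')

print(read_double_quoted('a\\"b" rest', 0))
# ([('Literal', 'a'), ('Escaped', '"'), ('Literal', 'b')], 5)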