Example #1
File: word_.py  Project: o11c/oil
def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word__String, UP_w)
            return w.id

        elif case(word_e.Token):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(compound_word, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = _LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag_())
Example #2
File: word_.py  Project: o11c/oil
def CommandId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Token):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(compound_word, UP_w)

            # Has to be a single literal part
            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = _LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound

            elif token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                                Id.ControlFlow_Return):
                # OSH and Oil recognize:  { }
                # Oil recognizes:         = return
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type

            return Id.Word_Compound

        else:
            raise AssertionError(w.tag_())
Example #3
    def _Next(self, lex_mode):
        # type: (lex_mode_t) -> None
        """Set the next lex state, but don't actually read a token.

    We need this for proper interactive parsing.
    """
        self.cur_token = self.lexer.Read(lex_mode)
        self.token_type = self.cur_token.id
        self.token_kind = consts.GetKind(self.token_type)
Example #4
    def _Peek(self):
        # type: () -> None
        """Helper method."""
        if self.next_lex_mode != lex_mode_e.Undefined:
            self.cur_token = self.lexer.Read(self.next_lex_mode)
            self.token_type = self.cur_token.id
            self.token_kind = consts.GetKind(self.token_type)
            self.parse_ctx.trail.AppendToken(self.cur_token)   # For completion
            self.next_lex_mode = lex_mode_e.Undefined
Example #5
  def testTokens(self):
    print(Id.Op_Newline)
    print(Tok(Id.Op_Newline, '\n'))

    print(Id.Op_Newline)

    print(Kind.Eof)
    print(Kind.Left)

    print('--')
    num_kinds = 0
    for name in dir(Kind):
      if name[0].isupper():
        kind = getattr(Kind, name)
        print('%-20s %s' % (name, kind))
        num_kinds += 1

    print()
    print('Number of Kinds:', num_kinds)
    print()

    for name in dir(Id):
      if name[0].isupper():
        id_ = getattr(Id, name)
        print('%-30s %s' % (name, id_))

    # 309 out of 256 tokens now
    print()
    print('Number of IDs:', len(ID_SPEC.id_str2int))

    t = Tok(Id.Arith_Plus, '+')
    self.assertEqual(Kind.Arith, consts.GetKind(t.id))
    t = Tok(Id.Arith_CaretEqual, '^=')
    self.assertEqual(Kind.Arith, consts.GetKind(t.id))
    t = Tok(Id.Arith_RBrace, '}')
    self.assertEqual(Kind.Arith, consts.GetKind(t.id))

    t = Tok(Id.BoolBinary_GlobDEqual, '==')
    self.assertEqual(Kind.BoolBinary, consts.GetKind(t.id))

    t = Tok(Id.BoolBinary_Equal, '=')
    self.assertEqual(Kind.BoolBinary, consts.GetKind(t.id))
Example #6
File: word_.py  Project: o11c/oil
def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser."""
    if w.tag_() == word_e.Token:
        tok = cast(Token, w)
        return consts.GetKind(tok.id)

    # NOTE: This is a bit inconsistent with CommandId, because we never
    # return Kind.KW (or Kind.Lit).  But the CommandParser is easier to write
    # this way.
    return Kind.Word
Example #7
File: word_.py  Project: o11c/oil
def KeywordToken(w):
    # type: (compound_word) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is an assignment or control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = _LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token
Example #8
    def _NextOne(self, lex_mode=lex_mode_e.DBracket):
        # type: (lex_mode_t) -> None
        n = len(self.words)
        if n == 2:
            assert lex_mode == lex_mode_e.DBracket
            self.words[0] = self.words[1]
            self.cur_word = self.words[0]
            self.words.pop()
        elif n in (0, 1):
            w = self.w_parser.ReadWord(lex_mode)  # may raise
            if n == 0:
                self.words.append(w)
            else:
                self.words[0] = w
            self.cur_word = w

        assert self.cur_word is not None
        self.op_id = word_.BoolId(self.cur_word)
        self.b_kind = consts.GetKind(self.op_id)
Example #9
def Parse(lexer):
    # type: (Lexer) -> List[Token]
    """Given a QSN literal in a string, return the corresponding byte string.

  Grammar:
      qsn = SingleQuote Kind.Char* SingleQuote Whitespace? Eof_Real
  """
    tok = lexer.Read(lex_mode_e.QSN)
    # Caller ensures this.  It's really a left single quote.
    assert tok.id == Id.Right_SingleQuote

    result = []  # type: List[Token]
    while True:
        tok = lexer.Read(lex_mode_e.QSN)
        #log('tok = %s', tok)

        if tok.id == Id.Unknown_Tok:  # extra error
            p_die('Unexpected token in QSN string', token=tok)

        kind = consts.GetKind(tok.id)
        if kind != Kind.Char:
            break

        result.append(tok)

    if tok.id != Id.Right_SingleQuote:
        p_die('Expected closing single quote in QSN string', token=tok)

    # HACK: read in shell's SQ_C mode to get whitespace, which is disallowed
    # INSIDE QSN.  This gets Eof_Real too.
    tok = lexer.Read(lex_mode_e.SQ_C)

    # Doesn't work because we want to allow literal newlines / tabs
    if tok.id == Id.Char_Literals:
        if not _IsWhitespace(tok.val):
            p_die("Unexpected data after closing quote", token=tok)
        tok = lexer.Read(lex_mode_e.QSN)

    if tok.id != Id.Eof_Real:
        p_die('Unexpected token after QSN string', token=tok)

    return result
Example #10
    def Read(self, lex_mode):
        # type: (lex_mode_t) -> Token
        # Inner loop optimization
        line = self.line
        line_pos = self.line_pos

        tok_type, end_pos = match.OneToken(lex_mode, line, line_pos)
        if tok_type == Id.Eol_Tok:  # Do NOT add a span for this sentinel!
            return _EOL_TOK

        # Save on allocations!  We often don't look at the token value.
        # TODO: can inline this function with formula on 16-bit Id.
        kind = consts.GetKind(tok_type)

        # Whitelist doesn't work well?  Use blacklist for now.
        # - Kind.KW is sometimes a literal in a word
        # - Kind.Right is for " in here docs.  Lexer isn't involved.
        # - Got an error with Kind.Left too that I don't understand
        # if kind in (Kind.Lit, Kind.VSub, Kind.Redir, Kind.Char, Kind.Backtick, Kind.KW, Kind.Right):

        if kind in (Kind.Arith, Kind.Op, Kind.WS, Kind.Ignored, Kind.Eof):
            tok_val = None  # type: Optional[str]
        else:
            tok_val = line[line_pos:end_pos]
        # NOTE: We're putting the arena hook in LineLexer and not Lexer because we
        # want it to be "low level".  The only thing fabricated here is a newline
        # added at the last line, so we don't end with \0.

        if self.arena_skip:  # make another token from the last span
            assert self.last_span_id != runtime.NO_SPID
            span_id = self.last_span_id
            self.arena_skip = False
        else:
            tok_len = end_pos - line_pos
            span_id = self.arena.AddLineSpan(self.line_id, line_pos, tok_len)
            self.last_span_id = span_id
        #log('LineLexer.Read() span ID %d for %s', span_id, tok_type)

        t = Token(tok_type, span_id, tok_val)
        self.line_pos = end_pos
        return t
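
The TODO above ("can inline this function with formula on 16-bit Id") suggests that the GetKind() call could become pure arithmetic if each Kind were packed into the upper bits of the integer Id. A minimal sketch of that idea, using hypothetical constants and an encoding that is not Oil's actual one:

# Hypothetical packing: Kind in the high byte of a 16-bit Id, per-kind index
# in the low byte.  Oil's real Id/Kind values are generated from its spec;
# this only illustrates the "formula on 16-bit Id" idea from the TODO above.

KIND_ARITH = 1
KIND_OP = 2

def MakeId(kind, index):
    # type: (int, int) -> int
    return (kind << 8) | index

def GetKindByFormula(id_):
    # type: (int) -> int
    return id_ >> 8

ARITH_PLUS = MakeId(KIND_ARITH, 0)
OP_NEWLINE = MakeId(KIND_OP, 3)

assert GetKindByFormula(ARITH_PLUS) == KIND_ARITH
assert GetKindByFormula(OP_NEWLINE) == KIND_OP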
Example #11
    def _MaybeReplaceLeaf(self, node):
        # type: (re_t) -> Tuple[Optional[re_t], bool]
        """
    If a leaf node needs to be evaluated, do it and return the replacement.
    Otherwise return None.
    """
        new_leaf = None
        recurse = True

        if node.tag == re_e.Speck:
            id_ = node.id
            if id_ == Id.Expr_Dot:
                new_leaf = re.Primitive(Id.Re_Dot)
            elif id_ == Id.Arith_Caret:  # ^
                new_leaf = re.Primitive(Id.Re_Start)
            elif id_ == Id.Expr_Dollar:  # $
                new_leaf = re.Primitive(Id.Re_End)
            else:
                raise NotImplementedError(id_)

        elif node.tag == re_e.Token:
            id_ = node.id
            val = node.val

            if id_ == Id.Expr_Name:
                if val == 'dot':
                    new_leaf = re.Primitive(Id.Re_Dot)
                else:
                    raise NotImplementedError(val)

            elif id_ == Id.Expr_Symbol:
                if val == '%start':
                    new_leaf = re.Primitive(Id.Re_Start)
                elif val == '%end':
                    new_leaf = re.Primitive(Id.Re_End)
                else:
                    raise NotImplementedError(val)

            else:  # Must be Id.Char_{OneChar,Hex,Unicode4,Unicode8}
                kind = consts.GetKind(id_)
                assert kind == Kind.Char, id_
                s = word_compile.EvalCStringToken(id_, val)
                new_leaf = re.LiteralChars(s, node.span_id)

        elif node.tag == re_e.SingleQuoted:
            s = word_eval.EvalSingleQuoted(node)
            new_leaf = re.LiteralChars(s, node.left.span_id)

        elif node.tag == re_e.DoubleQuoted:
            s = self.word_ev.EvalDoubleQuotedToString(node)
            new_leaf = re.LiteralChars(s, node.left.span_id)

        elif node.tag == re_e.BracedVarSub:
            s = self.word_ev.EvalBracedVarSubToString(node)
            new_leaf = re.LiteralChars(s, node.spids[0])

        elif node.tag == re_e.SimpleVarSub:
            s = self.word_ev.EvalSimpleVarSubToString(node.token)
            new_leaf = re.LiteralChars(s, node.token.span_id)

        elif node.tag == re_e.Splice:
            obj = self.LookupVar(node.name.val)
            if not isinstance(obj, objects.Regex):
                e_die("Can't splice object of type %r into regex",
                      obj.__class__,
                      token=node.name)
            # Note: we only splice the regex, and ignore flags.
            # Should we warn about this?
            new_leaf = obj.regex

        # These are leaves we don't need to do anything with.
        elif node.tag == re_e.PosixClass:
            recurse = False
        elif node.tag == re_e.PerlClass:
            recurse = False

        return new_leaf, recurse
Example #12
    def ParseFactor(self):
        # type: () -> bool_expr_t
        """
    Factor  : WORD
            | UNARY_OP WORD
            | WORD BINARY_OP WORD
            | '(' Expr ')'
    """
        if self.b_kind == Kind.BoolUnary:
            # Just save the type and not the token itself?
            op = self.op_id
            self._Next()
            w = self.cur_word
            # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK

            tag = w.tag_()
            if tag != word_e.Compound and tag != word_e.String:
                p_die('Invalid argument to unary operator', word=w)
            self._Next()
            node = bool_expr.Unary(op, w)  # type: bool_expr_t
            return node

        if self.b_kind == Kind.Word:
            # Peek ahead another token.
            t2 = self._LookAhead()
            t2_op_id = word_.BoolId(t2)
            t2_b_kind = consts.GetKind(t2_op_id)

            #log('t2 %s / t2_op_id %s / t2_b_kind %s', t2, t2_op_id, t2_b_kind)
            # Op for < and >, -a and -o pun
            if t2_b_kind == Kind.BoolBinary or t2_op_id in (Id.Op_Less,
                                                            Id.Op_Great):
                left = self.cur_word

                self._Next()
                op = self.op_id

                # TODO: Need to change to lex_mode_e.BashRegex.
                # _Next(lex_mode) then?
                is_regex = t2_op_id == Id.BoolBinary_EqualTilde
                if is_regex:
                    self._Next(lex_mode=lex_mode_e.BashRegex)
                else:
                    self._Next()

                right = self.cur_word
                if is_regex:
                    # NOTE: StaticEval for checking regex syntax isn't enough.  We could
                    # need to pass do_ere so that the quoted parts get escaped.
                    #ok, s, unused_quoted = word_.StaticEval(right)
                    pass

                self._Next()
                return bool_expr.Binary(op, left, right)
            else:
                # [[ foo ]]
                w = self.cur_word
                self._Next()
                return bool_expr.WordTest(w)

        if self.op_id == Id.Op_LParen:
            self._Next()
            node = self.ParseExpr()
            if self.op_id != Id.Op_RParen:
                p_die('Expected ), got %s',
                      word_.Pretty(self.cur_word),
                      word=self.cur_word)
            self._Next()
            return node

        # It's not WORD, UNARY_OP, or '('
        p_die('Unexpected token in boolean expression', word=self.cur_word)
Example #13
def _PushOilTokens(parse_ctx, gr, p, lex):
    # type: (ParseContext, Grammar, parse.Parser, Lexer) -> Token
    """Push tokens onto pgen2's parser.

  Returns the last token so it can be reused/seen by the CommandParser.
  """
    #log('keywords = %s', gr.keywords)
    #log('tokens = %s', gr.tokens)

    last_token = None  # type: Optional[Token]
    prev_was_newline = False

    balance = 0  # to ignore newlines

    while True:
        if last_token:  # e.g. left over from WordParser
            tok = last_token
            #log('last_token = %s', last_token)
            last_token = None
        else:
            tok = lex.Read(lex_mode_e.Expr)
            #log('tok = %s', tok)

        # Comments and whitespace.  Newlines aren't ignored.
        if consts.GetKind(tok.id) == Kind.Ignored:
            continue

        # For multiline lists, maps, etc.
        if tok.id == Id.Op_Newline:
            if balance > 0:
                #log('*** SKIPPING NEWLINE')
                continue
            # Eliminate duplicate newline tokens.  It makes the grammar simpler, and
            # it's consistent with CPython's lexer and our own WordParser.
            if prev_was_newline:
                continue
            prev_was_newline = True
        else:
            prev_was_newline = False

        balance += _OTHER_BALANCE.get(tok.id, 0)
        #log('BALANCE after seeing %s = %d', tok.id, balance)

        #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
        #  tok.id = KEYWORDS[tok.val]
        #  log('Replaced with %s', tok.id)

        assert tok.id < 256, Id_str(tok.id)

        ilabel = _Classify(gr, tok)
        #log('tok = %s, ilabel = %d', tok, ilabel)

        if p.addtoken(tok.id, tok, ilabel):
            return tok

        #
        # Mutually recursive calls into the command/word parsers.
        #

        if mylib.PYTHON:
            if tok.id == Id.Left_PercentParen:  # %(
                left_tok = tok
                lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

                # Blame the opening token
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)
                words = []
                close_tok = None  # type: Optional[Token]
                while True:
                    w = w_parser.ReadWord(lex_mode_e.ShCommand)
                    if 0:
                        log('w = %s', w)

                    if w.tag_() == word_e.Token:
                        tok = cast(Token, w)
                        if tok.id == Id.Right_ShArrayLiteral:
                            close_tok = tok
                            break
                        elif tok.id == Id.Op_Newline:  # internal newlines allowed
                            continue
                        else:
                            # Token
                            p_die('Unexpected token in array literal: %r',
                                  tok.val,
                                  word=w)

                    assert isinstance(w, compound_word)  # for MyPy
                    words.append(w)

                words2 = braces.BraceDetectAll(words)
                words3 = word_.TildeDetectAll(words2)

                typ = Id.Expr_CastedDummy

                lit_part = sh_array_literal(left_tok, words3)
                opaque = cast(Token, lit_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                # Now push the closing )
                ilabel = _Classify(gr, close_tok)
                done = p.addtoken(tok.id, close_tok, ilabel)
                assert not done  # can't end the expression

                continue

            # $(  @(  &(
            if tok.id in (Id.Left_DollarParen, Id.Left_AtParen,
                          Id.Left_AmpParen):

                left_token = tok

                lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                c_parser = parse_ctx.MakeParserForCommandSub(
                    line_reader, lex, Id.Eof_RParen)
                node = c_parser.ParseCommandSub()
                # A little gross: Copied from osh/word_parse.py
                right_token = c_parser.w_parser.cur_token

                cs_part = command_sub(left_token, node)
                cs_part.spids.append(left_token.span_id)
                cs_part.spids.append(right_token.span_id)

                typ = Id.Expr_CastedDummy
                opaque = cast(Token, cs_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                # Now push the closing )
                ilabel = _Classify(gr, right_token)
                done = p.addtoken(right_token.id, right_token, ilabel)
                assert not done  # can't end the expression

                continue

            if tok.id == Id.Left_DoubleQuote:
                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                parts = []  # type: List[word_part_t]
                last_token = w_parser.ReadDoubleQuoted(left_token, parts)
                expr_dq_part = double_quoted(left_token, parts)

                typ = Id.Expr_CastedDummy
                opaque = cast(Token, expr_dq_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                continue

            if tok.id == Id.Left_DollarBrace:
                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                part, last_token = w_parser.ReadBracedVarSub(left_token)

                # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
                typ = Id.Expr_CastedDummy
                opaque = cast(Token, part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                continue

            # '' and r'' and c''
            if tok.id in (Id.Left_SingleQuote, Id.Left_RSingleQuote,
                          Id.Left_CSingleQuote):
                if tok.id == Id.Left_CSingleQuote:
                    sq_mode = lex_mode_e.SQ_C
                else:
                    sq_mode = lex_mode_e.SQ_Raw

                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                tokens = []  # type: List[Token]
                last_token = w_parser.ReadSingleQuoted(sq_mode, left_token,
                                                       tokens, True)

                sq_part = single_quoted(left_token, tokens)

                typ = Id.Expr_CastedDummy
                opaque = cast(Token, sq_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression
                continue

    else:
        # We never broke out -- EOF is too soon (how can this happen???)
        raise parse.ParseError("incomplete input", tok.id, tok)
Example #14
    def testMode_DBracket(self):
        lex = _InitLexer('-z foo')
        t = lex.Read(lex_mode_e.DBracket)
        self.assertTokensEqual(Tok(Id.BoolUnary_z, '-z'), t)
        self.assertEqual(Kind.BoolUnary, consts.GetKind(t.id))
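
All of these examples share one pattern: a fine-grained token Id is mapped to a coarse Kind with consts.GetKind(), and parsing decisions branch on the Kind. A self-contained sketch of that pattern with made-up stand-in constants (Oil's real Id and Kind enums are code-generated and much larger):

from enum import IntEnum

# Hypothetical stand-ins for Oil's generated constants; only the Id -> Kind
# mapping and the coarse dispatch pattern are the point here.
class Kind(IntEnum):
    Word = 0
    BoolUnary = 1
    BoolBinary = 2

class Id(IntEnum):
    Word_Compound = 0
    BoolUnary_f = 1        # e.g. [[ -f file ]]
    BoolBinary_Equal = 2   # e.g. [[ a = b ]]

_ID_TO_KIND = {
    Id.Word_Compound: Kind.Word,
    Id.BoolUnary_f: Kind.BoolUnary,
    Id.BoolBinary_Equal: Kind.BoolBinary,
}

def GetKind(id_):
    # type: (Id) -> Kind
    return _ID_TO_KIND[id_]

def Classify(op_id):
    # type: (Id) -> str
    """Branch on the coarse Kind, as BoolParser.ParseFactor() does above."""
    kind = GetKind(op_id)
    if kind == Kind.BoolUnary:
        return 'unary test'
    elif kind == Kind.BoolBinary:
        return 'binary test'
    return 'plain word'

assert Classify(Id.BoolUnary_f) == 'unary test'
assert Classify(Id.Word_Compound) == 'plain word'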