Code example #1
File: parser.py Project: EdwardBetts/rply
    def parse(self, tokenizer, state=None):
        from rply.token import Token

        lookahead = None
        lookaheadstack = []

        statestack = [0]
        symstack = [Token("$end", "$end")]

        current_state = 0
        while True:
            if self.lr_table.default_reductions[current_state]:
                t = self.lr_table.default_reductions[current_state]
                current_state = self._reduce_production(
                    t, symstack, statestack, state
                )
                continue

            if lookahead is None:
                if lookaheadstack:
                    lookahead = lookaheadstack.pop()
                else:
                    try:
                        lookahead = next(tokenizer)
                    except StopIteration:
                        lookahead = None

                if lookahead is None:
                    lookahead = Token("$end", "$end")

            ltype = lookahead.gettokentype()
            if ltype in self.lr_table.lr_action[current_state]:
                t = self.lr_table.lr_action[current_state][ltype]
                if t > 0:
                    statestack.append(t)
                    current_state = t
                    symstack.append(lookahead)
                    lookahead = None
                    continue
                elif t < 0:
                    current_state = self._reduce_production(
                        t, symstack, statestack, state
                    )
                    continue
                else:
                    n = symstack[-1]
                    return n
            else:
                # TODO: actual error handling here
                if self.error_handler is not None:
                    if state is None:
                        self.error_handler(lookahead)
                    else:
                        self.error_handler(state, lookahead)
                    raise AssertionError("For now, error_handler must raise.")
                else:
                    raise ParsingError(None, lookahead.getsourcepos())
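
For orientation: this parse loop is normally driven through rply's public generator API rather than with a hand-rolled tokenizer. A minimal sketch of that wiring (the grammar and token names below are illustrative, not taken from the project):

    from rply import LexerGenerator, ParserGenerator

    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    lg.ignore(r"\s+")
    lexer = lg.build()

    # declare PLUS left-associative to resolve the shift/reduce conflict
    pg = ParserGenerator(["NUMBER", "PLUS"], precedence=[("left", ["PLUS"])])

    @pg.production("expr : expr PLUS expr")
    def expr_plus(p):
        return p[0] + p[2]

    @pg.production("expr : NUMBER")
    def expr_number(p):
        return int(p[0].getstr())

    parser = pg.build()
    assert parser.parse(lexer.lex("1 + 2 + 3")) == 6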
Code example #2
    def parse(self, tokenizer, state=None):
        from rply.token import Token

        lookahead = None
        lookaheadstack = []
        statestack = [0]
        symstack = [Token("$end", "$end")]
        current_state = 0
        while True:
            if self.lr_table.default_reductions[current_state]:
                t = self.lr_table.default_reductions[current_state]
                current_state = self._reduce_production(
                    t, symstack, statestack, state)
                continue
            if lookahead is None:
                if lookaheadstack:
                    lookahead = lookaheadstack.pop()
                else:
                    try:
                        lookahead = next(tokenizer)
                    except StopIteration:
                        lookahead = None

                if lookahead is None:
                    lookahead = Token("$end", "$end")
            ltype = lookahead.gettokentype()
            if ltype in self.lr_table.lr_action[current_state]:
                t = self.lr_table.lr_action[current_state][ltype]
                if t > 0:
                    statestack.append(t)
                    current_state = t
                    symstack.append(lookahead)
                    lookahead = None
                    continue
                elif t < 0:
                    current_state = self._reduce_production(
                        t, symstack, statestack, state)
                    continue
                else:
                    n = symstack[-1]
                    return n
            elif self.error_handler is not None:
                if state is None:
                    self.error_handler(lookahead)
                else:
                    self.error_handler(state, lookahead)
                lookahead = None
                continue
            else:
                raise ParsingError(None, lookahead.getsourcepos())
Code example #3
File: lexer.py Project: prologic/cream
    def __init__(self, stream):
        self.stream = []
        self.idx = 0

        TAB_WIDTH = 4
        indent = 0
        current_indent = 0
        indent_token = None
        indent_start_pos = 0

        while True:
            try:
                token = next(stream)

            except StopIteration:
                if current_indent > 0:
                    # emit the remaining DEDENT tokens before ending the stream
                    dedents = [Token('DEDENT', '')] * (current_indent // TAB_WIDTH)
                    self.stream.extend(dedents)
                break

            token_type = token.gettokentype()
            if token_type == 'WHITESPACE':
                indent_token = token
                # WHITESPACE is tab only now.
                indent = len(token.getstr()) * TAB_WIDTH
            elif token_type == 'NEWLINE':
                # print("%d <=> %d" % (current_indent, indent))
                if current_indent < indent:
                    indent_token.name = 'INDENT'
                    current_indent = indent
                elif current_indent > indent:
                    dedent_num = (current_indent - indent) // TAB_WIDTH
                    for i in range(0, dedent_num):
                        if not indent_token:
                            indent_token = Token('', '', token.getsourcepos())
                            self.stream.insert(indent_start_pos, indent_token)
                        indent_token.name = 'DEDENT'
                        indent_token = None
                    current_indent = indent
                else:
                    if indent_token:
                        self.stream.remove(indent_token)
                indent = 0
                indent_token = None
                indent_start_pos = len(self.stream) + 1

            self.stream.append(token)
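
The DEDENT arithmetic above is easiest to check in isolation; a self-contained sketch of the same bookkeeping (TAB_WIDTH as in the snippet):

    TAB_WIDTH = 4

    def dedent_count(current_indent, new_indent):
        # one DEDENT token per TAB_WIDTH columns of indentation removed
        return (current_indent - new_indent) // TAB_WIDTH

    assert dedent_count(8, 0) == 2
    assert dedent_count(8, 4) == 1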
Code example #4
File: lexer.py Project: netyum/hippyvm
    def token(self):
        """ Return the next token (a Token object) found in the
            input buffer. None is returned if the end of the
            buffer was reached.
            In case of a lexing error (the current chunk of the
            buffer matches no rule), a LexerError is raised with
            the position of the error.
        """
        if self.pos >= len(self.buf):
            if len(self.context_stack) != 1:
                raise LexerError("contexts are not closed", -1)
            return None
        else:
            if self.pos >= self.heredoc_finish and self.heredoc_finish != -1:
                start = self.pos
                end = self.pos + self.heredoc_lgt
                assert start >= 0
                assert end >= 0
                tok = Token('T_END_HEREDOC', self.buf[start:end], self.lineno)
                self.pos = self.heredoc_finish + self.heredoc_lgt
                self.heredoc_finish = -1
                self.heredoc_lgt = 0
                self.context_stack.pop()
                return tok
            tmp_buf = self._gettmpbuf(self.pos)
            ctx = self.context_stack[-1]
            rules = self.rules[ctx]
            for token_regex, token_type in rules:
                pos = self.pos
                assert pos >= 0
                m = self.match(token_regex, tmp_buf, pos)
                if m:
                    start, end = self._getstartend(m)
                    value = self.buf[start:end]
                    if token_type == 'H_NEW_LINE':
                        self.lineno += 1
                    elif token_type == 'T_COMMENT':
                        self.lineno += value.count('\n')
                    elif token_type == 'T_CONSTANT_ENCAPSED_STRING':
                        self.lineno += value.count("\n")
                    # tokens changing the context
                    tok = Token(token_type, value, self.lineno)
                    tok = self.maybe_change_context(ctx, tok, token_type, end)
                    self.last_token = token_type
                    return tok

            # if we're here, no rule matched
            raise LexerError("unknown token", self.lineno)
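
Per the docstring, token() returns None once the buffer is exhausted, so a caller drains this lexer with a sentinel loop rather than iteration. A minimal sketch (the helper name is illustrative):

    def drain(lexer):
        # collect tokens until token() signals end of input with None
        tokens = []
        while True:
            tok = lexer.token()
            if tok is None:
                break
            tokens.append(tok)
        return tokens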
Code example #5
File: test_interpreter.py Project: njucjc/cycy
 def test_parse_error(self):
     errors = []
     cycy = interpreter.CyCy(handle_error=errors.append)
     cycy.interpret(["asdf"])
     self.assertEqual(
         errors,
         [
             ParseError(token=Token("IDENTIFIER", "asdf"), source="asdf"),
         ],
     )
Code example #6
 def next(self):
     if self.idx >= len(self.s):
         raise StopIteration
     for rule in self.lexer.ignore_rules:
         match = rule.matches(self.s, self.idx)
         if match:
             self.idx = match.end
             return self.next()
     for rule in self.lexer.rules:
         match = rule.matches(self.s, self.idx)
         if match:
             source_pos = self.__get_position__(match.start)
             token = Token(rule.name, self.s[match.start:match.end],
                           source_pos)
             self.idx = match.end
             return token
     # no rule matched at the current position
     raise LexingError(None, SourcePosition(self.idx, -1, -1))
Code example #7
File: lexer.py Project: biwin/Python-projects
 def next(self):
     if self.idx >= len(self.s):
         return None
     for rule in self.lexer.ignore_rules:
         match = rule.matches(self.s, self.idx)
         if match:
             self.idx = match.end
             return self.next()
     for rule in self.lexer.rules:
         match = rule.matches(self.s, self.idx)
         if match:
             # TODO: lineno and colno
             source_pos = SourcePosition(match.start, -1, -1)
             token = Token(rule.name, self.s[match.start:match.end],
                           source_pos)
             self.idx = match.end
             return token
     # no rule matched at the current position
     raise LexingError(None, SourcePosition(self.idx, -1, -1))
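
The two next() variants above signal end of input differently: the first raises StopIteration, matching Python's iterator protocol, while the second returns None and reserves LexingError for genuinely unmatched input. Consumption patterns for each (the stream names and process() are placeholders):

    # StopIteration variant: usable as a plain iterator, assuming the
    # class also exposes __iter__/__next__
    for token in iterator_stream:
        process(token)

    # None-sentinel variant: loop with an explicit end-of-input test
    while True:
        token = sentinel_stream.next()
        if token is None:
            break
        process(token)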
Code example #8
File: lexer.py Project: netyum/hippyvm
 def _scan_double_quote(self, tok):
     p = 1
     v = tok.value
     if v[0] == "b":
         p += 1
     backslash = False
     while p < len(v):
         c = v[p]
         if not backslash:
             if c == '"':
                 # not encountered anything funny, this is just T_STRING
                 return tok
             if (((c == '$' and p < len(v) - 1 and v[p + 1].isalpha())
                  or (c == "{" and p < len(v) - 1 and v[p + 1] == "$")
                  or (c == "$" and p < len(v) - 1 and v[p + 1] == "{"))):
                 p += 1
                 self.context_stack.append(CONTEXT_DOUBLEQUOTE)
                 return Token('"', '"', self.lineno)
             elif c == '\\':
                 backslash = True
         else:
             backslash = False
         p += 1
     assert False  # should be unreachable: a matched string token is always terminated
Code example #9
File: parser.py Project: solanolabs/rply
    def parse(self, tokenizer, state=None):
        from rply.token import Token

        lookahead = None
        lookaheadstack = []

        statestack = [0]
        symstack = [Token("$end", None)]

        current_state = 0
        while True:
            if lookahead is None:
                if lookaheadstack:
                    lookahead = lookaheadstack.pop()
                else:
                    # this fork's tokenizer uses an old-style next() method
                    # that returns None (rather than raising) at end of input
                    lookahead = tokenizer.next()

                if lookahead is None:
                    lookahead = Token("$end", None)

            ltype = lookahead.gettokentype()
            if ltype in self.lr_table.lr_action[current_state]:
                t = self.lr_table.lr_action[current_state][ltype]
                if t > 0:
                    statestack.append(t)
                    current_state = t
                    symstack.append(lookahead)
                    lookahead = None
                    continue
                elif t < 0:
                    # reduce a symbol on the stack and emit a production
                    p = self.lr_table.grammar.productions[-t]
                    pname = p.name
                    plen = p.getlength()
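                        # the top plen entries of symstack are the production's
                        # right-hand side: slice off plen+1 entries, then drop
                        # the leading one (the asserts help RPython prove the
                        # slice bounds are non-negative)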
                    start = len(symstack) + (-plen - 1)
                    assert start >= 0
                    targ = symstack[start:]
                    del targ[0]
                    start = len(symstack) + (-plen)
                    assert start >= 0
                    del symstack[start:]
                    del statestack[start:]
                    if state is None:
                        value = p.func(targ)
                    else:
                        value = p.func(state, targ)
                    symstack.append(value)
                    current_state = self.lr_table.lr_goto[statestack[-1]][pname]
                    statestack.append(current_state)
                    continue
                else:
                    n = symstack[-1]
                    return n
            else:
                # TODO: actual error handling here
                if self.error_handler is not None:
                    if state is None:
                        self.error_handler(lookahead)
                    else:
                        self.error_handler(state, lookahead)
                    raise AssertionError("For now, error_handler must raise.")
                else:
                    raise ParsingError(lookahead.getsourcepos())
Code example #10
File: test_tokens.py Project: rlamy/rply
 def test_source_pos(self):
     t = Token("VALUE", "3", SourcePosition(5, 2, 1))
     assert t.getsourcepos().lineno == 2
Code example #11
File: ast.py Project: zjl233/moe
 def __init__(self, left, opt: Token, right):
     self.left = left
     self.opt = opt.getstr()
     self.right = right
Code example #12
File: ast.py Project: zjl233/moe
 def __init__(self, value: Token):
     self.value = int(value.getstr())
Code example #13
 def test_eq(self):
     t = Token("VALUE", "3", SourcePosition(-1, -1, -1))
     assert not (t == 3)
     assert t != 3
Code example #14
 def test_repr(self):
     t = Token("VALUE", "3")
     assert repr(t) == "Token('VALUE', '3')"
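
Taken together, these tests pin down the small Token surface the other snippets rely on: gettokentype() returns the token name, getstr() the matched text, and getsourcepos() a SourcePosition built from (idx, lineno, colno). A quick sketch:

    from rply.token import Token, SourcePosition

    t = Token("VALUE", "3", SourcePosition(5, 2, 1))
    assert t.gettokentype() == "VALUE"
    assert t.getstr() == "3"
    assert t.getsourcepos().colno == 1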
Code example #15
File: lexer.py Project: netyum/hippyvm
    def maybe_change_context(self, ctx, tok, token_type, endpos):
        # print self.context_stack, tok.name, tok.value
        if ctx == CONTEXT_OBJECT_ACCESS:
            self.context_stack.pop()
        elif (ctx == CONTEXT_NORMAL
              and token_type == "T_CONSTANT_ENCAPSED_STRING"
              and (tok.value[0] == '"' or tok.value[:2] == 'b"')):
            newtok = self._scan_double_quote(tok)
            if newtok.name == '"':
                # we have to rewind a little
                ofs = 1
                if tok.value[0] == 'b':
                    ofs += 1
                self.pos = endpos - len(tok.value) + ofs
            else:
                self.pos = endpos
            return newtok

        elif ctx == CONTEXT_BACKTICK and tok.value[0] == '`':
            self.context_stack.pop()
        elif ctx == CONTEXT_NORMAL and token_type == '`':
            self.context_stack.append(CONTEXT_BACKTICK)
        elif ctx == CONTEXT_BACKTICK and token_type == '"':
            self.context_stack.append(CONTEXT_DOUBLEQUOTE)
        elif ctx == CONTEXT_BACKTICK and token_type == '`':
            self.context_stack.pop()
        elif ctx == CONTEXT_NORMAL and token_type == "T_START_HEREDOC":
            lgt = 3
            if tok.value.startswith("b"):
                lgt += 1
            start = lgt
            end = len(tok.value) - 1
            while tok.value[start] in (' ', '\t'):
                start += 1
            while tok.value[end] in (' ', '\t'):
                end -= 1
            assert end >= 0
            marker = tok.value[start:end]
            if marker.startswith('"'):
                if not marker.endswith('"'):
                    raise LexerError("wrong marker", self.lineno)
                end = len(marker) - 1
                assert end >= 0
                marker = marker[1:end]
            heredoc_marker = "\n" + marker + ";"
            start = self.pos + len(tok.value) - 1
            assert start >= 0
            self.heredoc_finish = self.buf.find(heredoc_marker, start)
            self.heredoc_lgt = len(heredoc_marker) - 1
            if self.heredoc_finish == -1:
                # XXX case where heredoc does not end with [;]
                # its then heredoc is an argument and end like ... HEND );
                heredoc_marker = "\n" + marker
                self.heredoc_finish = self.buf.find(heredoc_marker, start)
                if self.heredoc_finish == -1:
                    raise LexerError("unfinished heredoc", self.lineno)
                self.heredoc_lgt = len(heredoc_marker)

            self.context_stack.append(CONTEXT_HEREDOC)
        elif ctx == CONTEXT_DOUBLEQUOTE and token_type == '"':
            self.context_stack.pop()
        elif ctx == CONTEXT_BACKTICK and token_type == '"':
            # dead branch: the identical condition above already pushes
            # CONTEXT_DOUBLEQUOTE, so this pop can never be reached
            self.context_stack.pop()
        elif ((ctx == CONTEXT_DOUBLEQUOTE or ctx == CONTEXT_HEREDOC
               or ctx == CONTEXT_BACKTICK)
              and token_type == "T_DOLLAR_OPEN_CURLY_BRACES"):
            self.pos = endpos - 1
            self.context_stack.append(CONTEXT_CURLY_BRACES)
            return tok
        elif (ctx == CONTEXT_CURLY_BRACES and token_type == "{"
              and self.last_token == "T_DOLLAR_OPEN_CURLY_BRACES"):
            # instead, we recognize it as a variable
            tmp_buf = self._gettmpbuf(self.pos)
            m = self.match(self.var_re, tmp_buf, self.pos)
            assert m is not None
            start, end = self._getstartend(m)
            tok = Token("T_VARIABLE", self.buf[start:end], tok.lineno)
            self.pos = end
            return tok
        elif ((ctx == CONTEXT_DOUBLEQUOTE or ctx == CONTEXT_HEREDOC)
              and token_type == "T_VARIABLE"):
            # only if the next one is [
            if self.buf[endpos] == "[":
                self.context_stack.append(CONTEXT_BRACKETS)
        elif ((ctx == CONTEXT_DOUBLEQUOTE or ctx == CONTEXT_HEREDOC)
              and token_type == "T_OBJECT_OPERATOR"):
            if (self.last_token != "T_VARIABLE"
                    or not self.buf[self.pos + 2].isalpha()):
                tok = Token("T_ENCAPSED_AND_WHITESPACE", tok.value, tok.lineno)
            else:
                self.context_stack.append(CONTEXT_OBJECT_ACCESS)
        elif token_type == "T_OBJECT_OPERATOR":
            self.context_stack.append(CONTEXT_OBJECT_ACCESS)
        elif ctx == CONTEXT_BRACKETS and token_type == "]":
            self.context_stack.pop()
        elif ctx == CONTEXT_CURLY_BRACES and token_type == "}":
            # XXX this is incorrect but we don't care at the moment
            #     if someone inserts } inside ] we have to do something else
            #     like scan grammar until we hit it
            self.context_stack.pop()
        self.pos = endpos
        return tok
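
maybe_change_context is, at heart, a single large dispatch on the pair (current context, token type) driving a pushdown of lexing modes. A stripped-down sketch of that pattern (the contexts and transitions here are illustrative, not hippyvm's actual tables):

    CONTEXT_NORMAL = "normal"
    CONTEXT_DOUBLEQUOTE = "dquote"

    class ModalLexer(object):
        def __init__(self):
            # the innermost lexing mode sits on top of the stack
            self.context_stack = [CONTEXT_NORMAL]

        def on_token(self, token_type):
            ctx = self.context_stack[-1]
            if ctx == CONTEXT_NORMAL and token_type == '"':
                # an opening quote switches to the string-interior rule set
                self.context_stack.append(CONTEXT_DOUBLEQUOTE)
            elif ctx == CONTEXT_DOUBLEQUOTE and token_type == '"':
                # the closing quote restores the enclosing mode
                self.context_stack.pop()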