Example 1
class TreeSerializer(IterObject):
    def __init__(self, tree):
        self.max_look_behind = 10
        super(TreeSerializer, self).__init__(tree)

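    ##
    # (Re)initialize the iteration state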
    def resetIter(self):
        self.outData = LimLQueue(self.max_look_behind)  # limited record of yielded tokens
        self.tokenStream = LQueue(self.inData.toListG())
        self.tokenStreamIterable = iter(self.tokenStream)
        super(TreeSerializer, self).resetIter()

    ##
    # Peek n tokens behind
    def lookbehind(self, n=1):
        if n > self.max_look_behind:
            raise SyntaxException(
                "TokenStream: can only look %d elements behind" %
                self.max_look_behind)
        return self.outData[n]

    ##
    # Peek n tokens ahead
    #
    # peek needs to circumvent __iter__ and access the LQueue directly
    def peek(self, n=1):
        toks = []
        cnt = 0
        # get the desired token
        while cnt < n:
            t = next(self.tokenStreamIterable)
            toks.append(t)
            if t['type'] == "eof":
                break
            cnt += 1
        # put all retrieved tokens back
        for t in toks[::-1]:
            self.tokenStream.putBack(t)
        return toks[-1]

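    ##
    # Yield the tokens, recording each one in the look-behind buffer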
    def __iter__(self):
        for tok in self.tokenStreamIterable:
            self.outData.appendleft(tok)
            yield tok
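
TreeSerializer builds on several helpers that the example does not show: IterObject (the base class), LQueue (an iterable queue whose putBack() re-inserts consumed items at the front of the stream), LimLQueue (a bounded deque used as the look-behind record), SyntaxException, and the tree's toListG() generator of token dicts. Since none of their implementations appear above, the following is only a minimal sketch of the behavior the class assumes; the names come from the example, the bodies are guesses.

from collections import deque

class SyntaxException(Exception):
    # Assumed: plain exception type raised on an out-of-range look-behind.
    pass

class IterObject(object):
    # Assumed base class: stores the input and primes the iteration state.
    def __init__(self, inData):
        self.inData = inData
        self.resetIter()

    def resetIter(self):
        pass

class LQueue(object):
    # Assumed behavior: iterates over a source generator, but items handed
    # to putBack() are yielded again before the source is consumed further.
    def __init__(self, source):
        self.source = iter(source)
        self.front  = deque()  # items pushed back by putBack()

    def putBack(self, item):
        self.front.appendleft(item)

    def __iter__(self):
        while True:
            if self.front:
                yield self.front.popleft()
            else:
                try:
                    yield next(self.source)
                except StopIteration:
                    return

class LimLQueue(deque):
    # Assumed behavior: a deque with a fixed maximum length; appendleft()
    # silently drops the oldest (rightmost) entry once the limit is
    # reached, which is what a bounded look-behind record needs.
    def __init__(self, max_len):
        deque.__init__(self, maxlen=max_len)

With these stand-ins, peek() can read ahead through the shared iterator and putBack() what it consumed, leaving the iteration order untouched, while __iter__ feeds every yielded token into the bounded outData deque that lookbehind() indexes.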
Example 2
class TokenStream(IterObject):

    def __init__(self, inData):
        self.line       = 0
        self.spos       = 0  # current char pos
        self.sol        = 0  # last start-of-line char pos
        super(TokenStream, self).__init__(inData)

    def resetIter(self):
        self.tokenStream = LQueue(iter(self.inData))
        self.tok_stream = iter(self.tokenStream)
        super(TokenStream, self).resetIter()

    def peek(self, n=1):
        "peek n tokens ahead"
        toks = []
        cnt  = 0

        # get the desired token
        while cnt < n:
            t = next(self.tok_stream)
            toks.append(t)
            if t['type'] == "eof":
                break
            while self._nonGrammaticalToken(t):
                t = next(self.tok_stream)
                toks.append(t)
            cnt += 1

        # put all retrieved tokens back
        for t in toks[::-1]:
            self.tokenStream.putBack(t)

        return self._symbolFromToken(Token(toks[-1]))

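    ##
    # Check for tokens that don't contribute to the grammar (white space,
    # comments, line breaks)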
    def _nonGrammaticalToken(self, tok):
        return tok['type'] in ['white', 'comment', 'eol']


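    ##
    # Map a Token to its symbol object from the symbol table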
    def _symbolFromToken(self, tok):
        s = None

        # TODO: Material for another refactoring:
        # The following huge dispatch could be avoided if the tokenizer already
        # provided the tokens with the right attributes (esp. name, detail).

        # tok is an instance of Token
        if (tok.name == "white" # TODO: 'white'?!
            or tok.name == 'comment'):
            pass
        elif tok.name == "eof":
            symbol = symbol_table.get("eof")
            s = symbol()
            s.value = ""
        elif tok.name == "eol":
            self.line += 1                  # increase line count
            self.sol  = tok.spos + tok.len  # char pos of next line start
            self.spos = tok.spos
            # don't yield this (yet)
            #s = symbol_table.get("eol")()
        # 'operation' nodes
        elif tok.detail in (
            MULTI_TOKEN_OPERATORS
            + MULTI_PROTECTED_OPERATORS
            + SINGLE_RIGHT_OPERATORS
            + SINGLE_LEFT_OPERATORS
            ):
            s = symbol_table[tok.value]()
            s.type = "operation"
            s.set('operator', tok.detail)
        # 'assignment' nodes
        elif tok.detail in ASSIGN_OPERATORS:
            s = symbol_table[tok.value]()
            s.type = "assignment"
            s.set('operator', tok.detail)
        # 'constant' nodes
        elif tok.name in ('number', 'string', 'regexp'):
            symbol = symbol_table["constant"]
            s = symbol()
            s.set('value', tok.value)
            if tok.name == 'number':
                s.set('constantType', 'number')
                s.set('detail', tok.detail)
            elif tok.name == 'string':
                s.set('constantType', 'string')
                s.set('detail', tok.detail)
            elif tok.name == 'regexp':
                s.set('constantType', 'regexp')
        elif tok.name in ('reserved',) and tok.detail in ("TRUE", "FALSE"):
            symbol = symbol_table["constant"]
            s = symbol()
            s.set('value', tok.value)
            s.set('constantType', 'boolean')
        elif tok.name in ('name',):
            s = symbol_table["identifier"]()
            s.set('value', tok.value)
        else:
            # TODO: token, reserved, builtin
            # name or operator
            symbol = symbol_table.get(tok.value)
            if symbol:
                s = symbol()
                s.value = tok.value
            else:
                # don't make assumptions about correct tokens here, as we might be in the
                # middle of a regexp
                #raise SyntaxError("Unknown operator %r (pos %d)" % (tok.value, tok.spos))
                symbol = symbol_table['(unknown)']
                s = symbol()
                s.value = tok.value

        if s:
            s.value = tok.value
            s.set('column', tok.column)
            s.set('line', tok.line)

        # SPY-POINT
        #print(tok)
        return s

    ##
    # yields syntax nodes as "tokens" (kind of a misnomer)
    def __iter__(self):
        for t in self.tok_stream:
            tok = Token(t)
            s = self._symbolFromToken(tok)
            if not s:
                continue
            self.spos = tok.spos
            s.spos    = tok.spos
            yield s
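
TokenStream consumes raw tokens as plain dicts (peek() tests t['type']) but dispatches on a Token wrapper with attribute access (tok.name, tok.detail, tok.value, tok.spos, tok.len, tok.line, tok.column). The Token class is not part of the example, so the sketch below is just one plausible mapping; every dict key except 'type' is an assumption made up for illustration.

class Token(object):
    # Hypothetical wrapper reconstructed from how _symbolFromToken uses it:
    # raw tokens are dicts, the wrapper exposes their fields as attributes.
    # All keys except 'type' are assumed names, not confirmed by the source.
    def __init__(self, tok):
        self.name   = tok['type']            # token class, e.g. 'name', 'eol'
        self.detail = tok.get('detail', '')  # sub-kind, e.g. an operator id
        self.value  = tok.get('source', '')  # raw source text (assumed key)
        self.spos   = tok.get('spos', 0)     # char offset of the token
        self.len    = len(self.value)        # used to find the next line start
        self.line   = tok.get('line', 0)     # line number (assumed key)
        self.column = tok.get('column', 0)   # column within the line (assumed)

The split is also what the TODO in _symbolFromToken points at: if the tokenizer already emitted objects with the right name/detail attributes, both this wrapper and much of the dispatch table would disappear.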