Example #1
0
 def __init__(self, input=None):
     # TODO: possibly clear error listeners and maybe modify error handling 
     #       strategy
     self._prelexer = PreLexer(input)
     self._indentations = [ ]
     self._state_handlers = {
         'FILE_START': self._state_file_start,
         'DEFAULT': self._state_default,
         'BEGIN_BLOCK': self._state_begin_block,
         'FIND_INDENTATION': self._state_find_indentation,
         'CLOSE_ALL_SCOPES': self._state_close_all_scopes,
         'CLOSE_SCOPE': self._state_close_scope,
         'EOF': self._state_eof
         }
     self._State = Enum('_State', list(self._state_handlers.keys()))
     self._state = self._State.FILE_START
     # Some states want to know what token is next, but postpone emitting it
     self._lookaheadToken = None
Example #2
0
class AHLexer:
    def __init__(self, input=None):
        # TODO: possibly clear error listeners and maybe modify error handling 
        #       strategy
        self._prelexer = PreLexer(input)
        self._indentations = [ ]
        self._state_handlers = {
            'FILE_START': self._state_file_start,
            'DEFAULT': self._state_default,
            'BEGIN_BLOCK': self._state_begin_block,
            'FIND_INDENTATION': self._state_find_indentation,
            'CLOSE_ALL_SCOPES': self._state_close_all_scopes,
            'CLOSE_SCOPE': self._state_close_scope,
            'EOF': self._state_eof
            }
        self._State = Enum('_State', list(self._state_handlers.keys()))
        self._state = self._State.FILE_START
        # Some states want to know what token is next, but postpone emitting it
        self._lookaheadToken = None

    # TokenSource interface
    def getCharPositionInLine(self):
        return self._prelexer.getCharPositionInLine()

    def getInputStream(self):
        return self._prelexer.getInputStream()

    def getLine(self):
        return self._prelexer.getLine()

    def getSourceName(self):
        return self._prelexer.getSourceName()

    def getTokenFactory(self):
        return self._prelexer.getTokenFactory()

    def nextToken(self):
        if self._state.name not in self._state_handlers:
            raise RuntimeError("Invalid state")
        return self._state_handlers[self._state.name]()

    def setTokenFactory(self, factory):
        return self._prelexer.setTokenFactory(factory)

    # TODO: potentially change implementation and add other erro listener methods
    def addErrorListener(self, listener):
        self._prelexer.addErrorListener(listener)

    def removeErrorListeners(self):
        self._prelexer.removeErrorListeners()

    # Private methods
    def _state_file_start(self):
        nextToken = self._skip_newlines()
        if nextToken.type == Token.EOF:
            self._push_indentation(0)
            return self._gotoState(self._State.EOF)
        self._push_indentation(nextToken.column)
        self._lookaheadToken = nextToken
        return self._gotoState(self._State.DEFAULT)

    def _state_default(self):
        if self._lookaheadToken is not None:
            nextToken = self._lookaheadToken
            self._lookaheadToken = None
        else:
            nextToken = self._prelexer.nextToken()

        if nextToken.type == PreLexer.NEWLINE:
            return self._maybe_newline(nextToken)
        elif nextToken.type == PreLexer.Where:
            self._state = self._State.BEGIN_BLOCK
            return convertPreLexerTokenToAHToken(nextToken)
        elif nextToken.type == Token.EOF:
            self._state = self._State.CLOSE_ALL_SCOPES
            return self._make_token(AHToken.NEWLINE, '\n')
        else:
            return convertPreLexerTokenToAHToken(nextToken)

    # Precondition: self._lookaheadToken is None
    def _state_begin_block(self):
        self._state = self._State.FIND_INDENTATION
        return self._make_token(AHToken.BeginBlock, "<Begin Block>")

    # Precondition: self._lookaheadToken is None
    def _state_find_indentation(self):
        nextToken = self._skip_newlines()
        currentIndentation = self._top_indentation()
        if nextToken.type == Token.EOF:
            # Empty block
            self._push_indentation(currentIndentation + 1)
            return self._gotoState(self._State.CLOSE_ALL_SCOPES)
        if nextToken.column > currentIndentation:
            self._push_indentation(nextToken.column)
            self._lookaheadToken = nextToken
            return self._gotoState(self._State.DEFAULT)
        else:
            # Empty block
            self._push_indentation(currentIndentation + 1)
            self._lookaheadToken = nextToken
            return self._gotoState(self._State.CLOSE_SCOPE)

    # Preconditions: * self._lookaheadToken is not None
    #                * lookaheadToken indentation is lower than top of
    #                  indentation stack
    def _state_close_scope(self):
        token = self._lookaheadToken
        assert token is not None
        assert token.column < self._top_indentation()
        if self._is_lowest_indentation_block():
            raise LexerError(
                "Indentation error. Indentation is lower than lowest block",
                token.line, token.column, token.start)
        self._pop_indentation()
        currentIndentation = self._top_indentation()
        if token.column < currentIndentation:
            pass # Continue closing scopes
        elif token.column == currentIndentation:
            # Close block and process lookahead token as usual
            self._state = self._State.DEFAULT
        else: # token.column > currentIndentation
            raise LexerError("Indentation error. Can't continue expression "
                             "after closing block", token.line, token.column,
                             token.start)
        # TODO: creating tokens <Start Block> and <End Block> should be refactored
        return self._make_token(AHToken.EndBlock, "<End Block>")

    # Precondition: * self._lookaheadToken is None
    #               * last read token was EOF
    def _state_close_all_scopes(self):
        if self._is_lowest_indentation_block():
            return self._gotoState(self._State.EOF)
        self._pop_indentation()
        return self._make_token(AHToken.EndBlock, "<End Block>")

    # Precondition: * self._lookaheadToken is None
    #               * last read token was EOF
    #               * All indentation blocks are closed
    def _state_eof(self):
        return self._prelexer.emitEOF()

    def _maybe_newline(self, newlineToken):
        nextToken = self._skip_newlines()
        if nextToken.type == Token.EOF:
            self._state = self._State.CLOSE_ALL_SCOPES
            return convertPreLexerTokenToAHToken(newlineToken)

        currentIndentation = self._top_indentation()
        if nextToken.column == currentIndentation:
            # Alright, start new line
            self._lookaheadToken = nextToken
            self._state = self._State.DEFAULT
            return convertPreLexerTokenToAHToken(newlineToken)
        elif nextToken.column > currentIndentation:
            # Just continue previous line
            self._state = self._State.DEFAULT
            return convertPreLexerTokenToAHToken(nextToken)
        else:
            # It's new line and also closing scope
            self._lookaheadToken = nextToken
            self._state = self._State.CLOSE_SCOPE
            return convertPreLexerTokenToAHToken(newlineToken)

    def _is_lowest_indentation_block(self):
        return 1 == len(self._indentations)

    def _push_indentation(self, value):
        self._indentations.append(value)

    def _pop_indentation(self):
        return self._indentations.pop()

    def _top_indentation(self):
        return self._indentations[-1]

    def _skip_newlines(self):
        nextToken = self._prelexer.nextToken()
        while nextToken.type == PreLexer.NEWLINE:
            nextToken = self._prelexer.nextToken()
        return nextToken

    # TODO: this method name is not clear, it makes zero-width token
    def _make_token(self, type, text):
        pl = self._prelexer
        return pl._factory.create(
            pl._tokenFactorySourcePair,
            type,
            text,
            pl.DEFAULT_TOKEN_CHANNEL,
            pl._input.index,
            pl._input.index - 1,
            pl.line,
            pl.column
            )

    def _gotoState(self, state):
        self._state = state
        return self._state_handlers[state.name]()