def stateWhitespace1(cls, parserState: ParserState):
    """Handle the token found while waiting for the entity instantiation name.

    Dispatch on ``parserState.Token``:

    * linebreak character: wrapped in a ``LinebreakToken``. When the last block is
      a ``MultiLineCommentBlock`` only a ``LinebreakBlock`` is emitted; otherwise a
      multi-part ``EntityInstantiationBlock`` is emitted and a ``LinebreakBlock``
      is chained to it. ``LinebreakBlock.stateLinebreak`` is assigned to ``PushState``.
    * ``-`` or ``/``: a multi-part ``EntityInstantiationBlock`` ending at the
      previous token is emitted and the matching ``statePossibleCommentStart``
      is assigned to ``PushState``; the token marker is set to this character.
    * ``StringToken``: converted to an ``IdentifierToken``; the next state is
      ``stateEntityInstantiationName``.
    * ``SpaceToken`` while the last block is a ``MultiLineCommentBlock``:
      emitted as a ``WhitespaceBlock`` holding a ``BoundaryToken``.

    :raises TokenParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == "\n":
            parserState.NewToken = LinebreakToken(tok)
            if isinstance(parserState.LastBlock, MultiLineCommentBlock):
                parserState.NewBlock = LinebreakBlock(
                    parserState.LastBlock, parserState.NewToken)
            else:
                parserState.NewBlock = EntityInstantiationBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=parserState.NewToken.PreviousToken,
                    multiPart=True)
                LinebreakBlock(parserState.NewBlock, parserState.NewToken)
            parserState.TokenMarker = None
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = EntityInstantiationBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                # Statement order is kept as-is: the marker is cleared, the
                # comment state is handed to PushState, then the marker is set.
                parserState.TokenMarker = None
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, StringToken):
        parserState.NewToken = IdentifierToken(tok)
        parserState.NextState = cls.stateEntityInstantiationName
        return
    elif isinstance(tok, SpaceToken) and isinstance(parserState.LastBlock, MultiLineCommentBlock):
        parserState.NewToken = BoundaryToken(tok)
        parserState.NewBlock = WhitespaceBlock(parserState.LastBlock, parserState.NewToken)
        parserState.TokenMarker = None
        return

    raise TokenParserException("Expected entityInstantiation name (identifier).", tok)
def stateWhitespace4(cls, parserState: ParserState):
    """Handle the token found after the type mark, where ``:=`` is expected.

    Dispatch on ``parserState.Token``:

    * ``:``: emitted as a ``BoundaryToken``; the next state is
      ``statePossibleVariableAssignment``.
    * linebreak character: wrapped in a ``LinebreakToken``. When the last block is
      a ``MultiLineCommentBlock`` only a ``LinebreakBlock`` is emitted; otherwise a
      multi-part ``AttributeDeclarationBlock`` plus a chained ``LinebreakBlock``.
      ``LinebreakBlock.stateLinebreak`` is assigned to ``PushState``.
    * ``-`` or ``/``: a multi-part ``AttributeDeclarationBlock`` is emitted and
      the matching ``statePossibleCommentStart`` is assigned to ``PushState``.
    * ``SpaceToken`` while the last block is a ``MultiLineCommentBlock``:
      emitted as a ``WhitespaceBlock`` holding a ``BoundaryToken``.

    :raises TokenParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == ":":
            parserState.NewToken = BoundaryToken(tok)
            parserState.NextState = cls.statePossibleVariableAssignment
            return

        if tok == "\n":
            parserState.NewToken = LinebreakToken(tok)
            if isinstance(parserState.LastBlock, MultiLineCommentBlock):
                parserState.NewBlock = LinebreakBlock(
                    parserState.LastBlock, parserState.NewToken)
            else:
                parserState.NewBlock = AttributeDeclarationBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=parserState.NewToken.PreviousToken,
                    multiPart=True)
                LinebreakBlock(parserState.NewBlock, parserState.NewToken)
            parserState.TokenMarker = None
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = AttributeDeclarationBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken) and isinstance(parserState.LastBlock, MultiLineCommentBlock):
        parserState.NewToken = BoundaryToken(tok)
        parserState.NewBlock = WhitespaceBlock(parserState.LastBlock, parserState.NewToken)
        parserState.TokenMarker = None
        return

    raise TokenParserException("Expected ':=' after type mark.", tok)
def stateEntityInstantiationName(cls, parserState: ParserState):
    """Handle the token that follows the entity instantiation name.

    Dispatch on ``parserState.Token``:

    * ``;``: emitted as an ``EndToken`` that closes an
      ``EntityInstantiationBlock``; ``parserState.Pop()`` is called.
    * linebreak character: a multi-part ``EntityInstantiationBlock`` ending at
      the previous token is emitted, followed by a chained ``LinebreakBlock``.
    * ``-`` or ``/``: a multi-part ``EntityInstantiationBlock`` is emitted and
      the matching ``statePossibleCommentStart`` is assigned to ``PushState``.
    * ``SpaceToken``: only the next state changes.

    All non-terminating cases continue in ``stateWhitespace2``.

    :raises TokenParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == ";":
            parserState.NewToken = EndToken(tok)
            parserState.NewBlock = EntityInstantiationBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=parserState.NewToken)
            parserState.Pop()
            return

        if tok == "\n":
            parserState.NewBlock = EntityInstantiationBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=tok.PreviousToken,
                multiPart=True)
            parserState.NewToken = LinebreakToken(tok)
            LinebreakBlock(parserState.NewBlock, parserState.NewToken)
            parserState.TokenMarker = None
            # NextState is assigned before PushState, as in the original
            # (presumably PushState saves the current NextState - verify).
            parserState.NextState = cls.stateWhitespace2
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = EntityInstantiationBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                parserState.NextState = cls.stateWhitespace2
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken):
        parserState.NextState = cls.stateWhitespace2
        return

    raise TokenParserException("Expected ';' after entityInstantiation name.", tok)
def stateGenericKeyword(cls, parserState: ParserState):
    """Handle the token that follows the keyword GENERIC.

    Dispatch on ``parserState.Token``:

    * ``(``: emitted as a ``BoundaryToken`` that closes an ``OpenBlock``;
      ``NextState`` becomes ``CloseBlock.stateClosingParenthesis``,
      ``OpenBlock.stateOpeningParenthesis`` is assigned to ``PushState`` and
      ``Counter`` is set to 1.
    * linebreak character: a multi-part ``OpenBlock`` ending at the previous
      token is emitted, followed by a chained ``LinebreakBlock``.
    * ``-`` or ``/``: a multi-part ``OpenBlock`` is emitted and the matching
      ``statePossibleCommentStart`` is assigned to ``PushState``.
    * ``SpaceToken``: only the next state changes.

    Linebreak, comment-start and whitespace continue in ``stateWhitespace1``.

    :raises BlockParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == "(":
            parserState.NewToken = BoundaryToken(tok)
            parserState.NewBlock = OpenBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=parserState.NewToken)
            # NextState is assigned before PushState, as in the original.
            parserState.NextState = CloseBlock.stateClosingParenthesis
            parserState.PushState = OpenBlock.stateOpeningParenthesis
            parserState.Counter = 1
            return

        if tok == "\n":
            parserState.NewBlock = OpenBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=tok.PreviousToken,
                multiPart=True)
            parserState.NewToken = LinebreakToken(tok)
            LinebreakBlock(parserState.NewBlock, parserState.NewToken)
            parserState.TokenMarker = None
            parserState.NextState = cls.stateWhitespace1
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = OpenBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                parserState.NextState = cls.stateWhitespace1
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken):
        parserState.NextState = cls.stateWhitespace1
        return

    raise BlockParserException("Expected whitespace or '(' after keyword GENERIC.", tok)
def stateColon1(cls, parserState: ParserState):
    """Handle the token that follows the ``:`` of an attribute declaration.

    Dispatch on ``parserState.Token``:

    * linebreak character: a multi-part ``AttributeDeclarationBlock`` ending at
      the previous token is emitted, followed by a chained ``LinebreakBlock``;
      continues in ``stateWhitespace3``.
    * ``-`` or ``/``: a multi-part ``AttributeDeclarationBlock`` is emitted and
      the matching ``statePossibleCommentStart`` is assigned to ``PushState``;
      continues in ``stateWhitespace3``.
    * ``SpaceToken``: emitted as a ``BoundaryToken``; continues in
      ``stateWhitespace3``.
    * ``WordToken``: emitted as an ``IdentifierToken``; continues in
      ``stateTypeMarkName``.

    :raises TokenParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == "\n":
            parserState.NewBlock = AttributeDeclarationBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=tok.PreviousToken,
                multiPart=True)
            parserState.NewToken = LinebreakToken(tok)
            LinebreakBlock(parserState.NewBlock, parserState.NewToken)
            parserState.TokenMarker = None
            # NextState is assigned before PushState, as in the original.
            parserState.NextState = cls.stateWhitespace3
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = AttributeDeclarationBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                parserState.NextState = cls.stateWhitespace3
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken):
        parserState.NewToken = BoundaryToken(tok)
        parserState.NextState = cls.stateWhitespace3
        return
    elif isinstance(tok, WordToken):
        parserState.NewToken = IdentifierToken(tok)
        parserState.NextState = cls.stateTypeMarkName
        return

    raise TokenParserException("Expected type mark or whitespace after ':'.", tok)
def stateDeclarativeRegion(cls, parserState: ParserState):
    """Handle one token inside the declarative region.

    Dispatch on ``parserState.Token``:

    * linebreak character: emitted as a ``LinebreakToken`` in a
      ``LinebreakBlock``; the token marker is set to the new token.
    * ``-`` or ``/``: the matching ``statePossibleCommentStart`` is assigned to
      ``PushState`` and the token marker is set to this character.
    * ``SpaceToken``: emitted as an ``IndentationToken`` in an ``IndentationBlock``.
    * ``StringToken``: its lower-cased value selects the keyword:
      ``generic`` and ``port`` hand over to the respective list parser via
      ``PushState``; ``end`` continues in ``EndGenerateBlock.stateEndKeyword``;
      ``begin`` emits an ``ElseGenerateBeginBlock`` and continues in
      ``ElseGenerateBeginBlock.stateBeginKeyword``.

    :raises TokenParserException: for any other token or word.
    """
    message = "Expected one of these keywords: generic, port, begin, end."
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == "\n":
            parserState.NewToken = LinebreakToken(tok)
            parserState.NewBlock = LinebreakBlock(parserState.LastBlock, parserState.NewToken)
            parserState.TokenMarker = parserState.NewToken
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken):
        parserState.NewToken = IndentationToken(tok)
        parserState.NewBlock = IndentationBlock(parserState.LastBlock, parserState.NewToken)
        return
    elif isinstance(tok, StringToken):
        keyword = tok.Value.lower()

        # "begin" is the only keyword that emits its block right away.
        if keyword == "begin":
            parserState.NewToken = BeginKeyword(tok)
            parserState.NewBlock = ElseGenerateBeginBlock(
                parserState.LastBlock, parserState.NewToken)
            parserState.NextState = ElseGenerateBeginBlock.stateBeginKeyword
            return

        if keyword == "generic":
            keywordToken = GenericKeyword(tok)
            parserState.PushState = GenericList.OpenBlock.stateGenericKeyword
        elif keyword == "port":
            keywordToken = PortKeyword(tok)
            parserState.PushState = PortList.OpenBlock.statePortKeyword
        elif keyword == "end":
            keywordToken = EndKeyword(tok)
            parserState.NextState = EndGenerateBlock.stateEndKeyword
        else:
            raise TokenParserException(message, tok)

        # The keyword token starts the next block, so it becomes the marker.
        parserState.NewToken = keywordToken
        parserState.TokenMarker = keywordToken
        return

    raise TokenParserException(message, tok)
def stateWhitespace1(cls, parserState: ParserState):
    """Handle the token found while waiting for the closing ``;``.

    Dispatch on ``parserState.Token``:

    * ``;``: emitted as an ``EndToken`` that closes a ``CloseBlock``;
      ``parserState.Pop()`` is called.
    * linebreak character: wrapped in a ``LinebreakToken``;
      ``LinebreakBlock.stateLinebreak`` is assigned to ``PushState`` and the
      token marker is set to the new token. No block is emitted here.
    * ``-`` or ``/``: a multi-part ``CloseBlock`` ending at the previous token
      is emitted and the matching ``statePossibleCommentStart`` is assigned to
      ``PushState``.
    * ``SpaceToken`` while the last block is a ``MultiLineCommentBlock``:
      emitted as a ``WhitespaceBlock`` holding a ``BoundaryToken``.

    :raises BlockParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == ";":
            parserState.NewToken = EndToken(tok)
            parserState.NewBlock = CloseBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=parserState.NewToken)
            parserState.Pop()
            return

        if tok == "\n":
            # TODO: review this linebreak case
            parserState.NewToken = LinebreakToken(tok)
            parserState.PushState = LinebreakBlock.stateLinebreak
            parserState.TokenMarker = parserState.NewToken
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = CloseBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken) and isinstance(parserState.LastBlock, MultiLineCommentBlock):
        parserState.NewToken = BoundaryToken(tok)
        parserState.NewBlock = WhitespaceBlock(parserState.LastBlock, parserState.NewToken)
        parserState.TokenMarker = None
        return

    raise BlockParserException("Expected ';'.", tok)
def stateGenerateKeyword(cls, parserState: ParserState):
    """Handle the token that follows the keyword GENERATE.

    Dispatch on ``parserState.Token``:

    * linebreak character: a multi-part ``ElsIfConditionBlock`` ending at the
      previous token is emitted, followed by a chained ``LinebreakBlock``.
    * ``-`` or ``/``: a multi-part ``ElsIfConditionBlock`` is emitted and the
      matching ``statePossibleCommentStart`` is assigned to ``PushState``.
    * ``SpaceToken``: emitted as a ``BoundaryToken``.

    Every accepted token continues in ``stateWhitespace1``.

    :raises TokenParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == "\n":
            parserState.NewBlock = ElsIfConditionBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=tok.PreviousToken,
                multiPart=True)
            parserState.NewToken = LinebreakToken(tok)
            LinebreakBlock(parserState.NewBlock, parserState.NewToken)
            parserState.TokenMarker = None
            # NextState is assigned before PushState, as in the original.
            parserState.NextState = cls.stateWhitespace1
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = ElsIfConditionBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                parserState.NextState = cls.stateWhitespace1
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken):
        parserState.NewToken = BoundaryToken(tok)
        parserState.NextState = cls.stateWhitespace1
        return

    raise TokenParserException("Expected whitespace after keyword GENERATE.", tok)
def stateVariableAssignment(cls, parserState: ParserState):
    """Handle the token seen in the variable-assignment state of an attribute specification.

    Dispatch on ``parserState.Token``:

    * linebreak character: a multi-part ``AttributeSpecificationBlock`` ending
      at the previous token is emitted, followed by a chained ``LinebreakBlock``.
    * ``-`` or ``/``: a multi-part ``AttributeSpecificationBlock`` is emitted
      and the matching ``statePossibleCommentStart`` is assigned to ``PushState``.
    * ``SpaceToken``: only the next state changes.

    Every accepted token continues in ``stateWhitespace5``.

    :raises TokenParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == "\n":
            parserState.NewBlock = AttributeSpecificationBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=tok.PreviousToken,
                multiPart=True)
            parserState.NewToken = LinebreakToken(tok)
            LinebreakBlock(parserState.NewBlock, parserState.NewToken)
            parserState.TokenMarker = None
            # NextState is assigned before PushState, as in the original.
            parserState.NextState = cls.stateWhitespace5
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = AttributeSpecificationBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                parserState.NextState = cls.stateWhitespace5
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken):
        parserState.NextState = cls.stateWhitespace5
        return

    raise TokenParserException(
        "Expected ':=' or whitespace after attributeSpecification mark.", tok)
def stateClosingParenthesis(cls, parserState: ParserState):
    """Handle the token that follows the closing parenthesis.

    Dispatch on ``parserState.Token``:

    * ``;``: emitted as an ``EndToken`` that closes a ``CloseBlock``;
      ``parserState.Pop()`` is called.
    * linebreak character: wrapped in a ``LinebreakToken``;
      ``LinebreakBlock.stateLinebreak`` is assigned to ``PushState`` and the
      token marker is set to the new token. No block is emitted here.
    * ``-`` or ``/``: a multi-part ``CloseBlock`` ending at the previous token
      is emitted, the next state becomes ``stateWhitespace1`` and the matching
      ``statePossibleCommentStart`` is assigned to ``PushState``.
    * ``SpaceToken``: continues in ``stateWhitespace1``.

    :raises BlockParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == ";":
            parserState.NewToken = EndToken(tok)
            parserState.NewBlock = CloseBlock(
                parserState.LastBlock,
                parserState.TokenMarker,
                endToken=parserState.NewToken)
            parserState.Pop()
            return

        if tok == "\n":
            parserState.NewToken = LinebreakToken(tok)
            parserState.PushState = LinebreakBlock.stateLinebreak
            parserState.TokenMarker = parserState.NewToken
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = CloseBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                # NextState is assigned before PushState, as in the original.
                parserState.NextState = cls.stateWhitespace1
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken):
        parserState.NextState = cls.stateWhitespace1
        return

    raise BlockParserException("Expected ';' or whitespace.", tok)
def stateOpeningParenthesis(cls, parserState: ParserState):
    """Handle one token after the opening parenthesis of the generic list.

    Dispatch on ``parserState.Token``:

    * ``)``: ``parserState.Pop()`` is called and the token marker is set to
      this character.
    * linebreak character: emitted as a ``LinebreakToken`` in a
      ``LinebreakBlock``; ``LinebreakBlock.stateLinebreak`` is assigned to
      ``PushState``.
    * ``-`` or ``/``: the matching ``statePossibleCommentStart`` is assigned to
      ``PushState`` and the token marker is set to this character.
    * ``SpaceToken``: emitted as an ``IndentationToken`` in an ``IndentationBlock``.
    * ``WordToken``: emitted as an ``IdentifierToken`` that becomes the token
      marker; continues in ``ItemBlock.stateItemRemainder``.

    :raises BlockParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == ")":
            parserState.Pop()
            parserState.TokenMarker = tok
            return

        if tok == "\n":
            parserState.NewToken = LinebreakToken(tok)
            parserState.NewBlock = LinebreakBlock(parserState.LastBlock, parserState.NewToken)
            parserState.TokenMarker = None
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.TokenMarker = None
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    elif isinstance(tok, SpaceToken):
        parserState.NewToken = IndentationToken(tok)
        parserState.NewBlock = IndentationBlock(parserState.LastBlock, parserState.NewToken)
        return
    elif isinstance(tok, WordToken):
        parserState.NewToken = IdentifierToken(tok)
        parserState.TokenMarker = parserState.NewToken
        parserState.NextState = ItemBlock.stateItemRemainder
        return

    raise BlockParserException("Expected generic name (identifier).", tok)
def stateBeginKeyword(cls, parserState: ParserState):
    """Handle one token after the keyword BEGIN of a generate branch.

    Dispatch on ``parserState.Token``:

    * linebreak character: emitted as a ``LinebreakToken`` in a
      ``LinebreakBlock``; the token marker is set to the new token.
    * ``-`` or ``/``: the matching ``statePossibleCommentStart`` is assigned to
      ``PushState`` and the token marker is set to this character.
    * ``SpaceToken``: skipped; no token or block is emitted.
    * ``StringToken``: its lower-cased value selects the keyword:
      ``process`` and ``assert`` hand over to the respective parser via
      ``PushState``; ``end`` continues in ``EndGenerateBlock.stateEndKeyword``.
      The keyword token becomes both ``NewToken`` and the token marker.

    :raises TokenParserException: for any other token or word.
    """
    token = parserState.Token
    # Fix: the message now names ``end`` as well, since it is accepted below.
    errorMessage = "Expected label or one of these keywords: assert, process, end."

    if isinstance(token, CharacterToken):
        if token == "\n":
            parserState.NewToken = LinebreakToken(token)
            parserState.NewBlock = LinebreakBlock(parserState.LastBlock, parserState.NewToken)
            parserState.TokenMarker = parserState.NewToken
            return
        elif token == "-":
            parserState.PushState = SingleLineCommentBlock.statePossibleCommentStart
            parserState.TokenMarker = token
            return
        elif token == "/":
            parserState.PushState = MultiLineCommentBlock.statePossibleCommentStart
            parserState.TokenMarker = token
            return
    elif isinstance(token, SpaceToken):
        # Whitespace is consumed silently.
        return
    elif isinstance(token, StringToken):
        keyword = token.Value.lower()
        if keyword == "process":
            newToken = ProcessKeyword(token)
            parserState.PushState = Process.OpenBlock.stateProcessKeyword
        elif keyword == "assert":
            newToken = AssertKeyword(token)
            parserState.PushState = AssertBlock.stateAssertKeyword
        elif keyword == "end":
            newToken = EndKeyword(token)
            parserState.NextState = EndGenerateBlock.stateEndKeyword
        else:
            raise TokenParserException(errorMessage, token)

        # The keyword token starts the next block, so it becomes the marker.
        parserState.NewToken = newToken
        parserState.TokenMarker = newToken
        return

    raise TokenParserException(errorMessage, token)
def stateItemDelimiter(cls, parserState: ParserState):
    """Handle the token that follows an item delimiter in the generic list.

    Dispatch on ``parserState.Token``:

    * linebreak character: emitted as a ``LinebreakToken`` in a
      ``LinebreakBlock``; the next state becomes
      ``OpenBlock.stateOpeningParenthesis`` and
      ``LinebreakBlock.stateLinebreak`` is assigned to ``PushState``.
    * ``SpaceToken``: continues in ``OpenBlock.stateOpeningParenthesis``.
    * ``WordToken``: emitted as an ``IdentifierToken`` that becomes the token
      marker; continues in ``ItemBlock.stateItemRemainder``.

    :raises BlockParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken) and tok == "\n":
        parserState.NewToken = LinebreakToken(tok)
        parserState.NewBlock = LinebreakBlock(parserState.LastBlock, parserState.NewToken)
        parserState.TokenMarker = None
        # NextState is assigned before PushState, as in the original.
        parserState.NextState = OpenBlock.stateOpeningParenthesis
        parserState.PushState = LinebreakBlock.stateLinebreak
        return

    if isinstance(tok, SpaceToken):
        parserState.NextState = OpenBlock.stateOpeningParenthesis
        return

    if isinstance(tok, WordToken):
        parserState.NewToken = IdentifierToken(tok)
        parserState.TokenMarker = parserState.NewToken
        parserState.NextState = ItemBlock.stateItemRemainder
        return

    raise BlockParserException("Expected generic name (identifier).", tok)
def stateWhitespace2(cls, parserState: ParserState):
    """Handle the token found while waiting for the keyword IS.

    Dispatch on ``parserState.Token``:

    * linebreak character: wrapped in a ``LinebreakToken``. When the last block is
      a ``MultiLineCommentBlock`` only a ``LinebreakBlock`` is emitted; otherwise a
      multi-part ``CaseBlock`` plus a chained ``LinebreakBlock``.
      ``LinebreakBlock.stateLinebreak`` is assigned to ``PushState``.
    * ``-`` or ``/``: a multi-part ``CaseBlock`` ending at the previous token is
      emitted and the matching ``statePossibleCommentStart`` is assigned to
      ``PushState``.
    * ``WordToken`` satisfying ``token <= "is"``: emitted as an ``IsKeyword``
      that closes a ``CaseBlock``; continues in ``stateDeclarativeRegion``.
    * ``SpaceToken`` while the last block is a ``MultiLineCommentBlock``:
      emitted as a ``WhitespaceBlock`` holding a ``BoundaryToken``.

    :raises BlockParserException: for every other token.
    """
    tok = parserState.Token

    if isinstance(tok, CharacterToken):
        if tok == "\n":
            parserState.NewToken = LinebreakToken(tok)
            if isinstance(parserState.LastBlock, MultiLineCommentBlock):
                parserState.NewBlock = LinebreakBlock(
                    parserState.LastBlock, parserState.NewToken)
            else:
                parserState.NewBlock = CaseBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=parserState.NewToken.PreviousToken,
                    multiPart=True)
                LinebreakBlock(parserState.NewBlock, parserState.NewToken)
            parserState.TokenMarker = None
            parserState.PushState = LinebreakBlock.stateLinebreak
            return

        # "-" may start a single-line comment, "/" a multi-line comment.
        for commentChar, commentBlock in (("-", SingleLineCommentBlock),
                                          ("/", MultiLineCommentBlock)):
            if tok == commentChar:
                parserState.NewBlock = CaseBlock(
                    parserState.LastBlock,
                    parserState.TokenMarker,
                    endToken=tok.PreviousToken,
                    multiPart=True)
                parserState.TokenMarker = None
                parserState.PushState = commentBlock.statePossibleCommentStart
                parserState.TokenMarker = tok
                return
    # NOTE(review): "<=" is presumably overloaded on WordToken as a
    # case-insensitive keyword match - confirm against the token class.
    elif isinstance(tok, WordToken) and (tok <= "is"):
        parserState.NewToken = IsKeyword(tok)
        parserState.NewBlock = CaseBlock(
            parserState.LastBlock,
            parserState.TokenMarker,
            endToken=parserState.NewToken)
        parserState.NextState = cls.stateDeclarativeRegion
        return
    elif isinstance(tok, SpaceToken) and isinstance(parserState.LastBlock, MultiLineCommentBlock):
        parserState.NewToken = BoundaryToken(tok)
        parserState.NewBlock = WhitespaceBlock(parserState.LastBlock, parserState.NewToken)
        parserState.TokenMarker = None
        return

    raise BlockParserException("Expected keyword IS after generate name.", tok)
def GetVHDLTokenizer(cls, iterable: Iterator[str]): previousToken = StartOfDocumentToken() tokenKind = cls.TokenKind.OtherChars start = SourceCodePosition(1, 1, 1) buffer = "" absolute = 0 column = 0 row = 1 __NUMBER_CHARACTERS__ = "0123456789" __ALPHA_CHARACTERS__ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" __WHITESPACE_CHARACTERS__ = " \t" __FUSEABLE_CHARS__ = "=<:/*>?" yield previousToken for char in iterable: absolute += 1 column += 1 # State: SpaceChars if (tokenKind is cls.TokenKind.SpaceChars): if (char in __WHITESPACE_CHARACTERS__): buffer += char else: end = SourceCodePosition(row, column - 1, absolute - 1) if isinstance(previousToken, (LinebreakToken, SingleLineCommentToken, StartOfDocumentToken)): previousToken = IndentationToken( previousToken, buffer, start, end) else: previousToken = SpaceToken(previousToken, buffer, start, end) yield previousToken start = SourceCodePosition(row, column, absolute) buffer = char if (char in __NUMBER_CHARACTERS__): tokenKind = cls.TokenKind.IntegerChars elif (char in __ALPHA_CHARACTERS__): tokenKind = cls.TokenKind.AlphaChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == "\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "-"): tokenKind = cls.TokenKind.PossibleSingleLineCommentStart elif (char == "\r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "\n"): previousToken = LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): buffer = char tokenKind = cls.TokenKind.FuseableCharacter elif (char == "."): tokenKind = cls.TokenKind.PossibleRealLiteral elif (char == "\\"): tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: previousToken = CharacterToken(previousToken, char, start) yield previousToken tokenKind = 
cls.TokenKind.OtherChars # State: IntegerChars elif (tokenKind is cls.TokenKind.IntegerChars): if ((char in __NUMBER_CHARACTERS__) or (char == "_")): buffer += char elif (char == "."): buffer += char tokenKind = cls.TokenKind.RealChars else: previousToken = IntegerLiteralToken( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken start = SourceCodePosition(row, column, absolute) buffer = char if (char in __WHITESPACE_CHARACTERS__): tokenKind = cls.TokenKind.SpaceChars elif (char in __ALPHA_CHARACTERS__): tokenKind = cls.TokenKind.AlphaChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == "\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "-"): tokenKind = cls.TokenKind.PossibleSingleLineCommentStart elif (char == "\r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "\n"): previousToken = LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): buffer = char tokenKind = cls.TokenKind.FuseableCharacter elif (char == "\\"): tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: previousToken = CharacterToken(previousToken, char, start) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: RealChars elif (tokenKind is cls.TokenKind.RealChars): if ((char in __NUMBER_CHARACTERS__) or (char == "_")): buffer += char else: previousToken = RealLiteralToken( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken start = SourceCodePosition(row, column, absolute) buffer = char if (char in __WHITESPACE_CHARACTERS__): tokenKind = cls.TokenKind.SpaceChars elif (char in __ALPHA_CHARACTERS__): tokenKind = cls.TokenKind.AlphaChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == 
"\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "-"): tokenKind = cls.TokenKind.PossibleSingleLineCommentStart elif (char == "\r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "\n"): previousToken = LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): buffer = char tokenKind = cls.TokenKind.FuseableCharacter elif (char == "\\"): tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: previousToken = CharacterToken(previousToken, char, start) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: AlphaChars elif (tokenKind is cls.TokenKind.AlphaChars): if ((char in __ALPHA_CHARACTERS__) or (char == "_")): buffer += char else: previousToken = WordToken( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken start = SourceCodePosition(row, column, absolute) buffer = char if (char in __WHITESPACE_CHARACTERS__): tokenKind = cls.TokenKind.SpaceChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == "\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "-"): tokenKind = cls.TokenKind.PossibleSingleLineCommentStart elif (char == "\r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "\n"): previousToken = LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): buffer = char tokenKind = cls.TokenKind.FuseableCharacter elif (char == "\\"): tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: previousToken = CharacterToken(previousToken, char, start) yield previousToken tokenKind = 
cls.TokenKind.OtherChars # State: PossibleSingleLineCommentStart elif (tokenKind is cls.TokenKind.PossibleSingleLineCommentStart): if (char == "-"): buffer = "--" tokenKind = cls.TokenKind.SingleLineComment else: previousToken = CharacterToken(previousToken, "-", start) yield previousToken buffer = char if (char in __WHITESPACE_CHARACTERS__): tokenKind = cls.TokenKind.SpaceChars elif (char in __NUMBER_CHARACTERS__): tokenKind = cls.TokenKind.IntegerChars elif (char in __ALPHA_CHARACTERS__): tokenKind = cls.TokenKind.AlphaChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == "\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "/r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "/n"): previousToken = LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): buffer = char tokenKind = cls.TokenKind.FuseableCharacter elif (char == "\\"): tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: previousToken = CharacterToken(previousToken, char, start) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: PossibleLinebreak elif (tokenKind is cls.TokenKind.PossibleLinebreak): end = SourceCodePosition(row, column, absolute) if (char == "\n"): tokenKind = cls.TokenKind.OtherChars if (buffer[:2] == "--"): buffer += char previousToken = SingleLineCommentToken( previousToken, buffer, start, end) else: previousToken = LinebreakToken(previousToken, "\r\n", start, end) buffer = "\r\n" yield previousToken else: previousToken = LinebreakToken(previousToken, "\r", start, end) yield previousToken start = end buffer = char if (char in __WHITESPACE_CHARACTERS__): tokenKind = cls.TokenKind.SpaceChars elif (char in __NUMBER_CHARACTERS__): tokenKind = cls.TokenKind.IntegerChars elif (char in 
__ALPHA_CHARACTERS__): tokenKind = cls.TokenKind.AlphaChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == "\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "-"): tokenKind = cls.TokenKind.PossibleSingleLineCommentStart elif (char == "/r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "/n"): previousToken = LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): buffer = char tokenKind = cls.TokenKind.FuseableCharacter elif (char == "\\"): tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: previousToken = CharacterToken(previousToken, char, start) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: PossibleRealLiteral elif (tokenKind is cls.TokenKind.PossibleRealLiteral): if (char in __NUMBER_CHARACTERS__): buffer += char tokenKind = cls.TokenKind.RealChars else: previousToken = CharacterToken(previousToken, ".", start) yield previousToken start = SourceCodePosition(row, column, absolute) buffer = char if (char in __WHITESPACE_CHARACTERS__): tokenKind = cls.TokenKind.SpaceChars elif (char in __NUMBER_CHARACTERS__): tokenKind = cls.TokenKind.IntegerChars elif (char in __ALPHA_CHARACTERS__): tokenKind = cls.TokenKind.AlphaChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == "\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "-"): tokenKind = cls.TokenKind.PossibleSingleLineCommentStart elif (char == "/r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "/n"): previousToken = LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): buffer = char tokenKind = cls.TokenKind.FuseableCharacter elif (char == "\\"): 
tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: previousToken = CharacterToken(previousToken, char, start) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: PossibleCharacterLiteral elif (tokenKind is cls.TokenKind.PossibleCharacterLiteral): buffer += char if (len(buffer) == 2): if (buffer[1] == "'"): previousToken = CharacterToken(previousToken, "'", start) yield previousToken previousToken = CharacterToken( previousToken, "'", SourceCodePosition(row, column, absolute)) yield previousToken tokenKind = cls.TokenKind.OtherChars else: continue elif ((len(buffer) == 3) and (buffer[2] == "'")): previousToken = CharacterLiteralToken( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken tokenKind = cls.TokenKind.OtherChars else: previousToken = CharacterToken(previousToken, "'", start) yield previousToken start.Column += 1 start.Absolute += 1 buffer = buffer[:2] if ((buffer[0] in __ALPHA_CHARACTERS__) and (buffer[1] in __ALPHA_CHARACTERS__)): tokenKind = cls.TokenKind.AlphaChars elif ((buffer[0] in __WHITESPACE_CHARACTERS__) and (buffer[1] in __WHITESPACE_CHARACTERS__)): tokenKind = cls.TokenKind.SpaceChars else: raise TokenizerException( "Ambiguous syntax detected. buffer: '{buffer}'". 
format(buffer=buffer), start) # State: PossibleStringLiteralStart elif (tokenKind is cls.TokenKind.PossibleStringLiteralStart): buffer += char if (char == "\""): previousToken = StringLiteralToken( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: PossibleExtendedIdentifierStart elif (tokenKind is cls.TokenKind.PossibleExtendedIdentifierStart): buffer += char if (char == "\\"): previousToken = ExtendedIdentifier( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: Directive elif (tokenKind is cls.TokenKind.Directive): buffer += char if (char == "\r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "\n"): previousToken = DirectiveToken( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: SingleLineComment elif (tokenKind is cls.TokenKind.SingleLineComment): buffer += char if (char == "\r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "\n"): previousToken = SingleLineCommentToken( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: MultiLineComment elif (tokenKind is cls.TokenKind.MultiLineComment): buffer += char if (buffer[-2:] == "*/"): previousToken = MultiLineCommentToken( previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken tokenKind = cls.TokenKind.OtherChars # State: FuseableCharacter elif (tokenKind is cls.TokenKind.FuseableCharacter): fused = buffer + char if (fused in ("=>", "**", ":=", "/=", "<=", ">=", "<>", "<<", ">>", "??", "?=", "?<", "?>", "?/=", "?<=", "?>=")): previousToken = FusedCharacterToken( previousToken, fused, start, SourceCodePosition(row, column, absolute)) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (fused in 
("?/", "?<", "?>")): buffer = fused elif (fused == "/*"): buffer = fused tokenKind = cls.TokenKind.MultiLineComment else: previousToken = CharacterToken(previousToken, buffer[0], start) yield previousToken if (len(buffer) == 2): previousToken = CharacterToken(previousToken, buffer[1], start) yield previousToken buffer = char if (char in __WHITESPACE_CHARACTERS__): tokenKind = cls.TokenKind.SpaceChars elif (char in __NUMBER_CHARACTERS__): tokenKind = cls.TokenKind.IntegerChars elif (char in __ALPHA_CHARACTERS__): tokenKind = cls.TokenKind.AlphaChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == "\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "-"): tokenKind = cls.TokenKind.PossibleSingleLineCommentStart elif (char == "\r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "\n"): previousToken = LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): pass elif (char == "\\"): tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: previousToken = CharacterToken(previousToken, char, start) yield previousToken # State: OtherChars elif (tokenKind is cls.TokenKind.OtherChars): start = SourceCodePosition(row, column, absolute) buffer = char if (char in __WHITESPACE_CHARACTERS__): tokenKind = cls.TokenKind.SpaceChars elif (char in __NUMBER_CHARACTERS__): tokenKind = cls.TokenKind.IntegerChars elif (char in __ALPHA_CHARACTERS__): tokenKind = cls.TokenKind.AlphaChars elif (char == "'"): tokenKind = cls.TokenKind.PossibleCharacterLiteral elif (char == "\""): tokenKind = cls.TokenKind.PossibleStringLiteralStart elif (char == "-"): tokenKind = cls.TokenKind.PossibleSingleLineCommentStart elif (char == "\r"): tokenKind = cls.TokenKind.PossibleLinebreak elif (char == "\n"): previousToken = 
LinebreakToken(previousToken, char, start, start) yield previousToken tokenKind = cls.TokenKind.OtherChars elif (char in __FUSEABLE_CHARS__): buffer = char tokenKind = cls.TokenKind.FuseableCharacter elif (char == "\\"): tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart elif ((char == "`") and isinstance(previousToken, (SpaceToken, LinebreakToken))): tokenKind = cls.TokenKind.Directive else: tokenKind = cls.TokenKind.OtherChars previousToken = CharacterToken(previousToken, char, start) yield previousToken # State: unknown else: raise TokenizerException( "Unknown state.", SourceCodePosition(row, column, absolute)) if (char == "\n"): column = 0 row += 1 # end for if (tokenKind is cls.TokenKind.MultiLineComment): raise TokenizerException( "End of document before end of multi line comment.", SourceCodePosition(row, column, absolute)) # End of document yield EndOfDocumentToken(previousToken, SourceCodePosition(row, column, absolute))