def testValidId(self):
    # An identifier mixing letters, underscores and digits should come back
    # as a single identifier token followed by EOF.
    source = "A_valid_Id_123"
    expected = [Token(TokenType.identifier, source), Token(TokenType.eof)]
    produced = Scanner(source).scan(source)
    self.assertEqual(produced, expected)
def testNoWhiteSpace(self):
    # Tokens that touch each other ("3/5") must still be split apart:
    # number, operator, number, then EOF.
    source = "3/5"
    expected = [
        Token(TokenType.number, 3),
        Token(TokenType.operator, "/"),
        Token(TokenType.number, 5),
        Token(TokenType.eof),
    ]
    produced = Scanner(source).scan(source)
    self.assertEqual(produced, expected)
def testComments(self):
    # Everything between "(*" and "*)" is discarded, so a program that is
    # only a comment yields just the EOF token.
    source = "(*A_valid_Id_123*)"
    expected = [Token(TokenType.eof)]
    produced = Scanner(source).scan(source)
    self.assertEqual(produced, expected)
def testAllTokens(self):
    # One sample of every token kind, in the order it appears in the source.
    source = "3 identifier () : , < + - * / = if"

    expected = [
        Token(TokenType.number, 3),
        Token(TokenType.identifier, "identifier"),
    ]
    # Punctuation and operators appear in the source in exactly this order.
    expected.extend(Token(TokenType.punct, mark) for mark in "():,")
    expected.extend(Token(TokenType.operator, symbol) for symbol in "<+-*/=")
    expected.append(Token(TokenType.keyword, "if"))
    expected.append(Token(TokenType.eof))

    produced = Scanner(source).scan(source)
    self.assertEqual(produced, expected)
def parse(self):
    """Run the table-driven parse over the tokens supplied by ``self.scanner``.

    A stack is seeded with the PROGRAM non-terminal and an EOF token. Each
    iteration inspects the top of the stack:

    * a ``Token`` is matched against the next token consumed from the scanner;
    * a ``NonTerminal`` is replaced by the rule that ``parseTable`` holds for
      it and the scanner's peeked token.

    Returns:
        True when the stack empties and the last token examined is EOF.

    Raises:
        TypeError: on a token mismatch, a missing parse-table entry, an
            unexpected object on the stack, or a non-EOF token at the end.
    """
    # Token kinds that are compared / looked up by their text, not their kind.
    matchedByValue = (TokenType.punct, TokenType.operator, TokenType.keyword)

    stack = []
    # NOTE(review): presumably pushRule pushes the symbols so that the first
    # one ends up on top of the stack -- confirm against its definition.
    pushRule([NonTerminal.PROGRAM, Token(TokenType.eof)], stack)

    while stack:
        tos = top(stack)

        if isinstance(tos, Token):
            token = self.scanner.next()
            # Punctuation, operators, keywords and the identifier "print"
            # must agree on value; every other terminal (numbers, EOF,
            # booleans, ordinary identifiers) only has to agree on kind.
            if tos.getTokenType() in matchedByValue or token.value() == "print":
                expected, received = tos.value(), token.value()
            else:
                expected, received = tos.getTokenType(), token.getTokenType()

            if expected == received:
                pop(stack)
                continue

            errorMessage = 'Error line {}: Expected {} but received {}'
            raise TypeError(
                errorMessage.format(token.getLineNumber(), expected, received))

        if isinstance(tos, NonTerminal):
            token = self.scanner.peek()
            # The table is keyed by token text for value-matched tokens and
            # "print", and by token kind for everything else.
            if token.getTokenType() in matchedByValue or token.value() == "print":
                lookahead = token.value()
            else:
                lookahead = token.getTokenType()

            rule = parseTable.get((tos, lookahead))
            if rule is None:
                errorMessage = 'Error line {}: {} cannot be expanded on by {}'
                raise TypeError(
                    errorMessage.format(token.getLineNumber(), tos, token))

            pop(stack)
            pushRule(rule, stack)
            continue

        errorMessage = 'invalid item on stack: {}'
        raise TypeError(errorMessage.format(tos))

    # The stack is empty; the last token looked at must have been EOF.
    if not token.isEof():
        errorMessage = 'Error: unexpected token at end: {}'
        raise TypeError(errorMessage.format(token))
    return True
FACTOR_TAIL = 16 ACTUALS = 17 NONEMPTYACTUALS = 18 NONEMPTYACT_TAIL = 19 LITERAL = 20 PRINT_STATEMENT = 21 parseTable = { (NonTerminal.PROGRAM, "function"): [NonTerminal.DEFINITIONS], (NonTerminal.PROGRAM, TokenType.eof): [NonTerminal.DEFINITIONS], (NonTerminal.DEFINITIONS, "function"): [NonTerminal.DEF, NonTerminal.DEFINITIONS], (NonTerminal.DEFINITIONS, TokenType.eof): [], (NonTerminal.DEF, "function"): [ Token(TokenType.keyword, "function"), Token(TokenType.identifier, ""), Token(TokenType.punct, "("), NonTerminal.FORMALS, Token(TokenType.punct, ")"), Token(TokenType.punct, ":"), NonTerminal.TYPE, NonTerminal.BODY ], (NonTerminal.FORMALS, ")"): [], (NonTerminal.FORMALS, TokenType.identifier): [NonTerminal.NONEMPTYFORMALS], (NonTerminal.NONEMPTYFORMALS, TokenType.identifier): [NonTerminal.FORMAL, NonTerminal.NONEMPTYFORMALS_TAIL], (NonTerminal.NONEMPTYFORMALS_TAIL, ","): [Token(TokenType.punct, ","), NonTerminal.NONEMPTYFORMALS], (NonTerminal.NONEMPTYFORMALS_TAIL, ")"): [], (NonTerminal.FORMAL, TokenType.identifier): [ Token(TokenType.identifier, ""), Token(TokenType.punct, ":"), NonTerminal.TYPE
def scan(self, programStr):
    """Split ``programStr`` into a list of tokens terminated by an EOF token.

    The scanner is a small state machine over ``State`` (start, zero, number,
    identifier, comment) that walks the input one character at a time.
    Whitespace separates tokens, ``(* ... *)`` comments are discarded, and
    operators / punctuation end a pending number or identifier without being
    consumed, so they are re-read from the start state.

    Args:
        programStr: the program text to tokenize.

    Returns:
        A list of ``Token`` objects; the last element is always
        ``Token(TokenType.eof)``.

    Raises:
        ValueError: on a character that is not part of the language, a number
            with a leading zero, or an illegal character inside a number or
            identifier.
        TypeError: if a comment is never closed, or the machine is found in
            an unknown state.
    """
    # Reserved words and the category each one is emitted as. "main" and
    # "print" are listed but still come out as ordinary identifiers.
    keywords = {
        "integer": "Keyword",
        "boolean": "Keyword",
        "if": "Keyword",
        "then": "Keyword",
        "else": "Keyword",
        "not": "Keyword",
        "and": "Keyword",
        "or": "Keyword",
        "function": "Keyword",
        "main": "Identifier",
        "print": "Identifier",
        "true": "Boolean",
        "false": "Boolean"
    }
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    punct = "(),:"
    operators = "+-*/<="
    # Only ASCII digits belong to the language. str.isdigit() also accepts
    # other Unicode digits, which would either slip into a number literal or
    # make int() fail with an unrelated message.
    digits = "0123456789"
    # Built once instead of on every character of the input.
    delimiters = operators + punct
    identifierChars = alphabet + "_1234567890"

    def wordToken(word):
        # Turn a completed word into a keyword, boolean or identifier token.
        category = keywords.get(word)
        if category == "Keyword":
            return Token(TokenType.keyword, word)
        if category == "Boolean":
            return Token(TokenType.boolean, word)
        return Token(TokenType.identifier, word)

    accum = ""
    state = State.start
    tokens = []
    strPos = 0
    while strPos < len(programStr):
        ch = programStr[strPos]

        if state == State.start:
            if ch.isspace():
                pass
            elif ch in operators:
                tokens.append(Token(TokenType.operator, ch))
            elif ch in "),:":
                tokens.append(Token(TokenType.punct, ch))
            elif ch == "0":
                accum = ch
                state = State.zero
            elif ch in "123456789":
                accum = ch
                state = State.number
            elif ch in alphabet:
                accum = ch
                state = State.identifier
            elif ch == "(":
                # "(*" opens a comment; a lone "(" is punctuation.
                if strPos < len(programStr) - 1 and programStr[strPos + 1] == "*":
                    state = State.comment
                    strPos += 1  # also skip the "*"
                else:
                    tokens.append(Token(TokenType.punct, ch))
            else:
                # Character is not part of the language.
                errorMessage = 'Invalid character in program {}'
                raise ValueError(errorMessage.format(ch))
            strPos += 1

        elif state == State.zero:
            # A lone "0" is a complete number; nothing may be glued onto it.
            if ch.isspace():
                tokens.append(Token(TokenType.number, int(accum)))
                strPos += 1
            elif ch in delimiters:
                # Leave the delimiter in place so the start state reads it.
                tokens.append(Token(TokenType.number, int(accum)))
            elif ch in digits:
                errorMessage = 'Numbers cannot have a leading 0: 0{}'
                raise ValueError(errorMessage.format(ch))
            else:
                errorMessage = 'Invalid character after 0: 0{}'
                raise ValueError(errorMessage.format(ch))
            accum = ""
            state = State.start

        elif state == State.number:
            if ch in digits:
                accum += ch
                strPos += 1
            elif ch.isspace():
                tokens.append(Token(TokenType.number, int(accum)))
                accum = ""
                state = State.start
                strPos += 1
            elif ch in delimiters:
                # Delimiter is not consumed; the start state handles it next.
                tokens.append(Token(TokenType.number, int(accum)))
                accum = ""
                state = State.start
            else:
                errorMessage = 'Invalid character in number {}*{}*'
                raise ValueError(errorMessage.format(accum, ch))

        elif state == State.identifier:
            if ch in identifierChars:
                accum += ch
                strPos += 1
            elif ch.isspace():
                tokens.append(wordToken(accum))
                accum = ""
                strPos += 1
                state = State.start
            elif ch in delimiters:
                # Delimiter is not consumed; the start state handles it next.
                tokens.append(wordToken(accum))
                accum = ""
                state = State.start
            else:
                errorMessage = 'Invalid character in string {}*{}*'
                raise ValueError(errorMessage.format(accum, ch))

        elif state == State.comment:
            # Skip everything until the closing "*)".
            if ch == "*":
                if strPos < len(programStr) - 1 and programStr[strPos + 1] == ")":
                    strPos += 1  # also skip the ")"
                    state = State.start
            strPos += 1

        else:
            errorMessage = 'Invalid state {}'
            raise TypeError(errorMessage.format(state))

    # Emit whatever was still being accumulated when the input ran out.
    if accum != "":
        if state == State.zero or state == State.number:
            tokens.append(Token(TokenType.number, int(accum)))
        elif state == State.identifier:
            tokens.append(wordToken(accum))
        else:
            errorMessage = 'Invalid state {} with this accum {}'
            raise TypeError(errorMessage.format(state, accum))

    if state == State.comment:
        errorMessage = 'Comment was never close {}'
        raise TypeError(errorMessage.format(state))

    tokens.append(Token(TokenType.eof))
    return tokens