Ejemplo n.º 1
0
 def testValidId(self):
     """Scan a lone identifier mixing letters, underscores and digits.

     The scanner is expected to return one identifier token carrying the
     full text, followed by the EOF token.
     """
     source = "A_valid_Id_123"
     expected = [
         Token(TokenType.identifier, "A_valid_Id_123"),
         Token(TokenType.eof),
     ]
     self.assertEqual(Scanner(source).scan(source), expected)
Ejemplo n.º 2
0
 def testNoWhiteSpace(self):
     """Scan ``3/5``: tokens must be split even without separating blanks.

     Expected result is number 3, operator ``/``, number 5, then EOF.
     """
     source = "3/5"
     expected = [
         Token(TokenType.number, 3),
         Token(TokenType.operator, "/"),
         Token(TokenType.number, 5),
         Token(TokenType.eof),
     ]
     self.assertEqual(Scanner(source).scan(source), expected)
Ejemplo n.º 3
0
 def testComments(self):
     """Scan a program that is only a ``(* ... *)`` comment.

     Nothing inside the comment may become a token, so only EOF is expected.
     """
     source = "(*A_valid_Id_123*)"
     expected = [Token(TokenType.eof)]
     self.assertEqual(Scanner(source).scan(source), expected)
Ejemplo n.º 4
0
 def testAllTokens(self):
     """Scan one sample of every token category, separated by blanks.

     The expected list is assembled in source order: a number, an
     identifier, the four punctuation marks, the six operators, a keyword
     and finally EOF.
     """
     source = "3 identifier () : , < + - * / = if"

     expected = [
         Token(TokenType.number, 3),
         Token(TokenType.identifier, "identifier"),
     ]
     expected += [Token(TokenType.punct, mark) for mark in "():,"]
     expected += [Token(TokenType.operator, symbol) for symbol in "<+-*/="]
     expected += [
         Token(TokenType.keyword, "if"),
         Token(TokenType.eof),
     ]

     self.assertEqual(Scanner(source).scan(source), expected)
Ejemplo n.º 5
0
    def parse(self):
        """Run the table-driven parse over the tokens supplied by the scanner.

        A stack is seeded through ``pushRule`` with the PROGRAM non-terminal
        and an EOF token and then processed until it is empty:

        * a ``Token`` on top is matched against ``self.scanner.next()``;
        * a ``NonTerminal`` on top is replaced by the ``parseTable`` rule
          selected from ``self.scanner.peek()``.

        Returns:
            True once the stack is empty and the last token looked at is EOF.

        Raises:
            TypeError: on a terminal mismatch, a missing parse-table entry,
                an unexpected object on the stack, or a non-EOF final token.
        """
        # Token types that are told apart by their text rather than by type.
        matchedByValue = (TokenType.punct, TokenType.operator,
                          TokenType.keyword)
        mismatch = 'Error line {}: Expected  {}  but received {}'

        stack = []
        pushRule([NonTerminal.PROGRAM, Token(TokenType.eof)], stack)

        while stack:
            tos = top(stack)

            if isinstance(tos, Token):
                token = self.scanner.next()
                # Punctuation, operators, keywords and the identifier
                # "print" (used by the print-statement rule) must match on
                # value; numbers, EOF, booleans and all other identifiers
                # only need to match on token type.
                if (tos.getTokenType() in matchedByValue
                        or token.value() == "print"):
                    expected, received = tos.value(), token.value()
                else:
                    expected = tos.getTokenType()
                    received = token.getTokenType()

                if expected == received:
                    pop(stack)
                else:
                    raise TypeError(
                        mismatch.format(token.getLineNumber(), expected,
                                        received))

            elif isinstance(tos, NonTerminal):
                token = self.scanner.peek()
                # Choose the parse-table column using the same
                # value-versus-type distinction as for terminals.
                if token.getTokenType() in matchedByValue:
                    column = token.value()
                elif token.value() == "print":
                    column = "print"
                else:
                    column = token.getTokenType()

                rule = parseTable.get((tos, column))
                if rule is None:
                    errorMessage = 'Error line {}: {} cannot be expanded on by {}'
                    raise TypeError(
                        errorMessage.format(token.getLineNumber(), tos, token))

                # Replace the non-terminal with the right-hand side of its rule.
                pop(stack)
                pushRule(rule, stack)

            else:
                raise TypeError('invalid item on stack: {}'.format(tos))

        # The program is complete; anything but EOF here is trailing input.
        if not token.isEof():
            raise TypeError('Error: unexpected token at end: {}'.format(token))

        return True
Ejemplo n.º 6
0
    FACTOR_TAIL = 16
    ACTUALS = 17
    NONEMPTYACTUALS = 18
    NONEMPTYACT_TAIL = 19
    LITERAL = 20
    PRINT_STATEMENT = 21


parseTable = {
    (NonTerminal.PROGRAM, "function"): [NonTerminal.DEFINITIONS],
    (NonTerminal.PROGRAM, TokenType.eof): [NonTerminal.DEFINITIONS],
    (NonTerminal.DEFINITIONS, "function"):
    [NonTerminal.DEF, NonTerminal.DEFINITIONS],
    (NonTerminal.DEFINITIONS, TokenType.eof): [],
    (NonTerminal.DEF, "function"): [
        Token(TokenType.keyword, "function"),
        Token(TokenType.identifier, ""),
        Token(TokenType.punct, "("), NonTerminal.FORMALS,
        Token(TokenType.punct, ")"),
        Token(TokenType.punct, ":"), NonTerminal.TYPE, NonTerminal.BODY
    ],
    (NonTerminal.FORMALS, ")"): [],
    (NonTerminal.FORMALS, TokenType.identifier): [NonTerminal.NONEMPTYFORMALS],
    (NonTerminal.NONEMPTYFORMALS, TokenType.identifier):
    [NonTerminal.FORMAL, NonTerminal.NONEMPTYFORMALS_TAIL],
    (NonTerminal.NONEMPTYFORMALS_TAIL, ","):
    [Token(TokenType.punct, ","), NonTerminal.NONEMPTYFORMALS],
    (NonTerminal.NONEMPTYFORMALS_TAIL, ")"): [],
    (NonTerminal.FORMAL, TokenType.identifier): [
        Token(TokenType.identifier, ""),
        Token(TokenType.punct, ":"), NonTerminal.TYPE
Ejemplo n.º 7
0
    def scan(self, programStr):
        """Tokenize ``programStr`` with a small hand-written state machine.

        The text is walked one character at a time through the states
        ``start``, ``zero``, ``number``, ``identifier`` and ``comment``.
        Whitespace separates tokens, ``(* ... *)`` is skipped as a comment,
        and reserved words are turned into keyword or boolean tokens.

        Args:
            programStr: the source text to tokenize.

        Returns:
            A list of ``Token`` objects that always ends with an EOF token.

        Raises:
            ValueError: on a character that is not allowed where it appears
                (unknown symbol, digit after a leading zero, a character
                that cannot continue a number or an identifier).
            TypeError: if a comment is still open at the end of the text or
                the machine is found in an unknown state.
        """
        reservedWords = {
            "integer": "Keyword",
            "boolean": "Keyword",
            "if": "Keyword",
            "then": "Keyword",
            "else": "Keyword",
            "not": "Keyword",
            "and": "Keyword",
            "or": "Keyword",
            "function": "Keyword",
            "main": "Identifier",
            "print": "Identifier",
            "true": "Boolean",
            "false": "Boolean"
        }
        alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        punct = "(),:"
        operators = "+-*/<="
        delimiters = operators + punct
        identifierChars = alphabet + "_1234567890"

        def wordToken(word):
            # Reserved words become keyword/boolean tokens; everything else
            # (including "main" and "print") is an identifier.
            # TBD: add identifier to keywords dictionary
            category = reservedWords.get(word, "Identifier")
            if category == "Keyword":
                return Token(TokenType.keyword, word)
            if category == "Boolean":
                return Token(TokenType.boolean, word)
            return Token(TokenType.identifier, word)

        tokens = []
        pending = ""  # characters of the number/identifier being collected
        state = State.start
        pos = 0
        end = len(programStr)

        while pos < end:
            ch = programStr[pos]

            if state == State.start:
                if ch.isspace():
                    pos += 1
                elif ch in operators:
                    tokens.append(Token(TokenType.operator, ch))
                    pos += 1
                elif ch in "),:":
                    tokens.append(Token(TokenType.punct, ch))
                    pos += 1
                elif ch == "0":
                    pending = ch
                    state = State.zero
                    pos += 1
                elif "1" <= ch <= "9":
                    pending = ch
                    state = State.number
                    pos += 1
                elif ch in alphabet:
                    pending = ch
                    state = State.identifier
                    pos += 1
                elif ch == "(":
                    if programStr.startswith("(*", pos):
                        # Comment opener: skip both characters.
                        state = State.comment
                        pos += 2
                    else:
                        tokens.append(Token(TokenType.punct, ch))
                        pos += 1
                else:
                    # Character not acceptable in the language.
                    raise ValueError(
                        'Invalid character in program {}'.format(ch))

            elif state == State.zero:
                if not (ch.isspace() or ch in delimiters):
                    if ch.isdigit():
                        errorMessage = 'Numbers cannot have a leading 0: 0{}'
                    else:
                        errorMessage = 'Invalid character after 0: 0{}'
                    raise ValueError(errorMessage.format(ch))
                # The terminating character is left in place; the start
                # state skips it (whitespace) or tokenizes it (delimiter).
                tokens.append(Token(TokenType.number, int(pending)))
                pending = ""
                state = State.start

            elif state == State.number:
                if ch.isdigit():
                    pending += ch
                    pos += 1
                elif ch.isspace() or ch in delimiters:
                    # Terminator is handled by the start state next round.
                    tokens.append(Token(TokenType.number, int(pending)))
                    pending = ""
                    state = State.start
                else:
                    raise ValueError(
                        'Invalid character in number  {}*{}*'.format(
                            pending, ch))

            elif state == State.identifier:
                if ch in identifierChars:
                    pending += ch
                    pos += 1
                elif ch.isspace() or ch in delimiters:
                    # Terminator is handled by the start state next round.
                    tokens.append(wordToken(pending))
                    pending = ""
                    state = State.start
                else:
                    raise ValueError(
                        'Invalid character in string  {}*{}*'.format(
                            pending, ch))

            elif state == State.comment:
                if programStr.startswith("*)", pos):
                    # Comment closer: skip both characters.
                    state = State.start
                    pos += 2
                else:
                    pos += 1

            else:
                raise TypeError('Invalid state {}'.format(state))

        # Flush whatever was still being collected when the text ended.
        if pending:
            if state == State.zero or state == State.number:
                tokens.append(Token(TokenType.number, int(pending)))
            elif state == State.identifier:
                tokens.append(wordToken(pending))
            else:
                raise TypeError(
                    'Invalid state {} with this accum {}'.format(
                        state, pending))

        if state == State.comment:
            raise TypeError('Comment was never close {}'.format(state))

        tokens.append(Token(TokenType.eof))
        return tokens