# Ejemplo n.º 1
class Lexer:
    """DFA-driven tokenizer for arithmetic expressions.

    Reads the source file one line at a time and classifies each lexeme
    against the regex tables in ``tokens``, emitting ``Token`` objects.
    """

    # Token classes checked in priority order when classifying a lexeme.
    # Each entry maps regex pattern -> token type.
    tokens = [
        TokenType.DELIMETERS_ARIT.value, TokenType.IDENTIFIER_ARIT.value,
        TokenType.OPERATORS_ARIT.value, TokenType.TOKEN_NUM_ARIT.value
    ]

    def __init__(self, src):
        """Open *src* for reading and prime the buffer with its first line.

        :param src: path of the source file to tokenize.
        """
        self.line = 1
        self.col = 0
        # NOTE(review): the handle is never closed; consider adding a
        # close() method or context-manager support.
        self.fp = open(src, "r")
        self.buffer = self.fp.readline()
        self.buffer_size = len(self.buffer)
        self.dfa = DFA(7, ARIT_GOL[1], ARIT_GOL[0])
        # Fix: was a dead local (`token_list = []`) that vanished when
        # __init__ returned; store it on the instance so tokens can
        # actually be accumulated.
        self.token_list = []

    def hasNextTok(self):
        """Return True while there is still unread input in the buffer."""
        # readline() returns "" only at EOF, so an empty buffer means done.
        return self.buffer != ""

    def getInput(self):
        """Advance the buffer to the next input line, updating position."""
        self.buffer = self.fp.readline()
        if self.buffer != "":
            self.buffer_size = len(self.buffer)
            self.line += 1
            self.col = 0

    # Function used for getting tokens and adding them to the symbol table.
    def getToken(self):
        """Scan the next lexeme with the DFA and return its Token.

        :returns: the next ``Token``, or ``None`` at end of input.
        :raises Exception: if the lexeme matches no known token class.
        """
        self.dfa.current_state = 0
        # If the current line is exhausted, fetch a new one.
        if self.buffer_size == self.col + 1:
            self.getInput()
            if not self.hasNextTok():
                return None
        # Accumulate characters until the DFA reaches a final state.
        lexeme = ""
        while True:
            char = self.buffer[self.col]
            self.dfa.transition(char)
            if self.dfa.isFinalState():
                break
            lexeme += char
            self.col += 1
        # Classify the lexeme against each token class in priority order.
        # (Renamed loop var: the original shadowed the builtin `type`.)
        for token_type in Lexer.tokens:
            for regex in token_type:
                # re.match uses the module's internal pattern cache, so no
                # explicit re.compile per iteration is needed.
                if re.match(regex, lexeme):
                    return Token(token_type[regex], lexeme, self.line)
        # Fix: the original built Exception() without raising it, so an
        # unrecognized lexeme silently returned None and hid the error.
        raise Exception(
            "Unrecognized lexeme {!r} at line {}".format(lexeme, self.line))