def get_name(kind):
    """ used in reporting errors in match() on the parsing stage """
    if kind >= 0 and kind < len(Token.token_types):
        return Token.token_types[kind]
    raise ScannerException("Index out of bounds. Unknown token " + str(kind))
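## Illustrative helper (not part of the original module): get_name() is the
## error-formatting hook used by match() below, and raises ScannerException
## for out-of-range kinds. A minimal hedged sketch of a tolerant lookup:
def _name_or_unknown(kind):
    """ hypothetical wrapper: fall back to a placeholder instead of raising """
    try:
        return Token.get_name(kind)
    except ScannerException:
        return "<unknown kind " + str(kind) + ">"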
def match(self, tokval):
    """ consume and return the head token if it matches tokval; raise otherwise """
    if self.peek().kind != tokval:
        raise ScannerException("cannot find token " +
                               Token.get_name(tokval) + " got " +
                               str(self.peek()) + " instead!")
    return self.dequeue()
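## Usage sketch (hypothetical, not part of the scanner): a recursive-descent
## parser drives the lexer by peek()-ing at the head token and match()-ing
## the kinds it expects; parse_print_stmt below is an invented example name.
def parse_print_stmt(lexer):
    """ consume 'print <token>' from an already-tokenized lexer """
    lexer.match(Token.PRINT)   # raises ScannerException if the head differs
    return lexer.dequeue()     # hand back whatever follows the keyword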
def get_lexeme(self, chunks, pos):
    if chunks == None:
        return None
    if self.debug:
        print("chunks", chunks)
    if chunks == "பதிப்பி": tval = EzhilLexeme(chunks, EzhilToken.PRINT)
    elif chunks == "தேர்ந்தெடு": tval = EzhilLexeme(chunks, EzhilToken.SWITCH)
    elif chunks == "தேர்வு": tval = EzhilLexeme(chunks, EzhilToken.CASE)
    elif chunks == "ஏதேனில்": tval = EzhilLexeme(chunks, EzhilToken.OTHERWISE)
    elif chunks == "ஆனால்": tval = EzhilLexeme(chunks, EzhilToken.IF)
    elif chunks == "இல்லைஆனால்": tval = EzhilLexeme(chunks, EzhilToken.ELSEIF)
    elif chunks == "இல்லை": tval = EzhilLexeme(chunks, EzhilToken.ELSE)
    elif chunks == "ஆக": tval = EzhilLexeme(chunks, EzhilToken.FOR)
    elif chunks == "ஒவ்வொன்றாக": tval = EzhilLexeme(chunks, EzhilToken.FOREACH)
    elif chunks == "இல்": tval = EzhilLexeme(chunks, EzhilToken.COMMA)
    elif chunks == "வரை": tval = EzhilLexeme(chunks, EzhilToken.WHILE)
    elif chunks == "செய்": tval = EzhilLexeme(chunks, EzhilToken.DO)
    elif chunks == "முடியேனில்": tval = EzhilLexeme(chunks, EzhilToken.DOWHILE)
    elif chunks == "பின்கொடு": tval = EzhilLexeme(chunks, EzhilToken.RETURN)
    elif chunks == "முடி": tval = EzhilLexeme(chunks, EzhilToken.END)
    elif chunks == "நிரல்பாகம்": tval = EzhilLexeme(chunks, EzhilToken.DEF)
    elif chunks == "தொடர்": tval = EzhilLexeme(chunks, EzhilToken.CONTINUE)
    elif chunks == "நிறுத்து": tval = EzhilLexeme(chunks, EzhilToken.BREAK)
    elif chunks == "@": tval = EzhilLexeme(chunks, EzhilToken.ATRATEOF)
    elif chunks == "=": tval = EzhilLexeme(chunks, EzhilToken.EQUALS)
    elif chunks == "-": tval = EzhilLexeme(chunks, EzhilToken.MINUS)
    elif chunks == "+": tval = EzhilLexeme(chunks, EzhilToken.PLUS)
    elif chunks == ">": tval = EzhilLexeme(chunks, EzhilToken.GT)
    elif chunks == "<": tval = EzhilLexeme(chunks, EzhilToken.LT)
    elif chunks == ">=": tval = EzhilLexeme(chunks, EzhilToken.GTEQ)
    elif chunks == "<=": tval = EzhilLexeme(chunks, EzhilToken.LTEQ)
    elif chunks == "==": tval = EzhilLexeme(chunks, EzhilToken.EQUALITY)
    elif chunks == "!=": tval = EzhilLexeme(chunks, EzhilToken.NEQ)
    elif chunks == "*": tval = EzhilLexeme(chunks, EzhilToken.PROD)
    elif chunks == "/": tval = EzhilLexeme(chunks, EzhilToken.DIV)
    elif chunks == ",": tval = EzhilLexeme(chunks, EzhilToken.COMMA)
    elif chunks == "(": tval = EzhilLexeme(chunks, EzhilToken.LPAREN)
    elif chunks == ")": tval = EzhilLexeme(chunks, EzhilToken.RPAREN)
    elif chunks == "[": tval = EzhilLexeme(chunks, EzhilToken.LSQRBRACE)
    elif chunks == "]": tval = EzhilLexeme(chunks, EzhilToken.RSQRBRACE)
    elif chunks == "{": tval = Lexeme(chunks, Token.LCURLBRACE)
    elif chunks == "}": tval = Lexeme(chunks, Token.RCURLBRACE)
    elif chunks == ":": tval = Lexeme(chunks, Token.COLON)
    elif chunks == "%": tval = EzhilLexeme(chunks, EzhilToken.MOD)
    elif chunks == "^": tval = EzhilLexeme(chunks, EzhilToken.EXP)
    elif chunks == "&&": tval = Lexeme(chunks, EzhilToken.LOGICAL_AND)
    elif chunks == "&": tval = Lexeme(chunks, EzhilToken.BITWISE_AND)
    elif chunks == "||": tval = Lexeme(chunks, EzhilToken.LOGICAL_OR)
    elif chunks == "|": tval = Lexeme(chunks, EzhilToken.BITWISE_OR)
    elif chunks == "!": tval = Lexeme(chunks, EzhilToken.LOGICAL_NOT)
    elif chunks[0] == "\"" and chunks[-1] == "\"":
        tval = EzhilLexeme(chunks[1:-1], EzhilToken.STRING)
    elif isdigit(chunks[0]) or chunks[0] == '+' or chunks[0] == '-':
        # deduce a float or an integer
        if chunks.find('.') >= 0 or chunks.find('e') >= 0 or chunks.find('E') >= 0:
            tval = EzhilLexeme(float(chunks), EzhilToken.NUMBER)
        else:
            tval = EzhilLexeme(int(chunks), EzhilToken.NUMBER)
    elif isalpha(chunks[0]) or has_tamil(chunks) or chunks[0] == '_':
        ## Tamil/English/mixed identifiers, possibly starting with a leading '_'
        tval = EzhilLexeme(chunks, EzhilToken.ID)
    else:
        raise ScannerException("Lexical error: " + str(chunks) + " at Line, Col " +
                               str(self.get_line_col(pos)) + " in file " + self.fname)
    [l, c] = self.get_line_col(pos)
    tval.set_line_col([l, c])
    tval.set_file_name(self.fname)
    self.tokens.append(tval)
    if self.debug:
        print("Lexer token = ", str(tval))
    return l
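## Hedged examples of what the Ezhil get_lexeme() above appends to tokens[];
## `lex` is assumed to be a constructed EzhilLexer (construction is outside
## this section), and position 0 is used only for illustration:
def _demo_ezhil_lexemes(lex):
    lex.get_lexeme("பதிப்பி", 0)   # keyword  -> EzhilToken.PRINT
    lex.get_lexeme("42", 0)         # integer  -> EzhilToken.NUMBER, value int
    lex.get_lexeme("3.14e2", 0)     # '.'/'e'  -> EzhilToken.NUMBER, value float
    lex.get_lexeme("\"hi\"", 0)     # string   -> EzhilToken.STRING, quotes stripped
    return lex.tokens[-4:]          # the four lexemes just appended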
def tokenize(self, data=None):
    """ do hard-work of tokenizing and put EzhilLexemes into the tokens[] Q """
    if self.stdin_mode:
        if self.debug:
            print(self.tokens)
        ## cleanup the Q for stdin_mode of any EOF that may remain.
        if len(self.tokens) != 0:
            self.match(EzhilToken.EOF)
        if len(self.tokens) != 0:
            raise ScannerException("Lexer: token Q has previous session tokens ")
        self.tokens = list()
    else:
        data = "".join(self.File.readlines())
    if self.debug:
        print(data)
    idx = 0
    tok_start_idx = 0
    while idx < len(data):
        c = data[idx]
        if c == ' ' or c == '\t' or c == '\n':
            if c == '\n':
                ## actual col = idx - col_idx
                self.update_line_col(idx)
            idx = idx + 1
        elif c == '\r':
            idx = idx + 1
            continue
        elif c == '#':
            ## single-line comments like Python/Octave; record them per line
            start = idx
            while idx < len(data) and not (data[idx] in ['\r', '\n']):
                idx = idx + 1
            if idx < len(data) and data[idx] == '\r':
                idx = idx + 1
            end = idx
            self.comments[self.line] = data[start:end]
        elif isdigit(c):  # or c == '+' or c == '-'
            num = c
            tok_start_idx = idx
            idx = idx + 1
            ## FIXME: this prevents you from +.xyz, or -.xyz; use 0.xyz
            ## instead. also may throw an error if we exceed buffer-length.
            if c in ['+', '-'] and (idx < len(data)) and not isdigit(data[idx]):
                self.get_lexeme(c, idx)
                continue
            in_sci_notation = False
            while ((idx < len(data)) and
                   (isdigit(data[idx]) or data[idx] in ['+', '-', 'e', 'E', '.'])):
                if data[idx] in ['+', '-'] and not in_sci_notation:
                    break
                elif data[idx] in ['e', 'E']:
                    in_sci_notation = True
                num = num + data[idx]
                idx = idx + 1
            self.get_lexeme(num, tok_start_idx)
        elif c == "\"":
            tok_start_idx = idx
            s = c
            idx = idx + 1
            while idx < len(data) and data[idx] != '\"':
                if data[idx] == '\\':
                    ## expand the usual escapes; copy anything else verbatim
                    idx = idx + 1
                    if data[idx] == 'n':
                        s = s + '\n'
                    elif data[idx] == 't':
                        s = s + '\t'
                    else:
                        s = s + data[idx]
                else:
                    s = s + data[idx]
                idx = idx + 1
            s = s + data[idx]   # append the closing quote
            idx = idx + 1
            self.get_lexeme(s, tok_start_idx)
        elif istamil(c) or isalpha(c) or c == '_':
            tok_start_idx = idx
            s = c
            idx = idx + 1
            while (idx < len(data)) and (not data[idx] in EzhilToken.FORBIDDEN_FOR_IDENTIFIERS):
                s = s + data[idx]
                idx = idx + 1
            self.get_lexeme(s, tok_start_idx)
        elif c in self.unary_binary_ops:
            tok_start_idx = idx
            if len(data) > (1 + idx) and data[idx + 1] in ['=', '|', '&']:
                c = c + data[idx + 1]
                idx = idx + 1
            self.get_lexeme(c, tok_start_idx)
            idx = idx + 1
        elif c == ";":
            # treat as newline
            idx = idx + 1
            continue
        else:
            tok_start_idx = idx
            idx = idx + 1
            self.get_lexeme(c, tok_start_idx)
    tok_start_idx = idx
    ## close the file if not stdin_mode
    if not self.stdin_mode:
        self.File.close()
    ## and manually add an EOF token.
    eof_tok = EzhilLexeme("", EzhilToken.EOF)
    eof_tok.set_line_col(self.get_line_col(tok_start_idx))
    self.tokens.append(eof_tok)
    if self.debug:
        print("before reverse")
        self.dump_tokens()
    self.tokens.reverse()
    if self.debug:
        print("after reverse")
        self.dump_tokens()
    return
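## End-to-end sketch using only the API in this section: tokenize(), then
## drain the Q until EOF via peek()/dequeue(). `lex` is assumed to be an
## already-constructed EzhilLexer; constructor details are not shown here.
def _drain_tokens(lex):
    lex.tokenize()
    toks = []
    while lex.peek().kind != EzhilToken.EOF:
        toks.append(lex.dequeue())
    return toks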
def peek(self):
    """ look at the Lexeme at the head of the Q without removing it """
    if len(self.tokens) == 0:
        raise ScannerException("tokens[] queue is empty ")
    return self.tokens[-1]
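## Contrast sketch: peek() inspects the head of the Q without consuming it,
## while dequeue() pops the same Lexeme. (Assumes a non-empty token Q.)
def _peek_then_pop(lexer):
    head = lexer.peek()              # Q unchanged
    assert head is lexer.dequeue()   # now consumed
    return head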
def tokenize(self, data=None):
    """ do hard-work of tokenizing and put Lexemes into the tokens[] Q """
    if self.stdin_mode:
        if self.debug:
            print(self.tokens)
        ## cleanup the Q for stdin_mode of any EOF that may remain.
        if len(self.tokens) != 0:
            self.match(Token.EOF)
        if len(self.tokens) != 0:
            raise ScannerException("Lexer: token Q has previous session tokens ")
        self.tokens = list()
    else:
        data = "".join(self.File.readlines())
    idx = 0
    tok_start_idx = 0
    while idx < len(data):
        c = data[idx]
        if c == ' ' or c == '\t' or c == '\n':
            if c == '\n':
                ## actual col = idx - col_idx
                self.update_line_col(idx)
            idx = idx + 1
        elif c == '#':
            ## skip single-line comments like Python/Octave
            while idx < len(data) and data[idx] != '\n':
                idx = idx + 1
        elif isdigit(c) or c == '+' or c == '-':
            num = c
            tok_start_idx = idx
            idx = idx + 1
            ## FIXME: this prevents you from +.xyz, or -.xyz; use 0.xyz
            ## instead. also may throw an error if we exceed buffer-length.
            if c in ['+', '-'] and (idx < len(data)) and not isdigit(data[idx]):
                self.get_lexeme(c, idx)
                continue
            while (idx < len(data)) and (isdigit(data[idx]) or data[idx] == '.'):
                num = num + data[idx]
                idx = idx + 1
            self.get_lexeme(num, tok_start_idx)
        elif c == "\"":
            tok_start_idx = idx
            s = c
            idx = idx + 1
            while idx < len(data) and data[idx] != '\"':
                s = s + data[idx]
                if data[idx] == '\\':
                    ## note: the character after a backslash is skipped,
                    ## not expanded (the Ezhil lexer rewrites \n and \t)
                    idx = idx + 1
                idx = idx + 1
            s = s + data[idx]   # append the closing quote
            idx = idx + 1
            self.get_lexeme(s, tok_start_idx)
        elif isalpha(c):
            tok_start_idx = idx
            s = c
            idx = idx + 1
            while ((idx < len(data)) and
                   (isalpha(data[idx]) or isdigit(data[idx]) or data[idx] in ["\"", "_"])):
                s = s + data[idx]
                idx = idx + 1
            self.get_lexeme(s, tok_start_idx)
        elif c in self.unary_binary_ops:
            tok_start_idx = idx
            if len(data) > (1 + idx) and data[idx + 1] in ['=', '|', '&']:
                c = c + data[idx + 1]
                idx = idx + 1
            self.get_lexeme(c, tok_start_idx)
            idx = idx + 1
        else:
            tok_start_idx = idx
            idx = idx + 1
            self.get_lexeme(c, tok_start_idx)
    tok_start_idx = idx
    ## close the file if not stdin_mode
    if not self.stdin_mode:
        self.File.close()
    ## and manually add an EOF token.
    eof_tok = Lexeme("", Token.EOF)
    eof_tok.set_line_col(self.get_line_col(tok_start_idx))
    self.tokens.append(eof_tok)
    self.tokens.reverse()
    return
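## Standalone caveat sketch for the string branch above (legacy scanner):
## the backslash is copied through but the escaped character is skipped,
## unlike the Ezhil tokenize() which expands \n and \t. Hypothetical helper:
def _legacy_scan_string(data, idx):
    s = data[idx]                    # opening quote
    idx = idx + 1
    while idx < len(data) and data[idx] != '\"':
        s = s + data[idx]
        if data[idx] == '\\':
            idx = idx + 1            # escaped character is skipped
        idx = idx + 1
    return s + data[idx]             # closing quote included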
def get_lexeme(self, chunks, pos):
    if chunks == None:
        return None
    if chunks == "print": tval = Lexeme(chunks, Token.PRINT)
    elif chunks == "if": tval = Lexeme(chunks, Token.IF)
    elif chunks == "elseif": tval = Lexeme(chunks, Token.ELSEIF)
    elif chunks == "else": tval = Lexeme(chunks, Token.ELSE)
    elif chunks == "for": tval = Lexeme(chunks, Token.FOR)
    elif chunks == "while": tval = Lexeme(chunks, Token.WHILE)
    elif chunks == "do": tval = Lexeme(chunks, Token.DO)
    elif chunks == "return": tval = Lexeme(chunks, Token.RETURN)
    elif chunks == "end": tval = Lexeme(chunks, Token.END)
    elif chunks == "def": tval = Lexeme(chunks, Token.DEF)
    elif chunks == "continue": tval = Lexeme(chunks, Token.CONTINUE)
    elif chunks == "break": tval = Lexeme(chunks, Token.BREAK)
    elif chunks == "=": tval = Lexeme(chunks, Token.EQUALS)
    elif chunks == "-": tval = Lexeme(chunks, Token.MINUS)
    elif chunks == "+": tval = Lexeme(chunks, Token.PLUS)
    elif chunks == ">": tval = Lexeme(chunks, Token.GT)
    elif chunks == "<": tval = Lexeme(chunks, Token.LT)
    elif chunks == ">=": tval = Lexeme(chunks, Token.GTEQ)
    elif chunks == "<=": tval = Lexeme(chunks, Token.LTEQ)
    elif chunks == "==": tval = Lexeme(chunks, Token.EQUALITY)
    elif chunks == "!=": tval = Lexeme(chunks, Token.NEQ)
    elif chunks == "*": tval = Lexeme(chunks, Token.PROD)
    elif chunks == "/": tval = Lexeme(chunks, Token.DIV)
    elif chunks == ",": tval = Lexeme(chunks, Token.COMMA)
    elif chunks == "(": tval = Lexeme(chunks, Token.LPAREN)
    elif chunks == ")": tval = Lexeme(chunks, Token.RPAREN)
    elif chunks == "[": tval = Lexeme(chunks, Token.LSQRBRACE)
    elif chunks == "]": tval = Lexeme(chunks, Token.RSQRBRACE)
    elif chunks == "{": tval = Lexeme(chunks, Token.LCURLBRACE)
    elif chunks == "}": tval = Lexeme(chunks, Token.RCURLBRACE)
    elif chunks == ":": tval = Lexeme(chunks, Token.COLON)
    elif chunks == "%": tval = Lexeme(chunks, Token.MOD)
    elif chunks == "^": tval = Lexeme(chunks, Token.EXP)
    elif chunks == "&&": tval = Lexeme(chunks, Token.LOGICAL_AND)
    elif chunks == "&": tval = Lexeme(chunks, Token.BITWISE_AND)
    elif chunks == "||": tval = Lexeme(chunks, Token.LOGICAL_OR)
    elif chunks == "|": tval = Lexeme(chunks, Token.BITWISE_OR)
    elif chunks[0] == "\"" and chunks[-1] == "\"":
        tval = Lexeme(chunks[1:-1], Token.STRING)
    elif isdigit(chunks[0]) or chunks[0] == '+' or chunks[0] == '-':
        # deduce a float or an integer
        if chunks.find('.') >= 0 or chunks.find('e') >= 0 or chunks.find('E') >= 0:
            tval = Lexeme(float(chunks), Token.NUMBER)
        else:
            tval = Lexeme(int(chunks), Token.NUMBER)
    elif isalpha(chunks[0]):
        tval = Lexeme(chunks, Token.ID)
    else:
        raise ScannerException("Lexical error: " + str(chunks) + " at Line, Col " +
                               str(self.get_line_col(pos)) + " in file " + self.fname)
    [l, c] = self.get_line_col(pos)
    tval.set_line_col([l, c])
    tval.set_file_name(self.fname)
    self.tokens.append(tval)
    return l
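## Hedged illustration of the number-deduction rule above: a '.', 'e' or 'E'
## anywhere in the chunk selects float(), everything else is int():
def _deduce_number(chunks):
    if chunks.find('.') >= 0 or chunks.find('e') >= 0 or chunks.find('E') >= 0:
        return float(chunks)   # e.g. "3.14", "1e-3"
    return int(chunks)         # e.g. "42"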
def get_lexeme(self, chunks, pos):
    if self.debug:
        print(u"get_lexeme", chunks, pos)
    if chunks == None:
        return None
    if chunks == u"பதிப்பி": tval = EzhilLexeme(chunks, EzhilToken.PRINT)
    elif chunks == u"தேர்ந்தெடு": tval = EzhilLexeme(chunks, EzhilToken.SWITCH)
    elif chunks == u"தேர்வு": tval = EzhilLexeme(chunks, EzhilToken.CASE)
    elif chunks == u"ஏதேனில்": tval = EzhilLexeme(chunks, EzhilToken.OTHERWISE)
    elif chunks == u"ஆனால்": tval = EzhilLexeme(chunks, EzhilToken.IF)
    elif chunks == u"இல்லைஆனால்": tval = EzhilLexeme(chunks, EzhilToken.ELSEIF)
    elif chunks == u"இல்லை": tval = EzhilLexeme(chunks, EzhilToken.ELSE)
    elif chunks == u"ஆக": tval = EzhilLexeme(chunks, EzhilToken.FOR)
    elif chunks == u"ஒவ்வொன்றாக": tval = EzhilLexeme(chunks, EzhilToken.FOREACH)
    elif chunks == u"இல்": tval = EzhilLexeme(chunks, EzhilToken.COMMA)
    elif chunks == u"வரை": tval = EzhilLexeme(chunks, EzhilToken.WHILE)
    elif chunks == u"செய்": tval = EzhilLexeme(chunks, EzhilToken.DO)
    elif chunks == u"முடியேனில்": tval = EzhilLexeme(chunks, EzhilToken.DOWHILE)
    elif chunks == u"பின்கொடு": tval = EzhilLexeme(chunks, EzhilToken.RETURN)
    elif chunks == u"முடி": tval = EzhilLexeme(chunks, EzhilToken.END)
    elif chunks == u"நிரல்பாகம்": tval = EzhilLexeme(chunks, EzhilToken.DEF)
    elif chunks == u"தொடர்": tval = EzhilLexeme(chunks, EzhilToken.CONTINUE)
    elif chunks == u"நிறுத்து": tval = EzhilLexeme(chunks, EzhilToken.BREAK)
    elif chunks == u"@": tval = EzhilLexeme(chunks, EzhilToken.ATRATEOF)
    elif chunks == u"=": tval = EzhilLexeme(chunks, EzhilToken.EQUALS)
    elif chunks == u"-": tval = EzhilLexeme(chunks, EzhilToken.MINUS)
    elif chunks == u"+": tval = EzhilLexeme(chunks, EzhilToken.PLUS)
    elif chunks == u">": tval = EzhilLexeme(chunks, EzhilToken.GT)
    elif chunks == u"<": tval = EzhilLexeme(chunks, EzhilToken.LT)
    elif chunks == u">=": tval = EzhilLexeme(chunks, EzhilToken.GTEQ)
    elif chunks == u"<=": tval = EzhilLexeme(chunks, EzhilToken.LTEQ)
    elif chunks == u"==": tval = EzhilLexeme(chunks, EzhilToken.EQUALITY)
    elif chunks == u"!=": tval = EzhilLexeme(chunks, EzhilToken.NEQ)
    elif chunks == u"*": tval = EzhilLexeme(chunks, EzhilToken.PROD)
    elif chunks == u"/": tval = EzhilLexeme(chunks, EzhilToken.DIV)
    elif chunks == u",": tval = EzhilLexeme(chunks, EzhilToken.COMMA)
    elif chunks == u"(": tval = EzhilLexeme(chunks, EzhilToken.LPAREN)
    elif chunks == u")": tval = EzhilLexeme(chunks, EzhilToken.RPAREN)
    elif chunks == u"[": tval = EzhilLexeme(chunks, EzhilToken.LSQRBRACE)
    elif chunks == u"]": tval = EzhilLexeme(chunks, EzhilToken.RSQRBRACE)
    elif chunks == u"{": tval = Lexeme(chunks, Token.LCURLBRACE)
    elif chunks == u"}": tval = Lexeme(chunks, Token.RCURLBRACE)
    elif chunks == u":": tval = Lexeme(chunks, Token.COLON)
    elif chunks == u"%": tval = EzhilLexeme(chunks, EzhilToken.MOD)
    elif chunks == u"^": tval = EzhilLexeme(chunks, EzhilToken.EXP)
    elif chunks == u"&&": tval = Lexeme(chunks, EzhilToken.LOGICAL_AND)
    elif chunks == u"&": tval = Lexeme(chunks, EzhilToken.BITWISE_AND)
    elif chunks == u"||": tval = Lexeme(chunks, EzhilToken.LOGICAL_OR)
    elif chunks == u"|": tval = Lexeme(chunks, EzhilToken.BITWISE_OR)
    elif chunks == u"!": tval = Lexeme(chunks, EzhilToken.LOGICAL_NOT)
    elif chunks[0] == u"\"" and chunks[-1] == u"\"":
        tval = EzhilLexeme(chunks[1:-1], EzhilToken.STRING)
    elif chunks[0].isdigit() or chunks[0] == '+' or chunks[0] == '-':
        # deduce a float or an integer
        if chunks.find(u'.') >= 0 or chunks.find(u'e') >= 0 or chunks.find(u'E') >= 0:
            tval = EzhilLexeme(float(chunks), EzhilToken.NUMBER)
        else:
            tval = EzhilLexeme(int(chunks), EzhilToken.NUMBER)
    else:
        ## Tamil/English/mixed identifiers, possibly starting with a leading '_'
        match_obj = re.match(EzhilToken.RE_ALPHA_NUMERIC_, chunks)
        if match_obj:
            if len(match_obj.group(0)) != len(chunks):
                raise ScannerException(u"Lexical error: Invalid identifier name '" +
                                       unicode(chunks) + u"' at Line, Col " +
                                       unicode(self.get_line_col(pos)) +
                                       u" in file " + self.fname)
            tval = EzhilLexeme(chunks, EzhilToken.ID)
        else:
            raise ScannerException(u"Lexical error: " + unicode(chunks) +
                                   u" at Line, Col " +
                                   unicode(self.get_line_col(pos)) +
                                   u" in file " + self.fname)
    [l, c] = self.get_line_col(pos)
    tval.set_line_col([l, c])
    tval.set_file_name(self.fname)
    self.tokens.append(tval)
    if self.debug:
        print(u"Lexer token = ", tval)
    return l
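## Sketch of the whole-string identifier check used above: the chunk must be
## matched by EzhilToken.RE_ALPHA_NUMERIC_ over its full length. The ASCII
## pattern below is an illustrative stand-in, not the real Ezhil regex:
import re
def _is_full_match(pattern, chunks):
    m = re.match(pattern, chunks)
    return bool(m) and len(m.group(0)) == len(chunks)
# _is_full_match(r"[A-Za-z_][A-Za-z0-9_]*", "foo_1")  -> True
# _is_full_match(r"[A-Za-z_][A-Za-z0-9_]*", "foo-1")  -> False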
def tokenize(self, data=None):
    """ do hard-work of tokenizing and put EzhilLexemes into the tokens[] Q """
    if self.debug:
        print(u"Start of Ezhil lexer - begin tokenize")
    if self.stdin_mode:
        if self.debug:
            print(self.tokens)
        ## cleanup the Q for stdin_mode of any EOF that may remain.
        if len(self.tokens) != 0:
            self.match(EzhilToken.EOF)
        if len(self.tokens) != 0:
            raise ScannerException("Lexer: token Q has previous session tokens ")
        self.tokens = list()
    else:
        if hasattr(self.File, 'data'):
            data = self.File.data
        else:
            data = u"".join(self.File.readlines())
    if self.debug:
        print(data)
    idx = 0
    tok_start_idx = 0
    while idx < len(data):
        c = data[idx]
        if self.debug:
            print(idx, c)
        if istamil(c) or c.isalpha() or c == u'_':
            tok_start_idx = idx
            s = c
            idx = idx + 1
            while (idx < len(data)) and self.is_allowed_for_identifier(data[idx]):
                s = s + data[idx]
                idx = idx + 1
            if idx < len(data) and not data[idx].isspace():
                if data[idx] in ['#', '$', '@', '\'', '"']:
                    raise ScannerException(
                        "Lexer: token %s is not valid for identifier, with prefix %s"
                        % (data[idx], s))
            self.get_lexeme(s, tok_start_idx)
        elif c.isspace():  # or c == u' ' or c == u'\t' or c == u'\n'
            if c == u'\n':
                ## actual col = idx - col_idx
                self.update_line_col(idx)
            idx = idx + 1
        elif c == u'\r':
            idx = idx + 1
            continue
        elif c == u'#':
            ## single-line comments like Python/Octave; record them per line
            start = idx
            while idx < len(data) and not (data[idx] in [u'\r', u'\n']):
                idx = idx + 1
            if idx < len(data) and data[idx] == u'\r':
                idx = idx + 1
            end = idx
            self.comments[self.line] = data[start:end]
        elif c.isdigit():  # or c == '+' or c == '-'
            num = c
            tok_start_idx = idx
            idx = idx + 1
            ## FIXME: this prevents you from +.xyz, or -.xyz; use 0.xyz
            ## instead. also may throw an error if we exceed buffer-length.
            if c in [u'+', u'-'] and (idx < len(data)) and not data[idx].isdigit():
                self.get_lexeme(c, idx)
                continue
            in_sci_notation = False
            while ((idx < len(data)) and
                   (data[idx].isdigit() or data[idx] in [u'+', u'-', u'e', u'E', u'.'])):
                if data[idx] in [u'+', u'-'] and not in_sci_notation:
                    break
                elif data[idx] in [u'e', u'E']:
                    in_sci_notation = True
                num = num + data[idx]
                idx = idx + 1
            self.get_lexeme(num, tok_start_idx)
        elif c == u"\"":
            tok_start_idx = idx
            s = c
            idx = idx + 1
            while idx < len(data) and data[idx] != u'\"':
                if data[idx] == u'\\':
                    ## expand the usual escapes; copy anything else verbatim
                    idx = idx + 1
                    if data[idx] == u'n':
                        s = s + u'\n'
                    elif data[idx] == u't':
                        s = s + u'\t'
                    else:
                        s = s + data[idx]
                else:
                    s = s + data[idx]
                idx = idx + 1
            s = s + data[idx]   # append the closing quote
            idx = idx + 1
            self.get_lexeme(s, tok_start_idx)
        elif c in self.unary_binary_ops:
            tok_start_idx = idx
            if len(data) > (1 + idx) and data[idx + 1] in [u'=', u'|', u'&']:
                c = c + data[idx + 1]
                idx = idx + 1
            self.get_lexeme(c, tok_start_idx)
            idx = idx + 1
        elif c == u";":
            # treat as newline
            idx = idx + 1
            continue
        else:
            tok_start_idx = idx
            idx = idx + 1
            self.get_lexeme(c, tok_start_idx)
    tok_start_idx = idx
    ## close the file if not stdin_mode
    if not self.stdin_mode:
        self.File.close()
    ## and manually add an EOF token.
    eof_tok = EzhilLexeme("", EzhilToken.EOF)
    eof_tok.set_line_col(self.get_line_col(tok_start_idx))
    self.tokens.append(eof_tok)
    if self.debug:
        print(u"before reverse")
        self.dump_tokens()
    self.tokens.reverse()
    if self.debug:
        print(u"after reverse")
        self.dump_tokens()
    return
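## Lookahead sketch for the operator branch above: a one-character operator
## widens when the next character is '=', '|' or '&', so ">=", "==", "&&"
## and "||" reach get_lexeme() as single chunks. Standalone illustration:
def _widen_op(data, idx):
    c = data[idx]
    if len(data) > idx + 1 and data[idx + 1] in (u'=', u'|', u'&'):
        c = c + data[idx + 1]
    return c
# _widen_op(u"a >= b", 2) -> u">="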