def __match__identifier__(self, c):
    """Scan an identifier (which may turn out to be a keyword) starting at ``c``.

    Following characters are fetched with ``self.pre_read`` for as long as
    they are an underscore, a digit or a letter.  The collected text is
    lower-cased and looked up in ``Utils.StringToKeyWordMap``; when that
    lookup yields nothing truthy an identifier element is built instead.
    The resulting element is appended to ``self.tokens``.

    Returns the first character that does not belong to the identifier.
    When ``self.pre_read`` yields the -1 sentinel, -1 is returned and no
    token is appended.
    """
    name = c
    nxt = self.pre_read()
    while True:
        if nxt == -1:
            # Sentinel from pre_read: return it without emitting a token.
            return nxt
        if not (nxt == '_' or Utils.is_digit(nxt) or Utils.is_alpha(nxt)):
            break
        name += nxt
        nxt = self.pre_read(name)

    if nxt in "$@~":
        # These characters may never directly follow an identifier.
        token = Utils.GetErrorElement(self.lineno, "invalid identifier")
    else:
        name = name.lower()
        # Keywords take precedence; anything else is a plain identifier.
        token = Utils.StringToKeyWordMap.get(name)
        if not token:
            token = Utils.StringToIdentifierElement(name)
    self.tokens.append(token)
    return nxt
def __match_string__(self):
    """Scan the body of a string literal up to its closing double quote.

    Characters are taken from ``self.read`` one at a time.  On the closing
    ``"`` a string-literal element holding the collected text is appended
    to ``self.tokens``.  If -1 or a newline shows up first, an
    "invalid string" error element is appended instead and
    ``self.readline()`` is called (per the original comment, to move on to
    the next line).

    Returns whatever ``self.pre_read()`` yields afterwards, in both the
    success and the error case.
    """
    text = ""
    while True:
        ch = self.read()
        if ch == -1 or ch == '\n':
            # Unterminated string: report it and move on to the next line.
            self.tokens.append(
                Utils.GetErrorElement(self.lineno, "invalid string"))
            self.readline()
            return self.pre_read()
        if ch == '"':
            break
        text += ch

    self.tokens.append(Utils.StringToStringLiteral(text))
    return self.pre_read()
def __match_number__(self, c):
    """Scan a numeric literal whose first character is the digit ``c``.

    The literal is treated as:

    * base 16 when it starts with ``0`` followed by a character accepted by
      ``Utils.is_Hex`` (presumably the ``x``/``X`` marker -- confirm against
      ``Utils.is_Hex``);
    * base 8 when it starts with ``0`` followed by another digit;
    * base 10 otherwise, where a ``.`` makes it a floating-point literal.

    On success a float or integer literal element is appended to
    ``self.tokens`` and the first character after the literal is returned.
    If a letter (or, inside an octal literal, the digit 8 or 9) is glued to
    the literal, an "invalid number" error element is appended,
    ``self.readline()`` is called and the result of ``self.pre_read()`` is
    returned.  If the very first look-ahead yields -1, -1 is returned and
    no token is appended.
    """
    success = True
    num = c
    base = 10
    c = self.pre_read(c)
    if c == -1:
        # NOTE(review): a one-digit number right before the -1 sentinel is
        # dropped here, whereas longer numbers are still emitted below --
        # confirm whether that is intended.
        return c
    if num == '0' and Utils.is_Hex(c):
        # Keep the prefix character in the literal text and switch to hex.
        num += c
        base = 16
        c = self.read()
    elif num == '0' and Utils.is_digit(c):
        base = 8
    while c != -1:
        # Bounds are inclusive so that a/f/A/F count as hex digits.
        if base == 16 and (Utils.is_digit(c)
                           or 'a' <= c <= 'f' or 'A' <= c <= 'F'):
            num += c
        elif base == 10 and (Utils.is_digit(c) or c == '.'):
            num += c
        # Octal digits are 0 through 7 only.
        elif base == 8 and (Utils.is_digit(c) and '0' <= c <= '7'):
            num += c
        else:
            # A letter directly after the digits makes the literal invalid.
            # A digit can only reach this branch in base 8 (i.e. 8 or 9),
            # which is not a valid octal digit either.
            if Utils.is_alpha(c) or Utils.is_digit(c):
                success = False
            break
        c = self.pre_read()
    if not success:
        self.tokens.append(
            Utils.GetErrorElement(self.lineno, "invalid number"))
        self.readline()
        return self.pre_read()
    if '.' in num and base == 10:
        # Floating-point literal.
        self.tokens.append(Utils.StringToFloatLiteral(num))
    else:
        # Integer literal in the detected base.
        self.tokens.append(Utils.StringToIntegerLiteral(num, base))
    return c
def analyse(self):
    """Run the lexical-analysis loop.

    Characters are taken from ``self.read``; blanks and line breaks are
    skipped, and every other character is dispatched to the matching
    ``__match_*__`` helper, each of which returns the next character to
    examine.  The loop stops as soon as -1 or the ``'\\uFFFF'`` marker is
    seen.

    Returns None, except when -1 is encountered right after recovering
    from an unrecognised character, in which case -1 is returned.
    """
    # Operator start characters mapped to the characters that may follow
    # them to form a longer operator.
    followers_by_operator = {
        '-': ('-', '='),
        '+': ('+', '='),
        '*': ('=',),
        '/': ('=', '/'),
        '|': ('|',),
        '&': ('&',),
        '<': ('=', '>', '<'),
        '>': ('=', '>'),
        ':': ('=',),
        '!': ('=',),
        '=': ('=',),
    }

    c = self.read()
    while c != -1:
        # Skip blanks and line breaks before looking for a token.
        while True:
            if c == '\uFFFF':  # INVALID CHARACTER
                return
            if c == '\n' or c == '\r':
                # Line break: move on to the next line.
                self.readline()
            elif c != ' ' and c != '\t':
                # Anything else starts a token.
                break
            c = self.read()
            if c == -1:
                return

        if c in followers_by_operator:
            # Hand the helper a fresh list on every call.
            c = self.__match_non_word__(c, list(followers_by_operator[c]))
        elif c in ';()[]{},^':
            c = self.__match_non_word__(c, [])
        elif c == '"':
            c = self.__match_string__()
        elif Utils.is_digit(c):
            c = self.__match_number__(c)
        elif Utils.is_alpha(c) or c == '_':
            c = self.__match__identifier__(c)
        else:
            # Unrecognised character: record an error and resume on the
            # next line.
            self.tokens.append(
                Utils.GetErrorElement(self.lineno, "invalid identifier"))
            self.readline()
            c = self.pre_read()
            if c == -1:
                return c
            continue

        if c == -1:
            return