def restore_tokens(self, file: _io.BytesIO):
    """
    Restores the token list from a binary stream.

    Reads one flag byte per token: 0 marks the end of the stream, and
    flags 1-4 select the token type to rebuild.

    :param file: the binary stream to read tokens from
    """
    self.tokens.clear()
    while True:
        flag = int.from_bytes(file.read(1), "big")
        if flag == 0:
            self.tokens.append(stl.Token((stl.EOF, None)))
            break
        else:
            line = int(stl.read_string(file))
            file_name = stl.read_string(file)
            lf = line, file_name
            if flag == 1:
                token = stl.NumToken(lf, stl.read_string(file))
            elif flag == 2:
                token = stl.LiteralToken(lf, stl.read_string(file))
            elif flag == 3:
                token = stl.IdToken(lf, stl.read_string(file))
            elif flag == 4:
                token = stl.DocToken(lf, stl.read_string(file))
            else:
                raise stl.ParseException("Unknown flag: {}".format(flag))
            self.tokens.append(token)
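# A hedged sketch of the stream layout restore_tokens assumes (the matching
# serializer is not shown in this section): a one-byte flag per token, where
# 0 terminates the stream and flags 1-4 are each followed by three strings
# read via stl.read_string -- the line number, the source file name, and the
# token's text. Hypothetical round trip, assuming these methods belong to a
# Lexer class and "parsed.stk" is a made-up file name:
#
#     import io
#     lexer = Lexer()
#     with open("parsed.stk", "rb") as f:
#         lexer.restore_tokens(io.BytesIO(f.read()))
#     # lexer.tokens now ends with the EOF token appended for flag 0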
def tokenize(self, source):
    """
    Tokenize the spl source code into a list of tokens, stored in the memory
    of this Lexer.

    :param source: the source code, either an opened file or a list of lines
    :return: None
    """
    self.tokens.clear()
    if self.import_lang and self.file_name[-7:] != "lang.sp":
        # every file except lang.sp itself implicitly imports the lang namespace
        self.tokens += [
            stl.IdToken(LINE_FILE, "import"),
            stl.IdToken(LINE_FILE, "namespace"),
            stl.LiteralToken(LINE_FILE, "lang")
        ]
        self.find_import(0, 3)
    if isinstance(source, list):
        self.tokenize_text(source)
    else:
        self.tokenize_file(source)
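# Hypothetical usage of tokenize, assuming a Lexer configured with the
# import_lang and file_name attributes used above; both input forms take the
# same path apart from the final dispatch (the spl snippet is illustrative
# only, not confirmed syntax):
#
#     lexer.tokenize(open("demo.sp"))          # an opened file
#     lexer.tokenize(["a = 1", "print(a)"])    # a list of source lines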
def proceed_line(self, line: str, line_num: (int, str), in_doc: bool, doc: str) -> (bool, str):
    """
    Tokenize a single line.

    :param line: the line to be processed
    :param line_num: the line number and the name of the source file
    :param in_doc: whether the lexer is inside a docstring before processing this line
    :param doc: the docstring accumulated so far
    :return: a tuple of (whether the lexer is still inside a docstring after
             processing this line, the updated docstring)
    """
    in_single = False
    in_double = False
    literal = ""
    non_literal = ""
    length = len(line)
    i = -1
    while i < length - 1:
        i += 1
        ch = line[i]
        if not in_double and not in_single:
            if in_doc:
                if ch == "*" and i < length - 1 and line[i + 1] == "/":
                    in_doc = False
                    # advance one here; the loop header's increment then steps
                    # past the "/" (the original "i += 2" skipped one extra char)
                    i += 1
                    continue
            else:
                if ch == "/" and i < length - 1 and line[i + 1] == "*":
                    in_doc = True
                    i += 1
        if not in_doc:
            if len(doc) > 0:
                # flush the finished docstring, stripping its leading "/*"
                self.tokens.append(stl.DocToken(line_num, doc[2:]))
                doc = ""
            if in_double:
                if ch == '"':
                    in_double = False
                    self.tokens.append(
                        stl.LiteralToken(line_num, literal, True))
                    literal = ""
                    continue
            elif in_single:
                if ch == "'":
                    in_single = False
                    self.tokens.append(
                        stl.LiteralToken(line_num, literal, False))
                    literal = ""
                    continue
            else:
                if ch == '"':
                    in_double = True
                    self.line_tokenize(non_literal, line_num)
                    non_literal = ""
                    continue
                elif ch == "'":
                    in_single = True
                    self.line_tokenize(non_literal, line_num)
                    non_literal = ""
                    continue
            if in_single or in_double:
                literal += ch
            else:
                non_literal += ch
                if len(non_literal) > 1 and non_literal[-2:] == "//":
                    # "//" starts a line comment: tokenize what precedes it
                    # and ignore the rest of the line
                    self.line_tokenize(non_literal[:-2], line_num)
                    non_literal = ""
                    break
        else:
            doc += ch
    if len(non_literal) > 0:
        self.line_tokenize(non_literal, line_num)
    return in_doc, doc
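# An illustrative trace of proceed_line's state machine, under the syntax the
# code above implements: "//" line comments, "/** ... */" docstrings, and
# single-/double-quoted literals. The file name "demo.sp" is made up:
#
#     in_doc, doc = lexer.proceed_line('x = "hi" // comment', (1, "demo.sp"),
#                                      False, "")
#     # the code before the quote goes to line_tokenize, a LiteralToken is
#     # emitted for "hi", and the trailing "//" comment is discarded; in_doc
#     # and doc come back unchanged (False, ""), since no docstring was
#     # opened on this line and any open one would carry over to the next.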