def run(self):
    stream = self.context.stream

    # If the macro tokenizer was called with an unknown opening delimiter sequence,
    # mark it as an error and exit
    if self.opening_delimiter != self.__class__.OPENING_DELIMITER:
        yield Tokens.ERROR(
            self.opening_delimiter,
            self.opening_delimiter_position,
            self.opening_delimiter_position_after,
            "Comment tokenizer called with unknown opening sequence “%s”" % self.opening_delimiter)
        return

    stream.push()

    opening_comment_token = Tokens.BEGIN_MACRO(
        self.__class__.OPENING_DELIMITER,
        self.opening_delimiter_position,
        self.opening_delimiter_position_after)
    yield opening_comment_token

    value = ""
    value_first_position = stream.copy_absolute_position()
    while True:
        if stream.next_is_EOF():
            yield Tokens.COMMENT(value, value_first_position, stream.copy_absolute_position())
            yield Tokens.END_MACRO(opening_comment_token, "",
                                   stream.copy_absolute_position(),
                                   stream.copy_absolute_position())
            stream.pop()
            return
        value += stream.read()
def interpolation(context):
    stream = context.stream
    readtable = context.readtable

    if stream.next_is_EOF():
        return
    else:
        seq, properties = readtable.probe(stream)
        assert 'type' in properties
        seq_type = properties.type

        if seq_type == RT.ISOLATED_CONSTITUENT:
            yield Tokens.CONSTITUENT(seq,
                                     stream.absolute_position_of_unread_seq(seq),
                                     stream.copy_absolute_position())
        elif seq_type == RT.MACRO:
            for token in tokenize_macro(context, seq, properties):
                yield token
        elif seq_type == RT.CONSTITUENT:
            first_position = stream.absolute_position_of_unread_seq(seq)
            concatenation = seq + read_and_concatenate_constituent_sequences(stream, readtable)
            yield Tokens.CONSTITUENT(concatenation, first_position, stream.copy_absolute_position())
        # Step 3
        else:
            context.error = True
            first_position = stream.absolute_position_of_unread_seq(seq)
            error_message = (properties.error_message if 'error_message' in properties
                             else "Unexpected character '%s' in interpolation." % seq)
            yield Tokens.ERROR(seq, first_position, stream.copy_absolute_position(), error_message)
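# Hedged usage sketch (not part of the original module): `interpolation` is a
# generator, so a caller normally re-yields its tokens from its own `run` loop,
# exactly as the comment and string tokenizers below do after reading an
# interpolation character.  The helper name `_drain_interpolation` is
# hypothetical; it only shows that draining the generator forces the
# interpolated tokens to be produced.
def _drain_interpolation(context):
    # Collect every token produced for one interpolated expression; assumes
    # `context` carries the usual `stream` and `readtable` attributes.
    return list(interpolation(context))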
def run(self):
    readtable = self.context.readtable
    stream = self.context.stream
    """:type : CharacterStream"""

    self.context.expected_closing_seqs += 1

    # emit a BEGIN_MACRO token, and remember it
    opening_delimiter_token = Tokens.BEGIN_MACRO(
        self.opening_delimiter,
        self.opening_delimiter_position,
        self.opening_delimiter_position_after)
    yield opening_delimiter_token

    node_type = None

    # Stage 2 (Parsing of the form)
    while True:
        # 2.1
        if stream.next_is_EOF():
            self.context.expected_closing_seqs -= 1
            yield Tokens.ERROR(
                self.opening_delimiter,
                stream.copy_absolute_position(),
                stream.copy_absolute_position(),
                "Expected `%s` closing sequence was not found." % self.closing_delimiter)
            return
        else:
            seq, properties = readtable.probe(stream)
            assert 'type' in properties
            seq_type = properties.type

            # 2.1
            if seq_type == RT.CLOSING:
                self.context.expected_closing_seqs -= 1
                if seq != self.closing_delimiter:
                    yield Tokens.ERROR(
                        seq,
                        stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position(),
                        "Expected closing sequence '%s' was not found; `%s` was found instead."
                        % (self.closing_delimiter, seq))
                    return
                elif node_type is None:
                    yield Tokens.ERROR(
                        seq,
                        stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position(),
                        "Empty form/seq.")
                else:
                    opening_delimiter_token.node_type = node_type
                    # return the delimiter tokenizers to their usual selves
                    self.set_delimiter_tokenizers(readtable, "DelimiterTokenizer", "LispModeTokenizer")
                    yield Tokens.END_MACRO(
                        opening_delimiter_token, seq,
                        stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position())
                    return
            # 2.2
            elif seq_type == RT.WHITESPACE:
                pass
            # 2.3
            elif seq_type == RT.NEWLINE:
                pass
            # 2.5
            elif seq_type == RT.PUNCTUATION:
                raise TokenizingError(
                    stream.copy_absolute_position(),
                    "Unexpected punctuation '%s' inside lisp mode." % seq)
                # yield Tokens.PUNCTUATION(self.last_begin_token, seq, stream.absolute_position_of_unread_seq(seq), stream.copy_absolute_position())
            # 2.6
            elif seq_type == RT.MACRO:
                assert 'tokenizer' in properties
                abs_macro_seq_position = stream.absolute_position_of_unread_seq(seq)
                abs_macro_seq_position_after = stream.copy_absolute_position()
                TokenizerClass = self.context[properties.tokenizer]
                assert issubclass(TokenizerClass, Tokenizer)
                tokenizer = TokenizerClass(self.context, seq,
                                           abs_macro_seq_position,
                                           abs_macro_seq_position_after)
                for token in tokenizer.run():
                    yield token
            # 2.7
            elif seq_type == RT.CONSTITUENT or seq_type == RT.ISOLATED_CONSTITUENT:
                first_position = stream.absolute_position_of_unread_seq(seq)
                concatenation = seq + util.read_and_concatenate_constituent_sequences_ignore_isolation(stream, readtable)
                yield Tokens.CONSTITUENT(concatenation, first_position, stream.copy_absolute_position())

                if node_type is None:
                    seq = self.read_non_whitespace_seq(readtable, stream)
                    if seq == ',':
                        node_type = "seq"
                    else:
                        if seq is not None:
                            stream.unread_seq(seq)
                        node_type = "form"
                elif node_type == "seq":
                    seq = self.read_non_whitespace_seq(readtable, stream)
                    if seq == ',':
                        pass
                    elif seq is None:
                        yield Tokens.ERROR(
                            seq,
                            stream.copy_absolute_position(),
                            stream.copy_absolute_position(),
                            "Expected ',' or ')' inside Lisp-mode seq.")
                    elif seq != self.closing_delimiter:
                        yield Tokens.ERROR(
                            seq,
                            stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position(),
                            "Expected ',' or '%s' inside Lisp-mode seq, found '%s'."
                            % (self.closing_delimiter, seq))
                    else:
                        stream.unread_seq(seq)
                elif node_type == "form":
                    pass
                else:
                    raise NotImplementedError()
            # 2.8
            elif seq_type == RT.INVALID:
                first_position = stream.absolute_position_of_unread_seq(seq)
                error_message = (properties.error_message if 'error_message' in properties
                                 else "Invalid character found in stream.")
                yield Tokens.ERROR(seq, first_position, stream.copy_absolute_position(), error_message)
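# Hedged illustration of the node_type decision in 2.7 above: after the first
# constituent, a comma classifies the node as a "seq" (e.g. `(a, b, c)`), and
# anything else classifies it as a "form" (e.g. `(f x y)`); the chosen label is
# attached to the BEGIN_MACRO token when the closing sequence is reached.  The
# helper below is hypothetical and only documents that mapping.
def _classify_lisp_node(separator_after_first_constituent):
    return "seq" if separator_after_first_constituent == ',' else "form"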
def run(self):
    stream = self.context.stream

    # If the macro tokenizer was called with an unknown opening delimiter sequence,
    # mark it as an error and exit
    if self.opening_delimiter != self.__class__.OPENING_DELIMITER:
        yield Tokens.ERROR(
            self.opening_delimiter,
            self.opening_delimiter_position,
            self.opening_delimiter_position_after,
            "Comment tokenizer called with unknown opening sequence “%s”" % self.opening_delimiter)
        return

    stream.push()
    seen_escape = False

    opening_comment_token = Tokens.BEGIN_MACRO(
        self.__class__.OPENING_DELIMITER,
        self.opening_delimiter_position,
        self.opening_delimiter_position_after)
    yield opening_comment_token

    value = ""
    value_first_position = stream.copy_absolute_position()
    while True:
        if stream.next_is_EOF():
            yield Tokens.COMMENT(value, value_first_position, stream.copy_absolute_position())
            yield Tokens.END_MACRO(opening_comment_token, "",
                                   stream.copy_absolute_position(),
                                   stream.copy_absolute_position())
            stream.pop()
            return

        char = stream.read()
        if char == '\\':
            if seen_escape:
                value += '\\'
                seen_escape = False
            else:
                seen_escape = True
        else:
            if seen_escape:
                if char == self.__class__.CLOSING_DELIMITER:
                    value += self.__class__.CLOSING_DELIMITER
                elif char == '$':
                    value += '$'
                else:
                    yield Tokens.COMMENT(value, value_first_position,
                                         stream.absolute_position_of_unread())
                    value = ""
                    value_first_position = stream.copy_absolute_position()
                    yield Tokens.ERROR(
                        char,
                        stream.absolute_position_of_unread(),
                        stream.copy_absolute_position(),
                        "Unknown escape code sequence “%s”." % char)
                seen_escape = False
            else:
                if char == self.__class__.CLOSING_DELIMITER:
                    yield Tokens.COMMENT(value, value_first_position,
                                         stream.absolute_position_of_unread())
                    yield Tokens.END_MACRO(
                        opening_comment_token,
                        self.__class__.CLOSING_DELIMITER,
                        stream.absolute_position_of_unread(),
                        stream.copy_absolute_position())
                    stream.pop()
                    return
                elif char == '$':
                    yield Tokens.COMMENT(value, value_first_position,
                                         stream.absolute_position_of_unread())
                    value = ""
                    value_first_position = stream.copy_absolute_position()
                    for token in util.interpolation(self.context):
                        yield token
                else:
                    value += char
def run(self):
    stream = self.context.stream
    readtable = self.context.readtable

    # If the macro tokenizer was called with an unknown opening delimiter sequence,
    # mark it as an error and exit
    if self.opening_delimiter not in self.__class__.DELIMITER_PAIRS:
        yield Tokens.ERROR(
            self.opening_delimiter,
            self.opening_delimiter_position,
            self.opening_delimiter_position_after,
            "Unregistered delimiter pair for opening sequence “%s”." % self.opening_delimiter)
        return

    opening_delimiter_token = Tokens.BEGIN_MACRO(
        self.opening_delimiter,
        self.opening_delimiter_position,
        self.opening_delimiter_position_after)

    skip_white_lines(stream, readtable)

    # If there are no tokens following the opening delimiter sequence
    if stream.next_is_EOF():
        yield Tokens.ERROR(
            self.opening_delimiter,
            self.opening_delimiter_position,
            self.opening_delimiter_position_after,
            "No characters found after opening delimiter '%s'." % self.opening_delimiter)
        return

    self.context.expected_closing_seqs += 1
    yield opening_delimiter_token

    stream.push()
    tokenizer = self.context.DefaultTokenizer(self.context)
    for token in tokenizer.run():
        yield token
    stream.pop()

    skip_white_lines(stream, readtable)
    if stream.next_is_EOF():
        yield Tokens.ERROR(
            self.opening_delimiter,
            stream.copy_absolute_position(),
            stream.copy_absolute_position(),
            "Expected closing delimiter «%s», matching opening delimiter «%s» at position %s."
            % (self.closing_delimiter, self.opening_delimiter,
               self.opening_delimiter_position.nameless_str))
    else:
        seq, properties = readtable.probe(stream)
        if properties.type == RT.CLOSING:
            self.context.expected_closing_seqs -= 1
            if seq != self.closing_delimiter:
                yield Tokens.ERROR(
                    seq,
                    stream.absolute_position_of_unread_seq(seq),
                    stream.copy_absolute_position(),
                    "Found `%s`, but expected `%s`." % (seq, self.closing_delimiter))
                closing_delimiter_token = Tokens.END_MACRO(
                    opening_delimiter_token, "",
                    stream.copy_absolute_position(),
                    stream.copy_absolute_position())
            else:
                closing_delimiter_token = Tokens.END_MACRO(
                    opening_delimiter_token, seq,
                    stream.absolute_position_of_unread_seq(seq),
                    stream.copy_absolute_position())
            self.on_close()
            yield closing_delimiter_token
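# Hedged sketch (illustrative only): run() above validates self.opening_delimiter
# against the class-level DELIMITER_PAIRS table, so a concrete delimiter
# tokenizer is expected to declare a mapping from opening to closing sequences
# along these lines.  The exact pairs below are an assumption, not taken from
# this module.
_EXAMPLE_DELIMITER_PAIRS = {'(': ')', '[': ']', '{': '}'}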
def run(self):
    readtable = self.context.readtable
    stream = self.context.stream
    """:type : IndentedCharacterStream"""

    expected_closing_seqs = self.context.expected_closing_seqs
    emmit_restart_tokens = self.context.emmit_restart_tokens
    restart_token_count = 0

    # Numbers #.#.# refer to the list in the blog post
    # https://bloff.github.io/lyc/lexing,/syntax/2015/08/30/lexer-2.html
    while True:
        # Stage 1 (Preparation)

        # Find the first non-whitespace, non-newline sequence,
        # and make sure that it begins at the first column
        util.skip_white_lines(stream, readtable)

        # If we find an EOF, we're done tokenizing the stream
        if stream.next_is_EOF():
            # emit any hanging restart tokens
            for _ in range(restart_token_count):
                yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
            return

        # [ Handling closing sequences ]
        #
        # If we find an (expected) closing sequence, we're also done;
        # if the sequence was not expected, an error token is emitted
        seq, properties = readtable.probe(stream)
        if properties.type == RT.CLOSING:
            if expected_closing_seqs > 0:
                stream.unread_seq(seq)
                # emit any hanging restart tokens
                for _ in range(restart_token_count):
                    yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                return
            else:
                yield Tokens.ERROR(
                    seq,
                    stream.absolute_position_of_unread_seq(seq),
                    stream.copy_absolute_position(),
                    "Unexpected closing sequence `%s`." % seq)
        # Any other sequence is unread
        stream.unread_seq(seq)

        # [ Signaling restart positions ]
        #
        # If we are asked to emit restart-position tokens, which serve to pinpoint locations where the
        # default tokenizer can safely restart, then we do so, and we keep track of how many such tokens
        # must be terminated
        if emmit_restart_tokens and stream.current_relative_position.column == 1 and expected_closing_seqs <= 0:
            restart_token_count += 1
            yield Tokens.VALID_RESTART_FROM(stream.copy_absolute_position())

        # [ The first BEGIN token ]
        # emit a BEGIN token, and remember it
        self.last_begin_token = Tokens.BEGIN(stream.copy_absolute_position())
        yield self.last_begin_token

        # [ Stage 2 - Parsing of segment's first line ]
        while True:
            # 2.1
            if stream.next_is_EOF():
                yield Tokens.END(self.last_begin_token, stream.copy_absolute_position())
                # emit any hanging restart tokens
                for _ in range(restart_token_count):
                    yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                return
            else:
                seq, properties = readtable.probe(stream)
                assert 'type' in properties
                seq_type = properties.type

                # 2.1
                if seq_type == RT.CLOSING:
                    if expected_closing_seqs <= 0:
                        yield Tokens.ERROR(
                            seq,
                            stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position(),
                            "Unexpected closing sequence `%s`." % seq)
                    else:
                        stream.unread_seq(seq)
                        yield Tokens.END(self.last_begin_token, stream.copy_absolute_position())
                        # emit any hanging restart tokens
                        for _ in range(restart_token_count):
                            yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                        return
                # 2.2
                elif seq_type == RT.WHITESPACE:
                    pass
                # 2.3
                elif seq_type == RT.NEWLINE:
                    break  # goto Stage 3
                # 2.4
                elif seq_type == RT.ISOLATED_CONSTITUENT:
                    yield Tokens.CONSTITUENT(
                        seq,
                        stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position())
                # 2.5
                elif seq_type == RT.PUNCTUATION:
                    yield Tokens.PUNCTUATION(
                        self.last_begin_token, seq,
                        stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position())
                # 2.6
                elif seq_type == RT.MACRO:
                    for token in util.tokenize_macro(self.context, seq, properties):
                        yield token
                # 2.7
                elif seq_type == RT.CONSTITUENT:
                    first_position = stream.absolute_position_of_unread_seq(seq)
                    concatenation = seq + util.read_and_concatenate_constituent_sequences(stream, readtable)
                    yield Tokens.CONSTITUENT(concatenation, first_position, stream.copy_absolute_position())
                # 2.8
                elif seq_type == RT.INVALID:
                    first_position = stream.absolute_position_of_unread_seq(seq)
                    error_message = (properties.error_message if 'error_message' in properties
                                     else "Invalid character found in stream.")
                    yield Tokens.ERROR(seq, first_position, stream.copy_absolute_position(), error_message)

        # Stage 3 (Parsing of sub-blocks)
        W = MAX_INT
        while True:
            util.skip_white_lines(stream, readtable)
            relative_column_number = stream.visual_column

            # 3.2
            if stream.next_is_EOF():
                yield Tokens.END(self.last_begin_token, stream.copy_absolute_position())
                # emit any hanging restart tokens
                for _ in range(restart_token_count):
                    yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                return

            # 3.2.1
            if relative_column_number == 1:
                yield Tokens.END(self.last_begin_token, stream.copy_absolute_position())
                # DON'T emit hanging restart tokens here
                break  # goto Stage 1 again
            # 3.2.2
            elif relative_column_number > W:
                seq, properties = readtable.probe(stream)
                if properties.type == RT.CLOSING:
                    if expected_closing_seqs <= 0:
                        yield Tokens.ERROR(
                            seq,
                            stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position(),
                            "Unexpected closing sequence `%s`." % seq)
                    else:
                        yield Tokens.END(
                            self.last_begin_token,
                            stream.absolute_position_of_unread_seq(seq))
                        # emit any hanging restart tokens
                        for _ in range(restart_token_count):
                            yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                        stream.unread_seq(seq)
                        return
                else:
                    raise TokenizingError(
                        stream.absolute_position_of_unread_seq(seq),
                        "Unexpected indentation when parsing sub-blocks.")
            # 3.2.3
            elif relative_column_number < W:
                yield Tokens.INDENT(self.last_begin_token, stream.copy_absolute_position())
                W = relative_column_number
            # 3.2.4
            else:
                # when relative_column_number == W, finish if the first non-whitespace character is a closing seq
                seq, properties = readtable.probe(stream)
                if properties.type == RT.CLOSING:
                    if expected_closing_seqs <= 0:
                        yield Tokens.ERROR(
                            seq,
                            stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position(),
                            "Unexpected closing sequence `%s`." % seq)
                    else:
                        yield Tokens.END(
                            self.last_begin_token,
                            stream.absolute_position_of_unread_seq(seq))
                        # emit any hanging restart tokens
                        for _ in range(restart_token_count):
                            yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                        stream.unread_seq(seq)
                        return
                else:
                    stream.unread_seq(seq)

            # 3.3
            self.context.stream.push()
            tokenizer = self.context.DefaultTokenizer(self.context)
            for token in tokenizer.run():
                yield token
            self.context.stream.pop()
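# Hedged sketch of the restart-token bookkeeping above: every VALID_RESTART_FROM
# emitted at column 1 is balanced by a VALID_RESTART_TO before run() returns, so
# a consumer can pair them up to find spans where tokenization may safely
# restart.  The pairing helper below is hypothetical; only the token names are
# taken from this module, and it assumes the Tokens.* constructors are classes.
def _restart_spans(tokens):
    spans, starts = [], []
    for token in tokens:
        if isinstance(token, Tokens.VALID_RESTART_FROM):
            starts.append(token)
        elif isinstance(token, Tokens.VALID_RESTART_TO) and starts:
            spans.append((starts.pop(), token))
    return spans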
def run(self):
    stream = self.context.stream
    readtable = self.context.readtable

    if stream.next_is_EOF():
        yield Tokens.ERROR(
            self.__class__.MY_OPENING_DELIMITER,
            self.opening_delimiter_position,
            self.opening_delimiter_position_after,
            "No characters found after opening delimiter %s." % repr(self.__class__.MY_OPENING_DELIMITER))
        return

    stream.push()
    seen_escape = False

    opening_string_token = Tokens.BEGIN_MACRO(
        self.__class__.MY_OPENING_DELIMITER,
        self.opening_delimiter_position,
        self.opening_delimiter_position_after)
    yield opening_string_token

    value = ""
    value_first_position = stream.copy_absolute_position()
    while True:
        if stream.next_is_EOF():
            stream.pop()
            if self.__class__.ALLOW_RUNOFF_CLOSING_DELIMITER:
                position_before_skipping_white_lines = stream.copy_absolute_position()
                skip_white_lines(stream, readtable)
                position_before_attempting_to_read_k_chars = stream.copy_absolute_position()
                k_chars = stream.readn(self.__class__.MY_CLOSING_DELIMITER_LENGTH)
                if k_chars != self.__class__.MY_CLOSING_DELIMITER:
                    yield Tokens.ERROR(
                        k_chars,
                        position_before_attempting_to_read_k_chars,
                        stream.copy_absolute_position(),
                        "Expected closing string-delimiter «%s», matching opening delimiter «%s» at position %s.%s"
                        % (self.__class__.MY_CLOSING_DELIMITER,
                           self.__class__.MY_OPENING_DELIMITER,
                           self.opening_delimiter_position.nameless_str,
                           "" if k_chars is None else " Found " + repr(k_chars)))
                    return
                else:
                    value += '\n'
                    yield Tokens.STRING(value, value_first_position, position_before_skipping_white_lines)
                    yield Tokens.END_MACRO(
                        opening_string_token,
                        self.__class__.MY_CLOSING_DELIMITER,
                        stream.absolute_position_of_unread(),
                        stream.copy_absolute_position())
                    return
            else:
                yield Tokens.ERROR(
                    "",
                    stream.copy_absolute_position(),
                    stream.copy_absolute_position(),
                    "Expected closing string-delimiter «%s», matching opening delimiter «%s» at position %s."
                    % (self.__class__.MY_CLOSING_DELIMITER,
                       self.__class__.MY_OPENING_DELIMITER,
                       self.opening_delimiter_position.nameless_str))
                return

        char = stream.read()
        if char == '\\':
            if seen_escape:
                value += '\\'
                seen_escape = False
            else:
                seen_escape = True
        else:
            if seen_escape:
                if char in self.__class__.MY_ESCAPE_CHARS:
                    value += self.__class__.MY_ESCAPE_CHARS[char]
                elif char == self.__class__.MY_INTERPOL_CHAR:
                    value += char
                elif char == self.__class__.MY_CLOSING_DELIMITER:
                    value += char
                else:
                    yield Tokens.STRING(value, value_first_position, stream.absolute_position_of_unread())
                    value = ""
                    value_first_position = stream.copy_absolute_position()
                    yield Tokens.ERROR(
                        char,
                        stream.absolute_position_of_unread(),
                        stream.copy_absolute_position(),
                        "Unknown escape code sequence “%s”." % char)
                seen_escape = False
            else:
                if char == self.MY_INTERPOL_CHAR:
                    yield Tokens.STRING(value, value_first_position, stream.absolute_position_of_unread())
                    value = ""
                    value_first_position = stream.copy_absolute_position()
                    for token in util.interpolation(self.context):
                        yield token
                else:
                    value += char
                    last_k_chars = value[-self.__class__.MY_CLOSING_DELIMITER_LENGTH:]
                    if last_k_chars == self.__class__.MY_CLOSING_DELIMITER:
                        value = value[:-self.__class__.MY_CLOSING_DELIMITER_LENGTH]
                        closing_delimiter_first_position = stream.absolute_position_of_unread(
                            self.__class__.MY_CLOSING_DELIMITER_LENGTH)
                        yield Tokens.STRING(value, value_first_position, closing_delimiter_first_position)
                        yield Tokens.END_MACRO(
                            opening_string_token,
                            self.__class__.MY_CLOSING_DELIMITER,
                            closing_delimiter_first_position,
                            stream.copy_absolute_position())
                        stream.pop()
                        return
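# Hedged sketch: the string tokenizer above is driven entirely by class-level
# configuration attributes.  The values below illustrate one plausible
# configuration (a double-quoted string with $-interpolation); they are
# assumptions for illustration, not definitions taken from this module.
_EXAMPLE_STRING_TOKENIZER_CONFIG = {
    "MY_OPENING_DELIMITER": '"',
    "MY_CLOSING_DELIMITER": '"',
    "MY_CLOSING_DELIMITER_LENGTH": 1,
    "MY_INTERPOL_CHAR": '$',
    "MY_ESCAPE_CHARS": {'n': '\n', 't': '\t'},
    "ALLOW_RUNOFF_CLOSING_DELIMITER": False,
}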