Example #1
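A comment tokenizer's run method: it first checks that it was invoked with its registered opening delimiter, then emits a BEGIN_MACRO token and accumulates every remaining character into a single COMMENT token, closing with END_MACRO at end of input.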
    def run(self):
        stream = self.context.stream

        # If the macro tokenizer was called with an unknown opening delimiter sequence, mark it as an error and exit
        if self.opening_delimiter != self.__class__.OPENING_DELIMITER:
            yield Tokens.ERROR(
                self.opening_delimiter, self.opening_delimiter_position,
                self.opening_delimiter_position_after,
                "Comment tokenizer called with unknown opening sequence “%s”" %
                self.opening_delimiter)
            return

        stream.push()
        opening_comment_token = Tokens.BEGIN_MACRO(
            self.__class__.OPENING_DELIMITER, self.opening_delimiter_position,
            self.opening_delimiter_position_after)
        yield opening_comment_token

        value = ""
        value_first_position = stream.copy_absolute_position()
        while True:
            if stream.next_is_EOF():
                yield Tokens.COMMENT(value, value_first_position,
                                     stream.copy_absolute_position())
                yield Tokens.END_MACRO(opening_comment_token, "",
                                       stream.copy_absolute_position(),
                                       stream.copy_absolute_position())
                stream.pop()
                return
            value += stream.read()
Example #2
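A standalone interpolation generator: it probes the readtable for the next sequence and, depending on the sequence type, yields a CONSTITUENT token, delegates to tokenize_macro, concatenates a run of constituent sequences, or reports an error.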
def interpolation(context):
    stream = context.stream
    readtable = context.readtable

    if stream.next_is_EOF():
        return
    else:
        seq, properties = readtable.probe(stream)

        assert 'type' in properties
        seq_type = properties.type

        if seq_type == RT.ISOLATED_CONSTITUENT:
            yield Tokens.CONSTITUENT(seq, stream.absolute_position_of_unread_seq(seq), stream.copy_absolute_position())
        elif seq_type == RT.MACRO:
            for token in tokenize_macro(context, seq, properties):
                yield token
        elif seq_type == RT.CONSTITUENT:
            first_position = stream.absolute_position_of_unread_seq(seq)
            concatenation = seq + read_and_concatenate_constituent_sequences(stream, readtable)
            yield Tokens.CONSTITUENT(concatenation, first_position, stream.copy_absolute_position())

        # Step 3
        else:
            context.error = True
            first_position = stream.absolute_position_of_unread_seq(seq)
            error_message = properties.error_message if 'error_message' in properties else "Unexpected character '%s' in interpolation." % seq
            yield Tokens.ERROR(seq, first_position, stream.copy_absolute_position(), error_message)
Example #3
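A Lisp-mode tokenizer: it emits a BEGIN_MACRO token, repeatedly probes the readtable to handle the closing delimiter, whitespace, nested macro tokenizers, and constituents, and classifies the delimited node as a comma-separated "seq" or a plain "form".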
    def run(self):
        readtable = self.context.readtable
        stream = self.context.stream
        """:type : CharacterStream"""

        self.context.expected_closing_seqs += 1

        # emit a BEGIN_MACRO token, and remember it
        opening_delimiter_token = Tokens.BEGIN_MACRO(
            self.opening_delimiter, self.opening_delimiter_position,
            self.opening_delimiter_position_after)
        yield opening_delimiter_token

        node_type = None

        # Stage 2 (Parsing of the form)
        while True:

            # 2.1
            if stream.next_is_EOF():
                self.context.expected_closing_seqs -= 1
                yield Tokens.ERROR(
                    self.opening_delimiter, stream.copy_absolute_position(),
                    stream.copy_absolute_position(),
                    "Expected `%s` closing sequence was not found." %
                    self.closing_delimiter)
                return
            else:
                seq, properties = readtable.probe(stream)

                assert 'type' in properties
                seq_type = properties.type

                # 2.1
                if seq_type == RT.CLOSING:
                    self.context.expected_closing_seqs -= 1
                    if seq != self.closing_delimiter:
                        yield Tokens.ERROR(
                            seq, stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position(),
                            "Expected '%s' closing sequence was not found, `%s` was found instead."
                            % (self.closing_delimiter, seq))
                        return
                    elif node_type is None:
                        yield Tokens.ERROR(
                            seq, stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position(), "Empty form/seq.")
                    else:
                        opening_delimiter_token.node_type = node_type
                        # return the delimiter tokenizers to their usual selves
                        self.set_delimiter_tokenizers(readtable,
                                                      "DelimiterTokenizer",
                                                      "LispModeTokenizer")
                        yield Tokens.END_MACRO(
                            opening_delimiter_token, seq,
                            stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position())
                        return
                # 2.2
                elif seq_type == RT.WHITESPACE:
                    pass
                # 2.3
                elif seq_type == RT.NEWLINE:
                    pass
                # 2.5
                elif seq_type == RT.PUNCTUATION:
                    raise TokenizingError(
                        stream.copy_absolute_position(),
                        "Unexpected punctuation '%s' inside lisp mode." % seq)

                    # yield Tokens.PUNCTUATION(self.last_begin_token, seq, stream.absolute_position_of_unread_seq(seq), stream.copy_absolute_position())
                # 2.6
                elif seq_type == RT.MACRO:
                    assert 'tokenizer' in properties
                    abs_macro_seq_position = stream.absolute_position_of_unread_seq(
                        seq)
                    abs_macro_seq_position_after = stream.copy_absolute_position(
                    )

                    TokenizerClass = self.context[properties.tokenizer]
                    assert issubclass(TokenizerClass, Tokenizer)
                    tokenizer = TokenizerClass(self.context, seq,
                                               abs_macro_seq_position,
                                               abs_macro_seq_position_after)
                    for token in tokenizer.run():
                        yield token
                # 2.7
                elif seq_type == RT.CONSTITUENT or seq_type == RT.ISOLATED_CONSTITUENT:
                    first_position = stream.absolute_position_of_unread_seq(
                        seq)
                    concatenation = seq + util.read_and_concatenate_constituent_sequences_ignore_isolation(
                        stream, readtable)
                    yield Tokens.CONSTITUENT(concatenation, first_position,
                                             stream.copy_absolute_position())
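                    # The first constituent decides the node type: a following ','
                    # makes this a comma-separated "seq", anything else a "form".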
                    if node_type is None:
                        seq = self.read_non_whitespace_seq(readtable, stream)
                        if seq == ',':
                            node_type = "seq"
                        else:
                            if seq is not None: stream.unread_seq(seq)
                            node_type = "form"
                    elif node_type == "seq":
                        seq = self.read_non_whitespace_seq(readtable, stream)

                        if seq == ',':
                            pass
                        elif seq is None:
                            yield Tokens.ERROR(
                                seq, stream.copy_absolute_position(),
                                stream.copy_absolute_position(),
                                "Expected ',' or ')' inside Lisp-mode seq.")
                        elif seq != self.closing_delimiter:
                            yield Tokens.ERROR(
                                seq,
                                stream.absolute_position_of_unread_seq(seq),
                                stream.copy_absolute_position(),
                                "Expected ',' or '%s' inside Lisp-mode seq, found '%s'."
                                % (self.closing_delimiter, seq))
                        else:
                            stream.unread_seq(seq)

                    elif node_type == "form":
                        pass
                    else:
                        raise NotImplementedError()

                # 2.8
                elif seq_type == RT.INVALID:
                    first_position = stream.absolute_position_of_unread_seq(
                        seq)
                    error_message = properties.error_message if 'error_message' in properties else "Invalid character found in stream."
                    yield Tokens.ERROR(seq, first_position,
                                       stream.copy_absolute_position(),
                                       error_message)
Example #4
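A comment tokenizer with escape handling and interpolation: a backslash escapes a backslash, the closing delimiter, or '$'; an unescaped closing delimiter ends the comment, and an unescaped '$' hands control to util.interpolation.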
    def run(self):
        stream = self.context.stream

        # If the macro tokenizer was called with an unknown opening delimiter sequence, mark it as an error and exit
        if self.opening_delimiter != self.__class__.OPENING_DELIMITER:
            yield Tokens.ERROR(
                self.opening_delimiter, self.opening_delimiter_position,
                self.opening_delimiter_position_after,
                "Comment tokenizer called with unknown opening sequence “%s”" %
                self.opening_delimiter)
            return

        stream.push()
        seen_escape = False

        opening_comment_token = Tokens.BEGIN_MACRO(
            self.__class__.OPENING_DELIMITER, self.opening_delimiter_position,
            self.opening_delimiter_position_after)
        yield opening_comment_token

        value = ""
        value_first_position = stream.copy_absolute_position()
        while True:
            if stream.next_is_EOF():
                yield Tokens.COMMENT(value, value_first_position,
                                     stream.copy_absolute_position())
                yield Tokens.END_MACRO(opening_comment_token, "",
                                       stream.copy_absolute_position(),
                                       stream.copy_absolute_position())
                stream.pop()
                return
            char = stream.read()
            if char == '\\':
                if seen_escape: value += '\\'
                else: seen_escape = True
            else:
                if seen_escape:
                    if char == self.__class__.CLOSING_DELIMITER:
                        value += self.__class__.CLOSING_DELIMITER
                    elif char == '$':
                        value += '$'
                    else:
                        yield Tokens.COMMENT(
                            value, value_first_position,
                            stream.absolute_position_of_unread())
                        value = ""
                        value_first_position = stream.copy_absolute_position()
                        yield Tokens.ERROR(
                            char, stream.absolute_position_of_unread(),
                            stream.copy_absolute_position(),
                            "Unknown escape code sequence “%s”." % char)
                    seen_escape = False
                else:
                    if char == self.__class__.CLOSING_DELIMITER:
                        yield Tokens.COMMENT(
                            value, value_first_position,
                            stream.absolute_position_of_unread())
                        yield Tokens.END_MACRO(
                            opening_comment_token,
                            self.__class__.CLOSING_DELIMITER,
                            stream.absolute_position_of_unread(),
                            stream.copy_absolute_position())
                        stream.pop()
                        return
                    elif char == '$':
                        yield Tokens.COMMENT(
                            value, value_first_position,
                            stream.absolute_position_of_unread())
                        value = ""
                        value_first_position = stream.copy_absolute_position()
                        for token in util.interpolation(self.context):
                            yield token

                    else:
                        value += char
Example #5
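A delimiter-pair tokenizer: it validates the opening sequence, emits BEGIN_MACRO, runs the default tokenizer on a pushed stream for the delimited region, and then checks for the matching closing sequence before emitting END_MACRO.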
    def run(self):
        stream = self.context.stream
        readtable = self.context.readtable

        # If the macro tokenizer was called with an unknown opening delimiter sequence, mark it as an error and exit
        if self.opening_delimiter not in self.__class__.DELIMITER_PAIRS:
            yield Tokens.ERROR(
                self.opening_delimiter, self.opening_delimiter_position,
                self.opening_delimiter_position_after,
                "Unregistered delimiter pair, for opening sequence “%s”" %
                self.opening_delimiter)
            return

        opening_delimiter_token = Tokens.BEGIN_MACRO(
            self.opening_delimiter, self.opening_delimiter_position,
            self.opening_delimiter_position_after)

        skip_white_lines(stream, readtable)
        # If there are no tokens following the opening delimiter sequence
        if stream.next_is_EOF():
            yield Tokens.ERROR(
                self.opening_delimiter, self.opening_delimiter_position,
                self.opening_delimiter_position_after,
                "No characters found after opening delimiter '%s'." %
                self.opening_delimiter)
            return

        self.context.expected_closing_seqs += 1
        yield opening_delimiter_token
        stream.push()

        tokenizer = self.context.DefaultTokenizer(self.context)
        for token in tokenizer.run():
            yield token

        stream.pop()

        skip_white_lines(stream, readtable)

        if stream.next_is_EOF():
            yield Tokens.ERROR(
                self.opening_delimiter, stream.copy_absolute_position(),
                stream.copy_absolute_position(),
                "Expected closing delimiter «%s», matching opening delimiter «%s» at position %s."
                % (self.closing_delimiter, self.opening_delimiter,
                   self.opening_delimiter_position.nameless_str))
        else:
            seq, properties = readtable.probe(stream)
            if properties.type == RT.CLOSING:
                self.context.expected_closing_seqs -= 1
                if seq != self.closing_delimiter:
                    yield Tokens.ERROR(
                        seq, stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position(),
                        "Found `%s`, but expected `%s`." %
                        (seq, self.closing_delimiter))
                    closing_delimiter_token = Tokens.END_MACRO(
                        opening_delimiter_token, "",
                        stream.copy_absolute_position(),
                        stream.copy_absolute_position())
                else:
                    closing_delimiter_token = Tokens.END_MACRO(
                        opening_delimiter_token, seq,
                        stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position())
                self.on_close()
                yield closing_delimiter_token
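Example #6
The default, indentation-based tokenizer: it wraps each segment in BEGIN/END tokens, emits INDENT tokens for sub-blocks and optional restart-position markers, and dispatches every probed sequence to the matching handler; the numbered comments refer to the stages listed in the blog post cited in the code.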
    def run(self):
        readtable = self.context.readtable
        stream = self.context.stream
        """:type : IndentedCharacterStream"""
        expected_closing_seqs = self.context.expected_closing_seqs

        emmit_restart_tokens = self.context.emmit_restart_tokens
        restart_token_count = 0

        # Numbers #.#.# refer to the list in the blog post
        # https://bloff.github.io/lyc/lexing,/syntax/2015/08/30/lexer-2.html
        while True:
            # Stage 1 (Preparation)

            # Find first non-whitespace, non-newline sequence
            # make sure that it begins at the first column
            util.skip_white_lines(stream, readtable)

            # If we find an EOF, we're done tokenizing the stream
            if stream.next_is_EOF():
                # emmit_hanging_restart_tokens()
                for _ in range(restart_token_count):
                    yield Tokens.VALID_RESTART_TO(
                        stream.copy_absolute_position())
                return

            #                [  Handling closing sequences  ]
            #
            #  If we find an (expected) closing sequence, we're also done;
            #  if the sequence was not expected, an error token is emitted.
            seq, properties = readtable.probe(stream)
            if properties.type == RT.CLOSING:
                if expected_closing_seqs > 0:
                    stream.unread_seq(seq)
                    # emmit_hanging_restart_tokens()
                    for _ in range(restart_token_count):
                        yield Tokens.VALID_RESTART_TO(
                            stream.copy_absolute_position())
                    return
                else:
                    yield Tokens.ERROR(
                        seq, stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position(),
                        "Unexpected closing sequence `%s`." % seq)
            # Any other sequence is unread
            stream.unread_seq(seq)

            #                           [  Signaling restart positions  ]
            #
            # If we are asked to emit restart-position tokens, which serve to pinpoint locations where the
            # default tokenizer can safely restart, then we do so, and we keep track of how many such tokens
            # must be terminated.
            if emmit_restart_tokens and stream.current_relative_position.column == 1 and expected_closing_seqs <= 0:
                restart_token_count += 1
                yield Tokens.VALID_RESTART_FROM(
                    stream.copy_absolute_position())

            #        [ The first BEGIN token ]
            # emit a BEGIN token, and remember it
            self.last_begin_token = Tokens.BEGIN(
                stream.copy_absolute_position())
            yield self.last_begin_token

            #         [  Stage 2 - Parsing of segment's first line  ]
            while True:
                # 2.1
                if stream.next_is_EOF():
                    yield Tokens.END(self.last_begin_token,
                                     stream.copy_absolute_position())
                    # emmit_hanging_restart_tokens()
                    for _ in range(restart_token_count):
                        yield Tokens.VALID_RESTART_TO(
                            stream.copy_absolute_position())
                    return
                else:
                    seq, properties = readtable.probe(stream)

                    assert 'type' in properties
                    seq_type = properties.type

                    # 2.1
                    if seq_type == RT.CLOSING:
                        if expected_closing_seqs <= 0:
                            yield Tokens.ERROR(
                                seq,
                                stream.absolute_position_of_unread_seq(seq),
                                stream.copy_absolute_position(),
                                "Unexpected closing sequence `%s`." % seq)
                        else:
                            stream.unread_seq(seq)
                            yield Tokens.END(self.last_begin_token,
                                             stream.copy_absolute_position())
                            # emmit_hanging_restart_tokens()
                            for _ in range(restart_token_count):
                                yield Tokens.VALID_RESTART_TO(
                                    stream.copy_absolute_position())
                            return
                    # 2.2
                    elif seq_type == RT.WHITESPACE:
                        pass
                    # 2.3
                    elif seq_type == RT.NEWLINE:
                        break  # goto Stage 2
                    # 2.4
                    elif seq_type == RT.ISOLATED_CONSTITUENT:
                        yield Tokens.CONSTITUENT(
                            seq, stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position())
                    # 2.5
                    elif seq_type == RT.PUNCTUATION:
                        yield Tokens.PUNCTUATION(
                            self.last_begin_token, seq,
                            stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position())
                    # 2.6
                    elif seq_type == RT.MACRO:
                        for token in util.tokenize_macro(
                                self.context, seq, properties):
                            yield token
                    # 2.7
                    elif seq_type == RT.CONSTITUENT:
                        first_position = stream.absolute_position_of_unread_seq(
                            seq)
                        concatenation = seq + util.read_and_concatenate_constituent_sequences(
                            stream, readtable)
                        yield Tokens.CONSTITUENT(
                            concatenation, first_position,
                            stream.copy_absolute_position())
                    # 2.8
                    elif seq_type == RT.INVALID:
                        first_position = stream.absolute_position_of_unread_seq(
                            seq)
                        error_message = properties.error_message if 'error_message' in properties else "Invalid character found in stream."
                        yield Tokens.ERROR(seq, first_position,
                                           stream.copy_absolute_position(),
                                           error_message)

            # Stage 3 (Parsing of sub-blocks)
            W = MAX_INT
            while True:
                util.skip_white_lines(stream, readtable)
                relative_column_number = stream.visual_column
                # 3.2
                if stream.next_is_EOF():
                    yield Tokens.END(self.last_begin_token,
                                     stream.copy_absolute_position())
                    # emmit_hanging_restart_tokens()
                    for _ in range(restart_token_count):
                        yield Tokens.VALID_RESTART_TO(
                            stream.copy_absolute_position())
                    return
                # 3.2.1
                if relative_column_number == 1:
                    yield Tokens.END(self.last_begin_token,
                                     stream.copy_absolute_position())
                    # DON'T # emmit_hanging_restart_tokens()
                    break  # goto Stage 1 again
                # 3.2.2
                elif relative_column_number > W:
                    seq, properties = readtable.probe(stream)

                    if properties.type == RT.CLOSING:
                        if expected_closing_seqs <= 0:
                            yield Tokens.ERROR(
                                seq,
                                stream.absolute_position_of_unread_seq(seq),
                                stream.copy_absolute_position(),
                                "Unexpected closing sequence `%s`." % seq)
                        else:
                            yield Tokens.END(
                                self.last_begin_token,
                                stream.absolute_position_of_unread_seq(seq))
                            # emmit_hanging_restart_tokens()
                            for _ in range(restart_token_count):
                                yield Tokens.VALID_RESTART_TO(
                                    stream.copy_absolute_position())
                            stream.unread_seq(seq)
                            return
                    else:
                        raise TokenizingError(
                            stream.absolute_position_of_unread_seq(seq),
                            "Unexpected indentation when parsing sub-blocks.")
                # 3.2.3
                elif relative_column_number < W:
                    yield Tokens.INDENT(self.last_begin_token,
                                        stream.copy_absolute_position())
                    W = relative_column_number
                # 3.2.4
                else:
                    # when relative_column_number == W, finish if the first non-whitespace character is a closing seq
                    seq, properties = readtable.probe(stream)

                    if properties.type == RT.CLOSING:
                        if expected_closing_seqs <= 0:
                            yield Tokens.ERROR(
                                seq,
                                stream.absolute_position_of_unread_seq(seq),
                                stream.copy_absolute_position(),
                                "Unexpected closing sequence `%s`." % seq)
                        else:
                            yield Tokens.END(
                                self.last_begin_token,
                                stream.absolute_position_of_unread_seq(seq))
                            # emmit_hanging_restart_tokens()
                            for _ in range(restart_token_count):
                                yield Tokens.VALID_RESTART_TO(
                                    stream.copy_absolute_position())
                            stream.unread_seq(seq)
                            return
                    else:
                        stream.unread_seq(seq)

                # 3.3
                self.context.stream.push()
                tokenizer = self.context.DefaultTokenizer(self.context)
                for token in tokenizer.run():
                    yield token
                self.context.stream.pop()
Example #7
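A string tokenizer with escape sequences and interpolation: characters accumulate into STRING tokens, escape codes are mapped through MY_ESCAPE_CHARS, an unescaped MY_INTERPOL_CHAR delegates to util.interpolation, and the string ends when the closing delimiter is found, with optional handling of a run-off closing delimiter once the pushed input is exhausted.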
    def run(self):
        stream = self.context.stream
        readtable = self.context.readtable

        if stream.next_is_EOF():
            yield Tokens.ERROR(
                self.__class__.MY_OPENING_DELIMITER,
                self.opening_delimiter_position,
                self.opening_delimiter_position_after,
                "No characters found after opening delimiter %s." %
                repr(self.__class__.MY_OPENING_DELIMITER))
            return

        stream.push()

        seen_escape = False

        opening_string_token = Tokens.BEGIN_MACRO(
            self.__class__.MY_OPENING_DELIMITER,
            self.opening_delimiter_position,
            self.opening_delimiter_position_after)
        yield opening_string_token

        value = ""
        value_first_position = stream.copy_absolute_position()
        while True:
            if stream.next_is_EOF():
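                # End of the pushed input before a closing delimiter was seen:
                # pop the stream and, if run-off closing delimiters are allowed,
                # look for the closing sequence in what follows; otherwise report
                # the missing delimiter.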
                stream.pop()
                if self.__class__.ALLOW_RUNOFF_CLOSING_DELIMITER:
                    position_before_skipping_white_lines = stream.copy_absolute_position(
                    )
                    skip_white_lines(stream, readtable)
                    position_before_attempting_to_read_k_chars = stream.copy_absolute_position(
                    )
                    k_chars = stream.readn(
                        self.__class__.MY_CLOSING_DELIMITER_LENGTH)
                    if k_chars != self.__class__.MY_CLOSING_DELIMITER:
                        yield Tokens.ERROR(
                            k_chars,
                            position_before_attempting_to_read_k_chars,
                            stream.copy_absolute_position(),
                            "Expected closing string-delimiter «%s», matching opening delimiter «%s» at position %s.%s"
                            % (self.__class__.MY_CLOSING_DELIMITER,
                               self.__class__.MY_OPENING_DELIMITER,
                               self.opening_delimiter_position.nameless_str,
                               "" if k_chars is None else " Found " +
                               repr(k_chars)))
                        return
                    else:
                        value += '\n'
                        yield Tokens.STRING(
                            value, value_first_position,
                            position_before_skipping_white_lines)
                        yield Tokens.END_MACRO(
                            opening_string_token,
                            self.__class__.MY_CLOSING_DELIMITER,
                            stream.absolute_position_of_unread(),
                            stream.copy_absolute_position())
                        return
                else:
                    yield Tokens.ERROR(
                        "", stream.copy_absolute_position(),
                        stream.copy_absolute_position(),
                        "Expected closing string-delimiter «%s», matching opening delimiter «%s» at position %s."
                        % (self.__class__.MY_CLOSING_DELIMITER,
                           self.__class__.MY_OPENING_DELIMITER,
                           self.opening_delimiter_position.nameless_str))
                return

            char = stream.read()
            if char == '\\':
                if seen_escape:
                    value += '\\'
                    seen_escape = False
                else:
                    seen_escape = True
            else:
                if seen_escape:
                    if char in self.__class__.MY_ESCAPE_CHARS:
                        value += self.__class__.MY_ESCAPE_CHARS[char]
                    elif char == self.__class__.MY_INTERPOL_CHAR:
                        value += char
                    elif char == self.__class__.MY_CLOSING_DELIMITER:
                        value += char
                    else:
                        yield Tokens.STRING(
                            value, value_first_position,
                            stream.absolute_position_of_unread())
                        value = ""
                        value_first_position = stream.copy_absolute_position()
                        yield Tokens.ERROR(
                            char, stream.absolute_position_of_unread(),
                            stream.copy_absolute_position(),
                            "Unknown escape code sequence “%s”." % char)
                    seen_escape = False
                else:
                    if char == self.MY_INTERPOL_CHAR:
                        yield Tokens.STRING(
                            value, value_first_position,
                            stream.absolute_position_of_unread())
                        value = ""
                        value_first_position = stream.copy_absolute_position()
                        for token in util.interpolation(self.context):
                            yield token
                    else:
                        value += char
                        last_k_chars = value[-self.__class__.
                                             MY_CLOSING_DELIMITER_LENGTH:]
                        if last_k_chars == self.__class__.MY_CLOSING_DELIMITER:
                            value = value[:-self.__class__.
                                          MY_CLOSING_DELIMITER_LENGTH]
                            closing_delimiter_first_position = stream.absolute_position_of_unread(
                                self.__class__.MY_CLOSING_DELIMITER_LENGTH)
                            yield Tokens.STRING(
                                value, value_first_position,
                                closing_delimiter_first_position)
                            yield Tokens.END_MACRO(
                                opening_string_token,
                                self.__class__.MY_CLOSING_DELIMITER,
                                closing_delimiter_first_position,
                                stream.copy_absolute_position())
                            stream.pop()
                            return

        stream.pop()
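
All of the examples above compose in the same way: each tokenizer is a generator that yields token objects and re-yields whatever a delegated sub-tokenizer produces. The following minimal sketch (hypothetical names, not part of this project) shows that pattern in isolation; since Python 3.3 the explicit loop "for token in sub: yield token" can equivalently be written as "yield from sub".

def char_tokenizer(text):
    # hypothetical sub-tokenizer: one token per character
    for ch in text:
        yield ("CHAR", ch)

def tokenizer(text):
    yield ("BEGIN", None)
    # same effect as: for token in char_tokenizer(text): yield token
    yield from char_tokenizer(text)
    yield ("END", None)

print(list(tokenizer("ab")))
# [('BEGIN', None), ('CHAR', 'a'), ('CHAR', 'b'), ('END', None)]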