Example #1
    def run(self):
        stream = self.context.stream
        readtable = self.context.readtable
        skip_white_lines(stream, readtable)

        for token in super(BlockStringTokenizer, self).run():
            yield token
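
Note: the "for token in super(...).run(): yield token" loop is the Python-2-compatible (pre-PEP 380) spelling of generator delegation; on Python 3.3+ the same thing is written with yield from. A minimal, self-contained sketch of the pattern (class and token names here are illustrative, not from the library):

    class BaseTokenizer(object):
        def run(self):
            yield "TOKEN_A"
            yield "TOKEN_B"

    class BlockTokenizer(BaseTokenizer):
        def run(self):
            yield "BEGIN"
            # Re-yield everything the parent generator produces;
            # equivalent to `yield from super().run()` on Python 3.3+.
            for token in super(BlockTokenizer, self).run():
                yield token
            yield "END"

    print(list(BlockTokenizer().run()))  # ['BEGIN', 'TOKEN_A', 'TOKEN_B', 'END']
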
Example #2
    def read_non_whitespace_seq(self, readtable, stream):
        util.skip_white_lines(stream, readtable)

        if stream.next_is_EOF():
            return None
        else:
            seq, properties = readtable.probe(stream)
            return seq
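
Both examples above call util.skip_white_lines(stream, readtable) before probing the readtable. A simplified, self-contained model of what that helper appears to do (assumption: it consumes whitespace and newlines until EOF or a visible character is next), with a plain string and an index standing in for the real stream/readtable pair:

    # Simplified stand-in for util.skip_white_lines; the real helper works on
    # the project's stream and readtable objects (semantics assumed here).
    WHITE = {' ', '\t', '\r', '\n'}

    def skip_white_lines(text, pos):
        """Advance pos past whitespace and blank lines; stop at EOF or a visible char."""
        while pos < len(text) and text[pos] in WHITE:
            pos += 1
        return pos

    print(skip_white_lines("  \n\t  x = 1", 0))  # 6, the index of 'x'
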
Example #3
    def run(self):
        stream = self.context.stream
        readtable = self.context.readtable

        # If the macro tokenizer was called with an unknown opening delimiter sequence, mark it as an error and exit
        if self.opening_delimiter not in self.__class__.DELIMITER_PAIRS:
            yield Tokens.ERROR(self.opening_delimiter, self.opening_delimiter_position,
                               self.opening_delimiter_position_after,
                               "Unregistered delimiter pair, for opening sequence “%s”" % self.opening_delimiter)
            return

        opening_delimiter_token = Tokens.BEGIN_MACRO(self.opening_delimiter, self.opening_delimiter_position,
                                                     self.opening_delimiter_position_after)

        skip_white_lines(stream, readtable)
        # If there are no tokens following the opening delimiter sequence
        if stream.next_is_EOF():
            yield Tokens.ERROR(self.opening_delimiter, self.opening_delimiter_position, self.opening_delimiter_position_after,
                               "No characters found after opening delimiter '%s'." % self.opening_delimiter)
            return

        self.context.expected_closing_seqs += 1
        yield opening_delimiter_token
        stream.push()

        tokenizer = self.context.DefaultTokenizer(self.context)
        for token in tokenizer.run():
            yield token

        stream.pop()

        skip_white_lines(stream, readtable)

        if stream.next_is_EOF():
            yield Tokens.ERROR(self.opening_delimiter, stream.copy_absolute_position(), stream.copy_absolute_position(),
                "Expected closing delimiter «%s», matching opening delimiter «%s» at position %s." % (self.closing_delimiter, self.opening_delimiter, self.opening_delimiter_position.nameless_str))
        else:
            seq, properties = readtable.probe(stream)
            if properties.type == RT.CLOSING:
                self.context.expected_closing_seqs -= 1
                if seq != self.closing_delimiter:
                    yield Tokens.ERROR(seq, stream.absolute_position_of_unread_seq(seq),
                                       stream.copy_absolute_position(),
                                       "Found `%s`, but expected `%s`." % (seq, self.closing_delimiter))
                    closing_delimiter_token = Tokens.END_MACRO(opening_delimiter_token, "",
                                                               stream.copy_absolute_position(),
                                                               stream.copy_absolute_position())
                else:
                    closing_delimiter_token = Tokens.END_MACRO(opening_delimiter_token, seq, stream.absolute_position_of_unread_seq(seq),
                                                               stream.copy_absolute_position())
                self.on_close()
                yield closing_delimiter_token
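
The guard at the top of this example rejects any opening sequence that is not a key in the class-level DELIMITER_PAIRS table, so each concrete macro tokenizer presumably registers its delimiter pairs there. A hypothetical configuration (the actual pairs are not shown in these examples):

    # Hypothetical table; the real DELIMITER_PAIRS values belong to the
    # concrete tokenizer class and are not shown in these examples.
    DELIMITER_PAIRS = {'(': ')', '[': ']', '{': '}'}

    opening_delimiter = '<'
    if opening_delimiter not in DELIMITER_PAIRS:
        print("Unregistered delimiter pair for opening sequence “%s”" % opening_delimiter)
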
Example #4
    def run(self):
        stream = self.context.stream
        readtable = self.context.readtable

        if stream.next_is_EOF():
            yield Tokens.ERROR(self.__class__.MY_OPENING_DELIMITER, self.opening_delimiter_position,
                               self.opening_delimiter_position_after,
                               "No characters found after opening delimiter %s." % repr(self.__class__.MY_OPENING_DELIMITER))
            return

        stream.push()

        seen_escape = False

        opening_string_token = Tokens.BEGIN_MACRO(self.__class__.MY_OPENING_DELIMITER,
                                                 self.opening_delimiter_position,
                                                 self.opening_delimiter_position_after)
        yield opening_string_token

        value = ""
        value_first_position = stream.copy_absolute_position()
        while True:
            if stream.next_is_EOF():
                stream.pop()
                if self.__class__.ALLOW_RUNOFF_CLOSING_DELIMITER:
                    position_before_skipping_white_lines = stream.copy_absolute_position()
                    skip_white_lines(stream, readtable)
                    position_before_attempting_to_read_k_chars = stream.copy_absolute_position()
                    k_chars = stream.readn(self.__class__.MY_CLOSING_DELIMITER_LENGTH)
                    if k_chars != self.__class__.MY_CLOSING_DELIMITER:
                        yield Tokens.ERROR(k_chars, position_before_attempting_to_read_k_chars, stream.copy_absolute_position(),
                                          "Expected closing string-delimiter «%s», matching opening delimiter «%s» at position %s.%s" %
                                          (self.__class__.MY_CLOSING_DELIMITER,
                                           self.__class__.MY_OPENING_DELIMITER,
                                           self.opening_delimiter_position.nameless_str,
                                           "" if k_chars is None else " Found " + repr(k_chars)))
                        return
                    else:
                        value += '\n'
                        yield Tokens.STRING(value, value_first_position, position_before_skipping_white_lines)
                        yield Tokens.END_MACRO(opening_string_token, self.__class__.MY_CLOSING_DELIMITER, stream.absolute_position_of_unread(), stream.copy_absolute_position())
                        return
                else:
                    yield Tokens.ERROR("", stream.copy_absolute_position(), stream.copy_absolute_position(),
                                       "Expected closing string-delimiter «%s», matching opening delimiter «%s» at position %s." %
                                       (self.__class__.MY_CLOSING_DELIMITER,
                                        self.__class__.MY_OPENING_DELIMITER,
                                        self.opening_delimiter_position.nameless_str))
                return

            char = stream.read()
            if char == '\\':
                if seen_escape:
                    value += '\\'
                    seen_escape = False
                else:
                    seen_escape = True
            else:
                if seen_escape:
                    if char in self.__class__.MY_ESCAPE_CHARS:
                        value += self.__class__.MY_ESCAPE_CHARS[char]
                    elif char == self.__class__.MY_INTERPOL_CHAR:
                        value += char
                    elif char == self.__class__.MY_CLOSING_DELIMITER:
                        value += char
                    else:
                        yield Tokens.STRING(value, value_first_position, stream.absolute_position_of_unread())
                        value = ""
                        value_first_position = stream.copy_absolute_position()
                        yield Tokens.ERROR(char, stream.absolute_position_of_unread(), stream.copy_absolute_position(), "Unknown escape code sequence “%s”." % char)
                    seen_escape = False
                else:
                    if char == self.MY_INTERPOL_CHAR:
                        yield Tokens.STRING(value, value_first_position, stream.absolute_position_of_unread())
                        value = ""
                        value_first_position = stream.copy_absolute_position()
                        for token in util.interpolation(self.context):
                            yield token
                    else:
                        value += char
                        last_k_chars = value[-self.__class__.MY_CLOSING_DELIMITER_LENGTH:]
                        if last_k_chars == self.__class__.MY_CLOSING_DELIMITER:
                            value = value[:-self.__class__.MY_CLOSING_DELIMITER_LENGTH]
                            closing_delimiter_first_position = stream.absolute_position_of_unread(self.__class__.MY_CLOSING_DELIMITER_LENGTH)
                            yield Tokens.STRING(value, value_first_position, closing_delimiter_first_position)
                            yield Tokens.END_MACRO(opening_string_token, self.__class__.MY_CLOSING_DELIMITER,
                                                   closing_delimiter_first_position, stream.copy_absolute_position())
                            stream.pop()
                            return

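
The backslash handling in the loop above can be isolated into a small, self-contained model (assumption: MY_ESCAPE_CHARS maps escape letters to their replacement text, and a doubled backslash encodes a single literal backslash):

    # Model of the escape handling only; the real tokenizer additionally
    # treats the interpolation character and the closing delimiter specially,
    # and emits an ERROR token for unknown escape codes instead of passing
    # the character through as done here.
    MY_ESCAPE_CHARS = {'n': '\n', 't': '\t'}

    def unescape(text):
        out, seen_escape = "", False
        for char in text:
            if char == '\\' and not seen_escape:
                seen_escape = True
            elif seen_escape:
                out += MY_ESCAPE_CHARS.get(char, char)
                seen_escape = False
            else:
                out += char
        return out

    assert unescape(r"a\nb\\c") == "a\nb\\c"
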
Example #5
    def run(self):
        readtable = self.context.readtable
        stream = self.context.stream
        """:type : IndentedCharacterStream"""
        expected_closing_seqs = self.context.expected_closing_seqs

        emmit_restart_tokens = self.context.emmit_restart_tokens
        restart_token_count = 0

        # Numbers #.#.# refer to the list in the blog post
        # https://bloff.github.io/lyc/lexing,/syntax/2015/08/30/lexer-2.html
        while True:
            # Stage 1 (Preparation)

            # Find first non-whitespace, non-newline sequence
            # make sure that it begins at the first column
            util.skip_white_lines(stream, readtable)

            # If we find an EOF, we're done tokenizing the stream
            if stream.next_is_EOF():
                # emmit_hanging_restart_tokens()
                for _ in range(restart_token_count):
                    yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                return

            #                [  Handling closing sequences  ]
            #
            # If we find an (expected) closing sequence, we're also done;
            # if the sequence was not expected, an ERROR token is emitted.
            seq, properties = readtable.probe(stream)
            if properties.type == RT.CLOSING:
                if expected_closing_seqs > 0:
                    stream.unread_seq(seq)
                    # emmit_hanging_restart_tokens()
                    for _ in range(restart_token_count):
                        yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                    return
                else:
                    yield Tokens.ERROR(
                        seq,
                        stream.absolute_position_of_unread_seq(seq),
                        stream.copy_absolute_position(),
                        "Unexpected closing sequence `%s`." % seq,
                    )
            # Any other sequence is unread
            stream.unread_seq(seq)

            #                           [  Signaling restart positions  ]
            #
            # If we are asked to emit restart-position tokens, which serve to pinpoint locations where the
            # default tokenizer can safely restart, then we do so, and we keep track of how many such tokens
            # must be terminated.
            if emmit_restart_tokens and stream.current_relative_position.column == 1 and expected_closing_seqs <= 0:
                restart_token_count += 1
                yield Tokens.VALID_RESTART_FROM(stream.copy_absolute_position())

            #        [ The first BEGIN token ]
            # emit a BEGIN token, and remember it
            self.last_begin_token = Tokens.BEGIN(stream.copy_absolute_position())
            yield self.last_begin_token

            #         [  Stage 2 - Parsing of segment's first line  ]
            while True:
                # 2.1
                if stream.next_is_EOF():
                    yield Tokens.END(self.last_begin_token, stream.copy_absolute_position())
                    # emmit_hanging_restart_tokens()
                    for _ in range(restart_token_count):
                        yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                    return
                else:
                    seq, properties = readtable.probe(stream)

                    assert "type" in properties
                    seq_type = properties.type

                    # 2.1
                    if seq_type == RT.CLOSING:
                        if expected_closing_seqs <= 0:
                            yield Tokens.ERROR(
                                seq,
                                stream.absolute_position_of_unread_seq(seq),
                                stream.copy_absolute_position(),
                                "Unexpected closing sequence `%s`." % seq,
                            )
                        else:
                            stream.unread_seq(seq)
                            yield Tokens.END(self.last_begin_token, stream.copy_absolute_position())
                            # emmit_hanging_restart_tokens()
                            for _ in range(restart_token_count):
                                yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                            return
                    # 2.2
                    elif seq_type == RT.WHITESPACE:
                        pass
                    # 2.3
                    elif seq_type == RT.NEWLINE:
                        break  # goto Stage 3 (sub-block parsing)
                    # 2.4
                    elif seq_type == RT.ISOLATED_CONSTITUENT:
                        yield Tokens.CONSTITUENT(
                            seq, stream.absolute_position_of_unread_seq(seq), stream.copy_absolute_position()
                        )
                    # 2.5
                    elif seq_type == RT.PUNCTUATION:
                        yield Tokens.PUNCTUATION(
                            self.last_begin_token,
                            seq,
                            stream.absolute_position_of_unread_seq(seq),
                            stream.copy_absolute_position(),
                        )
                    # 2.6
                    elif seq_type == RT.MACRO:
                        for token in util.tokenize_macro(self.context, seq, properties):
                            yield token
                    # 2.7
                    elif seq_type == RT.CONSTITUENT:
                        first_position = stream.absolute_position_of_unread_seq(seq)
                        concatenation = seq + util.read_and_concatenate_constituent_sequences(stream, readtable)
                        yield Tokens.CONSTITUENT(concatenation, first_position, stream.copy_absolute_position())
                    # 2.8
                    elif seq_type == RT.INVALID:
                        first_position = stream.absolute_position_of_unread_seq(seq)
                        error_message = (
                            properties.error_message
                            if "error_message" in properties
                            else "Invalid character found in stream."
                        )
                        yield Tokens.ERROR(seq, first_position, stream.copy_absolute_position(), error_message)

            # Stage 3 (Parsing of sub-blocks)
            W = MAX_INT
            while True:
                util.skip_white_lines(stream, readtable)
                relative_column_number = stream.visual_column
                # 3.2
                if stream.next_is_EOF():
                    yield Tokens.END(self.last_begin_token, stream.copy_absolute_position())
                    # emmit_hanging_restart_tokens()
                    for _ in range(restart_token_count):
                        yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                    return
                # 3.2.1
                if relative_column_number == 1:
                    yield Tokens.END(self.last_begin_token, stream.copy_absolute_position())
                    # DON'T # emmit_hanging_restart_tokens()
                    break  # goto Stage 1 again
                # 3.2.2
                elif relative_column_number > W:
                    seq, properties = readtable.probe(stream)

                    if properties.type == RT.CLOSING:
                        if expected_closing_seqs <= 0:
                            yield Tokens.ERROR(
                                seq,
                                stream.absolute_position_of_unread_seq(seq),
                                stream.copy_absolute_position(),
                                "Unexpected closing sequence `%s`." % seq,
                            )
                        else:
                            yield Tokens.END(self.last_begin_token, stream.absolute_position_of_unread_seq(seq))
                            # emmit_hanging_restart_tokens()
                            for _ in range(restart_token_count):
                                yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                            stream.unread_seq(seq)
                            return
                    else:
                        raise TokenizingError(
                            stream.absolute_position_of_unread_seq(seq),
                            "Unexpected indentation when parsing sub-blocks.",
                        )
                # 3.2.3
                elif relative_column_number < W:
                    yield Tokens.INDENT(self.last_begin_token, stream.copy_absolute_position())
                    W = relative_column_number
                # 3.2.4
                else:
                    # when relative_column_number == W, finish if the first non-whitespace character is a closing seq
                    seq, properties = readtable.probe(stream)

                    if properties.type == RT.CLOSING:
                        if expected_closing_seqs <= 0:
                            yield Tokens.ERROR(
                                seq,
                                stream.absolute_position_of_unread_seq(seq),
                                stream.copy_absolute_position(),
                                "Unexpected closing sequence `%s`." % seq,
                            )
                        else:
                            yield Tokens.END(self.last_begin_token, stream.absolute_position_of_unread_seq(seq))
                            # emmit_hanging_restart_tokens()
                            for _ in range(restart_token_count):
                                yield Tokens.VALID_RESTART_TO(stream.copy_absolute_position())
                            stream.unread_seq(seq)
                            return
                    else:
                        stream.unread_seq(seq)

                # 3.3
                self.context.stream.push()
                tokenizer = self.context.DefaultTokenizer(self.context)
                for token in tokenizer.run():
                    yield token
                self.context.stream.pop()
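
The subtle part of Stage 3 is the indentation rule: W starts at MAX_INT and tracks the shallowest sub-block column seen so far. A self-contained model of just that rule, with token emission and sub-tokenizing stripped out:

    # Column 1 ends the block; a column left of W emits INDENT and lowers W;
    # a column deeper than W is an error (unless it is an expected closing
    # sequence); a column equal to W starts another sub-block.
    MAX_INT = float('inf')

    def stage3_events(columns):
        W, events = MAX_INT, []
        for col in columns:
            if col == 1:
                events.append('END')
                break
            elif col > W:
                events.append('ERROR')
            elif col < W:
                events.append('INDENT')
                W = col
            else:
                events.append('SUBBLOCK')
        return events

    print(stage3_events([5, 5, 3, 3, 1]))
    # ['INDENT', 'SUBBLOCK', 'INDENT', 'SUBBLOCK', 'END']
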