Ejemplo n.º 1
0
 def test_recognises_a_word(self):
     # Expected outcome: one character token per letter of the word, at
     # consecutive columns of line 1 (columns counted from 1).
     word = "hello"
     self._text = word
     expected = [self._tokens.character(Position(1, column), letter)
                 for column, letter in enumerate(word, start=1)]
     self._verify_tokens(*expected)
Ejemplo n.º 2
0
 def test_recognises_a_comment(self):
     # Expected outcome: the comment token holds the text up to, but not
     # including, the line break; a new-line token and the two commands of
     # the second line follow.
     self._text = "%This is a comment\n\\def\\foo"
     comment = self._tokens.comment(Position(1, 1), "%This is a comment")
     line_break = self._tokens.new_line(Position(1, 1))
     definition = self._tokens.command(Position(2, 1), r"\def")
     macro = self._tokens.command(Position(2, 5), r"\foo")
     self._verify_tokens(comment, line_break, definition, macro)
Ejemplo n.º 3
0
 def test_recognises_an_opening_group(self):
     # Expected outcome: a lone "{" gives a single begin-group token.
     self._text = "{"
     opening = self._tokens.begin_group(Position(1, 1))
     self._verify_tokens(opening)
Ejemplo n.º 4
0
 def test_recognises_two_commands_separated_by_white_spaces(self):
     # Expected outcome: the whole run of blanks and tabs between the two
     # commands is reported as one white-space token.
     self._text = "\\def  \t  \\foo"
     expected = (
         self._tokens.command(Position(1, 1), r"\def"),
         self._tokens.white_space(Position(1, 5), "  \t  "),
         self._tokens.command(Position(1, 14), r"\foo"),
     )
     self._verify_tokens(*expected)
Ejemplo n.º 5
0
 def _reset(self):
     """
     Start over from the beginning of the source.

     The position is set to Position(1, 0, <source name>) and the input
     becomes a new Stream over a fresh iterator on the source content,
     with _on_take passed as its second argument.
     """
     source = self._source
     # The position is assigned first, so it is already in place should the
     # stream invoke _on_take right away.
     self._position = Position(1, 0, source.name)
     characters = iter(source.content)
     self._input = Stream(characters, self._on_take)
Ejemplo n.º 6
0
 def test_print_properly(self):
     # repr() of the token must equal Token.DISPLAY filled in with the
     # text, category and location given below.
     expected = Token.DISPLAY.format(text="a",
                                     category="character",
                                     location=Position(1, 1))
     actual = repr(self._token)
     self.assertEqual(expected, actual)
Ejemplo n.º 7
0
 def test_equals_a_similar_tokens(self):
     # A freshly built character token "a" at Position(1, 1) must compare
     # equal to self._token.
     twin = self._tokens.character(Position(1, 1), "a")
     self.assertEqual(twin, self._token)
Ejemplo n.º 8
0
 def test_recognises_a_complete_macro_definition(self):
     # A full macro definition mixing commands, parameters, group
     # delimiters and "others" symbols in a single input.
     self._text = "\\def\\point#1#2{(#2,#1)}"
     expected = [
         self._tokens.command(Position(1, 1), r"\def"),
         self._tokens.command(Position(1, 5), r"\point"),
         self._tokens.parameter(Position(1, 11), "#1"),
         self._tokens.parameter(Position(1, 13), "#2"),
         self._tokens.begin_group(Position(1, 15), "{"),
         self._tokens.others(Position(1, 16), "("),
         self._tokens.parameter(Position(1, 17), "#2"),
         self._tokens.others(Position(1, 19), ","),
         self._tokens.parameter(Position(1, 20), "#1"),
         self._tokens.others(Position(1, 22), ")"),
         self._tokens.end_group(Position(1, 23), "}"),
     ]
     self._verify_tokens(*expected)
Ejemplo n.º 9
0
 def test_recognises_a_single_character(self):
     # Expected outcome: a one-letter input gives exactly one character
     # token.
     letter = "b"
     self._text = letter
     only_token = self._tokens.character(Position(1, 1), letter)
     self._verify_tokens(only_token)
Ejemplo n.º 10
0
 def test_recognises_subscript(self):
     # Expected outcome: a lone "_" gives a single subscript token.
     self._text = "_"
     subscript = self._tokens.subscript(Position(1, 1))
     self._verify_tokens(subscript)
Ejemplo n.º 11
0
 def test_recognises_non_breaking_space(self):
     # Expected outcome: a lone "~" gives a single non-breaking-space token.
     self._text = "~"
     tie = self._tokens.non_breaking_space(Position(1, 1))
     self._verify_tokens(tie)
Ejemplo n.º 12
0
 def test_recognises_math_mode(self):
     # Expected outcome: a lone "$" gives a single math token.
     self._text = "$"
     math_shift = self._tokens.math(Position(1, 1))
     self._verify_tokens(math_shift)
Ejemplo n.º 13
0
 def _reset(self):
     """
     Start over from the beginning of the source.

     The position is set to Position(1, 0, <source name>) and the input
     becomes a new Stream over a fresh iterator on the source content,
     with _on_take passed as its second argument.
     """
     source = self._source
     # The position is assigned first, so it is already in place should the
     # stream invoke _on_take right away.
     self._position = Position(1, 0, source.name)
     characters = iter(source.content)
     self._input = Stream(characters, self._on_take)
Ejemplo n.º 14
0
class Lexer:
    """
    Scan a stream of characters and yield a stream of tokens.

    The lexer defines one handler per category of symbols. Handlers are
    selected by reflection: the handler for a category is the method whose
    name is "_read_" followed by the lower-cased category name.
    """

    def __init__(self, symbols, source):
        """
        :param symbols: symbol table; the lexer uses its ``category_of``
            method and its per-category attributes (``NEW_LINE``,
            ``CONTROL``, ``CHARACTER``, ...) for membership tests
        :param source: object exposing ``name`` and ``content``
        """
        self._source = source
        self._symbols = symbols
        self._tokens = TokenFactory(self._symbols)
        self._reset()

    def _reset(self):
        """Restart from Position(1, 0, <source name>) with a fresh input stream."""
        self._position = Position(1, 0, self._source.name)
        self._input = Stream(iter(self._source.content), self._on_take)

    def _on_take(self, character):
        """Advance the position: to the next line on a new-line symbol,
        otherwise to the next character."""
        if character in self._symbols.NEW_LINE:
            self._position = self._position.next_line()
        else:
            self._position = self._position.next_character()

    @property
    def position(self):
        """The current position of the lexer."""
        return self._position

    def _take(self):
        """Take one character from the input stream and return it."""
        return self._input.take()

    @property
    def _next(self):
        """The look-ahead of the input stream (None is treated as end of input)."""
        return self._input.look_ahead()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next token, or raise StopIteration when the look-ahead is None."""
        if self._next is None:
            raise StopIteration()
        return self._one_token()

    def _one_token(self):
        """Read one token using the handler matching the category of the look-ahead."""
        handler = self._handler_for(self._symbols.category_of(self._next))
        return handler()

    def _handler_for(self, category):
        """
        Return the bound "_read_<category>" method for the given category.

        :param category: object whose ``name`` identifies the category
        :raises AttributeError: if the lexer defines no such handler
        """
        handler_name = "_read_" + category.name.lower()
        handler = getattr(self, handler_name, None)
        if handler is None:
            # Same exception type a plain getattr would raise, but with a
            # message that names the unsupported category.
            raise AttributeError(
                "Lexer has no handler for '%s' symbols" % category.name)
        return handler

    # In every handler below, the location is read *after* the first symbol
    # has been taken, i.e. once _on_take has updated the position.

    def _read_character(self):
        """Read a single character token."""
        character = self._take()
        return self._tokens.character(self._position, character)

    def _read_control(self):
        """
        Read a command: the control marker followed either by one single
        non-character symbol, or by a run of character symbols.
        """
        marker = self._take()
        location = self._position
        assert marker in self._symbols.CONTROL
        if self._next not in self._symbols.CHARACTER:
            name = self._take()
        else:
            name = self._take_while(lambda c: c in self._symbols.CHARACTER)
        return self._tokens.command(location, marker + name)

    def _take_while(self, predicate):
        """Join into one string what the stream's take_while yields for the predicate."""
        return "".join(self._input.take_while(predicate))

    def _read_comment(self):
        """Read a comment: the marker plus everything before the next new-line symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.COMMENT
        text = self._take_while(lambda c: c not in self._symbols.NEW_LINE)
        return self._tokens.comment(location, marker + text)

    def _read_white_spaces(self):
        """Read a run of white-space symbols as a single token."""
        marker = self._take()
        location = self._position
        spaces = self._take_while(lambda c: c in self._symbols.WHITE_SPACES)
        return self._tokens.white_space(location, marker + spaces)

    def _read_new_line(self):
        """Read a single new-line symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NEW_LINE
        return self._tokens.new_line(location, marker)

    def _read_begin_group(self):
        """Read a single begin-group symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.BEGIN_GROUP
        return self._tokens.begin_group(location, marker)

    def _read_end_group(self):
        """Read a single end-group symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.END_GROUP
        return self._tokens.end_group(location, marker)

    def _read_parameter(self):
        """Read a parameter: the marker followed by any digits."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.PARAMETER
        text = marker + self._take_while(lambda c: c.isdigit())
        return self._tokens.parameter(location, text)

    def _read_math(self):
        """Read a single math symbol (the token is built from the location only)."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.MATH
        return self._tokens.math(location)

    def _read_superscript(self):
        """Read a single superscript symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUPERSCRIPT
        return self._tokens.superscript(location, marker)

    def _read_subscript(self):
        """Read a single subscript symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUBSCRIPT
        return self._tokens.subscript(location, marker)

    def _read_non_breaking_space(self):
        """Read a single non-breaking-space symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NON_BREAKING_SPACE
        return self._tokens.non_breaking_space(location, marker)

    def _read_others(self):
        """Read a single symbol of the "others" category."""
        marker = self._take()
        location = self._position
        # Unlike the other handlers, the marker is not checked against a
        # symbol set here.
        return self._tokens.others(location, marker)
Ejemplo n.º 15
0
 def test_recognises_an_ending_group(self):
     # Expected outcome: a lone "}" gives a single end-group token.
     self._text = "}"
     closing = self._tokens.end_group(Position(1, 1))
     self._verify_tokens(closing)
Ejemplo n.º 16
0
 def test_recognises_a_single_command(self):
     # Expected outcome: the whole input is reported as one command token.
     macro = r"\myMacro"
     self._text = macro
     command = self._tokens.command(Position(1, 1), macro)
     self._verify_tokens(command)
Ejemplo n.º 17
0
 def test_recognises_an_parameter(self):
     # Expected outcome: a command token followed by a parameter token.
     # The input is a raw string because "\d" is not a valid escape
     # sequence in a regular literal; the runtime value is unchanged.
     self._text = r"\def#1"
     self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                         self._tokens.parameter(Position(1, 5), "#1"))
Ejemplo n.º 18
0
 def test_recognises_a_single_special_character_command(self):
     # Expected outcome: the control marker followed by "%" is reported as
     # one command token.
     escaped = r"\%"
     self._text = escaped
     command = self._tokens.command(Position(1, 1), escaped)
     self._verify_tokens(command)
Ejemplo n.º 19
0
 def setUp(self):
     """Build a TokenFactory over the default symbol table, plus a
     character token "a" at Position(1, 1)."""
     factory = TokenFactory(SymbolTable.default())
     self._tokens = factory
     self._token = factory.character(Position(1, 1), "a")
Ejemplo n.º 20
0
 def test_recognises_sequences_of_single_character_command(self):
     # Expected outcome: three consecutive two-symbol commands, each
     # reported as its own command token.
     self._text = r"\%\$\\"
     expected = [
         self._tokens.command(Position(1, 1), r"\%"),
         self._tokens.command(Position(1, 3), r"\$"),
         self._tokens.command(Position(1, 5), r"\\"),
     ]
     self._verify_tokens(*expected)
Ejemplo n.º 21
0
 def test_differs_from_a_different_character(self):
     # A character token "b" at Position(1, 1) must not compare equal to
     # self._token.
     other = self._tokens.character(Position(1, 1), "b")
     self.assertNotEqual(other, self._token)
Ejemplo n.º 22
0
 def test_recognises_two_commands(self):
     # Expected outcome: two adjacent commands give two command tokens.
     self._text = r"\def\foo"
     first = self._tokens.command(Position(1, 1), r"\def")
     second = self._tokens.command(Position(1, 5), r"\foo")
     self._verify_tokens(first, second)
Ejemplo n.º 23
0
 def _at(line, column):
     """Shorthand returning Position(line, column)."""
     position = Position(line, column)
     return position
Ejemplo n.º 24
0
class Lexer:
    """
    Scan a stream of characters and yield a stream of tokens.

    The lexer defines one handler per category of symbols. Handlers are
    selected by reflection: the handler for a category is the method whose
    name is "_read_" followed by the lower-cased category name.
    """

    def __init__(self, symbols, source):
        """
        :param symbols: symbol table; the lexer uses its ``category_of``
            method and its per-category attributes (``NEW_LINE``,
            ``CONTROL``, ``CHARACTER``, ...) for membership tests
        :param source: object exposing ``name`` and ``content``
        """
        self._source = source
        self._symbols = symbols
        self._tokens = TokenFactory(self._symbols)
        self._reset()

    def _reset(self):
        """Restart from Position(1, 0, <source name>) with a fresh input stream."""
        self._position = Position(1, 0, self._source.name)
        self._input = Stream(iter(self._source.content), self._on_take)

    def _on_take(self, character):
        """Advance the position: to the next line on a new-line symbol,
        otherwise to the next character."""
        if character in self._symbols.NEW_LINE:
            self._position = self._position.next_line()
        else:
            self._position = self._position.next_character()

    @property
    def position(self):
        """The current position of the lexer."""
        return self._position

    def _take(self):
        """Take one character from the input stream and return it."""
        return self._input.take()

    @property
    def _next(self):
        """The look-ahead of the input stream (None is treated as end of input)."""
        return self._input.look_ahead()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next token, or raise StopIteration when the look-ahead is None."""
        if self._next is None:
            raise StopIteration()
        return self._one_token()

    def _one_token(self):
        """Read one token using the handler matching the category of the look-ahead."""
        handler = self._handler_for(self._symbols.category_of(self._next))
        return handler()

    def _handler_for(self, category):
        """
        Return the bound "_read_<category>" method for the given category.

        :param category: object whose ``name`` identifies the category
        :raises AttributeError: if the lexer defines no such handler
        """
        handler_name = "_read_" + category.name.lower()
        handler = getattr(self, handler_name, None)
        if handler is None:
            # Same exception type a plain getattr would raise, but with a
            # message that names the unsupported category.
            raise AttributeError(
                "Lexer has no handler for '%s' symbols" % category.name)
        return handler

    # In every handler below, the location is read *after* the first symbol
    # has been taken, i.e. once _on_take has updated the position.

    def _read_character(self):
        """Read a single character token."""
        character = self._take()
        return self._tokens.character(self._position, character)

    def _read_control(self):
        """
        Read a command: the control marker followed either by one single
        non-character symbol, or by a run of character symbols.
        """
        marker = self._take()
        location = self._position
        assert marker in self._symbols.CONTROL
        if self._next not in self._symbols.CHARACTER:
            name = self._take()
        else:
            name = self._take_while(lambda c: c in self._symbols.CHARACTER)
        return self._tokens.command(location, marker + name)

    def _take_while(self, predicate):
        """Join into one string what the stream's take_while yields for the predicate."""
        return "".join(self._input.take_while(predicate))

    def _read_comment(self):
        """Read a comment: the marker plus everything before the next new-line symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.COMMENT
        text = self._take_while(lambda c: c not in self._symbols.NEW_LINE)
        return self._tokens.comment(location, marker + text)

    def _read_white_spaces(self):
        """Read a run of white-space symbols as a single token."""
        marker = self._take()
        location = self._position
        spaces = self._take_while(lambda c: c in self._symbols.WHITE_SPACES)
        return self._tokens.white_space(location, marker + spaces)

    def _read_new_line(self):
        """Read a single new-line symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NEW_LINE
        return self._tokens.new_line(location, marker)

    def _read_begin_group(self):
        """Read a single begin-group symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.BEGIN_GROUP
        return self._tokens.begin_group(location, marker)

    def _read_end_group(self):
        """Read a single end-group symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.END_GROUP
        return self._tokens.end_group(location, marker)

    def _read_parameter(self):
        """Read a parameter: the marker followed by any digits."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.PARAMETER
        text = marker + self._take_while(lambda c: c.isdigit())
        return self._tokens.parameter(location, text)

    def _read_math(self):
        """Read a single math symbol (the token is built from the location only)."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.MATH
        return self._tokens.math(location)

    def _read_superscript(self):
        """Read a single superscript symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUPERSCRIPT
        return self._tokens.superscript(location, marker)

    def _read_subscript(self):
        """Read a single subscript symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUBSCRIPT
        return self._tokens.subscript(location, marker)

    def _read_non_breaking_space(self):
        """Read a single non-breaking-space symbol."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NON_BREAKING_SPACE
        return self._tokens.non_breaking_space(location, marker)

    def _read_others(self):
        """Read a single symbol of the "others" category."""
        marker = self._take()
        location = self._position
        # Unlike the other handlers, the marker is not checked against a
        # symbol set here.
        return self._tokens.others(location, marker)