Ejemplo n.º 1
0
 def setUp(self):
     """
     Build the fixture shared by the tests of this case.

     Creates a mocked engine, the macro factory bound to it, the default
     symbol table with its token factory and node factory, and a context
     whose definitions are all the macros the macro factory provides.
     """
     engine = MagicMock()
     macros = MacroFactory(engine)
     self._engine, self._macros = engine, macros

     symbols = SymbolTable.default()
     self._symbols = symbols
     self._tokens = TokenFactory(symbols)
     self._factory = Factory(symbols)

     self._environment = Context(definitions=macros.all())
Ejemplo n.º 2
0
class TokenTests(TestCase):
    """Check equality and textual representation of a character token."""

    def setUp(self):
        """Create a token factory and the reference token: character "a" at Position(1, 1)."""
        factory = TokenFactory(SymbolTable.default())
        self._tokens = factory
        self._token = factory.character(Position(1, 1), "a")

    def test_equals_itself(self):
        """A token compares equal to itself."""
        reference = self._token
        self.assertEqual(reference, reference)

    def test_equals_a_similar_tokens(self):
        """A token built from the same position and character equals the reference."""
        twin = self._tokens.character(Position(1, 1), "a")
        self.assertEqual(twin, self._token)

    def test_differs_from_a_different_character(self):
        """A token holding another character at the same position is not equal."""
        other = self._tokens.character(Position(1, 1), "b")
        self.assertNotEqual(other, self._token)

    def test_differs_from_an_object_of_another_type(self):
        """A plain string never equals a token."""
        self.assertNotEqual("foo", self._token)

    def test_print_properly(self):
        """repr() of the token follows the Token.DISPLAY template."""
        expected = Token.DISPLAY.format(text="a",
                                        category="character",
                                        location=Position(1, 1))
        self.assertEqual(expected, repr(self._token))
Ejemplo n.º 3
0
 def setUp(self):
     """Prepare the default symbol table, a token factory built on it, and an unset input text."""
     symbols = SymbolTable.default()
     self._symbols = symbols
     self._tokens = TokenFactory(symbols)
     self._text = None
Ejemplo n.º 4
0
class LexerTests(TestCase):
    """
    Check the tokens the Lexer yields for small input texts.

    Each test stores the input in ``self._text`` and then lists the expected
    tokens through ``_verify_tokens``.
    """

    def setUp(self):
        """Prepare the default symbol table, a token factory built on it, and an unset input text."""
        self._symbols = SymbolTable.default()
        self._tokens = TokenFactory(self._symbols)
        self._text = None

    # NOTE(review): nothing in this class calls _on_take and setUp never sets
    # self._position; this looks like a leftover copied from the lexer --
    # confirm before removing.
    def _on_take(self, character):
        if character in self._symbols.NEW_LINE:
            self._position = self._position.new_line
        else:
            self._position = self._position.new_character

    def test_recognises_a_single_character(self):
        self._text = "b"
        self._verify_tokens(self._tokens.character(Position(1, 1), "b"))

    def test_recognises_a_word(self):
        self._text = "hello"
        self._verify_tokens(self._tokens.character(Position(1, 1), "h"),
                            self._tokens.character(Position(1, 2), "e"),
                            self._tokens.character(Position(1, 3), "l"),
                            self._tokens.character(Position(1, 4), "l"),
                            self._tokens.character(Position(1, 5), "o"))

    def test_recognises_a_single_command(self):
        self._text = r"\myMacro"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\myMacro"))

    def test_recognises_a_single_special_character_command(self):
        self._text = r"\%"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\%"))

    def test_recognises_sequences_of_single_character_command(self):
        self._text = r"\%\$\\"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\%"),
                            self._tokens.command(Position(1, 3), r"\$"),
                            self._tokens.command(Position(1, 5), r"\\"))

    def test_recognises_two_commands(self):
        self._text = r"\def\foo"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                            self._tokens.command(Position(1, 5), r"\foo"))

    def test_recognises_two_commands_separated_by_white_spaces(self):
        self._text = "\\def  \t  \\foo"
        # NOTE(review): "\foo" starts at the 10th character of the text, yet
        # column 14 is expected; presumably token equality ignores the
        # position -- confirm.
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                            self._tokens.white_space(Position(1, 5), "  \t  "),
                            self._tokens.command(Position(1, 14), r"\foo"))

    def test_recognises_a_comment(self):
        self._text = "%This is a comment\n\\def\\foo"
        # NOTE(review): the new line follows an 18-character comment but is
        # expected at Position(1, 1); presumably positions are not compared
        # -- confirm.
        self._verify_tokens(
            self._tokens.comment(Position(1, 1), "%This is a comment"),
            self._tokens.new_line(Position(1, 1)),
            self._tokens.command(Position(2, 1), r"\def"),
            self._tokens.command(Position(2, 5), r"\foo"))

    def test_recognises_an_opening_group(self):
        self._text = "{"
        self._verify_tokens(self._tokens.begin_group(Position(1, 1)))

    def test_recognises_an_ending_group(self):
        self._text = "}"
        self._verify_tokens(self._tokens.end_group(Position(1, 1)))

    def test_recognises_an_parameter(self):
        # Raw string: "\d" is not a valid escape sequence in a normal literal
        # and triggers a compile-time warning; the text itself is unchanged.
        self._text = r"\def#1"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                            self._tokens.parameter(Position(1, 5), "#1"))

    def test_recognises_a_complete_macro_definition(self):
        self._text = "\\def\\point#1#2{(#2,#1)}"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                            self._tokens.command(Position(1, 5), r"\point"),
                            self._tokens.parameter(Position(1, 11), "#1"),
                            self._tokens.parameter(Position(1, 13), "#2"),
                            self._tokens.begin_group(Position(1, 15), "{"),
                            self._tokens.others(Position(1, 16), "("),
                            self._tokens.parameter(Position(1, 17), "#2"),
                            self._tokens.others(Position(1, 19), ","),
                            self._tokens.parameter(Position(1, 20), "#1"),
                            self._tokens.others(Position(1, 22), ")"),
                            self._tokens.end_group(Position(1, 23), "}"))

    def test_recognises_math_mode(self):
        self._text = "$"
        self._verify_tokens(self._tokens.math(Position(1, 1)))

    def test_recognises_superscript(self):
        self._text = "^"
        self._verify_tokens(self._tokens.superscript(Position(1, 1)))

    def test_recognises_subscript(self):
        self._text = "_"
        self._verify_tokens(self._tokens.subscript(Position(1, 1)))

    def test_recognises_non_breaking_space(self):
        self._text = "~"
        self._verify_tokens(self._tokens.non_breaking_space(Position(1, 1)))

    def _verify_tokens(self, *expected_tokens):
        """Lex ``self._text`` and assert the produced tokens equal ``expected_tokens``, in order."""
        self.assertListEqual(list(expected_tokens),
                             list(Lexer(self._symbols, Source(self._text))))
Ejemplo n.º 5
0
 def setUp(self):
     """Create a token factory and the reference token: character "a" at Position(1, 1)."""
     factory = TokenFactory(SymbolTable.default())
     self._tokens = factory
     self._token = factory.character(Position(1, 1), "a")
Ejemplo n.º 6
0
 def __init__(self, symbols, source):
     """
     Keep the source and the symbol table, build a token factory from the
     symbols, then call ``self._reset()``.

     :param symbols: symbol table handed to the TokenFactory.
     :param source: the source stored on the instance.
     """
     self._source, self._symbols = source, symbols
     self._tokens = TokenFactory(symbols)
     self._reset()
Ejemplo n.º 7
0
class Lexer:
    """
    Scan a stream of characters and yield a stream of tokens.

    The lexer defines one handler per category of symbols. Handlers are
    selected by reflection: the handler for a category is the method named
    "_read_" followed by the lower-cased category name.
    """

    def __init__(self, symbols, source):
        """
        Bind the lexer to a symbol table and a source, then reset it.

        :param symbols: symbol table used to classify characters; the lexer
            reads ``category_of`` and the per-category collections
            (``NEW_LINE``, ``CONTROL``, ``CHARACTER``, ...) from it.
        :param source: object exposing ``name`` and ``content``.
        """
        self._source = source
        self._symbols = symbols
        self._tokens = TokenFactory(self._symbols)
        self._reset()

    def _reset(self):
        """Restart at Position(1, 0) and wrap the source content in a new Stream."""
        self._position = Position(1, 0, self._source.name)
        # _on_take is handed to the stream; presumably the stream invokes it
        # for every character taken, which keeps the position in sync.
        self._input = Stream(iter(self._source.content), self._on_take)

    def _on_take(self, character):
        """Move the position with next_line() on a new-line symbol, next_character() otherwise."""
        if character in self._symbols.NEW_LINE:
            self._position = self._position.next_line()
        else:
            self._position = self._position.next_character()

    @property
    def position(self):
        """The position currently tracked by the lexer."""
        return self._position

    def _take(self):
        """Take one character from the input stream."""
        return self._input.take()

    @property
    def _next(self):
        """Look-ahead of the input stream; ``__next__`` treats None as the end of input."""
        return self._input.look_ahead()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next token, or raise StopIteration when the look-ahead is None."""
        if self._next is None:
            raise StopIteration()
        return self._one_token()

    def _one_token(self):
        """Read one token with the handler matching the category of the look-ahead."""
        handler = self._handler_for(self._symbols.category_of(self._next))
        return handler()

    def _handler_for(self, category):
        """
        Return the bound "_read_<category>" method for the given category.

        :param category: object whose ``name`` identifies the symbol category.
        :raises AttributeError: when the lexer defines no such handler.
        """
        handler_name = "_read_" + category.name.lower()
        # Look up with a default so a missing handler is reported with an
        # explicit message instead of the generic attribute-lookup failure.
        handler = getattr(self, handler_name, None)
        if handler is None:
            raise AttributeError(
                "Lexer has no handler for '%s' symbols" % category.name)
        return handler

    def _read_character(self):
        """Read a single character token."""
        character = self._take()
        return self._tokens.character(self._position, character)

    def _read_control(self):
        """
        Read a command: the control marker followed either by a run of
        CHARACTER symbols, or by one single symbol of any other category.
        """
        marker = self._take()
        # Captured after the take, like in every handler below.
        location = self._position
        assert marker in self._symbols.CONTROL
        # NOTE(review): when the marker is the last symbol of the input the
        # look-ahead is None here; behaviour then depends on Stream -- confirm.
        if self._next not in self._symbols.CHARACTER:
            name = self._take()
        else:
            name = self._take_while(lambda c: c in self._symbols.CHARACTER)
        return self._tokens.command(location, marker + name)

    def _take_while(self, predicate):
        """Join into one string what the stream's take_while yields for ``predicate``."""
        return "".join(self._input.take_while(predicate))

    def _read_comment(self):
        """Read a comment: the marker plus everything up to, excluding, the new line."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.COMMENT
        text = self._take_while(lambda c: c not in self._symbols.NEW_LINE)
        return self._tokens.comment(location, marker + text)

    def _read_white_spaces(self):
        """Read a run of consecutive white spaces as one token."""
        marker = self._take()
        location = self._position
        spaces = self._take_while(lambda c: c in self._symbols.WHITE_SPACES)
        return self._tokens.white_space(location, marker + spaces)

    def _read_new_line(self):
        """Read a single new-line token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NEW_LINE
        return self._tokens.new_line(location, marker)

    def _read_begin_group(self):
        """Read a begin-group token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.BEGIN_GROUP
        return self._tokens.begin_group(location, marker)

    def _read_end_group(self):
        """Read an end-group token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.END_GROUP
        return self._tokens.end_group(location, marker)

    def _read_parameter(self):
        """Read a parameter: the marker followed by any digits."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.PARAMETER
        text = marker + self._take_while(lambda c: c.isdigit())
        return self._tokens.parameter(location, text)

    def _read_math(self):
        """Read a math token; the marker text is not passed to the factory."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.MATH
        return self._tokens.math(location)

    def _read_superscript(self):
        """Read a superscript token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUPERSCRIPT
        return self._tokens.superscript(location, marker)

    def _read_subscript(self):
        """Read a subscript token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUBSCRIPT
        return self._tokens.subscript(location, marker)

    def _read_non_breaking_space(self):
        """Read a non-breaking-space token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NON_BREAKING_SPACE
        return self._tokens.non_breaking_space(location, marker)

    def _read_others(self):
        """Read one symbol of the OTHERS category; the marker is not checked."""
        marker = self._take()
        location = self._position
        return self._tokens.others(location, marker)
Ejemplo n.º 8
0
 def setUp(self):
     """Prepare the default symbol table, a token factory built on it, and an unset input text."""
     symbols = SymbolTable.default()
     self._symbols = symbols
     self._tokens = TokenFactory(symbols)
     self._text = None
Ejemplo n.º 9
0
class LexerTests(TestCase):
    """
    Check the tokens the Lexer yields for small input texts.

    Each test stores the input in ``self._text`` and then lists the expected
    tokens through ``_verify_tokens``.
    """

    def setUp(self):
        """Prepare the default symbol table, a token factory built on it, and an unset input text."""
        self._symbols = SymbolTable.default()
        self._tokens = TokenFactory(self._symbols)
        self._text = None

    # NOTE(review): nothing in this class calls _on_take and setUp never sets
    # self._position; this looks like a leftover copied from the lexer --
    # confirm before removing.
    def _on_take(self, character):
        if character in self._symbols.NEW_LINE:
            self._position = self._position.new_line
        else:
            self._position = self._position.new_character

    def test_recognises_a_single_character(self):
        self._text = "b"
        self._verify_tokens(self._tokens.character(Position(1, 1), "b"))

    def test_recognises_a_word(self):
        self._text = "hello"
        self._verify_tokens(self._tokens.character(Position(1, 1), "h"),
                            self._tokens.character(Position(1, 2), "e"),
                            self._tokens.character(Position(1, 3), "l"),
                            self._tokens.character(Position(1, 4), "l"),
                            self._tokens.character(Position(1, 5), "o"))

    def test_recognises_a_single_command(self):
        self._text = r"\myMacro"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\myMacro"))

    def test_recognises_a_single_special_character_command(self):
        self._text = r"\%"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\%"))

    def test_recognises_sequences_of_single_character_command(self):
        self._text = r"\%\$\\"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\%"),
                            self._tokens.command(Position(1, 3), r"\$"),
                            self._tokens.command(Position(1, 5), r"\\"))

    def test_recognises_two_commands(self):
        self._text = r"\def\foo"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                            self._tokens.command(Position(1, 5), r"\foo"))

    def test_recognises_two_commands_separated_by_white_spaces(self):
        self._text = "\\def  \t  \\foo"
        # NOTE(review): "\foo" starts at the 10th character of the text, yet
        # column 14 is expected; presumably token equality ignores the
        # position -- confirm.
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                            self._tokens.white_space(Position(1, 5), "  \t  "),
                            self._tokens.command(Position(1, 14), r"\foo"))

    def test_recognises_a_comment(self):
        self._text = "%This is a comment\n\\def\\foo"
        # NOTE(review): the new line follows an 18-character comment but is
        # expected at Position(1, 1); presumably positions are not compared
        # -- confirm.
        self._verify_tokens(
            self._tokens.comment(Position(1, 1), "%This is a comment"),
            self._tokens.new_line(Position(1, 1)),
            self._tokens.command(Position(2, 1), r"\def"),
            self._tokens.command(Position(2, 5), r"\foo"))

    def test_recognises_an_opening_group(self):
        self._text = "{"
        self._verify_tokens(self._tokens.begin_group(Position(1, 1)))

    def test_recognises_an_ending_group(self):
        self._text = "}"
        self._verify_tokens(self._tokens.end_group(Position(1, 1)))

    def test_recognises_an_parameter(self):
        # Raw string: "\d" is not a valid escape sequence in a normal literal
        # and triggers a compile-time warning; the text itself is unchanged.
        self._text = r"\def#1"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                            self._tokens.parameter(Position(1, 5), "#1"))

    def test_recognises_a_complete_macro_definition(self):
        self._text = "\\def\\point#1#2{(#2,#1)}"
        self._verify_tokens(self._tokens.command(Position(1, 1), r"\def"),
                            self._tokens.command(Position(1, 5), r"\point"),
                            self._tokens.parameter(Position(1, 11), "#1"),
                            self._tokens.parameter(Position(1, 13), "#2"),
                            self._tokens.begin_group(Position(1, 15), "{"),
                            self._tokens.others(Position(1, 16), "("),
                            self._tokens.parameter(Position(1, 17), "#2"),
                            self._tokens.others(Position(1, 19), ","),
                            self._tokens.parameter(Position(1, 20), "#1"),
                            self._tokens.others(Position(1, 22), ")"),
                            self._tokens.end_group(Position(1, 23), "}"))

    def test_recognises_math_mode(self):
        self._text = "$"
        self._verify_tokens(self._tokens.math(Position(1, 1)))

    def test_recognises_superscript(self):
        self._text = "^"
        self._verify_tokens(self._tokens.superscript(Position(1, 1)))

    def test_recognises_subscript(self):
        self._text = "_"
        self._verify_tokens(self._tokens.subscript(Position(1, 1)))

    def test_recognises_non_breaking_space(self):
        self._text = "~"
        self._verify_tokens(self._tokens.non_breaking_space(Position(1, 1)))

    def _verify_tokens(self, *expected_tokens):
        """Lex ``self._text`` and assert the produced tokens equal ``expected_tokens``, in order."""
        self.assertListEqual(list(expected_tokens),
                             list(Lexer(self._symbols, Source(self._text))))
Ejemplo n.º 10
0
 def __init__(self, symbols, source):
     """
     Keep the source and the symbol table, build a token factory from the
     symbols, then call ``self._reset()``.

     :param symbols: symbol table handed to the TokenFactory.
     :param source: the source stored on the instance.
     """
     self._source, self._symbols = source, symbols
     self._tokens = TokenFactory(symbols)
     self._reset()
Ejemplo n.º 11
0
class Lexer:
    """
    Scan a stream of characters and yield a stream of tokens.

    The lexer defines one handler per category of symbols. Handlers are
    selected by reflection: the handler for a category is the method named
    "_read_" followed by the lower-cased category name.
    """

    def __init__(self, symbols, source):
        """
        Bind the lexer to a symbol table and a source, then reset it.

        :param symbols: symbol table used to classify characters; the lexer
            reads ``category_of`` and the per-category collections
            (``NEW_LINE``, ``CONTROL``, ``CHARACTER``, ...) from it.
        :param source: object exposing ``name`` and ``content``.
        """
        self._source = source
        self._symbols = symbols
        self._tokens = TokenFactory(self._symbols)
        self._reset()

    def _reset(self):
        """Restart at Position(1, 0) and wrap the source content in a new Stream."""
        self._position = Position(1, 0, self._source.name)
        # _on_take is handed to the stream; presumably the stream invokes it
        # for every character taken, which keeps the position in sync.
        self._input = Stream(iter(self._source.content), self._on_take)

    def _on_take(self, character):
        """Move the position with next_line() on a new-line symbol, next_character() otherwise."""
        if character in self._symbols.NEW_LINE:
            self._position = self._position.next_line()
        else:
            self._position = self._position.next_character()

    @property
    def position(self):
        """The position currently tracked by the lexer."""
        return self._position

    def _take(self):
        """Take one character from the input stream."""
        return self._input.take()

    @property
    def _next(self):
        """Look-ahead of the input stream; ``__next__`` treats None as the end of input."""
        return self._input.look_ahead()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next token, or raise StopIteration when the look-ahead is None."""
        if self._next is None:
            raise StopIteration()
        return self._one_token()

    def _one_token(self):
        """Read one token with the handler matching the category of the look-ahead."""
        handler = self._handler_for(self._symbols.category_of(self._next))
        return handler()

    def _handler_for(self, category):
        """
        Return the bound "_read_<category>" method for the given category.

        :param category: object whose ``name`` identifies the symbol category.
        :raises AttributeError: when the lexer defines no such handler.
        """
        handler_name = "_read_" + category.name.lower()
        # Look up with a default so a missing handler is reported with an
        # explicit message instead of the generic attribute-lookup failure.
        handler = getattr(self, handler_name, None)
        if handler is None:
            raise AttributeError(
                "Lexer has no handler for '%s' symbols" % category.name)
        return handler

    def _read_character(self):
        """Read a single character token."""
        character = self._take()
        return self._tokens.character(self._position, character)

    def _read_control(self):
        """
        Read a command: the control marker followed either by a run of
        CHARACTER symbols, or by one single symbol of any other category.
        """
        marker = self._take()
        # Captured after the take, like in every handler below.
        location = self._position
        assert marker in self._symbols.CONTROL
        # NOTE(review): when the marker is the last symbol of the input the
        # look-ahead is None here; behaviour then depends on Stream -- confirm.
        if self._next not in self._symbols.CHARACTER:
            name = self._take()
        else:
            name = self._take_while(lambda c: c in self._symbols.CHARACTER)
        return self._tokens.command(location, marker + name)

    def _take_while(self, predicate):
        """Join into one string what the stream's take_while yields for ``predicate``."""
        return "".join(self._input.take_while(predicate))

    def _read_comment(self):
        """Read a comment: the marker plus everything up to, excluding, the new line."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.COMMENT
        text = self._take_while(lambda c: c not in self._symbols.NEW_LINE)
        return self._tokens.comment(location, marker + text)

    def _read_white_spaces(self):
        """Read a run of consecutive white spaces as one token."""
        marker = self._take()
        location = self._position
        spaces = self._take_while(lambda c: c in self._symbols.WHITE_SPACES)
        return self._tokens.white_space(location, marker + spaces)

    def _read_new_line(self):
        """Read a single new-line token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NEW_LINE
        return self._tokens.new_line(location, marker)

    def _read_begin_group(self):
        """Read a begin-group token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.BEGIN_GROUP
        return self._tokens.begin_group(location, marker)

    def _read_end_group(self):
        """Read an end-group token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.END_GROUP
        return self._tokens.end_group(location, marker)

    def _read_parameter(self):
        """Read a parameter: the marker followed by any digits."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.PARAMETER
        text = marker + self._take_while(lambda c: c.isdigit())
        return self._tokens.parameter(location, text)

    def _read_math(self):
        """Read a math token; the marker text is not passed to the factory."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.MATH
        return self._tokens.math(location)

    def _read_superscript(self):
        """Read a superscript token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUPERSCRIPT
        return self._tokens.superscript(location, marker)

    def _read_subscript(self):
        """Read a subscript token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUBSCRIPT
        return self._tokens.subscript(location, marker)

    def _read_non_breaking_space(self):
        """Read a non-breaking-space token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NON_BREAKING_SPACE
        return self._tokens.non_breaking_space(location, marker)

    def _read_others(self):
        """Read one symbol of the OTHERS category; the marker is not checked."""
        marker = self._take()
        location = self._position
        return self._tokens.others(location, marker)