def test_TryTokenize_Everything(self):
    """Tokenizes a document exercising every token kind: words, paragraph
    breaks, slash escapes, blobs, list markers, cell markers, and sections."""
    actual = mp._Tokenize('hello\nworld\n\n\\def{space}\n \n'
                          + 'List\n*hello\n*world\n\n%code{C++}{x = x + 1}\n\n=section=hello')
    # Each row: (token kind, token text, (first-line, last-line, start, end)).
    spec = [
        ('word', 'hello', (0, 0, 0, 5)),
        ('word', 'world', (1, 1, 6, 11)),
        ('paragraph-end', '\n', (2, 3, 12, 13)),
        ('slash', '\\', (3, 3, 13, 14)),
        ('word', 'def', (3, 3, 14, 17)),
        ('blob', 'space', (3, 3, 17, 24)),
        ('paragraph-end', '\n', (4, 5, 28, 29)),
        ('word', 'List', (5, 5, 29, 33)),
        ('list-marker', '*', (6, 6, 34, 35)),
        ('word', 'hello', (6, 6, 35, 40)),
        ('list-marker', '*', (7, 7, 41, 42)),
        ('word', 'world', (7, 7, 42, 47)),
        ('paragraph-end', '\n', (8, 9, 48, 49)),
        ('cell-marker', '%', (9, 9, 49, 50)),
        ('word', 'code', (9, 9, 50, 54)),
        ('blob', 'C++', (9, 9, 54, 59)),
        ('blob', 'x = x + 1', (9, 9, 59, 70)),
        ('paragraph-end', '\n', (10, 11, 71, 72)),
        ('section-marker', '=', (11, 11, 72, 73)),
        ('word', 'section', (11, 11, 73, 80)),
        ('section-marker', '=', (11, 11, 80, 81)),
        ('word', 'hello', (11, 11, 81, 86)),
        # The tokenizer always terminates the stream with an empty
        # paragraph-end token at the final offset.
        ('paragraph-end', '', (11, 11, 86, 86)),
    ]
    expected = [mp.Token(kind, text, mp.SourcePos(*pos))
                for kind, text, pos in spec]
    self.assertEqual(expected, actual)
def test_TryTokenize_ComplexWords(self):
    """Words split on whitespace across lines; a trailing space before a
    newline does not produce a paragraph break."""
    actual = mp._Tokenize('hello world\nhow are \nyou?')
    # (kind, text, SourcePos args) for every expected token, in order.
    spec = [
        ('word', 'hello', (0, 0, 0, 5)),
        ('word', 'world', (0, 0, 6, 11)),
        ('word', 'how', (1, 1, 12, 15)),
        ('word', 'are', (1, 1, 16, 19)),
        ('word', 'you?', (2, 2, 21, 25)),
        ('paragraph-end', '', (2, 2, 25, 25)),
    ]
    expected = [mp.Token(kind, text, mp.SourcePos(*pos))
                for kind, text, pos in spec]
    self.assertEqual(expected, actual)
def test_TryTokenize_ComplexWordsAndComplexParagraphEnd(self):
    """A run of whitespace-only lines collapses into a single
    paragraph-end token spanning all of them."""
    actual = mp._Tokenize('hello world\n \n \n\t \nhow are you?')
    spec = [
        ('word', 'hello', (0, 0, 0, 5)),
        ('word', 'world', (0, 0, 6, 11)),
        # One paragraph-end covering the three blank-ish lines (1 through 4).
        ('paragraph-end', '\n \n\t \n', (1, 4, 14, 22)),
        ('word', 'how', (4, 4, 22, 25)),
        ('word', 'are', (4, 4, 26, 29)),
        ('word', 'you?', (4, 4, 30, 34)),
        ('paragraph-end', '', (4, 4, 34, 34)),
    ]
    expected = [mp.Token(kind, text, mp.SourcePos(*pos))
                for kind, text, pos in spec]
    self.assertEqual(expected, actual)
def test_TryTokenize_ComplexWordsAndParagraphEnd(self):
    """A single empty line between text produces one paragraph-end token."""
    actual = mp._Tokenize('hello world\n\nhow are you?')
    spec = [
        ('word', 'hello', (0, 0, 0, 5)),
        ('word', 'world', (0, 0, 6, 11)),
        ('paragraph-end', '\n', (1, 2, 12, 13)),
        ('word', 'how', (2, 2, 13, 16)),
        ('word', 'are', (2, 2, 17, 20)),
        ('word', 'you?', (2, 2, 21, 25)),
        ('paragraph-end', '', (2, 2, 25, 25)),
    ]
    expected = [mp.Token(kind, text, mp.SourcePos(*pos))
                for kind, text, pos in spec]
    self.assertEqual(expected, actual)
def test_TryTokenize_ComplexWordsAndBlob(self):
    """Brace-delimited blobs keep their inner text verbatim, including
    newlines and nested braces, and may span multiple source lines."""
    actual = mp._Tokenize(
        'hello world\nhow are \nyou? {special sauce}\n{el\n\n {} \n}')
    spec = [
        ('word', 'hello', (0, 0, 0, 5)),
        ('word', 'world', (0, 0, 6, 11)),
        ('word', 'how', (1, 1, 12, 15)),
        ('word', 'are', (1, 1, 16, 19)),
        ('word', 'you?', (2, 2, 21, 25)),
        ('blob', 'special sauce', (2, 2, 26, 41)),
        # Multi-line blob spanning source lines 3 through 6.
        ('blob', 'el\n\n {} \n', (3, 6, 42, 53)),
        ('paragraph-end', '', (6, 6, 53, 53)),
    ]
    expected = [mp.Token(kind, text, mp.SourcePos(*pos))
                for kind, text, pos in spec]
    self.assertEqual(expected, actual)
def test_TryTokenize_ComplexWordsAndSlash(self):
    """A backslash becomes its own slash token and splits the word that
    follows it off as a separate word token."""
    actual = mp._Tokenize('hello world\nhow \\are \nyou\\?')
    spec = [
        ('word', 'hello', (0, 0, 0, 5)),
        ('word', 'world', (0, 0, 6, 11)),
        ('word', 'how', (1, 1, 12, 15)),
        ('slash', '\\', (1, 1, 16, 17)),
        ('word', 'are', (1, 1, 17, 20)),
        ('word', 'you', (2, 2, 22, 25)),
        ('slash', '\\', (2, 2, 25, 26)),
        ('word', '?', (2, 2, 26, 27)),
        ('paragraph-end', '', (2, 2, 27, 27)),
    ]
    expected = [mp.Token(kind, text, mp.SourcePos(*pos))
                for kind, text, pos in spec]
    self.assertEqual(expected, actual)
def test_TryTokenize_ComplexWordsAndSectionMarkers(self):
    """'=' characters are emitted as section-marker tokens; consecutive
    '=' signs merge into one marker token."""
    actual = mp._Tokenize('hello world\nhow =are \nyou==?')
    spec = [
        ('word', 'hello', (0, 0, 0, 5)),
        ('word', 'world', (0, 0, 6, 11)),
        ('word', 'how', (1, 1, 12, 15)),
        ('section-marker', '=', (1, 1, 16, 17)),
        ('word', 'are', (1, 1, 17, 20)),
        ('word', 'you', (2, 2, 22, 25)),
        # Double '=' is a single two-character marker, not two markers.
        ('section-marker', '==', (2, 2, 25, 27)),
        ('word', '?', (2, 2, 27, 28)),
        ('paragraph-end', '', (2, 2, 28, 28)),
    ]
    expected = [mp.Token(kind, text, mp.SourcePos(*pos))
                for kind, text, pos in spec]
    self.assertEqual(expected, actual)
def test_TryTokenize_Words(self):
    """The simplest case: two words on one line, plus the terminating
    empty paragraph-end token."""
    actual = mp._Tokenize('hello world')
    spec = [
        ('word', 'hello', (0, 0, 0, 5)),
        ('word', 'world', (0, 0, 6, 11)),
        ('paragraph-end', '', (0, 0, 11, 11)),
    ]
    expected = [mp.Token(kind, text, mp.SourcePos(*pos))
                for kind, text, pos in spec]
    self.assertEqual(expected, actual)