Ejemplo n.º 1
0
 def test_wbr_serialize(self):
     chunks = ChunkList(Chunk(u'今日は'), Chunk(u'ご飯を'), Chunk(u'食べます。'))
     result = chunks.wbr_serialize()
     expected = ('<span style="word-break: keep-all;">'
                 u'今日は<wbr></wbr>ご飯を<wbr></wbr>食べます。'
                 '</span>')
     self.assertEqual(result, expected,
                      'Chunks should be separated by WBR tags.')
Ejemplo n.º 2
0
    def test_span_serialize(self):
        chunks = ChunkList(Chunk('Hello'), Chunk.space(), Chunk(u'今天'),
                           Chunk(u'天气'), Chunk(u'很好'))
        attributes = {'class': 'foo'}
        expected = ('<span>'
                    'Hello '
                    u'<span class="foo">今天</span>'
                    u'<span class="foo">天气</span>'
                    u'<span class="foo">很好</span>'
                    '</span>')
        result = chunks.span_serialize(attributes, None)
        self.assertEqual(result, expected,
                         'The chunks should be compiled to a HTML code.')

        chunks = ChunkList(Chunk('Hey<'), Chunk('<script>alert(1)</script>'),
                           Chunk('>guys'))
        attributes = {'class': 'foo'}
        expected = ('<span>'
                    'Hey&lt;&lt;script&gt;alert(1)&lt;/script&gt;&gt;guys'
                    '</span>')
        result = chunks.span_serialize(attributes, None)
        self.assertEqual(result, expected,
                         'HTML tags included in a chunk should be encoded.')

        chunks = ChunkList(Chunk(u'去年'), Chunk(u'インフルエンザに'), Chunk(u'かかった。'))
        attributes = {'class': 'foo'}
        expected = ('<span>'
                    u'<span class="foo">去年</span>'
                    u'インフルエンザに'
                    u'<span class="foo">かかった。</span>'
                    '</span>')
        result = chunks.span_serialize(attributes, 6)
        self.assertEqual(
            result, expected,
            'Chunks that exceed the max length should not be enclosed by a span.'
        )
Ejemplo n.º 3
0
 def setUp(self):
     self.chunks = ChunkList(Chunk('ab', dependency=None),
                             Chunk('cde', dependency=True),
                             Chunk('fgh', dependency=False))
Ejemplo n.º 4
0
class TestChunkList(unittest.TestCase):
    def setUp(self):
        self.chunks = ChunkList(Chunk('ab', dependency=None),
                                Chunk('cde', dependency=True),
                                Chunk('fgh', dependency=False))

    def test_get_overlaps(self):
        # chunks: ab cde fgh
        # range : __ _*_ ___
        chunks = self.chunks.get_overlaps(3, 1)
        expected = ['cde']
        self.assertEqual(expected, [chunk.word for chunk in chunks])

        # chunks: ab cde fgh
        # range : __ **_ ___
        chunks = self.chunks.get_overlaps(2, 2)
        expected = ['cde']
        self.assertEqual(expected, [chunk.word for chunk in chunks])

        # chunks: ab cde fgh
        # range : _* **_ ___
        chunks = self.chunks.get_overlaps(1, 3)
        expected = ['ab', 'cde']
        self.assertEqual(expected, [chunk.word for chunk in chunks])

        # chunks: ab cde fgh
        # range : _* *** ___
        chunks = self.chunks.get_overlaps(1, 4)
        expected = ['ab', 'cde']
        self.assertEqual(expected, [chunk.word for chunk in chunks])

        # chunks: ab cde fgh
        # range : _* *** *__
        chunks = self.chunks.get_overlaps(1, 5)
        expected = ['ab', 'cde', 'fgh']
        self.assertEqual(expected, [chunk.word for chunk in chunks])

    def test_swap(self):
        old_chunks = self.chunks[0:2]
        new_chunk = Chunk('ijk')
        self.chunks.swap(old_chunks, new_chunk)
        expected = ['ijk', 'fgh']
        self.assertEqual(expected, [chunk.word for chunk in self.chunks],
                         'Old chunks should be replaced with the new chunk.')

    def test_concatenate_inner(self):
        self.chunks._concatenate_inner(True)
        self.assertEqual(['ab', 'cdefgh'], [
            chunk.word for chunk in self.chunks
        ], 'Chunks should be concatenated if they depends on the following word.'
                         )
        self.assertEqual(
            [None, False], [chunk.dependency for chunk in self.chunks],
            'Dependency should persist even if it\'s concatenated by others.')

        self.chunks._concatenate_inner(False)
        self.assertEqual(['abcdefgh'], [
            chunk.word for chunk in self.chunks
        ], 'Chunks should be concatenated if they depends on the previous word.'
                         )

    def test_insert_breaklines(self):
        chunks = ChunkList(Chunk(u'これが '), Chunk('Android'))
        chunks._insert_breaklines()
        self.assertEqual(
            [u'これが', '\n', 'Android'], [chunk.word for chunk in chunks],
            'Trailing spaces in CJK chunk should be converted to breaklines.')

    def test_span_serialize(self):
        chunks = ChunkList(Chunk('Hello'), Chunk.space(), Chunk(u'今天'),
                           Chunk(u'天气'), Chunk(u'很好'))
        attributes = {'class': 'foo'}
        expected = ('<span>'
                    'Hello '
                    u'<span class="foo">今天</span>'
                    u'<span class="foo">天气</span>'
                    u'<span class="foo">很好</span>'
                    '</span>')
        result = chunks.span_serialize(attributes, None)
        self.assertEqual(result, expected,
                         'The chunks should be compiled to a HTML code.')

        chunks = ChunkList(Chunk('Hey<'), Chunk('<script>alert(1)</script>'),
                           Chunk('>guys'))
        attributes = {'class': 'foo'}
        expected = ('<span>'
                    'Hey&lt;&lt;script&gt;alert(1)&lt;/script&gt;&gt;guys'
                    '</span>')
        result = chunks.span_serialize(attributes, None)
        self.assertEqual(result, expected,
                         'HTML tags included in a chunk should be encoded.')

        chunks = ChunkList(Chunk(u'去年'), Chunk(u'インフルエンザに'), Chunk(u'かかった。'))
        attributes = {'class': 'foo'}
        expected = ('<span>'
                    u'<span class="foo">去年</span>'
                    u'インフルエンザに'
                    u'<span class="foo">かかった。</span>'
                    '</span>')
        result = chunks.span_serialize(attributes, 6)
        self.assertEqual(
            result, expected,
            'Chunks that exceed the max length should not be enclosed by a span.'
        )

    # TODO (tushuhei) Check if TypeError is raised when any instance but Chunk
    # is given to the list.

    def test_wbr_serialize(self):
        chunks = ChunkList(Chunk(u'今日は'), Chunk(u'ご飯を'), Chunk(u'食べます。'))
        result = chunks.wbr_serialize()
        expected = ('<span style="word-break: keep-all;">'
                    u'今日は<wbr></wbr>ご飯を<wbr></wbr>食べます。'
                    '</span>')
        self.assertEqual(result, expected,
                         'Chunks should be separated by WBR tags.')
Ejemplo n.º 5
0
 def test_insert_breaklines(self):
     chunks = ChunkList(Chunk(u'これが '), Chunk('Android'))
     chunks._insert_breaklines()
     self.assertEqual(
         [u'これが', '\n', 'Android'], [chunk.word for chunk in chunks],
         'Trailing spaces in CJK chunk should be converted to breaklines.')