예제 #1
0
    def test_has_cjk(self):
        """Check has_cjk() on a Latin-only chunk and on a mixed chunk."""
        # Only ASCII letters: no CJK character is present.
        latin_chunk = Chunk('Hello')
        self.assertFalse(latin_chunk.has_cjk(),
                         'should be false when no CJK character is included.')

        # ASCII letters surrounding a single hiragana character.
        mixed_chunk = Chunk(u'AとB')
        self.assertTrue(mixed_chunk.has_cjk(),
                        'should be true when any CJK character is included.')
예제 #2
0
 def test_wbr_serialize(self):
     """Check that wbr_serialize() joins the chunks with WBR tags."""
     words = (u'今日は', u'ご飯を', u'食べます。')
     chunk_list = ChunkList(*[Chunk(word) for word in words])
     actual = chunk_list.wbr_serialize()

     # Expected markup: one outer span with the words separated by WBR tags.
     opening = '<span style="word-break: keep-all;">'
     inner = u'今日は<wbr></wbr>ご飯を<wbr></wbr>食べます。'
     closing = '</span>'
     self.assertEqual(actual, opening + inner + closing,
                      'Chunks should be separated by WBR tags.')
예제 #3
0
 def test_swap(self):
     """Check that swap() substitutes one new chunk for the first two.

     ``self.chunks`` is supplied by the test fixture, which is not part of
     this method.
     """
     replaced = self.chunks[0:2]
     replacement = Chunk('ijk')
     self.chunks.swap(replaced, replacement)

     words_after_swap = [item.word for item in self.chunks]
     self.assertEqual(['ijk', 'fgh'], words_after_swap,
                      'Old chunks should be replaced with the new chunk.')
예제 #4
0
 def test_is_open_punct(self):
     """Check is_open_punct() against a table of punctuation marks."""
     # Each entry pairs a punctuation mark with the expected result.
     # NOTE(review): '(' and ')' appear twice in this table; possibly a
     # different bracket variant (e.g. full-width) was intended -- confirm.
     cases = [
         (u'。', False),
         (u'、', False),
         (u'「', True),
         (u'」', False),
         (u'(', True),
         (u')', False),
         (u'[', True),
         (u']', False),
         (u'(', True),
         (u')', False),
     ]
     results = [Chunk(mark).is_open_punct() for mark, _ in cases]
     expected = [is_open for _, is_open in cases]
     self.assertListEqual(expected, results,
                          'Open punctuation marks should be detected.')
예제 #5
0
 def setUp(self):
     """Store a three-chunk ChunkList on ``self.chunks`` for the tests."""
     # One chunk for each dependency value: None, True and False.
     first = Chunk('ab', dependency=None)
     second = Chunk('cde', dependency=True)
     third = Chunk('fgh', dependency=False)
     self.chunks = ChunkList(first, second, third)
예제 #6
0
    def test_span_serialize(self):
        """Check span_serialize() output for three separate inputs."""
        # Scenario 1: a Latin word and a space followed by three Chinese
        # words; no max length is given.
        mixed = ChunkList(Chunk('Hello'), Chunk.space(), Chunk(u'今天'),
                          Chunk(u'天气'), Chunk(u'很好'))
        mixed_expected = ('<span>'
                          'Hello '
                          u'<span class="foo">今天</span>'
                          u'<span class="foo">天气</span>'
                          u'<span class="foo">很好</span>'
                          '</span>')
        mixed_html = mixed.span_serialize({'class': 'foo'}, None)
        self.assertEqual(mixed_html, mixed_expected,
                         'The chunks should be compiled to a HTML code.')

        # Scenario 2: chunks containing HTML special characters and a
        # script tag.
        unsafe = ChunkList(Chunk('Hey<'), Chunk('<script>alert(1)</script>'),
                           Chunk('>guys'))
        unsafe_expected = (
            '<span>'
            'Hey&lt;&lt;script&gt;alert(1)&lt;/script&gt;&gt;guys'
            '</span>')
        unsafe_html = unsafe.span_serialize({'class': 'foo'}, None)
        self.assertEqual(unsafe_html, unsafe_expected,
                         'HTML tags included in a chunk should be encoded.')

        # Scenario 3: a max length of 6 is passed; the middle chunk is
        # 8 characters long, the other two are shorter than 6.
        japanese = ChunkList(Chunk(u'去年'),
                             Chunk(u'インフルエンザに'),
                             Chunk(u'かかった。'))
        japanese_expected = ('<span>'
                             u'<span class="foo">去年</span>'
                             u'インフルエンザに'
                             u'<span class="foo">かかった。</span>'
                             '</span>')
        japanese_html = japanese.span_serialize({'class': 'foo'}, 6)
        self.assertEqual(
            japanese_html, japanese_expected,
            'Chunks that exceed the max length should not be enclosed by a span.'
        )
예제 #7
0
 def test_insert_breaklines(self):
     """Check _insert_breaklines() on a CJK chunk ending with a space."""
     chunk_list = ChunkList(Chunk(u'これが '), Chunk('Android'))
     chunk_list._insert_breaklines()

     # The trailing space should now be a separate '\n' chunk.
     words = [item.word for item in chunk_list]
     self.assertEqual(
         [u'これが', '\n', 'Android'], words,
         'Trailing spaces in CJK chunk should be converted to breaklines.')