def test_has_cjk(self):
    # A purely ASCII word must not be reported as containing CJK.
    ascii_only = Chunk('Hello')
    self.assertFalse(
        ascii_only.has_cjk(),
        'should be false when no CJK character is included.')

    # A single CJK character between ASCII letters is enough.
    mixed = Chunk(u'AとB')
    self.assertTrue(
        mixed.has_cjk(),
        'should be true when any CJK character is included.')
def test_wbr_serialize(self):
    # wbr_serialize() is expected to wrap all chunks in one keep-all span
    # and to put an empty WBR element between neighbouring chunks.
    words = (u'今日は', u'ご飯を', u'食べます。')
    chunks = ChunkList(*[Chunk(word) for word in words])
    expected = (
        '<span style="word-break: keep-all;">'
        u'今日は<wbr></wbr>ご飯を<wbr></wbr>食べます。'
        '</span>')
    self.assertEqual(
        chunks.wbr_serialize(), expected,
        'Chunks should be separated by WBR tags.')
def test_swap(self):
    # Replace the first two fixture chunks ('ab', 'cde') with one new
    # chunk; the untouched third chunk ('fgh') must keep its position.
    replaced = self.chunks[0:2]
    self.chunks.swap(replaced, Chunk('ijk'))
    remaining_words = [chunk.word for chunk in self.chunks]
    self.assertEqual(
        ['ijk', 'fgh'], remaining_words,
        'Old chunks should be replaced with the new chunk.')
def test_is_open_punct(self):
    # Each row pairs a punctuation mark with whether is_open_punct()
    # should report it as an opening mark.
    # NOTE(review): the ASCII parenthesis pair is listed twice; one pair
    # may have been meant to be a different variant (e.g. fullwidth) --
    # confirm against the intended inputs.
    cases = [
        (u'。', False),
        (u'、', False),
        (u'「', True),
        (u'」', False),
        (u'(', True),
        (u')', False),
        (u'[', True),
        (u']', False),
        (u'(', True),
        (u')', False),
    ]
    expected = [is_open for _, is_open in cases]
    results = [Chunk(mark).is_open_punct() for mark, _ in cases]
    self.assertListEqual(
        expected, results,
        'Open punctuation marks should be detected.')
def setUp(self):
    # Shared fixture: three chunks, one for each dependency value used
    # by the tests (None, True and False).
    spec = (
        ('ab', None),
        ('cde', True),
        ('fgh', False),
    )
    members = [Chunk(word, dependency=dep) for word, dep in spec]
    self.chunks = ChunkList(*members)
def test_span_serialize(self):
    # Case 1: mixed Latin/CJK input. Per the expected markup, 'Hello '
    # stays bare inside the outer span while each of the three Chinese
    # chunks is wrapped in its own span carrying the given attributes.
    chunks = ChunkList(Chunk('Hello'), Chunk.space(), Chunk(u'今天'),
                       Chunk(u'天气'), Chunk(u'很好'))
    attributes = {'class': 'foo'}
    expected = ('<span>'
                'Hello '
                u'<span class="foo">今天</span>'
                u'<span class="foo">天气</span>'
                u'<span class="foo">很好</span>'
                '</span>')
    result = chunks.span_serialize(attributes, None)
    self.assertEqual(result, expected,
                     'The chunks should be compiled to a HTML code.')

    # Case 2: markup inside chunk text must come out entity-escaped.
    # The expected value is therefore written with &lt; / &gt;. Comparing
    # against the raw '<script>' text would make this test demand the
    # unescaped, injectable output and contradict the assertion message.
    chunks = ChunkList(Chunk('Hey<'), Chunk('<script>alert(1)</script>'),
                       Chunk('>guys'))
    attributes = {'class': 'foo'}
    expected = ('<span>'
                'Hey&lt;&lt;script&gt;alert(1)&lt;/script&gt;&gt;guys'
                '</span>')
    result = chunks.span_serialize(attributes, None)
    self.assertEqual(result, expected,
                     'HTML tags included in a chunk should be encoded.')

    # Case 3: with a max length of 6, the 8-character middle chunk is
    # expected to be emitted without an enclosing span, while the shorter
    # chunks around it are still wrapped.
    chunks = ChunkList(Chunk(u'去年'), Chunk(u'インフルエンザに'),
                       Chunk(u'かかった。'))
    attributes = {'class': 'foo'}
    expected = ('<span>'
                u'<span class="foo">去年</span>'
                u'インフルエンザに'
                u'<span class="foo">かかった。</span>'
                '</span>')
    result = chunks.span_serialize(attributes, 6)
    self.assertEqual(
        result, expected,
        'Chunks that exceed the max length should not be enclosed by a span.'
    )
def test_insert_breaklines(self):
    # The first chunk is CJK text ending in a space and is followed by a
    # Latin word; after _insert_breaklines() the space should have become
    # a separate '\n' chunk.
    chunks = ChunkList(Chunk(u'これが '), Chunk('Android'))
    chunks._insert_breaklines()
    words_after = [chunk.word for chunk in chunks]
    self.assertEqual(
        [u'これが', '\n', 'Android'], words_after,
        'Trailing spaces in CJK chunk should be converted to breaklines.')