def test_word_segmentation_space_at_end():
    """Check ``Segmentation.word`` when the separator is the last element.

    The two letters are expected to merge into one entry; the trailing
    separator is kept or dropped depending on ``include_spaces``.
    """
    chars = [("a", 1, 1, 1, 1), ("b", 2, 1, 2, 1), (" ", 3, 1, 5, 1)]
    merged_word = ("ab", 1, 1, 2, 1)
    trailing_space = (" ", 3, 1, 5, 1)

    # Separator kept: it shows up as its own entry after the word.
    with_spaces = Segmentation.word(chars, " ", include_spaces=True)
    assert with_spaces == [merged_word, trailing_space]

    # Separator dropped: only the merged word remains.
    without_spaces = Segmentation.word(chars, " ", include_spaces=False)
    assert without_spaces == [merged_word]
def test_word_segmentation_space_at_beginning():
    """Check ``Segmentation.word`` when the separator is the first element.

    The two letters after the separator are expected to merge into one
    entry; the leading separator is kept or dropped depending on
    ``include_spaces``.
    """
    chars = [(" ", 1, 1, 2, 1), ("b", 3, 1, 3, 1), ("c", 4, 1, 5, 1)]
    leading_space = (" ", 1, 1, 2, 1)
    merged_word = ("bc", 3, 1, 5, 1)

    # Separator kept: it shows up as its own entry before the word.
    with_spaces = Segmentation.word(chars, " ", include_spaces=True)
    assert with_spaces == [leading_space, merged_word]

    # Separator dropped: only the merged word remains.
    without_spaces = Segmentation.word(chars, " ", include_spaces=False)
    assert without_spaces == [merged_word]
def test_word_segmentation_without_spaces():
    """Check that ``include_spaces=False`` omits the separator between words.

    The input holds two letters, a separator and a third letter; the
    expected output is the merged first word followed by the lone letter.
    """
    chars = [
        ("a", 1, 1, 2, 10),
        ("b", 3, 1, 3, 10),
        (" ", 4, 1, 5, 10),
        ("c", 6, 1, 800, 10),
    ]
    expected_words = [("ab", 1, 1, 3, 10), ("c", 6, 1, 800, 10)]

    words = Segmentation.word(chars, " ", include_spaces=False)

    assert words == expected_words
def test_char_segmentation_empty():
    """Expect ``AssertionError`` from ``Segmentation.char`` for ``[""]`` and ``[]``."""
    blank_text = [""]
    no_boundaries = []
    with pytest.raises(AssertionError):
        Segmentation.char(blank_text, no_boundaries, 1)
def test_word_segmentation_one_element():
    """Check that a single non-separator element is returned unchanged."""
    only_char = ("a", 1, 1, 10, 10)

    words = Segmentation.word([only_char], " ", include_spaces=True)

    assert words == [("a", 1, 1, 10, 10)]
def test_word_segmentation_raises(s):
    """Expect ``Segmentation.word`` to raise ``AssertionError`` for input ``s``.

    NOTE(review): ``s`` is supplied from outside this function, presumably
    by a fixture or a parametrization that is not visible here. Confirm.
    """
    separator = " "
    with pytest.raises(AssertionError):
        Segmentation.word(s, separator)
def test_word_segmentation_empty():
    """Check that an empty input list yields an empty result."""
    words = Segmentation.word([], " ", include_spaces=True)
    assert words == []
def test_char_segmentation_scaling_error():
    """Expect ``AssertionError`` when ``width=50`` is used with boundaries up to 100."""
    text = ["a"]
    boundaries = [0, 1, 100]
    with pytest.raises(AssertionError):
        Segmentation.char(text, boundaries, 1, width=50)
def test_char_segmentation_scaling():
    """Check the output of ``Segmentation.char`` when ``width=100`` is passed.

    Uses the same text and boundaries as the unscaled test; only the
    expected numbers differ.
    """
    letters = ["a", "b", "c"]
    boundaries = [0, 3, 5, 7, 10]
    expected = [
        ("a", 1, 1, 29, 1),
        ("b", 30, 1, 49, 1),
        ("c", 50, 1, 69, 1),
    ]

    segments = Segmentation.char(letters, boundaries, 1, width=100)

    assert segments == expected
def test_char_segmentation():
    """Check the output of ``Segmentation.char`` without a ``width`` argument."""
    letters = ["a", "b", "c"]
    boundaries = [0, 3, 5, 7, 10]
    expected = [
        ("a", 1, 1, 2, 1),
        ("b", 3, 1, 4, 1),
        ("c", 5, 1, 6, 1),
    ]

    segments = Segmentation.char(letters, boundaries, 1)

    assert segments == expected