Esempio n. 1
0
def test_word_segmentation_space_at_end():
    """Check word grouping when the separator is the last input entry.

    The "a" and "b" entries are expected to be merged into one "ab" entry.
    The trailing " " entry must be returned only when ``include_spaces``
    is true.
    """
    entries = [("a", 1, 1, 1, 1), ("b", 2, 1, 2, 1), (" ", 3, 1, 5, 1)]
    merged_word = ("ab", 1, 1, 2, 1)
    trailing_space = (" ", 3, 1, 5, 1)

    # Separator entries are kept in the output.
    with_spaces = Segmentation.word(entries, " ", include_spaces=True)
    assert with_spaces == [merged_word, trailing_space]

    # Separator entries are dropped from the output.
    without_spaces = Segmentation.word(entries, " ", include_spaces=False)
    assert without_spaces == [merged_word]
Esempio n. 2
0
def test_word_segmentation_space_at_beginning():
    """Check word grouping when the separator is the first input entry.

    The "b" and "c" entries are expected to be merged into one "bc" entry.
    The leading " " entry must be returned only when ``include_spaces``
    is true.
    """
    entries = [(" ", 1, 1, 2, 1), ("b", 3, 1, 3, 1), ("c", 4, 1, 5, 1)]
    leading_space = (" ", 1, 1, 2, 1)
    merged_word = ("bc", 3, 1, 5, 1)

    # Separator entries are kept in the output.
    with_spaces = Segmentation.word(entries, " ", include_spaces=True)
    assert with_spaces == [leading_space, merged_word]

    # Separator entries are dropped from the output.
    without_spaces = Segmentation.word(entries, " ", include_spaces=False)
    assert without_spaces == [merged_word]
Esempio n. 3
0
def test_word_segmentation_without_spaces():
    """Check that a separator in the middle splits the input into two words.

    With ``include_spaces=False`` the " " entry must not appear in the
    result: "a" and "b" collapse into "ab", and "c" stays on its own.
    """
    entries = [
        ("a", 1, 1, 2, 10),
        ("b", 3, 1, 3, 10),
        (" ", 4, 1, 5, 10),
        ("c", 6, 1, 800, 10),
    ]
    expected_words = [
        ("ab", 1, 1, 3, 10),
        ("c", 6, 1, 800, 10),
    ]

    words = Segmentation.word(entries, " ", include_spaces=False)

    assert words == expected_words
Esempio n. 4
0
def test_char_segmentation_empty():
    """Check that char segmentation rejects an empty string with no segments.

    ``Segmentation.char`` is expected to raise ``AssertionError`` when given
    a single empty string and an empty segment list.
    """
    text = [""]
    segments = []

    with pytest.raises(AssertionError):
        Segmentation.char(text, segments, 1)
Esempio n. 5
0
def test_word_segmentation_one_element():
    """Check that a single non-separator entry is returned unchanged."""
    entries = [("a", 1, 1, 10, 10)]

    words = Segmentation.word(entries, " ", include_spaces=True)

    assert words == [("a", 1, 1, 10, 10)]
Esempio n. 6
0
def test_word_segmentation_raises(s):
    """Check that word segmentation raises ``AssertionError`` for input ``s``.

    NOTE(review): ``s`` is injected by pytest; the fixture or parametrization
    that supplies it is not visible in this excerpt, so confirm where the
    invalid inputs are defined.
    """
    separator = " "

    with pytest.raises(AssertionError):
        Segmentation.word(s, separator)
Esempio n. 7
0
def test_word_segmentation_empty():
    """Check that word segmentation of an empty input yields an empty list."""
    words = Segmentation.word([], " ", include_spaces=True)

    assert words == []
Esempio n. 8
0
def test_char_segmentation_scaling_error():
    """Check that char segmentation raises for this text/segments/width input.

    NOTE(review): presumably the failure is caused by ``width=50`` being
    smaller than the last segment value (100) -- confirm against the
    assertions inside ``Segmentation.char``.
    """
    text = ["a"]
    segments = [0, 1, 100]

    with pytest.raises(AssertionError):
        Segmentation.char(text, segments, 1, width=50)
Esempio n. 9
0
def test_char_segmentation_scaling():
    """Check the char segmentation result when ``width=100`` is supplied.

    NOTE(review): the expected horizontal values look like the segment values
    scaled by 10 (width 100 over a last segment value of 10) -- confirm the
    exact scaling rule against ``Segmentation.char``.
    """
    characters = ["a", "b", "c"]
    segments = [0, 3, 5, 7, 10]
    expected = [
        ("a", 1, 1, 29, 1),
        ("b", 30, 1, 49, 1),
        ("c", 50, 1, 69, 1),
    ]

    result = Segmentation.char(characters, segments, 1, width=100)

    assert result == expected
Esempio n. 10
0
def test_char_segmentation():
    """Check the char segmentation result when no ``width`` is supplied.

    Three characters and five segment values are expected to produce one
    tuple per character.
    """
    characters = ["a", "b", "c"]
    segments = [0, 3, 5, 7, 10]
    expected = [
        ("a", 1, 1, 2, 1),
        ("b", 3, 1, 4, 1),
        ("c", 5, 1, 6, 1),
    ]

    result = Segmentation.char(characters, segments, 1)

    assert result == expected