Beispiel #1
0
    def test_pre_conditions(self):
        """Check that invalid cutting options are rejected.

        Every case passes one invalid combination of ``seg_size``,
        ``overlap`` and ``last_prop`` and expects ``cut_by_characters`` to
        raise an ``AssertionError`` whose text equals the paired message
        constant.
        """
        # (options under test, expected error text, text used if nothing raises)
        invalid_calls = (
            ({"seg_size": 0, "overlap": 0, "last_prop": 1},
             SEG_NON_POSITIVE_MESSAGE,
             "Larger than zero error did not raise"),
            ({"seg_size": 2, "overlap": -1, "last_prop": 1},
             NEG_OVERLAP_LAST_PROP_MESSAGE,
             "None negative error did not raise"),
            ({"seg_size": 2, "overlap": 0, "last_prop": -1},
             NEG_OVERLAP_LAST_PROP_MESSAGE,
             "None negative error did not raise"),
            ({"seg_size": 2, "overlap": 2, "last_prop": 1},
             LARGER_SEG_SIZE_MESSAGE,
             "Overlap size error did not raise"),
        )

        for options, expected_message, not_raised_text in invalid_calls:
            try:
                cut_by_characters(text="ABAB", **options)
            except AssertionError as error:
                assert str(error) == expected_message
            else:
                # Raised in the ``else`` branch so the handler above cannot
                # catch it: a missing error is reported with this text
                # instead of a confusing message mismatch.
                raise AssertionError(not_raised_text)
Beispiel #2
0
 def test_empty_string(self):
     """Assert that an empty and a one-space text each yield one segment.

     Both expected results contain the input text itself as the only
     segment.
     """
     from_empty = cut_by_characters(text="", seg_size=10, overlap=5,
                                    last_prop=1)
     assert from_empty == [""]

     from_space = cut_by_characters(text=" ", seg_size=100, overlap=0,
                                    last_prop=0.5)
     assert from_space == [" "]
Beispiel #3
0
 def test_string_overlap(self):
     """Cut short texts with increasing overlap and compare the segments.

     Every case uses ``last_prop=1``; only the text, the segment size and
     the overlap vary.
     """
     # (text, seg_size, overlap, expected segments)
     scenarios = (
         ("WORD", 2, 0, ["WO", "RD"]),
         ("ABBA", 2, 1, ["AB", "BB", "BA"]),
         ("ABCDE", 3, 2, ["ABC", "BCD", "CDE"]),
         ("ABCDEF", 4, 3, ["ABCD", "BCDE", "CDEF"]),
     )
     for text, seg_size, overlap, expected in scenarios:
         segments = cut_by_characters(text=text, seg_size=seg_size,
                                      overlap=overlap, last_prop=1)
         assert segments == expected
Beispiel #4
0
 def test_string_seg_size(self):
     """Cut texts with different segment sizes and compare the segments.

     Every case uses ``overlap=0`` and ``last_prop=1``; only the text and
     the segment size vary.
     """
     # (text, seg_size, expected segments)
     scenarios = (
         ("ABABABAB", 10, ["ABABABAB"]),
         ("ABABABAB", 2, ["AB", "AB", "AB", "AB"]),
         # The 2-character remainder is expected inside the last segment.
         ("ABABABAB", 3, ["ABA", "BABAB"]),
         ("A", 100, ["A"]),
         ("ABCD", 1, ["A", "B", "C", "D"]),
     )
     for text, seg_size, expected in scenarios:
         segments = cut_by_characters(text=text, seg_size=seg_size,
                                      overlap=0, last_prop=1)
         assert segments == expected
Beispiel #5
0
 def test_string_all_funcs(self):
     """Cut one text with segment size, overlap and last_prop all set."""
     options = {
         "text": "ABABABABABA",
         "seg_size": 4,
         "overlap": 1,
         "last_prop": 0.5,
     }
     expected = ["ABAB", "BABA", "ABAB", "BA"]

     assert cut_by_characters(**options) == expected
Beispiel #6
0
 def test_string_overlap(self):
     """Compare the segments produced for several overlap settings.

     ``last_prop`` is fixed at 1 for every call; the remaining keyword
     arguments come from the table below.
     """
     # (keyword arguments for the cut, segments it should produce)
     expectations = [
         ({"text": "WORD", "seg_size": 2, "overlap": 0},
          ["WO", "RD"]),
         ({"text": "ABBA", "seg_size": 2, "overlap": 1},
          ["AB", "BB", "BA"]),
         ({"text": "ABCDE", "seg_size": 3, "overlap": 2},
          ["ABC", "BCD", "CDE"]),
         ({"text": "ABCDEF", "seg_size": 4, "overlap": 3},
          ["ABCD", "BCDE", "CDEF"]),
     ]
     for arguments, wanted in expectations:
         assert cut_by_characters(last_prop=1, **arguments) == wanted
Beispiel #7
0
 def test_string_last_prop(self):
     """Compare the segments produced for several ``last_prop`` values.

     Every case uses ``overlap=0``; the text, the segment size and
     ``last_prop`` vary.
     """
     # (text, seg_size, last_prop, expected segments)
     scenarios = (
         # A 1-character remainder is expected to stay separate at 0.2
         # and to be merged into the previous segment at 0.21.
         ("ABABABABABA", 5, 0.2, ["ABABA", "BABAB", "A"]),
         ("ABABABABABA", 5, 0.21, ["ABABA", "BABABA"]),
         ("ABABABABABA", 5, 2, ["ABABABABABA"]),
         ("ABCDEFGHIJKL", 3, 2, ["ABC", "DEF", "GHIJKL"]),
         ("ABCDEFGHIJKL", 3, 5, ["ABCDEFGHIJKL"]),
         ("ABCDEFGHIJKL", 3, .2, ["ABC", "DEF", "GHI", "JKL"]),
         ("ABCDEFGHIJKL", 3, .5, ["ABC", "DEF", "GHI", "JKL"]),
     )
     for text, seg_size, last_prop, expected in scenarios:
         segments = cut_by_characters(text=text, seg_size=seg_size,
                                      overlap=0, last_prop=last_prop)
         assert segments == expected
Beispiel #8
0
 def test_string_seg_size(self):
     """Compare the segments produced for several segment sizes.

     ``overlap`` is fixed at 0 and ``last_prop`` at 1 for every call.
     """
     def cut(text, seg_size):
         # Only the text and the segment size change between the checks.
         return cut_by_characters(text=text, seg_size=seg_size,
                                  overlap=0, last_prop=1)

     repeated = "ABABABAB"

     assert cut(repeated, 10) == ["ABABABAB"]
     assert cut(repeated, 2) == ["AB", "AB", "AB", "AB"]
     assert cut(repeated, 3) == ["ABA", "BABAB"]
     assert cut("A", 100) == ["A"]
     assert cut("ABCD", 1) == ["A", "B", "C", "D"]
Beispiel #9
0
    def test_pre_conditions(self):
        """Check that invalid cutting options are rejected.

        Each check passes one invalid combination of ``seg_size``,
        ``overlap`` and ``last_prop`` and expects ``cut_by_characters`` to
        raise an ``AssertionError`` whose text equals the given message
        constant.
        """
        def assert_rejected(expected_message, not_raised_text, **options):
            # Run one invalid call and verify the error text it produces.
            try:
                cut_by_characters(text="ABAB", **options)
            except AssertionError as error:
                assert str(error) == expected_message
                return
            # Raised after the ``try`` statement so the handler above cannot
            # catch it: a missing error is reported with this text instead
            # of a confusing message mismatch.
            raise AssertionError(not_raised_text)

        assert_rejected(SEG_NON_POSITIVE_MESSAGE,
                        "Larger than zero error did not raise",
                        seg_size=0, overlap=0, last_prop=1)
        assert_rejected(NEG_OVERLAP_LAST_PROP_MESSAGE,
                        "None negative error did not raise",
                        seg_size=2, overlap=-1, last_prop=1)
        assert_rejected(NEG_OVERLAP_LAST_PROP_MESSAGE,
                        "None negative error did not raise",
                        seg_size=2, overlap=0, last_prop=-1)
        assert_rejected(LARGER_SEG_SIZE_MESSAGE,
                        "Overlap size error did not raise",
                        seg_size=2, overlap=2, last_prop=1)
Beispiel #10
0
 def test_string_last_prop(self):
     """Compare the segments produced for several ``last_prop`` values.

     ``overlap`` is fixed at 0 for every call; an 11-character and a
     12-character text are cut with varying ``last_prop``.
     """
     def cut(text, seg_size, last_prop):
         # Only the text, segment size and last_prop change per check.
         return cut_by_characters(text=text, seg_size=seg_size, overlap=0,
                                  last_prop=last_prop)

     eleven_chars = "ABABABABABA"
     twelve_chars = "ABCDEFGHIJKL"

     # A 1-character remainder is expected to stay separate at 0.2 and
     # to be merged into the previous segment at 0.21.
     assert cut(eleven_chars, 5, 0.2) == ["ABABA", "BABAB", "A"]
     assert cut(eleven_chars, 5, 0.21) == ["ABABA", "BABABA"]
     assert cut(eleven_chars, 5, 2) == ["ABABABABABA"]

     assert cut(twelve_chars, 3, 2) == ["ABC", "DEF", "GHIJKL"]
     assert cut(twelve_chars, 3, 5) == ["ABCDEFGHIJKL"]
     assert cut(twelve_chars, 3, .2) == ["ABC", "DEF", "GHI", "JKL"]
     assert cut(twelve_chars, 3, .5) == ["ABC", "DEF", "GHI", "JKL"]
Beispiel #11
0
 def test_empty_string(self):
     """Assert the single segment returned for empty and one-space texts."""
     from_empty = cut_by_characters(text="", seg_size=10, overlap=5,
                                    last_prop=1)
     assert from_empty == [""]

     # NOTE(review): a one-space text is expected to come back as an
     # empty segment here - confirm that cut_by_characters really drops
     # the space.
     from_space = cut_by_characters(text=" ", seg_size=100, overlap=0,
                                    last_prop=0.5)
     assert from_space == [""]
Beispiel #12
0
 def test_string_all_funcs(self):
     """Cut one text with segment size, overlap and last_prop all set."""
     wanted = ["ABAB", "BABA", "ABAB", "BA"]
     produced = cut_by_characters(text="ABABABABABA", seg_size=4,
                                  overlap=1, last_prop=0.5)
     assert produced == wanted