Esempio n. 1
0
 def test_cut_by_words_overlap(self):
     """Pin the segments returned when ``overlap`` is non-zero.

     Every case uses ``last_prop=.5``; the expected lists show each
     segment starting with words already present in the previous one.
     """
     cases = (
         ("test test test", 2, 1,
          ["test test ", "test test", "test"]),
         ("dog cat duck bird", 3, 2,
          ["dog cat duck ", "cat duck bird", "duck bird"]),
     )
     for text, seg_size, overlap, expected in cases:
         segments = cut_by_words(text=text, seg_size=seg_size,
                                 overlap=overlap, last_prop=.5)
         assert segments == expected
Esempio n. 2
0
 def test_cut_by_words_no_whitespace(self):
     """Pin the output for words that contain no internal whitespace."""
     # A single unbroken word comes back as one segment.
     single_word = cut_by_words(text="testtest", seg_size=1, overlap=0,
                                last_prop=1)
     assert single_word == ["testtest"]

     # Two such words are split at the space between them.
     two_words = cut_by_words(text="helloworld helloworld", seg_size=1,
                              overlap=0, last_prop=1)
     assert two_words == ["helloworld ", "helloworld"]
Esempio n. 3
0
    def test_cut_by_words(self):
        """Pin basic segmentation results when ``overlap`` is zero."""
        # A text made of one space is expected to give one empty segment.
        blank = cut_by_words(text=" ", seg_size=1, overlap=0, last_prop=1)
        assert blank == [""]

        pair = cut_by_words(text="test test", seg_size=1, overlap=0,
                            last_prop=1)
        assert pair == ["test ", "test"]

        # A longer text cut into four-word segments with a shorter tail.
        long_text = ("abc abc abc abc abc abc abc abc abc abc abc "
                     "abc abc abc abc abc abc abc abc abc abc "
                     "abc")
        expected = ["abc abc abc abc ", "abc abc abc abc ",
                    "abc abc abc abc ", "abc abc abc abc ",
                    "abc abc abc abc ", "abc abc"]
        segments = cut_by_words(text=long_text, seg_size=4, overlap=0,
                                last_prop=.5)
        assert segments == expected
Esempio n. 4
0
 def test_cut_by_words_overlap(self):
     """Pin the segments returned when ``overlap`` is non-zero."""
     # seg_size=2 with one overlapping word.
     repeated = cut_by_words(text="test test test", seg_size=2, overlap=1,
                             last_prop=.5)
     assert repeated == ["test test ", "test test", "test"]

     # seg_size=3 with two overlapping words; distinct words make the
     # shared part of consecutive segments visible.
     animals = cut_by_words(text="dog cat duck bird", seg_size=3,
                            overlap=2, last_prop=.5)
     assert animals == ["dog cat duck ", "cat duck bird", "duck bird"]
Esempio n. 5
0
    def test_cut_by_words(self):
        """Pin basic segmentation results when ``overlap`` is zero.

        Each case is ``(text, seg_size, last_prop, expected_segments)``.
        """
        cases = (
            (" ", 1, 1, [""]),
            ("test test", 1, 1, ["test ", "test"]),
            ("abc abc abc abc abc abc abc abc abc abc abc "
             "abc abc abc abc abc abc abc abc abc abc "
             "abc", 4, .5,
             ["abc abc abc abc ", "abc abc abc abc ", "abc abc abc abc ",
              "abc abc abc abc ", "abc abc abc abc ", "abc abc"]),
        )
        for text, seg_size, last_prop, expected in cases:
            segments = cut_by_words(text=text, seg_size=seg_size, overlap=0,
                                    last_prop=last_prop)
            assert segments == expected
Esempio n. 6
0
    def test_cut_by_words_zero_chunks_precondition(self):
        """Check that ``seg_size=0`` is rejected by ``cut_by_words``.

        Both calls are expected to raise ``AssertionError`` whose message
        equals ``SEG_NON_POSITIVE_MESSAGE``.
        """
        try:
            _ = cut_by_words(text=" ", seg_size=0, overlap=0, last_prop=.5)
        except AssertionError as error:
            assert str(error) == SEG_NON_POSITIVE_MESSAGE
        else:
            # Raised from ``else`` so the handler above cannot catch it and
            # turn a missing error into a confusing message mismatch.
            raise AssertionError("zero_division error did not raise")

        try:
            _ = cut_by_words(text="test test", seg_size=0, overlap=0,
                             last_prop=.5)
        except AssertionError as error:
            assert str(error) == SEG_NON_POSITIVE_MESSAGE
        else:
            raise AssertionError("zero_division error did not raise")
Esempio n. 7
0
 def test_cut_by_words_proportion(self):
     """Pin how ``last_prop`` affects the final segment.

     Each case is ``(text, seg_size, last_prop, expected_segments)``;
     ``overlap`` is zero throughout. The expected lists show the tail
     either standing alone or being joined to the previous segment.
     """
     three = "test test test"
     four = "test test test test"
     five = "test test test test test"
     cases = (
         (three, 2, 1, ["test test test"]),
         (three, 2, .5, ["test test ", "test"]),
         (three, 2, 1.5, ["test test test"]),
         (three, 2, 2, ["test test test"]),
         (four, 2, 1, ["test test ", "test test"]),
         (five, 2, .5, ["test test ", "test test ", "test"]),
         (five, 2, 1, ["test test ", "test test test"]),
         (five, 3, 1, ["test test test test test"]),
     )
     for text, seg_size, last_prop, expected in cases:
         segments = cut_by_words(text=text, seg_size=seg_size, overlap=0,
                                 last_prop=last_prop)
         assert segments == expected
Esempio n. 8
0
    def test_seg_size_assertion_error(self):
        """Check that an ``overlap`` not smaller than ``seg_size`` is rejected.

        Covers ``overlap == seg_size`` and ``overlap > seg_size``; both are
        expected to raise ``AssertionError`` with ``LARGER_SEG_SIZE_MESSAGE``.
        """
        try:
            _ = cut_by_words(text="test test", seg_size=1, overlap=1,
                             last_prop=.5)
        except AssertionError as error:
            assert str(error) == LARGER_SEG_SIZE_MESSAGE
        else:
            # Raised from ``else`` so the handler above cannot catch it and
            # turn a missing error into a confusing message mismatch.
            raise AssertionError("did not throw error")

        try:
            _ = cut_by_words(text="test test test", seg_size=1, overlap=2,
                             last_prop=.5)
        except AssertionError as error:
            assert str(error) == LARGER_SEG_SIZE_MESSAGE
        else:
            raise AssertionError("did not throw error")
Esempio n. 9
0
    def test_cut_by_words_zero_chunks_precondition(self):
        """Check that ``seg_size=0`` is rejected by ``cut_by_words``.

        Both calls are expected to raise ``AssertionError`` whose message
        equals ``SEG_NON_POSITIVE_MESSAGE``.
        """
        try:
            _ = cut_by_words(text=" ", seg_size=0, overlap=0, last_prop=.5)
        except AssertionError as error:
            assert str(error) == SEG_NON_POSITIVE_MESSAGE
        else:
            # Kept out of the ``try`` body: raised there, this sentinel
            # would be caught by the ``except AssertionError`` handler.
            raise AssertionError("zero_division error did not raise")

        try:
            _ = cut_by_words(text="test test",
                             seg_size=0,
                             overlap=0,
                             last_prop=.5)
        except AssertionError as error:
            assert str(error) == SEG_NON_POSITIVE_MESSAGE
        else:
            raise AssertionError("zero_division error did not raise")
Esempio n. 10
0
 def test_cut_by_words_neg_overlap_precondition(self):
     """Check that a negative ``overlap`` is rejected by ``cut_by_words``.

     The call is expected to raise ``AssertionError`` whose message equals
     ``NEG_OVERLAP_LAST_PROP_MESSAGE``.
     """
     try:
         _ = cut_by_words(text="test", seg_size=1, overlap=-1,
                          last_prop=.5)
     except AssertionError as error:
         assert str(error) == NEG_OVERLAP_LAST_PROP_MESSAGE
     else:
         # Raised from ``else`` so the handler above cannot catch it and
         # turn a missing error into a confusing message mismatch.
         raise AssertionError("did not throw error")
Esempio n. 11
0
 def test_cut_by_words_neg_chunk_precondition(self):
     """Check that a negative ``seg_size`` is rejected by ``cut_by_words``.

     The call is expected to raise ``AssertionError`` whose message equals
     ``SEG_NON_POSITIVE_MESSAGE``.
     """
     try:
         _ = cut_by_words(text="test", seg_size=-1, overlap=0,
                          last_prop=.5)
     except AssertionError as error:
         assert str(error) == SEG_NON_POSITIVE_MESSAGE
     else:
         # Raised from ``else`` so the handler above cannot catch it and
         # turn a missing error into a confusing message mismatch.
         raise AssertionError("did not throw error")
Esempio n. 12
0
    def test_seg_size_assertion_error(self):
        """Check that an ``overlap`` not smaller than ``seg_size`` is rejected.

        Covers ``overlap == seg_size`` and ``overlap > seg_size``; both are
        expected to raise ``AssertionError`` with ``LARGER_SEG_SIZE_MESSAGE``.
        """
        try:
            _ = cut_by_words(text="test test",
                             seg_size=1,
                             overlap=1,
                             last_prop=.5)
        except AssertionError as error:
            assert str(error) == LARGER_SEG_SIZE_MESSAGE
        else:
            # Kept out of the ``try`` body: raised there, this sentinel
            # would be caught by the ``except AssertionError`` handler.
            raise AssertionError("did not throw error")

        try:
            _ = cut_by_words(text="test test test",
                             seg_size=1,
                             overlap=2,
                             last_prop=.5)
        except AssertionError as error:
            assert str(error) == LARGER_SEG_SIZE_MESSAGE
        else:
            raise AssertionError("did not throw error")
Esempio n. 13
0
 def test_cut_by_words_proportion(self):
     """Pin how ``last_prop`` affects the final segment (``overlap=0``)."""
     three = "test test test"
     four = "test test test test"
     five = "test test test test test"
     merged_three = ["test test test"]

     # Three words, seg_size=2: only last_prop=.5 keeps the tail separate.
     assert cut_by_words(
         text=three, seg_size=2, overlap=0, last_prop=1) == merged_three
     assert cut_by_words(
         text=three, seg_size=2, overlap=0, last_prop=.5) == [
             "test test ", "test"]
     assert cut_by_words(
         text=three, seg_size=2, overlap=0, last_prop=1.5) == merged_three
     assert cut_by_words(
         text=three, seg_size=2, overlap=0, last_prop=2) == merged_three

     # Four words divide evenly into two full segments.
     assert cut_by_words(
         text=four, seg_size=2, overlap=0, last_prop=1) == [
             "test test ", "test test"]

     # Five words: the leftover word stands alone or joins its neighbour.
     assert cut_by_words(
         text=five, seg_size=2, overlap=0, last_prop=.5) == [
             "test test ", "test test ", "test"]
     assert cut_by_words(
         text=five, seg_size=2, overlap=0, last_prop=1) == [
             "test test ", "test test test"]
     assert cut_by_words(
         text=five, seg_size=3, overlap=0, last_prop=1) == [
             "test test test test test"]
Esempio n. 14
0
 def test_cut_by_words_neg_overlap_precondition(self):
     """Check that a negative ``overlap`` is rejected by ``cut_by_words``.

     The call is expected to raise ``AssertionError`` whose message equals
     ``NEG_OVERLAP_LAST_PROP_MESSAGE``.
     """
     try:
         _ = cut_by_words(text="test", seg_size=1, overlap=-1, last_prop=.5)
     except AssertionError as error:
         assert str(error) == NEG_OVERLAP_LAST_PROP_MESSAGE
     else:
         # Kept out of the ``try`` body: raised there, this sentinel would
         # be caught by the ``except AssertionError`` handler.
         raise AssertionError("did not throw error")
Esempio n. 15
0
 def test_cut_by_words_neg_chunk_precondition(self):
     """Check that a negative ``seg_size`` is rejected by ``cut_by_words``.

     The call is expected to raise ``AssertionError`` whose message equals
     ``SEG_NON_POSITIVE_MESSAGE``.
     """
     try:
         _ = cut_by_words(text="test", seg_size=-1, overlap=0, last_prop=.5)
     except AssertionError as error:
         assert str(error) == SEG_NON_POSITIVE_MESSAGE
     else:
         # Kept out of the ``try`` body: raised there, this sentinel would
         # be caught by the ``except AssertionError`` handler.
         raise AssertionError("did not throw error")
Esempio n. 16
0
 def test_cut_by_words_no_whitespace(self):
     """Pin the output for words that contain no internal whitespace.

     Each case is ``(text, expected_segments)`` and is cut with
     ``seg_size=1``, ``overlap=0`` and ``last_prop=1``.
     """
     cases = (
         ("testtest", ["testtest"]),
         ("helloworld helloworld", ["helloworld ", "helloworld"]),
     )
     for text, expected in cases:
         segments = cut_by_words(text=text, seg_size=1, overlap=0,
                                 last_prop=1)
         assert segments == expected