Ejemplo n.º 1
0
 def test_cut_by_words_overlap(self):
     """Exercise ``cut_by_words`` with a non-zero ``overlap``."""
     # Three words, segment size two, one overlapping word.
     three_word_segments = cut_by_words(
         text="test test test", seg_size=2, overlap=1, last_prop=.5)
     assert three_word_segments == ["test test ", "test test", "test"]

     # Four words, segment size three, two overlapping words.
     four_word_segments = cut_by_words(
         text="dog cat duck bird", seg_size=3, overlap=2, last_prop=.5)
     assert four_word_segments == [
         "dog cat duck ", "cat duck bird", "duck bird"]
Ejemplo n.º 2
0
 def test_cut_by_words_no_whitespace(self):
     """Exercise ``cut_by_words`` on words with no whitespace inside them."""
     # A single unbroken token is expected back as the only segment.
     lone_token = cut_by_words(
         text="testtest", seg_size=1, overlap=0, last_prop=1)
     assert lone_token == ["testtest"]

     # Two long tokens, one word per segment.
     two_tokens = cut_by_words(
         text="helloworld helloworld", seg_size=1, overlap=0, last_prop=1)
     assert two_tokens == ["helloworld ", "helloworld"]
Ejemplo n.º 3
0
    def test_cut_by_words(self):
        """Exercise ``cut_by_words`` without overlap on basic inputs."""
        # Whitespace-only input is expected to give one empty segment.
        blank_result = cut_by_words(
            text=" ", seg_size=1, overlap=0, last_prop=1)
        assert blank_result == [""]

        # One word per segment.
        pair_result = cut_by_words(
            text="test test", seg_size=1, overlap=0, last_prop=1)
        assert pair_result == ["test ", "test"]

        # A longer run of identical words, cut four words at a time.
        long_text = ("abc abc abc abc abc abc abc abc abc abc abc "
                     "abc abc abc abc abc abc abc abc abc abc "
                     "abc")
        long_expected = [
            "abc abc abc abc ",
            "abc abc abc abc ",
            "abc abc abc abc ",
            "abc abc abc abc ",
            "abc abc abc abc ",
            "abc abc",
        ]
        long_result = cut_by_words(
            text=long_text, seg_size=4, overlap=0, last_prop=.5)
        assert long_result == long_expected
Ejemplo n.º 4
0
 def test_cut_by_words_overlap(self):
     """Exercise ``cut_by_words`` with a non-zero ``overlap``."""
     # Each row: (text, seg_size, overlap, expected segments).
     scenarios = (
         ("test test test", 2, 1,
          ["test test ", "test test", "test"]),
         ("dog cat duck bird", 3, 2,
          ["dog cat duck ", "cat duck bird", "duck bird"]),
     )
     for words, size, shared, wanted in scenarios:
         produced = cut_by_words(
             text=words, seg_size=size, overlap=shared, last_prop=.5)
         assert produced == wanted
Ejemplo n.º 5
0
    def test_cut_by_words(self):
        """Exercise ``cut_by_words`` without overlap on basic inputs."""
        many_abc = ("abc abc abc abc abc abc abc abc abc abc abc "
                    "abc abc abc abc abc abc abc abc abc abc "
                    "abc")
        # Each row: (text, seg_size, last_prop, expected segments).
        scenarios = (
            (" ", 1, 1, [""]),
            ("test test", 1, 1, ["test ", "test"]),
            (many_abc, 4, .5,
             ["abc abc abc abc ", "abc abc abc abc ", "abc abc abc abc ",
              "abc abc abc abc ", "abc abc abc abc ", "abc abc"]),
        )
        for words, size, proportion, wanted in scenarios:
            produced = cut_by_words(
                text=words, seg_size=size, overlap=0, last_prop=proportion)
            assert produced == wanted
Ejemplo n.º 6
0
    def test_cut_by_words_zero_chunks_precondition(self):
        """Check that ``cut_by_words`` rejects a ``seg_size`` of zero.

        Each call is expected to raise an ``AssertionError`` whose message
        equals ``SEG_NON_POSITIVE_MESSAGE``.
        """
        for text in (" ", "test test"):
            try:
                cut_by_words(text=text, seg_size=0, overlap=0, last_prop=.5)
            except AssertionError as error:
                assert str(error) == SEG_NON_POSITIVE_MESSAGE
            else:
                # Raised outside the handler so that a missing error is
                # reported as such rather than as a message mismatch.
                raise AssertionError("zero_division error did not raise")
Ejemplo n.º 7
0
 def test_cut_by_words_proportion(self):
     """Exercise ``cut_by_words`` across several ``last_prop`` values."""
     # Each row: (text, seg_size, last_prop, expected segments).
     scenarios = (
         ("test test test", 2, 1, ["test test test"]),
         ("test test test", 2, .5, ["test test ", "test"]),
         ("test test test", 2, 1.5, ["test test test"]),
         ("test test test", 2, 2, ["test test test"]),
         ("test test test test", 2, 1, ["test test ", "test test"]),
         ("test test test test test", 2, .5,
          ["test test ", "test test ", "test"]),
         ("test test test test test", 2, 1,
          ["test test ", "test test test"]),
         ("test test test test test", 3, 1,
          ["test test test test test"]),
     )
     for words, size, proportion, wanted in scenarios:
         produced = cut_by_words(
             text=words, seg_size=size, overlap=0, last_prop=proportion)
         assert produced == wanted
Ejemplo n.º 8
0
    def test_seg_size_assertion_error(self):
        """Check that ``cut_by_words`` rejects an overlap >= ``seg_size``.

        Each call is expected to raise an ``AssertionError`` whose message
        equals ``LARGER_SEG_SIZE_MESSAGE``.
        """
        # Each row: (text, overlap); seg_size is 1 in both cases.
        for text, overlap in (("test test", 1), ("test test test", 2)):
            try:
                cut_by_words(text=text, seg_size=1, overlap=overlap,
                             last_prop=.5)
            except AssertionError as error:
                assert str(error) == LARGER_SEG_SIZE_MESSAGE
            else:
                # Raised outside the handler so that a missing error is
                # reported as such rather than as a message mismatch.
                raise AssertionError("did not throw error")
Ejemplo n.º 9
0
    def test_cut_by_words_zero_chunks_precondition(self):
        """Check that ``cut_by_words`` rejects a ``seg_size`` of zero.

        Both calls are expected to raise an ``AssertionError`` whose message
        equals ``SEG_NON_POSITIVE_MESSAGE``.
        """
        # The "did not raise" failure lives in ``else`` so it is never caught
        # by the handler that checks the expected message.
        try:
            cut_by_words(text=" ", seg_size=0, overlap=0, last_prop=.5)
        except AssertionError as error:
            assert str(error) == SEG_NON_POSITIVE_MESSAGE
        else:
            raise AssertionError("zero_division error did not raise")

        try:
            cut_by_words(text="test test",
                         seg_size=0,
                         overlap=0,
                         last_prop=.5)
        except AssertionError as error:
            assert str(error) == SEG_NON_POSITIVE_MESSAGE
        else:
            raise AssertionError("zero_division error did not raise")
Ejemplo n.º 10
0
 def test_cut_by_words_neg_overlap_precondition(self):
     """Check that ``cut_by_words`` rejects a negative ``overlap``.

     The call is expected to raise an ``AssertionError`` whose message
     equals ``NEG_OVERLAP_LAST_PROP_MESSAGE``.
     """
     try:
         cut_by_words(text="test", seg_size=1, overlap=-1, last_prop=.5)
     except AssertionError as error:
         assert str(error) == NEG_OVERLAP_LAST_PROP_MESSAGE
     else:
         # Raised outside the handler so that a missing error is reported
         # as such rather than as a message mismatch.
         raise AssertionError("did not throw error")
Ejemplo n.º 11
0
 def test_cut_by_words_neg_chunk_precondition(self):
     """Check that ``cut_by_words`` rejects a negative ``seg_size``.

     The call is expected to raise an ``AssertionError`` whose message
     equals ``SEG_NON_POSITIVE_MESSAGE``.
     """
     try:
         cut_by_words(text="test", seg_size=-1, overlap=0, last_prop=.5)
     except AssertionError as error:
         assert str(error) == SEG_NON_POSITIVE_MESSAGE
     else:
         # Raised outside the handler so that a missing error is reported
         # as such rather than as a message mismatch.
         raise AssertionError("did not throw error")
Ejemplo n.º 12
0
    def test_seg_size_assertion_error(self):
        """Check that ``cut_by_words`` rejects an overlap >= ``seg_size``.

        Both calls are expected to raise an ``AssertionError`` whose message
        equals ``LARGER_SEG_SIZE_MESSAGE``.
        """
        # The "did not throw" failure lives in ``else`` so it is never caught
        # by the handler that checks the expected message.
        try:
            cut_by_words(text="test test",
                         seg_size=1,
                         overlap=1,
                         last_prop=.5)
        except AssertionError as error:
            assert str(error) == LARGER_SEG_SIZE_MESSAGE
        else:
            raise AssertionError("did not throw error")

        try:
            cut_by_words(text="test test test",
                         seg_size=1,
                         overlap=2,
                         last_prop=.5)
        except AssertionError as error:
            assert str(error) == LARGER_SEG_SIZE_MESSAGE
        else:
            raise AssertionError("did not throw error")
Ejemplo n.º 13
0
 def test_cut_by_words_proportion(self):
     """Exercise ``cut_by_words`` across several ``last_prop`` values."""
     three_words = "test test test"
     four_words = "test test test test"
     five_words = "test test test test test"

     # Three words, segment size two, varying last_prop.
     outcome = cut_by_words(
         text=three_words, seg_size=2, overlap=0, last_prop=1)
     assert outcome == ["test test test"]
     outcome = cut_by_words(
         text=three_words, seg_size=2, overlap=0, last_prop=.5)
     assert outcome == ["test test ", "test"]
     outcome = cut_by_words(
         text=three_words, seg_size=2, overlap=0, last_prop=1.5)
     assert outcome == ["test test test"]
     outcome = cut_by_words(
         text=three_words, seg_size=2, overlap=0, last_prop=2)
     assert outcome == ["test test test"]

     # Four words split evenly into two segments.
     outcome = cut_by_words(
         text=four_words, seg_size=2, overlap=0, last_prop=1)
     assert outcome == ["test test ", "test test"]

     # Five words, varying seg_size and last_prop.
     outcome = cut_by_words(
         text=five_words, seg_size=2, overlap=0, last_prop=.5)
     assert outcome == ["test test ", "test test ", "test"]
     outcome = cut_by_words(
         text=five_words, seg_size=2, overlap=0, last_prop=1)
     assert outcome == ["test test ", "test test test"]
     outcome = cut_by_words(
         text=five_words, seg_size=3, overlap=0, last_prop=1)
     assert outcome == ["test test test test test"]
Ejemplo n.º 14
0
 def test_cut_by_words_neg_overlap_precondition(self):
     """Check that ``cut_by_words`` rejects a negative ``overlap``.

     The call is expected to raise an ``AssertionError`` whose message
     equals ``NEG_OVERLAP_LAST_PROP_MESSAGE``.
     """
     try:
         cut_by_words(text="test", seg_size=1, overlap=-1, last_prop=.5)
     except AssertionError as error:
         assert str(error) == NEG_OVERLAP_LAST_PROP_MESSAGE
     else:
         # Kept out of the ``try`` body: raising it there would let the
         # handler above catch it and hide the real cause of the failure.
         raise AssertionError("did not throw error")
Ejemplo n.º 15
0
 def test_cut_by_words_neg_chunk_precondition(self):
     """Check that ``cut_by_words`` rejects a negative ``seg_size``.

     The call is expected to raise an ``AssertionError`` whose message
     equals ``SEG_NON_POSITIVE_MESSAGE``.
     """
     try:
         cut_by_words(text="test", seg_size=-1, overlap=0, last_prop=.5)
     except AssertionError as error:
         assert str(error) == SEG_NON_POSITIVE_MESSAGE
     else:
         # Kept out of the ``try`` body: raising it there would let the
         # handler above catch it and hide the real cause of the failure.
         raise AssertionError("did not throw error")
Ejemplo n.º 16
0
 def test_cut_by_words_no_whitespace(self):
     """Exercise ``cut_by_words`` on words with no whitespace inside them."""
     # Each row: (text, expected segments); one word per segment, no overlap.
     scenarios = (
         ("testtest", ["testtest"]),
         ("helloworld helloworld", ["helloworld ", "helloworld"]),
     )
     for words, wanted in scenarios:
         produced = cut_by_words(
             text=words, seg_size=1, overlap=0, last_prop=1)
         assert produced == wanted