Esempio n. 1
0
 def test_cutter_blank(self):
     """Check ``cut`` on texts that contain nothing but whitespace.

     A single space cut into 1-token pieces is expected to give one empty
     string; a lone newline cut into 1-line pieces is expected to give an
     empty list.
     """
     # Options shared by both calls below.
     common = dict(cutting_value="1", overlap="0", last_prop_percent="100%")

     from_space = cut(text=" ", cutting_type="Tokens", **common)
     assert from_space == [""]

     from_newline = cut(text="\n", cutting_type="Lines", **common)
     assert from_newline == []
Esempio n. 2
0
 def test_cutter_negative_numbers(self):
     """Check that ``cut`` rejects a cutting value of ``"0"``.

     The call is expected to raise an ``AssertionError`` whose message
     equals ``SEG_NON_POSITIVE_MESSAGE``.
     """
     try:
         cut(text="test", cutting_value="0", cutting_type="words",
             overlap="0", last_prop_percent="100%")
     except AssertionError as error:
         assert str(error) == SEG_NON_POSITIVE_MESSAGE
     else:
         # Raised outside the ``try`` so that it is not swallowed by the
         # handler above and the failure reports this message directly.
         raise AssertionError("negative number error does not raise")
Esempio n. 3
0
    def cut_contents(self) -> List[str]:
        """
        Cuts the contents of the file according to options chosen by the user.

        The loaded text has its leading whitespace removed. When the cutting
        type is "milestone", the milestone string is additionally removed
        from the start and from the end of the text. The result is then
        passed to ``cutter.cut`` together with the cutting options.

        :return: the substrings that the file contents have been cut up into.
        """

        text_string = self.load_contents()

        # From Lexos 3.1, trim white space at the start of the string.
        # NOTE(review): only leading whitespace is removed by this pattern;
        # trailing whitespace is left untouched.
        text_string = re.sub(r'^\s+', '', text_string)

        cutting_value, cutting_type, overlap, last_prop = \
            self.get_cutting_options()

        # From Lexos 3.1, trim the milestone at the start and end of the string
        if cutting_type == "milestone":
            # Escape the milestone so it is matched as literal text; regex
            # metacharacters in it (".", "(", "*", ...) would otherwise match
            # unintended text or make the pattern fail to compile.
            # NOTE(review): assumes the milestone is a literal string rather
            # than a user-supplied regex -- confirm against cutter.cut.
            literal = re.escape(cutting_value)
            milestone = re.compile(r'^' + literal + '|' + literal + '$')
            text_string = milestone.sub('', text_string)

        text_strings = cutter.cut(
            text_string,
            cutting_value=cutting_value,
            cutting_type=cutting_type,
            overlap=overlap,
            last_prop_percent=last_prop)

        return text_strings
Esempio n. 4
0
 def test_cutter_type(self):
     """Check that ``cut`` rejects the cutting type ``"chars"``.

     The call is expected to raise an ``AssertionError`` whose message
     equals ``INVALID_CUTTING_TYPE_MESSAGE``.
     """
     try:
         cut(text="test", cutting_value='1', cutting_type="chars",
             overlap="0", last_prop_percent="100%")
     except AssertionError as error:
         assert str(error) == INVALID_CUTTING_TYPE_MESSAGE
     else:
         # Raised outside the ``try`` so that it is not swallowed by the
         # handler above and the failure reports this message directly.
         raise AssertionError("invalid cutting type error does not raise")
Esempio n. 5
0
    def cut_contents(self) -> List[str]:
        """
        Cuts the contents of the file according to options chosen by the user.

        Leading whitespace is stripped from the loaded text, and for the
        "milestone" cutting type the milestone string is also stripped from
        the start and the end of the text, before the text and the cutting
        options are handed to ``cutter.cut``.

        :return: the substrings that the file contents have been cut up into.
        """

        text_string = self.load_contents()

        # From Lexos 3.1, trim white space at the start of the string.
        # NOTE(review): the pattern is anchored at the start only, so
        # trailing whitespace is kept.
        text_string = re.sub(r'^\s+', '', text_string)

        cutting_value, cutting_type, overlap, last_prop = \
            self.get_cutting_options()

        # From Lexos 3.1, trim the milestone at the start and end of the string
        if cutting_type == "milestone":
            # The milestone is escaped so that it is matched literally;
            # unescaped regex metacharacters would match unintended text or
            # raise re.error when the pattern is compiled.
            # NOTE(review): assumes milestones are literal strings, not
            # regular expressions -- confirm against cutter.cut.
            escaped = re.escape(cutting_value)
            milestone = re.compile(r'^' + escaped + '|' + escaped + '$')
            text_string = milestone.sub('', text_string)

        text_strings = cutter.cut(
            text_string,
            cutting_value=cutting_value,
            cutting_type=cutting_type,
            overlap=overlap,
            last_prop_percent=last_prop)

        return text_strings
Esempio n. 6
0
 def test_cutter_negative_numbers(self):
     """Check that ``cut`` rejects a cutting value of ``"0"``.

     The call is expected to raise an ``AssertionError`` whose message
     equals ``SEG_NON_POSITIVE_MESSAGE``.
     """
     try:
         cut(text="test",
             cutting_value="0",
             cutting_type="Tokens",
             overlap="0",
             last_prop_percent="100%")
     except AssertionError as error:
         assert str(error) == SEG_NON_POSITIVE_MESSAGE
     else:
         # Kept out of the ``try`` body: raised there, it would be caught by
         # the handler above and reported as a message mismatch instead.
         raise AssertionError("negative number error does not raise")
Esempio n. 7
0
 def test_cutter_type(self):
     """Check that ``cut`` rejects the cutting type ``"chars"``.

     The call is expected to raise an ``AssertionError`` whose message
     equals ``INVALID_CUTTING_TYPE_MESSAGE``.
     """
     try:
         cut(text="test",
             cutting_value='1',
             cutting_type="chars",
             overlap="0",
             last_prop_percent="100%")
     except AssertionError as error:
         assert str(error) == INVALID_CUTTING_TYPE_MESSAGE
     else:
         # Kept out of the ``try`` body: raised there, it would be caught by
         # the handler above and reported as a message mismatch instead.
         raise AssertionError("invalid cutting type error does not raise")
Esempio n. 8
0
    def test_cutter_basic(self):
        """Run ``cut`` over a table of small inputs and check each result.

        Every case uses an overlap of "0" and a last proportion of "100%";
        the cases cover the "Lines", "Tokens", "Characters", "Milestones"
        and "Segments" cutting types.
        """
        # (text, cutting_value, cutting_type, expected segments)
        cases = [
            ("test\ntest\ntest", "1", "Lines", ["test\n", "test\n", "test"]),
            (" test", "1", "Tokens", ["test"]),
            ("   \ntest", "1", "Lines", ["test"]),
            (" test", "2", "Characters", [" t", "est"]),
            ("test", "1", "Milestones", ["test"]),
            ("test", "test", "Milestones", ["", ""]),
            ("test", "e", "Milestones", ["t", "st"]),
            ("test\ntesttest", "3", "Segments", ["test\n", "testtest", ""]),
            ("test test test", "3", "Segments", ["test ", "test ", "test"]),
        ]

        for source, value, kind, expected in cases:
            assert cut(text=source,
                       cutting_value=value,
                       cutting_type=kind,
                       overlap="0",
                       last_prop_percent="100%") == expected
Esempio n. 9
0
    def test_cutter_basic(self):
        """Check ``cut`` on small inputs for each cutting type.

        All calls use an overlap of "0" and a last proportion of "100%";
        the cutting types exercised are "lines", "words", "letters",
        "milestone" and "number".
        """

        def segments(source, value, kind):
            # Wraps ``cut`` with the options that never vary in this test.
            return cut(text=source,
                       cutting_value=value,
                       cutting_type=kind,
                       overlap="0",
                       last_prop_percent="100%")

        # Cutting by lines and words.
        assert segments("test\ntest\ntest", "1", "lines") == \
            ["test\n", "test\n", "test"]
        assert segments(" test", "1", "words") == ["test"]
        assert segments("   \ntest", "1", "lines") == ["   \n", "test"]

        # Cutting by letters.
        assert segments(" test", "2", "letters") == ["te", "st"]

        # Cutting by milestone.
        assert segments("test", "1", "milestone") == ["test"]
        assert segments("test", "test", "milestone") == ["", ""]
        assert segments("test", "e", "milestone") == ["t", "st"]

        # Cutting into a fixed number of segments.
        assert segments("test\ntesttest", "3", "number") == \
            ["test\n", "testtest", ""]
        assert segments("test test test", "3", "number") == \
            ["test ", "test ", "test"]
Esempio n. 10
0
    def test_cutter_basic(self):
        """Check the segments ``cut`` produces for a series of short texts.

        The overlap is always "0" and the last proportion always "100%";
        the "lines", "words", "letters", "milestone" and "number" cutting
        types are each exercised.
        """
        # Keyword options that are identical for every call in this test.
        fixed = {"overlap": "0", "last_prop_percent": "100%"}

        by_line = cut(text="test\ntest\ntest", cutting_value="1",
                      cutting_type="lines", **fixed)
        assert by_line == ["test\n", "test\n", "test"]

        by_word = cut(text=" test", cutting_value="1",
                      cutting_type="words", **fixed)
        assert by_word == ["test"]

        blank_first_line = cut(text="   \ntest", cutting_value="1",
                               cutting_type="lines", **fixed)
        assert blank_first_line == ["   \n", "test"]

        by_letter = cut(text=" test", cutting_value="2",
                        cutting_type="letters", **fixed)
        assert by_letter == ["te", "st"]

        absent_milestone = cut(text="test", cutting_value="1",
                               cutting_type="milestone", **fixed)
        assert absent_milestone == ["test"]

        whole_text_milestone = cut(text="test", cutting_value="test",
                                   cutting_type="milestone", **fixed)
        assert whole_text_milestone == ["", ""]

        inner_milestone = cut(text="test", cutting_value="e",
                              cutting_type="milestone", **fixed)
        assert inner_milestone == ["t", "st"]

        two_tokens_in_three = cut(text="test\ntesttest", cutting_value="3",
                                  cutting_type="number", **fixed)
        assert two_tokens_in_three == ["test\n", "testtest", ""]

        three_tokens_in_three = cut(text="test test test", cutting_value="3",
                                    cutting_type="number", **fixed)
        assert three_tokens_in_three == ["test ", "test ", "test"]
Esempio n. 11
0
 def test_cutter_blank(self):
     """Check ``cut`` on texts that contain nothing but whitespace.

     A single space cut into 1-word pieces is expected to give one empty
     string; a lone newline cut into 1-line pieces is expected to give that
     newline back as the only segment.
     """
     # Options shared by both calls below.
     common = dict(cutting_value="1", overlap="0", last_prop_percent="100%")

     from_space = cut(text=" ", cutting_type="words", **common)
     assert from_space == [""]

     from_newline = cut(text="\n", cutting_type="lines", **common)
     assert from_newline == ["\n"]