def test_cutter_blank(self):
    """Check how ``cut`` handles input made only of whitespace.

    A single space cut by "Tokens" is expected to give one empty segment,
    and a lone newline cut by "Lines" is expected to give no segments.
    """
    # Options shared by both calls: segment size 1, no overlap, and a
    # last-segment proportion of 100%.
    shared_options = dict(
        cutting_value="1", overlap="0", last_prop_percent="100%")

    space_segments = cut(text=" ", cutting_type="Tokens", **shared_options)
    assert space_segments == [""]

    newline_segments = cut(
        text="\n", cutting_type="Lines", **shared_options)
    assert newline_segments == []
def test_cutter_negative_numbers(self):
    """Check that ``cut`` rejects a non-positive cutting value.

    Calling ``cut`` with ``cutting_value="0"`` must raise an
    ``AssertionError`` whose message equals ``SEG_NON_POSITIVE_MESSAGE``.

    :raises AssertionError: if ``cut`` does not raise, or raises with a
        different message.
    """
    try:
        cut(text="test", cutting_value="0", cutting_type="words",
            overlap="0", last_prop_percent="100%")
    except AssertionError as error:
        assert str(error) == SEG_NON_POSITIVE_MESSAGE
    else:
        # Raised from the ``else`` clause so the handler above cannot catch
        # it and turn it into a misleading message-mismatch failure.
        raise AssertionError("negative number error does not raise")
def cut_contents(self) -> List[str]:
    """Cut the contents of the file according to options chosen by the user.

    The text is loaded with ``self.load_contents()``, leading whitespace is
    removed, and, when cutting by milestone, one milestone occurrence at the
    very start and/or very end of the text is removed. The cleaned text is
    then handed to ``cutter.cut`` together with the options returned by
    ``self.get_cutting_options()``.

    :return: the substrings that the file contents have been cut up into.
    """
    text_string = self.load_contents()

    # From Lexos 3.1, trim white space at the start of the string.
    # NOTE(review): the original comment said "start and end", but only
    # leading whitespace has ever been stripped here; behaviour is kept.
    text_string = re.sub(r'^\s+', '', text_string)

    cutting_value, cutting_type, overlap, last_prop = \
        self.get_cutting_options()

    # From Lexos 3.1, trim the milestone at the start and end of the string.
    if cutting_type == "milestone":
        # Escape the milestone so regex metacharacters in it (".", "*",
        # "(", ...) are matched literally instead of stripping unrelated
        # text or raising ``re.error``.
        # NOTE(review): assumes cutter.cut treats the milestone as a
        # literal string rather than a pattern -- confirm.
        literal_milestone = re.escape(cutting_value)
        edge_milestone = re.compile(
            r'^' + literal_milestone + '|' + literal_milestone + '$')
        text_string = edge_milestone.sub('', text_string)

    return cutter.cut(
        text_string,
        cutting_value=cutting_value,
        cutting_type=cutting_type,
        overlap=overlap,
        last_prop_percent=last_prop)
def test_cutter_type(self):
    """Check that ``cut`` rejects an unsupported cutting type.

    Calling ``cut`` with ``cutting_type="chars"`` must raise an
    ``AssertionError`` whose message equals
    ``INVALID_CUTTING_TYPE_MESSAGE``.

    :raises AssertionError: if ``cut`` does not raise, or raises with a
        different message.
    """
    try:
        cut(text="test", cutting_value='1', cutting_type="chars",
            overlap="0", last_prop_percent="100%")
    except AssertionError as error:
        assert str(error) == INVALID_CUTTING_TYPE_MESSAGE
    else:
        # Raised from the ``else`` clause so the handler above cannot catch
        # it and turn it into a misleading message-mismatch failure.
        raise AssertionError("invalid cutting type error does not raise")
def cut_contents(self) -> List[str]:
    """Cut the contents of the file according to options chosen by the user.

    The text is loaded with ``self.load_contents()``, leading whitespace is
    removed, and, when cutting by milestone, one milestone occurrence at the
    very start and/or very end of the text is removed. The cleaned text is
    then handed to ``cutter.cut`` together with the options returned by
    ``self.get_cutting_options()``.

    :return: the substrings that the file contents have been cut up into.
    """
    text_string = self.load_contents()

    # From Lexos 3.1, trim white space at the start of the string.
    # NOTE(review): the original comment said "start and end", but only
    # leading whitespace has ever been stripped here; behaviour is kept.
    text_string = re.sub(r'^\s+', '', text_string)

    cutting_value, cutting_type, overlap, last_prop = \
        self.get_cutting_options()

    # From Lexos 3.1, trim the milestone at the start and end of the string.
    if cutting_type == "milestone":
        # Escape the milestone so regex metacharacters in it (".", "*",
        # "(", ...) are matched literally instead of stripping unrelated
        # text or raising ``re.error``.
        # NOTE(review): assumes cutter.cut treats the milestone as a
        # literal string rather than a pattern -- confirm.
        literal_milestone = re.escape(cutting_value)
        edge_milestone = re.compile(
            r'^' + literal_milestone + '|' + literal_milestone + '$')
        text_string = edge_milestone.sub('', text_string)

    return cutter.cut(
        text_string,
        cutting_value=cutting_value,
        cutting_type=cutting_type,
        overlap=overlap,
        last_prop_percent=last_prop)
def test_cutter_negative_numbers(self):
    """Check that ``cut`` rejects a non-positive cutting value.

    Calling ``cut`` with ``cutting_value="0"`` must raise an
    ``AssertionError`` whose message equals ``SEG_NON_POSITIVE_MESSAGE``.

    :raises AssertionError: if ``cut`` does not raise, or raises with a
        different message.
    """
    try:
        cut(text="test", cutting_value="0", cutting_type="Tokens",
            overlap="0", last_prop_percent="100%")
    except AssertionError as error:
        assert str(error) == SEG_NON_POSITIVE_MESSAGE
    else:
        # Raised from the ``else`` clause so the handler above cannot catch
        # it and turn it into a misleading message-mismatch failure.
        raise AssertionError("negative number error does not raise")
def test_cutter_type(self):
    """Check that ``cut`` rejects an unsupported cutting type.

    Calling ``cut`` with ``cutting_type="chars"`` must raise an
    ``AssertionError`` whose message equals
    ``INVALID_CUTTING_TYPE_MESSAGE``.

    :raises AssertionError: if ``cut`` does not raise, or raises with a
        different message.
    """
    try:
        cut(text="test", cutting_value='1', cutting_type="chars",
            overlap="0", last_prop_percent="100%")
    except AssertionError as error:
        assert str(error) == INVALID_CUTTING_TYPE_MESSAGE
    else:
        # Raised from the ``else`` clause so the handler above cannot catch
        # it and turn it into a misleading message-mismatch failure.
        raise AssertionError("invalid cutting type error does not raise")
def test_cutter_basic(self):
    """Run ``cut`` over small inputs for each cutting type.

    Every case uses no overlap and a last-segment proportion of 100%; only
    the text, the cutting value and the cutting type vary.
    """
    # Each row: (text, cutting_value, cutting_type, expected segments).
    cases = [
        ("test\ntest\ntest", "1", "Lines", ["test\n", "test\n", "test"]),
        (" test", "1", "Tokens", ["test"]),
        (" \ntest", "1", "Lines", ["test"]),
        (" test", "2", "Characters", [" t", "est"]),
        ("test", "1", "Milestones", ["test"]),
        ("test", "test", "Milestones", ["", ""]),
        ("test", "e", "Milestones", ["t", "st"]),
        ("test\ntesttest", "3", "Segments", ["test\n", "testtest", ""]),
        ("test test test", "3", "Segments", ["test ", "test ", "test"]),
    ]

    for sample, size, kind, expected in cases:
        segments = cut(
            text=sample,
            cutting_value=size,
            cutting_type=kind,
            overlap="0",
            last_prop_percent="100%")
        assert segments == expected
def test_cutter_basic(self):
    """Run ``cut`` over small inputs for each cutting type.

    Every case uses no overlap and a last-segment proportion of 100%; only
    the text, the cutting value and the cutting type vary.
    """
    # Each row: (text, cutting_value, cutting_type, expected segments).
    cases = [
        ("test\ntest\ntest", "1", "lines", ["test\n", "test\n", "test"]),
        (" test", "1", "words", ["test"]),
        (" \ntest", "1", "lines", [" \n", "test"]),
        (" test", "2", "letters", ["te", "st"]),
        ("test", "1", "milestone", ["test"]),
        ("test", "test", "milestone", ["", ""]),
        ("test", "e", "milestone", ["t", "st"]),
        ("test\ntesttest", "3", "number", ["test\n", "testtest", ""]),
        ("test test test", "3", "number", ["test ", "test ", "test"]),
    ]

    for sample, size, kind, expected in cases:
        segments = cut(
            text=sample,
            cutting_value=size,
            cutting_type=kind,
            overlap="0",
            last_prop_percent="100%")
        assert segments == expected
def test_cutter_basic(self):
    """Run ``cut`` over small inputs for each cutting type.

    Every case uses no overlap and a last-segment proportion of 100%; only
    the text, the cutting value and the cutting type vary.
    """
    # Each row: (text, cutting_value, cutting_type, expected segments).
    cases = [
        ("test\ntest\ntest", "1", "lines", ["test\n", "test\n", "test"]),
        (" test", "1", "words", ["test"]),
        (" \ntest", "1", "lines", [" \n", "test"]),
        (" test", "2", "letters", ["te", "st"]),
        ("test", "1", "milestone", ["test"]),
        ("test", "test", "milestone", ["", ""]),
        ("test", "e", "milestone", ["t", "st"]),
        ("test\ntesttest", "3", "number", ["test\n", "testtest", ""]),
        ("test test test", "3", "number", ["test ", "test ", "test"]),
    ]

    for sample, size, kind, expected in cases:
        segments = cut(
            text=sample,
            cutting_value=size,
            cutting_type=kind,
            overlap="0",
            last_prop_percent="100%")
        assert segments == expected
def test_cutter_blank(self):
    """Check how ``cut`` handles input made only of whitespace.

    A single space cut by "words" is expected to give one empty segment,
    and a lone newline cut by "lines" is expected to come back as a single
    newline segment.
    """
    # Options shared by both calls: segment size 1, no overlap, and a
    # last-segment proportion of 100%.
    shared_options = dict(
        cutting_value="1", overlap="0", last_prop_percent="100%")

    space_segments = cut(text=" ", cutting_type="words", **shared_options)
    assert space_segments == [""]

    newline_segments = cut(
        text="\n", cutting_type="lines", **shared_options)
    assert newline_segments == ["\n"]