def test_empty_options(self, build_sentences_mock):
    """run_tokenization yields no sentences when the input file is empty."""
    options = self._default_options()
    build_sentences_mock.return_value = []
    open_mock = mock_open()
    with patch("swe_pipeline.open", open_mock, create=True):
        self.assertEqual(run_tokenization(options, "file.txt"), [])
def test_sentences(self, build_sentences_mock):
    """run_tokenization returns the tokenized sentences built from the file."""
    options = self._default_options()
    build_sentences_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    text = dedent("""
        Hej mitt namn är
        Hej mitt namn är Slim Shady
    """).strip()
    open_mock = mock_open(read_data=text)
    with patch("swe_pipeline.open", open_mock, create=True):
        self.assertEqual(run_tokenization(options, "file.txt"), [
            ["Hej", "mitt", "namn", "är"],
            ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
        ])