예제 #1
0
    def test_empty_options(self, build_sentences_mock):
        """Check that no sentences from the builder yields an empty result.

        ``build_sentences_mock`` is presumably injected by a ``patch``
        decorator that is not visible in this chunk -- confirm against the
        enclosing class.
        """
        options = self._default_options()
        build_sentences_mock.return_value = []

        # mock_open() without read_data makes every read return an empty
        # string, so no real file is touched.
        open_mock = mock_open()
        with patch("swe_pipeline.open", open_mock, create=True):
            self.assertEqual(run_tokenization(options, "file.txt"), [])
예제 #3
0
    def test_sentences(self, build_sentences_mock):
        """Check that the builder's tokenized sentences are returned as-is.

        The sentence builder is mocked to return two token lists, and
        ``run_tokenization`` is expected to return exactly those lists.

        ``build_sentences_mock`` is presumably injected by a ``patch``
        decorator that is not visible in this chunk -- confirm against the
        enclosing class.
        """
        options = self._default_options()
        build_sentences_mock.return_value = [
            ["Hej", "mitt", "namn", "är"],
            ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
        ]

        # File content served by the mocked open(): one sentence per line.
        text = dedent("""
            Hej mitt namn är
            Hej mitt namn är Slim Shady
        """).strip()

        open_mock = mock_open(read_data=text)
        with patch("swe_pipeline.open", open_mock, create=True):
            self.assertEqual(run_tokenization(options, "file.txt"), [
                ["Hej", "mitt", "namn", "är"],
                ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
            ])