Example #1
    def test_tokenize_should_raise_on_bytes(self):
        # Given
        s = b"let's eat food tonight"
        language = b"en"

        # When / Then
        with self.assertRaises(TypeError):
            tokenize(s, language)
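Outside the test class, the same contract can be exercised directly. A minimal sketch, assuming tokenize is importable from the snips_nlu_utils package (the examples omit their imports):

    from snips_nlu_utils import tokenize  # assumed import path

    try:
        tokenize(b"let's eat food tonight", b"en")  # bytes instead of str
    except TypeError as exc:
        print("bytes input rejected:", exc)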
Example #2
    def test_should_tokenize(self):
        # Given
        u = "let's eat food tonight"
        language = "en"

        # When
        tokens = tokenize(u, language)

        # Then
        self.assertGreater(len(tokens), 0)
        self.assertTrue(all(isinstance(t, dict) for t in tokens))

    def test_should_tokenize_non_ascii(self):
        # Given
        u = "foo bär baz"
        language = "en"

        # When
        tokens = tokenize(u, language)

        # Then
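        # Note: "range" is byte offsets in the UTF-8 encoding, while
        # "char_range" is character offsets; "ä" takes two bytes, so
        # "bär" ends at byte 8 but at character 7.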
        expected_tokens = [
            {
                "value": "foo",
                "range": {
                    "start": 0,
                    "end": 3
                },
                "char_range": {
                    "start": 0,
                    "end": 3
                }
            },
            {
                "value": "bär",
                "range": {
                    "start": 4,
                    "end": 8
                },
                "char_range": {
                    "start": 4,
                    "end": 7
                }
            },
            {
                "value": "baz",
                "range": {
                    "start": 9,
                    "end": 12
                },
                "char_range": {
                    "start": 8,
                    "end": 11
                }
            },
        ]
        self.assertListEqual(expected_tokens, tokens)

    def test_should_tokenize_empty_string(self):
        self.assertListEqual([], tokenize("", "en"))
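These methods read as members of a single unittest.TestCase. A minimal scaffold for running them, assuming the tokenize function comes from the snips_nlu_utils package and using a hypothetical TestTokenize class name:

    import unittest

    # Assumed import path: the examples omit their imports, but the token
    # shape (value / range / char_range) matches the snips_nlu_utils package.
    from snips_nlu_utils import tokenize


    class TestTokenize(unittest.TestCase):  # hypothetical name
        # ... the test methods shown in the examples above go here ...

        def test_should_tokenize_empty_string(self):
            self.assertListEqual([], tokenize("", "en"))


    if __name__ == "__main__":
        unittest.main()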