Example #1
0
 def test_tokenize_url(self):
     """Tokeniser - URL

     A URL embedded in running text is expected to come back as one token,
     while the sentence-final full stop becomes a token of its own.
     """
     sentence = "I go to http://www.google.com when I need to find something."
     actual = tokenise(sentence)
     expected = "I go to http://www.google.com when I need to find something .".split(" ")
     self.assertEqual(actual, expected)
Example #2
0
 def test_tokenize_quotes(self):
     """Tokeniser - quotes

     The colon, both double quotes and the exclamation mark are expected as
     separate tokens; the apostrophe inside ``baby'tje`` must not split the
     word.
     """
     sentence = 'Hij zegt: "Wat een lief baby\'tje is dat!"'
     actual = tokenise(sentence)
     expected = 'Hij zegt : " Wat een lief baby\'tje is dat ! "'.split(" ")
     self.assertEqual(actual, expected)
Example #3
0
 def test_tokenize_sentences(self):
     """Tokeniser - Multiple sentences

     Two consecutive sentences are fed in as a single string; the comma, the
     exclamation mark and the final full stop are each expected as separate
     tokens in one flat token list.
     """
     source_text = "This, is the first sentence! This is the second sentence."
     produced = tokenise(source_text)
     wanted = "This , is the first sentence ! This is the second sentence .".split(" ")
     self.assertEqual(produced, wanted)
Example #4
0
 def test_tokenize_url(self):
     """Tokeniser - URL

     The URL in the middle of the sentence is expected to stay intact as a
     single token; only the closing full stop is split off.
     """
     text_in = "I go to http://www.google.com when I need to find something."
     tokens = tokenise(text_in)
     tokens_expected = (
         "I go to http://www.google.com when I need to find something ."
     ).split(" ")
     self.assertEqual(tokens, tokens_expected)
Example #5
0
 def test_tokenize_numeric(self):
     """Tokeniser - numeric

     The formatted amount ``300,000.00`` is expected as one token despite its
     internal comma and period; the trailing exclamation mark is split off.
     """
     sentence = "I won € 300,000.00!"
     actual = tokenise(sentence)
     expected = "I won € 300,000.00 !".split(" ")
     self.assertEqual(actual, expected)
Example #6
0
 def test_tokenize_mail(self):
     """Tokeniser - Mail

     The full stop that directly follows the address is expected to be split
     off as a token of its own.
     """
     # NOTE(review): the address in both literals reads like a placeholder
     # left behind by an e-mail obfuscation filter -- confirm the intended
     # address against the upstream test before relying on this case.
     sentence = "Write me at [email protected]."
     actual = tokenise(sentence)
     expected = "Write me at [email protected] .".split(" ")
     self.assertEqual(actual, expected)
Example #7
0
 def test_tokenize_noeos(self):
     """Tokeniser - Missing EOS Marker

     Input without closing punctuation is expected to yield exactly its
     space-separated words, with no extra end-of-sentence token.
     """
     unterminated = "This is a test"
     produced = tokenise(unterminated)
     wanted = "This is a test".split(" ")
     self.assertEqual(produced, wanted)
Example #8
0
 def test_tokenize_sentences(self):
     """Tokeniser - Multiple sentences

     A string holding two sentences is tokenised in one call; every
     punctuation mark (comma, exclamation mark, full stop) is expected as a
     separate token.
     """
     two_sentences = "This, is the first sentence! This is the second sentence."
     result = tokenise(two_sentences)
     reference = "This , is the first sentence ! This is the second sentence .".split(" ")
     self.assertEqual(result, reference)
Example #9
0
 def test_tokenize(self):
     """Tokeniser - One sentence

     A single plain sentence is expected to split into its words plus a
     separate token for the final full stop.
     """
     sentence = "This is a test."
     produced = tokenise(sentence)
     wanted = "This is a test .".split(" ")
     self.assertEqual(produced, wanted)
Example #10
0
 def test_tokenize_numeric(self):
     """Tokeniser - numeric

     A number containing both a thousands comma and a decimal point is
     expected to remain a single token; the currency sign and the final
     exclamation mark are tokens of their own.
     """
     text_in = "I won € 300,000.00!"
     tokens = tokenise(text_in)
     tokens_expected = "I won € 300,000.00 !".split(" ")
     self.assertEqual(tokens, tokens_expected)
Example #11
0
 def test_tokenize_mail(self):
     """Tokeniser - Mail

     The sentence-final full stop right after the address is expected to be
     detached into a separate token.
     """
     # NOTE(review): the address literal looks like an obfuscation
     # placeholder rather than a real e-mail address -- verify the intended
     # value against the upstream test.
     text_in = "Write me at [email protected]."
     tokens = tokenise(text_in)
     tokens_expected = "Write me at [email protected] .".split(" ")
     self.assertEqual(tokens, tokens_expected)
Example #12
0
 def test_tokenize_noeos(self):
     """Tokeniser - Missing EOS Marker

     A sentence lacking a final punctuation mark is expected to come back as
     just its space-separated words.
     """
     raw = "This is a test"
     result = tokenise(raw)
     reference = "This is a test".split(" ")
     self.assertEqual(result, reference)
Example #13
0
 def test_tokenize(self):
     """Tokeniser - One sentence

     One simple sentence: the words are expected unchanged and the closing
     full stop as a separate last token.
     """
     raw = "This is a test."
     result = tokenise(raw)
     reference = "This is a test .".split(" ")
     self.assertEqual(result, reference)
Example #14
0
 def test_tokenize_quotes(self):
     """Tokeniser - quotes

     Opening and closing double quotes, the colon and the exclamation mark
     are each expected as separate tokens, whereas the word-internal
     apostrophe in ``baby'tje`` is expected to stay inside its token.
     """
     text_in = "Hij zegt: \"Wat een lief baby'tje is dat!\""
     tokens = tokenise(text_in)
     tokens_expected = "Hij zegt : \" Wat een lief baby'tje is dat ! \"".split(" ")
     self.assertEqual(tokens, tokens_expected)