Beispiel #1
0
    def test_quotation(self):
        """Double quotes tokenize Penn-Treebank style: opening `` and closing ''."""
        cases = [
            ("i eat \"food\"", "i eat `` food ''"),
            ("i eat \"food\".", "i eat `` food '' ."),
            ("i eat \"food.\"", "i eat `` food . ''"),
        ]
        for text, expected in cases:
            tokens = processing.word_tokens(text)
            self.assertEqual([token.literal for token in tokens],
                             expected.split(" "))
Beispiel #2
0
    def test_terminals(self):
        """Each sentence terminator (., ?, !) is split off as its own token."""
        for terminal in (".", "?", "!"):
            tokens = processing.word_tokens("i eat food" + terminal)
            self.assertEqual([token.literal for token in tokens],
                             ["i", "eat", "food", terminal])
Beispiel #3
0
    def test_apostrophe(self):
        """Apostrophes: contractions split (do n't, 'll); quoting apostrophes
        are isolated regardless of surrounding spacing."""
        cases = [
            ("the new series ' .", "the new series ' ."),
            ("we don't care.", "we do n't care ."),
            ("we'll eat soon.", "we 'll eat soon ."),
            ("i eat 'food'.", "i eat ' food ' ."),
            ("i eat ' food'.", "i eat ' food ' ."),
            ("i eat 'food '.", "i eat ' food ' ."),
            ("i eat ' food '.", "i eat ' food ' ."),
        ]
        for text, expected in cases:
            tokens = processing.word_tokens(text)
            self.assertEqual([token.literal for token in tokens],
                             expected.split(" "))
Beispiel #4
0
 def test_capitalization(self):
     """Tokens are lower-cased regardless of the input's casing."""
     tokens = processing.word_tokens("I eAt FOOD")
     actual = [token.literal for token in tokens]
     self.assertEqual(actual, ["i", "eat", "food"])
Beispiel #5
0
 def test_lines(self):
     """Newlines and carriage returns behave as ordinary token separators."""
     tokens = processing.word_tokens("i  eat\nfood\r.")
     actual = [token.literal for token in tokens]
     self.assertEqual(actual, ["i", "eat", "food", "."])
Beispiel #6
0
 def test_whitespace(self):
     """Leading/trailing, repeated, and non-breaking (U+00A0) spaces collapse;
     no empty tokens are produced."""
     tokens = processing.word_tokens(" i  eat      food\u00a0. ")
     actual = [token.literal for token in tokens]
     self.assertEqual(actual, ["i", "eat", "food", "."])