Example #1
def test_words_tokenizes_the_sentence_correctly(self):
    # The default word tokenizer lowercases the text and splits off
    # punctuation and contractions ("isn't" -> "is", "n't").
    t = TrueFalseInstance("This is a sentence.", None)
    assert t.words() == {'words': ['this', 'is', 'a', 'sentence', '.']}
    t = TrueFalseInstance("This isn't a sentence.", None)
    assert t.words() == {
        'words': ['this', 'is', "n't", 'a', 'sentence', '.']
    }
    t = TrueFalseInstance("And, I have commas.", None)
    assert t.words() == {'words': ['and', ',', 'i', 'have', 'commas', '.']}
Example #2
def test_words_tokenizes_the_sentence_correctly(self):
    t = TrueFalseInstance("This is a sentence.", None)
    assert t.words() == {'words': ['this', 'is', 'a', 'sentence', '.']}
    # The tokenizer is a class-level attribute, so swapping it changes what
    # words() returns even for an instance created before the swap.
    TextInstance.tokenizer = tokenizers['characters']({})
    assert t.words() == {'characters': ['T', 'h', 'i', 's', ' ', 'i', 's', ' ', 'a', ' ', 's',
                                        'e', 'n', 't', 'e', 'n', 'c', 'e', '.']}
    TextInstance.tokenizer = tokenizers['words and characters']({})
    assert t.words() == {'words': ['this', 'is', 'a', 'sentence', '.'],
                         'characters': ['T', 'h', 'i', 's', ' ', 'i', 's', ' ', 'a', ' ', 's',
                                        'e', 'n', 't', 'e', 'n', 'c', 'e', '.']}
    # Restore the default word tokenizer so later tests are unaffected.
    TextInstance.tokenizer = tokenizers['words']({})
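
Neither snippet is self-contained: both assume a TrueFalseInstance class, a TextInstance base class carrying a class-level tokenizer, and a tokenizers registry keyed by 'words', 'characters', and 'words and characters'. The sketch below is a minimal stand-in for those pieces, written only so the tests above can run in isolation; the names mirror the snippets, but the tokenization logic (NLTK's word_tokenize plus plain character splitting) is an assumption, not the project's actual implementation.

import nltk  # assumes NLTK's 'punkt' tokenizer data has been downloaded


class WordTokenizer:
    """Lowercased word tokens; NLTK splits contractions ("isn't" -> "is", "n't")."""
    def __init__(self, params):
        self.params = params

    def get_words(self, text):
        return {'words': [token.lower() for token in nltk.word_tokenize(text)]}


class CharacterTokenizer:
    """Individual characters, spaces included, case preserved."""
    def __init__(self, params):
        self.params = params

    def get_words(self, text):
        return {'characters': list(text)}


class WordAndCharacterTokenizer:
    """Both views at once, merged into a single dict."""
    def __init__(self, params):
        self.word_tokenizer = WordTokenizer(params)
        self.character_tokenizer = CharacterTokenizer(params)

    def get_words(self, text):
        result = self.word_tokenizer.get_words(text)
        result.update(self.character_tokenizer.get_words(text))
        return result


# Registry keyed by the same names the tests index with.
tokenizers = {
    'words': WordTokenizer,
    'characters': CharacterTokenizer,
    'words and characters': WordAndCharacterTokenizer,
}


class TextInstance:
    # Class-level attribute: reassigning it (as the second test does) changes
    # the behaviour of every instance, including ones already constructed.
    tokenizer = tokenizers['words']({})


class TrueFalseInstance(TextInstance):
    def __init__(self, text, label):
        self.text = text
        self.label = label

    def words(self):
        return self.tokenizer.get_words(self.text)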