def words(self):
    # TODO: inefficient; issues one get_or_create() query per token.
    for sentence in sent_tokenize(self.body):
        for text in word_tokenize(sentence):
            yield Word.objects.get_or_create(
                native_language=self.native_language,
                native_text=unicode(text),
            )[0]
def sentences(self):
    return sent_tokenize(self.body)
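
# Hypothetical sketch addressing the TODO above, not part of the original
# code: batch the token lookups instead of calling get_or_create() once per
# token. Assumes the same Word fields (native_language, native_text) and
# Django 2.2+ for bulk_create(ignore_conflicts=True). Unlike words(), this
# returns each distinct word once, without preserving token order.
def words_batched(self):
    tokens = {text
              for sentence in sent_tokenize(self.body)
              for text in word_tokenize(sentence)}
    existing = set(Word.objects.filter(
        native_language=self.native_language,
        native_text__in=tokens,
    ).values_list('native_text', flat=True))
    Word.objects.bulk_create(
        [Word(native_language=self.native_language, native_text=t)
         for t in tokens if t not in existing],
        ignore_conflicts=True,
    )
    # Re-query so newly inserted rows come back with primary keys set.
    return Word.objects.filter(
        native_language=self.native_language,
        native_text__in=tokens,
    )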