def test_basic_case_(self):
    """Check that a CaseFilter wrapping the plural stripper agrees with it.

    For every sample word, the value returned by ``CaseFilter.apply`` must
    equal the first element of ``RemovePluralSuffix.removeSuffix``'s result.
    """
    stripper = RemovePluralSuffix()
    case_filter = CaseFilter(stripper)
    samples = [u"பதிவில்", u"கட்டளைகள்", u"அவர்கள்"]
    # NOTE(review): these stems are paired with the samples but never
    # asserted against; only filter-vs-stripper agreement is checked here.
    stems = [u"பதிவி", u"கட்டளை", u"அவர்"]
    for word, _stem in zip(samples, stems):
        stripped = stripper.removeSuffix(word)
        filtered = case_filter.apply(word)
        self.assertEqual(filtered, stripped[0])
def test_basic_plural_stripper(self):
    """Check ``RemovePluralSuffix.removeSuffix`` against known word/stem pairs.

    For each pair the second element of the result must be truthy
    (presumably a "suffix was removed" flag -- confirm against
    ``removeSuffix``) and the first element must equal the expected stem.
    """
    stripper = RemovePluralSuffix()
    # (input word, expected stem)
    cases = [
        (u"பதிவில்", u"பதிவி"),
        (u"கட்டளைகள்", u"கட்டளை"),
        (u"அவர்கள்", u"அவர்"),
        (u"ஜாதிகள்", u"ஜாதி"),
        (u"மரங்கள்", u"மரம்"),
        (u"சொற்கள்", u"சொல்"),
        (u"சிற்பங்கள்", u"சிற்பம்"),
    ]
    for word, stem in cases:
        result = stripper.removeSuffix(word)
        self.assertTrue(result[1])
        self.assertEqual(result[0], stem)
def test_basic_case_(self):
    """Verify ``CaseFilter.apply`` matches the plural stripper it wraps.

    Each sample word is run through ``RemovePluralSuffix.removeSuffix`` and
    through a ``CaseFilter`` built on that same stripper; the filter output
    must equal the first element of the stripper's result.
    """
    plural_stripper = RemovePluralSuffix()
    wrapped = CaseFilter(plural_stripper)
    # NOTE(review): the stem in each pair is carried along but not asserted
    # against; the check below only compares filter and stripper output.
    pairs = zip(
        [u"பதிவில்", u"கட்டளைகள்", u"அவர்கள்"],
        [u"பதிவி", u"கட்டளை", u"அவர்"],
    )
    for word, _stem in pairs:
        direct = plural_stripper.removeSuffix(word)
        via_filter = wrapped.apply(word)
        self.assertEqual(via_filter, direct[0])
def __init__(self, filename=None, lang="ta", mode="non-web"):
    """Initialise checker state and, unless in web mode, start checking.

    Args:
        filename: Passed to ``self.spellcheck`` when truthy; when falsy,
            ``self.interactive()`` is called instead.
        lang: Language code; stored lower-cased. ``"en"`` fills
            ``self.alphabets`` with the ASCII lowercase letters, any other
            value leaves it as ``None``.
        mode: ``"web"`` stops after setup, so neither ``interactive`` nor
            ``spellcheck`` is invoked from the constructor.
    """
    object.__init__(self)
    self.lang = lang.lower()
    self.filename = filename
    self.user_dict = set()

    # Stages are instantiated and handed to CaseFilter in this fixed order.
    stages = (
        RemovePluralSuffix(),
        RemoveVerbSuffixTense(),
        RemoveCaseSuffix(),
        RemovePrefix(),
    )
    self.case_filter = CaseFilter(*stages)

    self.alphabets = list(string.ascii_lowercase) if self.lang == u"en" else None

    if mode == "web":
        return

    # Non-web modes kick off work straight from the constructor.
    if self.filename:
        self.spellcheck(self.filename)
    else:
        self.interactive()