def get(word):
    """Build a ``Feature`` describing the letter make-up of ``word``.

    The word is stripped, inner spaces are removed, and it is split into
    letters with ``utf8.get_letters``. A new ``Feature`` is then filled in:

    * ``nletters``  - number of letters, stored as a float.
    * ``unigscore`` - ``unigram_score(letters)``.
    * ``bigscore``  - the largest value returned by ``bigram_scores(letters)``.
    * ``kurils``, ``nedils``, ``ayudhams``, ``vallinams``, ``mellinams``,
      ``idayinams``, ``granthams`` - how many letters ``utf8.classify_letter``
      put in each class, divided by ``nletters``.
    * ``vowels``    - how many letters have a reverse transliteration that
      starts with a, e, i, o or u, divided by ``nletters``.
    * ``first`` / ``last`` - a weight derived from whether the first / last
      letter is in ``utf8.uyir_letters``, ``utf8.mei_letters`` or
      ``utf8.uyirmei_letters``.

    The counters are updated with ``+=``, so ``Feature()`` is presumably
    expected to initialise them (to zero) - confirm against the class.

    :param word: the word to analyse.
    :return: the populated ``Feature`` instance.

    Exceptions raised by ``utf8.classify_letter`` are not caught here.

    NOTE(review): an empty word is not guarded against; the normalisation
    below divides by the letter count and ``letters[0]`` is indexed.
    """
    word = word.strip().replace(u' ', u'')
    letters = utf8.get_letters(word)

    F = Feature()
    F.nletters = len(letters) * 1.0
    F.unigscore = unigram_score(letters)
    F.bigscore = max(bigram_scores(letters))

    # str.startswith accepts a tuple of prefixes; this also avoids reusing the
    # loop variable inside a comprehension, which on Python 2 would overwrite
    # the letter before it is classified.
    vowel_prefixes = ('a', 'e', 'i', 'o', 'u')

    for letter in letters:
        try:
            if reverse_transliterate(letter).startswith(vowel_prefixes):
                F.vowels += 1.0
        except Exception:
            # Best effort: a letter that cannot be reverse-transliterated
            # simply does not contribute to the vowel count.
            pass

        kind = utf8.classify_letter(letter)
        if kind == 'kuril':
            F.kurils += 1
        elif kind == 'nedil':
            F.nedils += 1
        elif kind == 'ayudham':
            F.ayudhams += 1
        elif kind == 'vallinam':
            F.vallinams += 1
        elif kind == 'mellinam':
            F.mellinams += 1
        elif kind == 'idayinam':
            F.idayinams += 1
        elif kind == 'tamil_or_grantham':
            F.granthams += 1
        # Any other kind (e.g. 'english', 'digit') is deliberately not counted.

    # Turn raw counts into per-letter ratios. Each counter is divided exactly
    # once (vallinams used to be divided twice).
    F.kurils /= F.nletters
    F.nedils /= F.nletters
    F.ayudhams /= F.nletters
    F.vallinams /= F.nletters
    F.mellinams /= F.nletters
    F.idayinams /= F.nletters
    F.granthams /= F.nletters
    F.vowels /= F.nletters

    # Weight of the first letter. The "x += x + c" form doubles the current
    # value before adding c; it is kept exactly as it was.
    first_letter = letters[0]
    if first_letter in utf8.uyir_letters:
        F.first += 1.0
    if first_letter in utf8.mei_letters:
        F.first += F.first + 0.25
    if first_letter in utf8.uyirmei_letters:
        F.first += F.first + 0.05

    # Same weighting for the last letter.
    last_letter = letters[-1]
    if last_letter in utf8.uyir_letters:
        F.last += 1.0
    if last_letter in utf8.mei_letters:
        F.last += F.last + 0.25
    if last_letter in utf8.uyirmei_letters:
        F.last += F.last + 0.05

    return F
def test_classified_except(self):
    """``utf8.classify_letter`` must raise ``ValueError`` for ``u'.'``."""
    # Callable form of assertRaises: the call is made by the test framework
    # and the test fails unless ValueError is raised.
    self.assertRaises(ValueError, utf8.classify_letter, u'.')
def demo(self):
    """Print every letter of a sample Tamil word with its classification."""
    sample = u"இதுதாண்டாபோலிசு"
    for letter in utf8.get_letters_iterable(sample):
        kind = utf8.classify_letter(letter)
        # One "<letter> - <kind>" line per letter.
        print("%s - %s" % (letter, kind))
def demo(self):
    """Walk a sample Tamil word and print each letter next to its class."""
    letters = utf8.get_letters_iterable(u"இதுதாண்டாபோலிசு")
    for letter in letters:
        # classify_letter supplies the label shown after the dash.
        label = utf8.classify_letter(letter)
        print("%s - %s" % (letter, label))