def next_tamil_letter(self):
    """Yield the Tamil letters found in the corpus file, one at a time.

    The file named by ``self.filename`` is opened as UTF-8 text and the open
    handle is stored on ``self.handle``. The whole file is read, split with
    ``utf8.get_letters_iterable`` and only the items accepted by
    ``utf8.istamil`` are yielded. The file is closed when the ``with`` block
    is left.
    """
    with codecs.open(self.filename, "r", "utf-8") as corpus:
        self.handle = corpus
        corpus_text = self.handle.read()
        for candidate in utf8.get_letters_iterable(corpus_text):
            # Skip anything the library does not recognise as Tamil.
            if utf8.istamil(candidate):
                yield candidate
def test_letter_extract_yield_with_ascii(self):
    """Check letter streaming on a string mixing Tamil and ASCII words.

    The number of items produced by ``utf8.get_letters_iterable`` must equal
    the length of ``utf8.get_letters`` for the same text, and the fourth item
    from the end must be the ASCII letter ``a``.
    """
    mixed_text = u"கூவிளம் is என்பது also என்ன a சீர்"
    streamed = list(utf8.get_letters_iterable(mixed_text))
    reference = utf8.get_letters(mixed_text)
    assert len(streamed) == len(reference)
    self.assertEqual(streamed[-4], u"a")
def test_letter_extract_yield(self):
    """Check letter streaming on a Tamil-only phrase.

    Collects everything ``utf8.get_letters_iterable`` yields, asserts the
    item count, prints each item with its position, and asserts that the
    final item is ``ர்``.
    """
    collected = list(utf8.get_letters_iterable(u"கூவிளம் என்பது என்ன சீர்"))
    # NOTE(review): the expected count is hard-coded -- confirm it matches
    # the tokenizer version this test runs against.
    assert len(collected) == 15
    for index, item in enumerate(collected):
        print(u"%d %s" % (index, item))
    # The last collected item is the same value the print loop ends on.
    assert collected[-1] == u"ர்"
def test_letter_extract_yield_with_ascii(self):
    """Check letter streaming on a string mixing Tamil and ASCII words.

    Collects everything ``utf8.get_letters_iterable`` yields, prints the
    item count, asserts that count, prints each item with its position, and
    asserts that the fourth item from the end is the ASCII letter ``a``.
    """
    letters = list(
        utf8.get_letters_iterable(u"கூவிளம் is என்பது also என்ன a சீர்"))
    # Call-form print with a single pre-formatted argument: the former
    # ``print "..." , n`` statement is a SyntaxError on Python 3. This form
    # emits the same text on both Python 2 and Python 3.
    print("%s %d" % ("len ==== > ", len(letters)))
    # NOTE(review): the expected count is hard-coded -- confirm it matches
    # the tokenizer version this test runs against.
    assert len(letters) == 25
    for pos, letter in enumerate(letters):
        print(u"%d %s" % (pos, letter))
    assert letters[-4] == u"a"
def test_letter_extract_yield(self):
    """Check letter streaming on a Tamil-only phrase.

    Collects everything ``utf8.get_letters_iterable`` yields, prints the
    item count, asserts that count, prints each item with its position, and
    asserts that the final item is ``ர்``.
    """
    letters = list(utf8.get_letters_iterable(u"கூவிளம் என்பது என்ன சீர்"))
    # Call-form print with a single pre-formatted argument: the former
    # ``print "..." , n`` statement is a SyntaxError on Python 3. This form
    # emits the same text on both Python 2 and Python 3.
    print("%s %d" % ("len ==== > ", len(letters)))
    # NOTE(review): the expected count is hard-coded -- confirm it matches
    # the tokenizer version this test runs against.
    assert len(letters) == 15
    for pos, letter in enumerate(letters):
        print(u"%d %s" % (pos, letter))
    # Index the list directly rather than relying on the loop variable
    # surviving past the end of the loop; it is the same final item.
    assert letters[-1] == u"ர்"
def test_letter_extract_yield(self):
    """Check that streaming letters agrees with ``utf8.get_letters``.

    For a Tamil-only phrase, the items yielded by
    ``utf8.get_letters_iterable`` must number 16, must match the length of
    the ``utf8.get_letters`` result, and must end with ``ர்``.
    """
    phrase = u"கூவிளம் என்பது என்ன சீர்"
    reference = utf8.get_letters(phrase)
    streamed = [item for item in utf8.get_letters_iterable(phrase)]
    streamed_count = len(streamed)
    assert streamed_count == 16
    assert streamed_count == len(reference)
    assert streamed[-1] == u"ர்"
def test_letter_extract_yield_with_ascii(self):
    """Check letter streaming on a string mixing Tamil and ASCII words.

    Collects everything ``utf8.get_letters_iterable`` yields, prints the
    item count, asserts that count, prints each item with its position, and
    asserts that the fourth item from the end is the ASCII letter ``a``.
    """
    ta_str = u"கூவிளம் is என்பது also என்ன a சீர்"
    letters = [l for l in utf8.get_letters_iterable(ta_str)]
    # The former ``print "..." , n`` statement does not parse on Python 3.
    # Formatting into one string first keeps the emitted text the same on
    # Python 2 and Python 3.
    print("%s %d" % ("len ==== > ", len(letters)))
    # NOTE(review): the expected count is hard-coded -- confirm it matches
    # the tokenizer version this test runs against.
    assert len(letters) == 25
    for pos, letter in enumerate(letters):
        print(u"%d %s" % (pos, letter))
    assert letters[-4] == u"a"
def test_letter_extract_yield_with_ascii(self):
    """Check letter streaming on a string mixing Tamil and ASCII words.

    The items yielded by ``utf8.get_letters_iterable`` must match the length
    of the ``utf8.get_letters`` result for the same text, and the fourth item
    from the end must be the ASCII letter ``a``. Both counts are printed;
    the per-item listing is printed only when ``LINUX`` is truthy.
    """
    mixed_text = u"கூவிளம் is என்பது also என்ன a சீர்"
    streamed = list(utf8.get_letters_iterable(mixed_text))
    reference = utf8.get_letters(mixed_text)
    print("len ==== > ", len(streamed), "get_letters CALL = ", len(reference))
    assert len(streamed) == len(reference)
    if LINUX:
        for index, item in enumerate(streamed):
            print(u"%d %s" % (index, item))
    self.assertEqual(streamed[-4], u"a")
def test_letter_extract_yield(self):
    """Check that streaming letters agrees with ``utf8.get_letters``.

    For a Tamil-only phrase, the items yielded by
    ``utf8.get_letters_iterable`` must number 16, must match the length of
    the ``utf8.get_letters`` result, and must end with ``ர்``. Counts are
    printed along the way; the per-item listing is printed only when
    ``LINUX`` is truthy.
    """
    phrase = u"கூவிளம் என்பது என்ன சீர்"
    reference = utf8.get_letters(phrase)
    streamed = list(utf8.get_letters_iterable(phrase))
    print("len ==== > ", len(streamed))
    assert len(streamed) == 16
    print("len ==== > ", len(streamed), "get_letters CALL = ", len(reference))
    assert len(streamed) == len(reference)
    if LINUX:
        for index, item in enumerate(streamed):
            print(u"%d %s" % (index, item))
    # The last streamed item is the value the original loop variable held
    # after the loop finished.
    assert streamed[-1] == u"ர்"
def demo(self):
    """Print each letter of a sample Tamil word with its classification.

    The word is split with ``utf8.get_letters_iterable`` and every item is
    printed next to the value returned by ``utf8.classify_letter``.
    """
    sample_word = u"இதுதாண்டாபோலிசு"
    for letter in utf8.get_letters_iterable(sample_word):
        letter_class = utf8.classify_letter(letter)
        print("%s - %s" % (letter, letter_class))
def next_tamil_letter(self):
    """Yield the Tamil letters found in the corpus file, one at a time.

    The file named by ``self.filename`` is opened as UTF-8 text and the open
    handle is stored on ``self.handle``. The whole file is read, split with
    ``utf8.get_letters_iterable`` and only the items for which
    ``utf8.istamil`` is true are yielded.

    The file is opened in a ``with`` block so it is closed once the
    generator is exhausted or closed; previously the handle was never
    closed.
    """
    with codecs.open(self.filename, 'r', 'utf-8') as handle:
        # Keep the attribute assignment so code that inspects
        # ``self.handle`` still finds the handle there.
        self.handle = handle
        for letter in utf8.get_letters_iterable(handle.read()):
            if utf8.istamil(letter):
                yield letter
def demo(self):
    """Show how a sample Tamil word breaks into classified letters.

    Each item yielded by ``utf8.get_letters_iterable`` for the sample word is
    printed together with the result of ``utf8.classify_letter`` for it.
    """
    row_format = "%s - %s"
    pieces = utf8.get_letters_iterable(u"இதுதாண்டாபோலிசு")
    for piece in pieces:
        print(row_format % (piece, utf8.classify_letter(piece)))