Ejemplo n.º 1
0
class TamilTest(unittest.TestCase):
    """Unit tests for the suffix stripping exposed by ``TamilStemmer.stemWord``."""

    def __init__(self, *args, **kwargs):
        """Build the test case and the stemmer instance used by its tests.

        All positional and keyword arguments are forwarded unchanged to
        ``unittest.TestCase.__init__``.
        """
        unittest.TestCase.__init__(self, *args, **kwargs)
        self.ta_stemmer = TamilStemmer()
        # Identity check rather than ``!= None``: the result cannot be
        # influenced by a custom ``__eq__``/``__ne__`` on the stemmer class.
        self.assertIsNotNone(self.ta_stemmer)

    def test_suffix(self):
        """Check that each word in the list stems to its expected form, in order."""
        wordlist = [u'மலைகள்', u'பாடுதல்', u'ஓடினான்']
        expected = [u'மலை', u'பாடு', u'ஓடி']
        stems = [self.ta_stemmer.stemWord(word) for word in wordlist]
        self.assertSequenceEqual(stems, expected)
Ejemplo n.º 2
0
def tastemmer(request,use_json=False):
    """Handle the stemmer form: show it on GET, process the words on POST.

    On POST the ``text_input`` field is split on whitespace, the resulting
    words are passed to ``TamilStemmer().stemWords`` and each input word is
    paired with the corresponding output.

    Args:
        request: Incoming request object; ``method`` and ``POST`` are read.
        use_json: When true, a POST is answered with the (word, stem) pairs
            serialized as JSON instead of the rendered template.

    Returns:
        The rendered ``stemmer.html`` page, a JSON ``HttpResponse`` when
        ``use_json`` is true, or an empty 405 response for any method other
        than GET or POST.
    """
    if request.method == "GET":
        return render(request, 'stemmer.html', {'text_output': u''})
    if request.method != "POST":
        # An ``assert`` disappears under ``python -O``; reject explicitly.
        not_allowed = HttpResponse(status=405)
        not_allowed["Allow"] = "GET, POST"
        return not_allowed

    text_input = request.POST.get("text_input", u"")
    # Real lists, not lazy filter/zip objects: the words are consumed twice
    # (by the stemmer and by the pairing) and the pairs must be serializable.
    words_in = [word for word in re.split(r'\s+', text_input) if word]
    words_out = TamilStemmer().stemWords(words_in)
    data = list(zip(words_in, words_out))

    if use_json:
        json_string = json.dumps(data, ensure_ascii=False)
        # Return the JSON response; previously it was built and then dropped.
        return HttpResponse(json_string, content_type="application/json; charset=utf-8")
    return render(request, 'stemmer.html', {'text_output': data, 'text_input': text_input})
Ejemplo n.º 3
0
def tastemmer(request, use_json=False):
    """Handle the stemmer form: show it on GET, process the words on POST.

    On POST the ``text_input`` field is split on whitespace, the resulting
    words are passed to ``TamilStemmer().stemWords`` and each input word is
    paired with the corresponding output.

    Args:
        request: Incoming request object; ``method`` and ``POST`` are read.
        use_json: When true, a POST is answered with the (word, stem) pairs
            serialized as JSON instead of the rendered template.

    Returns:
        The rendered ``stemmer.html`` page, a JSON ``HttpResponse`` when
        ``use_json`` is true, or an empty 405 response for any method other
        than GET or POST.
    """
    if request.method == "GET":
        return render(request, "stemmer.html", {"text_output": ""})
    if request.method != "POST":
        # An ``assert`` disappears under ``python -O``; reject explicitly.
        not_allowed = HttpResponse(status=405)
        not_allowed["Allow"] = "GET, POST"
        return not_allowed

    text_input = request.POST.get("text_input", "")
    # Raw string so ``\s`` reaches the regex engine as written instead of
    # being treated as an (invalid) string escape sequence.
    words_in = [word for word in re.split(r"\s+", text_input) if word]
    words_out = TamilStemmer().stemWords(words_in)
    data = list(zip(words_in, words_out))

    if use_json:
        json_string = json.dumps(data, ensure_ascii=False)
        # Return the JSON response; previously it was built and then dropped.
        return HttpResponse(
            json_string, content_type="application/json; charset=utf-8"
        )
    return render(
        request, "stemmer.html", {"text_output": data, "text_input": text_input}
    )
Ejemplo n.º 4
0
from pprint import pprint

import tamil
from tamilstemmer import TamilStemmer

# Phrases to stem, each paired with an integer (presumably the number the
# phrase spells out); only the phrase text is used by the loop below.
kv = [
    ('நாற்பத்தி ஐந்து', 45),
    ('ஓர் ஆயிரத்து எழுநூற்று இருபத்தொன்பது', 1729),
    ('ஓர் ஆயிரத்து ஒன்று', 1001),
]

stemmer = TamilStemmer()
for phrase, _number in kv:
    # Split the phrase into words, stem each one, and pretty-print the list.
    phrase_words = tamil.utf8.get_words(phrase)
    pprint(list(map(stemmer.stemWord, phrase_words)))
# Output recorded alongside the example:
# ['நாற்பத்தி', 'ஐந்']
# ['ஓர்', 'ஆயிர', 'எழுநூற்று', 'இருபத்தொன்']
# ['ஓர்', 'ஆயிர', 'ஒன்று']
Ejemplo n.º 5
0
 def __init__(self, *args, **kwargs):
     """Build the test case and the stemmer instance used by its tests.

     All positional and keyword arguments are forwarded unchanged to
     ``unittest.TestCase.__init__``.
     """
     unittest.TestCase.__init__(self, *args, **kwargs)
     self.ta_stemmer = TamilStemmer()
     # Identity check rather than ``!= None``: the result cannot be
     # influenced by a custom ``__eq__``/``__ne__`` on the stemmer class.
     self.assertIsNotNone(self.ta_stemmer)