class TamilTest(unittest.TestCase):
    """Checks TamilStemmer.stemWord against a small list of sample words."""

    def setUp(self):
        """Build a fresh stemmer before each test method.

        The fixture lives in ``setUp`` rather than an overridden ``__init__``
        so that a failure while constructing the stemmer is reported as an
        error of the individual test instead of breaking test loading, and so
        that the ``TestCase`` constructor signature is left untouched.
        """
        self.ta_stemmer = TamilStemmer()
        # Identity check: ``!= None`` would go through a custom ``__ne__``.
        self.assertIsNotNone(self.ta_stemmer)

    def test_suffix(self):
        """Every word in ``wordlist`` must stem to the matching ``expected`` entry."""
        wordlist = [u'மலைகள்', u'பாடுதல்', u'ஓடினான்']
        expected = [u'மலை', u'பாடு', u'ஓடி']
        stems = [self.ta_stemmer.stemWord(word) for word in wordlist]
        self.assertSequenceEqual(stems, expected)
def tastemmer(request, use_json=False):
    """Stem the whitespace-separated words submitted in ``text_input``.

    GET renders ``stemmer.html`` with an empty ``text_output``. POST splits
    ``request.POST['text_input']`` on whitespace, runs the words through
    ``TamilStemmer().stemWords`` and pairs each input word with its stem.

    Args:
        request: the incoming request; ``method`` and ``POST`` are read.
        use_json: when true, a POST returns the (word, stem) pairs as a
            UTF-8 JSON document instead of the rendered HTML page.

    Returns:
        The rendered template, the JSON ``HttpResponse`` (POST with
        ``use_json``), or a 405 response for any method other than GET/POST.
    """
    if request.method == "GET":
        return render(request, 'stemmer.html', {'text_output': u''})

    if request.method != "POST":
        # Explicit check instead of ``assert``, which is stripped under -O
        # and would otherwise surface as a server error.
        not_allowed = HttpResponse(status=405)
        not_allowed["Allow"] = "GET, POST"
        return not_allowed

    text_input = request.POST.get("text_input", u"")
    # Materialise real lists: on Python 3 ``filter``/``zip`` are one-shot
    # iterators, so the words would be consumed by the stemmer before they
    # could be paired with the stems, and ``json.dumps`` cannot serialise them.
    words_in = [word for word in re.split(r'\s+', text_input) if word]
    words_out = TamilStemmer().stemWords(words_in)
    data = list(zip(words_in, words_out))

    if use_json:
        json_string = json.dumps(data, ensure_ascii=False)
        # The response used to be built and then dropped; return it.
        return HttpResponse(json_string, content_type="application/json; charset=utf-8")

    return render(request, 'stemmer.html', {'text_output': data, 'text_input': text_input})
def tastemmer(request, use_json=False):
    """Stem the whitespace-separated words submitted in ``text_input``.

    GET renders ``stemmer.html`` with an empty ``text_output``. POST splits
    ``request.POST["text_input"]`` on whitespace, runs the words through
    ``TamilStemmer().stemWords`` and pairs each input word with its stem.

    Args:
        request: the incoming request; ``method`` and ``POST`` are read.
        use_json: when true, a POST returns the (word, stem) pairs as a
            UTF-8 JSON document instead of the rendered HTML page.

    Returns:
        The rendered template, the JSON ``HttpResponse`` (POST with
        ``use_json``), or a 405 response for any method other than GET/POST.
    """
    if request.method == "GET":
        return render(request, "stemmer.html", {"text_output": ""})

    if request.method != "POST":
        # Explicit check instead of ``assert``, which is stripped under -O
        # and would otherwise surface as a server error.
        not_allowed = HttpResponse(status=405)
        not_allowed["Allow"] = "GET, POST"
        return not_allowed

    text_input = request.POST.get("text_input", "")
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    words_in = [word for word in re.split(r"\s+", text_input) if word]
    words_out = TamilStemmer().stemWords(words_in)
    data = list(zip(words_in, words_out))

    if use_json:
        json_string = json.dumps(data, ensure_ascii=False)
        # The response used to be built and then dropped; return it.
        return HttpResponse(
            json_string, content_type="application/json; charset=utf-8"
        )

    return render(
        request, "stemmer.html", {"text_output": data, "text_input": text_input}
    )
from pprint import pprint

import tamil
from tamilstemmer import TamilStemmer

# Sample phrases, each paired with an integer (presumably the number the
# phrase spells out -- the integer itself is not used below).
kv = [
    ('நாற்பத்தி ஐந்து', 45),
    ('ஓர் ஆயிரத்து எழுநூற்று இருபத்தொன்பது', 1729),
    ('ஓர் ஆயிரத்து ஒன்று', 1001),
]

stemmer = TamilStemmer()

# For every phrase: split it into words, stem each word, print the stems.
for phrase, _number in kv:
    tokens = tamil.utf8.get_words(phrase)
    stems = list(map(stemmer.stemWord, tokens))
    pprint(stems)

# Recorded output:
# ['நாற்பத்தி', 'ஐந்']
# ['ஓர்', 'ஆயிர', 'எழுநூற்று', 'இருபத்தொன்']
# ['ஓர்', 'ஆயிர', 'ஒன்று']
def __init__(self, *args, **kwargs):
    """Initialise the test case and attach a ``TamilStemmer`` as ``ta_stemmer``.

    All positional and keyword arguments are forwarded unchanged to
    ``unittest.TestCase.__init__``.
    """
    unittest.TestCase.__init__(self, *args, **kwargs)
    self.ta_stemmer = TamilStemmer()
    # Identity check: ``!= None`` would go through a custom ``__ne__``.
    self.assertIsNotNone(self.ta_stemmer)