Example #1
0
def stemm(token_words):
    """Stem a list of Malayalam tokens with libindic's Stemmer.

    Writes the tokens one-per-line to ``temp_file.txt`` (side effect kept
    for downstream consumers), strips list-repr noise characters from the
    re-read text, and returns the stemmer's result dict for the cleaned
    string.

    :param token_words: iterable of token strings to stem
    :return: dict mapping each word to its stemming output
             (as produced by ``Stemmer.stem``)
    """
    print("\nStemmed Text..")
    exceptions = ['ഇതെല്ലാം']

    # Persist the split words; `with` guarantees the handle is closed and
    # UTF-8 avoids platform-default-codec failures on Malayalam text.
    with open('temp_file.txt', 'w', encoding='utf-8') as v:
        v.writelines("%s\n" % item for item in token_words)

    with open('temp_file.txt', 'r', encoding='utf-8') as u:
        str1 = ''.join(u.readlines())

    # Replace every noise character with a space in one C-level pass
    # (the original re-scanned and rebuilt the string per character).
    noise = "[]',\n"
    str1 = str1.translate(str.maketrans({c: " " for c in noise}))

    tokenlist = str1.split()  # split cleaned string into a word list
    # enumerate fixes the original bug where tokenlist.index(x) returned
    # the index of the FIRST occurrence even for repeated exception words.
    index_nos = [i for i, word in enumerate(tokenlist) if word in exceptions]

    stemmer = Stemmer()
    result = stemmer.stem(language='malayalam', text=str1)

    return result
Example #2
0
File: mean.py Project: roseanil/SA
# Open the output files up front (UTF-8 so Malayalam text round-trips).
# NOTE(review): f2 and x are never used in the visible code — confirm
# they are needed further down the (truncated) script.
f = open("output.txt", "w", encoding="utf-8")
f2 = open("test.txt", "w", encoding="utf-8")
x = "\n"
# Read the raw Malayalam corpus and echo it into output.txt.
mal = open('malayalam.txt', 'r').read()
f.write(mal)
f.write("\n")
f.write("\n")

# Stemmer is imported elsewhere — presumably libindic.stemmer.Stemmer; verify.
stemmer = Stemmer()
sentence_mal = mal.split(".")
# Stem (the original comment said "lenmatize", i.e. lemmatize) every word of
# each sentence, rebuilding the text with '.' as the sentence separator.
strings1 = ""
for i in sentence_mal:
    result = stemmer.stem(language='ml_IN', text=i)
    for word, output in result.items():
        strings1 = strings1 + " " + output['stem']
    strings1 = strings1 + "."

# Drop punctuation characters and Malayalam stop suffixes.
# NOTE(review): iterating a string yields single CHARACTERS, so multi-char
# stopwords like "മാ൪" can never match here — likely meant to iterate words.
strings2 = ""
punctuations = ['?', ':', '!', ',', ';']
stopwords_mal = ["ൽ", "ഉം", "മാ൪", "ആം", "കൾ"]
for i in strings1:
    if i not in punctuations and i not in stopwords_mal:
        strings2 = strings2 + i
print(strings2)
# Split the cleaned text on ". " — this yields sentence chunks, not words,
# despite the original "tokenize into words" comment.
word_mal = strings2.split(". ")
for i in range(len(word_mal)):
Example #3
0
from libindic.stemmer import Stemmer

# Demo: stem a short Malayalam phrase and show, for every token,
# its stem and the inflection the stemmer detected.
malayalam_stemmer = Stemmer()
stem_map = malayalam_stemmer.stem(language='malayalam', text='രാമന്റെ വീട്ടിലേക്ക്')
for token in stem_map:
    info = stem_map[token]
    print(token, " : ", info['stem'], " : ", info['inflection'])