Example #1
0
def lesk(context,word):
    """Select the definition(s) of ``word`` that overlap most with ``context``.

    A simplified Lesk overlap measure: the context and every candidate
    definition are split on single spaces and each non-blank token is
    stemmed with ``stemm.findstem``.  A definition's score is the number of
    stemmed context tokens (repeats counted) that also occur among the
    definition's stems, not counting the target word itself.

    Args:
        context: Space-separated text in which ``word`` is expected to occur.
        word: Target word.  It is compared, after ``strip()``, against the
            stemmed context tokens.

    Returns:
        ``False`` when the stripped ``word`` is not one of the stemmed
        context tokens.  Otherwise a list holding ``row[0]`` (used here as
        the definition text) of every row returned by ``getDefinitions``
        that reaches the highest score, in the order the rows were returned.
        The list is empty when no definitions were returned.
    """
    # Local import: the file's own import block is not part of this function.
    import logging

    log = logging.getLogger(__name__)
    target = word.strip()

    stemmed_context = _lesk_stem_tokens(context)
    log.debug("stemmed context=%s target=%s", stemmed_context, target)

    # Nothing to disambiguate if the target does not occur in the context.
    if target not in stemmed_context:
        return False

    wordnet_obj = wordnet.mysqldbwordnet()
    # The lookup receives ``word`` exactly as the caller passed it.
    definitions = wordnet_obj.getDefinitions(word)

    rank_list = []
    for row in definitions:
        gloss = row[0]
        stemmed_def = set(_lesk_stem_tokens(gloss))
        log.debug("stemmed definition=%s", stemmed_def)
        # Compare against the stripped target so that a padded ``word``
        # cannot count as an overlap with itself.
        rank_count = sum(
            1 for token in stemmed_context
            if token != target and token in stemmed_def
        )
        rank_list.append((gloss, rank_count))

    # No candidate definitions: report "none selected" instead of failing
    # on an empty score list.
    if not rank_list:
        return []

    first_rank = max(count for _, count in rank_list)
    log.debug("first rank = %s", first_rank)
    return [gloss for gloss, count in rank_list if count == first_rank]


def _lesk_stem_tokens(text):
    """Split ``text`` on single spaces and stem every non-blank token."""
    return [stemm.findstem(token)
            for token in text.split(" ")
            if token.strip() != '']
def stem(in_path='output1.txt', out_path='output2.txt'):
    """Stem each line of ``in_path`` and write the stems to ``out_path``.

    The input file is read in full and split on newline characters; every
    resulting piece (including an empty piece after a trailing newline) is
    passed to ``mal.findstem`` and the result is written followed by a
    newline.

    Args:
        in_path: File to read, one word per line.  Defaults to the path this
            function previously hard-coded.
        out_path: File to (over)write with one stem per line.  Defaults to
            the path this function previously hard-coded.
    """
    # ``with`` guarantees both handles are closed, even if stemming raises.
    with open(in_path, 'r') as f_in:
        words = f_in.read().split('\n')
    with open(out_path, 'w') as f_out:
        for item in words:
            f_out.write(mal.findstem(item) + "\n")
Example #3
0
def stem(in_path="1test", out_path="test"):
    """Stem each line of ``in_path`` and write the stems to ``out_path``.

    The input file is read in full and split on newline characters; every
    resulting piece is passed to ``mal.findstem`` and the result is written
    followed by a newline.

    Args:
        in_path: File to read, one word per line.  Defaults to the path this
            function previously hard-coded.
        out_path: File to (over)write with one stem per line.  Defaults to
            the path this function previously hard-coded.
    """
    # Indentation is spaces only; the earlier tab/space mix is rejected by
    # Python 3.  ``with`` closes both files even when stemming raises.
    with open(in_path, 'r') as f_in:
        words = f_in.read().split('\n')
    with open(out_path, "w") as f_out:
        for i in words:
            f_out.write(mal.findstem(i) + "\n")
def stem(in_path='output1.txt', stem_path='output2.txt', out_path='output3.txt'):
    """Stem ``in_path`` line by line, then copy the stemmed chunks onward.

    Step 1: every newline-separated piece of ``in_path`` is passed to
    ``mal.findstem`` and written, followed by a newline, to ``stem_path``.

    Step 2: ``stem_path`` is read back and split on blank lines (two
    consecutive newlines).  With chunks numbered from 1 and ``n`` chunks in
    total, non-empty chunks numbered below ``n - 2`` are written to
    ``out_path`` followed by a blank line, and a non-empty chunk numbered
    exactly ``n - 2`` is written without a trailing separator.  The last two
    chunks are never written.  The number of each chunk written with a
    separator is printed.

    Args:
        in_path: File to read, one word per line.
        stem_path: Intermediate file that receives one stem per line.
        out_path: Final file that receives the selected chunks.

    All three defaults are the paths this function previously hard-coded.
    """
    # Step 1: stem every line.  ``with`` closes each handle deterministically.
    with open(in_path, 'r') as f_in:
        words = f_in.read().split('\n')
    with open(stem_path, 'w') as f_out:
        for item in words:
            f_out.write(mal.findstem(item) + "\n")

    # Step 2: re-read the stems as blank-line-separated chunks.
    with open(stem_path, "r") as f_in2:
        inp = f_in2.read().split("\n\n")

    n = len(inp)
    # Closing this file matters: it was previously left open, so buffered
    # output was not guaranteed to reach disk.
    with open(out_path, "w") as f_out2:
        for c, sent in enumerate(inp, 1):
            if sent == "":
                continue
            if c < (n - 2):
                print(c)
                f_out2.write(sent + "\n\n")
            elif c == n - 2:
                # Last chunk that is kept: no trailing separator.
                f_out2.write(sent)
Example #5
0
import MalayalamStemmer as mal
# Stem every newline-separated piece of output1.txt with mal.findstem and
# write one stem per line to output2.txt.  ``with`` closes both files even if
# stemming raises; the input file was previously never closed.
with open('output1.txt', 'r') as f_in:
    words = f_in.read().split('\n')
with open('output2.txt', 'w') as f_out:
    for item in words:
        stem = mal.findstem(item)
        f_out.write(stem + "\n")

	
Example #6
0
import sys 
import MalayalamWordnet
import MalayalamStemmer
# Look up the definitions of the first '#'-separated field of input.txt:
# the field is stemmed with MalayalamStemmer.findstem and the stem is passed
# to the wordnet object's getDefinitions; element 0 of every returned row is
# printed.
wordnet_object = MalayalamWordnet.mysqldbwordnet()

# output.txt is still created/truncated here as before, although nothing is
# currently written to it (the only writer is the disabled code below).
# ``with`` closes both files when the script finishes or fails.
with open('input.txt', 'r') as f, open('output.txt', 'w') as out_put_file:
    input_text = f.read().split('#')
    root = MalayalamStemmer.findstem(input_text[0])
    row = wordnet_object.getDefinitions(root)
    # Disabled alternative: look up the raw field without stemming.
    # row = wordnet_object.getDefinitions(input_text[0])

    # Disabled: dump the tokens of the second field to the output file.
    # for item in input_text[1].split():
    #     print item.decode('UTF-8')[len(item.decode('UTF-8'))-4:len(item.decode('UTF-8'))]
    #     out_put_file.write(item.decode('UTF-8')+'\t')

    for item in row:
        print(item[0])


# sys.path.append('classes/')
# import mysqldaccess as  m_acc
# 
# 
# k = m_acc.DbAccess(password='******',user='******')
# rows = k.selectDB("SELECT * FROM  sense_table LIMIT 0,100", "hi")
# for item in rows:
#     print item[2]
#     
#k.insertDB("CREATE TABLE A (mm char(20))","error on selection")