Esempio n. 1
0
 def get_ending(self, new_word, n=7):
     """Return up to ``n`` candidate words closest to ``new_word``.

     Candidates are obtained from ``self.full_words`` for the value that
     ``self.nearest_begin`` returns for ``new_word``, de-duplicated with
     ``tools.unique_list``, ranked by ``knn.nearest_k`` and finally passed
     through ``distance.decode_digraphs``.

     Args:
         new_word: Query word.  Presumably in the digraph-encoded form,
             since results are decoded before being returned -- TODO
             confirm against callers.
         n: Maximum number of words to return.  Clamped to the number of
             unique candidates.

     Returns:
         A list with one decoded word per item returned by
         ``knn.nearest_k``.
     """
     begin = self.nearest_begin(new_word)
     words = tools.unique_list(self.full_words(begin))
     # Never ask the k-NN search for more neighbours than there are
     # candidates.
     k = min(n, len(words))
     nearest_words = knn.nearest_k(new_word, words, k=k)
     return [distance.decode_digraphs(word_i) for word_i in nearest_words]
Esempio n. 2
0
def build_forms_histogram(filename, forms2basic, hist_size=0):
    """Build a histogram of the basic forms that occur in a text file.

    The file is read with ``tools.read_text`` (``clean_txt=False``), split
    into words by ``tools.find_words`` and every word is passed through
    ``code_digraphs``.  Words that are keys of ``forms2basic`` are replaced
    by their mapped value; all other words are dropped.  The mapped values
    are de-duplicated with ``tools.unique_list`` before being handed to
    ``build_histogram``.

    Args:
        filename: Passed to ``tools.read_text`` (presumably a file path).
        forms2basic: Container supporting ``in`` and ``[]``; presumably maps
            an encoded word form to its basic form.
        hist_size: Forwarded to ``build_histogram`` as ``size``.

    Returns:
        The result of ``build_histogram`` for the unique mapped forms, with
        ``laplace_smoothing=True``.
    """
    raw_text = tools.read_text(filename, clean_txt=False)
    encoded_words = [code_digraphs(token) for token in tools.find_words(raw_text)]

    # Keep only the words that have an entry in the mapping.
    basic_forms = []
    for encoded in encoded_words:
        if encoded in forms2basic:
            basic_forms.append(forms2basic[encoded])

    unique_forms = tools.unique_list(basic_forms)
    return build_histogram(unique_forms, laplace_smoothing=True, size=hist_size)
Esempio n. 3
0
def build_forms_histogram(filename, forms2basic, hist_size=0):
    """Return a histogram over the distinct basic forms seen in ``filename``.

    Steps, in order:
      1. read the text with ``tools.read_text(filename, clean_txt=False)``;
      2. extract words with ``tools.find_words`` and apply
         ``code_digraphs`` to each of them;
      3. look up every word that is present in ``forms2basic`` and discard
         the rest;
      4. de-duplicate the looked-up values with ``tools.unique_list``;
      5. call ``build_histogram`` with ``laplace_smoothing=True`` and
         ``size=hist_size``.

    Args:
        filename: Argument for ``tools.read_text`` (presumably a path).
        forms2basic: Lookup table queried with ``in`` and ``[]``.
        hist_size: Value passed to ``build_histogram`` as ``size``.

    Returns:
        Whatever ``build_histogram`` produces for the de-duplicated forms.
    """
    contents = tools.read_text(filename, clean_txt=False)
    coded = list(map(code_digraphs, tools.find_words(contents)))
    # Lazily select the words the mapping knows about, then translate them.
    recognised = (item for item in coded if item in forms2basic)
    distinct = tools.unique_list([forms2basic[item] for item in recognised])
    return build_histogram(distinct, laplace_smoothing=True, size=hist_size)