def get_ending(self, new_word, n=7):
    """Return up to ``n`` decoded words that are nearest to ``new_word``.

    Candidates are obtained from ``self.full_words`` applied to the result
    of ``self.nearest_begin(new_word)`` and de-duplicated with
    ``tools.unique_list``. The neighbour search itself is delegated to
    ``knn.nearest_k``; each returned word is passed through
    ``distance.decode_digraphs`` before being handed back.

    Args:
        new_word: Query word forwarded to ``self.nearest_begin`` and
            ``knn.nearest_k``.
        n: Maximum number of neighbours requested; clamped to the number
            of available candidates.

    Returns:
        A list with the decoded results of ``knn.nearest_k``.
    """
    candidates = tools.unique_list(
        self.full_words(self.nearest_begin(new_word))
    )
    # Never request more neighbours than there are candidates.
    k = min(n, len(candidates))
    closest = knn.nearest_k(new_word, candidates, k=k)
    return [distance.decode_digraphs(neighbour) for neighbour in closest]
def build_forms_histogram(filename, forms2basic, hist_size=0):
    """Build a histogram of the basic forms found in a text file.

    The file is read with ``tools.read_text`` (``clean_txt=False``), split
    into words by ``tools.find_words`` and each word is passed through
    ``code_digraphs``. Words that are keys of ``forms2basic`` are replaced
    by their mapped value; all other words are dropped. The mapped values
    are de-duplicated with ``tools.unique_list`` before the histogram is
    built.

    Args:
        filename: Path handed to ``tools.read_text``.
        forms2basic: Mapping from a coded word to its basic form.
        hist_size: Forwarded to ``build_histogram`` as ``size``.

    Returns:
        Whatever ``build_histogram`` returns when called with Laplace
        smoothing enabled.
    """
    raw_text = tools.read_text(filename, clean_txt=False)
    coded_words = [code_digraphs(token) for token in tools.find_words(raw_text)]

    basic_forms = []
    for coded in coded_words:
        # Skip words with no known basic form.
        if coded in forms2basic:
            basic_forms.append(forms2basic[coded])

    distinct_forms = tools.unique_list(basic_forms)
    return build_histogram(distinct_forms, laplace_smoothing=True, size=hist_size)
def build_forms_histogram(filename, forms2basic, hist_size=0):
    """Return a Laplace-smoothed histogram of basic forms seen in ``filename``.

    Steps:
      1. Read the file via ``tools.read_text`` with ``clean_txt=False``.
      2. Extract words with ``tools.find_words`` and encode each one with
         ``code_digraphs``.
      3. Translate every encoded word that is a key of ``forms2basic`` to
         its mapped value, ignoring the rest.
      4. Remove duplicates with ``tools.unique_list``.
      5. Pass the result to ``build_histogram`` with
         ``laplace_smoothing=True`` and ``size=hist_size``.

    Args:
        filename: Path handed to ``tools.read_text``.
        forms2basic: Mapping from an encoded word to its basic form.
        hist_size: Forwarded to ``build_histogram`` as ``size``.

    Returns:
        The value produced by ``build_histogram``.
    """
    contents = tools.read_text(filename, clean_txt=False)
    encoded = map(code_digraphs, tools.find_words(contents))
    known_forms = [forms2basic[item] for item in encoded if item in forms2basic]
    return build_histogram(
        tools.unique_list(known_forms),
        laplace_smoothing=True,
        size=hist_size,
    )