def find_sug_words(evt):
    """Handle a selection in the word list box.

    Reads the entry at the ``ANCHOR`` position of ``wrd_lstbox``, prints it
    together with the result of checking it against the ``en_US`` dictionary
    extended by the personal word list, and then clears ``sugst_lstbx``.

    Args:
        evt: Event object supplied by the caller; it is not used.
    """
    value = wrd_lstbox.get(ANCHOR)
    print("value", value)

    # Dictionary combining en_US with the project's personal word list.
    d = DictWithPWL("en_US", "Word Dicitionary4.txt")
    print(d.check(value))

    # NOTE(review): the suggestion list box is only cleared here.  The code
    # that inserted the entries of d.suggest(value) into it was disabled in
    # the original, so it is intentionally not repopulated.
    sugst_lstbx.delete(0, END)
    print("Listbox pressed")
def test_DWPWL_empty(tmp_path):
    """Check DictWithPWL behaviour when both word-list paths are None."""
    transient = DictWithPWL("en_US", None, None)

    # Baseline: a known word passes, a misspelling and a new word do not.
    assert transient.check("hello")
    for unknown in ("helo", "Flagen"):
        assert not transient.check(unknown)

    # Adding a word makes it acceptable.
    transient.add("Flagen")
    assert transient.check("Flagen")

    # Removing a word rejects it, and adding it back accepts it again.
    transient.remove("hello")
    hello_after_remove = transient.check("hello")
    assert not hello_after_remove
    transient.add("hello")
    hello_after_add = transient.check("hello")
    assert hello_after_add
def test_DWPWL(tmp_path, pwl_path):
    """Check DictWithPWL with a word-list file and an exclude file."""
    setPWLContents(pwl_path, ["Sazz", "Lozz"])
    other_path = tmp_path / "pel.txt"
    speller = DictWithPWL("en_US", str(pwl_path), str(other_path))

    # Words from the word list and from the base dictionary are accepted.
    for accepted in ("Sazz", "Lozz", "hello"):
        assert speller.check(accepted)
    for rejected in ("helo", "Flagen"):
        assert not speller.check(rejected)

    # An added word is accepted, written to the word list and suggested.
    speller.add("Flagen")
    assert speller.check("Flagen")
    stored_words = getPWLContents(pwl_path)
    assert "Flagen" in stored_words
    assert "Flagen" in speller.suggest("Flagn")
    assert "hello" in speller.suggest("helo")

    # A removed base-dictionary word is neither accepted nor suggested.
    speller.remove("hello")
    assert not speller.check("hello")
    suggestions_after_remove = speller.suggest("helo")
    assert "hello" not in suggestions_after_remove

    # A removed word-list word is no longer accepted.
    speller.remove("Lozz")
    assert not speller.check("Lozz")
class EnchantProxy(object):
    """Wrapper around the enchant library."""

    def __init__(self, mydict=None, lang='it_IT'):
        """Create the spell checker and, optionally, the personal dictionary.

        Args:
            mydict: Path of a file with custom words to use in addition to
                the language dictionary; when falsy no personal dictionary
                is created.
            lang: Language tag passed to enchant (defaults to Italian).

        Raises:
            SpellCheckError: if enchant raises ``DictNotFoundError`` while
                the checker or the personal dictionary is being created.
        """
        self._lang = lang
        self._custom_dict = mydict
        try:
            self._chkr = SpellChecker(lang, filters=[EmailFilter, URLFilter])
            self._pwl = DictWithPWL(lang, mydict) if mydict else None
        except enchant.errors.DictNotFoundError:
            raise SpellCheckError("Dizionario " + lang + " non trovato")

    def check(self, text, chunk_idx):
        """Spell-check ``text`` and return the list of ``Error`` objects.

        Each ``Error`` is built from the misspelled word, the checker's
        suggestions for it and ``chunk_idx``; its ``context`` is set to the
        whole ``text``.  Words reported by the checker are skipped when a
        personal dictionary is defined and accepts them.

        Args:
            text: Text to check.
            chunk_idx: Identifier of the text chunk being processed.

        Returns:
            A list of ``Error`` objects, empty when nothing is reported.
        """
        errors = []
        self._chkr.set_text(text)
        for err in self._chkr:
            # Second chance: the word may be known to the personal dictionary.
            if self._pwl and self._pwl.check(err.word):
                continue
            error = Error(err.word, self._chkr.suggest(err.word), chunk_idx)
            error.context = text
            errors.append(error)
        return errors

    def upd_mydict(self, word):
        """Add ``word`` to the personal dictionary object.

        The word is taken into account from the next call to ``check``.
        Nothing happens when no personal dictionary is defined.

        NOTE(review): the original documentation stated that only the
        in-memory dictionary is changed and that ``add_custom_words`` must be
        used to update the file on disk; whether ``DictWithPWL.add`` also
        writes to the word-list file should be confirmed against enchant.

        Raises:
            SpellCheckError: if the personal dictionary reports the word as
                already added.
        """
        if not self._pwl:
            return
        if self._pwl.is_added(word):
            raise SpellCheckError("Parola già esistente")
        self._pwl.add(word)

    def add_custom_words(self, words):
        """Append the entries of ``words`` to the custom dictionary file.

        The file is read as UTF-8, one word per line.  Words already present
        (in the file or earlier in ``words``) are not written again.  The
        file is rewritten with the words joined by newlines.

        Args:
            words: List of words to add.

        Raises:
            SpellCheckError: if no custom dictionary path was configured.
        """
        if not self._custom_dict:
            raise SpellCheckError("Dizionario personalizzato non presente")

        # The stream must be read before splitting; the context manager
        # guarantees the handle is closed before the file is reopened.
        with codecs.open(self._custom_dict, encoding='utf-8') as src:
            orig_words = src.read().splitlines()

        known = set(orig_words)
        for word in words:
            if word not in known:
                known.add(word)
                orig_words.append(word)

        with codecs.open(
            self._custom_dict, mode='w', encoding='utf-8'
        ) as dst:
            dst.write("\n".join(orig_words))
class Search:
    """Keyword search over pickled index tables with spelling correction.

    All tables are bucketed by ``hashFunc`` (sum of character codes):

    * ``keyweights[h]`` - entries indexed as ``[weight, keyword]``
    * ``hash_table[h]`` - entries indexed as ``[ngram, url, url, ...]``
    * ``ranks[h]``      - entries indexed as ``[url, rank]``
    * ``titles``        - maps a URL to its title
    * ``d``             - enchant dictionary built from ``allkeys.txt``
    """

    ranks = {}
    keyweights = {}
    hash_table = {}
    titles = {}
    d = {}

    def __init__(self):
        """Load the index tables from ``<cwd>/Google_IITB/data``."""
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # these .db files must come from a trusted source.
        self.keyweights = self._load_pickle("keyweights.db")
        self.hash_table = self._load_pickle("data.db")
        self.ranks = self._load_pickle("pageranks.db")
        self.titles = self._load_pickle("titles.db")
        self.d = DictWithPWL("en_US", self._data_path("allkeys.txt"))

    @staticmethod
    def _data_path(name):
        """Return the path of data file ``name`` below the working directory."""
        return str(os.getcwd()) + "/Google_IITB/data/" + name

    def _load_pickle(self, name):
        """Unpickle data file ``name``, closing the file afterwards."""
        with open(self._data_path(name), "rb") as fp:
            return pickle.load(fp)

    def _rank_of(self, url):
        """Return the stored rank of ``url``, or 0 when it has none."""
        for entry in self.ranks.get(self.hashFunc(url), ()):
            if entry[0] == url:
                return entry[1]
        return 0

    def swap(self, listOfUrls, i, j):
        """Exchange the elements at positions ``i`` and ``j`` in place.

        The previous implementation ignored ``j`` and always used ``i - 1``;
        callers that pass ``j == i - 1`` get the same result as before.
        """
        listOfUrls[i], listOfUrls[j] = listOfUrls[j], listOfUrls[i]
        return

    def hashFunc(self, key):
        """Return the sum of the character codes of ``key``."""
        return sum(ord(ch) for ch in key)

    def findinKeyTable(self, key, Table):
        """Return the index of ``key`` inside its bucket of ``Table``.

        Returns ``False`` when the bucket or the key is missing.  Index 0 is
        a valid hit, so callers must test the result with ``is False``
        rather than by truthiness or ``== False``.
        """
        hashkey = self.hashFunc(key)
        if hashkey in Table:
            for i, entry in enumerate(Table[hashkey]):
                if entry[1] == key:
                    return i
        return False

    def spellCheck(self, word):
        """Check ``word`` and propose a replacement when it is misspelled.

        Returns:
            ``True`` when the dictionary accepts ``word``; ``False`` when it
            is rejected and the dictionary offers no suggestion; otherwise
            the lower-cased suggestion with the highest keyword weight
            (the first suggestion when none of them has a weight entry).
        """
        if self.d.check(word):
            return True

        suggest = [candidate.lower() for candidate in self.d.suggest(word)]
        if not suggest:
            # Previously this case raised IndexError on suggest[0].
            return False

        bestweight = 0
        bestword = suggest[0]
        for entry in suggest:
            hashkey = self.hashFunc(entry)
            secKey = self.findinKeyTable(entry, self.keyweights)
            # Identity test: index 0 is a hit and must not be read as False.
            if secKey is not False:
                weight = self.keyweights[hashkey][secKey][0]
                if weight >= bestweight:
                    bestword = self.keyweights[hashkey][secKey][1]
                    bestweight = weight
        return bestword

    def ngrams(self, word):
        """Return every prefix of ``word`` that is at least 3 characters long."""
        return [word[:i] for i in range(3, len(word) + 1)]

    def exactQuery(self, entry):
        """Split ``entry`` on whitespace."""
        return entry.split()

    def Query(self, entry):
        """Return the prefixes (see ``ngrams``) of every lower-cased word."""
        searchlist = []
        for word in entry.split():
            searchlist.extend(self.ngrams(word.lower()))
        return searchlist

    def Sort(self, listOfUrls):
        """Sort ``listOfUrls`` in place by rank, highest first, and return it.

        URLs without a rank entry are treated as rank 0.  The sort is
        stable, so URLs with equal rank keep their relative order.
        """
        listOfUrls.sort(key=self._rank_of, reverse=True)
        return listOfUrls

    def removeRepeats(self, result):
        """Return a new list with duplicates removed, first occurrence kept.

        Elements must be hashable.
        """
        seen = set()
        resultFinal = []
        for item in result:
            if item not in seen:
                seen.add(item)
                resultFinal.append(item)
        return resultFinal

    def primarySort(self, result, matches):
        """Order ``result`` by match count, highest first, without repeats.

        ``matches[i]`` must be the match count of ``result[i]``.  The sort
        is stable, so entries with equal counts keep their incoming order.
        """
        order = sorted(
            range(len(result)), key=lambda idx: matches[idx], reverse=True
        )
        return self.removeRepeats([result[idx] for idx in order])

    def findin(self, key, query, table):
        """Return the index of ``query`` in bucket ``key`` of ``table``, or -1."""
        for i, entry in enumerate(table.get(key, ())):
            if entry[0] == query:
                return i
        return -1

    def numberOfMatches(self, url, result):
        """Count how many elements of ``result`` equal ``url``."""
        return sum(1 for link in result if url == link)

    def search(self, query):
        """Return ``[url, title]`` pairs matching ``query``.

        Every prefix produced by ``Query`` must be present in the index;
        if one is missing the result is an empty list.  Results are ordered
        by number of matching prefixes, then by rank, both descending.  A
        URL with no title (missing or empty) uses the URL itself as title.
        """
        result = []
        for gram in self.Query(query):
            key = self.hashFunc(gram)
            secKey = self.findin(key, gram, self.hash_table)
            if secKey == -1:
                return []
            result.extend(self.hash_table[key][secKey][1:])

        # Rank order first; the stable sort by match count below keeps it
        # as the tie-breaker.
        result = self.Sort(result)

        # Count after sorting so matches[i] lines up with result[i].
        counts = {}
        for link in result:
            counts[link] = counts.get(link, 0) + 1
        matches = [counts[link] for link in result]
        result = self.primarySort(result, matches)

        final = []
        for url in result:
            try:
                title = self.titles[url]
            except (KeyError, TypeError):
                title = ''
            final.append([url, url if title == '' else title])
        return final

    def searchWSC(self, query):
        """Search for ``query`` and report a spelling-corrected alternative.

        Returns:
            A dict with ``'change'`` (whether any word was corrected),
            ``'query'`` (the corrected words joined by ``+``, or ``""`` when
            nothing changed) and ``'search'`` (results for the original,
            uncorrected ``query``).
        """
        change = False
        searchlist = query.split()
        for i, word in enumerate(searchlist):
            bestword = self.spellCheck(word)
            # True: spelled correctly; False: no suggestion - keep the word.
            if bestword is True or bestword is False:
                continue
            searchlist[i] = bestword
            change = True

        result = self.search(query)
        changedEntry = "+".join(searchlist) if change else ""
        return {'change': change, 'query': changedEntry, 'search': result}