예제 #1
0
def test_DWPWL(tmp_path, pwl_path):
    """Exercise check/add/remove/suggest on a DictWithPWL built from two files.

    The word-list file at ``pwl_path`` is seeded with "Sazz" and "Lozz"; a
    second file, ``pel.txt`` under ``tmp_path``, is passed as the third
    constructor argument.
    """
    setPWLContents(pwl_path, ["Sazz", "Lozz"])
    pel_path = tmp_path / "pel.txt"
    checker = DictWithPWL("en_US", str(pwl_path), str(pel_path))

    # The seeded words and an ordinary en_US word are accepted ...
    for accepted in ("Sazz", "Lozz", "hello"):
        assert checker.check(accepted)
    # ... while a misspelling and a not-yet-added word are rejected.
    for rejected in ("helo", "Flagen"):
        assert not checker.check(rejected)

    # Adding a word makes it pass the check, writes it to the word-list file
    # and makes it available as a suggestion.
    checker.add("Flagen")
    assert checker.check("Flagen")
    assert "Flagen" in getPWLContents(pwl_path)
    assert "Flagen" in checker.suggest("Flagn")

    # Removing a word hides it from both check() and suggest().
    assert "hello" in checker.suggest("helo")
    checker.remove("hello")
    assert not checker.check("hello")
    assert "hello" not in checker.suggest("helo")

    # Removal also applies to a word that came from the seeded word list.
    checker.remove("Lozz")
    assert not checker.check("Lozz")
def find_sug_words(evt):
    """Look up the word selected in ``wrd_lstbox`` and reset the suggestion box.

    Reads the anchored entry of ``wrd_lstbox``, checks it with a
    ``DictWithPWL`` built from ``en_US`` plus the word list
    ``"Word Dicitionary4.txt"``, prints the outcome and empties
    ``sugst_lstbx``.

    Args:
        evt: Event argument supplied by the caller; not used.
    """
    value = wrd_lstbox.get(ANCHOR)
    print("value", value)

    # Split the current input text into words. The pattern is a raw string
    # because "\w" inside a plain literal is an invalid escape sequence.
    # NOTE(review): the word list is computed but not consumed yet.
    inpt_txt = re.sub(r"[^\w]", " ", Txt_input.get("1.0", END)).split()

    # Spell-check the selected word against the dictionary plus the word list.
    d = DictWithPWL("en_US", "Word Dicitionary4.txt")
    chk_status = d.check(value)
    print(chk_status)

    suggst_str = d.suggest(value)

    sugst_lstbx.delete(0, END)
    # NOTE(review): the suggestions in suggst_str are fetched but deliberately
    # not inserted into sugst_lstbx; the insertion loop was disabled in the
    # original code and stays disabled here.
    print("Listbox pressed")
예제 #3
0
class Search:
	"""Keyword search over pickled index tables, with spelling correction.

	All tables are dictionaries keyed by ``hashFunc(text)``; each value is a
	bucket (list) of entries, as used by the methods below:

	* ``keyweights``: entries ``[weight, keyword]``
	* ``hash_table``: entries ``[keyword, url, url, ...]``
	* ``ranks``: entries ``[url, rank]``

	``titles`` maps a URL directly to its title, and ``d`` is the
	``DictWithPWL`` spell checker.
	"""

	# Class-level placeholders; __init__ rebinds every one of them per instance.
	ranks = {}
	keyweights = {}
	hash_table = {}
	titles = {}
	d = {}

	def __init__(self):
		"""Load the index tables and the dictionary from ./Google_IITB/data."""
		data_dir = str(os.getcwd()) + "/Google_IITB/data/"

		# NOTE(review): pickle.load can execute arbitrary code from the file;
		# these .db files must only ever come from a trusted source.
		with open(data_dir + "keyweights.db", "rb") as fp:
			self.keyweights = pickle.load(fp)

		with open(data_dir + "data.db", "rb") as fp:
			self.hash_table = pickle.load(fp)

		with open(data_dir + "pageranks.db", "rb") as fp:
			self.ranks = pickle.load(fp)

		with open(data_dir + "titles.db", "rb") as fp:
			self.titles = pickle.load(fp)

		self.d = DictWithPWL("en_US", data_dir + "allkeys.txt")

	def swap(self, listOfUrls, i, j):
		"""Exchange the elements at positions ``i`` and ``j`` in place."""
		listOfUrls[i], listOfUrls[j] = listOfUrls[j], listOfUrls[i]

	def hashFunc(self, key):
		"""Return the sum of the code points of ``key`` (the bucket key)."""
		return sum(ord(ch) for ch in key)

	def findinKeyTable(self, key, Table):
		"""Return the position of ``key`` inside its bucket of ``Table``.

		Entries are matched on their second element. Returns ``False`` when
		the bucket or the key is missing. Position 0 compares equal to
		``False``, so callers must test the result with ``is False``.
		"""
		hashkey = self.hashFunc(key)
		if hashkey in Table:
			for i, entry in enumerate(Table[hashkey]):
				if entry[1] == key:
					return i
		return False

	def _keyweightEntry(self, word):
		"""Return the ``[weight, keyword]`` entry for ``word``, or ``None``."""
		position = self.findinKeyTable(word, self.keyweights)
		if position is False:
			return None
		return self.keyweights[self.hashFunc(word)][position]

	def spellCheck(self, word):
		"""Check ``word`` and propose a replacement when it is misspelled.

		Returns:
			``True`` when the dictionary accepts ``word``; otherwise the
			lower-cased suggestion with the highest key weight (the first
			suggestion when none of them has a weight entry); ``False`` when
			the dictionary offers no suggestion at all.
		"""
		if self.d.check(word):
			return True
		suggestions = [candidate.lower() for candidate in self.d.suggest(word)]
		if not suggestions:
			return False
		bestword = suggestions[0]
		bestweight = 0
		# Look at every suggestion, not only the first one.
		for candidate in suggestions:
			entry = self._keyweightEntry(candidate)
			if entry is not None and entry[0] >= bestweight:
				bestweight = entry[0]
				bestword = entry[1]
		return bestword

	def ngrams(self, word):
		"""Return every prefix of ``word`` with at least three characters.

		Words shorter than three characters produce an empty list.
		"""
		return [word[:end] for end in range(3, len(word) + 1)]

	def exactQuery(self, entry):
		"""Split ``entry`` on whitespace."""
		return entry.split()

	def Query(self, entry):
		"""Return the prefixes (see ``ngrams``) of every lower-cased word."""
		searchlist = []
		for word in entry.split():
			searchlist.extend(self.ngrams(word.lower()))
		return searchlist

	def _rankKey(self, url):
		"""Return a sort key for ``url``: ranked URLs sort above unranked ones."""
		for entry in self.ranks.get(self.hashFunc(url), ()):
			if entry[0] == url:
				return (1, entry[1])
		return (0, 0)

	def Sort(self, listOfUrls):
		"""Sort ``listOfUrls`` in place by descending rank and return it.

		The sort is stable; URLs without an entry in ``ranks`` go last.
		"""
		listOfUrls.sort(key=self._rankKey, reverse=True)
		return listOfUrls

	def removeRepeats(self, result):
		"""Return a new list with the first occurrence of each item, in order.

		Items must be hashable.
		"""
		seen = set()
		unique = []
		for item in result:
			if item not in seen:
				seen.add(item)
				unique.append(item)
		return unique

	def primarySort(self, result, matches):
		"""Order ``result`` by descending ``matches`` and drop duplicates.

		``matches[k]`` is the score of ``result[k]``. The sort is stable, so
		items with equal scores keep their incoming relative order.
		"""
		order = sorted(range(len(result)), key=lambda idx: matches[idx], reverse=True)
		return self.removeRepeats([result[idx] for idx in order])

	def findin(self, key, query, table):
		"""Return the position of ``query`` in bucket ``key`` of ``table``.

		Entries are matched on their first element. Returns -1 when the
		bucket or the query is missing.
		"""
		if key not in table:
			return -1
		for i, entry in enumerate(table[key]):
			if entry[0] == query:
				return i
		return -1

	def numberOfMatches(self, url, result):
		"""Count how many items of ``result`` equal ``url``."""
		return sum(1 for link in result if url == link)

	def search(self, query):
		"""Return ``[url, title]`` pairs for ``query``, best match first.

		Every prefix produced by ``Query`` must be present in ``hash_table``;
		if one is missing the result is empty. URLs are ordered by how many
		prefixes they matched, then by rank, and each URL appears once. A URL
		without a (non-empty) title is used as its own title.
		"""
		result = []
		for term in self.Query(query):
			key = self.hashFunc(term)
			secKey = self.findin(key, term, self.hash_table)
			if secKey == -1:
				return []
			result.extend(self.hash_table[key][secKey][1:])

		# Count occurrences once, so the scores stay attached to their URL
		# regardless of how the list is reordered afterwards.
		counts = {}
		for link in result:
			counts[link] = counts.get(link, 0) + 1

		# Rank order first; the stable sort by match count then keeps the
		# rank order among URLs with the same number of matches.
		result = self.Sort(result)
		matches = [counts[link] for link in result]
		result = self.primarySort(result, matches)

		final = []
		for link in result:
			title = self.titles.get(link, '')
			final.append([link, link if title == '' else title])
		return final

	def searchWSC(self, query):
		"""Search for ``query`` and report a spelling-corrected alternative.

		The search itself runs on the query as typed. Words the dictionary
		rejects are replaced by their best suggestion; words without any
		suggestion are kept unchanged.

		Returns:
			A dict with ``'change'`` (whether any word was corrected),
			``'query'`` (the corrected words joined with ``"+"``, or ``""``
			when nothing changed) and ``'search'`` (the result of ``search``).
		"""
		change = False
		searchlist = query.split()
		for i, word in enumerate(searchlist):
			bestword = self.spellCheck(word)
			# True: spelled correctly; False: no suggestion available.
			if bestword is True or bestword is False:
				continue
			searchlist[i] = bestword
			change = True
		result = self.search(query)
		changedEntry = "+".join(searchlist) if change else ""
		return {'change' : change,'query' : changedEntry , 'search' : result}