def question8():
    """
    Run the two Question 8 spelling checks and print their results.

    Loads the word list with read_words(WORD_LIST_URL), then prints the
    value returned by check_spelling for 'humble' with distance argument 1
    and for 'firefly' with distance argument 2.
    """
    word_list = read_words(WORD_LIST_URL)
    # A single parenthesised argument prints the same text whether print is
    # the Python 2 statement or the Python 3 function.
    print(check_spelling('humble', 1, word_list))
    print(check_spelling('firefly', 2, word_list))
def run_app_q8(checked_word, dist):
    """
    Question 8 of the application.

    Reads the word list from WORD_LIST_PATH through provided.read_words and
    returns whatever check_spelling yields for the given word and distance.
    """
    vocabulary = provided.read_words(WORD_LIST_PATH)
    return check_spelling(checked_word, dist, vocabulary)
def run_ques_9():
    """
    Question 9: time check_spelling_fast on two sample words.

    Loads the word list with provided.read_words(provided.WORD_LIST_URL).
    For ("humble", 1) and then ("firefly", 2) it calls check_spelling_fast,
    prints the returned value, and prints the elapsed wall-clock time in
    seconds as measured with time.time().
    """
    word_list = provided.read_words(provided.WORD_LIST_URL)

    # Build the lookup set once, outside the timed region, so the reported
    # times cover only check_spelling_fast and not the list-to-set copy.
    # NOTE(review): assumes check_spelling_fast does not mutate the set it
    # receives -- confirm before relying on the shared instance.
    word_set = set(word_list)

    for checked_word, dist in (("humble", 1), ("firefly", 2)):
        start = time.time()
        similar_words = check_spelling_fast(checked_word, dist, word_set)
        elapsed = time.time() - start
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(similar_words)
        print(elapsed)
Iterates through word_list and returns the set of all words that are within edit distance dist of the string checked_word. ''' # Set constants ALPHABET = set(list(string.ascii_lowercase)) DIAG_SCORE = 2 OFF_DIAG_SCORE = 1 DASH_SCORE = 0 # contruct scoring matrix over all lower case letters scoring_matrix = student.build_scoring_matrix(ALPHABET, DIAG_SCORE, OFF_DIAG_SCORE, DASH_SCORE) # Init list to store words close_words = [] # Loop over word in word_list for word in word_list: # compute alignment matrix alignment_matrix = student.compute_alignment_matrix(checked_word, word, scoring_matrix, True) # compute score of global alignments score, align_x, align_y = student.compute_global_alignment(checked_word, word, scoring_matrix, alignment_matrix) # calculate edit distance edit_distance = len(checked_word) + len(word) - score # Compare edit_distance and dist if edit_distance <= dist: # save word close_words.append(word) return close_words def answer_Q8() word_list = provided.read_words(WORD_LIST_URL) return (check_spelling("humble", 1, word_list), check_spelling("firefly", 2, word_list))
output: return a subset of word list which the distance between input word < target distance """ result = set() x = len(checked_word) for item in word_list: y = len(item) if abs(x - y) <= dist: alignment_matrix = student.compute_alignment_matrix(checked_word, item, scoring_matrix, True) score = max(map(max, alignment_matrix)) if (x + y - score) <= dist: result = result.union(set([item])) return result WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt" word_list = alg_application4_provided.read_words(WORD_LIST_URL) import time, itertools tStart = time.time() a = check_spelling("humble", 1, word_list) tStop = time.time() print (tStop - tStart) print a tStart = time.time() b = check_spelling("firefly", 2, word_list) tStop = time.time() print (tStop - tStart) print b
input: word, target distance, and word list output: return a subset of word list which the distance between input word < target distance """ result = set() x = len(checked_word) for item in word_list: y = len(item) if abs(x - y) <= dist: alignment_matrix = student.compute_alignment_matrix( checked_word, item, scoring_matrix, True) score = max(map(max, alignment_matrix)) if (x + y - score) <= dist: result = result.union(set([item])) return result WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt" word_list = alg_application4_provided.read_words(WORD_LIST_URL) import time, itertools tStart = time.time() a = check_spelling("humble", 1, word_list) tStop = time.time() print(tStop - tStart) print a tStart = time.time() b = check_spelling("firefly", 2, word_list) tStop = time.time() print(tStop - tStart) print b
if checked_word[:number] in word and checked_word[number + 2:] in word: passed = True # 1 insertion passed = True for number in range(len(checked_word)): if checked_word[:number] not in word or checked_word[number + 1:] not in word: passed = False if not passed: continue count += 1 if sol4_7.edit_distance(checked_word, word, scoring_matrix) <= dist: candidate_words.append(word) print count return set(candidate_words) word_list = app4.read_words(app4.WORD_LIST_URL) for dummy in range(5): start_time = time.time() #set_check_humble = check_spelling('humble', 1, word_list) set_check_firefly = check_spelling('firefly', 2, word_list) print time.time() - start_time #print "Candidates of 'humble' wiht '1' edit distance is " + str(set_check_humble) #print "Candidates of 'firefly' with '2' edit distance is " + str(set_check_firefly)
# Label and display the score-distribution plot.
plt.title("Score Distribution")
plt.xlabel("Score")
plt.ylabel("Normalized Number")
plt.show()

# Compute mean: weighted sum of scores, then divide by 1000.0.
# NOTE(review): 1000.0 is presumably the number of trials behind
# scoring_distribution -- verify against the code that fills it.
mean = 0.0
for score, weight in scoring_distribution.items():
    mean += score * weight
mean = mean / 1000.0

# Compute standard deviation: weighted squared deviations from the mean,
# scaled by the same 1000.0, then square-rooted.
dev = 0.0
for score, weight in scoring_distribution.items():
    dev += math.pow(score - mean, 2) * weight
dev = dev / 1000.0
dev = math.pow(dev, 0.5)

# Single-argument print(...) emits the same text on Python 2 and 3.
print(mean)
print(dev)

# Question 8
word_list = provided.read_words(provided.WORD_LIST_URL)
print(Project_4.check_spelling('humble', 1, word_list))
# Blank separator line; print("") is used because print() would output
# "()" when print is the Python 2 statement.
print("")
print(Project_4.check_spelling('firefly', 2, word_list))