def check_spelling(checked_word, dist, word_list):
    """
    Return the words of word_list that are within dist of checked_word.

    For every candidate the global alignment against checked_word is
    computed through the student module, and the candidate is kept when
    len(checked_word) + len(candidate) - alignment_score <= dist.

    Arguments:
    checked_word -- the word being checked
    dist -- largest accepted value of the quantity above
    word_list -- iterable of candidate words

    Returns a list of the accepted candidates, in iteration order.

    Side effect: for each accepted candidate, items 1 and 2 of the
    alignment result are printed (presumably the two aligned strings --
    confirm against student.compute_global_alignment).
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    # NOTE(review): 2, 1, 0 are presumably the match / mismatch / gap
    # scores -- confirm against student.build_scoring_matrix.
    scoring = student.build_scoring_matrix(letters, 2, 1, 0)
    checked_len = len(checked_word)

    accepted = []
    for candidate in word_list:
        table = student.compute_alignment_matrix(
            checked_word, candidate, scoring, True)
        alignment = student.compute_global_alignment(
            checked_word, candidate, scoring, table)
        cost = checked_len + len(candidate) - alignment[0]
        if cost <= dist:
            accepted.append(candidate)
            # One pre-formatted string, so the output line is the same
            # whether print is a statement or a function.
            print("%s %s" % (alignment[1], alignment[2]))
    return accepted
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Tally local-alignment scores of seq_x against shuffled copies of seq_y.

    Each trial copies seq_y into a list, shuffles the copy in place with
    random.shuffle, computes the local alignment of seq_x with the
    shuffled copy through the student module and counts the score found
    at index 0 of the result.

    Arguments:
    seq_x -- sequence kept fixed across trials
    seq_y -- sequence whose elements are shuffled (seq_y itself is not
             modified, only a list copy is)
    scoring_matrix -- passed through unchanged to the student functions
    num_trials -- number of shuffles to run

    Returns a dictionary mapping score -> number of trials with that score.

    Side effect: prints the trial index after every trial.
    """
    histogram = {}
    for trial in xrange(num_trials):
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        table = student.compute_alignment_matrix(
            seq_x, shuffled, scoring_matrix, False)
        best = student.compute_local_alignment(
            seq_x, shuffled, scoring_matrix, table)[0]
        if best in histogram:
            histogram[best] += 1
        else:
            histogram[best] = 1
        print(trial)
    return histogram
def distance(word1, word2, s_m):
    """
    Return len(word1) + len(word2) minus the global alignment score.

    The alignment matrix and the global alignment of word1 and word2 are
    computed through the student module using the scoring matrix s_m;
    item 0 of the alignment result is taken as the score.

    Arguments:
    word1, word2 -- the two words to compare
    s_m -- scoring matrix handed to the student alignment functions

    NOTE(review): with a scoring matrix built as
    build_scoring_matrix(alphabet, 2, 1, 0) this value is presumably the
    edit distance between the words -- confirm against the student module.
    """
    a_m = student.compute_alignment_matrix(word1, word2, s_m, True)
    score = student.compute_global_alignment(word1, word2, s_m, a_m)
    return len(word1) + len(word2) - score[0]
for word in checked_set: if word in word_dict: result_set.update(word_dict[word]) result = [] for word in result_set: if distance(checked_word, word, s_m) <= dist: result.append(word) return result timer = time.time() words = set(read_words(WORD_LIST_URL)) new_words = new_keys(words) print len(new_words) new_words = new_keys_dict(new_words) print len(new_words) #print new_keys_dict(new_keys(["cat"])) alphabet = "abcdefghijklmnopqrstuvwxyz" s_m = student.build_scoring_matrix(alphabet, 2, 1, 0) print "Generating took ", time.time() - timer timer = time.time() print "humble", len(check_spelling("humble", 2, new_words, s_m)) print "Time :", time.time() - timer timer = time.time() print "firefly", len(check_spelling("firefly", 2, new_words, s_m)) print "Time :", time.time() - timer timer = time.time() print "fireflyfosjfoijsfoidsj", len( check_spelling("fireflyfosjfoijsfoidsj", 2, new_words, s_m)) print "Time :", time.time() - timer
result_set = set([]) for word in checked_set: if word in word_dict: result_set.update(word_dict[word]) result = [] for word in result_set: if distance(checked_word, word, s_m) <= dist: result.append(word) return result timer = time.time() words = set(read_words(WORD_LIST_URL)) new_words = new_keys(words) print len(new_words) new_words = new_keys_dict(new_words) print len(new_words) # print new_keys_dict(new_keys(["cat"])) alphabet = "abcdefghijklmnopqrstuvwxyz" s_m = student.build_scoring_matrix(alphabet, 2, 1, 0) print "Generating took ", time.time() - timer timer = time.time() print "humble", len(check_spelling("humble", 2, new_words, s_m)) print "Time :", time.time() - timer timer = time.time() print "firefly", len(check_spelling("firefly", 2, new_words, s_m)) print "Time :", time.time() - timer timer = time.time() print "fireflyfosjfoijsfoidsj", len(check_spelling("fireflyfosjfoijsfoidsj", 2, new_words, s_m)) print "Time :", time.time() - timer