def check_spelling(checked_word, dist, word_list):
    """
    Return the words of word_list within edit distance dist of checked_word.

    The edit distance of a pair is computed as
    len(checked_word) + len(word) - score, where score is the first element
    of the result of student.compute_global_alignment under a scoring matrix
    built over the lowercase letters with the values 2, 1 and 0.

    checked_word: the word being checked
    dist: largest edit distance (inclusive) a word may have and be kept
    word_list: iterable of candidate words

    Returns a list of the matching words, in word_list order.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    matrix = student.build_scoring_matrix(letters, 2, 1, 0)

    def edit_distance(candidate):
        """Edit distance between checked_word and candidate."""
        # NOTE(review): the True flag presumably selects the global
        # alignment matrix -- confirm against the student module.
        table = student.compute_alignment_matrix(checked_word, candidate,
                                                 matrix, True)
        alignment = student.compute_global_alignment(checked_word, candidate,
                                                     matrix, table)
        return len(checked_word) + len(candidate) - alignment[0]

    return [candidate for candidate in word_list
            if edit_distance(candidate) <= dist]
def check_spelling(checked_word, dist, word_list):
    """
    Return the words of word_list within edit distance dist of checked_word.

    The edit distance of a pair is len(checked_word) + len(word) - score,
    where score is the global alignment score under a scoring matrix built
    over the lowercase letters with the values 2, 1 and 0.

    checked_word: the word being checked
    dist: largest edit distance (inclusive) a word may have and be kept
    word_list: iterable of candidate words

    Returns a list of the matching words, in word_list order.
    """
    scoring_matrix = student.build_scoring_matrix(
        set('abcdefghijklmnopqrstuvwxyz'), 2, 1, 0)

    def score(x, y):
        """Global alignment score of x and y under scoring_matrix."""
        # NOTE(review): the True flag presumably selects the global
        # alignment matrix -- confirm against the student module.
        alignment_matrix = student.compute_alignment_matrix(
            x, y, scoring_matrix, True)
        return student.compute_global_alignment(
            x, y, scoring_matrix, alignment_matrix)[0]

    # "<=" rather than "==": a word closer than dist is still within dist,
    # so it must not be dropped from the result.
    return [word for word in word_list
            if len(checked_word) + len(word) - score(checked_word, word)
            <= dist]
def check_spelling(checked_word, dist, word_list):
    """
    Collect every distinct word of word_list whose edit distance to
    checked_word is at most dist.

    The edit distance of a pair is len(word) + len(checked_word) - score,
    where score is the first element returned by student.global_alignment
    for a scoring matrix built with the values 2, 1 and 0.

    Returns the matching words as a set.
    """
    # The alphabet handed to the scoring matrix is the lowercase letters
    # plus the '-' character.
    symbols = string.ascii_lowercase + '-'
    scoring = student.build_scoring_matrix(symbols, 2, 1, 0)

    def is_close(candidate):
        """True when candidate lies within dist edits of checked_word."""
        best = student.global_alignment(candidate, checked_word, scoring)[0]
        return len(candidate) + len(checked_word) - best <= dist

    # Duplicates are removed up front so each distinct word is aligned once.
    return set(candidate for candidate in set(word_list)
               if is_close(candidate))
def question7(seq_x, seq_y):
    """
    Compute and print the edit distance between seq_x and seq_y.

    A scoring matrix over the lowercase letters is built with
    diag_score 2, off_diag_score 1 and dash_score 0. The sequences are
    aligned with student.compute_global_alignment and the edit distance is
    taken as len(seq_x) + len(seq_y) - score.

    seq_x, seq_y: the two sequences to compare

    Prints the edit distance followed by the two aligned sequences.
    Returns None.
    """
    diag_score = 2
    off_diag_score = 1
    dash_score = 0
    alphabet = "abcdefghijklmnopqrstuvwxyz"

    score_matrix = student.build_scoring_matrix(alphabet, diag_score,
                                                off_diag_score, dash_score)
    # NOTE(review): the True flag presumably selects the global alignment
    # matrix -- confirm against the student module.
    align_matrix = student.compute_alignment_matrix(seq_x, seq_y,
                                                    score_matrix, True)
    score, align_x, align_y = student.compute_global_alignment(
        seq_x, seq_y, score_matrix, align_matrix)

    edit_distance = len(seq_x) + len(seq_y) - score

    # Single-argument print(...) produces the same output under Python 2
    # (parenthesised expression) and Python 3 (function call).
    print("Edit distance: " + str(edit_distance))
    print(align_x)
    print(align_y)
def check_spelling(checked_word, dist, word_list):
    """
    Return the words of word_list within edit distance dist of checked_word.

    Each candidate is globally aligned against checked_word using a scoring
    matrix over the lowercase letters (diag 2, off-diag 1, dash 0); its edit
    distance is len(checked_word) + len(word) - alignment score.

    Returns a list of the matching words, in word_list order.
    """
    diag, off_diag, dash = 2, 1, 0
    letters = set('abcdefghijklmnopqrstuvwxyz')
    target_len = len(checked_word)
    scores = student.build_scoring_matrix(letters, diag, off_diag, dash)

    def edit_distance(candidate):
        """Edit distance between checked_word and candidate."""
        # NOTE(review): the True flag presumably selects the global
        # alignment matrix -- confirm against the student module.
        table = student.compute_alignment_matrix(checked_word, candidate,
                                                 scores, True)
        best = student.compute_global_alignment(checked_word, candidate,
                                                scores, table)[0]
        return target_len + len(candidate) - best

    return [candidate for candidate in word_list
            if edit_distance(candidate) <= dist]
def check_spelling(checked_word, dist, word_list):
    """
    Find all words of word_list at most dist edits away from checked_word.

    The edit distance is len(checked_word) + len(word) - score, with score
    taken from student.compute_global_alignment under a scoring matrix over
    the lowercase letters (diag 2, off-diag 1, dash 0).

    Returns a list of the matching words, in word_list order.
    """
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    diag, off_diag, dash = 2, 1, 0
    scoring = student.build_scoring_matrix(lowercase, diag, off_diag, dash)

    def edit_distance(candidate):
        """Edit distance between checked_word and candidate."""
        # NOTE(review): the True flag presumably selects the global
        # alignment matrix -- confirm against the student module.
        table = student.compute_alignment_matrix(checked_word, candidate,
                                                 scoring, True)
        # The aligned strings are returned too but only the score is needed.
        best, _, _ = student.compute_global_alignment(checked_word, candidate,
                                                      scoring, table)
        return len(checked_word) + len(candidate) - best

    return [candidate for candidate in word_list
            if edit_distance(candidate) <= dist]
def check_spelling(checked_word, dist, word_list):
    """
    input: the word to check, the maximum edit distance dist, and the
    word_list to search
    output: the set of all words in word_list that are within edit
    distance dist of the string checked_word
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    diag, off_diag, dash = 2, 1, 0
    scoring = student.build_scoring_matrix(letters, diag, off_diag, dash)

    def is_close(candidate):
        """True when candidate lies within dist edits of checked_word."""
        # NOTE(review): the True flag presumably selects the global
        # alignment matrix -- confirm against the student module.
        table = student.compute_alignment_matrix(checked_word, candidate,
                                                 scoring, True)
        aligned = student.compute_global_alignment(checked_word, candidate,
                                                   scoring, table)
        # edit distance = combined length minus the alignment score
        return len(checked_word) + len(candidate) - aligned[0] <= dist

    return set(candidate for candidate in word_list if is_close(candidate))
FruitflyEyelessProtein, PAM50, False) result_Q5 = student.compute_local_alignment(HumanEyelessProtein, FruitflyEyelessProtein, PAM50, alignment_matrix_Q5) z_value = (result_Q5[0] - mean) / sigma print 'mean : ' + str(mean) print 'standard deviation : ' + str(sigma) print 'z value : ' + str(z_value) # Q8 word_list = read_words(WORD_LIST_URL) scoring_matrix = student.build_scoring_matrix('abcdefghijklmnopqrstuvwxyz', 2, 1, 0) def check_spelling(checked_word, dist, word_list): answer = [] for word in word_list: alignment_matrix = student.compute_alignment_matrix( checked_word, word, scoring_matrix, True) result = student.compute_local_alignment(checked_word, word, scoring_matrix, alignment_matrix) if (len(checked_word) + len(word) - result[0]) <= dist: answer.append(word) return answer
import alg_application4_provided
import alg_project4_solution as student
import string

alphabet = list(string.ascii_lowercase) + list(string.ascii_uppercase)
scoring_matrix = student.build_scoring_matrix(alphabet, 2, 1, 0)


def check_spelling(checked_word, dist, word_list):
    """
    input: word, target distance, and word list
    output: the set of words from word list whose distance to the input
    word is <= the target distance
    """
    result = set()
    x = len(checked_word)
    for item in word_list:
        y = len(item)
        # Cheap pre-filter: every insertion or deletion changes the length
        # by one, so a length gap larger than dist already rules the word
        # out without building an alignment matrix.
        if abs(x - y) <= dist:
            alignment_matrix = student.compute_alignment_matrix(
                checked_word, item, scoring_matrix, True)
            # NOTE(review): the largest matrix entry is used as the
            # alignment score; presumably it equals the global score for
            # these non-negative scoring values -- confirm.
            score = max(map(max, alignment_matrix))
            if (x + y - score) <= dist:
                # add() mutates in place; the previous union() call copied
                # the whole result set for every accepted word.
                result.add(item)
    return result


WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt"
word_list = alg_application4_provided.read_words(WORD_LIST_URL)

import time
import itertools
import alg_application4_provided
import alg_project4_solution as student
import string

alphabet = list(string.ascii_lowercase) + list(string.ascii_uppercase)
scoring_matrix = student.build_scoring_matrix(alphabet, 2, 1, 0)


def check_spelling(checked_word, dist, word_list):
    """
    input: word, target distance, and word list
    output: the set of words from word list whose distance to the input
    word is <= the target distance
    """
    result = set()
    x = len(checked_word)
    for item in word_list:
        y = len(item)
        # Cheap pre-filter: every insertion or deletion changes the length
        # by one, so a length gap larger than dist already rules the word
        # out without building an alignment matrix.
        if abs(x - y) <= dist:
            alignment_matrix = student.compute_alignment_matrix(
                checked_word, item, scoring_matrix, True)
            # NOTE(review): the largest matrix entry is used as the
            # alignment score; presumably it equals the global score for
            # these non-negative scoring values -- confirm.
            score = max(map(max, alignment_matrix))
            if (x + y - score) <= dist:
                # add() mutates in place; the previous union() call copied
                # the whole result set for every accepted word.
                result.add(item)
    return result


WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt"
word_list = alg_application4_provided.read_words(WORD_LIST_URL)

import time
import itertools

tStart = time.time()
def check_spelling(checked_word, dist, word_list):
    """
    Return the words of word_list within edit distance dist of checked_word.

    The edit distance of a pair is len(checked_word) + len(word) - score,
    where score is the global alignment score under a scoring matrix built
    over the lowercase letters with the values 2, 1 and 0.

    checked_word: the word being checked
    dist: largest edit distance (inclusive) a word may have and be kept
    word_list: iterable of candidate words

    Returns a list of the matching words, in word_list order.
    """
    scoring_matrix = student.build_scoring_matrix(
        set('abcdefghijklmnopqrstuvwxyz'), 2, 1, 0)

    def score(x, y):
        """Global alignment score of x and y under scoring_matrix."""
        # NOTE(review): the True flag presumably selects the global
        # alignment matrix -- confirm against the student module.
        alignment_matrix = student.compute_alignment_matrix(
            x, y, scoring_matrix, True)
        return student.compute_global_alignment(
            x, y, scoring_matrix, alignment_matrix)[0]

    # "<=" rather than "==": a word closer than dist is still within dist,
    # so it must not be dropped from the result.
    return [word for word in word_list
            if len(checked_word) + len(word) - score(checked_word, word)
            <= dist]