def check_spelling(checked_word, dist, word_list):
    """
    Return the words of word_list whose edit distance to checked_word is
    at most dist, as a list in the order they appear in word_list.

    The edit distance of a pair is computed as
    len(checked_word) + len(word) - score, where score is the first
    element returned by student.compute_global_alignment for a scoring
    matrix built over the lowercase letters with the values (2, 1, 0).
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    pair_scores = student.build_scoring_matrix(letters, 2, 1, 0)

    def edit_distance(candidate):
        """Edit distance between checked_word and candidate."""
        # The True flag presumably requests a global alignment matrix.
        dp_table = student.compute_alignment_matrix(
            checked_word, candidate, pair_scores, True)
        alignment = student.compute_global_alignment(
            checked_word, candidate, pair_scores, dp_table)
        return len(checked_word) + len(candidate) - alignment[0]

    return [candidate for candidate in word_list
            if edit_distance(candidate) <= dist]
def check_spelling(checked_word, dist, word_list):
    """
    Return a list of the words in word_list that are within edit distance
    dist of checked_word, i.e. whose edit distance is at most dist.

    The edit distance of a pair is len(checked_word) + len(word) - score,
    where score is the first element returned by
    student.compute_global_alignment for a scoring matrix built over the
    lowercase letters with the values (2, 1, 0).

    The result keeps the order of word_list.
    """
    scoring_matrix = student.build_scoring_matrix(
        set('abcdefghijklmnopqrstuvwxyz'), 2, 1, 0)

    def score(x, y):
        """Global alignment score of x and y under scoring_matrix."""
        alignment_matrix = student.compute_alignment_matrix(
            x, y, scoring_matrix, True)
        return student.compute_global_alignment(x, y, scoring_matrix,
                                                alignment_matrix)[0]

    # "Within distance dist" means at most dist: testing for equality
    # would silently drop every word that is closer than dist (including
    # checked_word itself when it is in word_list).
    return [word for word in word_list
            if len(checked_word) + len(word) - score(checked_word, word)
            <= dist]
def check_spelling(checked_word, dist, word_list):
    """
    Collect the distinct words of word_list whose edit distance to
    checked_word does not exceed dist and return them as a set.

    The edit distance is len(word) + len(checked_word) - score, with
    score being the first element returned by student.global_alignment.
    """
    scoring = student.build_scoring_matrix(
        string.ascii_lowercase + '-', 2, 1, 0)

    def edit_distance(candidate):
        """Edit distance between candidate and checked_word."""
        alignment = student.global_alignment(candidate, checked_word, scoring)
        return len(candidate) + len(checked_word) - alignment[0]

    # Duplicates are removed up front so each distinct word is aligned once.
    return set([candidate for candidate in set(word_list)
                if edit_distance(candidate) <= dist])
def question7(seq_x, seq_y):
    """
    Print the edit distance between seq_x and seq_y, followed by the two
    aligned sequences returned by student.compute_global_alignment.

    The edit distance is computed as len(seq_x) + len(seq_y) - score,
    where score is the global alignment score under a scoring matrix
    built over the lowercase letters with diag_score 2, off_diag_score 1
    and dash_score 0.

    Returns None; the results are only written to standard output.
    """
    diag_score = 2
    off_diag_score = 1
    dash_score = 0
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    score_matrix = student.build_scoring_matrix(
        alphabet, diag_score, off_diag_score, dash_score)

    # The True flag presumably requests a global alignment matrix.
    align_matrix = student.compute_alignment_matrix(
        seq_x, seq_y, score_matrix, True)
    score, align_x, align_y = student.compute_global_alignment(
        seq_x, seq_y, score_matrix, align_matrix)

    edit_distance = len(seq_x) + len(seq_y) - score

    # Single-argument parenthesized print produces the same output under
    # Python 2 and is also valid Python 3.
    print("Edit distance: " + str(edit_distance))
    print(align_x)
    print(align_y)
# Example #5
# 0
def check_spelling(checked_word, dist, word_list):
    """
    Return a list of the words in word_list whose edit distance to
    checked_word is at most dist, in the order they appear in word_list.

    The edit distance is len(checked_word) + len(word) - score, where
    score is the first element returned by
    student.compute_global_alignment.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    # Scores passed as (diagonal, off-diagonal, dash).
    pair_scores = student.build_scoring_matrix(letters, 2, 1, 0)
    target_len = len(checked_word)

    def edit_distance(candidate):
        """Edit distance between checked_word and candidate."""
        dp_table = student.compute_alignment_matrix(
            checked_word, candidate, pair_scores, True)
        alignment = student.compute_global_alignment(
            checked_word, candidate, pair_scores, dp_table)
        return target_len + len(candidate) - alignment[0]

    return [candidate for candidate in word_list
            if edit_distance(candidate) <= dist]
def check_spelling(checked_word, dist, word_list):
    """
    Return a list of every word in word_list that lies within edit
    distance dist of checked_word, keeping the order of word_list.

    The edit distance is len(checked_word) + len(word) - score, where
    score is the global alignment score reported by
    student.compute_global_alignment.
    """
    # Scores passed as (diagonal, off-diagonal, dash).
    pair_scores = student.build_scoring_matrix(
        "abcdefghijklmnopqrstuvwxyz", 2, 1, 0)

    matches = []
    for candidate in word_list:
        dp_table = student.compute_alignment_matrix(
            checked_word, candidate, pair_scores, True)
        # Only the score is needed; the aligned strings are discarded.
        best_score, _, _ = student.compute_global_alignment(
            checked_word, candidate, pair_scores, dp_table)
        if len(checked_word) + len(candidate) - best_score <= dist:
            matches.append(candidate)

    return matches
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of all words in word_list that are within edit
    distance dist of the string checked_word.

    The edit distance is len(checked_word) + len(word) - score, where
    score is the first element returned by
    student.compute_global_alignment.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    # Scores passed as (diagonal, off-diagonal, dash).
    pair_scores = student.build_scoring_matrix(letters, 2, 1, 0)

    def edit_distance(candidate):
        """Edit distance between checked_word and candidate."""
        dp_table = student.compute_alignment_matrix(
            checked_word, candidate, pair_scores, True)
        alignment = student.compute_global_alignment(
            checked_word, candidate, pair_scores, dp_table)
        return len(checked_word) + len(candidate) - alignment[0]

    return set([candidate for candidate in word_list
                if edit_distance(candidate) <= dist])
# Example #8
# 0
                                                       FruitflyEyelessProtein,
                                                       PAM50, False)
# Local alignment of the two eyeless protein sequences under PAM50, using
# the alignment matrix computed just above.
result_Q5 = student.compute_local_alignment(HumanEyelessProtein,
                                            FruitflyEyelessProtein, PAM50,
                                            alignment_matrix_Q5)

# Standardize the alignment score (first element of the result) against
# mean and sigma, which are defined earlier in the file.
# NOTE(review): under Python 2 this is integer division if all three
# operands are ints -- confirm mean/sigma are floats.
z_value = (result_Q5[0] - mean) / sigma

# Single-argument parenthesized print produces the same output under
# Python 2 and is also valid Python 3.
print('mean : ' + str(mean))
print('standard deviation : ' + str(sigma))
print('z value : ' + str(z_value))

# Q8
# Module-level inputs for check_spelling below.
# read_words and WORD_LIST_URL are defined elsewhere in this file;
# presumably this loads the dictionary of candidate words -- TODO confirm.
word_list = read_words(WORD_LIST_URL)

# Scoring matrix over the lowercase letters built with the values (2, 1, 0);
# check_spelling reads this global instead of rebuilding it per call.
scoring_matrix = student.build_scoring_matrix('abcdefghijklmnopqrstuvwxyz', 2,
                                              1, 0)


def check_spelling(checked_word, dist, word_list):
    """
    Return a list of the words in word_list whose edit distance to
    checked_word is at most dist, in the order they appear in word_list.

    The edit distance is len(checked_word) + len(word) - score, where
    score is the global alignment score of the pair under the
    module-level scoring_matrix.
    """
    answer = []
    for word in word_list:
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, word, scoring_matrix, True)
        # The matrix above is built with the global flag (True), so it is
        # read back with the global alignment routine; the edit-distance
        # formula below is defined in terms of the global alignment score.
        result = student.compute_global_alignment(checked_word, word,
                                                  scoring_matrix,
                                                  alignment_matrix)
        if (len(checked_word) + len(word) - result[0]) <= dist:
            answer.append(word)
    return answer

# Example #9
# 0
# File: test.py Project: oahehc/my
import alg_application4_provided
import alg_project4_solution as student

import string

# All ASCII letters, lowercase first and then uppercase.
alphabet = list(string.ascii_letters)
# Shared scoring matrix built with the values (2, 1, 0); check_spelling
# reads this global.
scoring_matrix = student.build_scoring_matrix(alphabet, 2, 1, 0)


def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words in word_list whose edit distance to
    checked_word is at most dist.

    input: word, target distance, and word list
    output: the subset of word list whose distance to the input word
            is <= target distance

    The edit distance is len(checked_word) + len(word) - score, where
    score is the largest entry of the alignment matrix computed with the
    module-level scoring_matrix.
    """
    result = set()
    checked_len = len(checked_word)
    for item in word_list:
        item_len = len(item)
        # Cheap pre-filter: skip the alignment when the lengths alone
        # differ by more than dist (assumes the edit distance can never
        # be smaller than the length difference, which holds for the
        # (2, 1, 0) scores the module-level matrix is built with).
        if abs(checked_len - item_len) > dist:
            continue
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, item, scoring_matrix, True)
        # The largest matrix entry is used as the alignment score, which
        # avoids running the traceback.
        score = max(map(max, alignment_matrix))
        if (checked_len + item_len - score) <= dist:
            # In-place add; rebuilding the set with union() on every hit
            # copies all previous matches each time.
            result.add(item)
    return result


# URL of the word list that is handed to read_words below.
WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt"
# Loaded once when the module is executed; presumably read_words fetches the
# URL and returns the words it contains -- TODO confirm against
# alg_application4_provided.
word_list = alg_application4_provided.read_words(WORD_LIST_URL)

import time, itertools
# Example #10
# 0
import alg_application4_provided
import alg_project4_solution as student

import string
# All ASCII letters, lowercase first and then uppercase.
alphabet = list(string.ascii_letters)
# Shared scoring matrix built with the values (2, 1, 0); check_spelling
# reads this global.
scoring_matrix = student.build_scoring_matrix(alphabet, 2, 1, 0)


def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words in word_list whose edit distance to
    checked_word is at most dist.

    input: word, target distance, and word list
    output: the subset of word list whose distance to the input word
            is <= target distance

    The edit distance is len(checked_word) + len(word) - score, where
    score is the largest entry of the alignment matrix computed with the
    module-level scoring_matrix.
    """
    result = set()
    checked_len = len(checked_word)
    for item in word_list:
        item_len = len(item)
        # Cheap pre-filter: skip the alignment when the lengths alone
        # differ by more than dist (assumes the edit distance can never
        # be smaller than the length difference, which holds for the
        # (2, 1, 0) scores the module-level matrix is built with).
        if abs(checked_len - item_len) > dist:
            continue
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, item, scoring_matrix, True)
        # The largest matrix entry is used as the alignment score, which
        # avoids running the traceback.
        score = max(map(max, alignment_matrix))
        if (checked_len + item_len - score) <= dist:
            # In-place add; rebuilding the set with union() on every hit
            # copies all previous matches each time.
            result.add(item)
    return result


# URL of the word list that is handed to read_words below.
WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt"
# Loaded once when the module is executed; presumably read_words fetches the
# URL and returns the words it contains -- TODO confirm against
# alg_application4_provided.
word_list = alg_application4_provided.read_words(WORD_LIST_URL)

import time, itertools
# Timestamp taken at module execution; presumably the start mark for a
# timing measurement made further down the file -- TODO confirm.
tStart = time.time()
def check_spelling(checked_word, dist, word_list):
    """
    Return a list of the words in word_list that are within edit distance
    dist of checked_word, i.e. whose edit distance is at most dist.

    The edit distance of a pair is len(checked_word) + len(word) - score,
    where score is the first element returned by
    student.compute_global_alignment for a scoring matrix built over the
    lowercase letters with the values (2, 1, 0).

    The result keeps the order of word_list.
    """
    scoring_matrix = student.build_scoring_matrix(
        set('abcdefghijklmnopqrstuvwxyz'), 2, 1, 0)

    def score(x, y):
        """Global alignment score of x and y under scoring_matrix."""
        alignment_matrix = student.compute_alignment_matrix(
            x, y, scoring_matrix, True)
        return student.compute_global_alignment(
            x, y, scoring_matrix, alignment_matrix)[0]

    # "Within distance dist" means at most dist: testing for equality
    # would silently drop every word that is closer than dist (including
    # checked_word itself when it is in word_list).
    return [word for word in word_list
            if len(checked_word) + len(word) - score(checked_word, word)
            <= dist]