Beispiel #1
0
def ED_xcross_test(diag_score, off_diag_score, dash_score):
    """
    Run edit_distance on three fixed string pairs and return the results.

    A scoring matrix over the alphabet {a, b, c, d} is built from the
    given diag_score, off_diag_score and dash_score, and the same matrix
    is used for every pair.

    The three kinds of single-character edit, for reference:
        Insertion:    abc -> abbc
        Deletion:     abc -> ac
        Substitution: abc -> abd

    Returns a 3-tuple of the edit_distance results, in the order the
    pairs are listed below.
    """
    alphabet = {"a", "b", "c", "d"}
    matrix = pj4.build_scoring_matrix(alphabet, diag_score, off_diag_score, dash_score)

    # Counting one unit per insertion/deletion/substitution, the pairs are
    # 4, 3 and 2 edits apart respectively. Whether edit_distance returns
    # exactly those values presumably depends on the scores passed in --
    # verify against the caller.
    string_pairs = (
        ("ab", "acccc"),    # 1 substitution + 3 insertions
        ("acccd", "ad"),    # 3 deletions
        ("abcd", "addd"),   # 2 substitutions
    )

    return tuple(edit_distance(x_str, y_str, matrix) for x_str, y_str in string_pairs)
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of all words in word_list that are within edit
    distance dist of the string checked_word.

    Parameters:
        checked_word: the string to look up.
        dist: largest edit distance (inclusive) a word may have from
            checked_word and still be returned.
        word_list: iterable of candidate words; duplicates are ignored.

    Returns:
        The set of words from word_list for which
        sol4_7.edit_distance(checked_word, word, scoring_matrix) <= dist.

    Side effect:
        Prints the number of words for which the edit distance was
        actually computed (those that passed the length pre-filter).
    """
    # Scores used to build the scoring matrix handed to
    # sol4_7.edit_distance.
    diag_score = 2
    off_diag_score = 1
    dash_score = 0

    # The alphabet is the 26 lowercase ASCII letters.
    # NOTE(review): words containing other characters are presumably not
    # covered by the scoring matrix -- confirm how edit_distance reacts.
    scoring_matrix = pj4.build_scoring_matrix(
        set('qazwsxedcrfvtgbyhnujmikolp'), diag_score, off_diag_score, dash_score)

    shortest = len(checked_word) - dist
    longest = len(checked_word) + dist

    candidate_words = set()
    count = 0
    for word in set(word_list):
        # Cheap pre-filter: a word whose length differs from checked_word
        # by more than dist needs more than dist insertions/deletions.
        # Assumes sol4_7.edit_distance charges at least one unit per
        # inserted or deleted character -- TODO confirm.
        if not shortest <= len(word) <= longest:
            continue

        # No substring-based pre-filter here: requiring prefixes and
        # suffixes of checked_word to occur in word rejects valid
        # candidates (e.g. "abc" vs "abd" at dist 1), so every word that
        # survives the length check is measured.
        count += 1

        if sol4_7.edit_distance(checked_word, word, scoring_matrix) <= dist:
            candidate_words.add(word)

    # Parenthesised so the line prints the same on Python 2 and Python 3.
    print(count)

    return candidate_words