Esempio n. 1
0
def edit_distance(seq_x, seq_y, scoring_matrix):
    """
    Return the edit distance between seq_x and seq_y.

    The value is |seq_x| + |seq_y| minus the score of the global
    alignment of the two sequences under scoring_matrix, so it depends
    on the scores stored in scoring_matrix.
    """
    # Fourth argument True: the same matrix is handed to
    # compute_global_alignment below.
    global_matrix = pj4.compute_alignment_matrix(seq_x, seq_y, scoring_matrix, True)

    # Only the score is needed; the two aligned strings are discarded.
    global_score, _aligned_x, _aligned_y = pj4.compute_global_alignment(
        seq_x, seq_y, scoring_matrix, global_matrix)

    combined_length = len(seq_x) + len(seq_y)
    return combined_length - global_score
Esempio n. 2
0
def ED_xcross_test(diag_score, off_diag_score, dash_score):
    """
    Cross-check edit_distance on three small word pairs.

    The pairs exercise the three edit operations:
        Insertion: abc -> abbc
        Deletion: abc -> ac
        Substitution: abc -> abd

    A scoring matrix over the alphabet {a, b, c, d} is built from the
    given scores, and a 3-tuple with the value of edit_distance for each
    pair (in the order listed below) is returned.
    """
    alphabet = {"a", "b", "c", "d"}
    scoring_matrix = pj4.build_scoring_matrix(alphabet, diag_score, off_diag_score, dash_score)

    word_pairs = (
        # test I: x = 'ab', y = 'acccc'; the true edit distance is 4
        # (one substitution plus three insertions)
        ("ab", "acccc"),
        # test II: x = 'acccd', y = 'ad'; the true edit distance is 3
        # (three deletions)
        ("acccd", "ad"),
        # test III: x = 'abcd', y = 'addd'; the true edit distance is 2
        # (two substitutions)
        ("abcd", "addd"),
    )

    return tuple(edit_distance(word_x, word_y, scoring_matrix)
                 for word_x, word_y in word_pairs)
Esempio n. 3
0
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of all words in word_list that are within edit
    distance dist of the string checked_word.

    checked_word: the word to check
    dist: largest edit distance that is still accepted
    word_list: iterable of candidate words (duplicates are ignored)

    The scoring matrix is built over the lowercase letters a-z with
    diagonal score 2, off-diagonal score 1 and dash score 0.

    As a side effect, the number of words for which a full edit distance
    computation was run is printed.
    """
    diag_score = 2
    off_diag_score = 1
    dash_score = 0

    scoring_matrix = pj4.build_scoring_matrix(set('qazwsxedcrfvtgbyhnujmikolp'), diag_score, off_diag_score, dash_score)

    checked_length = len(checked_word)
    candidate_words = set()
    count = 0
    for word in set(word_list):
        # An edit distance is never smaller than the difference of the two
        # lengths, so these words can be rejected without an alignment.
        # No further substring-based pre-filter is applied: the previous
        # one ignored dist and discarded valid candidates (for example a
        # word that differs from checked_word by a single substitution).
        if abs(len(word) - checked_length) > dist:
            continue
        count += 1

        if sol4_7.edit_distance(checked_word, word, scoring_matrix) <= dist:
            candidate_words.add(word)

    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(count)

    return candidate_words
Esempio n. 4
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Return an un-normalized score distribution as a dictionary that maps
    each observed score to the number of trials that produced it.

    Each of the num_trials trials shuffles the characters of seq_y,
    computes the alignment matrix of seq_x against the shuffled sequence
    (fourth argument False) and records the largest matrix entry as the
    score of that trial.
    """
    histogram = {}

    for _trial in range(num_trials):
        shuffled_chars = list(seq_y)
        random.shuffle(shuffled_chars)
        local_matrix = pj4.compute_alignment_matrix(
            seq_x, ''.join(shuffled_chars), scoring_matrix, False)

        # The largest matrix entry is used as the score in place of a
        # call to pj4.compute_local_alignment; the aligned strings are
        # not needed here.
        best_score = max(max(row) for row in local_matrix)
        histogram[best_score] = histogram.get(best_score, 0) + 1

    return histogram
Esempio n. 5
0
"""
Algorithm thinking application 4-1

date: 2015/07/30
Author: You-Hao
"""

import alg_application4_provided as app4
import AT_project_4 as pj4

# Load the two eyeless protein sequences and the PAM50 scoring matrix
# through the helpers of the provided application module.
protein_human = app4.read_protein(app4.HUMAN_EYELESS_URL)
protein_fruitfly = app4.read_protein(app4.FRUITFLY_EYELESS_URL)
scoring_matrix = app4.read_scoring_matrix(app4.PAM50_URL)

# Alignment matrix for the human / fruit fly pair. The fourth argument is
# False here; presumably that selects the local variant -- confirm against
# AT_project_4.
alignment_matrix_4_1 = pj4.compute_alignment_matrix(
    protein_human, protein_fruitfly, scoring_matrix, False)

# Local alignment: the score followed by the two aligned sequences.
score_4_1, align_human_4_1, align_fruitfly_4_1 = pj4.compute_local_alignment(
    protein_human, protein_fruitfly, scoring_matrix, alignment_matrix_4_1)

print(score_4_1)
print(align_human_4_1)
print(align_fruitfly_4_1)
Esempio n. 6
0
# Remove the dash characters from both sequences. Joining the kept
# characters once avoids rebuilding the string for every character.
seq_human_nodash = ''.join(char for char in seq_human if char != '-')
seq_fruitfly_nodash = ''.join(char for char in seq_fruitfly if char != '-')

# Lengths of the dash-free sequences.
print(len(seq_human_nodash))
print(len(seq_fruitfly_nodash))

# Human: global alignment of the dash-free human sequence against seq_PAX.
alignment_matrix = pj4.compute_alignment_matrix(
    seq_human_nodash, seq_PAX, scoring_matrix, True)

score_human, align_human, align_PAX_1 = pj4.compute_global_alignment(
    seq_human_nodash, seq_PAX, scoring_matrix, alignment_matrix)
print(score_human)
print(align_human)
print(align_PAX_1)

# Number of positions at which the two aligned strings hold the same
# character.
match_human = sum(
    1 for ind in range(len(align_human))
    if align_human[ind] == align_PAX_1[ind])

# Matching positions as a percentage of the length of align_human.
print(float(match_human) / len(align_human) * 100.)

# Fruit fly: the same alignment matrix computation for the dash-free
# fruit fly sequence.
alignment_matrix = pj4.compute_alignment_matrix(
    seq_fruitfly_nodash, seq_PAX, scoring_matrix, True)