Beispiel #1
0
def edit_distance(seq_x, seq_y, scoring_matrix):
    """
    Return the edit distance between seq_x and seq_y.

    The value is |seq_x| + |seq_y| minus the score of the global alignment
    of the two sequences computed by pj4 under scoring_matrix.
    """
    # True is the flag passed to pj4 together with compute_global_alignment.
    global_matrix = pj4.compute_alignment_matrix(seq_x, seq_y, scoring_matrix, True)
    global_result = pj4.compute_global_alignment(seq_x, seq_y, scoring_matrix, global_matrix)
    # Only the score is needed; the aligned strings are discarded.
    best_score, _aligned_x, _aligned_y = global_result
    combined_length = len(seq_x) + len(seq_y)
    return combined_length - best_score
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Build an un-normalized distribution of local alignment scores.

    For each of num_trials trials, seq_y is randomly shuffled, the alignment
    matrix of seq_x against the shuffled sequence is computed by pj4 (with
    the flag set to False), and the largest entry of that matrix is recorded
    as the score of the trial.

    Returns a dictionary mapping each observed score to the number of trials
    that produced it. The dictionary is empty when num_trials is 0.
    """
    scoring_distribution = {}

    for _ in range(num_trials):
        # random.shuffle works in place, so shuffle a mutable copy of seq_y.
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        rand_y = ''.join(shuffled)

        alignment_matrix = pj4.compute_alignment_matrix(seq_x, rand_y, scoring_matrix, False)
        # The largest matrix entry is used as the score, so there is no need
        # to call pj4.compute_local_alignment and rebuild the alignments.
        score = max(max(row) for row in alignment_matrix)
        # dict.get avoids building a key list for the membership test on
        # every trial.
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1

    return scoring_distribution
"""
Algorithm thinking application 4-1

date: 2015/07/30
Author: You-Hao
"""

import alg_application4_provided as app4
import AT_project_4 as pj4

# Load the human and fruit fly eyeless protein sequences and the PAM50
# scoring matrix through the provided helper module.
protein_human = app4.read_protein(app4.HUMAN_EYELESS_URL)
protein_fruitfly = app4.read_protein(app4.FRUITFLY_EYELESS_URL)
scoring_matrix = app4.read_scoring_matrix(app4.PAM50_URL)

# Alignment matrix with the flag set to False, then the local alignment
# derived from it.
alignment_matrix_4_1 = pj4.compute_alignment_matrix(
    protein_human, protein_fruitfly, scoring_matrix, False)
score_4_1, align_human_4_1, align_fruitfly_4_1 = pj4.compute_local_alignment(
    protein_human, protein_fruitfly, scoring_matrix, alignment_matrix_4_1)

# Report the score followed by the two aligned sequences.
print(score_4_1)
print(align_human_4_1)
print(align_fruitfly_4_1)
# Remove the '-' characters from both sequences. A single join over the
# kept characters replaces repeated string concatenation, which re-copies
# the growing result on every step.
seq_human_nodash = ''.join(char for char in seq_human if char != '-')
seq_fruitfly_nodash = ''.join(char for char in seq_fruitfly if char != '-')

# Report the lengths of the dash-free sequences.
print(len(seq_human_nodash))
print(len(seq_fruitfly_nodash))

# Human: align the dash-free human sequence against seq_PAX using the
# alignment matrix built with the flag set to True.
alignment_matrix = pj4.compute_alignment_matrix(
    seq_human_nodash, seq_PAX, scoring_matrix, True)
score_human, align_human, align_PAX_1 = pj4.compute_global_alignment(
    seq_human_nodash, seq_PAX, scoring_matrix, alignment_matrix)

print(score_human)
print(align_human)
print(align_PAX_1)

# Count the positions at which the two aligned sequences hold the same
# character.
match_human = 0
for ind, human_char in enumerate(align_human):
    if human_char == align_PAX_1[ind]:
        match_human += 1

# Percentage of matching positions over the length of the alignment.
print(float(match_human) / len(align_human) * 100.)

# For fruit fly: build the alignment matrix of the dash-free fruit fly
# sequence against seq_PAX, passing True as in the human case above
# (presumably the global-alignment flag -- confirm against pj4).
alignment_matrix = pj4.compute_alignment_matrix(seq_fruitfly_nodash, seq_PAX, scoring_matrix, True)