def question1():
    """
    Print the answers to Questions 1 and 2 for the eyeless proteins.

    Question 1: local alignment of the human and fruit fly eyeless proteins
    under the PAM50 scoring matrix (score and both aligned strings).
    Question 2: agreement of each dash-free local alignment with the
    consensus PAX domain, as reported by the ``agreement`` helper.
    """
    human_seq = read_protein(HUMAN_EYELESS_URL)
    fly_seq = read_protein(FRUITFLY_EYELESS_URL)
    print(len(human_seq), len(fly_seq))

    pam50 = read_scoring_matrix(PAM50_URL)
    # The final False selects the local (not global) alignment matrix.
    local_matrix = compute_alignment_matrix(human_seq, fly_seq, pam50, False)
    best_score, human_local, fly_local = compute_local_alignment(
        human_seq, fly_seq, pam50, local_matrix)

    print('Question 1')
    print(best_score)
    print(human_local)
    print(fly_local)
    print()

    print('Question 2')
    pax_consensus = read_protein(CONSENSUS_PAX_URL)
    # Dashes are alignment gaps, not residues; drop them before re-aligning.
    human_ungapped = ''.join(
        residue for residue in human_local if residue != '-')
    fly_ungapped = ''.join(
        residue for residue in fly_local if residue != '-')

    human_global_matrix = compute_alignment_matrix(
        human_ungapped, pax_consensus, pam50, True)
    fly_global_matrix = compute_alignment_matrix(
        fly_ungapped, pax_consensus, pam50, True)

    human_agreement = agreement(
        human_ungapped, pax_consensus, pam50, human_global_matrix)
    fly_agreement = agreement(
        fly_ungapped, pax_consensus, pam50, fly_global_matrix)
    print('Human vs Consensus agree = %s%%' % human_agreement)
    print('Fly vs Consensus agree = %s%%' % fly_agreement)
def question4(filename):
    """
    Plot the null distribution of local alignment scores and print its stats.

    Runs 1000 trials of ``generate_null_distribution`` on the human and fruit
    fly eyeless proteins under PAM50, saves a bar chart of the normalized
    distribution to ``filename`` and prints the mean, the (population)
    standard deviation and the z-score of the observed score 875.

    Args:
        filename: destination handed to ``plt.savefig``.

    NOTE(review): ``generate_null_distribution`` is expected to return a
    ``(distribution, raw_scores)`` pair, and ``str_keys`` / ``norm`` are
    presumably "stringify keys" and "normalize counts" helpers -- all three
    are defined outside this block; confirm against their definitions.
    """
    from pprint import pprint as pp

    num_trials = 1000
    observed_score = 875  # hard-coded local alignment score from Question 1

    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    scoring = read_scoring_matrix(PAM50_URL)
    distr, raw = generate_null_distribution(human, fly, scoring, num_trials)

    distr = str_keys(distr)
    pp(distr)
    distr = norm(distr)

    # items() works on Python 2 and 3 alike (iteritems() is Python 2 only).
    pairs = sorted(distr.items(), key=itemgetter(0))
    print(pairs)

    # Materialize real lists: on Python 3 map() yields an iterator, which
    # matplotlib cannot use as bar heights / tick labels.
    labels = [pair[0] for pair in pairs]
    fractions = [pair[1] for pair in pairs]
    index = np.arange(len(pairs))
    plt.bar(index, fractions)
    plt.xticks(index + 0.4, labels, fontsize=8)
    plt.xlabel('Scores')
    plt.ylabel('Fraction of total trials')
    plt.title('Distribution of scores')
    plt.tight_layout()
    plt.savefig(filename)

    # Force true division: with integer scores, Python 2's "/" would floor
    # the mean and thereby distort the standard deviation and the z-score.
    mean = sum(raw) / float(num_trials)
    std = np.sqrt(sum((x - mean) ** 2 for x in raw) / num_trials)
    z_score = (observed_score - mean) / std
    print('mean = %f' % mean)
    print('std = %f' % std)
    print('z_score = %f' % z_score)
def question_1():
    '''
    Question 1: local alignment of the human and fruit fly eyeless proteins.

    Loads HumanEyelessProtein and FruitflyEyelessProtein (the amino acid
    sequences of the eyeless protein in each genome) together with the PAM50
    scoring matrix, which covers the amino acid alphabet plus the gap
    character '-'. The sequences are then locally aligned under PAM50.

    Returns whatever project4.compute_local_alignment produces for the two
    proteins; any dashes in the aligned strings are part of the answer.
    '''
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    # The final False requests the local (not global) alignment matrix.
    local_matrix = project4.compute_alignment_matrix(
        human, fruitfly, pam50, False)
    return project4.compute_local_alignment(
        human, fruitfly, pam50, local_matrix)
def question4(filename):
    """
    Plot the null distribution of local alignment scores and print its stats.

    Runs 1000 trials of ``generate_null_distribution`` on the human and fruit
    fly eyeless proteins under PAM50, saves a bar chart of the normalized
    distribution to ``filename`` and prints the mean, the (population)
    standard deviation and the z-score of the observed score 875.

    Args:
        filename: destination handed to ``plt.savefig``.

    NOTE(review): ``generate_null_distribution`` is expected to return a
    ``(distribution, raw_scores)`` pair, and ``str_keys`` / ``norm`` are
    presumably "stringify keys" and "normalize counts" helpers -- all three
    are defined outside this block; confirm against their definitions.
    """
    from pprint import pprint as pp

    num_trials = 1000
    observed_score = 875  # hard-coded local alignment score from Question 1

    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    scoring = read_scoring_matrix(PAM50_URL)
    distr, raw = generate_null_distribution(human, fly, scoring, num_trials)

    distr = str_keys(distr)
    pp(distr)
    distr = norm(distr)

    # items() works on Python 2 and 3 alike (iteritems() is Python 2 only).
    pairs = sorted(distr.items(), key=itemgetter(0))
    print(pairs)

    # Materialize real lists: on Python 3 map() yields an iterator, which
    # matplotlib cannot use as bar heights / tick labels.
    labels = [pair[0] for pair in pairs]
    fractions = [pair[1] for pair in pairs]
    index = np.arange(len(pairs))
    plt.bar(index, fractions)
    plt.xticks(index + 0.4, labels, fontsize=8)
    plt.xlabel('Scores')
    plt.ylabel('Fraction of total trials')
    plt.title('Distribution of scores')
    plt.tight_layout()
    plt.savefig(filename)

    # Force true division: with integer scores, Python 2's "/" would floor
    # the mean and thereby distort the standard deviation and the z-score.
    mean = sum(raw) / float(num_trials)
    std = np.sqrt(sum((x - mean) ** 2 for x in raw) / num_trials)
    z_score = (observed_score - mean) / std
    print('mean = %f' % mean)
    print('std = %f' % std)
    print('z_score = %f' % z_score)
def question1():
    """
    Code for question 1: locally align the human and fruit fly eyeless
    proteins under PAM50 and return the compute_local_alignment result.
    """
    human_seq = read_protein(HUMAN_EYELESS_URL)
    fly_seq = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    # The final False requests the local (not global) alignment matrix.
    local_matrix = compute_alignment_matrix(human_seq, fly_seq, pam50, False)
    return compute_local_alignment(human_seq, fly_seq, pam50, local_matrix)
def local_alignment_eyeless_protein():
    """
    Question: 1

    Locally align the human and fruit fly eyeless proteins under PAM50 and
    return the result of student.compute_local_alignment.
    """
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)
    # The final False requests the local (not global) alignment matrix.
    local_matrix = student.compute_alignment_matrix(
        human, fruitfly, pam50, False)
    return student.compute_local_alignment(
        human, fruitfly, pam50, local_matrix)
def answer_Q1():
    '''
    Answers Q1: local alignment of the human and fruit fly eyeless proteins.

    Returns the result of student.compute_local_alignment for the two
    sequences under the PAM50 scoring matrix.
    '''
    # Amino acid sequences of the eyeless protein in each genome.
    human = provided.read_protein(HUMAN_EYELESS_URL)
    fly = provided.read_protein(FRUITFLY_EYELESS_URL)
    # PAM50 scoring matrix.
    pam50 = provided.read_scoring_matrix(PAM50_URL)
    # Local alignment matrix (the final False selects local alignment).
    local_matrix = student.compute_alignment_matrix(human, fly, pam50, False)
    return student.compute_local_alignment(human, fly, pam50, local_matrix)
def local_alignment_eyeless_protein():
    """
    Question: 1

    Locally align the human and fruit fly eyeless proteins under PAM50 and
    return the result of student.compute_local_alignment.
    """
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)
    # The final False requests the local (not global) alignment matrix.
    local_matrix = student.compute_alignment_matrix(
        human, fruitfly, pam50, False)
    return student.compute_local_alignment(
        human, fruitfly, pam50, local_matrix)
def run_ques_4():
    """
    Question: 4 & 5

    Builds a 1000-trial null distribution for the human and fruit fly
    eyeless proteins, prints its mean and standard deviation (from
    compute_stats), prints the z-score of the actual local alignment score
    and finally draws the distribution with bar_plot.
    """
    trial_count = 1000
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    null_distribution = generate_null_distribution(
        human, fruitfly, pam50, trial_count)
    mean, std_dev = compute_stats(null_distribution, trial_count)
    # One pre-joined string prints the same text as the Python 2 statement
    # "print a, b, c" (space separated) and is also valid on Python 3.
    print(" ".join([str(mean), ",", str(std_dev)]))

    # Element 0 of the local alignment result is used as the observed score.
    observed_score = local_alignment_eyeless_protein()[0]
    z_score = float(observed_score - mean) / std_dev
    print(z_score)

    bar_plot(null_distribution, trial_count)
def perform_human_fly_trials():
    """
    Run 1000 null-distribution trials for the human and fruit fly eyeless
    proteins and return the outcome as a pandas DataFrame.

    The frame has one 'Frequency' column holding the distribution's values
    and an index named 'Scores' holding the distribution's keys.
    """
    # Eyeless protein sequences for both genomes, plus the PAM50 matrix.
    human = provided.read_protein(HUMAN_EYELESS_URL)
    fly = provided.read_protein(FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(PAM50_URL)

    # 1000 trials.
    null_counts = generate_null_distribution(human, fly, pam50, 1000)

    # Convert the result to a DataFrame and label its index.
    frame = pd.DataFrame(
        null_counts.values(),
        index=null_counts.keys(),
        columns=['Frequency'])
    frame.index.rename('Scores', inplace=True)
    return frame
def run_app_q1():
    """
    Question 1 of application.

    Locally aligns the human and fruit fly eyeless proteins under PAM50 and
    returns the tuple (score, aligned human, aligned fruit fly).
    """
    pam50 = provided.read_scoring_matrix(PAM50_PATH)
    human = provided.read_protein(HUMAN_EYELESS_PATH)
    fruitfly = provided.read_protein(FRUITFLY_EYELESS_PATH)
    # The final False requests the local (not global) alignment matrix.
    local_matrix = compute_alignment_matrix(human, fruitfly, pam50, False)
    local_result = compute_local_alignment(
        human, fruitfly, pam50, local_matrix)
    (score, local_human, local_fruitfly) = local_result
    return (score, local_human, local_fruitfly)
def question2():
    """
    Code for question 2

    Takes the two local alignments from question1() (elements 1 and 2 of its
    result: human first, fruit fly second), strips their dashes, globally
    aligns each against the consensus PAX domain under PAM50 and prints the
    percentage of aligned positions at which the two strings agree -- the
    human percentage first, then the fruit fly percentage.

    Raises ZeroDivisionError if a global alignment comes back empty.
    """
    q1_result = question1()
    score_mat = read_scoring_matrix(PAM50_URL)
    consensus = read_protein(CONSENSUS_PAX_URL)

    # Same pipeline for both organisms; the order fixes the print order.
    for local_alignment in (q1_result[1], q1_result[2]):
        # Dashes are alignment gaps, not residues.
        sequence = local_alignment.replace('-', '')
        align_mat = compute_alignment_matrix(
            sequence, consensus, score_mat, True)
        global_align = compute_global_alignment(
            sequence, consensus, score_mat, align_mat)
        aligned_seq, aligned_consensus = global_align[1], global_align[2]

        matches = sum(
            1 for left, right in zip(aligned_seq, aligned_consensus)
            if left == right)
        print(float(matches) / len(aligned_seq) * 100)
def question5():
    """
    Code for question 5

    Expands a 1000-trial null distribution into its individual scores and
    prints the mean, the population standard deviation and the z-score of
    the hard-coded score 875.
    """
    human_seq = read_protein(HUMAN_EYELESS_URL)
    fly_seq = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    null_dist = generate_null_distribution(human_seq, fly_seq, pam50, 1000)

    # Repeat every score as many times as it was observed.
    samples = [value
               for value, occurrences in null_dist.iteritems()
               for _ in range(occurrences)]
    sample_count = len(samples)

    mean = float(sum(samples)) / sample_count
    squared_deviation = sum([(value - mean) ** 2 for value in samples])
    std = math.sqrt(float(squared_deviation) / sample_count)
    z_score = (875 - mean) / std
    # One pre-joined string prints the same text as the Python 2 statement
    # "print a, b, c" (space separated) and is also valid on Python 3.
    print(" ".join([str(mean), str(std), str(z_score)]))
def question4_plot():
    """
    Code for question 4

    Shows a bar plot of the 1000-trial null distribution with every count
    divided by 1000, then prints the raw distribution.
    """
    human_seq = read_protein(HUMAN_EYELESS_URL)
    fly_seq = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    null_dist = generate_null_distribution(human_seq, fly_seq, pam50, 1000)

    # Fraction of the 1000 trials for each score, in the dict's own order.
    fractions = [count / 1000.0 for count in null_dist.itervalues()]

    plt.bar(null_dist.keys(), fractions)
    plt.title("Normalized score distribution")
    plt.ylabel("Fractions of total trials")
    plt.xlabel("Scores of local alignments")
    plt.show()
    print(null_dist)
def run_app_q4():
    """
    Question 4 of application.

    Generates a 1000-trial null distribution for the human and fruit fly
    eyeless proteins, normalizes it in place (count / trials), shows it as a
    green bar plot and returns the normalized distribution.
    """
    pam50 = provided.read_scoring_matrix(PAM50_PATH)
    human = provided.read_protein(HUMAN_EYELESS_PATH)
    fruitfly = provided.read_protein(FRUITFLY_EYELESS_PATH)

    num_trials = 1000
    distribution = generate_null_distribution(
        human, fruitfly, pam50, num_trials)

    # Normalize in place; the float divisor forces true division.
    divisor = 1.0 * num_trials
    for score in list(distribution):
        distribution[score] = distribution[score] / divisor

    plt.bar(distribution.keys(), distribution.values(), color='g')
    plt.grid(True)
    plt.xlabel('Scores')
    plt.ylabel('Fraction of Total Trials')
    plt.title('Normalized Scoring Distribution')
    plt.show()
    return distribution
def run_app_q3():
    """
    Question 3 of application.

    Repeats the Question 1/2 pipeline on two sequences from gen_random_seqs
    that have the same lengths as the human and fruit fly eyeless proteins:
    prints the local alignment score and both local alignments, then the
    match percentage of each dash-free local alignment against the
    consensus PAX domain (human-length sequence first).
    """
    pam50 = provided.read_scoring_matrix(PAM50_PATH)
    human = provided.read_protein(HUMAN_EYELESS_PATH)
    fruitfly = provided.read_protein(FRUITFLY_EYELESS_PATH)

    random_human = gen_random_seqs(len(human))
    random_fruitfly = gen_random_seqs(len(fruitfly))

    local_matrix = compute_alignment_matrix(
        random_human, random_fruitfly, pam50, False)
    (score, local_human_rand, local_fruitfly_rand) = compute_local_alignment(
        random_human, random_fruitfly, pam50, local_matrix)
    print(score)
    print(local_human_rand)
    print(local_fruitfly_rand)

    consensus = provided.read_protein(CONSENSUS_PAX_PATH)
    # Identical consensus comparison for both local alignments.
    for local_alignment in (local_human_rand, local_fruitfly_rand):
        ungapped = remove_dash(local_alignment)
        global_matrix = compute_alignment_matrix(
            ungapped, consensus, pam50, True)
        (score, aligned_rand, aligned_consensus) = compute_global_alignment(
            ungapped, consensus, pam50, global_matrix)
        match = count_match_percentage(aligned_rand, aligned_consensus)
        print(match)
def question_4():
    '''
    Question 4: statistical hypothesis testing of the Question 1 alignment.

    Builds a null distribution from 1000 trials of generate_null_distribution
    on HumanEyelessProtein and FruitflyEyelessProtein (PAM50 scoring), shows
    a bar plot of its normalized version (scores on the horizontal axis,
    fraction of total trials on the vertical axis) and returns the original,
    un-normalized distribution.
    '''
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    num_trials = 1000
    distribution = generate_null_distribution(
        human, fruitfly, pam50, num_trials)

    # Fraction of all trials that produced each score.
    fractions = {score: float(distribution[score]) / num_trials
                 for score in distribution}

    plt.bar(fractions.keys(), fractions.values())
    plt.title('Null Distribution for Hypothesis Testing using 1000 Trials')
    plt.xlabel('Local Alignment Scores')
    plt.ylabel('Fraction of Total Trials')
    plt.show()
    return distribution
def run_app_q2():
    """
    Question 2 of application.

    Strips the dashes from the fruit fly local alignment of Question 1,
    globally aligns it against the consensus PAX domain under PAM50 and
    prints the match percentage from count_match_percentage.

    NOTE: only the fruit fly alignment is evaluated; the equivalent human
    comparison is disabled in this function.
    """
    pam50 = provided.read_scoring_matrix(PAM50_PATH)
    (_local_score, _local_human, local_fruitfly) = run_app_q1()
    consensus = provided.read_protein(CONSENSUS_PAX_PATH)

    fruitfly_ungapped = remove_dash(local_fruitfly)
    # The final True requests the global alignment matrix.
    global_matrix = compute_alignment_matrix(
        fruitfly_ungapped, consensus, pam50, True)
    (_global_score, aligned_fruitfly, aligned_consensus) = \
        compute_global_alignment(
            fruitfly_ungapped, consensus, pam50, global_matrix)

    print(count_match_percentage(aligned_fruitfly, aligned_consensus))
def global_alignment_consensus():
    """
    Question: 2

    For elements 1 and 2 of the Question 1 local alignment result, strips
    the dashes, globally aligns the sequence against the consensus PAX
    domain and records the percentage of aligned positions that agree.
    Returns the two percentages as a list, in that order.
    """
    local_alignments = local_alignment_eyeless_protein()
    consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    percentages = []
    for position in (1, 2):
        ungapped = local_alignments[position].replace("-", "")
        # The final True requests the global alignment matrix.
        global_matrix = student.compute_alignment_matrix(
            ungapped, consensus, pam50, True)
        aligned = student.compute_global_alignment(
            ungapped, consensus, pam50, global_matrix)
        matches = sum(
            1 for first, second in zip(aligned[1], aligned[2])
            if first == second)
        percentages.append(float(matches * 100) / len(aligned[1]))
    return percentages
def question_2():
    '''
    Question 2: compare the Question 1 local alignments to the PAX domain.

    ConsensusPAXDomain holds a "consensus" sequence of the PAX domain. Each
    local alignment from Question 1 has its dashes removed, is globally
    aligned with that consensus sequence under PAM50, and the two globally
    aligned strings are then scored with compute_similarity.

    Returns the tuple ('Human:', human similarity, 'Fruitfly:', fruit fly
    similarity).
    '''
    pax_consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    score, human_alignment, fruitfly_alignment = question_1()
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    # Dashes are alignment gaps, not residues; drop them before re-aligning.
    human_ungapped = human_alignment.replace('-', '')
    fruitfly_ungapped = fruitfly_alignment.replace('-', '')

    # Global alignment of each dash-less sequence against the consensus.
    human_matrix = project4.compute_alignment_matrix(
        human_ungapped, pax_consensus, pam50, True)
    human_global = project4.compute_global_alignment(
        human_ungapped, pax_consensus, pam50, human_matrix)
    fruitfly_matrix = project4.compute_alignment_matrix(
        fruitfly_ungapped, pax_consensus, pam50, True)
    fruitfly_global = project4.compute_global_alignment(
        fruitfly_ungapped, pax_consensus, pam50, fruitfly_matrix)

    # Elements 1 and 2 of each result are the two aligned strings.
    human_similarity = compute_similarity(human_global[1], human_global[2])
    fruitfly_similarity = compute_similarity(
        fruitfly_global[1], fruitfly_global[2])
    return 'Human:', human_similarity, 'Fruitfly:', fruitfly_similarity
def global_alignment_consensus():
    """
    Question: 2

    For elements 1 and 2 of the Question 1 local alignment result, strips
    the dashes, globally aligns the sequence against the consensus PAX
    domain and records the percentage of aligned positions that agree.
    Returns the two percentages as a list, in that order.
    """
    local_alignments = local_alignment_eyeless_protein()
    consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    percentages = []
    for position in (1, 2):
        ungapped = local_alignments[position].replace("-", "")
        # The final True requests the global alignment matrix.
        global_matrix = student.compute_alignment_matrix(
            ungapped, consensus, pam50, True)
        aligned = student.compute_global_alignment(
            ungapped, consensus, pam50, global_matrix)
        matches = sum(
            1 for first, second in zip(aligned[1], aligned[2])
            if first == second)
        percentages.append(float(matches * 100) / len(aligned[1]))
    return percentages
def percent_match(local_alignment):
    '''
    Computes the percent similarity between a local alignment and the
    consensus PAX sequence after globally aligning the two.

    local_alignment: aligned sequence string; any '-' gap characters are
        removed before the global alignment is computed.

    Returns the percentage of positions at which the two globally aligned
    strings hold the same character, rounded to two decimals.

    Raises ZeroDivisionError if the global alignment is empty.
    '''
    # Dashes are alignment gaps, not residues.
    ungapped = local_alignment.replace('-', '')
    # Load the PAM50 scoring matrix and the consensus sequence.
    pam50_scoring_matrix = provided.read_scoring_matrix(PAM50_URL)
    consensus_sequence = provided.read_protein(CONSENSUS_PAX_URL)
    # Build the global alignment matrix (final flag True = global).
    alignment_matrix = student.compute_alignment_matrix(
        ungapped, consensus_sequence, pam50_scoring_matrix, True)
    # Compute the global alignment itself; the score is not needed here.
    _score, global_alignment, consensus_alignment = \
        student.compute_global_alignment(
            ungapped, consensus_sequence, pam50_scoring_matrix,
            alignment_matrix)
    # Count the positions where both aligned strings agree.
    match = sum(
        1 for local_char, consensus_char
        in zip(global_alignment, consensus_alignment)
        if local_char == consensus_char)
    return round(match / float(len(global_alignment)) * 100, 2)
""" Author: Ko-Shin Chen Algorithmic Thinking (Part 2) Application 4: Applications to Genomics and Beyond """ import Project_4 import alg_application4_provided as provided import math import matplotlib.pyplot as plt """ Question 1 """ seq_human = provided.read_protein(provided.HUMAN_EYELESS_URL) seq_fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL) scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL) local_alignment_mx = Project_4.compute_alignment_matrix(seq_human, seq_fly, scoring_matrix, False) result = Project_4.compute_local_alignment(seq_human, seq_fly, scoring_matrix, local_alignment_mx) print 'Score:' + str(result[0]) print 'Human: ' + result[1] print 'Fly: ' + result[2] """ Question 2 """
""" Algorithm thinking application 4-2 data: 2015/07/30 Author: You-Hao """ import alg_application4_provided as app4 import AT_project_4 as pj4 scoring_matrix = app4.read_scoring_matrix(app4.PAM50_URL) seq_PAX = app4.read_protein(app4.CONSENSUS_PAX_URL) seq_human = 'HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEK-QQ' seq_fruitfly = 'HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ' seq_human_nodash = '' seq_fruitfly_nodash = '' for char in seq_human: if char != '-': seq_human_nodash = seq_human_nodash + char for char in seq_fruitfly: if char != '-': seq_fruitfly_nodash = seq_fruitfly_nodash + char print len(seq_human_nodash) print len(seq_fruitfly_nodash) # for human
""" Application 4 scripts """ import Project4 as help import alg_application4_provided as provided import random seq_x = provided.read_protein(provided.HUMAN_EYELESS_URL) seq_y = provided.read_protein(provided.FRUITFLY_EYELESS_URL) score_matrix = provided.read_scoring_matrix(provided.PAM50_URL) """ # Question1 local_align_matrix = help.compute_alignment_matrix(seq_x, seq_y, score_matrix, False) score, align_x, align_y = help.compute_local_alignment(seq_x, seq_y, score_matrix, local_align_matrix) # Question2 new_seq_x = align_x.rstrip("-QQ") new_seq_x = new_seq_x + "QQ" new_seq_y = align_y seq_consensus = provided.read_protein(provided.CONSENSUS_PAX_URL) global_matrix_x = help.compute_alignment_matrix(new_seq_x, seq_consensus, score_matrix, True) score_x_consensus, align_x1, align_y1 = help.compute_global_alignment(new_seq_x, seq_consensus, score_matrix, global_matrix_x) global_matrix_y = help.compute_alignment_matrix(new_seq_y, seq_consensus, score_matrix, True) score_y_consensus, align_x2, align_y2 = help.compute_global_alignment(new_seq_y, seq_consensus, score_matrix, global_matrix_y) # Question3 alphabets = "ACBEDGFIHKMLNQPSRTWVYXZ" seq1 = "" seq2 = "" for dummy_num in range(len(seq_x)):
for dummy_idx in range(num_trials): tmp_y = list(seq_y) random.shuffle(tmp_y) rand_y = ''.join(tmp_y) alignment_matrix = pj4.compute_alignment_matrix(seq_x, rand_y, scoring_matrix, False) score = max([max(value) for value in alignment_matrix]) #score, align_x, align_y = pj4.compute_local_alignment(seq_x, rand_y, scoring_matrix, alignment_matrix) if score not in scoring_distribution.keys(): scoring_distribution[score] = 1 else: scoring_distribution[score] += 1 return scoring_distribution protein_human = app4.read_protein(app4.HUMAN_EYELESS_URL) protein_fruitfly = app4.read_protein(app4.FRUITFLY_EYELESS_URL) scoring_matrix = app4.read_scoring_matrix(app4.PAM50_URL) scoring_distribution = generate_null_distribution(protein_human, protein_fruitfly, scoring_matrix, 1000) #scoring_distribution = {38: 1, 39: 1, 40: 8, 41: 9, 42: 28, 43: 35, 44: 50, 45: 46, 46: 49, 47: 57, 48: 63, 49: 62, 50: 72, 51: 56, 52: 56, 53: 61, 54: 62, 55: 32, 56: 25, 57: 33, 58: 29, 59: 22, 60: 25, 61: 15, 62: 13, 63: 10, 64: 13, 65: 20, 66: 2, 67: 4, 68: 14, 69: 5, 70: 3, 71: 2, 72: 3, 74: 2, 75: 2, 76: 1, 77: 1, 79: 2, 81: 2, 84: 1, 85: 1, 94: 1, 97: 1} print scoring_distribution x_value = scoring_distribution.keys() print x_value x_value.sort() print x_value y_value = [] for score in x_value: y_value.append(scoring_distribution[score] / float(1000) * 100)
""" Author: Tejaswini Dhupad Algorithmic Thinking (Part 2) Application 4: Applications to Genomics and Beyond """ import Project_4 import alg_application4_provided as provided import math import matplotlib.pyplot as plt """ Question 1 """ seq_human = provided.read_protein(provided.HUMAN_EYELESS_URL) seq_fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL) scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL) local_alignment_mx = Project_4.compute_alignment_matrix( seq_human, seq_fly, scoring_matrix, False) result = Project_4.compute_local_alignment(seq_human, seq_fly, scoring_matrix, local_alignment_mx) print 'Score:' + str(result[0]) print 'Human: ' + result[1] print 'Fly: ' + result[2] """ Question 2 """ ali_human = result[1] ali_fly = result[2] seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL)