def question2():
    """
    Question 2 (2 pts): compare the Question 1 local alignment with the
    consensus PAX domain.

    To continue our investigation, we next consider the similarity of the
    two sequences in the local alignment computed in Question 1 to a third
    sequence. The file ConsensusPAXDomain contains a "consensus" sequence of
    the PAX domain; that is, the sequence of amino acids in the PAX domain
    in any organism. Each of the two sequences of the local alignment is
    compared to this consensus sequence to determine whether it corresponds
    to the PAX domain.

    For each of the two sequences of the local alignment:
      1. Delete any dashes '-' present in the sequence.
      2. Compute the global alignment of this dash-less sequence with the
         ConsensusPAXDomain sequence.
      3. Compare corresponding elements of the two globally-aligned
         sequences (local vs. consensus) and compute the percentage of
         elements that agree.

    The answer is two percentages: one for local human vs. consensus and
    one for local fruit fly vs. consensus.

    Reads the module-level names ``solution``, ``consensus`` and
    ``scoring``, which are defined outside this function.
    NOTE(review): ``solution`` is presumably the Question 1 local alignment
    laid out as (score, human, fruit fly) -- confirm against its definition.

    Prints both percentages (rounded to two decimals) followed by the length
    of the fruit fly / consensus alignment. Returns nothing.
    """

    def percent_agreement(alignment):
        # alignment[1] and alignment[2] are treated as the two aligned
        # strings; the percentage is taken over the length of alignment[2],
        # exactly as the original loop did.
        aligned_seq, aligned_consensus = alignment[1], alignment[2]
        length = len(aligned_consensus)
        if length == 0:
            # Nothing to compare; avoid a ZeroDivisionError.
            return 0.0
        matches = 0.0  # float so the division below is never integer division
        for left, right in zip(aligned_seq, aligned_consensus):
            if left == right:
                matches += 1
        return (matches / length) * 100

    # Strip the gap characters from both sides of the local alignment.
    q2_human = str(solution[1]).replace('-', '')
    q2_firefly = str(solution[2]).replace('-', '')

    # Global alignment of each dash-less sequence against the consensus.
    q2_human_matrix = project4.compute_alignment_matrix(
        q2_human, consensus, scoring, True)
    q2_firefly_matrix = project4.compute_alignment_matrix(
        q2_firefly, consensus, scoring, True)
    q2_human_alignment = project4.compute_global_alignment(
        q2_human, consensus, scoring, q2_human_matrix)
    q2_firefly_alignment = project4.compute_global_alignment(
        q2_firefly, consensus, scoring, q2_firefly_matrix)

    # Each percentage uses the length of its OWN alignment. Previously a
    # single `dim` variable was overwritten with the fruit fly length before
    # printing, so the human percentage was divided by the wrong length.
    human_percent = percent_agreement(q2_human_alignment)
    firefly_percent = percent_agreement(q2_firefly_alignment)

    # Printing the solution. A single parenthesised argument prints the same
    # text under both the Python 2 print statement and the Python 3 function.
    print('Human protein match = ' + str(round(human_percent, 2)) + ' %')
    print('Firefly protein match = ' + str(round(firefly_percent, 2)) + ' %')
    print(len(q2_firefly_alignment[2]))
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Create an un-normalized null distribution of local alignment scores.

    The following process is performed num_trials times:
      1. Generate a random permutation rand_y of the sequence seq_y
         (drawn with random.sample over the whole sequence).
      2. Compute the maximum value score for the local alignment of seq_x
         and rand_y using the score matrix scoring_matrix.
      3. Increment the entry score in the dictionary scoring_distribution
         by one.

    input:
        seq_x, seq_y - two sequences
        scoring_matrix - scoring matrix
        num_trials - number of trials

    return:
        scoring_distribution - a dictionary mapping each observed score to
        the number of trials that produced it (an un-normalized
        distribution)
    """
    scoring_distribution = {}
    for _ in range(num_trials):
        # random.sample with k == len(seq_y) yields a random permutation
        # without modifying seq_y itself.
        trial_y = ''.join([str(w) for w in random.sample(seq_y, len(seq_y))])

        # The flag must be False to build the LOCAL alignment matrix. The
        # original passed True, which elsewhere in this file is the value
        # used when a global alignment matrix is wanted.
        # NOTE(review): confirm against project4.compute_alignment_matrix.
        trial_matrix = project4.compute_alignment_matrix(
            seq_x, trial_y, scoring_matrix, False)

        # The best local alignment score is the largest matrix entry; the
        # floor of 0 matches the original scan, which started from 0.
        trial_max = max(0, max(max(row) for row in trial_matrix))

        scoring_distribution[trial_max] = (
            scoring_distribution.get(trial_max, 0) + 1)
    return scoring_distribution
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Create an un-normalized null distribution of local alignment scores.

    The following process is performed num_trials times:
      1. Generate a random permutation rand_y of the sequence seq_y
         (drawn with random.sample over the whole sequence).
      2. Compute the maximum value score for the local alignment of seq_x
         and rand_y using the score matrix scoring_matrix.
      3. Increment the entry score in the dictionary scoring_distribution
         by one.

    input:
        seq_x, seq_y - two sequences
        scoring_matrix - scoring matrix
        num_trials - number of trials

    return:
        scoring_distribution - a dictionary mapping each observed score to
        the number of trials that produced it (an un-normalized
        distribution)
    """
    scoring_distribution = {}
    for _ in range(num_trials):
        # random.sample with k == len(seq_y) yields a random permutation
        # without modifying seq_y itself.
        trial_y = ''.join([str(w) for w in random.sample(seq_y, len(seq_y))])

        # The flag must be False to build the LOCAL alignment matrix. The
        # original passed True, which elsewhere in this file is the value
        # used when a global alignment matrix is wanted.
        # NOTE(review): confirm against project4.compute_alignment_matrix.
        trial_matrix = project4.compute_alignment_matrix(
            seq_x, trial_y, scoring_matrix, False)

        # The best local alignment score is the largest matrix entry; the
        # floor of 0 matches the original scan, which started from 0.
        trial_max = max(0, max(max(row) for row in trial_matrix))

        scoring_distribution[trial_max] = (
            scoring_distribution.get(trial_max, 0) + 1)
    return scoring_distribution
def question2():
    """
    Question 2 (2 pts): compare the Question 1 local alignment with the
    consensus PAX domain.

    To continue our investigation, we next consider the similarity of the
    two sequences in the local alignment computed in Question 1 to a third
    sequence. The file ConsensusPAXDomain contains a "consensus" sequence of
    the PAX domain; that is, the sequence of amino acids in the PAX domain
    in any organism. Each of the two sequences of the local alignment is
    compared to this consensus sequence to determine whether it corresponds
    to the PAX domain.

    For each of the two sequences of the local alignment:
      1. Delete any dashes '-' present in the sequence.
      2. Compute the global alignment of this dash-less sequence with the
         ConsensusPAXDomain sequence.
      3. Compare corresponding elements of the two globally-aligned
         sequences (local vs. consensus) and compute the percentage of
         elements that agree.

    The answer is two percentages: one for local human vs. consensus and
    one for local fruit fly vs. consensus.

    Reads the module-level names ``solution``, ``consensus`` and
    ``scoring``, which are defined outside this function.
    NOTE(review): ``solution`` is presumably the Question 1 local alignment
    laid out as (score, human, fruit fly) -- confirm against its definition.

    Prints both percentages (rounded to two decimals) followed by the length
    of the fruit fly / consensus alignment. Returns nothing.
    """

    def percent_agreement(alignment):
        # alignment[1] and alignment[2] are treated as the two aligned
        # strings; the percentage is taken over the length of alignment[2],
        # exactly as the original loop did.
        aligned_seq, aligned_consensus = alignment[1], alignment[2]
        length = len(aligned_consensus)
        if length == 0:
            # Nothing to compare; avoid a ZeroDivisionError.
            return 0.0
        matches = 0.0  # float so the division below is never integer division
        for left, right in zip(aligned_seq, aligned_consensus):
            if left == right:
                matches += 1
        return (matches / length) * 100

    # Strip the gap characters from both sides of the local alignment.
    q2_human = str(solution[1]).replace('-', '')
    q2_firefly = str(solution[2]).replace('-', '')

    # Global alignment of each dash-less sequence against the consensus.
    q2_human_matrix = project4.compute_alignment_matrix(
        q2_human, consensus, scoring, True)
    q2_firefly_matrix = project4.compute_alignment_matrix(
        q2_firefly, consensus, scoring, True)
    q2_human_alignment = project4.compute_global_alignment(
        q2_human, consensus, scoring, q2_human_matrix)
    q2_firefly_alignment = project4.compute_global_alignment(
        q2_firefly, consensus, scoring, q2_firefly_matrix)

    # Each percentage uses the length of its OWN alignment. Previously a
    # single `dim` variable was overwritten with the fruit fly length before
    # printing, so the human percentage was divided by the wrong length.
    human_percent = percent_agreement(q2_human_alignment)
    firefly_percent = percent_agreement(q2_firefly_alignment)

    # Printing the solution. A single parenthesised argument prints the same
    # text under both the Python 2 print statement and the Python 3 function.
    print('Human protein match = ' + str(round(human_percent, 2)) + ' %')
    print('Firefly protein match = ' + str(round(firefly_percent, 2)) + ' %')
    print(len(q2_firefly_alignment[2]))