def question_two():
    """
    Compare the human and fruit fly local alignments against the consensus
    PAX sequence.

    The gap characters are stripped from both local alignment strings
    returned by question_one(), each stripped sequence is globally aligned
    with the consensus sequence, and the share of identical positions in
    each global alignment is measured.

    Returns a two-element list [human, fly]. Each entry is a fraction
    between 0 and 1 (it is NOT multiplied by 100).
    """
    scoring_matrix = read_scoring_matrix(PAM50_URL)
    local_results = question_one()
    pax_seq = read_protein(CONSENSUS_PAX_URL)
    gap = "-"

    # drop the gap characters from the human and fruit fly sequences
    human_ungapped = "".join(residue for residue in local_results[1]
                             if residue != gap)
    print("Old human seq: " + local_results[1])
    print("New human seq: " + human_ungapped)

    fly_ungapped = "".join(residue for residue in local_results[2]
                           if residue != gap)
    print("Old fly seq: " + local_results[2])
    print("New fly seq: " + fly_ungapped)

    # globally align each ungapped sequence with the consensus sequence and
    # record the fraction of positions where both aligned strings agree
    print("Computing alignment matrices and global alignments...")
    fractions = []
    for ungapped in (human_ungapped, fly_ungapped):
        table = prj4.compute_alignment_matrix(ungapped, pax_seq,
                                              scoring_matrix, True)
        comparison = prj4.compute_global_alignment(ungapped, pax_seq,
                                                   scoring_matrix, table)
        aligned_seq, aligned_pax = comparison[1], comparison[2]
        hits = sum(1 for pos, residue in enumerate(aligned_pax)
                   if aligned_seq[pos] == residue)
        fractions.append(hits / float(len(aligned_pax)))

    # human fraction first, fruit fly fraction second
    return fractions
def check_spelling(checked_word, dist, word_list):
    """
    Return the words in word_list within edit distance dist of checked_word.

    checked_word -- the string to check; it is aligned against every
                    candidate using a scoring matrix built over the
                    lower-case ASCII letters
    dist         -- largest edit distance that still counts as a match
    word_list    -- iterable of candidate words

    Returns a list (not a set) of the matching words, in the order they
    appear in word_list.

    The edit distance is taken to be
    len(checked_word) + len(word) - score, where score is the global
    alignment score under diagonal score 2, off-diagonal score 1 and
    dash score 0.
    """
    alphabet = set(string.ascii_lowercase)
    diag_score, off_diag_score, dash_score = 2, 1, 0

    # the scoring matrix does not depend on the candidate: build it once
    scoring_matrix = student.build_scoring_matrix(alphabet, diag_score,
                                                  off_diag_score, dash_score)

    checked_len = len(checked_word)
    close_words = []
    for word in word_list:
        # An alignment pairs at most min(len) characters and each pair adds
        # at most diag_score (2), so the distance computed below can never
        # be smaller than the length difference. Skip the costly alignment
        # when that bound already exceeds dist.
        if abs(checked_len - len(word)) > dist:
            continue

        alignment_matrix = student.compute_alignment_matrix(
            checked_word, word, scoring_matrix, True)
        # only the score is needed; the aligned strings are ignored
        score = student.compute_global_alignment(
            checked_word, word, scoring_matrix, alignment_matrix)[0]

        edit_distance = checked_len + len(word) - score
        if edit_distance <= dist:
            close_words.append(word)
    return close_words
def question_two():
    """
    Compare the human and fruit fly local alignments against the consensus
    PAX sequence.

    The gap characters are removed from both local alignment strings
    returned by question_one(), each ungapped sequence is globally aligned
    with the consensus sequence, and the share of identical positions in
    each global alignment is measured.

    Returns a two-element list [human, fly]. Each entry is a fraction
    between 0 and 1 (it is NOT multiplied by 100).

    Written so that it runs unchanged on Python 2 and Python 3: every print
    takes a single parenthesized argument and no xrange is used.
    """
    scoring_matrix = read_scoring_matrix(PAM50_URL)
    local_results = question_one()
    pax_seq = read_protein(CONSENSUS_PAX_URL)
    dash = "-"

    def _fraction_identical(sequence):
        # Globally align sequence with the consensus and return the fraction
        # of alignment columns whose two characters are equal.
        align_matrix = prj4.compute_alignment_matrix(sequence, pax_seq,
                                                     scoring_matrix, True)
        result = prj4.compute_global_alignment(sequence, pax_seq,
                                               scoring_matrix, align_matrix)
        matches = sum(1 for index, char in enumerate(result[2])
                      if result[1][index] == char)
        return matches / float(len(result[2]))

    # remove dashes from human and fruit fly sequences
    new_human = local_results[1].replace(dash, "")
    print("Old human seq: " + local_results[1])
    print("New human seq: " + new_human)

    new_fly = local_results[2].replace(dash, "")
    print("Old fly seq: " + local_results[2])
    print("New fly seq: " + new_fly)

    # compute the global alignments against pax and the match fractions;
    # human first, then fruit fly
    print("Computing alignment matrices and global alignments...")
    return [_fraction_identical(new_human), _fraction_identical(new_fly)]
def answer_Q7():
    """
    Return the (diag_score, off_diag_score, dash_score) triple (2, 1, 0).

    Before returning, the function builds a scoring matrix with those
    scores over the alphabet {A, C, T, G}, globally aligns 'AA' with
    'TAAT', and computes len(seq_x) + len(seq_y) - score. That value is
    only computed locally; it is neither printed nor returned.
    """
    diag_score, off_diag_score, dash_score = 2, 1, 0
    first_seq, second_seq = 'AA', 'TAAT'
    nucleotides = set('ACTG')

    matrix = student.build_scoring_matrix(nucleotides, diag_score,
                                          off_diag_score, dash_score)
    table = student.compute_alignment_matrix(first_seq, second_seq,
                                             matrix, True)
    best_score, _aligned_first, _aligned_second = \
        student.compute_global_alignment(first_seq, second_seq,
                                         matrix, table)

    # presumably a sanity check of the chosen scores; the value is unused
    _edit_distance = len(first_seq) + len(second_seq) - best_score

    return (diag_score, off_diag_score, dash_score)
def percent_match(local_alignment):
    '''
    Computes the percent similarity between a local alignment and the
    consensus PAX sequence.

    The '-' characters are stripped from local_alignment, the result is
    globally aligned with the consensus sequence using the PAM50 scoring
    matrix, and the positions where the two aligned strings carry the same
    character are counted.

    Returns the matching share as a percentage (0-100) rounded to two
    decimal places.
    '''
    # strip the gap characters from the local alignment
    ungapped = local_alignment.replace('-', '')

    # fetch the PAM50 scoring matrix, then the consensus sequence
    pam50 = provided.read_scoring_matrix(PAM50_URL)
    consensus = provided.read_protein(CONSENSUS_PAX_URL)

    # global alignment of the ungapped sequence against the consensus
    table = student.compute_alignment_matrix(ungapped, consensus,
                                             pam50, True)
    _score, aligned_local, aligned_consensus = \
        student.compute_global_alignment(ungapped, consensus, pam50, table)

    # number of positions where both aligned strings agree
    identical = sum(1 for pos, residue in enumerate(aligned_local)
                    if residue == aligned_consensus[pos])

    return round(identical / float(len(aligned_local)) * 100, 2)
print (Project_4.compute_alignment_matrix('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True)) #expected [[0]] but received [] print (Project_4.compute_alignment_matrix('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True)) #expected [[0, -4], [-4, 6]] print (Project_4.compute_alignment_matrix('ATG', 'ACG', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True)) #expected [[0, -4, -8, -12], [-4, 6, 2, -2], [-8, 2, 8, 4], [-12, -2, 4, 14]] if TEST3: print (Project_4.compute_global_alignment('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0]])) #expected tuple of length 3 print (Project_4.compute_global_alignment('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0, -4], [-4, 6]])) #expected 6, 'A', 'A' print (Project_4.compute_global_alignment('ACTACT', 'AGCTA', {'A': {'A': 2, 'C': 1, '-': 0, 'T': 1, 'G': 1}, 'C': {'A': 1, 'C': 2, '-': 0, 'T': 1, 'G': 1}, '-': {'A': 0, 'C': 0, '-': 0, 'T': 0, 'G': 0}, 'T': {'A': 1, 'C': 1, '-': 0, 'T': 2, 'G': 1}, 'G': {'A': 
1, 'C': 1, '-': 0, 'T': 1, 'G': 2}}, [[0, 0, 0, 0, 0, 0], [0, 2, 2, 2, 2, 2], [0, 2, 3, 4, 4, 4], [0, 2, 3, 4, 6, 6], [0, 2, 3, 4, 6, 8], [0, 2, 3, 5, 6, 8], [0, 2, 3, 5, 7, 8]])) #expected 8, 'ACTACT', 'AGCTA', print (Project_4.compute_global_alignment('abddcdeffgh', 'aabcddefghij', {'-': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1,
Question 2 """ ali_human = result[1] ali_fly = result[2] seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL) ali_human = ali_human.replace('-', '') ali_fly = ali_fly.replace('-', '') global_alignment_mx_human = Project_4.compute_alignment_matrix( ali_human, seq_con, scoring_matrix, True) global_alignment_mx_fly = Project_4.compute_alignment_matrix( ali_fly, seq_con, scoring_matrix, True) result2_human = Project_4.compute_global_alignment(ali_human, seq_con, scoring_matrix, global_alignment_mx_human) result2_fly = Project_4.compute_global_alignment(ali_fly, seq_con, scoring_matrix, global_alignment_mx_fly) print 'Score: ' + str(result2_human[0]) print 'Local Human: ' + result2_human[1] print 'Consensus: ' + result2_human[2] print print 'Score: ' + str(result2_fly[0]) print 'Local Fly: ' + result2_fly[1] print 'Consensus: ' + result2_fly[2] len_human = len(result2_human[1]) len_fly = len(result2_fly[1])
print Project_4.compute_alignment_matrix('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True) #expected [[0]] but received [] print Project_4.compute_alignment_matrix('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True) #expected [[0, -4], [-4, 6]] print Project_4.compute_alignment_matrix('ATG', 'ACG', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True) #expected [[0, -4, -8, -12], [-4, 6, 2, -2], [-8, 2, 8, 4], [-12, -2, 4, 14]] if TEST3: print Project_4.compute_global_alignment('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0]]) #expected tuple of length 3 print Project_4.compute_global_alignment('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0, -4], [-4, 6]]) #expected 6, 'A', 'A' print Project_4.compute_global_alignment('ACTACT', 'AGCTA', {'A': {'A': 2, 'C': 1, '-': 0, 'T': 1, 'G': 1}, 'C': {'A': 1, 'C': 2, '-': 0, 'T': 1, 'G': 1}, '-': {'A': 0, 'C': 0, '-': 0, 'T': 0, 'G': 0}, 'T': {'A': 1, 'C': 1, '-': 0, 'T': 2, 'G': 1}, 'G': {'A': 1, 'C': 1, 
'-': 0, 'T': 1, 'G': 2}}, [[0, 0, 0, 0, 0, 0], [0, 2, 2, 2, 2, 2], [0, 2, 3, 4, 4, 4], [0, 2, 3, 4, 6, 6], [0, 2, 3, 4, 6, 8], [0, 2, 3, 5, 6, 8], [0, 2, 3, 5, 7, 8]]) #expected 8, 'ACTACT', 'AGCTA', print Project_4.compute_global_alignment('abddcdeffgh', 'aabcddefghij', {'-': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1,
""" Question 2 """ ali_human = result[1] ali_fly = result[2] seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL) ali_human = ali_human.replace('-', '') ali_fly = ali_fly.replace('-', '') global_alignment_mx_human = Project_4.compute_alignment_matrix(ali_human, seq_con, scoring_matrix, True) global_alignment_mx_fly = Project_4.compute_alignment_matrix(ali_fly, seq_con, scoring_matrix, True) result2_human = Project_4.compute_global_alignment(ali_human, seq_con, scoring_matrix, global_alignment_mx_human) result2_fly = Project_4.compute_global_alignment(ali_fly, seq_con, scoring_matrix, global_alignment_mx_fly) print 'Score: ' + str(result2_human[0]) print 'Local Human: ' + result2_human[1] print 'Consensus: ' + result2_human[2] print print 'Score: ' + str(result2_fly[0]) print 'Local Fly: ' + result2_fly[1] print 'Consensus: ' + result2_fly[2] len_human = len(result2_human[1]) len_fly = len(result2_fly[1])