def question2():
    """
    Compare the Question 1 local alignments with the consensus PAX domain.

    Dashes are removed from the human and fruit fly sequences returned by
    question1(); each dash-less sequence is then globally aligned with PAX
    under SCORING_MATRIX, and the share of aligned positions whose two
    characters are equal is printed for each organism.

    Returns:
        A (human_percent, fly_percent) tuple holding the printed values,
        each a fraction of the aligned length (not multiplied by 100).
    """
    q1 = question1()
    dashless_local_human = q1[1].replace('-', '')
    dashless_local_fly = q1[2].replace('-', '')

    fractions = []
    for sequence in (dashless_local_human, dashless_local_fly):
        # The table feeds compute_global_alignment, so it must be the global
        # one. NOTE(review): assumes the last argument is project4's
        # global/local switch with True meaning global -- the original
        # passed False here.
        alignment_matrix = project4.compute_alignment_matrix(
            sequence, PAX, SCORING_MATRIX, True)
        alignment = project4.compute_global_alignment(
            sequence, PAX, SCORING_MATRIX, alignment_matrix)
        aligned_sequence, aligned_pax = alignment[1], alignment[2]
        matches = sum(1 for seq_char, pax_char
                      in zip(aligned_sequence, aligned_pax)
                      if seq_char == pax_char)
        fractions.append(float(matches) / len(aligned_sequence))

    human_percent, fly_percent = fractions
    print("human_percent: %s" % human_percent)
    print("fly_percent: %s" % fly_percent)
    return human_percent, fly_percent
def question2():
    """
    Print how well each Question 1 local alignment matches the PAX consensus.

    The human and fruit fly sequences from question1() have their dashes
    removed and are globally aligned with the consensus PAX domain using
    the PAM50 scoring matrix. For each, the percentage of aligned positions
    holding the same character in both strings is printed (human first).
    """
    local_result = question1()
    scoring = read_scoring_matrix(PAM50_URL)
    human_seq = local_result[1].replace('-', '')
    fly_seq = local_result[2].replace('-', '')
    pax = read_protein(CONSENSUS_PAX_URL)

    human_table = compute_alignment_matrix(human_seq, pax, scoring, True)
    fly_table = compute_alignment_matrix(fly_seq, pax, scoring, True)

    # Human versus consensus.
    human_result = compute_global_alignment(human_seq, pax, scoring,
                                            human_table)
    aligned_human = human_result[1]
    aligned_pax = human_result[2]
    human_matches = sum(1 for pos, char in enumerate(aligned_human)
                        if char == aligned_pax[pos])
    print(float(human_matches) / len(aligned_human) * 100)

    # Fruit fly versus consensus.
    fly_result = compute_global_alignment(fly_seq, pax, scoring, fly_table)
    aligned_fly = fly_result[1]
    aligned_pax = fly_result[2]
    fly_matches = sum(1 for pos, char in enumerate(aligned_fly)
                      if char == aligned_pax[pos])
    print(float(fly_matches) / len(aligned_fly) * 100)
def question2():
    """
    Print global alignments and agreement percentages for Question 2.

    Uses the module-level names local_human, local_fruitfly and scores.
    Each local alignment has its dashes removed and is globally aligned
    with the consensus sequence read from 'alg_ConsensusPAXDomain.txt'.
    For the human sequence and then the fruit fly sequence, the alignment
    tuple is printed, followed by the percentage of aligned positions at
    which both strings carry the same character.
    """
    human_sequence = local_human.replace('-', '')
    fruitfly_sequence = local_fruitfly.replace('-', '')

    # The context manager closes the file even if the read fails.
    with open('alg_ConsensusPAXDomain.txt', 'r') as consensus_file:
        consensus = consensus_file.read()
    # NOTE(review): the last two characters are dropped, presumably a
    # trailing line terminator -- confirm against the data file.
    consensus = consensus[:-2]

    for sequence in (human_sequence, fruitfly_sequence):
        align_matrix = project4.compute_alignment_matrix(
            sequence, consensus, scores, True)
        global_align = project4.compute_global_alignment(
            sequence, consensus, scores, align_matrix)
        print(global_align)

        aligned_sequence = global_align[1]
        aligned_consensus = global_align[2]
        similarity = sum(1 for seq_char, con_char
                         in zip(aligned_sequence, aligned_consensus)
                         if seq_char == con_char)
        print(similarity / float(len(aligned_sequence)) * 100)
def question2():
    """
    Print global alignments and agreement percentages for Question 2.

    Uses the module-level names local_human, local_fruitfly and scores.
    Each local alignment has its dashes removed and is globally aligned
    with the consensus sequence read from 'alg_ConsensusPAXDomain.txt'.
    For the human sequence and then the fruit fly sequence, the alignment
    tuple is printed, followed by the percentage of aligned positions at
    which both strings carry the same character.
    """
    dashless_human = local_human.replace('-', '')
    dashless_fruitfly = local_fruitfly.replace('-', '')

    # The context manager closes the file even if the read fails.
    with open('alg_ConsensusPAXDomain.txt', 'r') as consensus_file:
        consensus = consensus_file.read()
    # NOTE(review): the last two characters are dropped, presumably a
    # trailing line terminator -- confirm against the data file.
    consensus = consensus[:-2]

    for dashless in (dashless_human, dashless_fruitfly):
        align_matrix = project4.compute_alignment_matrix(
            dashless, consensus, scores, True)
        global_align = project4.compute_global_alignment(
            dashless, consensus, scores, align_matrix)
        print(global_align)

        aligned_dashless = global_align[1]
        aligned_consensus = global_align[2]
        similarity = sum(1 for left, right
                         in zip(aligned_dashless, aligned_consensus)
                         if left == right)
        print(similarity / float(len(aligned_dashless)) * 100)
def find_scoring_matrix(x, y, med, dim):
    """
    Search for scoring parameters consistent with a known edit distance.

    A parameter triple qualifies when the global alignment score of x and y
    equals |x| + |y| - med, i.e. when the minimum edit distance formula
    |x| + |y| - score(x, y) yields med.

    Inputs:
        x, y: lowercase English strings
        med: minimum edit distance between x and y
        dim: number of values (0 .. dim - 1) tried for each of diag_score,
             off_score and the magnitude of dash_score; the dash score
             actually used is the negated value, so it is always <= 0

    Returns an integer array with one row [diag_score, off_score,
    dash_score] per qualifying combination.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    # e.g. med(kitten, sitting) = 3 gives a target score of 10
    target_score = len(x) + len(y) - med

    scores = np.zeros((dim, dim, dim))
    for diag, off, dash in np.ndindex(dim, dim, dim):
        scoring = seq.build_scoring_matrix(letters, diag, off, -1 * dash)
        table = seq.compute_alignment_matrix(x, y, scoring)
        best = seq.compute_global_alignment(x, y, scoring, table)
        scores[diag, off, dash] = best[0]

    # Grid coordinates whose score hit the target; the dash column holds
    # magnitudes, so flip its sign to report the score that was used.
    parameters = np.argwhere(scores == target_score)
    parameters[:, 2] = -parameters[:, 2]
    return parameters
def edit_dist(xs, ys):
    """
    Return len(xs) + len(ys) minus the global alignment score of xs and ys.

    The scoring matrix uses 2 for a match, 1 for a mismatch and 0 for a
    dash over the letters in ascii_lowercase.
    """
    # NOTE(review): ascii_lowercase is presumably string.ascii_lowercase,
    # imported elsewhere in this file -- confirm.
    letters = ascii_lowercase
    weights = build_scoring_matrix(letters, 2, 1, 0)
    # The final True requests the global-alignment table.
    table = compute_alignment_matrix(xs, ys, weights, True)
    best_score, _aligned_xs, _aligned_ys = compute_global_alignment(
        xs, ys, weights, table)
    combined_length = len(xs) + len(ys)
    return combined_length - best_score
def question_2():
    """
    Print how well the human/fruit fly local alignment matches the PAX consensus.

    The human and fruit fly eyeless proteins are locally aligned under the
    PAM50 scoring matrix. Each side of that local alignment has its dashes
    removed and is globally aligned with the consensus PAX domain. For each,
    the percentage of positions at which the aligned sequence and the
    aligned consensus carry the same character is printed.
    """
    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    consensus = read_protein(CONSENSUS_PAX_URL)
    scoring_matrix = read_scoring_matrix(PAM50_URL)

    alignment_matrix_local = project4.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    local_aligns = project4.compute_local_alignment(
        human, fly, scoring_matrix, alignment_matrix_local)
    human_no_dashes = local_aligns[1].replace('-', '')
    fly_no_dashes = local_aligns[2].replace('-', '')

    percentages = []
    for sequence in (human_no_dashes, fly_no_dashes):
        global_matrix = project4.compute_alignment_matrix(
            sequence, consensus, scoring_matrix, True)
        global_align = project4.compute_global_alignment(
            sequence, consensus, scoring_matrix, global_matrix)
        aligned_sequence = global_align[1]
        # Compare against the ALIGNED consensus. The raw consensus (used
        # previously) goes out of step with aligned_sequence as soon as the
        # alignment inserts a dash into the consensus.
        aligned_consensus = global_align[2]
        matches = sum(1 for seq_char, con_char
                      in zip(aligned_sequence, aligned_consensus)
                      if seq_char == con_char)
        percentages.append(
            (float(matches) / len(aligned_sequence)) * 100)

    human_percentage, fly_percentage = percentages
    print("human percentage:  %s" % human_percentage)
    print("fly percentage:  %s" % fly_percentage)
def edit_dist(xs, ys):
    '''
    Helper function for Question 8.

    Returns len(xs) + len(ys) minus the global alignment score of xs and
    ys, scored with 2 per match, 1 per mismatch and 0 per dash over the
    lowercase letters a-z.
    '''
    weights = project4.build_scoring_matrix(
        'abcdefghijklmnopqrstuvwxyz', 2, 1, 0)
    table = project4.compute_alignment_matrix(xs, ys, weights, True)
    # Only the score is needed; the aligned strings are discarded.
    best_score, _aligned_xs, _aligned_ys = project4.compute_global_alignment(
        xs, ys, weights, table)
    combined_length = len(xs) + len(ys)
    return combined_length - best_score
def question_2():
    '''
    Compare both sequences of the Question 1 local alignment with the
    consensus PAX domain sequence.

    Each sequence has its dashes removed, is globally aligned with the
    consensus sequence under the PAM50 scoring matrix, and the two aligned
    strings are passed to compute_similarity.

    Returns the tuple ('Human:', human_similarity,
    'Fruitfly:', fruitfly_similarity).
    '''
    consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    _score, human_alignment, fruitfly_alignment = question_1()
    scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)

    def align_with_consensus(sequence):
        # Global alignment of one dash-less sequence against the consensus.
        table = project4.compute_alignment_matrix(
            sequence, consensus, scoring_matrix, True)
        return project4.compute_global_alignment(
            sequence, consensus, scoring_matrix, table)

    # Remove the dashes introduced by the local alignment.
    human = human_alignment.replace('-', '')
    fruitfly = fruitfly_alignment.replace('-', '')

    human_global = align_with_consensus(human)
    fruitfly_global = align_with_consensus(fruitfly)

    # Score the agreement between each aligned sequence and the aligned
    # consensus.
    human_similarity = compute_similarity(human_global[1], human_global[2])
    fruitfly_similarity = compute_similarity(fruitfly_global[1],
                                             fruitfly_global[2])
    return ('Human:', human_similarity,
            'Fruitfly:', fruitfly_similarity)
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words in word_list within edit distance dist of
    checked_word.

    Edit distance is computed as |x| + |y| - score(x, y), where score is
    the global alignment score under diag_score = 2, off_diag_score = 1
    and dash_score = 0.
    """
    letters = set("abcdefghijklmnopqrstuvwxyz")
    weights = project4.build_scoring_matrix(letters, 2, 1, 0)

    close_words = set()
    for candidate in word_list:
        table = project4.compute_alignment_matrix(
            checked_word, candidate, weights, True)
        best_score, _, _ = project4.compute_global_alignment(
            checked_word, candidate, weights, table)
        distance = len(checked_word) + len(candidate) - best_score
        if distance > dist:
            continue
        close_words.add(candidate)
    return close_words
def calculate_similar_ratio():
    """
    Print how closely the human and fly local alignments match the PAX consensus.

    The two aligned sequences returned by align_human_fly_protein() have
    their dashes removed. Each is globally aligned with the consensus PAX
    protein using the module-level scoring_matrix, and the fraction of
    aligned positions holding equal characters is printed (human first,
    then fly).
    """
    local_result = align_human_fly_protein()
    sequence_human = local_result[1].replace('-', '')
    sequence_fly = local_result[2].replace('-', '')

    # The consensus does not change between the two comparisons, so it is
    # fetched once instead of once per sequence.
    protein_consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)

    for sequence in (sequence_human, sequence_fly):
        alignment_matrix = project4.compute_alignment_matrix(
            sequence, protein_consensus, scoring_matrix, True)
        result = project4.compute_global_alignment(
            sequence, protein_consensus, scoring_matrix, alignment_matrix)
        aligned_sequence, aligned_consensus = result[1], result[2]
        mark = sum(1 for seq_char, con_char
                   in zip(aligned_sequence, aligned_consensus)
                   if seq_char == con_char)
        print(mark / float(len(aligned_sequence)))
def pax_domain(scoring_matrix, local_alignment):
    """
    Measure how closely the human and drosophila local alignments match
    the consensus PAX domain.

    local_alignment[1] (human) and local_alignment[2] (drosophila) have
    their dashes removed and are each globally aligned with the consensus
    sequence. Returns a tuple (human, drosophila) giving, for each global
    alignment, the proportion of positions at which both aligned strings
    hold the same amino acid.
    """
    consensus = read_protein(CONSENSUS_PAX_URL)

    # Dash-less versions of the two locally aligned sequences.
    human_seq = local_alignment[1].replace('-', '')
    fly_seq = local_alignment[2].replace('-', '')

    human_table = seq.compute_alignment_matrix(human_seq, consensus,
                                               scoring_matrix)
    human_result = seq.compute_global_alignment(human_seq, consensus,
                                                scoring_matrix, human_table)
    fly_table = seq.compute_alignment_matrix(fly_seq, consensus,
                                             scoring_matrix)
    fly_result = seq.compute_global_alignment(fly_seq, consensus,
                                              scoring_matrix, fly_table)

    def agreement(aligned, aligned_consensus):
        # Share of positions where the two aligned strings agree.
        total = len(aligned)
        same = sum(1 for pos in range(total)
                   if aligned[pos] == aligned_consensus[pos])
        return float(same) / total

    human_share = agreement(human_result[1], human_result[2])
    fly_share = agreement(fly_result[1], fly_result[2])
    return (human_share, fly_share)
def check_spelling(checked_word, dist, word_list):
    """
    Function for Question 8: find words within edit distance dist of
    checked_word.

    Edit distance is len(x) + len(y) minus the global alignment score under
    a scoring matrix of 2 per match, 1 per mismatch and 0 per dash.

    Two cheap filters skip candidates that cannot qualify before the
    alignment is run:
      * the candidate's length must be within dist of len(checked_word);
      * the number of distinct letters in the candidate that never occur in
        checked_word must not exceed dist, because each such letter costs
        at least one insertion or substitution.

    The number of alignments actually computed is printed.

    Returns:
        checked_word itself (a string) when it occurs in word_list;
        otherwise a list of the qualifying words in word_list order.
        NOTE(review): the two return types differ; kept as-is so existing
        callers keep working.
    """
    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    if checked_word in word_list:
        return checked_word

    score_matrix = project4.build_scoring_matrix(alphabet, 2, 1, 0)
    checked_word_chars = set(checked_word)
    min_length = len(checked_word) - dist
    max_length = len(checked_word) + dist

    words = []
    num_checks = 0
    for each_word in word_list:
        if not min_length <= len(each_word) <= max_length:
            continue
        # The bound used to be a hard-coded 2, which wrongly discarded
        # valid candidates whenever dist was larger than 2.
        num_diffs = len(set(each_word) - checked_word_chars)
        if num_diffs > dist:
            continue

        align_matrix = project4.compute_alignment_matrix(
            checked_word, each_word, score_matrix, True)
        result = project4.compute_global_alignment(
            checked_word, each_word, score_matrix, align_matrix)
        if len(checked_word) + len(each_word) - result[0] <= dist:
            words.append(each_word)
        num_checks += 1

    print(num_checks)
    return words
def question7():
    """
    Question 7

    Globally aligns the fixed strings 'abcde' and 'xycdefg' under a scoring
    matrix of 2 per match, 1 per mismatch and 0 per dash, then prints both
    strings, the alignment result, and the sum of their lengths minus the
    alignment score.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    weights = project4.build_scoring_matrix(letters, 2, 1, 0)

    first = 'abcde'
    second = 'xycdefg'
    table = project4.compute_alignment_matrix(first, second, weights, True)
    outcome = project4.compute_global_alignment(first, second, weights,
                                                table)

    print(first)
    print(second)
    print(outcome)
    combined_length = len(first) + len(second)
    print(combined_length - outcome[0])
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words from word_list whose edit distance from
    checked_word is at most dist.

    Edit distance is len(x) + len(y) minus the global alignment score under
    a scoring matrix of 2 per match, 1 per mismatch and 0 per dash over the
    lowercase letters a-z.
    """
    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    # The scoring matrix does not depend on the candidate word, so it is
    # built once here instead of once per loop iteration.
    smtrx = seq.build_scoring_matrix(alphabet, 2, 1, 0)

    candidates = set()
    for word in word_list:
        amtrx = seq.compute_alignment_matrix(checked_word, word, smtrx)
        score = seq.compute_global_alignment(checked_word, word,
                                             smtrx, amtrx)[0]
        if len(checked_word) + len(word) - score <= dist:
            candidates.add(word)
    return candidates
def calculate_edit_distance(xseq, yseq):
    '''
    Return the edit distance of xseq and yseq
    (http://en.wikipedia.org/wiki/Edit_distance).

    Computed as len(xseq) + len(yseq) minus the global alignment score
    under a scoring matrix of 2 per match, 1 per mismatch and 0 per dash,
    built over the lowercase letters plus '-'.
    '''
    symbols = set('abcdefghijklmnopqrstuvwxyz-')
    weights = project4.build_scoring_matrix(symbols, 2, 1, 0)
    table = project4.compute_alignment_matrix(xseq, yseq, weights, True)
    alignment = project4.compute_global_alignment(xseq, yseq, weights,
                                                  table)
    combined_length = len(xseq) + len(yseq)
    return combined_length - alignment[0]
def check_spelling(checked_word, dist, word_list):
    """
    Function for Question 8: find words within edit distance dist of
    checked_word.

    Edit distance is len(x) + len(y) minus the global alignment score under
    a scoring matrix of 2 per match, 1 per mismatch and 0 per dash.

    Two cheap filters skip candidates that cannot qualify before the
    alignment is run:
      * the candidate's length must be within dist of len(checked_word);
      * the number of distinct letters in the candidate that never occur in
        checked_word must not exceed dist, because each such letter costs
        at least one insertion or substitution.

    The number of alignments actually computed is printed.

    Returns:
        checked_word itself (a string) when it occurs in word_list;
        otherwise a list of the qualifying words in word_list order.
        NOTE(review): the two return types differ; kept as-is so existing
        callers keep working.
    """
    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    if checked_word in word_list:
        return checked_word

    score_matrix = project4.build_scoring_matrix(alphabet, 2, 1, 0)
    checked_word_chars = set(checked_word)
    shortest = len(checked_word) - dist
    longest = len(checked_word) + dist

    words = []
    num_checks = 0
    for each_word in word_list:
        if len(each_word) < shortest or len(each_word) > longest:
            continue
        # The bound used to be a hard-coded 2, which wrongly discarded
        # valid candidates whenever dist was larger than 2.
        num_diffs = len(set(each_word) - checked_word_chars)
        if num_diffs > dist:
            continue

        align_matrix = project4.compute_alignment_matrix(
            checked_word, each_word, score_matrix, True)
        result = project4.compute_global_alignment(
            checked_word, each_word, score_matrix, align_matrix)
        if len(checked_word) + len(each_word) - result[0] <= dist:
            words.append(each_word)
        num_checks += 1

    print(num_checks)
    return words
def check_spelling(check_word, dist, word_list):
    """
    Collect the dictionary words close to check_word.

    :param check_word: word to check
    :param dist: maximum edit distance allowed
    :param word_list: list of words (dictionary)
    :return: list of the words from word_list, in their original order,
             whose edit distance from check_word is at most dist
    """
    letters = list(string.ascii_lowercase)
    weights = student.build_scoring_matrix(letters, 2, 1, 0)

    def distance_from(candidate):
        # len(x) + len(y) - global alignment score, candidate passed first.
        table = student.compute_alignment_matrix(candidate, check_word,
                                                 weights, True)
        alignment = student.compute_global_alignment(candidate, check_word,
                                                     weights, table)
        return len(candidate) + len(check_word) - alignment[0]

    return [candidate for candidate in word_list
            if distance_from(candidate) <= dist]
def question7():
    """
    Question 7

    Globally aligns the fixed strings 'abcde' and 'xycdefg' under a scoring
    matrix of 2 per match, 1 per mismatch and 0 per dash, then prints both
    strings, the alignment result, and the sum of their lengths minus the
    alignment score.
    """
    lowercase = set('abcdefghijklmnopqrstuvwxyz')
    scoring = project4.build_scoring_matrix(lowercase, 2, 1, 0)

    word_a = 'abcde'
    word_b = 'xycdefg'
    dp_table = project4.compute_alignment_matrix(word_a, word_b, scoring,
                                                 True)
    aligned = project4.compute_global_alignment(word_a, word_b, scoring,
                                                dp_table)

    for line in (word_a, word_b, aligned):
        print(line)
    print(len(word_a) + len(word_b) - aligned[0])
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of all words in word_list that are within edit distance
    dist of the string checked_word.

    Edit distance is len(x) + len(y) minus the global alignment score under
    a scoring matrix of 2 per match, 1 per mismatch and 0 per dash.
    """
    weights = project4.build_scoring_matrix(
        'abcdefghijklmnopqrstuvwxyz', 2, 1, 0)
    base_length = len(checked_word)

    def distance_to(candidate):
        # Edit distance between checked_word and one dictionary word.
        candidate_length = len(candidate)
        table = project4.compute_alignment_matrix(
            checked_word, candidate, weights, True)
        alignment = project4.compute_global_alignment(
            checked_word, candidate, weights, table)
        return base_length + candidate_length - alignment[0]

    return set(candidate for candidate in word_list
               if distance_to(candidate) <= dist)
def check_spelling(checked_word, dist, word_list):
    """
    Find the words in word_list within edit distance dist of checked_word.

    Edit distance is len(x) + len(y) minus the global alignment score under
    a scoring matrix of 2 per match, 1 per mismatch and 0 per dash.

    Parameters
    ----------
    checked_word: str
        the word to be checked
    dist: int
        the maximum edit distance
    word_list: list
        a list of words

    Returns
    -------
    result: list
        the words of word_list, in order, whose edit distance from
        checked_word is at most dist.
    """
    weights = build_scoring_matrix("abcdefghijklmnopqrstuvwxyz", 2, 1, 0)

    def distance_to(candidate):
        table = compute_alignment_matrix(checked_word, candidate,
                                         weights, True)
        best = compute_global_alignment(checked_word, candidate,
                                        weights, table)[0]
        return len(checked_word) + len(candidate) - best

    return [candidate for candidate in word_list
            if distance_to(candidate) <= dist]
def edit_distance(seq_x, seq_y):
    """
    Return len(seq_x) + len(seq_y) minus their global alignment score.

    The alignment is scored with 2 per match, 1 per mismatch and 0 per
    dash over the lowercase ASCII letters.
    """
    weights = project4.build_scoring_matrix(string.ascii_lowercase, 2, 1, 0)
    table = project4.compute_alignment_matrix(seq_x, seq_y, weights, True)
    alignment = project4.compute_global_alignment(seq_x, seq_y, weights,
                                                  table)
    combined_length = len(seq_x) + len(seq_y)
    return combined_length - alignment[0]
# Load the proteins, the PAM50 scoring matrix and the consensus PAX domain.
HUMAN_EYELESS_PROTEIN = read_protein(HUMAN_EYELESS_URL)
FRUITFLY_EYELESS_PROTEIN = read_protein(FRUITFLY_EYELESS_URL)
PAM50_SCORING_MATRIX = read_scoring_matrix(PAM50_URL)
CONSENSUS_PAX = read_protein(CONSENSUS_PAX_URL)

# Global-alignment table for the two full eyeless proteins. It is no longer
# used for the consensus comparisons below, but the name is kept in case
# other code in this file reads it.
PAM50_ALIGNMENT_MATRIX = student.compute_alignment_matrix(
    HUMAN_EYELESS_PROTEIN, FRUITFLY_EYELESS_PROTEIN,
    PAM50_SCORING_MATRIX, True)

# Hard-coded dash-less sequences compared against the consensus below.
SEQ_A = 'HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQ'
SEQ_B = 'HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ'
print(CONSENSUS_PAX)

# Each global alignment gets a table built for the exact pair being aligned.
# The human-vs-fruitfly table above was previously passed here, although it
# describes two different sequences.
# NOTE(review): assumes compute_global_alignment traces back through the
# table it is given, as the rest of this file's usage suggests.
ALIGNMENT_MATRIX_A = student.compute_alignment_matrix(
    CONSENSUS_PAX, SEQ_A, PAM50_SCORING_MATRIX, True)
ALIGNMENT_MATRIX_B = student.compute_alignment_matrix(
    CONSENSUS_PAX, SEQ_B, PAM50_SCORING_MATRIX, True)
(SCORE1, CONSENSUS_PAX1, SEQ_A1) = student.compute_global_alignment(
    CONSENSUS_PAX, SEQ_A, PAM50_SCORING_MATRIX, ALIGNMENT_MATRIX_A)
(SCORE2, CONSENSUS_PAX2, SEQ_B2) = student.compute_global_alignment(
    CONSENSUS_PAX, SEQ_B, PAM50_SCORING_MATRIX, ALIGNMENT_MATRIX_B)
print(SEQ_A1)
print(CONSENSUS_PAX1)
print(SEQ_B2)
print(CONSENSUS_PAX2)

# Count the aligned positions at which SEQ_A agrees with the consensus.
Percentage1 = 0.0
Percentage2 = 0.0
for dummy_x in xrange(len(SEQ_A1)):
    if CONSENSUS_PAX1[dummy_x] == SEQ_A1[dummy_x]:
        Percentage1 = Percentage1 + 1.0
seq_x, seq_y, scoring_matrix, alignment_matrix)
print string_Hu

# Rebuild string_Hu without its '-' characters.
newstring_Hu = ""
for elem in string_Hu:
    if elem != '-':
        newstring_Hu += elem
print newstring_Hu

# Rebuild string_Fr without its '-' characters.
newstring_Fr = ""
for elem in string_Fr:
    if elem != '-':
        newstring_Fr += elem

# Globally align each dash-less string with consensusseq; the alignment
# matrix is built with its final argument set to True.
alignment_matrix_Hum_local_Con = student.compute_alignment_matrix(
    newstring_Hu, consensusseq, scoring_matrix, True)
score1, str_Hu_Con, str_Con_Hu = student.compute_global_alignment(
    newstring_Hu, consensusseq, scoring_matrix,
    alignment_matrix_Hum_local_Con)
alignment_matrix_Fr_local_Con = student.compute_alignment_matrix(
    newstring_Fr, consensusseq, scoring_matrix, True)
score2, str_Fr_Con, str_Con_Fr = student.compute_global_alignment(
    newstring_Fr, consensusseq, scoring_matrix,
    alignment_matrix_Fr_local_Con)


def cal_percentage(str1, str2):
    """
    Return the fraction of positions in str1 whose character equals the
    character at the same index of str2.

    The result is a float between 0.0 and 1.0. str2 is indexed over the
    full length of str1, so it must be at least as long; an empty str1
    raises ZeroDivisionError.
    """
    count = 0
    num = len(str1)
    for i in range(num):
        if str1[i] == str2[i]:
            count += 1
    # float() forces true division under Python 2.
    return float(count) / num
def agreement(xs, ys, scoring, alignmnet):
    """
    Return the percentage of global-alignment positions at which xs and ys
    agree.

    xs and ys are globally aligned with the given scoring matrix and
    alignment matrix (`alignmnet`). The result is 100 times the number of
    positions where the two ALIGNED strings carry the same character,
    divided by the aligned length.
    """
    _, aligned_x, aligned_y = compute_global_alignment(xs, ys, scoring,
                                                       alignmnet)
    # Compare the two aligned strings. Zipping aligned_x with the raw ys
    # (as before) goes out of step as soon as the alignment inserts a dash
    # into ys, and silently ignores the tail of aligned_x.
    matches = sum(1 for a, b in zip(aligned_x, aligned_y) if a == b)
    return 100. * matches / len(aligned_x)