def q2():
    """Print how closely each local alignment matches the consensus PAX sequence.

    The human and fruit fly sequences are locally aligned against each other,
    the dashes are stripped from both aligned strings, and each stripped
    string is globally aligned against the consensus PAX sequence.  For each
    global alignment the percentage of positions holding the same character
    in both aligned strings is printed.
    """

    def percent_identical(first, second):
        """Return the percentage of positions at which the two strings agree.

        Strings of different lengths yield 0.
        """
        if len(first) == len(second):
            matches = sum(1 for left, right in zip(first, second) if left == right)
            return (matches * 1.0 / len(first)) * 100
        return 0

    human_seq = data.read_protein(data.HUMAN_EYELESS_URL)
    fly_seq = data.read_protein(data.FRUITFLY_EYELESS_URL)
    pam50 = data.read_scoring_matrix(data.PAM50_URL)
    consensus = data.read_protein(data.CONSENSUS_PAX_URL)

    # Local alignment of the two full sequences (global flag set to False).
    local_matrix = soln.compute_alignment_matrix(human_seq, fly_seq, pam50, False)
    _, local_human, local_fly = soln.compute_local_alignment(
        human_seq, fly_seq, pam50, local_matrix)

    # Strip the dashes from both locally aligned strings.
    stripped = [aligned.replace("-", "") for aligned in (local_human, local_fly)]

    # Global alignment matrices of each stripped string against the consensus,
    # then the global alignments themselves (human first, fruit fly second).
    matrices = [
        soln.compute_alignment_matrix(seq, consensus, pam50, True)
        for seq in stripped
    ]
    alignments = [
        soln.compute_global_alignment(seq, consensus, pam50, matrix)
        for seq, matrix in zip(stripped, matrices)
    ]

    # The labels end in a tab, so the value follows them directly.
    for label, aligned in zip(("human:\t\t", "fruit fly:\t"), alignments):
        print("%s%s" % (label, percent_identical(aligned[1], aligned[2])))
def q7():
    """Print a scoring matrix and the global alignment of "ABCABC" with itself.

    The scoring matrix is built by ``soln.build_scoring_matrix`` from the
    alphabet string "ABC-" and the arguments 2, 1, 0.
    """
    first = second = "ABCABC"

    scoring = soln.build_scoring_matrix("ABC-", 2, 1, 0)
    print(scoring)

    # Global alignment: the matrix is computed with the global flag set.
    matrix = soln.compute_alignment_matrix(first, second, scoring, True)
    result = soln.compute_global_alignment(first, second, scoring, matrix)
    print(result)
def _percent_agreement(first, second):
    """Return the percentage of positions of *first* matched by *second*.

    The count of positions holding the same character in both strings is
    divided by ``len(first)``.
    """
    matches = sum(1 for left, right in zip(first, second) if left == right)
    return matches / float(len(first)) * 100


def _report_consensus_agreement(consensus, local_alignment, local_header, species):
    """Globally align one local alignment against the consensus and print it.

    Args:
        consensus: the consensus sequence read by the caller.
        local_alignment: one aligned string of the local alignment; its
            dashes are removed before the global alignment.
        local_header: text printed immediately before the dash-free string.
        species: label inserted into the printed messages.
    """
    stripped = local_alignment.replace("-", "")
    # The header ends with a newline, so the sequence follows it directly.
    print(local_header + stripped)

    matrix = p.compute_alignment_matrix(
        consensus, stripped, scoring_matrix, True)
    global_alignment = p.compute_global_alignment(
        consensus, stripped, scoring_matrix, matrix)
    print("\nGlobal alignment consensus vs %s:\n%s" % (species, global_alignment))

    # Elements 1 and 2 of the result are the two aligned strings.
    agreement = _percent_agreement(global_alignment[1], global_alignment[2])
    print("Pencentage of ageeing letters in global alignment of local {} VS consensus: {}%".format(
        species, agreement))


def q2_solution():
    """Compare both local alignments with the consensus PAX domain.

    Reads the consensus sequence with ``read_protein(CONSENSUS_PAX_URL)``,
    then, for ``alignment[1]`` (human) and ``alignment[2]`` (fruit fly),
    removes the dashes, globally aligns the result against the consensus and
    prints the alignment and the percentage of agreeing positions.

    ``alignment``, ``scoring_matrix`` and ``p`` are names defined outside this
    function.

    The fruit fly global alignment used to be printed under the header
    "consensus vs chimp"; it is now labelled "fruitfly" like the rest of the
    fruit fly output.
    """
    consensus = read_protein(CONSENSUS_PAX_URL)

    _report_consensus_agreement(
        consensus, alignment[1], "\nLocal_alignment_1: \n", "human")
    _report_consensus_agreement(
        consensus, alignment[2], "\nLocal_alignment_2:\n", "fruitfly")
def q1():
    """Print the local and the global alignment of the human and fruit fly sequences.

    Both sequences are read with ``data.read_protein`` and scored with the
    matrix read from ``data.PAM50_URL``.  The local alignment is computed and
    printed first (alignment matrix built with the global flag False), then
    the global alignment (flag True).

    The output of an earlier run used to be pasted here as two tuples bound
    to local names that were never read; those dead assignments have been
    removed.  The recorded scores were 875 for the local alignment and 4 for
    the global alignment.
    """
    human = data.read_protein(data.HUMAN_EYELESS_URL)
    fly = data.read_protein(data.FRUITFLY_EYELESS_URL)
    scores = data.read_scoring_matrix(data.PAM50_URL)

    # Local alignment.
    local_matrix = soln.compute_alignment_matrix(human, fly, scores, False)
    print(soln.compute_local_alignment(human, fly, scores, local_matrix))

    # Global alignment.
    global_matrix = soln.compute_alignment_matrix(human, fly, scores, True)
    print(soln.compute_global_alignment(human, fly, scores, global_matrix))
def check_spelling(checked_word, dist, word_list):
    """Return the set of words from word_list close enough to checked_word.

    For every word, the global alignment of checked_word and the word is
    computed with a scoring matrix built from the lowercase letters and the
    arguments 2, 1, 0.  The distance is taken as
    ``len(word) + len(checked_word) - score``, where ``score`` is the first
    element of the alignment result; words whose distance is at most ``dist``
    are returned.
    """
    alphabet = list("qwertyuiopasdfghjklzxcvbnm")
    scoring = p.build_scoring_matrix(alphabet, 2, 1, 0)

    def within_distance(candidate):
        """Tell whether candidate lies within dist of checked_word."""
        matrix = p.compute_alignment_matrix(checked_word, candidate, scoring, True)
        alignment = p.compute_global_alignment(
            checked_word, candidate, scoring, matrix)
        return len(candidate) + len(checked_word) - alignment[0] <= dist

    return {candidate for candidate in word_list if within_distance(candidate)}
def q7():
    """Print the global alignment matrix and alignment of two test words.

    The words "kqistian" and "kristian" are aligned using a scoring matrix
    built by ``p.build_scoring_matrix`` from a hand-written letter list and
    the arguments 2, 1, 0.
    """
    first, second = "kqistian", "kristian"
    alphabet = ["a", "b", "e", "k", "q", "t", "r", "i", "t", "n", "s"]
    scoring = p.build_scoring_matrix(alphabet, 2, 1, 0)

    # Global alignment: the matrix is computed with the global flag set.
    matrix = p.compute_alignment_matrix(first, second, scoring, True)
    print(matrix)

    result = p.compute_global_alignment(first, second, scoring, matrix)
    print(result)

#q7()

#Question 8 """
def check_spelling(checked_word, dist, word_list):
    """Return the words from word_list close enough to checked_word, in order.

    Each word is globally aligned with checked_word using the scoring matrix
    ``word_sm`` (defined outside this function).  The distance is taken as
    ``len(checked_word) + len(word) - score``, where ``score`` is the first
    element of the alignment result.  Every word whose distance is at most
    ``dist`` is appended to the result and printed together with its distance.
    """
    matches = []
    checked_len = len(checked_word)

    for candidate in word_list:
        # Global alignment: the matrix is computed with the global flag set.
        matrix = soln.compute_alignment_matrix(checked_word, candidate, word_sm, True)
        alignment_score, _, _ = soln.compute_global_alignment(
            checked_word, candidate, word_sm, matrix)

        distance = checked_len + len(candidate) - alignment_score
        if distance > dist:
            continue

        matches.append(candidate)
        print("%s %s" % (candidate, distance))

    return matches