def question2():
    """
    Print how well the human and fruit-fly local alignments match the
    consensus PAX domain.

    Loads the PAM50 scoring matrix and the human, fruit-fly and consensus PAX
    proteins, computes the local alignment of human vs. fly, removes the
    dashes from both aligned pieces and globally aligns each piece against
    the consensus PAX domain.  For each global alignment the fraction of
    alignment columns whose two characters agree is printed, human first and
    fly second.
    """
    scoring_matrix = read_scoring_matrix(PAM50_URL)
    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    consensus_pax = read_protein(CONSENSUS_PAX_URL)

    alignment_matrix = student.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    local_result = student.compute_local_alignment(
        human, fly, scoring_matrix, alignment_matrix)
    local_human = local_result[1].replace('-', '')
    local_fly = local_result[2].replace('-', '')

    human_matrix = student.compute_alignment_matrix(
        local_human, consensus_pax, scoring_matrix, True)
    human_result = student.compute_global_alignment(
        local_human, consensus_pax, scoring_matrix, human_matrix)
    fly_matrix = student.compute_alignment_matrix(
        local_fly, consensus_pax, scoring_matrix, True)
    fly_result = student.compute_global_alignment(
        local_fly, consensus_pax, scoring_matrix, fly_matrix)

    for result in (human_result, fly_result):
        aligned_seq, aligned_pax = result[1], result[2]
        # Compare every column of the alignment: inserted dashes can make it
        # longer than the consensus sequence, so stopping at
        # len(consensus_pax) would ignore the tail of the alignment.
        agree = sum(1 for left, right in zip(aligned_seq, aligned_pax)
                    if left == right)
        print(agree * 1.0 / len(aligned_seq))
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Build an un-normalized distribution of local alignment scores.

    For each of num_trials trials a random permutation of seq_y is locally
    aligned against seq_x using scoring_matrix, and the resulting score is
    tallied.

    Returns a dictionary mapping each observed score to the number of trials
    that produced it (empty when num_trials is not positive).
    """
    scoring_distribution = {}
    for _ in range(num_trials):
        # Shuffle a real list: random.shuffle needs a mutable sequence, which
        # a range object is not on Python 3.
        letters = list(seq_y)
        random.shuffle(letters)
        rand_y = ''.join(letters)

        align_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        score = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, align_matrix)[0]

        # dict.get replaces the deprecated has_key lookup.
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1
    return scoring_distribution
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Run num_trials local alignments of seq_x against shuffled copies of seq_y.

    input : two sequences, a scoring matrix and the number of trials
    output: dictionary mapping the 1-based trial number to the local
            alignment score obtained in that trial (empty when num_trials
            is not positive)

    Note that the keys are trial numbers, not scores; callers that need a
    histogram must count the values themselves.
    """
    scores_by_trial = {}
    shuffled = list(seq_y)
    # range(1, num_trials + 1) performs exactly num_trials trials; the former
    # "start at 1, loop while < num_trials" counter ran one trial too few.
    for trial in range(1, num_trials + 1):
        random.shuffle(shuffled)
        rand_y = ''.join(shuffled)
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        local_alignment = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)
        scores_by_trial[trial] = local_alignment[0]
    return scores_by_trial
def run_q1():
    """
    Locally align the human and fruit-fly eyeless proteins under PAM50.

    Returns whatever student.compute_local_alignment returns for the two
    loaded sequences.
    """
    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    # False selects the local-alignment variant of the matrix.
    local_matrix = student.compute_alignment_matrix(human, fly, pam50, False)
    result = student.compute_local_alignment(human, fly, pam50, local_matrix)
    return result
def check_spelling(checked_word, dist, word_list):
    """
    Return the words of word_list within edit distance dist of checked_word.

    The distance is computed as len(checked_word) + len(word) - score, where
    score is the global alignment score under a scoring matrix built with
    diagonal 2, off-diagonal 1 and dash 0.  The result is a list in the order
    the words appear in word_list.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    edit_scores = student.build_scoring_matrix(letters, 2, 1, 0)
    checked_len = len(checked_word)

    close_words = []
    for candidate in word_list:
        matrix = student.compute_alignment_matrix(
            checked_word, candidate, edit_scores, True)
        alignment = student.compute_global_alignment(
            checked_word, candidate, edit_scores, matrix)
        distance = checked_len + len(candidate) - alignment[0]
        if distance > dist:
            continue
        close_words.append(candidate)
    return close_words
def check_spelling(checked_word, dist, word_list):
    """
    Return the words of word_list within edit distance dist of checked_word.

    The distance of a word is len(checked_word) + len(word) - score, where
    score is the global alignment score of the two words.  The result is a
    list in the order the words appear in word_list.

    NOTE(review): relies on a module-level scoring_matrix that is not defined
    in this function - confirm it is the edit-distance scoring matrix.
    """
    answer = []
    checked_len = len(checked_word)
    for word in word_list:
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, word, scoring_matrix, True)
        # The matrix above is built with the global flag, so it must be
        # scored with the global alignment routine, not the local one.
        result = student.compute_global_alignment(
            checked_word, word, scoring_matrix, alignment_matrix)
        if checked_len + len(word) - result[0] <= dist:
            answer.append(word)
    return answer
def question2():
    """
    Globally align the hard-coded human and fruit-fly local sequences with
    the consensus PAX domain and print, for each, the percentage of alignment
    columns that agree together with both aligned strings.
    """
    pam50 = read_scoring_matrix(PAM50_URL)
    human_seq = "HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQ"
    frfly_seq = "HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ"
    pax = read_protein(CONSENSUS_PAX_URL)

    # Global alignment matrices (flag True) against the consensus domain.
    human_matrix = student.compute_alignment_matrix(human_seq, pax, pam50, True)
    fly_matrix = student.compute_alignment_matrix(frfly_seq, pax, pam50, True)

    # Aligned strings for both species.
    _, human_aligned, pax_for_human = student.compute_global_alignment(
        human_seq, pax, pam50, human_matrix)
    _, fly_aligned, pax_for_fly = student.compute_global_alignment(
        frfly_seq, pax, pam50, fly_matrix)

    # Number of positions at which the aligned strings carry the same symbol.
    human_hits = float(sum(
        1 for pos in range(len(human_aligned))
        if human_aligned[pos] == pax_for_human[pos]))
    fly_hits = float(sum(
        1 for pos in range(len(fly_aligned))
        if fly_aligned[pos] == pax_for_fly[pos]))

    print("% Human: " + str(human_hits / len(human_aligned) * 100))
    print("Hmn: " + human_aligned)
    print("PAX: " + pax_for_human)
    print("")
    print("% FrFly: " + str(fly_hits / len(fly_aligned) * 100))
    print("Fly: " + fly_aligned)
    print("PAX: " + pax_for_fly)
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Tally local alignment scores of seq_x against random shuffles of seq_y.

    A single list copy of seq_y is reshuffled in place on every trial and
    passed (as a list) to the alignment routines.  Returns a dictionary
    mapping score -> number of trials with that score.
    """
    histogram = {}
    shuffled = list(seq_y[:])
    for _ in range(num_trials):
        random.shuffle(shuffled)
        matrix = student.compute_alignment_matrix(
            seq_x, shuffled, scoring_matrix, False)
        best = student.compute_local_alignment(
            seq_x, shuffled, scoring_matrix, matrix)[0]
        histogram[best] = histogram.get(best, 0) + 1
    return histogram
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Tally local alignment scores of seq_x against random permutations of
    seq_y drawn with random.sample.

    Returns a dictionary mapping score -> number of trials with that score.
    """
    counts = {}
    for _ in range(num_trials):
        permuted = ''.join(random.sample(seq_y, len(seq_y)))
        matrix = student.compute_alignment_matrix(
            seq_x, permuted, scoring_matrix, False)
        score = student.compute_local_alignment(
            seq_x, permuted, scoring_matrix, matrix)[0]
        counts[score] = counts.get(score, 0) + 1
    return counts
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Tally local alignment scores of seq_x against random shuffles of seq_y.

    Returns a defaultdict(int) mapping score -> number of trials with that
    score.
    """
    from collections import defaultdict

    tally = defaultdict(int)
    for _ in range(num_trials):
        letters = list(seq_y)
        random.shuffle(letters)
        shuffled_y = ''.join(letters)
        matrix = student.compute_alignment_matrix(
            seq_x, shuffled_y, scoring_matrix, False)
        alignment = student.compute_local_alignment(
            seq_x, shuffled_y, scoring_matrix, matrix)
        tally[alignment[0]] += 1
    return tally
def q3():
    """
    Repeat the PAX comparison with random amino-acid sequences.

    Generates two random sequences as long as the module-level seq_x, locally
    aligns them, prints the score and both aligned pieces, then globally
    aligns each dash-free piece against the consensus PAX domain and prints
    the alignment lengths and the fraction of agreeing columns.

    Reads the module-level names seq_x and scoring_matrix.
    """
    amino_acids = "ACBEDGFIHKMLNQPSRTWVYXZ"
    # Use distinct local names: assigning to seq_x inside the function made
    # it local, so reading len(seq_x) first raised UnboundLocalError.
    len_gen = len(seq_x)
    rand_x_chars = []
    rand_y_chars = []
    for _ in range(len_gen):
        rand_x_chars.append(random.choice(amino_acids))
        rand_y_chars.append(random.choice(amino_acids))
    rand_x = ''.join(rand_x_chars)
    rand_y = ''.join(rand_y_chars)

    alignment_matrix = student.compute_alignment_matrix(
        rand_x, rand_y, scoring_matrix, False)
    score, human_aligen, fruit_aligen = student.compute_local_alignment(
        rand_x, rand_y, scoring_matrix, alignment_matrix)
    print(score)
    print(human_aligen.replace('-', ''))
    print(fruit_aligen)

    human_aligen = human_aligen.replace('-', '')
    fruit_aligen = fruit_aligen.replace('-', '')
    pax = read_protein(CONSENSUS_PAX_URL)

    for piece in (human_aligen, fruit_aligen):
        # Global alignment needs a matrix built with the global flag (True).
        global_matrix = student.compute_alignment_matrix(
            piece, pax, scoring_matrix, True)
        score, aligned_piece, aligned_pax = student.compute_global_alignment(
            piece, pax, scoring_matrix, global_matrix)
        print("%d %d" % (len(aligned_piece), len(aligned_pax)))
        same = sum(1 for left, right in zip(aligned_piece, aligned_pax)
                   if left == right)
        print(same * 1.0 / len(aligned_piece))
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Count how often each local alignment score occurs when seq_x is aligned
    against repeatedly reshuffled copies of seq_y.

    The shuffled sequence is handed to the alignment routines as a list.
    Returns a dictionary mapping score -> occurrence count.
    """
    occurrences = {}
    permuted_y = list(seq_y[:])
    trials_left = num_trials
    while trials_left > 0:
        trials_left -= 1
        random.shuffle(permuted_y)
        local_matrix = student.compute_alignment_matrix(
            seq_x, permuted_y, scoring_matrix, False)
        outcome = student.compute_local_alignment(
            seq_x, permuted_y, scoring_matrix, local_matrix)
        occurrences.setdefault(outcome[0], 0)
        occurrences[outcome[0]] += 1
    return occurrences
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Tally local alignment scores of seq_x against random shuffles of seq_y.

    One list of seq_y's characters is reshuffled in place on every trial and
    joined back into a string before aligning.  Returns a dictionary mapping
    score -> number of trials with that score.
    """
    histogram = {}
    letters = list(seq_y)
    for _ in range(num_trials):
        random.shuffle(letters)
        shuffled_y = ''.join(letters)
        matrix = student.compute_alignment_matrix(
            seq_x, shuffled_y, scoring_matrix, False)
        best, _unused_x, _unused_y = student.compute_local_alignment(
            seq_x, shuffled_y, scoring_matrix, matrix)
        if best in histogram:
            histogram[best] += 1
        else:
            histogram[best] = 1
    return histogram
def run_q2(origin_seq_x):
    """
    Globally align origin_seq_x (with its dashes removed) against the
    hard-coded consensus sequence under PAM50.

    Prints the lengths of both inputs and both aligned strings, and returns
    the fraction of alignment columns in which the two strings agree.
    """
    stripped = origin_seq_x.replace('-', '')
    consensus = 'GHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIR'
    pam50 = read_scoring_matrix(PAM50_URL)
    matrix = student.compute_alignment_matrix(stripped, consensus, pam50, True)
    _, aligned_x, aligned_y = student.compute_global_alignment(
        stripped, consensus, pam50, matrix)
    assert len(aligned_x) == len(aligned_y)
    columns = len(aligned_y)
    print(len(stripped), len(consensus), len(aligned_x), len(aligned_y))
    agreeing = sum(1 for left, right in zip(aligned_x, aligned_y)
                   if left == right)
    return agreeing * 1.0 / columns
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words in word_list whose distance to checked_word is
    at most dist.

    input: word, target distance, and word list
    output: set of the words for which
            len(checked_word) + len(word) - score <= dist

    Words whose length differs from checked_word by more than dist are
    skipped without being aligned.

    NOTE(review): relies on a module-level scoring_matrix, and takes the
    score as the largest cell of the alignment matrix - confirm that this
    equals the global alignment score for the scoring matrix in use.
    """
    result = set()
    checked_len = len(checked_word)
    for item in word_list:
        item_len = len(item)
        if abs(checked_len - item_len) > dist:
            continue
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, item, scoring_matrix, True)
        score = max(map(max, alignment_matrix))
        if checked_len + item_len - score <= dist:
            # add() mutates in place; union() rebuilt the whole set per hit.
            result.add(item)
    return result
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Tally local alignment scores of seq_x against random shuffles of seq_y.

    Prints the zero-based trial number at the start of every trial.  The
    shuffled sequence is handed to the alignment routines as a list.
    Returns a dictionary mapping score -> number of trials with that score.
    """
    tallies = {}
    shuffled_y = list(seq_y)
    for trial_number in range(num_trials):
        print(trial_number)
        random.shuffle(shuffled_y)
        matrix = student.compute_alignment_matrix(
            seq_x, shuffled_y, scoring_matrix, False)
        outcome = student.compute_local_alignment(
            seq_x, shuffled_y, scoring_matrix, matrix)
        best, _unused_x, _unused_y = outcome
        tallies[best] = tallies.get(best, 0) + 1
    return tallies
def run_q2(origin_seq_x):
    """
    Measure agreement between origin_seq_x and the hard-coded consensus
    sequence.

    Dashes are removed from origin_seq_x, the result is globally aligned
    against the consensus sequence under PAM50, the four relevant lengths are
    printed, and the fraction of agreeing alignment columns is returned.
    """
    query = origin_seq_x.replace('-', '')
    target = 'GHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIR'
    pam50 = read_scoring_matrix(PAM50_URL)
    global_matrix = student.compute_alignment_matrix(
        query, target, pam50, True)
    alignment = student.compute_global_alignment(
        query, target, pam50, global_matrix)
    _, query_aligned, target_aligned = alignment
    assert len(query_aligned) == len(target_aligned)
    total = len(target_aligned)
    print(len(query), len(target), len(query_aligned), len(target_aligned))
    hits = 0
    for pair in zip(query_aligned, target_aligned):
        if pair[0] == pair[1]:
            hits += 1
    return hits * 1.0 / total
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words in word_list whose distance to checked_word is
    at most dist.

    input: word, target distance, and word list
    output: set of the words for which
            len(checked_word) + len(word) - score <= dist

    Words whose length differs from checked_word by more than dist are
    skipped without being aligned.

    NOTE(review): relies on a module-level scoring_matrix, and takes the
    score as the largest cell of the alignment matrix - confirm that this
    equals the global alignment score for the scoring matrix in use.
    """
    result = set()
    word_len = len(checked_word)
    for item in word_list:
        item_len = len(item)
        if abs(word_len - item_len) > dist:
            continue
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, item, scoring_matrix, True)
        score = max(map(max, alignment_matrix))
        if word_len + item_len - score <= dist:
            # In-place add avoids rebuilding the set on every hit.
            result.add(item)
    return result
def question7(seq_x, seq_y):
    """
    Print the edit distance between seq_x and seq_y and their global
    alignment.

    The scoring matrix is built over the lowercase alphabet with diagonal 2,
    off-diagonal 1 and dash 0; the distance is computed as
    len(seq_x) + len(seq_y) - global alignment score.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    edit_scores = student.build_scoring_matrix(letters, 2, 1, 0)
    matrix = student.compute_alignment_matrix(seq_x, seq_y, edit_scores, True)
    alignment = student.compute_global_alignment(
        seq_x, seq_y, edit_scores, matrix)
    global_score, aligned_x, aligned_y = alignment
    distance = len(seq_x) + len(seq_y) - global_score
    print("Edit distance: " + str(distance))
    print(aligned_x)
    print(aligned_y)
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Return a dictionary that represents an un-normalized distribution of
    local alignment scores.

    Each of the num_trials trials shuffles a copy of seq_y, locally aligns it
    against seq_x with scoring_matrix and increments the count stored under
    the resulting score.  The dictionary maps score -> number of trials.
    """
    distribution = {}
    for _ in range(num_trials):
        # (The unused per-trial time.time() stamp was removed.)
        letters = list(seq_y)
        random.shuffle(letters)
        rand_y = ''.join(letters)
        alignment_matrix = alg_project4_solution.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        alignment = alg_project4_solution.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)
        score = alignment[0]
        distribution[score] = distribution.get(score, 0) + 1
    return distribution
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trial):
    """
    Build an un-normalized distribution of local alignment scores.

    Each of the num_trial trials shuffles a copy of seq_y, locally aligns it
    against seq_x with scoring_matrix and counts the resulting score.
    Returns a dictionary mapping score -> number of trials with that score.
    """
    scoring_distribution = {}
    for _ in range(num_trial):
        letters = list(seq_y)
        random.shuffle(letters)
        rand_y = ''.join(letters)
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        result = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)
        score = result[0]
        # dict.get replaces the deprecated has_key membership test.
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1
    return scoring_distribution
def check_spelling(checked_word, dist, word_list):
    """
    Return, in input order, the words of word_list whose edit distance to
    checked_word is at most dist.

    The edit distance is len(checked_word) + len(word) - global alignment
    score, under a scoring matrix built over the lowercase alphabet with
    diagonal 2, off-diagonal 1 and dash 0.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    checked_len = len(checked_word)
    edit_scores = student.build_scoring_matrix(letters, 2, 1, 0)

    def edit_distance(word):
        """Edit distance between checked_word and word."""
        matrix = student.compute_alignment_matrix(
            checked_word, word, edit_scores, True)
        alignment = student.compute_global_alignment(
            checked_word, word, edit_scores, matrix)
        return checked_len + len(word) - alignment[0]

    return [word for word in word_list if edit_distance(word) <= dist]
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words in word_list whose distance to checked_word is
    at most dist.

    input: word, target distance, and word list
    output: set of the words for which
            len(checked_word) + len(word) - global alignment score <= dist

    NOTE(review): relies on a module-level scoring_matrix that is not defined
    in this function - confirm it is the edit-distance scoring matrix.
    """
    result = set()
    checked_len = len(checked_word)
    for item in word_list:
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, item, scoring_matrix, True)
        global_alignment = student.compute_global_alignment(
            checked_word, item, scoring_matrix, alignment_matrix)
        if checked_len + len(item) - global_alignment[0] <= dist:
            # add() mutates in place; union() rebuilt the whole set per hit.
            result.add(item)
    return result
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Return a dictionary that represents an un-normalized distribution of
    local alignment scores.

    For every trial a shuffled copy of seq_y is locally aligned against seq_x
    using scoring_matrix, and the count stored under the resulting score is
    incremented.  The dictionary maps score -> number of trials.
    """
    distribution = {}
    for _ in range(num_trials):
        # No timing is kept here: the old per-trial timestamp was never read.
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        rand_y = ''.join(shuffled)
        alignment_matrix = alg_project4_solution.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        alignment = alg_project4_solution.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)
        score = alignment[0]
        distribution[score] = distribution.get(score, 0) + 1
    return distribution
def question1():
    """
    Compute the local alignment of the human and fruit-fly eyeless proteins
    under PAM50 and print the score followed by both aligned sequences.
    """
    pam50 = read_scoring_matrix(PAM50_URL)
    human = read_protein(HUMAN_EYELESS_URL)
    fruitfly = read_protein(FRUITFLY_EYELESS_URL)

    # Local alignment matrix (flag False) and the alignment derived from it.
    local_matrix = student.compute_alignment_matrix(
        human, fruitfly, pam50, False)
    alignment = student.compute_local_alignment(
        human, fruitfly, pam50, local_matrix)
    best_score, human_aligned, fruitfly_aligned = alignment

    print("Score: " + str(best_score))
    print("Human: " + human_aligned)
    print("FrFly: " + fruitfly_aligned)
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    str, str, dict of dict, int -> dict

    Takes two sequences, a scoring matrix, and a number of trials, and
    returns a dictionary of un-normalized score counts: each trial locally
    aligns seq_x against a shuffled copy of seq_y and increments the count
    stored under the resulting score.
    """
    scoring_distribution = {}
    for _ in range(num_trials):
        list_y = list(seq_y)
        random.shuffle(list_y)
        rand_y = ''.join(list_y)
        # Align against the seq_x argument; the previous code read a global
        # named "human" and silently ignored the parameter.
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        alignment = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)
        score = alignment[0]
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1
    return scoring_distribution
def check_spelling(checked_word, dist, word_list):
    """
    Collect the words of word_list that lie within edit distance dist of
    checked_word.

    The edit distance is len(checked_word) + len(word) - global alignment
    score, under a scoring matrix built over the lowercase alphabet with
    diagonal 2, off-diagonal 1 and dash 0.  Returns a list in input order.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    edit_scores = student.build_scoring_matrix(letters, 2, 1, 0)
    checked_len = len(checked_word)

    nearby = []
    for candidate in word_list:
        matrix = student.compute_alignment_matrix(
            checked_word, candidate, edit_scores, True)
        best, _unused_x, _unused_y = student.compute_global_alignment(
            checked_word, candidate, edit_scores, matrix)
        if checked_len + len(candidate) - best > dist:
            continue
        nearby.append(candidate)
    return nearby
def check_spelling(checked_word, dist, word_list):
    """
    input: the word to check, the maximum edit distance, and a word list
    output: the set of all words in word_list that are within edit distance
            dist of checked_word

    The edit distance is len(checked_word) + len(word) - global alignment
    score, under a scoring matrix built over the lowercase alphabet with
    diagonal 2, off-diagonal 1 and dash 0.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    edit_scores = student.build_scoring_matrix(letters, 2, 1, 0)
    checked_len = len(checked_word)

    close_words = set([])
    for candidate in word_list:
        matrix = student.compute_alignment_matrix(
            checked_word, candidate, edit_scores, True)
        alignment = student.compute_global_alignment(
            checked_word, candidate, edit_scores, matrix)
        distance = checked_len + len(candidate) - alignment[0]
        if distance <= dist:
            close_words.add(candidate)
    return close_words
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    input: two sequences, a scoring matrix and the number of trials

    Every trial
      1. draws a random permutation of seq_y with random.sample (kept as a
         list),
      2. computes the local alignment of seq_x and that permutation under
         scoring_matrix,
      3. increments the entry for the resulting score.

    output: a dictionary mapping score -> count, i.e. an un-normalized
            distribution
    """
    tally = {}
    for _ in range(num_trials):
        permuted = random.sample(seq_y, len(seq_y))
        matrix = student.compute_alignment_matrix(
            seq_x, permuted, scoring_matrix, False)
        best = student.compute_local_alignment(
            seq_x, permuted, scoring_matrix, matrix)[0]
        tally[best] = tally.get(best, 0) + 1
    return tally
def score(x, y):
    """
    Return the first element of the global alignment result for x and y.

    Uses the scoring_matrix name from the enclosing scope.
    """
    matrix = student.compute_alignment_matrix(x, y, scoring_matrix, True)
    alignment = student.compute_global_alignment(x, y, scoring_matrix, matrix)
    return alignment[0]
def score(x, y):
    """
    Globally align x and y and return the score part (element 0) of the
    result.

    The scoring_matrix name is taken from the enclosing scope.
    """
    global_matrix = student.compute_alignment_matrix(
        x, y, scoring_matrix, True)
    result = student.compute_global_alignment(
        x, y, scoring_matrix, global_matrix)
    return result[0]
def run_q1():
    """
    Load the human and fruit-fly eyeless proteins and the PAM50 matrix, and
    return the result of their local alignment as produced by
    student.compute_local_alignment.
    """
    human_protein = read_protein(HUMAN_EYELESS_URL)
    fly_protein = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    # Build the local (flag False) alignment matrix, then trace the alignment.
    matrix = student.compute_alignment_matrix(
        human_protein, fly_protein, pam50, False)
    return student.compute_local_alignment(
        human_protein, fly_protein, pam50, matrix)
words = word_file.read() # template lines and solution lines list of line string word_list = words.split('\n') print "Loaded a dictionary with", len(word_list), "words" return word_list # Q1 HumanEyelessProtein = read_protein(HUMAN_EYELESS_URL) FruitflyEyelessProtein = read_protein(FRUITFLY_EYELESS_URL) PAM50 = read_scoring_matrix(PAM50_URL) alignment_matrix_Q1 = student.compute_alignment_matrix(HumanEyelessProtein, FruitflyEyelessProtein, PAM50, False) result_Q1 = student.compute_local_alignment(HumanEyelessProtein, FruitflyEyelessProtein, PAM50, alignment_matrix_Q1) # Q2 TempHumanSeq = result_Q1[1] FruitflySeq = result_Q1[2] HumanSeq = TempHumanSeq[:len(TempHumanSeq) - 3] + TempHumanSeq[len(TempHumanSeq) - 2:] ConsensusPAXDomain = read_protein(CONSENSUS_PAX_URL) alignment_matrix_Q2_Human = student.compute_alignment_matrix(
# load assets word_file = open(filename) # read in files as string words = word_file.read() # template lines and solution lines list of line string word_list = words.split('\n') print "Loaded a dictionary with", len(word_list), "words" return word_list scoring_matrix = read_scoring_matrix(PAM50_URL) seq_x = read_protein(HUMAN_EYELESS_URL) seq_y = read_protein(FRUITFLY_EYELESS_URL) alignment_matrix = student.compute_alignment_matrix(seq_x, seq_y, scoring_matrix, False) score, human_aligen, fruit_aligen = student.compute_local_alignment(seq_x, seq_y, scoring_matrix, alignment_matrix) print score exit() def q3() : len_gen = len(seq_x) seq_x = [] seq_y = [] for _ in range(len_gen) : seq_x.append(random.choice("ACBEDGFIHKMLNQPSRTWVYXZ")) seq_y.append(random.choice("ACBEDGFIHKMLNQPSRTWVYXZ")) alignment_matrix = student.compute_alignment_matrix(seq_x, seq_y, scoring_matrix, False) score, human_aligen, fruit_aligen = student.compute_local_alignment(seq_x, seq_y, scoring_matrix, alignment_matrix)
def build_scoring_matrix(alphabet, diag_score=10, off_diag_score=4,
                         dash_score=-6):
    """
    Make a scoring matrix as a dictionary of dictionaries.

    alphabet       -- iterable of characters (the dash '-' is added
                      automatically)
    diag_score     -- score for two identical characters (default 10)
    off_diag_score -- score for two different characters (default 4)
    dash_score     -- score for a character paired with '-' (default -6)

    The entry for '-' paired with '-' is always 0.  Returns the matrix, so
    that matrix[row][col] is the score of aligning row with col.
    """
    matrix = {'-': {'-': 0}}
    for row in alphabet:
        matrix[row] = {'-': dash_score}
        matrix['-'][row] = dash_score
        for col in alphabet:
            matrix[row][col] = diag_score if row == col else off_diag_score
    return matrix


# Smoke test: local alignment of two tiny sequences with the default scores.
m = build_scoring_matrix(chset)
s = student.compute_alignment_matrix("AA", "TAAT", m, False)
print(s[0][2])
print(s[2][0])
print(s[2][2])
print(s)
ali = student.compute_local_alignment("AA", "TAAT", m, s)
print(ali)
# read in files as string words = word_file.read() # template lines and solution lines list of line string word_list = words.split('\n') print "Loaded a dictionary with", len(word_list), "words" return word_list # Question 1 ################################################################## HUMAN_EYELESS = read_protein(HUMAN_EYELESS_URL) FRUITFLY_EYELESS = read_protein(FRUITFLY_EYELESS_URL) SCORING_MATRIX = read_scoring_matrix(PAM50_URL) ALIGNMENT_MATRIX = student.compute_alignment_matrix(HUMAN_EYELESS, \ FRUITFLY_EYELESS,\ SCORING_MATRIX, False) student.compute_local_alignment(HUMAN_EYELESS, FRUITFLY_EYELESS,\ SCORING_MATRIX, ALIGNMENT_MATRIX) # Question 2 ################################################################## PAX = read_protein(CONSENSUS_PAX_URL) loc_score, loc_human, loc_fly = student.compute_local_alignment(HUMAN_EYELESS,\ FRUITFLY_EYELESS,\ SCORING_MATRIX,\ ALIGNMENT_MATRIX) for align in (loc_human, loc_fly): align = align.replace('-', '')
from matplotlib import pyplot

# Locations of the data files used by the application questions.
PAM50_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_PAM50.txt"
HUMAN_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_HumanEyelessProtein.txt"
FRUITFLY_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_FruitflyEyelessProtein.txt"
CONSENSUS_PAX_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_ConsensusPAXDomain.txt"
WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt"

# Question 1: local alignment of the human (hep) and fruit-fly (fep) eyeless
# proteins under the PAM50 scoring matrix.
hep = alg_alignment.read_protein(HUMAN_EYELESS_URL)
fep = alg_alignment.read_protein(FRUITFLY_EYELESS_URL)
scoring_matrix = alg_alignment.read_scoring_matrix(PAM50_URL)
# The alignment matrix is built with the flag False and passed straight into
# compute_local_alignment.
hep_fep_local_alignment = alg_project4_solution.compute_local_alignment(
    hep, fep, scoring_matrix,
    alg_project4_solution.compute_alignment_matrix(
        hep, fep, scoring_matrix, False))
# Element 0 of the result is used as the alignment score.
human_eyeless_fruitfly_local_alignment_score = hep_fep_local_alignment[0]

# Question 1 answer: print the whole local alignment result.
print "local alignment for human and fruitfly eyeless genome: " + str(hep_fep_local_alignment)

# Question 2: compare each locally aligned piece (dashes removed) with the
# consensus PAX domain (cpd) through a global alignment.
cpd = alg_alignment.read_protein(CONSENSUS_PAX_URL)
# Elements 1 and 2 of the local result are the human and fly aligned pieces.
hep_local_alignment = hep_fep_local_alignment[1]
fep_local_alignment = hep_fep_local_alignment[2]
hep_local_alignment_no_dashes = hep_local_alignment.replace('-', '')
# Global alignment of the dash-free human piece against the consensus; the
# matrix is built with the flag True.
hep_no_dashes_cpd_global_alignment = alg_project4_solution.compute_global_alignment(
    hep_local_alignment_no_dashes, cpd, scoring_matrix,
    alg_project4_solution.compute_alignment_matrix(
        hep_local_alignment_no_dashes, cpd, scoring_matrix, True))
fep_local_alignment_no_dashes = fep_local_alignment.replace('-', '')
from matplotlib import pyplot PAM50_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_PAM50.txt" HUMAN_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_HumanEyelessProtein.txt" FRUITFLY_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_FruitflyEyelessProtein.txt" CONSENSUS_PAX_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_ConsensusPAXDomain.txt" WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt" # question 1 hep = alg_alignment.read_protein(HUMAN_EYELESS_URL) fep = alg_alignment.read_protein(FRUITFLY_EYELESS_URL) scoring_matrix = alg_alignment.read_scoring_matrix(PAM50_URL) hep_fep_local_alignment = alg_project4_solution.compute_local_alignment( hep, fep, scoring_matrix, alg_project4_solution.compute_alignment_matrix(hep, fep, scoring_matrix, False)) human_eyeless_fruitfly_local_alignment_score = hep_fep_local_alignment[0] # question 1 answer print "local alignment for human and fruitfly eyeless genome: " + str( hep_fep_local_alignment) # question 2 cpd = alg_alignment.read_protein(CONSENSUS_PAX_URL) hep_local_alignment = hep_fep_local_alignment[1] fep_local_alignment = hep_fep_local_alignment[2] hep_local_alignment_no_dashes = hep_local_alignment.replace('-', '') hep_no_dashes_cpd_global_alignment = alg_project4_solution.compute_global_alignment( hep_local_alignment_no_dashes, cpd, scoring_matrix,
def question1():
    """
    Print the local alignment result of the human and fruit-fly eyeless
    proteins under the PAM50 scoring matrix.
    """
    pam50 = read_scoring_matrix(PAM50_URL)
    human_protein = read_protein(HUMAN_EYELESS_URL)
    fly_protein = read_protein(FRUITFLY_EYELESS_URL)
    # False selects the local-alignment variant of the matrix.
    local_matrix = student.compute_alignment_matrix(
        human_protein, fly_protein, pam50, False)
    result = student.compute_local_alignment(
        human_protein, fly_protein, pam50, local_matrix)
    print(result)
def find_local_align(): score_matrix = read_scoring_matrix(PAM50_URL) seq_human = read_protein(HUMAN_EYELESS_URL) seq_fly = read_protein(FRUITFLY_EYELESS_URL) local_alignment_matrix = student.compute_alignment_matrix( seq_human, seq_fly, score_matrix, False) score, seq_loc_human, seq_loc_fly = student.compute_local_alignment( seq_human, seq_fly, score_matrix, local_alignment_matrix) length = len(seq_loc_fly) agree = 0 for idx in range(length): if seq_loc_fly[idx] == seq_loc_human[idx]: agree += 1 print 'Question 1:\n' print 'score:', score, '\nhuman:', seq_loc_human, '\nfly: ', seq_loc_fly print 'Agree percentage: %.2f' % (100 * float(agree) / length) """ Question 1: local alignment score: 875 human: HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEK-QQ fly: HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ Agree percentage: 93.98% """ ### Question 2 ### print '\nQuestion 2:\n' seq_loc_human = seq_loc_human.replace('-', '') seq_loc_fly = seq_loc_fly.replace('-', '') seq_pax = read_protein(CONSENSUS_PAX_URL) #Q2 # seq_pax = 'ACBEDGFIHKMLNQPSRTWVYXZ' #Q3 for idx in range(2): if idx == 0: seq = seq_loc_human type = 'human' else: seq = seq_loc_fly type = 'fly' global_alignment_matrix = student.compute_alignment_matrix( seq, seq_pax, score_matrix, True) score, x_glbl, pax_glbl = student.compute_global_alignment( seq, seq_pax, score_matrix, global_alignment_matrix) length = len(x_glbl) agree = 0 for idx in range(length): if x_glbl[idx] == pax_glbl[idx]: agree += 1 print 'score:', score, '\n' + type, x_glbl, '\nPAX: ', pax_glbl print type + ' agree percentage: %.2f' % (100 * float(agree) / length) """ Question 2: human score: 613 human: -HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQ PAX: 
GHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIR-------- human agree percentage: 72.93 flyscore: 586 fly: -HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ PAX: GHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIR--------- fly agree percentage: 70.15 """ """