def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    '''
    Tally the alignment scores of seq_x against random shuffles of seq_y.

    Input:
    seq_x, seq_y - two sequences that share a common alphabet; seq_y is
        copied into a list before shuffling, so the argument itself is
        never modified
    scoring_matrix - scoring matrix, passed through unchanged to
        student.compute_alignment_matrix and student.compute_local_alignment
    num_trials - number of shuffled trials to run; zero or a negative
        value runs no trials

    Output:
    scoring_distribution - a dict whose keys are scores and whose values
        are the number of trials that produced that score (unnormalized)
    '''
    scoring_distribution = {}

    # Count finished trials directly instead of re-summing every value in
    # the dict before each iteration.
    trials_done = 0
    while trials_done < num_trials:
        # Generate a random permutation of seq_y as a string
        list_y = list(seq_y)
        random.shuffle(list_y)
        rand_y = ''.join(list_y)

        # Compute the alignment matrix (fourth argument False, as before)
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)

        # Only the first element of the result (the score) is tallied; the
        # aligned sequences that follow it are not needed here.
        score = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)[0]

        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1
        trials_done += 1

    return scoring_distribution
def question_one():
    """
    Compute the local alignment of the Human Eyeless Protein and the
    Fruitfly Eyeless Protein.

    Reads the scoring matrix from PAM50_URL and the two proteins from
    HUMAN_EYELESS_URL and FRUITFLY_EYELESS_URL, builds the alignment
    matrix with the fourth argument set to False, and returns whatever
    prj4.compute_local_alignment produces for those inputs.
    """
    pam50 = read_scoring_matrix(PAM50_URL)
    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)

    # The human sequence is always passed first, the fly sequence second.
    local_matrix = prj4.compute_alignment_matrix(human, fly, pam50, False)
    return prj4.compute_local_alignment(human, fly, pam50, local_matrix)
def answer_Q1():
    '''
    Answers Q1: aligns the human and fruit fly eyeless proteins.

    Returns the result of student.compute_local_alignment for the two
    sequences under the scoring matrix read from PAM50_URL.
    '''
    # read the human eyeless protein, then the fruit fly one
    human = provided.read_protein(HUMAN_EYELESS_URL)
    fly = provided.read_protein(FRUITFLY_EYELESS_URL)

    # read the PAM50 scoring matrix
    pam50 = provided.read_scoring_matrix(PAM50_URL)

    # build the alignment matrix (fourth argument False), then align
    matrix = student.compute_alignment_matrix(human, fly, pam50, False)
    local_alignment = student.compute_local_alignment(
        human, fly, pam50, matrix)
    return local_alignment
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Generates a distribution of local alignment scores stochastically.

    For each of num_trials trials, seq_y is shuffled (on a copy, so the
    argument is not modified), aligned against seq_x with the prj4
    routines, and the first element of the alignment result is tallied.

    Returns an unnormalized dict mapping each observed score to the
    number of trials that produced it; empty when num_trials is zero or
    negative.
    """
    distribution = {}

    # range, unlike the Python-2-only xrange, exists on both Python 2 and
    # Python 3, so the function no longer raises NameError on Python 3.
    for _ in range(num_trials):
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        rand_y = "".join(shuffled)

        align_matrix = prj4.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        local_alignment = prj4.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, align_matrix)

        # element 0 of the alignment result is used as the score key
        score = local_alignment[0]
        distribution[score] = distribution.get(score, 0) + 1

    # return unnormalized distribution of scores
    return distribution
'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': 2, 'x': -1, 'z': -1}, 'x': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': 2, 'z': -1}, 'z': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': 2}}, [[0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12], [-1, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9], [-2, 1, 1, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6], [-3, 0, 0, 2, 2, 4, 3, 2, 1, 0, -1, -2, -3], [-4, -1, -1, 1, 1, 4, 6, 5, 4, 3, 2, 1, 0], [-5, -2, -2, 0, 3, 3, 5, 5, 4, 3, 2, 1, 0], [-6, -3, -3, -1, 2, 5, 5, 4, 4, 3, 2, 1, 0], [-7, -4, -4, -2, 1, 4, 4, 7, 6, 5, 4, 3, 2], [-8, -5, -5, -3, 0, 3, 3, 6, 9, 8, 7, 6, 5], [-9, -6, -6, -4, -1, 2, 2, 5, 8, 8, 7, 6, 5], [-10, -7, -7, -5, -2, 1, 1, 4, 7, 10, 9, 8, 7], [-11, -8, -8, -6, -3, 0, 0, 3, 6, 9, 12, 11, 10]])) #expected score 10 if TEST4: print (Project_4.compute_local_alignment('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0]])) print ("----expected tuple of length 3----") print (Project_4.compute_local_alignment('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0, 0], [0, 6]])) print ("----expected score 6----") 
print (Project_4.compute_local_alignment('abddcdeffgh', 'aabcddefghij', {'-': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'a': {'-': -1, 'a': 2, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'c': {'-': -1, 'a': -1, 'c': 2, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'b': {'-': -1, 'a': -1, 'c': -1, 'b': 2, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'e': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': 2, 'd': -1, 'g': -1, 'f': -1, 'i': -1,
""" import Project_4 import alg_application4_provided as provided import math import matplotlib.pyplot as plt """ Question 1 """ seq_human = provided.read_protein(provided.HUMAN_EYELESS_URL) seq_fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL) scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL) local_alignment_mx = Project_4.compute_alignment_matrix( seq_human, seq_fly, scoring_matrix, False) result = Project_4.compute_local_alignment(seq_human, seq_fly, scoring_matrix, local_alignment_mx) print 'Score:' + str(result[0]) print 'Human: ' + result[1] print 'Fly: ' + result[2] """ Question 2 """ ali_human = result[1] ali_fly = result[2] seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL) ali_human = ali_human.replace('-', '') ali_fly = ali_fly.replace('-', '') global_alignment_mx_human = Project_4.compute_alignment_matrix(
'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': 2, 'x': -1, 'z': -1}, 'x': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': 2, 'z': -1}, 'z': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': 2}}, [[0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12], [-1, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9], [-2, 1, 1, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6], [-3, 0, 0, 2, 2, 4, 3, 2, 1, 0, -1, -2, -3], [-4, -1, -1, 1, 1, 4, 6, 5, 4, 3, 2, 1, 0], [-5, -2, -2, 0, 3, 3, 5, 5, 4, 3, 2, 1, 0], [-6, -3, -3, -1, 2, 5, 5, 4, 4, 3, 2, 1, 0], [-7, -4, -4, -2, 1, 4, 4, 7, 6, 5, 4, 3, 2], [-8, -5, -5, -3, 0, 3, 3, 6, 9, 8, 7, 6, 5], [-9, -6, -6, -4, -1, 2, 2, 5, 8, 8, 7, 6, 5], [-10, -7, -7, -5, -2, 1, 1, 4, 7, 10, 9, 8, 7], [-11, -8, -8, -6, -3, 0, 0, 3, 6, 9, 12, 11, 10]]) #expected score 10 if TEST4: print Project_4.compute_local_alignment('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0]]) print "----expected tuple of length 3----" print Project_4.compute_local_alignment('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0, 0], [0, 6]]) print "----expected score 6----" print 
Project_4.compute_local_alignment('abddcdeffgh', 'aabcddefghij', {'-': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'a': {'-': -1, 'a': 2, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'c': {'-': -1, 'a': -1, 'c': 2, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'b': {'-': -1, 'a': -1, 'c': -1, 'b': 2, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'e': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': 2, 'd': -1, 'g': -1, 'f': -1, 'i': -1,
import math

import matplotlib.pyplot as plt

import Project_4
import alg_application4_provided as provided

# Question 1: local alignment of the human and fruit fly eyeless proteins
# under the PAM50 scoring matrix.
seq_human = provided.read_protein(provided.HUMAN_EYELESS_URL)
seq_fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)
local_alignment_mx = Project_4.compute_alignment_matrix(
    seq_human, seq_fly, scoring_matrix, False)
result = Project_4.compute_local_alignment(
    seq_human, seq_fly, scoring_matrix, local_alignment_mx)

# Single-argument print(...) is valid on both Python 2 and Python 3,
# unlike the bare print statement.
print('Score:' + str(result[0]))
print('Human: ' + result[1])
print('Fly: ' + result[2])

# Question 2: start from the two aligned sequences of Question 1 and the
# consensus PAX sequence.
ali_human = result[1]
ali_fly = result[2]
seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL)
# Remove the '-' characters from the aligned human sequence
ali_human = ali_human.replace('-', '')