예제 #1
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    '''
    Build an un-normalized distribution of alignment scores between seq_x
    and random permutations of seq_y.

    Input:
        seq_x, seq_y - two sequences that share a common alphabet
        scoring_matrix - scoring matrix, passed unchanged to
        student.compute_alignment_matrix and student.compute_local_alignment
        num_trials - number of random permutations of seq_y to score
    Output:
        scoring_distribution - a dict of scores, in which the key is the score
        and the value is the number of times that score has appeared in the
        trials
    '''
    # Init dict to store scores
    scoring_distribution = dict()
    # Count finished trials directly; every trial adds exactly one to the
    # distribution, so this matches summing its values without re-scanning
    # the whole dict before each iteration.
    trials_done = 0
    while trials_done < num_trials:
        # Shuffle a fresh copy of seq_y so the caller's sequence is untouched
        list_y = list(seq_y)
        random.shuffle(list_y)
        # Convert the shuffled list back into a string
        rand_y = ''.join(list_y)
        # Compute the alignment matrix for seq_x against the permutation
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        # Only the score is needed; the two aligned sequences are discarded
        score, _, _ = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)
        # Add one occurrence of this score (starting from 0 if unseen)
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1
        trials_done += 1
    return scoring_distribution
예제 #2
0
def question_one():
    """
    Align the human and fruit fly eyeless proteins using the PAM50 matrix.

    Reads the scoring matrix and both protein sequences, builds the
    alignment matrix with prj4.compute_alignment_matrix and returns the
    value produced by prj4.compute_local_alignment.
    """
    pam50 = read_scoring_matrix(PAM50_URL)
    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)

    # The alignment matrix has to exist before the alignment can be read off it.
    dp_table = prj4.compute_alignment_matrix(human, fly, pam50, False)
    return prj4.compute_local_alignment(human, fly, pam50, dp_table)
예제 #3
0
def answer_Q1():
    '''
    Align the human and fruit fly eyeless proteins (Q1).

    Loads both protein sequences and the PAM50 scoring matrix through the
    provided module, then returns the result of
    student.compute_local_alignment for them.
    '''
    # inputs: the two eyeless protein sequences and the PAM50 scoring matrix
    human_protein = provided.read_protein(HUMAN_EYELESS_URL)
    fly_protein = provided.read_protein(FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(PAM50_URL)

    # the alignment matrix is computed first and then fed to the aligner
    matrix = student.compute_alignment_matrix(
        human_protein, fly_protein, pam50, False)
    local_alignment = student.compute_local_alignment(
        human_protein, fly_protein, pam50, matrix)
    return local_alignment
예제 #4
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Tally alignment scores of seq_x against random shuffles of seq_y.

    Runs num_trials trials and returns a dict that maps each observed score
    to the number of trials that produced it (counts are not normalized).
    """
    score_counts = {}

    for _ in xrange(num_trials):
        # permute the characters of seq_y on a fresh copy
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        permuted_y = "".join(shuffled)

        matrix = prj4.compute_alignment_matrix(
            seq_x, permuted_y, scoring_matrix, False)
        alignment = prj4.compute_local_alignment(
            seq_x, permuted_y, scoring_matrix, matrix)

        # first element of the alignment result is the score
        best = alignment[0]
        score_counts[best] = score_counts.get(best, 0) + 1

    return score_counts
		'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1,
		'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': 2, 'x': -1, 'z': -1}, 'x': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1,
		'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1,
		'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': 2, 'z': -1}, 'z': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1,
		'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1,
		't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': 2}}, [[0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12],
		[-1, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9], [-2, 1, 1, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6],
		[-3, 0, 0, 2, 2, 4, 3, 2, 1, 0, -1, -2, -3], [-4, -1, -1, 1, 1, 4, 6, 5, 4, 3, 2, 1, 0], [-5, -2, -2, 0, 3, 3, 5, 5, 4, 3, 2, 1, 0],
		[-6, -3, -3, -1, 2, 5, 5, 4, 4, 3, 2, 1, 0], [-7, -4, -4, -2, 1, 4, 4, 7, 6, 5, 4, 3, 2], [-8, -5, -5, -3, 0, 3, 3, 6, 9, 8, 7, 6, 5],
		[-9, -6, -6, -4, -1, 2, 2, 5, 8, 8, 7, 6, 5], [-10, -7, -7, -5, -2, 1, 1, 4, 7, 10, 9, 8, 7],
		[-11, -8, -8, -6, -3, 0, 0, 3, 6, 9, 12, 11, 10]]))
		#expected score 10

if TEST4:
	print (Project_4.compute_local_alignment('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2},
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4},
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0]]))
	print ("----expected tuple of length 3----")
	print (Project_4.compute_local_alignment('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4,
		'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2},
		'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0, 0], [0, 6]]))
	print ("----expected score 6----")
	print (Project_4.compute_local_alignment('abddcdeffgh', 'aabcddefghij', {'-': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1,
		'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1,
		'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'a': {'-': -1, 'a': 2, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1,
		'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1,
		't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'c': {'-': -1, 'a': -1, 'c': 2, 'b': -1, 'e': -1, 'd': -1, 'g': -1,
		'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1,
		't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'b': {'-': -1, 'a': -1, 'c': -1, 'b': 2, 'e': -1, 'd': -1, 'g': -1, 'f': -1,
		'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1,
		'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'e': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': 2, 'd': -1, 'g': -1, 'f': -1, 'i': -1,
"""

import Project_4
import alg_application4_provided as provided
import math
import matplotlib.pyplot as plt
"""
Question 1
"""
# Load both eyeless protein sequences and the PAM50 scoring matrix.
seq_human = provided.read_protein(provided.HUMAN_EYELESS_URL)
seq_fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)

# Build the alignment matrix, then compute the alignment from it.
local_alignment_mx = Project_4.compute_alignment_matrix(
    seq_human, seq_fly, scoring_matrix, False)
result = Project_4.compute_local_alignment(
    seq_human, seq_fly, scoring_matrix, local_alignment_mx)

# result is reported as score, aligned human string, aligned fly string.
# A parenthesized single-argument print prints the same text on Python 2
# and is also valid on Python 3, unlike the bare print statement.
print('Score:' + str(result[0]))
print('Human: ' + result[1])
print('Fly: ' + result[2])
"""
Question 2
"""
# Pick the two aligned strings out of the alignment result.
ali_human, ali_fly = result[1], result[2]
seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL)

# Remove every '-' character from both aligned strings.
ali_human, ali_fly = ali_human.replace('-', ''), ali_fly.replace('-', '')

global_alignment_mx_human = Project_4.compute_alignment_matrix(
예제 #7
0
		'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 
		'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': 2, 'x': -1, 'z': -1}, 'x': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 
		'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 
		'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': 2, 'z': -1}, 'z': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 
		'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 
		't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': 2}}, [[0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12], 
		[-1, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9], [-2, 1, 1, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6], 
		[-3, 0, 0, 2, 2, 4, 3, 2, 1, 0, -1, -2, -3], [-4, -1, -1, 1, 1, 4, 6, 5, 4, 3, 2, 1, 0], [-5, -2, -2, 0, 3, 3, 5, 5, 4, 3, 2, 1, 0], 
		[-6, -3, -3, -1, 2, 5, 5, 4, 4, 3, 2, 1, 0], [-7, -4, -4, -2, 1, 4, 4, 7, 6, 5, 4, 3, 2], [-8, -5, -5, -3, 0, 3, 3, 6, 9, 8, 7, 6, 5], 
		[-9, -6, -6, -4, -1, 2, 2, 5, 8, 8, 7, 6, 5], [-10, -7, -7, -5, -2, 1, 1, 4, 7, 10, 9, 8, 7], 
		[-11, -8, -8, -6, -3, 0, 0, 3, 6, 9, 12, 11, 10]])
		#expected score 10

if TEST4:
	print Project_4.compute_local_alignment('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0]])
	print "----expected tuple of length 3----"
	print Project_4.compute_local_alignment('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 'C': {'A': 2, 'C': 6, '-': -4, 
		'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 
		'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0, 0], [0, 6]]) 
	print "----expected score 6----"
	print Project_4.compute_local_alignment('abddcdeffgh', 'aabcddefghij', {'-': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 
		'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 
		'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'a': {'-': -1, 'a': 2, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 
		'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 
		't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'c': {'-': -1, 'a': -1, 'c': 2, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 
		'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 
		't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'b': {'-': -1, 'a': -1, 'c': -1, 'b': 2, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 
		'i': -1, 'h': -1, 'k': -1, 'j': -1, 'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 
		'w': -1, 'v': -1, 'y': -1, 'x': -1, 'z': -1}, 'e': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': 2, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 

import Project_4
import alg_application4_provided as provided
import math
import matplotlib.pyplot as plt

"""
Question 1
"""
# Load both eyeless protein sequences and the PAM50 scoring matrix.
seq_human = provided.read_protein(provided.HUMAN_EYELESS_URL)
seq_fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)

# Build the alignment matrix, then compute the alignment from it.
local_alignment_mx = Project_4.compute_alignment_matrix(
    seq_human, seq_fly, scoring_matrix, False)
result = Project_4.compute_local_alignment(
    seq_human, seq_fly, scoring_matrix, local_alignment_mx)

# result is reported as score, aligned human string, aligned fly string.
# A parenthesized single-argument print prints the same text on Python 2
# and is also valid on Python 3, unlike the bare print statement.
print('Score:' + str(result[0]))
print('Human: ' + result[1])
print('Fly: ' + result[2])


"""
Question 2
"""
# Pick the two aligned strings out of the alignment result.
ali_human, ali_fly = result[1], result[2]
seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL)

# Remove every '-' character from the aligned human string.
ali_human = ali_human.replace('-', '')