Ejemplo n.º 1
0
def question1():
    """Run the Question 1 and Question 2 experiments and print their results.

    Question 1: locally align the human and fruit-fly eyeless proteins with
    the scoring matrix read from ``PAM50_URL`` and print the score followed
    by the two aligned strings.

    Question 2: strip the dashes from both aligned strings, build an
    alignment matrix for each against the consensus PAX sequence (fourth
    argument ``True``) and print whatever ``agreement`` reports for each
    species as a percentage.
    """
    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    print(len(human), len(fly))

    scoring = read_scoring_matrix(PAM50_URL)
    local_matrix = compute_alignment_matrix(human, fly, scoring, False)
    score, human_local, fly_local = compute_local_alignment(
        human, fly, scoring, local_matrix)
    print('Question 1')
    for item in (score, human_local, fly_local):
        print(item)
    print()

    print('Question 2')
    consensus = read_protein(CONSENSUS_PAX_URL)
    # Dashes are alignment gaps; drop them before re-aligning.
    human_core = ''.join(residue for residue in human_local if residue != '-')
    fly_core = ''.join(residue for residue in fly_local if residue != '-')

    human_matrix = compute_alignment_matrix(human_core, consensus, scoring,
                                            True)
    fly_matrix = compute_alignment_matrix(fly_core, consensus, scoring, True)

    human_pct = agreement(human_core, consensus, scoring, human_matrix)
    fly_pct = agreement(fly_core, consensus, scoring, fly_matrix)
    print('Human vs Consensus agree = %s%%' % human_pct)
    print('Fly vs Consensus agree = %s%%' % fly_pct)
Ejemplo n.º 2
0
def question1():
    """Print the answers to Question 1 (local alignment) and Question 2.

    The human and fruit-fly eyeless proteins are aligned locally; the score
    and both aligned strings are printed. Each aligned string then has its
    dashes removed and is compared against the consensus PAX sequence via
    ``agreement``, whose result is printed as a percentage.
    """
    def without_dashes(aligned):
        # Keep every character except the gap marker.
        return ''.join(filter(lambda symbol: symbol != '-', aligned))

    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    print(len(human), len(fly))

    scoring = read_scoring_matrix(PAM50_URL)
    local_result = compute_local_alignment(
        human, fly, scoring,
        compute_alignment_matrix(human, fly, scoring, False))
    score, aligned_human, aligned_fly = local_result
    print('Question 1')
    print(score)
    print(aligned_human)
    print(aligned_fly)
    print()

    print('Question 2')
    consensus = read_protein(CONSENSUS_PAX_URL)
    human_nodash = without_dashes(aligned_human)
    fly_nodash = without_dashes(aligned_fly)

    human_vs_consensus = compute_alignment_matrix(human_nodash, consensus,
                                                  scoring, True)
    fly_vs_consensus = compute_alignment_matrix(fly_nodash, consensus,
                                                scoring, True)

    human_agree = agreement(human_nodash, consensus, scoring,
                            human_vs_consensus)
    fly_agree = agreement(fly_nodash, consensus, scoring, fly_vs_consensus)
    print('Human vs Consensus agree = %s%%' % human_agree)
    print('Fly vs Consensus agree = %s%%' % fly_agree)
def _percent_identical(aligned_a, aligned_b):
    """Return the percentage of positions at which both strings agree.

    The denominator is the length of ``aligned_a``; the two strings are
    expected to be the equal-length halves of one alignment.
    """
    matches = sum(1 for left, right in zip(aligned_a, aligned_b)
                  if left == right)
    return float(matches) / len(aligned_a) * 100


def question2():
    """
    Code for question 2

    Takes the two aligned sequences from ``question1()`` (items 1 and 2 of
    its result), removes their dashes, globally aligns each against the
    consensus PAX sequence and prints the percentage of alignment positions
    where the sequence and the consensus carry the same character. The human
    percentage is printed first, then the fruit-fly one.
    """
    q1_result = question1()
    score_mat = read_scoring_matrix(PAM50_URL)
    consensus = read_protein(CONSENSUS_PAX_URL)

    for local_aligned in (q1_result[1], q1_result[2]):
        sequence = local_aligned.replace('-', '')
        align_m = compute_alignment_matrix(sequence, consensus, score_mat,
                                           True)
        global_align = compute_global_alignment(sequence, consensus,
                                                score_mat, align_m)
        # Single-argument print(...) behaves the same as the statement form
        # on Python 2 and also parses on Python 3.
        print(_percent_identical(global_align[1], global_align[2]))
Ejemplo n.º 4
0
def question4(filename):
    """Plot the normalised null distribution and print its z-score statistics.

    Generates a null distribution of alignment scores for the human and
    fruit-fly eyeless proteins, saves a bar chart of the normalised
    distribution to ``filename`` and prints the mean, the (population)
    standard deviation of the raw scores and the z-score of the hard-coded
    observed score.

    Args:
        filename: destination passed to ``plt.savefig``.
    """
    from pprint import pprint as pp

    # One source of truth for the trial count: it is both the number of
    # trials requested and the divisor used for the statistics below.
    num_trials = 1000

    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    scoring = read_scoring_matrix(PAM50_URL)
    distr, raw = generate_null_distribution(human, fly, scoring, num_trials)
    distr = str_keys(distr)
    pp(distr)
    distr = norm(distr)

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    # NOTE(review): if str_keys turns the scores into strings, this sort is
    # lexicographic rather than numeric - confirm that is intended.
    pairs = sorted(distr.items(), key=itemgetter(0))
    print(pairs)
    labels = [label for label, _ in pairs]
    heights = [height for _, height in pairs]
    index = np.arange(len(pairs))
    # Real lists instead of map(): on Python 3 map() is a lazy iterator,
    # which is not a sequence matplotlib can plot.
    plt.bar(index, heights)
    plt.xticks(index + 0.4, labels, fontsize=8)
    plt.xlabel('Scores')
    plt.ylabel('Fraction of total trials')
    plt.title('Distribution of scores')
    plt.tight_layout()
    plt.savefig(filename)

    s_score = 875  # hard-coded observed score the z-score is computed for
    # float() guards against integer (floor) division on Python 2 when the
    # raw scores are ints; it is a no-op where true division already applies.
    mean = float(sum(raw)) / num_trials
    std = np.sqrt(sum((x - mean) ** 2 for x in raw) / num_trials)
    z_score = (s_score - mean) / std

    print('mean = %f' % mean)
    print('std = %f' % std)
    print('z_score = %f' % z_score)
Ejemplo n.º 5
0
def question4(filename):
    """Plot the normalised null distribution and print its z-score statistics.

    Generates a null distribution of alignment scores for the human and
    fruit-fly eyeless proteins, saves a bar chart of the normalised
    distribution to ``filename`` and prints the mean, the (population)
    standard deviation of the raw scores and the z-score of the hard-coded
    observed score.

    Args:
        filename: destination passed to ``plt.savefig``.
    """
    from pprint import pprint as pp

    # One source of truth for the trial count: it is both the number of
    # trials requested and the divisor used for the statistics below.
    num_trials = 1000

    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    scoring = read_scoring_matrix(PAM50_URL)
    distr, raw = generate_null_distribution(human, fly, scoring, num_trials)
    distr = str_keys(distr)
    pp(distr)
    distr = norm(distr)

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    # NOTE(review): if str_keys turns the scores into strings, this sort is
    # lexicographic rather than numeric - confirm that is intended.
    pairs = sorted(distr.items(), key=itemgetter(0))
    print(pairs)
    labels = [label for label, _ in pairs]
    heights = [height for _, height in pairs]
    index = np.arange(len(pairs))
    # Real lists instead of map(): on Python 3 map() is a lazy iterator,
    # which is not a sequence matplotlib can plot.
    plt.bar(index, heights)
    plt.xticks(index + 0.4, labels, fontsize=8)
    plt.xlabel('Scores')
    plt.ylabel('Fraction of total trials')
    plt.title('Distribution of scores')
    plt.tight_layout()
    plt.savefig(filename)

    s_score = 875  # hard-coded observed score the z-score is computed for
    # float() guards against integer (floor) division on Python 2 when the
    # raw scores are ints; it is a no-op where true division already applies.
    mean = float(sum(raw)) / num_trials
    std = np.sqrt(sum((x - mean) ** 2 for x in raw) / num_trials)
    z_score = (s_score - mean) / std

    print('mean = %f' % mean)
    print('std = %f' % std)
    print('z_score = %f' % z_score)
def question_1():
    '''
    Question 1: local alignment of the human and fruit fly eyeless proteins.

    Reads both protein sequences and the PAM50 scoring matrix through the
    `provided` module, builds the alignment matrix with
    `project4.compute_alignment_matrix` (last argument False) and returns the
    result of `project4.compute_local_alignment` unchanged. question_2 in
    this file unpacks that result as (score, human alignment, fruit fly
    alignment).
    '''
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    local_matrix = project4.compute_alignment_matrix(human, fruitfly, pam50,
                                                     False)
    return project4.compute_local_alignment(human, fruitfly, pam50,
                                            local_matrix)
def question1():
    """
    Code for question 1: align the human and fruit-fly eyeless proteins
    locally and return whatever compute_local_alignment produces.
    """
    human_seq = read_protein(HUMAN_EYELESS_URL)
    fruitfly_seq = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    return compute_local_alignment(
        human_seq, fruitfly_seq, pam50,
        compute_alignment_matrix(human_seq, fruitfly_seq, pam50, False))
def local_alignment_eyeless_protein():
    """
    Question: 1

    Load the human and fruit-fly eyeless proteins plus the PAM50 scoring
    matrix and return the result of student.compute_local_alignment for them.
    """
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)
    # The alignment matrix is built with the last argument set to False.
    matrix = student.compute_alignment_matrix(human, fruitfly, pam50, False)
    return student.compute_local_alignment(human, fruitfly, pam50, matrix)
Ejemplo n.º 9
0
def answer_Q1():
    '''
    Answers Q1: returns the result of student.compute_local_alignment for
    the human and fruit-fly eyeless protein sequences under PAM50.
    '''
    # Inputs: both eyeless protein sequences and the PAM50 scoring matrix.
    human = provided.read_protein(HUMAN_EYELESS_URL)
    fly = provided.read_protein(FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(PAM50_URL)
    # The alignment matrix (last argument False) feeds the local alignment.
    matrix = student.compute_alignment_matrix(human, fly, pam50, False)
    local_alignment = student.compute_local_alignment(human, fly, pam50,
                                                      matrix)
    return local_alignment
Ejemplo n.º 10
0
def perform_human_fly_trials():
    '''
    Runs 1000 null-distribution trials for the human and fruit fly eyeless
    proteins and returns the resulting distribution as a DataFrame.

    Returns:
        pandas.DataFrame with a single 'Frequency' column holding the values
        of the distribution, indexed by its keys; the index is named
        'Scores'.
    '''
    # load the amino acid sequences of the human and fruit fly eyeless proteins
    human_sequence = provided.read_protein(HUMAN_EYELESS_URL)
    fly_sequence = provided.read_protein(FRUITFLY_EYELESS_URL)
    # load the PAM50 scoring matrix
    pam50_scoring_matrix = provided.read_scoring_matrix(PAM50_URL)
    # perform 1000 trials
    scoring_distribution = generate_null_distribution(
        human_sequence, fly_sequence, pam50_scoring_matrix, 1000)
    # Convert the result to a DataFrame. The dict views are materialised as
    # lists first: on Python 3 keys()/values() are views rather than lists,
    # and some pandas versions do not accept them in the constructor.
    scoring_dist_df = pd.DataFrame(list(scoring_distribution.values()),
                                   index=list(scoring_distribution.keys()),
                                   columns=['Frequency'])
    # fix index name
    scoring_dist_df.index.rename('Scores', inplace=True)
    return scoring_dist_df
Ejemplo n.º 11
0
def local_alignment_eyeless_protein():
    """
    Question: 1

    Return the result of student.compute_local_alignment for the human and
    fruit-fly eyeless proteins, scored with the PAM50 matrix.
    """
    sequences = (
        provided.read_protein(provided.HUMAN_EYELESS_URL),
        provided.read_protein(provided.FRUITFLY_EYELESS_URL),
    )
    human_seq, fruitfly_seq = sequences
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)
    # Matrix built with the last argument False, then used for the alignment.
    return student.compute_local_alignment(
        human_seq, fruitfly_seq, pam50,
        student.compute_alignment_matrix(human_seq, fruitfly_seq, pam50,
                                         False))
def run_app_q1():
    """
    Question 1 of application.

    Returns the (score, human alignment, fruit-fly alignment) triple that
    compute_local_alignment yields for the two eyeless proteins.
    """
    pam50 = provided.read_scoring_matrix(PAM50_PATH)
    human = provided.read_protein(HUMAN_EYELESS_PATH)
    fruitfly = provided.read_protein(FRUITFLY_EYELESS_PATH)
    local_matrix = compute_alignment_matrix(human, fruitfly, pam50, False)
    local_result = compute_local_alignment(human, fruitfly, pam50,
                                           local_matrix)
    # Unpack and re-pack so the result is always a plain 3-tuple.
    (score, local_human, local_fruitfly) = local_result
    return (score, local_human, local_fruitfly)
def run_ques_4():
    """
    Question: 4 & 5

    Builds a 1000-trial null distribution for the human and fruit-fly
    eyeless proteins, prints the mean and standard deviation returned by
    compute_stats, prints the z-score of the first element returned by
    local_alignment_eyeless_protein(), then hands the distribution to
    bar_plot.
    """
    seq_x = provided.read_protein(provided.HUMAN_EYELESS_URL)
    seq_y = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)
    num_trials = 1000
    scoring_distribution = generate_null_distribution(seq_x, seq_y,
                                                      scoring_matrix,
                                                      num_trials)
    mean, std_dev = compute_stats(scoring_distribution, num_trials)
    # A single pre-formatted argument prints "mean , std_dev" exactly as the
    # Python 2 statement `print mean, ",", std_dev` did, and also parses on
    # Python 3.
    print("%s , %s" % (mean, std_dev))
    observed_score = local_alignment_eyeless_protein()[0]
    z_score = float(observed_score - mean) / std_dev
    print(z_score)
    bar_plot(scoring_distribution, num_trials)
Ejemplo n.º 14
0
def run_ques_4():
    """
    Question: 4 & 5

    Builds a 1000-trial null distribution for the human and fruit-fly
    eyeless proteins, prints the mean and standard deviation returned by
    compute_stats, prints the z-score of the first element returned by
    local_alignment_eyeless_protein(), then hands the distribution to
    bar_plot.
    """
    seq_x = provided.read_protein(provided.HUMAN_EYELESS_URL)
    seq_y = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)
    num_trials = 1000
    scoring_distribution = generate_null_distribution(seq_x, seq_y,
                                                      scoring_matrix,
                                                      num_trials)
    mean, std_dev = compute_stats(scoring_distribution, num_trials)
    # A single pre-formatted argument prints "mean , std_dev" exactly as the
    # Python 2 statement `print mean, ",", std_dev` did, and also parses on
    # Python 3.
    print("%s , %s" % (mean, std_dev))
    observed_score = local_alignment_eyeless_protein()[0]
    z_score = float(observed_score - mean) / std_dev
    print(z_score)
    bar_plot(scoring_distribution, num_trials)
def question5():
    """
    Code for question 5

    Generates a 1000-trial null distribution, expands it into one entry per
    trial (each key repeated by its count) and prints the mean, the
    population standard deviation and the z-score of the hard-coded
    observed score on one line.
    """
    observed_score = 875  # hard-coded score the z-score is computed for

    human = read_protein(HUMAN_EYELESS_URL)
    fruitfly = read_protein(FRUITFLY_EYELESS_URL)
    score_mat = read_scoring_matrix(PAM50_URL)
    dist = generate_null_distribution(human, fruitfly, score_mat, 1000)

    # items() works on Python 2 and 3; iteritems() is Python 2 only.
    scores = []
    for score, count in dist.items():
        scores.extend([score] * count)

    num_scores = len(scores)
    mean = float(sum(scores)) / num_scores
    variance = float(sum((score - mean) ** 2 for score in scores)) / num_scores
    std = math.sqrt(variance)
    z_score = (observed_score - mean) / std
    # One pre-formatted argument reproduces the space-separated output of the
    # Python 2 statement `print mean, std, z_score` and parses on Python 3.
    print('%s %s %s' % (mean, std, z_score))
def question4_plot():
    """
    Code for question 4

    Generates a null distribution for the human and fruit-fly eyeless
    proteins, shows a bar chart of each key against its count divided by
    the number of trials, then prints the raw distribution.
    """
    # Used both for the number of trials requested and for normalising.
    num_trials = 1000

    human = read_protein(HUMAN_EYELESS_URL)
    fruitfly = read_protein(FRUITFLY_EYELESS_URL)
    score_mat = read_scoring_matrix(PAM50_URL)
    dist = generate_null_distribution(human, fruitfly, score_mat, num_trials)

    # Real lists built from one pass over the keys keep x and y aligned and
    # avoid the Python 2-only itervalues().
    scores = list(dist.keys())
    fractions = [dist[score] / float(num_trials) for score in scores]

    plt.bar(scores, fractions)
    plt.title("Normalized score distribution")
    plt.ylabel("Fractions of total trials")
    plt.xlabel("Scores of local alignments")
    plt.show()
    print(dist)
def global_alignment_consensus():
    """
    Question: 2

    For items 1 and 2 of the Question 1 result, remove the dashes, globally
    align the sequence with the consensus PAX sequence and record the
    percentage of aligned positions where both strings match. Returns the
    two percentages as a list, in that order.
    """
    local_alignments = local_alignment_eyeless_protein()
    consensus_seq = provided.read_protein(provided.CONSENSUS_PAX_URL)
    scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)

    percentages = []
    for position in (1, 2):
        stripped = local_alignments[position].replace("-", "")
        matrix = student.compute_alignment_matrix(stripped, consensus_seq,
                                                  scoring_matrix, True)
        result = student.compute_global_alignment(stripped, consensus_seq,
                                                  scoring_matrix, matrix)
        matching = sum(1 for first, second in zip(result[1], result[2])
                       if first == second)
        percentages.append(float(matching * 100) / len(result[1]))
    return percentages
def run_app_q2():
    """
    Question 2 of application.

    Prints the match percentage between the fruit-fly local alignment from
    run_app_q1 (dashes removed, globally aligned) and the consensus PAX
    domain. Only the fruit-fly comparison is active in this function.
    """
    pam50 = provided.read_scoring_matrix(PAM50_PATH)
    (_score, _local_human, local_fruitfly) = run_app_q1()
    consensus = provided.read_protein(CONSENSUS_PAX_PATH)

    # The human comparison is the same pipeline applied to the human local
    # alignment instead; it is not run here.
    fruitfly_core = remove_dash(local_fruitfly)
    global_matrix = compute_alignment_matrix(fruitfly_core, consensus, pam50,
                                             True)
    global_result = compute_global_alignment(fruitfly_core, consensus, pam50,
                                             global_matrix)
    (_global_score, global_fruitfly, global_consensus) = global_result
    fruitfly_match = count_match_percentage(global_fruitfly, global_consensus)
    print(fruitfly_match)
def question_2():
    '''
    Question 2: compare each sequence of the Question 1 local alignment with
    the consensus PAX domain sequence.

    Each local alignment has its dashes removed and is globally aligned
    against the consensus; compute_similarity is then applied to items 1 and
    2 of each global alignment result.

    Returns the tuple ('Human:', human similarity, 'Fruitfly:', fruit fly
    similarity).
    '''
    consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    score, human_alignment, fruitfly_alignment = question_1()
    scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)

    # First pass: strip the gaps and build both global alignments.
    global_results = []
    for local_sequence in (human_alignment, fruitfly_alignment):
        dashless = local_sequence.replace('-', '')
        matrix = project4.compute_alignment_matrix(dashless, consensus,
                                                   scoring_matrix, True)
        global_results.append(
            project4.compute_global_alignment(dashless, consensus,
                                              scoring_matrix, matrix))

    # Second pass: score each (sequence, consensus) pair of aligned strings.
    human_similarity, fruitfly_similarity = [
        compute_similarity(aligned[1], aligned[2])
        for aligned in global_results
    ]

    return 'Human:', human_similarity, 'Fruitfly:', fruitfly_similarity
Ejemplo n.º 20
0
def global_alignment_consensus():
    """
    Question: 2

    Returns a two-element list: for items 1 and 2 of the Question 1 result,
    the percentage of positions at which the dash-free sequence, globally
    aligned with the consensus PAX sequence, matches the aligned consensus.
    """
    question1_result = local_alignment_eyeless_protein()
    consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    def percent_similar(aligned):
        # Gaps are dropped before the global alignment is recomputed.
        sequence = aligned.replace("-", "")
        matrix = student.compute_alignment_matrix(sequence, consensus, pam50,
                                                  True)
        global_alignment = student.compute_global_alignment(
            sequence, consensus, pam50, matrix)
        hits = 0
        for pair in zip(global_alignment[1], global_alignment[2]):
            if pair[0] == pair[1]:
                hits += 1
        return float(hits * 100) / len(global_alignment[1])

    ans_similar = []
    for idx in range(1, 3):
        ans_similar.append(percent_similar(question1_result[idx]))
    return ans_similar
def run_app_q4():
    """
    Question 4 of application.

    Generates a 1000-trial null distribution, divides every count by the
    number of trials in place, shows the result as a green bar chart and
    returns the (now normalized) distribution.
    """
    pam50 = provided.read_scoring_matrix(PAM50_PATH)
    human = provided.read_protein(HUMAN_EYELESS_PATH)
    fruitfly = provided.read_protein(FRUITFLY_EYELESS_PATH)
    trials = 1000
    distribution = generate_null_distribution(human, fruitfly, pam50, trials)

    # Normalize in place; only values change, so the key set stays fixed.
    divisor = 1.0 * trials
    for score in list(distribution.keys()):
        distribution[score] /= divisor

    plt.bar(distribution.keys(), distribution.values(), color='g')
    plt.grid(True)
    plt.xlabel('Scores')
    plt.ylabel('Fraction of Total Trials')
    plt.title('Normalized Scoring Distribution')
    plt.show()
    return distribution
def run_app_q3():
    """
    Question 3 of application.

    Repeats the Question 1 and 2 pipeline on two sequences obtained from
    gen_random_seqs for the lengths of the human and fruit-fly proteins:
    prints the local alignment score and both aligned strings, then the
    match percentage of each (dash-free, globally aligned) string against
    the consensus PAX domain - human first, fruit fly second.
    """
    pam50 = provided.read_scoring_matrix(PAM50_PATH)
    human = provided.read_protein(HUMAN_EYELESS_PATH)
    fruitfly = provided.read_protein(FRUITFLY_EYELESS_PATH)

    human_rand = gen_random_seqs(len(human))
    fruitfly_rand = gen_random_seqs(len(fruitfly))
    local_matrix = compute_alignment_matrix(human_rand, fruitfly_rand, pam50,
                                            False)
    local_result = compute_local_alignment(human_rand, fruitfly_rand, pam50,
                                           local_matrix)
    (score, local_human_rand, local_fruitfly_rand) = local_result
    for item in (score, local_human_rand, local_fruitfly_rand):
        print(item)

    consensus = provided.read_protein(CONSENSUS_PAX_PATH)
    for local_rand in (local_human_rand, local_fruitfly_rand):
        dashless = remove_dash(local_rand)
        global_matrix = compute_alignment_matrix(dashless, consensus, pam50,
                                                 True)
        global_result = compute_global_alignment(dashless, consensus, pam50,
                                                 global_matrix)
        (_global_score, global_rand, global_consensus) = global_result
        print(count_match_percentage(global_rand, global_consensus))
def question_4():
    '''
    Question 4: build and plot the null distribution used for hypothesis
    testing.

    Runs generate_null_distribution with 1000 trials on the human and fruit
    fly eyeless proteins (PAM50 scoring matrix), shows a bar plot of every
    count divided by the number of trials, and returns the original,
    un-normalized distribution.
    '''
    num_trials = 1000

    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    distribution = generate_null_distribution(human, fruitfly, pam50,
                                              num_trials)

    # Fraction of total trials per score; built in the distribution's own
    # iteration order.
    normalized_dist = dict(
        (score, float(distribution[score]) / num_trials)
        for score in distribution)

    plt.bar(normalized_dist.keys(), normalized_dist.values())
    plt.title('Null Distribution for Hypothesis Testing using 1000 Trials')
    plt.xlabel('Local Alignment Scores')
    plt.ylabel('Fraction of Total Trials')
    plt.show()

    return distribution
Ejemplo n.º 24
0
def percent_match(local_alignment):
    '''
    Computes the percent similarity between a local alignment (with its
    dashes removed and globally aligned against the consensus PAX sequence)
    and the aligned consensus, rounded to two decimals.
    '''
    # drop the '-' characters before re-aligning
    ungapped = local_alignment.replace('-', '')
    pam50 = provided.read_scoring_matrix(PAM50_URL)
    consensus = provided.read_protein(CONSENSUS_PAX_URL)
    # alignment matrix first (last argument True), then the global alignment
    matrix = student.compute_alignment_matrix(ungapped, consensus, pam50,
                                              True)
    score, global_alignment, consensus_alignment = \
        student.compute_global_alignment(ungapped, consensus, pam50, matrix)
    # count positions where both aligned strings carry the same character
    match = sum(1 for position, residue in enumerate(global_alignment)
                if residue == consensus_alignment[position])
    fraction = match / float(len(global_alignment))
    return round(fraction * 100, 2)
"""
Application 4 scripts

"""
import Project4 as help
import alg_application4_provided as provided
import random

# Module-level inputs, loaded once when the script runs: the human and fruit
# fly eyeless protein sequences and the PAM50 scoring matrix, all read
# through the provided helper module.
seq_x = provided.read_protein(provided.HUMAN_EYELESS_URL)
seq_y = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
score_matrix = provided.read_scoring_matrix(provided.PAM50_URL)
"""
# Question1
local_align_matrix = help.compute_alignment_matrix(seq_x, seq_y, score_matrix, False)
score, align_x, align_y = help.compute_local_alignment(seq_x, seq_y, score_matrix, local_align_matrix)

# Question2
new_seq_x = align_x.rstrip("-QQ")
new_seq_x = new_seq_x + "QQ"
new_seq_y = align_y
seq_consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
global_matrix_x = help.compute_alignment_matrix(new_seq_x, seq_consensus, score_matrix, True)
score_x_consensus, align_x1, align_y1 = help.compute_global_alignment(new_seq_x, seq_consensus, score_matrix, global_matrix_x)
global_matrix_y = help.compute_alignment_matrix(new_seq_y, seq_consensus, score_matrix, True)
score_y_consensus, align_x2, align_y2 = help.compute_global_alignment(new_seq_y, seq_consensus, score_matrix, global_matrix_y)

# Question3
alphabets = "ACBEDGFIHKMLNQPSRTWVYXZ"
seq1 = ""
seq2 = ""
for dummy_num in range(len(seq_x)):
Ejemplo n.º 26
0
        random.shuffle(tmp_y)
        rand_y = ''.join(tmp_y)
        alignment_matrix = pj4.compute_alignment_matrix(seq_x, rand_y, scoring_matrix, False)
        score = max([max(value) for value in alignment_matrix])
        #score, align_x, align_y = pj4.compute_local_alignment(seq_x, rand_y, scoring_matrix, alignment_matrix)
        if score not in scoring_distribution.keys():
            scoring_distribution[score] = 1
        else:
            scoring_distribution[score] += 1

    return scoring_distribution


# Load the two eyeless proteins and the PAM50 scoring matrix.
protein_human = app4.read_protein(app4.HUMAN_EYELESS_URL)
protein_fruitfly = app4.read_protein(app4.FRUITFLY_EYELESS_URL)
scoring_matrix = app4.read_scoring_matrix(app4.PAM50_URL)

# Run 1000 trials; the commented line below is a recorded sample result that
# can be swapped in to skip the computation.
scoring_distribution = generate_null_distribution(protein_human, protein_fruitfly, scoring_matrix, 1000)
#scoring_distribution = {38: 1, 39: 1, 40: 8, 41: 9, 42: 28, 43: 35, 44: 50, 45: 46, 46: 49, 47: 57, 48: 63, 49: 62, 50: 72, 51: 56, 52: 56, 53: 61, 54: 62, 55: 32, 56: 25, 57: 33, 58: 29, 59: 22, 60: 25, 61: 15, 62: 13, 63: 10, 64: 13, 65: 20, 66: 2, 67: 4, 68: 14, 69: 5, 70: 3, 71: 2, 72: 3, 74: 2, 75: 2, 76: 1, 77: 1, 79: 2, 81: 2, 84: 1, 85: 1, 94: 1, 97: 1}

# Single-argument print(...) calls behave like the Python 2 statement form
# and also parse on Python 3.
print(scoring_distribution)
# list(...) gives a real list on both Python 2 and 3; on Python 3 keys() is a
# view with no .sort() method.
x_value = list(scoring_distribution.keys())
print(x_value)
x_value.sort()
print(x_value)

# Percentage of the 1000 trials that produced each score, in sorted order.
y_value = []
for score in x_value:
    y_value.append(scoring_distribution[score] / float(1000) * 100)
print(y_value)
"""
Author: Tejaswini Dhupad
Algorithmic Thinking (Part 2)
Application 4: Applications to Genomics and Beyond
"""

import Project_4
import alg_application4_provided as provided
import math
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Question 1: local alignment of the human and fruit-fly eyeless proteins.
# ---------------------------------------------------------------------------
seq_human = provided.read_protein(provided.HUMAN_EYELESS_URL)
seq_fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
scoring_matrix = provided.read_scoring_matrix(provided.PAM50_URL)

local_alignment_mx = Project_4.compute_alignment_matrix(seq_human, seq_fly,
                                                        scoring_matrix, False)
result = Project_4.compute_local_alignment(
    seq_human, seq_fly, scoring_matrix, local_alignment_mx)

# result[0] is printed as the score; result[1] / result[2] as the aligned
# human / fly strings.
print('Score:' + str(result[0]))
print('Human: ' + result[1])
print('Fly: ' + result[2])

# ---------------------------------------------------------------------------
# Question 2: inputs for the comparison with the consensus PAX sequence.
# ---------------------------------------------------------------------------
ali_human, ali_fly = result[1], result[2]
seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL)