コード例 #1
0
def question2():
    """
    Question 2 (2 pts)

    Compare each sequence of the Question 1 local alignment against the
    consensus PAX domain and print the percentage of agreeing positions.

    For each of the two locally aligned sequences (read from the module-level
    ``solution`` at indices 1 and 2) this function:

    1. Deletes any dashes '-' present in the sequence.
    2. Computes the global alignment of the dash-less sequence with the
       module-level ``consensus`` sequence, using the module-level
       ``scoring`` matrix.
    3. Counts the positions at which the two globally aligned sequences
       agree and expresses the count as a percentage of that alignment's
       own length, rounded to two decimals.

    Prints the human percentage, the "firefly" percentage and finally the
    length of the second global alignment. Returns nothing.

    NOTE(review): relies on ``solution``, ``consensus``, ``scoring`` and
    ``project4`` being defined at module level; none of them is visible in
    this block. ``solution`` is presumably the result of the Question 1 local
    alignment (score, human, fruit fly) -- confirm against the caller.
    """

    def percent_agreement(local_seq):
        """Globally align local_seq (dashes removed) with the consensus.

        Returns a (percentage, alignment_length) pair.
        """
        stripped = str(local_seq).replace('-', '')
        # True selects the global-alignment variant of the matrix.
        matrix = project4.compute_alignment_matrix(stripped, consensus,
                                                   scoring, True)
        alignment = project4.compute_global_alignment(stripped, consensus,
                                                      scoring, matrix)
        aligned_seq, aligned_consensus = alignment[1], alignment[2]
        length = len(aligned_consensus)
        if length == 0:
            # Nothing to compare; avoid dividing by zero.
            return 0.0, 0
        matches = sum(1 for left, right in zip(aligned_seq, aligned_consensus)
                      if left == right)
        # float() keeps the division exact under Python 2 integer semantics.
        return round((float(matches) / length) * 100, 2), length

    # Each percentage is normalised by its own alignment length; the two
    # global alignments are not guaranteed to have the same length.
    human_percent, _ = percent_agreement(solution[1])
    firefly_percent, firefly_length = percent_agreement(solution[2])

    # Printing the solution. A single parenthesised argument prints the same
    # text under the Python 2 print statement and the print function.
    print('Human protein match = ' + str(human_percent) + ' %')
    print('Firefly protein match = ' + str(firefly_percent) + ' %')
    print(firefly_length)
コード例 #2
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Creates an un-normalized distribution generated by performing the
    following process num_trials times:
        Generate a random permutation rand_y of the sequence seq_y (done here
        with random.sample over the full length, joined back into a string).
        Compute the maximum value score for the local alignment of seq_x and
        rand_y using the score matrix scoring_matrix.
        Increment the entry score in the dictionary scoring_distribution by one.

    input:
    seq_x, seq_y - two sequences
    scoring_matrix - scoring matrix
    num_trials - number of trials; zero or a negative value yields an empty
                 distribution

    return:
    scoring_distribution - a dictionary mapping each observed maximum score
                           to the number of trials that produced it
    """
    scoring_distribution = {}

    for _ in range(num_trials):
        # Random permutation of seq_y, rebuilt as a string.
        trial_y = ''.join(str(w) for w in random.sample(seq_y, len(seq_y)))

        # The flag must request the LOCAL alignment matrix.
        # NOTE(review): assumes project4.compute_alignment_matrix treats a
        # False flag as "local" (True being "global") -- confirm in project4.
        trial_matrix = project4.compute_alignment_matrix(
            seq_x, trial_y, scoring_matrix, False)

        # Best local score is the largest matrix entry, never less than 0.
        trial_max = max([0] + [cell for row in trial_matrix for cell in row])

        scoring_distribution[trial_max] = (
            scoring_distribution.get(trial_max, 0) + 1)

    return scoring_distribution
コード例 #3
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Creates an un-normalized distribution generated by performing the
    following process num_trials times:
        Generate a random permutation rand_y of the sequence seq_y (done here
        with random.sample over the full length, joined back into a string).
        Compute the maximum value score for the local alignment of seq_x and
        rand_y using the score matrix scoring_matrix.
        Increment the entry score in the dictionary scoring_distribution by one.

    input:
    seq_x, seq_y - two sequences
    scoring_matrix - scoring matrix
    num_trials - number of trials; zero or a negative value yields an empty
                 distribution

    return:
    scoring_distribution - a dictionary mapping each observed maximum score
                           to the number of trials that produced it
    """
    scoring_distribution = {}

    for _ in range(num_trials):
        # Random permutation of seq_y, rebuilt as a string.
        trial_y = ''.join(str(w) for w in random.sample(seq_y, len(seq_y)))

        # The flag must request the LOCAL alignment matrix.
        # NOTE(review): assumes project4.compute_alignment_matrix treats a
        # False flag as "local" (True being "global") -- confirm in project4.
        trial_matrix = project4.compute_alignment_matrix(
            seq_x, trial_y, scoring_matrix, False)

        # Best local score is the largest matrix entry, never less than 0.
        trial_max = max([0] + [cell for row in trial_matrix for cell in row])

        scoring_distribution[trial_max] = (
            scoring_distribution.get(trial_max, 0) + 1)

    return scoring_distribution
コード例 #4
0
def question2():
    """
    Question 2 (2 pts)

    Compare each sequence of the Question 1 local alignment against the
    consensus PAX domain and print the percentage of agreeing positions.

    For each of the two locally aligned sequences (read from the module-level
    ``solution`` at indices 1 and 2) this function:

    1. Deletes any dashes '-' present in the sequence.
    2. Computes the global alignment of the dash-less sequence with the
       module-level ``consensus`` sequence, using the module-level
       ``scoring`` matrix.
    3. Counts the positions at which the two globally aligned sequences
       agree and expresses the count as a percentage of that alignment's
       own length, rounded to two decimals.

    Prints the human percentage, the "firefly" percentage and finally the
    length of the second global alignment. Returns nothing.

    NOTE(review): relies on ``solution``, ``consensus``, ``scoring`` and
    ``project4`` being defined at module level; none of them is visible in
    this block. ``solution`` is presumably the result of the Question 1 local
    alignment (score, human, fruit fly) -- confirm against the caller.
    """

    def percent_agreement(local_seq):
        """Globally align local_seq (dashes removed) with the consensus.

        Returns a (percentage, alignment_length) pair.
        """
        stripped = str(local_seq).replace('-', '')
        # True selects the global-alignment variant of the matrix.
        matrix = project4.compute_alignment_matrix(
            stripped, consensus, scoring, True)
        alignment = project4.compute_global_alignment(
            stripped, consensus, scoring, matrix)
        aligned_seq, aligned_consensus = alignment[1], alignment[2]
        length = len(aligned_consensus)
        if length == 0:
            # Nothing to compare; avoid dividing by zero.
            return 0.0, 0
        matches = sum(1 for left, right in zip(aligned_seq, aligned_consensus)
                      if left == right)
        # float() keeps the division exact under Python 2 integer semantics.
        return round((float(matches) / length) * 100, 2), length

    # Each percentage is normalised by its own alignment length; the two
    # global alignments are not guaranteed to have the same length.
    human_percent, _ = percent_agreement(solution[1])
    firefly_percent, firefly_length = percent_agreement(solution[2])

    # Printing the solution. A single parenthesised argument prints the same
    # text under the Python 2 print statement and the print function.
    print('Human protein match = ' + str(human_percent) + ' %')
    print('Firefly protein match = ' + str(firefly_percent) + ' %')
    print(firefly_length)