コード例 #1
0
 def test_compute_alignment_matrix(self):
     """
     Compare project4.compute_alignment_matrix against hand-computed tables.

     The sequences 'a' and 'cc' are each aligned with 'cab' using a scoring
     matrix built over {'a', 'b', 'c'} with the scores 10, 5 and -1, once
     with global_flag=True and once with global_flag=False.
     """
     target = 'cab'

     # Case 1: a single-character sequence.
     scores_single = project4.build_scoring_matrix(set('abc'), 10, 5, -1)
     single_global = project4.compute_alignment_matrix(
         'a', target, scores_single, global_flag=True)
     single_local = project4.compute_alignment_matrix(
         'a', target, scores_single, global_flag=False)
     self.assertEqual(single_global, [[0, -1, -2, -3], [-1, 5, 9, 8]])
     self.assertEqual(single_local, [[0, 0, 0, 0], [0, 5, 10, 9]])

     # Case 2: a two-character sequence, with a freshly built scoring matrix.
     scores_double = project4.build_scoring_matrix(set('abc'), 10, 5, -1)
     double_global = project4.compute_alignment_matrix(
         'cc', target, scores_double, global_flag=True)
     double_local = project4.compute_alignment_matrix(
         'cc', target, scores_double, global_flag=False)
     expected_global = [[0, -1, -2, -3], [-1, 10, 9, 8], [-2, 9, 15, 14]]
     expected_local = [[0, 0, 0, 0], [0, 10, 9, 8], [0, 10, 15, 14]]
     self.assertEqual(double_global, expected_global)
     self.assertEqual(double_local, expected_local)
コード例 #2
0
def question2():
    """
    Compute the global alignments of local human vs concensus PAX domain
    as well as local fruitfly vs. consensus PAX domain. Return as percentages.
    """
    # Delete any dashes present in local alignments of humans and fruitflies.
    q1 = question1()
    dashless_local_human = q1[1].replace('-', '')
    dashless_local_fly = q1[2].replace('-', '')
    # Compute global alignments.
    human_alignment_matrix = project4.compute_alignment_matrix(
        dashless_local_human, PAX, SCORING_MATRIX, False)
    fly_alignment_matrix = project4.compute_alignment_matrix(
        dashless_local_fly, PAX, SCORING_MATRIX, False)
    human_global = project4.compute_global_alignment(dashless_local_human, PAX,
                                                     SCORING_MATRIX,
                                                     human_alignment_matrix)
    fly_global = project4.compute_global_alignment(dashless_local_fly, PAX,
                                                   SCORING_MATRIX,
                                                   fly_alignment_matrix)
    # Compute percentage of elements in human_global and fly_global that agree with pax
    human_percent = 0.0
    fly_percent = 0.0
    for char in range(len(human_global[1])):
        if human_global[1][char] == human_global[2][char]:
            human_percent += 1
    for char in range(len(fly_global[1])):
        if fly_global[1][char] == fly_global[2][char]:
            fly_percent += 1
    human_percent = human_percent / len(human_global[1])
    fly_percent = fly_percent / len(fly_global[1])
    print "human_percent:", human_percent
    print "fly_percent:", fly_percent
コード例 #3
0
def question2():
    """
    Print how closely the local human / fruit fly alignments match PAX.

    The two aligned strings from ``question1()`` (elements 1 and 2) have
    their dashes removed and are globally aligned against the consensus
    protein read from ``CONSENSUS_PAX_URL`` using the scoring matrix read
    from ``PAM50_URL``. For each, the percentage of positions at which the
    aligned sequence equals the aligned consensus is printed (human first).
    """
    local_result = question1()
    pam50 = read_scoring_matrix(PAM50_URL)
    human_seq = local_result[1].replace('-', '')
    fly_seq = local_result[2].replace('-', '')
    pax = read_protein(CONSENSUS_PAX_URL)

    # Both global alignment matrices are built before either traceback.
    human_matrix = compute_alignment_matrix(human_seq, pax, pam50, True)
    fly_matrix = compute_alignment_matrix(fly_seq, pax, pam50, True)

    human_result = compute_global_alignment(human_seq, pax, pam50,
                                            human_matrix)
    aligned_human = human_result[1]
    aligned_pax_h = human_result[2]
    human_hits = sum(1 for pos, residue in enumerate(aligned_human)
                     if residue == aligned_pax_h[pos])
    print(float(human_hits) / len(aligned_human) * 100)

    fly_result = compute_global_alignment(fly_seq, pax, pam50, fly_matrix)
    aligned_fly = fly_result[1]
    aligned_pax_f = fly_result[2]
    fly_hits = sum(1 for pos, residue in enumerate(aligned_fly)
                   if residue == aligned_pax_f[pos])
    print(float(fly_hits) / len(aligned_fly) * 100)
コード例 #4
0
def question2():
    """
    Print the global alignment of each local sequence with the PAX consensus.

    Reads ``local_human``, ``local_fruitfly`` and ``scores`` from the
    enclosing module scope and the consensus sequence from the file
    'alg_ConsensusPAXDomain.txt'. For the human sequence and then the fruit
    fly sequence it prints the result of ``compute_global_alignment``
    followed by the percentage of aligned positions that agree with the
    aligned consensus.
    """
    # Delete the dashes in the local alignments. str.replace avoids the
    # character-by-character concatenation and no longer indexes
    # local_fruitfly with positions taken from local_human.
    local_human_new = local_human.replace('-', '')
    local_fruitfly_new = local_fruitfly.replace('-', '')

    # The with-block closes the file even if reading fails.
    with open('alg_ConsensusPAXDomain.txt', 'r') as consensus_file:
        consensus = consensus_file.read()
    # NOTE(review): drops the last two characters, presumably a trailing
    # line terminator -- confirm against the data file.
    consensus = consensus[:-2]

    for sequence in (local_human_new, local_fruitfly_new):
        align_matrix = project4.compute_alignment_matrix(
            sequence, consensus, scores, True)
        global_align = project4.compute_global_alignment(
            sequence, consensus, scores, align_matrix)
        print(global_align)

        aligned_sequence = global_align[1]
        aligned_consensus = global_align[2]
        similarity = sum(
            1 for own, ref in zip(aligned_sequence, aligned_consensus)
            if own == ref)
        print(similarity / float(len(aligned_sequence)) * 100)
コード例 #5
0
def question2():
    """
    Print the global alignment of each local sequence with the PAX consensus.

    Reads ``local_human``, ``local_fruitfly`` and ``scores`` from the
    enclosing module scope and the consensus sequence from the file
    'alg_ConsensusPAXDomain.txt'. For the human sequence and then the fruit
    fly sequence it prints the result of ``compute_global_alignment``
    followed by the percentage of aligned positions that agree with the
    aligned consensus.
    """
    # Delete the dashes in the local alignments. str.replace avoids the
    # character-by-character concatenation and no longer indexes
    # local_fruitfly with positions taken from local_human.
    local_human_new = local_human.replace('-', '')
    local_fruitfly_new = local_fruitfly.replace('-', '')

    # The with-block closes the file even if reading fails.
    with open('alg_ConsensusPAXDomain.txt', 'r') as consensus_file:
        consensus = consensus_file.read()
    # NOTE(review): drops the last two characters, presumably a trailing
    # line terminator -- confirm against the data file.
    consensus = consensus[:-2]

    for sequence in (local_human_new, local_fruitfly_new):
        align_matrix = project4.compute_alignment_matrix(
            sequence, consensus, scores, True)
        global_align = project4.compute_global_alignment(
            sequence, consensus, scores, align_matrix)
        print(global_align)

        aligned_sequence = global_align[1]
        aligned_consensus = global_align[2]
        similarity = sum(
            1 for own, ref in zip(aligned_sequence, aligned_consensus)
            if own == ref)
        print(similarity / float(len(aligned_sequence)) * 100)
コード例 #6
0
ファイル: app4.py プロジェクト: keithgw/algorthimic_thinking
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Score seq_x against num_trials random permutations of seq_y.

    Inputs:
        seq_x, seq_y: character strings over the alphabet of scoring_matrix.
        scoring_matrix: dictionary of dictionaries passed through to the
            alignment functions of ``seq``.
        num_trials: integer number of shuffles to run.
    Output:
        A list with one entry per trial: element 0 of the
        ``seq.compute_local_alignment`` result for that trial.
    """
    def _score_one_shuffle():
        # A fresh copy of seq_y is permuted for every trial.
        letters = list(seq_y)
        random.shuffle(letters)
        permuted = ''.join(letters)
        local_matrix = seq.compute_alignment_matrix(
            seq_x, permuted, scoring_matrix, False)
        result = seq.compute_local_alignment(
            seq_x, permuted, scoring_matrix, local_matrix)
        return result[0]

    return [_score_one_shuffle() for _ in range(num_trials)]
コード例 #7
0
ファイル: app4.py プロジェクト: keithgw/algorthimic_thinking
def find_scoring_matrix(x, y, med, dim):
    """
    Search for scoring parameters consistent with the edit-distance formula
    med = |x| + |y| - score(x, y).

    Inputs:
        x, y: lowercase english strings
        med: minimum edit distance between x and y
        dim: every combination of diag_score, off_diag_score in
            range(dim) and dash_score in 0, -1, ..., -(dim - 1) is tried
    Output:
        Array with one row [diag_score, off_diag_score, dash_score] per
        combination whose global alignment score equals |x| + |y| - med.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')

    # Score a correct parameter set has to produce.
    target_score = len(x) + len(y) - med

    score_grid = np.zeros((dim, dim, dim))
    for diag_score in range(dim):
        for off_diag_score in range(dim):
            for gap_penalty in range(dim):
                # The grid index is the positive penalty; the dash score
                # handed to the scoring matrix is its negation.
                matrix = seq.build_scoring_matrix(
                    letters, diag_score, off_diag_score, -gap_penalty)
                alignment = seq.compute_alignment_matrix(x, y, matrix)
                result = seq.compute_global_alignment(x, y, matrix, alignment)
                score_grid[diag_score, off_diag_score, gap_penalty] = result[0]

    matching = np.nonzero(score_grid == target_score)
    parameters = np.transpose(matching)
    # Turn the gap-penalty index back into the (non-positive) dash score.
    parameters[:, 2] *= -1
    return parameters
コード例 #8
0
def question_1():
    '''
    Compute the local alignment of the human and fruit fly eyeless proteins.

    Loads both proteins and the PAM50 scoring matrix through the ``provided``
    module, builds the alignment matrix with the global flag set to False
    and returns whatever ``project4.compute_local_alignment`` produces for
    (human, fruit fly). Callers in this file unpack that result as
    (score, human_alignment, fruitfly_alignment).
    '''
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fruitfly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    local_matrix = project4.compute_alignment_matrix(
        human, fruitfly, pam50, False)
    return project4.compute_local_alignment(
        human, fruitfly, pam50, local_matrix)
コード例 #9
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    '''
    Helper function for Question 4.

    Repeats the following num_trials times: shuffle a copy of seq_y with
    random.shuffle(), locally align seq_x with the shuffled sequence using
    scoring_matrix, and count the resulting score (element 0 of the local
    alignment result). The 1-based trial number is printed after each trial.

    Returns the un-normalized distribution as a dictionary that maps each
    observed score to the number of trials that produced it.
    '''
    scoring_distribution = {}
    completed = 0

    while completed < num_trials:
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        rand_y = ''.join(shuffled)

        local_matrix = project4.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        local_result = project4.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, local_matrix)

        best = local_result[0]
        scoring_distribution[best] = scoring_distribution.get(best, 0) + 1

        completed += 1
        print(completed)

    return scoring_distribution
コード例 #10
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Function for question 4.

    Shuffles the characters of seq_y num_trials times (each shuffle starts
    from the previous permutation), locally aligns seq_x with every
    permutation using scoring_matrix, and counts the scores (element 0 of
    the local alignment result). The zero-based trial index is printed
    after each trial.

    Returns a dictionary that maps each observed score to its frequency.
    """
    shuffled = list(seq_y)
    scoring_distribution = {}
    for dummy_idx in range(num_trials):
        random.shuffle(shuffled)
        new_seq_y = ''.join(shuffled)
        # Use the scoring_matrix argument; the previous code ignored it and
        # read a module-level ``scores`` instead.
        align_matrix = project4.compute_alignment_matrix(
            seq_x, new_seq_y, scoring_matrix, False)
        local_result = project4.compute_local_alignment(
            seq_x, new_seq_y, scoring_matrix, align_matrix)

        score = local_result[0]
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1

        print(dummy_idx)

    return scoring_distribution
コード例 #11
0
def _restore_score_key(key):
    """Turn a JSON object key (always a string) back into a number."""
    try:
        return int(key)
    except ValueError:
        return float(key)


def generate_null_distribution2(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Build (or reload) the null distribution of local alignment scores.

    If 'distr.json' can be read from the working directory, its contents are
    returned immediately and none of the arguments are used. Otherwise seq_y
    is shuffled num_trials times, seq_x is locally aligned with each
    permutation using scoring_matrix, and the result is written to
    'distr.json' before being returned.

    Returns:
        (distr, raw) where distr maps score -> count and raw is the list of
        all scores in trial order (duplicates included).
    """
    try:
        with open('distr.json') as f:
            pair = loads(f.read())
        # JSON stores object keys as strings; convert them back so the
        # cached result has the same numeric keys as a fresh run.
        cached = {
            _restore_score_key(key): count
            for key, count in pair['distr'].items()
        }
        return cached, pair['raw']
    except Exception as e:
        # Deliberately best-effort: any problem with the cache simply
        # falls through to recomputing the distribution.
        print('can\'t open file', str(e))

    distr = {}  # {score1: count1, score2: count2, ..., scoren: countn}
    raw = []  # [score1, score2, ..., scoren], may contain duplicates

    for _ in range(num_trials):
        temp = list(seq_y)
        shuffle(temp)
        rand_y = ''.join(temp)
        align_matrix = compute_alignment_matrix(seq_x, rand_y, scoring_matrix,
                                                False)
        score, _aligned_x, _aligned_y = compute_local_alignment(
            seq_x, rand_y, scoring_matrix, align_matrix)
        distr[score] = distr.get(score, 0) + 1
        raw.append(score)

    with open('distr.json', 'w') as f:
        f.write(dumps({'distr': distr, 'raw': raw}))

    return distr, raw
コード例 #12
0
def edit_dist(xs, ys):
    """
    Return len(xs) + len(ys) minus the global alignment score of xs and ys.

    The scoring matrix is built over ``ascii_lowercase`` (presumably
    ``string.ascii_lowercase`` -- confirm against the file's imports) with
    the scores 2, 1 and 0.
    """
    scoring = build_scoring_matrix(ascii_lowercase, 2, 1, 0)
    # The trailing True requests the global alignment matrix.
    global_matrix = compute_alignment_matrix(xs, ys, scoring, True)
    result = compute_global_alignment(xs, ys, scoring, global_matrix)
    score, _, _ = result
    combined_length = len(xs) + len(ys)
    return combined_length - score
コード例 #13
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Function for question 4.

    Shuffles the characters of seq_y num_trials times (each shuffle starts
    from the previous permutation), locally aligns seq_x with every
    permutation using scoring_matrix, and counts the scores (element 0 of
    the local alignment result). The zero-based trial index is printed
    after each trial.

    Returns a dictionary that maps each observed score to its frequency.
    """
    shuffled = list(seq_y)
    scoring_distribution = {}
    for dummy_idx in range(num_trials):
        random.shuffle(shuffled)
        new_seq_y = ''.join(shuffled)
        # Use the scoring_matrix argument; the previous code ignored it and
        # read a module-level ``scores`` instead.
        align_matrix = project4.compute_alignment_matrix(
            seq_x, new_seq_y, scoring_matrix, False)
        local_result = project4.compute_local_alignment(
            seq_x, new_seq_y, scoring_matrix, align_matrix)

        score = local_result[0]
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1

        print(dummy_idx)

    return scoring_distribution
コード例 #14
0
def question_2():

    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    consensus = read_protein(CONSENSUS_PAX_URL)

    scoring_matrix = read_scoring_matrix(PAM50_URL)

    alignment_matrix_local = project4.compute_alignment_matrix(human, fly, scoring_matrix, False)

    local_aligns = project4.compute_local_alignment(human, fly, scoring_matrix, alignment_matrix_local)

    human_local_align = local_aligns[1]
    fly_local_align = local_aligns[2]

    human_no_dashes = human_local_align.replace('-','')
    fly_no_dashes = fly_local_align.replace('-','')

    global_matrix_human_consensus = project4.compute_alignment_matrix(human_no_dashes, consensus, scoring_matrix,True)
    global_matrix_fly_consensus = project4.compute_alignment_matrix(fly_no_dashes,consensus, scoring_matrix, True)

    global_align_human_consensus = project4.compute_global_alignment(human_no_dashes,consensus,scoring_matrix,global_matrix_human_consensus)
    align_global_human = global_align_human_consensus[1]

    global_align_fly_consensus = project4.compute_global_alignment(fly_no_dashes, consensus,scoring_matrix,global_matrix_fly_consensus)
    align_global_fly = global_align_fly_consensus[1]

    count_human = 0
    count_fly = 0

    #print align_global_human
    #print align_global_fly
    #print consensus

    for pair in zip(align_global_human, consensus):
        if pair[0] == pair[1]:
            count_human += 1.
    for pair in zip(align_global_fly,consensus):
        if pair[0] == pair[1]:
            count_fly += 1.

    human_percentage = (count_human / len(align_global_human)) * 100
    fly_percentage = (count_fly / len(align_global_fly)) * 100

    print "human percentage: ", human_percentage
    print "fly percentage: ", fly_percentage
コード例 #15
0
def edit_dist(xs, ys):
    '''
    Helper function for Question 8.

    Returns len(xs) + len(ys) minus the global alignment score of xs and ys
    under a scoring matrix built over the lowercase letters a-z with the
    scores 2, 1 and 0.
    '''
    lowercase = 'abcdefghijklmnopqrstuvwxyz'
    scoring = project4.build_scoring_matrix(lowercase, 2, 1, 0)
    global_matrix = project4.compute_alignment_matrix(xs, ys, scoring, True)
    result = project4.compute_global_alignment(xs, ys, scoring, global_matrix)
    # Only the score is needed; the two aligned strings are ignored.
    score, _aligned_x, _aligned_y = result
    total_length = len(xs) + len(ys)
    return total_length - score
コード例 #16
0
def question1():
    """
    Code for question 1.

    Reads the human and fruit fly proteins and the PAM50 scoring matrix,
    then returns the result of compute_local_alignment for (human,
    fruit fly), using an alignment matrix built with the flag set to False.
    """
    human_protein = read_protein(HUMAN_EYELESS_URL)
    fly_protein = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    local_matrix = compute_alignment_matrix(
        human_protein, fly_protein, pam50, False)
    return compute_local_alignment(
        human_protein, fly_protein, pam50, local_matrix)
コード例 #17
0
def question1():
    """
    Locally align the fruit fly and human proteins (question 1).

    Reads ``fruitfly_protein``, ``human_protein`` and ``scores`` from the
    enclosing module scope. The fruit fly protein is passed first, so
    element 1 of the alignment result is the fruit fly string and element 2
    is the human string.

    Returns:
        (local_human, local_fruitfly). The previous code computed both
        values and then discarded them by returning nothing.
    """
    align_matrix = project4.compute_alignment_matrix(
        fruitfly_protein, human_protein, scores, False)
    local_alignment_eyeless = project4.compute_local_alignment(
        fruitfly_protein, human_protein, scores, align_matrix)

    local_fruitfly = local_alignment_eyeless[1]
    local_human = local_alignment_eyeless[2]
    return local_human, local_fruitfly
コード例 #18
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Build the null distribution of local alignment scores with a progress bar.

    For each of num_trials trials a shuffled copy of seq_y (kept as a list
    of characters) is locally aligned with seq_x using scoring_matrix, and
    the score (element 0 of the local alignment result) is counted.

    The finished dictionary (score -> count) is passed to ``save_dict``
    and then returned.
    """
    distribution = {}
    # Size the bar from num_trials so that every progress value passed to
    # update() lies within the bar's range; it was hard-coded to 1000.
    bar = progressbar.ProgressBar(max_value=num_trials)
    for progress in range(num_trials):
        bar.update(progress)
        rand_y = list(seq_y)
        random.shuffle(rand_y)
        alignment_matrix = project4.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        score = project4.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)[0]
        distribution[score] = distribution.get(score, 0) + 1
    save_dict(distribution)
    return distribution
コード例 #19
0
def question_2():
    '''
    Compare the two locally aligned sequences from question_1() with the
    consensus PAX domain.

    Each aligned sequence has its dashes removed and is globally aligned
    against the consensus read from ``provided.CONSENSUS_PAX_URL`` using the
    PAM50 scoring matrix. ``compute_similarity`` is then applied to the two
    aligned strings of each global alignment.

    Returns the tuple ('Human:', human_similarity, 'Fruitfly:',
    fruitfly_similarity).
    '''
    pax = provided.read_protein(provided.CONSENSUS_PAX_URL)
    _score, human_local, fruitfly_local = question_1()
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    # Drop the dashes introduced by the local alignment.
    human_seq = human_local.replace('-', '')
    fruitfly_seq = fruitfly_local.replace('-', '')

    # Human sequence vs. consensus.
    human_matrix = project4.compute_alignment_matrix(
        human_seq, pax, pam50, True)
    human_result = project4.compute_global_alignment(
        human_seq, pax, pam50, human_matrix)

    # Fruit fly sequence vs. consensus.
    fruitfly_matrix = project4.compute_alignment_matrix(
        fruitfly_seq, pax, pam50, True)
    fruitfly_result = project4.compute_global_alignment(
        fruitfly_seq, pax, pam50, fruitfly_matrix)

    # Similarity of each aligned sequence with its aligned consensus.
    human_similarity = compute_similarity(human_result[1], human_result[2])
    fruitfly_similarity = compute_similarity(fruitfly_result[1],
                                             fruitfly_result[2])

    summary = ('Human:', human_similarity, 'Fruitfly:', fruitfly_similarity)
    return summary
コード例 #20
0
def question1And2():
    """
    Print the answers to questions 1 and 2.

    Question 1: local alignment of the human and fruit fly eyeless proteins
    with the PAM50 scoring matrix (score and both aligned strings).
    Question 2: the value returned by ``agreement`` for each dash-less local
    sequence against the consensus PAX domain.
    """
    human_protein = read_protein(HUMAN_EYELESS_URL)
    fly_protein = read_protein(FRUITFLY_EYELESS_URL)
    print(len(human_protein), len(fly_protein))

    pam50 = read_scoring_matrix(PAM50_URL)
    local_matrix = compute_alignment_matrix(human_protein, fly_protein,
                                            pam50, False)
    local_result = compute_local_alignment(human_protein, fly_protein,
                                           pam50, local_matrix)
    score, human_local, fly_local = local_result
    print('Question 1')
    print('The score of the local alignment is: ', score)
    print('The sequence for the HumanEyelessProtein is: ', human_local)
    print('The sequence for the FruitflyEyelessProtein is: ', fly_local)
    print()

    print('Question2')
    pax = read_protein(CONSENSUS_PAX_URL)

    # Step 1: strip the dashes from both locally aligned sequences.
    human_nodash = ''.join(ch for ch in human_local if ch != '-')
    fly_nodash = ''.join(ch for ch in fly_local if ch != '-')

    # Step 2: global alignment matrices against the consensus sequence.
    human_matrix = compute_alignment_matrix(human_nodash, pax, pam50, True)
    fly_matrix = compute_alignment_matrix(fly_nodash, pax, pam50, True)

    # Step 3: agreement() is handed each sequence, the consensus, the scoring
    # matrix and the matching global alignment matrix.
    human_agreement = agreement(human_nodash, pax, pam50, human_matrix)
    fly_agreement = agreement(fly_nodash, pax, pam50, fly_matrix)

    print('Human vs Consensus agree = %s%%' % human_agreement)
    print('Fly vs Consensus agree = %s%%' % fly_agreement)
コード例 #21
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    '''
    Run a single null-distribution trial and return its score.

    A random permutation of seq_y is locally aligned with seq_x using
    scoring_matrix; element 0 of the local alignment result is returned.
    num_trials is accepted but not used by this function.
    '''
    letters = list(seq_y)
    random.shuffle(letters)
    rand_y = ''.join(letters)

    matrix = project4.compute_alignment_matrix(
        seq_x, rand_y, scoring_matrix, False)
    result = project4.compute_local_alignment(
        seq_x, rand_y, scoring_matrix, matrix)
    score = result[0]
    return score
コード例 #22
0
ファイル: app4.py プロジェクト: keithgw/algorthimic_thinking
def align_eyeless(scoring_matrix):
    """
    Return the local alignment of the human and drosophila eyeless proteins.

    Both amino-acid sequences are read with read_protein and aligned with
    the supplied scoring_matrix; the result of seq.compute_local_alignment
    is returned unchanged.
    """
    human_seq = read_protein(HUMAN_EYELESS_URL)
    fly_seq = read_protein(FRUITFLY_EYELESS_URL)

    # The trailing False selects the local (not global) alignment matrix.
    local_matrix = seq.compute_alignment_matrix(
        human_seq, fly_seq, scoring_matrix, False)

    alignment = seq.compute_local_alignment(
        human_seq, fly_seq, scoring_matrix, local_matrix)
    return alignment
コード例 #23
0
def question1():
    """
    Locally align the fruit fly and human proteins (question 1).

    Reads ``fruitfly_protein``, ``human_protein`` and ``scores`` from the
    enclosing module scope. The fruit fly protein is passed first, so
    element 1 of the alignment result is the fruit fly string and element 2
    is the human string.

    Returns:
        (local_human, local_fruitfly). The previous code computed both
        values and then discarded them by returning nothing.
    """
    align_matrix = project4.compute_alignment_matrix(
        fruitfly_protein, human_protein, scores, False)
    local_alignment_eyeless = project4.compute_local_alignment(
        fruitfly_protein, human_protein, scores, align_matrix)

    local_fruitfly = local_alignment_eyeless[1]
    local_human = local_alignment_eyeless[2]
    return local_human, local_fruitfly
コード例 #24
0
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words in word_list within edit distance dist of
    checked_word.

    The edit distance is computed as |x| + |y| - score(x, y), where score is
    the global alignment score under a scoring matrix with diag_score = 2,
    off_diag_score = 1 and dash_score = 0 over the lowercase letters.
    """
    letters = set("abcdefghijklmnopqrstuvwxyz")
    scoring_matrix = project4.build_scoring_matrix(letters, 2, 1, 0)

    def _distance_to(candidate):
        matrix = project4.compute_alignment_matrix(
            checked_word, candidate, scoring_matrix, True)
        score, _, _ = project4.compute_global_alignment(
            checked_word, candidate, scoring_matrix, matrix)
        return len(checked_word) + len(candidate) - score

    return set(word for word in word_list if _distance_to(word) <= dist)
コード例 #25
0
def calculate_similar_ratio():
    """
    Print the agreement ratio of each local sequence with the PAX consensus.

    Elements 1 and 2 of the ``align_human_fly_protein()`` result have their
    dashes removed and are each globally aligned against the consensus
    protein using the module-level ``scoring_matrix``. For each (element 1
    first) the fraction of aligned positions whose characters agree is
    printed.
    """
    local_result = align_human_fly_protein()

    # The consensus is the same for both comparisons, so load it only once
    # (it used to be read again before the second alignment).
    protein_consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)

    for aligned in (local_result[1], local_result[2]):
        sequence = aligned.replace('-', '')
        alignment_matrix = project4.compute_alignment_matrix(
            sequence, protein_consensus, scoring_matrix, True)
        global_result = project4.compute_global_alignment(
            sequence, protein_consensus, scoring_matrix, alignment_matrix)

        mark = sum(
            1 for own, ref in zip(global_result[1], global_result[2])
            if own == ref)
        print(mark / float(len(global_result[1])))
コード例 #26
0
ファイル: app4.py プロジェクト: keithgw/algorthimic_thinking
def pax_domain(scoring_matrix, local_alignment):
    """
    Compare the locally aligned human and drosophila eyeless sequences with
    the consensus PAX domain through global alignments.

    Elements 1 and 2 of local_alignment are taken as the human and
    drosophila strings. Returns a tuple of two proportions (not scaled to
    100): human vs consensus and drosophila vs consensus, each the share of
    aligned positions holding the same amino acid.
    """
    consensus = read_protein(CONSENSUS_PAX_URL)

    # Strip the dashes left over from the local alignment.
    human_seq = local_alignment[1].replace('-', '')
    fly_seq = local_alignment[2].replace('-', '')

    # Global alignment of each dash-less sequence against the consensus.
    human_matrix = seq.compute_alignment_matrix(
        human_seq, consensus, scoring_matrix)
    human_result = seq.compute_global_alignment(
        human_seq, consensus, scoring_matrix, human_matrix)

    fly_matrix = seq.compute_alignment_matrix(
        fly_seq, consensus, scoring_matrix)
    fly_result = seq.compute_global_alignment(
        fly_seq, consensus, scoring_matrix, fly_matrix)

    # Count the positions at which both aligned strings agree.
    human_length = len(human_result[1])
    human_hits = sum(1 for pos in range(human_length)
                     if human_result[1][pos] == human_result[2][pos])

    fly_length = len(fly_result[1])
    fly_hits = sum(1 for pos in range(fly_length)
                   if fly_result[1][pos] == fly_result[2][pos])

    # float() keeps the divisions from truncating to integers.
    return (float(human_hits) / human_length, float(fly_hits) / fly_length)
コード例 #27
0
def check_spelling(checked_word, dist, word_list):
    """
    Function for Question 8.

    Returns checked_word itself (a string) when it is already in word_list.
    Otherwise returns the list of words from word_list whose edit distance
    to checked_word is at most dist, where the distance is computed as
    len(x) + len(y) - global alignment score with the scores 2, 1 and 0.

    Two cheap filters skip the alignment for words that cannot qualify:
    the length difference must not exceed dist, and the number of distinct
    letters of the word that are absent from checked_word must not exceed
    dist. The number of words actually aligned is printed before returning.
    """
    if checked_word in word_list:
        return checked_word

    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    score_matrix = project4.build_scoring_matrix(alphabet, 2, 1, 0)

    checked_len = len(checked_word)
    checked_word_chars = set(checked_word)
    words = []
    num_checks = 0
    for each_word in word_list:
        if abs(len(each_word) - checked_len) > dist:
            continue

        # Every distinct letter missing from checked_word costs at least one
        # edit, so the bound is dist. The former fixed bound of 2 discarded
        # valid candidates whenever dist was larger than 2.
        num_diffs = len(set(each_word) - checked_word_chars)
        if num_diffs > dist:
            continue

        align_matrix = project4.compute_alignment_matrix(
            checked_word, each_word, score_matrix, True)
        result = project4.compute_global_alignment(
            checked_word, each_word, score_matrix, align_matrix)
        if checked_len + len(each_word) - result[0] <= dist:
            words.append(each_word)
        num_checks += 1

    print(num_checks)
    return words
コード例 #28
0
def question_1():
    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)

    scoring_matrix = read_scoring_matrix(PAM50_URL)

    alignment_matrix = project4.compute_alignment_matrix(human, fly, scoring_matrix, False)

    answer = project4.compute_local_alignment(human, fly, scoring_matrix, alignment_matrix)

    print "score =", answer[0]
    print "align human = ", answer[1]
    print "align fly = ", answer[2]

    return answer[0]
コード例 #29
0
def question7():
    """
    Question 7

    Globally align two sample words using the scoring values (2, 1, 0)
    and print both words, the alignment result, and the value
    len(test1) + len(test2) - score.
    """
    first, second = 'abcde', 'xycdefg'
    lowercase = set('abcdefghijklmnopqrstuvwxyz')
    scores = project4.build_scoring_matrix(lowercase, 2, 1, 0)
    table = project4.compute_alignment_matrix(first, second, scores, True)
    outcome = project4.compute_global_alignment(first, second, scores, table)
    print(first)
    print(second)
    print(outcome)
    print(len(first) + len(second) - outcome[0])
コード例 #30
0
ファイル: app4.py プロジェクト: keithgw/algorthimic_thinking
def check_spelling(checked_word, dist, word_list):
    """
    Returns the set of words from word_list that are within edit distance
    dist of checked_word.

    The edit distance of two words is computed as
    len(checked_word) + len(word) - score, where score is the global
    alignment score under the scoring values (2, 1, 0).
    """
    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    # The scoring matrix depends only on the alphabet and the three score
    # values, so build it once instead of once per dictionary word.
    # NOTE(review): assumes the seq alignment helpers do not mutate it.
    smtrx = seq.build_scoring_matrix(alphabet, 2, 1, 0)
    checked_len = len(checked_word)
    candidates = set([])

    for word in word_list:
        amtrx = seq.compute_alignment_matrix(checked_word, word, smtrx)
        score = seq.compute_global_alignment(checked_word, word, smtrx, amtrx)[0]
        if checked_len + len(word) - score <= dist:
            candidates.add(word)

    return candidates
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Build an un-normalized distribution of local alignment scores.

    For each of num_trials trials, seq_y is randomly shuffled, the local
    alignment matrix of seq_x against the shuffled sequence is computed,
    and the largest entry of that matrix is taken as the trial's score.

    Returns a dictionary mapping each observed score to the number of
    trials that produced it. A num_trials of zero or less yields an empty
    dictionary.
    """
    scoring_distribution = {}
    # range() is empty for num_trials <= 0; the earlier `while num_trials:`
    # countdown never reached zero when given a negative count.
    for _ in range(num_trials):
        shuffledlist = list(seq_y)
        random.shuffle(shuffledlist)
        rand_y = "".join(shuffledlist)

        loc_align_matric = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        # The trial score is the maximum over every cell of the matrix.
        score = max(col for row in loc_align_matric for col in row)

        # dict.get avoids scanning a freshly built key list on every trial.
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1
    return scoring_distribution
コード例 #32
0
def calculate_edit_distance(xseq, yseq):
    '''
    Return the edit distance of xseq and yseq
    http://en.wikipedia.org/wiki/Edit_distance

    The distance is len(xseq) + len(yseq) minus the global alignment
    score obtained with the scoring values (2, 1, 0) over the lowercase
    letters plus '-'.
    '''
    symbols = set('abcdefghijklmnopqrstuvwxyz-')
    scores = project4.build_scoring_matrix(symbols, 2, 1, 0)
    table = project4.compute_alignment_matrix(xseq, yseq, scores, True)
    best = project4.compute_global_alignment(xseq, yseq, scores, table)
    return len(xseq) + len(yseq) - best[0]
コード例 #33
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Tally local alignment scores of seq_x against shuffled copies of seq_y.

    Each trial shuffles the characters of seq_y with random.shuffle(),
    computes the local alignment of seq_x with that permutation, and
    adds one to the tally kept for the resulting score.

    Parameters
    ----------
    seq_x: str
    a sequence

    seq_y: str
    the sequence that is permuted on every trial

    scoring_matrix: dict of dicts
    the scoring matrix

    num_trials: int
    how many permutations to score


    Returns
    -------
    scoring_distribution: dict
    un-normalized distribution mapping each score to the number
    of trials that produced it
    """
    tally = defaultdict(int)
    for _ in range(num_trials):
        # The permutation is kept as a list of characters (not re-joined
        # into a string) when handed to the alignment helpers.
        permuted = list(seq_y)
        shuffle(permuted)
        local_matrix = compute_alignment_matrix(seq_x, permuted,
                                                scoring_matrix, False)
        best = compute_local_alignment(seq_x, permuted,
                                       scoring_matrix, local_matrix)
        tally[best[0]] += 1
    return tally
コード例 #34
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Returns a dictionary scoring_distribution that represents an
    un-normalized distribution based on the given number of trials num_trials.

    Each trial shuffles seq_y, computes the local alignment of seq_x with
    the shuffled sequence, and increments the count stored under the
    resulting score.
    """
    scoring_distribution = {}
    for dummy in range(num_trials):
        y_list = list(seq_y)
        random.shuffle(y_list)
        rand_y = ''.join(y_list)
        alignment_matrix = project4.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        score = project4.compute_local_alignment(seq_x, rand_y, scoring_matrix,
                                                 alignment_matrix)[0]
        # A single hashed lookup replaces the `in ....keys()` test, which
        # builds and scans a list of keys on every trial under Python 2.
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1
    return scoring_distribution
コード例 #35
0
def question1():
    """
    Compute the local alignments of the sequences of HumanEyelessProtein and
    FruitflyEyelessProtein using the PAM50 scoring matrix.
    """
    # Compute local alignments.
    alignment_matrix = project4.compute_alignment_matrix(HUMAN,
                                                         FLY,
                                                         SCORING_MATRIX,
                                                         global_flag=False)
    local_alignment = project4.compute_local_alignment(HUMAN, FLY,
                                                       SCORING_MATRIX,
                                                       alignment_matrix)
    align_human = local_alignment[1]
    align_fly = local_alignment[2]
    print "Human local alignment:", align_human
    print "Fruit fly local alignment:", align_fly
    print "score:", local_alignment[0]
    return (local_alignment[0], align_human, align_fly)
コード例 #36
0
def check_spelling(checked_word, dist, word_list):
    """
    Function for Question 8

    Find the words of word_list within edit distance dist of checked_word.

    If checked_word itself occurs in word_list, it is returned directly
    (as a string, not wrapped in a list). Otherwise a list of every
    matching word is returned. Callers must handle both shapes.

    Before the alignment is run, candidates are pruned with two
    necessary conditions for an edit distance of at most dist:
      * the length differs from len(checked_word) by at most dist;
      * at most dist distinct letters of the candidate are absent from
        checked_word (each edit introduces at most one new letter).

    The number of candidates that reached the alignment step is printed
    as a diagnostic.
    """
    if checked_word in word_list:
        return checked_word

    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    score_matrix = project4.build_scoring_matrix(alphabet, 2, 1, 0)
    checked_word_chars = set(checked_word)
    checked_len = len(checked_word)

    words = []
    num_checks = 0
    for each_word in word_list:
        each_len = len(each_word)
        if each_len < checked_len - dist or each_len > checked_len + dist:
            continue

        # Bound by dist rather than a hard-coded 2: a fixed limit discards
        # genuine matches whenever dist is larger than 2.
        num_diffs = len(set(each_word) - checked_word_chars)
        if num_diffs > dist:
            continue

        align_matrix = project4.compute_alignment_matrix(
            checked_word, each_word, score_matrix, True)
        result = project4.compute_global_alignment(
            checked_word, each_word, score_matrix, align_matrix)
        # With scores (2, 1, 0): edit distance = |x| + |y| - alignment score.
        if checked_len + each_len - result[0] <= dist:
            words.append(each_word)
        num_checks += 1
    print(num_checks)
    return words
コード例 #37
0
 def check_spelling(checked_word, dist, word_list):
     """
     Return the set of words in word_list whose edit distance
     from checked_word is at most dist.

     The edit distance is len(checked_word) + len(word) minus the
     global alignment score under the scoring values (2, 1, 0).
     """
     lowercase = 'abcdefghijklmnopqrstuvwxyz'
     scoring_matrix = project4.build_scoring_matrix(lowercase, 2, 1, 0)
     target_length = len(checked_word)
     matches = set([])
     for candidate in word_list:
         table = project4.compute_alignment_matrix(
             checked_word, candidate, scoring_matrix, True)
         best = project4.compute_global_alignment(
             checked_word, candidate, scoring_matrix, table)
         if target_length + len(candidate) - best[0] <= dist:
             matches.add(candidate)
     return matches
コード例 #38
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Score local alignments of seq_x against random permutations of seq_y.

    :param seq_x: first sequence
    :param seq_y: sequence that is randomly permuted on every trial
    :param scoring_matrix: scoring matrix
    :param num_trials: number of trials
    :return: a dictionary mapping the 1-based trial number to the local
             alignment score of that trial (scores are NOT aggregated
             into counts)
    """
    scores_by_trial = {}
    for trial in xrange(1, num_trials + 1):
        # Sampling len(seq_y) items without replacement gives a permutation.
        permuted = ''.join(random.sample(seq_y, len(seq_y)))
        local_matrix = student.compute_alignment_matrix(
            seq_x, permuted, scoring_matrix, False)
        scores_by_trial[trial] = student.compute_local_alignment(
            seq_x, permuted, scoring_matrix, local_matrix)[0]
    return scores_by_trial
コード例 #39
0
def check_spelling(check_word, dist, word_list):
    """
    Find the dictionary words that are close to check_word.

    :param check_word: word to check
    :param dist: largest edit distance accepted
    :param word_list: list of words (dictionary)
    :return: list of the words from word_list whose edit distance from
             check_word is at most dist, in word_list order
    """
    letters = list(string.ascii_lowercase)
    score_matrix = student.build_scoring_matrix(letters, 2, 1, 0)
    target_len = len(check_word)

    def edit_distance_to(candidate):
        # With scores (2, 1, 0): distance = |x| + |y| - global score.
        table = student.compute_alignment_matrix(
            candidate, check_word, score_matrix, True)
        aligned = student.compute_global_alignment(
            candidate, check_word, score_matrix, table)
        return len(candidate) + target_len - aligned[0]

    return [word for word in word_list if edit_distance_to(word) <= dist]
コード例 #40
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Record one local alignment score per random permutation of seq_y.

    :param seq_x: first sequence
    :param seq_y: sequence whose characters are permuted on every trial
    :param scoring_matrix: scoring matrix
    :param num_trials: number of trials
    :return: a dictionary keyed by trial number (starting at 1) whose
             values are the local alignment scores; scores are not
             counted or grouped
    """
    per_trial_score = {}
    for trial_no in xrange(1, num_trials + 1):
        # random.sample over the full length yields a shuffled copy.
        shuffled_chars = random.sample(seq_y, len(seq_y))
        rand_y = ''.join(shuffled_chars)

        local_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        outcome = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, local_matrix)

        per_trial_score[trial_no] = outcome[0]

    return per_trial_score
コード例 #41
0
def check_spelling(check_word, dist, word_list):
    """
    Collect the words of word_list within edit distance dist of check_word.

    :param check_word: word to check
    :param dist: maximum edit distance
    :param word_list: list of words (dictionary)
    :return: list (in word_list order) of the words whose edit distance
             from check_word does not exceed dist
    """
    score_matrix = student.build_scoring_matrix(
        list(string.ascii_lowercase), 2, 1, 0)
    close_words = []

    for candidate in word_list:
        table = student.compute_alignment_matrix(
            candidate, check_word, score_matrix, True)
        best_score = student.compute_global_alignment(
            candidate, check_word, score_matrix, table)[0]
        # Skip anything farther away than dist.
        if len(candidate) + len(check_word) - best_score > dist:
            continue
        close_words.append(candidate)

    return close_words
コード例 #42
0
def question7():
    """
    Question 7

    Print two sample words, their global alignment under the scoring
    values (2, 1, 0), and len(test1) + len(test2) minus the alignment
    score.
    """
    samples = ('abcde', 'xycdefg')
    score_matrix = project4.build_scoring_matrix(
        set('abcdefghijklmnopqrstuvwxyz'), 2, 1, 0)
    align_matrix = project4.compute_alignment_matrix(
        samples[0], samples[1], score_matrix, True)
    result = project4.compute_global_alignment(
        samples[0], samples[1], score_matrix, align_matrix)
    for item in (samples[0], samples[1], result):
        print(item)
    print(len(samples[0]) + len(samples[1]) - result[0])
コード例 #43
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Score local alignments of seq_x against shuffled copies of seq_y.

    Returns a pair (distr, raw):
      distr -- dictionary mapping each score to how many trials produced it
      raw   -- list of every trial's score in trial order (duplicates kept)
    """
    counts = {}
    history = []

    for _ in range(num_trials):
        letters = list(seq_y)
        shuffle(letters)
        permuted = ''.join(letters)
        # False selects the local alignment matrix.
        local_matrix = compute_alignment_matrix(
            seq_x, permuted, scoring_matrix, False)
        score, _, _ = compute_local_alignment(seq_x, permuted, scoring_matrix,
                                              local_matrix)
        counts[score] = counts.get(score, 0) + 1
        history.append(score)
    return counts, history
コード例 #44
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Run num_trials local alignments of seq_x against shuffled seq_y.

    Returns a pair: a dictionary mapping each score to the number of
    trials that produced it, and the list of scores in trial order.
    """
    counts = {}
    observed = []

    for _ in range(num_trials):
        letters = list(seq_y)
        random.shuffle(letters)
        permuted = ''.join(letters)

        local_matrix = project4.compute_alignment_matrix(
            seq_x, permuted, scoring_matrix, False)
        best = project4.compute_local_alignment(
            seq_x, permuted, scoring_matrix, local_matrix)[0]

        counts[best] = counts.get(best, 0) + 1
        observed.append(best)

    return counts, observed
コード例 #45
0
def check_spelling(checked_word, dist, word_list):
    """
    Collect every word of word_list whose edit distance
    from checked_word is at most dist.

    Parameters
    ----------
    checked_word: str
    the word to be checked

    dist: int
    the largest edit distance accepted

    word_list: list
    the candidate words


    Returns
    -------
    result: list
    the words of word_list, in their original order, that are
    within edit distance dist of checked_word.
    """
    score_mat = build_scoring_matrix("abcdefghijklmnopqrstuvwxyz", 2, 1, 0)
    target_len = len(checked_word)

    def distance_to(candidate):
        # With scores (2, 1, 0): distance = |x| + |y| - global score.
        table = compute_alignment_matrix(checked_word, candidate,
                                         score_mat, True)
        best = compute_global_alignment(checked_word, candidate,
                                        score_mat, table)[0]
        return target_len + len(candidate) - best

    return [candidate for candidate in word_list
            if distance_to(candidate) <= dist]
コード例 #46
0
def align_human_fly_protein():
    """
    Return the local alignment of protein_human and protein_fly.

    Uses the module-level scoring_matrix; the value returned is whatever
    project4.compute_local_alignment produces for the two proteins.
    """
    # False selects the local alignment matrix.
    local_matrix = project4.compute_alignment_matrix(
        protein_human, protein_fly, scoring_matrix, False)
    return project4.compute_local_alignment(
        protein_human, protein_fly, scoring_matrix, local_matrix)
コード例 #47
0
    Returns:
    A string representing the protein
    """
    protein_file = urllib2.urlopen(filename)
    protein_seq = protein_file.read()
    protein_seq = protein_seq.rstrip()
    return protein_seq


# Q1: local alignment of the human and fruit fly eyeless proteins.
# Call signature kept for reference:
#compute_local_alignment(seq_x, seq_y, scoring_matrix, alignment_matrix)
seq_fly = read_protein(FRUITFLY_EYELESS_URL)
seq_human = read_protein(HUMAN_EYELESS_URL)
score_matrix = read_scoring_matrix(PAM50_URL)
# The trailing False requests the local (not global) alignment matrix.
alignment_matrix = student.compute_alignment_matrix(seq_human, seq_fly, score_matrix, False)

# result is indexed below as [0], [1] (human) and [2] (fly).
result = student.compute_local_alignment(seq_human, seq_fly, score_matrix, alignment_matrix)
#print result[0]
#human
#print result[1]
#fly
#print result[2]


# Q2: load the consensus PAX sequence used for the follow-up comparison.
seq_pax = read_protein(CONSENSUS_PAX_URL)

# Disabled: global alignment of the fly local alignment (result[2])
# against the PAX sequence.
# alignment_matrix_global = student.compute_alignment_matrix(result[2], seq_pax, score_matrix, True)
# result2 = student.compute_global_alignment(result[2], seq_pax, score_matrix, alignment_matrix_global)
    # read in files as string
    words = word_file.read()

    # template lines and solution lines list of line string
    word_list = words.split('\n')
    print "Loaded a dictionary with", len(word_list), "words"
    return word_list


# question 1
# Load the PAM50 scoring matrix and the three protein sequences.
scoring_matrix = read_scoring_matrix(PAM50_URL)
seq_x = read_protein(HUMAN_EYELESS_URL)
seq_y = read_protein(FRUITFLY_EYELESS_URL)
consensusseq = read_protein(CONSENSUS_PAX_URL)

# Local alignment (trailing False) of the human and fruit fly proteins.
alignment_matrix = student.compute_alignment_matrix(seq_x, seq_y,
                                                    scoring_matrix, False)
score, string_Hu, string_Fr = student.compute_local_alignment(
    seq_x, seq_y, scoring_matrix, alignment_matrix)
print(string_Hu)

# Strip the '-' gap markers from both aligned sequences. A single join
# replaces the character-by-character `+=` loops, which rebuilt the string
# on every step.
newstring_Hu = "".join(elem for elem in string_Hu if elem != '-')
print(newstring_Hu)
newstring_Fr = "".join(elem for elem in string_Fr if elem != '-')

alignment_matrix_Hum_local_Con = student.compute_alignment_matrix(
コード例 #49
0
def edit_distance(seq_x, seq_y):
    """
    Return len(seq_x) + len(seq_y) minus the global alignment score of
    the two sequences under the scoring values (2, 1, 0) over the
    lowercase ASCII letters.
    """
    lowercase_scores = project4.build_scoring_matrix(
        string.ascii_lowercase, 2, 1, 0)
    table = project4.compute_alignment_matrix(
        seq_x, seq_y, lowercase_scores, True)
    best = project4.compute_global_alignment(
        seq_x, seq_y, lowercase_scores, table)
    combined_length = len(seq_x) + len(seq_y)
    return combined_length - best[0]
コード例 #50
0
    # read in files as string
    words = word_file.read()

    # template lines and solution lines list of line string
    word_list = words.split('\n')
    print "Loaded a dictionary with", len(word_list), "words"
    return word_list


# Load the proteins, the PAM50 scoring matrix and the consensus PAX sequence.
HUMAN_EYELESS_PROTEIN = read_protein(HUMAN_EYELESS_URL)
FRUITFLY_EYELESS_PROTEIN = read_protein(FRUITFLY_EYELESS_URL)
PAM50_SCORING_MATRIX = read_scoring_matrix(PAM50_URL)
CONSENSUS_PAX = read_protein(CONSENSUS_PAX_URL)

# Global alignment matrix of the full human vs. fruit fly proteins.
# It is kept so that the module-level name stays available, but it must not
# be used for the PAX alignments below: an alignment matrix is only valid
# for the exact pair of sequences it was computed from.
PAM50_ALIGNMENT_MATRIX = student.compute_alignment_matrix(
    HUMAN_EYELESS_PROTEIN, FRUITFLY_EYELESS_PROTEIN, PAM50_SCORING_MATRIX,
    True)
SEQ_A = 'HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQ'
SEQ_B = 'HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ'
print(CONSENSUS_PAX)

# Build one global alignment matrix per pair that is actually aligned, so
# that the traceback in compute_global_alignment reads scores belonging to
# (CONSENSUS_PAX, SEQ_A) and (CONSENSUS_PAX, SEQ_B) respectively.
PAX_SEQ_A_ALIGNMENT_MATRIX = student.compute_alignment_matrix(
    CONSENSUS_PAX, SEQ_A, PAM50_SCORING_MATRIX, True)
(SCORE1, CONSENSUS_PAX1,
 SEQ_A1) = student.compute_global_alignment(CONSENSUS_PAX, SEQ_A,
                                            PAM50_SCORING_MATRIX,
                                            PAX_SEQ_A_ALIGNMENT_MATRIX)
PAX_SEQ_B_ALIGNMENT_MATRIX = student.compute_alignment_matrix(
    CONSENSUS_PAX, SEQ_B, PAM50_SCORING_MATRIX, True)
(SCORE2, CONSENSUS_PAX2,
 SEQ_B2) = student.compute_global_alignment(CONSENSUS_PAX, SEQ_B,
                                            PAM50_SCORING_MATRIX,
                                            PAX_SEQ_B_ALIGNMENT_MATRIX)
print(SEQ_A1)
print(CONSENSUS_PAX1)
print(SEQ_B2)
コード例 #51
0
    Returns:
    A string representing the protein
    """
    protein_file = urllib2.urlopen(filename)
    protein_seq = protein_file.read()
    protein_seq = protein_seq.rstrip()
    return protein_seq


# Q1: local alignment of the human and fruit fly eyeless proteins.
# Call signature kept for reference:
#compute_local_alignment(seq_x, seq_y, scoring_matrix, alignment_matrix)
seq_fly = read_protein(FRUITFLY_EYELESS_URL)
seq_human = read_protein(HUMAN_EYELESS_URL)
score_matrix = read_scoring_matrix(PAM50_URL)
# The trailing False requests the local (not global) alignment matrix.
alignment_matrix = student.compute_alignment_matrix(seq_human, seq_fly,
                                                    score_matrix, False)

# result is indexed below as [0], [1] (human) and [2] (fly).
result = student.compute_local_alignment(seq_human, seq_fly, score_matrix,
                                         alignment_matrix)
#print result[0]
#human
#print result[1]
#fly
#print result[2]

# Q2: load the consensus PAX sequence used for the follow-up comparison.
seq_pax = read_protein(CONSENSUS_PAX_URL)

# Disabled: global alignment of the fly local alignment (result[2])
# against the PAX sequence.
# alignment_matrix_global = student.compute_alignment_matrix(result[2], seq_pax, score_matrix, True)
# result2 = student.compute_global_alignment(result[2], seq_pax, score_matrix, alignment_matrix_global)