def question2():
    """
    Globally align the dash-less local human and fruitfly sequences from
    question1() against the consensus PAX domain and report how well they
    agree.

    For each of the two sequences the fraction (between 0.0 and 1.0) of
    alignment positions at which the aligned sequence and the aligned
    consensus carry the same character is printed. The two fractions are
    also returned as the tuple (human_fraction, fly_fraction).
    """
    # Delete any dashes present in local alignments of humans and fruitflies.
    q1 = question1()
    dashless_local_human = q1[1].replace('-', '')
    dashless_local_fly = q1[2].replace('-', '')

    fractions = []
    for sequence in (dashless_local_human, dashless_local_fly):
        # The matrix fed to compute_global_alignment has to be built in
        # global mode (last argument True); the previous code built it with
        # False, i.e. in local mode, which does not match the global
        # traceback that consumes it.
        alignment_matrix = project4.compute_alignment_matrix(
            sequence, PAX, SCORING_MATRIX, True)
        alignment = project4.compute_global_alignment(
            sequence, PAX, SCORING_MATRIX, alignment_matrix)
        aligned_sequence, aligned_pax = alignment[1], alignment[2]
        # Count the positions at which both aligned strings agree.
        matches = sum(1 for seq_char, pax_char
                      in zip(aligned_sequence, aligned_pax)
                      if seq_char == pax_char)
        fractions.append(float(matches) / len(aligned_sequence))

    human_percent, fly_percent = fractions
    print("human_percent: %s" % human_percent)
    print("fly_percent: %s" % fly_percent)
    return human_percent, fly_percent
def question2():
    """
    Code for question 2.

    Strips the dashes from the human and fruitfly strings returned by
    question1(), globally aligns each of them against the consensus PAX
    sequence with the scoring matrix read from PAM50_URL, and prints the
    percentage of alignment positions at which the two aligned strings
    agree (human first, then fruitfly).
    """
    def count_matches(first, second):
        # Position-by-position count of identical characters.
        return sum(1 for pos, char in enumerate(first)
                   if char == second[pos])

    local_result = question1()
    pam50 = read_scoring_matrix(PAM50_URL)
    human_seq = local_result[1].replace('-', '')
    fly_seq = local_result[2].replace('-', '')
    pax = read_protein(CONSENSUS_PAX_URL)

    human_matrix = compute_alignment_matrix(human_seq, pax, pam50, True)
    fly_matrix = compute_alignment_matrix(fly_seq, pax, pam50, True)

    # Human vs. consensus.
    human_result = compute_global_alignment(human_seq, pax, pam50,
                                            human_matrix)
    aligned_human, aligned_pax_h = human_result[1], human_result[2]
    human_hits = count_matches(aligned_human, aligned_pax_h)
    print(float(human_hits) / len(aligned_human) * 100)

    # Fruitfly vs. consensus.
    fly_result = compute_global_alignment(fly_seq, pax, pam50, fly_matrix)
    aligned_fly, aligned_pax_f = fly_result[1], fly_result[2]
    fly_hits = count_matches(aligned_fly, aligned_pax_f)
    print(float(fly_hits) / len(aligned_fly) * 100)
Beispiel #3
0
def question2():
    """
    Question 2: compare the local alignments with the consensus PAX domain.

    Reads the module-level names local_human, local_fruitfly and scores,
    and the consensus sequence from the file 'alg_ConsensusPAXDomain.txt'.
    For the human sequence and then the fruitfly sequence it prints the
    global alignment against the consensus followed by the percentage of
    alignment positions at which both aligned strings agree.
    """
    # delete the dashes in local alignments
    local_human_new = local_human.replace('-', '')
    local_fruitfly_new = local_fruitfly.replace('-', '')

    # 'with' guarantees the file is closed even when reading fails
    with open('alg_ConsensusPAXDomain.txt', 'r') as consensus_file:
        consensus = consensus_file.read()
    # drop the last two characters read from the file (presumably a
    # trailing line terminator -- TODO confirm against the data file)
    consensus = consensus[:-2]

    # compute the global alignment and the agreement for each sequence
    for local_sequence in (local_human_new, local_fruitfly_new):
        align_matrix = project4.compute_alignment_matrix(
            local_sequence, consensus, scores, True)
        global_align = project4.compute_global_alignment(
            local_sequence, consensus, scores, align_matrix)
        print(global_align)

        aligned_sequence = global_align[1]
        aligned_consensus = global_align[2]
        similarity = sum(1 for seq_char, con_char
                         in zip(aligned_sequence, aligned_consensus)
                         if seq_char == con_char)
        print(similarity / float(len(aligned_sequence)) * 100)
def question2():
    """
    Question 2: measure how closely each local alignment matches the
    consensus PAX domain.

    Uses the module-level names local_human, local_fruitfly and scores and
    reads the consensus sequence from 'alg_ConsensusPAXDomain.txt'. For the
    human sequence first and the fruitfly sequence second, the global
    alignment against the consensus is printed, followed by the percentage
    of positions at which the two aligned strings are identical.
    """
    def report_agreement(sequence, consensus_sequence):
        # Globally align one dash-less sequence with the consensus, then
        # print the alignment and the percentage of matching positions.
        matrix = project4.compute_alignment_matrix(
            sequence, consensus_sequence, scores, True)
        alignment = project4.compute_global_alignment(
            sequence, consensus_sequence, scores, matrix)
        print(alignment)
        aligned, aligned_consensus = alignment[1], alignment[2]
        matching = sum(1 for left, right in zip(aligned, aligned_consensus)
                       if left == right)
        print(matching / float(len(aligned)) * 100)

    # delete the dashes in local alignments
    local_human_new = local_human.replace('-', '')
    local_fruitfly_new = local_fruitfly.replace('-', '')

    # the context manager closes the file even if read() raises
    with open('alg_ConsensusPAXDomain.txt', 'r') as handle:
        # the final two characters of the file are discarded (presumably a
        # trailing line ending -- TODO confirm against the data file)
        consensus = handle.read()[:-2]

    report_agreement(local_human_new, consensus)
    report_agreement(local_fruitfly_new, consensus)
Beispiel #5
0
def find_scoring_matrix(x, y, med, dim):
    """
    Search for scoring parameters under which the global alignment score
    of x and y equals |x| + |y| - med, the relation that ties the score to
    the minimum edit distance.

    Inputs:
        x, y: english strings
        med: minimum edit distance between x, y
        dim: exclusive upper bound of the values tried for the diagonal
            score, the off-diagonal score and the dash penalty; the dash
            score handed to the scoring matrix is the negated penalty,
            so it is always <= 0

    Returns an array with one row [diag_score, off_score, dash_score] for
    every tested combination whose alignment score hits the target.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')

    # e.g. med(kitten, sitting) = 3 gives a target of 6 + 7 - 3 = 10
    target = len(x) + len(y) - med
    scores = np.zeros((dim, dim, dim))
    for diag, off, dash in np.ndindex(dim, dim, dim):
        scoring = seq.build_scoring_matrix(letters, diag, off, -1 * dash)
        alignment = seq.compute_alignment_matrix(x, y, scoring)
        result = seq.compute_global_alignment(x, y, scoring, alignment)
        scores[diag, off, dash] = result[0]

    hits = np.argwhere(scores == target)
    # the third axis indexes the dash penalty; negate it so the rows
    # report the dash score that was actually used
    hits[:, 2] *= -1
    return hits
Beispiel #6
0
def edit_dist(xs, ys):
    """
    Return len(xs) + len(ys) minus the global alignment score of xs and ys.

    The alignment uses a scoring matrix built over ascii_lowercase with the
    parameters 2, 1 and 0.
    """
    scoring = build_scoring_matrix(ascii_lowercase, 2, 1, 0)
    # The final True requests the global variant of the alignment matrix.
    matrix = compute_alignment_matrix(xs, ys, scoring, True)
    score, _aligned_xs, _aligned_ys = compute_global_alignment(
        xs, ys, scoring, matrix)
    combined_length = len(xs) + len(ys)
    return combined_length - score
Beispiel #7
0
def question_2():
    """
    Compare the local human/fruitfly eyeless alignments with the consensus
    PAX domain.

    Computes the local alignment of the human and fruitfly proteins, strips
    the dashes from both aligned strings, globally aligns each of them with
    the consensus sequence and prints, for each, the percentage of alignment
    positions at which the aligned sequence and the aligned consensus agree.
    """
    def percent_agreement(global_alignment):
        # Both aligned strings come from the same alignment, so comparing
        # them position by position keeps the columns in step. Comparing
        # against the raw, un-gapped consensus (as the code did before)
        # shifts every column after the first gap in the consensus.
        aligned_sequence = global_alignment[1]
        aligned_consensus = global_alignment[2]
        count = 0.
        for pair in zip(aligned_sequence, aligned_consensus):
            if pair[0] == pair[1]:
                count += 1.
        return (count / len(aligned_sequence)) * 100

    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    consensus = read_protein(CONSENSUS_PAX_URL)

    scoring_matrix = read_scoring_matrix(PAM50_URL)

    # Local alignment of the two full proteins (flag False = local matrix).
    alignment_matrix_local = project4.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    local_aligns = project4.compute_local_alignment(
        human, fly, scoring_matrix, alignment_matrix_local)

    human_no_dashes = local_aligns[1].replace('-', '')
    fly_no_dashes = local_aligns[2].replace('-', '')

    # Global alignment of each dash-less local sequence with the consensus.
    global_matrix_human_consensus = project4.compute_alignment_matrix(
        human_no_dashes, consensus, scoring_matrix, True)
    global_matrix_fly_consensus = project4.compute_alignment_matrix(
        fly_no_dashes, consensus, scoring_matrix, True)

    global_align_human_consensus = project4.compute_global_alignment(
        human_no_dashes, consensus, scoring_matrix,
        global_matrix_human_consensus)
    global_align_fly_consensus = project4.compute_global_alignment(
        fly_no_dashes, consensus, scoring_matrix,
        global_matrix_fly_consensus)

    human_percentage = percent_agreement(global_align_human_consensus)
    fly_percentage = percent_agreement(global_align_fly_consensus)

    print("human percentage:  %s" % human_percentage)
    print("fly percentage:  %s" % fly_percentage)
def edit_dist(xs, ys):
    '''
    Helper function for Question 8.

    Returns len(xs) + len(ys) minus the global alignment score of xs and
    ys, where the scoring matrix is built over the lowercase letters with
    the parameters 2, 1 and 0.
    '''
    lowercase_letters = 'abcdefghijklmnopqrstuvwxyz'
    scoring_matrix = project4.build_scoring_matrix(lowercase_letters, 2, 1, 0)
    alignment_matrix = project4.compute_alignment_matrix(
        xs, ys, scoring_matrix, True)
    score, _aligned_xs, _aligned_ys = project4.compute_global_alignment(
        xs, ys, scoring_matrix, alignment_matrix)
    total_length = len(xs) + len(ys)
    return total_length - score
def question_2():
    '''
    Compare each of the two sequences of the local alignment computed by
    question_1() with the consensus PAX domain sequence, to judge whether
    they correspond to the PAX domain.

    Both local sequences are stripped of dashes and globally aligned with
    the consensus sequence; each pair of aligned strings is then passed to
    compute_similarity().

    Returns the tuple ('Human:', human_similarity, 'Fruitfly:',
    fruitfly_similarity).
    '''
    pax_consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    _local_score, local_human, local_fruitfly = question_1()
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    def align_with_consensus(sequence):
        # Global alignment of one dash-less sequence with the consensus.
        matrix = project4.compute_alignment_matrix(
            sequence, pax_consensus, pam50, True)
        return project4.compute_global_alignment(
            sequence, pax_consensus, pam50, matrix)

    # Remove the dashes '-' the local alignment inserted.
    dashless_human = local_human.replace('-', '')
    dashless_fruitfly = local_fruitfly.replace('-', '')

    human_alignment = align_with_consensus(dashless_human)
    fruitfly_alignment = align_with_consensus(dashless_fruitfly)

    # Entries 1 and 2 of each result are the two globally aligned strings
    # (local sequence and consensus) that get compared with each other.
    similarity_human = compute_similarity(human_alignment[1],
                                          human_alignment[2])
    similarity_fruitfly = compute_similarity(fruitfly_alignment[1],
                                             fruitfly_alignment[2])

    return 'Human:', similarity_human, 'Fruitfly:', similarity_fruitfly
def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words from word_list whose edit distance to
    checked_word is at most dist.

    The edit distance is computed as |x| + |y| - score(x, y), with the
    global alignment score taken under diag_score = 2, off_diag_score = 1
    and dash_score = 0.
    """
    letters = set("abcdefghijklmnopqrstuvwxyz")
    scoring = project4.build_scoring_matrix(letters, 2, 1, 0)

    def distance_to(candidate):
        # Edit distance between checked_word and one dictionary word.
        matrix = project4.compute_alignment_matrix(
            checked_word, candidate, scoring, True)
        score, _, _ = project4.compute_global_alignment(
            checked_word, candidate, scoring, matrix)
        return len(checked_word) + len(candidate) - score

    return set(candidate for candidate in word_list
               if distance_to(candidate) <= dist)
def calculate_similar_ratio():
    """
    Print how closely each sequence of the human/fruitfly alignment matches
    the consensus PAX domain.

    The two aligned strings returned by align_human_fly_protein() are
    stripped of dashes and globally aligned with the consensus sequence
    (using the module-level scoring_matrix). For the human sequence first
    and the fruitfly sequence second, the fraction of alignment positions
    at which both aligned strings agree is printed.
    """
    local_result = align_human_fly_protein()
    sequence_human = local_result[1].replace('-', '')
    sequence_fly = local_result[2].replace('-', '')

    # The consensus sequence is identical for both comparisons, so it is
    # read once instead of once per comparison.
    protein_consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)

    for sequence in (sequence_human, sequence_fly):
        alignment_matrix = project4.compute_alignment_matrix(
            sequence, protein_consensus, scoring_matrix, True)
        aligned = project4.compute_global_alignment(
            sequence, protein_consensus, scoring_matrix, alignment_matrix)
        mark = sum(1 for seq_char, con_char in zip(aligned[1], aligned[2])
                   if seq_char == con_char)
        print(mark / float(len(aligned[1])))
Beispiel #12
0
def pax_domain(scoring_matrix, local_alignment):
    """
    Compare the local alignments of the human and drosophila eyeless
    proteins to the consensus PAX domain by computing a global alignment.

    Returns a tuple of two proportions (human vs consensus, drosophila vs
    consensus); each is the share of alignment positions at which both
    aligned strings carry the same amino acid.
    """
    # load consensus pax domain
    consensus = read_protein(CONSENSUS_PAX_URL)

    def agreement_with_consensus(aligned_local):
        # strip the dashes, align globally against the consensus and
        # return the proportion of identical positions
        dashless = aligned_local.replace('-', '')
        matrix = seq.compute_alignment_matrix(dashless, consensus,
                                              scoring_matrix)
        alignment = seq.compute_global_alignment(dashless, consensus,
                                                 scoring_matrix, matrix)
        length = len(alignment[1])
        same = sum(1 for first, second in zip(alignment[1], alignment[2])
                   if first == second)
        return float(same) / length

    human_share = agreement_with_consensus(local_alignment[1])
    drosophila_share = agreement_with_consensus(local_alignment[2])
    return (human_share, drosophila_share)
Beispiel #13
0
def check_spelling(checked_word, dist, word_list):
    """
    Function for Question 8.

    Returns the list of words from word_list whose edit distance to
    checked_word is at most dist. The edit distance is computed as
    len(checked_word) + len(word) - global alignment score, with a scoring
    matrix built from the parameters 2, 1 and 0.

    NOTE(review): when checked_word itself is in word_list the word is
    returned directly (a string, not a list); kept for compatibility with
    existing callers.

    The number of words that survived the cheap pre-filter is printed
    before returning.
    """
    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    if checked_word in word_list:
        return checked_word

    score_matrix = project4.build_scoring_matrix(alphabet, 2, 1, 0)
    words = []
    checked_word_chars = set(checked_word)
    # A word can only be within dist edits if its length differs from the
    # checked word's length by at most dist.
    min_length = len(checked_word) - dist
    max_length = len(checked_word) + dist
    num_checks = 0
    for each_word in word_list:
        if not min_length <= len(each_word) <= max_length:
            continue
        # Every distinct letter of each_word that is missing from
        # checked_word costs at least one edit, so more than dist such
        # letters rules the word out. (The bound used to be a fixed 2,
        # which wrongly discarded candidates whenever dist > 2.)
        num_diffs = len(set(each_word) - checked_word_chars)
        if num_diffs > dist:
            continue

        align_matrix = project4.compute_alignment_matrix(
            checked_word, each_word, score_matrix, True)
        result = project4.compute_global_alignment(checked_word, each_word,
                                                   score_matrix,
                                                   align_matrix)
        if (len(checked_word) + len(each_word) - result[0]) <= dist:
            words.append(each_word)
        num_checks += 1
    print(num_checks)
    return words
def question7():
    """
    Question 7.

    Globally aligns the sample strings 'abcde' and 'xycdefg' under a
    scoring matrix built with the parameters 2, 1 and 0, then prints both
    strings, the alignment result and the sum of the two lengths minus the
    alignment score.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    scoring = project4.build_scoring_matrix(letters, 2, 1, 0)
    first, second = 'abcde', 'xycdefg'
    matrix = project4.compute_alignment_matrix(first, second, scoring, True)
    alignment = project4.compute_global_alignment(first, second, scoring,
                                                  matrix)
    print(first)
    print(second)
    print(alignment)
    print(len(first) + len(second) - alignment[0])
Beispiel #15
0
def check_spelling(checked_word, dist, word_list):
    """
    Returns the set of words from word_list that are within dist edit
    distance of checked_word.

    The edit distance of two words is taken as the sum of their lengths
    minus their global alignment score under a scoring matrix built with
    the parameters 2, 1 and 0.
    """
    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    # The scoring matrix does not depend on the word being tested, so it is
    # built once instead of once per dictionary word.
    smtrx = seq.build_scoring_matrix(alphabet, 2, 1, 0)
    candidates = set([])

    for word in word_list:
        amtrx = seq.compute_alignment_matrix(checked_word, word, smtrx)
        score = seq.compute_global_alignment(checked_word, word, smtrx,
                                             amtrx)[0]
        if len(checked_word) + len(word) - score <= dist:
            candidates.add(word)

    return candidates
Beispiel #16
0
def calculate_edit_distance(xseq, yseq):
    '''
    Return the edit distance of xseq and yseq
    (http://en.wikipedia.org/wiki/Edit_distance).

    It is computed as len(xseq) + len(yseq) minus the global alignment
    score under a scoring matrix built with the parameters 2, 1 and 0 over
    the lowercase letters plus '-'.
    '''
    symbols = set('abcdefghijklmnopqrstuvwxyz-')
    scoring = project4.build_scoring_matrix(symbols, 2, 1, 0)
    matrix = project4.compute_alignment_matrix(xseq, yseq, scoring, True)
    alignment = project4.compute_global_alignment(xseq, yseq, scoring,
                                                  matrix)
    total_length = len(xseq) + len(yseq)
    return total_length - alignment[0]
def check_spelling(checked_word, dist, word_list):
    """
    Function for Question 8.

    Returns the list of words from word_list whose edit distance to
    checked_word is at most dist, where the edit distance is
    len(checked_word) + len(word) - global alignment score under a scoring
    matrix built with the parameters 2, 1 and 0.

    NOTE(review): if checked_word is itself in word_list, the word is
    returned as a plain string rather than a list; this is kept so existing
    callers keep working.

    Before returning, the number of words that passed the cheap pre-filter
    is printed.
    """
    alphabet = set('abcdefghijklmnopqrstuvwxyz')
    if checked_word in word_list:
        return checked_word

    score_matrix = project4.build_scoring_matrix(alphabet, 2, 1, 0)
    checked_word_chars = set(checked_word)
    checked_length = len(checked_word)
    words = []
    num_checks = 0
    for each_word in word_list:
        # Pre-filter 1: the lengths may differ by at most dist.
        if abs(len(each_word) - checked_length) > dist:
            continue
        # Pre-filter 2: each distinct letter of each_word that does not
        # occur in checked_word needs at least one edit. The threshold was
        # previously hard-coded to 2, which dropped valid candidates for
        # dist > 2; dist is the correct bound.
        foreign_letters = [char for char in set(each_word)
                           if char not in checked_word_chars]
        if len(foreign_letters) > dist:
            continue

        align_matrix = project4.compute_alignment_matrix(
            checked_word, each_word, score_matrix, True)
        result = project4.compute_global_alignment(
            checked_word, each_word, score_matrix, align_matrix)
        if (checked_length + len(each_word) - result[0]) <= dist:
            words.append(each_word)
        num_checks += 1
    print(num_checks)
    return words
def check_spelling(check_word, dist, word_list):
    """
    Spell-check check_word against a dictionary.

    :param check_word: word to check
    :param dist: maximum edit distance
    :param word_list: list of words (dictionary)
    :return: list of the words from word_list whose edit distance to
        check_word is at most dist, in word_list order
    """
    letters = list(string.ascii_lowercase)
    scoring = student.build_scoring_matrix(letters, 2, 1, 0)

    def edit_distance_to(candidate):
        # lengths of both words minus their global alignment score
        matrix = student.compute_alignment_matrix(candidate, check_word,
                                                  scoring, True)
        alignment = student.compute_global_alignment(candidate, check_word,
                                                     scoring, matrix)
        return len(candidate) + len(check_word) - alignment[0]

    return [candidate for candidate in word_list
            if edit_distance_to(candidate) <= dist]
def check_spelling(check_word, dist, word_list):
    """
    Collect the dictionary words that are close to check_word.

    :param check_word: word to check
    :param dist: largest edit distance that still counts as a match
    :param word_list: list of words (dictionary)
    :return: list of words from word_list within edit distance dist of
        check_word, kept in their original order
    """
    score_matrix = student.build_scoring_matrix(
        list(string.ascii_lowercase), 2, 1, 0)
    check_length = len(check_word)
    matches = []

    for entry in word_list:
        matrix = student.compute_alignment_matrix(entry, check_word,
                                                  score_matrix, True)
        alignment = student.compute_global_alignment(entry, check_word,
                                                     score_matrix, matrix)
        # edit distance = combined length minus global alignment score
        if len(entry) + check_length - alignment[0] > dist:
            continue
        matches.append(entry)

    return matches
Beispiel #20
0
def question7():
    """
    Question 7.

    Prints the two sample strings 'abcde' and 'xycdefg', their global
    alignment under a scoring matrix built with the parameters 2, 1 and 0,
    and the combined length of both strings minus the alignment score.
    """
    sample_x = 'abcde'
    sample_y = 'xycdefg'
    lowercase = set('abcdefghijklmnopqrstuvwxyz')
    scoring_matrix = project4.build_scoring_matrix(lowercase, 2, 1, 0)
    alignment_matrix = project4.compute_alignment_matrix(
        sample_x, sample_y, scoring_matrix, True)
    alignment = project4.compute_global_alignment(
        sample_x, sample_y, scoring_matrix, alignment_matrix)
    for item in (sample_x, sample_y, alignment):
        print(item)
    print(len(sample_x) + len(sample_y) - alignment[0])
 def check_spelling(checked_word, dist, word_list):
     """
     Go through word_list and return the set of all words whose edit
     distance to the string checked_word is at most dist.

     The edit distance is the combined length of both words minus their
     global alignment score under a scoring matrix built with the
     parameters 2, 1 and 0.
     """
     scoring = project4.build_scoring_matrix(
         'abcdefghijklmnopqrstuvwxyz', 2, 1, 0)
     length_checked = len(checked_word)

     def is_close(candidate):
         # True when candidate lies within dist edits of checked_word.
         length_candidate = len(candidate)
         matrix = project4.compute_alignment_matrix(
             checked_word, candidate, scoring, True)
         alignment = project4.compute_global_alignment(
             checked_word, candidate, scoring, matrix)
         return length_checked + length_candidate - alignment[0] <= dist

     return set(candidate for candidate in word_list if is_close(candidate))
def check_spelling(checked_word, dist, word_list):
    """
    Find the words of word_list that lie within edit distance dist of
    checked_word.

    Parameters
    ----------
    checked_word: str
    the word to be checked

    dist: int
    the largest edit distance that is still accepted

    word_list: list
    a list of words


    Returns
    -------
    result: list
    the words of word_list, in their original order, whose edit
    distance to checked_word is at most dist.
    """
    scoring = build_scoring_matrix("abcdefghijklmnopqrstuvwxyz", 2, 1, 0)

    def distance_from_checked(candidate):
        # combined length of both words minus the global alignment score
        matrix = compute_alignment_matrix(checked_word, candidate,
                                          scoring, True)
        alignment_score = compute_global_alignment(checked_word, candidate,
                                                   scoring, matrix)[0]
        return len(checked_word) + len(candidate) - alignment_score

    return [candidate for candidate in word_list
            if distance_from_checked(candidate) <= dist]
Beispiel #23
0
def edit_distance(seq_x, seq_y):
    """
    Return len(seq_x) + len(seq_y) minus the global alignment score of
    seq_x and seq_y under a scoring matrix built over the lowercase
    letters with the parameters 2, 1 and 0.
    """
    scoring = project4.build_scoring_matrix(string.ascii_lowercase, 2, 1, 0)
    matrix = project4.compute_alignment_matrix(seq_x, seq_y, scoring, True)
    alignment = project4.compute_global_alignment(seq_x, seq_y, scoring,
                                                  matrix)
    combined_length = len(seq_x) + len(seq_y)
    return combined_length - alignment[0]
Beispiel #24
0

# Load the two eyeless proteins, the PAM50 scoring matrix and the consensus
# PAX domain sequence.
HUMAN_EYELESS_PROTEIN = read_protein(HUMAN_EYELESS_URL)
FRUITFLY_EYELESS_PROTEIN = read_protein(FRUITFLY_EYELESS_URL)
PAM50_SCORING_MATRIX = read_scoring_matrix(PAM50_URL)
CONSENSUS_PAX = read_protein(CONSENSUS_PAX_URL)

# Alignment matrix of the two full proteins; kept under its original name.
PAM50_ALIGNMENT_MATRIX = student.compute_alignment_matrix(
    HUMAN_EYELESS_PROTEIN, FRUITFLY_EYELESS_PROTEIN, PAM50_SCORING_MATRIX,
    True)
SEQ_A = 'HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQ'
SEQ_B = 'HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ'
print(CONSENSUS_PAX)

# A global alignment must be traced back through the matrix that was built
# for exactly the pair of sequences being aligned. The matrix above belongs
# to the human/fruitfly pair, so dedicated matrices are computed for the
# consensus vs. SEQ_A and consensus vs. SEQ_B alignments.
ALIGNMENT_MATRIX_PAX_A = student.compute_alignment_matrix(
    CONSENSUS_PAX, SEQ_A, PAM50_SCORING_MATRIX, True)
ALIGNMENT_MATRIX_PAX_B = student.compute_alignment_matrix(
    CONSENSUS_PAX, SEQ_B, PAM50_SCORING_MATRIX, True)
(SCORE1, CONSENSUS_PAX1,
 SEQ_A1) = student.compute_global_alignment(CONSENSUS_PAX, SEQ_A,
                                            PAM50_SCORING_MATRIX,
                                            ALIGNMENT_MATRIX_PAX_A)
(SCORE2, CONSENSUS_PAX2,
 SEQ_B2) = student.compute_global_alignment(CONSENSUS_PAX, SEQ_B,
                                            PAM50_SCORING_MATRIX,
                                            ALIGNMENT_MATRIX_PAX_B)
print(SEQ_A1)
print(CONSENSUS_PAX1)
print(SEQ_B2)
print(CONSENSUS_PAX2)

# Count the positions at which the aligned consensus and SEQ_A agree.
Percentage1 = 0.0
Percentage2 = 0.0
for dummy_pax, dummy_seq in zip(CONSENSUS_PAX1, SEQ_A1):
    if dummy_pax == dummy_seq:
        Percentage1 = Percentage1 + 1.0
    seq_x, seq_y, scoring_matrix, alignment_matrix)
print(string_Hu)

# Strip the dashes from both locally aligned strings. str.replace does this
# in one pass instead of growing a new string character by character.
newstring_Hu = string_Hu.replace('-', '')
print(newstring_Hu)
newstring_Fr = string_Fr.replace('-', '')

# Globally align the dash-less human sequence with the consensus sequence.
alignment_matrix_Hum_local_Con = student.compute_alignment_matrix(
    newstring_Hu, consensusseq, scoring_matrix, True)
score1, str_Hu_Con, str_Con_Hu = student.compute_global_alignment(
    newstring_Hu, consensusseq, scoring_matrix, alignment_matrix_Hum_local_Con)

# Globally align the dash-less fruitfly sequence with the consensus sequence.
alignment_matrix_Fr_local_Con = student.compute_alignment_matrix(
    newstring_Fr, consensusseq, scoring_matrix, True)
score2, str_Fr_Con, str_Con_Fr = student.compute_global_alignment(
    newstring_Fr, consensusseq, scoring_matrix, alignment_matrix_Fr_local_Con)


def cal_percentage(str1, str2):
    """
    Return the fraction of positions of str1 at which str2 holds the same
    element.

    The result is a float between 0.0 and 1.0 (matches divided by
    len(str1)). str2 must be at least as long as str1; an empty str1
    raises ZeroDivisionError.
    """
    total = len(str1)
    matches = sum(1 for position, element in enumerate(str1)
                  if element == str2[position])
    return float(matches) / total
Beispiel #26
0
def agreement(xs, ys, scoring, alignmnet):
    """
    Return the percentage of positions at which the two strings of the
    global alignment of xs and ys agree.

    xs, ys: the sequences to align
    scoring: scoring matrix passed on to compute_global_alignment
    alignmnet: alignment matrix passed on to compute_global_alignment

    The result is a float between 0.0 and 100.0; an empty alignment
    yields 0.0.
    """
    _, aligned_x, aligned_y = compute_global_alignment(xs, ys, scoring,
                                                       alignmnet)
    if not aligned_x:
        # nothing was aligned, so there is nothing that could agree
        return 0.0
    # Compare the two *aligned* strings: they share the same columns.
    # Zipping aligned_x with the raw ys only gives the same answer while
    # the aligned ys contains no gaps; after a gap every column is shifted.
    matches = sum(1 for a, b in zip(aligned_x, aligned_y) if a == b)
    return 100. * matches / len(aligned_x)