def question2():
    """Print how well the human and fruit-fly local alignments match the PAX domain.

    The two eyeless proteins are locally aligned against each other, the
    dashes are removed from both local alignments, and each dash-free
    sequence is then globally aligned against the consensus PAX domain.
    For each global alignment (human first, then fruit fly) the fraction of
    alignment columns whose two characters agree is printed.

    The fraction is taken over every column of the global alignment, which
    can be longer than the consensus PAX sequence itself.
    """
    scoring_matrix = read_scoring_matrix(PAM50_URL)
    human = read_protein(HUMAN_EYELESS_URL)
    fly = read_protein(FRUITFLY_EYELESS_URL)
    consensus_pax = read_protein(CONSENSUS_PAX_URL)

    # False requests the local-alignment variant of the matrix.
    alignment_matrix = student.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    local_result = student.compute_local_alignment(
        human, fly, scoring_matrix, alignment_matrix)

    # local_result[1] is the human side, local_result[2] the fruit-fly side.
    for local_seq in (local_result[1], local_result[2]):
        stripped = local_seq.replace('-', '')
        # True requests the global-alignment variant of the matrix.
        global_matrix = student.compute_alignment_matrix(
            stripped, consensus_pax, scoring_matrix, True)
        global_result = student.compute_global_alignment(
            stripped, consensus_pax, scoring_matrix, global_matrix)
        aligned_seq, aligned_pax = global_result[1], global_result[2]
        matches = sum(1 for seq_char, pax_char in zip(aligned_seq, aligned_pax)
                      if seq_char == pax_char)
        print(matches * 1.0 / len(aligned_seq))
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """ tally local alignment scores of seq_x against shuffled copies of seq_y

    Runs num_trials trials and returns a dictionary mapping each observed
    score to the number of trials that produced it.
    """

    score_counts = {}
    for _unused_trial in range(num_trials):
        # permute the positions of seq_y, then read the characters back out
        positions = list(range(len(seq_y)))
        random.shuffle(positions)
        rand_y = "".join([seq_y[position] for position in positions])

        # local alignment (flag False) of seq_x with the permuted sequence
        local_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        score, _, _ = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, local_matrix)

        score_counts[score] = score_counts.get(score, 0) + 1

    return score_counts
# Example no. 3
# 0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Score seq_x against repeated random shuffles of seq_y.

    input : two sequences, scoring matrix and number of trials
    output : dictionary mapping the trial number (1 .. num_trials) to the
             local alignment score obtained in that trial
    """
    trial_scores = {}
    shuffled = list(seq_y)
    # Trials are numbered from 1 and the range includes num_trials itself, so
    # exactly num_trials shuffles are scored.
    for trial in range(1, num_trials + 1):
        # each trial reshuffles the list left by the previous trial
        random.shuffle(shuffled)
        rand_y = ''.join(shuffled)

        # local alignment (flag False) of seq_x and rand_y
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        local_alignment = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)

        # element 0 of the returned alignment is its score
        trial_scores[trial] = local_alignment[0]

    return trial_scores
def run_q1():
    """Locally align the human and fruit-fly eyeless proteins under PAM50.

    Returns the result of student.compute_local_alignment unchanged.
    """
    human = read_protein(HUMAN_EYELESS_URL)
    fruitfly = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    # False requests the local-alignment variant of the matrix.
    local_matrix = student.compute_alignment_matrix(human, fruitfly, pam50, False)
    local_alignment = student.compute_local_alignment(human, fruitfly, pam50,
                                                      local_matrix)
    return local_alignment
def check_spelling(checked_word, dist, word_list):
    """Return the words of word_list within edit distance dist of checked_word.

    Words are scored with a global alignment under a 2 / 1 / 0
    (match / mismatch / dash) scoring matrix over the lowercase letters; the
    distance is len(checked_word) + len(word) - score.  The result is a list
    in word_list order.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    scoring_matrix = student.build_scoring_matrix(letters, 2, 1, 0)
    close_words = list()
    for candidate in word_list:
        global_matrix = student.compute_alignment_matrix(
            checked_word, candidate, scoring_matrix, True)
        alignment = student.compute_global_alignment(
            checked_word, candidate, scoring_matrix, global_matrix)
        distance = len(checked_word) + len(candidate) - alignment[0]
        if distance > dist:
            continue
        close_words.append(candidate)
    return close_words
# Example no. 6
# 0
def check_spelling(checked_word, dist, word_list):
    """Return the words of word_list within edit distance dist of checked_word.

    The distance of a word is len(checked_word) + len(word) - score, where
    score is the global alignment score of the two strings.  The scoring
    matrix is read from the module-level name scoring_matrix.
    NOTE(review): the formula presumably expects the 2 / 1 / 0
    (match / mismatch / dash) matrix used by the other spell checkers; confirm
    where scoring_matrix is defined.

    Returns a list in word_list order.
    """
    answer = []
    for word in word_list:
        # True builds the global matrix, so the matching global alignment
        # routine must be used to read the score back out of it.
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, word, scoring_matrix, True)
        result = student.compute_global_alignment(checked_word, word,
                                                  scoring_matrix,
                                                  alignment_matrix)
        if (len(checked_word) + len(word) - result[0]) <= dist:
            answer.append(word)
    return answer
def question2():
    """ globally align the hard-coded human and frfly sequences
    with consensusPAX and print the percentage of agreeing columns
    """

    def percent_agreement(aligned_seq, aligned_pax):
        """ percentage of positions where aligned_seq equals aligned_pax """
        agree = 0.0
        for position, residue in enumerate(aligned_seq):
            if residue == aligned_pax[position]:
                agree += 1
        return agree / len(aligned_seq) * 100

    # scoring matrix, the two hard-coded sequences and the PAX domain
    pam50 = read_scoring_matrix(PAM50_URL)
    local_human = "HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQ"
    local_frfly = "HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ"
    pax = read_protein(CONSENSUS_PAX_URL)

    # global alignment matrices (flag True) against the PAX domain
    matrix_human = student.compute_alignment_matrix(local_human, pax, pam50, True)
    matrix_frfly = student.compute_alignment_matrix(local_frfly, pax, pam50, True)

    # global alignments: (score, aligned sequence, aligned PAX)
    _, human_align, consensus_align = student.compute_global_alignment(
        local_human, pax, pam50, matrix_human)
    _, frfly_align, consensus_align_2 = student.compute_global_alignment(
        local_frfly, pax, pam50, matrix_frfly)

    human_percent = percent_agreement(human_align, consensus_align)
    frfly_percent = percent_agreement(frfly_align, consensus_align_2)

    print("% Human: " + str(human_percent))
    print("Hmn: " + human_align)
    print("PAX: " + consensus_align)

    print("")

    print("% FrFly: " + str(frfly_percent))
    print("Fly: " + frfly_align)
    print("PAX: " + consensus_align_2)
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Count local alignment scores of seq_x against shuffles of seq_y.

    A list copy of seq_y is shuffled in place once per trial (each shuffle
    starts from the previous one) and that list is aligned with seq_x.
    Returns a dictionary mapping score -> number of trials with that score.
    """
    counts = {}
    shuffled_y = list(seq_y[:])
    for _ in range(num_trials):
        random.shuffle(shuffled_y)
        # False requests the local-alignment variant of the matrix.
        local_matrix = student.compute_alignment_matrix(
            seq_x, shuffled_y, scoring_matrix, False)
        alignment = student.compute_local_alignment(
            seq_x, shuffled_y, scoring_matrix, local_matrix)
        score = alignment[0]
        counts[score] = counts.get(score, 0) + 1
    return counts
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Count local alignment scores of seq_x against permutations of seq_y.

    Every trial draws a fresh permutation of seq_y with random.sample, joins
    it into a string and scores its local alignment with seq_x.  Returns a
    dictionary mapping score -> number of trials with that score.
    """
    tally = dict()
    for _ in range(num_trials):
        permuted = random.sample(seq_y, len(seq_y))
        rand_y = ''.join(permuted)
        # False requests the local-alignment variant of the matrix.
        local_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        score = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, local_matrix)[0]
        tally[score] = tally.setdefault(score, 0) + 1
    return tally
# Example no. 10
# 0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Count local alignment scores of seq_x against shuffles of seq_y.

    Each trial shuffles a fresh copy of seq_y, joins it into a string and
    scores its local alignment with seq_x.  Returns a defaultdict(int)
    mapping score -> number of trials with that score.
    """
    import collections
    tally = collections.defaultdict(int)
    for _ in range(num_trials):
        letters = list(seq_y)
        random.shuffle(letters)
        rand_y = ''.join(letters)
        # False requests the local-alignment variant of the matrix.
        matrix = student.compute_alignment_matrix(seq_x, rand_y,
                                                  scoring_matrix, False)
        alignment = student.compute_local_alignment(seq_x, rand_y,
                                                    scoring_matrix, matrix)
        tally[alignment[0]] += 1
    return tally
# Example no. 11
# 0
def q3():
    """Repeat the alignment experiment on two random amino-acid sequences.

    Two random strings, each as long as the module-level seq_x, are drawn
    from the 23-letter alphabet below and locally aligned.  The score, the
    dash-free first alignment and the raw second alignment are printed.
    Both dash-free local alignments are then globally aligned against the
    consensus PAX domain; for each, the two alignment lengths and the
    fraction of agreeing columns are printed.

    Reads the module-level names seq_x (length only) and scoring_matrix.
    """
    amino_acids = "ACBEDGFIHKMLNQPSRTWVYXZ"

    # The random sequences get their own names: assigning to seq_x in this
    # function would make it local and the len() call below would raise
    # UnboundLocalError.
    len_gen = len(seq_x)
    rand_x_chars = []
    rand_y_chars = []
    for _ in range(len_gen):
        rand_x_chars.append(random.choice(amino_acids))
        rand_y_chars.append(random.choice(amino_acids))
    rand_x = ''.join(rand_x_chars)
    rand_y = ''.join(rand_y_chars)

    # Align once, after both sequences are complete.  False requests the
    # local-alignment variant of the matrix.
    alignment_matrix = student.compute_alignment_matrix(
        rand_x, rand_y, scoring_matrix, False)
    score, human_aligen, fruit_aligen = student.compute_local_alignment(
        rand_x, rand_y, scoring_matrix, alignment_matrix)

    print(score)
    print(human_aligen.replace('-', ''))
    print(fruit_aligen)

    pax = read_protein(CONSENSUS_PAX_URL)

    for local_seq in (human_aligen.replace('-', ''),
                      fruit_aligen.replace('-', '')):
        # A global alignment needs the global matrix, hence the True flag.
        global_matrix = student.compute_alignment_matrix(
            local_seq, pax, scoring_matrix, True)
        _, aligned_seq, aligned_pax = student.compute_global_alignment(
            local_seq, pax, scoring_matrix, global_matrix)

        print("%d %d" % (len(aligned_seq), len(aligned_pax)))
        same = sum(1 for seq_char, pax_char in zip(aligned_seq, aligned_pax)
                   if seq_char == pax_char)
        print(same * 1.0 / len(aligned_seq))
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Build an un-normalized distribution of local alignment scores.

    A list copy of seq_y is shuffled in place before every trial and the list
    itself is aligned with seq_x.  The returned dictionary maps each score
    to the number of trials that produced it.
    """
    histogram = {}
    permuted_y = list(seq_y[:])
    for _ in range(num_trials):
        random.shuffle(permuted_y)
        # False requests the local-alignment variant of the matrix.
        matrix = student.compute_alignment_matrix(seq_x, permuted_y,
                                                  scoring_matrix, False)
        result = student.compute_local_alignment(seq_x, permuted_y,
                                                 scoring_matrix, matrix)
        trial_score = result[0]
        try:
            histogram[trial_score] += 1
        except KeyError:
            histogram[trial_score] = 1
    return histogram
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Count local alignment scores of seq_x against shuffles of seq_y.

    One list of seq_y's characters is reshuffled in place for every trial and
    joined into a string before alignment.  Returns a dictionary mapping
    score -> number of trials with that score.
    """
    tally = {}
    shuffled = list(seq_y)
    for _ in range(num_trials):
        # the same list object is shuffled every time (no per-trial copy)
        random.shuffle(shuffled)
        rand_y = ''.join(shuffled)
        local_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        alignment = student.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, local_matrix)
        score, _, _ = alignment
        if score in tally:
            tally[score] += 1
        else:
            tally[score] = 1
    return tally
def run_q2(origin_seq_x):
    """Return the fraction of columns on which origin_seq_x agrees with the PAX domain.

    Dashes are removed from origin_seq_x, the result is globally aligned with
    the hard-coded sequence below under PAM50, the four lengths are printed,
    and the share of alignment columns holding equal characters is returned.
    """
    pax_domain = 'GHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIR'
    stripped_x = origin_seq_x.replace('-', '')
    pam50 = read_scoring_matrix(PAM50_URL)
    # True requests the global-alignment variant of the matrix.
    global_matrix = student.compute_alignment_matrix(stripped_x, pax_domain, pam50, True)
    score, aligned_x, aligned_pax = student.compute_global_alignment(
        stripped_x, pax_domain, pam50, global_matrix)
    assert len(aligned_x) == len(aligned_pax)
    print (len(stripped_x), len(pax_domain), len(aligned_x) , len(aligned_pax))
    agreeing = sum(1 for left, right in zip(aligned_x, aligned_pax) if left == right)
    return agreeing * 1.0 / len(aligned_pax)
# Example no. 15
# 0
# File: test.py  Project: oahehc/my
def check_spelling(checked_word, dist, word_list):
    """
    input: word, target distance, and word list
    output: the set of words in word list whose distance to the input word
            is <= target distance

    The distance of a word is len(checked_word) + len(word) - score, where
    score is the global alignment score.  The scoring matrix is read from the
    module-level name scoring_matrix.
    """
    result = set()
    x = len(checked_word)
    for item in word_list:
        y = len(item)
        # Words whose length differs from the input by more than dist are
        # never aligned at all.
        if abs(x - y) > dist:
            continue
        alignment_matrix = student.compute_alignment_matrix(checked_word, item, scoring_matrix, True)
        # The score of a global alignment is the bottom-right matrix entry;
        # reading it directly also avoids scanning the whole matrix.
        score = alignment_matrix[-1][-1]
        if (x + y - score) <= dist:
            # add() mutates in place instead of building a new set per hit
            result.add(item)
    return result
# Example no. 16
# 0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials) :
    """Count local alignment scores of seq_x against shuffles of seq_y.

    Prints the trial index at the start of each trial.  A list copy of seq_y
    is reshuffled in place per trial and the list itself is aligned with
    seq_x.  Returns a dictionary mapping score -> number of trials.
    """
    tally = {}
    shuffled_y = list(seq_y)
    for trial_number in range(num_trials):
        print(trial_number)
        random.shuffle(shuffled_y)
        # False requests the local-alignment variant of the matrix.
        local_matrix = student.compute_alignment_matrix(
            seq_x, shuffled_y, scoring_matrix, False)
        score, _, _ = student.compute_local_alignment(
            seq_x, shuffled_y, scoring_matrix, local_matrix)
        tally[score] = tally.get(score, 0) + 1

    return tally
def run_q2(origin_seq_x):
    """Globally align the dash-free origin_seq_x with the hard-coded sequence below.

    Prints the lengths of both inputs and both aligned strings, then returns
    the fraction of alignment columns in which the two characters are equal.
    """
    sequence = origin_seq_x.replace('-', '')
    reference = 'GHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIR'
    pam50 = read_scoring_matrix(PAM50_URL)
    # True requests the global-alignment variant of the matrix.
    matrix = student.compute_alignment_matrix(sequence, reference, pam50, True)
    score, aligned_seq, aligned_ref = student.compute_global_alignment(
        sequence, reference, pam50, matrix)
    assert len(aligned_seq) == len(aligned_ref)
    columns = len(aligned_ref)
    print(len(sequence), len(reference), len(aligned_seq), len(aligned_ref))
    equal_columns = [col for col in range(columns)
                     if aligned_seq[col] == aligned_ref[col]]
    return len(equal_columns) * 1.0 / columns
# Example no. 18
# 0
def check_spelling(checked_word, dist, word_list):
    """
    input: word, target distance, and word list
    output: the set of words in word list whose distance to the input word
            is <= target distance

    Distance is computed as len(checked_word) + len(word) - score with score
    the global alignment score; the scoring matrix comes from the
    module-level name scoring_matrix.
    """
    result = set()
    x = len(checked_word)
    for item in word_list:
        y = len(item)
        # Only words whose length is within dist of the input are aligned.
        if abs(x - y) <= dist:
            alignment_matrix = student.compute_alignment_matrix(
                checked_word, item, scoring_matrix, True)
            # Global alignment score = last row, last column of the matrix
            # (no need to scan every entry for the maximum).
            score = alignment_matrix[-1][-1]
            if (x + y - score) <= dist:
                # in-place add; union() would copy the whole set on every hit
                result.add(item)
    return result
def question7(seq_x, seq_y):
    """ print the edit distance of seq_x and seq_y and their global alignment

    The distance is len(seq_x) + len(seq_y) - score, with score taken from a
    global alignment under a 2 / 1 / 0 (diagonal / off-diagonal / dash)
    scoring matrix over the lowercase letters.
    """

    lowercase = "abcdefghijklmnopqrstuvwxyz"
    matrix_scores = student.build_scoring_matrix(lowercase, 2, 1, 0)

    global_matrix = student.compute_alignment_matrix(seq_x, seq_y, matrix_scores, True)
    alignment = student.compute_global_alignment(seq_x, seq_y, matrix_scores, global_matrix)
    score, align_x, align_y = alignment

    distance = len(seq_x) + len(seq_y) - score

    print("Edit distance: " + str(distance))
    print(align_x)
    print(align_y)
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    return a dictionary scoring_distribution that represents an un-normalized distribution

    Each of the num_trials trials shuffles a fresh copy of seq_y, scores the
    local alignment of seq_x with the shuffled string, and adds one to that
    score's entry.  Keys are scores, values are trial counts.
    """
    distribution = {}
    for _ in range(num_trials):
        rand_y = list(seq_y)
        random.shuffle(rand_y)
        rand_y = ''.join(rand_y)
        # False requests the local-alignment variant of the matrix.
        alignment_matrix = alg_project4_solution.compute_alignment_matrix(seq_x, rand_y, scoring_matrix, False)
        alignment = alg_project4_solution.compute_local_alignment(seq_x, rand_y, scoring_matrix, alignment_matrix)
        score = alignment[0]
        distribution[score] = distribution.get(score, 0) + 1
    return distribution
# Example no. 21
# 0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trial):
    """Count local alignment scores of seq_x against shuffles of seq_y.

    Runs num_trial trials, each on a freshly shuffled copy of seq_y, and
    returns a dictionary mapping score -> number of trials with that score.
    """
    tally = dict()

    for _ in range(num_trial):
        letters = list(seq_y)
        random.shuffle(letters)
        rand_y = ''.join(letters)
        # False requests the local-alignment variant of the matrix.
        local_matrix = student.compute_alignment_matrix(seq_x, rand_y,
                                                        scoring_matrix, False)
        score = student.compute_local_alignment(seq_x, rand_y, scoring_matrix,
                                                local_matrix)[0]
        if score in tally:
            tally[score] += 1
        else:
            tally[score] = 1
    return tally
# Example no. 22
# 0
def check_spelling(checked_word, dist, word_list):
    """Return the words of word_list within edit distance dist of checked_word.

    Each word is globally aligned with checked_word under a 2 / 1 / 0
    (diagonal / off-diagonal / dash) scoring matrix over the lowercase
    letters; its distance is the sum of both lengths minus the score.
    The result is a list in word_list order.
    """
    letters = set('abcdefghijklmnopqrstuvwxyz')
    scoring_matrix = student.build_scoring_matrix(letters, 2, 1, 0)
    checked_length = len(checked_word)

    def distance_to(candidate):
        """Edit distance between checked_word and candidate."""
        matrix = student.compute_alignment_matrix(
            checked_word, candidate, scoring_matrix, True)
        alignment = student.compute_global_alignment(
            checked_word, candidate, scoring_matrix, matrix)
        return checked_length + len(candidate) - alignment[0]

    return [candidate for candidate in word_list
            if distance_to(candidate) <= dist]
# Example no. 23
# 0
def check_spelling(checked_word, dist, word_list):
    """
    input: word, target distance, and word list
    output: the set of words in word list whose distance to the input word
            is <= target distance

    Distance is len(checked_word) + len(word) - score, where score is
    element 0 of the global alignment; the scoring matrix comes from the
    module-level name scoring_matrix.
    """
    result = set()
    for item in word_list:
        # True requests the global-alignment variant of the matrix.
        alignment_matrix = student.compute_alignment_matrix(
            checked_word, item, scoring_matrix, True)
        global_alignment = student.compute_global_alignment(
            checked_word, item, scoring_matrix, alignment_matrix)
        if (len(checked_word) + len(item) - global_alignment[0]) <= dist:
            # in-place add; union() would copy the whole set on every hit
            result.add(item)
    return result
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    return a dictionary scoring_distribution that represents an un-normalized distribution

    For every trial a fresh copy of seq_y is shuffled and locally aligned
    with seq_x; the dictionary maps each resulting score to the number of
    trials in which it occurred.
    """
    distribution = {}
    for _ in range(num_trials):
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        rand_y = ''.join(shuffled)
        # False requests the local-alignment variant of the matrix.
        alignment_matrix = alg_project4_solution.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        alignment = alg_project4_solution.compute_local_alignment(
            seq_x, rand_y, scoring_matrix, alignment_matrix)
        score = alignment[0]
        if score in distribution:
            distribution[score] += 1
        else:
            distribution[score] = 1
    return distribution
def question1():
    """ print the local alignment of the human and fruitfly eyeless proteins """

    # inputs: PAM50 scoring matrix and the two protein sequences
    pam50 = read_scoring_matrix(PAM50_URL)
    human = read_protein(HUMAN_EYELESS_URL)
    fruitfly = read_protein(FRUITFLY_EYELESS_URL)

    # local alignment matrix (flag False), then the alignment itself
    local_matrix = student.compute_alignment_matrix(human, fruitfly, pam50, False)
    alignment = student.compute_local_alignment(human, fruitfly, pam50, local_matrix)
    score, human_align, fruitfly_align = alignment

    print("Score: " + str(score))
    print("Human: " + human_align)
    print("FrFly: " + fruitfly_align)
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """ str, str, dict of dict, int -> dict
    Takes two sequences, a scoring matrix, and a number of trials, and returns
    a dictionary of unnormalized score counts: each key is a local alignment
    score of seq_x against a random shuffle of seq_y, each value the number
    of trials that produced that score.
    """
    scoring_distribution = {}
    for _ in range(num_trials):
        list_y = list(seq_y)
        random.shuffle(list_y)
        rand_y = ''.join(list_y)
        # Align the seq_x argument (not a module-level sequence) with the
        # shuffled string; False requests the local-alignment matrix.
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        alignment = student.compute_local_alignment(seq_x, rand_y,
                                                    scoring_matrix,
                                                    alignment_matrix)
        score = alignment[0]
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1
    return scoring_distribution
def check_spelling(checked_word, dist, word_list):
    """ list the words of word_list within edit distance dist of checked_word

    Uses global alignment under a 2 / 1 / 0 (diagonal / off-diagonal / dash)
    scoring matrix over the lowercase letters.
    """

    lowercase = "abcdefghijklmnopqrstuvwxyz"
    matrix_scores = student.build_scoring_matrix(lowercase, 2, 1, 0)

    def edit_distance(candidate):
        """ len(checked_word) + len(candidate) - global alignment score """
        global_matrix = student.compute_alignment_matrix(
            checked_word, candidate, matrix_scores, True)
        score, _, _ = student.compute_global_alignment(
            checked_word, candidate, matrix_scores, global_matrix)
        return len(checked_word) + len(candidate) - score

    return [candidate for candidate in word_list
            if edit_distance(candidate) <= dist]
# Example no. 28
# 0
def check_spelling(checked_word, dist, word_list):
    """
    input:
        the word to check, the largest accepted edit distance, and the
        list of candidate words
    output:
        the set of all words of word_list whose edit distance to
        checked_word is at most dist
    """

    lowercase = set('abcdefghijklmnopqrstuvwxyz')
    # diagonal 2, off-diagonal 1, dash 0
    scores = student.build_scoring_matrix(lowercase, 2, 1, 0)

    def global_score(candidate):
        """ global alignment score of checked_word and candidate """
        matrix = student.compute_alignment_matrix(checked_word, candidate, scores, True)
        return student.compute_global_alignment(checked_word, candidate, scores, matrix)[0]

    return set(candidate for candidate in word_list
               if len(checked_word) + len(candidate) - global_score(candidate) <= dist)
# Example no. 29
# 0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    input:
        two sequences, scoring matrix, number of trials.
        In each trial:
        1. a random permutation rand_y of seq_y is drawn with random.sample()
           (rand_y is a list of characters),
        2. the local alignment of seq_x and rand_y is computed with
           scoring_matrix,
        3. the entry for the resulting score is incremented by one.
    output:
        a dictionary that represents an un-normalized distribution
        (score -> number of trials)
    """

    tally = {}
    for _ in range(num_trials):
        permuted = random.sample(seq_y, len(seq_y))
        matrix = student.compute_alignment_matrix(seq_x, permuted, scoring_matrix, False)
        score = student.compute_local_alignment(seq_x, permuted, scoring_matrix, matrix)[0]
        tally[score] = tally.get(score, 0) + 1

    return tally
 def score(x, y):
     """Global alignment score of x and y under the enclosing scoring_matrix."""
     global_matrix = student.compute_alignment_matrix(x, y, scoring_matrix, True)
     alignment = student.compute_global_alignment(x, y, scoring_matrix, global_matrix)
     return alignment[0]
 def score(x, y):
     """Return element 0 (the score) of the global alignment of x and y."""
     # True requests the global-alignment variant of the matrix.
     matrix = student.compute_alignment_matrix(x, y, scoring_matrix, True)
     result = student.compute_global_alignment(x, y, scoring_matrix, matrix)
     return result[0]
def run_q1():
    """Return the local alignment of the human and fruit-fly eyeless proteins.

    Both proteins and the PAM50 scoring matrix are loaded, the local
    alignment matrix is built, and the result of
    student.compute_local_alignment is returned as is.
    """
    human_protein = read_protein(HUMAN_EYELESS_URL)
    fly_protein = read_protein(FRUITFLY_EYELESS_URL)
    pam50 = read_scoring_matrix(PAM50_URL)
    matrix = student.compute_alignment_matrix(
        human_protein, fly_protein, pam50, False)
    return student.compute_local_alignment(
        human_protein, fly_protein, pam50, matrix)
# Example no. 33
# 0
    # (tail of a word-list loader; its def line is not part of this snippet)
    # read the whole file into one string
    words = word_file.read()

    # split the text into a list with one entry per line
    # NOTE(review): a trailing newline in the file yields a final '' entry
    word_list = words.split('\n')
    print "Loaded a dictionary with", len(word_list), "words"
    return word_list


# Q1: local alignment of the human and fruit-fly eyeless proteins under PAM50
HumanEyelessProtein = read_protein(HUMAN_EYELESS_URL)
FruitflyEyelessProtein = read_protein(FRUITFLY_EYELESS_URL)

PAM50 = read_scoring_matrix(PAM50_URL)

# False requests the local-alignment variant of the matrix
alignment_matrix_Q1 = student.compute_alignment_matrix(HumanEyelessProtein,
                                                       FruitflyEyelessProtein,
                                                       PAM50, False)

result_Q1 = student.compute_local_alignment(HumanEyelessProtein,
                                            FruitflyEyelessProtein, PAM50,
                                            alignment_matrix_Q1)

# Q2: compare the two local alignments with the consensus PAX domain
# elements 1 and 2 of the Q1 result are the two aligned sequences
TempHumanSeq = result_Q1[1]
FruitflySeq = result_Q1[2]
# drops exactly one character, the third from the end of the human alignment
# NOTE(review): presumably the single '-' gap; this hard-codes its position
HumanSeq = TempHumanSeq[:len(TempHumanSeq) -
                        3] + TempHumanSeq[len(TempHumanSeq) - 2:]

ConsensusPAXDomain = read_protein(CONSENSUS_PAX_URL)

# (the snippet is cut off in the middle of this call)
alignment_matrix_Q2_Human = student.compute_alignment_matrix(
# Example no. 34
# 0
    # (tail of a word-list loader; its def line is not part of this snippet)
    # open the word file
    # NOTE(review): the file object is never closed in the visible code
    word_file = open(filename)
    
    # read the whole file into one string
    words = word_file.read()
    
    # split the text into a list with one entry per line
    # NOTE(review): a trailing newline in the file yields a final '' entry
    word_list = words.split('\n')
    print "Loaded a dictionary with", len(word_list), "words"
    return word_list

# Load the PAM50 scoring matrix and the two eyeless proteins at import time.
scoring_matrix = read_scoring_matrix(PAM50_URL)
seq_x = read_protein(HUMAN_EYELESS_URL)
seq_y = read_protein(FRUITFLY_EYELESS_URL)

# Local alignment (flag False) of the human and fruit-fly proteins.
alignment_matrix = student.compute_alignment_matrix(seq_x, seq_y, scoring_matrix, False)
score, human_aligen, fruit_aligen = student.compute_local_alignment(seq_x, seq_y, scoring_matrix, alignment_matrix)
print score
# Stops the script here, so no statement below this line is ever executed.
exit()

def q3() :
    """Build two random sequences over a 23-letter alphabet and locally align them.

    Only the beginning of this function is present in the snippet.
    """
    # NOTE(review): seq_x is assigned on the next line, which makes it a local
    # name for the whole function, so this len() call raises UnboundLocalError.
    len_gen = len(seq_x)
    seq_x = []
    seq_y = []
    for _ in range(len_gen) :
        seq_x.append(random.choice("ACBEDGFIHKMLNQPSRTWVYXZ"))
        seq_y.append(random.choice("ACBEDGFIHKMLNQPSRTWVYXZ"))

        # NOTE(review): these two calls sit inside the loop, so the alignment
        # is recomputed after every appended pair of characters, and the
        # sequences are passed as lists rather than strings.
        alignment_matrix = student.compute_alignment_matrix(seq_x, seq_y, scoring_matrix, False)
        score, human_aligen, fruit_aligen = student.compute_local_alignment(seq_x, seq_y, scoring_matrix, alignment_matrix)
# Example no. 35
# 0
def build_scoring_matrix(alphabet, diag_score=10, off_diag_score=4, dash_score=-6):
    """
    Make a matrix with diag_score, off_diag_score, dash_score

    alphabet       -- iterable of characters to score
    diag_score     -- score of a character against itself (default 10)
    off_diag_score -- score of two different characters (default 4)
    dash_score     -- score of any character against '-' (default -6)

    Returns a dictionary of dictionaries indexed by the characters of
    alphabet plus '-'.  The '-' against '-' entry is always 0.
    """

    matrix = dict()
    matrix['-'] = dict()
    matrix['-']['-'] = 0

    for rows in alphabet:
        matrix[rows] = dict()
        # a dash scores the same on either side of the pair
        matrix[rows]['-'] = dash_score
        matrix['-'][rows] = dash_score
        for cols in alphabet:
            matrix[rows][cols] = diag_score if rows == cols else off_diag_score

    return matrix

# Ad-hoc check of the local alignment of "AA" against "TAAT".
# chset is not defined in this snippet; it is expected to exist already.
m = build_scoring_matrix(chset)
# False requests the local-alignment variant of the matrix
s = student.compute_alignment_matrix("AA", "TAAT", m, False)
# spot-check three individual entries, then dump the whole matrix
print s[0][2]
print s[2][0]
print s[2][2]
print s
ali = student.compute_local_alignment("AA", "TAAT", m, s)
print ali
    # (tail of a word-list loader; its def line is not part of this snippet)
    # read the whole file into one string
    words = word_file.read()

    # split the text into a list with one entry per line
    # NOTE(review): a trailing newline in the file yields a final '' entry
    word_list = words.split('\n')
    print "Loaded a dictionary with", len(word_list), "words"
    return word_list


# Question 1 ##################################################################

HUMAN_EYELESS = read_protein(HUMAN_EYELESS_URL)
FRUITFLY_EYELESS = read_protein(FRUITFLY_EYELESS_URL)
SCORING_MATRIX = read_scoring_matrix(PAM50_URL)
# False requests the local-alignment variant of the matrix.
ALIGNMENT_MATRIX = student.compute_alignment_matrix(HUMAN_EYELESS,
                                                    FRUITFLY_EYELESS,
                                                    SCORING_MATRIX, False)

# Local alignment of the two eyeless proteins: (score, human, fly).  It is
# computed once and its result is reused for Question 2.
loc_score, loc_human, loc_fly = student.compute_local_alignment(
    HUMAN_EYELESS, FRUITFLY_EYELESS, SCORING_MATRIX, ALIGNMENT_MATRIX)

# Question 2 ##################################################################
PAX = read_protein(CONSENSUS_PAX_URL)

# Remove the alignment dashes.  Each name is rebound explicitly: assigning to
# a loop variable would leave loc_human and loc_fly unchanged.
loc_human = loc_human.replace('-', '')
loc_fly = loc_fly.replace('-', '')
from matplotlib import pyplot

# Locations of the scoring matrix, the protein sequences and the word list.
PAM50_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_PAM50.txt"
HUMAN_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_HumanEyelessProtein.txt"
FRUITFLY_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_FruitflyEyelessProtein.txt"
CONSENSUS_PAX_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_ConsensusPAXDomain.txt"
WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt"


# question 1: local alignment of the human (hep) and fruit-fly (fep) proteins
hep = alg_alignment.read_protein(HUMAN_EYELESS_URL)
fep = alg_alignment.read_protein(FRUITFLY_EYELESS_URL)

scoring_matrix = alg_alignment.read_scoring_matrix(PAM50_URL)
# the alignment matrix (flag False = local) is built inline as the last argument
hep_fep_local_alignment = alg_project4_solution.compute_local_alignment(hep, fep, scoring_matrix,
                                                                        alg_project4_solution.compute_alignment_matrix(
                                                                            hep, fep, scoring_matrix, False))
# element 0 of the alignment result is its score
human_eyeless_fruitfly_local_alignment_score = hep_fep_local_alignment[0]
# question 1 answer
print "local alignment for human and fruitfly eyeless genome: " + str(hep_fep_local_alignment)

# question 2: global alignment of each dash-free local alignment with the PAX domain
cpd = alg_alignment.read_protein(CONSENSUS_PAX_URL)

# elements 1 and 2 are the aligned human and fruit-fly sequences
hep_local_alignment = hep_fep_local_alignment[1]
fep_local_alignment = hep_fep_local_alignment[2]

hep_local_alignment_no_dashes = hep_local_alignment.replace('-', '')

# the alignment matrix (flag True = global) is again built inline
hep_no_dashes_cpd_global_alignment = alg_project4_solution.compute_global_alignment(hep_local_alignment_no_dashes, cpd, scoring_matrix, alg_project4_solution.compute_alignment_matrix(
                                                   hep_local_alignment_no_dashes, cpd, scoring_matrix, True))
fep_local_alignment_no_dashes = fep_local_alignment.replace('-', '')
from matplotlib import pyplot

# Locations of the scoring matrix, the protein sequences and the word list.
PAM50_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_PAM50.txt"
HUMAN_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_HumanEyelessProtein.txt"
FRUITFLY_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_FruitflyEyelessProtein.txt"
CONSENSUS_PAX_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_ConsensusPAXDomain.txt"
WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt"

# question 1: local alignment of the human (hep) and fruit-fly (fep) proteins
hep = alg_alignment.read_protein(HUMAN_EYELESS_URL)
fep = alg_alignment.read_protein(FRUITFLY_EYELESS_URL)

scoring_matrix = alg_alignment.read_scoring_matrix(PAM50_URL)
# the alignment matrix (flag False = local) is built inline as the last argument
hep_fep_local_alignment = alg_project4_solution.compute_local_alignment(
    hep, fep, scoring_matrix,
    alg_project4_solution.compute_alignment_matrix(hep, fep, scoring_matrix,
                                                   False))
# element 0 of the alignment result is its score
human_eyeless_fruitfly_local_alignment_score = hep_fep_local_alignment[0]
# question 1 answer
print "local alignment for human and fruitfly eyeless genome: " + str(
    hep_fep_local_alignment)

# question 2: global alignment of the dash-free local alignment with the PAX domain
cpd = alg_alignment.read_protein(CONSENSUS_PAX_URL)

# elements 1 and 2 are the aligned human and fruit-fly sequences
hep_local_alignment = hep_fep_local_alignment[1]
fep_local_alignment = hep_fep_local_alignment[2]

hep_local_alignment_no_dashes = hep_local_alignment.replace('-', '')

# (the snippet is cut off in the middle of this call)
hep_no_dashes_cpd_global_alignment = alg_project4_solution.compute_global_alignment(
    hep_local_alignment_no_dashes, cpd, scoring_matrix,
def question1():
    """Print the local alignment of the human and fruit-fly eyeless proteins.

    The value printed is whatever student.compute_local_alignment returns for
    the two proteins under the PAM50 scoring matrix.
    """
    pam50 = read_scoring_matrix(PAM50_URL)
    human_protein = read_protein(HUMAN_EYELESS_URL)
    fly_protein = read_protein(FRUITFLY_EYELESS_URL)
    # False requests the local-alignment variant of the matrix.
    local_matrix = student.compute_alignment_matrix(
        human_protein, fly_protein, pam50, False)
    local_alignment = student.compute_local_alignment(
        human_protein, fly_protein, pam50, local_matrix)
    print(local_alignment)
# Example no. 40
# 0
def find_local_align():
    score_matrix = read_scoring_matrix(PAM50_URL)
    seq_human = read_protein(HUMAN_EYELESS_URL)
    seq_fly = read_protein(FRUITFLY_EYELESS_URL)
    local_alignment_matrix = student.compute_alignment_matrix(
        seq_human, seq_fly, score_matrix, False)
    score, seq_loc_human, seq_loc_fly = student.compute_local_alignment(
        seq_human, seq_fly, score_matrix, local_alignment_matrix)
    length = len(seq_loc_fly)
    agree = 0
    for idx in range(length):
        if seq_loc_fly[idx] == seq_loc_human[idx]:
            agree += 1
    print 'Question 1:\n'
    print 'score:', score, '\nhuman:', seq_loc_human, '\nfly:  ', seq_loc_fly
    print 'Agree percentage: %.2f' % (100 * float(agree) / length)
    """
    Question 1:
    local alignment score: 875 
    human: HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEK-QQ 
    fly:   HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ
    Agree percentage: 93.98%
    """

    ### Question 2 ###
    print '\nQuestion 2:\n'
    seq_loc_human = seq_loc_human.replace('-', '')
    seq_loc_fly = seq_loc_fly.replace('-', '')
    seq_pax = read_protein(CONSENSUS_PAX_URL)  #Q2
    # seq_pax = 'ACBEDGFIHKMLNQPSRTWVYXZ' #Q3
    for idx in range(2):
        if idx == 0:
            seq = seq_loc_human
            type = 'human'
        else:
            seq = seq_loc_fly
            type = 'fly'
        global_alignment_matrix = student.compute_alignment_matrix(
            seq, seq_pax, score_matrix, True)
        score, x_glbl, pax_glbl = student.compute_global_alignment(
            seq, seq_pax, score_matrix, global_alignment_matrix)
        length = len(x_glbl)
        agree = 0
        for idx in range(length):
            if x_glbl[idx] == pax_glbl[idx]:
                agree += 1

        print 'score:', score, '\n' + type, x_glbl, '\nPAX:  ', pax_glbl
        print type + ' agree percentage: %.2f' % (100 * float(agree) / length)
        """
        Question 2:

        human score: 613 
        human: -HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQ 
        PAX:   GHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIR--------
        human agree percentage: 72.93
        
        flyscore: 586 
        fly:  -HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ 
        PAX:  GHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIR---------
        fly agree percentage: 70.15
        """
        """