예제 #1
0
def _alignment_distance(aligned_a, aligned_b):
    """Convert one pairwise alignment into an evolutionary distance.

    The p-distance ``p`` is the fraction of mismatching columns among the
    columns where neither aligned string holds a gap (``'-'``).  The value
    returned is ``-0.75 * ln(1 - 4 * p / 3)``.

    Args:
        aligned_a: First aligned sequence, gaps written as ``'-'``.
        aligned_b: Second aligned sequence, gaps written as ``'-'``.

    Returns:
        The distance as a float, or ``float('inf')`` when it is undefined:
        either no gap-free column exists, or ``p >= 0.75`` so the logarithm's
        argument is not positive.
    """
    import math

    matches = 0
    mismatches = 0
    # zip() stops at the shorter string, so a length mismatch between the two
    # aligned strings can no longer index past the end of one of them.
    for residue_a, residue_b in zip(aligned_a, aligned_b):
        if residue_a == '-' or residue_b == '-':
            # Gap columns carry no substitution information: ignore them.
            continue
        if residue_a == residue_b:
            matches += 1
        else:
            mismatches += 1

    compared = matches + mismatches
    if compared == 0:
        return float('inf')

    p_distance = mismatches / float(compared)
    log_argument = 1 - 4 * p_distance / 3
    if log_argument <= 0:
        # The correction saturates here; math.log would raise ValueError.
        return float('inf')
    return -0.75 * math.log(log_argument)


def find_distance(sequence_dictionary, scoring_matrix=None):
    """Build a pairwise distance matrix for a dictionary of sequences.

    Every ordered pair of distinct entries is aligned with
    ``alignment.SmithWatermanAffine`` (trailing arguments 7 and 1) and the
    second and third items of its result are treated as the two aligned
    strings, which ``_alignment_distance`` turns into a distance.

    NOTE(review): the 4/3 correction is the nucleotide form, while the default
    matrix file is named Blosum62 -- confirm this is the intended model.

    Args:
        sequence_dictionary: Mapping of sequence name to sequence (the
            original comment says it is produced by readfasta).
        scoring_matrix: Optional pre-loaded scoring matrix.  When omitted,
            the Blosum62 file at the historical hard-coded path is read once,
            the first time an alignment is actually required.

    Returns:
        A dict of dicts where ``result[a][b]`` is the distance between the
        sequences named ``a`` and ``b``.  Diagonal entries are ``0``; entries
        whose distance is undefined are ``float('inf')``.
    """
    distance_matrix = {}

    for key, seq1 in sequence_dictionary.items():
        row = distance_matrix.setdefault(key, {})
        for i, seq2 in sequence_dictionary.items():
            if key == i:
                # A sequence is at distance 0 from itself; no alignment needed.
                row[i] = 0
                continue

            if scoring_matrix is None:
                # Loop-invariant: load the matrix a single time, not per pair.
                scoring_matrix = alignment_util.readScoringMatrix(
                    "/home/kumara3/CSE620K/proj6/Blosum62.txt")

            output = list(
                alignment.SmithWatermanAffine(seq1, seq2, scoring_matrix, 7, 1))
            X_string = ''.join(output[1:2])  # aligned form of seq1
            Y_string = ''.join(output[2:3])  # aligned form of seq2
            row[i] = _alignment_distance(X_string, Y_string)

    return distance_matrix
예제 #2
0
def _alignment_distance(aligned_a, aligned_b):
    """Convert one pairwise alignment into an evolutionary distance.

    The p-distance ``p`` is the fraction of mismatching columns among the
    columns where neither aligned string holds a gap (``'-'``).  The value
    returned is ``-0.75 * ln(1 - 4 * p / 3)``.

    Args:
        aligned_a: First aligned sequence, gaps written as ``'-'``.
        aligned_b: Second aligned sequence, gaps written as ``'-'``.

    Returns:
        The distance as a float, or ``float('inf')`` when it is undefined:
        either no gap-free column exists, or ``p >= 0.75`` so the logarithm's
        argument is not positive.
    """
    import math

    matches = 0
    mismatches = 0
    # zip() stops at the shorter string, so a length mismatch between the two
    # aligned strings can no longer index past the end of one of them.
    for residue_a, residue_b in zip(aligned_a, aligned_b):
        if residue_a == '-' or residue_b == '-':
            # Gap columns carry no substitution information: ignore them.
            continue
        if residue_a == residue_b:
            matches += 1
        else:
            mismatches += 1

    compared = matches + mismatches
    if compared == 0:
        return float('inf')

    p_distance = mismatches / float(compared)
    log_argument = 1 - 4 * p_distance / 3
    if log_argument <= 0:
        # The correction saturates here; math.log would raise ValueError.
        return float('inf')
    return -0.75 * math.log(log_argument)


def find_distance(sequence_dictionary, scoring_matrix=None):
    """Build a pairwise distance matrix for a dictionary of sequences.

    Every ordered pair of distinct entries is aligned with
    ``alignment.SmithWatermanAffine`` (trailing arguments 7 and 1) and the
    second and third items of its result are treated as the two aligned
    strings, which ``_alignment_distance`` turns into a distance.

    NOTE(review): the 4/3 correction is the nucleotide form, while the default
    matrix file is named Blosum62 -- confirm this is the intended model.

    Args:
        sequence_dictionary: Mapping of sequence name to sequence (the
            original comment says it is produced by readfasta).
        scoring_matrix: Optional pre-loaded scoring matrix.  When omitted,
            the Blosum62 file at the historical hard-coded path is read once,
            the first time an alignment is actually required.

    Returns:
        A dict of dicts where ``result[a][b]`` is the distance between the
        sequences named ``a`` and ``b``.  Diagonal entries are ``0``; entries
        whose distance is undefined are ``float('inf')``.
    """
    distance_matrix = {}

    for key, seq1 in sequence_dictionary.items():
        row = distance_matrix.setdefault(key, {})
        for i, seq2 in sequence_dictionary.items():
            if key == i:
                # A sequence is at distance 0 from itself; no alignment needed.
                row[i] = 0
                continue

            if scoring_matrix is None:
                # Loop-invariant: load the matrix a single time, not per pair.
                scoring_matrix = alignment_util.readScoringMatrix(
                    "/home/kumara3/CSE620K/proj6/Blosum62.txt")

            output = list(
                alignment.SmithWatermanAffine(seq1, seq2, scoring_matrix, 7, 1))
            X_string = ''.join(output[1:2])  # aligned form of seq1
            Y_string = ''.join(output[2:3])  # aligned form of seq2
            row[i] = _alignment_distance(X_string, Y_string)

    return distance_matrix
import argparse
import alignment
import alignment_sol
import alignment_util
import random
import sys


# Upper-case Latin alphabet; its consumer is not visible in this excerpt.
labels = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Module-level scoring matrices, built once when this module is imported.
# D1, D2 and B are read from files given by relative name (presumably resolved
# against the current working directory -- confirm in alignment_util).
D1 = alignment_util.readScoringMatrix("DNA1.txt")
D2 = alignment_util.readScoringMatrix("DNA2.txt")
# D3 and D4 are generated from two numbers rather than read from disk
# (presumably match and mismatch scores -- confirm in alignment_util).
D3 = alignment_util.genScoringMatrix(10, -5)
D4 = alignment_util.genScoringMatrix(5,0)
B  = alignment_util.readScoringMatrix("Blosum62.txt")

#############################
def test_SW(seq1, seq2, S, g):
    """Grade ``alignment.SmithWaterman`` against ``alignment_sol.SmithWaterman``.

    Both implementations are called with the same ``seq1``, ``seq2``, ``S``
    and ``g``; each returns a score and two aligned sequences.  Points are
    accumulated in ``score``: 70 when the candidate alignment, re-scored with
    ``alignment_util.scoreAlignment``, equals the reference score, and 20 when
    the candidate's reported score equals the reference score.

    NOTE(review): the rest of this function (the third check and whatever it
    returns) is not visible in this excerpt.
    """
    s, a1, a2 = alignment.SmithWaterman(seq1, seq2, S, g)
    s_sol, a1_sol, a2_sol = alignment_sol.SmithWaterman(seq1, seq2, S, g)
    score = 0
    
    # First: test that the function has returned an optimal alignment
    if alignment_util.scoreAlignment(a1, a2, S, g) == s_sol:
        score += 70

    # Second: test that the function has returned an optimal score
    if s == s_sol:
        score += 20

    # Third: Test that the function has returned the correct score for the alignment
 def setUp(self):
     """Attach the scoring matrices S1..S4 to the instance.

     S1, S2 and S4 are read from "DNA1.txt", "DNA2.txt" and "Blosum62.txt";
     S3 is generated from the pair (10, -10).
     """
     read_matrix = alignment_util.readScoringMatrix
     self.S1 = read_matrix("DNA1.txt")
     self.S2 = read_matrix("DNA2.txt")
     self.S3 = alignment_util.genScoringMatrix(10, -10)
     self.S4 = read_matrix("Blosum62.txt")
예제 #5
0
 def setUp(self):
     """Attach the scoring matrices S1..S4 to the instance.

     S1, S2 and S4 are read from "DNA1.txt", "DNA2.txt" and "Blosum62.txt";
     S3 is generated from the pair (10, -10).
     """
     read_matrix = alignment_util.readScoringMatrix
     self.S1 = read_matrix("DNA1.txt")
     self.S2 = read_matrix("DNA2.txt")
     self.S3 = alignment_util.genScoringMatrix(10, -10)
     self.S4 = read_matrix("Blosum62.txt")
import argparse
import alignment
import alignment_sol
import alignment_util
import random
import sys

# Upper-case Latin alphabet; its consumer is not visible in this excerpt.
labels = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Module-level scoring matrices, built once when this module is imported.
# D1, D2 and B are read from files given by relative name (presumably resolved
# against the current working directory -- confirm in alignment_util).
D1 = alignment_util.readScoringMatrix("DNA1.txt")
D2 = alignment_util.readScoringMatrix("DNA2.txt")
# D3 and D4 are generated from two numbers rather than read from disk
# (presumably match and mismatch scores -- confirm in alignment_util).
D3 = alignment_util.genScoringMatrix(10, -5)
D4 = alignment_util.genScoringMatrix(5, 0)
B = alignment_util.readScoringMatrix("Blosum62.txt")


#############################
def test_SW(seq1, seq2, S, g):
    s, a1, a2 = alignment.SmithWaterman(seq1, seq2, S, g)
    s_sol, a1_sol, a2_sol = alignment_sol.SmithWaterman(seq1, seq2, S, g)
    score = 0

    # First: test that the function has returned has an optimal alignment
    if alignment_util.scoreAlignment(a1, a2, S, g) == s_sol:
        score += 70

    # Second: test that the function has returned an optimal score
    if s == s_sol:
        score += 20

    # Third: Test that the function has returned the correct score for the alignment