def _jukes_cantor_distance(aligned_a, aligned_b):
    """Return the corrected evolutionary distance of one pairwise alignment.

    Columns in which either sequence has a gap ('-') are skipped; the
    remaining columns are counted as matches or mismatches, giving the
    p-distance ``mismatches / (matches + mismatches)``.  The result is
    ``-0.75 * log(1 - 4 * p / 3)``.

    Returns ``float('inf')`` when the distance is undefined: no gap-free
    column exists, or p >= 0.75 (the logarithm's argument would be <= 0).
    """
    matches = 0
    mismatches = 0
    # zip stops at the shorter string, so unequal lengths cannot index
    # past the end of either sequence.
    for residue_a, residue_b in zip(aligned_a, aligned_b):
        if residue_a == '-' or residue_b == '-':
            continue  # gap columns are ignored, not counted as mismatches
        if residue_a == residue_b:
            matches += 1
        else:
            mismatches += 1

    compared = matches + mismatches
    if compared == 0:
        return float('inf')
    if mismatches == 0:
        return 0.0  # avoids the -0.0 produced by -0.75 * log(1)

    p_distance = mismatches / float(compared)
    if p_distance >= 0.75:
        return float('inf')
    # NOTE(review): 3/4 and 4/3 are the constants for a four-letter
    # alphabet, while the default scoring matrix is Blosum62 -- confirm
    # that this correction is the intended one.
    return -0.75 * log(1 - 4 * p_distance / 3)


def find_distance(sequence_dictionary,
                  scoring_matrix_path="/home/kumara3/CSE620K/proj6/Blosum62.txt"):
    """Build a pairwise distance matrix for the given sequences.

    Every ordered pair of distinct sequences is aligned with
    ``alignment.SmithWatermanAffine`` and the aligned strings are turned
    into a distance by ``_jukes_cantor_distance``.

    Args:
        sequence_dictionary: mapping of sequence name -> sequence (the
            dictionary generated by readfasta).
        scoring_matrix_path: file passed to
            ``alignment_util.readScoringMatrix``; defaults to the path the
            function originally hard-coded.

    Returns:
        A nested dict where ``distance_matrix[a][b]`` is the distance
        between sequences ``a`` and ``b``.  Diagonal entries are 0, and
        undefined distances are ``float('inf')``.  An empty input yields
        an empty dict.
    """
    distance_matrix = {}
    if not sequence_dictionary:
        return distance_matrix

    # Loop-invariant: read the scoring matrix once, not once per pair.
    scoring = alignment_util.readScoringMatrix(scoring_matrix_path)

    for key, seq1 in sequence_dictionary.items():
        row = distance_matrix.setdefault(key, {})
        for other, seq2 in sequence_dictionary.items():
            if other == key:
                # A sequence is at distance 0 from itself; no alignment needed.
                row[other] = 0
                continue
            # 7 and 1 are passed through unchanged (presumably the gap-open
            # and gap-extension penalties -- confirm against
            # alignment.SmithWatermanAffine).
            result = list(
                alignment.SmithWatermanAffine(seq1, seq2, scoring, 7, 1))
            # Elements 1 and 2 of the result hold the two aligned sequences.
            aligned_1 = ''.join(result[1:2])
            aligned_2 = ''.join(result[2:3])
            row[other] = _jukes_cantor_distance(aligned_1, aligned_2)
    return distance_matrix
def _jukes_cantor_distance(aligned_a, aligned_b):
    """Return the corrected evolutionary distance of one pairwise alignment.

    Columns in which either sequence has a gap ('-') are skipped; the
    remaining columns are counted as matches or mismatches, giving the
    p-distance ``mismatches / (matches + mismatches)``.  The result is
    ``-0.75 * log(1 - 4 * p / 3)``.

    Returns ``float('inf')`` when the distance is undefined: no gap-free
    column exists, or p >= 0.75 (the logarithm's argument would be <= 0).
    """
    matches = 0
    mismatches = 0
    # zip stops at the shorter string, so unequal lengths cannot index
    # past the end of either sequence.
    for residue_a, residue_b in zip(aligned_a, aligned_b):
        if residue_a == '-' or residue_b == '-':
            continue  # gap columns are ignored, not counted as mismatches
        if residue_a == residue_b:
            matches += 1
        else:
            mismatches += 1

    compared = matches + mismatches
    if compared == 0:
        return float('inf')
    if mismatches == 0:
        return 0.0  # avoids the -0.0 produced by -0.75 * log(1)

    p_distance = mismatches / float(compared)
    if p_distance >= 0.75:
        return float('inf')
    # NOTE(review): 3/4 and 4/3 are the constants for a four-letter
    # alphabet, while the default scoring matrix is Blosum62 -- confirm
    # that this correction is the intended one.
    return -0.75 * log(1 - 4 * p_distance / 3)


def find_distance(sequence_dictionary,
                  scoring_matrix_path="/home/kumara3/CSE620K/proj6/Blosum62.txt"):
    """Build a pairwise distance matrix for the given sequences.

    Every ordered pair of distinct sequences is aligned with
    ``alignment.SmithWatermanAffine`` and the aligned strings are turned
    into a distance by ``_jukes_cantor_distance``.

    Args:
        sequence_dictionary: mapping of sequence name -> sequence (the
            dictionary generated by readfasta).
        scoring_matrix_path: file passed to
            ``alignment_util.readScoringMatrix``; defaults to the path the
            function originally hard-coded.

    Returns:
        A nested dict where ``distance_matrix[a][b]`` is the distance
        between sequences ``a`` and ``b``.  Diagonal entries are 0, and
        undefined distances are ``float('inf')``.  An empty input yields
        an empty dict.
    """
    distance_matrix = {}
    if not sequence_dictionary:
        return distance_matrix

    # Loop-invariant: read the scoring matrix once, not once per pair.
    scoring = alignment_util.readScoringMatrix(scoring_matrix_path)

    for key, seq1 in sequence_dictionary.items():
        row = distance_matrix.setdefault(key, {})
        for other, seq2 in sequence_dictionary.items():
            if other == key:
                # A sequence is at distance 0 from itself; no alignment needed.
                row[other] = 0
                continue
            # 7 and 1 are passed through unchanged (presumably the gap-open
            # and gap-extension penalties -- confirm against
            # alignment.SmithWatermanAffine).
            result = list(
                alignment.SmithWatermanAffine(seq1, seq2, scoring, 7, 1))
            # Elements 1 and 2 of the result hold the two aligned sequences.
            aligned_1 = ''.join(result[1:2])
            aligned_2 = ''.join(result[2:3])
            row[other] = _jukes_cantor_distance(aligned_1, aligned_2)
    return distance_matrix
import argparse
import alignment
import alignment_sol
import alignment_util
import random
import sys

# Uppercase alphabet; where it is used is not visible from this part of the file.
labels = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Scoring matrices created once, at import time.
# D1, D2 and B come from alignment_util.readScoringMatrix (presumably the
# file names are resolved against the current working directory -- confirm).
D1 = alignment_util.readScoringMatrix("DNA1.txt")
D2 = alignment_util.readScoringMatrix("DNA2.txt")
# D3 and D4 come from alignment_util.genScoringMatrix (the two arguments are
# presumably the match and mismatch scores -- confirm).
D3 = alignment_util.genScoringMatrix(10, -5)
D4 = alignment_util.genScoringMatrix(5,0)
B = alignment_util.readScoringMatrix("Blosum62.txt")

#############################
def test_SW(seq1, seq2, S, g):
    """Grade alignment.SmithWaterman against alignment_sol.SmithWaterman.

    Both implementations are called with the same (seq1, seq2, S, g) and
    each returns a (score, alignment1, alignment2) triple.  Points are
    accumulated in ``score``: 70 when the submitted alignment, re-scored
    with alignment_util.scoreAlignment, equals the alignment_sol score,
    and 20 when the submitted score equals the alignment_sol score.

    S is one of the scoring matrices; g is presumably the gap penalty
    (confirm against alignment.SmithWaterman).
    """
    s, a1, a2 = alignment.SmithWaterman(seq1, seq2, S, g)
    s_sol, a1_sol, a2_sol = alignment_sol.SmithWaterman(seq1, seq2, S, g)
    score = 0
    # First: test that the function has returned an optimal alignment
    if alignment_util.scoreAlignment(a1, a2, S, g) == s_sol:
        score += 70
    # Second: test that the function has returned an optimal score
    if s == s_sol:
        score += 20
    # Third: Test that the function has returned the correct score for the alignment
    # NOTE(review): the code for this third check and any return of `score`
    # are not present at this point in the file -- confirm against the
    # complete source.
def setUp(self):
    """Attach the scoring matrices S1..S4 to the test instance.

    S1, S2 and S4 are read from "DNA1.txt", "DNA2.txt" and "Blosum62.txt";
    S3 is generated with alignment_util.genScoringMatrix(10, -10).
    (Presumably a unittest fixture hook -- the enclosing class is not
    visible here.)
    """
    load_matrix = alignment_util.readScoringMatrix
    self.S1 = load_matrix("DNA1.txt")
    self.S2 = load_matrix("DNA2.txt")
    self.S3 = alignment_util.genScoringMatrix(10, -10)
    self.S4 = load_matrix("Blosum62.txt")
def setUp(self):
    """Attach the scoring matrices S1..S4 to the test instance.

    S1, S2 and S4 are read from "DNA1.txt", "DNA2.txt" and "Blosum62.txt";
    S3 is generated with alignment_util.genScoringMatrix(10, -10).
    (Presumably a unittest fixture hook -- the enclosing class is not
    visible here.)
    """
    load_matrix = alignment_util.readScoringMatrix
    self.S1 = load_matrix("DNA1.txt")
    self.S2 = load_matrix("DNA2.txt")
    self.S3 = alignment_util.genScoringMatrix(10, -10)
    self.S4 = load_matrix("Blosum62.txt")
import argparse
import alignment
import alignment_sol
import alignment_util
import random
import sys

# Uppercase alphabet; where it is used is not visible from this part of the file.
labels = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Scoring matrices created once, at import time.
# D1, D2 and B come from alignment_util.readScoringMatrix (presumably the
# file names are resolved against the current working directory -- confirm).
D1 = alignment_util.readScoringMatrix("DNA1.txt")
D2 = alignment_util.readScoringMatrix("DNA2.txt")
# D3 and D4 come from alignment_util.genScoringMatrix (the two arguments are
# presumably the match and mismatch scores -- confirm).
D3 = alignment_util.genScoringMatrix(10, -5)
D4 = alignment_util.genScoringMatrix(5, 0)
B = alignment_util.readScoringMatrix("Blosum62.txt")

#############################
def test_SW(seq1, seq2, S, g):
    """Grade alignment.SmithWaterman against alignment_sol.SmithWaterman.

    Both implementations are called with the same (seq1, seq2, S, g) and
    each returns a (score, alignment1, alignment2) triple.  Points are
    accumulated in ``score``: 70 when the submitted alignment, re-scored
    with alignment_util.scoreAlignment, equals the alignment_sol score,
    and 20 when the submitted score equals the alignment_sol score.

    S is one of the scoring matrices; g is presumably the gap penalty
    (confirm against alignment.SmithWaterman).
    """
    s, a1, a2 = alignment.SmithWaterman(seq1, seq2, S, g)
    s_sol, a1_sol, a2_sol = alignment_sol.SmithWaterman(seq1, seq2, S, g)
    score = 0
    # First: test that the function has returned an optimal alignment
    if alignment_util.scoreAlignment(a1, a2, S, g) == s_sol:
        score += 70
    # Second: test that the function has returned an optimal score
    if s == s_sol:
        score += 20
    # Third: Test that the function has returned the correct score for the alignment
    # NOTE(review): the code for this third check and any return of `score`
    # are not present at this point in the file -- confirm against the
    # complete source.