def find_distance(sequence_dictionary,
                  scoring_matrix_path="/home/kumara3/CSE620K/proj6/Blosum62.txt",
                  gap_open=7, gap_extend=1):
    """Build a pairwise evolutionary distance matrix for a set of sequences.

    Every ordered pair of distinct sequences is aligned with
    ``alignment.SmithWatermanAffine``; the distance is then derived from the
    fraction of mismatching columns in the returned alignment.

    Args:
        sequence_dictionary: mapping of sequence name -> sequence (per the
            original comment, the dictionary produced by ``readfasta``).
        scoring_matrix_path: file handed to
            ``alignment_util.readScoringMatrix``. Defaults to the path that
            was previously hard-coded.
        gap_open: fourth argument of ``SmithWatermanAffine`` (presumably the
            gap-opening penalty -- confirm against the alignment module).
        gap_extend: fifth argument of ``SmithWatermanAffine`` (presumably the
            gap-continuation penalty -- confirm against the alignment module).

    Returns:
        Nested dict ``{name_a: {name_b: distance}}`` holding an entry for
        every ordered pair of names; the diagonal is the integer ``0``.

    Raises:
        ZeroDivisionError: an alignment contains no ungapped column.
        ValueError: the mismatch fraction is >= 0.75, where the logarithm
            in the distance formula is undefined.
    """
    names = list(sequence_dictionary)

    # With fewer than two sequences there is nothing to align, so the
    # scoring matrix file is not even opened.
    if len(names) < 2:
        return {name: {name: 0} for name in names}

    # Loop-invariant: read the scoring matrix once instead of once per pair.
    scoring = alignment_util.readScoringMatrix(scoring_matrix_path)

    distance_matrix = {}
    for key in names:
        row = distance_matrix.setdefault(key, {})
        seq1 = sequence_dictionary[key]
        for other in names:
            if other == key:
                # Self-distance is 0 by definition; skip the alignment.
                row[other] = 0
                continue
            _score, aligned_1, aligned_2 = alignment.SmithWatermanAffine(
                seq1, sequence_dictionary[other], scoring, gap_open, gap_extend)
            row[other] = _evolutionary_distance(aligned_1, aligned_2)
    return distance_matrix


def _count_columns(aligned_1, aligned_2):
    """Return (matches, mismatches) over the ungapped columns of an alignment."""
    matches = 0
    mismatches = 0
    for char_1, char_2 in zip(aligned_1, aligned_2):
        # Gap columns are ignored. The original tested for '-' only in an
        # unreachable ``else`` branch, so gaps were counted as mismatches.
        if char_1 == '-' or char_2 == '-':
            continue
        if char_1 == char_2:
            matches += 1
        else:
            mismatches += 1
    return matches, mismatches


def _evolutionary_distance(aligned_1, aligned_2):
    """Convert an aligned pair into a distance: d = -3/4 * ln(1 - 4/3 * p)."""
    matches, mismatches = _count_columns(aligned_1, aligned_2)
    p_distance = mismatches / float(matches + mismatches)
    # NOTE(review): the 3/4 and 4/3 constants are those of the four-letter
    # Jukes-Cantor correction, while the default matrix file is named
    # Blosum62 -- confirm the formula suits the sequences being compared.
    return -0.75 * log(1 - 4 * p_distance / 3)
def test1(self):
    """Check SmithWatermanAffine on "AATTA" vs "AAA" in both argument orders.

    For each ordering the returned alignment is re-scored with
    ``alignment_util.scoreAlignmentAffine`` and must equal both the score
    the aligner reported and the expected value 24.
    """
    longer = "AATTA"
    shorter = "AAA"
    matrix = self.S3
    first_penalty, second_penalty = 4, 1
    expected = 24

    for left, right in ((longer, shorter), (shorter, longer)):
        reported, row_a, row_b = alignment.SmithWatermanAffine(
            left, right, matrix, first_penalty, second_penalty)
        self.assertEqual(
            alignment_util.scoreAlignmentAffine(
                row_a, row_b, matrix, first_penalty, second_penalty),
            reported,
            "Returned alignment score does not match actual alignment score")
        self.assertEqual(
            alignment_util.scoreAlignmentAffine(
                row_a, row_b, matrix, first_penalty, second_penalty),
            expected,
            "Score of returned alignment is not optimal")
def mcalliTest(self):
    """Check SmithWatermanAffine with a 100x-scaled matrix and penalties 500/1.

    The two sequences share their flanks and differ in the middle. Both
    argument orders are aligned; each returned alignment is re-scored and
    must equal the reported score and the expected value 5462.
    """
    # NOTE(review): the name lacks the ``test`` prefix, so default unittest
    # discovery would not collect it -- confirm whether that is intentional.
    upper = 'CCCCCCCCCAAAAAAAAAAAAAAAAAAATAAACCCCCCC'
    lower = 'CCCCCCCCCTTTTTTTTTTTTTTTTTTTTTTTCCCCCCC'
    bases = "ACGT"
    scaled = {
        row: {col: 100 * self.S2[row][col] for col in bases}
        for row in bases
    }
    penalties = (500, 1)
    expected = 5462

    for left, right in ((upper, lower), (lower, upper)):
        reported, row_a, row_b = alignment.SmithWatermanAffine(
            left, right, scaled, *penalties)
        self.assertEqual(
            alignment_util.scoreAlignmentAffine(row_a, row_b, scaled, *penalties),
            reported,
            "Returned alignment score does not match actual alignment")
        self.assertEqual(
            alignment_util.scoreAlignmentAffine(row_a, row_b, scaled, *penalties),
            expected,
            "Score of returned alignment is not optimal")
def test7(self):
    """Check SmithWatermanAffine on two 24-character sequences (penalties 5/2).

    The returned alignment is re-scored with
    ``alignment_util.scoreAlignmentAffine`` and must equal both the score
    the aligner reported and the expected value 128.
    """
    seq_a = "CCCAAAAATTTAAAAACCCCCGGG"
    seq_b = "GGGAAAAAAAAAATTTCCCCCCCC"
    matrix = self.S3
    first_penalty = 5
    second_penalty = 2
    expected = 128

    reported, row_a, row_b = alignment.SmithWatermanAffine(
        seq_a, seq_b, matrix, first_penalty, second_penalty)

    def rescore():
        # Re-evaluated on every call, exactly as the inline calls were.
        return alignment_util.scoreAlignmentAffine(
            row_a, row_b, matrix, first_penalty, second_penalty)

    self.assertEqual(
        rescore(), reported,
        "Returned alignment score does not match actual alignment score")
    self.assertEqual(
        rescore(), expected,
        "Score of returned alignment is not optimal")
def test_SWA(seq1, seq2, S, o, c):
    """Grade ``alignment.SmithWatermanAffine`` against ``alignment_sol``.

    Both implementations are run on the same inputs. Credit is awarded as:
      * 70 -- the returned alignment re-scores to the reference score;
      * 20 -- the reported score equals the reference score;
      * 10 -- the reported score equals the re-scored alignment.

    Returns:
        The earned credit as a fraction of 100 (a float in [0.0, 1.0]).
    """
    reported, row_a, row_b = alignment.SmithWatermanAffine(seq1, seq2, S, o, c)
    reference, _ref_a, _ref_b = alignment_sol.SmithWatermanAffine(
        seq1, seq2, S, o, c)

    # (weight, passed) pairs, evaluated in the same order as the original
    # chain of independent ``if`` statements.
    checks = (
        (70, alignment_util.scoreAlignmentAffine(row_a, row_b, S, o, c) == reference),
        (20, reported == reference),
        (10, alignment_util.scoreAlignmentAffine(row_a, row_b, S, o, c) == reported),
    )
    earned = sum(weight for weight, passed in checks if passed)
    return earned / 100.0