def test_distance_matrix(self):
    """distance_matrix should order its rows/columns by the alignment's Names"""
    # Case 1: the fixture alignment used as-is (original note: Names=None).
    expected_default = array([
        [0, 2, 2],
        [2, 0, 1],
        [2, 1, 0],
    ])
    observed_default = distance_matrix(self.aln1)
    self.assertEqual(observed_default, expected_default)

    # Case 2: same sequences, but Names explicitly rotated so that seq_0
    # comes last; the expected matrix is permuted accordingly.
    reordered = Alignment(self.aln1.NamedSeqs)
    reordered.Names = ['seq_1', 'seq_2', 'seq_0']
    expected_reordered = array([
        [0, 1, 2],
        [1, 0, 2],
        [2, 2, 0],
    ])
    observed_reordered = distance_matrix(reordered)
    self.assertEqual(observed_reordered, expected_reordered)
def test_distance_matrix(self):
    """distance_matrix should order its rows/columns by the alignment's Names"""
    # First check: fixture alignment untouched (original note: Names=None).
    default_expected = array([[0, 2, 2],
                              [2, 0, 1],
                              [2, 1, 0]])
    self.assertEqual(distance_matrix(self.aln1), default_expected)

    # Second check: rebuild the alignment from the same named sequences and
    # impose a different Names order; the matrix must follow that order.
    shuffled = Alignment(self.aln1.NamedSeqs)
    shuffled.Names = ['seq_1', 'seq_2', 'seq_0']
    shuffled_expected = array([[0, 1, 2],
                               [1, 0, 2],
                               [2, 2, 0]])
    self.assertEqual(distance_matrix(shuffled), shuffled_expected)
def VA(alignment, distance_method=hamming_distance):
    """Returns Weights object with seq weights according to the VA method.

    alignment: Alignment object
    distance_method: function handed to distance_matrix to compute the
        distance between two sequences (default: hamming_distance)

    The VA method (Vingron & Argos 1989) calculates the Hamming distance
    between all sequences in the alignment. The weight assigned to a
    sequence is the sum of the distances of all other sequences in the
    alignment to that sequence, divided by the sum of all pairwise
    distances.

    Example:
                ABBA    ABCA    CBCB
        ABBA    0       1       3
        ABCA    1       0       2
        CBCB    3       2       0
        ----------------------------
        total   4       3       5   (=12)
        normal. 0.333   0.25    0.417

    so: weight(ABBA) = 0.333, weight(ABCA)=0.25, etc.
    """
    distances = distance_matrix(alignment, distance_method)

    # Per-sequence totals: one entry per column of the distance matrix.
    # NOTE(review): this relies on `sum` reducing over the first axis only
    # (as the built-in sum does for a 2-D array) -- confirm against the
    # module's imports.
    sum_dist = sum(distances)

    # Total weights are the normalized sums of distances (sum over each
    # column, divided by the total distance in the matrix).
    # NOTE(review): if every pairwise distance is 0 the denominator is 0;
    # that case is not handled here.
    weights = sum_dist / sum(sum_dist)

    # Pair each name with its weight; zip can feed dict directly, so no
    # intermediate list is needed.
    weight_dict = Weights(dict(zip(alignment.Names, weights)))
    return weight_dict
def VA(alignment, distance_method=hamming_distance):
    """Returns Weights object holding the VA weight of every sequence.

    alignment: Alignment object
    distance_method: function handed to distance_matrix to compute the
        distance between two sequences (default: hamming_distance)

    In the VA method (Vingron & Argos 1989) all pairwise distances between
    the sequences of the alignment are computed (Hamming distance by
    default). A sequence's weight is the summed distance from every other
    sequence to it, divided by the sum of all entries of the distance
    matrix.

    Example:
                ABBA    ABCA    CBCB
        ABBA    0       1       3
        ABCA    1       0       2
        CBCB    3       2       0
        ----------------------------
        total   4       3       5   (=12)
        normal. 0.333   0.25    0.417

    so: weight(ABBA) = 0.333, weight(ABCA)=0.25, etc.
    """
    pairwise = distance_matrix(alignment, distance_method)

    # One total per sequence, then the grand total over the whole matrix.
    # NOTE(review): assumes `sum` reduces over the first axis only --
    # confirm against the module's imports.
    per_seq_total = sum(pairwise)
    grand_total = sum(per_seq_total)

    # Normalize so each weight is that sequence's share of the total.
    normalized = per_seq_total / grand_total

    # Map each sequence name to its weight and wrap the mapping.
    return Weights({
        seq_id: weight
        for seq_id, weight in zip(alignment.Names, normalized)
    })
def SS(alignment):
    """Returns Weights object of {seq_id: weight} for seqs in the alignment

    alignment: Alignment object

    The SS sequence weighting method is named after Sander and Schneider,
    who published their method in 1991.

    Their method starts with the same distance matrix as in the VA method,
    where distances are the pairwise Hamming distances between the
    sequences in the alignment. Where the VA method uses the normalized
    total weights for each sequence, the SS method continues with
    calculating a self-consistent set of weights.

    They do this by finding the eigenvector of the distance matrix
    belonging to the largest eigenvalue for the matrix. This special
    eigenvector can be found by a numerical method.
    """
    # distance_matrix is called without a distance method, so whatever
    # default it defines is used.
    distances = distance_matrix(alignment)

    # One eigenvector component per sequence; the components are the weights.
    v = eigenvector_for_largest_eigenvalue(distances)

    # Pair each name with its component; zip can feed dict directly, so no
    # intermediate list is needed.
    return Weights(dict(zip(alignment.Names, v)))
def SS(alignment):
    """Returns Weights object of {seq_id: weight} using the SS method

    alignment: Alignment object

    SS stands for Sander and Schneider, who published this weighting
    scheme in 1991.

    The starting point is the same matrix of pairwise distances between
    the aligned sequences that the VA method uses. Instead of normalizing
    the per-sequence totals as VA does, SS derives a self-consistent set
    of weights: the eigenvector of the distance matrix that belongs to its
    largest eigenvalue, which is obtained numerically.
    """
    pairwise = distance_matrix(alignment)
    principal_vector = eigenvector_for_largest_eigenvalue(pairwise)

    # Each eigenvector component is the weight of the sequence at the same
    # position in alignment.Names.
    name_to_weight = {
        seq_id: component
        for seq_id, component in zip(alignment.Names, principal_vector)
    }
    return Weights(name_to_weight)