Ejemplo n.º 1
0
    def test_distance_matrix(self):
        """distance_matrix should follow the order of the alignment's Names"""
        # Case 1: Names are not overridden in this test
        # (original note: Names=None).
        expected_default = array([[0, 2, 2], [2, 0, 1], [2, 1, 0]])
        observed_default = distance_matrix(self.aln1)
        self.assertEqual(observed_default, expected_default)

        # Case 2: a new alignment built from the same named sequences, with
        # Names explicitly reordered. The expected matrix equals the first
        # one re-indexed in the order (1, 2, 0) on both rows and columns.
        reordered = Alignment(self.aln1.NamedSeqs)
        reordered.Names = ['seq_1', 'seq_2', 'seq_0']
        expected_reordered = array([[0, 1, 2], [1, 0, 2], [2, 2, 0]])
        observed_reordered = distance_matrix(reordered)
        self.assertEqual(observed_reordered, expected_reordered)
Ejemplo n.º 2
0
 def test_distance_matrix(self):
     """distance_matrix should follow the order of the alignment's Names"""
     # First check: Names are not overridden in this test
     # (original note: Names=None).
     want_default = array([[0, 2, 2], [2, 0, 1], [2, 1, 0]])
     got_default = distance_matrix(self.aln1)
     self.assertEqual(got_default, want_default)

     # Second check: same named sequences, but Names set to a different
     # order. The expected matrix is the first one with rows and columns
     # re-indexed in the order (1, 2, 0).
     shuffled = Alignment(self.aln1.NamedSeqs)
     shuffled.Names = ['seq_1', 'seq_2', 'seq_0']
     want_shuffled = array([[0, 1, 2], [1, 0, 2], [2, 2, 0]])
     got_shuffled = distance_matrix(shuffled)
     self.assertEqual(got_shuffled, want_shuffled)
Ejemplo n.º 3
0
def VA(alignment, distance_method=hamming_distance):
    """Returns Weights object with seq weights according to the VA method.

    alignment: Alignment object
    distance_method: passed through to distance_matrix as the distance
        measure (default: hamming_distance)

    The VA method (Vingron & Argos 1989) calculates the distance (the Hamming
    distance by default) between all sequences in the alignment. The weight
    assigned to a sequence is the sum of the distances of all other sequences
    in the alignment to that sequence, divided by the sum of all pairwise
    distances (the total of the whole distance matrix).

    Example:
            ABBA    ABCA    CBCB
    ABBA    0       1       3
    ABCA    1       0       2
    CBCB    3       2       0
    ----------------------------
    total   4       3       5   (=12)
    normal. 0.333   0.25    0.417

    so: weight(ABBA) = 0.333, weight(ABCA)=0.25, etc.
    """
    distances = distance_matrix(alignment, distance_method)
    sum_dist = sum(distances)
    # total weights are the normalized sum of distances (sum over each
    # column, divided by the total distance in the matrix)
    # NOTE(review): when every pairwise distance is 0 the divisor is 0;
    # confirm the intended result for alignments of identical sequences.
    weights = sum_dist / sum(sum_dist)

    # Pair each sequence name with its weight. dict() consumes the zip
    # iterator directly, so no intermediate list is needed.
    return Weights(dict(zip(alignment.Names, weights)))
Ejemplo n.º 4
0
def VA(alignment, distance_method=hamming_distance):
    """Returns Weights object holding VA (Vingron & Argos 1989) seq weights.

    alignment: Alignment object
    distance_method: handed to distance_matrix as the distance measure
        (default: hamming_distance)

    A distance matrix is computed over all sequences in the alignment. Each
    sequence's weight is its total distance to the other sequences divided
    by the total of all entries in the matrix, so the weights add up to 1.

    Worked example (Hamming distances):
            ABBA    ABCA    CBCB
    ABBA    0       1       3
    ABCA    1       0       2
    CBCB    3       2       0
    ----------------------------
    total   4       3       5   (=12)
    normal. 0.333   0.25    0.417

    giving weight(ABBA) = 0.333, weight(ABCA) = 0.25, weight(CBCB) = 0.417.
    """
    pairwise = distance_matrix(alignment, distance_method)

    # One total per sequence, then the grand total used for normalization.
    per_seq_totals = sum(pairwise)
    grand_total = sum(per_seq_totals)
    normalized = per_seq_totals / grand_total

    # Associate every sequence name with its normalized weight.
    name_to_weight = dict(zip(alignment.Names, normalized))
    return Weights(name_to_weight)
Ejemplo n.º 5
0
def SS(alignment):
    """Returns Weights object of {seq_id: weight} for seqs in the alignment

    alignment: Alignment object

    The SS sequence weighting method is named after Sander and Schneider,
    who published their method in 1991.

    Their method starts with the same distance matrix as in the VA method,
    where distances are the pairwise Hamming distances between the sequences
    in the alignment (here: whatever distance_matrix computes by default).
    Where the VA method uses the normalized total weights for each sequence,
    the SS method continues with calculating a self-consistent set of
    weights.

    They do this by finding the eigenvector of the distance matrix belonging
    to the largest eigenvalue for the matrix. This special eigenvector can
    be found by a numerical method.
    """
    distances = distance_matrix(alignment)
    v = eigenvector_for_largest_eigenvalue(distances)
    # Pair each sequence name with its eigenvector component. dict()
    # consumes the zip iterator directly, so no intermediate list is needed.
    return Weights(dict(zip(alignment.Names, v)))
Ejemplo n.º 6
0
def SS(alignment):
    """Returns Weights object of {seq_id: weight} using the SS method.

    alignment: Alignment object

    SS stands for Sander and Schneider, who published the method in 1991.

    Like the VA method, it begins with the distance matrix of the alignment
    (described in the original documentation as pairwise Hamming distances;
    here it is whatever distance_matrix computes by default). Instead of
    normalizing per-sequence totals as VA does, SS derives a self-consistent
    set of weights: the eigenvector of the distance matrix that belongs to
    its largest eigenvalue, which is obtained numerically.
    """
    pairwise = distance_matrix(alignment)
    principal_vector = eigenvector_for_largest_eigenvalue(pairwise)

    # One eigenvector component per sequence name.
    name_to_weight = dict(zip(alignment.Names, principal_vector))
    return Weights(name_to_weight)