コード例 #1
0
    def test_matrix_parse(self):
        """Check which inputs treecluster accepts as a data matrix.

        A numpy array and a rectangular nested list must both be accepted.
        Ragged lists, lists with non-numeric cells or rows, and objects that
        are not matrices at all must each raise TypeError.
        """
        if TestCluster.module == 'Bio.Cluster':
            from Bio.Cluster import treecluster
        elif TestCluster.module == 'Pycluster':
            from Pycluster import treecluster

        # Well-formed matrix given as a numpy array
        array_matrix = numpy.array([
            [1.1, 1.2], [1.4, 1.3], [1.1, 1.5], [2.0, 1.5],
            [1.7, 1.9], [1.7, 1.9], [5.7, 5.9], [5.7, 5.9],
            [3.1, 3.3], [5.4, 5.3], [5.1, 5.5], [5.0, 5.5],
            [5.1, 5.2],
        ])

        # Well-formed matrix given as a plain list of lists
        list_matrix = [
            [1.1, 2.2, 3.3, 4.4, 5.5],
            [3.1, 3.2, 1.3, 2.4, 1.5],
            [4.1, 2.2, 0.3, 5.4, 0.5],
            [12.1, 2.0, 0.0, 5.0, 0.0],
        ]

        # Rows of unequal length
        ragged_rows = [
            [91.1, 92.2, 93.3, 94.4, 95.5],
            [93.1, 93.2, 91.3, 92.4],
            [94.1, 92.2, 90.3],
            [12.1, 92.0, 90.0, 95.0, 90.0],
        ]

        # Cells holding a string or None instead of numbers
        bad_cells = [
            [7.1, 7.2, 7.3, 7.4, 7.5],
            [7.1, 7.2, 7.3, 7.4, 'snoopy'],
            [7.1, 7.2, 7.3, None, None],
        ]

        # A whole row replaced by None
        bad_row = [
            [23.1, 23.2, 23.3, 23.4, 23.5],
            None,
            [23.1, 23.0, 23.0, 23.0, 23.0],
        ]

        # Objects that are not matrices at all
        just_a_string = "snoopy"
        a_mapping = {'a': [[2.3, 1.2], [3.3, 5.6]]}
        empty_list = []
        list_of_none = [None]

        # Each valid matrix is paired with the message reported on failure
        must_accept = (
            (array_matrix, "treecluster failed to accept matrix data1"),
            (list_matrix, "treecluster failed to accept matrix data2"),
        )
        for matrix, complaint in must_accept:
            try:
                treecluster(matrix)
            except Exception:  # TODO - Which exceptions?
                self.fail(complaint)

        # Checked in the same order as the inputs were defined above
        must_reject = (ragged_rows, bad_cells, bad_row, just_a_string,
                       a_mapping, empty_list, list_of_none)
        for candidate in must_reject:
            self.assertRaises(TypeError, treecluster, candidate)
コード例 #2
0
ファイル: SharedFunctions.py プロジェクト: Swart-lab/bleties
def get_clusters_from_seqlist(seqlist, dist_threshold=0.05):
    """Cluster a list of sequences by a distance identity threshold

    The distance between two sequences is 1 minus their local pairwise
    alignment score divided by the length of the longer sequence. The
    resulting distance matrix is clustered hierarchically, and the tree is
    cut into one more cluster than there are branches longer than
    `dist_threshold`.

    Parameters
    ----------
    seqlist : list
        list of sequences as str
    dist_threshold : float
        Max distance value to retain, branches above this length in the
        hierarchical clustering tree will be cut.

    Returns
    -------
    list
        list of lists - input sequences now grouped by cluster
    list
        list of int - cluster memberships of the originally input list

    An empty `seqlist` yields two empty lists; a single sequence forms one
    cluster with membership 0, without running any alignment.
    """
    if len(seqlist) == 0:
        # Nothing to align or cluster
        return([], [])
    if len(seqlist) == 1:
        # Skip alignment if there is only one sequence
        return([seqlist], [0])

    aligner = PairwiseAligner()
    aligner.mode = "local"

    # Convert sequence list to distance matrix
    distmatrix = []
    for seq1 in seqlist:
        row = []
        for seq2 in seqlist:
            maxlen = max(len(seq1), len(seq2))
            # Take percentage identity of pairwise alignment score (match base
            # +1, all other operations +0) over the longer sequence in pair.
            # aligner.score() gives the score without building alignments.
            idval = aligner.score(seq1, seq2) / maxlen
            row.append(1 - idval)  # convert to distance fraction
        distmatrix.append(row)

    # Hierarchical clustering from the distance matrix
    htree = treecluster(data=None, distancematrix=array(distmatrix))
    # Find number of branches with length longer than threshold, and add 1
    # to get number of cuts
    cuts = 1 + sum(1 for i in range(len(htree))
                   if htree[i].distance > dist_threshold)
    clust_ids = list(htree.cut(cuts))

    # Group the sequences by the cluster id assigned to each of them
    clust_seqs_dict = defaultdict(list)
    for clust_id, seq in zip(clust_ids, seqlist):
        clust_seqs_dict[clust_id].append(seq)
    # Convert dict of lists to list of lists
    clust_seqs = list(clust_seqs_dict.values())
    return(clust_seqs, clust_ids)
コード例 #3
0
ファイル: test_Cluster.py プロジェクト: BrianLinSu/rop
    def test_matrix_parse(self):
        """Check which inputs treecluster accepts as a data matrix.

        The two well-formed matrices (data1, data2) must be accepted; every
        other input (data3 to data9) must raise TypeError.
        """
        if TestCluster.module == 'Bio.Cluster':
            from Bio.Cluster import treecluster
        elif TestCluster.module == 'Pycluster':
            from Pycluster import treecluster

        # Normal matrix, no errors
        data1 = numpy.array([[1.1, 1.2],
                             [1.4, 1.3],
                             [1.1, 1.5],
                             [2.0, 1.5],
                             [1.7, 1.9],
                             [1.7, 1.9],
                             [5.7, 5.9],
                             [5.7, 5.9],
                             [3.1, 3.3],
                             [5.4, 5.3],
                             [5.1, 5.5],
                             [5.0, 5.5],
                             [5.1, 5.2]])

        # Another normal matrix, no errors; written as a list
        data2 = [[1.1, 2.2, 3.3, 4.4, 5.5],
                 [3.1, 3.2, 1.3, 2.4, 1.5],
                 [4.1, 2.2, 0.3, 5.4, 0.5],
                 [12.1, 2.0, 0.0, 5.0, 0.0]]

        # Ragged matrix
        data3 = [[91.1, 92.2, 93.3, 94.4, 95.5],
                 [93.1, 93.2, 91.3, 92.4],
                 [94.1, 92.2, 90.3],
                 [12.1, 92.0, 90.0, 95.0, 90.0]]

        # Matrix with bad cells
        data4 = [[7.1, 7.2, 7.3, 7.4, 7.5],
                 [7.1, 7.2, 7.3, 7.4, 'snoopy'],
                 [7.1, 7.2, 7.3, None, None]]

        # Matrix with a bad row
        data5 = [[23.1, 23.2, 23.3, 23.4, 23.5],
                 None,
                 [23.1, 23.0, 23.0, 23.0, 23.0]]

        # Various references that don't point to matrices at all
        data6 = "snoopy"
        data7 = {'a': [[2.3, 1.2], [3.3, 5.6]]}
        data8 = []
        data9 = [None]

        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit are not turned into test failures.
        try:
            treecluster(data1)
        except Exception:
            self.fail("treecluster failed to accept matrix data1")

        try:
            treecluster(data2)
        except Exception:
            self.fail("treecluster failed to accept matrix data2")

        # assertRaises calls treecluster itself; no lambda wrapper is needed
        for bad_input in (data3, data4, data5, data6, data7, data8, data9):
            self.assertRaises(TypeError, treecluster, bad_input)
コード例 #4
0
ファイル: test_Cluster.py プロジェクト: xulesc/biopython
    def test_treecluster(self):
        """Check treecluster argument validation and the trees it returns.

        A multi-letter ``method`` and a multi-letter ``dist`` must each raise
        ValueError.  For two fixed data sets, every linkage method code used
        here ('a', 's', 'c', 'm', all with dist='e') must give a tree with
        one node fewer than there are data rows, and each node must match the
        expected left index, right index and distance (to 3 decimal places).
        """
        if TestCluster.module == 'Bio.Cluster':
            from Bio.Cluster import treecluster
        elif TestCluster.module == 'Pycluster':
            from Pycluster import treecluster

        # First data set
        weight1 = [1, 1, 1, 1, 1]
        data1 = numpy.array([[1.1, 2.2, 3.3, 4.4, 5.5],
                             [3.1, 3.2, 1.3, 2.4, 1.5],
                             [4.1, 2.2, 0.3, 5.4, 0.5],
                             [12.1, 2.0, 0.0, 5.0, 0.0]])
        # All-ones mask with one row per data row
        mask1 = numpy.array([[1, 1, 1, 1, 1]] * 4, int)

        # Second data set
        weight2 = [1, 1]
        data2 = numpy.array([[0.8223, 0.9295],
                             [1.4365, 1.3223],
                             [1.1623, 1.5364],
                             [2.1826, 1.1934],
                             [1.7763, 1.9352],
                             [1.7215, 1.9912],
                             [2.1812, 5.9935],
                             [5.3290, 5.9452],
                             [3.1491, 3.3454],
                             [5.1923, 5.3156],
                             [4.7735, 5.4012],
                             [5.1297, 5.5645],
                             [5.3934, 5.1823]])
        mask2 = numpy.array([[1, 1]] * 13, int)

        # TODO - Use a context manager here once we drop Python 2.6
        # Method should be one letter:
        self.assertRaises(ValueError, treecluster,
                          data=data1, mask=mask1, weight=weight1,
                          transpose=0, method="any", dist="e")

        # Distance should be one letter.  A valid method is passed here so
        # that only the dist argument can be responsible for the ValueError.
        self.assertRaises(ValueError, treecluster,
                          data=data1, mask=mask1, weight=weight1,
                          transpose=0, method="a", dist="euclidean")

        # Expected nodes as (left, right, distance), keyed by method code:
        # 'a' average, 's' single, 'c' centroid, 'm' maximum linkage.
        expected1 = (
            ('a', [(2, 1, 2.600), (-1, 0, 7.300), (3, -2, 21.348)]),
            ('s', [(1, 2, 2.600), (0, -1, 5.800), (-2, 3, 12.908)]),
            ('c', [(1, 2, 2.600), (0, -1, 6.650), (-2, 3, 19.437)]),
            ('m', [(2, 1, 2.600), (-1, 0, 8.800), (3, -2, 32.508)]),
        )
        expected2 = (
            ('a', [(5, 4, 0.003), (9, 12, 0.029), (2, 1, 0.061),
                   (11, -2, 0.070), (-4, 10, 0.128), (7, -5, 0.224),
                   (-3, 0, 0.254), (-1, 3, 0.391), (-8, -7, 0.532),
                   (8, -9, 3.234), (-6, 6, 4.636), (-11, -10, 12.741)]),
            ('s', [(4, 5, 0.003), (9, 12, 0.029), (11, -2, 0.033),
                   (1, 2, 0.061), (10, -3, 0.077), (7, -5, 0.092),
                   (0, -4, 0.242), (-7, -1, 0.246), (3, -8, 0.287),
                   (-9, 8, 1.936), (-10, -6, 3.432), (6, -11, 3.535)]),
            ('c', [(4, 5, 0.003), (12, 9, 0.029), (1, 2, 0.061),
                   (-2, 11, 0.063), (10, -4, 0.109), (-5, 7, 0.189),
                   (0, -3, 0.239), (3, -1, 0.390), (-7, -8, 0.382),
                   (-9, 8, 3.063), (6, -6, 4.578), (-10, -11, 11.536)]),
            ('m', [(5, 4, 0.003), (9, 12, 0.029), (2, 1, 0.061),
                   (11, 10, 0.077), (-2, -4, 0.216), (-3, 0, 0.266),
                   (-5, 7, 0.302), (-1, 3, 0.425), (-8, -6, 0.968),
                   (8, 6, 3.975), (-10, -7, 5.755), (-11, -9, 22.734)]),
        )

        datasets = (
            ("first data set", data1, mask1, weight1, expected1),
            ("second data set", data2, mask2, weight2, expected2),
        )
        for label, data, mask, weight, expected in datasets:
            for method, nodes in expected:
                tree = treecluster(data=data, mask=mask, weight=weight,
                                   transpose=0, method=method, dist='e')
                self.assertEqual(len(tree), len(data) - 1)
                for index, (left, right, distance) in enumerate(nodes):
                    # Identify the failing case, since all cases share
                    # these three assertion lines
                    where = "%s, method %r, node %d" % (label, method, index)
                    self.assertEqual(tree[index].left, left, where)
                    self.assertEqual(tree[index].right, right, where)
                    self.assertAlmostEqual(tree[index].distance, distance,
                                           places=3, msg=where)
コード例 #5
0
# @Date:   2019-05-27T10:15:26+08:00
# @Email:  [email protected]
# @Filename: BioPy_1730416009_0527.py
# @Last modified time: 2019-05-27T14:31:37+08:00
import pandas as pd
from Bio.Cluster import treecluster

# Load the expression table from the Excel workbook
dfrm = pd.read_excel('./ExpressionData.xlsx')
# Drop the 'ID' column and keep the remaining values as the data matrix
data_array = dfrm.drop('ID', axis=1).values
# Hierarchical clustering with transpose=0 (genes/proteins, per the
# original note), method='s', dist='e'
tree_gene = treecluster(data_array, transpose=0, method='s', dist='e')
# Hierarchical clustering with transpose=1 (experiment conditions, per the
# original note), method='m', dist='e'
# NOTE(review): the original comment described this call as pairwise
# single-linkage, but it is the gene tree above that passes method='s';
# confirm which linkage is intended here.
tree_exp = treecluster(data_array, transpose=1, method='m', dist='e')
# Write each tree under its own heading line, experiment tree first
with open('./Results.txt', 'wt') as outFile:
    for heading, tree in (("# Cluster Tree of Exp Condiction\n", tree_exp),
                          ("# Cluster Tree of Gene\n", tree_gene)):
        outFile.write(heading)
        outFile.write(str(tree) + '\n')
コード例 #6
0
def test_matrix_parse(module):
    """Print whether treecluster accepts or refuses a series of inputs.

    `module` selects the implementation to import treecluster from and must
    be 'Bio.Cluster' or 'Pycluster'; any other value raises ValueError.

    data1 and data2 are well-formed matrices and should be read; data3 to
    data10 are malformed and should be refused.  One line is printed per
    input, followed by a blank line.  Each print call takes a single string
    so the output is the same under Python 2 and Python 3.
    """
    if module == 'Bio.Cluster':
        from Bio.Cluster import treecluster
    elif module == 'Pycluster':
        from Pycluster import treecluster
    else:
        # String exceptions are not valid; raise a real exception type
        raise ValueError('Unknown module name: %s' % module)
    print("test_matrix_parse:")
    # Normal matrix, no errors
    data1 = array([[1.1, 1.2], [1.4, 1.3], [1.1, 1.5], [2.0, 1.5], [1.7, 1.9],
                   [1.7, 1.9], [5.7, 5.9], [5.7, 5.9], [3.1, 3.3], [5.4, 5.3],
                   [5.1, 5.5], [5.0, 5.5], [5.1, 5.2]])

    # Another normal matrix, no errors; written as a list
    data2 = [[1.1, 2.2, 3.3, 4.4, 5.5], [3.1, 3.2, 1.3, 2.4, 1.5],
             [4.1, 2.2, 0.3, 5.4, 0.5], [12.1, 2.0, 0.0, 5.0, 0.0]]

    # Ragged matrix
    data3 = [[91.1, 92.2, 93.3, 94.4, 95.5], [93.1, 93.2, 91.3, 92.4],
             [94.1, 92.2, 90.3], [12.1, 92.0, 90.0, 95.0, 90.0]]

    # Matrix with bad cells
    data4 = [[7.1, 7.2, 7.3, 7.4, 7.5], [7.1, 7.2, 7.3, 7.4, 'snoopy'],
             [7.1, 7.2, 7.3, None, None]]

    # Matrix with a bad row
    data5 = [[23.1, 23.2, 23.3, 23.4, 23.5], None,
             [23.1, 23.0, 23.0, 23.0, 23.0]]

    # Various references that don't point to matrices at all
    data6 = "snoopy"
    data7 = {'a': [[2.3, 1.2], [3.3, 5.6]]}
    data8 = []
    data9 = [None]
    data10 = [[None]]

    # Inputs that must be accepted
    for name, matrix in (('data1', data1), ('data2', data2)):
        try:
            treecluster(matrix)
        except Exception:
            # Previously this branch held a bare string with no print, so a
            # failure here went unreported.
            print("Error: treecluster failed to accept matrix %s" % name)
        else:
            print("Read %s (correct)" % name)

    # Inputs that must be refused, reported in definition order
    malformed = (('data3', data3), ('data4', data4), ('data5', data5),
                 ('data6', data6), ('data7', data7), ('data8', data8),
                 ('data9', data9), ('data10', data10))
    for name, matrix in malformed:
        try:
            treecluster(matrix)
        except Exception:
            print("Refused incorrect matrix %s" % name)
        else:
            print("Error: treecluster incorrectly accepted %s" % name)
    print("")
コード例 #7
0
def test_treecluster(module):
    """Print the trees treecluster builds for two fixed data sets.

    `module` selects the implementation to import treecluster from and must
    be 'Bio.Cluster' or 'Pycluster'; any other value raises ValueError.

    For each data set, treecluster is run with method codes 'a', 's', 'c'
    and 'm' (dist='e', transpose=0) and the node count, link-distance count
    and every node are printed.  The first data set is also displayed with
    print_matrix.  Each print call takes a single string so the output is
    the same under Python 2 and Python 3.
    """
    if module == 'Bio.Cluster':
        from Bio.Cluster import treecluster
    elif module == 'Pycluster':
        from Pycluster import treecluster
    else:
        # String exceptions are not valid; raise a real exception type
        raise ValueError('Unknown module name: %s' % module)
    print("test_treecluster:")
    # First data set
    weight1 = [1, 1, 1, 1, 1]
    data1 = array([[1.1, 2.2, 3.3, 4.4, 5.5], [3.1, 3.2, 1.3, 2.4, 1.5],
                   [4.1, 2.2, 0.3, 5.4, 0.5], [12.1, 2.0, 0.0, 5.0, 0.0]])
    mask1 = array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                   [1, 1, 1, 1, 1]])

    # Second data set
    weight2 = [1, 1]
    data2 = array([[0.8223, 0.9295], [1.4365, 1.3223], [1.1623, 1.5364],
                   [2.1826, 1.1934], [1.7763, 1.9352], [1.7215, 1.9912],
                   [2.1812, 5.9935], [5.3290, 5.9452], [3.1491, 3.3454],
                   [5.1923, 5.3156], [4.7735, 5.4012], [5.1297, 5.5645],
                   [5.3934, 5.1823]])
    mask2 = array([[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1],
                   [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]])

    # test first data set
    print("First data set:")
    print_matrix(data1, mask1)
    _report_treecluster(treecluster, data1, mask1, weight1)

    # Test second data set
    print("Second data set:")
    _report_treecluster(treecluster, data2, mask2, weight2)
    print("")


def _report_treecluster(treecluster, data, mask, weight):
    """Run treecluster with each linkage method and print the result."""
    linkages = (('a', "Pairwise average-linkage clustering"),
                ('s', "Pairwise single-linkage clustering"),
                ('c', "Pairwise centroid-linkage clustering"),
                ('m', "Pairwise maximum-linkage clustering"))
    expected = len(data) - 1
    for method, title in linkages:
        print(title)
        # This call is unpacked into a (nodes, link distances) pair
        result, linkdist = treecluster(data=data,
                                       mask=mask,
                                       weight=weight,
                                       transpose=0,
                                       method=method,
                                       dist='e')
        print("Number of nodes is %d (should be %d)"
              % (len(result), expected))
        print("Number of link distances is %d (should be %d)"
              % (len(linkdist), expected))
        for i, node in enumerate(result):
            print("Node %3d joins node %3d with node %3d; "
                  "link distance is %7.3f"
                  % (i, node[0], node[1], linkdist[i]))
コード例 #8
0
ファイル: test_Cluster.py プロジェクト: jamescasbon/biopython
    def test_treecluster(self):
        """Compare treecluster results with stored reference trees.

        Two data sets are clustered with dist="e" and each of the methods
        "a", "s", "c" and "m" in turn.  For every node of the returned tree
        the left member, right member and distance (to three decimal
        places) are compared with the expected values listed below.
        """
        if TestCluster.module == "Bio.Cluster":
            from Bio.Cluster import treecluster
        elif TestCluster.module == "Pycluster":
            from Pycluster import treecluster

        def check(data, mask, weight, method, nodes):
            # Run treecluster once and compare each node, in order, with its
            # expected (left, right, distance) triple.
            tree = treecluster(data=data, mask=mask, weight=weight, transpose=0, method=method, dist="e")
            self.assertEqual(len(tree), len(data) - 1)
            for position, (left, right, distance) in enumerate(nodes):
                self.assertEqual(tree[position].left, left)
                self.assertEqual(tree[position].right, right)
                self.assertAlmostEqual(tree[position].distance, distance, 3)

        # First data set: 4 rows, 5 columns, nothing masked, unit weights
        weight1 = [1] * 5
        data1 = numpy.array(
            [
                [1.1, 2.2, 3.3, 4.4, 5.5],
                [3.1, 3.2, 1.3, 2.4, 1.5],
                [4.1, 2.2, 0.3, 5.4, 0.5],
                [12.1, 2.0, 0.0, 5.0, 0.0],
            ]
        )
        mask1 = numpy.ones(data1.shape, int)

        expected1 = (
            # Pairwise average-linkage clustering
            ("a", [(2, 1, 2.600), (-1, 0, 7.300), (3, -2, 21.348)]),
            # Pairwise single-linkage clustering
            ("s", [(1, 2, 2.600), (0, -1, 5.800), (-2, 3, 12.908)]),
            # Pairwise centroid-linkage clustering
            ("c", [(1, 2, 2.600), (0, -1, 6.650), (-2, 3, 19.437)]),
            # Pairwise maximum-linkage clustering
            ("m", [(2, 1, 2.600), (-1, 0, 8.800), (3, -2, 32.508)]),
        )
        for method, nodes in expected1:
            check(data1, mask1, weight1, method, nodes)

        # Second data set: 13 rows, 2 columns, nothing masked, unit weights
        weight2 = [1] * 2
        data2 = numpy.array(
            [
                [0.8223, 0.9295],
                [1.4365, 1.3223],
                [1.1623, 1.5364],
                [2.1826, 1.1934],
                [1.7763, 1.9352],
                [1.7215, 1.9912],
                [2.1812, 5.9935],
                [5.3290, 5.9452],
                [3.1491, 3.3454],
                [5.1923, 5.3156],
                [4.7735, 5.4012],
                [5.1297, 5.5645],
                [5.3934, 5.1823],
            ]
        )
        mask2 = numpy.ones(data2.shape, int)

        expected2 = (
            # Pairwise average-linkage clustering
            (
                "a",
                [
                    (5, 4, 0.003),
                    (9, 12, 0.029),
                    (2, 1, 0.061),
                    (11, -2, 0.070),
                    (-4, 10, 0.128),
                    (7, -5, 0.224),
                    (-3, 0, 0.254),
                    (-1, 3, 0.391),
                    (-8, -7, 0.532),
                    (8, -9, 3.234),
                    (-6, 6, 4.636),
                    (-11, -10, 12.741),
                ],
            ),
            # Pairwise single-linkage clustering
            (
                "s",
                [
                    (4, 5, 0.003),
                    (9, 12, 0.029),
                    (11, -2, 0.033),
                    (1, 2, 0.061),
                    (10, -3, 0.077),
                    (7, -5, 0.092),
                    (0, -4, 0.242),
                    (-7, -1, 0.246),
                    (3, -8, 0.287),
                    (-9, 8, 1.936),
                    (-10, -6, 3.432),
                    (6, -11, 3.535),
                ],
            ),
            # Pairwise centroid-linkage clustering
            (
                "c",
                [
                    (4, 5, 0.003),
                    (12, 9, 0.029),
                    (1, 2, 0.061),
                    (-2, 11, 0.063),
                    (10, -4, 0.109),
                    (-5, 7, 0.189),
                    (0, -3, 0.239),
                    (3, -1, 0.390),
                    (-7, -8, 0.382),
                    (-9, 8, 3.063),
                    (6, -6, 4.578),
                    (-10, -11, 11.536),
                ],
            ),
            # Pairwise maximum-linkage clustering
            (
                "m",
                [
                    (5, 4, 0.003),
                    (9, 12, 0.029),
                    (2, 1, 0.061),
                    (11, 10, 0.077),
                    (-2, -4, 0.216),
                    (-3, 0, 0.266),
                    (-5, 7, 0.302),
                    (-1, 3, 0.425),
                    (-8, -6, 0.968),
                    (8, 6, 3.975),
                    (-10, -7, 5.755),
                    (-11, -9, 22.734),
                ],
            ),
        )
        for method, nodes in expected2:
            check(data2, mask2, weight2, method, nodes)
コード例 #9
0
def do_treecluster_images():
    """Plot how the number of selected features affects hierarchical clustering.

    For each feature count in a fixed list, the dictionary returned by
    ``getWordCount`` is reduced with ``TC`` and clustered in two ways:
    ``treecluster`` (method 'm', dist 'e') and scikit-learn style
    ``AgglomerativeClustering`` with Ward linkage.  For every cluster count
    ``k`` in ``range(2, 50)`` the silhouette score and the size of the largest
    cluster are recorded.  Both curves are drawn in a two-row figure, which is
    saved under ``./treecluster_images/`` and then shown.

    Legend entries are numbered: 0 is the ``treecluster`` result, 1 is the
    ``AgglomerativeClustering`` result.
    """
    outDir = '/Users/brobear/PycharmProjects/TextClusteringAnalysis/txt2'
    txt_dict = getWordCount(outDir)

    feature_counts = [300, 600]
    cluster_counts = range(2, 50)
    for topN in feature_counts:
        data, _ = TC(txt_dict, topN=topN)[:2]

        # treecluster options, as noted by the original author:
        #   method: 's' minimum distance, 'm' maximum distance,
        #           'c' centroid, 'a' group average
        #   dist:   'e' Euclidean distance, 'u' cosine distance
        tree = treecluster(data=data, method='m', dist='e')

        silhouettes = [[], []]  # silhouette coefficient per k, one list per algorithm
        largest = [[], []]  # size of the largest cluster per k, one list per algorithm
        for k in cluster_counts:
            clusterid = tree.cut(nclusters=k)
            silhouettes[0].append(silhouette_score(data, clusterid, metric='euclidean'))
            largest[0].append(max(size_of_cluster(clusterid)))

            clustering = AgglomerativeClustering(linkage='ward', n_clusters=k)
            clustering.fit(data)
            silhouettes[1].append(silhouette_score(data, clustering.labels_, metric='euclidean'))
            largest[1].append(max(size_of_cluster(clustering.labels_)))

        # A fresh figure per feature count; drawing through the Axes objects
        # avoids depending on pyplot's "current figure/axes" state.
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 6))

        # Upper panel: silhouette score against k
        for series in silhouettes:
            ax1.plot(cluster_counts, series, marker='o')
        ax1.legend(range(len(silhouettes)))
        # The labels must be set by calling the setter; assigning to
        # plt.xlabel / plt.ylabel would only overwrite the pyplot functions.
        ax1.set_xlabel('k')
        ax1.set_ylabel('silhouette')
        ax1.set_title('feature number=%d by TC' % topN)

        # Lower panel: size of the largest cluster against k
        for series in largest:
            ax2.plot(cluster_counts, series, marker='o')
        ax2.legend(range(len(largest)))
        ax2.set_xlabel('k')
        ax2.set_ylabel('MAXcluster')
        ax2.set_title("max size of clusters")

        fig.savefig('./treecluster_images/feature number=%d by TC 1<k<50' % topN)
        plt.show()
        # Release the figure so the next iteration never draws on this one.
        plt.close(fig)
コード例 #10
0
    def test_matrix_parse(self):
        """Check which inputs treecluster accepts as a data matrix.

        The first group of inputs must be accepted without any exception;
        every input in the second group must raise the listed exception.
        """
        if TestCluster.module == 'Bio.Cluster':
            from Bio.Cluster import treecluster
        elif TestCluster.module == 'Pycluster':
            from Pycluster import treecluster

        # Normal matrix, no errors
        data1 = numpy.array([
            [1.1, 1.2], [1.4, 1.3], [1.1, 1.5], [2.0, 1.5], [1.7, 1.9],
            [1.7, 1.9], [5.7, 5.9], [5.7, 5.9], [3.1, 3.3], [5.4, 5.3],
            [5.1, 5.5], [5.0, 5.5], [5.1, 5.2],
        ])

        # Another normal matrix, no errors; written as a list
        data2 = [
            [1.1, 2.2, 3.3, 4.4, 5.5],
            [3.1, 3.2, 1.3, 2.4, 1.5],
            [4.1, 2.2, 0.3, 5.4, 0.5],
            [2.1, 2.0, 0.0, 5.0, 0.0],
        ]

        # Rows are not contiguous
        data3 = data1[::2, :]

        # Columns are not contiguous
        data4 = numpy.array(data2)[:, ::2]

        # Matrix using float32 (same values as data2)
        data5 = numpy.array(data2, numpy.float32)

        # Matrix using int
        data6 = numpy.array(
            [[1, 2, 3, 4, 5], [3, 3, 1, 2, 1], [4, 2, 0, 5, 0], [2, 2, 0, 5, 0]],
            numpy.int32)

        accepted = (
            ("data1", data1),
            ("data2", data2),
            ("data3", data3),
            ("data4", data4),
            ("data5", data5),
            ("data6", data6),
        )
        for label, matrix in accepted:
            try:
                treecluster(matrix)
            except Exception:
                self.fail("treecluster failed to accept matrix %s" % label)

        # Ragged matrix
        data7 = [
            [91.1, 92.2, 93.3, 94.4, 95.5],
            [93.1, 93.2, 91.3, 92.4],
            [94.1, 92.2, 90.3],
            [12.1, 92.0, 90.0, 95.0, 90.0],
        ]

        # Matrix with bad cells
        data8 = [
            [7.1, 7.2, 7.3, 7.4, 7.5],
            [7.1, 7.2, 7.3, 7.4, 'snoopy'],
            [7.1, 7.2, 7.3, None, None],
        ]

        # Matrix with a bad row
        data9 = [
            [23.1, 23.2, 23.3, 23.4, 23.5],
            None,
            [23.1, 23.0, 23.0, 23.0, 23.0],
        ]

        # Various references that don't point to matrices at all
        data10 = "snoopy"
        data11 = {'a': [[2.3, 1.2], [3.3, 5.6]]}
        data12 = []
        data13 = [None]

        # Array of incorrect rank
        data14 = numpy.array([
            [[1.1, 1.2], [2.3, 1.2], [3.4, 1.6]],
            [[1.4, 1.3], [3.2, 4.5], [9.8, 4.9]],
            [[1.1, 1.5], [1.1, 2.3], [6.5, 0.4]],
        ])

        # Array with non-numerical values
        data15 = numpy.array([['a', 'b', 'c'], ['e', 'f', 'g']], 'c')

        # Empty array
        data16 = numpy.array([[]], 'd')

        # Only the dictionary is expected to raise TypeError; everything
        # else is expected to raise ValueError.
        rejected = (
            (ValueError, data7),
            (ValueError, data8),
            (ValueError, data9),
            (ValueError, data10),
            (TypeError, data11),
            (ValueError, data12),
            (ValueError, data13),
            (ValueError, data14),
            (ValueError, data15),
            (ValueError, data16),
        )
        for error, matrix in rejected:
            self.assertRaises(error, treecluster, matrix)
コード例 #11
0
    def test_treecluster(self):
        if TestCluster.module == 'Bio.Cluster':
            from Bio.Cluster import treecluster
        elif TestCluster.module == 'Pycluster':
            from Pycluster import treecluster

        # First data set
        weight1 = [1, 1, 1, 1, 1]
        data1 = numpy.array([[1.1, 2.2, 3.3, 4.4, 5.5],
                             [3.1, 3.2, 1.3, 2.4, 1.5],
                             [4.1, 2.2, 0.3, 5.4, 0.5],
                             [9.7, 2.0, 0.0, 5.0, 0.0]])
        mask1 = numpy.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                             [1, 1, 1, 1, 1]], int)

        # test first data set
        # Pairwise average-linkage clustering
        tree = treecluster(data=data1,
                           mask=mask1,
                           weight=weight1,
                           transpose=0,
                           method='a',
                           dist='e')
        self.assertEqual(len(tree), len(data1) - 1)
        self.assertEqual(tree[0].left, 2)
        self.assertEqual(tree[0].right, 1)
        self.assertAlmostEqual(tree[0].distance, 2.600, places=3)
        self.assertEqual(tree[1].left, -1)
        self.assertEqual(tree[1].right, 0)
        self.assertAlmostEqual(tree[1].distance, 7.300, places=3)
        self.assertEqual(tree[2].left, 3)
        self.assertEqual(tree[2].right, -2)
        self.assertAlmostEqual(tree[2].distance, 13.540, places=3)
        indices = tree.sort([0, 1, 2, 3])
        self.assertEqual(len(indices), len(data1))
        self.assertEqual(indices[0], 0)
        self.assertEqual(indices[1], 1)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 3)
        indices = tree.sort([0, 3, 2, 1])
        self.assertEqual(len(indices), len(data1))
        self.assertEqual(indices[0], 3)
        self.assertEqual(indices[1], 0)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 1)

        # Pairwise single-linkage clustering
        tree = treecluster(data=data1,
                           mask=mask1,
                           weight=weight1,
                           transpose=0,
                           method='s',
                           dist='e')
        self.assertEqual(len(tree), len(data1) - 1)
        self.assertEqual(tree[0].left, 1)
        self.assertEqual(tree[0].right, 2)
        self.assertAlmostEqual(tree[0].distance, 2.600, places=3)
        self.assertEqual(tree[1].left, 0)
        self.assertEqual(tree[1].right, -1)
        self.assertAlmostEqual(tree[1].distance, 5.800, places=3)
        self.assertEqual(tree[2].left, -2)
        self.assertEqual(tree[2].right, 3)
        self.assertAlmostEqual(tree[2].distance, 6.380, places=3)
        indices = tree.sort([0, 1, 2, 3])
        self.assertEqual(len(indices), len(data1))
        self.assertEqual(indices[0], 0)
        self.assertEqual(indices[1], 1)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 3)
        indices = tree.sort([0, 3, 2, 1])
        self.assertEqual(len(indices), len(data1))
        self.assertEqual(indices[0], 3)
        self.assertEqual(indices[1], 0)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 1)

        # Pairwise centroid-linkage clustering
        tree = treecluster(data=data1,
                           mask=mask1,
                           weight=weight1,
                           transpose=0,
                           method='c',
                           dist='e')
        self.assertEqual(len(tree), len(data1) - 1)
        self.assertEqual(tree[0].left, 1)
        self.assertEqual(tree[0].right, 2)
        self.assertAlmostEqual(tree[0].distance, 2.600, places=3)
        self.assertEqual(tree[1].left, 0)
        self.assertEqual(tree[1].right, -1)
        self.assertAlmostEqual(tree[1].distance, 6.650, places=3)
        self.assertEqual(tree[2].left, -2)
        self.assertEqual(tree[2].right, 3)
        self.assertAlmostEqual(tree[2].distance, 11.629, places=3)
        indices = tree.sort([0, 1, 2, 3])
        self.assertEqual(len(indices), len(data1))
        self.assertEqual(indices[0], 0)
        self.assertEqual(indices[1], 1)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 3)
        indices = tree.sort([0, 3, 2, 1])
        self.assertEqual(len(indices), len(data1))
        self.assertEqual(indices[0], 3)
        self.assertEqual(indices[1], 0)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 1)

        # Pairwise maximum-linkage clustering
        tree = treecluster(data=data1,
                           mask=mask1,
                           weight=weight1,
                           transpose=0,
                           method='m',
                           dist='e')
        self.assertEqual(len(tree), len(data1) - 1)
        self.assertEqual(tree[0].left, 2)
        self.assertEqual(tree[0].right, 1)
        self.assertAlmostEqual(tree[0].distance, 2.600, places=3)
        self.assertEqual(tree[1].left, -1)
        self.assertEqual(tree[1].right, 0)
        self.assertAlmostEqual(tree[1].distance, 8.800, places=3)
        self.assertEqual(tree[2].left, 3)
        self.assertEqual(tree[2].right, -2)
        self.assertAlmostEqual(tree[2].distance, 23.100, places=3)
        indices = tree.sort([0, 1, 2, 3])
        self.assertEqual(len(indices), len(data1))
        self.assertEqual(indices[0], 0)
        self.assertEqual(indices[1], 1)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 3)
        indices = tree.sort([0, 3, 2, 1])
        self.assertEqual(len(indices), len(data1))
        self.assertEqual(indices[0], 3)
        self.assertEqual(indices[1], 0)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 1)

        # Second data set
        weight2 = [1, 1]
        data2 = numpy.array([[0.8223, 0.9295], [1.4365, 1.3223],
                             [1.1623, 1.5364], [2.1826, 1.1934],
                             [1.7763, 1.9352], [1.7215, 1.9912],
                             [2.1812, 5.9935], [5.3290, 5.9452],
                             [3.1491, 3.3454], [5.1923, 5.3156],
                             [4.7735, 5.4012], [5.1297, 5.5645],
                             [5.3934, 5.1823]])
        mask2 = numpy.array(
            [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1],
             [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]], int)

        # Test second data set
        # Pairwise average-linkage clustering
        tree = treecluster(data=data2,
                           mask=mask2,
                           weight=weight2,
                           transpose=0,
                           method='a',
                           dist='e')
        self.assertEqual(len(tree), len(data2) - 1)
        self.assertEqual(tree[0].left, 5)
        self.assertEqual(tree[0].right, 4)
        self.assertAlmostEqual(tree[0].distance, 0.003, places=3)
        self.assertEqual(tree[1].left, 9)
        self.assertEqual(tree[1].right, 12)
        self.assertAlmostEqual(tree[1].distance, 0.029, places=3)
        self.assertEqual(tree[2].left, 2)
        self.assertEqual(tree[2].right, 1)
        self.assertAlmostEqual(tree[2].distance, 0.061, places=3)
        self.assertEqual(tree[3].left, 11)
        self.assertEqual(tree[3].right, -2)
        self.assertAlmostEqual(tree[3].distance, 0.070, places=3)
        self.assertEqual(tree[4].left, -4)
        self.assertEqual(tree[4].right, 10)
        self.assertAlmostEqual(tree[4].distance, 0.128, places=3)
        self.assertEqual(tree[5].left, 7)
        self.assertEqual(tree[5].right, -5)
        self.assertAlmostEqual(tree[5].distance, 0.224, places=3)
        self.assertEqual(tree[6].left, -3)
        self.assertEqual(tree[6].right, 0)
        self.assertAlmostEqual(tree[6].distance, 0.254, places=3)
        self.assertEqual(tree[7].left, -1)
        self.assertEqual(tree[7].right, 3)
        self.assertAlmostEqual(tree[7].distance, 0.391, places=3)
        self.assertEqual(tree[8].left, -8)
        self.assertEqual(tree[8].right, -7)
        self.assertAlmostEqual(tree[8].distance, 0.532, places=3)
        self.assertEqual(tree[9].left, 8)
        self.assertEqual(tree[9].right, -9)
        self.assertAlmostEqual(tree[9].distance, 3.234, places=3)
        self.assertEqual(tree[10].left, -6)
        self.assertEqual(tree[10].right, 6)
        self.assertAlmostEqual(tree[10].distance, 4.636, places=3)
        self.assertEqual(tree[11].left, -11)
        self.assertEqual(tree[11].right, -10)
        self.assertAlmostEqual(tree[11].distance, 12.741, places=3)
        indices = tree.sort()
        self.assertEqual(len(indices), len(data2))
        self.assertEqual(indices[0], 7)
        self.assertEqual(indices[1], 11)
        self.assertEqual(indices[2], 9)
        self.assertEqual(indices[3], 12)
        self.assertEqual(indices[4], 10)
        self.assertEqual(indices[5], 6)
        self.assertEqual(indices[6], 8)
        self.assertEqual(indices[7], 5)
        self.assertEqual(indices[8], 4)
        self.assertEqual(indices[9], 3)
        self.assertEqual(indices[10], 2)
        self.assertEqual(indices[11], 1)
        self.assertEqual(indices[12], 0)

        # Pairwise single-linkage clustering
        tree = treecluster(data=data2,
                           mask=mask2,
                           weight=weight2,
                           transpose=0,
                           method='s',
                           dist='e')
        self.assertEqual(len(tree), len(data2) - 1)
        self.assertEqual(tree[0].left, 4)
        self.assertEqual(tree[0].right, 5)
        self.assertAlmostEqual(tree[0].distance, 0.003, places=3)
        self.assertEqual(tree[1].left, 9)
        self.assertEqual(tree[1].right, 12)
        self.assertAlmostEqual(tree[1].distance, 0.029, places=3)
        self.assertEqual(tree[2].left, 11)
        self.assertEqual(tree[2].right, -2)
        self.assertAlmostEqual(tree[2].distance, 0.033, places=3)
        self.assertEqual(tree[3].left, 1)
        self.assertEqual(tree[3].right, 2)
        self.assertAlmostEqual(tree[3].distance, 0.061, places=3)
        self.assertEqual(tree[4].left, 10)
        self.assertEqual(tree[4].right, -3)
        self.assertAlmostEqual(tree[4].distance, 0.077, places=3)
        self.assertEqual(tree[5].left, 7)
        self.assertEqual(tree[5].right, -5)
        self.assertAlmostEqual(tree[5].distance, 0.092, places=3)
        self.assertEqual(tree[6].left, 0)
        self.assertEqual(tree[6].right, -4)
        self.assertAlmostEqual(tree[6].distance, 0.242, places=3)
        self.assertEqual(tree[7].left, -7)
        self.assertEqual(tree[7].right, -1)
        self.assertAlmostEqual(tree[7].distance, 0.246, places=3)
        self.assertEqual(tree[8].left, 3)
        self.assertEqual(tree[8].right, -8)
        self.assertAlmostEqual(tree[8].distance, 0.287, places=3)
        self.assertEqual(tree[9].left, -9)
        self.assertEqual(tree[9].right, 8)
        self.assertAlmostEqual(tree[9].distance, 1.936, places=3)
        self.assertEqual(tree[10].left, -10)
        self.assertEqual(tree[10].right, -6)
        self.assertAlmostEqual(tree[10].distance, 3.432, places=3)
        self.assertEqual(tree[11].left, 6)
        self.assertEqual(tree[11].right, -11)
        self.assertAlmostEqual(tree[11].distance, 3.535, places=3)
        indices = tree.sort()
        self.assertEqual(len(indices), len(data2))
        self.assertEqual(indices[0], 6)
        self.assertEqual(indices[1], 3)
        self.assertEqual(indices[2], 0)
        self.assertEqual(indices[3], 1)
        self.assertEqual(indices[4], 2)
        self.assertEqual(indices[5], 4)
        self.assertEqual(indices[6], 5)
        self.assertEqual(indices[7], 8)
        self.assertEqual(indices[8], 7)
        self.assertEqual(indices[9], 10)
        self.assertEqual(indices[10], 11)
        self.assertEqual(indices[11], 9)
        self.assertEqual(indices[12], 12)

        # Pairwise centroid-linkage clustering
        tree = treecluster(data=data2,
                           mask=mask2,
                           weight=weight2,
                           transpose=0,
                           method='c',
                           dist='e')
        self.assertEqual(len(tree), len(data2) - 1)
        self.assertEqual(tree[0].left, 4)
        self.assertEqual(tree[0].right, 5)
        self.assertAlmostEqual(tree[0].distance, 0.003, places=3)
        self.assertEqual(tree[1].left, 12)
        self.assertEqual(tree[1].right, 9)
        self.assertAlmostEqual(tree[1].distance, 0.029, places=3)
        self.assertEqual(tree[2].left, 1)
        self.assertEqual(tree[2].right, 2)
        self.assertAlmostEqual(tree[2].distance, 0.061, places=3)
        self.assertEqual(tree[3].left, -2)
        self.assertEqual(tree[3].right, 11)
        self.assertAlmostEqual(tree[3].distance, 0.063, places=3)
        self.assertEqual(tree[4].left, 10)
        self.assertEqual(tree[4].right, -4)
        self.assertAlmostEqual(tree[4].distance, 0.109, places=3)
        self.assertEqual(tree[5].left, -5)
        self.assertEqual(tree[5].right, 7)
        self.assertAlmostEqual(tree[5].distance, 0.189, places=3)
        self.assertEqual(tree[6].left, 0)
        self.assertEqual(tree[6].right, -3)
        self.assertAlmostEqual(tree[6].distance, 0.239, places=3)
        self.assertEqual(tree[7].left, 3)
        self.assertEqual(tree[7].right, -1)
        self.assertAlmostEqual(tree[7].distance, 0.390, places=3)
        self.assertEqual(tree[8].left, -7)
        self.assertEqual(tree[8].right, -8)
        self.assertAlmostEqual(tree[8].distance, 0.382, places=3)
        self.assertEqual(tree[9].left, -9)
        self.assertEqual(tree[9].right, 8)
        self.assertAlmostEqual(tree[9].distance, 3.063, places=3)
        self.assertEqual(tree[10].left, 6)
        self.assertEqual(tree[10].right, -6)
        self.assertAlmostEqual(tree[10].distance, 4.578, places=3)
        self.assertEqual(tree[11].left, -10)
        self.assertEqual(tree[11].right, -11)
        self.assertAlmostEqual(tree[11].distance, 11.536, places=3)
        indices = tree.sort()
        self.assertEqual(len(indices), len(data2))
        self.assertEqual(indices[0], 0)
        self.assertEqual(indices[1], 1)
        self.assertEqual(indices[2], 2)
        self.assertEqual(indices[3], 3)
        self.assertEqual(indices[4], 4)
        self.assertEqual(indices[5], 5)
        self.assertEqual(indices[6], 8)
        self.assertEqual(indices[7], 6)
        self.assertEqual(indices[8], 10)
        self.assertEqual(indices[9], 12)
        self.assertEqual(indices[10], 9)
        self.assertEqual(indices[11], 11)
        self.assertEqual(indices[12], 7)

        # Pairwise maximum-linkage clustering
        tree = treecluster(data=data2,
                           mask=mask2,
                           weight=weight2,
                           transpose=0,
                           method='m',
                           dist='e')
        self.assertEqual(len(tree), len(data2) - 1)
        self.assertEqual(tree[0].left, 5)
        self.assertEqual(tree[0].right, 4)
        self.assertAlmostEqual(tree[0].distance, 0.003, places=3)
        self.assertEqual(tree[1].left, 9)
        self.assertEqual(tree[1].right, 12)
        self.assertAlmostEqual(tree[1].distance, 0.029, places=3)
        self.assertEqual(tree[2].left, 2)
        self.assertEqual(tree[2].right, 1)
        self.assertAlmostEqual(tree[2].distance, 0.061, places=3)
        self.assertEqual(tree[3].left, 11)
        self.assertEqual(tree[3].right, 10)
        self.assertAlmostEqual(tree[3].distance, 0.077, places=3)
        self.assertEqual(tree[4].left, -2)
        self.assertEqual(tree[4].right, -4)
        self.assertAlmostEqual(tree[4].distance, 0.216, places=3)
        self.assertEqual(tree[5].left, -3)
        self.assertEqual(tree[5].right, 0)
        self.assertAlmostEqual(tree[5].distance, 0.266, places=3)
        self.assertEqual(tree[6].left, -5)
        self.assertEqual(tree[6].right, 7)
        self.assertAlmostEqual(tree[6].distance, 0.302, places=3)
        self.assertEqual(tree[7].left, -1)
        self.assertEqual(tree[7].right, 3)
        self.assertAlmostEqual(tree[7].distance, 0.425, places=3)
        self.assertEqual(tree[8].left, -8)
        self.assertEqual(tree[8].right, -6)
        self.assertAlmostEqual(tree[8].distance, 0.968, places=3)
        self.assertEqual(tree[9].left, 8)
        self.assertEqual(tree[9].right, 6)
        self.assertAlmostEqual(tree[9].distance, 3.975, places=3)
        self.assertEqual(tree[10].left, -10)
        self.assertEqual(tree[10].right, -7)
        self.assertAlmostEqual(tree[10].distance, 5.755, places=3)
        self.assertEqual(tree[11].left, -11)
        self.assertEqual(tree[11].right, -9)
        self.assertAlmostEqual(tree[11].distance, 22.734, places=3)
        indices = tree.sort()
        self.assertEqual(len(indices), len(data2))
        self.assertEqual(indices[0], 8)
        self.assertEqual(indices[1], 6)
        self.assertEqual(indices[2], 9)
        self.assertEqual(indices[3], 12)
        self.assertEqual(indices[4], 11)
        self.assertEqual(indices[5], 10)
        self.assertEqual(indices[6], 7)
        self.assertEqual(indices[7], 5)
        self.assertEqual(indices[8], 4)
        self.assertEqual(indices[9], 3)
        self.assertEqual(indices[10], 2)
        self.assertEqual(indices[11], 1)
        self.assertEqual(indices[12], 0)
コード例 #12
0
    def test_mask_parse(self):
        """Check which ``mask`` arguments treecluster accepts and rejects.

        A fixed 4x5 data matrix is clustered together with a series of
        candidate masks.  ``mask1`` .. ``mask6`` must be accepted without
        any exception; ``mask7`` .. ``mask16`` must raise ValueError and
        ``mask17`` / ``mask18`` must raise TypeError.
        """
        if TestCluster.module == 'Bio.Cluster':
            from Bio.Cluster import treecluster
        elif TestCluster.module == 'Pycluster':
            from Pycluster import treecluster

        # data matrix
        data = numpy.array([[1.1, 2.2, 3.3, 4.4, 5.5],
                            [3.1, 3.2, 1.3, 2.4, 1.5],
                            [4.1, 2.2, 0.3, 5.4, 0.5],
                            [2.1, 2.0, 0.0, 5.0, 0.0]])

        # Normal mask, no errors
        mask1 = numpy.array([[1, 1, 0, 1, 0], [1, 1, 1, 0, 0], [1, 1, 0, 1, 1],
                             [1, 0, 1, 1, 0]])

        # Same mask, no errors; written as a list
        mask2 = [[1, 1, 0, 1, 0], [1, 1, 1, 0, 0], [1, 1, 0, 1, 1],
                 [1, 0, 1, 1, 0]]

        # Rows are not contiguous (every second row of an 8-row array)
        mask3 = numpy.array([[1, 1, 0, 1, 0], [1, 1, 1, 0, 0], [1, 1, 1, 0, 0],
                             [1, 1, 0, 1, 1], [1, 1, 1, 0, 0], [1, 1, 0, 1, 1],
                             [1, 1, 0, 1, 1], [1, 0, 1, 1, 0]])
        mask3 = mask3[::2, :]

        # Columns are not contiguous (every second column of a 10-column array)
        mask4 = numpy.array([[1, 1, 0, 1, 0, 1, 0, 0, 1, 1],
                             [1, 1, 1, 0, 0, 1, 1, 0, 0, 1],
                             [1, 1, 0, 1, 1, 1, 0, 1, 1, 0],
                             [1, 0, 1, 1, 0, 1, 0, 0, 1, 1]])
        mask4 = mask4[:, ::2]

        # Matrix using int16
        mask5 = numpy.array([[1, 1, 0, 1, 0], [1, 1, 1, 0, 0], [1, 1, 1, 0, 0],
                             [1, 1, 0, 1, 1]], numpy.int16)

        # Matrix using float.  The builtin ``float`` is used as the dtype
        # because the ``numpy.float`` alias no longer exists in current
        # NumPy releases; both name the same double-precision dtype.
        mask6 = numpy.array(
            [[1.0, 2.2, 3.1, 4.8, 5.1], [3.3, 3.3, 1.4, 2.4, 1.2],
             [4.1, 2.2, 0.6, 5.5, 0.6], [2.7, 2.5, 0.4, 5.7, 0.2]],
            float)

        # Every well-formed mask must be accepted; any exception is reported
        # as a test failure that names the offending mask.
        accepted = (("mask1", mask1), ("mask2", mask2), ("mask3", mask3),
                    ("mask4", mask4), ("mask5", mask5), ("mask6", mask6))
        for name, mask in accepted:
            try:
                treecluster(data, mask)
            except Exception:
                self.fail("treecluster failed to accept matrix %s" % name)

        # Ragged mask
        mask7 = [[1, 1, 0, 1], [1, 1, 1, 0, 0], [1, 1, 0, 1, 1], [1, 1, 0]]

        # Mask with incorrect number of rows
        mask8 = numpy.array([[1, 1, 0, 1, 0], [1, 1, 1, 0, 0], [1, 1, 0, 1, 1],
                             [0, 1, 1, 0, 1], [1, 0, 1, 1, 0]])

        # Mask with incorrect number of columns
        mask9 = numpy.array([[1, 1, 0, 1, 0, 1], [1, 1, 1, 0, 0, 0],
                             [0, 1, 1, 0, 1, 1], [1, 0, 1, 1, 0, 1]])

        # Matrix with bad cells
        mask10 = [[1, 1, 0, 1, 0], [1, 1, 1, 0, 'snoopy'], [1, 1, 0, 1, 1],
                  [1, 0, 1, 1, 0]]

        # Matrix with a bad row
        mask11 = [[1, 1, 0, 1, 0], None, [1, 1, 0, 1, 1], [1, 0, 1, 1, 0]]

        # Array with non-numerical values
        mask12 = numpy.array([['a', 'b', 'c'], ['e', 'f', 'g']], 'c')

        # Empty arrays
        mask13 = numpy.array([[]], 'd')
        mask14 = []

        # Array of incorrect rank
        mask15 = numpy.array([[[1, 1], [0, 1], [1, 1]],
                              [[1, 1], [0, 1], [1, 1]],
                              [[1, 1], [1, 1], [1, 0]]])

        # References that cannot be converted to a matrix of int
        mask16 = "snoopy"
        mask17 = {'a': [[1, 0], [1, 1]]}
        mask18 = [None]

        # Kept as individual assertions so a failing traceback points at the
        # exact mask that was wrongly accepted.
        self.assertRaises(ValueError, treecluster, data, mask7)
        self.assertRaises(ValueError, treecluster, data, mask8)
        self.assertRaises(ValueError, treecluster, data, mask9)
        self.assertRaises(ValueError, treecluster, data, mask10)
        self.assertRaises(ValueError, treecluster, data, mask11)
        self.assertRaises(ValueError, treecluster, data, mask12)
        self.assertRaises(ValueError, treecluster, data, mask13)
        self.assertRaises(ValueError, treecluster, data, mask14)
        self.assertRaises(ValueError, treecluster, data, mask15)
        self.assertRaises(ValueError, treecluster, data, mask16)
        self.assertRaises(TypeError, treecluster, data, mask17)
        self.assertRaises(TypeError, treecluster, data, mask18)
コード例 #13
0
ファイル: test_Cluster.py プロジェクト: mlyne/Scripts
def test_matrix_parse(module):
    """Print whether treecluster accepts or refuses a set of data matrices.

    ``module`` selects the implementation under test and must be either
    'Bio.Cluster' or 'Pycluster'; any other value raises ValueError.

    ``data1`` and ``data2`` are well-formed and are expected to be read;
    ``data3`` .. ``data10`` are malformed and are expected to be refused.
    One line is printed per input, followed by a blank line.
    """
    if module == 'Bio.Cluster':
        from Bio.Cluster import treecluster
    elif module == 'Pycluster':
        from Pycluster import treecluster
    else:
        # A real exception class is required here: raising a plain string
        # is itself a TypeError on Python 2.6 and later.
        raise ValueError('Unknown module name: %s' % module)

    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print("test_matrix_parse:")

    # Normal matrix, no errors
    data1 = array([[1.1, 1.2],
                   [1.4, 1.3],
                   [1.1, 1.5],
                   [2.0, 1.5],
                   [1.7, 1.9],
                   [1.7, 1.9],
                   [5.7, 5.9],
                   [5.7, 5.9],
                   [3.1, 3.3],
                   [5.4, 5.3],
                   [5.1, 5.5],
                   [5.0, 5.5],
                   [5.1, 5.2]])

    # Another normal matrix, no errors; written as a list
    data2 = [[1.1, 2.2, 3.3, 4.4, 5.5],
             [3.1, 3.2, 1.3, 2.4, 1.5],
             [4.1, 2.2, 0.3, 5.4, 0.5],
             [12.1, 2.0, 0.0, 5.0, 0.0]]

    # Ragged matrix
    data3 = [[91.1, 92.2, 93.3, 94.4, 95.5],
             [93.1, 93.2, 91.3, 92.4],
             [94.1, 92.2, 90.3],
             [12.1, 92.0, 90.0, 95.0, 90.0]]

    # Matrix with bad cells
    data4 = [[7.1, 7.2, 7.3, 7.4, 7.5],
             [7.1, 7.2, 7.3, 7.4, 'snoopy'],
             [7.1, 7.2, 7.3, None, None]]

    # Matrix with a bad row
    data5 = [[23.1, 23.2, 23.3, 23.4, 23.5],
             None,
             [23.1, 23.0, 23.0, 23.0, 23.0]]

    # Various references that don't point to matrices at all
    data6 = "snoopy"
    data7 = {'a': [[2.3, 1.2], [3.3, 5.6]]}
    data8 = []
    data9 = [None]
    data10 = [[None]]

    # Well-formed input: success is the expected outcome.  A failure is
    # now actually reported instead of being silently discarded.
    for name, data in (("data1", data1), ("data2", data2)):
        try:
            treecluster(data)
        except Exception:
            print("Error: treecluster failed to accept matrix %s" % name)
        else:
            print("Read %s (correct)" % name)

    # Malformed input: an exception is the expected outcome.
    rejected = (("data3", data3), ("data4", data4), ("data5", data5),
                ("data6", data6), ("data7", data7), ("data8", data8),
                ("data9", data9), ("data10", data10))
    for name, data in rejected:
        try:
            treecluster(data)
        except Exception:
            print("Refused incorrect matrix %s" % name)
        else:
            print("Error: treecluster incorrectly accepted %s" % name)

    # Blank separator line after this test's output.
    print("")
コード例 #14
0
ファイル: test_Cluster.py プロジェクト: mlyne/Scripts
def _print_clusterings(treecluster, data, mask, weight):
    """Run treecluster once per linkage method and print each resulting tree."""
    linkages = (('a', "average"), ('s', "single"),
                ('c', "centroid"), ('m', "maximum"))
    expected = len(data) - 1
    for method, label in linkages:
        print("Pairwise %s-linkage clustering" % label)
        # This call style expects treecluster to return a pair: the node
        # list and the matching link distances.
        result, linkdist = treecluster(data=data, mask=mask, weight=weight,
                                       transpose=0, method=method, dist='e')
        print("Number of nodes is %d (should be %d)"
              % (len(result), expected))
        print("Number of link distances is %d (should be %d)"
              % (len(linkdist), expected))
        for i, node in enumerate(result):
            print("Node %3d joins node %3d with node %3d; "
                  "link distance is %7.3f"
                  % (i, node[0], node[1], linkdist[i]))


def test_treecluster(module):
    """Print the hierarchical clusterings of two fixed data sets.

    ``module`` selects the implementation under test and must be either
    'Bio.Cluster' or 'Pycluster'; any other value raises ValueError.

    Each data set is clustered with average, single, centroid and maximum
    linkage (method 'a', 's', 'c', 'm') using dist='e', and the node count,
    link-distance count and every node are printed.  The first data set is
    additionally dumped with ``print_matrix`` before clustering.

    NOTE(review): the ``result, linkdist`` unpacking assumes the tuple
    return style of treecluster; confirm the targeted library version
    still returns a pair rather than a single tree object.
    """
    if module == 'Bio.Cluster':
        from Bio.Cluster import treecluster
    elif module == 'Pycluster':
        from Pycluster import treecluster
    else:
        # A real exception class is required here: raising a plain string
        # is itself a TypeError on Python 2.6 and later.
        raise ValueError('Unknown module name: %s' % module)

    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print("test_treecluster:")

    # First data set
    weight1 = [1, 1, 1, 1, 1]
    data1 = array([[1.1, 2.2, 3.3, 4.4, 5.5],
                   [3.1, 3.2, 1.3, 2.4, 1.5],
                   [4.1, 2.2, 0.3, 5.4, 0.5],
                   [12.1, 2.0, 0.0, 5.0, 0.0]])
    mask1 = array([[1, 1, 1, 1, 1],
                   [1, 1, 1, 1, 1],
                   [1, 1, 1, 1, 1],
                   [1, 1, 1, 1, 1]])

    # Second data set
    weight2 = [1, 1]
    data2 = array([[0.8223, 0.9295],
                   [1.4365, 1.3223],
                   [1.1623, 1.5364],
                   [2.1826, 1.1934],
                   [1.7763, 1.9352],
                   [1.7215, 1.9912],
                   [2.1812, 5.9935],
                   [5.3290, 5.9452],
                   [3.1491, 3.3454],
                   [5.1923, 5.3156],
                   [4.7735, 5.4012],
                   [5.1297, 5.5645],
                   [5.3934, 5.1823]])
    # All-ones mask with one row per row of data2 (nothing is masked out).
    mask2 = array([[1, 1]] * len(data2))

    # test first data set
    print("First data set:")
    print_matrix(data1, mask1)
    _print_clusterings(treecluster, data1, mask1, weight1)

    # Test second data set
    print("Second data set:")
    _print_clusterings(treecluster, data2, mask2, weight2)

    # Blank separator line after this test's output.
    print("")
from Bio.Cluster import treecluster
import numpy as np
from Bio.Cluster import distancematrix
# Demo: three ways of calling Bio.Cluster.treecluster on a small 4x4 matrix.
data=np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12],[0,1,2,3]])
tree = treecluster(data)
print(tree)
# There is no real example data set available, so a toy array is used.
# In any case, a tree can be built directly from an array like this.
# NOTE(review): dist="b" presumably selects the city-block distance; confirm
# against the Bio.Cluster documentation.
tree = treecluster(data,dist="b",distancematrix=None)
print(tree)
# Other options can be passed as well.
distances=distancematrix((data))
tree = treecluster(data=None,distancematrix=distances)
print(tree)
# The distance matrix can also be computed beforehand and the tree built from it.
# ValueError: use either data or distancematrix; do not use both
# Exactly one of data and distancematrix must be None; otherwise the error
# above is raised.