コード例 #1
0
ファイル: test_ihac.py プロジェクト: frnsys/galaxy
class DistancesTest(unittest.TestCase):
    """
    Tests the management of distances (in the dists matrix).

    The fixture built in ``setUp`` is expected to form this hierarchy
    (numbers are node ids):

           2
        +-+--+
        0 1  5
            +-+
            3 4

    Nodes 0, 1, 3 and 4 are leaves; nodes 2 and 5 are clusters.
    """
    def setUp(self):
        """
        Fit a hierarchy on the first two vectors, then attach a second
        cluster built from the remaining two vectors under node 2.
        """
        self.vecs = [[10], [20], [0], [20]]
        self.initial_vecs = self.vecs[:2]
        self.h = Hierarchy(metric='euclidean',
                           lower_limit_scale=0.1,
                           upper_limit_scale=1.5)
        self.h.fit(self.initial_vecs)

        # Build the extra cluster by hand rather than via `fit`, so the
        # shape of the tree is fully controlled by the test.
        children = [self.h.create_node(vec=vec) for vec in self.vecs[2:]]
        n = self.h.create_node(children=children)
        self.h.g.add_child(2, n)

        # Node ids the tests below rely on.
        self.leaves = [0, 1, 3, 4]
        self.clusters = [2, 5]

    def test_distance(self):
        """The distance between two nodes does not depend on argument order."""
        node_k = self.h.create_node(vec=[20])

        # Distances should be symmetric.
        for n in self.leaves:
            d = self.h.get_distance(n, node_k)
            d_ = self.h.get_distance(node_k, n)
            self.assertEqual(d, d_)

    def test_update_distances(self):
        """Creating nodes keeps the dists matrix equal to a fresh pairwise one."""
        # Create some extra nodes. Only the side effect on the hierarchy
        # matters here; the returned node ids are not needed.
        new_data = np.array([[1], [2], [4], [8], [12]])
        for center in new_data:
            self.h.create_node(vec=center)

        # Calculate a distance matrix independently to compare to.
        # We include the vector which initialized the hierarchy
        # and the center of the initial cluster node.
        # The rows are listed in node-id order (0, 1, 2, 3, 4, 5) so that
        # they line up with the rows of the hierarchy's own matrix.
        old_data = (self.initial_vecs
                    + [self.h.centers[self.clusters[0]]]
                    + self.vecs[2:]
                    + [self.h.centers[self.clusters[1]]])
        # NOTE: np.insert converts the inserted values to the dtype of
        # `new_data` (integers here).
        all_data = np.insert(new_data, 0, old_data, axis=0)
        dist_mat = pairwise_distances(all_data, metric='euclidean')

        # Same helper as the other matrix tests in this class; it also
        # checks the shapes and reports which entries differ on failure.
        assert_array_equal(dist_mat, self.h.dists)

    def test_cdm(self):
        """The distance matrix of node 2's children matches the expected values."""
        # Expecting the matrix to have rows and columns 0,1,n (n=5)
        # since those are the child nodes.
        expected = [[ 0., 10.,  0.],
                    [10.,  0., 10.],
                    [ 0., 10.,  0.]]
        assert_array_equal(expected, self.h.cdm(2))

    def test_get_closest_leaf(self):
        """A node at [11] is closest to the first leaf, at distance 1."""
        node_k = self.h.create_node(vec=[11])
        result, dist = self.h.get_closest_leaf(node_k)
        self.assertEqual(result, self.leaves[0])
        self.assertEqual(dist, 1)

    def test_get_nearest_distances(self):
        """Nearest distances for node 2 match the expected per-child values."""
        d = self.h.get_nearest_distances(2)
        expected = [ 0., 10.,  0.]
        assert_array_equal(expected, d)

    def test_get_nearest_child(self):
        """
        For cluster 5 and node 1, the nearest child is node 4 at distance 0.

           2
        +-+--+
        0 1  5
            +-+
            3 4
        """
        i, d = self.h.get_nearest_child(5, 1)
        self.assertEqual(i, 4)
        self.assertEqual(d, 0)

    def test_get_nearest_children(self):
        """The nearest pair of children under node 2 is (0, 5) at distance 0."""
        i, j, d = self.h.get_nearest_children(2)
        self.assertEqual(i, 0)
        self.assertEqual(j, 5)
        self.assertEqual(d, 0)

    def test_get_furthest_nearest_children(self):
        """The furthest-nearest pair under node 2 is (0, 1) at distance 10."""
        i, j, d = self.h.get_furthest_nearest_children(2)
        self.assertEqual(i, 0)
        self.assertEqual(j, 1)
        self.assertEqual(d, 10)

    def test_get_representative(self):
        """Node 0 is reported as the representative of cluster 2."""
        r = self.h.get_representative(2)
        self.assertEqual(r, 0)

    def test_most_representative(self):
        """After fitting more vectors, node 1 is the most representative node."""
        # Incorporating these vectors puts the center of all nodes around ~22
        new_vecs = [[30], [40], [40]]
        self.h.fit(new_vecs)

        rep = self.h.most_representative(self.h.nodes)

        # Expecting that the representative node is 1, w/ a center of [20]
        self.assertEqual(rep, 1)
コード例 #2
0
ファイル: test_ihac.py プロジェクト: lapulasitu/galaxy
class DistancesTest(unittest.TestCase):
    """
    Tests the management of distances (in the dists matrix).

    ``setUp`` is expected to produce the hierarchy below (node ids),
    where 0, 1, 3, 4 are leaves and 2, 5 are clusters:

           2
        +-+--+
        0 1  5
            +-+
            3 4
    """
    def setUp(self):
        """
        Build the shared fixture: a hierarchy fitted on two vectors, plus
        a hand-made cluster of two more vectors added as a child of node 2.
        """
        self.vecs = [[10], [20], [0], [20]]
        self.initial_vecs = self.vecs[:2]
        self.h = Hierarchy(metric='euclidean',
                           lower_limit_scale=0.1,
                           upper_limit_scale=1.5)
        self.h.fit(self.initial_vecs)

        # The second cluster is assembled manually so the tree layout
        # does not depend on the clustering decisions made by `fit`.
        children = [self.h.create_node(vec=vec) for vec in self.vecs[2:]]
        n = self.h.create_node(children=children)
        self.h.g.add_child(2, n)

        # Node ids referenced by the tests.
        self.leaves = [0, 1, 3, 4]
        self.clusters = [2, 5]

    def test_distance(self):
        """get_distance returns the same value for (a, b) and (b, a)."""
        node_k = self.h.create_node(vec=[20])

        # Distances should be symmetric.
        for n in self.leaves:
            d = self.h.get_distance(n, node_k)
            d_ = self.h.get_distance(node_k, n)
            self.assertEqual(d, d_)

    def test_update_distances(self):
        """The dists matrix stays in sync when new nodes are created."""
        # Create some extra nodes; the ids returned by create_node are
        # not used, only the effect on the hierarchy's distance matrix.
        extra_vecs = np.array([[1], [2], [4], [8], [12]])
        for center in extra_vecs:
            self.h.create_node(vec=center)

        # Calculate a distance matrix independently to compare to.
        # We include the vector which initialized the hierarchy
        # and the center of the initial cluster node.
        # Rows follow node-id order: leaves 0 and 1, cluster 2,
        # leaves 3 and 4, cluster 5.
        old_data = self.initial_vecs + [
            self.h.centers[self.clusters[0]]
        ] + self.vecs[2:] + [self.h.centers[self.clusters[1]]]
        # NOTE: np.insert casts the inserted rows to the dtype of
        # `extra_vecs`, which is an integer array here.
        combined = np.insert(extra_vecs, 0, old_data, axis=0)
        dist_mat = pairwise_distances(combined, metric='euclidean')

        # Use the same array assertion as the other tests in this class:
        # it verifies shape as well as values and shows the mismatches.
        assert_array_equal(dist_mat, self.h.dists)

    def test_cdm(self):
        """cdm(2) returns the expected distances among node 2's children."""
        # Expecting the matrix to have rows and columns 0,1,n (n=5)
        # since those are the child nodes.
        expected = [[0., 10., 0.], [10., 0., 10.], [0., 10., 0.]]
        assert_array_equal(expected, self.h.cdm(2))

    def test_get_closest_leaf(self):
        """The leaf closest to a new node at [11] is leaf 0, 1 unit away."""
        node_k = self.h.create_node(vec=[11])
        result, dist = self.h.get_closest_leaf(node_k)
        self.assertEqual(result, self.leaves[0])
        self.assertEqual(dist, 1)

    def test_get_nearest_distances(self):
        """get_nearest_distances(2) yields the expected value per child."""
        d = self.h.get_nearest_distances(2)
        expected = [0., 10., 0.]
        assert_array_equal(expected, d)

    def test_get_nearest_child(self):
        """
        Within cluster 5, the child nearest to node 1 is node 4 (distance 0).

           2
        +-+--+
        0 1  5
            +-+
            3 4
        """
        i, d = self.h.get_nearest_child(5, 1)
        self.assertEqual(i, 4)
        self.assertEqual(d, 0)

    def test_get_nearest_children(self):
        """Children 0 and 5 of node 2 are the nearest pair, at distance 0."""
        i, j, d = self.h.get_nearest_children(2)
        self.assertEqual(i, 0)
        self.assertEqual(j, 5)
        self.assertEqual(d, 0)

    def test_get_furthest_nearest_children(self):
        """Children 0 and 1 of node 2 are the furthest-nearest pair (10)."""
        i, j, d = self.h.get_furthest_nearest_children(2)
        self.assertEqual(i, 0)
        self.assertEqual(j, 1)
        self.assertEqual(d, 10)

    def test_get_representative(self):
        """Cluster 2 is represented by node 0."""
        r = self.h.get_representative(2)
        self.assertEqual(r, 0)

    def test_most_representative(self):
        """most_representative picks node 1 once more vectors are fitted."""
        # Incorporating these vectors puts the center of all nodes around ~22
        new_vecs = [[30], [40], [40]]
        self.h.fit(new_vecs)

        rep = self.h.most_representative(self.h.nodes)

        # Expecting that the representative node is 1, w/ a center of [20]
        self.assertEqual(rep, 1)