# NOTE(review): a class with this same name is defined again later in this
# file; if both live at module level, the later definition replaces this one
# at import time and these tests never run -- confirm which copy to keep.
class DistancesTest(unittest.TestCase):
    """
    Tests the management of distances (in the dists matrix).
    """

    def setUp(self):
        # Fit a hierarchy on the first two vectors, then attach a hand-built
        # cluster holding the remaining two vectors as a child of node 2.
        self.vecs = [[10], [20], [0], [20]]
        self.initial_vecs = self.vecs[:2]
        self.h = Hierarchy(metric='euclidean',
                           lower_limit_scale=0.1,
                           upper_limit_scale=1.5)
        self.h.fit(self.initial_vecs)

        children = [self.h.create_node(vec=vec) for vec in self.vecs[2:]]
        n = self.h.create_node(children=children)
        self.h.g.add_child(2, n)

        # Node ids the tests below rely on.
        self.leaves = [0, 1, 3, 4]
        self.clusters = [2, 5]

    def test_distance(self):
        node_k = self.h.create_node(vec=[20])

        # Distances should be symmetric.
        for n in self.leaves:
            d = self.h.get_distance(n, node_k)
            d_ = self.h.get_distance(node_k, n)
            self.assertEqual(d, d_)

    def test_update_distances(self):
        # Create some extra nodes. Only the effect on the hierarchy's
        # distance matrix is checked, so the returned nodes are not kept.
        data = np.array([[1], [2], [4], [8], [12]])
        for center in data:
            self.h.create_node(vec=center)

        # Calculate a distance matrix independently to compare to.
        # We include the vectors which initialized the hierarchy
        # and the centers of the cluster nodes, in the same order as the
        # node ids used in setUp (0, 1, cluster 2, 3, 4, cluster 5).
        old_data = (self.initial_vecs
                    + [self.h.centers[self.clusters[0]]]
                    + self.vecs[2:]
                    + [self.h.centers[self.clusters[1]]])
        data = np.insert(data, 0, old_data, axis=0)
        dist_mat = pairwise_distances(data, metric='euclidean')

        # assert_array_equal keeps the exact-equality check but reports the
        # mismatching elements (and shape mismatches) on failure.
        assert_array_equal(dist_mat, self.h.dists)

    def test_cdm(self):
        # Expecting the matrix to have rows and columns 0,1,n (n=5)
        # since those are the child nodes.
        expected = [[0., 10., 0.],
                    [10., 0., 10.],
                    [0., 10., 0.]]
        assert_array_equal(expected, self.h.cdm(2))

    def test_get_closest_leaf(self):
        node_k = self.h.create_node(vec=[11])
        result, dist = self.h.get_closest_leaf(node_k)

        self.assertEqual(result, self.leaves[0])
        self.assertEqual(dist, 1)

    def test_get_nearest_distances(self):
        d = self.h.get_nearest_distances(2)
        expected = [0., 10., 0.]
        assert_array_equal(expected, d)

    def test_get_nearest_child(self):
        """
        Nearest child of cluster 5 relative to node 1, in the tree:

              2
            +-+--+
            0 1  5
                +-+
                3 4
        """
        i, d = self.h.get_nearest_child(5, 1)
        self.assertEqual(i, 4)
        self.assertEqual(d, 0)

    def test_get_nearest_children(self):
        i, j, d = self.h.get_nearest_children(2)
        self.assertEqual(i, 0)
        self.assertEqual(j, 5)
        self.assertEqual(d, 0)

    def test_get_furthest_nearest_children(self):
        i, j, d = self.h.get_furthest_nearest_children(2)
        self.assertEqual(i, 0)
        self.assertEqual(j, 1)
        self.assertEqual(d, 10)

    def test_get_representative(self):
        r = self.h.get_representative(2)
        self.assertEqual(r, 0)

    def test_most_representative(self):
        # Incorporating these vectors puts the center of all nodes around ~22
        new_vecs = [[30], [40], [40]]
        self.h.fit(new_vecs)

        nodes = self.h.nodes
        rep = self.h.most_representative(nodes)

        # Expecting that the representative node is 1, w/ a center of [20]
        self.assertEqual(rep, 1)
# NOTE(review): a class with this same name is also defined earlier in this
# file; if both live at module level, this later definition shadows the
# earlier one -- confirm the earlier copy can be removed.
class DistancesTest(unittest.TestCase):
    """
    Tests the management of distances (in the dists matrix).
    """

    def setUp(self):
        # Start from a hierarchy fitted on the first two vectors, then add a
        # manually assembled cluster of the last two vectors beneath node 2.
        self.vecs = [[10], [20], [0], [20]]
        self.initial_vecs = self.vecs[:2]
        self.h = Hierarchy(
            metric='euclidean',
            lower_limit_scale=0.1,
            upper_limit_scale=1.5,
        )
        self.h.fit(self.initial_vecs)

        children = [self.h.create_node(vec=vec) for vec in self.vecs[2:]]
        n = self.h.create_node(children=children)
        self.h.g.add_child(2, n)

        # Ids of the leaf and cluster nodes referenced by the tests.
        self.leaves = [0, 1, 3, 4]
        self.clusters = [2, 5]

    def test_distance(self):
        node_k = self.h.create_node(vec=[20])

        # Distances should be symmetric.
        for n in self.leaves:
            d = self.h.get_distance(n, node_k)
            d_ = self.h.get_distance(node_k, n)
            self.assertEqual(d, d_)

    def test_update_distances(self):
        # Create some extra nodes. The node handles themselves are not
        # needed; the test only inspects the resulting distance matrix.
        data = np.array([[1], [2], [4], [8], [12]])
        for center in data:
            self.h.create_node(vec=center)

        # Calculate a distance matrix independently to compare to.
        # We include the vectors which initialized the hierarchy
        # and the centers of the cluster nodes, laid out in the node-id
        # order established by setUp (0, 1, cluster 2, 3, 4, cluster 5).
        old_data = (
            self.initial_vecs
            + [self.h.centers[self.clusters[0]]]
            + self.vecs[2:]
            + [self.h.centers[self.clusters[1]]]
        )
        data = np.insert(data, 0, old_data, axis=0)
        dist_mat = pairwise_distances(data, metric='euclidean')

        # Same exact-equality check as before, but a failure now shows which
        # elements (or shapes) differ instead of "False is not true".
        assert_array_equal(dist_mat, self.h.dists)

    def test_cdm(self):
        # Expecting the matrix to have rows and columns 0,1,n (n=5)
        # since those are the child nodes.
        expected = [
            [0., 10., 0.],
            [10., 0., 10.],
            [0., 10., 0.],
        ]
        assert_array_equal(expected, self.h.cdm(2))

    def test_get_closest_leaf(self):
        node_k = self.h.create_node(vec=[11])
        result, dist = self.h.get_closest_leaf(node_k)

        self.assertEqual(result, self.leaves[0])
        self.assertEqual(dist, 1)

    def test_get_nearest_distances(self):
        d = self.h.get_nearest_distances(2)
        expected = [0., 10., 0.]
        assert_array_equal(expected, d)

    def test_get_nearest_child(self):
        """
        Nearest child of cluster 5 relative to node 1, in the tree:

              2
            +-+--+
            0 1  5
                +-+
                3 4
        """
        i, d = self.h.get_nearest_child(5, 1)
        self.assertEqual(i, 4)
        self.assertEqual(d, 0)

    def test_get_nearest_children(self):
        i, j, d = self.h.get_nearest_children(2)
        self.assertEqual(i, 0)
        self.assertEqual(j, 5)
        self.assertEqual(d, 0)

    def test_get_furthest_nearest_children(self):
        i, j, d = self.h.get_furthest_nearest_children(2)
        self.assertEqual(i, 0)
        self.assertEqual(j, 1)
        self.assertEqual(d, 10)

    def test_get_representative(self):
        r = self.h.get_representative(2)
        self.assertEqual(r, 0)

    def test_most_representative(self):
        # Incorporating these vectors puts the center of all nodes around ~22
        new_vecs = [[30], [40], [40]]
        self.h.fit(new_vecs)

        nodes = self.h.nodes
        rep = self.h.most_representative(nodes)

        # Expecting that the representative node is 1, w/ a center of [20]
        self.assertEqual(rep, 1)