def test_get_mean_1(self):
    """A cluster holding one vector reports that vector as its mean."""
    sample = [1, 2, 3]

    cluster = Cluster()
    cluster.assign(sample)

    # With a single member, the mean must equal the member element-wise.
    mean = cluster.get_mean()
    np.testing.assert_array_equal(mean, sample)
def test_assign_retrieve_1(self):
    """A single assigned vector comes back as the only stored sample."""
    sample = [1, 2, 3]

    cluster = Cluster()
    cluster.assign(sample)

    stored = cluster.get_samples()
    self.assertEqual(stored, [sample])
def test_get_distance_1(self):
    """Check the Euclidean distance from a one-vector cluster to a point."""
    cluster = Cluster()
    cluster.assign([1, 1, 1, 1])

    # Each of the four coordinates differs by 1, so the distance is
    # sqrt(1 + 1 + 1 + 1) = 2.
    target = [2, 2, 2, 2]
    distance = cluster.get_distance(target)
    self.assertEqual(distance, 2)
def test_assign_retrieve_n(self):
    """Several assigned vectors come back in the order they were added."""
    first = [1, 2, 3]
    second = [3, 2, 1]
    third = [4, 5, 6]

    cluster = Cluster()
    for sample in (first, second, third):
        cluster.assign(sample)

    stored = cluster.get_samples()
    self.assertEqual(stored, [first, second, third])
def test_get_mean_n(self):
    """The mean over several vectors is their element-wise average."""
    cluster = Cluster()
    for sample in ([2, 2, 2], [3, 5, 1], [4, 5, 6]):
        cluster.assign(sample)

    # Column sums are 9, 12 and 9 over three samples.
    mean = cluster.get_mean()
    np.testing.assert_array_equal(mean, [3, 4, 3])
def test_merge(self):
    """Merging moves the other cluster's samples in and updates the mean."""
    target = Cluster()
    for sample in ([1, 2, 3], [2, 3, 4]):
        target.assign(sample)

    other = Cluster()
    for sample in ([3, 4, 5], [4, 5, 6]):
        other.assign(sample)

    target.merge(other)

    # The merged cluster must hold all four samples; order is irrelevant.
    merged_samples = target.get_samples()
    self.assertCountEqual(
        merged_samples,
        [[1, 2, 3], [4, 5, 6], [2, 3, 4], [3, 4, 5]],
    )

    # The mean must reflect all four samples after the merge.
    merged_mean = target.get_mean()
    np.testing.assert_array_equal(merged_mean, [2.5, 3.5, 4.5])
def generate(data, num_clusters):
    """The common interface: cluster ``data`` and return the cluster means.

    The first ``num_clusters`` samples each seed a cluster of their own.
    For every later sample the current clusters are first passed through
    ``consolidate``; the sample then either starts a new cluster (when even
    the nearest cluster is further away than ``ROUGHENING``) or is assigned
    to the nearest cluster. The clusters are passed through ``consolidate``
    once more before the means are collected.

    Args:
        data: Iterable of samples. Each sample is handed unchanged to
            ``Cluster.assign`` and ``Cluster.get_distance``.
        num_clusters: Number of leading samples used as initial clusters.
            The number of clusters in the result can differ from this
            value: clusters may be added here, and the cluster list is
            replaced by whatever ``consolidate`` returns.

    Returns:
        An ``np.array`` built from ``get_mean()`` of each final cluster.
    """
    clusters = []

    for index, sample in enumerate(data):
        # Create initial clusters from the first K samples
        if index < num_clusters:
            seed = Cluster()
            seed.assign(sample)
            clusters.append(seed)
            continue

        # "If the distance between the members of this pair..."
        clusters = consolidate(clusters)

        # "In addition, as each new point is processed..."
        # Get distance from each cluster
        distances = [c.get_distance(sample) for c in clusters]

        # When no cluster exists yet (e.g. num_clusters <= 0) there is
        # nothing to measure against and min() of an empty list would raise
        # ValueError, so the sample starts a cluster of its own.
        if not distances or min(distances) > ROUGHENING:
            fresh = Cluster()
            fresh.assign(sample)
            clusters.append(fresh)
        else:
            # Assign to nearest
            clusters[np.argmin(distances)].assign(sample)

    clusters = consolidate(clusters)

    return np.array([cl.get_mean() for cl in clusters])