Beispiel #1
0
    def test_get_mean_1(self):
        """A cluster holding a single vector reports that vector as its mean."""
        sample = [1, 2, 3]

        cluster = Cluster()
        cluster.assign(sample)

        # With one member, the mean must equal that member element-wise.
        mean = cluster.get_mean()
        np.testing.assert_array_equal(mean, sample)
Beispiel #2
0
    def test_assign_retrieve_1(self):
        """A single assigned vector comes back as the only stored sample."""
        sample = [1, 2, 3]

        cluster = Cluster()
        cluster.assign(sample)

        # Expect a one-element list wrapping the assigned vector.
        stored = cluster.get_samples()
        self.assertEqual(stored, [sample])
Beispiel #3
0
    def test_get_distance_1(self):
        """Distance from a one-vector cluster to a probe point is Euclidean."""
        member = [1, 1, 1, 1]
        probe = [2, 2, 2, 2]

        cluster = Cluster()
        cluster.assign(member)

        # Each of the four coordinates differs by 1, so sqrt(4) == 2.
        distance = cluster.get_distance(probe)
        self.assertEqual(distance, 2)
Beispiel #4
0
    def test_assign_retrieve_n(self):
        """Several assigned vectors come back in assignment order."""
        samples = [
            [1, 2, 3],
            [3, 2, 1],
            [4, 5, 6],
        ]

        cluster = Cluster()
        for sample in samples:
            cluster.assign(sample)

        # assertEqual on lists is order-sensitive, so this also checks ordering.
        self.assertEqual(cluster.get_samples(), samples)
Beispiel #5
0
    def test_get_mean_n(self):
        """The mean of several vectors is their element-wise average."""
        samples = [
            [2, 2, 2],
            [3, 5, 1],
            [4, 5, 6],
        ]

        cluster = Cluster()
        for sample in samples:
            cluster.assign(sample)

        # Column sums are 9, 12 and 9; divided by 3 samples -> [3, 4, 3].
        mean = cluster.get_mean()
        np.testing.assert_array_equal(mean, [3, 4, 3])
Beispiel #6
0
    def test_merge(self):
        """Merging absorbs the other cluster's samples and updates the mean."""
        target = Cluster()
        for sample in ([1, 2, 3], [2, 3, 4]):
            target.assign(sample)

        other = Cluster()
        for sample in ([3, 4, 5], [4, 5, 6]):
            other.assign(sample)

        target.merge(other)

        # All four samples must be present; assertCountEqual ignores order.
        merged_samples = target.get_samples()
        expected_samples = [[1, 2, 3], [4, 5, 6], [2, 3, 4], [3, 4, 5]]
        self.assertCountEqual(merged_samples, expected_samples)

        # The mean must reflect all four samples, not just the original two.
        merged_mean = target.get_mean()
        np.testing.assert_array_equal(merged_mean, [2.5, 3.5, 4.5])
Beispiel #7
0
def generate(data, num_clusters):
    """Cluster ``data`` in a single pass and return the cluster means.

    The first ``num_clusters`` samples each seed a cluster of their own.
    Every later sample is handled as follows:

    1. the current clusters are passed through ``consolidate``;
    2. the distance from the sample to every cluster is computed with
       ``Cluster.get_distance``;
    3. if the smallest distance exceeds the module-level ``ROUGHENING``
       threshold the sample starts a new cluster, otherwise it is assigned
       to the nearest cluster;
    4. the clusters are passed through ``consolidate`` again.

    Because clusters can be added in step 3 (and ``consolidate`` may change
    the list), the number of returned means is not necessarily
    ``num_clusters``.

    Args:
        data: Iterable of samples. Each sample is handed unchanged to
            ``Cluster.assign`` and ``Cluster.get_distance``.
        num_clusters: Number of leading samples used to seed the initial
            clusters. If it is zero or negative, the first sample seeds a
            cluster anyway so that later distance computations always have
            at least one cluster to compare against.

    Returns:
        ``np.array`` built from ``get_mean()`` of each final cluster, in
        cluster-list order. Empty when ``data`` is empty.
    """

    clusters = []

    for index, sample in enumerate(data):

        # Create initial clusters from the first K samples. The
        # ``not clusters`` guard covers num_clusters <= 0: without it no
        # cluster would exist and ``min(distances)`` below would raise
        # ValueError on an empty sequence.
        if index < num_clusters or not clusters:
            seed = Cluster()
            seed.assign(sample)
            clusters.append(seed)
            continue

        # "If the distance between the members of this pair..."
        # NOTE(review): consolidate is defined elsewhere; presumably it
        # merges clusters that are close to each other - confirm.
        clusters = consolidate(clusters)

        # "In addition, as each new point is processed..."

        # Distance from the sample to each existing cluster.
        distances = [c.get_distance(sample) for c in clusters]

        if min(distances) > ROUGHENING:
            # Too far from every cluster: the sample starts its own.
            fresh = Cluster()
            fresh.assign(sample)
            clusters.append(fresh)
        else:
            # Close enough: assign to the nearest cluster.
            clusters[np.argmin(distances)].assign(sample)

        clusters = consolidate(clusters)

    return np.array([cl.get_mean() for cl in clusters])