Example #1
def run_clustering(algorithm):
    """Run all datasets"""

    _run_dataset('HART', testloader.load_hartigan(), 3, algorithm)
    _run_dataset('IRIS', testloader.load_iris(), 3, algorithm)
    _run_dataset('SOYS', testloader.load_soy_small(), 4, algorithm)
    _run_dataset('WINE', testloader.load_wine(), 3, algorithm)
    _run_dataset('WBCO', testloader.load_wbco(), 2, algorithm)
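The `_run_dataset` helper is not shown in this example. A minimal sketch of what such a helper might look like, assuming `algorithm` is one of the project's initialisation modules exposing `generate(data, num_clusters)` as in Example #5 (the body below is an assumption, not the project's code):

from sklearn.cluster import KMeans

def _run_dataset(label, dataset, num_clusters, algorithm):
    """Hypothetical helper: seed k-means with the algorithm's centroids and report the fit."""
    centroids = algorithm.generate(dataset.data, num_clusters)

    est = KMeans(n_clusters=num_clusters, init=centroids, n_init=1)
    est.fit(dataset.data)

    print(label, est.inertia_)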
Example #2
    def test_bigger(self):
        """Try an actual dataset"""
        dataset = testloader.load_iris()

        matrix = self._proc.process(dataset.data)
        ranges = matrix.max(axis=0) - matrix.min(axis=0)

        expected = np.array([1., 1., 1., 1.])
        np.testing.assert_equal(ranges, expected)
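The `_proc` object under test is not shown; the assertion that every feature's range becomes exactly 1.0 suggests a min-max style rescaler. A minimal standalone sketch of that idea (the function below is an assumption, not the project's processor):

import numpy as np

def process(data):
    """Min-max scale each feature to [0, 1], so max - min == 1 per column."""
    data = np.asarray(data, dtype=float)
    col_min = data.min(axis=0)
    col_range = data.max(axis=0) - col_min
    # A constant column would divide by zero here; the real processor presumably guards against that
    return (data - col_min) / col_range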
Example #3
    def test_with_iris(self):
        """Crude integration-style test until I can break it down a little.
        Does not trigger the merge stage.
        """

        dataset = testloader.load_iris()

        seeds = ccia.generate(dataset.data, 3)

        # As emitted by the Java
        expected = np.array([[5.006, 3.428, 1.462, 0.246],
                             [6.85384615, 3.07692308, 5.71538462, 2.05384615],
                             [5.88360656, 2.74098361, 4.38852459, 1.43442623]])

        np.testing.assert_array_almost_equal(seeds, expected)
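For context, the CCIA seeds would typically be handed to k-means in the same way as Examples #4 and #5; a brief sketch, reusing the `testloader` and `ccia` names imported by the test module above:

from sklearn.cluster import KMeans

dataset = testloader.load_iris()
seeds = ccia.generate(dataset.data, 3)

est = KMeans(n_clusters=3, init=seeds, n_init=1)
est.fit(dataset.data)
print(est.cluster_centers_)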
Example #4
    def test_against_iris(self):
        """Test run against Iris dataset, as used in the paper"""

        dataset = testloader.load_iris()
        data = dataset.data
        target = dataset.target

        num_clusters = 3

        centroids = yuan.generate(data, num_clusters)

        # sanity check shape
        self.assertEqual(centroids.shape, (num_clusters, 4))

        # run kmeans
        est = KMeans(n_clusters=num_clusters, init=centroids, n_init=1)
        est.fit(data)

        score = accuracy.score(target, est.labels_)

        # Accuracy claimed in the paper, though random initialisation reaches this fairly often
        self.assertAlmostEqual(0.886667, score, places=6)
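`accuracy.score` comes from the project and its import is not shown. Clustering accuracy is normally computed after matching predicted cluster labels to the true classes; the sketch below shows one common way to do that with SciPy's Hungarian-algorithm solver, offered as a guess at the kind of scoring involved rather than the project's actual implementation:

import numpy as np
from scipy.optimize import linear_sum_assignment

def clustering_accuracy(target, labels):
    """Best-match accuracy: align cluster labels to true classes, then score."""
    target = np.asarray(target)
    labels = np.asarray(labels)
    classes = np.unique(target)
    clusters = np.unique(labels)

    # Contingency table: rows are predicted clusters, columns are true classes
    contingency = np.array([[np.sum((labels == c) & (target == k))
                             for k in classes]
                            for c in clusters])

    # Choose the cluster-to-class matching that maximises matched points
    row_ind, col_ind = linear_sum_assignment(-contingency)
    return contingency[row_ind, col_ind].sum() / len(target)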
Example #5
"""Temp bootstrap file to run KM++"""
import pathhack

from sklearn.cluster import KMeans

from datasets import testloader
from initialisations import kmeansplusplus as alg


dataset = testloader.load_iris()
num_clusters = 3

# dataset = testloader.load_hartigan()
# num_clusters = 3

# dataset = testloader.load_soy_small()
# num_clusters = 4

data = dataset.data
centroids = alg.generate(data, num_clusters)

print(centroids)

est = KMeans(n_clusters=num_clusters, init=centroids, n_init=1)
est.fit(data)

print("Final centres:\n", est.cluster_centers_)
Example #6
    def _get_test_data(self):
        """Fetch some data to test with"""

        dataset = testloader.load_iris()
        return dataset.data
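A test method in the same class might consume the fixture like this (the method name and assertion below are illustrative, not part of the project):

    def test_shape(self):
        """Illustrative only: Iris has 150 samples and 4 features."""
        data = self._get_test_data()
        self.assertEqual((150, 4), data.shape)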
Example #7
    def test_code_runs(self):
        """This needs more, but at least prove it runs"""

        dataset = testloader.load_iris()
        centroids = onoda.generate(dataset.data, 3)
        self.assertEqual((3, 4), centroids.shape)
Example #8
    def test_code_runs(self):
        """At least prove it runs"""

        dataset = testloader.load_iris()
        centroids = bfinit.generate(dataset.data, 3)
        self.assertEqual((3, 4), centroids.shape)
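Both shape-only tests above could be tightened slightly; a hedged sketch of an extra assertion, assuming the initialisation returns seeds that lie inside the data's range (which may not hold for every scheme):

import numpy as np

def assert_centroids_within_bounds(data, centroids):
    """Each centroid coordinate should fall inside the per-feature min/max of the data."""
    assert np.all(centroids >= data.min(axis=0))
    assert np.all(centroids <= data.max(axis=0))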