def run_clustering(algorithm):
    """Run all datasets.

    Drives the supplied initialisation algorithm over each benchmark
    dataset in turn, with the cluster count appropriate to that dataset.
    """
    benchmarks = [
        ('HART', testloader.load_hartigan, 3),
        ('IRIS', testloader.load_iris, 3),
        ('SOYS', testloader.load_soy_small, 4),
        ('WINE', testloader.load_wine, 3),
        ('WBCO', testloader.load_wbco, 2),
    ]
    # Same datasets, same order, same cluster counts as before —
    # just table-driven instead of one call per line.
    for label, loader, num_clusters in benchmarks:
        _run_dataset(label, loader(), num_clusters, algorithm)
def test_bigger(self):
    """Try an actual dataset.

    After processing, every feature of Iris should span exactly [0, 1],
    i.e. max - min == 1 on each of the four columns.
    """
    iris = testloader.load_iris()
    scaled = self._proc.process(iris.data)
    spans = scaled.max(axis=0) - scaled.min(axis=0)
    np.testing.assert_equal(spans, np.array([1., 1., 1., 1.]))
def test_with_iris(self):
    """Crude integration-style test until I can break it down a little.

    Does not trigger the merge stage.
    """
    iris = testloader.load_iris()
    result = ccia.generate(iris.data, 3)

    # Reference seeds as emitted by the Java implementation
    java_reference = np.array([
        [5.006, 3.428, 1.462, 0.246],
        [6.85384615, 3.07692308, 5.71538462, 2.05384615],
        [5.88360656, 2.74098361, 4.38852459, 1.43442623],
    ])
    np.testing.assert_array_almost_equal(result, java_reference)
def test_against_iris(self):
    """Test run against Iris dataset, as used in the paper.

    Generates seeds with yuan, feeds them to KMeans, and checks the
    resulting labelling accuracy against the figure quoted in the paper.
    """
    iris = testloader.load_iris()
    k = 3

    seeds = yuan.generate(iris.data, k)

    # sanity check shape: one centroid per cluster, four features each
    self.assertEqual(seeds.shape, (k, 4))

    # run kmeans seeded with the generated centroids (no restarts)
    estimator = KMeans(n_clusters=k, init=seeds, n_init=1)
    estimator.fit(iris.data)

    # Claimed in paper, though quite frankly random will get this a lot
    score = accuracy.score(iris.target, estimator.labels_)
    self.assertAlmostEqual(0.886667, score, places=6)
"""Temp bootstrap file to run KM++""" import pathhack from sklearn.cluster import KMeans from datasets import testloader from initialisations import kmeansplusplus as alg dataset = testloader.load_iris() num_clusters = 3 # dataset = testloader.load_hartigan() # num_clusters = 3 # dataset = testloader.load_soy_small() # num_clusters = 4 data = dataset.data centroids = alg.generate(data, num_clusters) print(centroids) est = KMeans(n_clusters=num_clusters, init=centroids, n_init=1) est.fit(dataset.data) print("Final centres:\n", est.cluster_centers_)
def _get_test_data(self):
    """Fetch some data to test with"""
    # Iris feature matrix only; the target labels aren't needed here
    return testloader.load_iris().data
def test_code_runs(self):
    """This needs more, but at least prove it runs"""
    iris = testloader.load_iris()
    # 3 clusters over Iris's 4 features -> expect a (3, 4) seed matrix
    result = onoda.generate(iris.data, 3)
    self.assertEqual((3, 4), result.shape)
def test_code_runs(self):
    """At least prove it runs"""
    iris = testloader.load_iris()
    # 3 clusters over Iris's 4 features -> expect a (3, 4) seed matrix
    result = bfinit.generate(iris.data, 3)
    self.assertEqual((3, 4), result.shape)