Esempio n. 1
0
 def test_fit_predict(self):
   """fit_predict marks the model as fitted and yields one label per sample."""
   X = np.array([[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]])
   clustering = KMeans(k=5, embedded=False)
   try:
     # A freshly constructed model must not report itself as fitted.
     self.assertFalse(clustering.fitted)
     y_pred = clustering.fit_predict(X)
     self.assertTrue(clustering.fitted)
     # One prediction is expected for every row of X.
     self.assertEqual(len(y_pred), X.shape[0])
   finally:
     # Stop the clustering instance even when an assertion fails, as the
     # sibling tests do, so it is not left behind after the test.
     clustering.stop()
Esempio n. 2
0
  def test_fit(self):
    """fit is expected to raise for k=10 on 5 samples and to fit for k=5."""
    X = np.array([[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]])

    # k=10 exceeds the 5 samples in X; the test expects RuntimeWarning.
    clustering = KMeans(k=10, embedded=False)
    try:
      # Pass the callable and its argument separately: assertRaises has to
      # perform the call itself to be able to catch the exception.
      self.assertRaises(RuntimeWarning, clustering.fit, X)
    finally:
      clustering.stop()

    clustering = KMeans(k=5, embedded=False)
    try:
      self.assertFalse(clustering.fitted)
      clustering.fit(X)
      self.assertTrue(clustering.fitted)
    finally:
      # The second instance needs to be stopped as well.
      clustering.stop()
Esempio n. 3
0
 def test_init(self):
   """A KMeans built with only embedded=False exposes the expected defaults."""
   clustering = KMeans(embedded=False)
   self.assertEqual(2, clustering.k)
   self.assertEqual('simple', clustering.compressor_method)
   self.assertEqual(100, clustering.bucket_size)
   self.assertEqual(100, clustering.compressed_bucket_size)
   self.assertEqual(10, clustering.bicriteria_base_size)
   self.assertEqual(2, clustering.bucket_length)
   self.assertEqual(0.0, clustering.forgetting_factor)
   self.assertEqual(0.5, clustering.forgetting_threshold)
   self.assertEqual(0, clustering.seed)
   # embedded=False was passed explicitly, so the attribute must be falsy.
   self.assertFalse(clustering.embedded)
   self.assertEqual('euclidean', clustering.distance)
   clustering.stop()
Esempio n. 4
0
 def test_predict(self):
   """predict is expected to raise before fit and to label every sample after."""
   X = np.array([[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]])
   clustering = KMeans(k=5, embedded=False)
   try:
     # Pass the callable and its argument separately: assertRaises has to
     # perform the call itself to be able to catch the exception.
     self.assertRaises(RuntimeError, clustering.predict, X)
     clustering.fit(X)
     y_pred = clustering.predict(X)
     # One prediction is expected for every row of X.
     self.assertEqual(len(y_pred), X.shape[0])
   finally:
     # Stop the clustering instance even when an assertion fails, as the
     # sibling tests do, so it is not left behind after the test.
     clustering.stop()
========================================

This is a simple example that illustrates Clustering service usage.

"""

from sklearn.datasets import make_blobs

from jubakit.wrapper.clustering import KMeans, GMM, DBSCAN

# Generate a synthetic blob dataset through the sklearn API.
X, y = make_blobs(n_samples=200, centers=3, n_features=2, random_state=42)

# Launch the clustering instances, one per algorithm.
clusterings = [
    KMeans(k=3, bucket_size=200, embedded=False),
    GMM(k=3, bucket_size=200, embedded=False),
    DBSCAN(eps=2.0, bucket_size=200, embedded=False)
]

for clustering in clusterings:
    # Fit the model and obtain a label for every sample.
    y_pred = clustering.fit_predict(X)
    # Tally how many samples received each distinct label.
    label_counts = {label: y_pred.count(label) for label in set(y_pred)}
    # Report the tally under the name of the clustering class.
    print('{0}: {1}'.format(clustering.__class__.__name__, label_counts))
    # Stop the clustering service.
    clustering.stop()
Esempio n. 6
0
  def test_make_compressor_parameter(self):
    """_make_compressor_parameter returns the expected dict for each method."""
    # The 'simple' compressor is expected to carry only the bucket size.
    clustering = KMeans(compressor_method='simple', embedded=False)
    compressor_parameter = {'bucket_size': 100}
    self.assertEqual(compressor_parameter,
                     clustering._make_compressor_parameter('simple'))
    clustering.stop()

    # The 'compressive' compressor is expected to carry the full set of
    # compression settings.
    clustering = KMeans(compressor_method='compressive', embedded=False)
    compressor_parameter = {
      'bucket_size': 100,
      'compressed_bucket_size': 100,
      'bicriteria_base_size': 10,
      'bucket_length': 2,
      'forgetting_factor': 0.0,
      'forgetting_threshold': 0.5,
      'seed': 0
    }
    # The comma between the two arguments is required; without it the
    # call is a syntax error.
    self.assertEqual(compressor_parameter,
                     clustering._make_compressor_parameter('compressive'))
    clustering.stop()
Esempio n. 7
0
 def test_method(self):
   """The KMeans wrapper reports 'kmeans' from _method()."""
   kmeans = KMeans(embedded=False)
   method_name = kmeans._method()
   self.assertEqual('kmeans', method_name)
   kmeans.stop()
Esempio n. 8
0
 def test_embedded(self):
   """Constructing KMeans with embedded=True must complete without raising."""
   embedded_kmeans = KMeans(embedded=True)
Esempio n. 9
0
 def test_simple(self):
   """A KMeans created with embedded=False can be stopped right away."""
   KMeans(embedded=False).stop()