예제 #1
0
 def test_simple(self):
   """A Config built with no arguments carries the expected default settings."""
   cfg = Config()
   expected_parameter = {'k': 3, 'seed': 0}
   expected_compressor_parameter = {'bucket_size': 100}

   # Item access: clustering method, its parameters and the compressor method.
   self.assertEqual('kmeans', cfg['method'])
   self.assertEqual(expected_parameter, cfg['parameter'])
   self.assertEqual('simple', cfg['compressor_method'])

   # ``get`` access: compressor parameters and the distance.
   self.assertEqual(
       expected_compressor_parameter, cfg.get('compressor_parameter'))
   self.assertEqual('euclidean', cfg.get('distance'))
예제 #2
0
 def test_simple(self):
     """Check the default values of a Config created without arguments."""
     defaults = Config()

     method = defaults['method']
     self.assertEqual('kmeans', method)

     parameter = defaults['parameter']
     self.assertEqual({'k': 3, 'seed': 0}, parameter)

     compressor_method = defaults['compressor_method']
     self.assertEqual('simple', compressor_method)

     # The remaining two entries are read through ``get``.
     compressor_parameter = defaults.get('compressor_parameter')
     self.assertEqual({'bucket_size': 100}, compressor_parameter)

     distance = defaults.get('distance')
     self.assertEqual('euclidean', distance)
예제 #3
0
    def _test_func_with_legal_and_illegal_config(self, func):
        """Run ``func`` against an illegal and a legal clustering method.

        ``func`` is invoked as ``func(clustering, dataset)``.  With
        ``method='dbscan'`` the call must raise ``RuntimeError``; with
        ``method='kmeans'`` it must complete without raising.

        Each clustering instance is stopped in a ``finally`` clause so that a
        failing assertion, or an unexpected exception from ``func``, does not
        skip the ``stop()`` call.
        """
        dataset = self._make_stub_dataset()

        # test illegal method
        config = Config(method='dbscan',
                        compressor_parameter={"bucket_size": 5})
        clustering = self._make_stub_clustering(config, dataset)
        try:
            # assertRaises calls func(clustering, dataset) itself.
            self.assertRaises(RuntimeError, func, clustering, dataset)
        finally:
            clustering.stop()

        # test legal method
        config = Config(method='kmeans',
                        compressor_parameter={"bucket_size": 5})
        clustering = self._make_stub_clustering(config, dataset)
        try:
            func(clustering, dataset)
        finally:
            clustering.stop()
예제 #4
0
 def test_get_core_members(self):
     """``get_core_members`` can be called with ``light`` False and True.

     The test only checks that neither call raises; return values are not
     inspected.
     """
     dataset = self._make_stub_dataset()
     config = Config(method='kmeans',
                     compressor_parameter={"bucket_size": 5})
     clustering = self._make_stub_clustering(config, dataset)
     try:
         clustering.get_core_members(light=False)
         clustering.get_core_members(light=True)
     finally:
         # Stop the clustering instance even if one of the calls raised.
         clustering.stop()
예제 #5
0
 def test_method_params(self):
     """Each clustering method's default ``parameter`` has the expected keys."""
     expected_keys = (
         ('kmeans', ('k', 'seed')),
         ('gmm', ('k', 'seed')),
         ('dbscan', ('eps', 'min_core_point')),
     )
     for method, keys in expected_keys:
         parameter = Config(method=method)['parameter']
         for key in keys:
             # assertIn reports the missing key and the container on failure,
             # unlike assertTrue(key in parameter).
             self.assertIn(key, parameter)
예제 #6
0
 def test_compressor_params(self):
     """Default ``compressor_parameter`` keys, and an explicit override.

     First checks which keys are present for each compressor method, then
     checks that a ``compressor_parameter`` passed to the constructor is the
     value read back from the Config.
     """
     expected_keys = (
         ('simple', ('bucket_size',)),
         ('compressive', (
             'bucket_size',
             'bucket_length',
             'compressed_bucket_size',
             'bicriteria_base_size',
             'forgetting_factor',
             'seed',
         )),
     )
     for compressor_method, keys in expected_keys:
         compressor_parameter = Config(
             compressor_method=compressor_method)['compressor_parameter']
         for key in keys:
             # assertIn names the missing key in the failure message.
             self.assertIn(key, compressor_parameter)

     # An explicitly supplied bucket_size is kept.
     config = Config(compressor_method='simple',
                     compressor_parameter={'bucket_size': 10})
     self.assertEqual(10, config['compressor_parameter']['bucket_size'])
예제 #7
0
 def test_default(self):
     """``Config.default()`` selects kmeans, the simple compressor and euclidean."""
     defaults = Config.default()
     expectations = (
         ('method', 'kmeans'),
         ('compressor_method', 'simple'),
         ('distance', 'euclidean'),
     )
     # Checked in the listed order, one key at a time.
     for key, expected in expectations:
         self.assertEqual(expected, defaults[key])
예제 #8
0
 def test_compressor_methods(self):
     """``compressor_methods()`` on a Config returns a list."""
     config = Config()
     # assertIsInstance reports the actual object on failure.
     self.assertIsInstance(config.compressor_methods(), list)
예제 #9
0
 def test_distances(self):
     """``distances()`` on a Config returns a list."""
     config = Config()
     # assertIsInstance reports the actual object on failure.
     self.assertIsInstance(config.distances(), list)
예제 #10
0
 def test_compressor_methods(self):
   """``compressor_methods()`` on a Config returns a list."""
   config = Config()
   # assertIsInstance reports the actual object on failure.
   self.assertIsInstance(config.compressor_methods(), list)
예제 #11
0
 def test_methods(self):
     """``methods()`` on a Config returns a list."""
     config = Config()
     # assertIsInstance reports the actual object on failure.
     self.assertIsInstance(config.methods(), list)
예제 #12
0
 def test_methods(self):
   """``methods()`` on a Config returns a list."""
   config = Config()
   # assertIsInstance reports the actual object on failure.
   self.assertIsInstance(config.methods(), list)
예제 #13
0
 def test_distances(self):
   """``distances()`` on a Config returns a list."""
   config = Config()
   # assertIsInstance reports the actual object on failure.
   self.assertIsInstance(config.distances(), list)
예제 #14
0
 def test_get_revision(self):
     """A clustering instance started from a default Config is at revision 0."""
     clustering = Clustering.run(Config())
     try:
         self.assertEqual(0, clustering.get_revision())
     finally:
         # Stop the instance even when the assertion fails.
         clustering.stop()
예제 #15
0
 def test_push(self):
     """Every item yielded by ``push`` carries a result equal to True."""
     clustering = Clustering.run(Config())
     try:
         dataset = self._make_stub_dataset()
         # Only the third element of each yielded tuple is checked.
         for (_idx, _row_id, result) in clustering.push(dataset):
             self.assertEqual(result, True)
     finally:
         # Stop the instance even when an assertion fails or push raises.
         clustering.stop()
예제 #16
0
 def test_embedded(self):
     """Starting with ``embedded=True`` and stopping completes without raising."""
     default_config = Config()
     service = Clustering.run(default_config, embedded=True)
     service.stop()
예제 #17
0
from jubakit.clustering import Clustering, Schema, Dataset, Config
from jubakit.loader.csv import CSVLoader

# Load the input data from the CSV file 'blobs.csv'.
loader = CSVLoader('blobs.csv')

# Define a Schema that assigns a type to each column of the CSV file.
# The 'cluster' column is declared as Schema.ID; the second argument
# (Schema.NUMBER) presumably is the fallback type for all other columns --
# confirm against the jubakit Schema documentation.
schema = Schema({
    'cluster': Schema.ID,
}, Schema.NUMBER)

# Create a Dataset from the loader and the schema.
dataset = Dataset(loader, schema)

# Create a Clustering service configured to use the 'kmeans' method.
cfg = Config(method='kmeans')
clustering = Clustering.run(cfg)

# Update the Clustering model.
# The loop only drains the iterable returned by push(); the yielded
# values are not used.
for (idx, row_id, result) in clustering.push(dataset):
    pass

# Get the clusters.
clusters = clustering.get_core_members(light=False)
# Get the center of each cluster.
centers = clustering.get_k_center()

# Calculate SSE (sum of squared errors); the accumulator starts at zero.
sse = 0.0
for cluster, center in zip(clusters, centers):
    # Center of clusters
예제 #18
0
 def test_default(self):
   """``Config.default()`` yields kmeans / simple / euclidean settings."""
   default_config = Config.default()

   method = default_config['method']
   self.assertEqual('kmeans', method)

   compressor_method = default_config['compressor_method']
   self.assertEqual('simple', compressor_method)

   distance = default_config['distance']
   self.assertEqual('euclidean', distance)