def test_sparse(self):
    """Check that DBSCAN labels are identical for sparse and dense input.

    The same blob data (linearly transformed, then passed through
    ``StandardScaler``) is wrapped once as a ds-array built from a NumPy
    array and once as a ds-array built from a ``csr_matrix``. Both are
    clustered with the same estimator and the collected labels must be
    element-wise equal.
    """
    blobs, _ = make_blobs(n_samples=1500, random_state=170)
    estimator = DBSCAN(n_regions=1, eps=.15)

    # Apply a fixed 2x2 linear transformation before scaling.
    linear_map = [[0.6, -0.6], [-0.4, 0.8]]
    scaled = StandardScaler().fit_transform(np.dot(blobs, linear_map))

    # Both representations use the same block layout.
    block_shape = (300, 2)
    dense_data = ds.array(scaled, block_size=block_shape)
    sparse_data = ds.array(csr_matrix(scaled), block_size=block_shape)

    labels_dense = estimator.fit_predict(dense_data).collect()
    labels_sparse = estimator.fit_predict(sparse_data).collect()

    self.assertTrue(np.array_equal(labels_dense, labels_sparse))
def test_random_clusters_2(self):
    """Run DBSCAN on seeded uniform random 2-D data with several clusters.

    1000 points are drawn uniformly from [0, 10) in two dimensions with
    a fixed NumPy seed. The test expects the estimator to report 27
    clusters and 206 samples carrying the label -1.
    """
    np.random.seed(2)
    points = np.random.uniform(0, 10, size=(1000, 2))
    data = ds.array(points, block_size=(300, 2))

    model = DBSCAN(n_regions=10, max_samples=10, eps=0.5, min_samples=10)
    labels = model.fit_predict(data).collect()

    self.assertEqual(model.n_clusters, 27)

    # Count the samples that were assigned the label -1.
    minus_one_count = np.count_nonzero(labels == -1)
    self.assertEqual(minus_one_count, 206)
def test_random_clusters_3(self):
    """Run DBSCAN on seeded uniform random 3-D data with several clusters.

    1000 points are drawn uniformly from [0, 10) in three dimensions
    with a fixed NumPy seed, and the estimator is configured with
    ``dimensions=[0, 1]``. The test expects 50 clusters and 266 samples
    carrying the label -1.
    """
    np.random.seed(3)
    points = np.random.uniform(0, 10, size=(1000, 3))
    data = ds.array(points, block_size=(300, 3))

    model = DBSCAN(n_regions=10, dimensions=[0, 1], eps=0.9, min_samples=4)
    labels = model.fit_predict(data).collect()

    self.assertEqual(model.n_clusters, 50)

    # Count the samples that were assigned the label -1.
    minus_one_count = np.count_nonzero(labels == -1)
    self.assertEqual(minus_one_count, 266)
def test_n_clusters_aniso_dimensions(self):
    """Check the cluster count and sizes when ``dimensions`` is not None.

    Blob data is linearly transformed, passed through ``StandardScaler``
    and clustered with ``dimensions=[1]``. The test expects 4 clusters,
    and the sizes of the groups labelled -1, 0, 1, 2 and 3 must match
    the expected sizes.
    """
    blobs, _ = make_blobs(n_samples=1500, random_state=170)
    model = DBSCAN(n_regions=5, dimensions=[1], eps=.15)

    # Apply a fixed 2x2 linear transformation before scaling.
    linear_map = [[0.6, -0.6], [-0.4, 0.8]]
    scaled = StandardScaler().fit_transform(np.dot(blobs, linear_map))
    data = ds.array(scaled, block_size=(300, 2))

    labels = model.fit_predict(data).collect()

    expected_sizes = {19, 496, 491, 488, 6}
    # Sizes are gathered into a set, so the comparison does not depend
    # on which label is attached to which group size.
    found_sizes = {
        labels[labels == label].size for label in (-1, 0, 1, 2, 3)
    }

    self.assertEqual(model.n_clusters, 4)
    self.assertEqual(expected_sizes, found_sizes)